File indexing completed on 2024-04-21 04:36:19
0001 /* This file is part of kdev-pg-qt 0002 Copyright (C) 2010 Jonathan Schmidt-Dominé <devel@the-user.org> 0003 0004 This library is free software; you can redistribute it and/or 0005 modify it under the terms of the GNU Library General Public 0006 License as published by the Free Software Foundation; either 0007 version 2 of the License, or (at your option) any later version. 0008 0009 This library is distributed in the hope that it will be useful, 0010 but WITHOUT ANY WARRANTY; without even the implied warranty of 0011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0012 Library General Public License for more details. 0013 0014 You should have received a copy of the GNU Library General Public License 0015 along with this library; see the file COPYING.LIB. If not, write to 0016 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0017 Boston, MA 02110-1301, USA. 0018 */ 0019 0020 #ifndef KDEV_PG_REGEXP 0021 #define KDEV_PG_REGEXP 0022 0023 #include <kdev-pg-char-sets.h> 0024 0025 #include <vector> 0026 #include <algorithm> 0027 #include <stack> 0028 #include <string> 0029 #include <cassert> 0030 0031 0032 #include <QString> 0033 0034 namespace KDevPG 0035 { 0036 0037 template<typename T> class DFA; 0038 template<typename T> class NFA; 0039 template<CharEncoding enc> class SeqCharSet; 0040 template<CharEncoding enc> class TableCharSet; 0041 0042 // General 0043 0044 class GNFA; 0045 0046 enum AutomatonType { SAscii, SLatin1, SUtf8, SUcs2, SUtf16, SUcs4, TAscii, TLatin1, TUtf8, TUcs2, TUtf16/*, TUcs4*/ }; 0047 0048 /** 0049 * Deterministic finite automaton 0050 */ 0051 class GDFA 0052 { 0053 union 0054 { 0055 DFA<SeqCharSet<Ascii> > *s0; 0056 DFA<SeqCharSet<Latin1> > *s1; 0057 DFA<SeqCharSet<Ucs2> > *s2; 0058 DFA<SeqCharSet<Ucs4> > *s3; 0059 DFA<TableCharSet<Ascii> > *t0; 0060 DFA<TableCharSet<Latin1> > *t1; 0061 DFA<TableCharSet<Ucs2> > *t2; 0062 // DFA<TableCharSet<Ucs4> > *t3; 0063 }; 0064 friend class GNFA; 0065 public: 0066 static AutomatonType type; 0067 /// Generation of the core state-machine 0068 void codegen(QTextStream& str); 0069 /// Minimization of the automaton 0070 GDFA& minimize(); 0071 /// Code used for the detected tokens in the generated code 0072 void setActions(const vector<QString>& actions); 0073 GDFA(const GDFA& o); 0074 ~GDFA(); 0075 GDFA& operator=(const GDFA& o); 0076 /// Debugging-information 0077 void inspect(); 0078 /// Nice output in .dot-format 0079 void dotOutput(QTextStream& o, const QString& name); 0080 /// NFA 0081 GNFA nfa(); 0082 private: 0083 /// Has to be generated using a GNFA 0084 GDFA(); 0085 }; 0086 0087 /// Non-deterministic finite automaton 0088 class GNFA 0089 { 0090 union 0091 { 0092 NFA<SeqCharSet<Ascii> > *s0; 0093 NFA<SeqCharSet<Latin1> > *s1; 0094 NFA<SeqCharSet<Ucs2> > *s2; 0095 NFA<SeqCharSet<Ucs4> > *s3; 0096 NFA<TableCharSet<Ascii> > *t0; 0097 NFA<TableCharSet<Latin1> > *t1; 0098 NFA<TableCharSet<Ucs2> > *t2; 0099 // NFA<TableCharSet<Ucs4> > *t3; 0100 }; 0101 friend class GDFA; 0102 public: 0103 GNFA(const GNFA& o); 0104 ~GNFA(); 0105 GNFA& operator=(const GNFA& o); 0106 explicit GNFA(const std::vector<GNFA*>& init); 0107 /// Intersection 0108 GNFA& operator&=(const GNFA& o); 0109 /// Concatenation 0110 GNFA& operator<<=(const GNFA& o); 0111 /// Union 0112 GNFA& operator|=(const GNFA& o); 0113 /// Difference 0114 GNFA& operator^=(const GNFA& o); 0115 /// Kleene-star 0116 GNFA& operator*(); 0117 /// Complement 0118 GNFA& negate(); 0119 /// Whether it accepts the empty word 0120 bool acceptsEpsilon() const; 0121 /// Whether it represents the empty set 0122 bool isEmpty() const; 0123 /// Whether it contains arrivable loops 0124 bool isUnbounded() const; 0125 /// Length of the shortest accepted input (-1 iff isEmpty) 0126 int minLength() const; 0127 /// Length of the longest accepted input (-1 iff isEmpty, -2 iff isUnbounded) 0128 int maxLength() const; 0129 /// DFA 0130 GDFA dfa(); 0131 /// Minimize 0132 GNFA& minimize(); 0133 /// Debugging-information 0134 void inspect(); 0135 /// Nice output in .dot-format 0136 void dotOutput(QTextStream& o, const QString& name); 0137 0138 /// Accepts nothing 0139 static GNFA empty(); 0140 /// Accepts any single character 0141 static GNFA anyChar(); 0142 /// Accepts the given word 0143 static GNFA word(const QString& str); 0144 /// Accepts any of the chars in the string 0145 static GNFA collection(const QString& str); 0146 /// Accepts only the empty word 0147 static GNFA emptyWord(); 0148 /// Accepts anything 0149 static GNFA anything(); 0150 /// Accepts any character between begin and end (including begin, excluding end) 0151 static GNFA range(quint32 begin, quint32 end); 0152 /// Accepts a single codepoint (or nothing if it is not represantable with tha charset) 0153 static GNFA character(quint32 codepoint); 0154 private: 0155 GNFA(); // has to be constructed using a static member 0156 }; 0157 0158 } 0159 0160 #endif