File indexing completed on 2024-04-21 04:36:19

0001 /* This file is part of kdev-pg-qt
0002    Copyright (C) 2010 Jonathan Schmidt-Dominé <devel@the-user.org>
0003 
0004    This library is free software; you can redistribute it and/or
0005    modify it under the terms of the GNU Library General Public
0006    License as published by the Free Software Foundation; either
0007    version 2 of the License, or (at your option) any later version.
0008 
0009    This library is distributed in the hope that it will be useful,
0010    but WITHOUT ANY WARRANTY; without even the implied warranty of
0011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0012    Library General Public License for more details.
0013 
0014    You should have received a copy of the GNU Library General Public License
0015    along with this library; see the file COPYING.LIB.  If not, write to
0016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0017    Boston, MA 02110-1301, USA.
0018 */
0019 
0020 #ifndef KDEV_PG_REGEXP
0021 #define KDEV_PG_REGEXP
0022 
0023 #include <kdev-pg-char-sets.h>
0024 
0025 #include <vector>
0026 #include <algorithm>
0027 #include <stack>
0028 #include <string>
0029 #include <cassert>
0030 
0031 
0032 #include <QString>
0033 
0034 namespace KDevPG
0035 {
0036 
0037 template<typename T> class DFA;
0038 template<typename T> class NFA;
0039 template<CharEncoding enc> class SeqCharSet;
0040 template<CharEncoding enc> class TableCharSet;
0041 
0042 // General
0043 
0044 class GNFA;
0045 
0046 enum AutomatonType { SAscii, SLatin1, SUtf8, SUcs2, SUtf16, SUcs4, TAscii, TLatin1, TUtf8, TUcs2, TUtf16/*, TUcs4*/ };
0047 
0048 /**
0049  * Deterministic finite automaton
0050  */
0051 class GDFA
0052 {
0053   union
0054   {
0055     DFA<SeqCharSet<Ascii> > *s0;
0056     DFA<SeqCharSet<Latin1> > *s1;
0057     DFA<SeqCharSet<Ucs2> > *s2;
0058     DFA<SeqCharSet<Ucs4> > *s3;
0059     DFA<TableCharSet<Ascii> > *t0;
0060     DFA<TableCharSet<Latin1> > *t1;
0061     DFA<TableCharSet<Ucs2> > *t2;
0062 //     DFA<TableCharSet<Ucs4> > *t3;
0063   };
0064   friend class GNFA;
0065 public:
0066   static AutomatonType type;
0067   /// Generation of the core state-machine
0068   void codegen(QTextStream& str);
0069   /// Minimization of the automaton
0070   GDFA& minimize();
0071   /// Code used for the detected tokens in the generated code
0072   void setActions(const vector<QString>& actions);
0073   GDFA(const GDFA& o);
0074   ~GDFA();
0075   GDFA& operator=(const GDFA& o);
0076   /// Debugging-information
0077   void inspect();
0078   /// Nice output in .dot-format
0079   void dotOutput(QTextStream& o, const QString& name);
0080   /// NFA
0081   GNFA nfa();
0082 private:
0083   /// Has to be generated using a GNFA
0084   GDFA();
0085 };
0086 
0087 /// Non-deterministic finite automaton
0088 class GNFA
0089 {
0090   union
0091   {
0092     NFA<SeqCharSet<Ascii> > *s0;
0093     NFA<SeqCharSet<Latin1> > *s1;
0094     NFA<SeqCharSet<Ucs2> > *s2;
0095     NFA<SeqCharSet<Ucs4> > *s3;
0096     NFA<TableCharSet<Ascii> > *t0;
0097     NFA<TableCharSet<Latin1> > *t1;
0098     NFA<TableCharSet<Ucs2> > *t2;
0099 //     NFA<TableCharSet<Ucs4> > *t3;
0100   };
0101   friend class GDFA;
0102 public:
0103   GNFA(const GNFA& o);
0104   ~GNFA();
0105   GNFA& operator=(const GNFA& o);
0106   explicit GNFA(const std::vector<GNFA*>& init);
0107   /// Intersection
0108   GNFA& operator&=(const GNFA& o);
0109   /// Concatenation
0110   GNFA& operator<<=(const GNFA& o);
0111   /// Union
0112   GNFA& operator|=(const GNFA& o);
0113   /// Difference
0114   GNFA& operator^=(const GNFA& o);
0115   /// Kleene-star
0116   GNFA& operator*();
0117   /// Complement
0118   GNFA& negate();
0119   /// Whether it accepts the empty word
0120   bool acceptsEpsilon() const;
0121   /// Whether it represents the empty set
0122   bool isEmpty() const;
0123   /// Whether it contains arrivable loops
0124   bool isUnbounded() const;
0125   /// Length of the shortest accepted input (-1 iff isEmpty)
0126   int minLength() const;
0127   /// Length of the longest accepted input (-1 iff isEmpty, -2 iff isUnbounded)
0128   int maxLength() const;
0129   /// DFA
0130   GDFA dfa();
0131   /// Minimize
0132   GNFA& minimize();
0133   /// Debugging-information
0134   void inspect();
0135   /// Nice output in .dot-format
0136   void dotOutput(QTextStream& o, const QString& name);
0137   
0138   /// Accepts nothing
0139   static GNFA empty();
0140   /// Accepts any single character
0141   static GNFA anyChar();
0142   /// Accepts the given word
0143   static GNFA word(const QString& str);
0144   /// Accepts any of the chars in the string
0145   static GNFA collection(const QString& str);
0146   /// Accepts only the empty word
0147   static GNFA emptyWord();
0148   /// Accepts anything
0149   static GNFA anything();
0150   /// Accepts any character between begin and end (including begin, excluding end)
0151   static GNFA range(quint32 begin, quint32 end);
0152   /// Accepts a single codepoint (or nothing if it is not represantable with tha charset)
0153   static GNFA character(quint32 codepoint);
0154 private:
0155   GNFA(); // has to be constructed using a static member
0156 };
0157 
0158 }
0159 
0160 #endif