kdevelop-pg-qt/kdev-pg/kdev-pg-regexp.cpp

0001 /* This file is part of kdev-pg-qt
0002    Copyright (C) 2010 Jonathan Schmidt-Dominé <devel@the-user.org>
0003
0004    This library is free software; you can redistribute it and/or
0005    modify it under the terms of the GNU Library General Public
0006    License as published by the Free Software Foundation; either
0007    version 2 of the License, or (at your option) any later version.
0008
0009    This library is distributed in the hope that it will be useful,
0010    but WITHOUT ANY WARRANTY; without even the implied warranty of
0011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0012    Library General Public License for more details.
0013
0014    You should have received a copy of the GNU Library General Public License
0015    along with this library; see the file COPYING.LIB.  If not, write to
0016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0017    Boston, MA 02110-1301, USA.
0018 */
0019
0020 #include "kdev-pg-char-sets.h"
0021 #include "kdev-pg-regexp.h"
0022 #include "kdev-pg.h"
0023 #include "kdev-pg-regexp-helper.h"
0024 #include "kdev-pg-bit-array.h"
0025 #include <iostream>
0026 #include <queue>
0027 #include <stack>
0028 #include <QHash>
0029 #include <unordered_set>
0030 #include <unordered_map>
0031
0032 using namespace std;
0033
0034 // TODO: Lookahead operator
0035 // TODO: Line-End etc.
0036 // TODO: Deactivate/activate rules (have to keep the bitsets of possible final states)
0037
0038 #define q_Hash_to_tr1_hash(type) \
0039 namespace std                                               \
0040 {                                                           \
0041     template<> struct hash< type >                          \
0042     {                                                       \
0043       inline size_t operator()(const type &x) const         \
0044       {                                                     \
0045         return qHash(x);                                    \
0046       }                                                     \
0047     };                                                      \
0048 }
0049
0050 #if (QT_VERSION < QT_VERSION_CHECK(5, 14, 0))
0051 // Qt >= 5.14 already has std::hash for QBitArray
0052 q_Hash_to_tr1_hash(QBitArray)
0053 #endif
0054
0055 namespace KDevPG
0056 {
0057
0058 typedef BitArray UsedBitArray;
0059 typedef QUtf8ToUcs4Iterator Iterator;
0060 // typedef TableCharSet<Ascii> CharSet;
0061
0062 template<typename CharSet> class NFA;
0063
0064 inline QString codeForDot(QString str)
0065 {
0066   QString out = "";
0067   int pos = 0;
0068   forever
0069   {
0070     int npos = str.indexOf("\n\01!ASIgnore\"!!\n# ", pos);
0071     if(npos == -1)
0072     {
0073       out += str.midRef(pos);
0074       break;
0075     }
0076     out += str.midRef(pos, npos - pos);
0077     int nlpos = str.indexOf('\n', npos + 17);
0078     int codeendpos = str.indexOf("\n\01!AS/Ignore\"!!\n", nlpos);
0079     if(nlpos == -1 || codeendpos == -1)
0080     {
0081       out += "<junk>";
0082       break;
0083     }
0084     out += str.midRef(nlpos + 1, codeendpos - nlpos - 1);
0085     pos = codeendpos + 17;
0086   }
0087   return out.replace('\"', "\\\"").replace('\n', '\t').trimmed();
0088 }
0089
0090 /**
0091  * Deterministic finite automaton.
0092  */
0093 template<typename CS>
0094 class DFA
0095 {
0096 public:
0097     typedef CS CharSet;
0098 private:
0099     size_t nstates;
0100     vector<vector<pair<CharSet, size_t> > > rules;
0101     size_t numActions;
0102     vector<size_t> accept;
0103     vector<QString> actions;
0104     friend class NFA<CharSet>;
0105 public:
0106     /// Code used for the detected tokens in the generated code
0107     void setActions(const vector<QString>& _actions)
0108     {
0109       actions = _actions;
0110       numActions = actions.size() - 1;
0111     }
0112     /// Generate code for transitions
0113     void codegen(QTextStream& str)
0114     {
0115       str << "goto _state_0; // no warning about unused label\n";
0116       CharSetCondition<CharSet> csc;
0117       for(size_t i = 0; i != nstates; ++i)
0118       {
0119         str << "\n_state_" + QString::number(i) + ":\n";
0120         if(accept[i])
0121           str << "lpos = lxCURR_POS; lstate = " << accept[i] << ";\n";
0122         str << "lxNEXT_CHR(chr); ";
0123         csc(str, rules[i]);
0124       }
0125       str << "_end:\nplain() = lpos;\nswitch(lstate) {\n";
0126       for(size_t i = 0; i <= numActions; ++i)
0127       {
0128         str << "case " << QString::number(i) << ": ";
0129         if(i == 0)
0130           str << "goto _fail; // no warning about unused label\n_fail: ";
0131         str << "{" << actions[i] << "\nlxFINISH\n}\n";
0132       }
0133       str << "}\n";
0134     }
0135     /// .dot-output
0136     void dotOutput(QTextStream& out, const QString& name)
0137     {
0138       out << "digraph " << name << "{" << endl;
0139       for(size_t i = 0; i != nstates; ++i)
0140       {
0141         out << "s" << i << " [ label = \"" << i << "\"";
0142         if(i == 0)
0143           out << ", shape=rect, style=rounded";
0144         else
0145           out << ", shape=oval";
0146         if(accept[i] != 0)
0147           out << ", penwidth=4";
0148         out << " ];" << endl;
0149       }
0150       for(size_t i = 1; i <= numActions; ++i)
0151       {
0152         out << "f" << i << " [ label = \"" << codeForDot(actions[i]) << "\", shape=rect, penwidth=2 ];" << endl;
0153       }
0154       for(size_t i = 0; i != nstates; ++i)
0155       {
0156         for(auto j = rules[i].begin(); j != rules[i].end(); ++j)
0157         {
0158           out << "s" << i << " -> " << "s" << j->second << " [ label = \"" << j->first << "\" ];" << endl;
0159         }
0160         if(accept[i] != 0)
0161         {
0162           out << "s" << i << " -> " << "f" << accept[i] << ";" << endl;
0163         }
0164       }
0165       out << "}" << endl;
0166     }
0167     /// Debugging output
0168     void inspect()
0169     {
0170         cout << "#states = " << nstates << " accept = <";
0171         for(size_t i = 0; i != nstates; ++i)
0172             cout << ", " << accept[i];
0173         cout << "> rules = {";
0174         for(size_t i = 0; i != nstates; ++i)
0175         {
0176             cout << i << " = {";
0177             for(size_t j = 0; j != rules[i].size(); ++j)
0178                 cout << rules[i][j].first << " = " << rules[i][j].second << ", ";
0179             cout << "} ";
0180         }
0181         cout << "}" << endl;
0182     }
0183     /// Test if the string matches the regexp
0184     size_t accepts(const QString& str)
0185     {
0186         size_t state = 0;
0187         foreach(const QChar c, str)
0188         {
0189             foreach(const auto& r, rules[state])
0190             {
0191                 if(r.first.accepts(c.unicode())) // make it more portable!!
0192                 {
0193                     state = r.second;
0194                     goto success;
0195                 }
0196             }
0197             return false;
0198             success:;
0199         }
0200         return accept[state];
0201     }
0202     /// Can be used to test tokenization
0203     pair<Iterator, size_t> nextAction(Iterator iter)
0204     {
0205       Iterator npos = iter;
0206       size_t action = -1;
0207       size_t state = 0;
0208       while(iter.hasNext())
0209       {
0210         if(accept[state])
0211         {
0212           npos = iter;
0213           action = accept[state];
0214         }
0215         quint64 chr = iter.next(); // make it more portable!!
0216         foreach(const auto& r, rules[state])
0217         {
0218           if(r.first.accepts(chr))
0219           {
0220             state = r.second;
0221             goto success;
0222           }
0223         }
0224         return make_pair(npos, action);
0225         success:;
0226       }
0227       if(accept[state])
0228       {
0229         npos = iter;
0230         action = accept[state];
0231       }
0232       return make_pair(npos, action);
0233     }
0234     /// Remove each state x iff ¬active[x]
0235     DFA<CharSet>& filterStates(const UsedBitArray& active)
0236     {
0237         size_t curr = 0;
0238         vector<size_t> mapping(nstates);
0239         for(size_t i = 0; i != nstates; ++i)
0240         {
0241             if(active[i])
0242             {
0243                 rules[curr] = rules[i];
0244                 accept[curr] = accept[i];
0245                 mapping[i] = curr;
0246                 ++curr;
0247             }
0248         }
0249         rules.resize(curr);
0250         accept.resize(curr);
0251         nstates = curr;
0252         for(size_t i = 0; i != nstates; ++i)
0253         {
0254             auto orules = rules[i];
0255             rules[i].clear();
0256             for(auto j = orules.begin(); j != orules.end(); ++j)
0257             {
0258                 if(active[j->second])
0259                   rules[i].push_back(make_pair(j->first, mapping[j->second]));
0260             }
0261         }
0262         return *this;
0263     }
0264     /// Eliminate each state unarrivable from the start
0265     DFA<CharSet>& eliminateUnarrivableStates()
0266     {
0267         UsedBitArray arrivable(nstates);
0268         arrivable[0] = true;
0269         stack<size_t> todo;
0270         todo.push(0);
0271         while(todo.size())
0272         {
0273             size_t curr = todo.top();
0274             todo.pop();
0275             foreach(const auto& i, rules[curr])
0276             {
0277                 if(!arrivable[i.second])
0278                 {
0279                     arrivable[i.second] = true;
0280                     todo.push(i.second);
0281                 }
0282             }
0283         }
0284         return filterStates(arrivable);
0285     }
0286     /// Eliminate each state iff no final state is  arivable from it
0287     DFA<CharSet>& eliminateInactiveStates()
0288     {
0289       UsedBitArray active(nstates);
0290       for(size_t i = 0; i != nstates; ++i)
0291         if(accept[i])
0292           active[i] = true;
0293       bool changed;
0294       do
0295       {
0296         changed = false;
0297         for(size_t i = 0; i != nstates; ++i)
0298         {
0299           if(!active[i])
0300           {
0301             for(size_t j = 0; j != rules[i].size(); ++j)
0302             {
0303               if(active[rules[i][j].second])
0304               {
0305                 active[i] = true;
0306                 changed = true;
0307                 break;
0308               }
0309             }
0310           }
0311         }
0312       } while(changed);
0313       return filterStates(active);
0314     }
0315     /// followers in same group for every input
0316     bool sameGroup(size_t ngroups, const vector<size_t>& ogroups, size_t x, size_t y)
0317     {
0318         vector<CharSet> xsets(ngroups), ysets(ngroups);
0319         foreach(const auto& i, rules[x])
0320             xsets[ogroups[i.second]].unite(i.first);
0321         foreach(const auto& i, rules[y])
0322             ysets[ogroups[i.second]].unite(i.first);
0323         return xsets == ysets;
0324     }
0325     DFA<CharSet>& minimize()
0326     {
0327         eliminateUnarrivableStates();
0328         eliminateInactiveStates();
0329         vector<vector<size_t> > grinv(numActions + 1);
0330         for(size_t i = 0; i != nstates; ++i)
0331             grinv[(accept[i] - accept[0] + grinv.size()) % grinv.size()].push_back(i);
0332         size_t ongroups;
0333         vector<size_t> groups(nstates);
0334         do
0335         {
0336             for(size_t i = 0; i != grinv.size(); ++i)
0337                 for(size_t j = 0; j != grinv[i].size(); ++j)
0338                     groups[grinv[i][j]] = i;
0339             ongroups = grinv.size();
0340             grinv.clear();
0341             for(size_t i = 0; i != nstates; ++i)
0342             {
0343                 for(size_t j = 0; j != grinv.size(); ++j)
0344                 {
0345                     if(groups[grinv[j][0]] == groups[i] && sameGroup(ongroups, groups, grinv[j][0], i))
0346                     {
0347                         grinv[j].push_back(i);
0348                         goto found;
0349                     }
0350                 }
0351                 grinv.resize(grinv.size()+1);
0352                 grinv.back().push_back(i);
0353                 found:;
0354             }
0355         }
0356         while(ongroups != grinv.size());
0357         vector<vector<pair<CharSet, size_t> > > _rules(grinv.size());
0358         vector<size_t> _accept(grinv.size());
0359         for(size_t i = 0; i != nstates; ++i)
0360         {
0361             _accept[groups[i]] = accept[i];
0362             foreach(const auto& j, rules[i])
0363                 _rules[groups[i]].push_back(make_pair(j.first, groups[j.second]));
0364         }
0365         rules = _rules;
0366         nstates = grinv.size();
0367         accept = _accept;
0368         for(size_t i = 0; i != nstates; ++i)
0369         {
0370             vector<CharSet> mapping(nstates, CharSet::emptySet());
0371             foreach(const auto& j, rules[i])
0372                 mapping[j.second].unite(j.first);
0373             rules[i].clear();
0374             for(size_t j = 0; j != nstates; ++j)
0375                 if(!mapping[j].empty())
0376                     rules[i].push_back(make_pair(mapping[j], j));
0377         }
0378         eliminateUnarrivableStates();
0379         eliminateInactiveStates();
0380         return *this;
0381     }
0382     NFA<CharSet> nfa() const;
0383 };
0384
0385 template<typename CS>
0386 class NFA
0387 {
0388 public:
0389     typedef CS CharSet;
0390 private:
0391     size_t nstates;
0392     vector<vector<pair<CharSet, size_t> > > rules;
0393     size_t accept;
0394     friend class DFA<CharSet>;
0395 public:
0396     void inspect()
0397     {
0398         cout << "#states = " << nstates << " accept = " << accept << " rules = {";
0399         for(size_t i = 0; i != nstates; ++i)
0400         {
0401             cout << i << " = {";
0402             for(size_t j = 0; j != rules[i].size(); ++j)
0403                 cout << rules[i][j].first << " = " << rules[i][j].second << ", ";
0404             cout << "} ";
0405         }
0406         cout << "}" << endl;
0407     }
0408     void dotOutput(QTextStream& out, const QString& name)
0409     {
0410       out << "digraph " << name << "{" << endl;
0411       for(size_t i = 0; i != nstates; ++i)
0412       {
0413         out << "s" << i << " [ label = \"" << i << "\"";
0414         if(i == 0)
0415           out << ", shape=rect, style=rounded";
0416         else
0417           out << ", shape=oval";
0418         if(i >= accept)
0419           out << ", penwidth=4";
0420         out << " ];" << endl;
0421       }
0422       for(size_t i = 0; i != nstates; ++i)
0423       {
0424         for(auto j = rules[i].begin(); j != rules[i].end(); ++j)
0425         {
0426           out << "s" << i << " -> " << "s" << j->second << " [ label = \"" << j->first << "\" ];" << endl;
0427         }
0428       }
0429       out << "}" << endl;
0430     }
0431     /**
0432      * Accepts no words.
0433      */
0434     NFA() : nstates(2), rules(2), accept(1)
0435     {}
0436     static NFA<CharSet> emptyWord()
0437     {
0438       NFA<CharSet> res;
0439       res.nstates = 1;
0440       res.rules.resize(1);
0441       res.accept = 0;
0442       return res;
0443     }
0444     NFA(const NFA<CharSet>& o) : nstates(o.nstates), rules(o.rules), accept(o.accept)
0445     {}
0446     NFA(const CharSet& set) : nstates(2), rules(2), accept(1)
0447     {
0448         rules[0].push_back(make_pair(set, 1));
0449     }
0450     explicit NFA(const std::vector<NFA<CharSet> >& list)
0451     {
0452       if(list.size() == 0)
0453       {
0454         nstates = 2;
0455         rules.resize(2);
0456         accept = 1;
0457       }
0458       else
0459       {
0460         accept = 1;
0461         for(size_t i = 0; i != list.size(); ++i)
0462           accept += list.begin()[i].nstates;
0463         nstates = accept + list.size();
0464         rules.resize(nstates);
0465         size_t start = 1;
0466         for(size_t i = 0; i != list.size(); ++i)
0467         {
0468           for(size_t j = start; j != start + list.begin()[i].nstates; ++j)
0469           {
0470               rules[j] = list.begin()[i].rules[j - start];
0471               for(size_t k = 0; k != rules[j].size(); ++k)
0472                   rules[j][k].second += start;
0473           }
0474           rules[0].push_back(make_pair(CharSet(), start));
0475           start += list.begin()[i].nstates;
0476           rules[start - 1].push_back(make_pair(CharSet(), nstates - list.size() + i));
0477         }
0478       }
0479     }
0480     void adjust(size_t startnum)
0481     {
0482         accept += startnum;
0483         nstates += startnum;
0484         rules.resize(nstates);
0485         for(size_t i = nstates; i != startnum; )
0486         {
0487             --i;
0488             rules[i] = rules[i - startnum];
0489             for(size_t j = 0; j != rules[i].size(); ++j)
0490                 rules[i][j].second += startnum;
0491         }
0492         for(size_t i = 0; i != startnum; ++i)
0493             rules[i].clear();
0494     }
0495     NFA<CharSet>& operator|=(const NFA<CharSet>& o)
0496     {
0497         adjust(1);
0498         rules.resize(o.nstates+nstates+2);
0499         for(size_t i = nstates; i != nstates + o.nstates; ++i)
0500         {
0501             rules[i] = o.rules[i - nstates];
0502             for(size_t j = 0; j != rules[i].size(); ++j)
0503                 rules[i][j].second += nstates;
0504         }
0505         rules[0].push_back(make_pair(CharSet(), 1));
0506         rules[0].push_back(make_pair(CharSet(), nstates));
0507         nstates += o.nstates;
0508         rules[accept].push_back(make_pair(CharSet(), nstates));
0509         rules[nstates-1].push_back(make_pair(CharSet(), nstates));
0510         accept = nstates;
0511         ++nstates;
0512         return *this;
0513     }
0514     NFA<CharSet>& operator<<=(const NFA<CharSet>& o)
0515     {
0516         rules.resize(nstates + o.nstates + 1);
0517         rules[accept].push_back(make_pair(CharSet(), nstates));
0518         accept = o.accept + nstates;
0519         for(size_t i = nstates; i != nstates+o.nstates; ++i)
0520         {
0521             rules[i] = o.rules[i-nstates];
0522             for(size_t j = 0; j != rules[i].size(); ++j)
0523                 rules[i][j].second += nstates;
0524         }
0525         nstates += o.nstates;
0526         return *this;
0527     }
0528     NFA<CharSet>& operator*()
0529     {
0530         rules[accept].push_back(make_pair(CharSet(), 0));
0531         rules[0].push_back(make_pair(CharSet(), nstates));
0532         accept = nstates;
0533         ++nstates;
0534         rules.resize(nstates);
0535         return *this;
0536     }
0537     UsedBitArray closure(const UsedBitArray& states)
0538     {
0539         UsedBitArray s(states);
0540         assert(s.size() == nstates);
0541         stack<size_t> todo;
0542         for(size_t i = 0; i != nstates; ++i)
0543             if(s[i])
0544                 todo.push(i);
0545         while(todo.size())
0546         {
0547             size_t x = todo.top();
0548             todo.pop();
0549             for(size_t i = 0; i != rules[x].size(); ++i)
0550             {
0551                 if(rules[x][i].first.epsilon() && !s[rules[x][i].second])
0552                 {
0553                     s[rules[x][i].second] = true;
0554                     todo.push(rules[x][i].second);
0555                 }
0556             }
0557         }
0558         return s;
0559     }
0560     UsedBitArray vecUnion(const UsedBitArray& a, const UsedBitArray& b)
0561     {
0562         UsedBitArray res(a);
0563         for(size_t i = 0; i != a.size(); ++i)
0564             res[i] = res[i] || b[i];
0565         return res;
0566     }
0567     vector< pair<CharSet, UsedBitArray > > follow(const UsedBitArray& states)
0568     {
0569         UsedBitArray s(states);
0570         vector<pair<CharSet, UsedBitArray > > pr(nstates);
0571         for(size_t i = 0; i != nstates; ++i)
0572         {
0573             if(s[i])
0574             {
0575                 foreach(const auto& r, rules[i])
0576                 {
0577                     UsedBitArray nxt(nstates);
0578                     nxt[r.second] = true;
0579                     pr.push_back(make_pair(r.first, nxt));
0580                 }
0581             }
0582         }
0583         bool b = true;
0584         while(b)
0585         {
0586             b = false;
0587             for(size_t i = 0; i != pr.size(); ++i)
0588             {
0589                 if(!pr[i].first.epsilon())
0590                 {
0591                     for(size_t j = 0; j != i; ++j)
0592                     {
0593                         if(!pr[j].first.epsilon())
0594                         {
0595                             CharSet inter = pr[i].first.intersection(pr[j].first);
0596                             if(!inter.empty())
0597                             {
0598                                 CharSet diff1 = pr[i].first.difference(inter);
0599                                 CharSet diff2 = pr[j].first.difference(inter);
0600                                 UsedBitArray v1 = pr[i].second, v2 = pr[j].second;
0601                                 pr[i].first = inter;
0602                                 pr[i].second = vecUnion(pr[i].second, pr[j].second);
0603                                 if(diff1.empty())
0604                                 {
0605                                     if(diff2.empty())
0606                                     {
0607                                         swap(pr[j], pr.back());
0608                                         pr.pop_back();
0609                                     }
0610                                     else
0611                                     {
0612                                         pr[j].first = diff2;
0613                                         pr[j].second = v2;
0614                                     }
0615                                 }
0616                                 else
0617                                 {
0618                                     pr[j].first = diff1;
0619                                     pr[j].second = v1;
0620                                     if(!diff2.empty())
0621                                         pr.push_back(make_pair(diff2, v2));
0622                                 }
0623                                 b = true;
0624                                 goto retry;
0625                             }
0626                         }
0627                     }
0628                 }
0629             }
0630             retry:;
0631         }
0632         return pr;
0633     }
0634     DFA<CharSet> dfa()
0635     {
0636         unordered_set<UsedBitArray> states;
0637         unordered_map<UsedBitArray, vector<pair<CharSet, UsedBitArray > > > rules;
0638         stack<UsedBitArray > todo;
0639         UsedBitArray start(nstates);
0640         start[0] = true;
0641         start = closure(start);
0642         todo.push(start);
0643         while(todo.size())
0644         {
0645             UsedBitArray x = todo.top();
0646             todo.pop();
0647             states.insert(x);
0648             rules[x];
0649             vector<pair<CharSet, UsedBitArray > > nxt = follow(x);
0650             foreach(const auto& nx, nxt)
0651             {
0652                 if(!nx.first.epsilon())
0653                 {
0654                     UsedBitArray n = closure(nx.second);
0655                     if(!states.count(n))
0656                         todo.push(n);
0657                     rules[x].push_back(make_pair(nx.first, n));
0658                 }
0659             }
0660         }
0661         DFA<CharSet> _;
0662         _.nstates = states.size();
0663         _.accept.resize(_.nstates);
0664         _.numActions = nstates - accept;
0665         _.actions.resize(_.numActions + 1);
0666         unordered_map<UsedBitArray, size_t> st;
0667         size_t cnt = 0;
0668         foreach(const UsedBitArray& i, states)
0669         {
0670             if(i == start && cnt)
0671             {
0672                 st[*states.begin()] = cnt;
0673                 st[i] = 0;
0674             }
0675             else
0676                 st[i] = cnt;
0677             ++cnt;
0678         }
0679         _.rules.resize(_.nstates);
0680         for(auto i = st.begin(); i != st.end(); ++i)
0681         {
0682           for(size_t j = accept; j != nstates; ++j)
0683           {
0684             if(i->first[j])
0685             {
0686               _.accept[i->second] = j - accept + 1;
0687               break;
0688             }
0689           }
0690         }
0691         for(auto i = rules.begin(); i != rules.end(); ++i)
0692         {
0693             size_t key = st[i->first];
0694             foreach(const auto& nx, i->second)
0695                 _.rules[key].push_back(make_pair(nx.first, st[nx.second]));
0696         }
0697         return _;
0698     }
0699     bool acceptsEpsilon() const
0700     {
0701       stack<size_t> todo;
0702       todo.push(0);
0703       UsedBitArray vis(nstates);
0704       vis[0] = true;
0705       while(!todo.empty())
0706       {
0707         size_t curr = todo.top();
0708         todo.pop();
0709         if(curr >= accept)
0710           return true;
0711         foreach(const auto& nx, rules[curr])
0712         {
0713           if(nx.first.epsilon() && !vis[nx.second])
0714           {
0715             vis[nx.second] = true;
0716             todo.push(nx.second);
0717           }
0718         }
0719       }
0720       return false;
0721     }
0722     bool isEmpty() const
0723     {
0724       stack<size_t> todo;
0725       todo.push(0);
0726       UsedBitArray vis(nstates);
0727       vis[0] = true;
0728       while(!todo.empty())
0729       {
0730         size_t curr = todo.top();
0731         todo.pop();
0732         if(curr >= accept)
0733           return false;
0734         foreach(const auto& nx, rules[curr])
0735         {
0736           if(!vis[nx.second])
0737           {
0738             vis[nx.second] =true;
0739             todo.push(nx.second);
0740           }
0741         }
0742       }
0743       return true;
0744     }
0745     int minLength() const
0746     {
0747       queue<size_t> _todo0, _todo1;
0748       queue<size_t> *todo0 = &_todo0, *todo1 = &_todo1;
0749       todo0->push(0);
0750       UsedBitArray vis(nstates);
0751       vis[0] = true;
0752       int length = 0;
0753       while(!todo0->empty())
0754       {
0755         size_t curr = todo0->front();
0756         todo0->pop();
0757         if(curr >= accept)
0758           return length;
0759         foreach(const auto& nx, rules[curr])
0760         {
0761           if(!vis[nx.second])
0762           {
0763             vis[nx.second] = true;
0764             if(nx.first.epsilon())
0765               todo0->push(nx.second);
0766             else
0767               todo1->push(nx.second);
0768           }
0769         }
0770         if(todo0->empty())
0771         {
0772           ++length;
0773           swap(todo0, todo1);
0774         }
0775       }
0776       return -1;
0777     }
0778 private:
0779     bool unboundedCheck(size_t x, UsedBitArray& vis) const
0780     {
0781       vis[x] = true;
0782       foreach(const auto& nx, rules[x])
0783       {
0784         if(vis[nx.second])
0785           return true;
0786         else
0787           if(unboundedCheck(nx.second, vis))
0788             return true;
0789       }
0790       vis[x] = false;
0791       return false;
0792     }
0793 public:
0794     bool isUnbounded() const
0795     {
0796       UsedBitArray vis(nstates);
0797       return unboundedCheck(0, vis);
0798     }
0799     int maxLength() const
0800     {
0801       // assumes that there are no circle unarrivable from start
0802       if(isUnbounded())
0803         return -2;
0804       if(isEmpty())
0805         return -1;
0806       queue<size_t> _todo0, _todo1;
0807       queue<size_t> *todo0 = &_todo0, *todo1 = &_todo1;
0808       vector<int> dist(nstates, -1);
0809       vector<vector<pair<CharSet, size_t> > > brules(nstates);
0810       for(size_t i = 0; i != nstates; ++i)
0811       {
0812         foreach(const auto& nx, rules[i])
0813         {
0814           brules[nx.second].push_back(nx);
0815           brules[nx.second].back().second = i;
0816         }
0817       }
0818       for(size_t i = accept; i != nstates; ++i)
0819       {
0820         todo0->push(i);
0821         dist[i] = 0;
0822       }
0823       int length = 0;
0824       while(!todo0->empty())
0825       {
0826         size_t curr = todo0->front();
0827         todo0->pop();
0828         foreach(const auto& nx, brules[curr])
0829         {
0830           if(nx.first.epsilon())
0831           {
0832             if(dist[nx.second] < length)
0833             {
0834               dist[nx.second] = length;
0835               todo0->push(nx.second);
0836             }
0837           }
0838           else
0839           {
0840             if(dist[nx.second] <= length)
0841             {
0842               dist[nx.second] = length + 1;
0843               todo1->push(nx.second);
0844             }
0845           }
0846         }
0847         if(todo0->empty())
0848         {
0849           ++length;
0850           swap(todo0, todo1);
0851         }
0852       }
0853       return dist[0];
0854     }
0855     NFA<CharSet>& negate()
0856     {
0857       DFA<CharSet> tmp = dfa();
0858       tmp.rules.push_back(vector< pair<CharSet, size_t> >());
0859       tmp.rules.back().push_back(make_pair(CharSet::fullSet(), tmp.nstates));
0860       ++tmp.nstates;
0861       for(auto i = tmp.accept.begin(); i != tmp.accept.end(); ++i)
0862       {
0863         if(*i == 0)
0864           *i = 1;
0865         else
0866           *i = 0;
0867       }
0868       tmp.accept.push_back(1);
0869       for(auto i = tmp.rules.begin(); i != tmp.rules.end(); ++i)
0870       {
0871         CharSet successSet = CharSet::fullSet();
0872         for(auto j = i->begin(); j != i->end(); ++j)
0873         {
0874           CharSet tmp = j->first;
0875           tmp.negate();
0876           successSet.intersect(tmp);
0877         }
0878         if(!successSet.empty())
0879           i->push_back(make_pair(successSet, tmp.nstates - 1));
0880       }
0881       tmp.eliminateUnarrivableStates();
0882       tmp.eliminateInactiveStates();
0883       return *this = tmp.nfa();
0884     }
0885     NFA<CharSet>& operator&=(const NFA<CharSet>& o)
0886     {
0887       NFA<CharSet> _o = o;
0888       _o.negate();
0889       negate();
0890       *this |= _o;
0891       negate();
0892       return *this;
0893     }
0894     NFA<CharSet>& operator^=(const NFA<CharSet>& o)
0895     {
0896       negate();
0897       return (*this |= o).negate();
0898     }
0899 };
0900
0901 template<typename CharSet>
0902 NFA< CharSet > DFA<CharSet>::nfa() const
0903 {
0904   NFA<CharSet> res;
0905   if(nstates == 0)
0906     return res;
0907   res.nstates = nstates;
0908   res.rules = rules;
0909   res.accept = nstates;
0910   for(size_t i = 0; i != nstates; ++i)
0911   {
0912     if(nstates + accept[i] > res.nstates)
0913     {
0914       res.nstates = nstates + accept[i];
0915       res.rules.resize(res.nstates);
0916     }
0917     if(accept[i] != 0)
0918       res.rules[i].push_back(make_pair(CharSet(), nstates + accept[i] - 1));
0919   }
0920   if(res.nstates == nstates)
0921   {
0922     ++res.nstates;
0923     res.rules.push_back(vector<pair<CharSet, size_t> >());
0924   }
0925   return res;
0926 }
0927
0928 // notice: utf8/8bit and utf16/ucs2 do not require different NFAs and DFAs
0929 #define EACH_TYPE(macro) \
0930 switch(GDFA::type) \
0931 { \
0932   case SAscii: { typedef SeqCharSet<Ascii> T; macro(s0); } break; \
0933   case SLatin1: case SUtf8: { typedef SeqCharSet<Latin1> T; macro(s1); } break; \
0934   case SUcs2: case SUtf16: { typedef SeqCharSet<Ucs2> T; macro(s2); } break; \
0935   case SUcs4: { typedef SeqCharSet<Ucs4> T; macro(s3); } break; \
0936   case TAscii: { typedef TableCharSet<Ascii> T; macro(t0); } break; \
0937   case TLatin1: case TUtf8: { typedef TableCharSet<Latin1> T; macro(t1); } break; \
0938   case TUcs2: case TUtf16: { typedef TableCharSet<Ucs2> T; macro(t2); } break; \
0939   default: exit(-1); \
0940 }
0941
0942 void GDFA::codegen(QTextStream& str)
0943 {
0944 #define macro(x) x->codegen(str);
0945     EACH_TYPE(macro)
0946 #undef macro
0947 }
0948
0949 GDFA& GDFA::minimize()
0950 {
0951 #define macro(x) x->minimize();
0952   EACH_TYPE(macro)
0953 #undef macro
0954   return *this;
0955 }
0956
0957 void GDFA::setActions(const std::vector< QString >& actions)
0958 {
0959 #define macro(x) x->setActions(actions);
0960   EACH_TYPE(macro)
0961 #undef macro
0962 }
0963
0964 GDFA::GDFA()
0965 {
0966 #define macro(x) x = new DFA<T>;
0967     EACH_TYPE(macro)
0968 #undef macro
0969 }
0970
0971 GDFA::GDFA(const KDevPG::GDFA& o)
0972 {
0973 #define macro(x) x = new DFA<T>(*o.x);
0974     EACH_TYPE(macro)
0975 #undef macro
0976 }
0977
0978 GDFA::~GDFA()
0979 {
0980 #define macro(x) delete x;
0981     EACH_TYPE(macro)
0982 #undef macro
0983 }
0984
0985 GDFA& GDFA::operator=(const KDevPG::GDFA& o)
0986 {
0987 #define macro(x) *x = *o.x;
0988   EACH_TYPE(macro)
0989 #undef macro
0990   return *this;
0991 }
0992
0993 void GDFA::inspect()
0994 {
0995 #define DO_INSPECT(x) x->inspect();
0996   EACH_TYPE(DO_INSPECT)
0997 #undef DO_INSPECT
0998 }
0999
1000 void GDFA::dotOutput(QTextStream& o, const QString& name)
1001 {
1002 #define DO_DOT(x) x->dotOutput(o, name);
1003   EACH_TYPE(DO_DOT)
1004 #undef DO_DOT
1005 }
1006
1007 GNFA GDFA::nfa()
1008 {
1009   GNFA res;
1010 #define DO_NFA(x) res.x = new NFA<T>(x->nfa());
1011   EACH_TYPE(DO_NFA)
1012 #undef DO_NFA
1013   return res;
1014 }
1015
1016 GNFA::GNFA()
1017 {
1018   /* no initialization */
1019 }
1020
1021 GNFA::GNFA(const KDevPG::GNFA& o)
1022 {
1023 #define macro(x) x = new NFA<T>(*o.x);
1024   EACH_TYPE(macro)
1025 #undef macro
1026 }
1027
1028 GNFA::GNFA(const std::vector< GNFA* >& init)
1029 {
1030 #define macro(x) vector< NFA<T> > vec(init.size()); for(size_t i = 0; i != init.size(); ++i) vec[i] = *(init[i]->x); x = new NFA<T>(vec);
1031   EACH_TYPE(macro)
1032 #undef macro
1033 }
1034
1035 GNFA::~GNFA()
1036 {
1037 #define macro(x) delete x;
1038   EACH_TYPE(macro)
1039 #undef macro
1040 }
1041
1042 GNFA& GNFA::operator=(const KDevPG::GNFA& o)
1043 {
1044 #define macro(x) *x = *o.x;
1045   EACH_TYPE(macro)
1046 #undef macro
1047   return *this;
1048 }
1049
1050 bool GNFA::acceptsEpsilon() const
1051 {
1052 #define DO_AE(x) return x->acceptsEpsilon();
1053   EACH_TYPE(DO_AE)
1054 #undef DO_AE
1055 }
1056
1057 bool GNFA::isEmpty() const
1058 {
1059 #define DO_IE(x) return x->isEmpty();
1060   EACH_TYPE(DO_IE)
1061 #undef DO_IE
1062 }
1063
1064 bool GNFA::isUnbounded() const
1065 {
1066 #define DO_IU(x) return x->isUnbounded();
1067   EACH_TYPE(DO_IU)
1068 #undef DO_IU
1069 }
1070
1071 int GNFA::minLength() const
1072 {
1073 #define DO_ML(x) return x->minLength();
1074   EACH_TYPE(DO_ML)
1075 #undef DO_ML
1076 }
1077
1078 int GNFA::maxLength() const
1079 {
1080   #define DO_ML(x) return x->maxLength();
1081   EACH_TYPE(DO_ML)
1082   #undef DO_ML
1083 }
1084
1085 GNFA& GNFA::operator<<=(const KDevPG::GNFA& o)
1086 {
1087 #define DO_SHIFT(x) *x <<= *o.x;
1088   EACH_TYPE(DO_SHIFT)
1089 #undef DO_SHIFT
1090   return *this;
1091 }
1092
1093 GNFA& GNFA::operator|=(const KDevPG::GNFA& o)
1094 {
1095 #define DO_OR(x) *x |= *o.x;
1096   EACH_TYPE(DO_OR)
1097 #undef DO_OR
1098   return *this;
1099 }
1100
1101 GNFA& GNFA::operator&=(const KDevPG::GNFA& o)
1102 {
1103 #define DO_AND(x) *x &= *o.x;
1104   EACH_TYPE(DO_AND)
1105 #undef DO_AND
1106   return *this;
1107 }
1108
1109 GNFA& GNFA::operator^=(const KDevPG::GNFA& o)
1110 {
1111   #define DO_BUT(x) *x ^= *o.x;
1112   EACH_TYPE(DO_BUT)
1113   #undef DO_BUT
1114   return *this;
1115 }
1116
1117 GNFA& GNFA::operator*()
1118 {
1119 #define DO_STAR(x) **x;
1120   EACH_TYPE(DO_STAR)
1121 #undef DO_STAR
1122   return *this;
1123 }
1124
1125 GNFA& GNFA::negate()
1126 {
1127 #define DO_NEGATE(x) x->negate();
1128   EACH_TYPE(DO_NEGATE)
1129 #undef DO_NEGATE
1130   return *this;
1131 }
1132
1133 GNFA& GNFA::minimize()
1134 {
1135   *this = dfa().minimize().nfa();
1136   return *this;
1137 }
1138
1139 void GNFA::inspect()
1140 {
1141 #define DO_INSPECT(x) x->inspect();
1142   EACH_TYPE(DO_INSPECT)
1143 #undef DO_INSPECT
1144 }
1145
1146 void GNFA::dotOutput(QTextStream& o, const QString& name)
1147 {
1148 #define DO_DOT(x) x->dotOutput(o, name);
1149   EACH_TYPE(DO_DOT)
1150 #undef DO_DOT
1151 }
1152
1153 GDFA GNFA::dfa()
1154 {
1155   GDFA res;
1156 #define macro(x) *res.x = x->dfa();
1157   EACH_TYPE(macro)
1158 #undef macro
1159   return res;
1160 }
1161
1162 GNFA GNFA::empty()
1163 {
1164   GNFA res;
1165 #define macro(x) res.x = new NFA<T>;
1166   EACH_TYPE(macro)
1167 #undef macro
1168   return res;
1169 }
1170
1171 GNFA GNFA::word(const QString& str)
1172 {
1173 #define macro(x) \
1174   GNFA res = GNFA::emptyWord(); \
1175   QByteArray qba(str.toUtf8()); \
1176   Codec2FromUtf8Iterator<T::codec>::Result iter(qba); \
1177   while(iter.hasNext()) \
1178   { \
1179     *res.x <<= T(iter.next()); \
1180   } \
1181   return res;
1182   EACH_TYPE(macro)
1183 #undef macro
1184 }
1185
1186 GNFA GNFA::collection(const QString& str)
1187 {
1188   GNFA res = GNFA::empty();
1189   QUtf16ToUcs4Iterator iter(str);
1190   while(iter.hasNext())
1191   {
1192     quint32 next = iter.next();
1193     res |= range(next, next+1);
1194   }
1195   return res;
1196 }
1197
1198 GNFA GNFA::anyChar()
1199 {
1200   GNFA res;
1201 #define macro(x) res.x = new NFA<T>(T::fullSet());
1202   EACH_TYPE(macro)
1203 #undef macro
1204   // remove the surrogate range
1205   if(/*GDFA::type == TUcs2 || */GDFA::type == TUtf16)
1206   {
1207     *res.t2 ^= NFA<TableCharSet<Ucs2> >(TableCharSet<Ucs2>::range(0xd800, 0xe000));
1208   }
1209   else if(/*GDFA::type == SUcs2 || */GDFA::type == SUtf16)
1210   {
1211     *res.s2 ^= NFA<SeqCharSet<Ucs2> >(SeqCharSet<Ucs2>::range(0xd800, 0xe000));
1212   }
1213   // add surrogate pairs
1214   if(GDFA::type == SUtf16)
1215   {
1216     NFA<SeqCharSet<Ucs2> > surrpairs(SeqCharSet<Ucs2>::range(0xd800, 0xdc00));
1217     surrpairs <<= NFA<SeqCharSet<Ucs2> >(SeqCharSet<Ucs2>::range(0xdc00, 0xe000));
1218   }
1219   if(GDFA::type == TUtf16)
1220   {
1221     NFA<TableCharSet<Ucs2> > surrpairs(TableCharSet<Ucs2>::range(0xd800, 0xdc00));
1222     surrpairs <<= NFA<TableCharSet<Ucs2> >(TableCharSet<Ucs2>::range(0xdc00, 0xe000));
1223   }
1224   // all valid utf-8 values
1225   if(GDFA::type == SUtf8)
1226   {
1227     NFA<SeqCharSet<Latin1> > lowerBytes(SeqCharSet<Latin1>::range(0x80, 0xc0));
1228     NFA<SeqCharSet<Latin1> > ascii(SeqCharSet<Latin1>::range(0, 0x7f));
1229     NFA<SeqCharSet<Latin1> > topOf2(SeqCharSet<Latin1>::range(0xc0, 0xe0));
1230     NFA<SeqCharSet<Latin1> > topOf3(SeqCharSet<Latin1>::range(0xe0, 0xf0));
1231     NFA<SeqCharSet<Latin1> > topOf4(SeqCharSet<Latin1>::range(0xf0, 0x100));
1232     *res.s1 = (ascii |= (topOf2 <<= lowerBytes) |= ((topOf3 <<= lowerBytes) <<= lowerBytes) |= (((topOf4 <<= lowerBytes) <<= lowerBytes) <<= lowerBytes));
1233   }
1234   if(GDFA::type == TUtf8)
1235   {
1236     NFA<TableCharSet<Latin1> > lowerBytes(TableCharSet<Latin1>::range(0x80, 0xc0));
1237     NFA<TableCharSet<Latin1> > ascii(TableCharSet<Latin1>::range(0, 0x7f));
1238     NFA<TableCharSet<Latin1> > topOf2(TableCharSet<Latin1>::range(0xc0, 0xe0));
1239     NFA<TableCharSet<Latin1> > topOf3(TableCharSet<Latin1>::range(0xe0, 0xf0));
1240     NFA<TableCharSet<Latin1> > topOf4(TableCharSet<Latin1>::range(0xf0, 0x100));
1241     *res.t1 = (ascii |= (topOf2 <<= lowerBytes) |= ((topOf3 <<= lowerBytes) <<= lowerBytes) |= (((topOf4 <<= lowerBytes) <<= lowerBytes) <<= lowerBytes));
1242   }
1243   return res;
1244 }
1245
1246 GNFA GNFA::anything()
1247 {
1248   GNFA any = anyChar();
1249   return *any;
1250 }
1251
1252 GNFA GNFA::emptyWord()
1253 {
1254   GNFA res;
1255 #define macro(x) res.x = new NFA<T>(NFA<T>::emptyWord());
1256   EACH_TYPE(macro)
1257 #undef macro
1258   return res;
1259 }
1260
1261 #include "generated-kdev-utf8-tuples.h"
1262
1263 template<template<CharEncoding> class CharSet>
1264 void addUtf8Range(NFA<CharSet<Latin1> >& res, quint32 begin, quint32 end)
1265 {
1266   typedef typename Codec2Int<Latin1>::Result Int;
1267   Int from0, from1, from2, from3, to0, to1, to2, to3;
1268   if(begin < 0x80)
1269   {
1270     if(end <= 0x80)
1271     {
1272       res |= getUtf8Tuples<CharSet>(Int(begin), Int(end));
1273       return;
1274     }
1275     res |= getUtf8Tuples<CharSet>(Int(begin), Int(0x80));
1276     begin = 0x80;
1277   }
1278   if(begin < 0x800)
1279   {
1280     from1 = (begin & 0x3f) + 0x80;
1281     from0 = (begin >> 6) + 0xc0;
1282     Int to0, to1;
1283     if(end <= 0x800)
1284     {
1285       to1 = (end & 0x3f) + 0x80;
1286       to0 = (end >> 6) + 0xc0;
1287     }
1288     else
1289     {
1290       to1 = 0x80 + (1<<6);
1291       to0 = 0xc0 + (1<<5);
1292     }
1293     res |= getUtf8Tuples<CharSet>(from0, to0, from1, to1);
1294     if(end <= 0x800)
1295       return;
1296     begin = 0x800;
1297   }
1298   if(begin < 0x10000)
1299   {
1300     from2 = (begin & 0x3f) + 0x80;
1301     from1 = ((begin >> 6) & 0x3f) + 0x80;
1302     from0 = (begin >> 12) + 0xe0;
1303     if(end <= 0x10000)
1304     {
1305       to2 = (end & 0x3f) + 0x80;
1306       to1 = ((end >> 6) & 0x3f) + 0x80;
1307       to0 = (end >> 12) + 0xe0;
1308     }
1309     else
1310     {
1311       to2 = to1 = 0x80 + (1<<6);
1312       to0 = 0xe0 + (1 << 4);
1313     }
1314     res |= getUtf8Tuples<CharSet>(from0, to0, from1, to1, from2, to2);
1315     if(end <= 0x10000)
1316       return;
1317     begin = 0x10000;
1318   }
1319   from3 = (begin & 0x3f) + 0x80;
1320   from2 = ((begin >> 6) & 0x3f) + 0x80;
1321   from1 = ((begin >> 12) & 0x3f) + 0x80;
1322   from0 = (begin >> 18) + 0xf0;
1323   to3 = (end & 0x3f) + 0x80;
1324   to2 = ((end >> 6) & 0x3f) + 0x80;
1325   to1 = ((end >> 12) & 0x3f) + 0x80;
1326   to0 = (end >> 18) + 0xf0;
1327   res |= getUtf8Tuples<CharSet>(from0, to0, from1, to1, from2, to2, from3, to3);
1328 }
1329
1330 GNFA GNFA::range(quint32 begin, quint32 end)
1331 {
1332   // Utf32: take it
1333   // Latin1, Ascii, Ucs2: cut it
1334   // Utf8, Utf32: complicate
1335   GNFA res;
1336   switch(GDFA::type)
1337   {
1338     case SAscii: { res.s0 = new NFA<SeqCharSet<Ascii> >(begin >= 0x80 ? SeqCharSet<Ascii>::emptySet() : SeqCharSet<Ascii>::range(begin, min((quint32)0x80, end))); } break;
1339     case SLatin1: { res.s1 = new NFA<SeqCharSet<Latin1> >(begin >= 0x100 ? SeqCharSet<Latin1>::emptySet() : SeqCharSet<Latin1>::range(begin, min((quint32)0x100, end))); } break;
1340     case SUtf8: {
1341       res.s1 = new NFA<SeqCharSet<Latin1> >();
1342       addUtf8Range(*res.s1, begin, end);
1343     } break;
1344     case SUcs2: {
1345       res.s2 = new NFA<SeqCharSet<Ucs2> >(begin >= 0x10000 ? SeqCharSet<Ucs2>::emptySet() : SeqCharSet<Ucs2>::range(begin, min((quint32)0x10000, end))/*.difference(SeqCharSet<Ucs2>::range(0xd800, 0xe000))*/);
1346     } break;
1347     case SUtf16: {
1348 #define UTF16_IMPL(CS, field) \
1349       if(end >= 0x110000 || begin >= 0x110000)                                          \
1350       {                                                                                 \
1351         res.field = new NFA<CS<Ucs2> >();                                               \
1352       }                                                                                 \
1353       else if(end < 0x10000)                                                            \
1354       {                                                                                 \
1355         if(begin <= 0xd800)                                                             \
1356         {                                                                               \
1357           if(end <= begin)                                                              \
1358           {                                                                             \
1359             res.field = new NFA<CS<Ucs2> >();                                           \
1360           }                                                                             \
1361           else if(end > 0xe000)                                                         \
1362           {                                                                             \
1363             res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(begin, 0xd800).getUnion(CS<Ucs2>::range(0xe000, end))); \
1364           }                                                                             \
1365           else                                                                          \
1366           {                                                                             \
1367             res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(begin, min(end, (quint32)0xd800))); \
1368           }\
1369         }\
1370         else\
1371         {\
1372           auto mbegin = max(begin, (quint32)0xe000);\
1373           if(end <= mbegin)\
1374           {\
1375             res.field = new NFA<CS<Ucs2> >();\
1376           }\
1377           else\
1378           {\
1379             res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(mbegin, end));\
1380           }\
1381         }\
1382       }\
1383       else if(begin >= 0x10000)\
1384       {\
1385         if(end <= begin)\
1386         {\
1387           res.field = new NFA<CS<Ucs2> >();\
1388         }\
1389         else\
1390         {\
1391           quint32 startHighSurrogate = (((begin - 0x10000) & 0xffc00) >> 10) + 0xd800;\
1392           quint32 startLowSurrogate = ((begin - 0x10000) & 0x3ff) + 0xdc00;\
1393           quint32 endHighSurrogate = (((end - 0x10000) & 0xffc00) >> 10) + 0xd800;\
1394           quint32 endLowSurrogate = ((end - 0x10000) & 0x3ff) + 0xdc00;\
1395           if(endLowSurrogate == 0xdc00)\
1396           {\
1397             if(startLowSurrogate == 0xdc00)\
1398             {\
1399               res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate, endHighSurrogate));\
1400               *res.field <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(0xdc00, 0xe000));\
1401             }\
1402             else\
1403             {\
1404               auto firstPairs = NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate, startHighSurrogate + 1));\
1405               firstPairs <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(startLowSurrogate, 0xe000));\
1406               res.field = new NFA<CS<Ucs2> >(firstPairs);\
1407               if(startHighSurrogate < endHighSurrogate)\
1408               {\
1409                 auto surrPairs = NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate + 1, endHighSurrogate + 1));\
1410                 surrPairs <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(0xdc00, 0xe000));\
1411                 *res.field |= surrPairs;\
1412               }\
1413             }\
1414           }\
1415           else\
1416           {\
1417             if(startLowSurrogate == 0xdc00)\
1418             {\
1419               res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate, endHighSurrogate));\
1420             }\
1421             else\
1422             {\
1423               res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate, startHighSurrogate + 1));\
1424               *res.field <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(startLowSurrogate, (startHighSurrogate == endHighSurrogate ? endLowSurrogate : 0xe000)));\
1425               if(startHighSurrogate != endHighSurrogate)\
1426               {\
1427                 if(startHighSurrogate + 1 != endHighSurrogate)\
1428                 {\
1429                   auto midPairs = NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate + 1, endHighSurrogate));\
1430                   midPairs <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(startLowSurrogate, 0xe000));\
1431                   *res.field |= midPairs;\
1432                 }\
1433                 auto lastPairs = NFA<CS<Ucs2> >(CS<Ucs2>::range(endHighSurrogate, endHighSurrogate + 1));\
1434                 lastPairs <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(startLowSurrogate, endLowSurrogate));\
1435                 *res.field |= lastPairs;\
1436               }\
1437             }\
1438           }\
1439         }\
1440       }\
1441       else\
1442       {\
1443         if(end <= begin)\
1444         {\
1445           res.field = new NFA<CS<Ucs2> >();\
1446         }\
1447         else\
1448         {\
1449           if(begin < 0xd800)\
1450           {\
1451             res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(begin, 0xd800).getUnion(CS<Ucs2>::range(0xe000, 0x10000)));\
1452           }\
1453           else\
1454           {\
1455             res.field = new NFA<CS<Ucs2> >(CS<Ucs2>::range(max(begin, 0xe000u), 0x10000));\
1456           }\
1457           quint32 startHighSurrogate = 0xd800;\
1458           quint32 startLowSurrogate = 0xdc00;\
1459           quint32 endHighSurrogate = (((end - 0x10000) & 0xffc00) >> 10) + 0xd800;\
1460           quint32 endLowSurrogate = ((end - 0x10000) & 0x3ff) + 0xdc00;\
1461           if(endLowSurrogate == 0xdc00)\
1462           {\
1463             auto surrPairs = NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate, endHighSurrogate));\
1464             surrPairs <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(startLowSurrogate, 0xe000));\
1465             *res.field |= surrPairs;\
1466           }\
1467           else\
1468           {\
1469             if(endHighSurrogate > startHighSurrogate)\
1470             {\
1471               auto midPairs = NFA<CS<Ucs2> >(CS<Ucs2>::range(startHighSurrogate, endHighSurrogate));\
1472               midPairs <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(startLowSurrogate, 0xe000));\
1473               *res.field |= midPairs;\
1474             }\
1475             auto lastPairs = NFA<CS<Ucs2> >(CS<Ucs2>::range(endHighSurrogate, endHighSurrogate + 1));\
1476             lastPairs <<= NFA<CS<Ucs2> >(CS<Ucs2>::range(startLowSurrogate, endLowSurrogate));\
1477             *res.field |= lastPairs;\
1478           }\
1479         }\
1480       }
1481       UTF16_IMPL(SeqCharSet, s2)
1482     } break;
1483     case SUcs4: { res.s3 = new NFA<SeqCharSet<Ucs4> >(SeqCharSet<Ucs4>::range(begin, end)); } break;
1484     case TAscii: { res.t0 = new NFA<TableCharSet<Ascii> >(begin >= 0x80 ? TableCharSet<Ascii>::emptySet() : TableCharSet<Ascii>::range(begin, min((quint32)0x80, end))); } break;
1485     case TLatin1: { res.t1 = new NFA<TableCharSet<Latin1> >(begin >= 0x100 ? TableCharSet<Latin1>::emptySet() : TableCharSet<Latin1>::range(begin, min((quint32)0x100, end))); } break;
1486     case TUtf8: {
1487       res.t1 = new NFA<TableCharSet<Latin1> >();
1488       addUtf8Range(*res.t1, begin, end);
1489     } break;
1490     case TUcs2: {
1491       res.t2 = new NFA<TableCharSet<Ucs2> >(begin >= 0x10000 ? TableCharSet<Ucs2>::emptySet() : TableCharSet<Ucs2>::range(begin, min((quint32)0x10000, end))/*.difference(TableCharSet<Ucs2>::range(0xd800, 0xe000))*/);
1492     }
1493     case TUtf16: {
1494       UTF16_IMPL(TableCharSet, t2)
1495       #undef UTF16_IMPL
1496     } break;
1497     default: exit(-1);
1498   }
1499   // TODO: warning if empty
1500   return res;
1501 }
1502
1503 GNFA GNFA::character(quint32 codepoint)
1504 {
1505   return range(codepoint, codepoint+1);
1506 }
1507
1508 void deleteNFA(GNFA *ptr)
1509 {
1510   delete ptr;
1511 }
1512
1513 void deleteDFA(GDFA *ptr)
1514 {
1515   delete ptr;
1516 }
1517
1518 AutomatonType GDFA::type = SUcs2;
1519
1520 template<CharEncoding enc>
1521 const typename TableCharSet<enc>::BitArray TableCharSet<enc>::nullData = bitset<TableCharSet<enc>::nChars>();
1522
1523 }