File indexing completed on 2024-04-14 14:32:22

0001 //
0002 // C++ Implementation: cpattern
0003 //
0004 // Description: Pattern matching.
0005 //
0006 /*
0007 Copyright 2007-2011 Tomas Mecir <kmuddy@kmuddy.com>
0008 
0009 This program is free software; you can redistribute it and/or
0010 modify it under the terms of the GNU General Public License as
0011 published by the Free Software Foundation; either version 2 of 
0012 the License, or (at your option) any later version.
0013 
0014 This program is distributed in the hope that it will be useful,
0015 but WITHOUT ANY WARRANTY; without even the implied warranty of
0016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0017 GNU General Public License for more details.
0018 
0019 You should have received a copy of the GNU General Public License
0020 along with this program.  If not, see <http://www.gnu.org/licenses/>.
0021 */
0022 
0023 #include "cpattern.h"
0024 
0025 #include <qregexp.h>
0026 
0027 struct cPattern::Private {
0028   /** pattern */
0029   QString pattern;
0030   /** matching type */
0031   cPattern::PatternType type;
0032   /** case-sensitive comparison */
0033   Qt::CaseSensitivity cs;
0034   /** whole words only */
0035   bool wholewords;
0036 
0037   /** last text that has been successfully compared */
0038   QString lasttext;
0039   /** prefix/suffix parts of that text (before/after matched part) */
0040   QString prefix, suffix;
0041   /** position/length of matching part */
0042   int lastpos, lastlen;
0043   /** list of back-references (if doing regexp compare) */
0044   QStringList backreflist;
0045   /** positions of backreferences */
0046   int *backrefpos;
0047 
0048   /** creating regexp objects on each trigger matching is SLOW - pre-parse them */
0049   QRegExp regexp;
0050 };
0051 
0052 cPattern::cPattern (const QString &pattern, PatternType pt)
0053 {
0054   d = new Private;
0055 
0056   d->cs = Qt::CaseSensitive;
0057   d->wholewords = true;
0058   d->regexp.setCaseSensitivity (d->cs);
0059   d->backrefpos = nullptr;
0060   d->lastlen = d->lastpos = 0;
0061 
0062   d->pattern = pattern;
0063   d->type = pt;
0064   if (d->type == regexp)
0065     d->regexp.setPattern (pattern);
0066 }
0067 
0068 cPattern::~cPattern ()
0069 {
0070   if (d->backrefpos != nullptr)
0071     delete[] d->backrefpos;
0072   delete d;
0073 }
0074 
0075 void cPattern::setPattern (const QString &pattern)
0076 {
0077   d->pattern = pattern;
0078   if (d->type == regexp)
0079     d->regexp.setPattern (pattern);
0080 }
0081 
0082 QString cPattern::pattern () const
0083 {
0084   return d->pattern;
0085 }
0086 
0087 void cPattern::setMatching (PatternType pt) {
0088   d->type = pt;
0089   if (d->type == regexp)
0090     d->regexp.setPattern (d->pattern);
0091 }
0092 
0093 cPattern::PatternType cPattern::matching () const
0094 {
0095   return d->type;
0096 }
0097 
0098 void cPattern::setCaseSensitive (bool cs)
0099 {
0100   d->cs = cs ? Qt::CaseSensitive : Qt::CaseInsensitive;
0101   d->regexp.setCaseSensitivity (d->cs);
0102 }
0103 
0104 bool cPattern::caseSensitive () const
0105 {
0106   return d->cs;
0107 }
0108 
0109 void cPattern::setWholeWords (bool ww)
0110 {
0111   d->wholewords = ww;
0112 }
0113 
0114 bool cPattern::wholeWords () const
0115 {
0116   return d->wholewords;
0117 }
0118 
0119 bool cPattern::match (const QString &text, int matchingPos)
0120 {
0121   //do NOTHING if my d->pattern has zero length...
0122   if (d->pattern.length() == 0)
0123     return false;
0124 
0125   if (d->backrefpos != nullptr)
0126     delete[] d->backrefpos;
0127   d->backrefpos = nullptr;
0128 
0129   bool matched = false;
0130   int n;
0131   
0132   switch (d->type) {
0133     case exact:
0134       if (matchingPos != 0)
0135         matched = false;
0136       else if (d->cs)
0137         matched = (text == d->pattern);
0138       else
0139         matched = (text.toLower() == d->pattern.toLower());
0140       if (matched)
0141       {
0142         d->prefix = d->suffix = "";
0143         d->lastpos = 0;
0144         d->lastlen = d->pattern.length ();
0145       }
0146       break;
0147     case substring:
0148       n = text.indexOf (d->pattern, matchingPos, d->cs);
0149       matched = (n != -1);
0150       if (matched)
0151       {
0152         d->prefix = text.left (n);
0153         d->suffix = text.right (text.length() - (n + d->pattern.length()));
0154         d->lastpos = n;
0155         d->lastlen = d->pattern.length ();
0156       }
0157       break;
0158     case begin:
0159       if (matchingPos != 0)
0160         matched = false;
0161       else if (d->cs)
0162         matched = text.startsWith (d->pattern);
0163       else
0164         matched = text.toLower().startsWith (d->pattern.toLower());
0165       if (matched)
0166       {
0167         d->prefix = "";
0168         d->suffix = text.right (text.length() - d->pattern.length());
0169         d->lastpos = 0;
0170         d->lastlen = d->pattern.length ();
0171       }
0172       break;
0173     case end:
0174       if (matchingPos != 0)
0175         matched = false;
0176       else if (d->cs)
0177         matched = text.endsWith (d->pattern);
0178       else
0179         matched = text.toLower().endsWith (d->pattern.toLower());
0180       if (matched)
0181       {
0182         d->prefix = text.left (text.length() - d->pattern.length());
0183         d->suffix = "";
0184         d->lastpos = text.length() - d->pattern.length();
0185         d->lastlen = d->pattern.length ();
0186       }
0187       break;
0188     case regexp:
0189       //regexp's case-sensitivity is set in constructor and in function
0190       // setCaseSensitive
0191       n = d->regexp.indexIn (text, matchingPos);
0192       if (n != -1)    //MATCH!
0193       {
0194         matched = true;
0195         d->lastpos = n;
0196         d->lastlen = d->regexp.matchedLength();
0197         d->prefix = text.left (n);
0198         d->suffix = text.right (text.length() - (n + d->lastlen));
0199         d->backreflist.clear ();
0200         d->backreflist = d->regexp.capturedTexts ();
0201         //positions of back-references
0202         int npos = d->backreflist.count();
0203         d->backrefpos = new int[npos];
0204         for (int i = 0; i < npos; i++)
0205           d->backrefpos[i] = d->regexp.pos (i);
0206       }
0207       break;
0208   };
0209 
0210   // whole words only ?
0211   if (matched && d->wholewords) {
0212     int len = d->prefix.length();
0213     if ((len > 0) && (!d->prefix[len-1].isSpace ()))
0214       matched = false;
0215     len = d->suffix.length();
0216     if ((len > 0) && (!d->suffix[0].isSpace ()))
0217       matched = false;
0218   }
0219 
0220   // return the result...
0221   if (matched)
0222   {
0223     d->lasttext = text;
0224     return true;
0225   }
0226   else
0227     return false;
0228 }
0229 
0230 QString cPattern::getLastText () const
0231 {
0232   return d->lasttext;
0233 }
0234 
0235 QString cPattern::getPrefix () const
0236 {
0237   return d->prefix;
0238 }
0239 
0240 QString cPattern::getSuffix () const
0241 {
0242   return d->suffix;
0243 }
0244 
0245 int cPattern::getLastPos () const
0246 {
0247   return d->lastpos;
0248 }
0249 
0250 int cPattern::getLastLength () const
0251 {
0252   return d->lastlen;
0253 }
0254 
0255 QStringList cPattern::getBackRefList () const
0256 {
0257   return d->backreflist;
0258 }
0259 
0260 int cPattern::getBackRefPos (int which) const
0261 {
0262   if ((which < 0) || (which >= d->backreflist.count())) return -1;
0263   return d->backrefpos[which];
0264 }
0265 
0266 void cPattern::variablePosition (const QString &varname, int *start, int *len)
0267 {
0268   if ((!start) || (!len)) return;  // sanity check
0269 
0270   // by default, report that we found nothing
0271   *start = -1;
0272   *len = 0;
0273 
0274   bool ok;
0275   int number = varname.toInt (&ok);
0276   if (ok)   //it was a number
0277   {
0278     // must be a valid backref
0279     if (d->type != regexp) return;
0280     if (number >= (int) d->backreflist.count()) return;
0281     *start = getBackRefPos (number);
0282     *len = d->backreflist[number].length();
0283     return;
0284   }
0285   // prefixtrim and suffixtrim exist for compatibility with KMuddy <= 0.8
0286   // TODO: remove them sometimes after 1.0
0287   if ((varname == "prefix") || (varname == "prefixtrim")) {
0288     *start = 0;
0289     // locate first non-whitespace
0290     int length = d->prefix.length();
0291     while ((*start < length) && d->prefix[*start].isSpace())
0292       (*start)++;
0293     *len = d->prefix.trimmed().length();
0294   } else if ((varname == "suffix") || (varname == "suffixtrim")) {
0295     // locate first non-whitespace
0296     int end;
0297     for (end = d->suffix.length() - 1; end >= 0; --end)
0298       if (!d->suffix[end].isSpace()) break;
0299     *len = d->suffix.trimmed ().length();
0300     *start = end + 1 - *len;
0301   } else if (varname == "prefixfull") {
0302     *start = 0;
0303     *len = d->prefix.length();
0304   } else if (varname == "suffixfull") {
0305     *len = d->suffix.length();
0306     *start = d->lasttext.length() - *len;
0307   } else if (varname == "matched") {
0308     *start = d->lastpos;
0309     *len = d->lastlen;
0310   } else if (varname == "line") {
0311     *start = 0;
0312     *len = d->lasttext.length();
0313   }
0314 }
0315 
0316 QString cPattern::getVariable (const QString &varname, const QString &def) const
0317 {
0318   //look if we know the name; return the real string if we do...
0319   QString result;
0320   bool ok;
0321   int number = varname.toInt (&ok);
0322   if (ok)   //it was a number
0323   {
0324     if (d->type == regexp)
0325     {
0326       if (number >= (int) d->backreflist.count())
0327         result = QString();
0328       else
0329         result = d->backreflist[number];
0330     }
0331     else
0332       result = def;
0333   }
0334   else
0335   {
0336     // prefixtrim and suffixtrim exist for compatibility with KMuddy <= 0.8
0337     // TODO: remove them sometimes after 1.0
0338     if ((varname == "prefix") || (varname == "prefixtrim"))
0339       result = d->prefix.trimmed ();
0340     else if ((varname == "suffix") || (varname == "suffixtrim"))
0341       result = d->suffix.trimmed ();
0342     else if (varname == "prefixfull")
0343       result = d->prefix;
0344     else if (varname == "suffixfull")
0345       result = d->suffix;
0346     else if (varname == "matched")
0347       result = d->lasttext.mid (d->lastpos, d->lastlen);
0348     else if (varname == "line")
0349       result = d->lasttext;
0350     else
0351       result = def;
0352   }
0353   return result;
0354 }
0355 
0356 // Pseudo-variables could in theory be done using local variables in a command queue,
0357 // the problem is that aliases need them too, thus we cannot expand like that ...
0358 // Hence we need to have this thingie ...
0359 void cPattern::expandPseudoVariables (QString &string) const
0360 {
0361   QString newstring = "";
0362   int len = string.length ();
0363   bool invar = false;
0364   bool inpar = false; //variable name is in parentheses '(' and ')'
0365   QString varname;
0366   for (int i = 0; i < len; i++)
0367   {
0368     QChar ch = string[i];
0369 
0370     //support things like $$$a correctly (when mixing pseudo and normal vars)
0371     if (invar && (!inpar) && (ch == '$') && (varname.isEmpty()))
0372     {
0373       invar = false;
0374       newstring += ch;
0375     }
0376 
0377     if (!invar)
0378     {
0379       if (ch == '$')
0380       {
0381         invar = true;
0382         inpar = false;
0383         varname = "";
0384       }
0385       else
0386         newstring += ch;
0387     }
0388     else
0389     {
0390       if ((varname == "") && (ch == '(') && (!inpar))
0391         inpar = true;
0392       else
0393         if (!(ch.isLetterOrNumber ()))  //end of pseudo-variable name
0394       {
0395         invar = false;
0396 
0397         if (inpar)
0398         {
0399           inpar = false;
0400           if (ch == ')')
0401             newstring += getVariable (varname, "$(" + varname + ")");
0402           else
0403             newstring += "$(" + varname + ((ch == '$') ? QString() : QString(ch));
0404         }
0405         else
0406         {
0407           newstring += getVariable (varname, "$" + varname);
0408           if (ch != '$')
0409             newstring += ch;
0410         }
0411         if (ch == '$')  //new variable follows immediately
0412         {
0413           invar = true;
0414           varname = "";
0415         }
0416       }
0417       else
0418         varname += ch;
0419     }
0420   }
0421 
0422   if (invar) {    //if a variable ends the string
0423     if (inpar)
0424       newstring += "$(" + varname;  //no ending parenthesis - no variable
0425     else
0426       newstring += getVariable (varname, "$"+varname);
0427   }
0428 
0429   //apply changes!
0430   string = newstring;
0431 }
0432 
0433 QMap<QString, QVariant> cPattern::scriptVariables ()
0434 {
0435   QMap<QString, QVariant> res;
0436   res["prefix"] = getVariable("prefix");
0437   res["prefixfull"] = getVariable("prefixfull");
0438   res["suffix"] = getVariable("suffix");
0439   res["suffixfull"] = getVariable("suffixfull");
0440   res["matched"] = getVariable("matched");
0441   res["line"] = getVariable("line");
0442   if (d->type == regexp) res["matches"] = d->backreflist;
0443   return res;
0444 }
0445