File indexing completed on 2024-04-21 03:57:39

0001 /*
0002     SPDX-FileCopyrightText: 2010 Bernhard Beschow <bbeschow@cs.tu-berlin.de>
0003     SPDX-FileCopyrightText: 2007 Sebastian Pipping <webmaster@hartwork.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 // BEGIN includes
0009 #include "kateregexpsearch.h"
0010 
0011 #include <ktexteditor/document.h>
0012 // END  includes
0013 
0014 // Turn debug messages on/off here
0015 // #define FAST_DEBUG_ENABLE
0016 
0017 #ifdef FAST_DEBUG_ENABLE
0018 #define FAST_DEBUG(x) qCDebug(LOG_KTE) << x
0019 #else
0020 #define FAST_DEBUG(x)
0021 #endif
0022 
0023 class KateRegExpSearch::ReplacementStream
0024 {
0025 public:
0026     struct counter {
0027         counter(int value, int minWidth)
0028             : value(value)
0029             , minWidth(minWidth)
0030         {
0031         }
0032 
0033         const int value;
0034         const int minWidth;
0035     };
0036 
0037     struct cap {
0038         cap(int n)
0039             : n(n)
0040         {
0041         }
0042 
0043         const int n;
0044     };
0045 
0046     enum CaseConversion {
0047         upperCase, ///< \U ... uppercase from now on
0048         upperCaseFirst, ///< \u ... uppercase the first letter
0049         lowerCase, ///< \L ... lowercase from now on
0050         lowerCaseFirst, ///< \l ... lowercase the first letter
0051         keepCase ///< \E ... back to original case
0052     };
0053 
0054 public:
0055     ReplacementStream(const QStringList &capturedTexts);
0056 
0057     QString str() const
0058     {
0059         return m_str;
0060     }
0061 
0062     ReplacementStream &operator<<(const QString &);
0063     ReplacementStream &operator<<(const counter &);
0064     ReplacementStream &operator<<(const cap &);
0065     ReplacementStream &operator<<(CaseConversion);
0066 
0067 private:
0068     const QStringList m_capturedTexts;
0069     CaseConversion m_caseConversion;
0070     QString m_str;
0071 };
0072 
0073 KateRegExpSearch::ReplacementStream::ReplacementStream(const QStringList &capturedTexts)
0074     : m_capturedTexts(capturedTexts)
0075     , m_caseConversion(keepCase)
0076 {
0077 }
0078 
0079 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const QString &str)
0080 {
0081     switch (m_caseConversion) {
0082     case upperCase:
0083         // Copy as uppercase
0084         m_str.append(str.toUpper());
0085         break;
0086 
0087     case upperCaseFirst:
0088         if (str.length() > 0) {
0089             m_str.append(str.at(0).toUpper());
0090             m_str.append(QStringView(str).mid(1));
0091             m_caseConversion = keepCase;
0092         }
0093         break;
0094 
0095     case lowerCase:
0096         // Copy as lowercase
0097         m_str.append(str.toLower());
0098         break;
0099 
0100     case lowerCaseFirst:
0101         if (str.length() > 0) {
0102             m_str.append(str.at(0).toLower());
0103             m_str.append(QStringView(str).mid(1));
0104             m_caseConversion = keepCase;
0105         }
0106         break;
0107 
0108     case keepCase: // FALLTHROUGH
0109     default:
0110         // Copy unmodified
0111         m_str.append(str);
0112         break;
0113     }
0114 
0115     return *this;
0116 }
0117 
0118 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const counter &c)
0119 {
0120     // Zero padded counter value
0121     m_str.append(QStringLiteral("%1").arg(c.value, c.minWidth, 10, QLatin1Char('0')));
0122 
0123     return *this;
0124 }
0125 
0126 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const cap &cap)
0127 {
0128     if (0 <= cap.n && cap.n < m_capturedTexts.size()) {
0129         (*this) << m_capturedTexts[cap.n];
0130     } else {
0131         // Insert just the number to be consistent with QRegExp ("\c" becomes "c")
0132         m_str.append(QString::number(cap.n));
0133     }
0134 
0135     return *this;
0136 }
0137 
0138 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion)
0139 {
0140     m_caseConversion = caseConversion;
0141 
0142     return *this;
0143 }
0144 
0145 // BEGIN d'tor, c'tor
0146 //
// KateRegExpSearch constructor
0148 //
0149 KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document)
0150     : m_document(document)
0151 {
0152 }
0153 
0154 // helper structs for captures re-construction
/// One text position seen in two ways: as an offset into the joined search
/// text and as a line/column pair. Members start out as -1 ("not resolved")
/// so a default-constructed cursor never carries indeterminate values.
struct TwoViewCursor {
    int index = -1; ///< offset into the joined text of the searched lines
    int line = -1; ///< line belonging to index
    int col = -1; ///< column belonging to index
};
0160 
/// Start and end offset of one capture group inside the joined search text.
/// -1 in both members stands for a group that captured nothing, which is also
/// the state of a default-constructed pair.
struct IndexPair {
    int openIndex = -1; ///< offset of the first captured character
    int closeIndex = -1; ///< offset just behind the last captured character
};
0165 
0166 QList<KTextEditor::Range>
0167 KateRegExpSearch::search(const QString &pattern, KTextEditor::Range inputRange, bool backwards, QRegularExpression::PatternOptions options)
0168 {
0169     // Save regexes to avoid reconstructing regexes all the time
0170     static QRegularExpression preRegex;
0171     static QRegularExpression repairedRegex;
0172 
0173     // Returned if no matches are found
0174     QList<KTextEditor::Range> noResult(1, KTextEditor::Range::invalid());
0175 
0176     // Note that some methods in vimode (e.g. Searcher::findPatternWorker) rely on the
0177     // this method returning here if 'pattern' is empty.
0178     if (pattern.isEmpty() || inputRange.isEmpty() || !inputRange.isValid()) {
0179         return noResult;
0180     }
0181 
0182     // Always enable Unicode support
0183     options |= QRegularExpression::UseUnicodePropertiesOption;
0184 
0185     if (preRegex.pattern() != pattern || preRegex.patternOptions() != options) {
0186         preRegex = QRegularExpression(pattern, options);
0187     }
0188 
0189     // If repairPattern() is called on an invalid regex pattern it may cause asserts
0190     // in QString (e.g. if the pattern is just '\\', pattern.size() is 1, and repaierPattern
0191     // expects at least one character after a '\')
0192     if (!preRegex.isValid()) {
0193         return noResult;
0194     }
0195 
0196     // detect pattern type (single- or mutli-line)
0197     bool stillMultiLine;
0198     const QString repairedPattern = repairPattern(pattern, stillMultiLine);
0199 
0200     // Enable multiline mode, so that the ^ and $ metacharacters in the pattern
0201     // are allowed to match, respectively, immediately after and immediately
0202     // before any newline in the subject string, as well as at the very beginning
0203     // and at the very end of the subject string (see QRegularExpression docs).
0204     //
0205     // Whole lines are passed to QRegularExpression, so that e.g. if the inputRange
0206     // ends in the middle of a line, then a '$' won't match at that position. And
0207     // matches that are out of the inputRange are rejected.
0208     if (stillMultiLine) {
0209         options |= QRegularExpression::MultilineOption;
0210     }
0211 
0212     // check if anything changed at all
0213     if (repairedRegex.pattern() != repairedPattern || repairedRegex.patternOptions() != options) {
0214         repairedRegex.setPattern(repairedPattern);
0215         repairedRegex.setPatternOptions(options);
0216     }
0217     if (!repairedRegex.isValid()) {
0218         return noResult;
0219     }
0220 
0221     const int rangeStartLine = inputRange.start().line();
0222     const int rangeStartCol = inputRange.start().column();
0223 
0224     const int rangeEndLine = inputRange.end().line();
0225     const int rangeEndCol = inputRange.end().column();
0226 
0227     if (stillMultiLine) {
0228         const int rangeLineCount = rangeEndLine - rangeStartLine + 1;
0229         FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." << rangeEndLine << ")");
0230 
0231         const int docLineCount = m_document->lines();
0232         // nothing to do...
0233         if (rangeStartLine >= docLineCount) {
0234             return noResult;
0235         }
0236 
0237         QList<int> lineLens(rangeLineCount);
0238         int maxMatchOffset = 0;
0239 
0240         // all lines in the input range
0241         QString wholeRange;
0242         for (int i = 0; i < rangeLineCount; ++i) {
0243             const int docLineIndex = rangeStartLine + i;
0244             if (docLineIndex < 0 || docLineCount <= docLineIndex) { // invalid index
0245                 return noResult;
0246             }
0247 
0248             const QString textLine = m_document->line(docLineIndex);
0249             lineLens[i] = textLine.length();
0250             wholeRange.append(textLine);
0251 
0252             // This check is needed as some parts in vimode rely on this behaviour.
0253             // We add an '\n' as a delimiter between lines in the range; but never after the
0254             // last line as that would add an '\n' that isn't there in the original text,
0255             // and can skew search results or hit an assert when accessing lineLens later
0256             // in the code.
0257             if (i != (rangeLineCount - 1)) {
0258                 wholeRange.append(QLatin1Char('\n'));
0259             }
0260 
0261             // lineLens.at(i) + 1, because '\n' was added
0262             maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.at(i) + 1;
0263 
0264             FAST_DEBUG("  line" << i << "has length" << lineLens.at(i));
0265         }
0266 
0267         FAST_DEBUG("Max. match offset" << maxMatchOffset);
0268 
0269         QRegularExpressionMatch match;
0270         bool found = false;
0271         QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(wholeRange, rangeStartCol);
0272 
0273         if (backwards) {
0274             while (iter.hasNext()) {
0275                 QRegularExpressionMatch curMatch = iter.next();
0276                 if (curMatch.capturedEnd() <= maxMatchOffset) {
0277                     match.swap(curMatch);
0278                     found = true;
0279                 }
0280             }
0281         } else { /* forwards */
0282             QRegularExpressionMatch curMatch;
0283             if (iter.hasNext()) {
0284                 curMatch = iter.next();
0285             }
0286             if (curMatch.capturedEnd() <= maxMatchOffset) {
0287                 match.swap(curMatch);
0288                 found = true;
0289             }
0290         }
0291 
0292         if (!found) {
0293             // no match
0294             FAST_DEBUG("not found");
0295             return noResult;
0296         }
0297 
0298         // Capture groups: save opening and closing indices and build a map,
0299         // the correct values will be written into it later
0300         QMap<int, TwoViewCursor *> indicesToCursors;
0301         const int numCaptures = repairedRegex.captureCount();
0302         QList<IndexPair> indexPairs(numCaptures + 1);
0303         for (int c = 0; c <= numCaptures; ++c) {
0304             const int openIndex = match.capturedStart(c);
0305             IndexPair &pair = indexPairs[c];
0306             if (openIndex == -1) {
0307                 // An invalid index indicates an empty capture group
0308                 pair.openIndex = -1;
0309                 pair.closeIndex = -1;
0310                 FAST_DEBUG("capture []");
0311             } else {
0312                 const int closeIndex = match.capturedEnd(c);
0313                 pair.openIndex = openIndex;
0314                 pair.closeIndex = closeIndex;
0315                 FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]");
0316 
0317                 // each key no more than once
0318                 if (!indicesToCursors.contains(openIndex)) {
0319                     TwoViewCursor *twoViewCursor = new TwoViewCursor;
0320                     twoViewCursor->index = openIndex;
0321                     indicesToCursors.insert(openIndex, twoViewCursor);
0322                     FAST_DEBUG("  capture group start index added: " << openIndex);
0323                 }
0324                 if (!indicesToCursors.contains(closeIndex)) {
0325                     TwoViewCursor *twoViewCursor = new TwoViewCursor;
0326                     twoViewCursor->index = closeIndex;
0327                     indicesToCursors.insert(closeIndex, twoViewCursor);
0328                     FAST_DEBUG("  capture group end index added: " << closeIndex);
0329                 }
0330             }
0331         }
0332 
0333         // find out where they belong
0334         int curRelLine = 0;
0335         int curRelCol = 0;
0336         int curRelIndex = 0;
0337 
0338         for (TwoViewCursor *twoViewCursor : std::as_const(indicesToCursors)) {
0339             // forward to index, save line/col
0340             const int index = twoViewCursor->index;
0341             FAST_DEBUG("resolving position" << index);
0342 
0343             while (curRelIndex <= index) {
0344                 FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex);
0345 
0346                 const int curRelLineLen = lineLens.at(curRelLine);
0347                 const int curLineRemainder = curRelLineLen - curRelCol;
0348                 const int lineFeedIndex = curRelIndex + curLineRemainder;
0349                 if (index <= lineFeedIndex) {
0350                     if (index == lineFeedIndex) {
0351                         // on this line _at_ line feed
0352                         FAST_DEBUG("  on line feed");
0353                         const int absLine = curRelLine + rangeStartLine;
0354                         twoViewCursor->line = absLine;
0355                         twoViewCursor->col = curRelLineLen;
0356 
0357                         // advance to next line
0358                         const int advance = (index - curRelIndex) + 1;
0359                         ++curRelLine;
0360                         curRelCol = 0;
0361                         curRelIndex += advance;
0362                     } else { // index < lineFeedIndex
0363                         // on this line _before_ line feed
0364                         FAST_DEBUG("  before line feed");
0365                         const int diff = (index - curRelIndex);
0366                         const int absLine = curRelLine + rangeStartLine;
0367                         const int absCol = curRelCol + diff;
0368                         twoViewCursor->line = absLine;
0369                         twoViewCursor->col = absCol;
0370 
0371                         // advance on same line
0372                         const int advance = diff + 1;
0373                         curRelCol += advance;
0374                         curRelIndex += advance;
0375                     }
0376                     FAST_DEBUG("position(" << twoViewCursor->line << "," << twoViewCursor->col << ")");
0377                 } else { // if (index > lineFeedIndex)
0378                     // not on this line
0379                     // advance to next line
0380                     FAST_DEBUG("  not on this line");
0381                     ++curRelLine;
0382                     curRelCol = 0;
0383                     const int advance = curLineRemainder + 1;
0384                     curRelIndex += advance;
0385                 }
0386             }
0387         }
0388 
0389         // build result array
0390         QList<KTextEditor::Range> result(numCaptures + 1, KTextEditor::Range::invalid());
0391         for (int y = 0; y <= numCaptures; y++) {
0392             IndexPair &pair = indexPairs[y];
0393             if (!(pair.openIndex == -1 || pair.closeIndex == -1)) {
0394                 const TwoViewCursor *const openCursors = indicesToCursors.value(pair.openIndex);
0395                 const TwoViewCursor *const closeCursors = indicesToCursors.value(pair.closeIndex);
0396                 const int startLine = openCursors->line;
0397                 const int startCol = openCursors->col;
0398                 const int endLine = closeCursors->line;
0399                 const int endCol = closeCursors->col;
0400                 FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")");
0401                 result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol);
0402             }
0403         }
0404 
0405         // free structs allocated for indicesToCursors
0406         qDeleteAll(indicesToCursors);
0407 
0408         return result;
0409     } else {
0410         // single-line regex search (forwards and backwards)
0411         const int rangeStartCol = inputRange.start().column();
0412         const uint rangeEndCol = inputRange.end().column();
0413 
0414         const int rangeStartLine = inputRange.start().line();
0415         const int rangeEndLine = inputRange.end().line();
0416 
0417         const int forInit = backwards ? rangeEndLine : rangeStartLine;
0418 
0419         const int forInc = backwards ? -1 : +1;
0420 
0421         FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." << (backwards ? rangeStartLine : rangeEndLine));
0422 
0423         for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) {
0424             if (j < 0 || m_document->lines() <= j) {
0425                 FAST_DEBUG("searchText | line " << j << ": no");
0426                 return noResult;
0427             }
0428 
0429             const QString textLine = m_document->line(j);
0430 
0431             const int offset = (j == rangeStartLine) ? rangeStartCol : 0;
0432             const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.length();
0433 
0434             bool found = false;
0435 
0436             QRegularExpressionMatch match;
0437 
0438             if (backwards) {
0439                 QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(textLine, offset);
0440                 while (iter.hasNext()) {
0441                     QRegularExpressionMatch curMatch = iter.next();
0442                     if (curMatch.capturedEnd() <= endLineMaxOffset) {
0443                         match.swap(curMatch);
0444                         found = true;
0445                     }
0446                 }
0447             } else {
0448                 match = repairedRegex.match(textLine, offset);
0449                 if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) {
0450                     found = true;
0451                 }
0452             }
0453 
0454             if (found) {
0455                 FAST_DEBUG("line " << j << ": yes");
0456 
0457                 // build result array
0458                 const int numCaptures = repairedRegex.captureCount();
0459                 QList<KTextEditor::Range> result(numCaptures + 1);
0460                 result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd());
0461 
0462                 FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart << ")..(" << j << ", " << match.capturedEnd() << ")");
0463 
0464                 for (int y = 1; y <= numCaptures; ++y) {
0465                     const int openIndex = match.capturedStart(y);
0466 
0467                     if (openIndex == -1) {
0468                         result[y] = KTextEditor::Range::invalid();
0469 
0470                         FAST_DEBUG("capture []");
0471                     } else {
0472                         const int closeIndex = match.capturedEnd(y);
0473 
0474                         FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")");
0475 
0476                         result[y] = KTextEditor::Range(j, openIndex, j, closeIndex);
0477                     }
0478                 }
0479                 return result;
0480             } else {
0481                 FAST_DEBUG("searchText | line " << j << ": no");
0482             }
0483         }
0484     }
0485     return noResult;
0486 }
0487 
0488 /*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text)
0489 {
0490     return buildReplacement(text, QStringList(), 0, false);
0491 }
0492 
0493 /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter)
0494 {
0495     return buildReplacement(text, capturedTexts, replacementCounter, true);
0496 }
0497 
0498 /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies)
0499 {
0500     // get input
0501     const int inputLen = text.length();
0502     int input = 0; // walker index
0503 
0504     // prepare output
0505     ReplacementStream out(capturedTexts);
0506 
0507     while (input < inputLen) {
0508         switch (text[input].unicode()) {
0509         case L'\n':
0510             out << text[input];
0511             input++;
0512             break;
0513 
0514         case L'\\':
0515             if (input + 1 >= inputLen) {
0516                 // copy backslash
0517                 out << text[input];
0518                 input++;
0519                 break;
0520             }
0521 
0522             switch (text[input + 1].unicode()) {
0523             case L'0': // "\0000".."\0377"
0524                 if (input + 4 >= inputLen) {
0525                     out << ReplacementStream::cap(0);
0526                     input += 2;
0527                 } else {
0528                     bool stripAndSkip = false;
0529                     const ushort text_2 = text[input + 2].unicode();
0530                     if ((text_2 >= L'0') && (text_2 <= L'3')) {
0531                         const ushort text_3 = text[input + 3].unicode();
0532                         if ((text_3 >= L'0') && (text_3 <= L'7')) {
0533                             const ushort text_4 = text[input + 4].unicode();
0534                             if ((text_4 >= L'0') && (text_4 <= L'7')) {
0535                                 int digits[3];
0536                                 for (int i = 0; i < 3; i++) {
0537                                     digits[i] = 7 - (L'7' - text[input + 2 + i].unicode());
0538                                 }
0539                                 const int ch = 64 * digits[0] + 8 * digits[1] + digits[2];
0540                                 out << QChar(ch);
0541                                 input += 5;
0542                             } else {
0543                                 stripAndSkip = true;
0544                             }
0545                         } else {
0546                             stripAndSkip = true;
0547                         }
0548                     } else {
0549                         stripAndSkip = true;
0550                     }
0551 
0552                     if (stripAndSkip) {
0553                         out << ReplacementStream::cap(0);
0554                         input += 2;
0555                     }
0556                 }
0557                 break;
0558 
0559             // single letter captures \x
0560             case L'1':
0561             case L'2':
0562             case L'3':
0563             case L'4':
0564             case L'5':
0565             case L'6':
0566             case L'7':
0567             case L'8':
0568             case L'9':
0569                 out << ReplacementStream::cap(9 - (L'9' - text[input + 1].unicode()));
0570                 input += 2;
0571                 break;
0572 
0573             // multi letter captures \{xxxx}
0574             case L'{': {
0575                 // allow {1212124}.... captures, see bug 365124 + testReplaceManyCapturesBug365124
0576                 int capture = 0;
0577                 int captureSize = 2;
0578                 while ((input + captureSize) < inputLen) {
0579                     const ushort nextDigit = text[input + captureSize].unicode();
0580                     if ((nextDigit >= L'0') && (nextDigit <= L'9')) {
0581                         capture = (10 * capture) + (9 - (L'9' - nextDigit));
0582                         ++captureSize;
0583                         continue;
0584                     }
0585                     if (nextDigit == L'}') {
0586                         ++captureSize;
0587                         break;
0588                     }
0589                     break;
0590                 }
0591                 out << ReplacementStream::cap(capture);
0592                 input += captureSize;
0593                 break;
0594             }
0595 
0596             case L'E': // FALLTHROUGH
0597             case L'L': // FALLTHROUGH
0598             case L'l': // FALLTHROUGH
0599             case L'U': // FALLTHROUGH
0600             case L'u':
0601                 if (!replacementGoodies) {
0602                     // strip backslash ("\?" -> "?")
0603                     out << text[input + 1];
0604                 } else {
0605                     // handle case switcher
0606                     switch (text[input + 1].unicode()) {
0607                     case L'L':
0608                         out << ReplacementStream::lowerCase;
0609                         break;
0610 
0611                     case L'l':
0612                         out << ReplacementStream::lowerCaseFirst;
0613                         break;
0614 
0615                     case L'U':
0616                         out << ReplacementStream::upperCase;
0617                         break;
0618 
0619                     case L'u':
0620                         out << ReplacementStream::upperCaseFirst;
0621                         break;
0622 
0623                     case L'E': // FALLTHROUGH
0624                     default:
0625                         out << ReplacementStream::keepCase;
0626                     }
0627                 }
0628                 input += 2;
0629                 break;
0630 
0631             case L'#':
0632                 if (!replacementGoodies) {
0633                     // strip backslash ("\?" -> "?")
0634                     out << text[input + 1];
0635                     input += 2;
0636                 } else {
0637                     // handle replacement counter
0638                     // eat and count all following hash marks
0639                     // each hash stands for a leading zero: \### will produces 001, 002, ...
0640                     int minWidth = 1;
0641                     while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L'#')) {
0642                         minWidth++;
0643                     }
0644                     out << ReplacementStream::counter(replacementCounter, minWidth);
0645                     input += 1 + minWidth;
0646                 }
0647                 break;
0648 
0649             case L'a':
0650                 out << QChar(0x07);
0651                 input += 2;
0652                 break;
0653 
0654             case L'f':
0655                 out << QChar(0x0c);
0656                 input += 2;
0657                 break;
0658 
0659             case L'n':
0660                 out << QChar(0x0a);
0661                 input += 2;
0662                 break;
0663 
0664             case L'r':
0665                 out << QChar(0x0d);
0666                 input += 2;
0667                 break;
0668 
0669             case L't':
0670                 out << QChar(0x09);
0671                 input += 2;
0672                 break;
0673 
0674             case L'v':
0675                 out << QChar(0x0b);
0676                 input += 2;
0677                 break;
0678 
0679             case L'x': // "\x0000".."\xffff"
0680                 if (input + 5 >= inputLen) {
0681                     // strip backslash ("\x" -> "x")
0682                     out << text[input + 1];
0683                     input += 2;
0684                 } else {
0685                     bool stripAndSkip = false;
0686                     const ushort text_2 = text[input + 2].unicode();
0687                     if (((text_2 >= L'0') && (text_2 <= L'9')) || ((text_2 >= L'a') && (text_2 <= L'f')) || ((text_2 >= L'A') && (text_2 <= L'F'))) {
0688                         const ushort text_3 = text[input + 3].unicode();
0689                         if (((text_3 >= L'0') && (text_3 <= L'9')) || ((text_3 >= L'a') && (text_3 <= L'f')) || ((text_3 >= L'A') && (text_3 <= L'F'))) {
0690                             const ushort text_4 = text[input + 4].unicode();
0691                             if (((text_4 >= L'0') && (text_4 <= L'9')) || ((text_4 >= L'a') && (text_4 <= L'f')) || ((text_4 >= L'A') && (text_4 <= L'F'))) {
0692                                 const ushort text_5 = text[input + 5].unicode();
0693                                 if (((text_5 >= L'0') && (text_5 <= L'9')) || ((text_5 >= L'a') && (text_5 <= L'f'))
0694                                     || ((text_5 >= L'A') && (text_5 <= L'F'))) {
0695                                     int digits[4];
0696                                     for (int i = 0; i < 4; i++) {
0697                                         const ushort cur = text[input + 2 + i].unicode();
0698                                         if ((cur >= L'0') && (cur <= L'9')) {
0699                                             digits[i] = 9 - (L'9' - cur);
0700                                         } else if ((cur >= L'a') && (cur <= L'f')) {
0701                                             digits[i] = 15 - (L'f' - cur);
0702                                         } else { // if ((cur >= L'A') && (cur <= L'F')))
0703                                             digits[i] = 15 - (L'F' - cur);
0704                                         }
0705                                     }
0706 
0707                                     const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3];
0708                                     out << QChar(ch);
0709                                     input += 6;
0710                                 } else {
0711                                     stripAndSkip = true;
0712                                 }
0713                             } else {
0714                                 stripAndSkip = true;
0715                             }
0716                         } else {
0717                             stripAndSkip = true;
0718                         }
0719                     }
0720 
0721                     if (stripAndSkip) {
0722                         // strip backslash ("\x" -> "x")
0723                         out << text[input + 1];
0724                         input += 2;
0725                     }
0726                 }
0727                 break;
0728 
0729             default:
0730                 // strip backslash ("\?" -> "?")
0731                 out << text[input + 1];
0732                 input += 2;
0733             }
0734             break;
0735 
0736         default:
0737             out << text[input];
0738             input++;
0739         }
0740     }
0741 
0742     return out.str();
0743 }
0744 
0745 QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine)
0746 {
0747     // '\s' can make a pattern multi-line, it's replaced here with '[ \t]';
0748     // besides \s, the following characters can make a pattern multi-line:
0749     // \n, \x000A (Line Feed), \x????-\x????, \0012, \0???-\0???
0750     // a multi-line pattern must not pass as single-line, the other
0751     // way around will just result in slower searches and is therefore
0752     // not as critical
0753 
0754     const int inputLen = pattern.length();
0755     const QStringView patternView{pattern};
0756 
0757     // prepare output
0758     QString output;
0759     output.reserve(2 * inputLen + 1); // twice should be enough for the average case
0760 
0761     // parser state
0762     bool insideClass = false;
0763 
0764     stillMultiLine = false;
0765     int input = 0;
0766     while (input < inputLen) {
0767         if (insideClass) {
0768             // wait for closing, unescaped ']'
0769             switch (pattern[input].unicode()) {
0770             case L'\\':
0771                 switch (pattern[input + 1].unicode()) {
0772                 case L'x':
0773                     if (input + 5 < inputLen) {
0774                         // copy "\x????" unmodified
0775                         output.append(patternView.mid(input, 6));
0776                         input += 6;
0777                     } else {
0778                         // copy "\x" unmodified
0779                         output.append(patternView.mid(input, 2));
0780                         input += 2;
0781                     }
0782                     stillMultiLine = true;
0783                     break;
0784 
0785                 case L'0':
0786                     if (input + 4 < inputLen) {
0787                         // copy "\0???" unmodified
0788                         output.append(patternView.mid(input, 5));
0789                         input += 5;
0790                     } else {
0791                         // copy "\0" unmodified
0792                         output.append(patternView.mid(input, 2));
0793                         input += 2;
0794                     }
0795                     stillMultiLine = true;
0796                     break;
0797 
0798                 case L's':
0799                     // replace "\s" with "[ \t]"
0800                     output.append(QLatin1String(" \\t"));
0801                     input += 2;
0802                     break;
0803 
0804                 case L'n':
0805                     stillMultiLine = true;
0806                     // FALLTROUGH
0807                     Q_FALLTHROUGH();
0808 
0809                 default:
0810                     // copy "\?" unmodified
0811                     output.append(patternView.mid(input, 2));
0812                     input += 2;
0813                 }
0814                 break;
0815 
0816             case L']':
0817                 // copy "]" unmodified
0818                 insideClass = false;
0819                 output.append(pattern[input]);
0820                 ++input;
0821                 break;
0822 
0823             default:
0824                 // copy "?" unmodified
0825                 output.append(pattern[input]);
0826                 ++input;
0827             }
0828         } else {
0829             switch (pattern[input].unicode()) {
0830             case L'\\':
0831                 switch (pattern[input + 1].unicode()) {
0832                 case L'x':
0833                     if (input + 5 < inputLen) {
0834                         // copy "\x????" unmodified
0835                         output.append(patternView.mid(input, 6));
0836                         input += 6;
0837                     } else {
0838                         // copy "\x" unmodified
0839                         output.append(patternView.mid(input, 2));
0840                         input += 2;
0841                     }
0842                     stillMultiLine = true;
0843                     break;
0844 
0845                 case L'0':
0846                     if (input + 4 < inputLen) {
0847                         // copy "\0???" unmodified
0848                         output.append(patternView.mid(input, 5));
0849                         input += 5;
0850                     } else {
0851                         // copy "\0" unmodified
0852                         output.append(patternView.mid(input, 2));
0853                         input += 2;
0854                     }
0855                     stillMultiLine = true;
0856                     break;
0857 
0858                 case L's':
0859                     // replace "\s" with "[ \t]"
0860                     output.append(QLatin1String("[ \\t]"));
0861                     input += 2;
0862                     break;
0863 
0864                 case L'n':
0865                     stillMultiLine = true;
0866                     // FALLTROUGH
0867                     Q_FALLTHROUGH();
0868                 default:
0869                     // copy "\?" unmodified
0870                     output.append(patternView.mid(input, 2));
0871                     input += 2;
0872                 }
0873                 break;
0874 
0875             case L'[':
0876                 // copy "[" unmodified
0877                 insideClass = true;
0878                 output.append(pattern[input]);
0879                 ++input;
0880                 break;
0881 
0882             default:
0883                 // copy "?" unmodified
0884                 output.append(pattern[input]);
0885                 ++input;
0886             }
0887         }
0888     }
0889     return output;
0890 }
0891 
0892 // Kill our helpers again
0893 #ifdef FAST_DEBUG_ENABLE
0894 #undef FAST_DEBUG_ENABLE
0895 #endif
0896 #undef FAST_DEBUG