File indexing completed on 2024-04-21 03:57:39
0001 /* 0002 SPDX-FileCopyrightText: 2010 Bernhard Beschow <bbeschow@cs.tu-berlin.de> 0003 SPDX-FileCopyrightText: 2007 Sebastian Pipping <webmaster@hartwork.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 // BEGIN includes 0009 #include "kateregexpsearch.h" 0010 0011 #include <ktexteditor/document.h> 0012 // END includes 0013 0014 // Turn debug messages on/off here 0015 // #define FAST_DEBUG_ENABLE 0016 0017 #ifdef FAST_DEBUG_ENABLE 0018 #define FAST_DEBUG(x) qCDebug(LOG_KTE) << x 0019 #else 0020 #define FAST_DEBUG(x) 0021 #endif 0022 0023 class KateRegExpSearch::ReplacementStream 0024 { 0025 public: 0026 struct counter { 0027 counter(int value, int minWidth) 0028 : value(value) 0029 , minWidth(minWidth) 0030 { 0031 } 0032 0033 const int value; 0034 const int minWidth; 0035 }; 0036 0037 struct cap { 0038 cap(int n) 0039 : n(n) 0040 { 0041 } 0042 0043 const int n; 0044 }; 0045 0046 enum CaseConversion { 0047 upperCase, ///< \U ... uppercase from now on 0048 upperCaseFirst, ///< \u ... uppercase the first letter 0049 lowerCase, ///< \L ... lowercase from now on 0050 lowerCaseFirst, ///< \l ... lowercase the first letter 0051 keepCase ///< \E ... back to original case 0052 }; 0053 0054 public: 0055 ReplacementStream(const QStringList &capturedTexts); 0056 0057 QString str() const 0058 { 0059 return m_str; 0060 } 0061 0062 ReplacementStream &operator<<(const QString &); 0063 ReplacementStream &operator<<(const counter &); 0064 ReplacementStream &operator<<(const cap &); 0065 ReplacementStream &operator<<(CaseConversion); 0066 0067 private: 0068 const QStringList m_capturedTexts; 0069 CaseConversion m_caseConversion; 0070 QString m_str; 0071 }; 0072 0073 KateRegExpSearch::ReplacementStream::ReplacementStream(const QStringList &capturedTexts) 0074 : m_capturedTexts(capturedTexts) 0075 , m_caseConversion(keepCase) 0076 { 0077 } 0078 0079 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const QString &str) 0080 { 0081 switch (m_caseConversion) { 0082 case upperCase: 0083 // Copy as uppercase 0084 m_str.append(str.toUpper()); 0085 break; 0086 0087 case upperCaseFirst: 0088 if (str.length() > 0) { 0089 m_str.append(str.at(0).toUpper()); 0090 m_str.append(QStringView(str).mid(1)); 0091 m_caseConversion = keepCase; 0092 } 0093 break; 0094 0095 case lowerCase: 0096 // Copy as lowercase 0097 m_str.append(str.toLower()); 0098 break; 0099 0100 case lowerCaseFirst: 0101 if (str.length() > 0) { 0102 m_str.append(str.at(0).toLower()); 0103 m_str.append(QStringView(str).mid(1)); 0104 m_caseConversion = keepCase; 0105 } 0106 break; 0107 0108 case keepCase: // FALLTHROUGH 0109 default: 0110 // Copy unmodified 0111 m_str.append(str); 0112 break; 0113 } 0114 0115 return *this; 0116 } 0117 0118 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const counter &c) 0119 { 0120 // Zero padded counter value 0121 m_str.append(QStringLiteral("%1").arg(c.value, c.minWidth, 10, QLatin1Char('0'))); 0122 0123 return *this; 0124 } 0125 0126 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const cap &cap) 0127 { 0128 if (0 <= cap.n && cap.n < m_capturedTexts.size()) { 0129 (*this) << m_capturedTexts[cap.n]; 0130 } else { 0131 // Insert just the number to be consistent with QRegExp ("\c" becomes "c") 0132 m_str.append(QString::number(cap.n)); 0133 } 0134 0135 return *this; 0136 } 0137 0138 KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion) 0139 { 0140 m_caseConversion = caseConversion; 0141 0142 return *this; 0143 } 0144 0145 // BEGIN d'tor, c'tor 0146 // 0147 // KateSearch Constructor 0148 // 0149 KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document) 0150 : m_document(document) 0151 { 0152 } 0153 0154 // helper structs for captures re-construction 0155 struct TwoViewCursor { 0156 int index; 0157 int line; 0158 int col; 0159 }; 0160 0161 struct IndexPair { 0162 int openIndex; 0163 int closeIndex; 0164 }; 0165 0166 QList<KTextEditor::Range> 0167 KateRegExpSearch::search(const QString &pattern, KTextEditor::Range inputRange, bool backwards, QRegularExpression::PatternOptions options) 0168 { 0169 // Save regexes to avoid reconstructing regexes all the time 0170 static QRegularExpression preRegex; 0171 static QRegularExpression repairedRegex; 0172 0173 // Returned if no matches are found 0174 QList<KTextEditor::Range> noResult(1, KTextEditor::Range::invalid()); 0175 0176 // Note that some methods in vimode (e.g. Searcher::findPatternWorker) rely on the 0177 // this method returning here if 'pattern' is empty. 0178 if (pattern.isEmpty() || inputRange.isEmpty() || !inputRange.isValid()) { 0179 return noResult; 0180 } 0181 0182 // Always enable Unicode support 0183 options |= QRegularExpression::UseUnicodePropertiesOption; 0184 0185 if (preRegex.pattern() != pattern || preRegex.patternOptions() != options) { 0186 preRegex = QRegularExpression(pattern, options); 0187 } 0188 0189 // If repairPattern() is called on an invalid regex pattern it may cause asserts 0190 // in QString (e.g. if the pattern is just '\\', pattern.size() is 1, and repaierPattern 0191 // expects at least one character after a '\') 0192 if (!preRegex.isValid()) { 0193 return noResult; 0194 } 0195 0196 // detect pattern type (single- or mutli-line) 0197 bool stillMultiLine; 0198 const QString repairedPattern = repairPattern(pattern, stillMultiLine); 0199 0200 // Enable multiline mode, so that the ^ and $ metacharacters in the pattern 0201 // are allowed to match, respectively, immediately after and immediately 0202 // before any newline in the subject string, as well as at the very beginning 0203 // and at the very end of the subject string (see QRegularExpression docs). 0204 // 0205 // Whole lines are passed to QRegularExpression, so that e.g. if the inputRange 0206 // ends in the middle of a line, then a '$' won't match at that position. And 0207 // matches that are out of the inputRange are rejected. 0208 if (stillMultiLine) { 0209 options |= QRegularExpression::MultilineOption; 0210 } 0211 0212 // check if anything changed at all 0213 if (repairedRegex.pattern() != repairedPattern || repairedRegex.patternOptions() != options) { 0214 repairedRegex.setPattern(repairedPattern); 0215 repairedRegex.setPatternOptions(options); 0216 } 0217 if (!repairedRegex.isValid()) { 0218 return noResult; 0219 } 0220 0221 const int rangeStartLine = inputRange.start().line(); 0222 const int rangeStartCol = inputRange.start().column(); 0223 0224 const int rangeEndLine = inputRange.end().line(); 0225 const int rangeEndCol = inputRange.end().column(); 0226 0227 if (stillMultiLine) { 0228 const int rangeLineCount = rangeEndLine - rangeStartLine + 1; 0229 FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." << rangeEndLine << ")"); 0230 0231 const int docLineCount = m_document->lines(); 0232 // nothing to do... 0233 if (rangeStartLine >= docLineCount) { 0234 return noResult; 0235 } 0236 0237 QList<int> lineLens(rangeLineCount); 0238 int maxMatchOffset = 0; 0239 0240 // all lines in the input range 0241 QString wholeRange; 0242 for (int i = 0; i < rangeLineCount; ++i) { 0243 const int docLineIndex = rangeStartLine + i; 0244 if (docLineIndex < 0 || docLineCount <= docLineIndex) { // invalid index 0245 return noResult; 0246 } 0247 0248 const QString textLine = m_document->line(docLineIndex); 0249 lineLens[i] = textLine.length(); 0250 wholeRange.append(textLine); 0251 0252 // This check is needed as some parts in vimode rely on this behaviour. 0253 // We add an '\n' as a delimiter between lines in the range; but never after the 0254 // last line as that would add an '\n' that isn't there in the original text, 0255 // and can skew search results or hit an assert when accessing lineLens later 0256 // in the code. 0257 if (i != (rangeLineCount - 1)) { 0258 wholeRange.append(QLatin1Char('\n')); 0259 } 0260 0261 // lineLens.at(i) + 1, because '\n' was added 0262 maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.at(i) + 1; 0263 0264 FAST_DEBUG(" line" << i << "has length" << lineLens.at(i)); 0265 } 0266 0267 FAST_DEBUG("Max. match offset" << maxMatchOffset); 0268 0269 QRegularExpressionMatch match; 0270 bool found = false; 0271 QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(wholeRange, rangeStartCol); 0272 0273 if (backwards) { 0274 while (iter.hasNext()) { 0275 QRegularExpressionMatch curMatch = iter.next(); 0276 if (curMatch.capturedEnd() <= maxMatchOffset) { 0277 match.swap(curMatch); 0278 found = true; 0279 } 0280 } 0281 } else { /* forwards */ 0282 QRegularExpressionMatch curMatch; 0283 if (iter.hasNext()) { 0284 curMatch = iter.next(); 0285 } 0286 if (curMatch.capturedEnd() <= maxMatchOffset) { 0287 match.swap(curMatch); 0288 found = true; 0289 } 0290 } 0291 0292 if (!found) { 0293 // no match 0294 FAST_DEBUG("not found"); 0295 return noResult; 0296 } 0297 0298 // Capture groups: save opening and closing indices and build a map, 0299 // the correct values will be written into it later 0300 QMap<int, TwoViewCursor *> indicesToCursors; 0301 const int numCaptures = repairedRegex.captureCount(); 0302 QList<IndexPair> indexPairs(numCaptures + 1); 0303 for (int c = 0; c <= numCaptures; ++c) { 0304 const int openIndex = match.capturedStart(c); 0305 IndexPair &pair = indexPairs[c]; 0306 if (openIndex == -1) { 0307 // An invalid index indicates an empty capture group 0308 pair.openIndex = -1; 0309 pair.closeIndex = -1; 0310 FAST_DEBUG("capture []"); 0311 } else { 0312 const int closeIndex = match.capturedEnd(c); 0313 pair.openIndex = openIndex; 0314 pair.closeIndex = closeIndex; 0315 FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]"); 0316 0317 // each key no more than once 0318 if (!indicesToCursors.contains(openIndex)) { 0319 TwoViewCursor *twoViewCursor = new TwoViewCursor; 0320 twoViewCursor->index = openIndex; 0321 indicesToCursors.insert(openIndex, twoViewCursor); 0322 FAST_DEBUG(" capture group start index added: " << openIndex); 0323 } 0324 if (!indicesToCursors.contains(closeIndex)) { 0325 TwoViewCursor *twoViewCursor = new TwoViewCursor; 0326 twoViewCursor->index = closeIndex; 0327 indicesToCursors.insert(closeIndex, twoViewCursor); 0328 FAST_DEBUG(" capture group end index added: " << closeIndex); 0329 } 0330 } 0331 } 0332 0333 // find out where they belong 0334 int curRelLine = 0; 0335 int curRelCol = 0; 0336 int curRelIndex = 0; 0337 0338 for (TwoViewCursor *twoViewCursor : std::as_const(indicesToCursors)) { 0339 // forward to index, save line/col 0340 const int index = twoViewCursor->index; 0341 FAST_DEBUG("resolving position" << index); 0342 0343 while (curRelIndex <= index) { 0344 FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex); 0345 0346 const int curRelLineLen = lineLens.at(curRelLine); 0347 const int curLineRemainder = curRelLineLen - curRelCol; 0348 const int lineFeedIndex = curRelIndex + curLineRemainder; 0349 if (index <= lineFeedIndex) { 0350 if (index == lineFeedIndex) { 0351 // on this line _at_ line feed 0352 FAST_DEBUG(" on line feed"); 0353 const int absLine = curRelLine + rangeStartLine; 0354 twoViewCursor->line = absLine; 0355 twoViewCursor->col = curRelLineLen; 0356 0357 // advance to next line 0358 const int advance = (index - curRelIndex) + 1; 0359 ++curRelLine; 0360 curRelCol = 0; 0361 curRelIndex += advance; 0362 } else { // index < lineFeedIndex 0363 // on this line _before_ line feed 0364 FAST_DEBUG(" before line feed"); 0365 const int diff = (index - curRelIndex); 0366 const int absLine = curRelLine + rangeStartLine; 0367 const int absCol = curRelCol + diff; 0368 twoViewCursor->line = absLine; 0369 twoViewCursor->col = absCol; 0370 0371 // advance on same line 0372 const int advance = diff + 1; 0373 curRelCol += advance; 0374 curRelIndex += advance; 0375 } 0376 FAST_DEBUG("position(" << twoViewCursor->line << "," << twoViewCursor->col << ")"); 0377 } else { // if (index > lineFeedIndex) 0378 // not on this line 0379 // advance to next line 0380 FAST_DEBUG(" not on this line"); 0381 ++curRelLine; 0382 curRelCol = 0; 0383 const int advance = curLineRemainder + 1; 0384 curRelIndex += advance; 0385 } 0386 } 0387 } 0388 0389 // build result array 0390 QList<KTextEditor::Range> result(numCaptures + 1, KTextEditor::Range::invalid()); 0391 for (int y = 0; y <= numCaptures; y++) { 0392 IndexPair &pair = indexPairs[y]; 0393 if (!(pair.openIndex == -1 || pair.closeIndex == -1)) { 0394 const TwoViewCursor *const openCursors = indicesToCursors.value(pair.openIndex); 0395 const TwoViewCursor *const closeCursors = indicesToCursors.value(pair.closeIndex); 0396 const int startLine = openCursors->line; 0397 const int startCol = openCursors->col; 0398 const int endLine = closeCursors->line; 0399 const int endCol = closeCursors->col; 0400 FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")"); 0401 result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol); 0402 } 0403 } 0404 0405 // free structs allocated for indicesToCursors 0406 qDeleteAll(indicesToCursors); 0407 0408 return result; 0409 } else { 0410 // single-line regex search (forwards and backwards) 0411 const int rangeStartCol = inputRange.start().column(); 0412 const uint rangeEndCol = inputRange.end().column(); 0413 0414 const int rangeStartLine = inputRange.start().line(); 0415 const int rangeEndLine = inputRange.end().line(); 0416 0417 const int forInit = backwards ? rangeEndLine : rangeStartLine; 0418 0419 const int forInc = backwards ? -1 : +1; 0420 0421 FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." << (backwards ? rangeStartLine : rangeEndLine)); 0422 0423 for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) { 0424 if (j < 0 || m_document->lines() <= j) { 0425 FAST_DEBUG("searchText | line " << j << ": no"); 0426 return noResult; 0427 } 0428 0429 const QString textLine = m_document->line(j); 0430 0431 const int offset = (j == rangeStartLine) ? rangeStartCol : 0; 0432 const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.length(); 0433 0434 bool found = false; 0435 0436 QRegularExpressionMatch match; 0437 0438 if (backwards) { 0439 QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(textLine, offset); 0440 while (iter.hasNext()) { 0441 QRegularExpressionMatch curMatch = iter.next(); 0442 if (curMatch.capturedEnd() <= endLineMaxOffset) { 0443 match.swap(curMatch); 0444 found = true; 0445 } 0446 } 0447 } else { 0448 match = repairedRegex.match(textLine, offset); 0449 if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) { 0450 found = true; 0451 } 0452 } 0453 0454 if (found) { 0455 FAST_DEBUG("line " << j << ": yes"); 0456 0457 // build result array 0458 const int numCaptures = repairedRegex.captureCount(); 0459 QList<KTextEditor::Range> result(numCaptures + 1); 0460 result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd()); 0461 0462 FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart << ")..(" << j << ", " << match.capturedEnd() << ")"); 0463 0464 for (int y = 1; y <= numCaptures; ++y) { 0465 const int openIndex = match.capturedStart(y); 0466 0467 if (openIndex == -1) { 0468 result[y] = KTextEditor::Range::invalid(); 0469 0470 FAST_DEBUG("capture []"); 0471 } else { 0472 const int closeIndex = match.capturedEnd(y); 0473 0474 FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")"); 0475 0476 result[y] = KTextEditor::Range(j, openIndex, j, closeIndex); 0477 } 0478 } 0479 return result; 0480 } else { 0481 FAST_DEBUG("searchText | line " << j << ": no"); 0482 } 0483 } 0484 } 0485 return noResult; 0486 } 0487 0488 /*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text) 0489 { 0490 return buildReplacement(text, QStringList(), 0, false); 0491 } 0492 0493 /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter) 0494 { 0495 return buildReplacement(text, capturedTexts, replacementCounter, true); 0496 } 0497 0498 /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies) 0499 { 0500 // get input 0501 const int inputLen = text.length(); 0502 int input = 0; // walker index 0503 0504 // prepare output 0505 ReplacementStream out(capturedTexts); 0506 0507 while (input < inputLen) { 0508 switch (text[input].unicode()) { 0509 case L'\n': 0510 out << text[input]; 0511 input++; 0512 break; 0513 0514 case L'\\': 0515 if (input + 1 >= inputLen) { 0516 // copy backslash 0517 out << text[input]; 0518 input++; 0519 break; 0520 } 0521 0522 switch (text[input + 1].unicode()) { 0523 case L'0': // "\0000".."\0377" 0524 if (input + 4 >= inputLen) { 0525 out << ReplacementStream::cap(0); 0526 input += 2; 0527 } else { 0528 bool stripAndSkip = false; 0529 const ushort text_2 = text[input + 2].unicode(); 0530 if ((text_2 >= L'0') && (text_2 <= L'3')) { 0531 const ushort text_3 = text[input + 3].unicode(); 0532 if ((text_3 >= L'0') && (text_3 <= L'7')) { 0533 const ushort text_4 = text[input + 4].unicode(); 0534 if ((text_4 >= L'0') && (text_4 <= L'7')) { 0535 int digits[3]; 0536 for (int i = 0; i < 3; i++) { 0537 digits[i] = 7 - (L'7' - text[input + 2 + i].unicode()); 0538 } 0539 const int ch = 64 * digits[0] + 8 * digits[1] + digits[2]; 0540 out << QChar(ch); 0541 input += 5; 0542 } else { 0543 stripAndSkip = true; 0544 } 0545 } else { 0546 stripAndSkip = true; 0547 } 0548 } else { 0549 stripAndSkip = true; 0550 } 0551 0552 if (stripAndSkip) { 0553 out << ReplacementStream::cap(0); 0554 input += 2; 0555 } 0556 } 0557 break; 0558 0559 // single letter captures \x 0560 case L'1': 0561 case L'2': 0562 case L'3': 0563 case L'4': 0564 case L'5': 0565 case L'6': 0566 case L'7': 0567 case L'8': 0568 case L'9': 0569 out << ReplacementStream::cap(9 - (L'9' - text[input + 1].unicode())); 0570 input += 2; 0571 break; 0572 0573 // multi letter captures \{xxxx} 0574 case L'{': { 0575 // allow {1212124}.... captures, see bug 365124 + testReplaceManyCapturesBug365124 0576 int capture = 0; 0577 int captureSize = 2; 0578 while ((input + captureSize) < inputLen) { 0579 const ushort nextDigit = text[input + captureSize].unicode(); 0580 if ((nextDigit >= L'0') && (nextDigit <= L'9')) { 0581 capture = (10 * capture) + (9 - (L'9' - nextDigit)); 0582 ++captureSize; 0583 continue; 0584 } 0585 if (nextDigit == L'}') { 0586 ++captureSize; 0587 break; 0588 } 0589 break; 0590 } 0591 out << ReplacementStream::cap(capture); 0592 input += captureSize; 0593 break; 0594 } 0595 0596 case L'E': // FALLTHROUGH 0597 case L'L': // FALLTHROUGH 0598 case L'l': // FALLTHROUGH 0599 case L'U': // FALLTHROUGH 0600 case L'u': 0601 if (!replacementGoodies) { 0602 // strip backslash ("\?" -> "?") 0603 out << text[input + 1]; 0604 } else { 0605 // handle case switcher 0606 switch (text[input + 1].unicode()) { 0607 case L'L': 0608 out << ReplacementStream::lowerCase; 0609 break; 0610 0611 case L'l': 0612 out << ReplacementStream::lowerCaseFirst; 0613 break; 0614 0615 case L'U': 0616 out << ReplacementStream::upperCase; 0617 break; 0618 0619 case L'u': 0620 out << ReplacementStream::upperCaseFirst; 0621 break; 0622 0623 case L'E': // FALLTHROUGH 0624 default: 0625 out << ReplacementStream::keepCase; 0626 } 0627 } 0628 input += 2; 0629 break; 0630 0631 case L'#': 0632 if (!replacementGoodies) { 0633 // strip backslash ("\?" -> "?") 0634 out << text[input + 1]; 0635 input += 2; 0636 } else { 0637 // handle replacement counter 0638 // eat and count all following hash marks 0639 // each hash stands for a leading zero: \### will produces 001, 002, ... 0640 int minWidth = 1; 0641 while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L'#')) { 0642 minWidth++; 0643 } 0644 out << ReplacementStream::counter(replacementCounter, minWidth); 0645 input += 1 + minWidth; 0646 } 0647 break; 0648 0649 case L'a': 0650 out << QChar(0x07); 0651 input += 2; 0652 break; 0653 0654 case L'f': 0655 out << QChar(0x0c); 0656 input += 2; 0657 break; 0658 0659 case L'n': 0660 out << QChar(0x0a); 0661 input += 2; 0662 break; 0663 0664 case L'r': 0665 out << QChar(0x0d); 0666 input += 2; 0667 break; 0668 0669 case L't': 0670 out << QChar(0x09); 0671 input += 2; 0672 break; 0673 0674 case L'v': 0675 out << QChar(0x0b); 0676 input += 2; 0677 break; 0678 0679 case L'x': // "\x0000".."\xffff" 0680 if (input + 5 >= inputLen) { 0681 // strip backslash ("\x" -> "x") 0682 out << text[input + 1]; 0683 input += 2; 0684 } else { 0685 bool stripAndSkip = false; 0686 const ushort text_2 = text[input + 2].unicode(); 0687 if (((text_2 >= L'0') && (text_2 <= L'9')) || ((text_2 >= L'a') && (text_2 <= L'f')) || ((text_2 >= L'A') && (text_2 <= L'F'))) { 0688 const ushort text_3 = text[input + 3].unicode(); 0689 if (((text_3 >= L'0') && (text_3 <= L'9')) || ((text_3 >= L'a') && (text_3 <= L'f')) || ((text_3 >= L'A') && (text_3 <= L'F'))) { 0690 const ushort text_4 = text[input + 4].unicode(); 0691 if (((text_4 >= L'0') && (text_4 <= L'9')) || ((text_4 >= L'a') && (text_4 <= L'f')) || ((text_4 >= L'A') && (text_4 <= L'F'))) { 0692 const ushort text_5 = text[input + 5].unicode(); 0693 if (((text_5 >= L'0') && (text_5 <= L'9')) || ((text_5 >= L'a') && (text_5 <= L'f')) 0694 || ((text_5 >= L'A') && (text_5 <= L'F'))) { 0695 int digits[4]; 0696 for (int i = 0; i < 4; i++) { 0697 const ushort cur = text[input + 2 + i].unicode(); 0698 if ((cur >= L'0') && (cur <= L'9')) { 0699 digits[i] = 9 - (L'9' - cur); 0700 } else if ((cur >= L'a') && (cur <= L'f')) { 0701 digits[i] = 15 - (L'f' - cur); 0702 } else { // if ((cur >= L'A') && (cur <= L'F'))) 0703 digits[i] = 15 - (L'F' - cur); 0704 } 0705 } 0706 0707 const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3]; 0708 out << QChar(ch); 0709 input += 6; 0710 } else { 0711 stripAndSkip = true; 0712 } 0713 } else { 0714 stripAndSkip = true; 0715 } 0716 } else { 0717 stripAndSkip = true; 0718 } 0719 } 0720 0721 if (stripAndSkip) { 0722 // strip backslash ("\x" -> "x") 0723 out << text[input + 1]; 0724 input += 2; 0725 } 0726 } 0727 break; 0728 0729 default: 0730 // strip backslash ("\?" -> "?") 0731 out << text[input + 1]; 0732 input += 2; 0733 } 0734 break; 0735 0736 default: 0737 out << text[input]; 0738 input++; 0739 } 0740 } 0741 0742 return out.str(); 0743 } 0744 0745 QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine) 0746 { 0747 // '\s' can make a pattern multi-line, it's replaced here with '[ \t]'; 0748 // besides \s, the following characters can make a pattern multi-line: 0749 // \n, \x000A (Line Feed), \x????-\x????, \0012, \0???-\0??? 0750 // a multi-line pattern must not pass as single-line, the other 0751 // way around will just result in slower searches and is therefore 0752 // not as critical 0753 0754 const int inputLen = pattern.length(); 0755 const QStringView patternView{pattern}; 0756 0757 // prepare output 0758 QString output; 0759 output.reserve(2 * inputLen + 1); // twice should be enough for the average case 0760 0761 // parser state 0762 bool insideClass = false; 0763 0764 stillMultiLine = false; 0765 int input = 0; 0766 while (input < inputLen) { 0767 if (insideClass) { 0768 // wait for closing, unescaped ']' 0769 switch (pattern[input].unicode()) { 0770 case L'\\': 0771 switch (pattern[input + 1].unicode()) { 0772 case L'x': 0773 if (input + 5 < inputLen) { 0774 // copy "\x????" unmodified 0775 output.append(patternView.mid(input, 6)); 0776 input += 6; 0777 } else { 0778 // copy "\x" unmodified 0779 output.append(patternView.mid(input, 2)); 0780 input += 2; 0781 } 0782 stillMultiLine = true; 0783 break; 0784 0785 case L'0': 0786 if (input + 4 < inputLen) { 0787 // copy "\0???" unmodified 0788 output.append(patternView.mid(input, 5)); 0789 input += 5; 0790 } else { 0791 // copy "\0" unmodified 0792 output.append(patternView.mid(input, 2)); 0793 input += 2; 0794 } 0795 stillMultiLine = true; 0796 break; 0797 0798 case L's': 0799 // replace "\s" with "[ \t]" 0800 output.append(QLatin1String(" \\t")); 0801 input += 2; 0802 break; 0803 0804 case L'n': 0805 stillMultiLine = true; 0806 // FALLTROUGH 0807 Q_FALLTHROUGH(); 0808 0809 default: 0810 // copy "\?" unmodified 0811 output.append(patternView.mid(input, 2)); 0812 input += 2; 0813 } 0814 break; 0815 0816 case L']': 0817 // copy "]" unmodified 0818 insideClass = false; 0819 output.append(pattern[input]); 0820 ++input; 0821 break; 0822 0823 default: 0824 // copy "?" unmodified 0825 output.append(pattern[input]); 0826 ++input; 0827 } 0828 } else { 0829 switch (pattern[input].unicode()) { 0830 case L'\\': 0831 switch (pattern[input + 1].unicode()) { 0832 case L'x': 0833 if (input + 5 < inputLen) { 0834 // copy "\x????" unmodified 0835 output.append(patternView.mid(input, 6)); 0836 input += 6; 0837 } else { 0838 // copy "\x" unmodified 0839 output.append(patternView.mid(input, 2)); 0840 input += 2; 0841 } 0842 stillMultiLine = true; 0843 break; 0844 0845 case L'0': 0846 if (input + 4 < inputLen) { 0847 // copy "\0???" unmodified 0848 output.append(patternView.mid(input, 5)); 0849 input += 5; 0850 } else { 0851 // copy "\0" unmodified 0852 output.append(patternView.mid(input, 2)); 0853 input += 2; 0854 } 0855 stillMultiLine = true; 0856 break; 0857 0858 case L's': 0859 // replace "\s" with "[ \t]" 0860 output.append(QLatin1String("[ \\t]")); 0861 input += 2; 0862 break; 0863 0864 case L'n': 0865 stillMultiLine = true; 0866 // FALLTROUGH 0867 Q_FALLTHROUGH(); 0868 default: 0869 // copy "\?" unmodified 0870 output.append(patternView.mid(input, 2)); 0871 input += 2; 0872 } 0873 break; 0874 0875 case L'[': 0876 // copy "[" unmodified 0877 insideClass = true; 0878 output.append(pattern[input]); 0879 ++input; 0880 break; 0881 0882 default: 0883 // copy "?" unmodified 0884 output.append(pattern[input]); 0885 ++input; 0886 } 0887 } 0888 } 0889 return output; 0890 } 0891 0892 // Kill our helpers again 0893 #ifdef FAST_DEBUG_ENABLE 0894 #undef FAST_DEBUG_ENABLE 0895 #endif 0896 #undef FAST_DEBUG