File indexing completed on 2024-05-05 04:38:45
0001 /* 0002 SPDX-FileCopyrightText: 2011 David Nolden <david.nolden.kdevelop@art-master.de> 0003 SPDX-FileCopyrightText: 2023 Igor Kushnir <igorkuo@gmail.com> 0004 0005 SPDX-License-Identifier: GPL-2.0-or-later 0006 */ 0007 0008 #include "formattinghelpers.h" 0009 0010 #include "debug.h" 0011 0012 #include <QChar> 0013 #include <QString> 0014 #include <QStringView> 0015 #include <QVarLengthArray> 0016 0017 #include <algorithm> 0018 #include <array> 0019 #include <cstdlib> 0020 #include <cstring> 0021 #include <iterator> 0022 #include <utility> 0023 0024 namespace { 0025 0026 bool asciiStringContains(const char* str, QChar c) 0027 { 0028 constexpr ushort maxAscii{127}; 0029 return c.unicode() <= maxAscii && std::strchr(str, c.unicode()); 0030 } 0031 0032 bool isFuzzy(QChar c) 0033 { 0034 return asciiStringContains("{}()\"/\\*", c); 0035 } 0036 0037 bool isFuzzyBracket(QChar c) 0038 { 0039 return asciiStringContains("{}()", c); 0040 } 0041 0042 class DoubleQuoteValidator 0043 { 0044 public: 0045 /** 0046 * Disable matching and clearing two consecutive inserted or removed double quotes. 0047 * 0048 * A double quote is closed by the next double quote by default. 0049 * Once this function is called, such a sequence is considered invalid and reported as a match failure. 0050 */ 0051 void disableMatchingDoubleQuotes() 0052 { 0053 m_matchDoubleQuotes = false; 0054 } 0055 0056 /** 0057 * Insert or remove a double quote. 0058 * @return @c false if double quote matching fails. 0059 */ 0060 bool addDoubleQuote(bool isInserted); 0061 0062 bool hasUnmatchedDoubleQuote() const 0063 { 0064 return m_unmatchedDoubleQuote; 0065 } 0066 0067 /** 0068 * Finish validation. 0069 * @return @c false if double quote matching fails. 0070 */ 0071 bool validate() 0072 { 0073 if (m_unmatchedDoubleQuote) { 0074 printFailureWarning(); 0075 return false; 0076 } 0077 return true; 0078 } 0079 0080 static void printFailureWarning() 0081 { 0082 qCWarning(UTIL) << "giving up formatting because the formatter inserted or removed " 0083 "a pair of double quotes across context-text boundaries"; 0084 } 0085 0086 private: 0087 bool m_matchDoubleQuotes = true; 0088 /// Whether a double quote has been inserted or removed and not yet matched. 0089 bool m_unmatchedDoubleQuote = false; 0090 /// Whether the unmatched double quote was inserted or removed. 0091 /// This data member has a meaning only if @a m_unmatchedDoubleQuote equals @c true. 0092 bool m_unmatchedDoubleQuoteWasInserted = false; 0093 }; 0094 0095 bool DoubleQuoteValidator::addDoubleQuote(bool isInserted) 0096 { 0097 if (!m_unmatchedDoubleQuote) { 0098 m_unmatchedDoubleQuote = true; 0099 m_unmatchedDoubleQuoteWasInserted = isInserted; 0100 return true; 0101 } 0102 0103 if (m_matchDoubleQuotes || m_unmatchedDoubleQuoteWasInserted != isInserted) { 0104 // Either the two double quotes are matched and cleared, or one was inserted and another removed, 0105 // which unconditionally cancels them out (could be some code reordering by the formatter). 0106 m_unmatchedDoubleQuote = false; 0107 return true; 0108 } 0109 0110 // Fail because of two consecutive inserted or removed double quotes, matching which has been disabled. 0111 printFailureWarning(); 0112 return false; 0113 } 0114 0115 /** 0116 * Tracks and validates inserted and removed, opening and closing brackets of a single type, e.g. '{' and '}'. 0117 */ 0118 class BracketStack 0119 { 0120 public: 0121 /** 0122 * Insert or remove a bracket. 0123 */ 0124 void addBracket(bool isOpening, bool isInserted) 0125 { 0126 const auto countIncrement = isInserted ? 1 : -1; 0127 if (!m_data.empty()) { 0128 auto& last = m_data.back(); 0129 if (last.isOpening == isOpening) { 0130 last.count += countIncrement; 0131 if (last.count == 0) { 0132 m_data.pop_back(); // remove the now empty sequence 0133 } 0134 return; 0135 } 0136 } 0137 0138 m_data.push_back({isOpening, countIncrement}); 0139 } 0140 0141 /** 0142 * Validate brackets once parsing ends. 0143 * @return @c false if matching brackets fails. 0144 * @note Validation is destructive, that is, it transitions this object into an undefined state. 0145 */ 0146 bool validate() 0147 { 0148 // Inserted opening brackets normally match inserted closing brackets. Same for removed brackets. 0149 // In addition, inserted opening brackets match removed opening brackets. Same for closing brackets. 0150 // An inserted-removed match can mean that an unformatted-formatted text matcher misinterpreted the formatter's 0151 // intention (other fuzzy characters could actually be inserted or removed near the untouched bracket). 0152 // Such a misinterpretation is not a problem at all thanks to the inserted-removed match. 0153 0154 // We move backwards, so closing brackets must be encountered before the matching open brackets. 0155 int closingBracketCount = 0; 0156 for (int i = m_data.size() - 1; i >= 0; --i) { 0157 auto p = m_data.at(i); 0158 Q_ASSERT(p.count != 0); 0159 0160 if (p.isOpening) { 0161 if ((closingBracketCount > 0) != (p.count > 0)) { 0162 // Insertion/removal mismatch of opening and closing brackets. When a closing bracket is 0163 // inserted and an opening bracket is removed (or vice versa), they clearly cannot match. 0164 return false; 0165 } 0166 if (std::abs(closingBracketCount) < std::abs(p.count)) { 0167 return false; // not enough closing brackets to match all opening brackets 0168 } 0169 closingBracketCount -= p.count; 0170 } else { 0171 closingBracketCount += p.count; 0172 } 0173 } 0174 return closingBracketCount == 0; // are all closing brackets matched by opening brackets? 0175 } 0176 0177 private: 0178 struct BracketSequence 0179 { 0180 bool isOpening : 1; ///< whether the brackets in this sequence are of the opening type 0181 int count : 31; ///< abs(count) is the number of consecutive inserted (count > 0) or removed (count < 0) brackets 0182 }; 0183 0184 QVarLengthArray<BracketSequence, 64> m_data; 0185 }; 0186 0187 class BracketValidator 0188 { 0189 public: 0190 /** 0191 * Insert or remove a fuzzy bracket. 0192 * @pre isFuzzyBracket(@p bracket) 0193 */ 0194 void addBracket(QChar bracket, bool isInserted); 0195 0196 /// Insert or remove an opening "/*" or closing "*/" comment sequence. 0197 void addCommentSequence(bool isOpening, bool isInserted) 0198 { 0199 m_stacks[commentsIndex].addBracket(isOpening, isInserted); 0200 } 0201 0202 /** 0203 * Validate brackets and comments once parsing ends. 0204 * @return @c false if matching brackets or comments fails. 0205 * @note Validation is destructive, that is, it transitions this object into an undefined state. 0206 */ 0207 bool validate() 0208 { 0209 for (std::size_t i = 0; i < m_stacks.size(); ++i) { 0210 if (!m_stacks[i].validate()) { 0211 qCWarning(UTIL) << "giving up formatting because the formatter inserted or removed the pair" 0212 << m_stackStrings[i] << "across context-text boundaries"; 0213 return false; 0214 } 0215 } 0216 return true; 0217 } 0218 0219 private: 0220 // the following 3 constants are indices into m_stacks and m_stackStrings 0221 static constexpr std::size_t bracesIndex = 0; ///< { } 0222 static constexpr std::size_t parenthesesIndex = 1; ///< ( ) 0223 static constexpr std::size_t commentsIndex = 2; ///< /* */ 0224 /// human-readable identifications of fuzzy bracket types 0225 static constexpr std::array<const char*, 3> m_stackStrings = {"{ }", "( )", "/* */"}; 0226 0227 std::array<BracketStack, 3> m_stacks; ///< stacks of fuzzy brackets of each type 0228 }; 0229 0230 void BracketValidator::addBracket(QChar bracket, bool isInserted) 0231 { 0232 Q_ASSERT(isFuzzyBracket(bracket)); 0233 0234 std::size_t index; 0235 bool isOpening; 0236 switch (bracket.unicode()) { 0237 case '{': 0238 index = bracesIndex; 0239 isOpening = true; 0240 break; 0241 case '}': 0242 index = bracesIndex; 0243 isOpening = false; 0244 break; 0245 case '(': 0246 index = parenthesesIndex; 0247 isOpening = true; 0248 break; 0249 case ')': 0250 index = parenthesesIndex; 0251 isOpening = false; 0252 break; 0253 default: 0254 Q_UNREACHABLE(); 0255 } 0256 0257 m_stacks[index].addBracket(isOpening, isInserted); 0258 } 0259 0260 enum class FuzzyMatchResult { Fuzzy, NotFuzzy, MatchingFailed }; 0261 0262 class FuzzyMatcher 0263 { 0264 Q_DISABLE_COPY_MOVE(FuzzyMatcher) 0265 public: 0266 virtual ~FuzzyMatcher() = default; 0267 /** 0268 * Inserts (if @p isInserted == @c true) or removes (otherwise) a single character at 0269 * the specified address @p characterLocation. 0270 * 0271 * Prints an explaining warning when @c FuzzyMatchResult::MatchingFailed is returned. 0272 * 0273 * @param characterLocation a valid character address into a complete string or view. 0274 * The address is used to group consecutive characters '/' and '*' into a C-style comment. 0275 * @return the added character's classification (fuzzy or not) or @c MatchingFailed if a match failure occurs. 0276 */ 0277 virtual FuzzyMatchResult add(const QChar* characterLocation, bool isInserted) = 0; 0278 0279 /** 0280 * Informs this matcher that the first exact match of a character in unformatted and formatted text just occurred. 0281 * 0282 * May be called twice. For example, the first call right after construction indicates that the ranges 0283 * do not begin at a context-text boundary, and thus the matcher should not fail if a character, which is 0284 * forbidden at a boundary, is removed or inserted before the second (regular) call when the exact match occurs. 0285 */ 0286 virtual void firstNonWhitespaceCharacterMatch() = 0; 0287 /** 0288 * Specifies the locations in the two matched ranges of the last exact match of a character. 0289 * 0290 * Call this function if the ranges end at a context-text boundary. 0291 * The function must be called after all fuzzy characters, which precede 0292 * the last-match addresses in the iteration order, have been added. 0293 * Prints an explaining warning when @c false is returned. 0294 * 0295 * @param removalRangeMatchAddress the address of the last matching character (or the very first 0296 * address if there were no matches) in the complete string or view, 0297 * from which fuzzy characters can be removed. 0298 * @param insertionRangeMatchAddress the address of the last matching character (or the very first 0299 * address if there were no matches) in the complete string or view, 0300 * into which fuzzy characters can be inserted. 0301 * @return @c false if matching fails immediately. 0302 */ 0303 virtual bool lastNonWhitespaceCharacterMatch(const QChar* removalRangeMatchAddress, 0304 const QChar* insertionRangeMatchAddress) = 0; 0305 0306 protected: 0307 FuzzyMatcher() = default; 0308 }; 0309 0310 /** 0311 * This class reports a match failure if a double quote is removed or inserted at the boundary between context and text. 0312 * That is, after a nonempty left context and before the first exact non-whitespace character match between 0313 * prefix and text, or after the last exact non-whitespace character match and before text or nonempty right context. 0314 * 0315 * Such a removal or insertion should cancel formatting, because whitespace within string literals is significant, 0316 * but the implementation of formatted text extraction is not sophisticated enough to distinguish it from regular 0317 * whitespace that is subject to formatting manipulations. So the combination of a double quote insertion/removal 0318 * by a formatter and an unlucky selection of a text fragment for formatting can cause whitespace changes within 0319 * a string literal. The purpose of this class is to reduce the probability of such a quiet breaking code change. 0320 */ 0321 class BoundaryFuzzyMatcher : public FuzzyMatcher 0322 { 0323 public: 0324 /** 0325 * Indicates that higher addresses are encountered before lower addresses (reverse iteration). 0326 */ 0327 void setReverseAddressDirection() 0328 { 0329 m_reverseAddressDirection = true; 0330 } 0331 0332 void firstNonWhitespaceCharacterMatch() override 0333 { 0334 m_allowDoubleQuoteChanges = true; 0335 } 0336 0337 bool lastNonWhitespaceCharacterMatch(const QChar* removalRangeMatchAddress, 0338 const QChar* insertionRangeMatchAddress) override 0339 { 0340 if (!validate(m_lastRemovedDoubleQuoteAddress, removalRangeMatchAddress)) { 0341 printFailureWarning(false); 0342 return false; 0343 } 0344 if (!validate(m_lastInsertedDoubleQuoteAddress, insertionRangeMatchAddress)) { 0345 printFailureWarning(true); 0346 return false; 0347 } 0348 m_allowDoubleQuoteChanges = false; 0349 return true; 0350 } 0351 0352 protected: 0353 /** 0354 * Derived classes must call this function each time a double quote is passed to add(). 0355 * @return @c false if matching fails immediately. 0356 */ 0357 bool addDoubleQuote(const QChar* location, bool isInserted) 0358 { 0359 Q_ASSERT(location); 0360 Q_ASSERT(*location == QLatin1Char{'"'}); 0361 if (!m_allowDoubleQuoteChanges) { 0362 printFailureWarning(isInserted); 0363 return false; 0364 } 0365 (isInserted ? m_lastInsertedDoubleQuoteAddress : m_lastRemovedDoubleQuoteAddress) = location; 0366 return true; 0367 } 0368 0369 private: 0370 bool validate(const QChar* lastDoubleQuoteAddress, const QChar* lastMatchAddress) const 0371 { 0372 if (!lastDoubleQuoteAddress) { 0373 return true; 0374 } 0375 // lastDoubleQuoteAddress can equal lastMatchAddress in case there were no matches (lastMatchAddress 0376 // is the very first address, i.e. the beginning of the range, then), and the formatter happened to 0377 // remove or insert a double quote at this same address. In this case we correctly return false. 0378 return m_reverseAddressDirection ? lastDoubleQuoteAddress > lastMatchAddress 0379 : lastDoubleQuoteAddress < lastMatchAddress; 0380 } 0381 0382 static void printFailureWarning(bool isInserted) 0383 { 0384 qCWarning(UTIL) << "giving up formatting because the formatter" << (isInserted ? "inserted" : "removed") 0385 << "a double quote at a context-text boundary"; 0386 } 0387 0388 bool m_reverseAddressDirection = false; 0389 bool m_allowDoubleQuoteChanges = false; 0390 const QChar* m_lastRemovedDoubleQuoteAddress = nullptr; 0391 const QChar* m_lastInsertedDoubleQuoteAddress = nullptr; 0392 }; 0393 0394 class DoubleQuoteFuzzyMatcher : public BoundaryFuzzyMatcher 0395 { 0396 public: 0397 FuzzyMatchResult add(const QChar* characterLocation, bool isInserted) override 0398 { 0399 Q_ASSERT(characterLocation); 0400 const auto c = *characterLocation; 0401 if (c == QLatin1Char{'"'}) { 0402 const bool valid = addDoubleQuote(characterLocation, isInserted) && m_validator.addDoubleQuote(isInserted); 0403 return valid ? FuzzyMatchResult::Fuzzy : FuzzyMatchResult::MatchingFailed; 0404 } 0405 return isFuzzy(c) ? FuzzyMatchResult::Fuzzy : FuzzyMatchResult::NotFuzzy; 0406 } 0407 0408 bool hasUnmatchedDoubleQuote() const 0409 { 0410 return m_validator.hasUnmatchedDoubleQuote(); 0411 } 0412 0413 private: 0414 DoubleQuoteValidator m_validator; 0415 }; 0416 0417 /** 0418 * A fuzzy matcher that tracks and validates double quotes, brackets and comments. 0419 * 0420 * Supports only direct iteration over a string [view], that is, each added insertion 0421 * address must be greater than all previous added insertion addresses. Same for removal addresses. 0422 */ 0423 class CompleteFuzzyMatcher : public BoundaryFuzzyMatcher 0424 { 0425 public: 0426 struct Range 0427 { 0428 const QChar* first; 0429 const QChar* last; 0430 }; 0431 0432 /** 0433 * @param removalRange the address range of the complete string or view, 0434 * from which fuzzy characters can be removed. 0435 * @param insertionRange the address range of the complete string or view, 0436 * into which fuzzy characters can be inserted. 0437 */ 0438 explicit CompleteFuzzyMatcher(Range removalRange, Range insertionRange) 0439 : m_validRanges{removalRange, insertionRange} 0440 { 0441 } 0442 0443 void disableMatchingDoubleQuotes() 0444 { 0445 m_doubleQuoteValidator.disableMatchingDoubleQuotes(); 0446 } 0447 0448 FuzzyMatchResult add(const QChar* characterLocation, bool isInserted) override 0449 { 0450 Q_ASSERT(characterLocation); 0451 const auto& validRange = m_validRanges[isInserted]; 0452 Q_ASSERT(characterLocation >= validRange.first); 0453 Q_ASSERT(characterLocation < validRange.last); 0454 0455 const auto c = *characterLocation; 0456 switch (c.unicode()) { 0457 case '"': { 0458 const bool valid = 0459 addDoubleQuote(characterLocation, isInserted) && m_doubleQuoteValidator.addDoubleQuote(isInserted); 0460 return valid ? FuzzyMatchResult::Fuzzy : FuzzyMatchResult::MatchingFailed; 0461 } 0462 case '/': 0463 case '*': { 0464 const auto*& prev = m_lastEndOfCommentAddresses[isInserted]; 0465 Q_ASSERT(!prev || prev <= characterLocation); 0466 if (prev == characterLocation) { 0467 // a comment sequence ending at characterLocation has been added already, do nothing else 0468 } else if (characterLocation > validRange.first && prev != characterLocation - 1 0469 && addIfCommentSequence(*(characterLocation - 1), c, isInserted)) { 0470 // NOTE: the (prev != characterLocation - 1) check above prevents 0471 // finding two comment sequences in the string "/*/". 0472 prev = characterLocation; 0473 } else if (characterLocation + 1 < validRange.last 0474 && addIfCommentSequence(c, *(characterLocation + 1), isInserted)) { 0475 prev = characterLocation + 1; 0476 } 0477 return FuzzyMatchResult::Fuzzy; 0478 } 0479 default: 0480 if (!isFuzzy(c)) { 0481 return FuzzyMatchResult::NotFuzzy; 0482 } 0483 if (isFuzzyBracket(c)) { 0484 m_bracketValidator.addBracket(c, isInserted); 0485 } 0486 return FuzzyMatchResult::Fuzzy; 0487 } 0488 } 0489 0490 bool validate() 0491 { 0492 return m_doubleQuoteValidator.validate() && m_bracketValidator.validate(); 0493 } 0494 0495 private: 0496 bool addIfCommentSequence(QChar a, QChar b, bool isInserted) 0497 { 0498 bool isOpening; 0499 if (a == QLatin1Char{'/'} && b == QLatin1Char{'*'}) { 0500 isOpening = true; 0501 } else if (a == QLatin1Char{'*'} && b == QLatin1Char{'/'}) { 0502 isOpening = false; 0503 } else { 0504 return false; 0505 } 0506 m_bracketValidator.addCommentSequence(isOpening, isInserted); 0507 return true; 0508 } 0509 0510 // NOTE: both array data members are indexed by isInserted. 0511 0512 const std::array<Range, 2> m_validRanges; 0513 /// Each element is the address of the second part of a C-style comment sequence, which was 0514 /// last added as a removed/inserted comment. So each element equals @c nullptr or points to '/' or '*'. 0515 std::array<const QChar*, 2> m_lastEndOfCommentAddresses{nullptr, nullptr}; 0516 0517 DoubleQuoteValidator m_doubleQuoteValidator; 0518 BracketValidator m_bracketValidator; 0519 }; 0520 0521 template<typename ForwardIt> 0522 void skipWhitespace(ForwardIt& first, ForwardIt last) 0523 { 0524 first = std::find_if_not(first, last, [](QChar c) { 0525 return c.isSpace(); 0526 }); 0527 } 0528 0529 template<typename ForwardIt> 0530 struct FindResult 0531 { 0532 bool found = false; ///< whether the needle has been found 0533 int fuzzyCount = 0; ///< the number of fuzzy characters skipped before @a location 0534 ForwardIt location; ///< the location of the needle or a non-whitespace non-fuzzy character, or the search range end 0535 }; 0536 0537 /** 0538 * Finds @p needle in [@p first, @p last). 0539 * 0540 * If a non-whitespace non-fuzzy character is encountered before @p needle, 0541 * sets the returned result's @a found to @c false and returns the said character's location. 0542 */ 0543 template<typename ForwardIt> 0544 FindResult<ForwardIt> findUntilNeitherFuzzyNorWhitespace(ForwardIt first, ForwardIt last, QChar needle) 0545 { 0546 FindResult<ForwardIt> result; 0547 for (; first != last; ++first) { 0548 if (*first == needle) { 0549 result.found = true; 0550 break; 0551 } 0552 if (first->isSpace()) { 0553 } else if (isFuzzy(*first)) { 0554 ++result.fuzzyCount; 0555 } else { 0556 break; 0557 } 0558 } 0559 result.location = first; 0560 return result; 0561 } 0562 0563 /** 0564 * Advances @p first until it points to a neither fuzzy nor whitespace character or until 0565 * @p fuzzyMatcher fails or until @p first becomes equal to @p last, whichever happens first. 0566 * 0567 * @pre @p first != @p last && @p !first->isSpace() 0568 * @post @p first == @p last if and only if only whitespace and fuzzy characters 0569 * have been encountered and @p fuzzyMatcher hasn't failed. 0570 * @return the last return value of FuzzyMatcher::add() 0571 */ 0572 template<typename ForwardIt> 0573 FuzzyMatchResult skipFuzzyAndWhitespace(ForwardIt& first, ForwardIt last, FuzzyMatcher& fuzzyMatcher, bool isInserted) 0574 { 0575 Q_ASSERT(first != last); 0576 Q_ASSERT(!first->isSpace()); 0577 do { 0578 const auto result = fuzzyMatcher.add(&*first, isInserted); 0579 if (result != FuzzyMatchResult::Fuzzy) { 0580 return result; 0581 } 0582 ++first; 0583 skipWhitespace(first, last); 0584 } while (first != last); 0585 return FuzzyMatchResult::Fuzzy; 0586 } 0587 0588 /** 0589 * Matches the given unformatted prefix against the given formatted text. 0590 * Skips whitespace. Skips fuzzy (defined by isFuzzy()) characters using the given @c FuzzyMatcher. 0591 */ 0592 template<typename ForwardIt> 0593 class PrefixMatcher 0594 { 0595 public: 0596 explicit PrefixMatcher(ForwardIt prefixFirst, ForwardIt prefixLast, ForwardIt textFirst, ForwardIt textLast, 0597 FuzzyMatcher& fuzzyMatcher) 0598 : m_prefixFirst{prefixFirst} 0599 , m_prefixLast{prefixLast} 0600 , m_textFirst{textFirst} 0601 , m_textLast{textLast} 0602 , m_fuzzyMatcher{fuzzyMatcher} 0603 { 0604 } 0605 0606 struct Result 0607 { 0608 /// @c true if the prefix has been matched successfully 0609 bool hasMatched; 0610 /// a text iterator that points to where the prefix match ends, or where a mismatch or match failure occurs 0611 ForwardIt matchEnd; 0612 }; 0613 0614 /// Call this destructive function once, because it transitions this object into an undefined state. 0615 Result match(bool isEndAContextTextBoundary = true) 0616 { 0617 bool characterMatchOccurred = false; 0618 auto lastPrefixCharacterMatchIt = m_prefixFirst; 0619 auto lastTextCharacterMatchIt = m_textFirst; 0620 for (;; ++m_textFirst, ++m_prefixFirst) { 0621 skipWhitespace(m_prefixFirst, m_prefixLast); 0622 if (m_prefixFirst == m_prefixLast) { 0623 setResult(HasMatched::Yes); 0624 break; 0625 } 0626 0627 skipWhitespace(m_textFirst, m_textLast); 0628 if (m_textFirst == m_textLast) { 0629 skipFuzzyAndWhitespace(m_prefixFirst, m_prefixLast, m_fuzzyMatcher, /*isInserted=*/false); 0630 if (m_prefixFirst == m_prefixLast) { 0631 setResult(HasMatched::Yes); 0632 } else { 0633 setUnmatchedPrefixCharacterResult(); 0634 } 0635 break; 0636 } 0637 0638 if (*m_prefixFirst != *m_textFirst && !skipToMatchingPositions()) { 0639 break; 0640 } 0641 0642 if (*m_prefixFirst == *m_textFirst) { 0643 if (!characterMatchOccurred) { 0644 characterMatchOccurred = true; 0645 m_fuzzyMatcher.firstNonWhitespaceCharacterMatch(); 0646 } 0647 lastPrefixCharacterMatchIt = m_prefixFirst; 0648 lastTextCharacterMatchIt = m_textFirst; 0649 } 0650 } 0651 0652 if (isEndAContextTextBoundary && m_result.hasMatched) { 0653 m_result.hasMatched = m_fuzzyMatcher.lastNonWhitespaceCharacterMatch(&*lastPrefixCharacterMatchIt, 0654 &*lastTextCharacterMatchIt); 0655 } 0656 0657 return m_result; 0658 } 0659 0660 private: 0661 /** 0662 * Skips fuzzy and whitespace characters in prefix and text until @a *m_prefixFirst == @a *m_textFirst 0663 * or until a fuzzy @a *m_textFirst replaces a fuzzy @a *m_prefixFirst using @a m_fuzzyMatcher. 0664 * 0665 * @return @c true in case of success; 0666 * @c false in case of no matching characters left (successful match), 0667 * unrecoverable mismatch or match failure. 0668 * @post @a m_result is set and ready to be returned if this function returns @c false. 0669 */ 0670 bool skipToMatchingPositions() 0671 { 0672 Q_ASSERT(m_prefixFirst != m_prefixLast); 0673 Q_ASSERT(!m_prefixFirst->isSpace()); 0674 0675 Q_ASSERT(m_textFirst != m_textLast); 0676 Q_ASSERT(!m_textFirst->isSpace()); 0677 0678 Q_ASSERT(*m_prefixFirst != *m_textFirst); 0679 0680 const bool prefixIsFuzzy = isFuzzy(*m_prefixFirst); 0681 const bool textIsFuzzy = isFuzzy(*m_textFirst); 0682 if (!prefixIsFuzzy && !textIsFuzzy) { 0683 setUnrecoverableMismatchResult(); 0684 return false; 0685 } 0686 if (prefixIsFuzzy != textIsFuzzy) { 0687 auto& first = prefixIsFuzzy ? m_prefixFirst : m_textFirst; 0688 const auto last = prefixIsFuzzy ? m_prefixLast : m_textLast; 0689 const auto result = skipFuzzyAndWhitespace(first, last, m_fuzzyMatcher, /*isInserted=*/textIsFuzzy); 0690 switch (result) { 0691 case FuzzyMatchResult::Fuzzy: 0692 Q_ASSERT(first == last); 0693 if (prefixIsFuzzy) { 0694 setResult(HasMatched::Yes); 0695 } else { 0696 setUnmatchedPrefixCharacterResult(); 0697 } 0698 return false; 0699 case FuzzyMatchResult::NotFuzzy: 0700 if (*m_prefixFirst == *m_textFirst) { 0701 return true; 0702 } 0703 setUnrecoverableMismatchResult(); 0704 return false; 0705 case FuzzyMatchResult::MatchingFailed: 0706 setResult(HasMatched::No); 0707 return false; 0708 } 0709 Q_UNREACHABLE(); 0710 } 0711 Q_ASSERT(prefixIsFuzzy && textIsFuzzy); 0712 return selectFuzzyInsertionRemovalOrReplacement(); 0713 } 0714 0715 /// Chooses between valid insertion and valid removal. 0716 bool shouldRemove(const FindResult<ForwardIt>& insertionResult, const FindResult<ForwardIt>& removalResult) const 0717 { 0718 Q_ASSERT(insertionResult.found); 0719 Q_ASSERT(removalResult.found); 0720 0721 // prefer inserting/removing fewer fuzzy characters 0722 if (removalResult.fuzzyCount != insertionResult.fuzzyCount) { 0723 return removalResult.fuzzyCount < insertionResult.fuzzyCount; 0724 } 0725 0726 // break the tie: prefer inserting/removing fewer whitespace characters 0727 const auto removalDistance = std::distance(m_prefixFirst, removalResult.location); 0728 const auto insertionDistance = std::distance(m_textFirst, insertionResult.location); 0729 if (removalDistance != insertionDistance) { 0730 return removalDistance < insertionDistance; 0731 } 0732 0733 // break the tie: consider an insertion (of e.g. a brace) more likely 0734 return false; 0735 } 0736 0737 /** 0738 * Implements the fuzzy prefix and fuzzy text characters case of skipToMatchingPositions() 0739 * by selecting the likeliest of the 4 possibilities: 0740 * 1) the formatter kept *m_prefixFirst but inserted fuzzy characters before it in text; 0741 * 2) the formatter kept *m_textFirst but removed fuzzy characters before it in prefix; 0742 * 3) the formatter replaced *m_prefixFirst with *m_textFirst; 0743 * 4) the formatter removed all (fuzzy and whitespace) characters in the range [m_prefixFirst, m_prefixLast). 0744 */ 0745 bool selectFuzzyInsertionRemovalOrReplacement() 0746 { 0747 Q_ASSERT(m_prefixFirst != m_prefixLast); 0748 Q_ASSERT(isFuzzy(*m_prefixFirst)); 0749 0750 Q_ASSERT(m_textFirst != m_textLast); 0751 Q_ASSERT(isFuzzy(*m_textFirst)); 0752 0753 Q_ASSERT(*m_prefixFirst != *m_textFirst); 0754 0755 auto insertionResult = findUntilNeitherFuzzyNorWhitespace(m_textFirst, m_textLast, *m_prefixFirst); 0756 auto removalResult = findUntilNeitherFuzzyNorWhitespace(m_prefixFirst, m_prefixLast, *m_textFirst); 0757 0758 if (removalResult.location == m_prefixLast) { 0759 Q_ASSERT(!removalResult.found); 0760 // The formatter could have removed all remaining characters in prefix (the 4th possibility). When 0761 // removalResult.found is true, this possibility is strictly worse than the one stored in removalResult 0762 // (the 2nd possibility). In this scope removalResult.found is false, therefore, the 2nd possibility is 0763 // unavailable and the 4th possibility is essentially stored in removalResult instead. 0764 0765 if (!insertionResult.found) { 0766 // The formatter must have removed *m_prefixFirst, because insertionResult.found is false, and so the 0767 // 1st possibility is unavailable. Choose among the (roughly) 3rd and the 4th possibilities like this: 0768 // 1. Skip fuzzy characters in prefix, starting from *m_prefixFirst. 0769 // 2. When only one fuzzy character absent from the range 0770 // [m_textFirst, insertionResult.location) remains in prefix (the loop condition), 0771 // leave insertionResult.found == false and thus select the 4th possibility. 0772 // 3. When the findUntilNeitherFuzzyNorWhitespace() call below finds a prefix's fuzzy 0773 // character in text, insertionResult.found is set to true and the code below chooses 0774 // between the 3rd and the 4th possibility (with an increased value of m_prefixFirst). 0775 0776 // skippedFuzzySet is a set of skipped fuzzy characters in prefix that are absent from 0777 // the range [m_textFirst, insertionResult.location). This set is used only for optimization: 0778 // to avoid searching for the same fuzzy character in text more than once. 0779 QVarLengthArray<QChar, 4> skippedFuzzySet{*m_prefixFirst}; 0780 while (--removalResult.fuzzyCount > 0) { 0781 const auto result = m_fuzzyMatcher.add(&*m_prefixFirst, /*isInserted=*/false); 0782 if (result == FuzzyMatchResult::MatchingFailed) { 0783 setResult(HasMatched::No); 0784 return false; 0785 } 0786 Q_ASSERT(result == FuzzyMatchResult::Fuzzy); 0787 ++m_prefixFirst; 0788 0789 skipWhitespace(m_prefixFirst, m_prefixLast); 0790 Q_ASSERT_X(m_prefixFirst != m_prefixLast, Q_FUNC_INFO, "Wrong value of removalResult.fuzzyCount?"); 0791 Q_ASSERT(isFuzzy(*m_prefixFirst)); 0792 0793 if (skippedFuzzySet.indexOf(*m_prefixFirst) != -1) { 0794 continue; // do not fruitlessly search for the same fuzzy character in text again 0795 } 0796 0797 insertionResult = findUntilNeitherFuzzyNorWhitespace(m_textFirst, m_textLast, *m_prefixFirst); 0798 if (insertionResult.found) { 0799 break; // let the code below choose between valid insertion and valid removal 0800 } 0801 skippedFuzzySet.push_back(*m_prefixFirst); 0802 } 0803 } 0804 0805 // Mark removalResult as valid in order to let the code below consider the 4th possibility stored in it. 0806 removalResult.found = true; 0807 } else if (!insertionResult.found && !removalResult.found) { 0808 // *m_textFirst replaces *m_prefixFirst 0809 bool isInserted = false; 0810 for (auto it : {m_prefixFirst, m_textFirst}) { 0811 const auto result = m_fuzzyMatcher.add(&*it, isInserted); 0812 if (result == FuzzyMatchResult::MatchingFailed) { 0813 setResult(HasMatched::No); 0814 return false; 0815 } 0816 Q_ASSERT(result == FuzzyMatchResult::Fuzzy); 0817 0818 isInserted = true; 0819 } 0820 return true; 0821 } 0822 0823 if (insertionResult.found && removalResult.found) { 0824 const bool remove = shouldRemove(insertionResult, removalResult); 0825 // select insertion or removal by disabling the losing choice 0826 (remove ? insertionResult : removalResult).found = false; 0827 } 0828 0829 bool isInserted; 0830 ForwardIt* first; // this is a pointer in order to automatically modify the referenced data member 0831 ForwardIt last; 0832 if (insertionResult.found) { 0833 Q_ASSERT(!removalResult.found); 0834 isInserted = true; 0835 first = &m_textFirst; 0836 last = insertionResult.location; 0837 } else { 0838 Q_ASSERT(removalResult.found); 0839 isInserted = false; 0840 first = &m_prefixFirst; 0841 last = removalResult.location; 0842 } 0843 0844 const auto result = skipFuzzyAndWhitespace(*first, last, m_fuzzyMatcher, isInserted); 0845 if (result == FuzzyMatchResult::MatchingFailed) { 0846 setResult(HasMatched::No); 0847 return false; 0848 } 0849 Q_ASSERT(result == FuzzyMatchResult::Fuzzy); 0850 Q_ASSERT(*first == last); 0851 0852 Q_ASSERT(m_textFirst != m_textLast); 0853 if (m_prefixFirst == m_prefixLast) { 0854 setResult(HasMatched::Yes); 0855 return false; 0856 } 0857 Q_ASSERT(*m_prefixFirst == *m_textFirst); 0858 return true; 0859 } 0860 0861 enum class HasMatched { No, Yes }; 0862 0863 void setResult(HasMatched hasMatched) 0864 { 0865 Q_ASSERT_X(hasMatched == HasMatched::No || m_prefixFirst == m_prefixLast, Q_FUNC_INFO, 0866 "The entire prefix must be examined before reporting a successful match."); 0867 0868 m_result.hasMatched = hasMatched == HasMatched::Yes; 0869 m_result.matchEnd = m_textFirst; 0870 } 0871 0872 void setUnmatchedPrefixCharacterResult() 0873 { 0874 Q_ASSERT(m_prefixFirst != m_prefixLast); 0875 Q_ASSERT(!m_prefixFirst->isSpace()); 0876 Q_ASSERT(!isFuzzy(*m_prefixFirst)); 0877 0878 Q_ASSERT(m_textFirst == m_textLast); 0879 0880 qCWarning(UTIL) << "giving up formatting because of a character in prefix not matched in text:" 0881 << *m_prefixFirst; 0882 setResult(HasMatched::No); 0883 } 0884 0885 void setUnrecoverableMismatchResult() 0886 { 0887 Q_ASSERT(m_prefixFirst != m_prefixLast); 0888 Q_ASSERT(!m_prefixFirst->isSpace()); 0889 Q_ASSERT(!isFuzzy(*m_prefixFirst)); 0890 0891 Q_ASSERT(m_textFirst != m_textLast); 0892 Q_ASSERT(!m_textFirst->isSpace()); 0893 Q_ASSERT(!isFuzzy(*m_textFirst)); 0894 0895 Q_ASSERT(*m_prefixFirst != *m_textFirst); 0896 0897 qCWarning(UTIL) << "giving up formatting because of unrecoverable character mismatch:" << *m_prefixFirst 0898 << "!=" << *m_textFirst; 0899 setResult(HasMatched::No); 0900 } 0901 0902 ForwardIt m_prefixFirst; 0903 const ForwardIt m_prefixLast; 0904 ForwardIt m_textFirst; 0905 const ForwardIt m_textLast; 0906 0907 FuzzyMatcher& m_fuzzyMatcher; 0908 0909 Result m_result{false, m_textFirst}; 0910 }; 0911 0912 /** 0913 * Matches the given unformatted text against the given formatted text exactly and validates the match. 0914 * Skips whitespace. Skips fuzzy (defined by isFuzzy()) characters using the given @p fuzzyMatcher. 0915 * @return whether the match was successful. 0916 */ 0917 bool matchFormattedText(const QString& text, QStringView formattedText, CompleteFuzzyMatcher&& fuzzyMatcher, 0918 bool isEndAContextTextBoundary) 0919 { 0920 // This intermediary view guarantees that iterators passed to PrefixMatcher() are of the same type. 0921 const QStringView textView = text; 0922 PrefixMatcher prefixMatcher(textView.cbegin(), textView.cend(), formattedText.cbegin(), formattedText.cend(), 0923 fuzzyMatcher); 0924 auto result = prefixMatcher.match(isEndAContextTextBoundary); 0925 if (!result.hasMatched) { 0926 return false; 0927 } 0928 0929 // Skip to verify that only whitespace and fuzzy characters remain in the formatted text after result.matchEnd. 0930 // Satisfy the preconditions of skipFuzzyAndWhitespace() by skipping whitespace 0931 // and checking whether the range is empty before calling it. 0932 skipWhitespace(result.matchEnd, formattedText.cend()); 0933 if (result.matchEnd != formattedText.cend()) { 0934 const auto fuzzyResult = 0935 skipFuzzyAndWhitespace(result.matchEnd, formattedText.cend(), fuzzyMatcher, /*isInserted=*/true); 0936 if (fuzzyResult == FuzzyMatchResult::MatchingFailed) { 0937 return false; 0938 } 0939 if (result.matchEnd != formattedText.cend()) { 0940 Q_ASSERT(fuzzyResult == FuzzyMatchResult::NotFuzzy); 0941 qCWarning(UTIL) << "giving up formatting because of a character in formatted text not matched in text:" 0942 << *result.matchEnd; 0943 return false; 0944 } 0945 Q_ASSERT(fuzzyResult == FuzzyMatchResult::Fuzzy); 0946 } 0947 0948 return fuzzyMatcher.validate(); 0949 } 0950 0951 QString reverse(QStringView str) 0952 { 0953 QString ret; 0954 ret.reserve(str.length()); 0955 for (int a = str.length() - 1; a >= 0; --a) 0956 ret.append(str[a]); 0957 0958 return ret; 0959 } 0960 0961 // Returns the text start position with all whitespace that is redundant in the given context skipped 0962 int skipRedundantWhiteSpace(QStringView context, QStringView text, int tabWidth) 0963 { 0964 if (context.isEmpty() || !context[context.size() - 1].isSpace() || text.isEmpty() || !text[0].isSpace()) 0965 return 0; 0966 0967 int textPosition = 0; 0968 0969 // Extract trailing whitespace in the context 0970 int contextPosition = context.size() - 1; 0971 while (contextPosition > 0 && context[contextPosition - 1].isSpace()) 0972 --contextPosition; 0973 0974 int textWhitespaceEnd = 0; 0975 while (textWhitespaceEnd < text.size() && text[textWhitespaceEnd].isSpace()) 0976 ++textWhitespaceEnd; 0977 0978 auto contextWhiteSpace = context.mid(contextPosition); 0979 auto textWhiteSpace = text.left(textWhitespaceEnd); 0980 0981 // Step 1: Remove redundant newlines 0982 while (contextWhiteSpace.contains(QLatin1Char('\n')) && textWhiteSpace.contains(QLatin1Char('\n'))) { 0983 int contextOffset = contextWhiteSpace.indexOf(QLatin1Char('\n')) + 1; 0984 int textOffset = textWhiteSpace.indexOf(QLatin1Char('\n')) + 1; 0985 0986 contextWhiteSpace = contextWhiteSpace.mid(contextOffset); 0987 0988 textPosition += textOffset; 0989 textWhiteSpace = textWhiteSpace.mid(textOffset); 0990 } 0991 0992 int contextOffset = 0; 0993 int textOffset = 0; 0994 // Skip redundant ordinary whitespace 0995 while (contextOffset < contextWhiteSpace.size() && textOffset < textWhiteSpace.size() && 0996 contextWhiteSpace[contextOffset].isSpace() && contextWhiteSpace[contextOffset] != QLatin1Char('\n') && 0997 textWhiteSpace[textOffset].isSpace() && textWhiteSpace[textOffset] != QLatin1Char('\n')) { 0998 bool contextWasTab = contextWhiteSpace[contextOffset] == QLatin1Char('\t'); 0999 bool textWasTab = textWhiteSpace[contextOffset] == QLatin1Char('\t'); 1000 ++contextOffset; 1001 ++textOffset; 1002 if (contextWasTab != textWasTab) { 1003 // Problem: We have a mismatch of tabs and/or ordinary whitespaces 1004 if (contextWasTab) { 1005 for (int s = 1; s < tabWidth; ++s) 1006 if (textOffset < textWhiteSpace.size() && textWhiteSpace[textOffset] == QLatin1Char(' ')) 1007 ++textOffset; 1008 } else if (textWasTab) { 1009 for (int s = 1; s < tabWidth; ++s) 1010 if (contextOffset < contextWhiteSpace.size() && 1011 contextWhiteSpace[contextOffset] == QLatin1Char(' ')) 1012 ++contextOffset; 1013 } 1014 } 1015 } 1016 textPosition += textOffset; 1017 1018 Q_ASSERT(textPosition >= 0); 1019 Q_ASSERT(textPosition <= text.size()); 1020 return textPosition; 1021 } 1022 1023 } // unnamed namespace 1024 1025 QString KDevelop::extractFormattedTextFromContext(const QString& _formattedMergedText, const QString& text, 1026 QStringView leftContext, QStringView rightContext, int tabWidth) 1027 { 1028 if (leftContext.isEmpty() && rightContext.isEmpty()) { 1029 return _formattedMergedText; // fast path, nothing to do 1030 } 1031 1032 QStringView formattedMergedText = _formattedMergedText; 1033 //Now remove "leftContext" and "rightContext" from the sides 1034 1035 // For double quotes, C-style comments and brackets of each type, ensure that inserted or removed opening characters 1036 // (or sequences) are matched by inserted or removed closing characters (or sequences) in the formatted version of 1037 // text. The matching closing entities should be encountered after the opening ones. 1038 // Tracking/matching only inserted and removed (ignoring untouched) fuzzy characters is important to prevent 1039 // issues with commented out and (conditionally) disabled by preprocessor code. The assumption is that formatters 1040 // do not break code and do not insert or remove unmatched quotes, comments or brackets into disabled code. 1041 1042 // For simplicity, only track double quotes (and not comments or brackets) while prefix-matching the two 1043 // contexts. Brackets opened in the left context can be closed in the right context. Such closing could be 1044 // validated, but is not, in order to avoid further complicating the implementation. Validating quotes, 1045 // comments and all bracket types while matching text against its formatted counterpart should be sufficient. 1046 1047 // Fuzzy characters inserted at context-text boundaries are always included into the formatted text fragment. 1048 // If this greedy inclusion produces a quote, comment or bracket mismatch, unformatted text is returned. 1049 // We could try to achieve a match by giving some of such inserted fuzzy characters to contexts. But that would 1050 // substantially complicate the implementation and likely worsen existing formatting. For example, a brace inserted 1051 // at a context-text boundary affects the surrounding whitespace. If we give the brace to the adjacent context, 1052 // it won't be part of the final version of formatted text, and therefore our attempts to compute whitespace to be 1053 // included into formatted text at the affected boundary would probably produce an unsatisfactory result. 1054 1055 const auto handleDoubleQuoteMatchFailure = [] { 1056 // The formatter inserted or removed a double quote in exactly one of two contexts, which means that it 1057 // inserted/removed the matching double quote into the formatted text. Therefore, reformatting only 1058 // the text fragment would produce an unpaired quote and break code. Give up formatting to prevent that. 1059 DoubleQuoteValidator::printFailureWarning(); 1060 }; 1061 1062 bool hasUnmatchedDoubleQuote = false; 1063 1064 if (!leftContext.isEmpty()) { 1065 DoubleQuoteFuzzyMatcher fuzzyMatcher; 1066 // Inform the matcher that the beginning of the left context is not a context-text boundary. 1067 fuzzyMatcher.firstNonWhitespaceCharacterMatch(); 1068 PrefixMatcher prefixMatcher(leftContext.cbegin(), leftContext.cend(), formattedMergedText.cbegin(), 1069 formattedMergedText.cend(), fuzzyMatcher); 1070 const auto result = prefixMatcher.match(); 1071 if (!result.hasMatched) { 1072 return text; 1073 } 1074 1075 hasUnmatchedDoubleQuote = fuzzyMatcher.hasUnmatchedDoubleQuote(); 1076 if (hasUnmatchedDoubleQuote && rightContext.isEmpty()) { 1077 handleDoubleQuoteMatchFailure(); 1078 return text; 1079 } 1080 1081 // include all possible whitespace at the context-text boundary 1082 auto rMatchEnd = std::make_reverse_iterator(result.matchEnd); 1083 skipWhitespace(rMatchEnd, formattedMergedText.crend()); 1084 // remove the left context from formattedMergedText 1085 formattedMergedText = formattedMergedText.right(rMatchEnd - formattedMergedText.crbegin()); 1086 1087 int skip = skipRedundantWhiteSpace(leftContext, formattedMergedText, tabWidth); 1088 formattedMergedText = formattedMergedText.mid(skip); 1089 } 1090 1091 if (!rightContext.isEmpty()) { 1092 DoubleQuoteFuzzyMatcher fuzzyMatcher; 1093 fuzzyMatcher.setReverseAddressDirection(); 1094 // Inform the matcher that the end of the right context is not a context-text boundary. 1095 fuzzyMatcher.firstNonWhitespaceCharacterMatch(); 1096 PrefixMatcher prefixMatcher(rightContext.crbegin(), rightContext.crend(), formattedMergedText.crbegin(), 1097 formattedMergedText.crend(), fuzzyMatcher); 1098 const auto result = prefixMatcher.match(); 1099 if (!result.hasMatched) { 1100 return text; 1101 } 1102 1103 if (fuzzyMatcher.hasUnmatchedDoubleQuote() != hasUnmatchedDoubleQuote) { 1104 handleDoubleQuoteMatchFailure(); 1105 return text; 1106 } 1107 1108 // include all possible whitespace at the context-text boundary 1109 auto matchEnd = result.matchEnd.base(); 1110 skipWhitespace(matchEnd, formattedMergedText.cend()); 1111 // remove the right context from formattedMergedText 1112 formattedMergedText.chop(formattedMergedText.cend() - matchEnd); 1113 1114 int skip = skipRedundantWhiteSpace(reverse(rightContext), reverse(formattedMergedText), tabWidth); 1115 formattedMergedText.chop(skip); 1116 } 1117 1118 CompleteFuzzyMatcher fuzzyMatcher({&*text.cbegin(), &*text.cend()}, 1119 {&*_formattedMergedText.cbegin(), &*_formattedMergedText.cend()}); 1120 if (leftContext.isEmpty()) { 1121 // Inform the matcher that the beginning of the text is not a context-text boundary. 1122 fuzzyMatcher.firstNonWhitespaceCharacterMatch(); 1123 } 1124 if (hasUnmatchedDoubleQuote) { 1125 // The formatter inserted or removed a double quote into each context. A double quote inserted into 1126 // or removed from text would be a matching closing or moved across context-text boundaries double quote. 1127 // Disable matching double quotes to treat two consecutive inserted or removed double quotes as 1128 // a match failure rather than the usual opening and closing double quotes, which they are not. 1129 fuzzyMatcher.disableMatchingDoubleQuotes(); 1130 } 1131 if (!matchFormattedText(text, formattedMergedText, std::move(fuzzyMatcher), 1132 /*isEndAContextTextBoundary =*/!rightContext.isEmpty())) { 1133 return text; 1134 } 1135 1136 return formattedMergedText.toString(); 1137 }