kdevplatform/util/formattinghelpers.cpp

0001 /*
0002     SPDX-FileCopyrightText: 2011 David Nolden <david.nolden.kdevelop@art-master.de>
0003     SPDX-FileCopyrightText: 2023 Igor Kushnir <igorkuo@gmail.com>
0004
0005     SPDX-License-Identifier: GPL-2.0-or-later
0006 */
0007
0008 #include "formattinghelpers.h"
0009
0010 #include "debug.h"
0011
0012 #include <QChar>
0013 #include <QString>
0014 #include <QStringView>
0015 #include <QVarLengthArray>
0016
0017 #include <algorithm>
0018 #include <array>
0019 #include <cstdlib>
0020 #include <cstring>
0021 #include <iterator>
0022 #include <utility>
0023
0024 namespace {
0025
0026 bool asciiStringContains(const char* str, QChar c)
0027 {
0028     constexpr ushort maxAscii{127};
0029     return c.unicode() <= maxAscii && std::strchr(str, c.unicode());
0030 }
0031
0032 bool isFuzzy(QChar c)
0033 {
0034     return asciiStringContains("{}()\"/\\*", c);
0035 }
0036
0037 bool isFuzzyBracket(QChar c)
0038 {
0039     return asciiStringContains("{}()", c);
0040 }
0041
0042 class DoubleQuoteValidator
0043 {
0044 public:
0045     /**
0046      * Disable matching and clearing two consecutive inserted or removed double quotes.
0047      *
0048      * A double quote is closed by the next double quote by default.
0049      * Once this function is called, such a sequence is considered invalid and reported as a match failure.
0050      */
0051     void disableMatchingDoubleQuotes()
0052     {
0053         m_matchDoubleQuotes = false;
0054     }
0055
0056     /**
0057      * Insert or remove a double quote.
0058      * @return @c false if double quote matching fails.
0059      */
0060     bool addDoubleQuote(bool isInserted);
0061
0062     bool hasUnmatchedDoubleQuote() const
0063     {
0064         return m_unmatchedDoubleQuote;
0065     }
0066
0067     /**
0068      * Finish validation.
0069      * @return @c false if double quote matching fails.
0070      */
0071     bool validate()
0072     {
0073         if (m_unmatchedDoubleQuote) {
0074             printFailureWarning();
0075             return false;
0076         }
0077         return true;
0078     }
0079
0080     static void printFailureWarning()
0081     {
0082         qCWarning(UTIL) << "giving up formatting because the formatter inserted or removed "
0083                            "a pair of double quotes across context-text boundaries";
0084     }
0085
0086 private:
0087     bool m_matchDoubleQuotes = true;
0088     /// Whether a double quote has been inserted or removed and not yet matched.
0089     bool m_unmatchedDoubleQuote = false;
0090     /// Whether the unmatched double quote was inserted or removed.
0091     /// This data member has a meaning only if @a m_unmatchedDoubleQuote equals @c true.
0092     bool m_unmatchedDoubleQuoteWasInserted = false;
0093 };
0094
0095 bool DoubleQuoteValidator::addDoubleQuote(bool isInserted)
0096 {
0097     if (!m_unmatchedDoubleQuote) {
0098         m_unmatchedDoubleQuote = true;
0099         m_unmatchedDoubleQuoteWasInserted = isInserted;
0100         return true;
0101     }
0102
0103     if (m_matchDoubleQuotes || m_unmatchedDoubleQuoteWasInserted != isInserted) {
0104         // Either the two double quotes are matched and cleared, or one was inserted and another removed,
0105         // which unconditionally cancels them out (could be some code reordering by the formatter).
0106         m_unmatchedDoubleQuote = false;
0107         return true;
0108     }
0109
0110     // Fail because of two consecutive inserted or removed double quotes, matching which has been disabled.
0111     printFailureWarning();
0112     return false;
0113 }
0114
0115 /**
0116  * Tracks and validates inserted and removed, opening and closing brackets of a single type, e.g. '{' and '}'.
0117  */
0118 class BracketStack
0119 {
0120 public:
0121     /**
0122      * Insert or remove a bracket.
0123      */
0124     void addBracket(bool isOpening, bool isInserted)
0125     {
0126         const auto countIncrement = isInserted ? 1 : -1;
0127         if (!m_data.empty()) {
0128             auto& last = m_data.back();
0129             if (last.isOpening == isOpening) {
0130                 last.count += countIncrement;
0131                 if (last.count == 0) {
0132                     m_data.pop_back(); // remove the now empty sequence
0133                 }
0134                 return;
0135             }
0136         }
0137
0138         m_data.push_back({isOpening, countIncrement});
0139     }
0140
0141     /**
0142      * Validate brackets once parsing ends.
0143      * @return @c false if matching brackets fails.
0144      * @note Validation is destructive, that is, it transitions this object into an undefined state.
0145      */
0146     bool validate()
0147     {
0148         // Inserted opening brackets normally match inserted closing brackets. Same for removed brackets.
0149         // In addition, inserted opening brackets match removed opening brackets. Same for closing brackets.
0150         // An inserted-removed match can mean that an unformatted-formatted text matcher misinterpreted the formatter's
0151         // intention (other fuzzy characters could actually be inserted or removed near the untouched bracket).
0152         // Such a misinterpretation is not a problem at all thanks to the inserted-removed match.
0153
0154         // We move backwards, so closing brackets must be encountered before the matching open brackets.
0155         int closingBracketCount = 0;
0156         for (int i = m_data.size() - 1; i >= 0; --i) {
0157             auto p = m_data.at(i);
0158             Q_ASSERT(p.count != 0);
0159
0160             if (p.isOpening) {
0161                 if ((closingBracketCount > 0) != (p.count > 0)) {
0162                     // Insertion/removal mismatch of opening and closing brackets. When a closing bracket is
0163                     // inserted and an opening bracket is removed (or vice versa), they clearly cannot match.
0164                     return false;
0165                 }
0166                 if (std::abs(closingBracketCount) < std::abs(p.count)) {
0167                     return false; // not enough closing brackets to match all opening brackets
0168                 }
0169                 closingBracketCount -= p.count;
0170             } else {
0171                 closingBracketCount += p.count;
0172             }
0173         }
0174         return closingBracketCount == 0; // are all closing brackets matched by opening brackets?
0175     }
0176
0177 private:
0178     struct BracketSequence
0179     {
0180         bool isOpening : 1; ///< whether the brackets in this sequence are of the opening type
0181         int count : 31; ///< abs(count) is the number of consecutive inserted (count > 0) or removed (count < 0) brackets
0182     };
0183
0184     QVarLengthArray<BracketSequence, 64> m_data;
0185 };
0186
0187 class BracketValidator
0188 {
0189 public:
0190     /**
0191      * Insert or remove a fuzzy bracket.
0192      * @pre isFuzzyBracket(@p bracket)
0193      */
0194     void addBracket(QChar bracket, bool isInserted);
0195
0196     /// Insert or remove an opening "/*" or closing "*/" comment sequence.
0197     void addCommentSequence(bool isOpening, bool isInserted)
0198     {
0199         m_stacks[commentsIndex].addBracket(isOpening, isInserted);
0200     }
0201
0202     /**
0203      * Validate brackets and comments once parsing ends.
0204      * @return @c false if matching brackets or comments fails.
0205      * @note Validation is destructive, that is, it transitions this object into an undefined state.
0206      */
0207     bool validate()
0208     {
0209         for (std::size_t i = 0; i < m_stacks.size(); ++i) {
0210             if (!m_stacks[i].validate()) {
0211                 qCWarning(UTIL) << "giving up formatting because the formatter inserted or removed the pair"
0212                                 << m_stackStrings[i] << "across context-text boundaries";
0213                 return false;
0214             }
0215         }
0216         return true;
0217     }
0218
0219 private:
0220     // the following 3 constants are indices into m_stacks and m_stackStrings
0221     static constexpr std::size_t bracesIndex = 0; ///< { }
0222     static constexpr std::size_t parenthesesIndex = 1; ///< ( )
0223     static constexpr std::size_t commentsIndex = 2; ///< /* */
0224     /// human-readable identifications of fuzzy bracket types
0225     static constexpr std::array<const char*, 3> m_stackStrings = {"{ }", "( )", "/* */"};
0226
0227     std::array<BracketStack, 3> m_stacks; ///< stacks of fuzzy brackets of each type
0228 };
0229
0230 void BracketValidator::addBracket(QChar bracket, bool isInserted)
0231 {
0232     Q_ASSERT(isFuzzyBracket(bracket));
0233
0234     std::size_t index;
0235     bool isOpening;
0236     switch (bracket.unicode()) {
0237     case '{':
0238         index = bracesIndex;
0239         isOpening = true;
0240         break;
0241     case '}':
0242         index = bracesIndex;
0243         isOpening = false;
0244         break;
0245     case '(':
0246         index = parenthesesIndex;
0247         isOpening = true;
0248         break;
0249     case ')':
0250         index = parenthesesIndex;
0251         isOpening = false;
0252         break;
0253     default:
0254         Q_UNREACHABLE();
0255     }
0256
0257     m_stacks[index].addBracket(isOpening, isInserted);
0258 }
0259
0260 enum class FuzzyMatchResult { Fuzzy, NotFuzzy, MatchingFailed };
0261
0262 class FuzzyMatcher
0263 {
0264     Q_DISABLE_COPY_MOVE(FuzzyMatcher)
0265 public:
0266     virtual ~FuzzyMatcher() = default;
0267     /**
0268      * Inserts (if @p isInserted == @c true) or removes (otherwise) a single character at
0269      * the specified address @p characterLocation.
0270      *
0271      * Prints an explaining warning when @c FuzzyMatchResult::MatchingFailed is returned.
0272      *
0273      * @param characterLocation a valid character address into a complete string or view.
0274      *        The address is used to group consecutive characters '/' and '*' into a C-style comment.
0275      * @return the added character's classification (fuzzy or not) or @c MatchingFailed if a match failure occurs.
0276      */
0277     virtual FuzzyMatchResult add(const QChar* characterLocation, bool isInserted) = 0;
0278
0279     /**
0280      * Informs this matcher that the first exact match of a character in unformatted and formatted text just occurred.
0281      *
0282      * May be called twice. For example, the first call right after construction indicates that the ranges
0283      * do not begin at a context-text boundary, and thus the matcher should not fail if a character, which is
0284      * forbidden at a boundary, is removed or inserted before the second (regular) call when the exact match occurs.
0285      */
0286     virtual void firstNonWhitespaceCharacterMatch() = 0;
0287     /**
0288      * Specifies the locations in the two matched ranges of the last exact match of a character.
0289      *
0290      * Call this function if the ranges end at a context-text boundary.
0291      * The function must be called after all fuzzy characters, which precede
0292      * the last-match addresses in the iteration order, have been added.
0293      * Prints an explaining warning when @c false is returned.
0294      *
0295      * @param removalRangeMatchAddress the address of the last matching character (or the very first
0296      *                                 address if there were no matches) in the complete string or view,
0297      *                                 from which fuzzy characters can be removed.
0298      * @param insertionRangeMatchAddress the address of the last matching character (or the very first
0299      *                                   address if there were no matches) in the complete string or view,
0300      *                                   into which fuzzy characters can be inserted.
0301      * @return @c false if matching fails immediately.
0302      */
0303     virtual bool lastNonWhitespaceCharacterMatch(const QChar* removalRangeMatchAddress,
0304                                                  const QChar* insertionRangeMatchAddress) = 0;
0305
0306 protected:
0307     FuzzyMatcher() = default;
0308 };
0309
0310 /**
0311  * This class reports a match failure if a double quote is removed or inserted at the boundary between context and text.
0312  * That is, after a nonempty left context and before the first exact non-whitespace character match between
0313  * prefix and text, or after the last exact non-whitespace character match and before text or nonempty right context.
0314  *
0315  * Such a removal or insertion should cancel formatting, because whitespace within string literals is significant,
0316  * but the implementation of formatted text extraction is not sophisticated enough to distinguish it from regular
0317  * whitespace that is subject to formatting manipulations. So the combination of a double quote insertion/removal
0318  * by a formatter and an unlucky selection of a text fragment for formatting can cause whitespace changes within
0319  * a string literal. The purpose of this class is to reduce the probability of such a quiet breaking code change.
0320  */
0321 class BoundaryFuzzyMatcher : public FuzzyMatcher
0322 {
0323 public:
0324     /**
0325      * Indicates that higher addresses are encountered before lower addresses (reverse iteration).
0326      */
0327     void setReverseAddressDirection()
0328     {
0329         m_reverseAddressDirection = true;
0330     }
0331
0332     void firstNonWhitespaceCharacterMatch() override
0333     {
0334         m_allowDoubleQuoteChanges = true;
0335     }
0336
0337     bool lastNonWhitespaceCharacterMatch(const QChar* removalRangeMatchAddress,
0338                                          const QChar* insertionRangeMatchAddress) override
0339     {
0340         if (!validate(m_lastRemovedDoubleQuoteAddress, removalRangeMatchAddress)) {
0341             printFailureWarning(false);
0342             return false;
0343         }
0344         if (!validate(m_lastInsertedDoubleQuoteAddress, insertionRangeMatchAddress)) {
0345             printFailureWarning(true);
0346             return false;
0347         }
0348         m_allowDoubleQuoteChanges = false;
0349         return true;
0350     }
0351
0352 protected:
0353     /**
0354      * Derived classes must call this function each time a double quote is passed to add().
0355      * @return @c false if matching fails immediately.
0356      */
0357     bool addDoubleQuote(const QChar* location, bool isInserted)
0358     {
0359         Q_ASSERT(location);
0360         Q_ASSERT(*location == QLatin1Char{'"'});
0361         if (!m_allowDoubleQuoteChanges) {
0362             printFailureWarning(isInserted);
0363             return false;
0364         }
0365         (isInserted ? m_lastInsertedDoubleQuoteAddress : m_lastRemovedDoubleQuoteAddress) = location;
0366         return true;
0367     }
0368
0369 private:
0370     bool validate(const QChar* lastDoubleQuoteAddress, const QChar* lastMatchAddress) const
0371     {
0372         if (!lastDoubleQuoteAddress) {
0373             return true;
0374         }
0375         // lastDoubleQuoteAddress can equal lastMatchAddress in case there were no matches (lastMatchAddress
0376         // is the very first address, i.e. the beginning of the range, then), and the formatter happened to
0377         // remove or insert a double quote at this same address. In this case we correctly return false.
0378         return m_reverseAddressDirection ? lastDoubleQuoteAddress > lastMatchAddress
0379                                          : lastDoubleQuoteAddress < lastMatchAddress;
0380     }
0381
0382     static void printFailureWarning(bool isInserted)
0383     {
0384         qCWarning(UTIL) << "giving up formatting because the formatter" << (isInserted ? "inserted" : "removed")
0385                         << "a double quote at a context-text boundary";
0386     }
0387
0388     bool m_reverseAddressDirection = false;
0389     bool m_allowDoubleQuoteChanges = false;
0390     const QChar* m_lastRemovedDoubleQuoteAddress = nullptr;
0391     const QChar* m_lastInsertedDoubleQuoteAddress = nullptr;
0392 };
0393
0394 class DoubleQuoteFuzzyMatcher : public BoundaryFuzzyMatcher
0395 {
0396 public:
0397     FuzzyMatchResult add(const QChar* characterLocation, bool isInserted) override
0398     {
0399         Q_ASSERT(characterLocation);
0400         const auto c = *characterLocation;
0401         if (c == QLatin1Char{'"'}) {
0402             const bool valid = addDoubleQuote(characterLocation, isInserted) && m_validator.addDoubleQuote(isInserted);
0403             return valid ? FuzzyMatchResult::Fuzzy : FuzzyMatchResult::MatchingFailed;
0404         }
0405         return isFuzzy(c) ? FuzzyMatchResult::Fuzzy : FuzzyMatchResult::NotFuzzy;
0406     }
0407
0408     bool hasUnmatchedDoubleQuote() const
0409     {
0410         return m_validator.hasUnmatchedDoubleQuote();
0411     }
0412
0413 private:
0414     DoubleQuoteValidator m_validator;
0415 };
0416
0417 /**
0418  * A fuzzy matcher that tracks and validates double quotes, brackets and comments.
0419  *
0420  * Supports only direct iteration over a string [view], that is, each added insertion
0421  * address must be greater than all previous added insertion addresses. Same for removal addresses.
0422  */
0423 class CompleteFuzzyMatcher : public BoundaryFuzzyMatcher
0424 {
0425 public:
0426     struct Range
0427     {
0428         const QChar* first;
0429         const QChar* last;
0430     };
0431
0432     /**
0433      * @param removalRange the address range of the complete string or view,
0434      *                     from which fuzzy characters can be removed.
0435      * @param insertionRange the address range of the complete string or view,
0436      *                       into which fuzzy characters can be inserted.
0437      */
0438     explicit CompleteFuzzyMatcher(Range removalRange, Range insertionRange)
0439         : m_validRanges{removalRange, insertionRange}
0440     {
0441     }
0442
0443     void disableMatchingDoubleQuotes()
0444     {
0445         m_doubleQuoteValidator.disableMatchingDoubleQuotes();
0446     }
0447
0448     FuzzyMatchResult add(const QChar* characterLocation, bool isInserted) override
0449     {
0450         Q_ASSERT(characterLocation);
0451         const auto& validRange = m_validRanges[isInserted];
0452         Q_ASSERT(characterLocation >= validRange.first);
0453         Q_ASSERT(characterLocation < validRange.last);
0454
0455         const auto c = *characterLocation;
0456         switch (c.unicode()) {
0457         case '"': {
0458             const bool valid =
0459                 addDoubleQuote(characterLocation, isInserted) && m_doubleQuoteValidator.addDoubleQuote(isInserted);
0460             return valid ? FuzzyMatchResult::Fuzzy : FuzzyMatchResult::MatchingFailed;
0461         }
0462         case '/':
0463         case '*': {
0464             const auto*& prev = m_lastEndOfCommentAddresses[isInserted];
0465             Q_ASSERT(!prev || prev <= characterLocation);
0466             if (prev == characterLocation) {
0467                 // a comment sequence ending at characterLocation has been added already, do nothing else
0468             } else if (characterLocation > validRange.first && prev != characterLocation - 1
0469                        && addIfCommentSequence(*(characterLocation - 1), c, isInserted)) {
0470                 // NOTE: the (prev != characterLocation - 1) check above prevents
0471                 // finding two comment sequences in the string "/*/".
0472                 prev = characterLocation;
0473             } else if (characterLocation + 1 < validRange.last
0474                        && addIfCommentSequence(c, *(characterLocation + 1), isInserted)) {
0475                 prev = characterLocation + 1;
0476             }
0477             return FuzzyMatchResult::Fuzzy;
0478         }
0479         default:
0480             if (!isFuzzy(c)) {
0481                 return FuzzyMatchResult::NotFuzzy;
0482             }
0483             if (isFuzzyBracket(c)) {
0484                 m_bracketValidator.addBracket(c, isInserted);
0485             }
0486             return FuzzyMatchResult::Fuzzy;
0487         }
0488     }
0489
0490     bool validate()
0491     {
0492         return m_doubleQuoteValidator.validate() && m_bracketValidator.validate();
0493     }
0494
0495 private:
0496     bool addIfCommentSequence(QChar a, QChar b, bool isInserted)
0497     {
0498         bool isOpening;
0499         if (a == QLatin1Char{'/'} && b == QLatin1Char{'*'}) {
0500             isOpening = true;
0501         } else if (a == QLatin1Char{'*'} && b == QLatin1Char{'/'}) {
0502             isOpening = false;
0503         } else {
0504             return false;
0505         }
0506         m_bracketValidator.addCommentSequence(isOpening, isInserted);
0507         return true;
0508     }
0509
0510     // NOTE: both array data members are indexed by isInserted.
0511
0512     const std::array<Range, 2> m_validRanges;
0513     /// Each element is the address of the second part of a C-style comment sequence, which was
0514     /// last added as a removed/inserted comment. So each element equals @c nullptr or points to '/' or '*'.
0515     std::array<const QChar*, 2> m_lastEndOfCommentAddresses{nullptr, nullptr};
0516
0517     DoubleQuoteValidator m_doubleQuoteValidator;
0518     BracketValidator m_bracketValidator;
0519 };
0520
0521 template<typename ForwardIt>
0522 void skipWhitespace(ForwardIt& first, ForwardIt last)
0523 {
0524     first = std::find_if_not(first, last, [](QChar c) {
0525         return c.isSpace();
0526     });
0527 }
0528
0529 template<typename ForwardIt>
0530 struct FindResult
0531 {
0532     bool found = false; ///< whether the needle has been found
0533     int fuzzyCount = 0; ///< the number of fuzzy characters skipped before @a location
0534     ForwardIt location; ///< the location of the needle or a non-whitespace non-fuzzy character, or the search range end
0535 };
0536
0537 /**
0538  * Finds @p needle in [@p first, @p last).
0539  *
0540  * If a non-whitespace non-fuzzy character is encountered before @p needle,
0541  * sets the returned result's @a found to @c false and returns the said character's location.
0542  */
0543 template<typename ForwardIt>
0544 FindResult<ForwardIt> findUntilNeitherFuzzyNorWhitespace(ForwardIt first, ForwardIt last, QChar needle)
0545 {
0546     FindResult<ForwardIt> result;
0547     for (; first != last; ++first) {
0548         if (*first == needle) {
0549             result.found = true;
0550             break;
0551         }
0552         if (first->isSpace()) {
0553         } else if (isFuzzy(*first)) {
0554             ++result.fuzzyCount;
0555         } else {
0556             break;
0557         }
0558     }
0559     result.location = first;
0560     return result;
0561 }
0562
0563 /**
0564  * Advances @p first until it points to a neither fuzzy nor whitespace character or until
0565  * @p fuzzyMatcher fails or until @p first becomes equal to @p last, whichever happens first.
0566  *
0567  * @pre @p first != @p last && @p !first->isSpace()
0568  * @post @p first == @p last if and only if only whitespace and fuzzy characters
0569  *       have been encountered and @p fuzzyMatcher hasn't failed.
0570  * @return the last return value of FuzzyMatcher::add()
0571  */
0572 template<typename ForwardIt>
0573 FuzzyMatchResult skipFuzzyAndWhitespace(ForwardIt& first, ForwardIt last, FuzzyMatcher& fuzzyMatcher, bool isInserted)
0574 {
0575     Q_ASSERT(first != last);
0576     Q_ASSERT(!first->isSpace());
0577     do {
0578         const auto result = fuzzyMatcher.add(&*first, isInserted);
0579         if (result != FuzzyMatchResult::Fuzzy) {
0580             return result;
0581         }
0582         ++first;
0583         skipWhitespace(first, last);
0584     } while (first != last);
0585     return FuzzyMatchResult::Fuzzy;
0586 }
0587
0588 /**
0589  * Matches the given unformatted prefix against the given formatted text.
0590  * Skips whitespace. Skips fuzzy (defined by isFuzzy()) characters using the given @c FuzzyMatcher.
0591  */
0592 template<typename ForwardIt>
0593 class PrefixMatcher
0594 {
0595 public:
0596     explicit PrefixMatcher(ForwardIt prefixFirst, ForwardIt prefixLast, ForwardIt textFirst, ForwardIt textLast,
0597                            FuzzyMatcher& fuzzyMatcher)
0598         : m_prefixFirst{prefixFirst}
0599         , m_prefixLast{prefixLast}
0600         , m_textFirst{textFirst}
0601         , m_textLast{textLast}
0602         , m_fuzzyMatcher{fuzzyMatcher}
0603     {
0604     }
0605
0606     struct Result
0607     {
0608         /// @c true if the prefix has been matched successfully
0609         bool hasMatched;
0610         /// a text iterator that points to where the prefix match ends, or where a mismatch or match failure occurs
0611         ForwardIt matchEnd;
0612     };
0613
0614     /// Call this destructive function once, because it transitions this object into an undefined state.
0615     Result match(bool isEndAContextTextBoundary = true)
0616     {
0617         bool characterMatchOccurred = false;
0618         auto lastPrefixCharacterMatchIt = m_prefixFirst;
0619         auto lastTextCharacterMatchIt = m_textFirst;
0620         for (;; ++m_textFirst, ++m_prefixFirst) {
0621             skipWhitespace(m_prefixFirst, m_prefixLast);
0622             if (m_prefixFirst == m_prefixLast) {
0623                 setResult(HasMatched::Yes);
0624                 break;
0625             }
0626
0627             skipWhitespace(m_textFirst, m_textLast);
0628             if (m_textFirst == m_textLast) {
0629                 skipFuzzyAndWhitespace(m_prefixFirst, m_prefixLast, m_fuzzyMatcher, /*isInserted=*/false);
0630                 if (m_prefixFirst == m_prefixLast) {
0631                     setResult(HasMatched::Yes);
0632                 } else {
0633                     setUnmatchedPrefixCharacterResult();
0634                 }
0635                 break;
0636             }
0637
0638             if (*m_prefixFirst != *m_textFirst && !skipToMatchingPositions()) {
0639                 break;
0640             }
0641
0642             if (*m_prefixFirst == *m_textFirst) {
0643                 if (!characterMatchOccurred) {
0644                     characterMatchOccurred = true;
0645                     m_fuzzyMatcher.firstNonWhitespaceCharacterMatch();
0646                 }
0647                 lastPrefixCharacterMatchIt = m_prefixFirst;
0648                 lastTextCharacterMatchIt = m_textFirst;
0649             }
0650         }
0651
0652         if (isEndAContextTextBoundary && m_result.hasMatched) {
0653             m_result.hasMatched = m_fuzzyMatcher.lastNonWhitespaceCharacterMatch(&*lastPrefixCharacterMatchIt,
0654                                                                                  &*lastTextCharacterMatchIt);
0655         }
0656
0657         return m_result;
0658     }
0659
0660 private:
0661     /**
0662      * Skips fuzzy and whitespace characters in prefix and text until @a *m_prefixFirst == @a *m_textFirst
0663      * or until a fuzzy @a *m_textFirst replaces a fuzzy @a *m_prefixFirst using @a m_fuzzyMatcher.
0664      *
0665      * @return @c true in case of success;
0666      *         @c false in case of no matching characters left (successful match),
0667      *                  unrecoverable mismatch or match failure.
0668      * @post @a m_result is set and ready to be returned if this function returns @c false.
0669      */
0670     bool skipToMatchingPositions()
0671     {
0672         Q_ASSERT(m_prefixFirst != m_prefixLast);
0673         Q_ASSERT(!m_prefixFirst->isSpace());
0674
0675         Q_ASSERT(m_textFirst != m_textLast);
0676         Q_ASSERT(!m_textFirst->isSpace());
0677
0678         Q_ASSERT(*m_prefixFirst != *m_textFirst);
0679
0680         const bool prefixIsFuzzy = isFuzzy(*m_prefixFirst);
0681         const bool textIsFuzzy = isFuzzy(*m_textFirst);
0682         if (!prefixIsFuzzy && !textIsFuzzy) {
0683             setUnrecoverableMismatchResult();
0684             return false;
0685         }
0686         if (prefixIsFuzzy != textIsFuzzy) {
0687             auto& first = prefixIsFuzzy ? m_prefixFirst : m_textFirst;
0688             const auto last = prefixIsFuzzy ? m_prefixLast : m_textLast;
0689             const auto result = skipFuzzyAndWhitespace(first, last, m_fuzzyMatcher, /*isInserted=*/textIsFuzzy);
0690             switch (result) {
0691             case FuzzyMatchResult::Fuzzy:
0692                 Q_ASSERT(first == last);
0693                 if (prefixIsFuzzy) {
0694                     setResult(HasMatched::Yes);
0695                 } else {
0696                     setUnmatchedPrefixCharacterResult();
0697                 }
0698                 return false;
0699             case FuzzyMatchResult::NotFuzzy:
0700                 if (*m_prefixFirst == *m_textFirst) {
0701                     return true;
0702                 }
0703                 setUnrecoverableMismatchResult();
0704                 return false;
0705             case FuzzyMatchResult::MatchingFailed:
0706                 setResult(HasMatched::No);
0707                 return false;
0708             }
0709             Q_UNREACHABLE();
0710         }
0711         Q_ASSERT(prefixIsFuzzy && textIsFuzzy);
0712         return selectFuzzyInsertionRemovalOrReplacement();
0713     }
0714
0715     /// Chooses between valid insertion and valid removal.
0716     bool shouldRemove(const FindResult<ForwardIt>& insertionResult, const FindResult<ForwardIt>& removalResult) const
0717     {
0718         Q_ASSERT(insertionResult.found);
0719         Q_ASSERT(removalResult.found);
0720
0721         // prefer inserting/removing fewer fuzzy characters
0722         if (removalResult.fuzzyCount != insertionResult.fuzzyCount) {
0723             return removalResult.fuzzyCount < insertionResult.fuzzyCount;
0724         }
0725
0726         // break the tie: prefer inserting/removing fewer whitespace characters
0727         const auto removalDistance = std::distance(m_prefixFirst, removalResult.location);
0728         const auto insertionDistance = std::distance(m_textFirst, insertionResult.location);
0729         if (removalDistance != insertionDistance) {
0730             return removalDistance < insertionDistance;
0731         }
0732
0733         // break the tie: consider an insertion (of e.g. a brace) more likely
0734         return false;
0735     }
0736
0737     /**
0738      * Implements the fuzzy prefix and fuzzy text characters case of skipToMatchingPositions()
0739      * by selecting the likeliest of the 4 possibilities:
0740      * 1) the formatter kept *m_prefixFirst but inserted fuzzy characters before it in text;
0741      * 2) the formatter kept *m_textFirst but removed fuzzy characters before it in prefix;
0742      * 3) the formatter replaced *m_prefixFirst with *m_textFirst;
0743      * 4) the formatter removed all (fuzzy and whitespace) characters in the range [m_prefixFirst, m_prefixLast).
0744      */
0745     bool selectFuzzyInsertionRemovalOrReplacement()
0746     {
0747         Q_ASSERT(m_prefixFirst != m_prefixLast);
0748         Q_ASSERT(isFuzzy(*m_prefixFirst));
0749
0750         Q_ASSERT(m_textFirst != m_textLast);
0751         Q_ASSERT(isFuzzy(*m_textFirst));
0752
0753         Q_ASSERT(*m_prefixFirst != *m_textFirst);
0754
0755         auto insertionResult = findUntilNeitherFuzzyNorWhitespace(m_textFirst, m_textLast, *m_prefixFirst);
0756         auto removalResult = findUntilNeitherFuzzyNorWhitespace(m_prefixFirst, m_prefixLast, *m_textFirst);
0757
0758         if (removalResult.location == m_prefixLast) {
0759             Q_ASSERT(!removalResult.found);
0760             // The formatter could have removed all remaining characters in prefix (the 4th possibility). When
0761             // removalResult.found is true, this possibility is strictly worse than the one stored in removalResult
0762             // (the 2nd possibility). In this scope removalResult.found is false, therefore, the 2nd possibility is
0763             // unavailable and the 4th possibility is essentially stored in removalResult instead.
0764
0765             if (!insertionResult.found) {
0766                 // The formatter must have removed *m_prefixFirst, because insertionResult.found is false, and so the
0767                 // 1st possibility is unavailable. Choose among the (roughly) 3rd and the 4th possibilities like this:
0768                 // 1. Skip fuzzy characters in prefix, starting from *m_prefixFirst.
0769                 // 2. When only one fuzzy character absent from the range
0770                 //    [m_textFirst, insertionResult.location) remains in prefix (the loop condition),
0771                 //    leave insertionResult.found == false and thus select the 4th possibility.
0772                 // 3. When the findUntilNeitherFuzzyNorWhitespace() call below finds a prefix's fuzzy
0773                 //    character in text, insertionResult.found is set to true and the code below chooses
0774                 //    between the 3rd and the 4th possibility (with an increased value of m_prefixFirst).
0775
0776                 // skippedFuzzySet is a set of skipped fuzzy characters in prefix that are absent from
0777                 // the range [m_textFirst, insertionResult.location). This set is used only for optimization:
0778                 // to avoid searching for the same fuzzy character in text more than once.
0779                 QVarLengthArray<QChar, 4> skippedFuzzySet{*m_prefixFirst};
0780                 while (--removalResult.fuzzyCount > 0) {
0781                     const auto result = m_fuzzyMatcher.add(&*m_prefixFirst, /*isInserted=*/false);
0782                     if (result == FuzzyMatchResult::MatchingFailed) {
0783                         setResult(HasMatched::No);
0784                         return false;
0785                     }
0786                     Q_ASSERT(result == FuzzyMatchResult::Fuzzy);
0787                     ++m_prefixFirst;
0788
0789                     skipWhitespace(m_prefixFirst, m_prefixLast);
0790                     Q_ASSERT_X(m_prefixFirst != m_prefixLast, Q_FUNC_INFO, "Wrong value of removalResult.fuzzyCount?");
0791                     Q_ASSERT(isFuzzy(*m_prefixFirst));
0792
0793                     if (skippedFuzzySet.indexOf(*m_prefixFirst) != -1) {
0794                         continue; // do not fruitlessly search for the same fuzzy character in text again
0795                     }
0796
0797                     insertionResult = findUntilNeitherFuzzyNorWhitespace(m_textFirst, m_textLast, *m_prefixFirst);
0798                     if (insertionResult.found) {
0799                         break; // let the code below choose between valid insertion and valid removal
0800                     }
0801                     skippedFuzzySet.push_back(*m_prefixFirst);
0802                 }
0803             }
0804
0805             // Mark removalResult as valid in order to let the code below consider the 4th possibility stored in it.
0806             removalResult.found = true;
0807         } else if (!insertionResult.found && !removalResult.found) {
0808             // *m_textFirst replaces *m_prefixFirst
0809             bool isInserted = false;
0810             for (auto it : {m_prefixFirst, m_textFirst}) {
0811                 const auto result = m_fuzzyMatcher.add(&*it, isInserted);
0812                 if (result == FuzzyMatchResult::MatchingFailed) {
0813                     setResult(HasMatched::No);
0814                     return false;
0815                 }
0816                 Q_ASSERT(result == FuzzyMatchResult::Fuzzy);
0817
0818                 isInserted = true;
0819             }
0820             return true;
0821         }
0822
0823         if (insertionResult.found && removalResult.found) {
0824             const bool remove = shouldRemove(insertionResult, removalResult);
0825             // select insertion or removal by disabling the losing choice
0826             (remove ? insertionResult : removalResult).found = false;
0827         }
0828
0829         bool isInserted;
0830         ForwardIt* first; // this is a pointer in order to automatically modify the referenced data member
0831         ForwardIt last;
0832         if (insertionResult.found) {
0833             Q_ASSERT(!removalResult.found);
0834             isInserted = true;
0835             first = &m_textFirst;
0836             last = insertionResult.location;
0837         } else {
0838             Q_ASSERT(removalResult.found);
0839             isInserted = false;
0840             first = &m_prefixFirst;
0841             last = removalResult.location;
0842         }
0843
0844         const auto result = skipFuzzyAndWhitespace(*first, last, m_fuzzyMatcher, isInserted);
0845         if (result == FuzzyMatchResult::MatchingFailed) {
0846             setResult(HasMatched::No);
0847             return false;
0848         }
0849         Q_ASSERT(result == FuzzyMatchResult::Fuzzy);
0850         Q_ASSERT(*first == last);
0851
0852         Q_ASSERT(m_textFirst != m_textLast);
0853         if (m_prefixFirst == m_prefixLast) {
0854             setResult(HasMatched::Yes);
0855             return false;
0856         }
0857         Q_ASSERT(*m_prefixFirst == *m_textFirst);
0858         return true;
0859     }
0860
0861     enum class HasMatched { No, Yes };
0862
0863     void setResult(HasMatched hasMatched)
0864     {
0865         Q_ASSERT_X(hasMatched == HasMatched::No || m_prefixFirst == m_prefixLast, Q_FUNC_INFO,
0866                    "The entire prefix must be examined before reporting a successful match.");
0867
0868         m_result.hasMatched = hasMatched == HasMatched::Yes;
0869         m_result.matchEnd = m_textFirst;
0870     }
0871
0872     void setUnmatchedPrefixCharacterResult()
0873     {
0874         Q_ASSERT(m_prefixFirst != m_prefixLast);
0875         Q_ASSERT(!m_prefixFirst->isSpace());
0876         Q_ASSERT(!isFuzzy(*m_prefixFirst));
0877
0878         Q_ASSERT(m_textFirst == m_textLast);
0879
0880         qCWarning(UTIL) << "giving up formatting because of a character in prefix not matched in text:"
0881                         << *m_prefixFirst;
0882         setResult(HasMatched::No);
0883     }
0884
0885     void setUnrecoverableMismatchResult()
0886     {
0887         Q_ASSERT(m_prefixFirst != m_prefixLast);
0888         Q_ASSERT(!m_prefixFirst->isSpace());
0889         Q_ASSERT(!isFuzzy(*m_prefixFirst));
0890
0891         Q_ASSERT(m_textFirst != m_textLast);
0892         Q_ASSERT(!m_textFirst->isSpace());
0893         Q_ASSERT(!isFuzzy(*m_textFirst));
0894
0895         Q_ASSERT(*m_prefixFirst != *m_textFirst);
0896
0897         qCWarning(UTIL) << "giving up formatting because of unrecoverable character mismatch:" << *m_prefixFirst
0898                         << "!=" << *m_textFirst;
0899         setResult(HasMatched::No);
0900     }
0901
0902     ForwardIt m_prefixFirst;
0903     const ForwardIt m_prefixLast;
0904     ForwardIt m_textFirst;
0905     const ForwardIt m_textLast;
0906
0907     FuzzyMatcher& m_fuzzyMatcher;
0908
0909     Result m_result{false, m_textFirst};
0910 };
0911
0912 /**
0913  * Matches the given unformatted text against the given formatted text exactly and validates the match.
0914  * Skips whitespace. Skips fuzzy (defined by isFuzzy()) characters using the given @p fuzzyMatcher.
0915  * @return whether the match was successful.
0916  */
0917 bool matchFormattedText(const QString& text, QStringView formattedText, CompleteFuzzyMatcher&& fuzzyMatcher,
0918                         bool isEndAContextTextBoundary)
0919 {
0920     // This intermediary view guarantees that iterators passed to PrefixMatcher() are of the same type.
0921     const QStringView textView = text;
0922     PrefixMatcher prefixMatcher(textView.cbegin(), textView.cend(), formattedText.cbegin(), formattedText.cend(),
0923                                 fuzzyMatcher);
0924     auto result = prefixMatcher.match(isEndAContextTextBoundary);
0925     if (!result.hasMatched) {
0926         return false;
0927     }
0928
0929     // Skip to verify that only whitespace and fuzzy characters remain in the formatted text after result.matchEnd.
0930     // Satisfy the preconditions of skipFuzzyAndWhitespace() by skipping whitespace
0931     // and checking whether the range is empty before calling it.
0932     skipWhitespace(result.matchEnd, formattedText.cend());
0933     if (result.matchEnd != formattedText.cend()) {
0934         const auto fuzzyResult =
0935             skipFuzzyAndWhitespace(result.matchEnd, formattedText.cend(), fuzzyMatcher, /*isInserted=*/true);
0936         if (fuzzyResult == FuzzyMatchResult::MatchingFailed) {
0937             return false;
0938         }
0939         if (result.matchEnd != formattedText.cend()) {
0940             Q_ASSERT(fuzzyResult == FuzzyMatchResult::NotFuzzy);
0941             qCWarning(UTIL) << "giving up formatting because of a character in formatted text not matched in text:"
0942                             << *result.matchEnd;
0943             return false;
0944         }
0945         Q_ASSERT(fuzzyResult == FuzzyMatchResult::Fuzzy);
0946     }
0947
0948     return fuzzyMatcher.validate();
0949 }
0950
0951 QString reverse(QStringView str)
0952 {
0953     QString ret;
0954     ret.reserve(str.length());
0955     for (int a = str.length() - 1; a >= 0; --a)
0956         ret.append(str[a]);
0957
0958     return ret;
0959 }
0960
0961 // Returns the text start position with all whitespace that is redundant in the given context skipped
0962 int skipRedundantWhiteSpace(QStringView context, QStringView text, int tabWidth)
0963 {
0964     if (context.isEmpty() || !context[context.size() - 1].isSpace() || text.isEmpty() || !text[0].isSpace())
0965         return 0;
0966
0967     int textPosition = 0;
0968
0969     // Extract trailing whitespace in the context
0970     int contextPosition = context.size() - 1;
0971     while (contextPosition > 0 && context[contextPosition - 1].isSpace())
0972         --contextPosition;
0973
0974     int textWhitespaceEnd = 0;
0975     while (textWhitespaceEnd < text.size() && text[textWhitespaceEnd].isSpace())
0976         ++textWhitespaceEnd;
0977
0978     auto contextWhiteSpace = context.mid(contextPosition);
0979     auto textWhiteSpace = text.left(textWhitespaceEnd);
0980
0981     // Step 1: Remove redundant newlines
0982     while (contextWhiteSpace.contains(QLatin1Char('\n')) && textWhiteSpace.contains(QLatin1Char('\n'))) {
0983         int contextOffset = contextWhiteSpace.indexOf(QLatin1Char('\n')) + 1;
0984         int textOffset = textWhiteSpace.indexOf(QLatin1Char('\n')) + 1;
0985
0986         contextWhiteSpace = contextWhiteSpace.mid(contextOffset);
0987
0988         textPosition += textOffset;
0989         textWhiteSpace = textWhiteSpace.mid(textOffset);
0990     }
0991
0992     int contextOffset = 0;
0993     int textOffset = 0;
0994     // Skip redundant ordinary whitespace
0995     while (contextOffset < contextWhiteSpace.size() && textOffset < textWhiteSpace.size() &&
0996            contextWhiteSpace[contextOffset].isSpace() && contextWhiteSpace[contextOffset] != QLatin1Char('\n') &&
0997            textWhiteSpace[textOffset].isSpace() && textWhiteSpace[textOffset] != QLatin1Char('\n')) {
0998         bool contextWasTab = contextWhiteSpace[contextOffset] == QLatin1Char('\t');
0999         bool textWasTab = textWhiteSpace[contextOffset] == QLatin1Char('\t');
1000         ++contextOffset;
1001         ++textOffset;
1002         if (contextWasTab != textWasTab) {
1003             // Problem: We have a mismatch of tabs and/or ordinary whitespaces
1004             if (contextWasTab) {
1005                 for (int s = 1; s < tabWidth; ++s)
1006                     if (textOffset < textWhiteSpace.size() && textWhiteSpace[textOffset] == QLatin1Char(' '))
1007                         ++textOffset;
1008             } else if (textWasTab) {
1009                 for (int s = 1; s < tabWidth; ++s)
1010                     if (contextOffset < contextWhiteSpace.size() &&
1011                         contextWhiteSpace[contextOffset] == QLatin1Char(' '))
1012                         ++contextOffset;
1013             }
1014         }
1015     }
1016     textPosition += textOffset;
1017
1018     Q_ASSERT(textPosition >= 0);
1019     Q_ASSERT(textPosition <= text.size());
1020     return textPosition;
1021 }
1022
1023 } // unnamed namespace
1024
1025 QString KDevelop::extractFormattedTextFromContext(const QString& _formattedMergedText, const QString& text,
1026                                                   QStringView leftContext, QStringView rightContext, int tabWidth)
1027 {
1028     if (leftContext.isEmpty() && rightContext.isEmpty()) {
1029         return _formattedMergedText; // fast path, nothing to do
1030     }
1031
1032     QStringView formattedMergedText = _formattedMergedText;
1033     //Now remove "leftContext" and "rightContext" from the sides
1034
1035     // For double quotes, C-style comments and brackets of each type, ensure that inserted or removed opening characters
1036     // (or sequences) are matched by inserted or removed closing characters (or sequences) in the formatted version of
1037     // text. The matching closing entities should be encountered after the opening ones.
1038     // Tracking/matching only inserted and removed (ignoring untouched) fuzzy characters is important to prevent
1039     // issues with commented out and (conditionally) disabled by preprocessor code. The assumption is that formatters
1040     // do not break code and do not insert or remove unmatched quotes, comments or brackets into disabled code.
1041
1042     // For simplicity, only track double quotes (and not comments or brackets) while prefix-matching the two
1043     // contexts. Brackets opened in the left context can be closed in the right context. Such closing could be
1044     // validated, but is not, in order to avoid further complicating the implementation. Validating quotes,
1045     // comments and all bracket types while matching text against its formatted counterpart should be sufficient.
1046
1047     // Fuzzy characters inserted at context-text boundaries are always included into the formatted text fragment.
1048     // If this greedy inclusion produces a quote, comment or bracket mismatch, unformatted text is returned.
1049     // We could try to achieve a match by giving some of such inserted fuzzy characters to contexts. But that would
1050     // substantially complicate the implementation and likely worsen existing formatting. For example, a brace inserted
1051     // at a context-text boundary affects the surrounding whitespace. If we give the brace to the adjacent context,
1052     // it won't be part of the final version of formatted text, and therefore our attempts to compute whitespace to be
1053     // included into formatted text at the affected boundary would probably produce an unsatisfactory result.
1054
1055     const auto handleDoubleQuoteMatchFailure = [] {
1056         // The formatter inserted or removed a double quote in exactly one of two contexts, which means that it
1057         // inserted/removed the matching double quote into the formatted text. Therefore, reformatting only
1058         // the text fragment would produce an unpaired quote and break code. Give up formatting to prevent that.
1059         DoubleQuoteValidator::printFailureWarning();
1060     };
1061
1062     bool hasUnmatchedDoubleQuote = false;
1063
1064     if (!leftContext.isEmpty()) {
1065         DoubleQuoteFuzzyMatcher fuzzyMatcher;
1066         // Inform the matcher that the beginning of the left context is not a context-text boundary.
1067         fuzzyMatcher.firstNonWhitespaceCharacterMatch();
1068         PrefixMatcher prefixMatcher(leftContext.cbegin(), leftContext.cend(), formattedMergedText.cbegin(),
1069                                     formattedMergedText.cend(), fuzzyMatcher);
1070         const auto result = prefixMatcher.match();
1071         if (!result.hasMatched) {
1072             return text;
1073         }
1074
1075         hasUnmatchedDoubleQuote = fuzzyMatcher.hasUnmatchedDoubleQuote();
1076         if (hasUnmatchedDoubleQuote && rightContext.isEmpty()) {
1077             handleDoubleQuoteMatchFailure();
1078             return text;
1079         }
1080
1081         // include all possible whitespace at the context-text boundary
1082         auto rMatchEnd = std::make_reverse_iterator(result.matchEnd);
1083         skipWhitespace(rMatchEnd, formattedMergedText.crend());
1084         // remove the left context from formattedMergedText
1085         formattedMergedText = formattedMergedText.right(rMatchEnd - formattedMergedText.crbegin());
1086
1087         int skip = skipRedundantWhiteSpace(leftContext, formattedMergedText, tabWidth);
1088         formattedMergedText = formattedMergedText.mid(skip);
1089     }
1090
1091     if (!rightContext.isEmpty()) {
1092         DoubleQuoteFuzzyMatcher fuzzyMatcher;
1093         fuzzyMatcher.setReverseAddressDirection();
1094         // Inform the matcher that the end of the right context is not a context-text boundary.
1095         fuzzyMatcher.firstNonWhitespaceCharacterMatch();
1096         PrefixMatcher prefixMatcher(rightContext.crbegin(), rightContext.crend(), formattedMergedText.crbegin(),
1097                                     formattedMergedText.crend(), fuzzyMatcher);
1098         const auto result = prefixMatcher.match();
1099         if (!result.hasMatched) {
1100             return text;
1101         }
1102
1103         if (fuzzyMatcher.hasUnmatchedDoubleQuote() != hasUnmatchedDoubleQuote) {
1104             handleDoubleQuoteMatchFailure();
1105             return text;
1106         }
1107
1108         // include all possible whitespace at the context-text boundary
1109         auto matchEnd = result.matchEnd.base();
1110         skipWhitespace(matchEnd, formattedMergedText.cend());
1111         // remove the right context from formattedMergedText
1112         formattedMergedText.chop(formattedMergedText.cend() - matchEnd);
1113
1114         int skip = skipRedundantWhiteSpace(reverse(rightContext), reverse(formattedMergedText), tabWidth);
1115         formattedMergedText.chop(skip);
1116     }
1117
1118     CompleteFuzzyMatcher fuzzyMatcher({&*text.cbegin(), &*text.cend()},
1119                                       {&*_formattedMergedText.cbegin(), &*_formattedMergedText.cend()});
1120     if (leftContext.isEmpty()) {
1121         // Inform the matcher that the beginning of the text is not a context-text boundary.
1122         fuzzyMatcher.firstNonWhitespaceCharacterMatch();
1123     }
1124     if (hasUnmatchedDoubleQuote) {
1125         // The formatter inserted or removed a double quote into each context. A double quote inserted into
1126         // or removed from text would be a matching closing or moved across context-text boundaries double quote.
1127         // Disable matching double quotes to treat two consecutive inserted or removed double quotes as
1128         // a match failure rather than the usual opening and closing double quotes, which they are not.
1129         fuzzyMatcher.disableMatchingDoubleQuotes();
1130     }
1131     if (!matchFormattedText(text, formattedMergedText, std::move(fuzzyMatcher),
1132                             /*isEndAContextTextBoundary =*/!rightContext.isEmpty())) {
1133         return text;
1134     }
1135
1136     return formattedMergedText.toString();
1137 }