File indexing completed on 2024-04-28 05:34:16

0001 // SPDX-FileCopyrightText: 2014 Sven Brauch <svenbrauch@gmail.com>
0002 //
0003 // SPDX-License-Identifier: LGPL-2.1-or-later
0004 
0005 #include "abbreviations.h"
0006 
0007 #include <QVarLengthArray>
0008 
0009 namespace
0010 {
0011 constexpr const std::size_t offsetsSize = 32;
0012 constexpr const int maxDepth = 128;
0013 
0014 // Taken and adapted for kdevelop from katecompletionmodel.cpp
0015 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0016 bool matchesAbbreviationHelper(const QStringRef &word,
0017                                const QStringRef &typed,
0018                                const QVarLengthArray<int, offsetsSize> &offsets,
0019                                int &depth,
0020                                int atWord = -1,
0021                                int i = 0)
0022 #else
0023 bool matchesAbbreviationHelper(const QStringView &word,
0024                                const QStringView &typed,
0025                                const QVarLengthArray<int, offsetsSize> &offsets,
0026                                int &depth,
0027                                int atWord = -1,
0028                                int i = 0)
0029 
0030 #endif
0031 {
0032     int atLetter = 1;
0033     for (; i < typed.size(); i++) {
0034         const QChar c = typed.at(i).toLower();
0035         bool haveNextWord = offsets.size() > atWord + 1;
0036         bool canCompare = atWord != -1 && word.size() > offsets.at(atWord) + atLetter;
0037         if (canCompare && c == word.at(offsets.at(atWord) + atLetter).toLower()) {
0038             // the typed letter matches a letter after the current word beginning
0039             if (!haveNextWord || c != word.at(offsets.at(atWord + 1)).toLower()) {
0040                 // good, simple case, no conflict
0041                 atLetter += 1;
0042                 continue;
0043             }
0044             // For maliciously crafted data, the code used here theoretically can have very high
0045             // complexity. Thus ensure we don't run into this case, by limiting the amount of branches
0046             // we walk through to 128.
0047             depth++;
0048             if (depth > maxDepth) {
0049                 return false;
0050             }
0051             // the letter matches both the next word beginning and the next character in the word
0052             if (haveNextWord && matchesAbbreviationHelper(word, typed, offsets, depth, atWord + 1, i + 1)) {
0053                 // resolving the conflict by taking the next word's first character worked, fine
0054                 return true;
0055             }
0056             // otherwise, continue by taking the next letter in the current word.
0057             atLetter += 1;
0058             continue;
0059         }
0060 
0061         if (haveNextWord && c == word.at(offsets.at(atWord + 1)).toLower()) {
0062             // the typed letter matches the next word beginning
0063             atWord++;
0064             atLetter = 1;
0065             continue;
0066         }
0067 
0068         // no match
0069         return false;
0070     }
0071     // all characters of the typed word were matched
0072     return true;
0073 }
0074 
0075 }
0076 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0077 bool PlasmaPass::matchesAbbreviation(const QStringRef &word, const QStringRef &typed)
0078 #else
0079 bool PlasmaPass::matchesAbbreviation(const QStringView &word, const QStringView &typed)
0080 #endif
0081 {
0082     // A mismatch is very likely for random even for the first letter,
0083     // thus this optimization makes sense.
0084     if (word.at(0).toLower() != typed.at(0).toLower()) {
0085         return false;
0086     }
0087 
0088     // First, check if all letters are contained in the word in the right order.
0089     int atLetter = 0;
0090     for (const auto c : typed) {
0091         while (c.toLower() != word.at(atLetter).toLower()) {
0092             atLetter += 1;
0093             if (atLetter >= word.size()) {
0094                 return false;
0095             }
0096         }
0097     }
0098 
0099     bool haveUnderscore = true;
0100     QVarLengthArray<int, offsetsSize> offsets;
0101     // We want to make "KComplM" match "KateCompletionModel"; this means we need
0102     // to allow parts of the typed text to be not part of the actual abbreviation,
0103     // which consists only of the uppercased / underscored letters (so "KCM" in this case).
0104     // However it might be ambigous whether a letter is part of such a word or part of
0105     // the following abbreviation, so we need to find all possible word offsets first,
0106     // then compare.
0107     for (int i = 0; i < word.size(); ++i) {
0108         const QChar c = word.at(i);
0109         if (c == QLatin1Char('_') || c == QLatin1Char('-')) {
0110             haveUnderscore = true;
0111         } else if (haveUnderscore || c.isUpper()) {
0112             offsets.append(i);
0113             haveUnderscore = false;
0114         }
0115     }
0116     int depth = 0;
0117     return matchesAbbreviationHelper(word, typed, offsets, depth);
0118 }
0119 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0120 bool PlasmaPass::matchesPath(const QStringRef &path, const QStringRef &typed)
0121 #else
0122 bool PlasmaPass::matchesPath(const QStringView &path, const QStringView &typed)
0123 #endif
0124 {
0125     int consumed = 0;
0126     int pos = 0;
0127     // try to find all the characters in typed in the right order in the path;
0128     // jumps are allowed everywhere
0129     while (consumed < typed.size() && pos < path.size()) {
0130         if (typed.at(consumed).toLower() == path.at(pos).toLower()) {
0131             consumed++;
0132         }
0133         pos++;
0134     }
0135     return consumed == typed.size();
0136 }
0137 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
0138 int PlasmaPass::matchPathFilter(const QVector<QStringRef> &toFilter, const QVector<QStringRef> &text)
0139 #else
0140 int PlasmaPass::matchPathFilter(const QVector<QStringView> &toFilter, const QVector<QStringView> &text)
0141 #endif
0142 {
0143     enum PathFilterMatchQuality {
0144         NoMatch = -1,
0145         ExactMatch = 0,
0146         StartMatch = 1,
0147         OtherMatch = 2, // and anything higher than that
0148     };
0149     const auto &segments = toFilter;
0150 
0151     if (text.count() > segments.count()) {
0152         // number of segments mismatches, thus item cannot match
0153         return NoMatch;
0154     }
0155 
0156     bool allMatched = true;
0157     int searchIndex = text.size() - 1;
0158     int pathIndex = segments.size() - 1;
0159     int lastMatchIndex = -1;
0160     // stop early if more search fragments remain than available after path index
0161     while (pathIndex >= 0 && searchIndex >= 0 && (pathIndex + text.size() - searchIndex - 1) < segments.size()) {
0162         const auto &segment = segments.at(pathIndex);
0163         const auto &typedSegment = text.at(searchIndex);
0164         const int matchIndex = segment.indexOf(typedSegment, 0, Qt::CaseInsensitive);
0165         const bool isLastPathSegment = pathIndex == segments.size() - 1;
0166         const bool isLastSearchSegment = searchIndex == text.size() - 1;
0167 
0168         // check for exact matches
0169         allMatched &= matchIndex == 0 && segment.size() == typedSegment.size();
0170 
0171         // check for fuzzy matches
0172         bool isMatch = matchIndex != -1;
0173         // do fuzzy path matching on the last segment
0174         if (!isMatch && isLastPathSegment && isLastSearchSegment) {
0175             isMatch = matchesPath(segment, typedSegment);
0176         } else if (!isMatch) { // check other segments for abbreviations
0177             isMatch = matchesAbbreviation(segment.mid(0), typedSegment);
0178         }
0179 
0180         if (!isMatch) {
0181             // no match, try with next path segment
0182             --pathIndex;
0183             continue;
0184         }
0185         // else we matched
0186         if (isLastPathSegment) {
0187             lastMatchIndex = matchIndex;
0188         }
0189         --searchIndex;
0190         --pathIndex;
0191     }
0192 
0193     if (searchIndex != -1) {
0194         return NoMatch;
0195     }
0196 
0197     const int segmentMatchDistance = segments.size() - (pathIndex + 1);
0198 
0199     if (allMatched) {
0200         return ExactMatch;
0201     }
0202 
0203     if (lastMatchIndex == 0) {
0204         // prefer matches whose last element starts with the filter
0205         return StartMatch;
0206     }
0207 
0208     // prefer matches closer to the end of the path
0209     return OtherMatch + segmentMatchDistance;
0210 }