language/interfaces/abbreviations.cpp

0001 /*
0002     SPDX-FileCopyrightText: 2014 Sven Brauch <svenbrauch@gmail.com>
0003
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006
0007 #include "abbreviations.h"
0008
0009 #include <QStringList>
0010 #include <util/path.h>
0011
0012 namespace KDevelop {
0013 // Taken and adapted for kdevelop from katecompletionmodel.cpp
0014 static bool matchesAbbreviationHelper(const QStringRef& word, const QString& typed,
0015                                       const QVarLengthArray<int, 32>& offsets,
0016                                       int& depth, int atWord = -1, int i = 0)
0017 {
0018     int atLetter = 1;
0019     for (; i < typed.size(); i++) {
0020         const QChar c = typed.at(i).toLower();
0021         bool haveNextWord = offsets.size() > atWord + 1;
0022         bool canCompare = atWord != -1 && word.size() > offsets.at(atWord) + atLetter;
0023         if (canCompare && c == word.at(offsets.at(atWord) + atLetter).toLower()) {
0024             // the typed letter matches a letter after the current word beginning
0025             if (!haveNextWord || c != word.at(offsets.at(atWord + 1)).toLower()) {
0026                 // good, simple case, no conflict
0027                 atLetter += 1;
0028                 continue;
0029             }
0030             // For maliciously crafted data, the code used here theoretically can have very high
0031             // complexity. Thus ensure we don't run into this case, by limiting the amount of branches
0032             // we walk through to 128.
0033             depth++;
0034             if (depth > 128) {
0035                 return false;
0036             }
0037             // the letter matches both the next word beginning and the next character in the word
0038             if (haveNextWord && matchesAbbreviationHelper(word, typed, offsets, depth, atWord + 1, i + 1)) {
0039                 // resolving the conflict by taking the next word's first character worked, fine
0040                 return true;
0041             }
0042             // otherwise, continue by taking the next letter in the current word.
0043             atLetter += 1;
0044             continue;
0045         } else if (haveNextWord && c == word.at(offsets.at(atWord + 1)).toLower()) {
0046             // the typed letter matches the next word beginning
0047             atWord++;
0048             atLetter = 1;
0049             continue;
0050         }
0051         // no match
0052         return false;
0053     }
0054
0055     // all characters of the typed word were matched
0056     return true;
0057 }
0058
0059 bool matchesAbbreviation(const QStringRef& word, const QString& typed)
0060 {
0061     // A mismatch is very likely for random even for the first letter,
0062     // thus this optimization makes sense.
0063     if (word.at(0).toLower() != typed.at(0).toLower()) {
0064         return false;
0065     }
0066
0067     // First, check if all letters are contained in the word in the right order.
0068     int atLetter = 0;
0069     for (const QChar c : typed) {
0070         while (c.toLower() != word.at(atLetter).toLower()) {
0071             atLetter += 1;
0072             if (atLetter >= word.size()) {
0073                 return false;
0074             }
0075         }
0076     }
0077
0078     bool haveUnderscore = true;
0079     QVarLengthArray<int, 32> offsets;
0080     // We want to make "KComplM" match "KateCompletionModel"; this means we need
0081     // to allow parts of the typed text to be not part of the actual abbreviation,
0082     // which consists only of the uppercased / underscored letters (so "KCM" in this case).
0083     // However it might be ambiguous whether a letter is part of such a word or part of
0084     // the following abbreviation, so we need to find all possible word offsets first,
0085     // then compare.
0086     for (int i = 0; i < word.size(); i++) {
0087         const QChar c = word.at(i);
0088         if (c == QLatin1Char('_') || c == QLatin1Char('-')) {
0089             haveUnderscore = true;
0090         } else if (haveUnderscore || c.isUpper()) {
0091             offsets.append(i);
0092             haveUnderscore = false;
0093         }
0094     }
0095
0096     int depth = 0;
0097     return matchesAbbreviationHelper(word, typed, offsets, depth);
0098 }
0099
0100 bool matchesPath(const QString& path, const QString& typed)
0101 {
0102     int consumed = 0;
0103     int pos = 0;
0104     // try to find all the characters in typed in the right order in the path;
0105     // jumps are allowed everywhere
0106     while (consumed < typed.size() && pos < path.size()) {
0107         if (typed.at(consumed).toLower() == path.at(pos).toLower()) {
0108             consumed++;
0109         }
0110         pos++;
0111     }
0112     return consumed == typed.size();
0113 }
0114
0115 bool matchesAbbreviationMulti(const QString& word, const QStringList& typedFragments)
0116 {
0117     if (word.size() == 0) {
0118         return true;
0119     }
0120     int lastSpace = 0;
0121     int matchedFragments = 0;
0122     for (int i = 0; i < word.size(); i++) {
0123         const QChar& c = word.at(i);
0124         bool isDoubleColon = false;
0125         // if it's not a separation char, walk over it.
0126         if (c != QLatin1Char(' ') && c != QLatin1Char('/') && i != word.size() - 1) {
0127             if (c != QLatin1Char(':') && i < word.size() - 1 && word.at(i + 1) != QLatin1Char(':')) {
0128                 continue;
0129             }
0130             isDoubleColon = true;
0131             i++;
0132         }
0133         // if it's '/', ' ' or '::', split the word here and check the next sub-word.
0134         const QStringRef wordFragment = word.midRef(lastSpace, i - lastSpace);
0135         const QString& typedFragment = typedFragments.at(matchedFragments);
0136         Q_ASSERT(!typedFragment.isEmpty());
0137         if (!wordFragment.isEmpty() && matchesAbbreviation(wordFragment, typedFragment)) {
0138             matchedFragments += 1;
0139             if (matchedFragments == typedFragments.size()) {
0140                 break;
0141             }
0142         }
0143         lastSpace = isDoubleColon ? i : i + 1;
0144     }
0145
0146     return matchedFragments == typedFragments.size();
0147 }
0148
0149 int matchPathFilter(const Path& toFilter, const QStringList& text, const Path& prefixPath)
0150 {
0151     enum PathFilterMatchQuality {
0152         NoMatch = -1,
0153         ExactMatch = 0,
0154         StartMatch = 1,
0155         OtherMatch = 2 // and anything higher than that
0156     };
0157     const QVector<QString>& segments = toFilter.segments();
0158
0159     if (text.count() > segments.count()) {
0160         // number of segments mismatches, thus item cannot match
0161         return NoMatch;
0162     }
0163
0164     bool allMatched = true;
0165     int searchIndex = text.size() - 1;
0166     int pathIndex = segments.size() - 1;
0167     int lastMatchIndex = -1;
0168     // stop early if more search fragments remain than available after path index
0169     while (pathIndex >= 0 && searchIndex >= 0
0170            && (pathIndex + text.size() - searchIndex - 1) < segments.size()) {
0171         const QString& segment = segments.at(pathIndex);
0172         const QString& typedSegment = text.at(searchIndex);
0173         const int matchIndex = segment.indexOf(typedSegment, 0, Qt::CaseInsensitive);
0174         const bool isLastPathSegment = pathIndex == segments.size() - 1;
0175         const bool isLastSearchSegment = searchIndex == text.size() - 1;
0176
0177         // check for exact matches
0178         allMatched &= matchIndex == 0 && segment.size() == typedSegment.size();
0179
0180         // check for fuzzy matches
0181         bool isMatch = matchIndex != -1;
0182         // do fuzzy path matching on the last segment
0183         if (!isMatch && isLastPathSegment && isLastSearchSegment) {
0184             isMatch = matchesPath(segment, typedSegment);
0185         } else if (!isMatch) { // check other segments for abbreviations
0186             isMatch = matchesAbbreviation(segment.midRef(0), typedSegment);
0187         }
0188
0189         if (!isMatch) {
0190             // no match, try with next path segment
0191             --pathIndex;
0192             continue;
0193         }
0194         // else we matched
0195         if (isLastPathSegment) {
0196             lastMatchIndex = matchIndex;
0197         }
0198         --searchIndex;
0199         --pathIndex;
0200     }
0201
0202     if (searchIndex != -1) {
0203         return NoMatch;
0204     }
0205
0206     const int segmentMatchDistance = segments.size() - (pathIndex + 1);
0207     const bool inPrefixPath = segmentMatchDistance > (segments.size() - prefixPath.segments().size())
0208                               && prefixPath.isParentOf(toFilter);
0209     // penalize matches that fall into the shared suffix
0210     const int penalty = (inPrefixPath) ? 1024 : 0;
0211
0212     if (allMatched && !inPrefixPath) {
0213         return ExactMatch + penalty;
0214     } else if (lastMatchIndex == 0) {
0215         // prefer matches whose last element starts with the filter
0216         return StartMatch + penalty;
0217     } else {
0218         // prefer matches closer to the end of the path
0219         return OtherMatch + segmentMatchDistance + penalty;
0220     }
0221 }
0222 } // namespace KDevelop