File indexing completed on 2024-05-12 04:38:07
0001 /* 0002 SPDX-FileCopyrightText: 2014 Sven Brauch <svenbrauch@gmail.com> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "abbreviations.h" 0008 0009 #include <QStringList> 0010 #include <util/path.h> 0011 0012 namespace KDevelop { 0013 // Taken and adapted for kdevelop from katecompletionmodel.cpp 0014 static bool matchesAbbreviationHelper(const QStringRef& word, const QString& typed, 0015 const QVarLengthArray<int, 32>& offsets, 0016 int& depth, int atWord = -1, int i = 0) 0017 { 0018 int atLetter = 1; 0019 for (; i < typed.size(); i++) { 0020 const QChar c = typed.at(i).toLower(); 0021 bool haveNextWord = offsets.size() > atWord + 1; 0022 bool canCompare = atWord != -1 && word.size() > offsets.at(atWord) + atLetter; 0023 if (canCompare && c == word.at(offsets.at(atWord) + atLetter).toLower()) { 0024 // the typed letter matches a letter after the current word beginning 0025 if (!haveNextWord || c != word.at(offsets.at(atWord + 1)).toLower()) { 0026 // good, simple case, no conflict 0027 atLetter += 1; 0028 continue; 0029 } 0030 // For maliciously crafted data, the code used here theoretically can have very high 0031 // complexity. Thus ensure we don't run into this case, by limiting the amount of branches 0032 // we walk through to 128. 0033 depth++; 0034 if (depth > 128) { 0035 return false; 0036 } 0037 // the letter matches both the next word beginning and the next character in the word 0038 if (haveNextWord && matchesAbbreviationHelper(word, typed, offsets, depth, atWord + 1, i + 1)) { 0039 // resolving the conflict by taking the next word's first character worked, fine 0040 return true; 0041 } 0042 // otherwise, continue by taking the next letter in the current word. 0043 atLetter += 1; 0044 continue; 0045 } else if (haveNextWord && c == word.at(offsets.at(atWord + 1)).toLower()) { 0046 // the typed letter matches the next word beginning 0047 atWord++; 0048 atLetter = 1; 0049 continue; 0050 } 0051 // no match 0052 return false; 0053 } 0054 0055 // all characters of the typed word were matched 0056 return true; 0057 } 0058 0059 bool matchesAbbreviation(const QStringRef& word, const QString& typed) 0060 { 0061 // A mismatch is very likely for random even for the first letter, 0062 // thus this optimization makes sense. 0063 if (word.at(0).toLower() != typed.at(0).toLower()) { 0064 return false; 0065 } 0066 0067 // First, check if all letters are contained in the word in the right order. 0068 int atLetter = 0; 0069 for (const QChar c : typed) { 0070 while (c.toLower() != word.at(atLetter).toLower()) { 0071 atLetter += 1; 0072 if (atLetter >= word.size()) { 0073 return false; 0074 } 0075 } 0076 } 0077 0078 bool haveUnderscore = true; 0079 QVarLengthArray<int, 32> offsets; 0080 // We want to make "KComplM" match "KateCompletionModel"; this means we need 0081 // to allow parts of the typed text to be not part of the actual abbreviation, 0082 // which consists only of the uppercased / underscored letters (so "KCM" in this case). 0083 // However it might be ambiguous whether a letter is part of such a word or part of 0084 // the following abbreviation, so we need to find all possible word offsets first, 0085 // then compare. 0086 for (int i = 0; i < word.size(); i++) { 0087 const QChar c = word.at(i); 0088 if (c == QLatin1Char('_') || c == QLatin1Char('-')) { 0089 haveUnderscore = true; 0090 } else if (haveUnderscore || c.isUpper()) { 0091 offsets.append(i); 0092 haveUnderscore = false; 0093 } 0094 } 0095 0096 int depth = 0; 0097 return matchesAbbreviationHelper(word, typed, offsets, depth); 0098 } 0099 0100 bool matchesPath(const QString& path, const QString& typed) 0101 { 0102 int consumed = 0; 0103 int pos = 0; 0104 // try to find all the characters in typed in the right order in the path; 0105 // jumps are allowed everywhere 0106 while (consumed < typed.size() && pos < path.size()) { 0107 if (typed.at(consumed).toLower() == path.at(pos).toLower()) { 0108 consumed++; 0109 } 0110 pos++; 0111 } 0112 return consumed == typed.size(); 0113 } 0114 0115 bool matchesAbbreviationMulti(const QString& word, const QStringList& typedFragments) 0116 { 0117 if (word.size() == 0) { 0118 return true; 0119 } 0120 int lastSpace = 0; 0121 int matchedFragments = 0; 0122 for (int i = 0; i < word.size(); i++) { 0123 const QChar& c = word.at(i); 0124 bool isDoubleColon = false; 0125 // if it's not a separation char, walk over it. 0126 if (c != QLatin1Char(' ') && c != QLatin1Char('/') && i != word.size() - 1) { 0127 if (c != QLatin1Char(':') && i < word.size() - 1 && word.at(i + 1) != QLatin1Char(':')) { 0128 continue; 0129 } 0130 isDoubleColon = true; 0131 i++; 0132 } 0133 // if it's '/', ' ' or '::', split the word here and check the next sub-word. 0134 const QStringRef wordFragment = word.midRef(lastSpace, i - lastSpace); 0135 const QString& typedFragment = typedFragments.at(matchedFragments); 0136 Q_ASSERT(!typedFragment.isEmpty()); 0137 if (!wordFragment.isEmpty() && matchesAbbreviation(wordFragment, typedFragment)) { 0138 matchedFragments += 1; 0139 if (matchedFragments == typedFragments.size()) { 0140 break; 0141 } 0142 } 0143 lastSpace = isDoubleColon ? i : i + 1; 0144 } 0145 0146 return matchedFragments == typedFragments.size(); 0147 } 0148 0149 int matchPathFilter(const Path& toFilter, const QStringList& text, const Path& prefixPath) 0150 { 0151 enum PathFilterMatchQuality { 0152 NoMatch = -1, 0153 ExactMatch = 0, 0154 StartMatch = 1, 0155 OtherMatch = 2 // and anything higher than that 0156 }; 0157 const QVector<QString>& segments = toFilter.segments(); 0158 0159 if (text.count() > segments.count()) { 0160 // number of segments mismatches, thus item cannot match 0161 return NoMatch; 0162 } 0163 0164 bool allMatched = true; 0165 int searchIndex = text.size() - 1; 0166 int pathIndex = segments.size() - 1; 0167 int lastMatchIndex = -1; 0168 // stop early if more search fragments remain than available after path index 0169 while (pathIndex >= 0 && searchIndex >= 0 0170 && (pathIndex + text.size() - searchIndex - 1) < segments.size()) { 0171 const QString& segment = segments.at(pathIndex); 0172 const QString& typedSegment = text.at(searchIndex); 0173 const int matchIndex = segment.indexOf(typedSegment, 0, Qt::CaseInsensitive); 0174 const bool isLastPathSegment = pathIndex == segments.size() - 1; 0175 const bool isLastSearchSegment = searchIndex == text.size() - 1; 0176 0177 // check for exact matches 0178 allMatched &= matchIndex == 0 && segment.size() == typedSegment.size(); 0179 0180 // check for fuzzy matches 0181 bool isMatch = matchIndex != -1; 0182 // do fuzzy path matching on the last segment 0183 if (!isMatch && isLastPathSegment && isLastSearchSegment) { 0184 isMatch = matchesPath(segment, typedSegment); 0185 } else if (!isMatch) { // check other segments for abbreviations 0186 isMatch = matchesAbbreviation(segment.midRef(0), typedSegment); 0187 } 0188 0189 if (!isMatch) { 0190 // no match, try with next path segment 0191 --pathIndex; 0192 continue; 0193 } 0194 // else we matched 0195 if (isLastPathSegment) { 0196 lastMatchIndex = matchIndex; 0197 } 0198 --searchIndex; 0199 --pathIndex; 0200 } 0201 0202 if (searchIndex != -1) { 0203 return NoMatch; 0204 } 0205 0206 const int segmentMatchDistance = segments.size() - (pathIndex + 1); 0207 const bool inPrefixPath = segmentMatchDistance > (segments.size() - prefixPath.segments().size()) 0208 && prefixPath.isParentOf(toFilter); 0209 // penalize matches that fall into the shared suffix 0210 const int penalty = (inPrefixPath) ? 1024 : 0; 0211 0212 if (allMatched && !inPrefixPath) { 0213 return ExactMatch + penalty; 0214 } else if (lastMatchIndex == 0) { 0215 // prefer matches whose last element starts with the filter 0216 return StartMatch + penalty; 0217 } else { 0218 // prefer matches closer to the end of the path 0219 return OtherMatch + segmentMatchDistance + penalty; 0220 } 0221 } 0222 } // namespace KDevelop