File indexing completed on 2024-05-12 16:02:26
0001 /* 0002 SPDX-FileCopyrightText: 2017 Forrest Smith 0003 SPDX-FileCopyrightText: 2020 Waqar Ahmed 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #ifndef KFTS_FUZZY_MATCH_H 0009 #define KFTS_FUZZY_MATCH_H 0010 0011 #include <QString> 0012 0013 /** 0014 * This is based on https://github.com/forrestthewoods/lib_fts/blob/master/code/fts_fuzzy_match.h 0015 * with modifications for Qt 0016 * 0017 * Dont include this file in a header file, please :) 0018 */ 0019 0020 namespace kfts 0021 { 0022 /** 0023 * @brief simple fuzzy matching of chars in @a pattern with chars in @a str sequentially 0024 */ 0025 Q_DECL_UNUSED static bool fuzzy_match_simple(const QString pattern, const QString str); 0026 0027 /** 0028 * @brief This should be the main function you should use. @a outscore is the score 0029 * of this match and should be used to sort the results later. Without sorting of the 0030 * results this function won't be as effective. 0031 */ 0032 Q_DECL_UNUSED static bool fuzzy_match(const QString pattern, const QString str, int &outScore); 0033 Q_DECL_UNUSED static bool fuzzy_match(const QString pattern, const QString str, int &outScore, uint8_t *matches, int maxMatches); 0034 0035 /** 0036 * @brief This is a special case function which doesn't score separator matches higher than sequential matches. 0037 * This is currently used in Kate's command bar where the string items are usually space separated names. 0038 */ 0039 Q_DECL_UNUSED static bool fuzzy_match_sequential(const QString pattern, const QString str, int &outScore); 0040 /** 0041 * @brief get string for display in treeview / listview. This should be used from style delegate. 0042 * For example: with @a pattern = "kate", @a str = "kateapp" and @htmlTag = "<b> 0043 * the output will be <b>k</b><b>a</b><b>t</b><b>e</b>app which will be visible to user as 0044 * <b>kate</b>app. 0045 * 0046 * TODO: improve this so that we don't have to put html tags on every char probably using some kind 0047 * of interval container 0048 */ 0049 Q_DECL_UNUSED static QString to_fuzzy_matched_display_string(const QString pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose); 0050 } 0051 0052 namespace kfts 0053 { 0054 // Forward declarations for "private" implementation 0055 namespace fuzzy_internal 0056 { 0057 static bool fuzzy_match_recursive(QString::const_iterator pattern, 0058 QString::const_iterator str, 0059 int &outScore, 0060 const QString::const_iterator strBegin, 0061 const QString::const_iterator strEnd, 0062 const QString::const_iterator patternEnd, 0063 uint8_t const *srcMatches, 0064 uint8_t *newMatches, 0065 int maxMatches, 0066 int nextMatch, 0067 int &recursionCount, 0068 int seqBonus = 15); 0069 } 0070 0071 // Public interface 0072 static bool fuzzy_match_simple(const QString pattern, const QString str) 0073 { 0074 auto patternIt = pattern.cbegin(); 0075 for (auto strIt = str.cbegin(); strIt != str.cend() && patternIt != pattern.cend(); ++strIt) { 0076 if (strIt->toLower() == patternIt->toLower()) { 0077 ++patternIt; 0078 } 0079 } 0080 return patternIt == pattern.cend(); 0081 } 0082 0083 static bool fuzzy_match(const QString pattern, const QString str, int &outScore) 0084 { 0085 uint8_t matches[256]; 0086 return fuzzy_match(pattern, str, outScore, matches, sizeof(matches)); 0087 } 0088 0089 static bool fuzzy_match(const QString pattern, const QString str, int &outScore, uint8_t *matches, int maxMatches) 0090 { 0091 int recursionCount = 0; 0092 0093 auto strIt = str.cbegin(); 0094 auto patternIt = pattern.cbegin(); 0095 const auto patternEnd = pattern.cend(); 0096 const auto strEnd = str.cend(); 0097 0098 return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount); 0099 } 0100 0101 static bool fuzzy_match_sequential(const QString pattern, const QString str, int &outScore) 0102 { 0103 int recursionCount = 0; 0104 uint8_t matches[256]; 0105 auto maxMatches = sizeof(matches); 0106 auto strIt = str.cbegin(); 0107 auto patternIt = pattern.cbegin(); 0108 const auto patternEnd = pattern.cend(); 0109 const auto strEnd = str.cend(); 0110 0111 return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount, 40); 0112 } 0113 0114 // Private implementation 0115 static bool fuzzy_internal::fuzzy_match_recursive(QString::const_iterator pattern, 0116 QString::const_iterator str, 0117 int &outScore, 0118 const QString::const_iterator strBegin, 0119 const QString::const_iterator strEnd, 0120 const QString::const_iterator patternEnd, 0121 const uint8_t *srcMatches, 0122 uint8_t *matches, 0123 int maxMatches, 0124 int nextMatch, 0125 int &recursionCount, 0126 int seqBonus) 0127 { 0128 // Count recursions 0129 static constexpr int recursionLimit = 10; 0130 ++recursionCount; 0131 if (recursionCount >= recursionLimit) { 0132 return false; 0133 } 0134 0135 // Detect end of strings 0136 if (pattern == patternEnd || str == strEnd) { 0137 return false; 0138 } 0139 0140 // Recursion params 0141 bool recursiveMatch = false; 0142 uint8_t bestRecursiveMatches[256]; 0143 int bestRecursiveScore = 0; 0144 0145 // Loop through pattern and str looking for a match 0146 bool first_match = true; 0147 while (pattern != patternEnd && str != strEnd) { 0148 // Found match 0149 if (pattern->toLower() == str->toLower()) { 0150 // Supplied matches buffer was too short 0151 if (nextMatch >= maxMatches) { 0152 return false; 0153 } 0154 0155 // "Copy-on-Write" srcMatches into matches 0156 if (first_match && srcMatches) { 0157 memcpy(matches, srcMatches, nextMatch); 0158 first_match = false; 0159 } 0160 0161 // Recursive call that "skips" this match 0162 uint8_t recursiveMatches[256]; 0163 int recursiveScore; 0164 auto strNextChar = std::next(str); 0165 if (fuzzy_match_recursive(pattern, 0166 strNextChar, 0167 recursiveScore, 0168 strBegin, 0169 strEnd, 0170 patternEnd, 0171 matches, 0172 recursiveMatches, 0173 sizeof(recursiveMatches), 0174 nextMatch, 0175 recursionCount)) { 0176 // Pick best recursive score 0177 if (!recursiveMatch || recursiveScore > bestRecursiveScore) { 0178 memcpy(bestRecursiveMatches, recursiveMatches, 256); 0179 bestRecursiveScore = recursiveScore; 0180 } 0181 recursiveMatch = true; 0182 } 0183 0184 // Advance 0185 matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str)); 0186 ++pattern; 0187 } 0188 ++str; 0189 } 0190 0191 // Determine if full pattern was matched 0192 bool matched = pattern == patternEnd ? true : false; 0193 0194 // Calculate score 0195 if (matched) { 0196 int sequential_bonus = seqBonus; // bonus for adjacent matches 0197 static constexpr int separator_bonus = 30; // bonus if match occurs after a separator 0198 static constexpr int camel_bonus = 30; // bonus if match is uppercase and prev is lower 0199 static constexpr int first_letter_bonus = 15; // bonus if the first letter is matched 0200 0201 static constexpr int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match 0202 static constexpr int max_leading_letter_penalty = -15; // maximum penalty for leading letters 0203 static constexpr int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter 0204 0205 // Iterate str to end 0206 while (str != strEnd) { 0207 ++str; 0208 } 0209 0210 // Initialize score 0211 outScore = 100; 0212 0213 // Apply leading letter penalty 0214 int penalty = leading_letter_penalty * matches[0]; 0215 if (penalty < max_leading_letter_penalty) { 0216 penalty = max_leading_letter_penalty; 0217 } 0218 outScore += penalty; 0219 0220 // Apply unmatched penalty 0221 const int unmatched = (int)(std::distance(strBegin, str)) - nextMatch; 0222 outScore += unmatched_letter_penalty * unmatched; 0223 0224 // Apply ordering bonuses 0225 for (int i = 0; i < nextMatch; ++i) { 0226 uint8_t currIdx = matches[i]; 0227 0228 if (i > 0) { 0229 uint8_t prevIdx = matches[i - 1]; 0230 0231 // Sequential 0232 if (currIdx == (prevIdx + 1)) { 0233 outScore += sequential_bonus; 0234 } 0235 } 0236 0237 // Check for bonuses based on neighbor character value 0238 if (currIdx > 0) { 0239 // Camel case 0240 QChar neighbor = *(strBegin + currIdx - 1); 0241 QChar curr = *(strBegin + currIdx); 0242 if (neighbor.isLower() && curr.isUpper()) { 0243 outScore += camel_bonus; 0244 } 0245 0246 // Separator 0247 bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' '); 0248 if (neighborSeparator) { 0249 outScore += separator_bonus; 0250 } 0251 } else { 0252 // First letter 0253 outScore += first_letter_bonus; 0254 } 0255 } 0256 } 0257 0258 // Return best result 0259 if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) { 0260 // Recursive score is better than "this" 0261 memcpy(matches, bestRecursiveMatches, maxMatches); 0262 outScore = bestRecursiveScore; 0263 return true; 0264 } else if (matched) { 0265 // "this" score is better than recursive 0266 return true; 0267 } else { 0268 // no match 0269 return false; 0270 } 0271 } 0272 0273 static QString to_fuzzy_matched_display_string(const QString pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose) 0274 { 0275 /** 0276 * FIXME Don't do so many appends. Instead consider using some interval based solution to wrap a range 0277 * of text with the html <tag></tag> 0278 */ 0279 int j = 0; 0280 for (int i = 0; i < str.size() && j < pattern.size(); ++i) { 0281 if (str.at(i).toLower() == pattern.at(j).toLower()) { 0282 str.replace(i, 1, htmlTag + str.at(i) + htmlTagClose); 0283 i += htmlTag.size() + htmlTagClose.size(); 0284 ++j; 0285 } 0286 } 0287 return str; 0288 } 0289 } // namespace kfts 0290 0291 #endif // KFTS_FUZZY_MATCH_H