File indexing completed on 2024-05-12 16:02:26

0001 /*
0002     SPDX-FileCopyrightText: 2017 Forrest Smith
0003     SPDX-FileCopyrightText: 2020 Waqar Ahmed
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #ifndef KFTS_FUZZY_MATCH_H
0009 #define KFTS_FUZZY_MATCH_H
0010 
0011 #include <QString>
0012 
0013 /**
0014  * This is based on https://github.com/forrestthewoods/lib_fts/blob/master/code/fts_fuzzy_match.h
0015  * with modifications for Qt
0016  *
0017  * Dont include this file in a header file, please :)
0018  */
0019 
0020 namespace kfts
0021 {
0022 /**
0023  * @brief simple fuzzy matching of chars in @a pattern with chars in @a str sequentially
0024  */
0025 Q_DECL_UNUSED static bool fuzzy_match_simple(const QString pattern, const QString str);
0026 
0027 /**
0028  * @brief This should be the main function you should use. @a outscore is the score
0029  * of this match and should be used to sort the results later. Without sorting of the
0030  * results this function won't be as effective.
0031  */
0032 Q_DECL_UNUSED static bool fuzzy_match(const QString pattern, const QString str, int &outScore);
0033 Q_DECL_UNUSED static bool fuzzy_match(const QString pattern, const QString str, int &outScore, uint8_t *matches, int maxMatches);
0034 
0035 /**
0036  * @brief This is a special case function which doesn't score separator matches higher than sequential matches.
0037  * This is currently used in Kate's command bar where the string items are usually space separated names.
0038  */
0039 Q_DECL_UNUSED static bool fuzzy_match_sequential(const QString pattern, const QString str, int &outScore);
0040 /**
0041  * @brief get string for display in treeview / listview. This should be used from style delegate.
0042  * For example: with @a pattern = "kate", @a str = "kateapp" and @htmlTag = "<b>
0043  * the output will be <b>k</b><b>a</b><b>t</b><b>e</b>app which will be visible to user as
0044  * <b>kate</b>app.
0045  *
0046  * TODO: improve this so that we don't have to put html tags on every char probably using some kind
0047  * of interval container
0048  */
0049 Q_DECL_UNUSED static QString to_fuzzy_matched_display_string(const QString pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose);
0050 }
0051 
0052 namespace kfts
0053 {
0054 // Forward declarations for "private" implementation
0055 namespace fuzzy_internal
0056 {
0057 static bool fuzzy_match_recursive(QString::const_iterator pattern,
0058                                   QString::const_iterator str,
0059                                   int &outScore,
0060                                   const QString::const_iterator strBegin,
0061                                   const QString::const_iterator strEnd,
0062                                   const QString::const_iterator patternEnd,
0063                                   uint8_t const *srcMatches,
0064                                   uint8_t *newMatches,
0065                                   int maxMatches,
0066                                   int nextMatch,
0067                                   int &recursionCount,
0068                                   int seqBonus = 15);
0069 }
0070 
0071 // Public interface
0072 static bool fuzzy_match_simple(const QString pattern, const QString str)
0073 {
0074     auto patternIt = pattern.cbegin();
0075     for (auto strIt = str.cbegin(); strIt != str.cend() && patternIt != pattern.cend(); ++strIt) {
0076         if (strIt->toLower() == patternIt->toLower()) {
0077             ++patternIt;
0078         }
0079     }
0080     return patternIt == pattern.cend();
0081 }
0082 
0083 static bool fuzzy_match(const QString pattern, const QString str, int &outScore)
0084 {
0085     uint8_t matches[256];
0086     return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
0087 }
0088 
0089 static bool fuzzy_match(const QString pattern, const QString str, int &outScore, uint8_t *matches, int maxMatches)
0090 {
0091     int recursionCount = 0;
0092 
0093     auto strIt = str.cbegin();
0094     auto patternIt = pattern.cbegin();
0095     const auto patternEnd = pattern.cend();
0096     const auto strEnd = str.cend();
0097 
0098     return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount);
0099 }
0100 
0101 static bool fuzzy_match_sequential(const QString pattern, const QString str, int &outScore)
0102 {
0103     int recursionCount = 0;
0104     uint8_t matches[256];
0105     auto maxMatches = sizeof(matches);
0106     auto strIt = str.cbegin();
0107     auto patternIt = pattern.cbegin();
0108     const auto patternEnd = pattern.cend();
0109     const auto strEnd = str.cend();
0110 
0111     return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount, 40);
0112 }
0113 
0114 // Private implementation
0115 static bool fuzzy_internal::fuzzy_match_recursive(QString::const_iterator pattern,
0116                                                   QString::const_iterator str,
0117                                                   int &outScore,
0118                                                   const QString::const_iterator strBegin,
0119                                                   const QString::const_iterator strEnd,
0120                                                   const QString::const_iterator patternEnd,
0121                                                   const uint8_t *srcMatches,
0122                                                   uint8_t *matches,
0123                                                   int maxMatches,
0124                                                   int nextMatch,
0125                                                   int &recursionCount,
0126                                                   int seqBonus)
0127 {
0128     // Count recursions
0129     static constexpr int recursionLimit = 10;
0130     ++recursionCount;
0131     if (recursionCount >= recursionLimit) {
0132         return false;
0133     }
0134 
0135     // Detect end of strings
0136     if (pattern == patternEnd || str == strEnd) {
0137         return false;
0138     }
0139 
0140     // Recursion params
0141     bool recursiveMatch = false;
0142     uint8_t bestRecursiveMatches[256];
0143     int bestRecursiveScore = 0;
0144 
0145     // Loop through pattern and str looking for a match
0146     bool first_match = true;
0147     while (pattern != patternEnd && str != strEnd) {
0148         // Found match
0149         if (pattern->toLower() == str->toLower()) {
0150             // Supplied matches buffer was too short
0151             if (nextMatch >= maxMatches) {
0152                 return false;
0153             }
0154 
0155             // "Copy-on-Write" srcMatches into matches
0156             if (first_match && srcMatches) {
0157                 memcpy(matches, srcMatches, nextMatch);
0158                 first_match = false;
0159             }
0160 
0161             // Recursive call that "skips" this match
0162             uint8_t recursiveMatches[256];
0163             int recursiveScore;
0164             auto strNextChar = std::next(str);
0165             if (fuzzy_match_recursive(pattern,
0166                                       strNextChar,
0167                                       recursiveScore,
0168                                       strBegin,
0169                                       strEnd,
0170                                       patternEnd,
0171                                       matches,
0172                                       recursiveMatches,
0173                                       sizeof(recursiveMatches),
0174                                       nextMatch,
0175                                       recursionCount)) {
0176                 // Pick best recursive score
0177                 if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
0178                     memcpy(bestRecursiveMatches, recursiveMatches, 256);
0179                     bestRecursiveScore = recursiveScore;
0180                 }
0181                 recursiveMatch = true;
0182             }
0183 
0184             // Advance
0185             matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str));
0186             ++pattern;
0187         }
0188         ++str;
0189     }
0190 
0191     // Determine if full pattern was matched
0192     bool matched = pattern == patternEnd ? true : false;
0193 
0194     // Calculate score
0195     if (matched) {
0196         int sequential_bonus = seqBonus; // bonus for adjacent matches
0197         static constexpr int separator_bonus = 30; // bonus if match occurs after a separator
0198         static constexpr int camel_bonus = 30; // bonus if match is uppercase and prev is lower
0199         static constexpr int first_letter_bonus = 15; // bonus if the first letter is matched
0200 
0201         static constexpr int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
0202         static constexpr int max_leading_letter_penalty = -15; // maximum penalty for leading letters
0203         static constexpr int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
0204 
0205         // Iterate str to end
0206         while (str != strEnd) {
0207             ++str;
0208         }
0209 
0210         // Initialize score
0211         outScore = 100;
0212 
0213         // Apply leading letter penalty
0214         int penalty = leading_letter_penalty * matches[0];
0215         if (penalty < max_leading_letter_penalty) {
0216             penalty = max_leading_letter_penalty;
0217         }
0218         outScore += penalty;
0219 
0220         // Apply unmatched penalty
0221         const int unmatched = (int)(std::distance(strBegin, str)) - nextMatch;
0222         outScore += unmatched_letter_penalty * unmatched;
0223 
0224         // Apply ordering bonuses
0225         for (int i = 0; i < nextMatch; ++i) {
0226             uint8_t currIdx = matches[i];
0227 
0228             if (i > 0) {
0229                 uint8_t prevIdx = matches[i - 1];
0230 
0231                 // Sequential
0232                 if (currIdx == (prevIdx + 1)) {
0233                     outScore += sequential_bonus;
0234                 }
0235             }
0236 
0237             // Check for bonuses based on neighbor character value
0238             if (currIdx > 0) {
0239                 // Camel case
0240                 QChar neighbor = *(strBegin + currIdx - 1);
0241                 QChar curr = *(strBegin + currIdx);
0242                 if (neighbor.isLower() && curr.isUpper()) {
0243                     outScore += camel_bonus;
0244                 }
0245 
0246                 // Separator
0247                 bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
0248                 if (neighborSeparator) {
0249                     outScore += separator_bonus;
0250                 }
0251             } else {
0252                 // First letter
0253                 outScore += first_letter_bonus;
0254             }
0255         }
0256     }
0257 
0258     // Return best result
0259     if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
0260         // Recursive score is better than "this"
0261         memcpy(matches, bestRecursiveMatches, maxMatches);
0262         outScore = bestRecursiveScore;
0263         return true;
0264     } else if (matched) {
0265         // "this" score is better than recursive
0266         return true;
0267     } else {
0268         // no match
0269         return false;
0270     }
0271 }
0272 
0273 static QString to_fuzzy_matched_display_string(const QString pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose)
0274 {
0275     /**
0276      * FIXME Don't do so many appends. Instead consider using some interval based solution to wrap a range
0277      * of text with the html <tag></tag>
0278      */
0279     int j = 0;
0280     for (int i = 0; i < str.size() && j < pattern.size(); ++i) {
0281         if (str.at(i).toLower() == pattern.at(j).toLower()) {
0282             str.replace(i, 1, htmlTag + str.at(i) + htmlTagClose);
0283             i += htmlTag.size() + htmlTagClose.size();
0284             ++j;
0285         }
0286     }
0287     return str;
0288 }
0289 } // namespace kfts
0290 
0291 #endif // KFTS_FUZZY_MATCH_H