File indexing completed on 2024-05-12 11:48:03

0001 /*
0002     This file is part of the KDE libraries
0003 
0004     SPDX-FileCopyrightText: 2017 Forrest Smith <forrestthewoods@gmail.com>
0005     SPDX-FileCopyrightText: 2021 Waqar Ahmed <waqar.17a@gmail.com>
0006 
0007     SPDX-License-Identifier: LGPL-2.0-or-later
0008 */
0009 #ifndef KFUZZYMATCHER_H
0010 #define KFUZZYMATCHER_H
0011 
0012 #include <kcoreaddons_export.h>
0013 
0014 #include <QtContainerFwd>
0015 
0016 class QString;
0017 class QStringView;
0018 
0019 /**
0020  * This namespace contains functions for fuzzy matching a list of strings
0021  * against a pattern.
0022  *
0023  * This code is ported to Qt from lib_fts:
0024  * https://github.com/forrestthewoods/lib_fts
0025  * which tries to replicate SublimeText like fuzzy matching.
0026  *
0027  * @note
0028  * All character matches will happen sequentially. That means that this function is not
0029  * typo tolerant i.e., "gti" will not match "git", but "gt" will. All methods in here are
0030  * stateless i.e., the input string will not be modified. Also note that strings in all the
0031  * functions in this namespace will be matched case-insensitively.
0032  *
0033  * Limitations:
0034  * - Currently this will match only strings with length < 256 correctly. This is because we
0035  * intend on matching a pattern against words / short strings and not paragraphs.
0036  * - No more than 256 matches will happen.
0037  *
0038  * If you are using this with @c QSortFilterProxyModel, you need to override both
0039  * @c QSortFilterProxyModel::lessThan and @c QSortFilterProxyModel::filterAcceptsRow.
0040  * A simple example:
0041  *
0042  * \code
0043     bool filterAcceptsRow(int sourceRow, const QModelIndex &sourceParent) const override
0044     {
0045         int score = 0;
0046         const auto idx = sourceModel()->index(sourceRow, 0, sourceParent);
0047         const auto actionName = idx.data().toString().splitRef(QLatin1Char(':')).at(1);
0048         const bool res = kfts::fuzzy_match_sequential(m_pattern, actionName, score);
0049         // store the score in the source model
0050         sourceModel()->setData(idx, score, ScoreRole);
0051         return res;
0052     }
0053 
0054     bool lessThan(const QModelIndex &sourceLeft, const QModelIndex &sourceRight) const override
0055     {
0056         // use the score here to sort
0057         const int l = sourceLeft.data(ScoreRole).toInt();
0058         const int r = sourceRight.data(ScoreRole).toInt();
0059         return l < r;
0060     }
0061  * \endcode
0062  *
0063  * Additionally you must not use @c invalidateFilter() if you go with the above approach. Instead
0064  * use @c beginResetModel()/@c endResetModel():
0065  *
0066  * \code
0067  *  Q_SLOT void setFilterString(const QString &string)
0068     {
0069         beginResetModel();
0070         m_pattern = string;
0071         endResetModel();
0072     }
0073  * \endcode
0074  *
0075  * @short Namespace for fuzzy matching of strings
0076  * @author Waqar Ahmed <waqar.17a@gmail.com>
0077  */
0078 namespace KFuzzyMatcher
0079 {
0080 /**
0081  * @brief The result of a fuzzy match
0082  */
0083 struct KCOREADDONS_EXPORT Result {
0084     /**
0085      * Score of this match. This can be negative.if matched is @c false
0086      * then the score will be zero.
0087      */
0088     int score = 0;
0089     /** @c true if match was successful */
0090     bool matched = false;
0091 };
0092 
0093 /**
0094  * @brief A range representing a matched sequence in a string
0095  *
0096  * @since 5.84
0097  */
0098 struct KCOREADDONS_EXPORT Range {
0099     int start;
0100     int length;
0101 };
0102 
0103 /**
0104  * @brief The type of matches to consider when requesting for ranges.
0105  * @see matchedRanges
0106  *
0107  * @since 5.84
0108  */
0109 enum class RangeType : unsigned char {
0110     /**
0111      * We want ranges only where the pattern fully matches the user
0112      * supplied string
0113      */
0114     FullyMatched,
0115     /**
0116      * We want ranges for all matches, even if the pattern partially
0117      * matched the user supplied string
0118      */
0119     All
0120 };
0121 
0122 /**
0123  * @brief Simple fuzzy matching of chars in @p pattern with chars in @p str
0124  * sequentially. If there is a match, it will return true and false otherwise.
0125  * There is no scoring. You should use this if score is not important for you
0126  * and only matching is important.
0127  *
0128  * If @p pattern is empty, the function will return @c true
0129  *
0130  * @param pattern to search for. For e.g., text entered by a user to filter a
0131  * list
0132  * @param str the current string from your list of strings
0133  * @return @c true on sucessful match
0134  *
0135  * @since 5.79
0136  */
0137 KCOREADDONS_EXPORT bool matchSimple(QStringView pattern, QStringView str);
0138 
0139 /**
0140  * @brief This is the main function which does scored fuzzy matching.
0141  *
0142  * The return value of this function contains Result#score which should be used to
0143  * sort the results. Without sorting of the results this function won't very effective.
0144  *
0145  * If @p pattern is empty, the function will return @c true
0146  *
0147  * @param pattern to search for. For e.g., text entered by a user to filter a
0148  * list or model
0149  * @param str the current string from your list of strings
0150  * @return A Result type with score of this match and whether the match was
0151  * successful. If there is no match, score is zero. If the match is successful,
0152  * score must be used to sort the results.
0153  *
0154  * @since 5.79
0155  */
0156 KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str);
0157 
0158 /**
0159  * @brief A function which returns the positions + lengths where the @p pattern matched
0160  * inside the @p str. The resulting ranges can then be utilized to show the user where
0161  * the matches occurred. Example:
0162  *
0163  * @code
0164  * String: hello
0165  * Pattern: Hlo
0166  *
0167  * Output: [Range{0, 1}, Range{3, 2}]
0168  * @endcode
0169  *
0170  * In the above example @c "Hlo" matched inside the string @c "hello" in two places i.e.,
0171  * position 0 and position 3. At position 0 it matched 'h', and at position 3 it
0172  * matched 'lo'.
0173  *
0174  * The ranges themeselves can't do much so you will have to make the result useful
0175  * in your own way. Some possible uses can be:
0176  * - Transform the result into a vector of @c QTextLayout::FormatRange and then paint
0177  *   them in the view
0178  * - Use the result to transform the string into html, for example conver the string from
0179  *   above example to "\<b\>H\</b\>el\<b\>lo\</b\>, and then use @c QTextDocument
0180  *   to paint it into your view.
0181  *
0182  * Example with the first method:
0183  * @code
0184  *       auto ranges = KFuzzyMatcher::matchedRanges(pattern, string);
0185  *       QVector<QTextLayout::FormatRange> out;
0186  *       std::transform(ranges.begin(), ranges.end(), std::back_inserter(out), [](const KFuzzyMatcher::Range &fr){
0187  *          return QTextLayout::FormatRange{fr.start, fr.length, QTextCharFormat()};
0188  *       });
0189  *
0190  *       QTextLayout layout(text, font);
0191  *       layout.beginLayout();
0192  *       QTextLine line = layout.createLine();
0193  *       //...
0194  *       layout.endLayout();
0195  *
0196  *       layout.setFormats(layout.formats() + out);
0197  *       layout.draw(painter, position);
0198  * @endcode
0199  *
0200  * If @p pattern is empty, the function will return an empty vector
0201  *
0202  * if @p type is @c RangeType::All, the function will try to get ranges even if
0203  * the pattern didn't fully match. For example:
0204  * @code
0205  * pattern: "git"
0206  * string: "gti"
0207  * RangeType: All
0208  *
0209  * Output: [Range{0, 1}, Range{2, 1}]
0210  * @endcode
0211  *
0212  * @param pattern to search for. For e.g., text entered by a user to filter a
0213  * list or model
0214  * @param str the current string from your list of strings
0215  * @param type whether to consider ranges from full matches only or all matches including partial matches
0216  * @return A vector of ranges containing positions and lengths where the pattern
0217  * matched. If there was no match, the vector will be empty
0218  *
0219  * @since 5.84
0220  */
0221 KCOREADDONS_EXPORT QVector<KFuzzyMatcher::Range> matchedRanges(QStringView pattern, QStringView str, RangeType type = RangeType::FullyMatched);
0222 
0223 } // namespace KFuzzyMatcher
0224 
0225 #endif // KFUZZYMATCHER_H