File indexing completed on 2024-04-28 03:53:48

0001 /*
0002     This file is part of the KDE libraries
0003 
0004     SPDX-FileCopyrightText: 2017 Forrest Smith <forrestthewoods@gmail.com>
0005     SPDX-FileCopyrightText: 2021 Waqar Ahmed <waqar.17a@gmail.com>
0006 
0007     SPDX-License-Identifier: LGPL-2.0-or-later
0008 */
0009 #ifndef KFUZZYMATCHER_H
0010 #define KFUZZYMATCHER_H
0011 
0012 #include <kcoreaddons_export.h>
0013 
0014 #include <QtContainerFwd>
0015 #include <QtGlobal>
0016 
0017 class QString;
0018 class QStringView;
0019 
0020 /**
0021  * This namespace contains functions for fuzzy matching a list of strings
0022  * against a pattern.
0023  *
0024  * This code is ported to Qt from lib_fts:
0025  * https://github.com/forrestthewoods/lib_fts
0026  * which tries to replicate SublimeText like fuzzy matching.
0027  *
0028  * @note
0029  * All character matches will happen sequentially. That means that this function is not
0030  * typo tolerant i.e., "gti" will not match "git", but "gt" will. All methods in here are
0031  * stateless i.e., the input string will not be modified. Also note that strings in all the
0032  * functions in this namespace will be matched case-insensitively.
0033  *
0034  * Limitations:
0035  * - Currently this will match only strings with length < 256 correctly. This is because we
0036  * intend on matching a pattern against words / short strings and not paragraphs.
0037  * - No more than 256 matches will happen.
0038  *
0039  * If you are using this with @c QSortFilterProxyModel, you need to override both
0040  * @c QSortFilterProxyModel::lessThan and @c QSortFilterProxyModel::filterAcceptsRow.
0041  * A simple example:
0042  *
0043  * \code
0044     bool filterAcceptsRow(int sourceRow, const QModelIndex &sourceParent) const override
0045     {
0046         int score = 0;
0047         const auto idx = sourceModel()->index(sourceRow, 0, sourceParent);
0048         const auto actionName = idx.data().toString().splitRef(QLatin1Char(':')).at(1);
0049         const bool res = kfts::fuzzy_match_sequential(m_pattern, actionName, score);
0050         // store the score in the source model
0051         sourceModel()->setData(idx, score, ScoreRole);
0052         return res;
0053     }
0054 
0055     bool lessThan(const QModelIndex &sourceLeft, const QModelIndex &sourceRight) const override
0056     {
0057         // use the score here to sort
0058         const int l = sourceLeft.data(ScoreRole).toInt();
0059         const int r = sourceRight.data(ScoreRole).toInt();
0060         return l < r;
0061     }
0062  * \endcode
0063  *
0064  * Additionally you must not use @c invalidateFilter() if you go with the above approach. Instead
0065  * use @c beginResetModel()/@c endResetModel():
0066  *
0067  * \code
0068  *  Q_SLOT void setFilterString(const QString &string)
0069     {
0070         beginResetModel();
0071         m_pattern = string;
0072         endResetModel();
0073     }
0074  * \endcode
0075  *
0076  * @short Namespace for fuzzy matching of strings
0077  * @author Waqar Ahmed <waqar.17a@gmail.com>
0078  */
0079 namespace KFuzzyMatcher
0080 {
0081 /**
0082  * @brief The result of a fuzzy match
0083  */
0084 struct KCOREADDONS_EXPORT Result {
0085     /**
0086      * Score of this match. This can be negative.if matched is @c false
0087      * then the score will be zero.
0088      */
0089     int score = 0;
0090     /** @c true if match was successful */
0091     bool matched = false;
0092 };
0093 
0094 /**
0095  * @brief A range representing a matched sequence in a string
0096  *
0097  * @since 5.84
0098  */
0099 struct KCOREADDONS_EXPORT Range {
0100     int start;
0101     int length;
0102 };
0103 
0104 /**
0105  * @brief The type of matches to consider when requesting for ranges.
0106  * @see matchedRanges
0107  *
0108  * @since 5.84
0109  */
0110 enum class RangeType : unsigned char {
0111     /**
0112      * We want ranges only where the pattern fully matches the user
0113      * supplied string
0114      */
0115     FullyMatched,
0116     /**
0117      * We want ranges for all matches, even if the pattern partially
0118      * matched the user supplied string
0119      */
0120     All
0121 };
0122 
0123 /**
0124  * @brief Simple fuzzy matching of chars in @p pattern with chars in @p str
0125  * sequentially. If there is a match, it will return true and false otherwise.
0126  * There is no scoring. You should use this if score is not important for you
0127  * and only matching is important.
0128  *
0129  * If @p pattern is empty, the function will return @c true
0130  *
0131  * @param pattern to search for. For e.g., text entered by a user to filter a
0132  * list
0133  * @param str the current string from your list of strings
0134  * @return @c true on sucessful match
0135  *
0136  * @since 5.79
0137  */
0138 KCOREADDONS_EXPORT bool matchSimple(QStringView pattern, QStringView str);
0139 
0140 /**
0141  * @brief This is the main function which does scored fuzzy matching.
0142  *
0143  * The return value of this function contains Result#score which should be used to
0144  * sort the results. Without sorting of the results this function won't very effective.
0145  *
0146  * If @p pattern is empty, the function will return @c true
0147  *
0148  * @param pattern to search for. For e.g., text entered by a user to filter a
0149  * list or model
0150  * @param str the current string from your list of strings
0151  * @return A Result type with score of this match and whether the match was
0152  * successful. If there is no match, score is zero. If the match is successful,
0153  * score must be used to sort the results.
0154  *
0155  * @since 5.79
0156  */
0157 KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str);
0158 
0159 /**
0160  * @brief A function which returns the positions + lengths where the @p pattern matched
0161  * inside the @p str. The resulting ranges can then be utilized to show the user where
0162  * the matches occurred. Example:
0163  *
0164  * @code
0165  * String: hello
0166  * Pattern: Hlo
0167  *
0168  * Output: [Range{0, 1}, Range{3, 2}]
0169  * @endcode
0170  *
0171  * In the above example @c "Hlo" matched inside the string @c "hello" in two places i.e.,
0172  * position 0 and position 3. At position 0 it matched 'h', and at position 3 it
0173  * matched 'lo'.
0174  *
0175  * The ranges themeselves can't do much so you will have to make the result useful
0176  * in your own way. Some possible uses can be:
0177  * - Transform the result into a vector of @c QTextLayout::FormatRange and then paint
0178  *   them in the view
0179  * - Use the result to transform the string into html, for example conver the string from
0180  *   above example to "\<b\>H\</b\>el\<b\>lo\</b\>, and then use @c QTextDocument
0181  *   to paint it into your view.
0182  *
0183  * Example with the first method:
0184  * @code
0185  *       auto ranges = KFuzzyMatcher::matchedRanges(pattern, string);
0186  *       QList<QTextLayout::FormatRange> out;
0187  *       std::transform(ranges.begin(), ranges.end(), std::back_inserter(out), [](const KFuzzyMatcher::Range &fr){
0188  *          return QTextLayout::FormatRange{fr.start, fr.length, QTextCharFormat()};
0189  *       });
0190  *
0191  *       QTextLayout layout(text, font);
0192  *       layout.beginLayout();
0193  *       QTextLine line = layout.createLine();
0194  *       //...
0195  *       layout.endLayout();
0196  *
0197  *       layout.setFormats(layout.formats() + out);
0198  *       layout.draw(painter, position);
0199  * @endcode
0200  *
0201  * If @p pattern is empty, the function will return an empty vector
0202  *
0203  * if @p type is @c RangeType::All, the function will try to get ranges even if
0204  * the pattern didn't fully match. For example:
0205  * @code
0206  * pattern: "git"
0207  * string: "gti"
0208  * RangeType: All
0209  *
0210  * Output: [Range{0, 1}, Range{2, 1}]
0211  * @endcode
0212  *
0213  * @param pattern to search for. For e.g., text entered by a user to filter a
0214  * list or model
0215  * @param str the current string from your list of strings
0216  * @param type whether to consider ranges from full matches only or all matches including partial matches
0217  * @return A vector of ranges containing positions and lengths where the pattern
0218  * matched. If there was no match, the vector will be empty
0219  *
0220  * @since 5.84
0221  */
0222 KCOREADDONS_EXPORT QList<KFuzzyMatcher::Range> matchedRanges(QStringView pattern, QStringView str, RangeType type = RangeType::FullyMatched);
0223 
0224 } // namespace KFuzzyMatcher
0225 
0226 Q_DECLARE_TYPEINFO(KFuzzyMatcher::Range, Q_PRIMITIVE_TYPE);
0227 
0228 #endif // KFUZZYMATCHER_H