File indexing completed on 2024-04-28 03:55:43

0001 /*
0002     This file is part of the KDE project
0003     SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org>
0004     SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com>
0005     SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org>
0006 
0007     Advanced web shortcuts:
0008     SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at>
0009 
0010     SPDX-License-Identifier: GPL-2.0-or-later
0011 */
0012 
0013 #include "kuriikwsfiltereng_p.h"
0014 #include "searchprovider.h"
0015 
0016 #include <KConfig>
0017 #include <KConfigGroup>
0018 #include <kprotocolinfo.h>
0019 
0020 #include <QDBusConnection>
0021 #include <QLoggingCategory>
0022 #include <QRegularExpression>
0023 #include <QStringEncoder>
0024 
0025 Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws", QtWarningMsg)
0026 using namespace KIO;
0027 
0028 /**
0029  * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator
0030  * unit tests still pass (they're usually run as part of "make test").
0031  */
0032 
0033 KURISearchFilterEngine::KURISearchFilterEngine()
0034 {
0035     configure();
0036     // Only after initial load, we would want to reparse the files on config changes.
0037     // When the registry is constructed, it automatically loads the searchproviders
0038     m_reloadRegistry = true;
0039     QDBusConnection::sessionBus()
0040         .connect(QString(), QStringLiteral("/"), QStringLiteral("org.kde.KUriFilterPlugin"), QStringLiteral("configure"), this, SLOT(configure()));
0041 }
0042 
0043 KURISearchFilterEngine::~KURISearchFilterEngine() = default;
0044 
0045 // static
0046 QStringList KURISearchFilterEngine::defaultSearchProviders()
0047 {
0048     static const QStringList defaultProviders{QStringLiteral("google"),
0049                                               QStringLiteral("youtube"),
0050                                               QStringLiteral("yahoo"),
0051                                               QStringLiteral("wikipedia"),
0052                                               QStringLiteral("wikit")};
0053     return defaultProviders;
0054 }
0055 
0056 SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const
0057 {
0058     const auto getProviderForKey = [this, &searchTerm](const QString &key) {
0059         SearchProvider *provider = nullptr;
0060         // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be
0061         // the case if the delimiter is switched to space, see kiowidgets_space_separator_test
0062         if (!key.isEmpty() && (key.contains(QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(key, false))) {
0063             provider = m_registry.findByKey(key);
0064             if (provider) {
0065                 if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(provider->desktopEntryName())) {
0066                     qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm;
0067                 } else {
0068                     provider = nullptr;
0069                 }
0070             }
0071         }
0072         return provider;
0073     };
0074 
0075     SearchProvider *provider = nullptr;
0076     if (m_bWebShortcutsEnabled) {
0077         QString key;
0078         if (typedString.contains(QLatin1Char('!'))) {
0079             const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)"));
0080             const auto match = bangRegex.match(typedString);
0081             if (match.hasMatch() && match.lastCapturedIndex() == 1) {
0082                 key = match.captured(1);
0083                 searchTerm = QString(typedString).remove(bangRegex);
0084             }
0085         }
0086 
0087         // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained
0088         // in the query. To avoid not returning any results we check if we can find a provider for the key, if not
0089         // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660
0090         if (!key.isEmpty()) {
0091             provider = getProviderForKey(key);
0092             if (!provider) {
0093                 key.clear();
0094             }
0095         }
0096         if (key.isEmpty()) {
0097             const int pos = typedString.indexOf(QLatin1Char(m_cKeywordDelimiter));
0098             if (pos > -1) {
0099                 key = typedString.left(pos).toLower(); // #169801
0100                 searchTerm = typedString.mid(pos + 1);
0101             } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') {
0102                 key = typedString;
0103                 searchTerm = typedString.mid(pos + 1);
0104             }
0105             provider = getProviderForKey(key);
0106         }
0107 
0108         qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString;
0109     }
0110 
0111     return provider;
0112 }
0113 
0114 SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const
0115 {
0116     SearchProvider *provider = nullptr;
0117     const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut);
0118 
0119     if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) {
0120         // Make sure we ignore supported protocols, e.g. "smb:", "http:"
0121         const int pos = typedString.indexOf(QLatin1Char(':'));
0122 
0123         if (pos == -1 || !KProtocolInfo::isKnownProtocol(typedString.left(pos), false)) {
0124             provider = m_registry.findByDesktopName(defaultSearchProvider);
0125         }
0126     }
0127 
0128     return provider;
0129 }
0130 
0131 QByteArray KURISearchFilterEngine::name() const
0132 {
0133     return "kuriikwsfilter";
0134 }
0135 
0136 char KURISearchFilterEngine::keywordDelimiter() const
0137 {
0138     return m_cKeywordDelimiter;
0139 }
0140 
0141 QString KURISearchFilterEngine::defaultSearchEngine() const
0142 {
0143     return m_defaultWebShortcut;
0144 }
0145 
0146 QStringList KURISearchFilterEngine::favoriteEngineList() const
0147 {
0148     return m_preferredWebShortcuts;
0149 }
0150 
0151 KURISearchFilterEngine *KURISearchFilterEngine::self()
0152 {
0153     static KURISearchFilterEngine self;
0154     return &self;
0155 }
0156 
0157 QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const
0158 {
0159     // Returns the number of query words
0160     QString userquery = query;
0161 
0162     // Do some pre-encoding, before we can start the work:
0163     {
0164         const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\""));
0165         // Temporarily substitute spaces in quoted strings (" " -> "%20")
0166         // Needed to split user query into StringList correctly.
0167         int start = 0;
0168         QRegularExpressionMatch match;
0169         while ((match = qsexpr.match(userquery, start)).hasMatch()) {
0170             QString str = match.captured(0);
0171             str.replace(QLatin1Char(' '), QLatin1String("%20"));
0172             userquery.replace(match.capturedStart(0), match.capturedLength(0), str);
0173             start = match.capturedStart(0) + str.size(); // Move after last quote
0174         }
0175     }
0176 
0177     // Split user query between spaces:
0178     QStringList l = userquery.simplified().split(QLatin1Char(' '), Qt::SkipEmptyParts);
0179 
0180     // Back-substitute quoted strings (%20 -> " "):
0181     userquery.replace(QLatin1String("%20"), QLatin1String(" "));
0182     l.replaceInStrings(QStringLiteral("%20"), QStringLiteral(" "));
0183 
0184     qCDebug(category) << "Generating substitution map:\n";
0185     // Generate substitution map from user query:
0186     for (int i = 0; i <= l.count(); i++) {
0187         int pos = 0;
0188         QString v;
0189 
0190         // Add whole user query (\{0}) to substitution map:
0191         if (i == 0) {
0192             v = userquery;
0193         }
0194         // Add partial user query items to substitution map:
0195         else {
0196             v = l[i - 1];
0197         }
0198 
0199         // Insert partial queries (referenced by \1 ... \n) to map:
0200         map.insert(QString::number(i), v);
0201 
0202         // Insert named references (referenced by \name) to map:
0203         if ((i > 0) && (pos = v.indexOf(QLatin1Char('='))) > 0) {
0204             QString s = v.mid(pos + 1);
0205             QString k = v.left(pos);
0206 
0207             // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0')
0208             s.replace(QLatin1String("%5C"), QLatin1String("\\"));
0209             map.insert(k, s);
0210         }
0211     }
0212 
0213     return l;
0214 }
0215 
0216 static QString encodeString(const QString &s, QStringEncoder &codec)
0217 {
0218     // we encode all characters, including the space character BUG: 304276
0219     QByteArray encoded = QByteArray(codec.encode(s)).toPercentEncoding();
0220     return QString::fromUtf8(encoded);
0221 }
0222 
0223 QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const
0224 {
0225     QString newurl = url;
0226     QStringList ql = modifySubstitutionMap(map, userquery);
0227     const int count = ql.count();
0228 
0229     // Substitute references (\{ref1,ref2,...}) with values from user query:
0230     {
0231         const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}"));
0232         // Substitute reflists (\{ref1,ref2,...}):
0233         int start = 0;
0234         QRegularExpressionMatch match;
0235         while ((match = reflistRe.match(newurl, start)).hasMatch()) {
0236             bool found = false;
0237 
0238             // bool rest = false;
0239             QString v;
0240             const QString rlstring = match.captured(1);
0241 
0242             // \{@} gets a special treatment later
0243             if (rlstring == QLatin1String("@")) {
0244                 v = QStringLiteral("\\@");
0245                 found = true;
0246             }
0247 
0248             // TODO: strip whitespaces around commas
0249             const QStringList refList = rlstring.split(QLatin1Char(','), Qt::SkipEmptyParts);
0250 
0251             for (const QString &rlitem : refList) {
0252                 if (found) {
0253                     break;
0254                 }
0255 
0256                 const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)"));
0257                 const QRegularExpressionMatch rangeMatch = rangeRe.match(rlitem);
0258                 // Substitute a range of keywords
0259                 if (rangeMatch.hasMatch()) {
0260                     int first = rangeMatch.captured(1).toInt();
0261                     int last = rangeMatch.captured(2).toInt();
0262 
0263                     if (first == 0) {
0264                         first = 1;
0265                     }
0266 
0267                     if (last == 0) {
0268                         last = count;
0269                     }
0270 
0271                     for (int i = first; i <= last; i++) {
0272                         v += map[QString::number(i)] + QLatin1Char(' ');
0273                         // Remove used value from ql (needed for \{@}):
0274                         ql[i - 1].clear();
0275                     }
0276 
0277                     v = v.trimmed();
0278                     if (!v.isEmpty()) {
0279                         found = true;
0280                     }
0281 
0282                     v = encodeString(v, codec);
0283                 } else if (rlitem.startsWith(QLatin1Char('\"')) && rlitem.endsWith(QLatin1Char('\"'))) {
0284                     // Use default string from query definition:
0285                     found = true;
0286                     QString s = rlitem.mid(1, rlitem.length() - 2);
0287                     v = encodeString(s, codec);
0288                 } else if (map.contains(rlitem)) {
0289                     // Use value from substitution map:
0290                     found = true;
0291                     v = encodeString(map[rlitem], codec);
0292 
0293                     // Remove used value from ql (needed for \{@}):
0294                     const QChar c = rlitem.at(0); // rlitem can't be empty at this point
0295                     if (c == QLatin1Char('0')) {
0296                         // It's a numeric reference to '0'
0297                         for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) {
0298                             (*it).clear();
0299                         }
0300                     } else if ((c >= QLatin1String("0")) && (c <= QLatin1String("9"))) { // krazy:excludeall=doublequote_chars
0301                         // It's a numeric reference > '0'
0302                         int n = rlitem.toInt();
0303                         ql[n - 1].clear();
0304                     } else {
0305                         // It's a alphanumeric reference
0306                         QStringList::Iterator it = ql.begin();
0307                         while ((it != ql.end()) && !it->startsWith(rlitem + QLatin1Char('='))) {
0308                             ++it;
0309                         }
0310                         if (it != ql.end()) {
0311                             it->clear();
0312                         }
0313                     }
0314 
0315                     // Encode '+', otherwise it would be interpreted as space in the resulting url:
0316                     v.replace(QLatin1Char('+'), QLatin1String("%2B"));
0317                 } else if (rlitem == QLatin1String("@")) {
0318                     v = QStringLiteral("\\@");
0319                 }
0320             }
0321 
0322             newurl.replace(match.capturedStart(0), match.capturedLength(0), v);
0323             start = match.capturedStart(0) + v.size();
0324         }
0325 
0326         // Special handling for \{@};
0327         {
0328             // Generate list of unmatched strings:
0329             QString v = ql.join(QLatin1Char(' ')).simplified();
0330             v = encodeString(v, codec);
0331 
0332             // Substitute \{@} with list of unmatched query strings
0333             newurl.replace(QLatin1String("\\@"), v);
0334         }
0335     }
0336 
0337     return newurl;
0338 }
0339 
0340 QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const
0341 {
0342     SubstMap map;
0343     return formatResult(url, cset1, cset2, query, isMalformed, map);
0344 }
0345 
0346 QUrl KURISearchFilterEngine::formatResult(const QString &url,
0347                                           const QString &cset1,
0348                                           const QString &cset2,
0349                                           const QString &userquery,
0350                                           bool /* isMalformed */,
0351                                           SubstMap &map) const
0352 {
0353     // Return nothing if userquery is empty and it contains
0354     // substitution strings...
0355     if (userquery.isEmpty() && url.indexOf(QLatin1String("\\{")) > 0) {
0356         return QUrl();
0357     }
0358 
0359     // Create a codec for the desired encoding so that we can transcode the user's "url".
0360     QString cseta = cset1;
0361     if (cseta.isEmpty()) {
0362         cseta = QStringLiteral("UTF-8");
0363     }
0364 
0365     QStringEncoder csetacodec(cseta.toLatin1().constData());
0366     if (!csetacodec.isValid()) {
0367         cseta = QStringLiteral("UTF-8");
0368         csetacodec = QStringEncoder(QStringEncoder::Utf8);
0369     }
0370 
0371     // Add charset indicator for the query to substitution map:
0372     map.insert(QStringLiteral("ikw_charset"), cseta);
0373 
0374     // Add charset indicator for the fallback query to substitution map:
0375     QString csetb = cset2;
0376     if (csetb.isEmpty()) {
0377         csetb = QStringLiteral("UTF-8");
0378     }
0379     map.insert(QStringLiteral("wsc_charset"), csetb);
0380 
0381     QString newurl = substituteQuery(url, map, userquery, csetacodec);
0382 
0383     return QUrl(newurl, QUrl::StrictMode);
0384 }
0385 
0386 void KURISearchFilterEngine::configure()
0387 {
0388     qCDebug(category) << "Keywords Engine: Loading config...";
0389 
0390     // Load the config.
0391     KConfig config(QString::fromUtf8(name()) + QLatin1String("rc"), KConfig::NoGlobals);
0392     KConfigGroup group = config.group(QStringLiteral("General"));
0393 
0394     m_cKeywordDelimiter = group.readEntry("KeywordDelimiter", ":").at(0).toLatin1();
0395     m_bWebShortcutsEnabled = group.readEntry("EnableWebShortcuts", true);
0396     m_defaultWebShortcut = group.readEntry("DefaultWebShortcut", "duckduckgo");
0397     m_bUseOnlyPreferredWebShortcuts = group.readEntry("UsePreferredWebShortcutsOnly", false);
0398 
0399     QStringList defaultPreferredShortcuts;
0400     if (!group.hasKey("PreferredWebShortcuts")) {
0401         defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders();
0402     }
0403     m_preferredWebShortcuts = group.readEntry("PreferredWebShortcuts", defaultPreferredShortcuts);
0404 
0405     // Use either a white space or a : as the keyword delimiter...
0406     if (strchr(" :", m_cKeywordDelimiter) == nullptr) {
0407         m_cKeywordDelimiter = ':';
0408     }
0409 
0410     qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled;
0411     qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut;
0412     qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter;
0413     if (m_reloadRegistry) {
0414         m_registry.reload();
0415     }
0416 }
0417 
0418 SearchProviderRegistry *KURISearchFilterEngine::registry()
0419 {
0420     return &m_registry;
0421 }
0422 
0423 #include "moc_kuriikwsfiltereng_p.cpp"