// File indexing completed on 2024-05-12 11:54:37
0001 /* 0002 This file is part of the KDE project 0003 SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org> 0004 SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com> 0005 SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org> 0006 0007 Advanced web shortcuts: 0008 SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at> 0009 0010 SPDX-License-Identifier: GPL-2.0-or-later 0011 */ 0012 0013 #include "kuriikwsfiltereng.h" 0014 #include "searchprovider.h" 0015 0016 #include <KConfig> 0017 #include <KConfigGroup> 0018 #include <kprotocolinfo.h> 0019 0020 #include <QLoggingCategory> 0021 #include <QRegularExpression> 0022 #include <QTextCodec> 0023 0024 namespace 0025 { 0026 Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws", QtWarningMsg) 0027 } 0028 0029 static void kuriikws_debug(const QString &n, const QString &v) 0030 { 0031 qCDebug(category) << n << " = '" << v << "'"; 0032 } 0033 0034 /** 0035 * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator 0036 * unit tests still pass (they're usually run as part of "make test"). 0037 */ 0038 0039 KURISearchFilterEngine::KURISearchFilterEngine() 0040 { 0041 loadConfig(); 0042 } 0043 0044 KURISearchFilterEngine::~KURISearchFilterEngine() 0045 { 0046 } 0047 0048 // static 0049 QStringList KURISearchFilterEngine::defaultSearchProviders() 0050 { 0051 static const QStringList defaultProviders{QStringLiteral("google"), 0052 QStringLiteral("youtube"), 0053 QStringLiteral("yahoo"), 0054 QStringLiteral("wikipedia"), 0055 QStringLiteral("wikit")}; 0056 return defaultProviders; 0057 } 0058 0059 SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const 0060 { 0061 const auto getProviderForKey = [this, &searchTerm](const QString &key) { 0062 SearchProvider *provider = nullptr; 0063 // If the key contains a : an assertion in the isKnownProtocol method would fail. 
This can be 0064 // the case if the delimiter is switched to space, see kiowidgets_space_separator_test 0065 if (!key.isEmpty() && (key.contains(QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(key))) { 0066 provider = m_registry.findByKey(key); 0067 if (provider) { 0068 if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(provider->desktopEntryName())) { 0069 qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm; 0070 } else { 0071 provider = nullptr; 0072 } 0073 } 0074 } 0075 return provider; 0076 }; 0077 0078 SearchProvider *provider = nullptr; 0079 if (m_bWebShortcutsEnabled) { 0080 QString key; 0081 if (typedString.contains(QLatin1Char('!'))) { 0082 const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)")); 0083 const auto match = bangRegex.match(typedString); 0084 if (match.hasMatch() && match.lastCapturedIndex() == 1) { 0085 key = match.captured(1); 0086 searchTerm = QString(typedString).remove(bangRegex); 0087 } 0088 } 0089 0090 // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained 0091 // in the query. 
To avoid not returning any results we check if we can find a provider for the key, if not 0092 // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660 0093 if (!key.isEmpty()) { 0094 provider = getProviderForKey(key); 0095 if (!provider) { 0096 key.clear(); 0097 } 0098 } 0099 if (key.isEmpty()) { 0100 const int pos = typedString.indexOf(QLatin1Char(m_cKeywordDelimiter)); 0101 if (pos > -1) { 0102 key = typedString.left(pos).toLower(); // #169801 0103 searchTerm = typedString.mid(pos + 1); 0104 } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') { 0105 key = typedString; 0106 searchTerm = typedString.mid(pos + 1); 0107 } 0108 provider = getProviderForKey(key); 0109 } 0110 0111 qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString; 0112 } 0113 0114 return provider; 0115 } 0116 0117 SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const 0118 { 0119 SearchProvider *provider = nullptr; 0120 const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut); 0121 0122 if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) { 0123 // Make sure we ignore supported protocols, e.g. 
"smb:", "http:" 0124 const int pos = typedString.indexOf(QLatin1Char(':')); 0125 0126 if (pos == -1 || !KProtocolInfo::isKnownProtocol(typedString.left(pos))) { 0127 provider = m_registry.findByDesktopName(defaultSearchProvider); 0128 } 0129 } 0130 0131 return provider; 0132 } 0133 0134 QByteArray KURISearchFilterEngine::name() const 0135 { 0136 return "kuriikwsfilter"; 0137 } 0138 0139 char KURISearchFilterEngine::keywordDelimiter() const 0140 { 0141 return m_cKeywordDelimiter; 0142 } 0143 0144 QString KURISearchFilterEngine::defaultSearchEngine() const 0145 { 0146 return m_defaultWebShortcut; 0147 } 0148 0149 QStringList KURISearchFilterEngine::favoriteEngineList() const 0150 { 0151 return m_preferredWebShortcuts; 0152 } 0153 0154 Q_GLOBAL_STATIC(KURISearchFilterEngine, sSelfPtr) 0155 0156 KURISearchFilterEngine *KURISearchFilterEngine::self() 0157 { 0158 return sSelfPtr; 0159 } 0160 0161 QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const 0162 { 0163 // Returns the number of query words 0164 QString userquery = query; 0165 0166 // Do some pre-encoding, before we can start the work: 0167 { 0168 const QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\"")); 0169 // Temporarily substitute spaces in quoted strings (" " -> "%20") 0170 // Needed to split user query into StringList correctly. 
0171 int start = 0; 0172 QRegularExpressionMatch match; 0173 while ((match = qsexpr.match(userquery, start)).hasMatch()) { 0174 QString str = match.captured(0); 0175 str.replace(QLatin1Char(' '), QLatin1String("%20")); 0176 userquery.replace(match.capturedStart(0), match.capturedLength(0), str); 0177 start = match.capturedStart(0) + str.size(); // Move after last quote 0178 } 0179 } 0180 0181 // Split user query between spaces: 0182 QStringList l = userquery.simplified().split(QLatin1Char(' '), Qt::SkipEmptyParts); 0183 0184 // Back-substitute quoted strings (%20 -> " "): 0185 userquery.replace(QLatin1String("%20"), QLatin1String(" ")); 0186 l.replaceInStrings(QStringLiteral("%20"), QStringLiteral(" ")); 0187 0188 qCDebug(category) << "Generating substitution map:\n"; 0189 // Generate substitution map from user query: 0190 for (int i = 0; i <= l.count(); i++) { 0191 int pos = 0; 0192 QString v; 0193 QString nr = QString::number(i); 0194 0195 // Add whole user query (\{0}) to substitution map: 0196 if (i == 0) { 0197 v = userquery; 0198 } 0199 // Add partial user query items to substitution map: 0200 else { 0201 v = l[i - 1]; 0202 } 0203 0204 // Insert partial queries (referenced by \1 ... \n) to map: 0205 map.insert(QString::number(i), v); 0206 kuriikws_debug(QLatin1String(" map['") + nr + QLatin1String("']"), map[nr]); 0207 0208 // Insert named references (referenced by \name) to map: 0209 if ((i > 0) && (pos = v.indexOf(QLatin1Char('='))) > 0) { 0210 QString s = v.mid(pos + 1); 0211 QString k = v.left(pos); 0212 0213 // Back-substitute references contained in references (e.g. 
'\refname' substitutes to 'thisquery=\0') 0214 s.replace(QLatin1String("%5C"), QLatin1String("\\")); 0215 map.insert(k, s); 0216 kuriikws_debug(QLatin1String(" map['") + k + QLatin1String("']"), map[k]); 0217 } 0218 } 0219 0220 return l; 0221 } 0222 0223 static QString encodeString(const QString &s, QTextCodec *codec) 0224 { 0225 // we encode all characters, including the space character BUG: 304276 0226 QByteArray encoded = codec->fromUnicode(s).toPercentEncoding(); 0227 return QString::fromUtf8(encoded); 0228 } 0229 0230 QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QTextCodec *codec) const 0231 { 0232 QString newurl = url; 0233 QStringList ql = modifySubstitutionMap(map, userquery); 0234 const int count = ql.count(); 0235 0236 // Check, if old style '\1' is found and replace it with \{@} (compatibility mode): 0237 { 0238 int pos = -1; 0239 if ((pos = newurl.indexOf(QLatin1String("\\1"))) >= 0) { 0240 qCWarning(category) << "WARNING: Using compatibility mode for newurl='" << newurl 0241 << "'. 
Please replace old style '\\1' with new style '\\{0}' " 0242 "in the query definition.\n"; 0243 newurl.replace(pos, 2, QStringLiteral("\\{@}")); 0244 } 0245 } 0246 0247 qCDebug(category) << "Substitute references:\n"; 0248 // Substitute references (\{ref1,ref2,...}) with values from user query: 0249 { 0250 const QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}")); 0251 // Substitute reflists (\{ref1,ref2,...}): 0252 int start = 0; 0253 QRegularExpressionMatch match; 0254 while ((match = reflistRe.match(newurl, start)).hasMatch()) { 0255 bool found = false; 0256 0257 // bool rest = false; 0258 QString v; 0259 const QString rlstring = match.captured(1); 0260 kuriikws_debug(QStringLiteral(" reference list"), rlstring); 0261 0262 // \{@} gets a special treatment later 0263 if (rlstring == QLatin1String("@")) { 0264 v = QStringLiteral("\\@"); 0265 found = true; 0266 } 0267 0268 // TODO: strip whitespaces around commas 0269 const QStringList refList = rlstring.split(QLatin1Char(','), Qt::SkipEmptyParts); 0270 0271 for (const QString &rlitem : refList) { 0272 if (found) { 0273 break; 0274 } 0275 0276 const QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)")); 0277 const QRegularExpressionMatch rangeMatch = rangeRe.match(rlitem); 0278 // Substitute a range of keywords 0279 if (rangeMatch.hasMatch()) { 0280 int first = rangeMatch.captured(1).toInt(); 0281 int last = rangeMatch.captured(2).toInt(); 0282 0283 if (first == 0) { 0284 first = 1; 0285 } 0286 0287 if (last == 0) { 0288 last = count; 0289 } 0290 0291 for (int i = first; i <= last; i++) { 0292 v += map[QString::number(i)] + QLatin1Char(' '); 0293 // Remove used value from ql (needed for \{@}): 0294 ql[i - 1].clear(); 0295 } 0296 0297 v = v.trimmed(); 0298 if (!v.isEmpty()) { 0299 found = true; 0300 } 0301 0302 kuriikws_debug(QStringLiteral(" range"), 0303 QString::number(first) + QLatin1Char('-') + QString::number(last) + QLatin1String(" => '") + v + QLatin1Char('\'')); 0304 v = 
encodeString(v, codec); 0305 } else if (rlitem.startsWith(QLatin1Char('\"')) && rlitem.endsWith(QLatin1Char('\"'))) { 0306 // Use default string from query definition: 0307 found = true; 0308 QString s = rlitem.mid(1, rlitem.length() - 2); 0309 v = encodeString(s, codec); 0310 kuriikws_debug(QStringLiteral(" default"), s); 0311 } else if (map.contains(rlitem)) { 0312 // Use value from substitution map: 0313 found = true; 0314 kuriikws_debug(QLatin1String(" map['") + rlitem + QLatin1String("']"), map[rlitem]); 0315 v = encodeString(map[rlitem], codec); 0316 0317 // Remove used value from ql (needed for \{@}): 0318 const QChar c = rlitem.at(0); // rlitem can't be empty at this point 0319 if (c == QLatin1Char('0')) { 0320 // It's a numeric reference to '0' 0321 for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) { 0322 (*it).clear(); 0323 } 0324 } else if ((c >= QLatin1String("0")) && (c <= QLatin1String("9"))) { // krazy:excludeall=doublequote_chars 0325 // It's a numeric reference > '0' 0326 int n = rlitem.toInt(); 0327 ql[n - 1].clear(); 0328 } else { 0329 // It's a alphanumeric reference 0330 QStringList::Iterator it = ql.begin(); 0331 while ((it != ql.end()) && !it->startsWith(rlitem + QLatin1Char('='))) { 0332 ++it; 0333 } 0334 if (it != ql.end()) { 0335 it->clear(); 0336 } 0337 } 0338 0339 // Encode '+', otherwise it would be interpreted as space in the resulting url: 0340 v.replace(QLatin1Char('+'), QLatin1String("%2B")); 0341 } else if (rlitem == QLatin1String("@")) { 0342 v = QStringLiteral("\\@"); 0343 kuriikws_debug(QStringLiteral(" v"), v); 0344 } 0345 } 0346 0347 newurl.replace(match.capturedStart(0), match.capturedLength(0), v); 0348 start = match.capturedStart(0) + v.size(); 0349 } 0350 0351 // Special handling for \{@}; 0352 { 0353 kuriikws_debug(QStringLiteral(" newurl"), newurl); 0354 // Generate list of unmatched strings: 0355 QString v = ql.join(QLatin1Char(' ')).simplified(); 0356 0357 kuriikws_debug(QStringLiteral(" rest"), v); 
0358 v = encodeString(v, codec); 0359 0360 // Substitute \{@} with list of unmatched query strings 0361 newurl.replace(QLatin1String("\\@"), v); 0362 } 0363 } 0364 0365 return newurl; 0366 } 0367 0368 QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const 0369 { 0370 SubstMap map; 0371 return formatResult(url, cset1, cset2, query, isMalformed, map); 0372 } 0373 0374 QUrl KURISearchFilterEngine::formatResult(const QString &url, 0375 const QString &cset1, 0376 const QString &cset2, 0377 const QString &userquery, 0378 bool /* isMalformed */, 0379 SubstMap &map) const 0380 { 0381 // Return nothing if userquery is empty and it contains 0382 // substitution strings... 0383 if (userquery.isEmpty() && url.indexOf(QLatin1String("\\{")) > 0) { 0384 return QUrl(); 0385 } 0386 0387 // Debug info of map: 0388 if (!map.isEmpty()) { 0389 qCDebug(category) << "Got non-empty substitution map:\n"; 0390 for (SubstMap::Iterator it = map.begin(); it != map.end(); ++it) { 0391 kuriikws_debug(QLatin1String(" map['") + it.key() + QLatin1String("']"), it.value()); 0392 } 0393 } 0394 0395 // Create a codec for the desired encoding so that we can transcode the user's "url". 
0396 QString cseta = cset1; 0397 if (cseta.isEmpty()) { 0398 cseta = QStringLiteral("UTF-8"); 0399 } 0400 0401 QTextCodec *csetacodec = QTextCodec::codecForName(cseta.toLatin1()); 0402 if (!csetacodec) { 0403 cseta = QStringLiteral("UTF-8"); 0404 csetacodec = QTextCodec::codecForName(cseta.toLatin1()); 0405 } 0406 0407 kuriikws_debug(QStringLiteral("user query"), userquery); 0408 kuriikws_debug(QStringLiteral("query definition"), url); 0409 0410 // Add charset indicator for the query to substitution map: 0411 map.insert(QStringLiteral("ikw_charset"), cseta); 0412 0413 // Add charset indicator for the fallback query to substitution map: 0414 QString csetb = cset2; 0415 if (csetb.isEmpty()) { 0416 csetb = QStringLiteral("UTF-8"); 0417 } 0418 map.insert(QStringLiteral("wsc_charset"), csetb); 0419 0420 QString newurl = substituteQuery(url, map, userquery, csetacodec); 0421 0422 kuriikws_debug(QStringLiteral("substituted query"), newurl); 0423 0424 return QUrl(newurl, QUrl::StrictMode); 0425 } 0426 0427 void KURISearchFilterEngine::loadConfig() 0428 { 0429 qCDebug(category) << "Keywords Engine: Loading config..."; 0430 0431 // Load the config. 
0432 KConfig config(QString::fromUtf8(name()) + QLatin1String("rc"), KConfig::NoGlobals); 0433 KConfigGroup group = config.group("General"); 0434 0435 m_cKeywordDelimiter = QString(group.readEntry("KeywordDelimiter", ":")).at(0).toLatin1(); 0436 m_bWebShortcutsEnabled = group.readEntry("EnableWebShortcuts", true); 0437 m_defaultWebShortcut = group.readEntry("DefaultWebShortcut", "duckduckgo"); 0438 m_bUseOnlyPreferredWebShortcuts = group.readEntry("UsePreferredWebShortcutsOnly", false); 0439 0440 QStringList defaultPreferredShortcuts; 0441 if (!group.hasKey("PreferredWebShortcuts")) { 0442 defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders(); 0443 } 0444 m_preferredWebShortcuts = group.readEntry("PreferredWebShortcuts", defaultPreferredShortcuts); 0445 0446 // Use either a white space or a : as the keyword delimiter... 0447 if (strchr(" :", m_cKeywordDelimiter) == nullptr) { 0448 m_cKeywordDelimiter = ':'; 0449 } 0450 0451 qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled; 0452 qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut; 0453 qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter; 0454 m_registry.reload(); 0455 } 0456 0457 SearchProviderRegistry *KURISearchFilterEngine::registry() 0458 { 0459 return &m_registry; 0460 }