File indexing completed on 2024-04-28 03:55:43
0001 /* 0002 This file is part of the KDE project 0003 SPDX-FileCopyrightText: 2002, 2003 Dawit Alemayehu <adawit@kde.org> 0004 SPDX-FileCopyrightText: 2000 Yves Arrouye <yves@realnames.com> 0005 SPDX-FileCopyrightText: 1999 Simon Hausmann <hausmann@kde.org> 0006 0007 Advanced web shortcuts: 0008 SPDX-FileCopyrightText: 2001 Andreas Hochsteger <e9625392@student.tuwien.ac.at> 0009 0010 SPDX-License-Identifier: GPL-2.0-or-later 0011 */ 0012 0013 #include "kuriikwsfiltereng_p.h" 0014 #include "searchprovider.h" 0015 0016 #include <KConfig> 0017 #include <KConfigGroup> 0018 #include <kprotocolinfo.h> 0019 0020 #include <QDBusConnection> 0021 #include <QLoggingCategory> 0022 #include <QRegularExpression> 0023 #include <QStringEncoder> 0024 0025 Q_LOGGING_CATEGORY(category, "kf.kio.urifilters.ikws", QtWarningMsg) 0026 using namespace KIO; 0027 0028 /** 0029 * IMPORTANT: If you change anything here, make sure kiowidgets-kurifiltertest-{colon,space}-separator 0030 * unit tests still pass (they're usually run as part of "make test"). 0031 */ 0032 0033 KURISearchFilterEngine::KURISearchFilterEngine() 0034 { 0035 configure(); 0036 // Only after initial load, we would want to reparse the files on config changes. 0037 // When the registry is constructed, it automatically loads the searchproviders 0038 m_reloadRegistry = true; 0039 QDBusConnection::sessionBus() 0040 .connect(QString(), QStringLiteral("/"), QStringLiteral("org.kde.KUriFilterPlugin"), QStringLiteral("configure"), this, SLOT(configure())); 0041 } 0042 0043 KURISearchFilterEngine::~KURISearchFilterEngine() = default; 0044 0045 // static 0046 QStringList KURISearchFilterEngine::defaultSearchProviders() 0047 { 0048 static const QStringList defaultProviders{QStringLiteral("google"), 0049 QStringLiteral("youtube"), 0050 QStringLiteral("yahoo"), 0051 QStringLiteral("wikipedia"), 0052 QStringLiteral("wikit")}; 0053 return defaultProviders; 0054 } 0055 0056 SearchProvider *KURISearchFilterEngine::webShortcutQuery(const QString &typedString, QString &searchTerm) const 0057 { 0058 const auto getProviderForKey = [this, &searchTerm](const QString &key) { 0059 SearchProvider *provider = nullptr; 0060 // If the key contains a : an assertion in the isKnownProtocol method would fail. This can be 0061 // the case if the delimiter is switched to space, see kiowidgets_space_separator_test 0062 if (!key.isEmpty() && (key.contains(QLatin1Char(':')) || !KProtocolInfo::isKnownProtocol(key, false))) { 0063 provider = m_registry.findByKey(key); 0064 if (provider) { 0065 if (!m_bUseOnlyPreferredWebShortcuts || m_preferredWebShortcuts.contains(provider->desktopEntryName())) { 0066 qCDebug(category) << "found provider" << provider->desktopEntryName() << "searchTerm=" << searchTerm; 0067 } else { 0068 provider = nullptr; 0069 } 0070 } 0071 } 0072 return provider; 0073 }; 0074 0075 SearchProvider *provider = nullptr; 0076 if (m_bWebShortcutsEnabled) { 0077 QString key; 0078 if (typedString.contains(QLatin1Char('!'))) { 0079 const static QRegularExpression bangRegex(QStringLiteral("!([^ ]+)")); 0080 const auto match = bangRegex.match(typedString); 0081 if (match.hasMatch() && match.lastCapturedIndex() == 1) { 0082 key = match.captured(1); 0083 searchTerm = QString(typedString).remove(bangRegex); 0084 } 0085 } 0086 0087 // If we have found a bang-match it might be unintentionally triggered, because the ! character is contained 0088 // in the query. To avoid not returning any results we check if we can find a provider for the key, if not 0089 // we clear it and try the traditional query syntax, see https://bugs.kde.org/show_bug.cgi?id=437660 0090 if (!key.isEmpty()) { 0091 provider = getProviderForKey(key); 0092 if (!provider) { 0093 key.clear(); 0094 } 0095 } 0096 if (key.isEmpty()) { 0097 const int pos = typedString.indexOf(QLatin1Char(m_cKeywordDelimiter)); 0098 if (pos > -1) { 0099 key = typedString.left(pos).toLower(); // #169801 0100 searchTerm = typedString.mid(pos + 1); 0101 } else if (!typedString.isEmpty() && m_cKeywordDelimiter == ' ') { 0102 key = typedString; 0103 searchTerm = typedString.mid(pos + 1); 0104 } 0105 provider = getProviderForKey(key); 0106 } 0107 0108 qCDebug(category) << "m_cKeywordDelimiter=" << QLatin1Char(m_cKeywordDelimiter) << "key=" << key << "typedString=" << typedString; 0109 } 0110 0111 return provider; 0112 } 0113 0114 SearchProvider *KURISearchFilterEngine::autoWebSearchQuery(const QString &typedString, const QString &defaultShortcut) const 0115 { 0116 SearchProvider *provider = nullptr; 0117 const QString defaultSearchProvider = (m_defaultWebShortcut.isEmpty() ? defaultShortcut : m_defaultWebShortcut); 0118 0119 if (m_bWebShortcutsEnabled && !defaultSearchProvider.isEmpty()) { 0120 // Make sure we ignore supported protocols, e.g. "smb:", "http:" 0121 const int pos = typedString.indexOf(QLatin1Char(':')); 0122 0123 if (pos == -1 || !KProtocolInfo::isKnownProtocol(typedString.left(pos), false)) { 0124 provider = m_registry.findByDesktopName(defaultSearchProvider); 0125 } 0126 } 0127 0128 return provider; 0129 } 0130 0131 QByteArray KURISearchFilterEngine::name() const 0132 { 0133 return "kuriikwsfilter"; 0134 } 0135 0136 char KURISearchFilterEngine::keywordDelimiter() const 0137 { 0138 return m_cKeywordDelimiter; 0139 } 0140 0141 QString KURISearchFilterEngine::defaultSearchEngine() const 0142 { 0143 return m_defaultWebShortcut; 0144 } 0145 0146 QStringList KURISearchFilterEngine::favoriteEngineList() const 0147 { 0148 return m_preferredWebShortcuts; 0149 } 0150 0151 KURISearchFilterEngine *KURISearchFilterEngine::self() 0152 { 0153 static KURISearchFilterEngine self; 0154 return &self; 0155 } 0156 0157 QStringList KURISearchFilterEngine::modifySubstitutionMap(SubstMap &map, const QString &query) const 0158 { 0159 // Returns the number of query words 0160 QString userquery = query; 0161 0162 // Do some pre-encoding, before we can start the work: 0163 { 0164 const static QRegularExpression qsexpr(QStringLiteral("\\\"[^\\\"]*\\\"")); 0165 // Temporarily substitute spaces in quoted strings (" " -> "%20") 0166 // Needed to split user query into StringList correctly. 0167 int start = 0; 0168 QRegularExpressionMatch match; 0169 while ((match = qsexpr.match(userquery, start)).hasMatch()) { 0170 QString str = match.captured(0); 0171 str.replace(QLatin1Char(' '), QLatin1String("%20")); 0172 userquery.replace(match.capturedStart(0), match.capturedLength(0), str); 0173 start = match.capturedStart(0) + str.size(); // Move after last quote 0174 } 0175 } 0176 0177 // Split user query between spaces: 0178 QStringList l = userquery.simplified().split(QLatin1Char(' '), Qt::SkipEmptyParts); 0179 0180 // Back-substitute quoted strings (%20 -> " "): 0181 userquery.replace(QLatin1String("%20"), QLatin1String(" ")); 0182 l.replaceInStrings(QStringLiteral("%20"), QStringLiteral(" ")); 0183 0184 qCDebug(category) << "Generating substitution map:\n"; 0185 // Generate substitution map from user query: 0186 for (int i = 0; i <= l.count(); i++) { 0187 int pos = 0; 0188 QString v; 0189 0190 // Add whole user query (\{0}) to substitution map: 0191 if (i == 0) { 0192 v = userquery; 0193 } 0194 // Add partial user query items to substitution map: 0195 else { 0196 v = l[i - 1]; 0197 } 0198 0199 // Insert partial queries (referenced by \1 ... \n) to map: 0200 map.insert(QString::number(i), v); 0201 0202 // Insert named references (referenced by \name) to map: 0203 if ((i > 0) && (pos = v.indexOf(QLatin1Char('='))) > 0) { 0204 QString s = v.mid(pos + 1); 0205 QString k = v.left(pos); 0206 0207 // Back-substitute references contained in references (e.g. '\refname' substitutes to 'thisquery=\0') 0208 s.replace(QLatin1String("%5C"), QLatin1String("\\")); 0209 map.insert(k, s); 0210 } 0211 } 0212 0213 return l; 0214 } 0215 0216 static QString encodeString(const QString &s, QStringEncoder &codec) 0217 { 0218 // we encode all characters, including the space character BUG: 304276 0219 QByteArray encoded = QByteArray(codec.encode(s)).toPercentEncoding(); 0220 return QString::fromUtf8(encoded); 0221 } 0222 0223 QString KURISearchFilterEngine::substituteQuery(const QString &url, SubstMap &map, const QString &userquery, QStringEncoder &codec) const 0224 { 0225 QString newurl = url; 0226 QStringList ql = modifySubstitutionMap(map, userquery); 0227 const int count = ql.count(); 0228 0229 // Substitute references (\{ref1,ref2,...}) with values from user query: 0230 { 0231 const static QRegularExpression reflistRe(QStringLiteral("\\\\\\{([^\\}]+)\\}")); 0232 // Substitute reflists (\{ref1,ref2,...}): 0233 int start = 0; 0234 QRegularExpressionMatch match; 0235 while ((match = reflistRe.match(newurl, start)).hasMatch()) { 0236 bool found = false; 0237 0238 // bool rest = false; 0239 QString v; 0240 const QString rlstring = match.captured(1); 0241 0242 // \{@} gets a special treatment later 0243 if (rlstring == QLatin1String("@")) { 0244 v = QStringLiteral("\\@"); 0245 found = true; 0246 } 0247 0248 // TODO: strip whitespaces around commas 0249 const QStringList refList = rlstring.split(QLatin1Char(','), Qt::SkipEmptyParts); 0250 0251 for (const QString &rlitem : refList) { 0252 if (found) { 0253 break; 0254 } 0255 0256 const static QRegularExpression rangeRe(QStringLiteral("([0-9]*)\\-([0-9]*)")); 0257 const QRegularExpressionMatch rangeMatch = rangeRe.match(rlitem); 0258 // Substitute a range of keywords 0259 if (rangeMatch.hasMatch()) { 0260 int first = rangeMatch.captured(1).toInt(); 0261 int last = rangeMatch.captured(2).toInt(); 0262 0263 if (first == 0) { 0264 first = 1; 0265 } 0266 0267 if (last == 0) { 0268 last = count; 0269 } 0270 0271 for (int i = first; i <= last; i++) { 0272 v += map[QString::number(i)] + QLatin1Char(' '); 0273 // Remove used value from ql (needed for \{@}): 0274 ql[i - 1].clear(); 0275 } 0276 0277 v = v.trimmed(); 0278 if (!v.isEmpty()) { 0279 found = true; 0280 } 0281 0282 v = encodeString(v, codec); 0283 } else if (rlitem.startsWith(QLatin1Char('\"')) && rlitem.endsWith(QLatin1Char('\"'))) { 0284 // Use default string from query definition: 0285 found = true; 0286 QString s = rlitem.mid(1, rlitem.length() - 2); 0287 v = encodeString(s, codec); 0288 } else if (map.contains(rlitem)) { 0289 // Use value from substitution map: 0290 found = true; 0291 v = encodeString(map[rlitem], codec); 0292 0293 // Remove used value from ql (needed for \{@}): 0294 const QChar c = rlitem.at(0); // rlitem can't be empty at this point 0295 if (c == QLatin1Char('0')) { 0296 // It's a numeric reference to '0' 0297 for (QStringList::Iterator it = ql.begin(); it != ql.end(); ++it) { 0298 (*it).clear(); 0299 } 0300 } else if ((c >= QLatin1String("0")) && (c <= QLatin1String("9"))) { // krazy:excludeall=doublequote_chars 0301 // It's a numeric reference > '0' 0302 int n = rlitem.toInt(); 0303 ql[n - 1].clear(); 0304 } else { 0305 // It's a alphanumeric reference 0306 QStringList::Iterator it = ql.begin(); 0307 while ((it != ql.end()) && !it->startsWith(rlitem + QLatin1Char('='))) { 0308 ++it; 0309 } 0310 if (it != ql.end()) { 0311 it->clear(); 0312 } 0313 } 0314 0315 // Encode '+', otherwise it would be interpreted as space in the resulting url: 0316 v.replace(QLatin1Char('+'), QLatin1String("%2B")); 0317 } else if (rlitem == QLatin1String("@")) { 0318 v = QStringLiteral("\\@"); 0319 } 0320 } 0321 0322 newurl.replace(match.capturedStart(0), match.capturedLength(0), v); 0323 start = match.capturedStart(0) + v.size(); 0324 } 0325 0326 // Special handling for \{@}; 0327 { 0328 // Generate list of unmatched strings: 0329 QString v = ql.join(QLatin1Char(' ')).simplified(); 0330 v = encodeString(v, codec); 0331 0332 // Substitute \{@} with list of unmatched query strings 0333 newurl.replace(QLatin1String("\\@"), v); 0334 } 0335 } 0336 0337 return newurl; 0338 } 0339 0340 QUrl KURISearchFilterEngine::formatResult(const QString &url, const QString &cset1, const QString &cset2, const QString &query, bool isMalformed) const 0341 { 0342 SubstMap map; 0343 return formatResult(url, cset1, cset2, query, isMalformed, map); 0344 } 0345 0346 QUrl KURISearchFilterEngine::formatResult(const QString &url, 0347 const QString &cset1, 0348 const QString &cset2, 0349 const QString &userquery, 0350 bool /* isMalformed */, 0351 SubstMap &map) const 0352 { 0353 // Return nothing if userquery is empty and it contains 0354 // substitution strings... 0355 if (userquery.isEmpty() && url.indexOf(QLatin1String("\\{")) > 0) { 0356 return QUrl(); 0357 } 0358 0359 // Create a codec for the desired encoding so that we can transcode the user's "url". 0360 QString cseta = cset1; 0361 if (cseta.isEmpty()) { 0362 cseta = QStringLiteral("UTF-8"); 0363 } 0364 0365 QStringEncoder csetacodec(cseta.toLatin1().constData()); 0366 if (!csetacodec.isValid()) { 0367 cseta = QStringLiteral("UTF-8"); 0368 csetacodec = QStringEncoder(QStringEncoder::Utf8); 0369 } 0370 0371 // Add charset indicator for the query to substitution map: 0372 map.insert(QStringLiteral("ikw_charset"), cseta); 0373 0374 // Add charset indicator for the fallback query to substitution map: 0375 QString csetb = cset2; 0376 if (csetb.isEmpty()) { 0377 csetb = QStringLiteral("UTF-8"); 0378 } 0379 map.insert(QStringLiteral("wsc_charset"), csetb); 0380 0381 QString newurl = substituteQuery(url, map, userquery, csetacodec); 0382 0383 return QUrl(newurl, QUrl::StrictMode); 0384 } 0385 0386 void KURISearchFilterEngine::configure() 0387 { 0388 qCDebug(category) << "Keywords Engine: Loading config..."; 0389 0390 // Load the config. 0391 KConfig config(QString::fromUtf8(name()) + QLatin1String("rc"), KConfig::NoGlobals); 0392 KConfigGroup group = config.group(QStringLiteral("General")); 0393 0394 m_cKeywordDelimiter = group.readEntry("KeywordDelimiter", ":").at(0).toLatin1(); 0395 m_bWebShortcutsEnabled = group.readEntry("EnableWebShortcuts", true); 0396 m_defaultWebShortcut = group.readEntry("DefaultWebShortcut", "duckduckgo"); 0397 m_bUseOnlyPreferredWebShortcuts = group.readEntry("UsePreferredWebShortcutsOnly", false); 0398 0399 QStringList defaultPreferredShortcuts; 0400 if (!group.hasKey("PreferredWebShortcuts")) { 0401 defaultPreferredShortcuts = KURISearchFilterEngine::defaultSearchProviders(); 0402 } 0403 m_preferredWebShortcuts = group.readEntry("PreferredWebShortcuts", defaultPreferredShortcuts); 0404 0405 // Use either a white space or a : as the keyword delimiter... 0406 if (strchr(" :", m_cKeywordDelimiter) == nullptr) { 0407 m_cKeywordDelimiter = ':'; 0408 } 0409 0410 qCDebug(category) << "Web Shortcuts Enabled: " << m_bWebShortcutsEnabled; 0411 qCDebug(category) << "Default Shortcut: " << m_defaultWebShortcut; 0412 qCDebug(category) << "Keyword Delimiter: " << m_cKeywordDelimiter; 0413 if (m_reloadRegistry) { 0414 m_registry.reload(); 0415 } 0416 } 0417 0418 SearchProviderRegistry *KURISearchFilterEngine::registry() 0419 { 0420 return &m_registry; 0421 } 0422 0423 #include "moc_kuriikwsfiltereng_p.cpp"