File indexing completed on 2024-05-12 04:57:50
0001 /* ============================================================ 0002 * Falkon - Qt web browser 0003 * Copyright (C) 2010-2017 David Rosca <nowrep@gmail.com> 0004 * 0005 * This program is free software: you can redistribute it and/or modify 0006 * it under the terms of the GNU General Public License as published by 0007 * the Free Software Foundation, either version 3 of the License, or 0008 * (at your option) any later version. 0009 * 0010 * This program is distributed in the hope that it will be useful, 0011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0013 * GNU General Public License for more details. 0014 * 0015 * You should have received a copy of the GNU General Public License 0016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 0017 * ============================================================ */ 0018 /** 0019 * Copyright (c) 2009, Zsombor Gegesy <gzsombor@gmail.com> 0020 * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net> 0021 * 0022 * Redistribution and use in source and binary forms, with or without 0023 * modification, are permitted provided that the following conditions 0024 * are met: 0025 * 1. Redistributions of source code must retain the above copyright 0026 * notice, this list of conditions and the following disclaimer. 0027 * 2. Redistributions in binary form must reproduce the above copyright 0028 * notice, this list of conditions and the following disclaimer in the 0029 * documentation and/or other materials provided with the distribution. 0030 * 3. Neither the name of the Benjamin Meyer nor the names of its contributors 0031 * may be used to endorse or promote products derived from this software 0032 * without specific prior written permission. 0033 * 0034 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 0035 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 0036 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 0037 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 0038 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 0039 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 0040 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 0041 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 0042 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 0043 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 0044 * SUCH DAMAGE. 0045 */ 0046 0047 #include "adblockrule.h" 0048 #include "adblocksubscription.h" 0049 #include "qztools.h" 0050 0051 #include <QUrl> 0052 #include <QString> 0053 #include <QWebEnginePage> 0054 #include <QWebEngineUrlRequestInfo> 0055 0056 /* TODO Qt6 Replace with PUBLIC API */ 0057 #include <QtCore/private/qurl_p.h> 0058 #include <QtNetwork/private/qtldurl_p.h> 0059 0060 static QString getTopLevelDomain(const QUrl &url) 0061 { 0062 // QUrl::topLevelDomain() was removed in Qt6. 0063 // The following is copied from the old "qTopLevelDomain" code in Qt6::Network. 0064 // It was removed in this commit: https://github.com/qt/qtbase/commit/50b30976837be0969efdccced68cfb584d99981a 0065 const QString domainLower = url.host().toLower(); 0066 QVector<QStringView> sections = QStringView{domainLower}.split(QLatin1Char('.'), Qt::SkipEmptyParts); 0067 if (sections.isEmpty()) 0068 return QString(); 0069 0070 QString level, tld; 0071 for (int j = sections.count() - 1; j >= 0; --j) { 0072 level.prepend(QLatin1Char('.') + sections.at(j)); 0073 if (qIsEffectiveTLD(QStringView{level}.right(level.size() - 1))) 0074 tld = level; 0075 } 0076 0077 //return qt_ACE_do(tld, ToAceOnly, AllowLeadingDot, {}); 0078 // TODO QT6 - QUrl::toAce() uses ForbidLeadingDot, while the old QUrl::topLevelDomain() used AllowLeadingDot. Does this matter? 0079 return QString(QString::fromUtf8(QUrl::toAce(tld))); 0080 } 0081 0082 static QString toSecondLevelDomain(const QUrl &url) 0083 { 0084 const QString topLevelDomain = getTopLevelDomain(url); 0085 const QString urlHost = url.host(); 0086 0087 if (topLevelDomain.isEmpty() || urlHost.isEmpty()) { 0088 return {}; 0089 } 0090 0091 QString domain = urlHost.left(urlHost.size() - topLevelDomain.size()); 0092 0093 if (domain.count(QL1C('.')) == 0) { 0094 return urlHost; 0095 } 0096 0097 while (domain.count(QL1C('.')) != 0) { 0098 domain = domain.mid(domain.indexOf(QL1C('.')) + 1); 0099 } 0100 0101 return domain + topLevelDomain; 0102 } 0103 0104 AdBlockRule::AdBlockRule(const QString &filter, AdBlockSubscription* subscription) 0105 : m_subscription(subscription) 0106 , m_type(StringContainsMatchRule) 0107 , m_caseSensitivity(Qt::CaseInsensitive) 0108 , m_isEnabled(true) 0109 , m_isException(false) 0110 , m_isInternalDisabled(false) 0111 , m_regExp(nullptr) 0112 { 0113 setFilter(filter); 0114 } 0115 0116 AdBlockRule::~AdBlockRule() 0117 { 0118 delete m_regExp; 0119 } 0120 0121 AdBlockRule* AdBlockRule::copy() const 0122 { 0123 auto* rule = new AdBlockRule(); 0124 rule->m_subscription = m_subscription; 0125 rule->m_type = m_type; 0126 rule->m_options = m_options; 0127 rule->m_exceptions = m_exceptions; 0128 rule->m_filter = m_filter; 0129 rule->m_matchString = m_matchString; 0130 rule->m_caseSensitivity = m_caseSensitivity; 0131 rule->m_isEnabled = m_isEnabled; 0132 rule->m_isException = m_isException; 0133 rule->m_isInternalDisabled = m_isInternalDisabled; 0134 rule->m_allowedDomains = m_allowedDomains; 0135 rule->m_blockedDomains = m_blockedDomains; 0136 0137 if (m_regExp) { 0138 rule->m_regExp = new RegExp; 0139 rule->m_regExp->regExp = m_regExp->regExp; 0140 rule->m_regExp->matchers = m_regExp->matchers; 0141 } 0142 0143 return rule; 0144 } 0145 0146 AdBlockSubscription* AdBlockRule::subscription() const 0147 { 0148 return m_subscription; 0149 } 0150 0151 void AdBlockRule::setSubscription(AdBlockSubscription* subscription) 0152 { 0153 m_subscription = subscription; 0154 } 0155 0156 QString AdBlockRule::filter() const 0157 { 0158 return m_filter; 0159 } 0160 0161 void AdBlockRule::setFilter(const QString &filter) 0162 { 0163 m_filter = filter; 0164 parseFilter(); 0165 } 0166 0167 bool AdBlockRule::isCssRule() const 0168 { 0169 return m_type == CssRule; 0170 } 0171 0172 QString AdBlockRule::cssSelector() const 0173 { 0174 return m_matchString; 0175 } 0176 0177 bool AdBlockRule::isUnsupportedRule() const 0178 { 0179 return m_type == ExtendedCssRule || m_type == SnippetRule || m_isInternalDisabled; 0180 } 0181 0182 bool AdBlockRule::isDocument() const 0183 { 0184 return hasOption(DocumentOption); 0185 } 0186 0187 bool AdBlockRule::isElemhide() const 0188 { 0189 return hasOption(ElementHideOption); 0190 } 0191 0192 bool AdBlockRule::isGenerichide() const 0193 { 0194 return hasOption(GenericHideOption); 0195 } 0196 0197 bool AdBlockRule::isDomainRestricted() const 0198 { 0199 return hasOption(DomainRestrictedOption); 0200 } 0201 0202 bool AdBlockRule::isException() const 0203 { 0204 return m_isException; 0205 } 0206 0207 bool AdBlockRule::isComment() const 0208 { 0209 return m_filter.startsWith(QL1C('!')); 0210 } 0211 0212 bool AdBlockRule::isEnabled() const 0213 { 0214 return m_isEnabled; 0215 } 0216 0217 void AdBlockRule::setEnabled(bool enabled) 0218 { 0219 m_isEnabled = enabled; 0220 } 0221 0222 bool AdBlockRule::isSlow() const 0223 { 0224 return m_regExp != nullptr; 0225 } 0226 0227 bool AdBlockRule::isInternalDisabled() const 0228 { 0229 return m_isInternalDisabled; 0230 } 0231 0232 bool AdBlockRule::urlMatch(const QUrl &url) const 0233 { 0234 if (!hasOption(DocumentOption) && !hasOption(ElementHideOption) && !hasOption(GenericHideOption) && !hasOption(GenericBlockOption)) { 0235 return false; 0236 } 0237 0238 const QString encodedUrl = QString::fromUtf8(url.toEncoded()); 0239 const QString domain = url.host(); 0240 0241 return stringMatch(domain, encodedUrl); 0242 } 0243 0244 bool AdBlockRule::networkMatch(const QWebEngineUrlRequestInfo &request, const QString &domain, const QString &encodedUrl) const 0245 { 0246 if (m_type == CssRule || !m_isEnabled || m_isInternalDisabled) { 0247 return false; 0248 } 0249 0250 bool matched = stringMatch(domain, encodedUrl); 0251 0252 if (matched) { 0253 // Check domain restrictions 0254 if (hasOption(DomainRestrictedOption) && !matchDomain(request.firstPartyUrl().host())) { 0255 return false; 0256 } 0257 0258 // Check third-party restriction 0259 if (hasOption(ThirdPartyOption) && !matchThirdParty(request)) { 0260 return false; 0261 } 0262 0263 // Check type restrictions 0264 if (((m_exceptions | m_options) & TypeOptions) && !matchType(request)) 0265 return false; 0266 } 0267 0268 return matched; 0269 } 0270 0271 bool AdBlockRule::matchDomain(const QString &domain) const 0272 { 0273 if (!m_isEnabled) { 0274 return false; 0275 } 0276 0277 if (!hasOption(DomainRestrictedOption)) { 0278 return true; 0279 } 0280 0281 if (m_blockedDomains.isEmpty()) { 0282 for (const QString &d : std::as_const(m_allowedDomains)) { 0283 if (isMatchingDomain(domain, d)) { 0284 return true; 0285 } 0286 } 0287 } 0288 else if (m_allowedDomains.isEmpty()) { 0289 for (const QString &d : std::as_const(m_blockedDomains)) { 0290 if (isMatchingDomain(domain, d)) { 0291 return false; 0292 } 0293 } 0294 return true; 0295 } 0296 else { 0297 for (const QString &d : std::as_const(m_blockedDomains)) { 0298 if (isMatchingDomain(domain, d)) { 0299 return false; 0300 } 0301 } 0302 0303 for (const QString &d : std::as_const(m_allowedDomains)) { 0304 if (isMatchingDomain(domain, d)) { 0305 return true; 0306 } 0307 } 0308 } 0309 0310 return false; 0311 } 0312 0313 bool AdBlockRule::matchThirdParty(const QWebEngineUrlRequestInfo &request) const 0314 { 0315 // Third-party matching should be performed on second-level domains 0316 const QString firstPartyHost = toSecondLevelDomain(request.firstPartyUrl()); 0317 const QString host = toSecondLevelDomain(request.requestUrl()); 0318 0319 bool match = firstPartyHost != host; 0320 0321 return hasException(ThirdPartyOption) ? !match : match; 0322 } 0323 0324 bool AdBlockRule::matchType(const QWebEngineUrlRequestInfo &request) const 0325 { 0326 RuleOption type; 0327 switch (request.resourceType()) { 0328 case QWebEngineUrlRequestInfo::ResourceTypeMainFrame: 0329 type = DocumentOption; 0330 break; 0331 case QWebEngineUrlRequestInfo::ResourceTypeSubFrame: 0332 type = SubdocumentOption; 0333 break; 0334 case QWebEngineUrlRequestInfo::ResourceTypeStylesheet: 0335 type = StyleSheetOption; 0336 break; 0337 case QWebEngineUrlRequestInfo::ResourceTypeScript: 0338 type = ScriptOption; 0339 break; 0340 case QWebEngineUrlRequestInfo::ResourceTypeImage: 0341 type = ImageOption; 0342 break; 0343 case QWebEngineUrlRequestInfo::ResourceTypeFontResource: 0344 type = FontOption; 0345 break; 0346 case QWebEngineUrlRequestInfo::ResourceTypeObject: 0347 type = ObjectOption; 0348 break; 0349 case QWebEngineUrlRequestInfo::ResourceTypeMedia: 0350 type = MediaOption; 0351 break; 0352 case QWebEngineUrlRequestInfo::ResourceTypeXhr: 0353 type = XMLHttpRequestOption; 0354 break; 0355 case QWebEngineUrlRequestInfo::ResourceTypePing: 0356 type = PingOption; 0357 break; 0358 case QWebEngineUrlRequestInfo::ResourceTypePluginResource: 0359 type = ObjectSubrequestOption; 0360 break; 0361 case QWebEngineUrlRequestInfo::ResourceTypeSubResource: 0362 case QWebEngineUrlRequestInfo::ResourceTypeWorker: 0363 case QWebEngineUrlRequestInfo::ResourceTypeSharedWorker: 0364 case QWebEngineUrlRequestInfo::ResourceTypePrefetch: 0365 case QWebEngineUrlRequestInfo::ResourceTypeFavicon: 0366 case QWebEngineUrlRequestInfo::ResourceTypeServiceWorker: 0367 case QWebEngineUrlRequestInfo::ResourceTypeCspReport: 0368 case QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadMainFrame: 0369 case QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadSubFrame: 0370 case QWebEngineUrlRequestInfo::ResourceTypeUnknown: 0371 default: 0372 type = OtherOption; 0373 break; 0374 } 0375 if (!m_exceptions) 0376 return m_options.testFlag(type); 0377 return !m_exceptions.testFlag(type); 0378 } 0379 0380 void AdBlockRule::parseFilter() 0381 { 0382 QString parsedLine = m_filter; 0383 0384 // Empty rule or just comment 0385 if (m_filter.trimmed().isEmpty() || m_filter.startsWith(QL1C('!'))) { 0386 // We want to differentiate rule disabled by user and rule disabled in subscription file 0387 // m_isInternalDisabled is also used when rule is disabled due to all options not being supported 0388 m_isEnabled = false; 0389 m_isInternalDisabled = true; 0390 m_type = Invalid; 0391 return; 0392 } 0393 0394 // Exception always starts with @@ 0395 if (parsedLine.startsWith(QL1S("@@"))) { 0396 m_isException = true; 0397 parsedLine.remove(0, 2); 0398 } 0399 0400 // Extended CSS element hiding 0401 if (parsedLine.contains(QL1S("#?#"))) { 0402 m_type = ExtendedCssRule; 0403 int pos = parsedLine.indexOf(QL1C('#')); 0404 if (!parsedLine.startsWith(QL1S("#"))) { 0405 QString domains = parsedLine.left(pos); 0406 parseDomains(domains, QL1C(',')); 0407 } 0408 m_matchString = parsedLine.mid(pos + 3); 0409 // CSS rule cannot have more options -> stop parsing 0410 return; 0411 } 0412 0413 // Snippet rule 0414 if (parsedLine.contains(QL1S("#$#"))) { 0415 m_type = SnippetRule; 0416 int pos = parsedLine.indexOf(QL1C('#')); 0417 if (!parsedLine.startsWith(QL1S("#"))) { 0418 QString domains = parsedLine.left(pos); 0419 parseDomains(domains, QL1C(',')); 0420 } 0421 m_matchString = parsedLine.mid(pos + 3); 0422 return; 0423 } 0424 0425 // CSS Element hiding rule 0426 if (parsedLine.contains(QL1S("##")) || parsedLine.contains(QL1S("#@#"))) { 0427 m_type = CssRule; 0428 int pos = parsedLine.indexOf(QL1C('#')); 0429 0430 // Domain restricted rule 0431 if (!parsedLine.startsWith(QL1S("#"))) { 0432 QString domains = parsedLine.left(pos); 0433 parseDomains(domains, QL1C(',')); 0434 } 0435 0436 m_isException = parsedLine.at(pos + 1) == QL1C('@'); 0437 m_matchString = parsedLine.mid(m_isException ? pos + 3 : pos + 2); 0438 0439 // CSS rule cannot have more options -> stop parsing 0440 return; 0441 } 0442 0443 // Parse all options following $ char 0444 int optionsIndex = parsedLine.indexOf(QL1C('$')); 0445 if (optionsIndex >= 0) { 0446 const QStringList options = parsedLine.mid(optionsIndex + 1).split(QL1C(','), Qt::SkipEmptyParts); 0447 0448 int handledOptions = 0; 0449 for (const QString &option : options) { 0450 if (option.startsWith(QL1S("domain="))) { 0451 parseDomains(option.mid(7), QL1C('|')); 0452 ++handledOptions; 0453 } 0454 else if (option == QL1S("match-case")) { 0455 m_caseSensitivity = Qt::CaseSensitive; 0456 ++handledOptions; 0457 } 0458 else if (option.endsWith(QL1S("third-party"))) { 0459 setOption(ThirdPartyOption); 0460 setException(ThirdPartyOption, option.startsWith(QL1C('~'))); 0461 ++handledOptions; 0462 } 0463 else if (option.endsWith(QL1S("object"))) { 0464 setOption(ObjectOption); 0465 setException(ObjectOption, option.startsWith(QL1C('~'))); 0466 ++handledOptions; 0467 } 0468 else if (option.endsWith(QL1S("subdocument"))) { 0469 setOption(SubdocumentOption); 0470 setException(SubdocumentOption, option.startsWith(QL1C('~'))); 0471 ++handledOptions; 0472 } 0473 else if (option.endsWith(QL1S("xmlhttprequest"))) { 0474 setOption(XMLHttpRequestOption); 0475 setException(XMLHttpRequestOption, option.startsWith(QL1C('~'))); 0476 ++handledOptions; 0477 } 0478 else if (option.endsWith(QL1S("image"))) { 0479 setOption(ImageOption); 0480 setException(ImageOption, option.startsWith(QL1C('~'))); 0481 ++handledOptions; 0482 } 0483 else if (option.endsWith(QL1S("script"))) { 0484 setOption(ScriptOption); 0485 setException(ScriptOption, option.startsWith(QL1C('~'))); 0486 ++handledOptions; 0487 } 0488 else if (option.endsWith(QL1S("stylesheet"))) { 0489 setOption(StyleSheetOption); 0490 setException(StyleSheetOption, option.startsWith(QL1C('~'))); 0491 ++handledOptions; 0492 } 0493 else if (option.endsWith(QL1S("object-subrequest"))) { 0494 setOption(ObjectSubrequestOption); 0495 setException(ObjectSubrequestOption, option.startsWith(QL1C('~'))); 0496 ++handledOptions; 0497 } 0498 else if (option.endsWith(QL1S("ping"))) { 0499 setOption(PingOption); 0500 setException(PingOption, option.startsWith(QL1C('~'))); 0501 ++handledOptions; 0502 } 0503 else if (option.endsWith(QL1S("media"))) { 0504 setOption(MediaOption); 0505 setException(MediaOption, option.startsWith(QL1C('~'))); 0506 ++handledOptions; 0507 } 0508 else if (option.endsWith(QL1S("font"))) { 0509 setOption(FontOption); 0510 setException(FontOption, option.startsWith(QL1C('~'))); 0511 ++handledOptions; 0512 } 0513 else if (option.endsWith(QL1S("other"))) { 0514 setOption(OtherOption); 0515 setException(OtherOption, option.startsWith(QL1C('~'))); 0516 ++handledOptions; 0517 } 0518 else if (option == QL1S("collapse")) { 0519 // Hiding placeholders of blocked elements is enabled by default 0520 ++handledOptions; 0521 } 0522 else if (option == QL1S("popup")) { 0523 // doesn't do anything yet 0524 setOption(PopupOption); 0525 ++handledOptions; 0526 } 0527 else if (option == QL1S("document") && m_isException) { 0528 setOption(DocumentOption); 0529 ++handledOptions; 0530 } 0531 else if (option == QL1S("elemhide") && m_isException) { 0532 setOption(ElementHideOption); 0533 ++handledOptions; 0534 } 0535 else if (option == QL1S("generichide") && m_isException) { 0536 setOption(GenericHideOption); 0537 ++handledOptions; 0538 } 0539 else if (option == QL1S("genericblock") && m_isException) { 0540 // doesn't do anything yet 0541 setOption(GenericBlockOption); 0542 // ++handledOptions; 0543 } 0544 } 0545 0546 // If we don't handle all options, it's safer to just disable this rule 0547 if (handledOptions != options.count()) { 0548 m_isInternalDisabled = true; 0549 m_type = Invalid; 0550 return; 0551 } 0552 0553 parsedLine.truncate(optionsIndex); 0554 } 0555 0556 // Rule is classic regexp 0557 if (parsedLine.startsWith(QL1C('/')) && parsedLine.endsWith(QL1C('/'))) { 0558 parsedLine.remove(0, 1); 0559 parsedLine = parsedLine.left(parsedLine.size() - 1); 0560 0561 m_type = RegExpMatchRule; 0562 m_regExp = new RegExp; 0563 m_regExp->regExp = QRegularExpression(parsedLine, QRegularExpression::InvertedGreedinessOption); 0564 if (m_caseSensitivity == Qt::CaseInsensitive) { 0565 m_regExp->regExp.setPatternOptions(m_regExp->regExp.patternOptions() | QRegularExpression::CaseInsensitiveOption); 0566 } 0567 m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine)); 0568 return; 0569 } 0570 0571 // Remove starting and ending wildcards (*) 0572 if (parsedLine.startsWith(QL1C('*'))) { 0573 parsedLine.remove(0, 1); 0574 } 0575 0576 if (parsedLine.endsWith(QL1C('*'))) { 0577 parsedLine = parsedLine.left(parsedLine.size() - 1); 0578 } 0579 0580 // We can use fast string matching for domain here 0581 if (filterIsOnlyDomain(parsedLine)) { 0582 parsedLine.remove(0, 2); 0583 parsedLine = parsedLine.left(parsedLine.size() - 1); 0584 0585 m_type = DomainMatchRule; 0586 m_matchString = parsedLine; 0587 return; 0588 } 0589 0590 // If rule contains only | at end, we can also use string matching 0591 if (filterIsOnlyEndsMatch(parsedLine)) { 0592 parsedLine = parsedLine.left(parsedLine.size() - 1); 0593 0594 m_type = StringEndsMatchRule; 0595 m_matchString = parsedLine; 0596 return; 0597 } 0598 0599 // If we still find a wildcard (*) or separator (^) or (|) 0600 // we must modify parsedLine to comply with QRegularExpression 0601 if (parsedLine.contains(QL1C('*')) || 0602 parsedLine.contains(QL1C('^')) || 0603 parsedLine.contains(QL1C('|')) 0604 ) { 0605 m_type = RegExpMatchRule; 0606 m_regExp = new RegExp; 0607 m_regExp->regExp = QRegularExpression(createRegExpFromFilter(parsedLine), QRegularExpression::InvertedGreedinessOption); 0608 if (m_caseSensitivity == Qt::CaseInsensitive) { 0609 m_regExp->regExp.setPatternOptions(m_regExp->regExp.patternOptions() | QRegularExpression::CaseInsensitiveOption); 0610 } 0611 m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine)); 0612 return; 0613 } 0614 0615 // This rule matches all urls 0616 if (parsedLine.isEmpty()) { 0617 if (m_options == NoOption) { 0618 qWarning() << "Disabling unrestricted rule that would block all requests" << m_filter; 0619 m_isInternalDisabled = true; 0620 m_type = Invalid; 0621 return; 0622 } 0623 m_type = MatchAllUrlsRule; 0624 return; 0625 } 0626 0627 // We haven't found anything that needs use of regexp, yay! 0628 m_type = StringContainsMatchRule; 0629 m_matchString = parsedLine; 0630 } 0631 0632 void AdBlockRule::parseDomains(const QString &domains, const QChar &separator) 0633 { 0634 const QStringList domainsList = domains.split(separator, Qt::SkipEmptyParts); 0635 0636 for (const QString &domain : domainsList) { 0637 if (domain.isEmpty()) { 0638 continue; 0639 } 0640 if (domain.startsWith(QL1C('~'))) { 0641 m_blockedDomains.append(domain.mid(1)); 0642 } 0643 else { 0644 m_allowedDomains.append(domain); 0645 } 0646 } 0647 0648 if (!m_blockedDomains.isEmpty() || !m_allowedDomains.isEmpty()) { 0649 setOption(DomainRestrictedOption); 0650 } 0651 } 0652 0653 bool AdBlockRule::filterIsOnlyDomain(const QString &filter) const 0654 { 0655 if (!filter.endsWith(QL1C('^')) || !filter.startsWith(QL1S("||"))) 0656 return false; 0657 0658 for (int i = 0; i < filter.size(); ++i) { 0659 switch (filter.at(i).toLatin1()) { 0660 case '/': 0661 case ':': 0662 case '?': 0663 case '=': 0664 case '&': 0665 case '*': 0666 return false; 0667 default: 0668 break; 0669 } 0670 } 0671 0672 return true; 0673 } 0674 0675 bool AdBlockRule::filterIsOnlyEndsMatch(const QString &filter) const 0676 { 0677 for (int i = 0; i < filter.size(); ++i) { 0678 switch (filter.at(i).toLatin1()) { 0679 case '^': 0680 case '*': 0681 return false; 0682 case '|': 0683 return i == filter.size() - 1; 0684 default: 0685 break; 0686 } 0687 } 0688 0689 return false; 0690 } 0691 0692 static bool wordCharacter(const QChar &c) 0693 { 0694 return c.isLetterOrNumber() || c.isMark() || c == QL1C('_'); 0695 } 0696 0697 QString AdBlockRule::createRegExpFromFilter(const QString &filter) const 0698 { 0699 QString parsed; 0700 parsed.reserve(filter.size()); 0701 0702 bool hadWildcard = false; // Filter multiple wildcards 0703 0704 for (int i = 0; i < filter.size(); ++i) { 0705 const QChar c = filter.at(i); 0706 switch (c.toLatin1()) { 0707 case '^': 0708 parsed.append(QL1S("(?:[^\\w\\d\\-.%]|$)")); 0709 break; 0710 0711 case '*': 0712 if (!hadWildcard) 0713 parsed.append(QL1S(".*")); 0714 break; 0715 0716 case '|': 0717 if (i == 0) { 0718 if (filter.size() > 1 && filter.at(1) == QL1C('|')) { 0719 parsed.append(QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")); 0720 i++; 0721 } 0722 else { 0723 parsed.append(QL1C('^')); 0724 } 0725 break; 0726 } 0727 else if (i == filter.size() - 1) { 0728 parsed.append(QL1C('$')); 0729 break; 0730 } 0731 // fallthrough 0732 0733 default: 0734 if (!wordCharacter(c)) 0735 parsed.append(QL1C('\\') + c); 0736 else 0737 parsed.append(c); 0738 } 0739 0740 hadWildcard = c == QL1C('*'); 0741 } 0742 0743 return parsed; 0744 } 0745 0746 QList<QStringMatcher> AdBlockRule::createStringMatchers(const QStringList &filters) const 0747 { 0748 QList<QStringMatcher> matchers; 0749 matchers.reserve(filters.size()); 0750 0751 for (const QString &filter : filters) { 0752 matchers.append(QStringMatcher(filter, m_caseSensitivity)); 0753 } 0754 0755 return matchers; 0756 } 0757 0758 bool AdBlockRule::stringMatch(const QString &domain, const QString &encodedUrl) const 0759 { 0760 switch (m_type) { 0761 case StringContainsMatchRule: 0762 return encodedUrl.contains(m_matchString, m_caseSensitivity); 0763 0764 case DomainMatchRule: 0765 return isMatchingDomain(domain, m_matchString); 0766 0767 case StringEndsMatchRule: 0768 return encodedUrl.endsWith(m_matchString, m_caseSensitivity); 0769 0770 case RegExpMatchRule: 0771 if (!isMatchingRegExpStrings(encodedUrl)) { 0772 return false; 0773 } 0774 return m_regExp->regExp.match(encodedUrl).hasMatch(); 0775 0776 case MatchAllUrlsRule: 0777 return true; 0778 0779 default: 0780 return false; 0781 } 0782 } 0783 0784 bool AdBlockRule::isMatchingDomain(const QString &domain, const QString &filter) const 0785 { 0786 return QzTools::matchDomain(filter, domain); 0787 } 0788 0789 bool AdBlockRule::isMatchingRegExpStrings(const QString &url) const 0790 { 0791 Q_ASSERT(m_regExp); 0792 0793 const auto matchers = m_regExp->matchers; 0794 for (const QStringMatcher &matcher : matchers) { 0795 if (matcher.indexIn(url) == -1) 0796 return false; 0797 } 0798 0799 return true; 0800 } 0801 0802 // Split regexp filter into strings that can be used with QString::contains 0803 // Don't use parts that contains only 1 char and duplicated parts 0804 QStringList AdBlockRule::parseRegExpFilter(const QString &filter) const 0805 { 0806 QStringList list; 0807 int startPos = -1; 0808 0809 for (int i = 0; i < filter.size(); ++i) { 0810 const QChar c = filter.at(i); 0811 // Meta characters in AdBlock rules are | * ^ 0812 if (c == QL1C('|') || c == QL1C('*') || c == QL1C('^')) { 0813 const QString sub = filter.mid(startPos, i - startPos); 0814 if (sub.size() > 1) 0815 list.append(sub); 0816 startPos = i + 1; 0817 } 0818 } 0819 0820 const QString sub = filter.mid(startPos); 0821 if (sub.size() > 1) 0822 list.append(sub); 0823 0824 list.removeDuplicates(); 0825 0826 return list; 0827 } 0828 0829 bool AdBlockRule::hasOption(const AdBlockRule::RuleOption &opt) const 0830 { 0831 return (m_options & opt); 0832 } 0833 0834 bool AdBlockRule::hasException(const AdBlockRule::RuleOption &opt) const 0835 { 0836 return (m_exceptions & opt); 0837 } 0838 0839 void AdBlockRule::setOption(const AdBlockRule::RuleOption &opt) 0840 { 0841 m_options |= opt; 0842 } 0843 0844 void AdBlockRule::setException(const AdBlockRule::RuleOption &opt, bool on) 0845 { 0846 if (on) { 0847 m_exceptions |= opt; 0848 } 0849 }