File indexing completed on 2024-05-12 04:57:50

0001 /* ============================================================
0002 * Falkon - Qt web browser
0003 * Copyright (C) 2010-2017 David Rosca <nowrep@gmail.com>
0004 *
0005 * This program is free software: you can redistribute it and/or modify
0006 * it under the terms of the GNU General Public License as published by
0007 * the Free Software Foundation, either version 3 of the License, or
0008 * (at your option) any later version.
0009 *
0010 * This program is distributed in the hope that it will be useful,
0011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
0012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0013 * GNU General Public License for more details.
0014 *
0015 * You should have received a copy of the GNU General Public License
0016 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0017 * ============================================================ */
0018 /**
0019  * Copyright (c) 2009, Zsombor Gegesy <gzsombor@gmail.com>
0020  * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net>
0021  *
0022  * Redistribution and use in source and binary forms, with or without
0023  * modification, are permitted provided that the following conditions
0024  * are met:
0025  * 1. Redistributions of source code must retain the above copyright
0026  *    notice, this list of conditions and the following disclaimer.
0027  * 2. Redistributions in binary form must reproduce the above copyright
0028  *    notice, this list of conditions and the following disclaimer in the
0029  *    documentation and/or other materials provided with the distribution.
0030  * 3. Neither the name of the Benjamin Meyer nor the names of its contributors
0031  *    may be used to endorse or promote products derived from this software
0032  *    without specific prior written permission.
0033  *
0034  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
0035  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0036  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0037  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
0038  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
0039  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
0040  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
0041  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
0042  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
0043  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0044  * SUCH DAMAGE.
0045  */
0046 
0047 #include "adblockrule.h"
0048 #include "adblocksubscription.h"
0049 #include "qztools.h"
0050 
0051 #include <QUrl>
0052 #include <QString>
0053 #include <QWebEnginePage>
0054 #include <QWebEngineUrlRequestInfo>
0055 
0056 /* TODO Qt6 Replace with PUBLIC API */
0057 #include <QtCore/private/qurl_p.h>
0058 #include <QtNetwork/private/qtldurl_p.h>
0059 
0060 static QString getTopLevelDomain(const QUrl &url)
0061 {
0062     // QUrl::topLevelDomain() was removed in Qt6.
0063     // The following is copied from the old "qTopLevelDomain" code in Qt6::Network.
0064     // It was removed in this commit: https://github.com/qt/qtbase/commit/50b30976837be0969efdccced68cfb584d99981a
0065     const QString domainLower = url.host().toLower();
0066     QVector<QStringView> sections = QStringView{domainLower}.split(QLatin1Char('.'), Qt::SkipEmptyParts);
0067     if (sections.isEmpty())
0068         return QString();
0069 
0070     QString level, tld;
0071     for (int j = sections.count() - 1; j >= 0; --j) {
0072         level.prepend(QLatin1Char('.') + sections.at(j));
0073         if (qIsEffectiveTLD(QStringView{level}.right(level.size() - 1)))
0074             tld = level;
0075     }
0076 
0077     //return qt_ACE_do(tld, ToAceOnly, AllowLeadingDot, {});
0078     // TODO QT6 - QUrl::toAce() uses ForbidLeadingDot, while the old QUrl::topLevelDomain() used AllowLeadingDot. Does this matter?
0079     return QString(QString::fromUtf8(QUrl::toAce(tld)));
0080 }
0081 
0082 static QString toSecondLevelDomain(const QUrl &url)
0083 {
0084     const QString topLevelDomain = getTopLevelDomain(url);
0085     const QString urlHost = url.host();
0086 
0087     if (topLevelDomain.isEmpty() || urlHost.isEmpty()) {
0088         return {};
0089     }
0090 
0091     QString domain = urlHost.left(urlHost.size() - topLevelDomain.size());
0092 
0093     if (domain.count(QL1C('.')) == 0) {
0094         return urlHost;
0095     }
0096 
0097     while (domain.count(QL1C('.')) != 0) {
0098         domain = domain.mid(domain.indexOf(QL1C('.')) + 1);
0099     }
0100 
0101     return domain + topLevelDomain;
0102 }
0103 
// Creates a rule belonging to subscription and immediately parses filter.
// The members below are the defaults in effect before parseFilter() runs:
// enabled, not an exception, case-insensitive "string contains" matching and
// no regular expression.
AdBlockRule::AdBlockRule(const QString &filter, AdBlockSubscription* subscription)
    : m_subscription(subscription)
    , m_type(StringContainsMatchRule)
    , m_caseSensitivity(Qt::CaseInsensitive)
    , m_isEnabled(true)
    , m_isException(false)
    , m_isInternalDisabled(false)
    , m_regExp(nullptr)
{
    setFilter(filter);
}
0115 
// Frees the regular-expression data allocated by parseFilter() or copy(), if any.
AdBlockRule::~AdBlockRule()
{
    delete m_regExp;
}
0120 
0121 AdBlockRule* AdBlockRule::copy() const
0122 {
0123     auto* rule = new AdBlockRule();
0124     rule->m_subscription = m_subscription;
0125     rule->m_type = m_type;
0126     rule->m_options = m_options;
0127     rule->m_exceptions = m_exceptions;
0128     rule->m_filter = m_filter;
0129     rule->m_matchString = m_matchString;
0130     rule->m_caseSensitivity = m_caseSensitivity;
0131     rule->m_isEnabled = m_isEnabled;
0132     rule->m_isException = m_isException;
0133     rule->m_isInternalDisabled = m_isInternalDisabled;
0134     rule->m_allowedDomains = m_allowedDomains;
0135     rule->m_blockedDomains = m_blockedDomains;
0136 
0137     if (m_regExp) {
0138         rule->m_regExp = new RegExp;
0139         rule->m_regExp->regExp = m_regExp->regExp;
0140         rule->m_regExp->matchers = m_regExp->matchers;
0141     }
0142 
0143     return rule;
0144 }
0145 
// Returns the subscription pointer stored in this rule (may be whatever was
// passed to the constructor or setSubscription()).
AdBlockSubscription* AdBlockRule::subscription() const
{
    return m_subscription;
}
0150 
// Stores subscription as the owner of this rule; the pointer is only recorded here.
void AdBlockRule::setSubscription(AdBlockSubscription* subscription)
{
    m_subscription = subscription;
}
0155 
// Returns the original, unparsed filter text of this rule.
QString AdBlockRule::filter() const
{
    return m_filter;
}
0160 
0161 void AdBlockRule::setFilter(const QString &filter)
0162 {
0163     m_filter = filter;
0164     parseFilter();
0165 }
0166 
// True when the filter was parsed as a CSS element hiding rule ("##" / "#@#").
bool AdBlockRule::isCssRule() const
{
    return m_type == CssRule;
}
0171 
// Returns m_matchString. For CSS-type rules parseFilter() stores the text
// after the "##" / "#@#" / "#?#" / "#$#" marker there; for other rule types
// the same member holds the URL match string.
QString AdBlockRule::cssSelector() const
{
    return m_matchString;
}
0176 
0177 bool AdBlockRule::isUnsupportedRule() const
0178 {
0179     return m_type == ExtendedCssRule || m_type == SnippetRule || m_isInternalDisabled;
0180 }
0181 
// True when the "document" option was set (parseFilter() accepts it on exception rules only).
bool AdBlockRule::isDocument() const
{
    return hasOption(DocumentOption);
}
0186 
// True when the "elemhide" option was set (parseFilter() accepts it on exception rules only).
bool AdBlockRule::isElemhide() const
{
    return hasOption(ElementHideOption);
}
0191 
// True when the "generichide" option was set (parseFilter() accepts it on exception rules only).
bool AdBlockRule::isGenerichide() const
{
    return hasOption(GenericHideOption);
}
0196 
// True when parseDomains() recorded at least one allowed or blocked domain.
bool AdBlockRule::isDomainRestricted() const
{
    return hasOption(DomainRestrictedOption);
}
0201 
// True for exception rules: filters starting with "@@" or CSS rules using "#@#".
bool AdBlockRule::isException() const
{
    return m_isException;
}
0206 
// True when the raw filter text starts with '!'.
bool AdBlockRule::isComment() const
{
    return m_filter.startsWith(QL1C('!'));
}
0211 
// Returns the enabled flag. It is set through setEnabled() and cleared by
// parseFilter() for empty and comment filters.
bool AdBlockRule::isEnabled() const
{
    return m_isEnabled;
}
0216 
// Sets the enabled flag; m_isInternalDisabled is not affected.
void AdBlockRule::setEnabled(bool enabled)
{
    m_isEnabled = enabled;
}
0221 
// True when the rule carries regular-expression data, i.e. matching it
// involves QRegularExpression rather than plain string comparison.
bool AdBlockRule::isSlow() const
{
    return m_regExp != nullptr;
}
0226 
// True when parseFilter() disabled the rule itself (empty/comment line,
// unsupported options, or an unrestricted match-all rule).
bool AdBlockRule::isInternalDisabled() const
{
    return m_isInternalDisabled;
}
0231 
0232 bool AdBlockRule::urlMatch(const QUrl &url) const
0233 {
0234     if (!hasOption(DocumentOption) && !hasOption(ElementHideOption) && !hasOption(GenericHideOption) && !hasOption(GenericBlockOption)) {
0235         return false;
0236     }
0237 
0238     const QString encodedUrl = QString::fromUtf8(url.toEncoded());
0239     const QString domain = url.host();
0240 
0241     return stringMatch(domain, encodedUrl);
0242 }
0243 
0244 bool AdBlockRule::networkMatch(const QWebEngineUrlRequestInfo &request, const QString &domain, const QString &encodedUrl) const
0245 {
0246     if (m_type == CssRule || !m_isEnabled || m_isInternalDisabled) {
0247         return false;
0248     }
0249 
0250     bool matched = stringMatch(domain, encodedUrl);
0251 
0252     if (matched) {
0253         // Check domain restrictions
0254         if (hasOption(DomainRestrictedOption) && !matchDomain(request.firstPartyUrl().host())) {
0255             return false;
0256         }
0257 
0258         // Check third-party restriction
0259         if (hasOption(ThirdPartyOption) && !matchThirdParty(request)) {
0260             return false;
0261         }
0262 
0263         // Check type restrictions
0264         if (((m_exceptions | m_options) & TypeOptions) && !matchType(request))
0265             return false;
0266     }
0267 
0268     return matched;
0269 }
0270 
0271 bool AdBlockRule::matchDomain(const QString &domain) const
0272 {
0273     if (!m_isEnabled) {
0274         return false;
0275     }
0276 
0277     if (!hasOption(DomainRestrictedOption)) {
0278         return true;
0279     }
0280 
0281     if (m_blockedDomains.isEmpty()) {
0282         for (const QString &d : std::as_const(m_allowedDomains)) {
0283             if (isMatchingDomain(domain, d)) {
0284                 return true;
0285             }
0286         }
0287     }
0288     else if (m_allowedDomains.isEmpty()) {
0289         for (const QString &d : std::as_const(m_blockedDomains)) {
0290             if (isMatchingDomain(domain, d)) {
0291                 return false;
0292             }
0293         }
0294         return true;
0295     }
0296     else {
0297         for (const QString &d : std::as_const(m_blockedDomains)) {
0298             if (isMatchingDomain(domain, d)) {
0299                 return false;
0300             }
0301         }
0302 
0303         for (const QString &d : std::as_const(m_allowedDomains)) {
0304             if (isMatchingDomain(domain, d)) {
0305                 return true;
0306             }
0307         }
0308     }
0309 
0310     return false;
0311 }
0312 
0313 bool AdBlockRule::matchThirdParty(const QWebEngineUrlRequestInfo &request) const
0314 {
0315     // Third-party matching should be performed on second-level domains
0316     const QString firstPartyHost = toSecondLevelDomain(request.firstPartyUrl());
0317     const QString host = toSecondLevelDomain(request.requestUrl());
0318 
0319     bool match = firstPartyHost != host;
0320 
0321     return hasException(ThirdPartyOption) ? !match : match;
0322 }
0323 
0324 bool AdBlockRule::matchType(const QWebEngineUrlRequestInfo &request) const
0325 {
0326     RuleOption type;
0327     switch (request.resourceType()) {
0328     case QWebEngineUrlRequestInfo::ResourceTypeMainFrame:
0329         type = DocumentOption;
0330         break;
0331     case QWebEngineUrlRequestInfo::ResourceTypeSubFrame:
0332         type = SubdocumentOption;
0333         break;
0334     case QWebEngineUrlRequestInfo::ResourceTypeStylesheet:
0335         type = StyleSheetOption;
0336         break;
0337     case QWebEngineUrlRequestInfo::ResourceTypeScript:
0338         type = ScriptOption;
0339         break;
0340     case QWebEngineUrlRequestInfo::ResourceTypeImage:
0341         type = ImageOption;
0342         break;
0343     case QWebEngineUrlRequestInfo::ResourceTypeFontResource:
0344         type = FontOption;
0345         break;
0346     case QWebEngineUrlRequestInfo::ResourceTypeObject:
0347         type = ObjectOption;
0348         break;
0349     case QWebEngineUrlRequestInfo::ResourceTypeMedia:
0350         type = MediaOption;
0351         break;
0352     case QWebEngineUrlRequestInfo::ResourceTypeXhr:
0353         type = XMLHttpRequestOption;
0354         break;
0355     case QWebEngineUrlRequestInfo::ResourceTypePing:
0356         type = PingOption;
0357         break;
0358     case QWebEngineUrlRequestInfo::ResourceTypePluginResource:
0359         type = ObjectSubrequestOption;
0360         break;
0361     case QWebEngineUrlRequestInfo::ResourceTypeSubResource:
0362     case QWebEngineUrlRequestInfo::ResourceTypeWorker:
0363     case QWebEngineUrlRequestInfo::ResourceTypeSharedWorker:
0364     case QWebEngineUrlRequestInfo::ResourceTypePrefetch:
0365     case QWebEngineUrlRequestInfo::ResourceTypeFavicon:
0366     case QWebEngineUrlRequestInfo::ResourceTypeServiceWorker:
0367     case QWebEngineUrlRequestInfo::ResourceTypeCspReport:
0368     case QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadMainFrame:
0369     case QWebEngineUrlRequestInfo::ResourceTypeNavigationPreloadSubFrame:
0370     case QWebEngineUrlRequestInfo::ResourceTypeUnknown:
0371     default:
0372         type = OtherOption;
0373         break;
0374     }
0375     if (!m_exceptions)
0376         return m_options.testFlag(type);
0377     return !m_exceptions.testFlag(type);
0378 }
0379 
0380 void AdBlockRule::parseFilter()
0381 {
0382     QString parsedLine = m_filter;
0383 
0384     // Empty rule or just comment
0385     if (m_filter.trimmed().isEmpty() || m_filter.startsWith(QL1C('!'))) {
0386         // We want to differentiate rule disabled by user and rule disabled in subscription file
0387         // m_isInternalDisabled is also used when rule is disabled due to all options not being supported
0388         m_isEnabled = false;
0389         m_isInternalDisabled = true;
0390         m_type = Invalid;
0391         return;
0392     }
0393 
0394     // Exception always starts with @@
0395     if (parsedLine.startsWith(QL1S("@@"))) {
0396         m_isException = true;
0397         parsedLine.remove(0, 2);
0398     }
0399 
0400     // Extended CSS element hiding
0401     if (parsedLine.contains(QL1S("#?#"))) {
0402         m_type = ExtendedCssRule;
0403         int pos = parsedLine.indexOf(QL1C('#'));
0404         if (!parsedLine.startsWith(QL1S("#"))) {
0405             QString domains = parsedLine.left(pos);
0406             parseDomains(domains, QL1C(','));
0407         }
0408         m_matchString = parsedLine.mid(pos + 3);
0409         // CSS rule cannot have more options -> stop parsing
0410         return;
0411     }
0412 
0413     // Snippet rule
0414     if (parsedLine.contains(QL1S("#$#"))) {
0415         m_type = SnippetRule;
0416         int pos = parsedLine.indexOf(QL1C('#'));
0417         if (!parsedLine.startsWith(QL1S("#"))) {
0418             QString domains = parsedLine.left(pos);
0419             parseDomains(domains, QL1C(','));
0420         }
0421         m_matchString = parsedLine.mid(pos + 3);
0422         return;
0423     }
0424 
0425     // CSS Element hiding rule
0426     if (parsedLine.contains(QL1S("##")) || parsedLine.contains(QL1S("#@#"))) {
0427         m_type = CssRule;
0428         int pos = parsedLine.indexOf(QL1C('#'));
0429 
0430         // Domain restricted rule
0431         if (!parsedLine.startsWith(QL1S("#"))) {
0432             QString domains = parsedLine.left(pos);
0433             parseDomains(domains, QL1C(','));
0434         }
0435 
0436         m_isException = parsedLine.at(pos + 1) == QL1C('@');
0437         m_matchString = parsedLine.mid(m_isException ? pos + 3 : pos + 2);
0438 
0439         // CSS rule cannot have more options -> stop parsing
0440         return;
0441     }
0442 
0443     // Parse all options following $ char
0444     int optionsIndex = parsedLine.indexOf(QL1C('$'));
0445     if (optionsIndex >= 0) {
0446         const QStringList options = parsedLine.mid(optionsIndex + 1).split(QL1C(','), Qt::SkipEmptyParts);
0447 
0448         int handledOptions = 0;
0449         for (const QString &option : options) {
0450             if (option.startsWith(QL1S("domain="))) {
0451                 parseDomains(option.mid(7), QL1C('|'));
0452                 ++handledOptions;
0453             }
0454             else if (option == QL1S("match-case")) {
0455                 m_caseSensitivity = Qt::CaseSensitive;
0456                 ++handledOptions;
0457             }
0458             else if (option.endsWith(QL1S("third-party"))) {
0459                 setOption(ThirdPartyOption);
0460                 setException(ThirdPartyOption, option.startsWith(QL1C('~')));
0461                 ++handledOptions;
0462             }
0463             else if (option.endsWith(QL1S("object"))) {
0464                 setOption(ObjectOption);
0465                 setException(ObjectOption, option.startsWith(QL1C('~')));
0466                 ++handledOptions;
0467             }
0468             else if (option.endsWith(QL1S("subdocument"))) {
0469                 setOption(SubdocumentOption);
0470                 setException(SubdocumentOption, option.startsWith(QL1C('~')));
0471                 ++handledOptions;
0472             }
0473             else if (option.endsWith(QL1S("xmlhttprequest"))) {
0474                 setOption(XMLHttpRequestOption);
0475                 setException(XMLHttpRequestOption, option.startsWith(QL1C('~')));
0476                 ++handledOptions;
0477             }
0478             else if (option.endsWith(QL1S("image"))) {
0479                 setOption(ImageOption);
0480                 setException(ImageOption, option.startsWith(QL1C('~')));
0481                 ++handledOptions;
0482             }
0483             else if (option.endsWith(QL1S("script"))) {
0484                 setOption(ScriptOption);
0485                 setException(ScriptOption, option.startsWith(QL1C('~')));
0486                 ++handledOptions;
0487             }
0488             else if (option.endsWith(QL1S("stylesheet"))) {
0489                 setOption(StyleSheetOption);
0490                 setException(StyleSheetOption, option.startsWith(QL1C('~')));
0491                 ++handledOptions;
0492             }
0493             else if (option.endsWith(QL1S("object-subrequest"))) {
0494                 setOption(ObjectSubrequestOption);
0495                 setException(ObjectSubrequestOption, option.startsWith(QL1C('~')));
0496                 ++handledOptions;
0497             }
0498             else if (option.endsWith(QL1S("ping"))) {
0499                 setOption(PingOption);
0500                 setException(PingOption, option.startsWith(QL1C('~')));
0501                 ++handledOptions;
0502             }
0503             else if (option.endsWith(QL1S("media"))) {
0504                 setOption(MediaOption);
0505                 setException(MediaOption, option.startsWith(QL1C('~')));
0506                 ++handledOptions;
0507             }
0508             else if (option.endsWith(QL1S("font"))) {
0509                 setOption(FontOption);
0510                 setException(FontOption, option.startsWith(QL1C('~')));
0511                 ++handledOptions;
0512             }
0513             else if (option.endsWith(QL1S("other"))) {
0514                 setOption(OtherOption);
0515                 setException(OtherOption, option.startsWith(QL1C('~')));
0516                 ++handledOptions;
0517             }
0518             else if (option == QL1S("collapse")) {
0519                 // Hiding placeholders of blocked elements is enabled by default
0520                 ++handledOptions;
0521             }
0522             else if (option == QL1S("popup")) {
0523                 // doesn't do anything yet
0524                 setOption(PopupOption);
0525                 ++handledOptions;
0526             }
0527             else if (option == QL1S("document") && m_isException) {
0528                 setOption(DocumentOption);
0529                 ++handledOptions;
0530             }
0531             else if (option == QL1S("elemhide") && m_isException) {
0532                 setOption(ElementHideOption);
0533                 ++handledOptions;
0534             }
0535             else if (option == QL1S("generichide") && m_isException) {
0536                 setOption(GenericHideOption);
0537                 ++handledOptions;
0538             }
0539             else if (option == QL1S("genericblock") && m_isException) {
0540                 // doesn't do anything yet
0541                 setOption(GenericBlockOption);
0542 //                 ++handledOptions;
0543             }
0544         }
0545 
0546         // If we don't handle all options, it's safer to just disable this rule
0547         if (handledOptions != options.count()) {
0548             m_isInternalDisabled = true;
0549             m_type = Invalid;
0550             return;
0551         }
0552 
0553         parsedLine.truncate(optionsIndex);
0554     }
0555 
0556     // Rule is classic regexp
0557     if (parsedLine.startsWith(QL1C('/')) && parsedLine.endsWith(QL1C('/'))) {
0558         parsedLine.remove(0, 1);
0559         parsedLine = parsedLine.left(parsedLine.size() - 1);
0560 
0561         m_type = RegExpMatchRule;
0562         m_regExp = new RegExp;
0563         m_regExp->regExp = QRegularExpression(parsedLine, QRegularExpression::InvertedGreedinessOption);
0564         if (m_caseSensitivity == Qt::CaseInsensitive) {
0565             m_regExp->regExp.setPatternOptions(m_regExp->regExp.patternOptions() | QRegularExpression::CaseInsensitiveOption);
0566         }
0567         m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine));
0568         return;
0569     }
0570 
0571     // Remove starting and ending wildcards (*)
0572     if (parsedLine.startsWith(QL1C('*'))) {
0573         parsedLine.remove(0, 1);
0574     }
0575 
0576     if (parsedLine.endsWith(QL1C('*'))) {
0577         parsedLine = parsedLine.left(parsedLine.size() - 1);
0578     }
0579 
0580     // We can use fast string matching for domain here
0581     if (filterIsOnlyDomain(parsedLine)) {
0582         parsedLine.remove(0, 2);
0583         parsedLine = parsedLine.left(parsedLine.size() - 1);
0584 
0585         m_type = DomainMatchRule;
0586         m_matchString = parsedLine;
0587         return;
0588     }
0589 
0590     // If rule contains only | at end, we can also use string matching
0591     if (filterIsOnlyEndsMatch(parsedLine)) {
0592         parsedLine = parsedLine.left(parsedLine.size() - 1);
0593 
0594         m_type = StringEndsMatchRule;
0595         m_matchString = parsedLine;
0596         return;
0597     }
0598 
0599     // If we still find a wildcard (*) or separator (^) or (|)
0600     // we must modify parsedLine to comply with QRegularExpression
0601     if (parsedLine.contains(QL1C('*')) ||
0602         parsedLine.contains(QL1C('^')) ||
0603         parsedLine.contains(QL1C('|'))
0604        ) {
0605         m_type = RegExpMatchRule;
0606         m_regExp = new RegExp;
0607         m_regExp->regExp = QRegularExpression(createRegExpFromFilter(parsedLine), QRegularExpression::InvertedGreedinessOption);
0608         if (m_caseSensitivity == Qt::CaseInsensitive) {
0609             m_regExp->regExp.setPatternOptions(m_regExp->regExp.patternOptions() | QRegularExpression::CaseInsensitiveOption);
0610         }
0611         m_regExp->matchers = createStringMatchers(parseRegExpFilter(parsedLine));
0612         return;
0613     }
0614 
0615     // This rule matches all urls
0616     if (parsedLine.isEmpty()) {
0617         if (m_options == NoOption) {
0618             qWarning() << "Disabling unrestricted rule that would block all requests" << m_filter;
0619             m_isInternalDisabled = true;
0620             m_type = Invalid;
0621             return;
0622         }
0623         m_type = MatchAllUrlsRule;
0624         return;
0625     }
0626 
0627     // We haven't found anything that needs use of regexp, yay!
0628     m_type = StringContainsMatchRule;
0629     m_matchString = parsedLine;
0630 }
0631 
0632 void AdBlockRule::parseDomains(const QString &domains, const QChar &separator)
0633 {
0634     const QStringList domainsList = domains.split(separator, Qt::SkipEmptyParts);
0635 
0636     for (const QString &domain : domainsList) {
0637         if (domain.isEmpty()) {
0638             continue;
0639         }
0640         if (domain.startsWith(QL1C('~'))) {
0641             m_blockedDomains.append(domain.mid(1));
0642         }
0643         else {
0644             m_allowedDomains.append(domain);
0645         }
0646     }
0647 
0648     if (!m_blockedDomains.isEmpty() || !m_allowedDomains.isEmpty()) {
0649         setOption(DomainRestrictedOption);
0650     }
0651 }
0652 
0653 bool AdBlockRule::filterIsOnlyDomain(const QString &filter) const
0654 {
0655     if (!filter.endsWith(QL1C('^')) || !filter.startsWith(QL1S("||")))
0656         return false;
0657 
0658     for (int i = 0; i < filter.size(); ++i) {
0659         switch (filter.at(i).toLatin1()) {
0660         case '/':
0661         case ':':
0662         case '?':
0663         case '=':
0664         case '&':
0665         case '*':
0666             return false;
0667         default:
0668             break;
0669         }
0670     }
0671 
0672     return true;
0673 }
0674 
0675 bool AdBlockRule::filterIsOnlyEndsMatch(const QString &filter) const
0676 {
0677     for (int i = 0; i < filter.size(); ++i) {
0678         switch (filter.at(i).toLatin1()) {
0679         case '^':
0680         case '*':
0681             return false;
0682         case '|':
0683             return i == filter.size() - 1;
0684         default:
0685             break;
0686         }
0687     }
0688 
0689     return false;
0690 }
0691 
0692 static bool wordCharacter(const QChar &c)
0693 {
0694     return c.isLetterOrNumber() || c.isMark() || c == QL1C('_');
0695 }
0696 
0697 QString AdBlockRule::createRegExpFromFilter(const QString &filter) const
0698 {
0699     QString parsed;
0700     parsed.reserve(filter.size());
0701 
0702     bool hadWildcard = false; // Filter multiple wildcards
0703 
0704     for (int i = 0; i < filter.size(); ++i) {
0705         const QChar c = filter.at(i);
0706         switch (c.toLatin1()) {
0707         case '^':
0708             parsed.append(QL1S("(?:[^\\w\\d\\-.%]|$)"));
0709             break;
0710 
0711         case '*':
0712             if (!hadWildcard)
0713                 parsed.append(QL1S(".*"));
0714             break;
0715 
0716         case '|':
0717             if (i == 0) {
0718                 if (filter.size() > 1 && filter.at(1) == QL1C('|')) {
0719                     parsed.append(QL1S("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"));
0720                     i++;
0721                 }
0722                 else {
0723                     parsed.append(QL1C('^'));
0724                 }
0725                 break;
0726             }
0727             else if (i == filter.size() - 1) {
0728                 parsed.append(QL1C('$'));
0729                 break;
0730             }
0731             // fallthrough
0732 
0733         default:
0734             if (!wordCharacter(c))
0735                 parsed.append(QL1C('\\') + c);
0736             else
0737                 parsed.append(c);
0738         }
0739 
0740         hadWildcard = c == QL1C('*');
0741     }
0742 
0743     return parsed;
0744 }
0745 
0746 QList<QStringMatcher> AdBlockRule::createStringMatchers(const QStringList &filters) const
0747 {
0748     QList<QStringMatcher> matchers;
0749     matchers.reserve(filters.size());
0750 
0751     for (const QString &filter : filters) {
0752         matchers.append(QStringMatcher(filter, m_caseSensitivity));
0753     }
0754 
0755     return matchers;
0756 }
0757 
0758 bool AdBlockRule::stringMatch(const QString &domain, const QString &encodedUrl) const
0759 {
0760     switch (m_type) {
0761     case StringContainsMatchRule:
0762         return encodedUrl.contains(m_matchString, m_caseSensitivity);
0763 
0764     case DomainMatchRule:
0765         return isMatchingDomain(domain, m_matchString);
0766 
0767     case StringEndsMatchRule:
0768         return encodedUrl.endsWith(m_matchString, m_caseSensitivity);
0769 
0770     case RegExpMatchRule:
0771         if (!isMatchingRegExpStrings(encodedUrl)) {
0772             return false;
0773         }
0774         return m_regExp->regExp.match(encodedUrl).hasMatch();
0775 
0776     case MatchAllUrlsRule:
0777         return true;
0778 
0779     default:
0780         return false;
0781     }
0782 }
0783 
// Checks domain against the domain pattern filter by delegating to
// QzTools::matchDomain(). Note the swapped argument order: the pattern is
// passed first, the domain second.
bool AdBlockRule::isMatchingDomain(const QString &domain, const QString &filter) const
{
    return QzTools::matchDomain(filter, domain);
}
0788 
0789 bool AdBlockRule::isMatchingRegExpStrings(const QString &url) const
0790 {
0791     Q_ASSERT(m_regExp);
0792 
0793     const auto matchers = m_regExp->matchers;
0794     for (const QStringMatcher &matcher : matchers) {
0795         if (matcher.indexIn(url) == -1)
0796             return false;
0797     }
0798 
0799     return true;
0800 }
0801 
0802 // Split regexp filter into strings that can be used with QString::contains
0803 // Don't use parts that contains only 1 char and duplicated parts
0804 QStringList AdBlockRule::parseRegExpFilter(const QString &filter) const
0805 {
0806     QStringList list;
0807     int startPos = -1;
0808 
0809     for (int i = 0; i < filter.size(); ++i) {
0810         const QChar c = filter.at(i);
0811         // Meta characters in AdBlock rules are | * ^
0812         if (c == QL1C('|') || c == QL1C('*') || c == QL1C('^')) {
0813             const QString sub = filter.mid(startPos, i - startPos);
0814             if (sub.size() > 1)
0815                 list.append(sub);
0816             startPos = i + 1;
0817         }
0818     }
0819 
0820     const QString sub = filter.mid(startPos);
0821     if (sub.size() > 1)
0822         list.append(sub);
0823 
0824     list.removeDuplicates();
0825 
0826     return list;
0827 }
0828 
// True when any bit of opt is set in the rule's options.
bool AdBlockRule::hasOption(const AdBlockRule::RuleOption &opt) const
{
    return (m_options & opt);
}
0833 
// True when any bit of opt is set in the rule's inverted ("~option") options.
bool AdBlockRule::hasException(const AdBlockRule::RuleOption &opt) const
{
    return (m_exceptions & opt);
}
0838 
// Adds opt to the rule's options; options already set are kept.
void AdBlockRule::setOption(const AdBlockRule::RuleOption &opt)
{
    m_options |= opt;
}
0843 
// Marks opt as inverted ("~option") when on is true.
// Passing on == false is a no-op: an exception bit that is already set is
// not cleared by this function.
void AdBlockRule::setException(const AdBlockRule::RuleOption &opt, bool on)
{
    if (on) {
        m_exceptions |= opt;
    }
}