File indexing completed on 2024-11-17 04:55:17

0001 /*
0002     SPDX-License-Identifier: MPL-2.0
0003 */
0004 
0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license.
0006  * This Source Code Form is subject to the terms of the Mozilla Public
0007  * License, v. 2.0. If a copy of the MPL was not distributed with this
0008  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
0009 
0010 #ifndef FILTER_H_
0011 #define FILTER_H_
0012 #include <stdint.h>
0013 #include <string.h>
0014 #include "./base.h"
0015 #include "./context_domain.h"
0016 
0017 class BloomFilter;
0018 template <typename T> class HashSet;
0019 
// Bit flags classifying a filter rule.  Each named kind occupies its own bit,
// so several kinds can be OR-ed together into a single value.
enum FilterType {
  FTNoFilterType = 0,
  FTRegex = 1 << 0,
  FTElementHiding = 1 << 1,
  FTElementHidingException = 1 << 2,
  FTHostAnchored = 1 << 3,
  FTLeftAnchored = 1 << 4,
  FTRightAnchored = 1 << 5,
  FTComment = 1 << 6,
  FTException = 1 << 7,
  FTEmpty = 1 << 8,
  FTHostOnly = 1 << 9,
  // HTML filtering rule, e.g. example.org$$script[data-src="banner"]
  // deletes the script element whose attribute matches.
  FTHTMLFiltering = 1 << 10,
  // Union of the six flags listed below.
  FTListTypesMask = FTElementHiding | FTElementHidingException |
    FTComment | FTException | FTEmpty | FTHTMLFiltering,
};
0039 
// Bit flags for the options that can be attached to a filter rule.  Every
// individual option occupies one bit; the trailing entries are ready-made
// unions of several options.
enum FilterOption {
  FONoFilterOption = 0,
  FOScript = 1 << 0,
  FOImage = 1 << 1,
  FOStylesheet = 1 << 2,
  FOObject = 1 << 3,
  FOXmlHttpRequest = 1 << 4,
  FOObjectSubrequest = 1 << 5,
  FOSubdocument = 1 << 6,
  FODocument = 1 << 7,
  FOOther = 1 << 8,
  FOXBL = 1 << 9,
  FOCollapse = 1 << 10,
  FODoNotTrack = 1 << 11,
  FOElemHide = 1 << 12,
  // Used internally only, do not use
  FOThirdParty = 1 << 13,
  // Used internally only, do not use
  FONotThirdParty = 1 << 14,
  // Not supported, but rules carrying it are ignored
  FOPing = 1 << 15,
  // Not supported, but rules carrying it are ignored
  FOPopup = 1 << 16,
  // Only used by uBlock; currently every instance is a 1x1 transparent gif,
  // which is already what happens for images
  FORedirect = 1 << 17,
  // CSPs are parsed but treated as unsupported
  FOCSP = 1 << 18,
  FOFont = 1 << 19,
  FOMedia = 1 << 20,
  FOWebRTC = 1 << 21,
  FOGenericHide = 1 << 22,
  FOGenericBlock = 1 << 23,
  // Used by Adguard, purpose unknown, ignore
  FOEmpty = 1 << 24,
  FOWebsocket = 1 << 25,
  // "important" means all exception filters (those prefixed with @@) are
  // ignored.
  FOImportant = 1 << 26,
  // Cancel the request instead of answering with a 200 OK response
  FOExplicitCancel = 1 << 27,

  // Bit 28 is not assigned to any option in this enum.
  FOUnknown = 1 << 29,

  // Union of the resource-type options.
  FOResourcesOnly = FOScript | FOImage | FOStylesheet | FOObject |
    FOXmlHttpRequest | FOObjectSubrequest | FOSubdocument | FODocument |
    FOOther | FOXBL | FOPing | FOFont | FOMedia | FOWebRTC | FOWebsocket,
  // Union of the options that are unsupported.
  FOUnsupportedSoSkipCheck = FOElemHide | FOPopup | FOCSP | FOGenericHide |
    FOGenericBlock | FOEmpty | FOUnknown,
  // Options unrelated to matching; they alter behavior instead
  BehavioralFilterOnly = FOThirdParty | FONotThirdParty | FORedirect |
    FOImportant | FOExplicitCancel
};
0091 
0092 class Filter {
0093 friend class AdBlockClient;
0094  public:
0095   Filter();
0096   Filter(const Filter &other);
0097   Filter(const char * data, int dataLen, char *domainList = nullptr,
0098       const char * host = nullptr, int hostLen = -1,
0099       char *tag = nullptr, int tagLen = 0);
0100 
0101   Filter(FilterType filterType, FilterOption filterOption,
0102          FilterOption antiFilterOption,
0103          const char * data, int dataLen,
0104          char *domainList = nullptr,
0105          const char * host = nullptr, int hostLen = -1,
0106          char *tag = nullptr, int tagLen = 0);
0107 
0108   ~Filter();
0109 
0110   // Swaps the data members for 'this' and the passed in filter
0111   void swapData(Filter *f);
0112 
0113   // Checks to see if any filter matches the input but does not match
0114   // any exception rule You may want to call the first overload to be
0115   // slighly more efficient
0116   bool matches(const char *input, int inputLen,
0117       FilterOption contextOption = FONoFilterOption,
0118       const char *contextDomain = nullptr,
0119       BloomFilter *inputBloomFilter = nullptr,
0120       const char *inputHost = nullptr, int inputHostLen = 0);
0121   bool matches(const char *input, FilterOption contextOption = FONoFilterOption,
0122       const char *contextDomain = nullptr,
0123       BloomFilter *inputBloomFilter = nullptr,
0124       const char *inputHost = nullptr, int inputHostLen = 0);
0125 
0126   // Nothing needs to be updated when a filter is added multiple times
0127   void Update(const Filter &) {}
0128   bool hasUnsupportedOptions() const;
0129 
0130   // Checks to see if the filter options match for the passed in data
0131   bool matchesOptions(const char *input, FilterOption contextOption,
0132       const char *contextDomain = nullptr);
0133 
0134   void parseOptions(const char *input);
0135 
0136   // Checks to see if the specified context domain is in the
0137   // domain (or antiDmomain) list.
0138   bool containsDomain(const char* contextDomain, size_t contextDomainLen,
0139       bool anti = false) const;
0140   // Returns true if the filter is composed of only domains and no anti domains
0141   // Note that the set of all domain and anti-domain rules are not mutually
0142   // exclusive.  One xapmle is:
0143   // domain=example.com|~foo.example.com restricts the filter to the example.com
0144   // domain with the exception of "foo.example.com" subdomain.
0145   bool isDomainOnlyFilter();
0146   // Returns true if the filter is composed of only anti-domains and no domains
0147   bool isAntiDomainOnlyFilter();
0148   uint32_t getDomainCount(bool anti = false);
0149 
0150   uint64_t hash() const;
0151   uint64_t GetHash() const {
0152     return hash();
0153   }
0154 
0155   bool operator==(const Filter &rhs) const {
0156     /*
0157      if (filterType != rhs.filterType || filterOption != rhs.filterOption ||
0158          antiFilterOption != rhs.antiFilterOption) {
0159       return false;
0160     }
0161     */
0162 
0163     int hostLen = 0;
0164     if (host) {
0165       hostLen = this->hostLen == -1 ?
0166         static_cast<int>(strlen(host)) : this->hostLen;
0167     }
0168     int rhsHostLen = 0;
0169     if (rhs.host) {
0170       rhsHostLen = rhs.hostLen == -1 ?
0171         static_cast<int>(strlen(rhs.host)) : rhs.hostLen;
0172     }
0173 
0174     if (hostLen != rhsHostLen) {
0175       return false;
0176     }
0177 
0178     return !memcmp(host, rhs.host, hostLen);
0179   }
0180 
0181   bool operator!=(const Filter &rhs) const {
0182     return !(*this == rhs);
0183   }
0184 
0185   uint32_t Serialize(char *buffer);
0186   uint32_t Deserialize(char *buffer, uint32_t bufferSize);
0187 
0188   // Holds true if the filter should not free memory because for example it
0189   // was loaded from a large buffer somewhere else via the serialize and
0190   // deserialize functions.
0191   bool borrowed_data;
0192 
0193   FilterType filterType;
0194   FilterOption filterOption;
0195   FilterOption antiFilterOption;
0196 
0197   // The text of the filter list rule, as it appeared before being parsed.
0198   char *ruleDefinition;
0199 
0200   char *data;
0201   int dataLen;
0202   char *domainList;
0203   // A filter tag is used for identifying and tagally including
0204   // certain filters in Brave.
0205   char *tag;
0206   int tagLen;
0207   char *host;
0208   int hostLen;
0209   HashSet<ContextDomain>* domains;
0210   HashSet<ContextDomain>* antiDomains;
0211   bool domainsParsed;
0212 
0213  protected:
0214   // Fills |domains| and |antiDomains| sets
0215   void parseDomains(const char *domainList);
0216   bool contextDomainMatchesFilter(const char *contextDomain);
0217 
0218   // Parses a single option
0219   void parseOption(const char *input, int len);
0220 };
0221 
// Declaration only; the definition lives outside this header.
// NOTE(review): judging by the name, this presumably reports whether
// |testHost| counts as a third party relative to |baseContextHost| --
// confirm against the implementation.
bool isThirdPartyHost(const char *baseContextHost, int baseContextHostLen,
                      const char *testHost, int testHostLen);
0226 
// Tells whether |c| is a line terminator character: carriage return or
// line feed.
static inline bool isEndOfLine(char c) {
  switch (c) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
0230 
0231 #endif  // FILTER_H_