File indexing completed on 2024-11-17 04:55:17
0001 /* 0002 SPDX-License-Identifier: MPL-2.0 0003 */ 0004 0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 0006 * This Source Code Form is subject to the terms of the Mozilla Public 0007 * License, v. 2.0. If a copy of the MPL was not distributed with this 0008 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 0009 0010 #ifndef FILTER_H_ 0011 #define FILTER_H_ 0012 #include <stdint.h> 0013 #include <string.h> 0014 #include "./base.h" 0015 #include "./context_domain.h" 0016 0017 class BloomFilter; 0018 template <typename T> class HashSet; 0019 0020 enum FilterType { 0021 FTNoFilterType = 0, 0022 FTRegex = 01, 0023 FTElementHiding = 02, 0024 FTElementHidingException = 04, 0025 FTHostAnchored = 010, 0026 FTLeftAnchored = 020, 0027 FTRightAnchored = 040, 0028 FTComment = 0100, 0029 FTException = 0200, 0030 FTEmpty = 0400, 0031 FTHostOnly = 01000, 0032 // E.g. example.org$$script[data-src="banner"] to delete 0033 // script element with matching attr 0034 FTHTMLFiltering = 02000, 0035 FTListTypesMask = FTException|FTElementHiding| 0036 FTElementHidingException|FTEmpty|FTComment| 0037 FTHTMLFiltering, 0038 }; 0039 0040 enum FilterOption { 0041 FONoFilterOption = 0, 0042 FOScript = 01, 0043 FOImage = 02, 0044 FOStylesheet = 04, 0045 FOObject = 010, 0046 FOXmlHttpRequest = 020, 0047 FOObjectSubrequest = 040, 0048 FOSubdocument = 0100, 0049 FODocument = 0200, 0050 FOOther = 0400, 0051 FOXBL = 01000, 0052 FOCollapse = 02000, 0053 FODoNotTrack = 04000, 0054 FOElemHide = 010000, 0055 // Used internally only, do not use 0056 FOThirdParty = 020000, 0057 // Used internally only, do not use 0058 FONotThirdParty = 040000, 0059 // Not supported, but we will ignore these rules 0060 FOPing = 0100000, 0061 // Not supported, but we will ignore these rules 0062 FOPopup = 0200000, 0063 // This is only used by uBlock and currently all instances are 1x1 0064 // transparent gif which we already do for images 0065 FORedirect = 0400000, 0066 // Parse CSPs but consider them unsupported 0067 FOCSP = 01000000, 0068 FOFont = 02000000, 0069 FOMedia = 04000000, 0070 FOWebRTC = 010000000, 0071 FOGenericHide = 020000000, 0072 FOGenericBlock = 040000000, 0073 // Used by Adguard, purpose unknown, ignore 0074 FOEmpty = 0100000000, 0075 FOWebsocket = 0200000000, 0076 // important means to ignore all exception filters (those prefixed with @@). 0077 FOImportant = 0400000000, 0078 // Cancel the request instead of using a 200 OK response 0079 FOExplicitCancel = 01000000000, 0080 0081 FOUnknown = 04000000000, 0082 FOResourcesOnly = FOScript|FOImage|FOStylesheet|FOObject|FOXmlHttpRequest| 0083 FOObjectSubrequest|FOSubdocument|FODocument|FOOther|FOXBL|FOFont|FOMedia| 0084 FOWebRTC|FOWebsocket|FOPing, 0085 FOUnsupportedSoSkipCheck = FOPopup|FOCSP|FOElemHide|FOGenericHide| 0086 FOGenericBlock|FOEmpty|FOUnknown, 0087 // Non matching related filters, alters behavior 0088 BehavioralFilterOnly = FORedirect|FOImportant|FOExplicitCancel| 0089 FOThirdParty|FONotThirdParty 0090 }; 0091 0092 class Filter { 0093 friend class AdBlockClient; 0094 public: 0095 Filter(); 0096 Filter(const Filter &other); 0097 Filter(const char * data, int dataLen, char *domainList = nullptr, 0098 const char * host = nullptr, int hostLen = -1, 0099 char *tag = nullptr, int tagLen = 0); 0100 0101 Filter(FilterType filterType, FilterOption filterOption, 0102 FilterOption antiFilterOption, 0103 const char * data, int dataLen, 0104 char *domainList = nullptr, 0105 const char * host = nullptr, int hostLen = -1, 0106 char *tag = nullptr, int tagLen = 0); 0107 0108 ~Filter(); 0109 0110 // Swaps the data members for 'this' and the passed in filter 0111 void swapData(Filter *f); 0112 0113 // Checks to see if any filter matches the input but does not match 0114 // any exception rule You may want to call the first overload to be 0115 // slighly more efficient 0116 bool matches(const char *input, int inputLen, 0117 FilterOption contextOption = FONoFilterOption, 0118 const char *contextDomain = nullptr, 0119 BloomFilter *inputBloomFilter = nullptr, 0120 const char *inputHost = nullptr, int inputHostLen = 0); 0121 bool matches(const char *input, FilterOption contextOption = FONoFilterOption, 0122 const char *contextDomain = nullptr, 0123 BloomFilter *inputBloomFilter = nullptr, 0124 const char *inputHost = nullptr, int inputHostLen = 0); 0125 0126 // Nothing needs to be updated when a filter is added multiple times 0127 void Update(const Filter &) {} 0128 bool hasUnsupportedOptions() const; 0129 0130 // Checks to see if the filter options match for the passed in data 0131 bool matchesOptions(const char *input, FilterOption contextOption, 0132 const char *contextDomain = nullptr); 0133 0134 void parseOptions(const char *input); 0135 0136 // Checks to see if the specified context domain is in the 0137 // domain (or antiDmomain) list. 0138 bool containsDomain(const char* contextDomain, size_t contextDomainLen, 0139 bool anti = false) const; 0140 // Returns true if the filter is composed of only domains and no anti domains 0141 // Note that the set of all domain and anti-domain rules are not mutually 0142 // exclusive. One xapmle is: 0143 // domain=example.com|~foo.example.com restricts the filter to the example.com 0144 // domain with the exception of "foo.example.com" subdomain. 0145 bool isDomainOnlyFilter(); 0146 // Returns true if the filter is composed of only anti-domains and no domains 0147 bool isAntiDomainOnlyFilter(); 0148 uint32_t getDomainCount(bool anti = false); 0149 0150 uint64_t hash() const; 0151 uint64_t GetHash() const { 0152 return hash(); 0153 } 0154 0155 bool operator==(const Filter &rhs) const { 0156 /* 0157 if (filterType != rhs.filterType || filterOption != rhs.filterOption || 0158 antiFilterOption != rhs.antiFilterOption) { 0159 return false; 0160 } 0161 */ 0162 0163 int hostLen = 0; 0164 if (host) { 0165 hostLen = this->hostLen == -1 ? 0166 static_cast<int>(strlen(host)) : this->hostLen; 0167 } 0168 int rhsHostLen = 0; 0169 if (rhs.host) { 0170 rhsHostLen = rhs.hostLen == -1 ? 0171 static_cast<int>(strlen(rhs.host)) : rhs.hostLen; 0172 } 0173 0174 if (hostLen != rhsHostLen) { 0175 return false; 0176 } 0177 0178 return !memcmp(host, rhs.host, hostLen); 0179 } 0180 0181 bool operator!=(const Filter &rhs) const { 0182 return !(*this == rhs); 0183 } 0184 0185 uint32_t Serialize(char *buffer); 0186 uint32_t Deserialize(char *buffer, uint32_t bufferSize); 0187 0188 // Holds true if the filter should not free memory because for example it 0189 // was loaded from a large buffer somewhere else via the serialize and 0190 // deserialize functions. 0191 bool borrowed_data; 0192 0193 FilterType filterType; 0194 FilterOption filterOption; 0195 FilterOption antiFilterOption; 0196 0197 // The text of the filter list rule, as it appeared before being parsed. 0198 char *ruleDefinition; 0199 0200 char *data; 0201 int dataLen; 0202 char *domainList; 0203 // A filter tag is used for identifying and tagally including 0204 // certain filters in Brave. 0205 char *tag; 0206 int tagLen; 0207 char *host; 0208 int hostLen; 0209 HashSet<ContextDomain>* domains; 0210 HashSet<ContextDomain>* antiDomains; 0211 bool domainsParsed; 0212 0213 protected: 0214 // Fills |domains| and |antiDomains| sets 0215 void parseDomains(const char *domainList); 0216 bool contextDomainMatchesFilter(const char *contextDomain); 0217 0218 // Parses a single option 0219 void parseOption(const char *input, int len); 0220 }; 0221 0222 bool isThirdPartyHost(const char *baseContextHost, 0223 int baseContextHostLen, 0224 const char *testHost, 0225 int testHostLen); 0226 0227 static inline bool isEndOfLine(char c) { 0228 return c == '\r' || c == '\n'; 0229 } 0230 0231 #endif // FILTER_H_