File indexing completed on 2024-11-17 04:55:16
0001 /* 0002 SPDX-License-Identifier: MPL-2.0 0003 */ 0004 0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 0006 * This Source Code Form is subject to the terms of the Mozilla Public 0007 * License, v. 2.0. If a copy of the MPL was not distributed with this 0008 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 0009 0010 #include <string.h> 0011 #include <stdio.h> 0012 #include "./protocol.h" 0013 #include "./ad_block_client.h" 0014 #include "./bad_fingerprint.h" 0015 #include "./bad_fingerprints.h" 0016 #include "./cosmetic_filter.h" 0017 #include "./hashFn.h" 0018 #include "./no_fingerprint_domain.h" 0019 0020 #include "BloomFilter.h" 0021 0022 #ifdef PERF_STATS 0023 #include <iostream> 0024 using std::cout; 0025 using std::endl; 0026 #endif 0027 0028 #define UNUSED(x) ( (void)(x) ) 0029 0030 std::set<std::string> unknownOptions; 0031 0032 // Fast hash function applicable to 2 byte char checks 0033 class HashFn2Byte : public HashFn { 0034 public: 0035 HashFn2Byte() : HashFn(0, false) { 0036 } 0037 0038 uint64_t operator()(const char *input, int len, 0039 unsigned char lastCharCode, uint64_t lastHash) override; 0040 0041 uint64_t operator()(const char *input, int len) override; 0042 }; 0043 0044 const int kMaxLineLength = 2048; 0045 0046 const int AdBlockClient::kFingerprintSize = 6; 0047 0048 static HashFn2Byte hashFn2Byte; 0049 0050 /** 0051 * Finds the host within the passed in URL and returns its length 0052 */ 0053 const char * getUrlHost(const char *input, int *len) { 0054 const char *p = input; 0055 while (*p != '\0' && *p != ':') { 0056 p++; 0057 } 0058 if (*p != '\0') { 0059 p++; 0060 while (*p != '\0' && *p == '/') { 0061 p++; 0062 } 0063 } 0064 const char *q = p; 0065 while (*q != '\0') { 0066 q++; 0067 } 0068 *len = findFirstSeparatorChar(p, q); 0069 return p; 0070 } 0071 0072 void AddFilterDomainsToHashSet(Filter* filter, 0073 HashSet<NoFingerprintDomain> *hashSet) { 0074 if (filter->domainList) { 0075 char * filter_domain_list = filter->domainList; 0076 int start_offset = 0; 0077 int len = 0; 0078 const char *p = filter_domain_list; 0079 while (true) { 0080 if (*p == '|' || *p == '\0') { 0081 const char *domain = filter_domain_list + start_offset; 0082 if (len > 0 && *domain != '~') { 0083 char buffer[1024]; 0084 memset(buffer, 0, 1024); 0085 memcpy(buffer, domain, len); 0086 // cout << "Adding filter: " << buffer << endl; 0087 hashSet->Add(NoFingerprintDomain(domain, len)); 0088 } else if (len > 0 && *domain == '~') { 0089 char buffer[1024]; 0090 memset(buffer, 0, 1024); 0091 memcpy(buffer, domain + 1, len - 1); 0092 // cout << "Adding anti filter: " << buffer << endl; 0093 hashSet->Add(NoFingerprintDomain(domain + 1, len - 1)); 0094 } 0095 start_offset += len + 1; 0096 len = -1; 0097 } 0098 if (*p == '\0') { 0099 break; 0100 } 0101 p++; 0102 len++; 0103 } 0104 } 0105 } 0106 0107 inline bool isFingerprintChar(char c) { 0108 return c != '|' && c != '*' && c != '^'; 0109 } 0110 0111 bool isBadFingerprint(const char *fingerprint, const char * fingerprintEnd) { 0112 for (unsigned int i = 0; i < sizeof(badFingerprints) 0113 / sizeof(badFingerprints[0]); i++) { 0114 if (!strncmp(badFingerprints[i], fingerprint, 0115 fingerprintEnd - fingerprint)) { 0116 return true; 0117 } 0118 } 0119 return false; 0120 } 0121 0122 bool hasBadSubstring(const char *fingerprint, const char * fingerprintEnd) { 0123 for (unsigned int i = 0; i < sizeof(badSubstrings) 0124 / sizeof(badSubstrings[0]); i++) { 0125 const char * p = strstr(fingerprint, badSubstrings[i]); 0126 if (p && (p - fingerprint) + strlen(badSubstrings[i]) 0127 <= (unsigned int)(fingerprintEnd - fingerprint)) { 0128 return true; 0129 } 0130 } 0131 return false; 0132 } 0133 0134 /** 0135 * Obtains a fingerprint for the specified filter 0136 */ 0137 bool AdBlockClient::getFingerprint(char *buffer, const char *input) { 0138 if (!input) { 0139 return false; 0140 } 0141 int size = 0; 0142 const char *p = input; 0143 const char *start = input; 0144 while (*p != '\0') { 0145 if (!isFingerprintChar(*p)) { 0146 size = 0; 0147 p++; 0148 start = p; 0149 continue; 0150 } 0151 if (buffer) { 0152 buffer[size] = *p; 0153 } 0154 if (hasBadSubstring(start, start + size + 1)) { 0155 size = 0; 0156 start++; 0157 p = start; 0158 continue; 0159 } 0160 size++; 0161 0162 if (size == kFingerprintSize) { 0163 if (buffer) { 0164 buffer[size] = '\0'; 0165 } 0166 if (isBadFingerprint(start, start + size)) { 0167 size = 0; 0168 start++; 0169 p = start; 0170 continue; 0171 } 0172 return true; 0173 } 0174 p++; 0175 } 0176 if (buffer) { 0177 buffer[0] = '\0'; 0178 } 0179 return false; 0180 } 0181 0182 bool AdBlockClient::getFingerprint(char *buffer, const Filter &f) { 0183 if (f.filterType & FTRegex) { 0184 // cout << "Get fingerprint for regex returning false; " << endl; 0185 return false; 0186 } 0187 0188 if (f.filterType & FTHostAnchored) { 0189 if (AdBlockClient::getFingerprint(buffer, f.data + strlen(f.host))) { 0190 return true; 0191 } 0192 } 0193 0194 bool b = AdBlockClient::getFingerprint(buffer, f.data); 0195 // if (!b && f.data) { 0196 // cout << "No fingerprint for: " << f.data << endl; 0197 // } 0198 return b; 0199 } 0200 0201 // Separator chars are one of: :?/=^; 0202 signed char separatorBuffer[32] = { 0, 0, 0, 0, 16, -128, 0, -92, 0, 0, 0, 64 }; 0203 bool isSeparatorChar(char c) { 0204 return !!(separatorBuffer[(unsigned char)c / 8] & 1 << (unsigned char)c % 8); 0205 } 0206 0207 int findFirstSeparatorChar(const char *input, const char *end) { 0208 const char *p = input; 0209 while (p != end) { 0210 if (isSeparatorChar(*p)) { 0211 return static_cast<int>(p - input); 0212 } 0213 p++; 0214 } 0215 return static_cast<int>(end - input); 0216 } 0217 0218 void parseFilter(const char *input, Filter *f, BloomFilter *bloomFilter, 0219 BloomFilter *exceptionBloomFilter, 0220 HashSet<Filter> *hostAnchoredHashSet, 0221 HashSet<Filter> *hostAnchoredExceptionHashSet, 0222 HashSet<CosmeticFilter> *simpleCosmeticFilters, 0223 bool preserveRules) { 0224 UNUSED(preserveRules); 0225 const char *end = input; 0226 while (*end != '\0') end++; 0227 parseFilter(input, end, f, bloomFilter, exceptionBloomFilter, 0228 hostAnchoredHashSet, hostAnchoredExceptionHashSet, simpleCosmeticFilters); 0229 } 0230 0231 enum FilterParseState { 0232 FPStart, 0233 FPPastWhitespace, 0234 FPOneBar, 0235 FPOneAt, 0236 FPData, 0237 // Same as data but won't consider any special char handling like | or $ 0238 FPDataOnly 0239 }; 0240 0241 // Not currently multithreaded safe due to the static buffer named 'data' 0242 void parseFilter(const char *input, const char *end, Filter *f, 0243 BloomFilter *bloomFilter, 0244 BloomFilter *exceptionBloomFilter, 0245 HashSet<Filter> *hostAnchoredHashSet, 0246 HashSet<Filter> *hostAnchoredExceptionHashSet, 0247 HashSet<CosmeticFilter> *simpleCosmeticFilters, 0248 bool preserveRules) { 0249 FilterParseState parseState = FPStart; 0250 const char *p = input; 0251 const char *filterRuleStart = p; 0252 const char *filterRuleEndPos = p; 0253 char data[kMaxLineLength]; 0254 memset(data, 0, sizeof data); 0255 int i = 0; 0256 0257 bool earlyBreak = false; 0258 while (p != end && !earlyBreak) { 0259 // Check for the filter being too long 0260 if ((p - input) >= kMaxLineLength - 1) { 0261 return; 0262 } 0263 0264 if (parseState != FPDataOnly) { 0265 if (parseState == FPOneBar && *p != '|') { 0266 parseState = FPData; 0267 f->filterType = static_cast<FilterType>(f->filterType | FTLeftAnchored); 0268 } 0269 0270 switch (*p) { 0271 case '|': 0272 if (parseState == FPStart || parseState == FPPastWhitespace) { 0273 parseState = FPOneBar; 0274 filterRuleEndPos++; 0275 p++; 0276 continue; 0277 } else if (parseState == FPOneBar) { 0278 parseState = FPOneBar; 0279 f->filterType = 0280 static_cast<FilterType>(f->filterType | FTHostAnchored); 0281 parseState = FPData; 0282 filterRuleEndPos++; 0283 p++; 0284 0285 int len = findFirstSeparatorChar(p, end); 0286 // It's possible we have a host anchored filter 0287 // which also has a right anchored filter. 0288 if (len > 0 && p[len - 1] == '|') { 0289 len--; 0290 } 0291 f->host = new char[len + 1]; 0292 f->host[len] = '\0'; 0293 memcpy(f->host, p, len); 0294 0295 if ((*(p + len) == '^' && (*(p + len + 1) == '\0' 0296 || *(p + len + 1) == '$' || isEndOfLine(*(p + len + 1)))) || 0297 *(p + len) == '\0' || *(p + len) == '$' || 0298 isEndOfLine(*(p + len))) { 0299 f->filterType = 0300 static_cast<FilterType>(f->filterType | FTHostOnly); 0301 } 0302 0303 continue; 0304 } else { 0305 f->filterType = 0306 static_cast<FilterType>(f->filterType | FTRightAnchored); 0307 parseState = FPData; 0308 filterRuleEndPos++; 0309 p++; 0310 continue; 0311 } 0312 break; 0313 case '@': 0314 if (parseState == FPStart || parseState == FPPastWhitespace) { 0315 parseState = FPOneAt; 0316 filterRuleEndPos++; 0317 p++; 0318 continue; 0319 } else if (parseState == FPOneAt) { 0320 parseState = FPOneBar; 0321 f->filterType = FTException; 0322 parseState = FPPastWhitespace; 0323 filterRuleEndPos++; 0324 p++; 0325 continue; 0326 } 0327 break; 0328 case '!': 0329 case '[': 0330 if (parseState == FPStart || parseState == FPPastWhitespace) { 0331 f->filterType = FTComment; 0332 // We don't care about comments right now 0333 return; 0334 } 0335 break; 0336 case '\r': 0337 case '\n': 0338 case '\t': 0339 case ' ': 0340 // Skip leading whitespace 0341 if (parseState == FPStart) { 0342 filterRuleStart++; 0343 filterRuleEndPos++; 0344 p++; 0345 continue; 0346 } 0347 break; 0348 case '/': { 0349 const size_t inputLen = end - input; 0350 if (parseState == FPStart || parseState == FPPastWhitespace) { 0351 if (input[inputLen - 1] == '/' && inputLen > 1) { 0352 // Just copy out the whole regex and return early 0353 int len = static_cast<int>(inputLen) - i - 1; 0354 f->data = new char[len]; 0355 f->data[len - 1] = '\0'; 0356 memcpy(f->data, input + i + 1, len - 1); 0357 0358 if (preserveRules) { 0359 f->ruleDefinition = new char[len]; 0360 f->ruleDefinition[len - 1] = '\0'; 0361 memcpy(f->ruleDefinition, input + i + 1, len - 1); 0362 } 0363 0364 f->filterType = FTRegex; 0365 return; 0366 } else { 0367 parseState = FPData; 0368 } 0369 } 0370 break; 0371 } 0372 case '$': 0373 // Handle adguard HTML filtering rules syntax 0374 // e.g. example.org$$script[data-src="banner"] 0375 // see https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#html-filtering-rules-syntax-1 0376 if (*(p+1) == '$') { 0377 if (i != 0) { 0378 f->domainList = new char[i + 1]; 0379 memcpy(f->domainList, data, i + 1); 0380 i = 0; 0381 } 0382 parseState = FPDataOnly; 0383 f->filterType = FTHTMLFiltering; 0384 p += 2; 0385 filterRuleEndPos += 2; 0386 continue; 0387 } 0388 while (*filterRuleEndPos != '\0' && !isEndOfLine(*filterRuleEndPos)) { 0389 filterRuleEndPos++; 0390 } 0391 f->parseOptions(p + 1); 0392 earlyBreak = true; 0393 continue; 0394 case '#': 0395 // ublock uses some comments of the form #[space] 0396 if (parseState == FPStart || parseState == FPPastWhitespace) { 0397 if (*(p+1) == ' ') { 0398 f->filterType = FTComment; 0399 // We don't care about comments right now 0400 return; 0401 } 0402 } 0403 0404 if (*(p+1) == '#' || *(p+1) == '@') { 0405 if (i != 0) { 0406 f->domainList = new char[i + 1]; 0407 memcpy(f->domainList, data, i + 1); 0408 i = 0; 0409 } 0410 parseState = FPDataOnly; 0411 if (*(p+1) == '#') { 0412 f->filterType = FTElementHiding; 0413 } else { 0414 f->filterType = FTElementHidingException; 0415 } 0416 p += 2; 0417 continue; 0418 } 0419 // Copied from default label to avoid warning (unannotated 0420 // fall-through between switch labels) 0421 parseState = FPData; 0422 break; 0423 default: 0424 parseState = FPData; 0425 break; 0426 } 0427 } 0428 data[i] = *p; 0429 i++; 0430 filterRuleEndPos++; 0431 p++; 0432 } 0433 0434 if (parseState == FPStart) { 0435 f->filterType = FTEmpty; 0436 return; 0437 } 0438 0439 if (preserveRules) { 0440 int ruleTextLength = filterRuleEndPos - filterRuleStart; 0441 f->ruleDefinition = new char[ruleTextLength + 1]; 0442 memcpy(f->ruleDefinition, filterRuleStart, ruleTextLength); 0443 f->ruleDefinition[ruleTextLength] = '\0'; 0444 } 0445 0446 data[i] = '\0'; 0447 f->data = new char[i + 1]; 0448 memcpy(f->data, data, i + 1); 0449 0450 char fingerprintBuffer[AdBlockClient::kFingerprintSize + 1]; 0451 fingerprintBuffer[AdBlockClient::kFingerprintSize] = '\0'; 0452 0453 if (f->filterType == FTElementHiding) { 0454 if (simpleCosmeticFilters && !f->domainList) { 0455 simpleCosmeticFilters->Add(CosmeticFilter(data)); 0456 } 0457 } else if (f->filterType == FTElementHidingException) { 0458 if (simpleCosmeticFilters && f->domainList) { 0459 simpleCosmeticFilters->Remove(CosmeticFilter(data)); 0460 } 0461 } else if (exceptionBloomFilter 0462 && (f->filterType & FTException) && (f->filterType & FTHostOnly)) { 0463 // cout << "add host anchored exception bloom filter: " << f->host << endl; 0464 hostAnchoredExceptionHashSet->Add(*f); 0465 } else if (hostAnchoredHashSet && (f->filterType & FTHostOnly)) { 0466 // cout << "add host anchored bloom filter: " << f->host << endl; 0467 hostAnchoredHashSet->Add(*f); 0468 } else if (AdBlockClient::getFingerprint(fingerprintBuffer, *f)) { 0469 if (exceptionBloomFilter && f->filterType & FTException) { 0470 exceptionBloomFilter->add(fingerprintBuffer); 0471 } else if (bloomFilter) { 0472 // cout << "add fingerprint: " << fingerprintBuffer 0473 // << ", from string: " << f->data << endl; 0474 bloomFilter->add(fingerprintBuffer); 0475 } 0476 } 0477 } 0478 0479 0480 AdBlockClient::AdBlockClient() : filters(nullptr), 0481 cosmeticFilters(nullptr), 0482 htmlFilters(nullptr), 0483 exceptionFilters(nullptr), 0484 noFingerprintFilters(nullptr), 0485 noFingerprintExceptionFilters(nullptr), 0486 noFingerprintDomainOnlyFilters(nullptr), 0487 noFingerprintAntiDomainOnlyFilters(nullptr), 0488 noFingerprintDomainOnlyExceptionFilters(nullptr), 0489 noFingerprintAntiDomainOnlyExceptionFilters(nullptr), 0490 numFilters(0), 0491 numCosmeticFilters(0), 0492 numHtmlFilters(0), 0493 numExceptionFilters(0), 0494 numNoFingerprintFilters(0), 0495 numNoFingerprintExceptionFilters(0), 0496 numNoFingerprintDomainOnlyFilters(0), 0497 numNoFingerprintAntiDomainOnlyFilters(0), 0498 numNoFingerprintDomainOnlyExceptionFilters(0), 0499 numNoFingerprintAntiDomainOnlyExceptionFilters(0), 0500 numHostAnchoredFilters(0), 0501 numHostAnchoredExceptionFilters(0), 0502 bloomFilter(nullptr), 0503 exceptionBloomFilter(nullptr), 0504 hostAnchoredHashSet(nullptr), 0505 hostAnchoredExceptionHashSet(nullptr), 0506 noFingerprintDomainHashSet(nullptr), 0507 noFingerprintAntiDomainHashSet(nullptr), 0508 noFingerprintDomainExceptionHashSet(nullptr), 0509 noFingerprintAntiDomainExceptionHashSet(nullptr), 0510 badFingerprintsHashSet(nullptr), 0511 numFalsePositives(0), 0512 numExceptionFalsePositives(0), 0513 numBloomFilterSaves(0), 0514 numExceptionBloomFilterSaves(0), 0515 numHashSetSaves(0), 0516 numExceptionHashSetSaves(0), 0517 deserializedBuffer(nullptr) { 0518 } 0519 0520 AdBlockClient::~AdBlockClient() { 0521 clear(); 0522 } 0523 0524 // Clears all data and stats from the AdBlockClient 0525 void AdBlockClient::clear() { 0526 if (filters) { 0527 delete[] filters; 0528 filters = nullptr; 0529 } 0530 if (cosmeticFilters) { 0531 delete[] cosmeticFilters; 0532 cosmeticFilters = nullptr; 0533 } 0534 if (htmlFilters) { 0535 delete[] htmlFilters; 0536 htmlFilters = nullptr; 0537 } 0538 if (exceptionFilters) { 0539 delete[] exceptionFilters; 0540 exceptionFilters = nullptr; 0541 } 0542 if (noFingerprintFilters) { 0543 delete[] noFingerprintFilters; 0544 noFingerprintFilters = nullptr; 0545 } 0546 if (noFingerprintExceptionFilters) { 0547 delete[] noFingerprintExceptionFilters; 0548 noFingerprintExceptionFilters = nullptr; 0549 } 0550 if (noFingerprintDomainOnlyFilters) { 0551 delete[] noFingerprintDomainOnlyFilters; 0552 noFingerprintDomainOnlyFilters = nullptr; 0553 } 0554 if (noFingerprintAntiDomainOnlyFilters) { 0555 delete[] noFingerprintAntiDomainOnlyFilters; 0556 noFingerprintAntiDomainOnlyFilters = nullptr; 0557 } 0558 if (noFingerprintDomainOnlyExceptionFilters) { 0559 delete[] noFingerprintDomainOnlyExceptionFilters; 0560 noFingerprintDomainOnlyExceptionFilters = nullptr; 0561 } 0562 if (noFingerprintAntiDomainOnlyExceptionFilters) { 0563 delete[] noFingerprintAntiDomainOnlyExceptionFilters; 0564 noFingerprintAntiDomainOnlyExceptionFilters = nullptr; 0565 } 0566 if (bloomFilter) { 0567 delete bloomFilter; 0568 bloomFilter = nullptr; 0569 } 0570 if (exceptionBloomFilter) { 0571 delete exceptionBloomFilter; 0572 exceptionBloomFilter = nullptr; 0573 } 0574 if (hostAnchoredHashSet) { 0575 delete hostAnchoredHashSet; 0576 hostAnchoredHashSet = nullptr; 0577 } 0578 if (hostAnchoredExceptionHashSet) { 0579 delete hostAnchoredExceptionHashSet; 0580 hostAnchoredExceptionHashSet = nullptr; 0581 } 0582 if (noFingerprintDomainHashSet) { 0583 delete noFingerprintDomainHashSet; 0584 noFingerprintDomainHashSet = nullptr; 0585 } 0586 if (noFingerprintAntiDomainHashSet) { 0587 delete noFingerprintAntiDomainHashSet; 0588 noFingerprintAntiDomainHashSet = nullptr; 0589 } 0590 if (noFingerprintDomainExceptionHashSet) { 0591 delete noFingerprintDomainExceptionHashSet; 0592 noFingerprintDomainExceptionHashSet = nullptr; 0593 } 0594 if (noFingerprintAntiDomainExceptionHashSet) { 0595 delete noFingerprintAntiDomainExceptionHashSet; 0596 noFingerprintAntiDomainExceptionHashSet = nullptr; 0597 } 0598 if (badFingerprintsHashSet) { 0599 delete badFingerprintsHashSet; 0600 badFingerprintsHashSet = nullptr; 0601 } 0602 0603 numFilters = 0; 0604 numCosmeticFilters = 0; 0605 numHtmlFilters = 0; 0606 numExceptionFilters = 0; 0607 numNoFingerprintFilters = 0; 0608 numNoFingerprintExceptionFilters = 0; 0609 numNoFingerprintDomainOnlyFilters = 0; 0610 numNoFingerprintAntiDomainOnlyFilters = 0; 0611 numNoFingerprintDomainOnlyExceptionFilters = 0; 0612 numNoFingerprintAntiDomainOnlyExceptionFilters = 0; 0613 numHostAnchoredFilters = 0; 0614 numHostAnchoredExceptionFilters = 0; 0615 numFalsePositives = 0; 0616 numExceptionFalsePositives = 0; 0617 numBloomFilterSaves = 0; 0618 numExceptionBloomFilterSaves = 0; 0619 numHashSetSaves = 0; 0620 numExceptionHashSetSaves = 0; 0621 } 0622 0623 bool AdBlockClient::hasMatchingFilters(Filter *filter, int numFilters, 0624 const char *input, 0625 int inputLen, 0626 FilterOption contextOption, 0627 const char *contextDomain, 0628 BloomFilter *inputBloomFilter, 0629 const char *inputHost, 0630 int inputHostLen, 0631 Filter **matchingFilter) { 0632 for (int i = 0; i < numFilters; i++) { 0633 if (filter->matches(input, inputLen, contextOption, 0634 contextDomain, inputBloomFilter, inputHost, inputHostLen)) { 0635 if (filter->tagLen == 0 || 0636 tagExists(std::string(filter->tag, filter->tagLen))) { 0637 if (matchingFilter) { 0638 *matchingFilter = filter; 0639 } 0640 return true; 0641 } 0642 } 0643 filter++; 0644 } 0645 if (matchingFilter) { 0646 *matchingFilter = nullptr; 0647 } 0648 return false; 0649 } 0650 0651 void discoverMatchingPrefix(BadFingerprintsHashSet *badFingerprintsHashSet, 0652 const char *str, 0653 BloomFilter *bloomFilter, 0654 int prefixLen = AdBlockClient::kFingerprintSize) { 0655 char sz[32]; 0656 memset(sz, 0, sizeof(sz)); 0657 int strLen = static_cast<int>(strlen(str)); 0658 for (int i = 0; i < strLen - prefixLen + 1; i++) { 0659 if (bloomFilter->exists(str + i, prefixLen)) { 0660 memcpy(sz, str + i, prefixLen); 0661 // cout << "Bad fingerprint: " << sz << endl; 0662 if (badFingerprintsHashSet) { 0663 badFingerprintsHashSet->Add(BadFingerprint(sz)); 0664 } 0665 // We only want the first bad fingerprint since that's the one 0666 // that led us here. 0667 // If you do all bad fingerprint detection here it will lead to too many 0668 // bad fingerprints, which then leads to too many no fingerprint rules. 0669 // And too many no fingerprint rules causes perf problems. 0670 return; 0671 } 0672 // memcpy(sz, str + i, prefixLen); 0673 // cout << "Good fingerprint: " << sz; 0674 } 0675 } 0676 0677 bool isNoFingerprintDomainHashSetMiss(HashSet<NoFingerprintDomain> *hashSet, 0678 const char *host, int hostLen) { 0679 if (!hashSet) { 0680 return false; 0681 } 0682 const char *start = host + hostLen; 0683 // Skip past the TLD 0684 while (start != host) { 0685 start--; 0686 if (*(start) == '.') { 0687 break; 0688 } 0689 } 0690 while (start != host) { 0691 if (*(start - 1) == '.') { 0692 if (hashSet->Find(NoFingerprintDomain(start, 0693 static_cast<int>(host + hostLen - start)))) { 0694 return false; 0695 } 0696 } 0697 start--; 0698 } 0699 return !hashSet->Find(NoFingerprintDomain(start, 0700 static_cast<int>(host + hostLen - start))); 0701 } 0702 0703 bool AdBlockClient::isHostAnchoredHashSetMiss(const char *input, int inputLen, 0704 HashSet<Filter> *hashSet, 0705 const char *inputHost, 0706 int inputHostLen, 0707 FilterOption contextOption, 0708 const char *contextDomain, 0709 Filter **foundFilter) { 0710 if (!hashSet) { 0711 return false; 0712 } 0713 0714 const char *start = inputHost + inputHostLen; 0715 // Skip past the TLD 0716 while (start != inputHost) { 0717 start--; 0718 if (*(start) == '.') { 0719 break; 0720 } 0721 } 0722 0723 while (start != inputHost) { 0724 if (*(start - 1) == '.') { 0725 Filter *filter = hashSet->Find(Filter(start, 0726 static_cast<int>(inputHost + inputHostLen - start), 0727 nullptr, start, inputHostLen - (start - inputHost))); 0728 if (filter && filter->matches(input, inputLen, 0729 contextOption, contextDomain)) { 0730 if (filter->tagLen == 0 || 0731 tagExists(std::string(filter->tag, filter->tagLen))) { 0732 if (foundFilter) { 0733 *foundFilter = filter; 0734 } 0735 return false; 0736 } 0737 } 0738 } 0739 start--; 0740 } 0741 0742 Filter *filter = hashSet->Find(Filter(start, 0743 static_cast<int>(inputHost + inputHostLen - start), nullptr, 0744 start, inputHostLen)); 0745 if (!filter) { 0746 return true; 0747 } 0748 bool result = !filter->matches(input, inputLen, contextOption, contextDomain); 0749 if (!result) { 0750 if (filter->tagLen > 0 && 0751 !tagExists(std::string(filter->tag, filter->tagLen))) { 0752 return true; 0753 } 0754 if (foundFilter) { 0755 *foundFilter = filter; 0756 } 0757 } 0758 return result; 0759 } 0760 0761 bool AdBlockClient::matches(const char* input, FilterOption contextOption, 0762 const char* contextDomain, Filter** matchedFilter, 0763 Filter** matchedExceptionFilter) { 0764 if (matchedFilter) { 0765 *matchedFilter = nullptr; 0766 } 0767 if (matchedExceptionFilter) { 0768 *matchedExceptionFilter = nullptr; 0769 } 0770 int inputLen = static_cast<int>(strlen(input)); 0771 0772 if (!isBlockableProtocol(input, inputLen)) { 0773 return false; 0774 } 0775 0776 int inputHostLen; 0777 const char *inputHost = getUrlHost(input, &inputHostLen); 0778 0779 int contextDomainLen = 0; 0780 if (contextDomain) { 0781 contextDomainLen = static_cast<int>(strlen(contextDomain)); 0782 } 0783 // If neither first party nor third party was specified, try to figure it out 0784 if (contextDomain && !(contextOption & (FOThirdParty | FONotThirdParty))) { 0785 if (isThirdPartyHost(contextDomain, contextDomainLen, 0786 inputHost, static_cast<int>(inputHostLen))) { 0787 contextOption = 0788 static_cast<FilterOption>(contextOption | FOThirdParty); 0789 } else { 0790 contextOption = 0791 static_cast<FilterOption>(contextOption | FONotThirdParty); 0792 } 0793 } 0794 0795 // Optimization for the manual filter checks which are needed. 0796 // Avoid having to check individual filters if the filter parts are not found 0797 // inside the input bloom filter. 0798 HashFn2Byte hashFns[] = { hashFn2Byte }; 0799 BloomFilter inputBloomFilter(10, 1024, hashFns, 1); 0800 for (int i = 1; i < inputLen; i++) { 0801 inputBloomFilter.add(input + i - 1, 2); 0802 } 0803 0804 // We always have to check noFingerprintFilters because the bloom filter opt 0805 // cannot be used for them 0806 bool hasMatch = false; 0807 0808 // Only bother checking the no fingerprint domain related filters if needed 0809 if (!isNoFingerprintDomainHashSetMiss( 0810 noFingerprintDomainHashSet, contextDomain, contextDomainLen)) { 0811 hasMatch = hasMatch || hasMatchingFilters(noFingerprintDomainOnlyFilters, 0812 numNoFingerprintDomainOnlyFilters, input, inputLen, contextOption, 0813 contextDomain, &inputBloomFilter, inputHost, inputHostLen, 0814 matchedFilter); 0815 } 0816 if (isNoFingerprintDomainHashSetMiss( 0817 noFingerprintAntiDomainHashSet, contextDomain, contextDomainLen)) { 0818 hasMatch = hasMatch || 0819 hasMatchingFilters(noFingerprintAntiDomainOnlyFilters, 0820 numNoFingerprintAntiDomainOnlyFilters, input, inputLen, contextOption, 0821 contextDomain, &inputBloomFilter, inputHost, inputHostLen, 0822 matchedFilter); 0823 } 0824 0825 hasMatch = hasMatch || hasMatchingFilters(noFingerprintFilters, 0826 numNoFingerprintFilters, input, inputLen, contextOption, 0827 contextDomain, &inputBloomFilter, inputHost, inputHostLen, 0828 matchedFilter); 0829 0830 // If no noFingerprintFilters were hit, check the bloom filter substring 0831 // fingerprint for the normal 0832 // filter list. If no substring exists for the input then we know for sure 0833 // the URL should not be blocked. 0834 bool bloomFilterMiss = false; 0835 bool hostAnchoredHashSetMiss = false; 0836 if (!hasMatch) { 0837 bloomFilterMiss = bloomFilter 0838 && !bloomFilter->substringExists(input, AdBlockClient::kFingerprintSize); 0839 hostAnchoredHashSetMiss = isHostAnchoredHashSetMiss(input, inputLen, 0840 hostAnchoredHashSet, inputHost, inputHostLen, 0841 contextOption, contextDomain, matchedFilter); 0842 if (bloomFilterMiss && hostAnchoredHashSetMiss) { 0843 if (bloomFilterMiss) { 0844 numBloomFilterSaves++; 0845 } 0846 if (hostAnchoredHashSetMiss) { 0847 numHashSetSaves++; 0848 } 0849 return false; 0850 } 0851 0852 hasMatch = !hostAnchoredHashSetMiss; 0853 } 0854 0855 // We need to check the filters list manually because there is either a match 0856 // or a false positive 0857 if (!hasMatch && !bloomFilterMiss) { 0858 hasMatch = hasMatchingFilters(filters, numFilters, input, inputLen, 0859 contextOption, contextDomain, &inputBloomFilter, 0860 inputHost, inputHostLen, matchedFilter); 0861 // If there's still no match after checking the block filters, then no need 0862 // to try to block this because there is a false positive. 0863 if (!hasMatch) { 0864 numFalsePositives++; 0865 if (badFingerprintsHashSet) { 0866 // cout << "false positive for input: " << input << " bloomFilterMiss: " 0867 // << bloomFilterMiss << ", hostAnchoredHashSetMiss: " 0868 // << hostAnchoredHashSetMiss << endl; 0869 discoverMatchingPrefix(badFingerprintsHashSet, input, bloomFilter); 0870 } 0871 return false; 0872 } 0873 } 0874 0875 bool hasExceptionMatch = false; 0876 0877 // Only bother checking the no fingerprint domain related filters if needed 0878 if (!isNoFingerprintDomainHashSetMiss( 0879 noFingerprintDomainExceptionHashSet, contextDomain, contextDomainLen)) { 0880 hasExceptionMatch = hasExceptionMatch || 0881 hasMatchingFilters(noFingerprintDomainOnlyExceptionFilters, 0882 numNoFingerprintDomainOnlyExceptionFilters, input, inputLen, 0883 contextOption, contextDomain, &inputBloomFilter, inputHost, 0884 inputHostLen, matchedExceptionFilter); 0885 } 0886 0887 if (isNoFingerprintDomainHashSetMiss( 0888 noFingerprintAntiDomainExceptionHashSet, contextDomain, 0889 contextDomainLen)) { 0890 hasExceptionMatch = hasExceptionMatch || 0891 hasMatchingFilters(noFingerprintAntiDomainOnlyExceptionFilters, 0892 numNoFingerprintAntiDomainOnlyExceptionFilters, input, inputLen, 0893 contextOption, contextDomain, &inputBloomFilter, inputHost, inputHostLen, 0894 matchedExceptionFilter); 0895 } 0896 0897 hasExceptionMatch = hasExceptionMatch || 0898 hasMatchingFilters(noFingerprintExceptionFilters, 0899 numNoFingerprintExceptionFilters, input, inputLen, contextOption, 0900 contextDomain, &inputBloomFilter, inputHost, inputHostLen, 0901 matchedExceptionFilter); 0902 0903 // If there's a matching no fingerprint exception then we can just return 0904 // right away because we shouldn't block 0905 if (hasExceptionMatch) { 0906 return false; 0907 } 0908 0909 bool bloomExceptionFilterMiss = exceptionBloomFilter 0910 && !exceptionBloomFilter->substringExists(input, 0911 AdBlockClient::kFingerprintSize); 0912 bool hostAnchoredExceptionHashSetMiss = 0913 isHostAnchoredHashSetMiss(input, inputLen, hostAnchoredExceptionHashSet, 0914 inputHost, inputHostLen, contextOption, contextDomain, 0915 matchedExceptionFilter); 0916 0917 // Now that we have a matching rule, we should check if no exception rule 0918 // hits, if none hits, we should block 0919 if (bloomExceptionFilterMiss && hostAnchoredExceptionHashSetMiss) { 0920 if (bloomExceptionFilterMiss) { 0921 numExceptionBloomFilterSaves++; 0922 } 0923 if (hostAnchoredExceptionHashSetMiss) { 0924 numExceptionHashSetSaves++; 0925 } 0926 return true; 0927 } 0928 0929 // If tehre wasn't an exception has set miss, it was a hit, and hash set is 0930 // deterministic so we shouldn't block this resource. 0931 if (!hostAnchoredExceptionHashSetMiss) { 0932 numExceptionHashSetSaves++; 0933 return false; 0934 } 0935 0936 if (!bloomExceptionFilterMiss) { 0937 if (!hasMatchingFilters(exceptionFilters, numExceptionFilters, input, 0938 inputLen, contextOption, contextDomain, 0939 &inputBloomFilter, inputHost, inputHostLen, 0940 matchedExceptionFilter)) { 0941 // False positive on the exception filter list 0942 numExceptionFalsePositives++; 0943 // cout << "exception false positive for input: " << input << endl; 0944 if (badFingerprintsHashSet) { 0945 discoverMatchingPrefix(badFingerprintsHashSet, 0946 input, exceptionBloomFilter); 0947 } 0948 return true; 0949 } 0950 } 0951 0952 return false; 0953 } 0954 0955 /** 0956 * Obtains the first matching filter or nullptr, and if one is found, finds 0957 * the first matching exception filter or nullptr. 0958 * 0959 * @return true if the filter should be blocked 0960 */ 0961 bool AdBlockClient::findMatchingFilters(const char *input, 0962 FilterOption contextOption, 0963 const char *contextDomain, 0964 Filter **matchingFilter, 0965 Filter **matchingExceptionFilter) { 0966 *matchingFilter = nullptr; 0967 *matchingExceptionFilter = nullptr; 0968 int inputLen = static_cast<int>(strlen(input)); 0969 int inputHostLen; 0970 const char *inputHost = getUrlHost(input, &inputHostLen); 0971 0972 int contextDomainLen = 0; 0973 if (contextDomain) { 0974 contextDomainLen = static_cast<int>(strlen(contextDomain)); 0975 } 0976 // If neither first party nor third party was specified, try to figure it out 0977 if (contextDomain && !(contextOption & (FOThirdParty | FONotThirdParty))) { 0978 if (isThirdPartyHost(contextDomain, contextDomainLen, 0979 inputHost, static_cast<int>(inputHostLen))) { 0980 contextOption = 0981 static_cast<FilterOption>(contextOption | FOThirdParty); 0982 } else { 0983 contextOption = 0984 static_cast<FilterOption>(contextOption | FONotThirdParty); 0985 } 0986 } 0987 0988 hasMatchingFilters(noFingerprintFilters, 0989 numNoFingerprintFilters, input, inputLen, contextOption, 0990 contextDomain, nullptr, 0991 inputHost, inputHostLen, matchingFilter); 0992 0993 if (!*matchingFilter) { 0994 hasMatchingFilters(noFingerprintDomainOnlyFilters, 0995 numNoFingerprintDomainOnlyFilters, input, inputLen, contextOption, 0996 contextDomain, nullptr, 0997 inputHost, inputHostLen, matchingFilter); 0998 } 0999 if (!*matchingFilter) { 1000 hasMatchingFilters(noFingerprintAntiDomainOnlyFilters, 1001 numNoFingerprintAntiDomainOnlyFilters, input, inputLen, contextOption, 1002 contextDomain, nullptr, 1003 inputHost, inputHostLen, matchingFilter); 1004 } 1005 1006 if (!*matchingFilter) { 1007 hasMatchingFilters(filters, 1008 numFilters, input, inputLen, contextOption, 1009 contextDomain, nullptr, 1010 inputHost, inputHostLen, matchingFilter); 1011 } 1012 1013 if (!*matchingFilter) { 1014 isHostAnchoredHashSetMiss(input, inputLen, 1015 hostAnchoredHashSet, inputHost, inputHostLen, 1016 contextOption, contextDomain, matchingFilter); 1017 } 1018 1019 if (!*matchingFilter) { 1020 return false; 1021 } 1022 1023 hasMatchingFilters(noFingerprintExceptionFilters, 1024 numNoFingerprintExceptionFilters, input, inputLen, contextOption, 1025 contextDomain, 1026 nullptr, inputHost, inputHostLen, matchingExceptionFilter); 1027 1028 if (!*matchingExceptionFilter) { 1029 hasMatchingFilters(noFingerprintDomainOnlyExceptionFilters, 1030 numNoFingerprintDomainOnlyExceptionFilters, input, inputLen, 1031 contextOption, contextDomain, nullptr, inputHost, inputHostLen, 1032 matchingExceptionFilter); 1033 } 1034 1035 if (!*matchingExceptionFilter) { 1036 hasMatchingFilters(noFingerprintAntiDomainOnlyExceptionFilters, 1037 numNoFingerprintAntiDomainOnlyExceptionFilters, input, inputLen, 1038 contextOption, contextDomain, nullptr, inputHost, inputHostLen, 1039 matchingExceptionFilter); 1040 } 1041 1042 if (!*matchingExceptionFilter) { 1043 isHostAnchoredHashSetMiss(input, inputLen, hostAnchoredExceptionHashSet, 1044 inputHost, inputHostLen, contextOption, contextDomain, 1045 matchingExceptionFilter); 1046 } 1047 1048 if (!*matchingExceptionFilter) { 1049 hasMatchingFilters(exceptionFilters, 1050 numExceptionFilters, input, inputLen, contextOption, 1051 contextDomain, 1052 nullptr, inputHost, inputHostLen, matchingExceptionFilter); 1053 } 1054 return !*matchingExceptionFilter; 1055 } 1056 1057 void AdBlockClient::initBloomFilter(BloomFilter **pp, 1058 const char *buffer, int len) { 1059 if (*pp) { 1060 delete *pp; 1061 } 1062 if (len > 0) { 1063 *pp = new BloomFilter(buffer, len); 1064 } 1065 } 1066 1067 template<class T> 1068 bool AdBlockClient::initHashSet(HashSet<T> **pp, char *buffer, int len) { 1069 if (*pp) { 1070 delete *pp; 1071 } 1072 if (len > 0) { 1073 *pp = new HashSet<T>(0, false); 1074 1075 return (*pp)->Deserialize(buffer, len); 1076 } 1077 1078 return true; 1079 } 1080 1081 void setFilterBorrowedMemory(Filter *filters, int numFilters) { 1082 for (int i = 0; i < numFilters; i++) { 1083 filters[i].borrowed_data = true; 1084 } 1085 } 1086 1087 // Parses the filter data into a few collections of filters and enables 1088 // efficent querying. 1089 bool AdBlockClient::parse(const char *input, bool preserveRules) { 1090 // If the user is parsing and we have regex support, 1091 // then we can determine the fingerprints for the bloom filter. 1092 // Otherwise it needs to be done manually via initBloomFilter and 1093 // initExceptionBloomFilter 1094 if (!bloomFilter) { 1095 bloomFilter = new BloomFilter(15, 80000); 1096 } 1097 if (!exceptionBloomFilter) { 1098 exceptionBloomFilter = new BloomFilter(10, 20000); 1099 } 1100 if (!hostAnchoredHashSet) { 1101 // Optimized to be 1:1 with the easylist / easyprivacy 1102 // number of host anchored hosts. 1103 hostAnchoredHashSet = new HashSet<Filter>(18000, false); 1104 } 1105 if (!hostAnchoredExceptionHashSet) { 1106 // Optimized to be 1:1 with the easylist / easyprivacy 1107 // number of host anchored exception hosts. 1108 hostAnchoredExceptionHashSet = new HashSet<Filter>(2000, false); 1109 } 1110 if (!noFingerprintDomainHashSet) { 1111 noFingerprintDomainHashSet = new HashSet<NoFingerprintDomain>(1000, false); 1112 } 1113 if (!noFingerprintAntiDomainHashSet) { 1114 noFingerprintAntiDomainHashSet = 1115 new HashSet<NoFingerprintDomain>(100, false); 1116 } 1117 if (!noFingerprintDomainExceptionHashSet) { 1118 noFingerprintDomainExceptionHashSet = 1119 new HashSet<NoFingerprintDomain>(1000, false); 1120 } 1121 if (!noFingerprintAntiDomainExceptionHashSet) { 1122 noFingerprintAntiDomainExceptionHashSet = 1123 new HashSet<NoFingerprintDomain>(100, false); 1124 } 1125 1126 const char *p = input; 1127 const char *lineStart = p; 1128 1129 int newNumFilters = 0; 1130 int newNumCosmeticFilters = 0; 1131 int newNumHtmlFilters = 0; 1132 int newNumExceptionFilters = 0; 1133 int newNumNoFingerprintFilters = 0; 1134 int newNumNoFingerprintExceptionFilters = 0; 1135 int newNumNoFingerprintDomainOnlyFilters = 0; 1136 int newNumNoFingerprintAntiDomainOnlyFilters = 0; 1137 int newNumNoFingerprintDomainOnlyExceptionFilters = 0; 1138 int newNumNoFingerprintAntiDomainOnlyExceptionFilters = 0; 1139 int newNumHostAnchoredFilters = 0; 1140 int newNumHostAnchoredExceptionFilters = 0; 1141 1142 // Simple cosmetic filters apply to all sites without exception 1143 HashSet<CosmeticFilter> simpleCosmeticFilters(1000, false); 1144 1145 // Parsing does 2 passes, one just to determine the type of information we'll 1146 // need to setup. Note that the library will be used on a variety of builds 1147 // so sometimes we won't even have STL So we can't use something like a vector 1148 // here. 1149 while (true) { 1150 if (isEndOfLine(*p) || *p == '\0') { 1151 Filter f; 1152 parseFilter(lineStart, p, &f); 1153 if (!f.hasUnsupportedOptions()) { 1154 switch (f.filterType & FTListTypesMask) { 1155 case FTException: 1156 if (f.filterType & FTHostOnly) { 1157 newNumHostAnchoredExceptionFilters++; 1158 } else if (AdBlockClient::getFingerprint(nullptr, f)) { 1159 newNumExceptionFilters++; 1160 } else if (f.isDomainOnlyFilter()) { 1161 newNumNoFingerprintDomainOnlyExceptionFilters++; 1162 } else if (f.isAntiDomainOnlyFilter()) { 1163 newNumNoFingerprintAntiDomainOnlyExceptionFilters++; 1164 } else { 1165 newNumNoFingerprintExceptionFilters++; 1166 } 1167 break; 1168 case FTElementHiding: 1169 newNumCosmeticFilters++; 1170 break; 1171 case FTElementHidingException: 1172 newNumCosmeticFilters++; 1173 break; 1174 case FTHTMLFiltering: 1175 newNumHtmlFilters++; 1176 break; 1177 case FTEmpty: 1178 case FTComment: 1179 // No need to store comments 1180 break; 1181 default: 1182 if (f.filterType & FTHostOnly) { 1183 newNumHostAnchoredFilters++; 1184 } else if (AdBlockClient::getFingerprint(nullptr, f)) { 1185 newNumFilters++; 1186 } else if (f.isDomainOnlyFilter()) { 1187 newNumNoFingerprintDomainOnlyFilters++; 1188 } else if (f.isAntiDomainOnlyFilter()) { 1189 newNumNoFingerprintAntiDomainOnlyFilters++; 1190 } else { 1191 newNumNoFingerprintFilters++; 1192 } 1193 break; 1194 } 1195 } 1196 lineStart = p + 1; 1197 } 1198 1199 if (*p == '\0') { 1200 break; 1201 } 1202 1203 p++; 1204 } 1205 1206 #ifdef PERF_STATS 1207 cout << "Fingerprint size: " << AdBlockClient::kFingerprintSize << endl; 1208 cout << "Num new filters: " << newNumFilters << endl; 1209 cout << "Num new cosmetic filters: " << newNumCosmeticFilters << endl; 1210 cout << "Num new HTML filters: " << newNumHtmlFilters << endl; 1211 cout << "Num new exception filters: " << newNumExceptionFilters << endl; 1212 cout << "Num new no fingerprint filters: " 1213 << newNumNoFingerprintFilters << endl; 1214 cout << "Num new no fingerprint exception filters: " 1215 << newNumNoFingerprintExceptionFilters << endl; 1216 cout << "Num new host anchored filters: " 1217 << newNumHostAnchoredFilters << endl; 1218 cout << "Num new host anchored exception filters: " 1219 << newNumHostAnchoredExceptionFilters << endl; 1220 cout << "Num new no fingerprint domain only filters: " 1221 << newNumNoFingerprintDomainOnlyFilters << endl; 1222 cout << "Num new no fingerprint anti-domain only filters: " 1223 << newNumNoFingerprintAntiDomainOnlyFilters << endl; 1224 cout << "Num new no fingerprint domain only exception filters: " 1225 << newNumNoFingerprintDomainOnlyExceptionFilters << endl; 1226 cout << "Num new no fingerprint anti-domain only exception filters: " 1227 << newNumNoFingerprintAntiDomainOnlyExceptionFilters << endl; 1228 #endif 1229 1230 Filter *newFilters = new Filter[newNumFilters + numFilters]; 1231 Filter *newCosmeticFilters = 1232 new Filter[newNumCosmeticFilters + numCosmeticFilters]; 1233 Filter *newHtmlFilters = 1234 new Filter[newNumHtmlFilters + numHtmlFilters]; 1235 Filter *newExceptionFilters = 1236 new Filter[newNumExceptionFilters + numExceptionFilters]; 1237 Filter *newNoFingerprintFilters = 1238 new Filter[newNumNoFingerprintFilters + numNoFingerprintFilters]; 1239 Filter *newNoFingerprintExceptionFilters = 1240 new Filter[newNumNoFingerprintExceptionFilters 1241 + numNoFingerprintExceptionFilters]; 1242 Filter *newNoFingerprintDomainOnlyFilters = 1243 new Filter[newNumNoFingerprintDomainOnlyFilters + 1244 numNoFingerprintDomainOnlyFilters]; 1245 Filter *newNoFingerprintAntiDomainOnlyFilters = 1246 new Filter[newNumNoFingerprintAntiDomainOnlyFilters + 1247 numNoFingerprintAntiDomainOnlyFilters]; 1248 Filter *newNoFingerprintDomainOnlyExceptionFilters = 1249 new Filter[newNumNoFingerprintDomainOnlyExceptionFilters 1250 + numNoFingerprintDomainOnlyExceptionFilters]; 1251 Filter *newNoFingerprintAntiDomainOnlyExceptionFilters = 1252 new Filter[newNumNoFingerprintAntiDomainOnlyExceptionFilters 1253 + numNoFingerprintAntiDomainOnlyExceptionFilters]; 1254 1255 Filter *curFilters = newFilters; 1256 Filter *curCosmeticFilters = newCosmeticFilters; 1257 Filter *curHtmlFilters = newHtmlFilters; 1258 Filter *curExceptionFilters = newExceptionFilters; 1259 Filter *curNoFingerprintFilters = newNoFingerprintFilters; 1260 Filter *curNoFingerprintExceptionFilters = newNoFingerprintExceptionFilters; 1261 Filter *curNoFingerprintDomainOnlyFilters = newNoFingerprintDomainOnlyFilters; 1262 Filter *curNoFingerprintAntiDomainOnlyFilters = 1263 newNoFingerprintAntiDomainOnlyFilters; 1264 Filter *curNoFingerprintDomainOnlyExceptionFilters = 1265 newNoFingerprintDomainOnlyExceptionFilters; 1266 Filter *curNoFingerprintAntiDomainOnlyExceptionFilters = 1267 newNoFingerprintAntiDomainOnlyExceptionFilters; 1268 1269 // If we've had a parse before copy the old data into the new data structure 1270 if (filters || cosmeticFilters || htmlFilters || exceptionFilters || 1271 noFingerprintFilters || noFingerprintExceptionFilters || 1272 noFingerprintDomainOnlyFilters || 1273 noFingerprintDomainOnlyExceptionFilters || 1274 noFingerprintAntiDomainOnlyFilters || 1275 noFingerprintAntiDomainOnlyExceptionFilters) { 1276 // Copy the old data in, we can't simply use memcpy here 1277 // since filtres manages some pointers that get deleted. 1278 for (int i = 0; i < numFilters; i++) { 1279 newFilters[i].swapData(&(filters[i])); 1280 } 1281 for (int i = 0; i < numCosmeticFilters; i++) { 1282 newCosmeticFilters[i].swapData(&(cosmeticFilters[i])); 1283 } 1284 for (int i = 0; i < numHtmlFilters; i++) { 1285 newHtmlFilters[i].swapData(&(htmlFilters[i])); 1286 } 1287 for (int i = 0; i < numExceptionFilters; i++) { 1288 newExceptionFilters[i].swapData(&(exceptionFilters[i])); 1289 } 1290 for (int i = 0; i < numNoFingerprintFilters; i++) { 1291 newNoFingerprintFilters[i].swapData(&(noFingerprintFilters[i])); 1292 } 1293 for (int i = 0; i < numNoFingerprintExceptionFilters; i++) { 1294 newNoFingerprintExceptionFilters[i].swapData( 1295 &(noFingerprintExceptionFilters[i])); 1296 } 1297 for (int i = 0; i < numNoFingerprintDomainOnlyFilters; i++) { 1298 newNoFingerprintDomainOnlyFilters[i].swapData( 1299 &(noFingerprintDomainOnlyFilters[i])); 1300 } 1301 for (int i = 0; i < numNoFingerprintAntiDomainOnlyFilters; i++) { 1302 newNoFingerprintAntiDomainOnlyFilters[i].swapData( 1303 &(noFingerprintAntiDomainOnlyFilters[i])); 1304 } 1305 for (int i = 0; i < numNoFingerprintDomainOnlyExceptionFilters; i++) { 1306 newNoFingerprintDomainOnlyExceptionFilters[i].swapData( 1307 &(noFingerprintDomainOnlyExceptionFilters[i])); 1308 } 1309 for (int i = 0; i < numNoFingerprintAntiDomainOnlyExceptionFilters; i++) { 1310 newNoFingerprintAntiDomainOnlyExceptionFilters[i].swapData( 1311 &(noFingerprintAntiDomainOnlyExceptionFilters[i])); 1312 } 1313 1314 // Free up the old memory for filter storage 1315 // Set the old filter lists borrwedMemory to true since it'll be taken by 1316 // the new filters. 1317 setFilterBorrowedMemory(filters, numFilters); 1318 setFilterBorrowedMemory(cosmeticFilters, numCosmeticFilters); 1319 setFilterBorrowedMemory(htmlFilters, numHtmlFilters); 1320 setFilterBorrowedMemory(exceptionFilters, numExceptionFilters); 1321 setFilterBorrowedMemory(noFingerprintFilters, numNoFingerprintFilters); 1322 setFilterBorrowedMemory(noFingerprintExceptionFilters, 1323 numNoFingerprintExceptionFilters); 1324 setFilterBorrowedMemory(noFingerprintDomainOnlyFilters, 1325 numNoFingerprintDomainOnlyFilters); 1326 setFilterBorrowedMemory(noFingerprintAntiDomainOnlyFilters, 1327 numNoFingerprintAntiDomainOnlyFilters); 1328 setFilterBorrowedMemory(noFingerprintDomainOnlyExceptionFilters, 1329 numNoFingerprintDomainOnlyExceptionFilters); 1330 setFilterBorrowedMemory(noFingerprintAntiDomainOnlyExceptionFilters, 1331 numNoFingerprintAntiDomainOnlyExceptionFilters); 1332 delete[] filters; 1333 delete[] cosmeticFilters; 1334 delete[] htmlFilters; 1335 delete[] exceptionFilters; 1336 delete[] noFingerprintFilters; 1337 delete[] noFingerprintExceptionFilters; 1338 delete[] noFingerprintDomainOnlyFilters; 1339 delete[] noFingerprintAntiDomainOnlyFilters; 1340 delete[] noFingerprintDomainOnlyExceptionFilters; 1341 delete[] noFingerprintAntiDomainOnlyExceptionFilters; 1342 1343 // Adjust the current pointers to be just after the copied in data 1344 curFilters += numFilters; 1345 curCosmeticFilters += numCosmeticFilters; 1346 curHtmlFilters += numHtmlFilters; 1347 curExceptionFilters += numExceptionFilters; 1348 curNoFingerprintFilters += numNoFingerprintFilters; 1349 curNoFingerprintExceptionFilters += numNoFingerprintExceptionFilters; 1350 curNoFingerprintDomainOnlyFilters += numNoFingerprintDomainOnlyFilters; 1351 curNoFingerprintAntiDomainOnlyFilters += 1352 numNoFingerprintAntiDomainOnlyFilters; 1353 curNoFingerprintDomainOnlyExceptionFilters += 1354 numNoFingerprintDomainOnlyExceptionFilters; 1355 curNoFingerprintAntiDomainOnlyExceptionFilters += 1356 numNoFingerprintAntiDomainOnlyExceptionFilters; 1357 } 1358 1359 // And finally update with the new counts 1360 numFilters += newNumFilters; 1361 numCosmeticFilters += newNumCosmeticFilters; 1362 numHtmlFilters += newNumHtmlFilters; 1363 numExceptionFilters += newNumExceptionFilters; 1364 numNoFingerprintFilters += newNumNoFingerprintFilters; 1365 numNoFingerprintExceptionFilters += newNumNoFingerprintExceptionFilters; 1366 numNoFingerprintDomainOnlyFilters += newNumNoFingerprintDomainOnlyFilters; 1367 numNoFingerprintAntiDomainOnlyFilters += 1368 newNumNoFingerprintAntiDomainOnlyFilters; 1369 numNoFingerprintDomainOnlyExceptionFilters += 1370 newNumNoFingerprintDomainOnlyExceptionFilters; 1371 numNoFingerprintAntiDomainOnlyExceptionFilters += 1372 newNumNoFingerprintAntiDomainOnlyExceptionFilters; 1373 numHostAnchoredFilters += newNumHostAnchoredFilters; 1374 numHostAnchoredExceptionFilters += newNumHostAnchoredExceptionFilters; 1375 1376 // Adjust the new member list pointers 1377 filters = newFilters; 1378 cosmeticFilters = newCosmeticFilters; 1379 htmlFilters = newHtmlFilters; 1380 exceptionFilters = newExceptionFilters; 1381 noFingerprintFilters = newNoFingerprintFilters; 1382 noFingerprintExceptionFilters = newNoFingerprintExceptionFilters; 1383 noFingerprintDomainOnlyFilters = newNoFingerprintDomainOnlyFilters; 1384 noFingerprintAntiDomainOnlyFilters = newNoFingerprintAntiDomainOnlyFilters; 1385 noFingerprintDomainOnlyExceptionFilters = 1386 newNoFingerprintDomainOnlyExceptionFilters; 1387 noFingerprintAntiDomainOnlyExceptionFilters = 1388 newNoFingerprintAntiDomainOnlyExceptionFilters; 1389 1390 p = input; 1391 lineStart = p; 1392 1393 while (true) { 1394 if (isEndOfLine(*p) || *p == '\0') { 1395 Filter f; 1396 parseFilter(lineStart, p, &f, bloomFilter, exceptionBloomFilter, 1397 hostAnchoredHashSet, 1398 hostAnchoredExceptionHashSet, 1399 &simpleCosmeticFilters, 1400 preserveRules); 1401 if (!f.hasUnsupportedOptions()) { 1402 switch (f.filterType & FTListTypesMask) { 1403 case FTException: 1404 if (f.filterType & FTHostOnly) { 1405 // do nothing, handled by hash set. 1406 } else if (AdBlockClient::getFingerprint(nullptr, f)) { 1407 (*curExceptionFilters).swapData(&f); 1408 curExceptionFilters++; 1409 } else if (f.isDomainOnlyFilter()) { 1410 AddFilterDomainsToHashSet(&f, 1411 noFingerprintDomainExceptionHashSet); 1412 (*curNoFingerprintDomainOnlyExceptionFilters).swapData(&f); 1413 curNoFingerprintDomainOnlyExceptionFilters++; 1414 } else if (f.isAntiDomainOnlyFilter()) { 1415 AddFilterDomainsToHashSet(&f, 1416 noFingerprintAntiDomainExceptionHashSet); 1417 (*curNoFingerprintAntiDomainOnlyExceptionFilters).swapData(&f); 1418 curNoFingerprintAntiDomainOnlyExceptionFilters++; 1419 } else { 1420 (*curNoFingerprintExceptionFilters).swapData(&f); 1421 curNoFingerprintExceptionFilters++; 1422 } 1423 break; 1424 case FTElementHiding: 1425 case FTElementHidingException: 1426 (*curCosmeticFilters).swapData(&f); 1427 curCosmeticFilters++; 1428 break; 1429 case FTHTMLFiltering: 1430 (*curHtmlFilters).swapData(&f); 1431 curHtmlFilters++; 1432 break; 1433 case FTEmpty: 1434 case FTComment: 1435 // No need to store 1436 break; 1437 default: 1438 if (f.filterType & FTHostOnly) { 1439 // Do nothing 1440 } else if (AdBlockClient::getFingerprint(nullptr, f)) { 1441 (*curFilters).swapData(&f); 1442 curFilters++; 1443 } else if (f.isDomainOnlyFilter()) { 1444 AddFilterDomainsToHashSet(&f, 1445 noFingerprintDomainHashSet); 1446 (*curNoFingerprintDomainOnlyFilters).swapData(&f); 1447 curNoFingerprintDomainOnlyFilters++; 1448 } else if (f.isAntiDomainOnlyFilter()) { 1449 AddFilterDomainsToHashSet(&f, 1450 noFingerprintAntiDomainHashSet); 1451 (*curNoFingerprintAntiDomainOnlyFilters).swapData(&f); 1452 curNoFingerprintAntiDomainOnlyFilters++; 1453 } else { 1454 (*curNoFingerprintFilters).swapData(&f); 1455 curNoFingerprintFilters++; 1456 } 1457 break; 1458 } 1459 } 1460 lineStart = p + 1; 1461 } 1462 1463 if (*p == '\0') { 1464 break; 1465 } 1466 1467 p++; 1468 } 1469 1470 #ifdef PERF_STATS 1471 cout << "Simple cosmetic filter size: " 1472 << simpleCosmeticFilters.GetSize() << endl; 1473 #endif 1474 1475 return true; 1476 } 1477 1478 void AdBlockClient::addTag(const std::string &tag) { 1479 if (tags.find(tag) == tags.end()) { 1480 tags.insert(tag); 1481 } 1482 } 1483 1484 void AdBlockClient::removeTag(const std::string &tag) { 1485 auto it = tags.find(tag); 1486 if (it != tags.end()) { 1487 tags.erase(it); 1488 } 1489 } 1490 1491 bool AdBlockClient::tagExists(const std::string &tag) const { 1492 return tags.find(tag) != tags.end(); 1493 } 1494 1495 // Fills the specified buffer if specified, returns the number of characters 1496 // written or needed 1497 int serializeFilters(char * buffer, size_t bufferSizeAvail, 1498 Filter *f, int numFilters) { 1499 char sz[256]; 1500 int bufferSize = 0; 1501 for (int i = 0; i < numFilters; i++) { 1502 int sprintfLen = snprintf(sz, sizeof(sz), "%x,%x,%x", 1503 static_cast<int>(f->filterType), static_cast<int>(f->filterOption), 1504 static_cast<int>(f->antiFilterOption)); 1505 if (buffer) { 1506 snprintf(buffer + bufferSize, bufferSizeAvail, "%s", sz); 1507 } 1508 bufferSize += sprintfLen; 1509 // Extra null termination 1510 bufferSize++; 1511 1512 if (f->data) { 1513 if (buffer) { 1514 snprintf(buffer + bufferSize, bufferSizeAvail, "%s", f->data); 1515 } 1516 bufferSize += static_cast<int>(strlen(f->data)); 1517 } 1518 bufferSize++; 1519 1520 if (f->tagLen > 0) { 1521 if (buffer) { 1522 buffer[bufferSize] = '~'; 1523 buffer[bufferSize + 1] = '#'; 1524 memcpy(buffer + bufferSize + 2, f->tag, f->tagLen); 1525 buffer[bufferSize + 2 + f->tagLen] = ','; 1526 } 1527 bufferSize += f->tagLen + 3; 1528 } 1529 1530 if (f->domainList) { 1531 if (buffer) { 1532 snprintf(buffer + bufferSize, bufferSizeAvail, "%s", f->domainList); 1533 } 1534 bufferSize += static_cast<int>(strlen(f->domainList)); 1535 } 1536 // Extra null termination 1537 bufferSize++; 1538 if (f->host) { 1539 if (buffer) { 1540 snprintf(buffer + bufferSize, bufferSizeAvail, "%s", f->host); 1541 } 1542 bufferSize += static_cast<int>(strlen(f->host)); 1543 } 1544 // Extra null termination 1545 bufferSize++; 1546 f++; 1547 } 1548 return bufferSize; 1549 } 1550 1551 // Returns a newly allocated buffer, caller must manually delete[] the buffer 1552 char * AdBlockClient::serialize(int *totalSize, 1553 bool ignoreCosmeticFilters, 1554 bool ignoreHtmlFilters) { 1555 *totalSize = 0; 1556 int adjustedNumCosmeticFilters = 1557 ignoreCosmeticFilters ? 0 : numCosmeticFilters; 1558 int adjustedNumHtmlFilters = ignoreHtmlFilters ? 0 : numHtmlFilters; 1559 1560 uint32_t hostAnchoredHashSetSize = 0; 1561 char *hostAnchoredHashSetBuffer = nullptr; 1562 if (hostAnchoredHashSet) { 1563 hostAnchoredHashSetBuffer = 1564 hostAnchoredHashSet->Serialize(&hostAnchoredHashSetSize); 1565 } 1566 1567 uint32_t hostAnchoredExceptionHashSetSize = 0; 1568 char *hostAnchoredExceptionHashSetBuffer = nullptr; 1569 if (hostAnchoredExceptionHashSet) { 1570 hostAnchoredExceptionHashSetBuffer = 1571 hostAnchoredExceptionHashSet->Serialize( 1572 &hostAnchoredExceptionHashSetSize); 1573 } 1574 1575 uint32_t noFingerprintDomainHashSetSize = 0; 1576 char *noFingerprintDomainHashSetBuffer = nullptr; 1577 if (noFingerprintDomainHashSet) { 1578 noFingerprintDomainHashSetBuffer = 1579 noFingerprintDomainHashSet->Serialize(&noFingerprintDomainHashSetSize); 1580 } 1581 1582 uint32_t noFingerprintAntiDomainHashSetSize = 0; 1583 char *noFingerprintAntiDomainHashSetBuffer = nullptr; 1584 if (noFingerprintAntiDomainHashSet) { 1585 noFingerprintAntiDomainHashSetBuffer = 1586 noFingerprintAntiDomainHashSet->Serialize( 1587 &noFingerprintAntiDomainHashSetSize); 1588 } 1589 1590 uint32_t noFingerprintDomainExceptionHashSetSize = 0; 1591 char *noFingerprintDomainExceptionHashSetBuffer = nullptr; 1592 if (noFingerprintDomainExceptionHashSet) { 1593 noFingerprintDomainExceptionHashSetBuffer = 1594 noFingerprintDomainExceptionHashSet->Serialize( 1595 &noFingerprintDomainExceptionHashSetSize); 1596 } 1597 1598 uint32_t noFingerprintAntiDomainExceptionHashSetSize = 0; 1599 char *noFingerprintAntiDomainExceptionHashSetBuffer = nullptr; 1600 if (noFingerprintAntiDomainExceptionHashSet) { 1601 noFingerprintAntiDomainExceptionHashSetBuffer = 1602 noFingerprintAntiDomainExceptionHashSet->Serialize( 1603 &noFingerprintAntiDomainExceptionHashSetSize); 1604 } 1605 1606 // Get the number of bytes that we'll need 1607 char sz[512]; 1608 *totalSize += 1 + snprintf(sz, sizeof(sz), 1609 "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x", 1610 numFilters, 1611 numExceptionFilters, adjustedNumCosmeticFilters, adjustedNumHtmlFilters, 1612 numNoFingerprintFilters, numNoFingerprintExceptionFilters, 1613 numNoFingerprintDomainOnlyFilters, 1614 numNoFingerprintAntiDomainOnlyFilters, 1615 numNoFingerprintDomainOnlyExceptionFilters, 1616 numNoFingerprintAntiDomainOnlyExceptionFilters, 1617 numHostAnchoredFilters, numHostAnchoredExceptionFilters, 1618 bloomFilter ? bloomFilter->getByteBufferSize() : 0, exceptionBloomFilter 1619 ? exceptionBloomFilter->getByteBufferSize() : 0, 1620 hostAnchoredHashSetSize, hostAnchoredExceptionHashSetSize, 1621 noFingerprintDomainHashSetSize, 1622 noFingerprintAntiDomainHashSetSize, 1623 noFingerprintDomainExceptionHashSetSize, 1624 noFingerprintAntiDomainExceptionHashSetSize); 1625 *totalSize += serializeFilters(nullptr, 0, filters, numFilters) + 1626 serializeFilters(nullptr, 0, exceptionFilters, numExceptionFilters) + 1627 serializeFilters(nullptr, 0, cosmeticFilters, adjustedNumCosmeticFilters) + 1628 serializeFilters(nullptr, 0, htmlFilters, adjustedNumHtmlFilters) + 1629 serializeFilters(nullptr, 0, 1630 noFingerprintFilters, numNoFingerprintFilters) + 1631 serializeFilters(nullptr, 0, noFingerprintExceptionFilters, 1632 numNoFingerprintExceptionFilters) + 1633 serializeFilters(nullptr, 0, 1634 noFingerprintDomainOnlyFilters, numNoFingerprintDomainOnlyFilters) + 1635 serializeFilters(nullptr, 0, 1636 noFingerprintAntiDomainOnlyFilters, 1637 numNoFingerprintAntiDomainOnlyFilters) + 1638 serializeFilters(nullptr, 0, noFingerprintDomainOnlyExceptionFilters, 1639 numNoFingerprintDomainOnlyExceptionFilters) + 1640 serializeFilters(nullptr, 0, noFingerprintAntiDomainOnlyExceptionFilters, 1641 numNoFingerprintAntiDomainOnlyExceptionFilters); 1642 1643 *totalSize += bloomFilter ? bloomFilter->getByteBufferSize() : 0; 1644 *totalSize += exceptionBloomFilter 1645 ? exceptionBloomFilter->getByteBufferSize() : 0; 1646 *totalSize += hostAnchoredHashSetSize; 1647 *totalSize += hostAnchoredExceptionHashSetSize; 1648 *totalSize += noFingerprintDomainHashSetSize; 1649 *totalSize += noFingerprintAntiDomainHashSetSize; 1650 *totalSize += noFingerprintDomainExceptionHashSetSize; 1651 *totalSize += noFingerprintAntiDomainExceptionHashSetSize; 1652 1653 // Allocate it 1654 int pos = 0; 1655 char *buffer = new char[*totalSize]; 1656 memset(buffer, 0, *totalSize); 1657 1658 // And start copying stuff in 1659 snprintf(buffer, *totalSize, "%s", sz); 1660 pos += static_cast<int>(strlen(sz)) + 1; 1661 pos += serializeFilters(buffer + pos, *totalSize - pos, filters, numFilters); 1662 pos += serializeFilters(buffer + pos, *totalSize - pos, 1663 exceptionFilters, numExceptionFilters); 1664 pos += serializeFilters(buffer + pos, *totalSize - pos, cosmeticFilters, 1665 adjustedNumCosmeticFilters); 1666 pos += serializeFilters(buffer + pos, *totalSize - pos, htmlFilters, 1667 adjustedNumHtmlFilters); 1668 pos += serializeFilters(buffer + pos, *totalSize - pos, noFingerprintFilters, 1669 numNoFingerprintFilters); 1670 pos += serializeFilters(buffer + pos, *totalSize - pos, 1671 noFingerprintExceptionFilters, numNoFingerprintExceptionFilters); 1672 pos += serializeFilters(buffer + pos, *totalSize - pos, 1673 noFingerprintDomainOnlyFilters, 1674 numNoFingerprintDomainOnlyFilters); 1675 pos += serializeFilters(buffer + pos, *totalSize - pos, 1676 noFingerprintAntiDomainOnlyFilters, 1677 numNoFingerprintAntiDomainOnlyFilters); 1678 pos += serializeFilters(buffer + pos, *totalSize - pos, 1679 noFingerprintDomainOnlyExceptionFilters, 1680 numNoFingerprintDomainOnlyExceptionFilters); 1681 pos += serializeFilters(buffer + pos, *totalSize - pos, 1682 noFingerprintAntiDomainOnlyExceptionFilters, 1683 numNoFingerprintAntiDomainOnlyExceptionFilters); 1684 1685 if (bloomFilter) { 1686 memcpy(buffer + pos, bloomFilter->getBuffer(), 1687 bloomFilter->getByteBufferSize()); 1688 pos += bloomFilter->getByteBufferSize(); 1689 } 1690 if (exceptionBloomFilter) { 1691 memcpy(buffer + pos, exceptionBloomFilter->getBuffer(), 1692 exceptionBloomFilter->getByteBufferSize()); 1693 pos += exceptionBloomFilter->getByteBufferSize(); 1694 } 1695 if (hostAnchoredHashSet) { 1696 memcpy(buffer + pos, hostAnchoredHashSetBuffer, hostAnchoredHashSetSize); 1697 pos += hostAnchoredHashSetSize; 1698 delete[] hostAnchoredHashSetBuffer; 1699 } 1700 if (hostAnchoredExceptionHashSet) { 1701 memcpy(buffer + pos, hostAnchoredExceptionHashSetBuffer, 1702 hostAnchoredExceptionHashSetSize); 1703 pos += hostAnchoredExceptionHashSetSize; 1704 delete[] hostAnchoredExceptionHashSetBuffer; 1705 } 1706 if (noFingerprintDomainHashSet) { 1707 memcpy(buffer + pos, noFingerprintDomainHashSetBuffer, 1708 noFingerprintDomainHashSetSize); 1709 pos += noFingerprintDomainHashSetSize; 1710 delete[] noFingerprintDomainHashSetBuffer; 1711 } 1712 if (noFingerprintAntiDomainHashSet) { 1713 memcpy(buffer + pos, noFingerprintAntiDomainHashSetBuffer, 1714 noFingerprintAntiDomainHashSetSize); 1715 pos += noFingerprintAntiDomainHashSetSize; 1716 delete[] noFingerprintAntiDomainHashSetBuffer; 1717 } 1718 if (noFingerprintDomainExceptionHashSet) { 1719 memcpy(buffer + pos, noFingerprintDomainExceptionHashSetBuffer, 1720 noFingerprintDomainExceptionHashSetSize); 1721 pos += noFingerprintDomainExceptionHashSetSize; 1722 delete[] noFingerprintDomainExceptionHashSetBuffer; 1723 } 1724 if (noFingerprintAntiDomainExceptionHashSet) { 1725 memcpy(buffer + pos, noFingerprintAntiDomainExceptionHashSetBuffer, 1726 noFingerprintAntiDomainExceptionHashSetSize); 1727 pos += noFingerprintAntiDomainExceptionHashSetSize; 1728 delete[] noFingerprintAntiDomainExceptionHashSetBuffer; 1729 } 1730 1731 return buffer; 1732 } 1733 1734 // Fills the specified buffer if specified, returns the number of characters 1735 // written or needed 1736 int deserializeFilters(char *buffer, Filter *f, int numFilters) { 1737 int pos = 0; 1738 for (int i = 0; i < numFilters; i++) { 1739 f->borrowed_data = true; 1740 sscanf(buffer + pos, "%x,%x,%x", 1741 reinterpret_cast<unsigned int*>(&f->filterType), 1742 reinterpret_cast<unsigned int*>(&f->filterOption), 1743 reinterpret_cast<unsigned int*>(&f->antiFilterOption)); 1744 pos += static_cast<int>(strlen(buffer + pos)) + 1; 1745 1746 if (*(buffer + pos) == '\0') { 1747 f->data = nullptr; 1748 } else { 1749 f->data = buffer + pos; 1750 pos += static_cast<int>(strlen(f->data)); 1751 } 1752 pos++; 1753 1754 // If the domain section starts with a # then we're in a tag 1755 // block. 1756 if (buffer[pos] == '~' && buffer[pos + 1] == '#') { 1757 pos += 2; 1758 f->tag = buffer + pos; 1759 f->tagLen = 0; 1760 while (buffer[pos + f->tagLen] != '\0') { 1761 if (buffer[pos + f->tagLen] == ',') { 1762 pos += f->tagLen + 1; 1763 break; 1764 } 1765 f->tagLen++; 1766 } 1767 } 1768 1769 if (*(buffer + pos) == '\0') { 1770 f->domainList = nullptr; 1771 } else { 1772 f->domainList = buffer + pos; 1773 pos += static_cast<int>(strlen(f->domainList)); 1774 } 1775 pos++; 1776 1777 if (*(buffer + pos) == '\0') { 1778 f->host = nullptr; 1779 } else { 1780 f->host = buffer + pos; 1781 pos += static_cast<int>(strlen(f->host)); 1782 } 1783 pos++; 1784 f++; 1785 } 1786 return pos; 1787 } 1788 1789 bool AdBlockClient::deserialize(char *buffer) { 1790 clear(); 1791 deserializedBuffer = buffer; 1792 int bloomFilterSize = 0, exceptionBloomFilterSize = 0, 1793 hostAnchoredHashSetSize = 0, hostAnchoredExceptionHashSetSize = 0, 1794 noFingerprintDomainHashSetSize = 0, 1795 noFingerprintAntiDomainHashSetSize = 0, 1796 noFingerprintDomainExceptionHashSetSize = 0, 1797 noFingerprintAntiDomainExceptionHashSetSize = 0; 1798 int pos = 0; 1799 sscanf(buffer + pos, 1800 "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x", 1801 (unsigned int*)&numFilters, 1802 (unsigned int*)&numExceptionFilters, (unsigned int*)&numCosmeticFilters, (unsigned int*)&numHtmlFilters, 1803 (unsigned int*)&numNoFingerprintFilters, (unsigned int*)&numNoFingerprintExceptionFilters, 1804 (unsigned int*)&numNoFingerprintDomainOnlyFilters, 1805 (unsigned int*)&numNoFingerprintAntiDomainOnlyFilters, 1806 (unsigned int*)&numNoFingerprintDomainOnlyExceptionFilters, 1807 (unsigned int*)&numNoFingerprintAntiDomainOnlyExceptionFilters, 1808 (unsigned int*)&numHostAnchoredFilters, (unsigned int*)&numHostAnchoredExceptionFilters, 1809 (unsigned int*)&bloomFilterSize, (unsigned int*)&exceptionBloomFilterSize, 1810 (unsigned int*)&hostAnchoredHashSetSize, (unsigned int*)&hostAnchoredExceptionHashSetSize, 1811 (unsigned int*)&noFingerprintDomainHashSetSize, 1812 (unsigned int*)&noFingerprintAntiDomainHashSetSize, 1813 (unsigned int*)&noFingerprintDomainExceptionHashSetSize, 1814 (unsigned int*)&noFingerprintAntiDomainExceptionHashSetSize); 1815 pos += static_cast<int>(strlen(buffer + pos)) + 1; 1816 1817 filters = new Filter[numFilters]; 1818 exceptionFilters = new Filter[numExceptionFilters]; 1819 cosmeticFilters = new Filter[numCosmeticFilters]; 1820 htmlFilters = new Filter[numHtmlFilters]; 1821 noFingerprintFilters = new Filter[numNoFingerprintFilters]; 1822 noFingerprintExceptionFilters = new Filter[numNoFingerprintExceptionFilters]; 1823 noFingerprintDomainOnlyFilters = 1824 new Filter[numNoFingerprintDomainOnlyFilters]; 1825 noFingerprintAntiDomainOnlyFilters = 1826 new Filter[numNoFingerprintAntiDomainOnlyFilters]; 1827 noFingerprintDomainOnlyExceptionFilters = 1828 new Filter[numNoFingerprintDomainOnlyExceptionFilters]; 1829 noFingerprintAntiDomainOnlyExceptionFilters = 1830 new Filter[numNoFingerprintAntiDomainOnlyExceptionFilters]; 1831 1832 pos += deserializeFilters(buffer + pos, filters, numFilters); 1833 pos += deserializeFilters(buffer + pos, 1834 exceptionFilters, numExceptionFilters); 1835 pos += deserializeFilters(buffer + pos, 1836 cosmeticFilters, numCosmeticFilters); 1837 pos += deserializeFilters(buffer + pos, 1838 htmlFilters, numHtmlFilters); 1839 pos += deserializeFilters(buffer + pos, 1840 noFingerprintFilters, numNoFingerprintFilters); 1841 pos += deserializeFilters(buffer + pos, 1842 noFingerprintExceptionFilters, numNoFingerprintExceptionFilters); 1843 1844 pos += deserializeFilters(buffer + pos, 1845 noFingerprintDomainOnlyFilters, numNoFingerprintDomainOnlyFilters); 1846 pos += deserializeFilters(buffer + pos, 1847 noFingerprintAntiDomainOnlyFilters, 1848 numNoFingerprintAntiDomainOnlyFilters); 1849 pos += deserializeFilters(buffer + pos, 1850 noFingerprintDomainOnlyExceptionFilters, 1851 numNoFingerprintDomainOnlyExceptionFilters); 1852 pos += deserializeFilters(buffer + pos, 1853 noFingerprintAntiDomainOnlyExceptionFilters, 1854 numNoFingerprintAntiDomainOnlyExceptionFilters); 1855 1856 initBloomFilter(&bloomFilter, buffer + pos, bloomFilterSize); 1857 pos += bloomFilterSize; 1858 initBloomFilter(&exceptionBloomFilter, 1859 buffer + pos, exceptionBloomFilterSize); 1860 pos += exceptionBloomFilterSize; 1861 if (!initHashSet(&hostAnchoredHashSet, 1862 buffer + pos, hostAnchoredHashSetSize)) { 1863 return false; 1864 } 1865 pos += hostAnchoredHashSetSize; 1866 if (!initHashSet(&hostAnchoredExceptionHashSet, 1867 buffer + pos, hostAnchoredExceptionHashSetSize)) { 1868 return false; 1869 } 1870 pos += hostAnchoredExceptionHashSetSize; 1871 1872 1873 if (!initHashSet(&noFingerprintDomainHashSet, 1874 buffer + pos, noFingerprintDomainHashSetSize)) { 1875 return false; 1876 } 1877 pos += noFingerprintDomainHashSetSize; 1878 1879 if (!initHashSet(&noFingerprintAntiDomainHashSet, 1880 buffer + pos, noFingerprintAntiDomainHashSetSize)) { 1881 return false; 1882 } 1883 pos += noFingerprintAntiDomainHashSetSize; 1884 1885 if (!initHashSet(&noFingerprintDomainExceptionHashSet, 1886 buffer + pos, noFingerprintDomainExceptionHashSetSize)) { 1887 return false; 1888 } 1889 pos += noFingerprintDomainExceptionHashSetSize; 1890 1891 if (!initHashSet(&noFingerprintAntiDomainExceptionHashSet, 1892 buffer + pos, noFingerprintAntiDomainExceptionHashSetSize)) { 1893 return false; 1894 } 1895 pos += noFingerprintAntiDomainExceptionHashSetSize; 1896 1897 return true; 1898 } 1899 1900 void AdBlockClient::enableBadFingerprintDetection() { 1901 if (badFingerprintsHashSet) { 1902 return; 1903 } 1904 1905 badFingerprintsHashSet = new BadFingerprintsHashSet(); 1906 for (unsigned int i = 0; i < sizeof(badFingerprints) 1907 / sizeof(badFingerprints[0]); i++) { 1908 badFingerprintsHashSet->Add(BadFingerprint(badFingerprints[i])); 1909 } 1910 } 1911 1912 uint64_t HashFn2Byte::operator()(const char *input, int len, 1913 unsigned char lastCharCode, uint64_t lastHash) { 1914 UNUSED(len); 1915 UNUSED(lastCharCode); 1916 UNUSED(lastHash); 1917 return (((uint64_t)input[1]) << 8) | input[0]; } 1918 1919 uint64_t HashFn2Byte::operator()(const char *input, int len) { 1920 UNUSED(len); 1921 return (((uint64_t)input[1]) << 8) | input[0]; 1922 }