File indexing completed on 2024-11-17 04:55:16

0001 /*
0002     SPDX-License-Identifier: MPL-2.0
0003 */
0004 
0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license.
0006  * This Source Code Form is subject to the terms of the Mozilla Public
0007  * License, v. 2.0. If a copy of the MPL was not distributed with this
0008  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
0009 
0010 #include <string.h>
0011 #include <stdio.h>
0012 #include "./protocol.h"
0013 #include "./ad_block_client.h"
0014 #include "./bad_fingerprint.h"
0015 #include "./bad_fingerprints.h"
0016 #include "./cosmetic_filter.h"
0017 #include "./hashFn.h"
0018 #include "./no_fingerprint_domain.h"
0019 
0020 #include "BloomFilter.h"
0021 
0022 #ifdef PERF_STATS
0023 #include <iostream>
0024 using std::cout;
0025 using std::endl;
0026 #endif
0027 
// Evaluates x and discards the result by casting it to void; used to mark a
// parameter or variable as intentionally unused.
#define UNUSED(x) ( (void)(x) )

// Global set of option strings. Nothing in the visible part of this file
// reads or writes it.
// NOTE(review): presumably filled by the filter option parser with options
// it does not recognise -- confirm against the Filter implementation.
std::set<std::string> unknownOptions;
0031 
0032 // Fast hash function applicable to 2 byte char checks
0033 class HashFn2Byte : public HashFn {
0034  public:
0035   HashFn2Byte() : HashFn(0, false) {
0036   }
0037 
0038   uint64_t operator()(const char *input, int len,
0039       unsigned char lastCharCode, uint64_t lastHash) override;
0040 
0041   uint64_t operator()(const char *input, int len) override;
0042 };
0043 
// Upper bound on the length of a single filter line. parseFilter sizes its
// scratch buffer with it and stops parsing input that reaches this length.
const int kMaxLineLength = 2048;

// Number of characters that make up a filter fingerprint
const int AdBlockClient::kFingerprintSize = 6;

// Shared instance of the 2 byte hash function; matches() copies it into the
// hash function array of the bloom filter it builds for each input URL.
static HashFn2Byte hashFn2Byte;
0049 
0050 /**
0051  * Finds the host within the passed in URL and returns its length
0052  */
0053 const char * getUrlHost(const char *input, int *len) {
0054   const char *p = input;
0055   while (*p != '\0' && *p != ':') {
0056     p++;
0057   }
0058   if (*p != '\0') {
0059     p++;
0060     while (*p != '\0' && *p == '/') {
0061       p++;
0062     }
0063   }
0064   const char *q = p;
0065   while (*q != '\0') {
0066     q++;
0067   }
0068   *len = findFirstSeparatorChar(p, q);
0069   return p;
0070 }
0071 
0072 void AddFilterDomainsToHashSet(Filter* filter,
0073     HashSet<NoFingerprintDomain> *hashSet) {
0074   if (filter->domainList) {
0075     char * filter_domain_list = filter->domainList;
0076     int start_offset = 0;
0077     int len = 0;
0078     const char *p = filter_domain_list;
0079     while (true) {
0080       if (*p == '|' || *p == '\0') {
0081         const char *domain = filter_domain_list + start_offset;
0082         if (len > 0 && *domain != '~') {
0083           char buffer[1024];
0084           memset(buffer, 0, 1024);
0085           memcpy(buffer, domain, len);
0086           // cout << "Adding filter: " << buffer << endl;
0087           hashSet->Add(NoFingerprintDomain(domain, len));
0088         } else if (len > 0 && *domain == '~') {
0089           char buffer[1024];
0090           memset(buffer, 0, 1024);
0091           memcpy(buffer, domain + 1, len - 1);
0092           // cout << "Adding anti filter: " << buffer << endl;
0093           hashSet->Add(NoFingerprintDomain(domain + 1, len - 1));
0094         }
0095         start_offset += len + 1;
0096         len = -1;
0097       }
0098       if (*p == '\0') {
0099         break;
0100       }
0101       p++;
0102       len++;
0103     }
0104   }
0105 }
0106 
// Returns true for characters that may be part of a fingerprint, which is
// every character except the filter syntax characters '|', '*' and '^'.
inline bool isFingerprintChar(char c) {
  switch (c) {
    case '|':
    case '*':
    case '^':
      return false;
    default:
      return true;
  }
}
0110 
0111 bool isBadFingerprint(const char *fingerprint, const char * fingerprintEnd) {
0112   for (unsigned int i = 0; i < sizeof(badFingerprints)
0113       / sizeof(badFingerprints[0]); i++) {
0114     if (!strncmp(badFingerprints[i], fingerprint,
0115           fingerprintEnd - fingerprint)) {
0116       return true;
0117     }
0118   }
0119   return false;
0120 }
0121 
0122 bool hasBadSubstring(const char *fingerprint, const char * fingerprintEnd) {
0123   for (unsigned int i = 0; i < sizeof(badSubstrings)
0124       / sizeof(badSubstrings[0]); i++) {
0125     const char * p = strstr(fingerprint, badSubstrings[i]);
0126     if (p && (p - fingerprint) + strlen(badSubstrings[i])
0127         <= (unsigned int)(fingerprintEnd - fingerprint)) {
0128       return true;
0129     }
0130   }
0131   return false;
0132 }
0133 
0134 /**
0135  * Obtains a fingerprint for the specified filter
0136  */
0137 bool AdBlockClient::getFingerprint(char *buffer, const char *input) {
0138   if (!input) {
0139     return false;
0140   }
0141   int size = 0;
0142   const char *p = input;
0143   const char *start = input;
0144   while (*p != '\0') {
0145     if (!isFingerprintChar(*p)) {
0146       size = 0;
0147       p++;
0148       start = p;
0149       continue;
0150     }
0151     if (buffer) {
0152       buffer[size] = *p;
0153     }
0154     if (hasBadSubstring(start, start + size + 1)) {
0155       size = 0;
0156       start++;
0157       p = start;
0158       continue;
0159     }
0160     size++;
0161 
0162     if (size == kFingerprintSize) {
0163       if (buffer) {
0164         buffer[size] = '\0';
0165       }
0166       if (isBadFingerprint(start, start + size)) {
0167         size = 0;
0168         start++;
0169         p = start;
0170         continue;
0171       }
0172       return true;
0173     }
0174     p++;
0175   }
0176   if (buffer) {
0177     buffer[0] = '\0';
0178   }
0179   return false;
0180 }
0181 
0182 bool AdBlockClient::getFingerprint(char *buffer, const Filter &f) {
0183   if (f.filterType & FTRegex) {
0184     // cout << "Get fingerprint for regex returning false; " << endl;
0185     return false;
0186   }
0187 
0188   if (f.filterType & FTHostAnchored) {
0189     if (AdBlockClient::getFingerprint(buffer, f.data + strlen(f.host))) {
0190       return true;
0191     }
0192   }
0193 
0194   bool b = AdBlockClient::getFingerprint(buffer, f.data);
0195   // if (!b && f.data) {
0196   //   cout << "No fingerprint for: " << f.data << endl;
0197   // }
0198   return b;
0199 }
0200 
// Bitmap of the separator characters with one bit per character code: bit
// (c % 8) of byte (c / 8) is set when c is a separator. The bits set below
// are those of '$', '/', ':', '=', '?' and '^'.
signed char separatorBuffer[32] = { 0, 0, 0, 0, 16, -128, 0, -92, 0, 0, 0, 64 };

// Returns true when c is one of the separator characters
bool isSeparatorChar(char c) {
  const unsigned char code = static_cast<unsigned char>(c);
  const int mask = 1 << (code % 8);
  return (separatorBuffer[code / 8] & mask) != 0;
}
0206 
0207 int findFirstSeparatorChar(const char *input, const char *end) {
0208   const char *p = input;
0209   while (p != end) {
0210     if (isSeparatorChar(*p)) {
0211       return static_cast<int>(p - input);
0212     }
0213     p++;
0214   }
0215   return static_cast<int>(end - input);
0216 }
0217 
0218 void parseFilter(const char *input, Filter *f, BloomFilter *bloomFilter,
0219     BloomFilter *exceptionBloomFilter,
0220     HashSet<Filter> *hostAnchoredHashSet,
0221     HashSet<Filter> *hostAnchoredExceptionHashSet,
0222     HashSet<CosmeticFilter> *simpleCosmeticFilters,
0223     bool preserveRules) {
0224       UNUSED(preserveRules);
0225   const char *end = input;
0226   while (*end != '\0') end++;
0227   parseFilter(input, end, f, bloomFilter, exceptionBloomFilter,
0228       hostAnchoredHashSet, hostAnchoredExceptionHashSet, simpleCosmeticFilters);
0229 }
0230 
// States of the character scanner in parseFilter
enum FilterParseState {
  // Initial state; leading whitespace is skipped while in this state
  FPStart,
  // Entered once a leading "@@" exception marker has been consumed
  FPPastWhitespace,
  // A single leading '|' has been seen. The next character decides between
  // a left anchored rule and a "||" host anchored rule
  FPOneBar,
  // A single leading '@' has been seen
  FPOneAt,
  // Inside the body of the rule
  FPData,
  // Same as data but won't consider any special char handling like | or $
  FPDataOnly
};
0240 
0241 // Not currently multithreaded safe due to the static buffer named 'data'
0242 void parseFilter(const char *input, const char *end, Filter *f,
0243     BloomFilter *bloomFilter,
0244     BloomFilter *exceptionBloomFilter,
0245     HashSet<Filter> *hostAnchoredHashSet,
0246     HashSet<Filter> *hostAnchoredExceptionHashSet,
0247     HashSet<CosmeticFilter> *simpleCosmeticFilters,
0248     bool preserveRules) {
0249   FilterParseState parseState = FPStart;
0250   const char *p = input;
0251   const char *filterRuleStart = p;
0252   const char *filterRuleEndPos = p;
0253   char data[kMaxLineLength];
0254   memset(data, 0, sizeof data);
0255   int i = 0;
0256 
0257   bool earlyBreak = false;
0258   while (p != end && !earlyBreak) {
0259     // Check for the filter being too long
0260     if ((p - input) >= kMaxLineLength - 1) {
0261       return;
0262     }
0263 
0264     if (parseState != FPDataOnly) {
0265       if (parseState == FPOneBar && *p != '|') {
0266         parseState = FPData;
0267         f->filterType = static_cast<FilterType>(f->filterType | FTLeftAnchored);
0268       }
0269 
0270       switch (*p) {
0271         case '|':
0272           if (parseState == FPStart || parseState == FPPastWhitespace) {
0273             parseState = FPOneBar;
0274             filterRuleEndPos++;
0275             p++;
0276             continue;
0277           } else if (parseState == FPOneBar) {
0278             parseState = FPOneBar;
0279             f->filterType =
0280               static_cast<FilterType>(f->filterType | FTHostAnchored);
0281             parseState = FPData;
0282             filterRuleEndPos++;
0283             p++;
0284 
0285             int len = findFirstSeparatorChar(p, end);
0286             // It's possible we have a host anchored filter
0287             // which also has a right anchored filter.
0288             if (len > 0 && p[len - 1] == '|') {
0289               len--;
0290             }
0291             f->host = new char[len + 1];
0292             f->host[len] = '\0';
0293             memcpy(f->host, p, len);
0294 
0295             if ((*(p + len) == '^' && (*(p + len + 1) == '\0'
0296                     || *(p + len + 1) == '$' || isEndOfLine(*(p + len + 1)))) ||
0297                 *(p + len) == '\0' || *(p + len) == '$' ||
0298                 isEndOfLine(*(p + len))) {
0299               f->filterType =
0300                 static_cast<FilterType>(f->filterType | FTHostOnly);
0301             }
0302 
0303             continue;
0304           } else {
0305             f->filterType =
0306               static_cast<FilterType>(f->filterType | FTRightAnchored);
0307             parseState = FPData;
0308             filterRuleEndPos++;
0309             p++;
0310             continue;
0311           }
0312           break;
0313         case '@':
0314           if (parseState == FPStart || parseState == FPPastWhitespace) {
0315             parseState = FPOneAt;
0316             filterRuleEndPos++;
0317             p++;
0318             continue;
0319           } else if (parseState == FPOneAt) {
0320             parseState = FPOneBar;
0321             f->filterType = FTException;
0322             parseState = FPPastWhitespace;
0323             filterRuleEndPos++;
0324             p++;
0325             continue;
0326           }
0327           break;
0328         case '!':
0329         case '[':
0330           if (parseState == FPStart || parseState == FPPastWhitespace) {
0331             f->filterType = FTComment;
0332             // We don't care about comments right now
0333             return;
0334           }
0335           break;
0336         case '\r':
0337         case '\n':
0338         case '\t':
0339         case ' ':
0340           // Skip leading whitespace
0341           if (parseState == FPStart) {
0342             filterRuleStart++;
0343             filterRuleEndPos++;
0344             p++;
0345             continue;
0346           }
0347           break;
0348         case '/': {
0349           const size_t inputLen = end - input;
0350           if (parseState == FPStart || parseState == FPPastWhitespace) {
0351             if (input[inputLen - 1] == '/' && inputLen > 1) {
0352               // Just copy out the whole regex and return early
0353               int len = static_cast<int>(inputLen) - i - 1;
0354               f->data = new char[len];
0355               f->data[len - 1] = '\0';
0356               memcpy(f->data, input + i + 1, len - 1);
0357 
0358               if (preserveRules) {
0359                 f->ruleDefinition = new char[len];
0360                 f->ruleDefinition[len - 1] = '\0';
0361                 memcpy(f->ruleDefinition, input + i + 1, len - 1);
0362               }
0363 
0364               f->filterType = FTRegex;
0365               return;
0366             } else {
0367               parseState = FPData;
0368             }
0369           }
0370           break;
0371         }
0372         case '$':
0373           // Handle adguard HTML filtering rules syntax
0374           // e.g. example.org$$script[data-src="banner"]
0375           // see https://kb.adguard.com/en/general/how-to-create-your-own-ad-filters#html-filtering-rules-syntax-1
0376           if (*(p+1) == '$') {
0377               if (i != 0) {
0378                 f->domainList = new char[i + 1];
0379                 memcpy(f->domainList, data, i + 1);
0380                 i = 0;
0381               }
0382               parseState = FPDataOnly;
0383               f->filterType = FTHTMLFiltering;
0384               p += 2;
0385               filterRuleEndPos += 2;
0386               continue;
0387           }
0388           while (*filterRuleEndPos != '\0' && !isEndOfLine(*filterRuleEndPos)) {
0389             filterRuleEndPos++;
0390           }
0391           f->parseOptions(p + 1);
0392           earlyBreak = true;
0393           continue;
0394         case '#':
0395           // ublock uses some comments of the form #[space]
0396           if (parseState == FPStart || parseState == FPPastWhitespace) {
0397             if (*(p+1) == ' ') {
0398               f->filterType = FTComment;
0399               // We don't care about comments right now
0400               return;
0401             }
0402           }
0403 
0404           if (*(p+1) == '#' || *(p+1) == '@') {
0405             if (i != 0) {
0406               f->domainList = new char[i + 1];
0407               memcpy(f->domainList, data, i + 1);
0408               i = 0;
0409             }
0410             parseState = FPDataOnly;
0411             if (*(p+1) == '#') {
0412               f->filterType = FTElementHiding;
0413             } else {
0414               f->filterType = FTElementHidingException;
0415             }
0416             p += 2;
0417             continue;
0418           }
0419           // Copied from default label to avoid warning (unannotated
0420           // fall-through between switch labels)
0421           parseState = FPData;
0422           break;
0423         default:
0424           parseState = FPData;
0425           break;
0426       }
0427     }
0428     data[i] = *p;
0429     i++;
0430     filterRuleEndPos++;
0431     p++;
0432   }
0433 
0434   if (parseState == FPStart) {
0435     f->filterType = FTEmpty;
0436     return;
0437   }
0438 
0439   if (preserveRules) {
0440     int ruleTextLength = filterRuleEndPos - filterRuleStart;
0441     f->ruleDefinition = new char[ruleTextLength + 1];
0442     memcpy(f->ruleDefinition, filterRuleStart, ruleTextLength);
0443     f->ruleDefinition[ruleTextLength] = '\0';
0444   }
0445 
0446   data[i] = '\0';
0447   f->data = new char[i + 1];
0448   memcpy(f->data, data, i + 1);
0449 
0450   char fingerprintBuffer[AdBlockClient::kFingerprintSize + 1];
0451   fingerprintBuffer[AdBlockClient::kFingerprintSize] = '\0';
0452 
0453   if (f->filterType == FTElementHiding) {
0454     if (simpleCosmeticFilters && !f->domainList) {
0455       simpleCosmeticFilters->Add(CosmeticFilter(data));
0456     }
0457   } else if (f->filterType == FTElementHidingException) {
0458     if (simpleCosmeticFilters && f->domainList) {
0459       simpleCosmeticFilters->Remove(CosmeticFilter(data));
0460     }
0461   } else if (exceptionBloomFilter
0462       && (f->filterType & FTException) && (f->filterType & FTHostOnly)) {
0463     // cout << "add host anchored exception bloom filter: " << f->host << endl;
0464     hostAnchoredExceptionHashSet->Add(*f);
0465   } else if (hostAnchoredHashSet && (f->filterType & FTHostOnly)) {
0466     // cout << "add host anchored bloom filter: " << f->host << endl;
0467     hostAnchoredHashSet->Add(*f);
0468   } else if (AdBlockClient::getFingerprint(fingerprintBuffer, *f)) {
0469     if (exceptionBloomFilter && f->filterType & FTException) {
0470       exceptionBloomFilter->add(fingerprintBuffer);
0471     } else if (bloomFilter) {
0472       // cout << "add fingerprint: " << fingerprintBuffer
0473       // << ", from string: " << f->data << endl;
0474       bloomFilter->add(fingerprintBuffer);
0475     }
0476   }
0477 }
0478 
0479 
0480 AdBlockClient::AdBlockClient() : filters(nullptr),
0481   cosmeticFilters(nullptr),
0482   htmlFilters(nullptr),
0483   exceptionFilters(nullptr),
0484   noFingerprintFilters(nullptr),
0485   noFingerprintExceptionFilters(nullptr),
0486   noFingerprintDomainOnlyFilters(nullptr),
0487   noFingerprintAntiDomainOnlyFilters(nullptr),
0488   noFingerprintDomainOnlyExceptionFilters(nullptr),
0489   noFingerprintAntiDomainOnlyExceptionFilters(nullptr),
0490   numFilters(0),
0491   numCosmeticFilters(0),
0492   numHtmlFilters(0),
0493   numExceptionFilters(0),
0494   numNoFingerprintFilters(0),
0495   numNoFingerprintExceptionFilters(0),
0496   numNoFingerprintDomainOnlyFilters(0),
0497   numNoFingerprintAntiDomainOnlyFilters(0),
0498   numNoFingerprintDomainOnlyExceptionFilters(0),
0499   numNoFingerprintAntiDomainOnlyExceptionFilters(0),
0500   numHostAnchoredFilters(0),
0501   numHostAnchoredExceptionFilters(0),
0502   bloomFilter(nullptr),
0503   exceptionBloomFilter(nullptr),
0504   hostAnchoredHashSet(nullptr),
0505   hostAnchoredExceptionHashSet(nullptr),
0506   noFingerprintDomainHashSet(nullptr),
0507   noFingerprintAntiDomainHashSet(nullptr),
0508   noFingerprintDomainExceptionHashSet(nullptr),
0509   noFingerprintAntiDomainExceptionHashSet(nullptr),
0510   badFingerprintsHashSet(nullptr),
0511   numFalsePositives(0),
0512   numExceptionFalsePositives(0),
0513   numBloomFilterSaves(0),
0514   numExceptionBloomFilterSaves(0),
0515   numHashSetSaves(0),
0516   numExceptionHashSetSaves(0),
0517   deserializedBuffer(nullptr) {
0518 }
0519 
0520 AdBlockClient::~AdBlockClient() {
0521   clear();
0522 }
0523 
0524 // Clears all data and stats from the AdBlockClient
0525 void AdBlockClient::clear() {
0526   if (filters) {
0527     delete[] filters;
0528     filters = nullptr;
0529   }
0530   if (cosmeticFilters) {
0531     delete[] cosmeticFilters;
0532     cosmeticFilters = nullptr;
0533   }
0534   if (htmlFilters) {
0535     delete[] htmlFilters;
0536     htmlFilters = nullptr;
0537   }
0538   if (exceptionFilters) {
0539     delete[] exceptionFilters;
0540     exceptionFilters = nullptr;
0541   }
0542   if (noFingerprintFilters) {
0543     delete[] noFingerprintFilters;
0544     noFingerprintFilters = nullptr;
0545   }
0546   if (noFingerprintExceptionFilters) {
0547     delete[] noFingerprintExceptionFilters;
0548     noFingerprintExceptionFilters = nullptr;
0549   }
0550   if (noFingerprintDomainOnlyFilters) {
0551     delete[] noFingerprintDomainOnlyFilters;
0552     noFingerprintDomainOnlyFilters = nullptr;
0553   }
0554   if (noFingerprintAntiDomainOnlyFilters) {
0555     delete[] noFingerprintAntiDomainOnlyFilters;
0556     noFingerprintAntiDomainOnlyFilters = nullptr;
0557   }
0558   if (noFingerprintDomainOnlyExceptionFilters) {
0559     delete[] noFingerprintDomainOnlyExceptionFilters;
0560     noFingerprintDomainOnlyExceptionFilters = nullptr;
0561   }
0562   if (noFingerprintAntiDomainOnlyExceptionFilters) {
0563     delete[] noFingerprintAntiDomainOnlyExceptionFilters;
0564     noFingerprintAntiDomainOnlyExceptionFilters = nullptr;
0565   }
0566   if (bloomFilter) {
0567     delete bloomFilter;
0568     bloomFilter = nullptr;
0569   }
0570   if (exceptionBloomFilter) {
0571     delete exceptionBloomFilter;
0572     exceptionBloomFilter = nullptr;
0573   }
0574   if (hostAnchoredHashSet) {
0575     delete hostAnchoredHashSet;
0576     hostAnchoredHashSet = nullptr;
0577   }
0578   if (hostAnchoredExceptionHashSet) {
0579     delete hostAnchoredExceptionHashSet;
0580     hostAnchoredExceptionHashSet = nullptr;
0581   }
0582   if (noFingerprintDomainHashSet) {
0583     delete noFingerprintDomainHashSet;
0584     noFingerprintDomainHashSet = nullptr;
0585   }
0586   if (noFingerprintAntiDomainHashSet) {
0587     delete noFingerprintAntiDomainHashSet;
0588     noFingerprintAntiDomainHashSet = nullptr;
0589   }
0590   if (noFingerprintDomainExceptionHashSet) {
0591     delete noFingerprintDomainExceptionHashSet;
0592     noFingerprintDomainExceptionHashSet = nullptr;
0593   }
0594   if (noFingerprintAntiDomainExceptionHashSet) {
0595     delete noFingerprintAntiDomainExceptionHashSet;
0596     noFingerprintAntiDomainExceptionHashSet = nullptr;
0597   }
0598   if (badFingerprintsHashSet) {
0599     delete badFingerprintsHashSet;
0600     badFingerprintsHashSet = nullptr;
0601   }
0602 
0603   numFilters = 0;
0604   numCosmeticFilters = 0;
0605   numHtmlFilters = 0;
0606   numExceptionFilters = 0;
0607   numNoFingerprintFilters = 0;
0608   numNoFingerprintExceptionFilters = 0;
0609   numNoFingerprintDomainOnlyFilters = 0;
0610   numNoFingerprintAntiDomainOnlyFilters = 0;
0611   numNoFingerprintDomainOnlyExceptionFilters = 0;
0612   numNoFingerprintAntiDomainOnlyExceptionFilters = 0;
0613   numHostAnchoredFilters = 0;
0614   numHostAnchoredExceptionFilters = 0;
0615   numFalsePositives = 0;
0616   numExceptionFalsePositives = 0;
0617   numBloomFilterSaves = 0;
0618   numExceptionBloomFilterSaves = 0;
0619   numHashSetSaves = 0;
0620   numExceptionHashSetSaves = 0;
0621 }
0622 
0623 bool AdBlockClient::hasMatchingFilters(Filter *filter, int numFilters,
0624     const char *input,
0625     int inputLen,
0626     FilterOption contextOption,
0627     const char *contextDomain,
0628     BloomFilter *inputBloomFilter,
0629     const char *inputHost,
0630     int inputHostLen,
0631     Filter **matchingFilter) {
0632   for (int i = 0; i < numFilters; i++) {
0633     if (filter->matches(input, inputLen, contextOption,
0634           contextDomain, inputBloomFilter, inputHost, inputHostLen)) {
0635       if (filter->tagLen == 0 ||
0636           tagExists(std::string(filter->tag, filter->tagLen))) {
0637         if (matchingFilter) {
0638           *matchingFilter = filter;
0639         }
0640         return true;
0641       }
0642     }
0643     filter++;
0644   }
0645   if (matchingFilter) {
0646     *matchingFilter = nullptr;
0647   }
0648   return false;
0649 }
0650 
0651 void discoverMatchingPrefix(BadFingerprintsHashSet *badFingerprintsHashSet,
0652     const char *str,
0653     BloomFilter *bloomFilter,
0654     int prefixLen = AdBlockClient::kFingerprintSize) {
0655   char sz[32];
0656   memset(sz, 0, sizeof(sz));
0657   int strLen = static_cast<int>(strlen(str));
0658   for (int i = 0; i < strLen - prefixLen + 1; i++) {
0659     if (bloomFilter->exists(str + i, prefixLen)) {
0660       memcpy(sz, str + i, prefixLen);
0661       // cout <<  "Bad fingerprint: " << sz << endl;
0662       if (badFingerprintsHashSet) {
0663         badFingerprintsHashSet->Add(BadFingerprint(sz));
0664       }
0665       // We only want the first bad fingerprint since that's the one
0666       // that led us here.
0667       // If you do all bad fingerprint detection here it will lead to too many
0668       // bad fingerprints, which then leads to too many no fingerprint rules.
0669       // And too many no fingerprint rules causes perf problems.
0670       return;
0671     }
0672     // memcpy(sz, str + i, prefixLen);
0673     // cout <<  "Good fingerprint: " << sz;
0674   }
0675 }
0676 
0677 bool isNoFingerprintDomainHashSetMiss(HashSet<NoFingerprintDomain> *hashSet,
0678     const char *host, int hostLen) {
0679   if (!hashSet) {
0680     return false;
0681   }
0682   const char *start = host + hostLen;
0683   // Skip past the TLD
0684   while (start != host) {
0685     start--;
0686     if (*(start) == '.') {
0687       break;
0688     }
0689   }
0690   while (start != host) {
0691     if (*(start - 1) == '.') {
0692       if (hashSet->Find(NoFingerprintDomain(start,
0693           static_cast<int>(host + hostLen - start)))) {
0694         return false;
0695       }
0696     }
0697     start--;
0698   }
0699   return !hashSet->Find(NoFingerprintDomain(start,
0700       static_cast<int>(host + hostLen - start)));
0701 }
0702 
0703 bool AdBlockClient::isHostAnchoredHashSetMiss(const char *input, int inputLen,
0704     HashSet<Filter> *hashSet,
0705     const char *inputHost,
0706     int inputHostLen,
0707     FilterOption contextOption,
0708     const char *contextDomain,
0709     Filter **foundFilter) {
0710   if (!hashSet) {
0711     return false;
0712   }
0713 
0714   const char *start = inputHost + inputHostLen;
0715   // Skip past the TLD
0716   while (start != inputHost) {
0717     start--;
0718     if (*(start) == '.') {
0719       break;
0720     }
0721   }
0722 
0723   while (start != inputHost) {
0724     if (*(start - 1) == '.') {
0725       Filter *filter = hashSet->Find(Filter(start,
0726             static_cast<int>(inputHost + inputHostLen - start),
0727             nullptr, start, inputHostLen - (start - inputHost)));
0728       if (filter && filter->matches(input, inputLen,
0729             contextOption, contextDomain)) {
0730         if (filter->tagLen == 0 ||
0731             tagExists(std::string(filter->tag, filter->tagLen))) {
0732           if (foundFilter) {
0733             *foundFilter = filter;
0734           }
0735           return false;
0736         }
0737       }
0738     }
0739     start--;
0740   }
0741 
0742   Filter *filter = hashSet->Find(Filter(start,
0743         static_cast<int>(inputHost + inputHostLen - start), nullptr,
0744         start, inputHostLen));
0745   if (!filter) {
0746     return true;
0747   }
0748   bool result = !filter->matches(input, inputLen, contextOption, contextDomain);
0749   if (!result) {
0750     if (filter->tagLen > 0 &&
0751         !tagExists(std::string(filter->tag, filter->tagLen))) {
0752       return true;
0753     }
0754     if (foundFilter) {
0755       *foundFilter = filter;
0756     }
0757   }
0758   return result;
0759 }
0760 
0761 bool AdBlockClient::matches(const char* input, FilterOption contextOption,
0762     const char* contextDomain, Filter** matchedFilter,
0763     Filter** matchedExceptionFilter) {
0764   if (matchedFilter) {
0765     *matchedFilter = nullptr;
0766   }
0767   if (matchedExceptionFilter) {
0768     *matchedExceptionFilter = nullptr;
0769   }
0770   int inputLen = static_cast<int>(strlen(input));
0771 
0772   if (!isBlockableProtocol(input, inputLen)) {
0773       return false;
0774   }
0775 
0776   int inputHostLen;
0777   const char *inputHost = getUrlHost(input, &inputHostLen);
0778 
0779   int contextDomainLen = 0;
0780   if (contextDomain) {
0781     contextDomainLen = static_cast<int>(strlen(contextDomain));
0782   }
0783   // If neither first party nor third party was specified, try to figure it out
0784   if (contextDomain && !(contextOption & (FOThirdParty | FONotThirdParty))) {
0785     if (isThirdPartyHost(contextDomain, contextDomainLen,
0786         inputHost, static_cast<int>(inputHostLen))) {
0787       contextOption =
0788         static_cast<FilterOption>(contextOption | FOThirdParty);
0789     } else {
0790       contextOption =
0791         static_cast<FilterOption>(contextOption | FONotThirdParty);
0792     }
0793   }
0794 
0795   // Optimization for the manual filter checks which are needed.
0796   // Avoid having to check individual filters if the filter parts are not found
0797   // inside the input bloom filter.
0798   HashFn2Byte hashFns[] = { hashFn2Byte };
0799   BloomFilter inputBloomFilter(10, 1024, hashFns, 1);
0800   for (int i = 1; i < inputLen; i++) {
0801     inputBloomFilter.add(input + i - 1, 2);
0802   }
0803 
0804   // We always have to check noFingerprintFilters because the bloom filter opt
0805   // cannot be used for them
0806   bool hasMatch = false;
0807 
0808   // Only bother checking the no fingerprint domain related filters if needed
0809   if (!isNoFingerprintDomainHashSetMiss(
0810         noFingerprintDomainHashSet, contextDomain, contextDomainLen)) {
0811     hasMatch = hasMatch || hasMatchingFilters(noFingerprintDomainOnlyFilters,
0812         numNoFingerprintDomainOnlyFilters, input, inputLen, contextOption,
0813         contextDomain, &inputBloomFilter, inputHost, inputHostLen,
0814         matchedFilter);
0815   }
0816   if (isNoFingerprintDomainHashSetMiss(
0817         noFingerprintAntiDomainHashSet, contextDomain, contextDomainLen)) {
0818     hasMatch = hasMatch ||
0819       hasMatchingFilters(noFingerprintAntiDomainOnlyFilters,
0820         numNoFingerprintAntiDomainOnlyFilters, input, inputLen, contextOption,
0821         contextDomain, &inputBloomFilter, inputHost, inputHostLen,
0822         matchedFilter);
0823   }
0824 
0825   hasMatch = hasMatch || hasMatchingFilters(noFingerprintFilters,
0826       numNoFingerprintFilters, input, inputLen, contextOption,
0827       contextDomain, &inputBloomFilter, inputHost, inputHostLen,
0828       matchedFilter);
0829 
0830   // If no noFingerprintFilters were hit, check the bloom filter substring
0831   // fingerprint for the normal
0832   // filter list.   If no substring exists for the input then we know for sure
0833   // the URL should not be blocked.
0834   bool bloomFilterMiss = false;
0835   bool hostAnchoredHashSetMiss = false;
0836   if (!hasMatch) {
0837     bloomFilterMiss = bloomFilter
0838       && !bloomFilter->substringExists(input, AdBlockClient::kFingerprintSize);
0839     hostAnchoredHashSetMiss = isHostAnchoredHashSetMiss(input, inputLen,
0840         hostAnchoredHashSet, inputHost, inputHostLen,
0841         contextOption, contextDomain, matchedFilter);
0842     if (bloomFilterMiss && hostAnchoredHashSetMiss) {
0843       if (bloomFilterMiss) {
0844         numBloomFilterSaves++;
0845       }
0846       if (hostAnchoredHashSetMiss) {
0847         numHashSetSaves++;
0848       }
0849       return false;
0850     }
0851 
0852     hasMatch = !hostAnchoredHashSetMiss;
0853   }
0854 
0855   // We need to check the filters list manually because there is either a match
0856   // or a false positive
0857   if (!hasMatch && !bloomFilterMiss) {
0858     hasMatch = hasMatchingFilters(filters, numFilters, input, inputLen,
0859         contextOption, contextDomain, &inputBloomFilter,
0860         inputHost, inputHostLen, matchedFilter);
0861     // If there's still no match after checking the block filters, then no need
0862     // to try to block this because there is a false positive.
0863     if (!hasMatch) {
0864       numFalsePositives++;
0865       if (badFingerprintsHashSet) {
0866         // cout << "false positive for input: " << input << " bloomFilterMiss: "
0867         // << bloomFilterMiss << ", hostAnchoredHashSetMiss: "
0868         // << hostAnchoredHashSetMiss << endl;
0869         discoverMatchingPrefix(badFingerprintsHashSet, input, bloomFilter);
0870       }
0871       return false;
0872     }
0873   }
0874 
0875   bool hasExceptionMatch = false;
0876 
0877   // Only bother checking the no fingerprint domain related filters if needed
0878   if (!isNoFingerprintDomainHashSetMiss(
0879         noFingerprintDomainExceptionHashSet, contextDomain, contextDomainLen)) {
0880     hasExceptionMatch = hasExceptionMatch ||
0881       hasMatchingFilters(noFingerprintDomainOnlyExceptionFilters,
0882         numNoFingerprintDomainOnlyExceptionFilters, input, inputLen,
0883         contextOption, contextDomain, &inputBloomFilter, inputHost,
0884         inputHostLen, matchedExceptionFilter);
0885   }
0886 
0887   if (isNoFingerprintDomainHashSetMiss(
0888         noFingerprintAntiDomainExceptionHashSet, contextDomain,
0889         contextDomainLen)) {
0890     hasExceptionMatch = hasExceptionMatch ||
0891     hasMatchingFilters(noFingerprintAntiDomainOnlyExceptionFilters,
0892       numNoFingerprintAntiDomainOnlyExceptionFilters, input, inputLen,
0893       contextOption, contextDomain, &inputBloomFilter, inputHost, inputHostLen,
0894       matchedExceptionFilter);
0895   }
0896 
0897   hasExceptionMatch = hasExceptionMatch ||
0898     hasMatchingFilters(noFingerprintExceptionFilters,
0899       numNoFingerprintExceptionFilters, input, inputLen, contextOption,
0900       contextDomain, &inputBloomFilter, inputHost, inputHostLen,
0901       matchedExceptionFilter);
0902 
0903   // If there's a matching no fingerprint exception then we can just return
0904   // right away because we shouldn't block
0905   if (hasExceptionMatch) {
0906     return false;
0907   }
0908 
0909   bool bloomExceptionFilterMiss = exceptionBloomFilter
0910     && !exceptionBloomFilter->substringExists(input,
0911         AdBlockClient::kFingerprintSize);
0912   bool hostAnchoredExceptionHashSetMiss =
0913     isHostAnchoredHashSetMiss(input, inputLen, hostAnchoredExceptionHashSet,
0914         inputHost, inputHostLen, contextOption, contextDomain,
0915         matchedExceptionFilter);
0916 
0917   // Now that we have a matching rule, we should check if no exception rule
0918   // hits, if none hits, we should block
0919   if (bloomExceptionFilterMiss && hostAnchoredExceptionHashSetMiss) {
0920     if (bloomExceptionFilterMiss) {
0921       numExceptionBloomFilterSaves++;
0922     }
0923     if (hostAnchoredExceptionHashSetMiss) {
0924       numExceptionHashSetSaves++;
0925     }
0926     return true;
0927   }
0928 
0929   // If tehre wasn't an exception has set miss, it was a hit, and hash set is
0930   // deterministic so we shouldn't block this resource.
0931   if (!hostAnchoredExceptionHashSetMiss) {
0932     numExceptionHashSetSaves++;
0933     return false;
0934   }
0935 
0936   if (!bloomExceptionFilterMiss) {
0937     if (!hasMatchingFilters(exceptionFilters, numExceptionFilters, input,
0938           inputLen, contextOption, contextDomain,
0939           &inputBloomFilter, inputHost, inputHostLen,
0940           matchedExceptionFilter)) {
0941       // False positive on the exception filter list
0942       numExceptionFalsePositives++;
0943       // cout << "exception false positive for input: " << input << endl;
0944       if (badFingerprintsHashSet) {
0945         discoverMatchingPrefix(badFingerprintsHashSet,
0946             input, exceptionBloomFilter);
0947       }
0948       return true;
0949     }
0950   }
0951 
0952   return false;
0953 }
0954 
0955 /**
0956  * Obtains the first matching filter or nullptr, and if one is found, finds
0957  * the first matching exception filter or nullptr.
0958  *
0959  * @return true if the filter should be blocked
0960  */
0961 bool AdBlockClient::findMatchingFilters(const char *input,
0962     FilterOption contextOption,
0963     const char *contextDomain,
0964     Filter **matchingFilter,
0965     Filter **matchingExceptionFilter) {
0966   *matchingFilter = nullptr;
0967   *matchingExceptionFilter = nullptr;
0968   int inputLen = static_cast<int>(strlen(input));
0969   int inputHostLen;
0970   const char *inputHost = getUrlHost(input, &inputHostLen);
0971 
0972   int contextDomainLen = 0;
0973   if (contextDomain) {
0974     contextDomainLen = static_cast<int>(strlen(contextDomain));
0975   }
0976   // If neither first party nor third party was specified, try to figure it out
0977   if (contextDomain && !(contextOption & (FOThirdParty | FONotThirdParty))) {
0978     if (isThirdPartyHost(contextDomain, contextDomainLen,
0979         inputHost, static_cast<int>(inputHostLen))) {
0980       contextOption =
0981         static_cast<FilterOption>(contextOption | FOThirdParty);
0982     } else {
0983       contextOption =
0984         static_cast<FilterOption>(contextOption | FONotThirdParty);
0985     }
0986   }
0987 
0988   hasMatchingFilters(noFingerprintFilters,
0989     numNoFingerprintFilters, input, inputLen, contextOption,
0990     contextDomain, nullptr,
0991     inputHost, inputHostLen, matchingFilter);
0992 
0993   if (!*matchingFilter) {
0994     hasMatchingFilters(noFingerprintDomainOnlyFilters,
0995       numNoFingerprintDomainOnlyFilters, input, inputLen, contextOption,
0996       contextDomain, nullptr,
0997       inputHost, inputHostLen, matchingFilter);
0998   }
0999   if (!*matchingFilter) {
1000     hasMatchingFilters(noFingerprintAntiDomainOnlyFilters,
1001       numNoFingerprintAntiDomainOnlyFilters, input, inputLen, contextOption,
1002       contextDomain, nullptr,
1003       inputHost, inputHostLen, matchingFilter);
1004   }
1005 
1006   if (!*matchingFilter) {
1007     hasMatchingFilters(filters,
1008       numFilters, input, inputLen, contextOption,
1009       contextDomain, nullptr,
1010       inputHost, inputHostLen, matchingFilter);
1011   }
1012 
1013   if (!*matchingFilter) {
1014     isHostAnchoredHashSetMiss(input, inputLen,
1015       hostAnchoredHashSet, inputHost, inputHostLen,
1016       contextOption, contextDomain, matchingFilter);
1017   }
1018 
1019   if (!*matchingFilter) {
1020     return false;
1021   }
1022 
1023   hasMatchingFilters(noFingerprintExceptionFilters,
1024     numNoFingerprintExceptionFilters, input, inputLen, contextOption,
1025     contextDomain,
1026     nullptr, inputHost, inputHostLen, matchingExceptionFilter);
1027 
1028   if (!*matchingExceptionFilter) {
1029     hasMatchingFilters(noFingerprintDomainOnlyExceptionFilters,
1030       numNoFingerprintDomainOnlyExceptionFilters, input, inputLen,
1031       contextOption, contextDomain, nullptr, inputHost, inputHostLen,
1032       matchingExceptionFilter);
1033   }
1034 
1035   if (!*matchingExceptionFilter) {
1036     hasMatchingFilters(noFingerprintAntiDomainOnlyExceptionFilters,
1037       numNoFingerprintAntiDomainOnlyExceptionFilters, input, inputLen,
1038       contextOption, contextDomain, nullptr, inputHost, inputHostLen,
1039       matchingExceptionFilter);
1040   }
1041 
1042   if (!*matchingExceptionFilter) {
1043     isHostAnchoredHashSetMiss(input, inputLen, hostAnchoredExceptionHashSet,
1044         inputHost, inputHostLen, contextOption, contextDomain,
1045         matchingExceptionFilter);
1046   }
1047 
1048   if (!*matchingExceptionFilter) {
1049     hasMatchingFilters(exceptionFilters,
1050       numExceptionFilters, input, inputLen, contextOption,
1051       contextDomain,
1052       nullptr, inputHost, inputHostLen, matchingExceptionFilter);
1053   }
1054   return !*matchingExceptionFilter;
1055 }
1056 
1057 void AdBlockClient::initBloomFilter(BloomFilter **pp,
1058     const char *buffer, int len) {
1059   if (*pp) {
1060     delete *pp;
1061   }
1062   if (len > 0) {
1063     *pp = new BloomFilter(buffer, len);
1064   }
1065 }
1066 
1067 template<class T>
1068 bool AdBlockClient::initHashSet(HashSet<T> **pp, char *buffer, int len) {
1069   if (*pp) {
1070     delete *pp;
1071   }
1072   if (len > 0) {
1073     *pp = new HashSet<T>(0, false);
1074 
1075     return (*pp)->Deserialize(buffer, len);
1076   }
1077 
1078   return true;
1079 }
1080 
1081 void setFilterBorrowedMemory(Filter *filters, int numFilters) {
1082   for (int i = 0; i < numFilters; i++) {
1083     filters[i].borrowed_data = true;
1084   }
1085 }
1086 
1087 // Parses the filter data into a few collections of filters and enables
1088 // efficent querying.
1089 bool AdBlockClient::parse(const char *input, bool preserveRules) {
1090   // If the user is parsing and we have regex support,
1091   // then we can determine the fingerprints for the bloom filter.
1092   // Otherwise it needs to be done manually via initBloomFilter and
1093   // initExceptionBloomFilter
1094   if (!bloomFilter) {
1095     bloomFilter = new BloomFilter(15, 80000);
1096   }
1097   if (!exceptionBloomFilter) {
1098     exceptionBloomFilter = new BloomFilter(10, 20000);
1099   }
1100   if (!hostAnchoredHashSet) {
1101     // Optimized to be 1:1 with the easylist / easyprivacy
1102     // number of host anchored hosts.
1103     hostAnchoredHashSet = new HashSet<Filter>(18000, false);
1104   }
1105   if (!hostAnchoredExceptionHashSet) {
1106     // Optimized to be 1:1 with the easylist / easyprivacy
1107     // number of host anchored exception hosts.
1108     hostAnchoredExceptionHashSet = new HashSet<Filter>(2000, false);
1109   }
1110   if (!noFingerprintDomainHashSet) {
1111     noFingerprintDomainHashSet = new HashSet<NoFingerprintDomain>(1000, false);
1112   }
1113   if (!noFingerprintAntiDomainHashSet) {
1114     noFingerprintAntiDomainHashSet =
1115       new HashSet<NoFingerprintDomain>(100, false);
1116   }
1117   if (!noFingerprintDomainExceptionHashSet) {
1118     noFingerprintDomainExceptionHashSet =
1119       new HashSet<NoFingerprintDomain>(1000, false);
1120   }
1121   if (!noFingerprintAntiDomainExceptionHashSet) {
1122     noFingerprintAntiDomainExceptionHashSet =
1123       new HashSet<NoFingerprintDomain>(100, false);
1124   }
1125 
1126   const char *p = input;
1127   const char *lineStart = p;
1128 
1129   int newNumFilters = 0;
1130   int newNumCosmeticFilters = 0;
1131   int newNumHtmlFilters = 0;
1132   int newNumExceptionFilters = 0;
1133   int newNumNoFingerprintFilters = 0;
1134   int newNumNoFingerprintExceptionFilters = 0;
1135   int newNumNoFingerprintDomainOnlyFilters = 0;
1136   int newNumNoFingerprintAntiDomainOnlyFilters = 0;
1137   int newNumNoFingerprintDomainOnlyExceptionFilters = 0;
1138   int newNumNoFingerprintAntiDomainOnlyExceptionFilters = 0;
1139   int newNumHostAnchoredFilters = 0;
1140   int newNumHostAnchoredExceptionFilters = 0;
1141 
1142   // Simple cosmetic filters apply to all sites without exception
1143   HashSet<CosmeticFilter> simpleCosmeticFilters(1000, false);
1144 
1145   // Parsing does 2 passes, one just to determine the type of information we'll
1146   // need to setup.  Note that the library will be used on a variety of builds
1147   // so sometimes we won't even have STL So we can't use something like a vector
1148   // here.
1149   while (true) {
1150     if (isEndOfLine(*p) || *p == '\0') {
1151       Filter f;
1152       parseFilter(lineStart, p, &f);
1153       if (!f.hasUnsupportedOptions()) {
1154         switch (f.filterType & FTListTypesMask) {
1155           case FTException:
1156             if (f.filterType & FTHostOnly) {
1157               newNumHostAnchoredExceptionFilters++;
1158             } else if (AdBlockClient::getFingerprint(nullptr, f)) {
1159               newNumExceptionFilters++;
1160             } else if (f.isDomainOnlyFilter()) {
1161               newNumNoFingerprintDomainOnlyExceptionFilters++;
1162             } else if (f.isAntiDomainOnlyFilter()) {
1163               newNumNoFingerprintAntiDomainOnlyExceptionFilters++;
1164             } else {
1165               newNumNoFingerprintExceptionFilters++;
1166             }
1167             break;
1168           case FTElementHiding:
1169             newNumCosmeticFilters++;
1170             break;
1171           case FTElementHidingException:
1172             newNumCosmeticFilters++;
1173             break;
1174           case FTHTMLFiltering:
1175             newNumHtmlFilters++;
1176             break;
1177           case FTEmpty:
1178           case FTComment:
1179             // No need to store comments
1180             break;
1181           default:
1182             if (f.filterType & FTHostOnly) {
1183               newNumHostAnchoredFilters++;
1184             } else if (AdBlockClient::getFingerprint(nullptr, f)) {
1185               newNumFilters++;
1186             } else if (f.isDomainOnlyFilter()) {
1187               newNumNoFingerprintDomainOnlyFilters++;
1188             } else if (f.isAntiDomainOnlyFilter()) {
1189               newNumNoFingerprintAntiDomainOnlyFilters++;
1190             } else {
1191               newNumNoFingerprintFilters++;
1192             }
1193             break;
1194         }
1195       }
1196       lineStart = p + 1;
1197     }
1198 
1199     if (*p == '\0') {
1200       break;
1201     }
1202 
1203     p++;
1204   }
1205 
1206 #ifdef PERF_STATS
1207   cout << "Fingerprint size: " << AdBlockClient::kFingerprintSize << endl;
1208   cout << "Num new filters: " << newNumFilters << endl;
1209   cout << "Num new cosmetic filters: " << newNumCosmeticFilters << endl;
1210   cout << "Num new HTML filters: " << newNumHtmlFilters << endl;
1211   cout << "Num new exception filters: " << newNumExceptionFilters << endl;
1212   cout << "Num new no fingerprint filters: "
1213     << newNumNoFingerprintFilters << endl;
1214   cout << "Num new no fingerprint exception filters: "
1215     << newNumNoFingerprintExceptionFilters << endl;
1216   cout << "Num new host anchored filters: "
1217     << newNumHostAnchoredFilters << endl;
1218   cout << "Num new host anchored exception filters: "
1219     << newNumHostAnchoredExceptionFilters << endl;
1220   cout << "Num new no fingerprint domain only filters: "
1221     << newNumNoFingerprintDomainOnlyFilters << endl;
1222   cout << "Num new no fingerprint anti-domain only filters: "
1223     << newNumNoFingerprintAntiDomainOnlyFilters << endl;
1224   cout << "Num new no fingerprint domain only exception filters: "
1225     << newNumNoFingerprintDomainOnlyExceptionFilters << endl;
1226   cout << "Num new no fingerprint anti-domain only exception filters: "
1227     << newNumNoFingerprintAntiDomainOnlyExceptionFilters << endl;
1228 #endif
1229 
1230   Filter *newFilters = new Filter[newNumFilters + numFilters];
1231   Filter *newCosmeticFilters =
1232     new Filter[newNumCosmeticFilters + numCosmeticFilters];
1233   Filter *newHtmlFilters =
1234     new Filter[newNumHtmlFilters + numHtmlFilters];
1235   Filter *newExceptionFilters =
1236     new Filter[newNumExceptionFilters + numExceptionFilters];
1237   Filter *newNoFingerprintFilters =
1238     new Filter[newNumNoFingerprintFilters + numNoFingerprintFilters];
1239   Filter *newNoFingerprintExceptionFilters =
1240     new Filter[newNumNoFingerprintExceptionFilters
1241     + numNoFingerprintExceptionFilters];
1242   Filter *newNoFingerprintDomainOnlyFilters =
1243     new Filter[newNumNoFingerprintDomainOnlyFilters +
1244     numNoFingerprintDomainOnlyFilters];
1245   Filter *newNoFingerprintAntiDomainOnlyFilters =
1246     new Filter[newNumNoFingerprintAntiDomainOnlyFilters +
1247     numNoFingerprintAntiDomainOnlyFilters];
1248   Filter *newNoFingerprintDomainOnlyExceptionFilters =
1249     new Filter[newNumNoFingerprintDomainOnlyExceptionFilters
1250     + numNoFingerprintDomainOnlyExceptionFilters];
1251   Filter *newNoFingerprintAntiDomainOnlyExceptionFilters =
1252     new Filter[newNumNoFingerprintAntiDomainOnlyExceptionFilters
1253     + numNoFingerprintAntiDomainOnlyExceptionFilters];
1254 
1255   Filter *curFilters = newFilters;
1256   Filter *curCosmeticFilters = newCosmeticFilters;
1257   Filter *curHtmlFilters = newHtmlFilters;
1258   Filter *curExceptionFilters = newExceptionFilters;
1259   Filter *curNoFingerprintFilters = newNoFingerprintFilters;
1260   Filter *curNoFingerprintExceptionFilters = newNoFingerprintExceptionFilters;
1261   Filter *curNoFingerprintDomainOnlyFilters = newNoFingerprintDomainOnlyFilters;
1262   Filter *curNoFingerprintAntiDomainOnlyFilters =
1263     newNoFingerprintAntiDomainOnlyFilters;
1264   Filter *curNoFingerprintDomainOnlyExceptionFilters =
1265     newNoFingerprintDomainOnlyExceptionFilters;
1266   Filter *curNoFingerprintAntiDomainOnlyExceptionFilters =
1267     newNoFingerprintAntiDomainOnlyExceptionFilters;
1268 
1269   // If we've had a parse before copy the old data into the new data structure
1270   if (filters || cosmeticFilters || htmlFilters || exceptionFilters ||
1271       noFingerprintFilters || noFingerprintExceptionFilters ||
1272       noFingerprintDomainOnlyFilters ||
1273       noFingerprintDomainOnlyExceptionFilters ||
1274       noFingerprintAntiDomainOnlyFilters ||
1275       noFingerprintAntiDomainOnlyExceptionFilters) {
1276     // Copy the old data in, we can't simply use memcpy here
1277     // since filtres manages some pointers that get deleted.
1278     for (int i = 0; i < numFilters; i++) {
1279       newFilters[i].swapData(&(filters[i]));
1280     }
1281     for (int i = 0; i < numCosmeticFilters; i++) {
1282       newCosmeticFilters[i].swapData(&(cosmeticFilters[i]));
1283     }
1284     for (int i = 0; i < numHtmlFilters; i++) {
1285       newHtmlFilters[i].swapData(&(htmlFilters[i]));
1286     }
1287     for (int i = 0; i < numExceptionFilters; i++) {
1288       newExceptionFilters[i].swapData(&(exceptionFilters[i]));
1289     }
1290     for (int i = 0; i < numNoFingerprintFilters; i++) {
1291       newNoFingerprintFilters[i].swapData(&(noFingerprintFilters[i]));
1292     }
1293     for (int i = 0; i < numNoFingerprintExceptionFilters; i++) {
1294       newNoFingerprintExceptionFilters[i].swapData(
1295         &(noFingerprintExceptionFilters[i]));
1296     }
1297     for (int i = 0; i < numNoFingerprintDomainOnlyFilters; i++) {
1298       newNoFingerprintDomainOnlyFilters[i].swapData(
1299         &(noFingerprintDomainOnlyFilters[i]));
1300     }
1301     for (int i = 0; i < numNoFingerprintAntiDomainOnlyFilters; i++) {
1302       newNoFingerprintAntiDomainOnlyFilters[i].swapData(
1303         &(noFingerprintAntiDomainOnlyFilters[i]));
1304     }
1305     for (int i = 0; i < numNoFingerprintDomainOnlyExceptionFilters; i++) {
1306       newNoFingerprintDomainOnlyExceptionFilters[i].swapData(
1307         &(noFingerprintDomainOnlyExceptionFilters[i]));
1308     }
1309     for (int i = 0; i < numNoFingerprintAntiDomainOnlyExceptionFilters; i++) {
1310       newNoFingerprintAntiDomainOnlyExceptionFilters[i].swapData(
1311         &(noFingerprintAntiDomainOnlyExceptionFilters[i]));
1312     }
1313 
1314     // Free up the old memory for filter storage
1315     // Set the old filter lists borrwedMemory to true since it'll be taken by
1316     // the new filters.
1317     setFilterBorrowedMemory(filters, numFilters);
1318     setFilterBorrowedMemory(cosmeticFilters, numCosmeticFilters);
1319     setFilterBorrowedMemory(htmlFilters, numHtmlFilters);
1320     setFilterBorrowedMemory(exceptionFilters, numExceptionFilters);
1321     setFilterBorrowedMemory(noFingerprintFilters, numNoFingerprintFilters);
1322     setFilterBorrowedMemory(noFingerprintExceptionFilters,
1323         numNoFingerprintExceptionFilters);
1324     setFilterBorrowedMemory(noFingerprintDomainOnlyFilters,
1325         numNoFingerprintDomainOnlyFilters);
1326     setFilterBorrowedMemory(noFingerprintAntiDomainOnlyFilters,
1327         numNoFingerprintAntiDomainOnlyFilters);
1328     setFilterBorrowedMemory(noFingerprintDomainOnlyExceptionFilters,
1329         numNoFingerprintDomainOnlyExceptionFilters);
1330     setFilterBorrowedMemory(noFingerprintAntiDomainOnlyExceptionFilters,
1331         numNoFingerprintAntiDomainOnlyExceptionFilters);
1332     delete[] filters;
1333     delete[] cosmeticFilters;
1334     delete[] htmlFilters;
1335     delete[] exceptionFilters;
1336     delete[] noFingerprintFilters;
1337     delete[] noFingerprintExceptionFilters;
1338     delete[] noFingerprintDomainOnlyFilters;
1339     delete[] noFingerprintAntiDomainOnlyFilters;
1340     delete[] noFingerprintDomainOnlyExceptionFilters;
1341     delete[] noFingerprintAntiDomainOnlyExceptionFilters;
1342 
1343     // Adjust the current pointers to be just after the copied in data
1344     curFilters += numFilters;
1345     curCosmeticFilters += numCosmeticFilters;
1346     curHtmlFilters += numHtmlFilters;
1347     curExceptionFilters += numExceptionFilters;
1348     curNoFingerprintFilters += numNoFingerprintFilters;
1349     curNoFingerprintExceptionFilters += numNoFingerprintExceptionFilters;
1350     curNoFingerprintDomainOnlyFilters += numNoFingerprintDomainOnlyFilters;
1351     curNoFingerprintAntiDomainOnlyFilters +=
1352       numNoFingerprintAntiDomainOnlyFilters;
1353     curNoFingerprintDomainOnlyExceptionFilters +=
1354       numNoFingerprintDomainOnlyExceptionFilters;
1355     curNoFingerprintAntiDomainOnlyExceptionFilters +=
1356       numNoFingerprintAntiDomainOnlyExceptionFilters;
1357   }
1358 
1359   // And finally update with the new counts
1360   numFilters += newNumFilters;
1361   numCosmeticFilters += newNumCosmeticFilters;
1362   numHtmlFilters += newNumHtmlFilters;
1363   numExceptionFilters += newNumExceptionFilters;
1364   numNoFingerprintFilters += newNumNoFingerprintFilters;
1365   numNoFingerprintExceptionFilters += newNumNoFingerprintExceptionFilters;
1366   numNoFingerprintDomainOnlyFilters += newNumNoFingerprintDomainOnlyFilters;
1367   numNoFingerprintAntiDomainOnlyFilters +=
1368       newNumNoFingerprintAntiDomainOnlyFilters;
1369   numNoFingerprintDomainOnlyExceptionFilters +=
1370       newNumNoFingerprintDomainOnlyExceptionFilters;
1371   numNoFingerprintAntiDomainOnlyExceptionFilters +=
1372       newNumNoFingerprintAntiDomainOnlyExceptionFilters;
1373   numHostAnchoredFilters += newNumHostAnchoredFilters;
1374   numHostAnchoredExceptionFilters += newNumHostAnchoredExceptionFilters;
1375 
1376   // Adjust the new member list pointers
1377   filters = newFilters;
1378   cosmeticFilters = newCosmeticFilters;
1379   htmlFilters = newHtmlFilters;
1380   exceptionFilters = newExceptionFilters;
1381   noFingerprintFilters = newNoFingerprintFilters;
1382   noFingerprintExceptionFilters = newNoFingerprintExceptionFilters;
1383   noFingerprintDomainOnlyFilters = newNoFingerprintDomainOnlyFilters;
1384   noFingerprintAntiDomainOnlyFilters = newNoFingerprintAntiDomainOnlyFilters;
1385   noFingerprintDomainOnlyExceptionFilters =
1386     newNoFingerprintDomainOnlyExceptionFilters;
1387   noFingerprintAntiDomainOnlyExceptionFilters =
1388     newNoFingerprintAntiDomainOnlyExceptionFilters;
1389 
1390   p = input;
1391   lineStart = p;
1392 
1393   while (true) {
1394     if (isEndOfLine(*p) || *p == '\0') {
1395       Filter f;
1396       parseFilter(lineStart, p, &f, bloomFilter, exceptionBloomFilter,
1397           hostAnchoredHashSet,
1398           hostAnchoredExceptionHashSet,
1399           &simpleCosmeticFilters,
1400           preserveRules);
1401       if (!f.hasUnsupportedOptions()) {
1402         switch (f.filterType & FTListTypesMask) {
1403           case FTException:
1404             if (f.filterType & FTHostOnly) {
1405               // do nothing, handled by hash set.
1406             } else if (AdBlockClient::getFingerprint(nullptr, f)) {
1407               (*curExceptionFilters).swapData(&f);
1408               curExceptionFilters++;
1409             } else if (f.isDomainOnlyFilter()) {
1410               AddFilterDomainsToHashSet(&f,
1411                   noFingerprintDomainExceptionHashSet);
1412               (*curNoFingerprintDomainOnlyExceptionFilters).swapData(&f);
1413               curNoFingerprintDomainOnlyExceptionFilters++;
1414             } else if (f.isAntiDomainOnlyFilter()) {
1415               AddFilterDomainsToHashSet(&f,
1416                   noFingerprintAntiDomainExceptionHashSet);
1417               (*curNoFingerprintAntiDomainOnlyExceptionFilters).swapData(&f);
1418               curNoFingerprintAntiDomainOnlyExceptionFilters++;
1419             } else {
1420               (*curNoFingerprintExceptionFilters).swapData(&f);
1421               curNoFingerprintExceptionFilters++;
1422             }
1423             break;
1424           case FTElementHiding:
1425           case FTElementHidingException:
1426             (*curCosmeticFilters).swapData(&f);
1427             curCosmeticFilters++;
1428             break;
1429           case FTHTMLFiltering:
1430             (*curHtmlFilters).swapData(&f);
1431             curHtmlFilters++;
1432             break;
1433           case FTEmpty:
1434           case FTComment:
1435             // No need to store
1436             break;
1437           default:
1438             if (f.filterType & FTHostOnly) {
1439               // Do nothing
1440             } else if (AdBlockClient::getFingerprint(nullptr, f)) {
1441               (*curFilters).swapData(&f);
1442               curFilters++;
1443             } else if (f.isDomainOnlyFilter()) {
1444               AddFilterDomainsToHashSet(&f,
1445                   noFingerprintDomainHashSet);
1446               (*curNoFingerprintDomainOnlyFilters).swapData(&f);
1447               curNoFingerprintDomainOnlyFilters++;
1448             } else if (f.isAntiDomainOnlyFilter()) {
1449               AddFilterDomainsToHashSet(&f,
1450                   noFingerprintAntiDomainHashSet);
1451               (*curNoFingerprintAntiDomainOnlyFilters).swapData(&f);
1452               curNoFingerprintAntiDomainOnlyFilters++;
1453             } else {
1454               (*curNoFingerprintFilters).swapData(&f);
1455               curNoFingerprintFilters++;
1456             }
1457             break;
1458         }
1459       }
1460       lineStart = p + 1;
1461     }
1462 
1463     if (*p == '\0') {
1464       break;
1465     }
1466 
1467     p++;
1468   }
1469 
1470 #ifdef PERF_STATS
1471   cout << "Simple cosmetic filter size: "
1472     << simpleCosmeticFilters.GetSize() << endl;
1473 #endif
1474 
1475   return true;
1476 }
1477 
1478 void AdBlockClient::addTag(const std::string &tag) {
1479   if (tags.find(tag) == tags.end()) {
1480     tags.insert(tag);
1481   }
1482 }
1483 
1484 void AdBlockClient::removeTag(const std::string &tag) {
1485   auto it = tags.find(tag);
1486   if (it != tags.end()) {
1487     tags.erase(it);
1488   }
1489 }
1490 
1491 bool AdBlockClient::tagExists(const std::string &tag) const {
1492   return tags.find(tag) != tags.end();
1493 }
1494 
1495 // Fills the specified buffer if specified, returns the number of characters
1496 // written or needed
1497 int serializeFilters(char * buffer, size_t bufferSizeAvail,
1498     Filter *f, int numFilters) {
1499   char sz[256];
1500   int bufferSize = 0;
1501   for (int i = 0; i < numFilters; i++) {
1502     int sprintfLen = snprintf(sz, sizeof(sz), "%x,%x,%x",
1503         static_cast<int>(f->filterType), static_cast<int>(f->filterOption),
1504         static_cast<int>(f->antiFilterOption));
1505     if (buffer) {
1506       snprintf(buffer + bufferSize, bufferSizeAvail, "%s", sz);
1507     }
1508     bufferSize += sprintfLen;
1509     // Extra null termination
1510     bufferSize++;
1511 
1512     if (f->data) {
1513       if (buffer) {
1514         snprintf(buffer + bufferSize, bufferSizeAvail, "%s", f->data);
1515       }
1516       bufferSize += static_cast<int>(strlen(f->data));
1517     }
1518     bufferSize++;
1519 
1520     if (f->tagLen > 0) {
1521       if (buffer) {
1522         buffer[bufferSize] = '~';
1523         buffer[bufferSize + 1] = '#';
1524         memcpy(buffer + bufferSize + 2, f->tag, f->tagLen);
1525         buffer[bufferSize + 2 + f->tagLen] = ',';
1526       }
1527       bufferSize += f->tagLen + 3;
1528     }
1529 
1530     if (f->domainList) {
1531       if (buffer) {
1532         snprintf(buffer + bufferSize, bufferSizeAvail, "%s", f->domainList);
1533       }
1534       bufferSize += static_cast<int>(strlen(f->domainList));
1535     }
1536     // Extra null termination
1537     bufferSize++;
1538     if (f->host) {
1539       if (buffer) {
1540         snprintf(buffer + bufferSize, bufferSizeAvail, "%s", f->host);
1541       }
1542       bufferSize += static_cast<int>(strlen(f->host));
1543     }
1544     // Extra null termination
1545     bufferSize++;
1546     f++;
1547   }
1548   return bufferSize;
1549 }
1550 
1551 // Returns a newly allocated buffer, caller must manually delete[] the buffer
1552 char * AdBlockClient::serialize(int *totalSize,
1553     bool ignoreCosmeticFilters,
1554     bool ignoreHtmlFilters) {
1555   *totalSize = 0;
1556   int adjustedNumCosmeticFilters =
1557     ignoreCosmeticFilters ? 0 : numCosmeticFilters;
1558   int adjustedNumHtmlFilters = ignoreHtmlFilters ? 0 : numHtmlFilters;
1559 
1560   uint32_t hostAnchoredHashSetSize = 0;
1561   char *hostAnchoredHashSetBuffer = nullptr;
1562   if (hostAnchoredHashSet) {
1563     hostAnchoredHashSetBuffer =
1564       hostAnchoredHashSet->Serialize(&hostAnchoredHashSetSize);
1565   }
1566 
1567   uint32_t hostAnchoredExceptionHashSetSize = 0;
1568   char *hostAnchoredExceptionHashSetBuffer = nullptr;
1569   if (hostAnchoredExceptionHashSet) {
1570     hostAnchoredExceptionHashSetBuffer =
1571       hostAnchoredExceptionHashSet->Serialize(
1572           &hostAnchoredExceptionHashSetSize);
1573   }
1574 
1575   uint32_t noFingerprintDomainHashSetSize = 0;
1576   char *noFingerprintDomainHashSetBuffer = nullptr;
1577   if (noFingerprintDomainHashSet) {
1578     noFingerprintDomainHashSetBuffer =
1579       noFingerprintDomainHashSet->Serialize(&noFingerprintDomainHashSetSize);
1580   }
1581 
1582   uint32_t noFingerprintAntiDomainHashSetSize = 0;
1583   char *noFingerprintAntiDomainHashSetBuffer = nullptr;
1584   if (noFingerprintAntiDomainHashSet) {
1585     noFingerprintAntiDomainHashSetBuffer =
1586       noFingerprintAntiDomainHashSet->Serialize(
1587           &noFingerprintAntiDomainHashSetSize);
1588   }
1589 
1590   uint32_t noFingerprintDomainExceptionHashSetSize = 0;
1591   char *noFingerprintDomainExceptionHashSetBuffer = nullptr;
1592   if (noFingerprintDomainExceptionHashSet) {
1593     noFingerprintDomainExceptionHashSetBuffer =
1594       noFingerprintDomainExceptionHashSet->Serialize(
1595           &noFingerprintDomainExceptionHashSetSize);
1596   }
1597 
1598   uint32_t noFingerprintAntiDomainExceptionHashSetSize = 0;
1599   char *noFingerprintAntiDomainExceptionHashSetBuffer = nullptr;
1600   if (noFingerprintAntiDomainExceptionHashSet) {
1601     noFingerprintAntiDomainExceptionHashSetBuffer =
1602       noFingerprintAntiDomainExceptionHashSet->Serialize(
1603           &noFingerprintAntiDomainExceptionHashSetSize);
1604   }
1605 
1606   // Get the number of bytes that we'll need
1607   char sz[512];
1608   *totalSize += 1 + snprintf(sz, sizeof(sz),
1609       "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x",
1610       numFilters,
1611       numExceptionFilters, adjustedNumCosmeticFilters, adjustedNumHtmlFilters,
1612       numNoFingerprintFilters, numNoFingerprintExceptionFilters,
1613       numNoFingerprintDomainOnlyFilters,
1614       numNoFingerprintAntiDomainOnlyFilters,
1615       numNoFingerprintDomainOnlyExceptionFilters,
1616       numNoFingerprintAntiDomainOnlyExceptionFilters,
1617       numHostAnchoredFilters, numHostAnchoredExceptionFilters,
1618       bloomFilter ? bloomFilter->getByteBufferSize() : 0, exceptionBloomFilter
1619         ? exceptionBloomFilter->getByteBufferSize() : 0,
1620         hostAnchoredHashSetSize, hostAnchoredExceptionHashSetSize,
1621         noFingerprintDomainHashSetSize,
1622         noFingerprintAntiDomainHashSetSize,
1623         noFingerprintDomainExceptionHashSetSize,
1624         noFingerprintAntiDomainExceptionHashSetSize);
1625   *totalSize += serializeFilters(nullptr, 0, filters, numFilters) +
1626     serializeFilters(nullptr, 0, exceptionFilters, numExceptionFilters) +
1627     serializeFilters(nullptr, 0, cosmeticFilters, adjustedNumCosmeticFilters) +
1628     serializeFilters(nullptr, 0, htmlFilters, adjustedNumHtmlFilters) +
1629     serializeFilters(nullptr, 0,
1630         noFingerprintFilters, numNoFingerprintFilters) +
1631     serializeFilters(nullptr, 0, noFingerprintExceptionFilters,
1632         numNoFingerprintExceptionFilters) +
1633     serializeFilters(nullptr, 0,
1634         noFingerprintDomainOnlyFilters, numNoFingerprintDomainOnlyFilters) +
1635     serializeFilters(nullptr, 0,
1636         noFingerprintAntiDomainOnlyFilters,
1637         numNoFingerprintAntiDomainOnlyFilters) +
1638     serializeFilters(nullptr, 0, noFingerprintDomainOnlyExceptionFilters,
1639         numNoFingerprintDomainOnlyExceptionFilters) +
1640     serializeFilters(nullptr, 0, noFingerprintAntiDomainOnlyExceptionFilters,
1641         numNoFingerprintAntiDomainOnlyExceptionFilters);
1642 
1643   *totalSize += bloomFilter ? bloomFilter->getByteBufferSize() : 0;
1644   *totalSize += exceptionBloomFilter
1645     ? exceptionBloomFilter->getByteBufferSize() : 0;
1646   *totalSize += hostAnchoredHashSetSize;
1647   *totalSize += hostAnchoredExceptionHashSetSize;
1648   *totalSize += noFingerprintDomainHashSetSize;
1649   *totalSize += noFingerprintAntiDomainHashSetSize;
1650   *totalSize += noFingerprintDomainExceptionHashSetSize;
1651   *totalSize += noFingerprintAntiDomainExceptionHashSetSize;
1652 
1653   // Allocate it
1654   int pos = 0;
1655   char *buffer = new char[*totalSize];
1656   memset(buffer, 0, *totalSize);
1657 
1658   // And start copying stuff in
1659   snprintf(buffer, *totalSize, "%s", sz);
1660   pos += static_cast<int>(strlen(sz)) + 1;
1661   pos += serializeFilters(buffer + pos, *totalSize - pos, filters, numFilters);
1662   pos += serializeFilters(buffer + pos, *totalSize - pos,
1663       exceptionFilters, numExceptionFilters);
1664   pos += serializeFilters(buffer + pos, *totalSize - pos, cosmeticFilters,
1665       adjustedNumCosmeticFilters);
1666   pos += serializeFilters(buffer + pos, *totalSize - pos, htmlFilters,
1667       adjustedNumHtmlFilters);
1668   pos += serializeFilters(buffer + pos, *totalSize - pos, noFingerprintFilters,
1669       numNoFingerprintFilters);
1670   pos += serializeFilters(buffer + pos, *totalSize - pos,
1671       noFingerprintExceptionFilters, numNoFingerprintExceptionFilters);
1672   pos += serializeFilters(buffer + pos, *totalSize - pos,
1673       noFingerprintDomainOnlyFilters,
1674       numNoFingerprintDomainOnlyFilters);
1675   pos += serializeFilters(buffer + pos, *totalSize - pos,
1676       noFingerprintAntiDomainOnlyFilters,
1677       numNoFingerprintAntiDomainOnlyFilters);
1678   pos += serializeFilters(buffer + pos, *totalSize - pos,
1679       noFingerprintDomainOnlyExceptionFilters,
1680       numNoFingerprintDomainOnlyExceptionFilters);
1681   pos += serializeFilters(buffer + pos, *totalSize - pos,
1682       noFingerprintAntiDomainOnlyExceptionFilters,
1683       numNoFingerprintAntiDomainOnlyExceptionFilters);
1684 
1685   if (bloomFilter) {
1686     memcpy(buffer + pos, bloomFilter->getBuffer(),
1687         bloomFilter->getByteBufferSize());
1688     pos += bloomFilter->getByteBufferSize();
1689   }
1690   if (exceptionBloomFilter) {
1691     memcpy(buffer + pos, exceptionBloomFilter->getBuffer(),
1692         exceptionBloomFilter->getByteBufferSize());
1693     pos += exceptionBloomFilter->getByteBufferSize();
1694   }
1695   if (hostAnchoredHashSet) {
1696     memcpy(buffer + pos, hostAnchoredHashSetBuffer, hostAnchoredHashSetSize);
1697     pos += hostAnchoredHashSetSize;
1698     delete[] hostAnchoredHashSetBuffer;
1699   }
1700   if (hostAnchoredExceptionHashSet) {
1701     memcpy(buffer + pos, hostAnchoredExceptionHashSetBuffer,
1702         hostAnchoredExceptionHashSetSize);
1703     pos += hostAnchoredExceptionHashSetSize;
1704     delete[] hostAnchoredExceptionHashSetBuffer;
1705   }
1706   if (noFingerprintDomainHashSet) {
1707     memcpy(buffer + pos, noFingerprintDomainHashSetBuffer,
1708         noFingerprintDomainHashSetSize);
1709     pos += noFingerprintDomainHashSetSize;
1710     delete[] noFingerprintDomainHashSetBuffer;
1711   }
1712   if (noFingerprintAntiDomainHashSet) {
1713     memcpy(buffer + pos, noFingerprintAntiDomainHashSetBuffer,
1714         noFingerprintAntiDomainHashSetSize);
1715     pos += noFingerprintAntiDomainHashSetSize;
1716     delete[] noFingerprintAntiDomainHashSetBuffer;
1717   }
1718   if (noFingerprintDomainExceptionHashSet) {
1719     memcpy(buffer + pos, noFingerprintDomainExceptionHashSetBuffer,
1720         noFingerprintDomainExceptionHashSetSize);
1721     pos += noFingerprintDomainExceptionHashSetSize;
1722     delete[] noFingerprintDomainExceptionHashSetBuffer;
1723   }
1724   if (noFingerprintAntiDomainExceptionHashSet) {
1725     memcpy(buffer + pos, noFingerprintAntiDomainExceptionHashSetBuffer,
1726         noFingerprintAntiDomainExceptionHashSetSize);
1727     pos += noFingerprintAntiDomainExceptionHashSetSize;
1728     delete[] noFingerprintAntiDomainExceptionHashSetBuffer;
1729   }
1730 
1731   return buffer;
1732 }
1733 
1734 // Fills the specified buffer if specified, returns the number of characters
1735 // written or needed
1736 int deserializeFilters(char *buffer, Filter *f, int numFilters) {
1737   int pos = 0;
1738   for (int i = 0; i < numFilters; i++) {
1739     f->borrowed_data = true;
1740     sscanf(buffer + pos, "%x,%x,%x",
1741         reinterpret_cast<unsigned int*>(&f->filterType),
1742         reinterpret_cast<unsigned int*>(&f->filterOption),
1743         reinterpret_cast<unsigned int*>(&f->antiFilterOption));
1744     pos += static_cast<int>(strlen(buffer + pos)) + 1;
1745 
1746     if (*(buffer + pos) == '\0') {
1747       f->data = nullptr;
1748     } else {
1749       f->data = buffer + pos;
1750       pos += static_cast<int>(strlen(f->data));
1751     }
1752     pos++;
1753 
1754     // If the domain section starts with a # then we're in a tag
1755     // block.
1756     if (buffer[pos] == '~' && buffer[pos + 1] == '#') {
1757       pos += 2;
1758       f->tag = buffer + pos;
1759       f->tagLen = 0;
1760       while (buffer[pos + f->tagLen] != '\0') {
1761         if (buffer[pos + f->tagLen] == ',') {
1762           pos += f->tagLen + 1;
1763           break;
1764         }
1765         f->tagLen++;
1766       }
1767     }
1768 
1769     if (*(buffer + pos) == '\0') {
1770       f->domainList = nullptr;
1771     } else {
1772       f->domainList = buffer + pos;
1773       pos += static_cast<int>(strlen(f->domainList));
1774     }
1775     pos++;
1776 
1777     if (*(buffer + pos) == '\0') {
1778       f->host = nullptr;
1779     } else {
1780       f->host = buffer + pos;
1781       pos += static_cast<int>(strlen(f->host));
1782     }
1783     pos++;
1784     f++;
1785   }
1786   return pos;
1787 }
1788 
1789 bool AdBlockClient::deserialize(char *buffer) {
1790   clear();
1791   deserializedBuffer = buffer;
1792   int bloomFilterSize = 0, exceptionBloomFilterSize = 0,
1793       hostAnchoredHashSetSize = 0, hostAnchoredExceptionHashSetSize = 0,
1794       noFingerprintDomainHashSetSize = 0,
1795       noFingerprintAntiDomainHashSetSize = 0,
1796       noFingerprintDomainExceptionHashSetSize = 0,
1797       noFingerprintAntiDomainExceptionHashSetSize = 0;
1798   int pos = 0;
1799   sscanf(buffer + pos,
1800       "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x",
1801      (unsigned int*)&numFilters,
1802       (unsigned int*)&numExceptionFilters, (unsigned int*)&numCosmeticFilters, (unsigned int*)&numHtmlFilters,
1803       (unsigned int*)&numNoFingerprintFilters, (unsigned int*)&numNoFingerprintExceptionFilters,
1804       (unsigned int*)&numNoFingerprintDomainOnlyFilters,
1805       (unsigned int*)&numNoFingerprintAntiDomainOnlyFilters,
1806       (unsigned int*)&numNoFingerprintDomainOnlyExceptionFilters,
1807       (unsigned int*)&numNoFingerprintAntiDomainOnlyExceptionFilters,
1808       (unsigned int*)&numHostAnchoredFilters, (unsigned int*)&numHostAnchoredExceptionFilters,
1809       (unsigned int*)&bloomFilterSize, (unsigned int*)&exceptionBloomFilterSize,
1810       (unsigned int*)&hostAnchoredHashSetSize, (unsigned int*)&hostAnchoredExceptionHashSetSize,
1811       (unsigned int*)&noFingerprintDomainHashSetSize,
1812       (unsigned int*)&noFingerprintAntiDomainHashSetSize,
1813       (unsigned int*)&noFingerprintDomainExceptionHashSetSize,
1814       (unsigned int*)&noFingerprintAntiDomainExceptionHashSetSize);
1815   pos += static_cast<int>(strlen(buffer + pos)) + 1;
1816 
1817   filters = new Filter[numFilters];
1818   exceptionFilters = new Filter[numExceptionFilters];
1819   cosmeticFilters = new Filter[numCosmeticFilters];
1820   htmlFilters = new Filter[numHtmlFilters];
1821   noFingerprintFilters = new Filter[numNoFingerprintFilters];
1822   noFingerprintExceptionFilters = new Filter[numNoFingerprintExceptionFilters];
1823   noFingerprintDomainOnlyFilters =
1824     new Filter[numNoFingerprintDomainOnlyFilters];
1825   noFingerprintAntiDomainOnlyFilters =
1826     new Filter[numNoFingerprintAntiDomainOnlyFilters];
1827   noFingerprintDomainOnlyExceptionFilters =
1828     new Filter[numNoFingerprintDomainOnlyExceptionFilters];
1829   noFingerprintAntiDomainOnlyExceptionFilters =
1830     new Filter[numNoFingerprintAntiDomainOnlyExceptionFilters];
1831 
1832   pos += deserializeFilters(buffer + pos, filters, numFilters);
1833   pos += deserializeFilters(buffer + pos,
1834       exceptionFilters, numExceptionFilters);
1835   pos += deserializeFilters(buffer + pos,
1836       cosmeticFilters, numCosmeticFilters);
1837   pos += deserializeFilters(buffer + pos,
1838       htmlFilters, numHtmlFilters);
1839   pos += deserializeFilters(buffer + pos,
1840       noFingerprintFilters, numNoFingerprintFilters);
1841   pos += deserializeFilters(buffer + pos,
1842       noFingerprintExceptionFilters, numNoFingerprintExceptionFilters);
1843 
1844   pos += deserializeFilters(buffer + pos,
1845       noFingerprintDomainOnlyFilters, numNoFingerprintDomainOnlyFilters);
1846   pos += deserializeFilters(buffer + pos,
1847       noFingerprintAntiDomainOnlyFilters,
1848       numNoFingerprintAntiDomainOnlyFilters);
1849   pos += deserializeFilters(buffer + pos,
1850       noFingerprintDomainOnlyExceptionFilters,
1851       numNoFingerprintDomainOnlyExceptionFilters);
1852   pos += deserializeFilters(buffer + pos,
1853       noFingerprintAntiDomainOnlyExceptionFilters,
1854       numNoFingerprintAntiDomainOnlyExceptionFilters);
1855 
1856   initBloomFilter(&bloomFilter, buffer + pos, bloomFilterSize);
1857   pos += bloomFilterSize;
1858   initBloomFilter(&exceptionBloomFilter,
1859       buffer + pos, exceptionBloomFilterSize);
1860   pos += exceptionBloomFilterSize;
1861   if (!initHashSet(&hostAnchoredHashSet,
1862         buffer + pos, hostAnchoredHashSetSize)) {
1863       return false;
1864   }
1865   pos += hostAnchoredHashSetSize;
1866   if (!initHashSet(&hostAnchoredExceptionHashSet,
1867         buffer + pos, hostAnchoredExceptionHashSetSize)) {
1868       return false;
1869   }
1870   pos += hostAnchoredExceptionHashSetSize;
1871 
1872 
1873   if (!initHashSet(&noFingerprintDomainHashSet,
1874         buffer + pos, noFingerprintDomainHashSetSize)) {
1875       return false;
1876   }
1877   pos += noFingerprintDomainHashSetSize;
1878 
1879   if (!initHashSet(&noFingerprintAntiDomainHashSet,
1880         buffer + pos, noFingerprintAntiDomainHashSetSize)) {
1881       return false;
1882   }
1883   pos += noFingerprintAntiDomainHashSetSize;
1884 
1885   if (!initHashSet(&noFingerprintDomainExceptionHashSet,
1886         buffer + pos, noFingerprintDomainExceptionHashSetSize)) {
1887       return false;
1888   }
1889   pos += noFingerprintDomainExceptionHashSetSize;
1890 
1891   if (!initHashSet(&noFingerprintAntiDomainExceptionHashSet,
1892         buffer + pos, noFingerprintAntiDomainExceptionHashSetSize)) {
1893       return false;
1894   }
1895   pos += noFingerprintAntiDomainExceptionHashSetSize;
1896 
1897   return true;
1898 }
1899 
1900 void AdBlockClient::enableBadFingerprintDetection() {
1901   if (badFingerprintsHashSet) {
1902     return;
1903   }
1904 
1905   badFingerprintsHashSet = new BadFingerprintsHashSet();
1906   for (unsigned int i = 0; i < sizeof(badFingerprints)
1907       / sizeof(badFingerprints[0]); i++) {
1908     badFingerprintsHashSet->Add(BadFingerprint(badFingerprints[i]));
1909   }
1910 }
1911 
1912   uint64_t HashFn2Byte::operator()(const char *input, int len,
1913       unsigned char lastCharCode, uint64_t lastHash) {
1914         UNUSED(len);
1915         UNUSED(lastCharCode);
1916         UNUSED(lastHash);
1917     return (((uint64_t)input[1]) << 8) | input[0];  }
1918 
1919   uint64_t HashFn2Byte::operator()(const char *input, int len) {
1920     UNUSED(len);
1921     return (((uint64_t)input[1]) << 8) | input[0];
1922   }