File indexing completed on 2024-11-17 04:55:17

0001 /*
0002     SPDX-License-Identifier: MPL-2.0
0003 */
0004 
0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license.
0006  * This Source Code Form is subject to the terms of the Mozilla Public
0007  * License, v. 2.0. If a copy of the MPL was not distributed with this
0008  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
0009 
0010 #include "./filter.h"
0011 #include <string.h>
0012 #include <stdio.h>
0013 #include <math.h>
0014 #include <iostream>
0015 #include <set>
0016 #include <string>
0017 #ifdef ENABLE_REGEX
0018 #include <regex> // NOLINT
0019 #endif
0020 #include "./hash_set.h"
0021 #include "./ad_block_client.h"
0022 #include "hashFn.h"
0023 #include "BloomFilter.h"
0024 
// Marks a value as intentionally unused by casting it to void.
#define UNUSED(x) ( (void)(x) )

// File-local hash functor (constructed with 19) used by Filter::hash().
static HashFn h(19);

// Defined elsewhere.  Filter::matches() calls it to obtain the host of the
// input URL when the caller did not supply one; presumably the host length is
// written to *len -- confirm against the definition.
const char * getUrlHost(const char *input, int *len);
0030 
0031 Filter::Filter() :
0032   borrowed_data(false),
0033   filterType(FTNoFilterType),
0034   filterOption(FONoFilterOption),
0035   antiFilterOption(FONoFilterOption),
0036   ruleDefinition(nullptr),
0037   data(nullptr),
0038   dataLen(-1),
0039   domainList(nullptr),
0040   tag(nullptr),
0041   tagLen(0),
0042   host(nullptr),
0043   hostLen(-1),
0044   domains(nullptr),
0045   antiDomains(nullptr),
0046   domainsParsed(false) {
0047 }
0048 
0049 Filter::~Filter() {
0050   if (domains) {
0051     delete domains;
0052   }
0053   if (antiDomains) {
0054     delete antiDomains;
0055   }
0056   if (!borrowed_data) {
0057     if (data) {
0058       delete[] data;
0059     }
0060     if (ruleDefinition) {
0061       delete[] ruleDefinition;
0062     }
0063     if (domainList) {
0064       delete[] domainList;
0065     }
0066     if (tag) {
0067       delete[] tag;
0068     }
0069     if (host) {
0070       delete[] host;
0071     }
0072   }
0073 }
0074 
0075 Filter::Filter(const char * data, int dataLen,
0076                char *domainList,
0077                const char * host, int hostLen,
0078                char *tag, int tagLen) :
0079       borrowed_data(true), filterType(FTNoFilterType),
0080       filterOption(FONoFilterOption),
0081       antiFilterOption(FONoFilterOption), ruleDefinition(nullptr),
0082       data(const_cast<char*>(data)), dataLen(dataLen),
0083       domainList(domainList),
0084       tag(tag), tagLen(tagLen),
0085       host(const_cast<char*>(host)),
0086       hostLen(hostLen), domains(nullptr),
0087       antiDomains(nullptr), domainsParsed(false) {
0088   }
0089 
0090 Filter::Filter(FilterType filterType, FilterOption filterOption,
0091                FilterOption antiFilterOption,
0092                const char * data, int dataLen,
0093                char *domainList,
0094                const char * host, int hostLen,
0095                char *tag, int tagLen) :
0096       borrowed_data(true), filterType(filterType),
0097       filterOption(filterOption),
0098       antiFilterOption(antiFilterOption), ruleDefinition(nullptr),
0099       data(const_cast<char*>(data)), dataLen(dataLen),
0100       domainList(domainList),
0101       tag(tag), tagLen(tagLen),
0102       host(const_cast<char *>(host)), hostLen(hostLen),
0103       domains(nullptr), antiDomains(nullptr), domainsParsed(false) {
0104   }
0105 
0106 Filter::Filter(const Filter &other) {
0107   borrowed_data = other.borrowed_data;
0108   filterType = other.filterType;
0109   filterOption = other.filterOption;
0110   antiFilterOption = other.antiFilterOption;
0111   dataLen = other.dataLen;
0112   hostLen = other.hostLen;
0113   domainsParsed = false;
0114   domains = nullptr;
0115   antiDomains = nullptr;
0116   if (other.dataLen == -1 && other.data) {
0117     dataLen = static_cast<int>(strlen(other.data));
0118   }
0119 
0120   if (other.borrowed_data) {
0121     data = other.data;
0122     domainList = other.domainList;
0123     tag = other.tag;
0124     tagLen = other.tagLen;
0125     host = other.host;
0126     ruleDefinition = other.ruleDefinition;
0127   } else {
0128     if (other.data) {
0129       data = new char[dataLen];
0130       memcpy(data, other.data, dataLen);
0131     } else {
0132       data = nullptr;
0133     }
0134     if (other.domainList) {
0135        size_t len = strlen(other.domainList) + 1;
0136        domainList = new char[len];
0137        snprintf(domainList, len, "%s", other.domainList);
0138     } else {
0139       domainList = nullptr;
0140     }
0141     if (other.tagLen > 0) {
0142        tag = new char[other.tagLen];
0143        memcpy(tag, other.tag, other.tagLen);
0144        tagLen = other.tagLen;
0145     } else {
0146       tag = nullptr;
0147       tagLen = 0;
0148     }
0149     if (other.host) {
0150       size_t len = strlen(other.host) + 1;
0151       host = new char[len];
0152       snprintf(host, len, "%s", other.host);
0153     } else {
0154       host = nullptr;
0155     }
0156 
0157     if (other.ruleDefinition) {
0158       size_t len = strlen(other.ruleDefinition) + 1;
0159       ruleDefinition = new char[len];
0160       snprintf(ruleDefinition, len, "%s", other.ruleDefinition);
0161     } else {
0162       ruleDefinition = nullptr;
0163     }
0164   }
0165 }
0166 
0167 void Filter::swapData(Filter *other) {
0168   FilterType tempFilterType = filterType;
0169   FilterOption tempFilterOption = filterOption;
0170   FilterOption tempAntiFilterOption = antiFilterOption;
0171   char *tempData = data;
0172   int tempDataLen = dataLen;
0173   char *tempRuleDefinition = ruleDefinition;
0174   char *tempDomainList = domainList;
0175   char *temptag = tag;
0176   int temptagLen = tagLen;
0177   char *tempHost = host;
0178   int tempHostLen = hostLen;
0179   bool tempDomainsParsed = domainsParsed;
0180   HashSet<ContextDomain>* tempDomains = domains;
0181   HashSet<ContextDomain>* tempAntiDomains = antiDomains;
0182 
0183   filterType = other->filterType;
0184   filterOption = other->filterOption;
0185   antiFilterOption = other->antiFilterOption;
0186   ruleDefinition = other->ruleDefinition;;
0187   data = other->data;
0188   dataLen = other->dataLen;
0189   domainList = other->domainList;
0190   tag = other->tag;
0191   tagLen = other->tagLen;
0192   host = other->host;
0193   hostLen = other->hostLen;
0194   domainsParsed = other->domainsParsed;
0195   domains = other->domains;
0196   antiDomains = other->antiDomains;
0197 
0198   other->filterType = tempFilterType;
0199   other->filterOption = tempFilterOption;
0200   other->antiFilterOption = tempAntiFilterOption;
0201   other->ruleDefinition = tempRuleDefinition;
0202   other->data = tempData;
0203   other->dataLen = tempDataLen;
0204   other->domainList = tempDomainList;
0205   other->tag = temptag;
0206   other->tagLen = temptagLen;
0207   other->host = tempHost;
0208   other->hostLen = tempHostLen;
0209   other->domainsParsed = tempDomainsParsed;
0210   other->domains = tempDomains;
0211   other->antiDomains = tempAntiDomains;
0212 }
0213 
0214 bool Filter::containsDomain(const char* domain, size_t domainLen,
0215     bool anti) const {
0216   if (!anti) {
0217     if (!domains) {
0218       return false;
0219     }
0220     return domains->Exists(ContextDomain(domain, domainLen));
0221   }
0222 
0223   if (!antiDomains) {
0224     return false;
0225   }
0226   return antiDomains->Exists(ContextDomain(domain, domainLen));
0227 }
0228 
0229 uint32_t Filter::getDomainCount(bool anti) {
0230   parseDomains(domainList);
0231   if (anti) {
0232     if (!antiDomains) {
0233       return 0;
0234     }
0235     return antiDomains->GetSize();
0236   }
0237   if (!domains) {
0238     return 0;
0239   }
0240   return domains->GetSize();
0241 }
0242 
0243 bool Filter::isDomainOnlyFilter() {
0244   parseDomains(domainList);
0245   return getDomainCount(false) && !getDomainCount(true);
0246 }
0247 
0248 bool Filter::isAntiDomainOnlyFilter() {
0249   parseDomains(domainList);
0250   return getDomainCount(true) && !getDomainCount(false);
0251 }
0252 
0253 void Filter::parseOption(const char *input, int len) {
0254   FilterOption *pFilterOption = &filterOption;
0255   const char *pStart = input;
0256   if (input[0] == '~') {
0257     pFilterOption = &antiFilterOption;
0258     pStart++;
0259     len--;
0260   }
0261 
0262   if (len >= 7 && !strncmp(pStart, "domain=", 7)) {
0263     len -= 7;
0264     domainList = new char[len + 1];
0265     domainList[len] = '\0';
0266     memcpy(domainList, pStart + 7, len);
0267   } else if (len >= 4 && !strncmp(pStart, "tag=", 4)) {
0268     len -= 4;
0269     tag = new char[len];
0270     memcpy(tag, pStart + 4, len);
0271     tagLen = len;
0272   } else if (!strncmp(pStart, "script", len)) {
0273     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOScript);
0274   } else if (!strncmp(pStart, "image", len)) {
0275     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOImage);
0276   } else if (!strncmp(pStart, "stylesheet", len)) {
0277     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOStylesheet);
0278   } else if (!strncmp(pStart, "object", len)) {
0279     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOObject);
0280   } else if (!strncmp(pStart, "xmlhttprequest", len)) {
0281     *pFilterOption =
0282       static_cast<FilterOption>(*pFilterOption | FOXmlHttpRequest);
0283   } else if (!strncmp(pStart, "object-subrequest", len)) {
0284     *pFilterOption =
0285       static_cast<FilterOption>(*pFilterOption | FOObjectSubrequest);
0286   } else if (!strncmp(pStart, "subdocument", len)) {
0287     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOSubdocument);
0288   } else if (!strncmp(pStart, "document", len)) {
0289     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FODocument);
0290   } else if (!strncmp(pStart, "xbl", len)) {
0291     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOXBL);
0292   } else if (!strncmp(pStart, "collapse", len)) {
0293     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOCollapse);
0294   } else if (!strncmp(pStart, "donottrack", len)) {
0295     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FODoNotTrack);
0296   } else if (!strncmp(pStart, "other", len)) {
0297     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOOther);
0298   } else if (!strncmp(pStart, "elemhide", len)) {
0299     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOElemHide);
0300   } else if (!strncmp(pStart, "third-party", len)) {
0301     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOThirdParty);
0302   } else if (!strncmp(pStart, "first-party", len)) {
0303     // Same as ~third-party
0304     *pFilterOption = static_cast<FilterOption>(
0305         *pFilterOption | FONotThirdParty);
0306   } else if (!strncmp(pStart, "ping", len)) {
0307     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOPing);
0308   } else if (!strncmp(pStart, "popup", len)) {
0309     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOPopup);
0310   } else if (len >= 4 && !strncmp(pStart, "csp=", 4)) {
0311     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOCSP);
0312   } else if (len >= 9 && !strncmp(pStart, "redirect=", 9)) {
0313     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FORedirect);
0314   } else if (!strncmp(pStart, "font", len)) {
0315     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOFont);
0316   } else if (!strncmp(pStart, "media", len)) {
0317     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOMedia);
0318   } else if (!strncmp(pStart, "webrtc", len)) {
0319     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOWebRTC);
0320   } else if (!strncmp(pStart, "generichide", len)) {
0321     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOGenericHide);
0322   } else if (!strncmp(pStart, "genericblock", len)) {
0323     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOGenericBlock);
0324   } else if (!strncmp(pStart, "empty", len)) {
0325     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOEmpty);
0326   } else if (!strncmp(pStart, "websocket", len)) {
0327     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOWebsocket);
0328   } else if (!strncmp(pStart, "important", len)) {
0329     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOImportant);
0330   } else if (!strncmp(pStart, "explicitcancel", len)) {
0331     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOExplicitCancel);
0332   } else {
0333     *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOUnknown);
0334     std::string option(pStart, len);
0335     if (unknownOptions.find(option) == unknownOptions.end()) {
0336       std::cout << "Unrecognized filter option: " << option << std::endl;
0337       unknownOptions.insert(option);
0338     }
0339   }
0340   // Otherwise just ignore the option, maybe something new we don't support yet
0341 }
0342 
0343 void Filter::parseOptions(const char *input) {
0344   filterOption = FONoFilterOption;
0345   antiFilterOption = FONoFilterOption;
0346   int startOffset = 0;
0347   int len = 0;
0348   const char *p = input;
0349   while (*p != '\0' && !isEndOfLine(*p)) {
0350     if (*p == ',') {
0351       parseOption(input + startOffset, len);
0352       startOffset += len + 1;
0353       len = -1;
0354     }
0355     p++;
0356     len++;
0357   }
0358   parseOption(input + startOffset, len);
0359 }
0360 
// Returns true when the last |subLen| bytes of |input| (which holds
// |inputLen| bytes) equal the first |subLen| bytes of |sub|.  An empty |sub|
// always matches.
bool endsWith(const char *input, const char *sub, int inputLen, int subLen) {
  if (inputLen < subLen) {
    return false;
  }

  const char *tail = input + (inputLen - subLen);
  for (int i = 0; i < subLen; ++i) {
    if (tail[i] != sub[i]) {
      return false;
    }
  }
  return true;
}
0376 
0377 bool isThirdPartyHost(const char *baseContextHost, int baseContextHostLen,
0378     const char *testHost, int testHostLen) {
0379   if (!endsWith(testHost, baseContextHost, testHostLen, baseContextHostLen)) {
0380     return true;
0381   }
0382 
0383   // baseContextHost matches testHost exactly
0384   if (testHostLen == baseContextHostLen) {
0385     return false;
0386   }
0387 
0388   char c = testHost[testHostLen - baseContextHostLen - 1];
0389   return c != '.' && testHostLen != baseContextHostLen;
0390 }
0391 
0392 bool Filter::hasUnsupportedOptions() const {
0393   return (filterOption & FOUnsupportedSoSkipCheck) != 0;
0394 }
0395 
0396 bool Filter::contextDomainMatchesFilter(const char *contextDomain) {
0397   // If there are no context domains, then this filter can still apply
0398   // to all domains.
0399   if (getDomainCount(false) == 0 && getDomainCount(true) == 0) {
0400     return true;
0401   }
0402 
0403   const char *p = contextDomain;
0404   // Start keeps track of the start of the last match
0405   // We do this to avoid extraTLD checks for rules.
0406   const char *start = contextDomain;
0407   size_t contextDomainLen = strlen(contextDomain);
0408   while (*p != '\0') {
0409     if (*p == '.') {
0410       const size_t domainLen = contextDomainLen - (start - contextDomain);
0411       if (containsDomain(start, domainLen, false)) {
0412         return true;
0413       }
0414       if (containsDomain(start, domainLen, true)) {
0415         return false;
0416       }
0417       // Set start to just past the period
0418       start = p + 1;
0419     }
0420     p++;
0421   }
0422 
0423   // No exact match, if there are only anti domain filters, then this
0424   // rule applies.
0425   if (getDomainCount(false) == 0 && getDomainCount(true) > 0) {
0426     return true;
0427   }
0428 
0429   // Otherwise there are only domains, and we haven't matched anything
0430   // so it's not a match as long as there is at least one domain which
0431   // is not an antiDomain.
0432   return getDomainCount(false) == 0;
0433 }
0434 
0435 // Determines if there's a match based on the options, this doesn't
0436 // mean that the filter rule should be accepted, just that the filter rule
0437 // should be considered given the current context.
0438 // By specifying context params, you can filter out the number of rules
0439 // which are considered.
0440 bool Filter::matchesOptions(const char *input, FilterOption context,
0441     const char *contextDomain) {
0442       UNUSED(input);
0443   if (hasUnsupportedOptions()) {
0444     return false;
0445   }
0446 
0447   // If the context is for a document, but the filter option isn't an explicit
0448   // document rule, then ignore it.
0449   if (!(filterOption & FODocument) && (context & FODocument)) {
0450     return false;
0451   }
0452   // Maybe the user of the library can't determine a context because they're
0453   // blocking a the HTTP level, don't block here because we don't have enough
0454   // information
0455   if (context != FONoFilterOption) {
0456     if ((filterOption & ~BehavioralFilterOnly) != FONoFilterOption
0457         && !(filterOption & FOResourcesOnly & context)) {
0458       return false;
0459     }
0460 
0461     if ((antiFilterOption & ~BehavioralFilterOnly) != FONoFilterOption
0462         && (antiFilterOption & FOResourcesOnly & context)) {
0463       return false;
0464     }
0465   } else {
0466     // When there's no filter option specified for the context, the resource
0467     // type context is not known.  In this case, never match against
0468     // rules with an explicit resource type specified.
0469     if (filterOption & FOResourcesOnly) {
0470       return false;
0471     }
0472     if (antiFilterOption & FOResourcesOnly) {
0473       return false;
0474     }
0475   }
0476 
0477   // Domain options check
0478   if (domainList && contextDomain) {
0479     if (!contextDomainMatchesFilter(contextDomain)) {
0480       return false;
0481     }
0482   }
0483 
0484   // If we're in the context of third-party site, then consider
0485   // third-party option checks
0486   if (context & (FOThirdParty | FONotThirdParty)) {
0487     if ((filterOption & FOThirdParty) &&
0488         (context & FONotThirdParty)) {
0489       return false;
0490     }
0491     if ((antiFilterOption & FOThirdParty) &&
0492         (context & FOThirdParty)) {
0493       return false;
0494     }
0495   }
0496 
0497   return true;
0498 }
0499 
0500 
// Scans forward from |input| and returns the first position that is |end|,
// holds a NUL, or holds |separator|, whichever comes first.
const char * getNextPos(const char *input, char separator, const char *end) {
  const char *cursor = input;
  for (; cursor != end; ++cursor) {
    if (*cursor == '\0' || *cursor == separator) {
      break;
    }
  }
  return cursor;
}
0508 
0509 /**
0510  * Similar to str1.indexOf(filter, startingPos) but with
0511  * extra consideration to some ABP filter rules like ^.
0512  */
0513 int indexOfFilter(const char* input, int inputLen,
0514                   const char* filterBegin, const char *filterEnd) {
0515   const int filterLen = filterEnd - filterBegin;
0516   if (1 == filterLen && '^' == *filterBegin) return -1;
0517   if (filterLen > inputLen) {
0518     return -1;
0519   }
0520 
0521   for (int i = 0; i < inputLen; ++i) {
0522     bool match = true;
0523     for (int j = 0; j < filterLen; ++j) {
0524       const char inputChar = input[i+j];
0525       const char filterChar = filterBegin[j];
0526 
0527       if (filterChar != inputChar) {
0528         // ^abc^ matches both /abc/ and /abc
0529         if ('^' == filterChar &&
0530             (isSeparatorChar(inputChar) || '\0' == inputChar)) {
0531           continue;
0532         }
0533         if ('\0' == inputChar) {
0534           return -1;
0535         }
0536         match = false;
0537         break;
0538       }
0539     }
0540     if (match) {
0541       return i;
0542     }
0543   }
0544   return -1;
0545 }
0546 
0547 bool Filter::matches(const char *input, FilterOption contextOption,
0548     const char *contextDomain, BloomFilter *inputBloomFilter,
0549     const char *inputHost, int inputHostLen) {
0550   return matches(input, static_cast<int>(strlen(input)), contextOption,
0551       contextDomain, inputBloomFilter, inputHost, inputHostLen);
0552 }
0553 
0554 bool Filter::matches(const char *input, int inputLen,
0555     FilterOption contextOption, const char *contextDomain,
0556     BloomFilter *inputBloomFilter, const char *inputHost, int inputHostLen) {
0557   if (!matchesOptions(input, contextOption, contextDomain)) {
0558     return false;
0559   }
0560 
0561   if (!data) {
0562     return false;
0563   }
0564 
0565   // We lazily figure out the dataLen only once
0566   if (dataLen == -1) {
0567     dataLen = static_cast<int>(strlen(data));
0568   }
0569 
0570   // Check for a regex match
0571   if (filterType & FTRegex) {
0572 #ifdef ENABLE_REGEX
0573     std::smatch m;
0574     std::regex e(data, std::regex_constants::extended);
0575     return std::regex_search(std::string(input), m, e);
0576 #else
0577     return false;
0578 #endif
0579   }
0580 
0581   // Check for both left and right anchored
0582   if ((filterType & FTLeftAnchored) && (filterType & FTRightAnchored)) {
0583     return !strcmp(data, input);
0584   }
0585 
0586   // Check for right anchored
0587   if (filterType & FTRightAnchored) {
0588     if (dataLen > inputLen) {
0589       return false;
0590     }
0591 
0592     return !strcmp(input + (inputLen - dataLen), data);
0593   }
0594 
0595   // Check for left anchored
0596   if (filterType & FTLeftAnchored) {
0597     return !strncmp(data, input, dataLen);
0598   }
0599 
0600   // Check for domain name anchored
0601   if (filterType & FTHostAnchored) {
0602     int currentHostLen = inputHostLen;
0603     const char *currentHost = inputHost;
0604     if (!currentHostLen) {
0605       currentHost = getUrlHost(input, &currentHostLen);
0606     }
0607     int hostLen = 0;
0608     if (host) {
0609       hostLen = this->hostLen == -1 ?
0610         static_cast<int>(strlen(host)) : this->hostLen;
0611     }
0612 
0613     if (inputBloomFilter) {
0614       for (int i = 1; i < hostLen; i++) {
0615         if (!inputBloomFilter->exists(host + i - 1, 2)) {
0616           return false;
0617         }
0618       }
0619     }
0620 
0621     if (isThirdPartyHost(host, hostLen, currentHost, currentHostLen)) {
0622       return false;
0623     }
0624   }
0625 
0626   // Wildcard match comparison
0627   const char *filterPartStart = data;
0628   const char *filterPartEnd = getNextPos(data, '*', data + dataLen);
0629   int index = 0;
0630   while (filterPartStart != filterPartEnd || *filterPartStart == '*') {
0631     int filterPartLen = static_cast<int>(filterPartEnd - filterPartStart);
0632 
0633     if (inputBloomFilter) {
0634       for (int i = 1; i < filterPartLen && filterPartEnd -
0635           filterPartStart - i >= 2; i++) {
0636         if (!isSeparatorChar(*(filterPartStart + i - 1)) &&
0637             !isSeparatorChar(*(filterPartStart + i)) &&
0638             !inputBloomFilter->exists(filterPartStart + i - 1, 2)) {
0639           return false;
0640         }
0641       }
0642     }
0643 
0644     int newIndex = indexOfFilter(input + index, inputLen - index,
0645         filterPartStart, filterPartEnd);
0646     if (newIndex == -1) {
0647       return false;
0648     }
0649     newIndex += index;
0650 
0651     if (filterPartEnd == data + dataLen || *filterPartEnd == '\0') {
0652       break;
0653     }
0654     const char *temp = getNextPos(filterPartEnd + 1, '*', data + dataLen);
0655     filterPartStart = filterPartEnd + 1;
0656     filterPartEnd = temp;
0657     index = newIndex + filterPartLen;
0658     if (*(input + newIndex) == '\0') {
0659       break;
0660     }
0661   }
0662 
0663   return true;
0664 }
0665 
0666 void Filter::parseDomains(const char* domainList) {
0667   if (!domainList || domainsParsed) {
0668     return;
0669   }
0670   int startOffset = 0;
0671   int len = 0;
0672   const char* p = domainList;
0673   while (true) {
0674     if (*p == '|' || *p == '\0') {
0675       const char *domain = domainList + startOffset;
0676       if (*domain == '~') {
0677         if (!antiDomains) {
0678           antiDomains = new HashSet<ContextDomain>(5, true);
0679         }
0680         antiDomains->Add(ContextDomain(domain + 1, len - 1));
0681       } else {
0682         if (!domains) {
0683           domains = new HashSet<ContextDomain>(5, true);
0684         }
0685         domains->Add(ContextDomain(domain, len));
0686       }
0687       startOffset += len + 1;
0688       len = -1;
0689     }
0690     if (*p == '\0') {
0691       break;
0692     }
0693     p++;
0694     len++;
0695   }
0696   domainsParsed = true;
0697 }
0698 
0699 uint64_t Filter::hash() const {
0700   if (!host && !data) {
0701     return 0;
0702   } else if (host) {
0703     return h(host, hostLen == -1 ? static_cast<int>(strlen(host)) : hostLen);
0704   }
0705 
0706   return h(data, dataLen);
0707 }
0708 
0709 uint32_t Filter::Serialize(char *buffer) {
0710   uint32_t totalSize = 0;
0711   char sz[64];
0712   uint32_t dataLenSize = 1 + snprintf(sz, sizeof(sz),
0713       "%x,%x,%x,%x", dataLen, filterType,
0714       filterOption, antiFilterOption);
0715   if (buffer) {
0716     memcpy(buffer + totalSize, sz, dataLenSize);
0717   }
0718   totalSize += dataLenSize;
0719   if (buffer) {
0720     memcpy(buffer + totalSize, data, dataLen);
0721   }
0722   totalSize += dataLen;
0723 
0724   if (host) {
0725     int hostLen = this->hostLen == -1 ?
0726       static_cast<int>(strlen(host)) : this->hostLen;
0727     if (buffer) {
0728       memcpy(buffer + totalSize, host, hostLen + 1);
0729     }
0730     totalSize += hostLen;
0731   }
0732   totalSize += 1;
0733 
0734   // Serialize any kind fo list based data here, as long as you can use a
0735   // separator between lists which is not \0.  Currently using #
0736   if (tagLen > 0) {
0737     if (buffer) {
0738       buffer[totalSize] = '~';
0739       buffer[totalSize+1] = '#';
0740       memcpy(buffer + totalSize + 2, tag, tagLen);
0741       buffer[totalSize + 2 + tagLen] = ',';
0742     }
0743     totalSize += tagLen + 3;
0744   }
0745   if (domainList) {
0746     int domainListLen = static_cast<int>(strlen(domainList));
0747     if (buffer) {
0748       memcpy(buffer + totalSize, domainList, domainListLen + 1);
0749     }
0750     totalSize += domainListLen;
0751   }
0752   totalSize += 1;
0753 
0754   return totalSize;
0755 }
0756 
// Returns true when a NUL byte occurs within the first |bufferSize| bytes of
// |buffer|.  (Despite the name, it is the NUL terminator that is looked for.)
bool hasNewlineBefore(char *buffer, uint32_t bufferSize) {
  if (bufferSize == 0) {
    return false;
  }
  return memchr(buffer, '\0', bufferSize) != nullptr;
}
0766 
0767 uint32_t Filter::Deserialize(char *buffer, uint32_t bufferSize) {
0768   dataLen = 0;
0769   if (!hasNewlineBefore(buffer, bufferSize)) {
0770     return 0;
0771   }
0772   sscanf(buffer, "%x,%x,%x,%x", (unsigned int*)&dataLen, (unsigned int*)&filterType,
0773       (unsigned int*)&filterOption, (unsigned int*)&antiFilterOption);
0774   uint32_t consumed = static_cast<uint32_t>(strlen(buffer)) + 1;
0775   if (consumed + dataLen >= bufferSize) {
0776     return 0;
0777   }
0778 
0779   data = buffer + consumed;
0780   consumed += dataLen;
0781 
0782   uint32_t hostLen = static_cast<uint32_t>(strlen(buffer + consumed));
0783   if (hostLen != 0) {
0784     host = buffer + consumed;
0785   } else {
0786     host = nullptr;
0787   }
0788   consumed += hostLen + 1;
0789 
0790   // If the domain section starts with a # then we're in a tag
0791   // block.
0792   if (buffer[consumed] == '~' && buffer[consumed + 1] == '#') {
0793     consumed += 2;
0794     tag = buffer + consumed;
0795     tagLen = 0;
0796     while (buffer[consumed + tagLen] != '\0') {
0797       if (buffer[consumed + tagLen] == ',') {
0798         consumed += tagLen + 1;
0799         break;
0800       }
0801       tagLen++;
0802     }
0803   }
0804 
0805   uint32_t listSectionLen = static_cast<uint32_t>(strlen(buffer + consumed));
0806   if (listSectionLen != 0) {
0807     domainList = buffer + consumed;
0808   } else {
0809     domainList = nullptr;
0810   }
0811   consumed += listSectionLen + 1;
0812 
0813   borrowed_data = true;
0814   domainsParsed = false;
0815 
0816   if (domains) {
0817     delete domains;
0818     domains = nullptr;
0819   }
0820   if (antiDomains) {
0821     delete antiDomains;
0822     antiDomains = nullptr;
0823   }
0824 
0825   return consumed;
0826 }