// File indexing completed on 2024-11-17 04:55:17
0001 /* 0002 SPDX-License-Identifier: MPL-2.0 0003 */ 0004 0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 0006 * This Source Code Form is subject to the terms of the Mozilla Public 0007 * License, v. 2.0. If a copy of the MPL was not distributed with this 0008 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 0009 0010 #include "./filter.h" 0011 #include <string.h> 0012 #include <stdio.h> 0013 #include <math.h> 0014 #include <iostream> 0015 #include <set> 0016 #include <string> 0017 #ifdef ENABLE_REGEX 0018 #include <regex> // NOLINT 0019 #endif 0020 #include "./hash_set.h" 0021 #include "./ad_block_client.h" 0022 #include "hashFn.h" 0023 #include "BloomFilter.h" 0024 0025 #define UNUSED(x) ( (void)(x) ) 0026 0027 static HashFn h(19); 0028 0029 const char * getUrlHost(const char *input, int *len); 0030 0031 Filter::Filter() : 0032 borrowed_data(false), 0033 filterType(FTNoFilterType), 0034 filterOption(FONoFilterOption), 0035 antiFilterOption(FONoFilterOption), 0036 ruleDefinition(nullptr), 0037 data(nullptr), 0038 dataLen(-1), 0039 domainList(nullptr), 0040 tag(nullptr), 0041 tagLen(0), 0042 host(nullptr), 0043 hostLen(-1), 0044 domains(nullptr), 0045 antiDomains(nullptr), 0046 domainsParsed(false) { 0047 } 0048 0049 Filter::~Filter() { 0050 if (domains) { 0051 delete domains; 0052 } 0053 if (antiDomains) { 0054 delete antiDomains; 0055 } 0056 if (!borrowed_data) { 0057 if (data) { 0058 delete[] data; 0059 } 0060 if (ruleDefinition) { 0061 delete[] ruleDefinition; 0062 } 0063 if (domainList) { 0064 delete[] domainList; 0065 } 0066 if (tag) { 0067 delete[] tag; 0068 } 0069 if (host) { 0070 delete[] host; 0071 } 0072 } 0073 } 0074 0075 Filter::Filter(const char * data, int dataLen, 0076 char *domainList, 0077 const char * host, int hostLen, 0078 char *tag, int tagLen) : 0079 borrowed_data(true), filterType(FTNoFilterType), 0080 filterOption(FONoFilterOption), 0081 antiFilterOption(FONoFilterOption), 
ruleDefinition(nullptr), 0082 data(const_cast<char*>(data)), dataLen(dataLen), 0083 domainList(domainList), 0084 tag(tag), tagLen(tagLen), 0085 host(const_cast<char*>(host)), 0086 hostLen(hostLen), domains(nullptr), 0087 antiDomains(nullptr), domainsParsed(false) { 0088 } 0089 0090 Filter::Filter(FilterType filterType, FilterOption filterOption, 0091 FilterOption antiFilterOption, 0092 const char * data, int dataLen, 0093 char *domainList, 0094 const char * host, int hostLen, 0095 char *tag, int tagLen) : 0096 borrowed_data(true), filterType(filterType), 0097 filterOption(filterOption), 0098 antiFilterOption(antiFilterOption), ruleDefinition(nullptr), 0099 data(const_cast<char*>(data)), dataLen(dataLen), 0100 domainList(domainList), 0101 tag(tag), tagLen(tagLen), 0102 host(const_cast<char *>(host)), hostLen(hostLen), 0103 domains(nullptr), antiDomains(nullptr), domainsParsed(false) { 0104 } 0105 0106 Filter::Filter(const Filter &other) { 0107 borrowed_data = other.borrowed_data; 0108 filterType = other.filterType; 0109 filterOption = other.filterOption; 0110 antiFilterOption = other.antiFilterOption; 0111 dataLen = other.dataLen; 0112 hostLen = other.hostLen; 0113 domainsParsed = false; 0114 domains = nullptr; 0115 antiDomains = nullptr; 0116 if (other.dataLen == -1 && other.data) { 0117 dataLen = static_cast<int>(strlen(other.data)); 0118 } 0119 0120 if (other.borrowed_data) { 0121 data = other.data; 0122 domainList = other.domainList; 0123 tag = other.tag; 0124 tagLen = other.tagLen; 0125 host = other.host; 0126 ruleDefinition = other.ruleDefinition; 0127 } else { 0128 if (other.data) { 0129 data = new char[dataLen]; 0130 memcpy(data, other.data, dataLen); 0131 } else { 0132 data = nullptr; 0133 } 0134 if (other.domainList) { 0135 size_t len = strlen(other.domainList) + 1; 0136 domainList = new char[len]; 0137 snprintf(domainList, len, "%s", other.domainList); 0138 } else { 0139 domainList = nullptr; 0140 } 0141 if (other.tagLen > 0) { 0142 tag = new 
char[other.tagLen]; 0143 memcpy(tag, other.tag, other.tagLen); 0144 tagLen = other.tagLen; 0145 } else { 0146 tag = nullptr; 0147 tagLen = 0; 0148 } 0149 if (other.host) { 0150 size_t len = strlen(other.host) + 1; 0151 host = new char[len]; 0152 snprintf(host, len, "%s", other.host); 0153 } else { 0154 host = nullptr; 0155 } 0156 0157 if (other.ruleDefinition) { 0158 size_t len = strlen(other.ruleDefinition) + 1; 0159 ruleDefinition = new char[len]; 0160 snprintf(ruleDefinition, len, "%s", other.ruleDefinition); 0161 } else { 0162 ruleDefinition = nullptr; 0163 } 0164 } 0165 } 0166 0167 void Filter::swapData(Filter *other) { 0168 FilterType tempFilterType = filterType; 0169 FilterOption tempFilterOption = filterOption; 0170 FilterOption tempAntiFilterOption = antiFilterOption; 0171 char *tempData = data; 0172 int tempDataLen = dataLen; 0173 char *tempRuleDefinition = ruleDefinition; 0174 char *tempDomainList = domainList; 0175 char *temptag = tag; 0176 int temptagLen = tagLen; 0177 char *tempHost = host; 0178 int tempHostLen = hostLen; 0179 bool tempDomainsParsed = domainsParsed; 0180 HashSet<ContextDomain>* tempDomains = domains; 0181 HashSet<ContextDomain>* tempAntiDomains = antiDomains; 0182 0183 filterType = other->filterType; 0184 filterOption = other->filterOption; 0185 antiFilterOption = other->antiFilterOption; 0186 ruleDefinition = other->ruleDefinition;; 0187 data = other->data; 0188 dataLen = other->dataLen; 0189 domainList = other->domainList; 0190 tag = other->tag; 0191 tagLen = other->tagLen; 0192 host = other->host; 0193 hostLen = other->hostLen; 0194 domainsParsed = other->domainsParsed; 0195 domains = other->domains; 0196 antiDomains = other->antiDomains; 0197 0198 other->filterType = tempFilterType; 0199 other->filterOption = tempFilterOption; 0200 other->antiFilterOption = tempAntiFilterOption; 0201 other->ruleDefinition = tempRuleDefinition; 0202 other->data = tempData; 0203 other->dataLen = tempDataLen; 0204 other->domainList = tempDomainList; 
0205 other->tag = temptag; 0206 other->tagLen = temptagLen; 0207 other->host = tempHost; 0208 other->hostLen = tempHostLen; 0209 other->domainsParsed = tempDomainsParsed; 0210 other->domains = tempDomains; 0211 other->antiDomains = tempAntiDomains; 0212 } 0213 0214 bool Filter::containsDomain(const char* domain, size_t domainLen, 0215 bool anti) const { 0216 if (!anti) { 0217 if (!domains) { 0218 return false; 0219 } 0220 return domains->Exists(ContextDomain(domain, domainLen)); 0221 } 0222 0223 if (!antiDomains) { 0224 return false; 0225 } 0226 return antiDomains->Exists(ContextDomain(domain, domainLen)); 0227 } 0228 0229 uint32_t Filter::getDomainCount(bool anti) { 0230 parseDomains(domainList); 0231 if (anti) { 0232 if (!antiDomains) { 0233 return 0; 0234 } 0235 return antiDomains->GetSize(); 0236 } 0237 if (!domains) { 0238 return 0; 0239 } 0240 return domains->GetSize(); 0241 } 0242 0243 bool Filter::isDomainOnlyFilter() { 0244 parseDomains(domainList); 0245 return getDomainCount(false) && !getDomainCount(true); 0246 } 0247 0248 bool Filter::isAntiDomainOnlyFilter() { 0249 parseDomains(domainList); 0250 return getDomainCount(true) && !getDomainCount(false); 0251 } 0252 0253 void Filter::parseOption(const char *input, int len) { 0254 FilterOption *pFilterOption = &filterOption; 0255 const char *pStart = input; 0256 if (input[0] == '~') { 0257 pFilterOption = &antiFilterOption; 0258 pStart++; 0259 len--; 0260 } 0261 0262 if (len >= 7 && !strncmp(pStart, "domain=", 7)) { 0263 len -= 7; 0264 domainList = new char[len + 1]; 0265 domainList[len] = '\0'; 0266 memcpy(domainList, pStart + 7, len); 0267 } else if (len >= 4 && !strncmp(pStart, "tag=", 4)) { 0268 len -= 4; 0269 tag = new char[len]; 0270 memcpy(tag, pStart + 4, len); 0271 tagLen = len; 0272 } else if (!strncmp(pStart, "script", len)) { 0273 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOScript); 0274 } else if (!strncmp(pStart, "image", len)) { 0275 *pFilterOption = 
static_cast<FilterOption>(*pFilterOption | FOImage); 0276 } else if (!strncmp(pStart, "stylesheet", len)) { 0277 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOStylesheet); 0278 } else if (!strncmp(pStart, "object", len)) { 0279 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOObject); 0280 } else if (!strncmp(pStart, "xmlhttprequest", len)) { 0281 *pFilterOption = 0282 static_cast<FilterOption>(*pFilterOption | FOXmlHttpRequest); 0283 } else if (!strncmp(pStart, "object-subrequest", len)) { 0284 *pFilterOption = 0285 static_cast<FilterOption>(*pFilterOption | FOObjectSubrequest); 0286 } else if (!strncmp(pStart, "subdocument", len)) { 0287 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOSubdocument); 0288 } else if (!strncmp(pStart, "document", len)) { 0289 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FODocument); 0290 } else if (!strncmp(pStart, "xbl", len)) { 0291 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOXBL); 0292 } else if (!strncmp(pStart, "collapse", len)) { 0293 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOCollapse); 0294 } else if (!strncmp(pStart, "donottrack", len)) { 0295 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FODoNotTrack); 0296 } else if (!strncmp(pStart, "other", len)) { 0297 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOOther); 0298 } else if (!strncmp(pStart, "elemhide", len)) { 0299 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOElemHide); 0300 } else if (!strncmp(pStart, "third-party", len)) { 0301 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOThirdParty); 0302 } else if (!strncmp(pStart, "first-party", len)) { 0303 // Same as ~third-party 0304 *pFilterOption = static_cast<FilterOption>( 0305 *pFilterOption | FONotThirdParty); 0306 } else if (!strncmp(pStart, "ping", len)) { 0307 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOPing); 0308 } else if (!strncmp(pStart, 
"popup", len)) { 0309 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOPopup); 0310 } else if (len >= 4 && !strncmp(pStart, "csp=", 4)) { 0311 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOCSP); 0312 } else if (len >= 9 && !strncmp(pStart, "redirect=", 9)) { 0313 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FORedirect); 0314 } else if (!strncmp(pStart, "font", len)) { 0315 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOFont); 0316 } else if (!strncmp(pStart, "media", len)) { 0317 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOMedia); 0318 } else if (!strncmp(pStart, "webrtc", len)) { 0319 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOWebRTC); 0320 } else if (!strncmp(pStart, "generichide", len)) { 0321 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOGenericHide); 0322 } else if (!strncmp(pStart, "genericblock", len)) { 0323 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOGenericBlock); 0324 } else if (!strncmp(pStart, "empty", len)) { 0325 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOEmpty); 0326 } else if (!strncmp(pStart, "websocket", len)) { 0327 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOWebsocket); 0328 } else if (!strncmp(pStart, "important", len)) { 0329 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOImportant); 0330 } else if (!strncmp(pStart, "explicitcancel", len)) { 0331 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOExplicitCancel); 0332 } else { 0333 *pFilterOption = static_cast<FilterOption>(*pFilterOption | FOUnknown); 0334 std::string option(pStart, len); 0335 if (unknownOptions.find(option) == unknownOptions.end()) { 0336 std::cout << "Unrecognized filter option: " << option << std::endl; 0337 unknownOptions.insert(option); 0338 } 0339 } 0340 // Otherwise just ignore the option, maybe something new we don't support yet 0341 } 0342 0343 void Filter::parseOptions(const 
char *input) { 0344 filterOption = FONoFilterOption; 0345 antiFilterOption = FONoFilterOption; 0346 int startOffset = 0; 0347 int len = 0; 0348 const char *p = input; 0349 while (*p != '\0' && !isEndOfLine(*p)) { 0350 if (*p == ',') { 0351 parseOption(input + startOffset, len); 0352 startOffset += len + 1; 0353 len = -1; 0354 } 0355 p++; 0356 len++; 0357 } 0358 parseOption(input + startOffset, len); 0359 } 0360 0361 bool endsWith(const char *input, const char *sub, int inputLen, int subLen) { 0362 if (subLen > inputLen) { 0363 return false; 0364 } 0365 0366 int startCheckPos = inputLen - subLen; 0367 const char *p = input + startCheckPos; 0368 const char *q = sub; 0369 while (q != sub + subLen) { 0370 if (*(p++) != *(q++)) { 0371 return false; 0372 } 0373 } 0374 return true; 0375 } 0376 0377 bool isThirdPartyHost(const char *baseContextHost, int baseContextHostLen, 0378 const char *testHost, int testHostLen) { 0379 if (!endsWith(testHost, baseContextHost, testHostLen, baseContextHostLen)) { 0380 return true; 0381 } 0382 0383 // baseContextHost matches testHost exactly 0384 if (testHostLen == baseContextHostLen) { 0385 return false; 0386 } 0387 0388 char c = testHost[testHostLen - baseContextHostLen - 1]; 0389 return c != '.' && testHostLen != baseContextHostLen; 0390 } 0391 0392 bool Filter::hasUnsupportedOptions() const { 0393 return (filterOption & FOUnsupportedSoSkipCheck) != 0; 0394 } 0395 0396 bool Filter::contextDomainMatchesFilter(const char *contextDomain) { 0397 // If there are no context domains, then this filter can still apply 0398 // to all domains. 0399 if (getDomainCount(false) == 0 && getDomainCount(true) == 0) { 0400 return true; 0401 } 0402 0403 const char *p = contextDomain; 0404 // Start keeps track of the start of the last match 0405 // We do this to avoid extraTLD checks for rules. 
0406 const char *start = contextDomain; 0407 size_t contextDomainLen = strlen(contextDomain); 0408 while (*p != '\0') { 0409 if (*p == '.') { 0410 const size_t domainLen = contextDomainLen - (start - contextDomain); 0411 if (containsDomain(start, domainLen, false)) { 0412 return true; 0413 } 0414 if (containsDomain(start, domainLen, true)) { 0415 return false; 0416 } 0417 // Set start to just past the period 0418 start = p + 1; 0419 } 0420 p++; 0421 } 0422 0423 // No exact match, if there are only anti domain filters, then this 0424 // rule applies. 0425 if (getDomainCount(false) == 0 && getDomainCount(true) > 0) { 0426 return true; 0427 } 0428 0429 // Otherwise there are only domains, and we haven't matched anything 0430 // so it's not a match as long as there is at least one domain which 0431 // is not an antiDomain. 0432 return getDomainCount(false) == 0; 0433 } 0434 0435 // Determines if there's a match based on the options, this doesn't 0436 // mean that the filter rule should be accepted, just that the filter rule 0437 // should be considered given the current context. 0438 // By specifying context params, you can filter out the number of rules 0439 // which are considered. 0440 bool Filter::matchesOptions(const char *input, FilterOption context, 0441 const char *contextDomain) { 0442 UNUSED(input); 0443 if (hasUnsupportedOptions()) { 0444 return false; 0445 } 0446 0447 // If the context is for a document, but the filter option isn't an explicit 0448 // document rule, then ignore it. 
0449 if (!(filterOption & FODocument) && (context & FODocument)) { 0450 return false; 0451 } 0452 // Maybe the user of the library can't determine a context because they're 0453 // blocking a the HTTP level, don't block here because we don't have enough 0454 // information 0455 if (context != FONoFilterOption) { 0456 if ((filterOption & ~BehavioralFilterOnly) != FONoFilterOption 0457 && !(filterOption & FOResourcesOnly & context)) { 0458 return false; 0459 } 0460 0461 if ((antiFilterOption & ~BehavioralFilterOnly) != FONoFilterOption 0462 && (antiFilterOption & FOResourcesOnly & context)) { 0463 return false; 0464 } 0465 } else { 0466 // When there's no filter option specified for the context, the resource 0467 // type context is not known. In this case, never match against 0468 // rules with an explicit resource type specified. 0469 if (filterOption & FOResourcesOnly) { 0470 return false; 0471 } 0472 if (antiFilterOption & FOResourcesOnly) { 0473 return false; 0474 } 0475 } 0476 0477 // Domain options check 0478 if (domainList && contextDomain) { 0479 if (!contextDomainMatchesFilter(contextDomain)) { 0480 return false; 0481 } 0482 } 0483 0484 // If we're in the context of third-party site, then consider 0485 // third-party option checks 0486 if (context & (FOThirdParty | FONotThirdParty)) { 0487 if ((filterOption & FOThirdParty) && 0488 (context & FONotThirdParty)) { 0489 return false; 0490 } 0491 if ((antiFilterOption & FOThirdParty) && 0492 (context & FOThirdParty)) { 0493 return false; 0494 } 0495 } 0496 0497 return true; 0498 } 0499 0500 0501 const char * getNextPos(const char *input, char separator, const char *end) { 0502 const char *p = input; 0503 while (p != end && *p != '\0' && *p != separator) { 0504 p++; 0505 } 0506 return p; 0507 } 0508 0509 /** 0510 * Similar to str1.indexOf(filter, startingPos) but with 0511 * extra consideration to some ABP filter rules like ^. 
0512 */ 0513 int indexOfFilter(const char* input, int inputLen, 0514 const char* filterBegin, const char *filterEnd) { 0515 const int filterLen = filterEnd - filterBegin; 0516 if (1 == filterLen && '^' == *filterBegin) return -1; 0517 if (filterLen > inputLen) { 0518 return -1; 0519 } 0520 0521 for (int i = 0; i < inputLen; ++i) { 0522 bool match = true; 0523 for (int j = 0; j < filterLen; ++j) { 0524 const char inputChar = input[i+j]; 0525 const char filterChar = filterBegin[j]; 0526 0527 if (filterChar != inputChar) { 0528 // ^abc^ matches both /abc/ and /abc 0529 if ('^' == filterChar && 0530 (isSeparatorChar(inputChar) || '\0' == inputChar)) { 0531 continue; 0532 } 0533 if ('\0' == inputChar) { 0534 return -1; 0535 } 0536 match = false; 0537 break; 0538 } 0539 } 0540 if (match) { 0541 return i; 0542 } 0543 } 0544 return -1; 0545 } 0546 0547 bool Filter::matches(const char *input, FilterOption contextOption, 0548 const char *contextDomain, BloomFilter *inputBloomFilter, 0549 const char *inputHost, int inputHostLen) { 0550 return matches(input, static_cast<int>(strlen(input)), contextOption, 0551 contextDomain, inputBloomFilter, inputHost, inputHostLen); 0552 } 0553 0554 bool Filter::matches(const char *input, int inputLen, 0555 FilterOption contextOption, const char *contextDomain, 0556 BloomFilter *inputBloomFilter, const char *inputHost, int inputHostLen) { 0557 if (!matchesOptions(input, contextOption, contextDomain)) { 0558 return false; 0559 } 0560 0561 if (!data) { 0562 return false; 0563 } 0564 0565 // We lazily figure out the dataLen only once 0566 if (dataLen == -1) { 0567 dataLen = static_cast<int>(strlen(data)); 0568 } 0569 0570 // Check for a regex match 0571 if (filterType & FTRegex) { 0572 #ifdef ENABLE_REGEX 0573 std::smatch m; 0574 std::regex e(data, std::regex_constants::extended); 0575 return std::regex_search(std::string(input), m, e); 0576 #else 0577 return false; 0578 #endif 0579 } 0580 0581 // Check for both left and right anchored 0582 if 
((filterType & FTLeftAnchored) && (filterType & FTRightAnchored)) { 0583 return !strcmp(data, input); 0584 } 0585 0586 // Check for right anchored 0587 if (filterType & FTRightAnchored) { 0588 if (dataLen > inputLen) { 0589 return false; 0590 } 0591 0592 return !strcmp(input + (inputLen - dataLen), data); 0593 } 0594 0595 // Check for left anchored 0596 if (filterType & FTLeftAnchored) { 0597 return !strncmp(data, input, dataLen); 0598 } 0599 0600 // Check for domain name anchored 0601 if (filterType & FTHostAnchored) { 0602 int currentHostLen = inputHostLen; 0603 const char *currentHost = inputHost; 0604 if (!currentHostLen) { 0605 currentHost = getUrlHost(input, ¤tHostLen); 0606 } 0607 int hostLen = 0; 0608 if (host) { 0609 hostLen = this->hostLen == -1 ? 0610 static_cast<int>(strlen(host)) : this->hostLen; 0611 } 0612 0613 if (inputBloomFilter) { 0614 for (int i = 1; i < hostLen; i++) { 0615 if (!inputBloomFilter->exists(host + i - 1, 2)) { 0616 return false; 0617 } 0618 } 0619 } 0620 0621 if (isThirdPartyHost(host, hostLen, currentHost, currentHostLen)) { 0622 return false; 0623 } 0624 } 0625 0626 // Wildcard match comparison 0627 const char *filterPartStart = data; 0628 const char *filterPartEnd = getNextPos(data, '*', data + dataLen); 0629 int index = 0; 0630 while (filterPartStart != filterPartEnd || *filterPartStart == '*') { 0631 int filterPartLen = static_cast<int>(filterPartEnd - filterPartStart); 0632 0633 if (inputBloomFilter) { 0634 for (int i = 1; i < filterPartLen && filterPartEnd - 0635 filterPartStart - i >= 2; i++) { 0636 if (!isSeparatorChar(*(filterPartStart + i - 1)) && 0637 !isSeparatorChar(*(filterPartStart + i)) && 0638 !inputBloomFilter->exists(filterPartStart + i - 1, 2)) { 0639 return false; 0640 } 0641 } 0642 } 0643 0644 int newIndex = indexOfFilter(input + index, inputLen - index, 0645 filterPartStart, filterPartEnd); 0646 if (newIndex == -1) { 0647 return false; 0648 } 0649 newIndex += index; 0650 0651 if (filterPartEnd == data + 
dataLen || *filterPartEnd == '\0') { 0652 break; 0653 } 0654 const char *temp = getNextPos(filterPartEnd + 1, '*', data + dataLen); 0655 filterPartStart = filterPartEnd + 1; 0656 filterPartEnd = temp; 0657 index = newIndex + filterPartLen; 0658 if (*(input + newIndex) == '\0') { 0659 break; 0660 } 0661 } 0662 0663 return true; 0664 } 0665 0666 void Filter::parseDomains(const char* domainList) { 0667 if (!domainList || domainsParsed) { 0668 return; 0669 } 0670 int startOffset = 0; 0671 int len = 0; 0672 const char* p = domainList; 0673 while (true) { 0674 if (*p == '|' || *p == '\0') { 0675 const char *domain = domainList + startOffset; 0676 if (*domain == '~') { 0677 if (!antiDomains) { 0678 antiDomains = new HashSet<ContextDomain>(5, true); 0679 } 0680 antiDomains->Add(ContextDomain(domain + 1, len - 1)); 0681 } else { 0682 if (!domains) { 0683 domains = new HashSet<ContextDomain>(5, true); 0684 } 0685 domains->Add(ContextDomain(domain, len)); 0686 } 0687 startOffset += len + 1; 0688 len = -1; 0689 } 0690 if (*p == '\0') { 0691 break; 0692 } 0693 p++; 0694 len++; 0695 } 0696 domainsParsed = true; 0697 } 0698 0699 uint64_t Filter::hash() const { 0700 if (!host && !data) { 0701 return 0; 0702 } else if (host) { 0703 return h(host, hostLen == -1 ? static_cast<int>(strlen(host)) : hostLen); 0704 } 0705 0706 return h(data, dataLen); 0707 } 0708 0709 uint32_t Filter::Serialize(char *buffer) { 0710 uint32_t totalSize = 0; 0711 char sz[64]; 0712 uint32_t dataLenSize = 1 + snprintf(sz, sizeof(sz), 0713 "%x,%x,%x,%x", dataLen, filterType, 0714 filterOption, antiFilterOption); 0715 if (buffer) { 0716 memcpy(buffer + totalSize, sz, dataLenSize); 0717 } 0718 totalSize += dataLenSize; 0719 if (buffer) { 0720 memcpy(buffer + totalSize, data, dataLen); 0721 } 0722 totalSize += dataLen; 0723 0724 if (host) { 0725 int hostLen = this->hostLen == -1 ? 
0726 static_cast<int>(strlen(host)) : this->hostLen; 0727 if (buffer) { 0728 memcpy(buffer + totalSize, host, hostLen + 1); 0729 } 0730 totalSize += hostLen; 0731 } 0732 totalSize += 1; 0733 0734 // Serialize any kind fo list based data here, as long as you can use a 0735 // separator between lists which is not \0. Currently using # 0736 if (tagLen > 0) { 0737 if (buffer) { 0738 buffer[totalSize] = '~'; 0739 buffer[totalSize+1] = '#'; 0740 memcpy(buffer + totalSize + 2, tag, tagLen); 0741 buffer[totalSize + 2 + tagLen] = ','; 0742 } 0743 totalSize += tagLen + 3; 0744 } 0745 if (domainList) { 0746 int domainListLen = static_cast<int>(strlen(domainList)); 0747 if (buffer) { 0748 memcpy(buffer + totalSize, domainList, domainListLen + 1); 0749 } 0750 totalSize += domainListLen; 0751 } 0752 totalSize += 1; 0753 0754 return totalSize; 0755 } 0756 0757 bool hasNewlineBefore(char *buffer, uint32_t bufferSize) { 0758 char *p = buffer; 0759 for (uint32_t i = 0; i < bufferSize; ++i) { 0760 if (*p == '\0') 0761 return true; 0762 p++; 0763 } 0764 return false; 0765 } 0766 0767 uint32_t Filter::Deserialize(char *buffer, uint32_t bufferSize) { 0768 dataLen = 0; 0769 if (!hasNewlineBefore(buffer, bufferSize)) { 0770 return 0; 0771 } 0772 sscanf(buffer, "%x,%x,%x,%x", (unsigned int*)&dataLen, (unsigned int*)&filterType, 0773 (unsigned int*)&filterOption, (unsigned int*)&antiFilterOption); 0774 uint32_t consumed = static_cast<uint32_t>(strlen(buffer)) + 1; 0775 if (consumed + dataLen >= bufferSize) { 0776 return 0; 0777 } 0778 0779 data = buffer + consumed; 0780 consumed += dataLen; 0781 0782 uint32_t hostLen = static_cast<uint32_t>(strlen(buffer + consumed)); 0783 if (hostLen != 0) { 0784 host = buffer + consumed; 0785 } else { 0786 host = nullptr; 0787 } 0788 consumed += hostLen + 1; 0789 0790 // If the domain section starts with a # then we're in a tag 0791 // block. 
0792 if (buffer[consumed] == '~' && buffer[consumed + 1] == '#') { 0793 consumed += 2; 0794 tag = buffer + consumed; 0795 tagLen = 0; 0796 while (buffer[consumed + tagLen] != '\0') { 0797 if (buffer[consumed + tagLen] == ',') { 0798 consumed += tagLen + 1; 0799 break; 0800 } 0801 tagLen++; 0802 } 0803 } 0804 0805 uint32_t listSectionLen = static_cast<uint32_t>(strlen(buffer + consumed)); 0806 if (listSectionLen != 0) { 0807 domainList = buffer + consumed; 0808 } else { 0809 domainList = nullptr; 0810 } 0811 consumed += listSectionLen + 1; 0812 0813 borrowed_data = true; 0814 domainsParsed = false; 0815 0816 if (domains) { 0817 delete domains; 0818 domains = nullptr; 0819 } 0820 if (antiDomains) { 0821 delete antiDomains; 0822 antiDomains = nullptr; 0823 } 0824 0825 return consumed; 0826 }