File indexing completed on 2024-11-17 04:55:18

0001 /*
0002     SPDX-License-Identifier: MPL-2.0
0003 */
0004 
0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license.
0006  * This Source Code Form is subject to the terms of the Mozilla Public
0007  * License, v. 2.0. If a copy of the MPL was not distributed with this
0008  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
0009 
0010 #include <string.h>
0011 #include "BloomFilter.h"
0012 
0013 BloomFilter::BloomFilter(unsigned int bitsPerElement,
0014     unsigned int estimatedNumElements, HashFn *hashFns, int numHashFns) :
0015     hashFns(nullptr), numHashFns(0), byteBufferSize(0), buffer(nullptr) {
0016   this->hashFns = hashFns;
0017   this->numHashFns = numHashFns;
0018   lastHashes = new uint64_t[numHashFns];
0019   byteBufferSize = bitsPerElement * estimatedNumElements / 8 + 1;
0020   bitBufferSize = byteBufferSize * 8;
0021   buffer = new char[byteBufferSize];
0022   memset(buffer, 0, byteBufferSize);
0023 }
0024 
0025 // Constructs a BloomFilter by copying the specified buffer and number of bytes
0026 BloomFilter::BloomFilter(const char *buffer, int byteBufferSize,
0027     HashFn *hashFns, int numHashFns) :
0028     hashFns(nullptr), numHashFns(0), byteBufferSize(0), buffer(nullptr) {
0029   this->hashFns = hashFns;
0030   this->numHashFns = numHashFns;
0031   lastHashes = new uint64_t[numHashFns];
0032   this->byteBufferSize = byteBufferSize;
0033   bitBufferSize = byteBufferSize * 8;
0034   this->buffer = new char[byteBufferSize];
0035   memcpy(this->buffer, buffer, byteBufferSize);
0036 }
0037 
0038 BloomFilter::~BloomFilter() {
0039   if (buffer) {
0040     delete[] buffer;
0041   }
0042   if (lastHashes) {
0043     delete[] lastHashes;
0044   }
0045 }
0046 
0047 void BloomFilter::setBit(unsigned int bitLocation) {
0048   buffer[bitLocation / 8] |= 1 << bitLocation % 8;
0049 }
0050 
0051 bool BloomFilter::isBitSet(unsigned int bitLocation) {
0052   return !!(buffer[bitLocation / 8] & 1 << bitLocation % 8);
0053 }
0054 
0055 void BloomFilter::add(const char *input, int len) {
0056   for (int j = 0; j < numHashFns; j++) {
0057     setBit(hashFns[j](input, len) % bitBufferSize);
0058   }
0059 }
0060 
0061 void BloomFilter::add(const char *sz) {
0062   add(sz, static_cast<int>(strlen(sz)));
0063 }
0064 
0065 bool BloomFilter::exists(const char *input, int len) {
0066   bool allSet = true;
0067   for (int j = 0; j < numHashFns; j++) {
0068     allSet = allSet && isBitSet(hashFns[j](input, len) % bitBufferSize);
0069   }
0070   return allSet;
0071 }
0072 
0073 bool BloomFilter::exists(const char *sz) {
0074   return exists(sz, static_cast<int>(strlen(sz)));
0075 }
0076 
0077 void BloomFilter::getHashesForCharCodes(const char *input, int inputLen,
0078     uint64_t *lastHashes, uint64_t *newHashes, unsigned char lastCharCode) {
0079   for (int i = 0; i < numHashFns; i++) {
0080     if (lastHashes) {
0081       *(newHashes + i) = hashFns[i](input, inputLen,
0082           lastCharCode, *(lastHashes+i));
0083     } else {
0084       *(newHashes + i) = hashFns[i](input, inputLen);
0085     }
0086   }
0087 }
0088 
0089 bool BloomFilter::substringExists(const char *data, int dataLen,
0090     int substringLength) {
0091   unsigned char lastCharCode = 0;
0092   for (int i = 0; i < dataLen - substringLength + 1; i++) {
0093     getHashesForCharCodes(data + i, substringLength, i == 0
0094         ? nullptr : lastHashes, lastHashes, lastCharCode);
0095     bool allSet = true;
0096     for (int j = 0; j < numHashFns; j++) {
0097       allSet = allSet && isBitSet(lastHashes[j] % bitBufferSize);
0098     }
0099     if (allSet) {
0100       return true;
0101     }
0102     lastCharCode = data[i];
0103   }
0104   return false;
0105 }
0106 
0107 bool BloomFilter::substringExists(const char *data, int substringLength) {
0108   return substringExists(data, static_cast<int>(strlen(data)), substringLength);
0109 }
0110 
0111 void BloomFilter::clear() {
0112   memset(buffer, 0, byteBufferSize);
0113 }