File indexing completed on 2024-11-24 04:54:34
0001 /* 0002 SPDX-License-Identifier: MPL-2.0 0003 */ 0004 0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 0006 * This Source Code Form is subject to the terms of the Mozilla Public 0007 * License, v. 2.0. If a copy of the MPL was not distributed with this 0008 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 0009 0010 #include <string.h> 0011 #include "BloomFilter.h" 0012 0013 BloomFilter::BloomFilter(unsigned int bitsPerElement, 0014 unsigned int estimatedNumElements, HashFn *hashFns, int numHashFns) : 0015 hashFns(nullptr), numHashFns(0), byteBufferSize(0), buffer(nullptr) { 0016 this->hashFns = hashFns; 0017 this->numHashFns = numHashFns; 0018 lastHashes = new uint64_t[numHashFns]; 0019 byteBufferSize = bitsPerElement * estimatedNumElements / 8 + 1; 0020 bitBufferSize = byteBufferSize * 8; 0021 buffer = new char[byteBufferSize]; 0022 memset(buffer, 0, byteBufferSize); 0023 } 0024 0025 // Constructs a BloomFilter by copying the specified buffer and number of bytes 0026 BloomFilter::BloomFilter(const char *buffer, int byteBufferSize, 0027 HashFn *hashFns, int numHashFns) : 0028 hashFns(nullptr), numHashFns(0), byteBufferSize(0), buffer(nullptr) { 0029 this->hashFns = hashFns; 0030 this->numHashFns = numHashFns; 0031 lastHashes = new uint64_t[numHashFns]; 0032 this->byteBufferSize = byteBufferSize; 0033 bitBufferSize = byteBufferSize * 8; 0034 this->buffer = new char[byteBufferSize]; 0035 memcpy(this->buffer, buffer, byteBufferSize); 0036 } 0037 0038 BloomFilter::~BloomFilter() { 0039 if (buffer) { 0040 delete[] buffer; 0041 } 0042 if (lastHashes) { 0043 delete[] lastHashes; 0044 } 0045 } 0046 0047 void BloomFilter::setBit(unsigned int bitLocation) { 0048 buffer[bitLocation / 8] |= 1 << bitLocation % 8; 0049 } 0050 0051 bool BloomFilter::isBitSet(unsigned int bitLocation) { 0052 return !!(buffer[bitLocation / 8] & 1 << bitLocation % 8); 0053 } 0054 0055 void BloomFilter::add(const char *input, int len) { 0056 for (int j = 0; j < numHashFns; j++) { 0057 setBit(hashFns[j](input, len) % bitBufferSize); 0058 } 0059 } 0060 0061 void BloomFilter::add(const char *sz) { 0062 add(sz, static_cast<int>(strlen(sz))); 0063 } 0064 0065 bool BloomFilter::exists(const char *input, int len) { 0066 bool allSet = true; 0067 for (int j = 0; j < numHashFns; j++) { 0068 allSet = allSet && isBitSet(hashFns[j](input, len) % bitBufferSize); 0069 } 0070 return allSet; 0071 } 0072 0073 bool BloomFilter::exists(const char *sz) { 0074 return exists(sz, static_cast<int>(strlen(sz))); 0075 } 0076 0077 void BloomFilter::getHashesForCharCodes(const char *input, int inputLen, 0078 uint64_t *lastHashes, uint64_t *newHashes, unsigned char lastCharCode) { 0079 for (int i = 0; i < numHashFns; i++) { 0080 if (lastHashes) { 0081 *(newHashes + i) = hashFns[i](input, inputLen, 0082 lastCharCode, *(lastHashes+i)); 0083 } else { 0084 *(newHashes + i) = hashFns[i](input, inputLen); 0085 } 0086 } 0087 } 0088 0089 bool BloomFilter::substringExists(const char *data, int dataLen, 0090 int substringLength) { 0091 unsigned char lastCharCode = 0; 0092 for (int i = 0; i < dataLen - substringLength + 1; i++) { 0093 getHashesForCharCodes(data + i, substringLength, i == 0 0094 ? nullptr : lastHashes, lastHashes, lastCharCode); 0095 bool allSet = true; 0096 for (int j = 0; j < numHashFns; j++) { 0097 allSet = allSet && isBitSet(lastHashes[j] % bitBufferSize); 0098 } 0099 if (allSet) { 0100 return true; 0101 } 0102 lastCharCode = data[i]; 0103 } 0104 return false; 0105 } 0106 0107 bool BloomFilter::substringExists(const char *data, int substringLength) { 0108 return substringExists(data, static_cast<int>(strlen(data)), substringLength); 0109 } 0110 0111 void BloomFilter::clear() { 0112 memset(buffer, 0, byteBufferSize); 0113 }