File indexing completed on 2024-11-24 04:54:35
0001 /* 0002 SPDX-License-Identifier: MPL-2.0 0003 */ 0004 0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 0006 * This Source Code Form is subject to the terms of the Mozilla Public 0007 * License, v. 2.0. If a copy of the MPL was not distributed with this 0008 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 0009 0010 #include <string.h> 0011 #include <stdlib.h> 0012 #include <fstream> 0013 #include <sstream> 0014 #include <string> 0015 #include <cerrno> 0016 #include <iostream> 0017 #include <set> 0018 #include "./CppUnitLite/TestHarness.h" 0019 #include "./CppUnitLite/Test.h" 0020 #include "./BloomFilter.h" 0021 #include "./util.h" 0022 0023 TEST(BloomFilter, isBitSetSetBit) { 0024 BloomFilter b(10, 3); 0025 0026 // First bit in a byte 0027 CHECK(!b.isBitSet(0)) 0028 b.setBit(0); 0029 CHECK(b.isBitSet(0)) 0030 0031 // Last bit in a byte 0032 CHECK(!b.isBitSet(7)) 0033 b.setBit(7); 0034 CHECK(b.isBitSet(7)) 0035 for (int i = 1; i <= 6; i++) { 0036 CHECK(!b.isBitSet(i)); 0037 } 0038 CHECK(b.isBitSet(0)); 0039 0040 // Second bit in non first byte 0041 CHECK(!b.isBitSet(9)) 0042 b.setBit(9); 0043 CHECK(b.isBitSet(9)) 0044 CHECK(!b.isBitSet(1)); 0045 } 0046 0047 // Generates a simple hash function for the specified prime 0048 TEST(BloomFilter, SimpleHashFn) { 0049 HashFn h(2); 0050 uint64_t hash = h("hi", 2); 0051 CHECK(hash == (static_cast<int>('h')) * pow(2, 1) + 0052 static_cast<int>('i') * pow(2, 0)); 0053 0054 { 0055 HashFn h(19, false); 0056 HashFn h2(19, true); 0057 const char *sz = "facebook.com"; 0058 const char *sz2 = "abcde"; 0059 LONGS_EQUAL(h(sz, strlen(sz)), 12510474367240317); 0060 LONGS_EQUAL(h2(sz, strlen(sz)), 12510474367240317); 0061 LONGS_EQUAL(h(sz2, strlen(sz2)), 13351059); 0062 LONGS_EQUAL(h2(sz2, strlen(sz2)), 13351059); 0063 } 0064 } 0065 0066 // Detects when elements are in the set and not in the set 0067 TEST(BloomFilter, Basic) { 0068 BloomFilter b; 0069 b.add("Brian"); 0070 b.add("Ronald"); 0071 b.add("Bondy"); 0072 CHECK(b.exists("Brian")); 0073 CHECK(!b.exists("Brian2")); 0074 CHECK(!b.exists("Bria")); 0075 0076 CHECK(b.exists("Ronald")); 0077 CHECK(!b.exists("Ronald2")); 0078 CHECK(!b.exists("onald2")); 0079 0080 CHECK(b.exists("Bondy")); 0081 CHECK(!b.exists("BrianRonaldBondy")); 0082 CHECK(!b.exists("RonaldBondy")); 0083 } 0084 0085 void genRandomBuffer(char *s, const int len) { 0086 for (int i = 0; i < len; ++i) { 0087 s[i] = rand() % 256; // NOLINT 0088 } 0089 s[len - 1] = 0; 0090 } 0091 0092 // Can handle long strings 0093 TEST(BloomFilter, BasicLongStrings) { 0094 const int kBufSize = 20000; 0095 char id1[kBufSize]; 0096 char id2[kBufSize]; 0097 char id3[kBufSize]; 0098 genRandomBuffer(id1, kBufSize); 0099 genRandomBuffer(id2, kBufSize); 0100 genRandomBuffer(id3, kBufSize); 0101 0102 HashFn h1 = HashFn(0); 0103 HashFn h2 = HashFn(1023); 0104 HashFn hashFns[2] = {h1, h2}; 0105 0106 BloomFilter b(10, 5000, hashFns, sizeof(hashFns)/sizeof(hashFns[0])); 0107 0108 b.add(id1, kBufSize); 0109 b.add(id2, kBufSize); 0110 CHECK(b.exists(id1, kBufSize)); 0111 CHECK(b.exists(id2, kBufSize)); 0112 CHECK(!b.exists("hello")); 0113 CHECK(!b.exists(id3, kBufSize)); 0114 } 0115 0116 // supports substringExists 0117 TEST(BloomFilter, substringExists) { 0118 BloomFilter b; 0119 b.add("abc"); 0120 b.add("hello"); 0121 b.add("world"); 0122 CHECK(b.substringExists("hello", 5)); 0123 // Only substrings of length 5 should exist in the bloom filter 0124 CHECK(!b.substringExists("ell", 3)); 0125 CHECK(b.substringExists("wow ok hello!!!!", 5)); 0126 CHECK(!b.substringExists("he!lloworl!d", 5)); 0127 } 0128 0129 // Can return false positives for a saturated set 0130 TEST(BloomFilter, falsePositives) { 0131 BloomFilter b(2, 2); 0132 char sz[64]; 0133 for (int i = 0; i < 100; i++) { 0134 snprintf(sz, sizeof(sz), "test-%i", i); 0135 b.add(sz); 0136 } 0137 CHECK(b.exists("test")); 0138 } 0139 0140 // It cannot return false negatives 0141 TEST(BloomFilter, noFalseNegatives) { 0142 BloomFilter b; 0143 char sz[64]; 0144 for (int i = 0; i < 100000; i++) { 0145 snprintf(sz, sizeof(sz), "test-%i", i); 0146 b.add(sz); 0147 } 0148 for (int i = 0; i < 100000; i++) { 0149 snprintf(sz, sizeof(sz), "test-%i", i); 0150 CHECK(b.exists(sz)); 0151 } 0152 } 0153 0154 // Works with some live examples 0155 TEST(BloomFilter, liveExamples) { 0156 BloomFilter b; 0157 b.add("googlesy"); 0158 const char *url1 = 0159 "http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html#" 0160 "xpc=sf-gdn-exp-2&p=http%3A//slashdot.org"; 0161 const char *url2 = 0162 "https://tpc.googlesyndication.com/pagead/gadgets/suggestion_autolayout_V2/" 0163 "suggestion_autolayout_V2.html#t=15174732506449260991&p=http%3A%2F%2F" 0164 "tpc.googlesyndication.com"; 0165 CHECK(b.substringExists("googlesy", 8)); 0166 CHECK(b.substringExists(url1, 8)); 0167 CHECK(b.substringExists(url2, 8)); 0168 } 0169 0170 // Works by transfering a buffer 0171 TEST(BloomFilter, transferingBuffer) { 0172 BloomFilter b; 0173 b.add("Brian"); 0174 b.add("Ronald"); 0175 b.add("Bondy"); 0176 0177 BloomFilter b2(b.getBuffer(), b.getByteBufferSize()); 0178 CHECK(b2.exists("Brian")); 0179 CHECK(!b2.exists("Brian2")); 0180 CHECK(!b2.exists("Bria")); 0181 0182 CHECK(b2.exists("Ronald")); 0183 CHECK(!b2.exists("Ronald2")); 0184 CHECK(!b2.exists("onald2")); 0185 0186 CHECK(b2.exists("Bondy")); 0187 CHECK(!b2.exists("BrianRonaldBondy")); 0188 CHECK(!b2.exists("RonaldBondy")); 0189 } 0190 0191 // Works by transfering a buffer 0192 TEST(BloomFilter, clearBloomFilter) { 0193 BloomFilter b; 0194 b.add("Brian"); 0195 CHECK(b.exists("Brian")); 0196 b.clear(); 0197 CHECK(!b.exists("Brian")); 0198 }