File indexing completed on 2024-11-24 04:54:35

0001 /*
0002     SPDX-License-Identifier: MPL-2.0
0003 */
0004 
0005 /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license.
0006  * This Source Code Form is subject to the terms of the Mozilla Public
0007  * License, v. 2.0. If a copy of the MPL was not distributed with this
0008  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
0009 
0010 #include <string.h>
0011 #include <stdlib.h>
0012 #include <fstream>
0013 #include <sstream>
0014 #include <string>
0015 #include <cerrno>
0016 #include <iostream>
0017 #include <set>
0018 #include "./CppUnitLite/TestHarness.h"
0019 #include "./CppUnitLite/Test.h"
0020 #include "./BloomFilter.h"
0021 #include "./util.h"
0022 
0023 TEST(BloomFilter, isBitSetSetBit) {
0024   BloomFilter b(10, 3);
0025 
0026   // First bit in a byte
0027   CHECK(!b.isBitSet(0))
0028   b.setBit(0);
0029   CHECK(b.isBitSet(0))
0030 
0031   // Last bit in a byte
0032   CHECK(!b.isBitSet(7))
0033   b.setBit(7);
0034   CHECK(b.isBitSet(7))
0035   for (int i = 1; i <= 6; i++) {
0036     CHECK(!b.isBitSet(i));
0037   }
0038   CHECK(b.isBitSet(0));
0039 
0040   // Second bit in non first byte
0041   CHECK(!b.isBitSet(9))
0042   b.setBit(9);
0043   CHECK(b.isBitSet(9))
0044   CHECK(!b.isBitSet(1));
0045 }
0046 
0047 // Generates a simple hash function for the specified prime
0048 TEST(BloomFilter, SimpleHashFn) {
0049   HashFn h(2);
0050   uint64_t hash = h("hi", 2);
0051   CHECK(hash == (static_cast<int>('h')) * pow(2, 1) +
0052       static_cast<int>('i') * pow(2, 0));
0053 
0054   {
0055     HashFn h(19, false);
0056     HashFn h2(19, true);
0057     const char *sz = "facebook.com";
0058     const char *sz2 = "abcde";
0059     LONGS_EQUAL(h(sz, strlen(sz)), 12510474367240317);
0060     LONGS_EQUAL(h2(sz, strlen(sz)), 12510474367240317);
0061     LONGS_EQUAL(h(sz2, strlen(sz2)), 13351059);
0062     LONGS_EQUAL(h2(sz2, strlen(sz2)), 13351059);
0063   }
0064 }
0065 
0066 // Detects when elements are in the set and not in the set
0067 TEST(BloomFilter, Basic) {
0068   BloomFilter b;
0069   b.add("Brian");
0070   b.add("Ronald");
0071   b.add("Bondy");
0072   CHECK(b.exists("Brian"));
0073   CHECK(!b.exists("Brian2"));
0074   CHECK(!b.exists("Bria"));
0075 
0076   CHECK(b.exists("Ronald"));
0077   CHECK(!b.exists("Ronald2"));
0078   CHECK(!b.exists("onald2"));
0079 
0080   CHECK(b.exists("Bondy"));
0081   CHECK(!b.exists("BrianRonaldBondy"));
0082   CHECK(!b.exists("RonaldBondy"));
0083 }
0084 
0085 void genRandomBuffer(char *s, const int len) {
0086   for (int i = 0; i < len; ++i) {
0087     s[i] = rand() % 256;  // NOLINT
0088   }
0089   s[len - 1] = 0;
0090 }
0091 
0092 // Can handle long strings
0093 TEST(BloomFilter, BasicLongStrings) {
0094   const int kBufSize = 20000;
0095   char id1[kBufSize];
0096   char id2[kBufSize];
0097   char id3[kBufSize];
0098   genRandomBuffer(id1, kBufSize);
0099   genRandomBuffer(id2, kBufSize);
0100   genRandomBuffer(id3, kBufSize);
0101 
0102   HashFn h1 = HashFn(0);
0103   HashFn h2 = HashFn(1023);
0104   HashFn hashFns[2] = {h1, h2};
0105 
0106   BloomFilter b(10, 5000, hashFns, sizeof(hashFns)/sizeof(hashFns[0]));
0107 
0108   b.add(id1, kBufSize);
0109   b.add(id2, kBufSize);
0110   CHECK(b.exists(id1, kBufSize));
0111   CHECK(b.exists(id2, kBufSize));
0112   CHECK(!b.exists("hello"));
0113   CHECK(!b.exists(id3, kBufSize));
0114 }
0115 
0116 // supports substringExists
0117 TEST(BloomFilter, substringExists) {
0118   BloomFilter b;
0119   b.add("abc");
0120   b.add("hello");
0121   b.add("world");
0122   CHECK(b.substringExists("hello", 5));
0123   // Only substrings of length 5 should exist in the bloom filter
0124   CHECK(!b.substringExists("ell", 3));
0125   CHECK(b.substringExists("wow ok hello!!!!", 5));
0126   CHECK(!b.substringExists("he!lloworl!d", 5));
0127 }
0128 
0129 // Can return false positives for a saturated set
0130 TEST(BloomFilter, falsePositives) {
0131   BloomFilter b(2, 2);
0132   char sz[64];
0133   for (int i = 0; i < 100; i++) {
0134     snprintf(sz, sizeof(sz), "test-%i", i);
0135     b.add(sz);
0136   }
0137   CHECK(b.exists("test"));
0138 }
0139 
0140 // It cannot return false negatives
0141 TEST(BloomFilter, noFalseNegatives) {
0142   BloomFilter b;
0143   char sz[64];
0144   for (int i = 0; i < 100000; i++) {
0145     snprintf(sz, sizeof(sz), "test-%i", i);
0146     b.add(sz);
0147   }
0148   for (int i = 0; i < 100000; i++) {
0149     snprintf(sz, sizeof(sz), "test-%i", i);
0150     CHECK(b.exists(sz));
0151   }
0152 }
0153 
0154 // Works with some live examples
0155 TEST(BloomFilter, liveExamples) {
0156   BloomFilter b;
0157   b.add("googlesy");
0158   const char *url1 =
0159     "http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html#"
0160     "xpc=sf-gdn-exp-2&p=http%3A//slashdot.org";
0161   const char *url2 =
0162     "https://tpc.googlesyndication.com/pagead/gadgets/suggestion_autolayout_V2/"
0163     "suggestion_autolayout_V2.html#t=15174732506449260991&p=http%3A%2F%2F"
0164     "tpc.googlesyndication.com";
0165   CHECK(b.substringExists("googlesy", 8));
0166   CHECK(b.substringExists(url1, 8));
0167   CHECK(b.substringExists(url2, 8));
0168 }
0169 
0170 // Works by transfering a buffer
0171 TEST(BloomFilter, transferingBuffer) {
0172   BloomFilter b;
0173   b.add("Brian");
0174   b.add("Ronald");
0175   b.add("Bondy");
0176 
0177   BloomFilter b2(b.getBuffer(), b.getByteBufferSize());
0178   CHECK(b2.exists("Brian"));
0179   CHECK(!b2.exists("Brian2"));
0180   CHECK(!b2.exists("Bria"));
0181 
0182   CHECK(b2.exists("Ronald"));
0183   CHECK(!b2.exists("Ronald2"));
0184   CHECK(!b2.exists("onald2"));
0185 
0186   CHECK(b2.exists("Bondy"));
0187   CHECK(!b2.exists("BrianRonaldBondy"));
0188   CHECK(!b2.exists("RonaldBondy"));
0189 }
0190 
0191 // Works by transfering a buffer
0192 TEST(BloomFilter, clearBloomFilter) {
0193   BloomFilter b;
0194   b.add("Brian");
0195   CHECK(b.exists("Brian"));
0196   b.clear();
0197   CHECK(!b.exists("Brian"));
0198 }