12 void *adu_calloc(size_t size)
14 void *ret = malloc(size);
19 #define INFO_LOG printf
22 static inline uint64_t filter_bits(struct bloom *b)
24 return 1ULL << b->order;
28 * SuperFastHash, by Paul Hsieh.
29 * http://www.azillionmonkeys.com/qed/hash.html
32 #if (defined(__GNUC__) && defined(__i386__))
33 #define get16bits(d) (*((const uint16_t *) (d)))
35 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
36 +(uint32_t)(((const uint8_t *)(d))[0]) )
40 static uint32_t super_fast_hash(const uint8_t *data, uint32_t len, uint32_t hash)
47 for (;len > 0; len--) {
48 hash += get16bits (data);
49 tmp = (get16bits (data+2) << 11) ^ hash;
50 hash = (hash << 16) ^ tmp;
51 data += 2*sizeof (uint16_t);
55 /* Handle end cases */
58 hash += get16bits (data);
60 hash ^= data[sizeof (uint16_t)] << 18;
64 hash += get16bits (data);
73 /* Force "avalanching" of final 127 bits */
83 static int test_and_set_bit(uint64_t bitnum, struct bloom *b)
85 uint64_t byte = bitnum / 8;
86 uint8_t bit = 1 << (bitnum % 8);
87 int ret = b->filter[byte] & bit;
89 b->filter[byte] |= bit;
96 int bloom_test_and_insert(const uint8_t *data, size_t len, struct bloom *b)
99 uint32_t h = 0xb11924e1; /* some arbitrary value */
101 for (i = 1; i < b->num_hash_functions; i++) {
102 uint64_t bitnum = super_fast_hash(data, len, h);
105 bitnum = bitnum << 32;
106 h = super_fast_hash(data, len, h);
109 bitnum &= filter_bits(b) - 1;
110 ret |= !test_and_set_bit(bitnum, b);
117 int bloom_test_and_insert_string(const char *str, struct bloom *b)
119 uint32_t len = strlen(str);
121 return bloom_test_and_insert((const uint8_t *)str, len, b);
124 void bloom_free(struct bloom *b)
132 int bloom_init(unsigned order, unsigned num_hash_functions,
133 struct bloom **result)
135 struct bloom *b = adu_calloc(sizeof(*b));
137 INFO_LOG("allocating bloom filter (order: %u, %u hash functions)\n",
138 order, num_hash_functions);
140 b->num_hash_functions = num_hash_functions;
141 b->filter = adu_calloc(filter_bits(b) / 8);
148 void add_stdin(struct bloom *b)
151 unsigned false_positives = 0;
153 while (fgets(buf, sizeof(buf) - 1, stdin)) {
154 size_t len = strlen(buf);
157 if (buf[len - 1] == '\n')
159 if (bloom_test_and_insert_string(buf, b))
162 INFO_LOG("filter contains %llu entries\n", b->num_entries);
163 printf("%u possible false positives\n", false_positives);
166 int main(int argc, char **argv)
171 printf("usage: $0 j k\n");
172 printf("j: set bloom filter size to m=2^j bits\n");
173 printf("k: # of hash functions to use\n");
176 bloom_init(atoi(argv[1]), atoi(argv[2]), &b);
178 INFO_LOG("%u%% of bits are set\n", (unsigned)
179 (b->num_set_bits * 100ULL / filter_bits(b)));
183 #endif /* TEST_BLOOM */