bloom: always use a larger minimum size for bloom filter
[fio.git] / lib / bloom.c
CommitLineData
652ae149
JA
1#include <stdlib.h>
2#include <inttypes.h>
3
4#include "bloom.h"
5#include "../hash.h"
265c0032 6#include "../minmax.h"
652ae149
JA
7
/*
 * Bloom filter state: a bit array stored as 32-bit words plus the modulus
 * used to reduce hash values to a bit position.
 */
struct bloom {
	/* Hash values are reduced modulo this in __bloom_check(). */
	uint64_t nentries;

	/* Bit array; allocated in bloom_new(), released in bloom_free(). */
	uint32_t *map;
};
13
/*
 * The map is an array of uint32_t. A bit position splits into a word
 * index (position / BITS_PER_INDEX) and a bit within that word
 * (position & BITS_INDEX_MASK).
 */
#define BITS_PER_INDEX	(8 * sizeof(uint32_t))
#define BITS_INDEX_MASK	(BITS_PER_INDEX - 1)

/*
 * Third argument handed to jhash() for each probe; there must be at
 * least N_HASHES values here.
 */
static unsigned int jhash_init[] = { 0, 0x12db635, 0x2a4a53 };
#define N_HASHES	3
19
265c0032
JA
/*
 * Lower bound applied by bloom_new() to the number of 32-bit words in the
 * map (2^30 words). Despite the name it is compared against a word count,
 * not a bit count.
 */
#define MIN_ENTRIES	(1UL << 30)
21
652ae149
JA
22struct bloom *bloom_new(uint64_t entries)
23{
24 struct bloom *b;
25 size_t no_uints;
26
27 b = malloc(sizeof(*b));
28 b->nentries = entries;
29 no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
265c0032 30 no_uints = max((unsigned long) no_uints, MIN_ENTRIES);
652ae149
JA
31 b->map = calloc(no_uints, sizeof(uint32_t));
32 if (!b->map) {
33 free(b);
34 return NULL;
35 }
36
37 return b;
38}
39
40void bloom_free(struct bloom *b)
41{
42 free(b->map);
43 free(b);
44}
45
46static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
47 int set)
48{
49 uint32_t hashes[N_HASHES];
50 int i, was_set;
51
52 for (i = 0; i < N_HASHES; i++)
53 hashes[i] = jhash(data, nwords, jhash_init[i]) % b->nentries;
54
55 was_set = 0;
56 for (i = 0; i < N_HASHES; i++) {
57 const unsigned int index = hashes[i] / BITS_PER_INDEX;
58 const unsigned int bit = hashes[i] & BITS_INDEX_MASK;
59
60 if (b->map[index] & (1U << bit))
61 was_set++;
62 if (set)
63 b->map[index] |= 1U << bit;
64 }
65
66 return was_set == N_HASHES;
67}
68
/*
 * Probe the filter for 'data' (nwords is forwarded unchanged) without
 * modifying the map.
 *
 * Returns 1 if every probed bit is set, 0 otherwise.
 */
int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords)
{
	const int set = 0;

	return __bloom_check(b, data, nwords, set);
}
73
/*
 * Add 'data' (nwords is forwarded unchanged) to the filter by turning on
 * each of its probed bits.
 *
 * Returns 1 if every one of those bits was already set before this call,
 * 0 otherwise.
 */
int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
{
	const int set = 1;

	return __bloom_check(b, data, nwords, set);
}