dedupe: exit gracefully if device/file open fails
[fio.git] / lib / bloom.c
CommitLineData
652ae149
JA
1#include <stdlib.h>
2#include <inttypes.h>
3
4#include "bloom.h"
5#include "../hash.h"
265c0032 6#include "../minmax.h"
899834b5
JA
7#include "../crc/xxhash.h"
8#include "../crc/crc32c.h"
652ae149
JA
9
/*
 * Bloom filter state: a bit array and the modulus applied to hash values
 * before they are used as bit positions.
 */
struct bloom {
	uint64_t nentries;	/* hash values are reduced modulo this count */

	uint32_t *map;		/* bit array, stored as 32-bit words */
};
15
/*
 * The map is an array of uint32_t words: BITS_PER_INDEX is the number of
 * bits in one word, BITS_INDEX_MASK extracts a bit offset within a word.
 */
#define BITS_PER_INDEX	(8 * sizeof(uint32_t))
#define BITS_INDEX_MASK	(BITS_PER_INDEX - 1)
18
899834b5
JA
/*
 * One hash function used by the filter, together with the seed value that
 * is handed to it as its third argument.
 */
struct bloom_hash {
	unsigned int seed;
	/* Called as fn(key, length, seed); returns a 32-bit hash value. */
	uint32_t (*fn)(const void *buf, uint32_t len, uint32_t seed);
};
23
/*
 * Adapter that gives fio_crc32c() the three-argument signature required by
 * struct bloom_hash. The seed is accepted but not used.
 */
static uint32_t b_crc32c(const void *buf, uint32_t len, uint32_t seed)
{
	uint32_t crc;

	crc = fio_crc32c(buf, len);
	return crc;
}
28
29struct bloom_hash hashes[] = {
30 {
31 .seed = 0x8989,
32 .fn = jhash,
33 },
34 {
35 .seed = 0x8989,
36 .fn = XXH32,
37 },
38 {
39 .seed = 0,
40 .fn = b_crc32c,
41 },
42};
43
652ae149
JA
/* Number of hash functions evaluated per key; must match hashes[]. */
#define N_HASHES	3

/* Lower bound on the number of 32-bit words bloom_new() allocates (2^30). */
#define MIN_ENTRIES	(1UL << 30)
47
652ae149
JA
48struct bloom *bloom_new(uint64_t entries)
49{
50 struct bloom *b;
51 size_t no_uints;
52
899834b5
JA
53 crc32c_intel_probe();
54
652ae149
JA
55 b = malloc(sizeof(*b));
56 b->nentries = entries;
57 no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
265c0032 58 no_uints = max((unsigned long) no_uints, MIN_ENTRIES);
652ae149
JA
59 b->map = calloc(no_uints, sizeof(uint32_t));
60 if (!b->map) {
61 free(b);
62 return NULL;
63 }
64
65 return b;
66}
67
68void bloom_free(struct bloom *b)
69{
70 free(b->map);
71 free(b);
72}
73
74static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
75 int set)
76{
899834b5 77 uint32_t hash[N_HASHES];
652ae149
JA
78 int i, was_set;
79
899834b5
JA
80 for (i = 0; i < N_HASHES; i++) {
81 hash[i] = hashes[i].fn(data, nwords, hashes[i].seed);
82 hash[i] = hash[i] % b->nentries;
83 }
652ae149
JA
84
85 was_set = 0;
86 for (i = 0; i < N_HASHES; i++) {
899834b5
JA
87 const unsigned int index = hash[i] / BITS_PER_INDEX;
88 const unsigned int bit = hash[i] & BITS_INDEX_MASK;
652ae149
JA
89
90 if (b->map[index] & (1U << bit))
91 was_set++;
92 if (set)
93 b->map[index] |= 1U << bit;
94 }
95
96 return was_set == N_HASHES;
97}
98
/*
 * Query the filter for a key without modifying it.
 *
 * Returns 1 if all of the key's bits are set, 0 otherwise.
 */
int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords)
{
	const int set = 0;

	return __bloom_check(b, data, nwords, set);
}
103
/*
 * Insert a key into the filter.
 *
 * Returns 1 if all of the key's bits were already set before this call,
 * 0 otherwise.
 */
int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
{
	const int set = 1;

	return __bloom_check(b, data, nwords, set);
}