X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=lib%2Fbloom.c;h=bb81dbbdf8c9b285b1f43f3b339f346a90881a63;hb=ec9a7e69d1e8060efa66012edc26c076695dc1fa;hp=fbae80820813a2f548dc54d4d90b97ff1626dbd2;hpb=652ae149194f753b5f074b4c5984acc76ebb24f1;p=fio.git diff --git a/lib/bloom.c b/lib/bloom.c index fbae8082..bb81dbbd 100644 --- a/lib/bloom.c +++ b/lib/bloom.c @@ -3,6 +3,11 @@ #include "bloom.h" #include "../hash.h" +#include "../minmax.h" +#include "../crc/xxhash.h" +#include "../crc/murmur3.h" +#include "../crc/crc32c.h" +#include "../crc/fnv.h" struct bloom { uint64_t nentries; @@ -13,14 +18,56 @@ struct bloom { #define BITS_PER_INDEX (sizeof(uint32_t) * 8) #define BITS_INDEX_MASK (BITS_PER_INDEX - 1) -static unsigned int jhash_init[] = { 0, 0x12db635, 0x2a4a53 }; -#define N_HASHES 3 +struct bloom_hash { + unsigned int seed; + uint32_t (*fn)(const void *, uint32_t, uint32_t); +}; + +static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed) +{ + return fio_crc32c(buf, len); +} + +static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed) +{ + return fnv(buf, len, seed); +} + +#define BLOOM_SEED 0x8989 + +static struct bloom_hash hashes[] = { + { + .seed = BLOOM_SEED, + .fn = jhash, + }, + { + .seed = BLOOM_SEED, + .fn = XXH32, + }, + { + .seed = BLOOM_SEED, + .fn = murmurhash3, + }, + { + .seed = BLOOM_SEED, + .fn = bloom_crc32c, + }, + { + .seed = BLOOM_SEED, + .fn = bloom_fnv, + }, +}; + +#define N_HASHES 5 struct bloom *bloom_new(uint64_t entries) { struct bloom *b; size_t no_uints; + crc32c_arm64_probe(); + crc32c_intel_probe(); + b = malloc(sizeof(*b)); b->nentries = entries; no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX; @@ -39,35 +86,40 @@ void bloom_free(struct bloom *b) free(b); } -static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords, - int set) +static bool __bloom_check(struct bloom *b, const void *data, unsigned int len, + bool set) { - uint32_t hashes[N_HASHES]; + uint32_t hash[N_HASHES]; int i, was_set; - for (i = 0; i < N_HASHES; i++) - hashes[i] = jhash(data, nwords, jhash_init[i]) % b->nentries; + for (i = 0; i < N_HASHES; i++) { + hash[i] = hashes[i].fn(data, len, hashes[i].seed); + hash[i] = hash[i] % b->nentries; + } was_set = 0; for (i = 0; i < N_HASHES; i++) { - const unsigned int index = hashes[i] / BITS_PER_INDEX; - const unsigned int bit = hashes[i] & BITS_INDEX_MASK; + const unsigned int index = hash[i] / BITS_PER_INDEX; + const unsigned int bit = hash[i] & BITS_INDEX_MASK; if (b->map[index] & (1U << bit)) was_set++; - if (set) + else if (set) b->map[index] |= 1U << bit; + else + break; } return was_set == N_HASHES; } -int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords) +bool bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords) { - return __bloom_check(b, data, nwords, 0); + return __bloom_check(b, data, nwords * sizeof(uint32_t), true); } -int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords) +bool bloom_string(struct bloom *b, const char *data, unsigned int len, + bool set) { - return __bloom_check(b, data, nwords, 1); + return __bloom_check(b, data, len, set); }