From 899834b5290bf6302a8a01d3c7672a9a200392c7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Sep 2014 14:57:41 -0600 Subject: [PATCH] bloom: use independent hashes Signed-off-by: Jens Axboe --- Makefile | 3 ++- crc/xxhash.c | 2 +- crc/xxhash.h | 2 +- lib/bloom.c | 42 ++++++++++++++++++++++++++++++++++++------ 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index d93ce780..8c424e3d 100644 --- a/Makefile +++ b/Makefile @@ -192,7 +192,8 @@ endif ifeq ($(CONFIG_TARGET_OS), Linux) T_DEDUPE_OBJS = t/dedupe.o T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \ - memalign.o lib/bloom.o t/debug.o + memalign.o lib/bloom.o t/debug.o crc/xxhash.o crc/crc32c.o \ + crc/crc32c-intel.o T_DEDUPE_PROGS = t/dedupe endif diff --git a/crc/xxhash.c b/crc/xxhash.c index eedaecb4..4736c528 100644 --- a/crc/xxhash.c +++ b/crc/xxhash.c @@ -221,7 +221,7 @@ static uint32_t XXH32_endian_align(const void* input, int len, uint32_t seed, XX } -uint32_t XXH32(const void* input, int len, uint32_t seed) +uint32_t XXH32(const void* input, uint32_t len, uint32_t seed) { #if 0 // Simple version, good for code maintenance, but unfortunately slow for small inputs diff --git a/crc/xxhash.h b/crc/xxhash.h index e80a91d0..8850d208 100644 --- a/crc/xxhash.h +++ b/crc/xxhash.h @@ -88,7 +88,7 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; // Simple Hash Functions //**************************** -unsigned int XXH32 (const void* input, int len, unsigned int seed); +uint32_t XXH32 (const void* input, uint32_t len, uint32_t seed); /* XXH32() : diff --git a/lib/bloom.c b/lib/bloom.c index 9ab89e4b..b469fded 100644 --- a/lib/bloom.c +++ b/lib/bloom.c @@ -4,6 +4,8 @@ #include "bloom.h" #include "../hash.h" #include "../minmax.h" +#include "../crc/xxhash.h" +#include "../crc/crc32c.h" struct bloom { uint64_t nentries; @@ -14,7 +16,31 @@ struct bloom { #define BITS_PER_INDEX (sizeof(uint32_t) * 8) #define BITS_INDEX_MASK (BITS_PER_INDEX - 1) -static unsigned int jhash_init[] = { 0, 0x12db635, 0x2a4a53 }; +struct bloom_hash { + unsigned int seed; + uint32_t (*fn)(const void *, uint32_t, uint32_t); +}; + +static uint32_t b_crc32c(const void *buf, uint32_t len, uint32_t seed) +{ + return fio_crc32c(buf, len); +} + +struct bloom_hash hashes[] = { + { + .seed = 0x8989, + .fn = jhash, + }, + { + .seed = 0x8989, + .fn = XXH32, + }, + { + .seed = 0, + .fn = b_crc32c, + }, +}; + #define N_HASHES 3 #define MIN_ENTRIES 1073741824UL @@ -24,6 +50,8 @@ struct bloom *bloom_new(uint64_t entries) struct bloom *b; size_t no_uints; + crc32c_intel_probe(); + b = malloc(sizeof(*b)); b->nentries = entries; no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX; @@ -46,16 +74,18 @@ void bloom_free(struct bloom *b) static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords, int set) { - uint32_t hashes[N_HASHES]; + uint32_t hash[N_HASHES]; int i, was_set; - for (i = 0; i < N_HASHES; i++) - hashes[i] = jhash(data, nwords, jhash_init[i]) % b->nentries; + for (i = 0; i < N_HASHES; i++) { + hash[i] = hashes[i].fn(data, nwords, hashes[i].seed); + hash[i] = hash[i] % b->nentries; + } was_set = 0; for (i = 0; i < N_HASHES; i++) { - const unsigned int index = hashes[i] / BITS_PER_INDEX; - const unsigned int bit = hashes[i] & BITS_INDEX_MASK; + const unsigned int index = hash[i] / BITS_PER_INDEX; + const unsigned int bit = hash[i] & BITS_INDEX_MASK; if (b->map[index] & (1U << bit)) was_set++; -- 2.25.1