summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  Makefile      3
-rw-r--r--  crc/xxhash.c  2
-rw-r--r--  crc/xxhash.h  2
-rw-r--r--  lib/bloom.c   42
4 files changed, 40 insertions, 9 deletions
diff --git a/Makefile b/Makefile
index d93ce780..8c424e3d 100644
--- a/Makefile
+++ b/Makefile
@@ -192,7 +192,8 @@ endif
ifeq ($(CONFIG_TARGET_OS), Linux)
T_DEDUPE_OBJS = t/dedupe.o
T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
- memalign.o lib/bloom.o t/debug.o
+ memalign.o lib/bloom.o t/debug.o crc/xxhash.o crc/crc32c.o \
+ crc/crc32c-intel.o
T_DEDUPE_PROGS = t/dedupe
endif
diff --git a/crc/xxhash.c b/crc/xxhash.c
index eedaecb4..4736c528 100644
--- a/crc/xxhash.c
+++ b/crc/xxhash.c
@@ -221,7 +221,7 @@ static uint32_t XXH32_endian_align(const void* input, int len, uint32_t seed, XX
}
-uint32_t XXH32(const void* input, int len, uint32_t seed)
+uint32_t XXH32(const void* input, uint32_t len, uint32_t seed)
{
#if 0
// Simple version, good for code maintenance, but unfortunately slow for small inputs
diff --git a/crc/xxhash.h b/crc/xxhash.h
index e80a91d0..8850d208 100644
--- a/crc/xxhash.h
+++ b/crc/xxhash.h
@@ -88,7 +88,7 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
// Simple Hash Functions
//****************************
-unsigned int XXH32 (const void* input, int len, unsigned int seed);
+uint32_t XXH32 (const void* input, uint32_t len, uint32_t seed);
/*
XXH32() :
diff --git a/lib/bloom.c b/lib/bloom.c
index 9ab89e4b..b469fded 100644
--- a/lib/bloom.c
+++ b/lib/bloom.c
@@ -4,6 +4,8 @@
#include "bloom.h"
#include "../hash.h"
#include "../minmax.h"
+#include "../crc/xxhash.h"
+#include "../crc/crc32c.h"
struct bloom {
uint64_t nentries;
@@ -14,7 +16,31 @@ struct bloom {
#define BITS_PER_INDEX (sizeof(uint32_t) * 8)
#define BITS_INDEX_MASK (BITS_PER_INDEX - 1)
-static unsigned int jhash_init[] = { 0, 0x12db635, 0x2a4a53 };
+struct bloom_hash {
+ unsigned int seed;
+ uint32_t (*fn)(const void *, uint32_t, uint32_t);
+};
+
+static uint32_t b_crc32c(const void *buf, uint32_t len, uint32_t seed)
+{
+ return fio_crc32c(buf, len);
+}
+
+struct bloom_hash hashes[] = {
+ {
+ .seed = 0x8989,
+ .fn = jhash,
+ },
+ {
+ .seed = 0x8989,
+ .fn = XXH32,
+ },
+ {
+ .seed = 0,
+ .fn = b_crc32c,
+ },
+};
+
#define N_HASHES 3
#define MIN_ENTRIES 1073741824UL
@@ -24,6 +50,8 @@ struct bloom *bloom_new(uint64_t entries)
struct bloom *b;
size_t no_uints;
+ crc32c_intel_probe();
+
b = malloc(sizeof(*b));
b->nentries = entries;
no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
@@ -46,16 +74,18 @@ void bloom_free(struct bloom *b)
static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
int set)
{
- uint32_t hashes[N_HASHES];
+ uint32_t hash[N_HASHES];
int i, was_set;
- for (i = 0; i < N_HASHES; i++)
- hashes[i] = jhash(data, nwords, jhash_init[i]) % b->nentries;
+ for (i = 0; i < N_HASHES; i++) {
+ hash[i] = hashes[i].fn(data, nwords, hashes[i].seed);
+ hash[i] = hash[i] % b->nentries;
+ }
was_set = 0;
for (i = 0; i < N_HASHES; i++) {
- const unsigned int index = hashes[i] / BITS_PER_INDEX;
- const unsigned int bit = hashes[i] & BITS_INDEX_MASK;
+ const unsigned int index = hash[i] / BITS_PER_INDEX;
+ const unsigned int bit = hash[i] & BITS_INDEX_MASK;
if (b->map[index] & (1U << bit))
was_set++;