From 652ae149194f753b5f074b4c5984acc76ebb24f1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Sep 2014 09:51:16 -0600 Subject: [PATCH 1/1] Add bloom filter Very basic, but seems to do the job. Signed-off-by: Jens Axboe --- Makefile | 4 +-- lib/bloom.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/bloom.h | 13 ++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 lib/bloom.c create mode 100644 lib/bloom.h diff --git a/Makefile b/Makefile index fe439c16..ee7f05a6 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \ lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \ lib/hweight.c lib/getrusage.c idletime.c td_error.c \ profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \ - lib/tp.c + lib/tp.c lib/bloom.c ifdef CONFIG_LIBHDFS HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE) @@ -192,7 +192,7 @@ endif ifeq ($(CONFIG_TARGET_OS), Linux) T_DEDUPE_OBJS = t/dedupe.o T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \ - memalign.o + memalign.o lib/bloom.o T_DEDUPE_PROGS = t/dedupe endif diff --git a/lib/bloom.c b/lib/bloom.c new file mode 100644 index 00000000..fbae8082 --- /dev/null +++ b/lib/bloom.c @@ -0,0 +1,73 @@ +#include +#include + +#include "bloom.h" +#include "../hash.h" + +struct bloom { + uint64_t nentries; + + uint32_t *map; +}; + +#define BITS_PER_INDEX (sizeof(uint32_t) * 8) +#define BITS_INDEX_MASK (BITS_PER_INDEX - 1) + +static unsigned int jhash_init[] = { 0, 0x12db635, 0x2a4a53 }; +#define N_HASHES 3 + +struct bloom *bloom_new(uint64_t entries) +{ + struct bloom *b; + size_t no_uints; + + b = malloc(sizeof(*b)); + b->nentries = entries; + no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX; + b->map = calloc(no_uints, sizeof(uint32_t)); + if (!b->map) { + free(b); + return NULL; + } + + return b; +} + +void bloom_free(struct bloom *b) +{ + free(b->map); + free(b); +} + +static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords, + int set) +{ + uint32_t hashes[N_HASHES]; + int i, was_set; + + for (i = 0; i < N_HASHES; i++) + hashes[i] = jhash(data, nwords, jhash_init[i]) % b->nentries; + + was_set = 0; + for (i = 0; i < N_HASHES; i++) { + const unsigned int index = hashes[i] / BITS_PER_INDEX; + const unsigned int bit = hashes[i] & BITS_INDEX_MASK; + + if (b->map[index] & (1U << bit)) + was_set++; + if (set) + b->map[index] |= 1U << bit; + } + + return was_set == N_HASHES; +} + +int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords) +{ + return __bloom_check(b, data, nwords, 0); +} + +int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords) +{ + return __bloom_check(b, data, nwords, 1); +} diff --git a/lib/bloom.h b/lib/bloom.h new file mode 100644 index 00000000..b3cde950 --- /dev/null +++ b/lib/bloom.h @@ -0,0 +1,13 @@ +#ifndef FIO_BLOOM_H +#define FIO_BLOOM_H + +#include + +struct bloom; + +struct bloom *bloom_new(uint64_t entries); +void bloom_free(struct bloom *b); +int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords); +int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords); + +#endif -- 2.25.1