Add bloom filter
authorJens Axboe <axboe@fb.com>
Fri, 26 Sep 2014 15:51:16 +0000 (09:51 -0600)
committerJens Axboe <axboe@fb.com>
Fri, 26 Sep 2014 15:51:16 +0000 (09:51 -0600)
Very basic, but seems to do the job.

Signed-off-by: Jens Axboe <axboe@fb.com>
Makefile
lib/bloom.c [new file with mode: 0644]
lib/bloom.h [new file with mode: 0644]

index fe439c16c95a013cdf72955340e5e667714bca1c..ee7f05a6802742b018e4b4a391bab160952a3c1b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -36,7 +36,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \
                lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \
                lib/hweight.c lib/getrusage.c idletime.c td_error.c \
                profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
                lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \
                lib/hweight.c lib/getrusage.c idletime.c td_error.c \
                profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
-               lib/tp.c
+               lib/tp.c lib/bloom.c
 
 ifdef CONFIG_LIBHDFS
   HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
 
 ifdef CONFIG_LIBHDFS
   HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
@@ -192,7 +192,7 @@ endif
 ifeq ($(CONFIG_TARGET_OS), Linux)
 T_DEDUPE_OBJS = t/dedupe.o
 T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
 ifeq ($(CONFIG_TARGET_OS), Linux)
 T_DEDUPE_OBJS = t/dedupe.o
 T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
-               memalign.o
+               memalign.o lib/bloom.o
 T_DEDUPE_PROGS = t/dedupe
 endif
 
 T_DEDUPE_PROGS = t/dedupe
 endif
 
diff --git a/lib/bloom.c b/lib/bloom.c
new file mode 100644 (file)
index 0000000..fbae808
--- /dev/null
@@ -0,0 +1,73 @@
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "bloom.h"
+#include "../hash.h"
+
+struct bloom {
+       uint64_t nentries;
+
+       uint32_t *map;
+};
+
+#define BITS_PER_INDEX (sizeof(uint32_t) * 8)
+#define BITS_INDEX_MASK        (BITS_PER_INDEX - 1)
+
+static unsigned int jhash_init[] = { 0, 0x12db635, 0x2a4a53 };
+#define N_HASHES       3
+
+struct bloom *bloom_new(uint64_t entries)
+{
+       struct bloom *b;
+       size_t no_uints;
+
+       b = malloc(sizeof(*b));
+       b->nentries = entries;
+       no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
+       b->map = calloc(no_uints, sizeof(uint32_t));
+       if (!b->map) {
+               free(b);
+               return NULL;
+       }
+
+       return b;
+}
+
+void bloom_free(struct bloom *b)
+{
+       free(b->map);
+       free(b);
+}
+
+static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
+                        int set)
+{
+       uint32_t hashes[N_HASHES];
+       int i, was_set;
+
+       for (i = 0; i < N_HASHES; i++)
+               hashes[i] = jhash(data, nwords, jhash_init[i]) % b->nentries;
+
+       was_set = 0;
+       for (i = 0; i < N_HASHES; i++) {
+               const unsigned int index = hashes[i] / BITS_PER_INDEX;
+               const unsigned int bit = hashes[i] & BITS_INDEX_MASK;
+
+               if (b->map[index] & (1U << bit))
+                       was_set++;
+               if (set)
+                       b->map[index] |= 1U << bit;
+       }
+
+       return was_set == N_HASHES;
+}
+
+int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords)
+{
+       return __bloom_check(b, data, nwords, 0);
+}
+
+int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
+{
+       return __bloom_check(b, data, nwords, 1);
+}
diff --git a/lib/bloom.h b/lib/bloom.h
new file mode 100644 (file)
index 0000000..b3cde95
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef FIO_BLOOM_H
+#define FIO_BLOOM_H
+
+#include <inttypes.h>
+
+struct bloom;
+
+struct bloom *bloom_new(uint64_t entries);
+void bloom_free(struct bloom *b);
+int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords);
+int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords);
+
+#endif