From 9f0e365d0d08f9df142d48bd891370dc50148d5f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 27 Sep 2014 08:38:42 -0600 Subject: [PATCH] Add murmurhash3 And use it in bloom, add it to the crc tester as well. Signed-off-by: Jens Axboe --- Makefile | 5 ++-- crc/test.c | 15 ++++++++++++ lib/bloom.c | 13 +++------- lib/murmur3.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ lib/murmur3.h | 8 ++++++ 5 files changed, 96 insertions(+), 13 deletions(-) create mode 100644 lib/murmur3.c create mode 100644 lib/murmur3.h diff --git a/Makefile b/Makefile index 8c424e3d..d7cc1d4c 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \ lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \ lib/hweight.c lib/getrusage.c idletime.c td_error.c \ profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \ - lib/tp.c lib/bloom.c + lib/tp.c lib/bloom.c lib/murmur3.c ifdef CONFIG_LIBHDFS HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE) @@ -192,8 +192,7 @@ endif ifeq ($(CONFIG_TARGET_OS), Linux) T_DEDUPE_OBJS = t/dedupe.o T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \ - memalign.o lib/bloom.o t/debug.o crc/xxhash.o crc/crc32c.o \ - crc/crc32c-intel.o + memalign.o lib/bloom.o t/debug.o crc/xxhash.o T_DEDUPE_PROGS = t/dedupe endif diff --git a/crc/test.c b/crc/test.c index 36054e6c..95f1c2e7 100644 --- a/crc/test.c +++ b/crc/test.c @@ -17,6 +17,7 @@ #include "../crc/sha256.h" #include "../crc/sha512.h" #include "../crc/xxhash.h" +#include "../lib/murmur3.h" #include "test.h" @@ -40,6 +41,7 @@ enum { T_SHA256 = 1U << 7, T_SHA512 = 1U << 8, T_XXHASH = 1U << 9, + T_MURMUR3 = 1U << 10, }; static void t_md5(void *buf, size_t size) @@ -134,6 +136,14 @@ static void t_sha512(void *buf, size_t size) fio_sha512_update(&ctx, buf, size); } +static void t_murmur3(void *buf, size_t size) +{ + int i; + + for (i = 0; i < NR_CHUNKS; i++) + 
murmurhash3(buf, size, 0x8989); +} + static void t_xxhash(void *buf, size_t size) { void *state; @@ -198,6 +208,11 @@ static struct test_type t[] = { .mask = T_XXHASH, .fn = t_xxhash, }, + { + .name = "murmur3", + .mask = T_MURMUR3, + .fn = t_murmur3, + }, { .name = NULL, }, diff --git a/lib/bloom.c b/lib/bloom.c index b469fded..eb6deebe 100644 --- a/lib/bloom.c +++ b/lib/bloom.c @@ -5,7 +5,7 @@ #include "../hash.h" #include "../minmax.h" #include "../crc/xxhash.h" -#include "../crc/crc32c.h" +#include "../lib/murmur3.h" struct bloom { uint64_t nentries; @@ -21,11 +21,6 @@ struct bloom_hash { uint32_t (*fn)(const void *, uint32_t, uint32_t); }; -static uint32_t b_crc32c(const void *buf, uint32_t len, uint32_t seed) -{ - return fio_crc32c(buf, len); -} - struct bloom_hash hashes[] = { { .seed = 0x8989, @@ -36,8 +31,8 @@ struct bloom_hash hashes[] = { .fn = XXH32, }, { - .seed = 0, - .fn = b_crc32c, + .seed = 0x8989, + .fn = murmurhash3, }, }; @@ -50,8 +45,6 @@ struct bloom *bloom_new(uint64_t entries) struct bloom *b; size_t no_uints; - crc32c_intel_probe(); - b = malloc(sizeof(*b)); b->nentries = entries; no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX; diff --git a/lib/murmur3.c b/lib/murmur3.c new file mode 100644 index 00000000..e316f592 --- /dev/null +++ b/lib/murmur3.c @@ -0,0 +1,68 @@ +#include "murmur3.h" + +static inline uint32_t rotl32(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +static inline uint32_t fmix32(uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +static uint32_t murmur3_tail(const uint8_t *data, const int nblocks, + uint32_t len, const uint32_t c1, + const uint32_t c2, uint32_t h1) +{ + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); + + uint32_t k1 = 0; + switch (len & 3) { + case 
3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = rotl32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + return fmix32(h1 ^ len); +} + +uint32_t murmurhash3(const void *key, uint32_t len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 4; + uint32_t h1 = seed; + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = blocks[i]; + + k1 *= c1; + k1 = rotl32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = rotl32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + return murmur3_tail(data, nblocks, len, c1, c2, h1); +} diff --git a/lib/murmur3.h b/lib/murmur3.h new file mode 100644 index 00000000..89f65003 --- /dev/null +++ b/lib/murmur3.h @@ -0,0 +1,8 @@ +#ifndef FIO_MURMUR3_H +#define FIO_MURMUR3_H + +#include <inttypes.h> + +uint32_t murmurhash3(const void *key, uint32_t len, uint32_t seed); + +#endif -- 2.25.1