From 1b2a83dcda752f411156f7967c0d4f80e7eb61ef Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 25 Sep 2016 13:57:32 -0600 Subject: [PATCH] file: add bloom filter to avoid quadratic lookup behavior If we have a lot of jobs, with a lot of subjobs, and they each have a lot of files, we get pathetic startup performance because each file add will check all the previously added files. Fixes: bcbfeefa7bce ("fio: add multi directory support") Signed-off-by: Jens Axboe --- filehash.c | 12 ++++++++++++ filehash.h | 3 +++ filesetup.c | 12 +++++++----- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/filehash.c b/filehash.c index 0d61f54c..61aa6da3 100644 --- a/filehash.c +++ b/filehash.c @@ -5,14 +5,18 @@ #include "flist.h" #include "hash.h" #include "filehash.h" +#include "lib/bloom.h" #define HASH_BUCKETS 512 #define HASH_MASK (HASH_BUCKETS - 1) +#define BLOOM_SIZE 16*1024*1024 + unsigned int file_hash_size = HASH_BUCKETS * sizeof(struct flist_head); static struct flist_head *file_hash; static struct fio_mutex *hash_lock; +static struct bloom *file_bloom; static unsigned short hash(const char *name) { @@ -95,6 +99,11 @@ struct fio_file *add_file_hash(struct fio_file *f) return alias; } +bool file_bloom_exists(const char *fname, bool set) +{ + return bloom_string(file_bloom, fname, strlen(fname), set); +} + void file_hash_exit(void) { unsigned int i, has_entries = 0; @@ -110,6 +119,8 @@ void file_hash_exit(void) file_hash = NULL; fio_mutex_remove(hash_lock); hash_lock = NULL; + bloom_free(file_bloom); + file_bloom = NULL; } void file_hash_init(void *ptr) @@ -121,4 +132,5 @@ void file_hash_init(void *ptr) INIT_FLIST_HEAD(&file_hash[i]); hash_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); + file_bloom = bloom_new(BLOOM_SIZE); } diff --git a/filehash.h b/filehash.h index f316b208..b037bd7f 100644 --- a/filehash.h +++ b/filehash.h @@ -1,6 +1,8 @@ #ifndef FIO_FILE_HASH_H #define FIO_FILE_HASH_H +#include "lib/types.h" + extern unsigned int file_hash_size; extern void file_hash_init(void *); @@ -10,5 +12,6 @@ extern struct fio_file *add_file_hash(struct fio_file *); extern void remove_file_hash(struct fio_file *); extern void fio_file_hash_lock(void); extern void fio_file_hash_unlock(void); +extern bool file_bloom_exists(const char *, bool); #endif diff --git a/filesetup.c b/filesetup.c index c6ef3bf2..317d7380 100644 --- a/filesetup.c +++ b/filesetup.c @@ -1242,12 +1242,14 @@ static void get_file_type(struct fio_file *f) } } -static bool __is_already_allocated(const char *fname) +static bool __is_already_allocated(const char *fname, bool set) { struct flist_head *entry; + bool ret; - if (flist_empty(&filename_list)) - return false; + ret = file_bloom_exists(fname, set); + if (!ret) + return ret; flist_for_each(entry, &filename_list) { struct file_name *fn; @@ -1266,7 +1268,7 @@ static bool is_already_allocated(const char *fname) bool ret; fio_file_hash_lock(); - ret = __is_already_allocated(fname); + ret = __is_already_allocated(fname, false); fio_file_hash_unlock(); return ret; @@ -1280,7 +1282,7 @@ static void set_already_allocated(const char *fname) fn->filename = strdup(fname); fio_file_hash_lock(); - if (!__is_already_allocated(fname)) { + if (!__is_already_allocated(fname, true)) { flist_add_tail(&fn->list, &filename_list); fn = NULL; } -- 2.25.1