From e25839d4cb5fefcb5ffce76128a4faedb177e7af Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Nov 2012 10:49:42 +0100 Subject: [PATCH] Add sample zipf distribution randomizer Instead of just doing purely random IO where each block is touched exactly (or close to, depending on random map) once, add a zipf distribution scheme where a selectable theta defines the spread and frequency of blocks read/written. Committing this so I don't lose it. Needs a few changes, for instance we need to hash the zipf output so that the spread doesn't always just favor the lower LBA range. Signed-off-by: Jens Axboe --- Makefile | 2 +- examples/zipf | 10 ++++ fio.h | 14 ++++++ init.c | 16 +++++++ io_u.c | 23 ++++++++- lib/zipf.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/zipf.h | 17 +++++++ options.c | 41 ++++++++++++++++ parse.c | 5 +- parse.h | 8 +--- 10 files changed, 252 insertions(+), 12 deletions(-) create mode 100644 examples/zipf create mode 100644 lib/zipf.c create mode 100644 lib/zipf.h diff --git a/Makefile b/Makefile index ccfa802b..94856e0c 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ SOURCE := gettime.c fio.c ioengines.c init.c stat.c log.c time.c filesetup.c \ lib/num2str.c lib/ieee754.c $(wildcard crc/*.c) engines/cpu.c \ engines/mmap.c engines/sync.c engines/null.c engines/net.c \ memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \ - json.c + json.c lib/zipf.c ifeq ($(UNAME), Linux) SOURCE += diskutil.c fifo.c blktrace.c helpers.c cgroup.c trim.c \ diff --git a/examples/zipf b/examples/zipf new file mode 100644 index 00000000..fcfa38d9 --- /dev/null +++ b/examples/zipf @@ -0,0 +1,10 @@ +# Example job file for using a zipf distribution instead +# of a purely random workload where each block is read +# or written once. 
+[job] +ioengine=null +rw=randread +norandommap +size=1280m +bs=4k +random_distribution=zipf:0.5 diff --git a/fio.h b/fio.h index 139b9383..15ab3084 100644 --- a/fio.h +++ b/fio.h @@ -39,6 +39,7 @@ struct thread_data; #include "server.h" #include "stat.h" #include "flow.h" +#include "lib/zipf.h" #ifdef FIO_HAVE_GUASI #include <guasi.h> @@ -177,6 +178,9 @@ struct thread_options { unsigned int bs_unaligned; unsigned int fsync_on_close; + unsigned int random_distribution; + double zipf_theta; + unsigned int hugepage_size; unsigned int rw_min_bs; unsigned int thinktime; @@ -452,6 +456,11 @@ struct thread_data { struct frand_state __random_state; }; + /* + * Used for zipf random distribution + */ + struct zipf_state zipf; + struct timeval start; /* start of this loop */ struct timeval epoch; /* time job was started */ struct timeval last_issue; @@ -815,4 +824,9 @@ enum { FIO_OUTPUT_NORMAL, }; +enum { + FIO_RAND_DIST_RANDOM = 0, + FIO_RAND_DIST_ZIPF, +}; + #endif diff --git a/init.c b/init.c index 23be8631..1cee0964 100644 --- a/init.c +++ b/init.c @@ -382,6 +382,20 @@ static int fixed_block_size(struct thread_options *o) o->min_bs[DDIR_READ] == o->min_bs[DDIR_TRIM]; } +static void init_rand_distribution(struct thread_data *td) +{ + unsigned int range_size; + unsigned long nranges; + + if (td->o.random_distribution == FIO_RAND_DIST_RANDOM) + return; + + range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]); + + nranges = (td->o.size + range_size - 1) / range_size; + zipf_init(&td->zipf, nranges, td->o.zipf_theta); +} + /* * Lazy way of fixing up options that depend on each other. We could also * define option callback handlers, but this is easier. 
@@ -592,6 +606,8 @@ static int fixup_options(struct thread_data *td) td->o.compress_percentage = 0; } + init_rand_distribution(td); + return ret; } diff --git a/io_u.c b/io_u.c index b049b618..8f2ce302 100644 --- a/io_u.c +++ b/io_u.c @@ -157,8 +157,8 @@ static int get_next_free_block(struct thread_data *td, struct fio_file *f, return 1; } -static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, - enum fio_ddir ddir, unsigned long long *b) +static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f, + enum fio_ddir ddir, unsigned long long *b) { unsigned long long rmax, r, lastb; int loops = 5; @@ -234,6 +234,25 @@ ret: return 0; } +static int __get_next_rand_offset_zipf(struct thread_data *td, struct fio_file *f, + enum fio_ddir ddir, unsigned long long *b) +{ + *b = zipf_next(&td->zipf); + return 0; +} + +static int get_next_rand_offset(struct thread_data *td, struct fio_file *f, + enum fio_ddir ddir, unsigned long long *b) +{ + if (td->o.random_distribution == FIO_RAND_DIST_RANDOM) + return __get_next_rand_offset(td, f, ddir, b); + else if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) + return __get_next_rand_offset_zipf(td, f, ddir, b); + + log_err("fio: unknown random distribution: %d\n", td->o.random_distribution); + return 1; +} + static int get_next_rand_block(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir, unsigned long long *b) { diff --git a/lib/zipf.c b/lib/zipf.c new file mode 100644 index 00000000..34f28772 --- /dev/null +++ b/lib/zipf.c @@ -0,0 +1,128 @@ +#include <math.h> +#include <string.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include "ieee754.h" +#include "../log.h" +#include "zipf.h" +#include "../minmax.h" +#include "../os/os.h" + +struct fio_zipf_disk { + uint64_t ver_magic; + uint64_t nranges; + uint64_t zetan; +}; + +#define FIO_ZIPF_DISK_MAGIC 0x7a697066 +#define FIO_ZIPF_DISK_VER 1 +#define FIO_ZIPF_MAGIC ((FIO_ZIPF_DISK_MAGIC << 16) | FIO_ZIPF_DISK_VER) + +static void write_zipf(struct 
zipf_state *zs) +{ + struct fio_zipf_disk f; + char tmp[80]; + int fd; + + sprintf(tmp, "fio.zipf.%f.%llu", zs->theta, (unsigned long long) zs->nranges); + fd = open(tmp, O_CREAT | O_WRONLY, 0644); + if (fd == -1) + return; + + f.ver_magic = __cpu_to_le64(FIO_ZIPF_MAGIC); + f.nranges = __cpu_to_le64(zs->nranges); + f.zetan = __cpu_to_le64(fio_double_to_uint64(zs->zetan)); + if (write(fd, &f, sizeof(f)) != sizeof(f)) + unlink(tmp); + + close(fd); +} + +static void zipf_update(struct zipf_state *zs) +{ + unsigned int i; + + log_info("fio: generating zetan for theta=%f, ranges=%lu\n", zs->theta, zs->nranges); + + for (i = 0; i < zs->nranges; i++) + zs->zetan += pow(1.0 / (double) (i + 1), zs->theta); + + write_zipf(zs); +} + +static void zipf_load_gen_zeta(struct zipf_state *zs) +{ + struct fio_zipf_disk f; + char tmp[80]; + int fd; + + sprintf(tmp, "fio.zipf.%f.%llu", zs->theta, (unsigned long long) zs->nranges); + fd = open(tmp, O_RDONLY); + if (fd == -1) { +punt: + zipf_update(zs); + return; + } + + if (read(fd, &f, sizeof(f)) != sizeof(f)) { + close(fd); + goto punt; + } + + close(fd); + + f.ver_magic = le64_to_cpu(f.ver_magic); + f.nranges = le64_to_cpu(f.nranges); + f.zetan = le64_to_cpu(f.zetan); + + if (f.ver_magic != FIO_ZIPF_MAGIC) { + unlink(tmp); + goto punt; + } + + zs->zetan = fio_uint64_to_double(f.zetan); + zs->nranges = f.nranges; +} + +void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta) +{ + unsigned int i; + + memset(zs, 0, sizeof(*zs)); + + zs->nranges = nranges; + zs->theta = theta; + + for (i = 1; i <= 2; i++) + zs->zeta2 += pow(1.0 / (double) i, zs->theta); + + init_rand(&zs->rand); + + zipf_load_gen_zeta(zs); +} + +unsigned long long zipf_next(struct zipf_state *zs) +{ + + double alpha, eta, rand_uni, rand_z; + unsigned long long n = zs->nranges; + unsigned long long val; + + alpha = 1.0 / (1.0 - zs->theta); + eta = (1.0 - pow(2.0 / n, 1.0 - zs->theta)) / (1.0 - zs->zeta2 / zs->zetan); + + rand_uni = (double) 
__rand(&zs->rand) / (double) FRAND_MAX; + rand_z = rand_uni * zs->zetan; + + if (rand_z < 1.0) + val = 1; + else if (rand_z < (1.0 + pow(0.5, zs->theta))) + val = 2; + else + val = 1 + (unsigned long long)(n * pow(eta*rand_uni - eta + 1.0, alpha)); + + return val - 1; +} diff --git a/lib/zipf.h b/lib/zipf.h new file mode 100644 index 00000000..6578ef1c --- /dev/null +++ b/lib/zipf.h @@ -0,0 +1,17 @@ +#ifndef FIO_ZIPF_H +#define FIO_ZIPF_H + +#include "rand.h" + +struct zipf_state { + uint64_t nranges; + double theta; + double zeta2; + double zetan; + struct frand_state rand; +}; + +void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta); +unsigned long long zipf_next(struct zipf_state *zs); + +#endif diff --git a/options.c b/options.c index 380df36a..05a6a508 100644 --- a/options.c +++ b/options.c @@ -728,6 +728,29 @@ static int str_sfr_cb(void *data, const char *str) } #endif +static int str_random_distribution_cb(void *data, const char *str) +{ + struct thread_data *td = data; + double val; + char *nr; + + if (td->o.random_distribution == FIO_RAND_DIST_RANDOM) + return 0; + + nr = get_opt_postfix(str); + if (!nr) + val = 0.6; + else if (!str_to_float(nr, &val)) { + log_err("fio: random postfix parsing failed\n"); + free(nr); + return 1; + } + + td->o.zipf_theta = val; + free(nr); + return 0; +} + static int check_dir(struct thread_data *td, char *fname) { #if 0 @@ -1472,6 +1495,24 @@ static struct fio_option options[FIO_MAX_OPTS] = { .parent = "norandommap", .def = "0", }, + { + .name = "random_distribution", + .type = FIO_OPT_STR, + .off1 = td_var_offset(random_distribution), + .cb = str_random_distribution_cb, + .help = "Random offset distribution generator", + .def = "random", + .posval = { + { .ival = "random", + .oval = FIO_RAND_DIST_RANDOM, + .help = "Completely random", + }, + { .ival = "zipf", + .oval = FIO_RAND_DIST_ZIPF, + .help = "Zipf distribution", + }, + }, + }, { .name = "nrfiles", .alias = "nr_files", diff --git a/parse.c 
b/parse.c index 1a686e4c..0bbb0b30 100644 --- a/parse.c +++ b/parse.c @@ -14,6 +14,7 @@ #include "parse.h" #include "debug.h" #include "options.h" +#include "minmax.h" static struct fio_option *fio_options; extern unsigned int fio_get_kb_base(void *); @@ -220,7 +221,7 @@ static unsigned long long get_mult_bytes(const char *str, int len, void *data, /* * Convert string into a floating number. Return 1 for success and 0 otherwise. */ -static int str_to_float(const char *str, double *val) +int str_to_float(const char *str, double *val) { return (1 == sscanf(str, "%lf", val)); } @@ -505,7 +506,7 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data, o->maxlen); return 1; } - if(!str_to_float(ptr, &uf)){ + if (!str_to_float(ptr, &uf)){ log_err("not a floating point value: %s\n", ptr); return 1; } diff --git a/parse.h b/parse.h index 71190ea0..b2f9e5a0 100644 --- a/parse.h +++ b/parse.h @@ -79,6 +79,7 @@ extern void options_free(struct fio_option *, void *); extern void strip_blank_front(char **); extern void strip_blank_end(char *); extern int str_to_decimal(const char *, long long *, int, void *); +extern int str_to_float(const char *str, double *val); /* * Handlers for the options @@ -90,13 +91,6 @@ typedef int (fio_opt_str_set_fn)(void *); #define td_var(start, offset) ((void *) start + (offset)) -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) -#endif - static inline int parse_is_percent(unsigned long long val) { return val <= -1ULL && val >= (-1ULL - 100ULL); -- 2.25.1