From a87c90fd72823d5438d724e5a57ced8d1f7bed3f Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Tue, 29 Dec 2020 13:17:17 +0100 Subject: [PATCH] distributions: Extend flexibility of non-uniform random distributions This change affects options random_distribution and file_service_type. For pareto, zipf and gauss distribution a concept of `center` is implemented. It allows fixing in place the value that is most likely to be accessed. Example: fio --randseed=1 --ioengine=libaio --rw=randwrite --nrfiles=16 --bs=4k \ --size=256m --allow_file_create=1 --write_iolog=log.txt \ --file_service_type=gauss:10:0.1 --filename_format=object.\$filenum --name=x cat log.txt |grep write |cut -f 1 -d " " |sort |uniq -c | sort -n | \ sed "s/[.]/ /" | while read a b c; do echo $c $b $a; done |sort -n 0 object 13429 1 object 17928 2 object 14724 3 object 7845 4 object 2476 5 object 468 6 object 44 7 object 3 12 object 24 13 object 318 14 object 1795 15 object 6482 Signed-off-by: Adam Kupczyk --- HOWTO | 10 +++++++++- cconv.c | 2 ++ filesetup.c | 6 +++--- fio.1 | 10 +++++++++- fio.h | 1 + init.c | 6 +++--- lib/gauss.c | 8 ++++++-- lib/gauss.h | 3 ++- lib/zipf.c | 12 +++++++----- lib/zipf.h | 6 ++++-- options.c | 39 +++++++++++++++++++++++++++++++++++++-- server.h | 2 +- t/genzipf.c | 6 +++--- thread_options.h | 2 ++ 14 files changed, 89 insertions(+), 24 deletions(-) diff --git a/HOWTO b/HOWTO index 0547c721..372f268f 100644 --- a/HOWTO +++ b/HOWTO @@ -1361,7 +1361,7 @@ I/O type limit reads or writes to a certain rate. If that is the case, then the distribution may be skewed. Default: 50. -.. option:: random_distribution=str:float[,str:float][,str:float] +.. option:: random_distribution=str:float[:float][,str:float][,str:float] By default, fio will use a completely uniform random distribution when asked to perform random I/O. Sometimes it is useful to skew the distribution in @@ -1396,6 +1396,14 @@ I/O type map. 
For the **normal** distribution, a normal (Gaussian) deviation is supplied as a value between 0 and 100. + The second, optional float is allowed for **pareto**, **zipf** and **normal** distributions. + It allows to set base of distribution in non-default place, giving more control + over most probable outcome. This value is in range [0-1] which maps linearly to + range of possible random values. + Defaults are: random for **pareto** and **zipf**, and 0.5 for **normal**. + If you wanted to use **zipf** with a `theta` of 1.2 centered on 1/4 of allowed value range, + you would use ``random_distribution=zipf:1.2:0.25``. + For a **zoned** distribution, fio supports specifying percentages of I/O access that should fall within what range of the file or device. For example, given a criteria of: diff --git a/cconv.c b/cconv.c index 488dd799..62c2fc29 100644 --- a/cconv.c +++ b/cconv.c @@ -203,6 +203,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->zipf_theta.u.f = fio_uint64_to_double(le64_to_cpu(top->zipf_theta.u.i)); o->pareto_h.u.f = fio_uint64_to_double(le64_to_cpu(top->pareto_h.u.i)); o->gauss_dev.u.f = fio_uint64_to_double(le64_to_cpu(top->gauss_dev.u.i)); + o->random_center.u.f = fio_uint64_to_double(le64_to_cpu(top->random_center.u.i)); o->random_generator = le32_to_cpu(top->random_generator); o->hugepage_size = le32_to_cpu(top->hugepage_size); o->rw_min_bs = le64_to_cpu(top->rw_min_bs); @@ -423,6 +424,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->zipf_theta.u.i = __cpu_to_le64(fio_double_to_uint64(o->zipf_theta.u.f)); top->pareto_h.u.i = __cpu_to_le64(fio_double_to_uint64(o->pareto_h.u.f)); top->gauss_dev.u.i = __cpu_to_le64(fio_double_to_uint64(o->gauss_dev.u.f)); + top->random_center.u.i = __cpu_to_le64(fio_double_to_uint64(o->random_center.u.f)); top->random_generator = cpu_to_le32(o->random_generator); top->hugepage_size = cpu_to_le32(o->hugepage_size); top->rw_min_bs = __cpu_to_le64(o->rw_min_bs); diff --git 
a/filesetup.c b/filesetup.c index 76b3f935..9d033757 100644 --- a/filesetup.c +++ b/filesetup.c @@ -1319,11 +1319,11 @@ static void __init_rand_distribution(struct thread_data *td, struct fio_file *f) seed = td->rand_seeds[4]; if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) - zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, seed); + zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, td->o.random_center.u.f, seed); else if (td->o.random_distribution == FIO_RAND_DIST_PARETO) - pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, seed); + pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, td->o.random_center.u.f, seed); else if (td->o.random_distribution == FIO_RAND_DIST_GAUSS) - gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, seed); + gauss_init(&f->gauss, nranges, td->o.gauss_dev.u.f, td->o.random_center.u.f, seed); } static bool init_rand_distribution(struct thread_data *td) diff --git a/fio.1 b/fio.1 index e361b05f..d477b508 100644 --- a/fio.1 +++ b/fio.1 @@ -1132,7 +1132,7 @@ first. This may interfere with a given rate setting, if fio is asked to limit reads or writes to a certain rate. If that is the case, then the distribution may be skewed. Default: 50. .TP -.BI random_distribution \fR=\fPstr:float[,str:float][,str:float] +.BI random_distribution \fR=\fPstr:float[:float][,str:float][,str:float] By default, fio will use a completely uniform random distribution when asked to perform random I/O. Sometimes it is useful to skew the distribution in specific ways, ensuring that some parts of the data is more hot than others. @@ -1168,6 +1168,14 @@ option. If a non\-uniform model is used, fio will disable use of the random map. For the \fBnormal\fR distribution, a normal (Gaussian) deviation is supplied as a value between 0 and 100. .P +The second, optional float is allowed for \fBpareto\fR, \fBzipf\fR and \fBnormal\fR +distributions. It allows to set base of distribution in non-default place, giving +more control over most probable outcome. 
This value is in range [0-1] which maps linearly to +range of possible random values. +Defaults are: random for \fBpareto\fR and \fBzipf\fR, and 0.5 for \fBnormal\fR. +If you wanted to use \fBzipf\fR with a `theta` of 1.2 centered on 1/4 of allowed value range, +you would use `random_distribution=zipf:1.2:0.25`. +.P For a \fBzoned\fR distribution, fio supports specifying percentages of I/O access that should fall within what range of the file or device. For example, given a criteria of: diff --git a/fio.h b/fio.h index fffec001..4d439d98 100644 --- a/fio.h +++ b/fio.h @@ -229,6 +229,7 @@ struct thread_data { double pareto_h; double gauss_dev; }; + double random_center; int error; int sig; int done; diff --git a/init.c b/init.c index f9c20bdb..42fc8a4e 100644 --- a/init.c +++ b/init.c @@ -971,13 +971,13 @@ static void init_rand_file_service(struct thread_data *td) const unsigned int seed = td->rand_seeds[FIO_RAND_FILE_OFF]; if (td->o.file_service_type == FIO_FSERVICE_ZIPF) { - zipf_init(&td->next_file_zipf, nranges, td->zipf_theta, seed); + zipf_init(&td->next_file_zipf, nranges, td->zipf_theta, td->random_center, seed); zipf_disable_hash(&td->next_file_zipf); } else if (td->o.file_service_type == FIO_FSERVICE_PARETO) { - pareto_init(&td->next_file_zipf, nranges, td->pareto_h, seed); + pareto_init(&td->next_file_zipf, nranges, td->pareto_h, td->random_center, seed); zipf_disable_hash(&td->next_file_zipf); } else if (td->o.file_service_type == FIO_FSERVICE_GAUSS) { - gauss_init(&td->next_file_gauss, nranges, td->gauss_dev, seed); + gauss_init(&td->next_file_gauss, nranges, td->gauss_dev, td->random_center, seed); gauss_disable_hash(&td->next_file_gauss); } } diff --git a/lib/gauss.c b/lib/gauss.c index 3f84dbc6..c64f61e7 100644 --- a/lib/gauss.c +++ b/lib/gauss.c @@ -40,11 +40,11 @@ unsigned long long gauss_next(struct gauss_state *gs) if (!gs->disable_hash) sum = __hash_u64(sum); - return sum % gs->nranges; + return (sum + gs->rand_off) % gs->nranges; } void 
gauss_init(struct gauss_state *gs, unsigned long nranges, double dev, - unsigned int seed) + double center, unsigned int seed) { memset(gs, 0, sizeof(*gs)); init_rand_seed(&gs->r, seed, 0); @@ -55,6 +55,10 @@ void gauss_init(struct gauss_state *gs, unsigned long nranges, double dev, if (gs->stddev > nranges / 2) gs->stddev = nranges / 2; } + if (center == -1) + gs->rand_off = 0; + else + gs->rand_off = nranges * (center - 0.5); } void gauss_disable_hash(struct gauss_state *gs) diff --git a/lib/gauss.h b/lib/gauss.h index 478aa146..19e3a666 100644 --- a/lib/gauss.h +++ b/lib/gauss.h @@ -8,11 +8,12 @@ struct gauss_state { struct frand_state r; uint64_t nranges; unsigned int stddev; + unsigned int rand_off; bool disable_hash; }; void gauss_init(struct gauss_state *gs, unsigned long nranges, double dev, - unsigned int seed); + double center, unsigned int seed); unsigned long long gauss_next(struct gauss_state *gs); void gauss_disable_hash(struct gauss_state *gs); diff --git a/lib/zipf.c b/lib/zipf.c index 321a4fb9..14d7928f 100644 --- a/lib/zipf.c +++ b/lib/zipf.c @@ -23,19 +23,21 @@ static void zipf_update(struct zipf_state *zs) } static void shared_rand_init(struct zipf_state *zs, uint64_t nranges, - unsigned int seed) + double center, unsigned int seed) { memset(zs, 0, sizeof(*zs)); zs->nranges = nranges; init_rand_seed(&zs->rand, seed, 0); zs->rand_off = __rand(&zs->rand); + if (center != -1) + zs->rand_off = nranges * center; } void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta, - unsigned int seed) + double center, unsigned int seed) { - shared_rand_init(zs, nranges, seed); + shared_rand_init(zs, nranges, center, seed); zs->theta = theta; zs->zeta2 = pow(1.0, zs->theta) + pow(0.5, zs->theta); @@ -71,9 +73,9 @@ uint64_t zipf_next(struct zipf_state *zs) } void pareto_init(struct zipf_state *zs, uint64_t nranges, double h, - unsigned int seed) + double center, unsigned int seed) { - shared_rand_init(zs, nranges, seed); + shared_rand_init(zs, 
nranges, center, seed); zs->pareto_pow = log(h) / log(1.0 - h); } diff --git a/lib/zipf.h b/lib/zipf.h index 16b65f57..332e3b2f 100644 --- a/lib/zipf.h +++ b/lib/zipf.h @@ -16,10 +16,12 @@ struct zipf_state { bool disable_hash; }; -void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta, unsigned int seed); +void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta, + double center, unsigned int seed); uint64_t zipf_next(struct zipf_state *zs); -void pareto_init(struct zipf_state *zs, uint64_t nranges, double h, unsigned int seed); +void pareto_init(struct zipf_state *zs, uint64_t nranges, double h, + double center, unsigned int seed); uint64_t pareto_next(struct zipf_state *zs); void zipf_disable_hash(struct zipf_state *zs); diff --git a/options.c b/options.c index 4c472589..5dd31d23 100644 --- a/options.c +++ b/options.c @@ -44,6 +44,27 @@ static char *get_opt_postfix(const char *str) return strdup(p); } +static bool split_parse_distr(const char *str, double *val, double *center) +{ + char *cp, *p; + bool r; + + p = strdup(str); + if (!p) + return false; + + cp = strstr(p, ":"); + r = true; + if (cp) { + *cp = '\0'; + cp++; + r = str_to_float(cp, center, 0); + } + r = r && str_to_float(p, val, 0); + free(p); + return r; +} + static int bs_cmp(const void *p1, const void *p2) { const struct bssplit *bsp1 = p1; @@ -787,6 +808,7 @@ static int str_fst_cb(void *data, const char *str) { struct thread_data *td = cb_data_to_td(data); double val; + double center = -1; bool done = false; char *nr; @@ -821,7 +843,7 @@ static int str_fst_cb(void *data, const char *str) return 0; nr = get_opt_postfix(str); - if (nr && !str_to_float(nr, &val, 0)) { + if (nr && !split_parse_distr(nr, &val, &center)) { log_err("fio: file service type random postfix parsing failed\n"); free(nr); return 1; @@ -829,6 +851,12 @@ static int str_fst_cb(void *data, const char *str) free(nr); + if (center != -1 && (center < 0.00 || center > 1.00)) { + log_err("fio: distribution center 
out of range (0 <= center <= 1.0)\n"); + return 1; + } + td->random_center = center; + switch (td->o.file_service_type) { case FIO_FSERVICE_ZIPF: if (val == 1.00) { @@ -1030,6 +1058,7 @@ static int str_random_distribution_cb(void *data, const char *str) { struct thread_data *td = cb_data_to_td(data); double val; + double center = -1; char *nr; if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) @@ -1046,7 +1075,7 @@ static int str_random_distribution_cb(void *data, const char *str) return 0; nr = get_opt_postfix(str); - if (nr && !str_to_float(nr, &val, 0)) { + if (nr && !split_parse_distr(nr, &val, &center)) { log_err("fio: random postfix parsing failed\n"); free(nr); return 1; @@ -1054,6 +1083,12 @@ static int str_random_distribution_cb(void *data, const char *str) free(nr); + if (center != -1 && (center < 0.00 || center > 1.00)) { + log_err("fio: distribution center out of range (0 <= center <= 1.0)\n"); + return 1; + } + td->o.random_center.u.f = center; + if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) { if (val == 1.00) { log_err("fio: zipf theta must different than 1.0\n"); diff --git a/server.h b/server.h index 6d444749..9256d44c 100644 --- a/server.h +++ b/server.h @@ -48,7 +48,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 86, + FIO_SERVER_VER = 87, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, FIO_SERVER_MAX_CMD_MB = 2048, diff --git a/t/genzipf.c b/t/genzipf.c index 4fc10ae7..cd62e584 100644 --- a/t/genzipf.c +++ b/t/genzipf.c @@ -297,11 +297,11 @@ int main(int argc, char *argv[]) nranges /= block_size; if (dist_type == TYPE_ZIPF) - zipf_init(&zs, nranges, dist_val, 1); + zipf_init(&zs, nranges, dist_val, -1, 1); else if (dist_type == TYPE_PARETO) - pareto_init(&zs, nranges, dist_val, 1); + pareto_init(&zs, nranges, dist_val, -1, 1); else - gauss_init(&gs, nranges, dist_val, 1); + gauss_init(&gs, nranges, dist_val, -1, 1); hash_bits = 0; hash_size = nranges; diff --git a/thread_options.h b/thread_options.h index 97c400fe..0a033430 100644 --- 
a/thread_options.h +++ b/thread_options.h @@ -166,6 +166,7 @@ struct thread_options { fio_fp64_t zipf_theta; fio_fp64_t pareto_h; fio_fp64_t gauss_dev; + fio_fp64_t random_center; unsigned int random_generator; @@ -467,6 +468,7 @@ struct thread_options_pack { fio_fp64_t zipf_theta; fio_fp64_t pareto_h; fio_fp64_t gauss_dev; + fio_fp64_t random_center; uint32_t random_generator; -- 2.25.1