Add support for non-uniformly random file service type
author Jens Axboe <axboe@fb.com>
Tue, 17 May 2016 00:09:54 +0000 (18:09 -0600)
committer Jens Axboe <axboe@fb.com>
Tue, 17 May 2016 00:09:54 +0000 (18:09 -0600)
This works like the random_distribution option, except that it controls
the selection of files. As with that option, you can do:

file_service_type=zipf:1.2

and get files selected through a zipfian distribution. This extends
the concept of hot and cold IO regions to have hotter and colder files
as well.

Signed-off-by: Jens Axboe <axboe@fb.com>
HOWTO
file.h
fio.1
fio.h
init.c
io_u.c
options.c

diff --git a/HOWTO b/HOWTO
index 88d10a171da9e373ee40e5844b2b1690c6b3db28..9ed2c5f5580365c69664eecb651f7e1b4065df7b 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -673,10 +673,23 @@ file_service_type=str  Defines how fio decides which file from a job to
                                the next. Multiple files can still be
                                open depending on 'openfiles'.
 
-               The string can have a number appended, indicating how
-               often to switch to a new file. So if option random:4 is
-               given, fio will switch to a new random file after 4 ios
-               have been issued.
+                       zipf    Use a zipfian distribution to decide what file
+                               to access.
+
+                       pareto  Use a pareto distribution to decide what file
+                               to access.
+
+                       gauss   Use a gaussian (normal) distribution to decide
+                               what file to access.
+
+               For random, roundrobin, and sequential, a postfix can be
+               appended to tell fio how many I/Os to issue before switching
+               to a new file. For example, specifying
+               'file_service_type=random:8' would cause fio to issue 8 I/Os
+               before selecting a new file at random. For the non-uniform
+               distributions, a floating point postfix can be given to
+               influence how the distribution is skewed. See
+               'random_distribution' for a description of how that would work.
 
 ioengine=str   Defines how the job issues io to the file. The following
                types are defined:
diff --git a/file.h b/file.h
index e7563b84638490ffda75998576ce57b492c012c7..0cf622fcbb213a7922ecb96a8e7b7f88bd76b2f2 100644 (file)
--- a/file.h
+++ b/file.h
@@ -39,13 +39,20 @@ enum file_lock_mode {
 };
 
 /*
- * roundrobin available files, or choose one at random, or do each one
- * serially.
+ * How fio chooses which file to service next: uniformly at random, with one
+ * of the skewed (non-uniform) random distributions, sequentially through the
+ * files, or round-robin.
  */
 enum {
-       FIO_FSERVICE_RANDOM     = 1,
-       FIO_FSERVICE_RR         = 2,
-       FIO_FSERVICE_SEQ        = 3,
+       FIO_FSERVICE_RANDOM             = 1,
+       FIO_FSERVICE_RR                 = 2,
+       FIO_FSERVICE_SEQ                = 3,
+       __FIO_FSERVICE_NONUNIFORM       = 0x100,
+       FIO_FSERVICE_ZIPF               = __FIO_FSERVICE_NONUNIFORM | 4,
+       FIO_FSERVICE_PARETO             = __FIO_FSERVICE_NONUNIFORM | 5,
+       FIO_FSERVICE_GAUSS              = __FIO_FSERVICE_NONUNIFORM | 6,
+
+       FIO_FSERVICE_SHIFT              = 10,
 };
 
 /*
diff --git a/fio.1 b/fio.1
index ebb489905707d9b2b48511295477bed80d1b7fec..5e4cd4ff2663df1bff00eabcdf7382673cc5a453 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -566,10 +566,24 @@ Round robin over opened files (default).
 .TP
 .B sequential
 Do each file in the set sequentially.
+.TP
+.B zipf
+Use a zipfian distribution to decide what file to access.
+.TP
+.B pareto
+Use a pareto distribution to decide what file to access.
+.TP
+.B gauss
+Use a gaussian (normal) distribution to decide what file to access.
 .RE
 .P
-The number of I/Os to issue before switching to a new file can be specified by
-appending `:\fIint\fR' to the service type.
+For \fBrandom\fR, \fBroundrobin\fR, and \fBsequential\fR, a postfix can be
+appended to tell fio how many I/Os to issue before switching to a new file.
+For example, specifying \fBfile_service_type=random:8\fR would cause fio to
+issue \fI8\fR I/Os before selecting a new file at random. For the non-uniform
+distributions, a floating point postfix can be given to influence how the
+distribution is skewed. See \fBrandom_distribution\fR for a description of how
+that would work.
 .RE
 .TP
 .BI ioengine \fR=\fPstr
diff --git a/fio.h b/fio.h
index 6a244c38896e820c6c23da16a5a661c6fefb1f94..8b6a27220db7e3e1ea9323a6f5dd872ef3d946af 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -170,6 +170,15 @@ struct thread_data {
                unsigned int next_file;
                struct frand_state next_file_state;
        };
+       union {
+               struct zipf_state next_file_zipf;
+               struct gauss_state next_file_gauss;
+       };
+       union {
+               double zipf_theta;
+               double pareto_h;
+               double gauss_dev;
+       };
        int error;
        int sig;
        int done;
diff --git a/init.c b/init.c
index c579d5c04c82a3db6b9e7aca3dc1af9e955a7218..e8c8afb600cc488088de69cbd5d1684c23ac53d7 100644 (file)
--- a/init.c
+++ b/init.c
@@ -929,6 +929,22 @@ static void td_fill_rand_seeds_internal(struct thread_data *td, bool use64)
 
        if (td->o.file_service_type == FIO_FSERVICE_RANDOM)
                init_rand_seed(&td->next_file_state, td->rand_seeds[FIO_RAND_FILE_OFF], use64);
+       else if (td->o.file_service_type & __FIO_FSERVICE_NONUNIFORM) {
+               unsigned long nranges;
+
+               nranges = td->o.nr_files << FIO_FSERVICE_SHIFT;
+
+               if (td->o.file_service_type == FIO_FSERVICE_ZIPF) {
+                       zipf_init(&td->next_file_zipf, nranges, td->zipf_theta, td->rand_seeds[FIO_RAND_FILE_OFF]);
+                       zipf_disable_hash(&td->next_file_zipf);
+               } else if (td->o.file_service_type == FIO_FSERVICE_PARETO) {
+                       pareto_init(&td->next_file_zipf, nranges, td->pareto_h, td->rand_seeds[FIO_RAND_FILE_OFF]);
+                       zipf_disable_hash(&td->next_file_zipf);
+               } else if (td->o.file_service_type == FIO_FSERVICE_GAUSS) {
+                       gauss_init(&td->next_file_gauss, nranges, td->gauss_dev, td->rand_seeds[FIO_RAND_FILE_OFF]);
+                       gauss_disable_hash(&td->next_file_gauss);
+               }
+       }
 
        init_rand_seed(&td->file_size_state, td->rand_seeds[FIO_RAND_FILE_SIZE_OFF], use64);
        init_rand_seed(&td->trim_state, td->rand_seeds[FIO_RAND_TRIM_OFF], use64);
diff --git a/io_u.c b/io_u.c
index f9870e70bc8d408ab7bf72adcf46cddf6818b4da..c0790b254065acf59c279eda5e1d133d632d77f1 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -328,7 +328,8 @@ static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
        if (!get_next_rand_offset(td, f, ddir, b))
                return 0;
 
-       if (td->o.time_based) {
+       if (td->o.time_based ||
+           (td->o.file_service_type & __FIO_FSERVICE_NONUNIFORM)) {
                fio_file_reset(td, f);
                if (!get_next_rand_offset(td, f, ddir, b))
                        return 0;
@@ -1070,6 +1071,34 @@ static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
                io_u_mark_lat_msec(td, usec / 1000);
 }
 
+static unsigned int __get_next_fileno_rand(struct thread_data *td)
+{
+       unsigned long fileno;
+
+       if (td->o.file_service_type == FIO_FSERVICE_RANDOM) {
+               uint64_t frand_max = rand_max(&td->next_file_state);
+               unsigned long r;
+
+               r = __rand(&td->next_file_state);
+               return (unsigned int) ((double) td->o.nr_files
+                               * (r / (frand_max + 1.0)));
+       }
+
+       if (td->o.file_service_type == FIO_FSERVICE_ZIPF)
+               fileno = zipf_next(&td->next_file_zipf);
+       else if (td->o.file_service_type == FIO_FSERVICE_PARETO)
+               fileno = pareto_next(&td->next_file_zipf);
+       else if (td->o.file_service_type == FIO_FSERVICE_GAUSS)
+               fileno = gauss_next(&td->next_file_gauss);
+       else {
+               log_err("fio: bad file service type: %d\n", td->o.file_service_type);
+               assert(0);
+               return 0;
+       }
+
+       return fileno >> FIO_FSERVICE_SHIFT;
+}
+
 /*
  * Get next file to service by choosing one at random
  */
@@ -1077,17 +1106,13 @@ static struct fio_file *get_next_file_rand(struct thread_data *td,
                                           enum fio_file_flags goodf,
                                           enum fio_file_flags badf)
 {
-       uint64_t frand_max = rand_max(&td->next_file_state);
        struct fio_file *f;
        int fno;
 
        do {
                int opened = 0;
-               unsigned long r;
 
-               r = __rand(&td->next_file_state);
-               fno = (unsigned int) ((double) td->o.nr_files
-                               * (r / (frand_max + 1.0)));
+               fno = __get_next_fileno_rand(td);
 
                f = td->files[fno];
                if (fio_file_done(f))
@@ -1240,10 +1265,14 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
                put_file_log(td, f);
                td_io_close_file(td, f);
                io_u->file = NULL;
-               fio_file_set_done(f);
-               td->nr_done_files++;
-               dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
+               if (td->o.file_service_type & __FIO_FSERVICE_NONUNIFORM)
+                       fio_file_reset(td, f);
+               else {
+                       fio_file_set_done(f);
+                       td->nr_done_files++;
+                       dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
                                        td->nr_done_files, td->o.nr_files);
+               }
        } while (1);
 
        return 0;
diff --git a/options.c b/options.c
index 980b7e5e48d23a8cad051c197ea00b8664235666..71c77b9205a9ade59502c73454e5ce310c018b3d 100644 (file)
--- a/options.c
+++ b/options.c
@@ -724,12 +724,77 @@ out:
 static int str_fst_cb(void *data, const char *str)
 {
        struct thread_data *td = data;
-       char *nr = get_opt_postfix(str);
+       double val;
+       bool done = false;
+       char *nr;
 
        td->file_service_nr = 1;
-       if (nr) {
-               td->file_service_nr = atoi(nr);
+
+       switch (td->o.file_service_type) {
+       case FIO_FSERVICE_RANDOM:
+       case FIO_FSERVICE_RR:
+       case FIO_FSERVICE_SEQ:
+               nr = get_opt_postfix(str);
+               if (nr) {
+                       td->file_service_nr = atoi(nr);
+                       free(nr);
+               }
+               done = true;
+               break;
+       case FIO_FSERVICE_ZIPF:
+               val = FIO_DEF_ZIPF;
+               break;
+       case FIO_FSERVICE_PARETO:
+               val = FIO_DEF_PARETO;
+               break;
+       case FIO_FSERVICE_GAUSS:
+               val = 0.0;
+               break;
+       default:
+               log_err("fio: bad file service type: %d\n", td->o.file_service_type);
+               return 1;
+       }
+
+       if (done)
+               return 0;
+
+       nr = get_opt_postfix(str);
+       if (nr && !str_to_float(nr, &val, 0)) {
+               log_err("fio: file service type random postfix parsing failed\n");
                free(nr);
+               return 1;
+       }
+
+       free(nr);
+
+       switch (td->o.file_service_type) {
+       case FIO_FSERVICE_ZIPF:
+               if (val == 1.00) {
+                       log_err("fio: zipf theta must be different than 1.0\n");
+                       return 1;
+               }
+               if (parse_dryrun())
+                       return 0;
+               td->zipf_theta = val;
+               break;
+       case FIO_FSERVICE_PARETO:
+               if (val <= 0.00 || val >= 1.00) {
+                          log_err("fio: pareto input out of range (0 < input < 1.0)\n");
+                          return 1;
+               }
+               if (parse_dryrun())
+                       return 0;
+               td->pareto_h = val;
+               break;
+       case FIO_FSERVICE_GAUSS:
+               if (val < 0.00 || val >= 100.00) {
+                          log_err("fio: normal deviation out of range (0 < input < 100.0  )\n");
+                          return 1;
+               }
+               if (parse_dryrun())
+                       return 0;
+               td->gauss_dev = val;
+               break;
        }
 
        return 0;
@@ -2020,7 +2085,19 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .posval = {
                          { .ival = "random",
                            .oval = FIO_FSERVICE_RANDOM,
-                           .help = "Choose a file at random",
+                           .help = "Choose a file at random (uniform)",
+                         },
+                         { .ival = "zipf",
+                           .oval = FIO_FSERVICE_ZIPF,
+                           .help = "Zipf randomized",
+                         },
+                         { .ival = "pareto",
+                           .oval = FIO_FSERVICE_PARETO,
+                           .help = "Pareto randomized",
+                         },
+                         { .ival = "gauss",
+                           .oval = FIO_FSERVICE_GAUSS,
+                           .help = "Normal (guassian) distribution",
                          },
                          { .ival = "roundrobin",
                            .oval = FIO_FSERVICE_RR,