- gcc
before_install:
- sudo apt-get -qq update
- - sudo apt-get install -y libaio-dev libnuma-dev
+ - sudo apt-get install -qq -y libaio-dev libnuma-dev libz-dev
VN=`git describe --match "fio-[0-9]*" --abbrev=4 HEAD 2>/dev/null` &&
case "$VN" in
*$LF*) (exit 1) ;;
- v[0-9]*)
+ fio-[0-9]*)
git update-index -q --refresh
test -z "`git diff-index --name-only HEAD --`" ||
VN="$VN-dirty" ;;
echo >&2 "FIO_VERSION = $VN"
echo "FIO_VERSION = $VN" >$GVF
}
-
-
one cpu per job. If not enough CPUs are given for the jobs
listed, then fio will roundrobin the CPUs in the set.
-numa_cpu_nodes=str Set this job running on spcified NUMA nodes' CPUs. The
+numa_cpu_nodes=str Set this job running on specified NUMA nodes' CPUs. The
arguments allow comma delimited list of cpu numbers,
A-B ranges, or 'all'. Note, to enable numa options support,
fio must be built on a system with libnuma-dev(el) installed.
through 'write_iops_log', then the minimum of this option and
'log_avg_msec' will be used. Default: 500ms.
-create_serialize=bool If true, serialize the file creating for the jobs.
+create_serialize=bool If true, serialize the file creation for the jobs.
This may be handy to avoid interleaving of data
files, which may greatly depend on the filesystem
used and even the number of processors in the system.
runs of that job would then waste time recreating the file
set again and again.
+unlink_each_loop=bool Unlink job files after each iteration or loop.
+
loops=int Run the specified number of iterations of this job. Used
to repeat the same workload a given number of times. Defaults
to 1.
The actual log names will be foo_slat.x.log, foo_clat.x.log,
and foo_lat.x.log, where x is the index of the job (1..N,
where N is the number of jobs). This helps fio_generate_plot
- fine the logs automatically. If 'per_job_logs' is false, then
+ find the logs automatically. If 'per_job_logs' is false, then
the filename will not include the job index. See 'Log File
Formats'.
+write_hist_log=str Same as write_lat_log, but writes I/O completion
+ latency histograms. If no filename is given with this option, the
+ default filename of "jobname_clat_hist.x.log" is used, where x is
+ the index of the job (1..N, where N is the number of jobs). Even
+ if the filename is given, fio will still append the type of log.
+ If per_job_logs is false, then the filename will not include the
+ job index. See 'Log File Formats'.
+
write_iops_log=str Same as write_bw_log, but writes IOPS. If no filename is
given with this option, the default filename of
"jobname_type.x.log" is used,where x is the index of the job
specified period of time, reducing the resolution of the log.
See log_max_value as well. Defaults to 0, logging all entries.
+log_hist_msec=int Same as log_avg_msec, but logs entries for completion
+ latency histograms. Computing latency percentiles from averages of
+	intervals using log_avg_msec is inaccurate. Setting this option makes
+ fio log histogram entries over the specified period of time, reducing
+ log sizes for high IOPS devices while retaining percentile accuracy.
+ See log_hist_coarseness as well. Defaults to 0, meaning histogram
+ logging is disabled.
+
+log_hist_coarseness=int Integer ranging from 0 to 6, defining the coarseness
+ of the resolution of the histogram logs enabled with log_hist_msec. For
+ each increment in coarseness, fio outputs half as many bins. Defaults to
+ 0, for which histogram logs contain 1216 latency bins. See
+ 'Log File Formats'.
+
log_max_value=bool If log_avg_msec is set, fio logs the average over that
window. If you instead want to log the maximum value, set this
option to 1. Defaults to 0, meaning that averaged values are
the --inflate-log command line parameter. The files will be
stored with a .fz suffix.
+log_unix_epoch=bool If set, fio will log Unix timestamps to the log
+ files produced by enabling write_type_log for each log type, instead
+ of the default zero-based timestamps.
+
block_error_percentiles=bool If set, record errors in trim block-sized
units from writes and trims and output a histogram of
how many trims it took to get to errors, and what kind
[mtd] skip_bad=bool Skip operations against known bad blocks.
[libhdfs] hdfsdirectory libhdfs will create chunk in this HDFS directory
-[libhdfs] chunck_size the size of the chunck to use for each file.
+[libhdfs] chunk_size the size of the chunk to use for each file.
6.0 Interpreting the output
CFLAGS = -std=gnu99 -Wwrite-strings -Wall -Wdeclaration-after-statement $(OPTFLAGS) $(EXTFLAGS) $(BUILD_CFLAGS) -I. -I$(SRCDIR)
LIBS += -lm $(EXTLIBS)
PROGS = fio
-SCRIPTS = $(addprefix $(SRCDIR)/,tools/fio_generate_plots tools/plot/fio2gnuplot tools/genfio tools/fiologparser.py tools/fio_latency2csv.py)
+SCRIPTS = $(addprefix $(SRCDIR)/,tools/fio_generate_plots tools/plot/fio2gnuplot tools/genfio tools/fiologparser.py tools/fio_latency2csv.py tools/hist/fiologparser_hist.py)
ifndef CONFIG_FIO_NO_OPT
CFLAGS += -O3
ifdef CONFIG_LIBHDFS
HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
- HDFSLIB= -Wl,-rpath $(JAVA_HOME)/jre/lib/`uname -m`/server -L$(JAVA_HOME)/jre/lib/`uname -m`/server -ljvm $(FIO_LIBHDFS_LIB)/libhdfs.a
+ HDFSLIB= -Wl,-rpath $(JAVA_HOME)/jre/lib/$(FIO_HDFS_CPU)/server -L$(JAVA_HOME)/jre/lib/$(FIO_HDFS_CPU)/server -ljvm $(FIO_LIBHDFS_LIB)/libhdfs.a
CFLAGS += $(HDFSFLAGS)
SOURCE += engines/libhdfs.c
endif
@rm -f .depend $(FIO_OBJS) $(GFIO_OBJS) $(OBJS) $(T_OBJS) $(PROGS) $(T_PROGS) $(T_TEST_PROGS) core.* core gfio FIO-VERSION-FILE *.d lib/*.d oslib/*.d crc/*.d engines/*.d profiles/*.d t/*.d config-host.mak config-host.h y.tab.[ch] lex.yy.c exp/*.[do] lexer.h
distclean: clean FORCE
- @rm -f cscope.out fio.pdf fio_generate_plots.pdf fio2gnuplot.pdf
+ @rm -f cscope.out fio.pdf fio_generate_plots.pdf fio2gnuplot.pdf fiologparser_hist.pdf
cscope:
@cscope -b -R
@man -t ./fio.1 | ps2pdf - fio.pdf
@man -t tools/fio_generate_plots.1 | ps2pdf - fio_generate_plots.pdf
@man -t tools/plot/fio2gnuplot.1 | ps2pdf - fio2gnuplot.pdf
+ @man -t tools/hist/fiologparser_hist.py.1 | ps2pdf - fiologparser_hist.pdf
test:
$(INSTALL) -m 644 $(SRCDIR)/fio.1 $(DESTDIR)$(mandir)/man1
$(INSTALL) -m 644 $(SRCDIR)/tools/fio_generate_plots.1 $(DESTDIR)$(mandir)/man1
$(INSTALL) -m 644 $(SRCDIR)/tools/plot/fio2gnuplot.1 $(DESTDIR)$(mandir)/man1
+ $(INSTALL) -m 644 $(SRCDIR)/tools/hist/fiologparser_hist.py.1 $(DESTDIR)$(mandir)/man1
$(INSTALL) -m 755 -d $(DESTDIR)$(sharedir)
$(INSTALL) -m 644 $(SRCDIR)/tools/plot/*gpm $(DESTDIR)$(sharedir)/
return !td->o.iodepth_batch_complete_min &&
!td->o.iodepth_batch_complete_max;
}
+/*
+ * Unlinks files from thread data fio_file structure
+ */
+static int unlink_all_files(struct thread_data *td)
+{
+ struct fio_file *f;
+ unsigned int i;
+ int ret = 0;
+
+ for_each_file(td, f, i) {
+ if (f->filetype != FIO_TYPE_FILE)
+ continue;
+ ret = td_io_unlink_file(td, f);
+ if (ret)
+ break;
+ }
+
+ if (ret)
+ td_verror(td, ret, "unlink_all_files");
+
+ return ret;
+}
/*
* The main verify engine. Runs over the writes we previously submitted,
continue;
} else if (io_u->ddir == DDIR_TRIM) {
io_u->ddir = DDIR_READ;
- io_u_set(io_u, IO_U_F_TRIMMED);
+ io_u_set(td, io_u, IO_U_F_TRIMMED);
break;
} else if (io_u->ddir == DDIR_WRITE) {
io_u->ddir = DDIR_READ;
if (ret < 0)
break;
if (!ddir_rw_sum(td->bytes_done) &&
- !(td->io_ops->flags & FIO_NOIO))
+ !td_ioengine_flagged(td, FIO_NOIO))
continue;
if (!in_ramp_time(td) && should_check_rate(td)) {
td->orig_buffer_size = (unsigned long long) max_bs
* (unsigned long long) max_units;
- if ((td->io_ops->flags & FIO_NOIO) || !(td_read(td) || td_write(td)))
+ if (td_ioengine_flagged(td, FIO_NOIO) || !(td_read(td) || td_write(td)))
data_xfer = 0;
err = 0;
* lucky and the allocator gives us an aligned address.
*/
if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
- (td->io_ops->flags & FIO_RAWIO))
+ td_ioengine_flagged(td, FIO_RAWIO))
td->orig_buffer_size += page_mask + td->o.mem_align;
if (td->o.mem_type == MEM_SHMHUGE || td->o.mem_type == MEM_MMAPHUGE) {
return 1;
if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
- (td->io_ops->flags & FIO_RAWIO))
+ td_ioengine_flagged(td, FIO_RAWIO))
p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align;
else
p = td->orig_buffer;
FILE *f;
int ret;
- if (td->io_ops->flags & FIO_DISKLESSIO)
+ if (td_ioengine_flagged(td, FIO_DISKLESSIO))
return 0;
sprintf(tmp, "%s/queue/scheduler", td->sysfs_root);
*/
tmp[strlen(tmp) - 1] = '\0';
+ /*
+ * Write to "none" entry doesn't fail, so check the result here.
+ */
+ if (!strcmp(tmp, "none")) {
+ log_err("fio: io scheduler is not tunable\n");
+ fclose(f);
+ return 0;
+ }
sprintf(tmp2, "[%s]", td->o.ioscheduler);
if (!strstr(tmp, tmp2)) {
if (IS_ERR_OR_NULL(io_u))
break;
- io_u_set(io_u, IO_U_F_FLIGHT);
+ io_u_set(td, io_u, IO_U_F_FLIGHT);
io_u->error = 0;
io_u->resid = 0;
if (ddir_rw(acct_ddir(io_u)))
if (rate_submit_init(td, sk_out))
goto err;
- fio_gettime(&td->epoch, NULL);
+ set_epoch_time(td, o->log_unix_epoch);
fio_getrusage(&td->ru_start);
memcpy(&td->bw_sample_time, &td->epoch, sizeof(td->epoch));
memcpy(&td->iops_sample_time, &td->epoch, sizeof(td->epoch));
fio_gettime(&td->start, NULL);
memcpy(&td->tv_cache, &td->start, sizeof(td->start));
- if (clear_state)
+ if (clear_state) {
clear_io_state(td, 0);
+ if (o->unlink_each_loop && unlink_all_files(td))
+ break;
+ }
+
prune_io_piece_log(td);
if (td->o.verify_only && (td_write(td) || td_rw(td)))
* the rusage_sem, which would never get upped because
* this thread is waiting for the stat mutex.
*/
- check_update_rusage(td);
+ do {
+ check_update_rusage(td);
+ if (!fio_mutex_down_trylock(stat_mutex))
+ break;
+ usleep(1000);
+ } while (1);
- fio_mutex_down(stat_mutex);
if (td_read(td) && td->io_bytes[DDIR_READ])
update_runtime(td, elapsed_us, DDIR_READ);
if (td_write(td) && td->io_bytes[DDIR_WRITE])
if (!o->do_verify ||
o->verify == VERIFY_NONE ||
- (td->io_ops->flags & FIO_UNIDIR))
+ td_ioengine_flagged(td, FIO_UNIDIR))
continue;
clear_io_state(td, 0);
free(o->bw_log_file);
free(o->lat_log_file);
free(o->iops_log_file);
+ free(o->hist_log_file);
free(o->replay_redirect);
free(o->exec_prerun);
free(o->exec_postrun);
string_to_cpu(&o->bw_log_file, top->bw_log_file);
string_to_cpu(&o->lat_log_file, top->lat_log_file);
string_to_cpu(&o->iops_log_file, top->iops_log_file);
+ string_to_cpu(&o->hist_log_file, top->hist_log_file);
string_to_cpu(&o->replay_redirect, top->replay_redirect);
string_to_cpu(&o->exec_prerun, top->exec_prerun);
string_to_cpu(&o->exec_postrun, top->exec_postrun);
o->verify_batch = le32_to_cpu(top->verify_batch);
o->use_thread = le32_to_cpu(top->use_thread);
o->unlink = le32_to_cpu(top->unlink);
+ o->unlink_each_loop = le32_to_cpu(top->unlink_each_loop);
o->do_disk_util = le32_to_cpu(top->do_disk_util);
o->override_sync = le32_to_cpu(top->override_sync);
o->rand_repeatable = le32_to_cpu(top->rand_repeatable);
o->allrand_repeatable = le32_to_cpu(top->allrand_repeatable);
o->rand_seed = le64_to_cpu(top->rand_seed);
o->log_avg_msec = le32_to_cpu(top->log_avg_msec);
+ o->log_hist_msec = le32_to_cpu(top->log_hist_msec);
+ o->log_hist_coarseness = le32_to_cpu(top->log_hist_coarseness);
o->log_max = le32_to_cpu(top->log_max);
o->log_offset = le32_to_cpu(top->log_offset);
o->log_gz = le32_to_cpu(top->log_gz);
o->log_gz_store = le32_to_cpu(top->log_gz_store);
+ o->log_unix_epoch = le32_to_cpu(top->log_unix_epoch);
o->norandommap = le32_to_cpu(top->norandommap);
o->softrandommap = le32_to_cpu(top->softrandommap);
o->bs_unaligned = le32_to_cpu(top->bs_unaligned);
string_to_net(top->bw_log_file, o->bw_log_file);
string_to_net(top->lat_log_file, o->lat_log_file);
string_to_net(top->iops_log_file, o->iops_log_file);
+ string_to_net(top->hist_log_file, o->hist_log_file);
string_to_net(top->replay_redirect, o->replay_redirect);
string_to_net(top->exec_prerun, o->exec_prerun);
string_to_net(top->exec_postrun, o->exec_postrun);
top->verify_batch = cpu_to_le32(o->verify_batch);
top->use_thread = cpu_to_le32(o->use_thread);
top->unlink = cpu_to_le32(o->unlink);
+ top->unlink_each_loop = cpu_to_le32(o->unlink_each_loop);
top->do_disk_util = cpu_to_le32(o->do_disk_util);
top->override_sync = cpu_to_le32(o->override_sync);
top->rand_repeatable = cpu_to_le32(o->rand_repeatable);
top->log_offset = cpu_to_le32(o->log_offset);
top->log_gz = cpu_to_le32(o->log_gz);
top->log_gz_store = cpu_to_le32(o->log_gz_store);
+ top->log_unix_epoch = cpu_to_le32(o->log_unix_epoch);
top->norandommap = cpu_to_le32(o->norandommap);
top->softrandommap = cpu_to_le32(o->softrandommap);
top->bs_unaligned = cpu_to_le32(o->bs_unaligned);
return fio_net_send_quit(client->fd);
}
-void fio_clients_terminate(void)
+static void fio_clients_terminate(void)
{
struct flist_head *entry;
struct fio_client *client;
strcpy((char *) dst->run_str, (char *) je->run_str);
}
-static void remove_reply_cmd(struct fio_client *client, struct fio_net_cmd *cmd)
+static bool remove_reply_cmd(struct fio_client *client, struct fio_net_cmd *cmd)
{
struct fio_net_cmd_reply *reply = NULL;
struct flist_head *entry;
if (!reply) {
log_err("fio: client: unable to find matching tag (%llx)\n", (unsigned long long) cmd->tag);
- return;
+ return false;
}
flist_del(&reply->list);
cmd->tag = reply->saved_tag;
free(reply);
+ return true;
}
int fio_client_wait_for_reply(struct fio_client *client, uint64_t tag)
fio_client_dec_jobs_eta(eta, client->ops->eta);
}
+static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
+ uint64_t sample_size)
+{
+ struct io_sample *s;
+ int log_offset;
+ uint64_t i, j, nr_samples;
+ struct io_u_plat_entry *entry;
+ unsigned int *io_u_plat;
+
+ int stride = 1 << hist_coarseness;
+
+ if (!sample_size)
+ return;
+
+ s = __get_sample(samples, 0, 0);
+ log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+
+ nr_samples = sample_size / __log_entry_sz(log_offset);
+
+ for (i = 0; i < nr_samples; i++) {
+
+ s = (struct io_sample *)((char *)__get_sample(samples, log_offset, i) +
+ i * sizeof(struct io_u_plat_entry));
+
+ entry = s->plat_entry;
+ io_u_plat = entry->io_u_plat;
+
+ fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time,
+ io_sample_ddir(s), s->bs);
+ for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) {
+ fprintf(f, "%lu, ", hist_sum(j, stride, io_u_plat, NULL));
+ }
+ fprintf(f, "%lu\n", (unsigned long)
+ hist_sum(FIO_IO_U_PLAT_NR - stride, stride, io_u_plat, NULL));
+
+ }
+}
+
static int fio_client_handle_iolog(struct fio_client *client,
struct fio_net_cmd *cmd)
{
return 1;
}
- flush_samples(f, pdu->samples,
- pdu->nr_samples * sizeof(struct io_sample));
+ if (pdu->log_type == IO_LOG_TYPE_HIST) {
+ client_flush_hist_samples(f, pdu->log_hist_coarseness, pdu->samples,
+ pdu->nr_samples * sizeof(struct io_sample));
+ } else {
+ flush_samples(f, pdu->samples,
+ pdu->nr_samples * sizeof(struct io_sample));
+ }
fclose(f);
return 0;
}
*/
nr_samples = le64_to_cpu(pdu->nr_samples);
- total = nr_samples * __log_entry_sz(le32_to_cpu(pdu->log_offset));
+ if (pdu->log_type == IO_LOG_TYPE_HIST)
+ total = nr_samples * (__log_entry_sz(le32_to_cpu(pdu->log_offset)) +
+ sizeof(struct io_u_plat_entry));
+ else
+ total = nr_samples * __log_entry_sz(le32_to_cpu(pdu->log_offset));
ret = malloc(total + sizeof(*pdu));
ret->nr_samples = nr_samples;
ret->log_type = le32_to_cpu(ret->log_type);
ret->compressed = le32_to_cpu(ret->compressed);
ret->log_offset = le32_to_cpu(ret->log_offset);
+ ret->log_hist_coarseness = le32_to_cpu(ret->log_hist_coarseness);
if (*store_direct)
return ret;
struct io_sample *s;
s = __get_sample(samples, ret->log_offset, i);
+ if (ret->log_type == IO_LOG_TYPE_HIST)
+ s = (struct io_sample *)((void *)s + sizeof(struct io_u_plat_entry) * i);
+
s->time = le64_to_cpu(s->time);
s->val = le64_to_cpu(s->val);
s->__ddir = le32_to_cpu(s->__ddir);
so->offset = le64_to_cpu(so->offset);
}
+
+ if (ret->log_type == IO_LOG_TYPE_HIST) {
+ s->plat_entry = (struct io_u_plat_entry *)(((void *)s) + sizeof(*s));
+ s->plat_entry->list.next = NULL;
+ s->plat_entry->list.prev = NULL;
+ }
}
return ret;
case FIO_NET_CMD_ETA: {
struct jobs_eta *je = (struct jobs_eta *) cmd->payload;
- remove_reply_cmd(client, cmd);
+ if (!remove_reply_cmd(client, cmd))
+ break;
convert_jobs_eta(je);
handle_eta(client, cmd);
break;
extern void fio_client_add_cmd_option(void *, const char *);
extern int fio_client_add_ini_file(void *, const char *, bool);
extern int fio_client_terminate(struct fio_client *);
-extern void fio_clients_terminate(void);
extern struct fio_client *fio_get_client(struct fio_client *);
extern void fio_put_client(struct fio_client *);
extern int fio_client_update_options(struct fio_client *, struct thread_options *, uint64_t *);
FIO_CLIENT_TYPE_GUI = 2,
};
+extern int sum_stat_clients;
+extern struct thread_stat client_ts;
+extern struct group_run_stats client_gs;
+
#endif
if test "$hdfs_conf_error" = "1" ; then
exit 1
fi
+ FIO_HDFS_CPU=$cpu
+ if test "$FIO_HDFS_CPU" = "x86_64" ; then
+ FIO_HDFS_CPU="amd64"
+ fi
fi
echo "HDFS engine $libhdfs"
fi
if test "$libhdfs" = "yes" ; then
output_sym "CONFIG_LIBHDFS"
+ echo "FIO_HDFS_CPU=$FIO_HDFS_CPU" >> $config_host_mak
echo "JAVA_HOME=$JAVA_HOME" >> $config_host_mak
echo "FIO_LIBHDFS_INCLUDE=$FIO_LIBHDFS_INCLUDE" >> $config_host_mak
echo "FIO_LIBHDFS_LIB=$FIO_LIBHDFS_LIB" >> $config_host_mak
unsigned int i;
if (!td->o.do_disk_util ||
- (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL)))
+ td_ioengine_flagged(td, FIO_DISKLESSIO | FIO_NODISKUTIL))
return;
for_each_file(td, f, i)
static struct fio_option options[] = {
{
.name = "donorname",
+ .lname = "Donor Name",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct e4defrag_options, donor_name),
.help = "File used as a block donor",
},
{
.name = "inplace",
+ .lname = "In Place",
.type = FIO_OPT_INT,
.off1 = offsetof(struct e4defrag_options, inplace),
.minval = 0,
return FIO_Q_COMPLETED;
}
-int fio_gf_async_setup(struct thread_data *td)
+static int fio_gf_async_setup(struct thread_data *td)
{
struct gf_data *g;
int r;
.group = FIO_OPT_G_HDFS,
},
{
- .name = "chunck_size",
+ .name = "chunk_size",
+ .alias = "chunck_size",
+ .lname = "Chunk size",
.type = FIO_OPT_INT,
.off1 = offsetof(struct hdfsio_options, chunck_size),
.def = "1048576",
},
{
.name = "single_instance",
+ .lname = "Single Instance",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct hdfsio_options, single_instance),
.def = "1",
},
{
.name = "hdfs_use_direct",
+ .lname = "HDFS Use Direct",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct hdfsio_options, use_direct),
.def = "0",
#include "../verify.h"
#include "../oslib/libmtd.h"
-libmtd_t desc;
+static libmtd_t desc;
struct fio_mtd_data {
struct mtd_dev_info info;
return generic_close_file(td, f);
}
-int fio_mtd_get_file_size(struct thread_data *td, struct fio_file *f)
+static int fio_mtd_get_file_size(struct thread_data *td, struct fio_file *f)
{
struct mtd_dev_info info;
#ifdef CONFIG_TCP_NODELAY
{
.name = "nodelay",
+ .lname = "No Delay",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct netio_options, nodelay),
.help = "Use TCP_NODELAY on TCP connections",
},
{
.name = "pingpong",
+ .lname = "Ping Pong",
.type = FIO_OPT_STR_SET,
.off1 = offsetof(struct netio_options, pingpong),
.help = "Ping-pong IO requests",
pmb_parse_path(f->file_name, &path, &bsize, &fsize);
if (!path)
- return 1;
+ return ENOENT;
unlink(path);
free(path);
* Print status of the jobs we know about. This includes rate estimates,
* ETA, thread state, etc.
*/
-int calc_thread_status(struct jobs_eta *je, int force)
+bool calc_thread_status(struct jobs_eta *je, int force)
{
struct thread_data *td;
int i, unified_rw_rep;
if (!force) {
if (!(output_format & FIO_OUTPUT_NORMAL) &&
f_out == stdout)
- return 0;
+ return false;
if (temp_stall_ts || eta_print == FIO_ETA_NEVER)
- return 0;
+ return false;
if (!isatty(STDOUT_FILENO) && (eta_print != FIO_ETA_ALWAYS))
- return 0;
+ return false;
}
if (!ddir_rw_sum(rate_io_bytes))
* Allow a little slack, the target is to print it every 1000 msecs
*/
if (!force && disp_time < 900)
- return 0;
+ return false;
calc_rate(unified_rw_rep, disp_time, io_bytes, disp_io_bytes, je->rate);
calc_iops(unified_rw_rep, disp_time, io_iops, disp_io_iops, je->iops);
memcpy(&disp_prev_time, &now, sizeof(now));
if (!force && !je->nr_running && !je->nr_pending)
- return 0;
+ return false;
je->nr_threads = thread_number;
update_condensed_str(__run_str, run_str);
memcpy(je->run_str, run_str, strlen(run_str));
- return 1;
+ return true;
}
void display_thread_status(struct jobs_eta *je)
--- /dev/null
+# The most basic form of data verification. Write the device randomly
+# in 4K chunks, then read it back and verify the contents.
+[write-and-verify]
+rw=randwrite
+bs=4k
+direct=1
+ioengine=libaio
+iodepth=16
+verify=crc32c
+# Use /dev/XXX. For running this on a file instead, remove the filename
+# option and add a size=32G (or whatever file size you want) instead.
+filename=/dev/XXX
iodepth=256
numjobs=4
bssplit=512/4:1024/1:1536/1:2048/1:2560/1:3072/1:3584/1:4k/67:8k/10:16k/7:32k/3:64k/3
+blockalign=4k
random_distribution=zoned:50/5:30/15:20/80
filename=/dev/nvme0n1
group_reporting=1
extern int get_fileno(struct thread_data *, const char *);
extern void free_release_files(struct thread_data *);
extern void filesetup_mem_free(void);
-void fio_file_reset(struct thread_data *, struct fio_file *);
-int fio_files_done(struct thread_data *);
+extern void fio_file_reset(struct thread_data *, struct fio_file *);
+extern bool fio_files_done(struct thread_data *);
+extern bool exists_and_not_regfile(const char *);
#endif
return ff;
}
-static int __fio_lock_file(const char *fname, int trylock)
+static bool __fio_lock_file(const char *fname, int trylock)
{
struct fio_filelock *ff;
uint32_t hash;
if (!ff) {
assert(!trylock);
- return 1;
+ return true;
}
if (!trylock) {
fio_mutex_down(&ff->lock);
- return 0;
+ return false;
}
if (!fio_mutex_down_trylock(&ff->lock))
- return 0;
+ return false;
fio_mutex_down(&fld->lock);
if (ff) {
fio_mutex_down(&ff->lock);
- return 0;
+ return false;
}
- return 1;
+ return true;
}
-int fio_trylock_file(const char *fname)
+bool fio_trylock_file(const char *fname)
{
return __fio_lock_file(fname, 1);
}
#ifndef FIO_LOCK_FILE_H
#define FIO_LOCK_FILE_H
+#include "lib/types.h"
+
extern void fio_lock_file(const char *);
-extern int fio_trylock_file(const char *);
+extern bool fio_trylock_file(const char *);
extern void fio_unlock_file(const char *);
extern int fio_filelock_init(void);
*/
if (td_read(td) ||
(td_write(td) && td->o.overwrite && !td->o.file_append) ||
- (td_write(td) && td->io_ops->flags & FIO_NOEXTEND))
+ (td_write(td) && td_ioengine_flagged(td, FIO_NOEXTEND)))
new_layout = 1;
if (td_write(td) && !td->o.overwrite && !td->o.file_append)
unlink_file = 1;
if (unlink_file || new_layout) {
+ int ret;
+
dprint(FD_FILE, "layout unlink %s\n", f->file_name);
- if ((td_io_unlink_file(td, f) < 0) && (errno != ENOENT)) {
+
+ ret = td_io_unlink_file(td, f);
+ if (ret != 0 && ret != ENOENT) {
td_verror(td, errno, "unlink");
return 1;
}
unsigned int bs;
char *b;
- if (td->io_ops->flags & FIO_PIPEIO)
+ if (td_ioengine_flagged(td, FIO_PIPEIO))
return 0;
if (!fio_file_open(f)) {
* device/file sizes are zero and no size given, punt
*/
if ((!total_size || total_size == -1ULL) && !o->size &&
- !(td->io_ops->flags & FIO_NOIO) && !o->fill_device &&
+ !td_ioengine_flagged(td, FIO_NOIO) && !o->fill_device &&
!(o->nr_files && (o->file_size_low || o->file_size_high))) {
log_err("%s: you need to specify size=\n", o->name);
td_verror(td, EINVAL, "total_file_size");
if (f->filetype == FIO_TYPE_FILE &&
(f->io_size + f->file_offset) > f->real_file_size &&
- !(td->io_ops->flags & FIO_DISKLESSIO)) {
+ !td_ioengine_flagged(td, FIO_DISKLESSIO)) {
if (!o->create_on_open) {
need_extend++;
extend_size += (f->io_size + f->file_offset);
}
}
-static int __is_already_allocated(const char *fname)
+static bool __is_already_allocated(const char *fname)
{
struct flist_head *entry;
- char *filename;
if (flist_empty(&filename_list))
- return 0;
+ return false;
flist_for_each(entry, &filename_list) {
- filename = flist_entry(entry, struct file_name, list)->filename;
+ struct file_name *fn;
- if (strcmp(filename, fname) == 0)
- return 1;
+ fn = flist_entry(entry, struct file_name, list);
+
+ if (!strcmp(fn->filename, fname))
+ return true;
}
- return 0;
+ return false;
}
-static int is_already_allocated(const char *fname)
+static bool is_already_allocated(const char *fname)
{
- int ret;
+ bool ret;
fio_file_hash_lock();
ret = __is_already_allocated(fname);
fio_file_hash_unlock();
+
return ret;
}
}
}
-
static void free_already_allocated(void)
{
struct flist_head *entry, *tmp;
return f;
}
+bool exists_and_not_regfile(const char *filename)
+{
+ struct stat sb;
+
+ if (lstat(filename, &sb) == -1)
+ return false;
+
+#ifndef WIN32 /* NOT Windows */
+ if (S_ISREG(sb.st_mode))
+ return false;
+#else
+ /* \\.\ is the device namespace in Windows, where every file
+ * is a device node */
+ if (S_ISREG(sb.st_mode) && strncmp(filename, "\\\\.\\", 4) != 0)
+ return false;
+#endif
+
+ return true;
+}
+
int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
{
int cur_files = td->files_index;
sprintf(file_name + len, "%s", fname);
/* clean cloned siblings using existing files */
- if (numjob && is_already_allocated(file_name))
+ if (numjob && is_already_allocated(file_name) &&
+ !exists_and_not_regfile(fname))
return 0;
f = alloc_new_file(td);
/*
* init function, io engine may not be loaded yet
*/
- if (td->io_ops && (td->io_ops->flags & FIO_DISKLESSIO))
+ if (td->io_ops && td_ioengine_flagged(td, FIO_DISKLESSIO))
f->real_file_size = -1ULL;
f->file_name = smalloc_strdup(file_name);
lfsr_reset(&f->lfsr, td->rand_seeds[FIO_RAND_BLOCK_OFF]);
}
-int fio_files_done(struct thread_data *td)
+bool fio_files_done(struct thread_data *td)
{
struct fio_file *f;
unsigned int i;
for_each_file(td, f, i)
if (!fio_file_done(f))
- return 0;
+ return false;
- return 1;
+ return true;
}
/* free memory used in initialization phase only */
.BI unlink \fR=\fPbool
Unlink job files when done. Default: false.
.TP
+.BI unlink_each_loop \fR=\fPbool
+Unlink job files after each iteration or loop. Default: false.
+.TP
.BI loops \fR=\fPint
Specifies the number of iterations (runs of the same workload) of this job.
Default: 1.
append the type of log. If \fBper_job_logs\fR is false, then the filename will
not include the job index. See the \fBLOG FILE FORMATS\fR section.
.TP
+.BI write_hist_log \fR=\fPstr
+Same as \fBwrite_lat_log\fR, but writes I/O completion latency histograms. If
+no filename is given with this option, the default filename of
+"jobname_clat_hist.x.log" is used, where x is the index of the job (1..N, where
+N is the number of jobs). Even if the filename is given, fio will still append
+the type of log. If \fBper_job_logs\fR is false, then the filename will not
+include the job index. See the \fBLOG FILE FORMATS\fR section.
+.TP
.BI write_iops_log \fR=\fPstr
Same as \fBwrite_bw_log\fR, but writes IOPS. If no filename is given with this
option, the default filename of "jobname_type.x.log" is used, where x is the
instead want to log the maximum value, set this option to 1. Defaults to
0, meaning that averaged values are logged.
.TP
+.BI log_hist_msec \fR=\fPint
+Same as \fBlog_avg_msec\fR, but logs entries for completion latency histograms.
+Computing latency percentiles from averages of intervals using \fBlog_avg_msec\fR
+is inaccurate. Setting this option makes fio log histogram entries over the
+specified period of time, reducing log sizes for high IOPS devices while
+retaining percentile accuracy. See \fBlog_hist_coarseness\fR as well. Defaults
+to 0, meaning histogram logging is disabled.
+.TP
+.BI log_hist_coarseness \fR=\fPint
+Integer ranging from 0 to 6, defining the coarseness of the resolution of the
+histogram logs enabled with \fBlog_hist_msec\fR. For each increment in
+coarseness, fio outputs half as many bins. Defaults to 0, for which histogram
+logs contain 1216 latency bins. See the \fBLOG FILE FORMATS\fR section.
+.TP
.BI log_offset \fR=\fPbool
If this is set, the iolog options will include the byte offset for the IO
entry as well as the other data values.
decompressed with fio, using the \fB\-\-inflate-log\fR command line parameter.
The files will be stored with a \fB\.fz\fR suffix.
.TP
+.BI log_unix_epoch \fR=\fPbool
+If set, fio will log Unix timestamps to the log files produced by enabling
+\fBwrite_type_log\fR for each log type, instead of the default zero-based
+timestamps.
+.TP
.BI block_error_percentiles \fR=\fPbool
If set, record errors in trim block-sized units from writes and trims and output
a histogram of how many trims it took to get to errors, and what kind of error
is enabled and \fBlog_max_value\fR is set, then fio logs maximum values in
that window instead of averages.
+For histogram logging the logs look like this:
+
+.B time (msec), data direction, block-size, bin 0, bin 1, ..., bin 1215
+
+Where 'bin i' gives the frequency of IO requests with a latency falling in
+the i-th bin. See \fBlog_hist_coarseness\fR for logging fewer bins.
+
.RE
.SH CLIENT / SERVER
{
int ret = 1;
+ compiletime_assert(TD_NR <= TD_ENG_FLAG_SHIFT, "TD_ENG_FLAG_SHIFT");
+
if (initialize_fio(envp))
return 1;
* This describes a single thread/process executing a fio job.
*/
struct thread_data {
- struct thread_options o;
struct flist_head opt_list;
unsigned long flags;
+ struct thread_options o;
void *eo;
- char verror[FIO_VERROR_SIZE];
pthread_t thread;
unsigned int thread_number;
unsigned int subjob_number;
struct io_log *slat_log;
struct io_log *clat_log;
+ struct io_log *clat_hist_log;
struct io_log *lat_log;
struct io_log *bw_log;
struct io_log *iops_log;
struct timeval start; /* start of this loop */
struct timeval epoch; /* time job was started */
+ unsigned long long unix_epoch; /* Time job was started, unix epoch based. */
struct timeval last_issue;
long time_offset;
struct timeval tv_cache;
void *prof_data;
void *pinned_mem;
+
+ char verror[FIO_VERROR_SIZE];
};
/*
extern int eta_print;
extern int eta_new_line;
extern unsigned long done_secs;
-extern char *job_section;
extern int fio_gtod_offload;
extern int fio_gtod_cpu;
extern enum fio_cs fio_clock_source;
extern void add_job_opts(const char **, int);
extern char *num2str(uint64_t, int, int, int, int);
extern int ioengine_load(struct thread_data *);
-extern int parse_dryrun(void);
+extern bool parse_dryrun(void);
extern int fio_running_or_pending_io_threads(void);
extern int fio_set_fd_nonblocking(int, const char *);
extern void sig_show_status(int sig);
TD_EXITED,
TD_REAPED,
TD_LAST,
+ TD_NR,
};
+#define TD_ENG_FLAG_SHIFT 16
+#define TD_ENG_FLAG_MASK ((1U << 16) - 1)
+
+static inline enum fio_ioengine_flags td_ioengine_flags(struct thread_data *td)
+{
+ return (enum fio_ioengine_flags)
+ ((td->flags >> TD_ENG_FLAG_SHIFT) & TD_ENG_FLAG_MASK);
+}
+
+static inline void td_set_ioengine_flags(struct thread_data *td)
+{
+ td->flags |= (td->io_ops->flags << TD_ENG_FLAG_SHIFT);
+}
+
+static inline bool td_ioengine_flagged(struct thread_data *td,
+ enum fio_ioengine_flags flags)
+{
+ return ((td->flags >> TD_ENG_FLAG_SHIFT) & flags) != 0;
+}
+
extern void td_set_runstate(struct thread_data *, int);
extern int td_bump_runstate(struct thread_data *, int);
extern void td_restore_runstate(struct thread_data *, int);
} \
} while (0)
-static inline int fio_fill_issue_time(struct thread_data *td)
+static inline bool fio_fill_issue_time(struct thread_data *td)
{
if (td->o.read_iolog_file ||
!td->o.disable_clat || !td->o.disable_slat || !td->o.disable_bw)
- return 1;
+ return true;
- return 0;
+ return false;
}
-static inline int __should_check_rate(struct thread_data *td,
- enum fio_ddir ddir)
+static inline bool __should_check_rate(struct thread_data *td,
+ enum fio_ddir ddir)
{
struct thread_options *o = &td->o;
*/
if (o->rate[ddir] || o->ratemin[ddir] || o->rate_iops[ddir] ||
o->rate_iops_min[ddir])
- return 1;
+ return true;
- return 0;
+ return false;
}
-static inline int should_check_rate(struct thread_data *td)
+static inline bool should_check_rate(struct thread_data *td)
{
- int ret = 0;
-
- if (td->bytes_done[DDIR_READ])
- ret |= __should_check_rate(td, DDIR_READ);
- if (td->bytes_done[DDIR_WRITE])
- ret |= __should_check_rate(td, DDIR_WRITE);
- if (td->bytes_done[DDIR_TRIM])
- ret |= __should_check_rate(td, DDIR_TRIM);
-
- return ret;
+ if (td->bytes_done[DDIR_READ] && __should_check_rate(td, DDIR_READ))
+ return true;
+ if (td->bytes_done[DDIR_WRITE] && __should_check_rate(td, DDIR_WRITE))
+ return true;
+ if (td->bytes_done[DDIR_TRIM] && __should_check_rate(td, DDIR_TRIM))
+ return true;
+
+ return false;
}
static inline unsigned int td_max_bs(struct thread_data *td)
return min(td->o.min_bs[DDIR_TRIM], min_bs);
}
-static inline int td_async_processing(struct thread_data *td)
+static inline bool td_async_processing(struct thread_data *td)
{
return (td->flags & TD_F_NEED_LOCK) != 0;
}
pthread_cond_signal(&td->free_cond);
}
+static inline void td_flags_clear(struct thread_data *td, unsigned int *flags,
+ unsigned int value)
+{
+ if (!td_async_processing(td))
+ *flags &= ~value;
+ else
+ __sync_fetch_and_and(flags, ~value);
+}
+
+static inline void td_flags_set(struct thread_data *td, unsigned int *flags,
+ unsigned int value)
+{
+ if (!td_async_processing(td))
+ *flags |= value;
+ else
+ __sync_fetch_and_or(flags, value);
+}
+
extern const char *fio_get_arch_string(int);
extern const char *fio_get_os_string(int);
extern bool in_ramp_time(struct thread_data *);
extern void fio_time_init(void);
extern void timeval_add_msec(struct timeval *, unsigned int);
+extern void set_epoch_time(struct thread_data *, int);
#endif
gdk_threads_leave();
}
-extern int sum_stat_clients;
-extern struct thread_stat client_ts;
-extern struct group_run_stats client_gs;
-
static int sum_stat_nr;
static void gfio_thread_status_op(struct fio_client *client,
char *minp, *maxp;
char tmp[64];
- if (!usec_to_msec(&min, &max, &mean, &dev))
+ if (usec_to_msec(&min, &max, &mean, &dev))
base = "(msec)";
minp = num2str(min, 6, 1, 0, 0);
static void *server_thread(void *arg)
{
+ fio_server_create_sk_key();
is_backend = 1;
gfio_server_running = 1;
fio_start_server(NULL);
gfio_server_running = 0;
+ fio_server_destroy_sk_key();
return NULL;
}
static int nr_job_sections;
int exitall_on_terminate = 0;
-int exitall_on_terminate_error = 0;
int output_format = FIO_OUTPUT_NORMAL;
int eta_print = FIO_ETA_AUTO;
int eta_new_line = 0;
},
{
.name = (char *) "output-format",
- .has_arg = optional_argument,
+ .has_arg = required_argument,
.val = 'F' | FIO_CLIENT_FLAG,
},
{
free(trigger_remote_cmd);
trigger_file = trigger_cmd = trigger_remote_cmd = NULL;
- options_free(fio_options, &def_thread);
+ options_free(fio_options, &def_thread.o);
fio_filelock_exit();
scleanup();
}
"verify limited\n");
ret = warnings_fatal;
}
- if (o->bs_unaligned && (o->odirect || td->io_ops->flags & FIO_RAWIO))
+ if (o->bs_unaligned && (o->odirect || td_ioengine_flagged(td, FIO_RAWIO)))
log_err("fio: bs_unaligned may not work with raw io\n");
/*
if (o->pre_read) {
o->invalidate_cache = 0;
- if (td->io_ops->flags & FIO_PIPEIO) {
+ if (td_ioengine_flagged(td, FIO_PIPEIO)) {
log_info("fio: cannot pre-read files with an IO engine"
" that isn't seekable. Pre-read disabled.\n");
ret = warnings_fatal;
}
if (!o->unit_base) {
- if (td->io_ops->flags & FIO_BIT_BASED)
+ if (td_ioengine_flagged(td, FIO_BIT_BASED))
o->unit_base = 1;
else
o->unit_base = 8;
* Windows doesn't support O_DIRECT or O_SYNC with the _open interface,
* so fail if we're passed those flags
*/
- if ((td->io_ops->flags & FIO_SYNCIO) && (td->o.odirect || td->o.sync_io)) {
+ if (td_ioengine_flagged(td, FIO_SYNCIO) && (td->o.odirect || td->o.sync_io)) {
log_err("fio: Windows does not support direct or non-buffered io with"
" the synchronous ioengines. Use the 'windowsaio' ioengine"
" with 'direct=1' and 'iodepth=1' instead.\n");
if (fio_option_is_set(&td->o, rand_seed))
td->o.rand_repeatable = 0;
- if ((td->io_ops->flags & FIO_NOEXTEND) && td->o.file_append) {
+ if (td_ioengine_flagged(td, FIO_NOEXTEND) && td->o.file_append) {
log_err("fio: can't append/extent with IO engine %s\n", td->io_ops->name);
ret = 1;
}
return p;
}
-static int exists_and_not_regfile(const char *filename)
-{
- struct stat sb;
-
- if (lstat(filename, &sb) == -1)
- return 0;
-
-#ifndef WIN32 /* NOT Windows */
- if (S_ISREG(sb.st_mode))
- return 0;
-#else
- /* \\.\ is the device namespace in Windows, where every file
- * is a device node */
- if (S_ISREG(sb.st_mode) && strncmp(filename, "\\\\.\\", 4) != 0)
- return 0;
-#endif
-
- return 1;
-}
-
static void init_rand_file_service(struct thread_data *td)
{
unsigned long nranges = td->o.nr_files << FIO_FSERVICE_SHIFT;
*(struct thread_data **)td->eo = td;
}
+ if (td->o.odirect)
+ td->io_ops->flags |= FIO_RAWIO;
+
+ td_set_ioengine_flags(td);
return 0;
}
return buf;
}
-int parse_dryrun(void)
+bool parse_dryrun(void)
{
return dump_cmdline || parse_only;
}
if (ioengine_load(td))
goto err;
- if (o->odirect)
- td->io_ops->flags |= FIO_RAWIO;
-
file_alloced = 0;
if (!o->filename && !td->files_index && !o->read_iolog_file) {
file_alloced = 1;
if (td->eo)
*(struct thread_data **)td->eo = NULL;
- if (td->io_ops->flags & FIO_DISKLESSIO) {
+ if (td_ioengine_flagged(td, FIO_DISKLESSIO)) {
struct fio_file *f;
for_each_file(td, f, i)
struct log_params p = {
.td = td,
.avg_msec = o->log_avg_msec,
+ .hist_msec = o->log_hist_msec,
+ .hist_coarseness = o->log_hist_coarseness,
.log_type = IO_LOG_TYPE_LAT,
.log_offset = o->log_offset,
.log_gz = o->log_gz,
td->thread_number, suf, o->per_job_logs);
setup_log(&td->clat_log, &p, logname);
}
+
+ if (o->hist_log_file) {
+#ifndef CONFIG_ZLIB
+ if (td->client_type) {
+ log_err("fio: --write_hist_log requires zlib in client/server mode\n");
+ goto err;
+ }
+#endif
+ struct log_params p = {
+ .td = td,
+ .avg_msec = o->log_avg_msec,
+ .hist_msec = o->log_hist_msec,
+ .hist_coarseness = o->log_hist_coarseness,
+ .log_type = IO_LOG_TYPE_HIST,
+ .log_offset = o->log_offset,
+ .log_gz = o->log_gz,
+ .log_gz_store = o->log_gz_store,
+ };
+ const char *suf;
+
+ if (p.log_gz_store)
+ suf = "log.fz";
+ else
+ suf = "log";
+
+ gen_log_name(logname, sizeof(logname), "clat_hist", o->hist_log_file,
+ td->thread_number, suf, o->per_job_logs);
+ setup_log(&td->clat_hist_log, &p, logname);
+ }
+
if (o->bw_log_file) {
struct log_params p = {
.td = td,
.avg_msec = o->log_avg_msec,
+ .hist_msec = o->log_hist_msec,
+ .hist_coarseness = o->log_hist_coarseness,
.log_type = IO_LOG_TYPE_BW,
.log_offset = o->log_offset,
.log_gz = o->log_gz,
p.avg_msec = min(o->log_avg_msec, o->bw_avg_time);
else
o->bw_avg_time = p.avg_msec;
+
+ p.hist_msec = o->log_hist_msec;
+ p.hist_coarseness = o->log_hist_coarseness;
if (p.log_gz_store)
suf = "log.fz";
struct log_params p = {
.td = td,
.avg_msec = o->log_avg_msec,
+ .hist_msec = o->log_hist_msec,
+ .hist_coarseness = o->log_hist_coarseness,
.log_type = IO_LOG_TYPE_IOPS,
.log_offset = o->log_offset,
.log_gz = o->log_gz,
p.avg_msec = min(o->log_avg_msec, o->iops_avg_time);
else
o->iops_avg_time = p.avg_msec;
+
+ p.hist_msec = o->log_hist_msec;
+ p.hist_coarseness = o->log_hist_coarseness;
if (p.log_gz_store)
suf = "log.fz";
if (is_backend && !recursed)
fio_server_send_add_job(td);
- if (!(td->io_ops->flags & FIO_NOIO)) {
+ if (!td_ioengine_flagged(td, FIO_NOIO)) {
char *c1, *c2, *c3, *c4;
char *c5 = NULL, *c6 = NULL;
struct thread_data *td = NULL;
int c, ini_idx = 0, lidx, ret = 0, do_exit = 0, exit_val = 0;
char *ostr = cmd_optstr;
- void *pid_file = NULL;
+ char *pid_file = NULL;
void *cur_client = NULL;
int backend = 0;
output_format = FIO_OUTPUT_TERSE;
break;
case 'F':
- if (!optarg) {
- log_err("fio: missing --output-format argument\n");
- exit_val = 1;
- do_exit++;
- break;
- }
if (parse_output_format(optarg)) {
log_err("fio: failed parsing output-format\n");
exit_val = 1;
static inline const char *ddir_str(enum td_ddir ddir)
{
- const char *__str[] = { NULL, "read", "write", "rw", NULL,
+ static const char *__str[] = { NULL, "read", "write", "rw", NULL,
"randread", "randwrite", "randrw",
"trim", NULL, NULL, NULL, "randtrim" };
if (f->last_pos[ddir] < f->real_file_size) {
uint64_t pos;
- if (f->last_pos[ddir] == f->file_offset && o->ddir_seq_add < 0)
- f->last_pos[ddir] = f->real_file_size;
+ if (f->last_pos[ddir] == f->file_offset && o->ddir_seq_add < 0) {
+ if (f->real_file_size > f->io_size)
+ f->last_pos[ddir] = f->io_size;
+ else
+ f->last_pos[ddir] = f->real_file_size;
+ }
pos = f->last_pos[ddir] - f->file_offset;
if (pos && o->ddir_seq_add) {
if (pos >= f->real_file_size) {
if (o->ddir_seq_add > 0)
pos = f->file_offset;
- else
- pos = f->real_file_size + o->ddir_seq_add;
+ else {
+ if (f->real_file_size > f->io_size)
+ pos = f->io_size;
+ else
+ pos = f->real_file_size;
+
+ pos += o->ddir_seq_add;
+ }
}
}
*is_random = 1;
} else {
*is_random = 0;
- io_u_set(io_u, IO_U_F_BUSY_OK);
+ io_u_set(td, io_u, IO_U_F_BUSY_OK);
ret = get_next_seq_offset(td, f, ddir, &offset);
if (ret)
ret = get_next_rand_block(td, f, ddir, &b);
ret = get_next_seq_offset(td, f, ddir, &offset);
}
} else {
- io_u_set(io_u, IO_U_F_BUSY_OK);
+ io_u_set(td, io_u, IO_U_F_BUSY_OK);
*is_random = 0;
if (td->o.rw_seq == RW_SEQ_SEQ) {
io_u->ddir = io_u->acct_ddir = ddir;
- if (io_u->ddir == DDIR_WRITE && (td->io_ops->flags & FIO_BARRIER) &&
+ if (io_u->ddir == DDIR_WRITE && td_ioengine_flagged(td, FIO_BARRIER) &&
td->o.barrier_blocks &&
!(td->io_issues[DDIR_WRITE] % td->o.barrier_blocks) &&
td->io_issues[DDIR_WRITE])
- io_u_set(io_u, IO_U_F_BARRIER);
+ io_u_set(td, io_u, IO_U_F_BARRIER);
}
void put_file_log(struct thread_data *td, struct fio_file *f)
put_file_log(td, io_u->file);
io_u->file = NULL;
- io_u_set(io_u, IO_U_F_FREE);
+ io_u_set(td, io_u, IO_U_F_FREE);
if (io_u->flags & IO_U_F_IN_CUR_DEPTH) {
td->cur_depth--;
void clear_io_u(struct thread_data *td, struct io_u *io_u)
{
- io_u_clear(io_u, IO_U_F_FLIGHT);
+ io_u_clear(td, io_u, IO_U_F_FLIGHT);
put_io_u(td, io_u);
}
td_io_u_lock(td);
- io_u_set(__io_u, IO_U_F_FREE);
+ io_u_set(td, __io_u, IO_U_F_FREE);
if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(ddir))
td->io_issues[ddir]--;
- io_u_clear(__io_u, IO_U_F_FLIGHT);
+ io_u_clear(td, __io_u, IO_U_F_FLIGHT);
if (__io_u->flags & IO_U_F_IN_CUR_DEPTH) {
td->cur_depth--;
assert(!(td->flags & TD_F_CHILD));
{
unsigned int is_random;
- if (td->io_ops->flags & FIO_NOIO)
+ if (td_ioengine_flagged(td, FIO_NOIO))
goto out;
set_rw_ddir(td, io_u);
if (io_u) {
assert(io_u->flags & IO_U_F_FREE);
- io_u_clear(io_u, IO_U_F_FREE | IO_U_F_NO_FILE_PUT |
+ io_u_clear(td, io_u, IO_U_F_FREE | IO_U_F_NO_FILE_PUT |
IO_U_F_TRIMMED | IO_U_F_BARRIER |
IO_U_F_VER_LIST);
io_u->acct_ddir = -1;
td->cur_depth++;
assert(!(td->flags & TD_F_CHILD));
- io_u_set(io_u, IO_U_F_IN_CUR_DEPTH);
+ io_u_set(td, io_u, IO_U_F_IN_CUR_DEPTH);
io_u->ipo = NULL;
} else if (td_async_processing(td)) {
/*
get_trim = 1;
}
- if (get_trim && !get_next_trim(td, io_u))
+ if (get_trim && get_next_trim(td, io_u))
return true;
}
assert(fio_file_open(f));
if (ddir_rw(io_u->ddir)) {
- if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
+ if (!io_u->buflen && !td_ioengine_flagged(td, FIO_NOIO)) {
dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
goto err_put;
}
dprint_io_u(io_u, "io complete");
assert(io_u->flags & IO_U_F_FLIGHT);
- io_u_clear(io_u, IO_U_F_FLIGHT | IO_U_F_BUSY_OK);
+ io_u_clear(td, io_u, IO_U_F_FLIGHT | IO_U_F_BUSY_OK);
/*
* Mark IO ok to verify
struct ioengine_ops {
struct flist_head list;
- char name[16];
+ const char *name;
int version;
int flags;
int (*setup)(struct thread_data *);
return io_u->ddir;
}
-static inline void io_u_clear(struct io_u *io_u, unsigned int flags)
-{
- __sync_fetch_and_and(&io_u->flags, ~flags);
-}
-
-static inline void io_u_set(struct io_u *io_u, unsigned int flags)
-{
- __sync_fetch_and_or(&io_u->flags, flags);
-}
+#define io_u_clear(td, io_u, val) \
+ td_flags_clear((td), &(io_u->flags), (val))
+#define io_u_set(td, io_u, val) \
+ td_flags_set((td), &(io_u)->flags, (val))
#endif
fio_ro_check(td, io_u);
assert((io_u->flags & IO_U_F_FLIGHT) == 0);
- io_u_set(io_u, IO_U_F_FLIGHT);
+ io_u_set(td, io_u, IO_U_F_FLIGHT);
assert(fio_file_open(io_u->file));
io_u->error = 0;
io_u->resid = 0;
- if (td->io_ops->flags & FIO_SYNCIO) {
+ if (td_ioengine_flagged(td, FIO_SYNCIO)) {
if (fio_fill_issue_time(td))
fio_gettime(&io_u->issue_time, NULL);
}
}
- if ((td->io_ops->flags & FIO_SYNCIO) == 0) {
+ if (!td_ioengine_flagged(td, FIO_SYNCIO)) {
if (fio_fill_issue_time(td))
fio_gettime(&io_u->issue_time, NULL);
td->error = ret;
}
- if (!ret && (td->io_ops->flags & FIO_NOIO))
+ if (!ret && td_ioengine_flagged(td, FIO_NOIO))
td->flags |= TD_F_NOIO;
return ret;
}
}
- if (td->io_ops->flags & FIO_DISKLESSIO)
+ if (td_ioengine_flagged(td, FIO_DISKLESSIO))
goto done;
if (td->o.invalidate_cache && file_invalidate_cache(td, f))
{
if (td->io_ops->unlink_file)
return td->io_ops->unlink_file(td, f);
- else
- return unlink(f->file_name);
+ else {
+ int ret;
+
+ ret = unlink(f->file_name);
+ if (ret < 0)
+ return errno;
+
+ return 0;
+ }
}
int td_io_get_file_size(struct thread_data *td, struct fio_file *f)
const char *filename)
{
struct io_log *l;
+ int i;
+ struct io_u_plat_entry *entry;
+ struct flist_head *list;
l = scalloc(1, sizeof(*l));
INIT_FLIST_HEAD(&l->io_logs);
l->log_gz = p->log_gz;
l->log_gz_store = p->log_gz_store;
l->avg_msec = p->avg_msec;
+ l->hist_msec = p->hist_msec;
+ l->hist_coarseness = p->hist_coarseness;
l->filename = strdup(filename);
l->td = p->td;
+ /* Initialize histogram lists for each r/w direction,
+ * with initial io_u_plat of all zeros:
+ */
+ for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+ list = &l->hist_window[i].list;
+ INIT_FLIST_HEAD(list);
+ entry = calloc(1, sizeof(struct io_u_plat_entry));
+ flist_add(&entry->list, list);
+ }
+
if (l->td && l->td->o.io_submit_mode != IO_MODE_OFFLOAD) {
struct io_logs *p;
sfree(log);
}
+inline unsigned long hist_sum(int j, int stride, unsigned int *io_u_plat,
+ unsigned int *io_u_plat_last)
+{
+ unsigned long sum;
+ int k;
+
+ if (io_u_plat_last) {
+ for (k = sum = 0; k < stride; k++)
+ sum += io_u_plat[j + k] - io_u_plat_last[j + k];
+ } else {
+ for (k = sum = 0; k < stride; k++)
+ sum += io_u_plat[j + k];
+ }
+
+ return sum;
+}
+
+static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
+ uint64_t sample_size)
+{
+ struct io_sample *s;
+ int log_offset;
+ uint64_t i, j, nr_samples;
+ struct io_u_plat_entry *entry, *entry_before;
+ unsigned int *io_u_plat;
+ unsigned int *io_u_plat_before;
+
+ int stride = 1 << hist_coarseness;
+
+ if (!sample_size)
+ return;
+
+ s = __get_sample(samples, 0, 0);
+ log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+
+ nr_samples = sample_size / __log_entry_sz(log_offset);
+
+ for (i = 0; i < nr_samples; i++) {
+ s = __get_sample(samples, log_offset, i);
+
+ entry = (struct io_u_plat_entry *) s->val;
+ io_u_plat = entry->io_u_plat;
+
+ entry_before = flist_first_entry(&entry->list, struct io_u_plat_entry, list);
+ io_u_plat_before = entry_before->io_u_plat;
+
+ fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time,
+ io_sample_ddir(s), s->bs);
+ for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) {
+ fprintf(f, "%lu, ", hist_sum(j, stride, io_u_plat,
+ io_u_plat_before));
+ }
+ fprintf(f, "%lu\n", (unsigned long)
+ hist_sum(FIO_IO_U_PLAT_NR - stride, stride, io_u_plat,
+ io_u_plat_before));
+
+ flist_del(&entry_before->list);
+ free(entry_before);
+ }
+}
+
void flush_samples(FILE *f, void *samples, uint64_t sample_size)
{
struct io_sample *s;
cur_log = flist_first_entry(&log->io_logs, struct io_logs, list);
flist_del_init(&cur_log->list);
- flush_samples(f, cur_log->log, cur_log->nr_samples * log_entry_sz(log));
+
+ if (log == log->td->clat_hist_log)
+ flush_hist_samples(f, log->hist_coarseness, cur_log->log,
+ log_sample_sz(log, cur_log));
+ else
+ flush_samples(f, cur_log->log, log_sample_sz(log, cur_log));
+
sfree(cur_log);
}
return ret;
}
+static int write_clat_hist_log(struct thread_data *td, int try, bool unit_log)
+{
+ int ret;
+
+ if (!unit_log)
+ return 0;
+
+ ret = __write_log(td, td->clat_hist_log, try);
+ if (!ret)
+ td->clat_hist_log = NULL;
+
+ return ret;
+}
+
static int write_lat_log(struct thread_data *td, int try, bool unit_log)
{
int ret;
SLAT_LOG_MASK = 4,
CLAT_LOG_MASK = 8,
IOPS_LOG_MASK = 16,
+ CLAT_HIST_LOG_MASK = 32,
- ALL_LOG_NR = 5,
+ ALL_LOG_NR = 6,
};
struct log_type {
.mask = IOPS_LOG_MASK,
.fn = write_iops_log,
},
+ {
+ .mask = CLAT_HIST_LOG_MASK,
+ .fn = write_clat_hist_log,
+ }
};
void td_writeout_logs(struct thread_data *td, bool unit_logs)
fio_fp64_t S;
};
+struct io_hist {
+ uint64_t samples;
+ unsigned long hist_last;
+ struct flist_head list;
+};
+
/*
* A single data sample
*/
struct io_sample {
uint64_t time;
- uint64_t val;
+ union {
+ uint64_t val;
+ struct io_u_plat_entry *plat_entry;
+ };
uint32_t __ddir;
uint32_t bs;
};
IO_LOG_TYPE_SLAT,
IO_LOG_TYPE_BW,
IO_LOG_TYPE_IOPS,
+ IO_LOG_TYPE_HIST,
};
#define DEF_LOG_ENTRIES 1024
unsigned long avg_msec;
unsigned long avg_last;
+	/*
+	 * Windowed latency histograms, for keeping track of when we need to
+	 * save a copy of the histogram approximately every hist_msec
+	 * milliseconds.
+	 */
+ struct io_hist hist_window[DDIR_RWDIR_CNT];
+ unsigned long hist_msec;
+ unsigned int hist_coarseness;
+
pthread_mutex_t chunk_lock;
unsigned int chunk_seq;
struct flist_head chunk_list;
return __log_entry_sz(log->log_offset);
}
+static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
+{
+ return cur_log->nr_samples * log_entry_sz(log);
+}
+
static inline struct io_sample *__get_sample(void *samples, int log_offset,
uint64_t sample)
{
struct log_params {
struct thread_data *td;
unsigned long avg_msec;
+ unsigned long hist_msec;
+ int hist_coarseness;
int log_type;
int log_offset;
int log_gz;
extern void setup_log(struct io_log **, struct log_params *, const char *);
extern void flush_log(struct io_log *, bool);
extern void flush_samples(FILE *, void *, uint64_t);
+extern unsigned long hist_sum(int, int, unsigned int *, unsigned int *);
extern void free_log(struct io_log *);
extern void fio_writeout_logs(bool);
extern void td_writeout_logs(struct thread_data *, bool);
#define BLOOM_SEED 0x8989
-struct bloom_hash hashes[] = {
+static struct bloom_hash hashes[] = {
{
.seed = BLOOM_SEED,
.fn = jhash,
#ifdef CONFIG_GETMNTENT
#include <mntent.h>
+#include "lib/mountcheck.h"
+
#define MTAB "/etc/mtab"
int device_is_mounted(const char *dev)
#include <stdlib.h>
#include <limits.h>
+#include "lib/strntol.h"
+
long strntol(const char *str, size_t sz, char **end, int base)
{
/* Expect that digit representation of LONG_MAX/MIN
void reset_all_stats(struct thread_data *td)
{
- struct timeval tv;
int i;
reset_io_counters(td, 1);
td->rwmix_issues = 0;
}
- fio_gettime(&tv, NULL);
- memcpy(&td->epoch, &tv, sizeof(tv));
- memcpy(&td->start, &tv, sizeof(tv));
- memcpy(&td->iops_sample_time, &tv, sizeof(tv));
- memcpy(&td->bw_sample_time, &tv, sizeof(tv));
+ set_epoch_time(td, td->o.log_unix_epoch);
+ memcpy(&td->start, &td->epoch, sizeof(struct timeval));
+ memcpy(&td->iops_sample_time, &td->epoch, sizeof(struct timeval));
+ memcpy(&td->bw_sample_time, &td->epoch, sizeof(struct timeval));
lat_target_reset(td);
clear_rusage_stat(td);
size_t total_mem;
int ret = 0;
- if (td->io_ops->flags & FIO_NOIO)
+ if (td_ioengine_flagged(td, FIO_NOIO))
return 0;
total_mem = td->orig_buffer_size;
if (td->o.odirect || td->o.mem_align || td->o.oatomic ||
- (td->io_ops->flags & FIO_MEMALIGN)) {
+ td_ioengine_flagged(td, FIO_MEMALIGN)) {
total_mem += page_mask;
if (td->o.mem_align && td->o.mem_align > page_size)
total_mem += td->o.mem_align - page_size;
char client_sockaddr_str[INET6_ADDRSTRLEN] = { 0 };
+#define cb_data_to_td(data) container_of(data, struct thread_data, o)
+
struct pattern_fmt_desc fmt_desc[] = {
{
.fmt = "%o",
static int str_bssplit_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
char *str, *p;
int ret = 0;
static int str_ignore_error_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
char *str, *p, *n;
int type = 0, ret = 1;
static int str_rw_cb(void *data, const char *str)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
struct thread_options *o = &td->o;
char *nr;
static int str_mem_cb(void *data, const char *mem)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
if (td->o.mem_type == MEM_MMAPHUGE || td->o.mem_type == MEM_MMAP ||
td->o.mem_type == MEM_MMAPSHARED)
static int fio_clock_source_cb(void *data, const char *str)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
fio_clock_source = td->o.clocksource;
fio_clock_source_set = 1;
static int str_rwmix_read_cb(void *data, unsigned long long *val)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
td->o.rwmix[DDIR_READ] = *val;
td->o.rwmix[DDIR_WRITE] = 100 - *val;
static int str_rwmix_write_cb(void *data, unsigned long long *val)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
td->o.rwmix[DDIR_WRITE] = *val;
td->o.rwmix[DDIR_READ] = 100 - *val;
static int str_cpumask_cb(void *data, unsigned long long *val)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
unsigned int i;
long max_cpu;
int ret;
static int str_cpus_allowed_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
if (parse_dryrun())
return 0;
static int str_verify_cpus_allowed_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
if (parse_dryrun())
return 0;
#ifdef CONFIG_ZLIB
static int str_log_cpus_allowed_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
if (parse_dryrun())
return 0;
#ifdef CONFIG_LIBNUMA
static int str_numa_cpunodes_cb(void *data, char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
struct bitmask *verify_bitmask;
if (parse_dryrun())
static int str_numa_mpol_cb(void *data, char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
const char * const policy_types[] =
{ "default", "prefer", "bind", "interleave", "local", NULL };
int i;
static int str_fst_cb(void *data, const char *str)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
double val;
bool done = false;
char *nr;
#ifdef CONFIG_SYNC_FILE_RANGE
static int str_sfr_cb(void *data, const char *str)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
char *nr = get_opt_postfix(str);
td->sync_file_range_nr = 1;
static int str_random_distribution_cb(void *data, const char *str)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
double val;
char *nr;
static int str_filename_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
char *fname, *str, *p;
p = str = strdup(input);
static int str_directory_cb(void *data, const char fio_unused *unused)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
struct stat sb;
char *dirname, *str, *p;
int ret = 0;
static int str_opendir_cb(void *data, const char fio_unused *str)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
if (parse_dryrun())
return 0;
static int str_buffer_pattern_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
int ret;
/* FIXME: for now buffer pattern does not support formats */
static int str_buffer_compress_cb(void *data, unsigned long long *il)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
td->flags |= TD_F_COMPRESS;
td->o.compress_percentage = *il;
static int str_dedupe_cb(void *data, unsigned long long *il)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
td->flags |= TD_F_COMPRESS;
td->o.dedupe_percentage = *il;
static int str_verify_pattern_cb(void *data, const char *input)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
int ret;
td->o.verify_fmt_sz = ARRAY_SIZE(td->o.verify_fmt);
static int str_gtod_reduce_cb(void *data, int *il)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
int val = *il;
td->o.disable_lat = !!val;
static int str_size_cb(void *data, unsigned long long *__val)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
unsigned long long v = *__val;
if (parse_is_percent(v)) {
static int rw_verify(struct fio_option *o, void *data)
{
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
if (read_only && td_write(td)) {
log_err("fio: job <%s> has write bit set, but fio is in"
static int gtod_cpu_verify(struct fio_option *o, void *data)
{
#ifndef FIO_HAVE_CPU_AFFINITY
- struct thread_data *td = data;
+ struct thread_data *td = cb_data_to_td(data);
if (td->o.gtod_cpu) {
log_err("fio: platform must support CPU affinity for"
.name = "description",
.lname = "Description of job",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(description),
+ .off1 = offsetof(struct thread_options, description),
.help = "Text job description",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_DESC,
.name = "name",
.lname = "Job name",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(name),
+ .off1 = offsetof(struct thread_options, name),
.help = "Name of this job",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_DESC,
.name = "wait_for",
.lname = "Waitee name",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(wait_for),
+ .off1 = offsetof(struct thread_options, wait_for),
.help = "Name of the job this one wants to wait for before starting",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_DESC,
.name = "filename",
.lname = "Filename(s)",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(filename),
+ .off1 = offsetof(struct thread_options, filename),
.cb = str_filename_cb,
.prio = -1, /* must come after "directory" */
.help = "File(s) to use for the workload",
.name = "directory",
.lname = "Directory",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(directory),
+ .off1 = offsetof(struct thread_options, directory),
.cb = str_directory_cb,
.help = "Directory to store files in",
.category = FIO_OPT_C_FILE,
.name = "filename_format",
.lname = "Filename Format",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(filename_format),
+ .off1 = offsetof(struct thread_options, filename_format),
.prio = -1, /* must come after "directory" */
.help = "Override default $jobname.$jobnum.$filenum naming",
.def = "$jobname.$jobnum.$filenum",
.name = "unique_filename",
.lname = "Unique Filename",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(unique_filename),
+ .off1 = offsetof(struct thread_options, unique_filename),
.help = "For network clients, prefix file with source IP",
.def = "1",
.category = FIO_OPT_C_FILE,
.name = "lockfile",
.lname = "Lockfile",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(file_lock_mode),
+ .off1 = offsetof(struct thread_options, file_lock_mode),
.help = "Lock file when doing IO to it",
.prio = 1,
.parent = "filename",
.name = "opendir",
.lname = "Open directory",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(opendir),
+ .off1 = offsetof(struct thread_options, opendir),
.cb = str_opendir_cb,
.help = "Recursively add files from this directory and down",
.category = FIO_OPT_C_FILE,
.alias = "readwrite",
.type = FIO_OPT_STR,
.cb = str_rw_cb,
- .off1 = td_var_offset(td_ddir),
+ .off1 = offsetof(struct thread_options, td_ddir),
.help = "IO direction",
.def = "read",
.verify = rw_verify,
.name = "rw_sequencer",
.lname = "RW Sequencer",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(rw_seq),
+ .off1 = offsetof(struct thread_options, rw_seq),
.help = "IO offset generator modifier",
.def = "sequential",
.category = FIO_OPT_C_IO,
.name = "ioengine",
.lname = "IO Engine",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(ioengine),
+ .off1 = offsetof(struct thread_options, ioengine),
.help = "IO engine to use",
.def = FIO_PREFERRED_ENGINE,
.category = FIO_OPT_C_IO,
.name = "iodepth",
.lname = "IO Depth",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(iodepth),
+ .off1 = offsetof(struct thread_options, iodepth),
.help = "Number of IO buffers to keep in flight",
.minval = 1,
.interval = 1,
.lname = "IO Depth batch",
.alias = "iodepth_batch_submit",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(iodepth_batch),
+ .off1 = offsetof(struct thread_options, iodepth_batch),
.help = "Number of IO buffers to submit in one go",
.parent = "iodepth",
.hide = 1,
.lname = "Min IO depth batch complete",
.alias = "iodepth_batch_complete",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(iodepth_batch_complete_min),
+ .off1 = offsetof(struct thread_options, iodepth_batch_complete_min),
.help = "Min number of IO buffers to retrieve in one go",
.parent = "iodepth",
.hide = 1,
.name = "iodepth_batch_complete_max",
.lname = "Max IO depth batch complete",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(iodepth_batch_complete_max),
+ .off1 = offsetof(struct thread_options, iodepth_batch_complete_max),
.help = "Max number of IO buffers to retrieve in one go",
.parent = "iodepth",
.hide = 1,
.name = "iodepth_low",
.lname = "IO Depth batch low",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(iodepth_low),
+ .off1 = offsetof(struct thread_options, iodepth_low),
.help = "Low water mark for queuing depth",
.parent = "iodepth",
.hide = 1,
.name = "io_submit_mode",
.lname = "IO submit mode",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(io_submit_mode),
+ .off1 = offsetof(struct thread_options, io_submit_mode),
.help = "How IO submissions and completions are done",
.def = "inline",
.category = FIO_OPT_C_IO,
.lname = "Size",
.type = FIO_OPT_STR_VAL,
.cb = str_size_cb,
- .off1 = td_var_offset(size),
+ .off1 = offsetof(struct thread_options, size),
.help = "Total size of device or files",
.interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.alias = "io_limit",
.lname = "IO Size",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(io_limit),
+ .off1 = offsetof(struct thread_options, io_limit),
.interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
.lname = "Fill device",
.alias = "fill_fs",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(fill_device),
+ .off1 = offsetof(struct thread_options, fill_device),
.help = "Write until an ENOSPC error occurs",
.def = "0",
.category = FIO_OPT_C_FILE,
.name = "filesize",
.lname = "File size",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(file_size_low),
- .off2 = td_var_offset(file_size_high),
+ .off1 = offsetof(struct thread_options, file_size_low),
+ .off2 = offsetof(struct thread_options, file_size_high),
.minval = 1,
.help = "Size of individual files",
.interval = 1024 * 1024,
.name = "file_append",
.lname = "File append",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(file_append),
+ .off1 = offsetof(struct thread_options, file_append),
.help = "IO will start at the end of the file(s)",
.def = "0",
.category = FIO_OPT_C_FILE,
.lname = "IO offset",
.alias = "fileoffset",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(start_offset),
+ .off1 = offsetof(struct thread_options, start_offset),
.help = "Start IO from this offset",
.def = "0",
.interval = 1024 * 1024,
.name = "offset_increment",
.lname = "IO offset increment",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(offset_increment),
+ .off1 = offsetof(struct thread_options, offset_increment),
.help = "What is the increment from one offset to the next",
.parent = "offset",
.hide = 1,
.name = "number_ios",
.lname = "Number of IOs to perform",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(number_ios),
+ .off1 = offsetof(struct thread_options, number_ios),
.help = "Force job completion after this number of IOs",
.def = "0",
.category = FIO_OPT_C_IO,
.lname = "Block size",
.alias = "blocksize",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(bs[DDIR_READ]),
- .off2 = td_var_offset(bs[DDIR_WRITE]),
- .off3 = td_var_offset(bs[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, bs[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, bs[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, bs[DDIR_TRIM]),
.minval = 1,
.help = "Block size unit",
.def = "4k",
.lname = "Block size align",
.alias = "blockalign",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(ba[DDIR_READ]),
- .off2 = td_var_offset(ba[DDIR_WRITE]),
- .off3 = td_var_offset(ba[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, ba[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, ba[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, ba[DDIR_TRIM]),
.minval = 1,
.help = "IO block offset alignment",
.parent = "rw",
.lname = "Block size range",
.alias = "blocksize_range",
.type = FIO_OPT_RANGE,
- .off1 = td_var_offset(min_bs[DDIR_READ]),
- .off2 = td_var_offset(max_bs[DDIR_READ]),
- .off3 = td_var_offset(min_bs[DDIR_WRITE]),
- .off4 = td_var_offset(max_bs[DDIR_WRITE]),
- .off5 = td_var_offset(min_bs[DDIR_TRIM]),
- .off6 = td_var_offset(max_bs[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, min_bs[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, max_bs[DDIR_READ]),
+ .off3 = offsetof(struct thread_options, min_bs[DDIR_WRITE]),
+ .off4 = offsetof(struct thread_options, max_bs[DDIR_WRITE]),
+ .off5 = offsetof(struct thread_options, min_bs[DDIR_TRIM]),
+ .off6 = offsetof(struct thread_options, max_bs[DDIR_TRIM]),
.minval = 1,
.help = "Set block size range (in more detail than bs)",
.parent = "rw",
.lname = "Block size split",
.type = FIO_OPT_STR,
.cb = str_bssplit_cb,
- .off1 = td_var_offset(bssplit),
+ .off1 = offsetof(struct thread_options, bssplit),
.help = "Set a specific mix of block sizes",
.parent = "rw",
.hide = 1,
.lname = "Block size unaligned",
.alias = "blocksize_unaligned",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(bs_unaligned),
+ .off1 = offsetof(struct thread_options, bs_unaligned),
.help = "Don't sector align IO buffer sizes",
.parent = "rw",
.hide = 1,
.name = "bs_is_seq_rand",
.lname = "Block size division is seq/random (not read/write)",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(bs_is_seq_rand),
+ .off1 = offsetof(struct thread_options, bs_is_seq_rand),
.help = "Consider any blocksize setting to be sequential,random",
.def = "0",
.parent = "blocksize",
.name = "randrepeat",
.lname = "Random repeatable",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(rand_repeatable),
+ .off1 = offsetof(struct thread_options, rand_repeatable),
.help = "Use repeatable random IO pattern",
.def = "1",
.parent = "rw",
.name = "randseed",
.lname = "The random generator seed",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(rand_seed),
+ .off1 = offsetof(struct thread_options, rand_seed),
.help = "Set the random generator seed value",
.def = "0x89",
.parent = "rw",
.name = "use_os_rand",
.lname = "Use OS random",
.type = FIO_OPT_DEPRECATED,
- .off1 = td_var_offset(dep_use_os_rand),
+ .off1 = offsetof(struct thread_options, dep_use_os_rand),
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_RANDOM,
},
.name = "norandommap",
.lname = "No randommap",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(norandommap),
+ .off1 = offsetof(struct thread_options, norandommap),
.help = "Accept potential duplicate random blocks",
.parent = "rw",
.hide = 1,
.name = "softrandommap",
.lname = "Soft randommap",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(softrandommap),
+ .off1 = offsetof(struct thread_options, softrandommap),
.help = "Set norandommap if randommap allocation fails",
.parent = "norandommap",
.hide = 1,
.name = "random_generator",
.lname = "Random Generator",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(random_generator),
+ .off1 = offsetof(struct thread_options, random_generator),
.help = "Type of random number generator to use",
.def = "tausworthe",
.posval = {
.name = "random_distribution",
.lname = "Random Distribution",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(random_distribution),
+ .off1 = offsetof(struct thread_options, random_distribution),
.cb = str_random_distribution_cb,
.help = "Random offset distribution generator",
.def = "random",
.name = "percentage_random",
.lname = "Percentage Random",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(perc_rand[DDIR_READ]),
- .off2 = td_var_offset(perc_rand[DDIR_WRITE]),
- .off3 = td_var_offset(perc_rand[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, perc_rand[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, perc_rand[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, perc_rand[DDIR_TRIM]),
.maxval = 100,
.help = "Percentage of seq/random mix that should be random",
.def = "100,100,100",
.name = "allrandrepeat",
.lname = "All Random Repeat",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(allrand_repeatable),
+ .off1 = offsetof(struct thread_options, allrand_repeatable),
.help = "Use repeatable random numbers for everything",
.def = "0",
.category = FIO_OPT_C_IO,
.lname = "Number of files",
.alias = "nr_files",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(nr_files),
+ .off1 = offsetof(struct thread_options, nr_files),
.help = "Split job workload between this number of files",
.def = "1",
.interval = 1,
.name = "openfiles",
.lname = "Number of open files",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(open_files),
+ .off1 = offsetof(struct thread_options, open_files),
.help = "Number of files to keep open at the same time",
.category = FIO_OPT_C_FILE,
.group = FIO_OPT_G_INVALID,
.lname = "File service type",
.type = FIO_OPT_STR,
.cb = str_fst_cb,
- .off1 = td_var_offset(file_service_type),
+ .off1 = offsetof(struct thread_options, file_service_type),
.help = "How to select which file to service next",
.def = "roundrobin",
.category = FIO_OPT_C_FILE,
.name = "fallocate",
.lname = "Fallocate",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(fallocate_mode),
+ .off1 = offsetof(struct thread_options, fallocate_mode),
.help = "Whether pre-allocation is performed when laying out files",
.def = "posix",
.category = FIO_OPT_C_FILE,
.name = "fadvise_hint",
.lname = "Fadvise hint",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(fadvise_hint),
+ .off1 = offsetof(struct thread_options, fadvise_hint),
.help = "Use fadvise() to advise the kernel on IO pattern",
.def = "1",
.category = FIO_OPT_C_FILE,
.name = "fadvise_stream",
.lname = "Fadvise stream",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(fadvise_stream),
+ .off1 = offsetof(struct thread_options, fadvise_stream),
.help = "Use fadvise() to set stream ID",
.category = FIO_OPT_C_FILE,
.group = FIO_OPT_G_INVALID,
.name = "fsync",
.lname = "Fsync",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(fsync_blocks),
+ .off1 = offsetof(struct thread_options, fsync_blocks),
.help = "Issue fsync for writes every given number of blocks",
.def = "0",
.interval = 1,
.name = "fdatasync",
.lname = "Fdatasync",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(fdatasync_blocks),
+ .off1 = offsetof(struct thread_options, fdatasync_blocks),
.help = "Issue fdatasync for writes every given number of blocks",
.def = "0",
.interval = 1,
.name = "write_barrier",
.lname = "Write barrier",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(barrier_blocks),
+ .off1 = offsetof(struct thread_options, barrier_blocks),
.help = "Make every Nth write a barrier write",
.def = "0",
.interval = 1,
},
.type = FIO_OPT_STR_MULTI,
.cb = str_sfr_cb,
- .off1 = td_var_offset(sync_file_range),
+ .off1 = offsetof(struct thread_options, sync_file_range),
.help = "Use sync_file_range()",
.category = FIO_OPT_C_FILE,
.group = FIO_OPT_G_INVALID,
.name = "direct",
.lname = "Direct I/O",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(odirect),
+ .off1 = offsetof(struct thread_options, odirect),
.help = "Use O_DIRECT IO (negates buffered)",
.def = "0",
.inverse = "buffered",
.name = "atomic",
.lname = "Atomic I/O",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(oatomic),
+ .off1 = offsetof(struct thread_options, oatomic),
.help = "Use Atomic IO with O_DIRECT (implies O_DIRECT)",
.def = "0",
.category = FIO_OPT_C_IO,
.name = "buffered",
.lname = "Buffered I/O",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(odirect),
+ .off1 = offsetof(struct thread_options, odirect),
.neg = 1,
.help = "Use buffered IO (negates direct)",
.def = "1",
.name = "overwrite",
.lname = "Overwrite",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(overwrite),
+ .off1 = offsetof(struct thread_options, overwrite),
.help = "When writing, set whether to overwrite current data",
.def = "0",
.category = FIO_OPT_C_FILE,
.name = "loops",
.lname = "Loops",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(loops),
+ .off1 = offsetof(struct thread_options, loops),
.help = "Number of times to run the job",
.def = "1",
.interval = 1,
.name = "numjobs",
.lname = "Number of jobs",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(numjobs),
+ .off1 = offsetof(struct thread_options, numjobs),
.help = "Duplicate this job this many times",
.def = "1",
.interval = 1,
.name = "startdelay",
.lname = "Start delay",
.type = FIO_OPT_STR_VAL_TIME,
- .off1 = td_var_offset(start_delay),
- .off2 = td_var_offset(start_delay_high),
+ .off1 = offsetof(struct thread_options, start_delay),
+ .off2 = offsetof(struct thread_options, start_delay_high),
.help = "Only start job when this period has passed",
.def = "0",
.is_seconds = 1,
.lname = "Runtime",
.alias = "timeout",
.type = FIO_OPT_STR_VAL_TIME,
- .off1 = td_var_offset(timeout),
+ .off1 = offsetof(struct thread_options, timeout),
.help = "Stop workload when this amount of time has passed",
.def = "0",
.is_seconds = 1,
.name = "time_based",
.lname = "Time based",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(time_based),
+ .off1 = offsetof(struct thread_options, time_based),
.help = "Keep running until runtime/timeout is met",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_RUNTIME,
.name = "verify_only",
.lname = "Verify only",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(verify_only),
+ .off1 = offsetof(struct thread_options, verify_only),
.help = "Verifies previously written data is still valid",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_RUNTIME,
.name = "ramp_time",
.lname = "Ramp time",
.type = FIO_OPT_STR_VAL_TIME,
- .off1 = td_var_offset(ramp_time),
+ .off1 = offsetof(struct thread_options, ramp_time),
.help = "Ramp up time before measuring performance",
.is_seconds = 1,
.is_time = 1,
.lname = "Clock source",
.type = FIO_OPT_STR,
.cb = fio_clock_source_cb,
- .off1 = td_var_offset(clocksource),
+ .off1 = offsetof(struct thread_options, clocksource),
.help = "What type of timing source to use",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CLOCK,
.lname = "I/O Memory",
.type = FIO_OPT_STR,
.cb = str_mem_cb,
- .off1 = td_var_offset(mem_type),
+ .off1 = offsetof(struct thread_options, mem_type),
.help = "Backing type for IO buffers",
.def = "malloc",
.category = FIO_OPT_C_IO,
.alias = "mem_align",
.lname = "I/O memory alignment",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(mem_align),
+ .off1 = offsetof(struct thread_options, mem_align),
.minval = 0,
.help = "IO memory buffer offset alignment",
.def = "0",
.name = "verify",
.lname = "Verify",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(verify),
+ .off1 = offsetof(struct thread_options, verify),
.help = "Verify data written",
.def = "0",
.category = FIO_OPT_C_IO,
.name = "do_verify",
.lname = "Perform verify step",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(do_verify),
+ .off1 = offsetof(struct thread_options, do_verify),
.help = "Run verification stage after write",
.def = "1",
.parent = "verify",
.name = "verifysort",
.lname = "Verify sort",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(verifysort),
+ .off1 = offsetof(struct thread_options, verifysort),
.help = "Sort written verify blocks for read back",
.def = "1",
.parent = "verify",
.name = "verifysort_nr",
.lname = "Verify Sort Nr",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(verifysort_nr),
+ .off1 = offsetof(struct thread_options, verifysort_nr),
.help = "Pre-load and sort verify blocks for a read workload",
.minval = 0,
.maxval = 131072,
.name = "verify_interval",
.lname = "Verify interval",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(verify_interval),
+ .off1 = offsetof(struct thread_options, verify_interval),
.minval = 2 * sizeof(struct verify_header),
.help = "Store verify buffer header every N bytes",
.parent = "verify",
.lname = "Verify offset",
.type = FIO_OPT_INT,
.help = "Offset verify header location by N bytes",
- .off1 = td_var_offset(verify_offset),
+ .off1 = offsetof(struct thread_options, verify_offset),
.minval = sizeof(struct verify_header),
.parent = "verify",
.hide = 1,
.lname = "Verify pattern",
.type = FIO_OPT_STR,
.cb = str_verify_pattern_cb,
- .off1 = td_var_offset(verify_pattern),
+ .off1 = offsetof(struct thread_options, verify_pattern),
.help = "Fill pattern for IO buffers",
.parent = "verify",
.hide = 1,
.name = "verify_fatal",
.lname = "Verify fatal",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(verify_fatal),
+ .off1 = offsetof(struct thread_options, verify_fatal),
.def = "0",
.help = "Exit on a single verify failure, don't continue",
.parent = "verify",
.name = "verify_dump",
.lname = "Verify dump",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(verify_dump),
+ .off1 = offsetof(struct thread_options, verify_dump),
.def = "0",
.help = "Dump contents of good and bad blocks on failure",
.parent = "verify",
.name = "verify_async",
.lname = "Verify asynchronously",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(verify_async),
+ .off1 = offsetof(struct thread_options, verify_async),
.def = "0",
.help = "Number of async verifier threads to use",
.parent = "verify",
.name = "verify_backlog",
.lname = "Verify backlog",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(verify_backlog),
+ .off1 = offsetof(struct thread_options, verify_backlog),
.help = "Verify after this number of blocks are written",
.parent = "verify",
.hide = 1,
.name = "verify_backlog_batch",
.lname = "Verify backlog batch",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(verify_batch),
+ .off1 = offsetof(struct thread_options, verify_batch),
.help = "Verify this number of IO blocks",
.parent = "verify",
.hide = 1,
.lname = "Async verify CPUs",
.type = FIO_OPT_STR,
.cb = str_verify_cpus_allowed_cb,
- .off1 = td_var_offset(verify_cpumask),
+ .off1 = offsetof(struct thread_options, verify_cpumask),
.help = "Set CPUs allowed for async verify threads",
.parent = "verify_async",
.hide = 1,
{
.name = "experimental_verify",
.lname = "Experimental Verify",
- .off1 = td_var_offset(experimental_verify),
+ .off1 = offsetof(struct thread_options, experimental_verify),
.type = FIO_OPT_BOOL,
.help = "Enable experimental verification",
.parent = "verify",
{
.name = "verify_state_load",
.lname = "Load verify state",
- .off1 = td_var_offset(verify_state),
+ .off1 = offsetof(struct thread_options, verify_state),
.type = FIO_OPT_BOOL,
.help = "Load verify termination state",
.parent = "verify",
{
.name = "verify_state_save",
.lname = "Save verify state",
- .off1 = td_var_offset(verify_state_save),
+ .off1 = offsetof(struct thread_options, verify_state_save),
.type = FIO_OPT_BOOL,
.def = "1",
.help = "Save verify state on termination",
.name = "trim_percentage",
.lname = "Trim percentage",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(trim_percentage),
+ .off1 = offsetof(struct thread_options, trim_percentage),
.minval = 0,
.maxval = 100,
.help = "Number of verify blocks to discard/trim",
.lname = "Verify trim zero",
.type = FIO_OPT_BOOL,
.help = "Verify that trim/discarded blocks are returned as zeroes",
- .off1 = td_var_offset(trim_zero),
+ .off1 = offsetof(struct thread_options, trim_zero),
.parent = "trim_percentage",
.hide = 1,
.def = "1",
.name = "trim_backlog",
.lname = "Trim backlog",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(trim_backlog),
+ .off1 = offsetof(struct thread_options, trim_backlog),
.help = "Trim after this number of blocks are written",
.parent = "trim_percentage",
.hide = 1,
.name = "trim_backlog_batch",
.lname = "Trim backlog batch",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(trim_batch),
+ .off1 = offsetof(struct thread_options, trim_batch),
.help = "Trim this number of IO blocks",
.parent = "trim_percentage",
.hide = 1,
.name = "write_iolog",
.lname = "Write I/O log",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(write_iolog_file),
+ .off1 = offsetof(struct thread_options, write_iolog_file),
.help = "Store IO pattern to file",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IOLOG,
.name = "read_iolog",
.lname = "Read I/O log",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(read_iolog_file),
+ .off1 = offsetof(struct thread_options, read_iolog_file),
.help = "Playback IO pattern from file",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IOLOG,
.name = "replay_no_stall",
.lname = "Don't stall on replay",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(no_stall),
+ .off1 = offsetof(struct thread_options, no_stall),
.def = "0",
.parent = "read_iolog",
.hide = 1,
.name = "replay_redirect",
.lname = "Redirect device for replay",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(replay_redirect),
+ .off1 = offsetof(struct thread_options, replay_redirect),
.parent = "read_iolog",
.hide = 1,
.help = "Replay all I/O onto this device, regardless of trace device",
.name = "replay_scale",
.lname = "Replace offset scale factor",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(replay_scale),
+ .off1 = offsetof(struct thread_options, replay_scale),
.parent = "read_iolog",
.def = "1",
.help = "Align offsets to this blocksize",
.name = "replay_align",
.lname = "Replace alignment",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(replay_align),
+ .off1 = offsetof(struct thread_options, replay_align),
.parent = "read_iolog",
.help = "Scale offset down by this factor",
.category = FIO_OPT_C_IO,
.name = "exec_prerun",
.lname = "Pre-execute runnable",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(exec_prerun),
+ .off1 = offsetof(struct thread_options, exec_prerun),
.help = "Execute this file prior to running job",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_INVALID,
.name = "exec_postrun",
.lname = "Post-execute runnable",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(exec_postrun),
+ .off1 = offsetof(struct thread_options, exec_postrun),
.help = "Execute this file after running job",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_INVALID,
.name = "ioscheduler",
.lname = "I/O scheduler",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(ioscheduler),
+ .off1 = offsetof(struct thread_options, ioscheduler),
.help = "Use this IO scheduler on the backing device",
.category = FIO_OPT_C_FILE,
.group = FIO_OPT_G_INVALID,
.name = "zonesize",
.lname = "Zone size",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(zone_size),
+ .off1 = offsetof(struct thread_options, zone_size),
.help = "Amount of data to read per zone",
.def = "0",
.interval = 1024 * 1024,
.name = "zonerange",
.lname = "Zone range",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(zone_range),
+ .off1 = offsetof(struct thread_options, zone_range),
.help = "Give size of an IO zone",
.def = "0",
.interval = 1024 * 1024,
.name = "zoneskip",
.lname = "Zone skip",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(zone_skip),
+ .off1 = offsetof(struct thread_options, zone_skip),
.help = "Space between IO zones",
.def = "0",
.interval = 1024 * 1024,
.name = "lockmem",
.lname = "Lock memory",
.type = FIO_OPT_STR_VAL,
- .off1 = td_var_offset(lockmem),
+ .off1 = offsetof(struct thread_options, lockmem),
.help = "Lock down this amount of memory (per worker)",
.def = "0",
.interval = 1024 * 1024,
.lname = "Read/write mix read",
.type = FIO_OPT_INT,
.cb = str_rwmix_read_cb,
- .off1 = td_var_offset(rwmix[DDIR_READ]),
+ .off1 = offsetof(struct thread_options, rwmix[DDIR_READ]),
.maxval = 100,
.help = "Percentage of mixed workload that is reads",
.def = "50",
.lname = "Read/write mix write",
.type = FIO_OPT_INT,
.cb = str_rwmix_write_cb,
- .off1 = td_var_offset(rwmix[DDIR_WRITE]),
+ .off1 = offsetof(struct thread_options, rwmix[DDIR_WRITE]),
.maxval = 100,
.help = "Percentage of mixed workload that is writes",
.def = "50",
.name = "nice",
.lname = "Nice",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(nice),
+ .off1 = offsetof(struct thread_options, nice),
.help = "Set job CPU nice value",
.minval = -19,
.maxval = 20,
.name = "prio",
.lname = "I/O nice priority",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(ioprio),
+ .off1 = offsetof(struct thread_options, ioprio),
.help = "Set job IO priority value",
.minval = IOPRIO_MIN_PRIO,
.maxval = IOPRIO_MAX_PRIO,
.name = "prioclass",
.lname = "I/O nice priority class",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(ioprio_class),
+ .off1 = offsetof(struct thread_options, ioprio_class),
.help = "Set job IO priority class",
.minval = IOPRIO_MIN_PRIO_CLASS,
.maxval = IOPRIO_MAX_PRIO_CLASS,
.name = "thinktime",
.lname = "Thinktime",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(thinktime),
+ .off1 = offsetof(struct thread_options, thinktime),
.help = "Idle time between IO buffers (usec)",
.def = "0",
.is_time = 1,
.name = "thinktime_spin",
.lname = "Thinktime spin",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(thinktime_spin),
+ .off1 = offsetof(struct thread_options, thinktime_spin),
.help = "Start think time by spinning this amount (usec)",
.def = "0",
.is_time = 1,
.name = "thinktime_blocks",
.lname = "Thinktime blocks",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(thinktime_blocks),
+ .off1 = offsetof(struct thread_options, thinktime_blocks),
.help = "IO buffer period between 'thinktime'",
.def = "1",
.parent = "thinktime",
.name = "rate",
.lname = "I/O rate",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(rate[DDIR_READ]),
- .off2 = td_var_offset(rate[DDIR_WRITE]),
- .off3 = td_var_offset(rate[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, rate[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, rate[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, rate[DDIR_TRIM]),
.help = "Set bandwidth rate",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_RATE,
.alias = "ratemin",
.lname = "I/O min rate",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(ratemin[DDIR_READ]),
- .off2 = td_var_offset(ratemin[DDIR_WRITE]),
- .off3 = td_var_offset(ratemin[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, ratemin[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, ratemin[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, ratemin[DDIR_TRIM]),
.help = "Job must meet this rate or it will be shutdown",
.parent = "rate",
.hide = 1,
.name = "rate_iops",
.lname = "I/O rate IOPS",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(rate_iops[DDIR_READ]),
- .off2 = td_var_offset(rate_iops[DDIR_WRITE]),
- .off3 = td_var_offset(rate_iops[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, rate_iops[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, rate_iops[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, rate_iops[DDIR_TRIM]),
.help = "Limit IO used to this number of IO operations/sec",
.hide = 1,
.category = FIO_OPT_C_IO,
.name = "rate_iops_min",
.lname = "I/O min rate IOPS",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(rate_iops_min[DDIR_READ]),
- .off2 = td_var_offset(rate_iops_min[DDIR_WRITE]),
- .off3 = td_var_offset(rate_iops_min[DDIR_TRIM]),
+ .off1 = offsetof(struct thread_options, rate_iops_min[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, rate_iops_min[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, rate_iops_min[DDIR_TRIM]),
.help = "Job must meet this rate or it will be shut down",
.parent = "rate_iops",
.hide = 1,
.name = "rate_process",
.lname = "Rate Process",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(rate_process),
+ .off1 = offsetof(struct thread_options, rate_process),
.help = "What process controls how rated IO is managed",
.def = "linear",
.category = FIO_OPT_C_IO,
.alias = "ratecycle",
.lname = "I/O rate cycle",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(ratecycle),
+ .off1 = offsetof(struct thread_options, ratecycle),
.help = "Window average for rate limits (msec)",
.def = "1000",
.parent = "rate",
.name = "max_latency",
.lname = "Max Latency",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(max_latency),
+ .off1 = offsetof(struct thread_options, max_latency),
.help = "Maximum tolerated IO latency (usec)",
.is_time = 1,
.category = FIO_OPT_C_IO,
.name = "latency_target",
.lname = "Latency Target (usec)",
.type = FIO_OPT_STR_VAL_TIME,
- .off1 = td_var_offset(latency_target),
+ .off1 = offsetof(struct thread_options, latency_target),
.help = "Ramp to max queue depth supporting this latency",
.is_time = 1,
.category = FIO_OPT_C_IO,
.name = "latency_window",
.lname = "Latency Window (usec)",
.type = FIO_OPT_STR_VAL_TIME,
- .off1 = td_var_offset(latency_window),
+ .off1 = offsetof(struct thread_options, latency_window),
.help = "Time to sustain latency_target",
.is_time = 1,
.category = FIO_OPT_C_IO,
.name = "latency_percentile",
.lname = "Latency Percentile",
.type = FIO_OPT_FLOAT_LIST,
- .off1 = td_var_offset(latency_percentile),
+ .off1 = offsetof(struct thread_options, latency_percentile),
.help = "Percentile of IOs must be below latency_target",
.def = "100",
.maxlen = 1,
.name = "invalidate",
.lname = "Cache invalidate",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(invalidate_cache),
+ .off1 = offsetof(struct thread_options, invalidate_cache),
.help = "Invalidate buffer/page cache prior to running job",
.def = "1",
.category = FIO_OPT_C_IO,
.name = "sync",
.lname = "Synchronous I/O",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(sync_io),
+ .off1 = offsetof(struct thread_options, sync_io),
.help = "Use O_SYNC for buffered writes",
.def = "0",
.parent = "buffered",
.name = "create_serialize",
.lname = "Create serialize",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(create_serialize),
- .help = "Serialize creating of job files",
+ .off1 = offsetof(struct thread_options, create_serialize),
+ .help = "Serialize creation of job files",
.def = "1",
.category = FIO_OPT_C_FILE,
.group = FIO_OPT_G_INVALID,
.name = "create_fsync",
.lname = "Create fsync",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(create_fsync),
+ .off1 = offsetof(struct thread_options, create_fsync),
.help = "fsync file after creation",
.def = "1",
.category = FIO_OPT_C_FILE,
.name = "create_on_open",
.lname = "Create on open",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(create_on_open),
+ .off1 = offsetof(struct thread_options, create_on_open),
.help = "Create files when they are opened for IO",
.def = "0",
.category = FIO_OPT_C_FILE,
.name = "create_only",
.lname = "Create Only",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(create_only),
+ .off1 = offsetof(struct thread_options, create_only),
.help = "Only perform file creation phase",
.category = FIO_OPT_C_FILE,
.def = "0",
.name = "allow_file_create",
.lname = "Allow file create",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(allow_create),
+ .off1 = offsetof(struct thread_options, allow_create),
.help = "Permit fio to create files, if they don't exist",
.def = "1",
.category = FIO_OPT_C_FILE,
.name = "allow_mounted_write",
.lname = "Allow mounted write",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(allow_mounted_write),
+ .off1 = offsetof(struct thread_options, allow_mounted_write),
.help = "Allow writes to a mounted partition",
.def = "0",
.category = FIO_OPT_C_FILE,
.name = "pre_read",
.lname = "Pre-read files",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(pre_read),
+ .off1 = offsetof(struct thread_options, pre_read),
.help = "Pre-read files before starting official testing",
.def = "0",
.category = FIO_OPT_C_FILE,
.lname = "CPU mask",
.type = FIO_OPT_INT,
.cb = str_cpumask_cb,
- .off1 = td_var_offset(cpumask),
+ .off1 = offsetof(struct thread_options, cpumask),
.help = "CPU affinity mask",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CRED,
.lname = "CPUs allowed",
.type = FIO_OPT_STR,
.cb = str_cpus_allowed_cb,
- .off1 = td_var_offset(cpumask),
+ .off1 = offsetof(struct thread_options, cpumask),
.help = "Set CPUs allowed",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CRED,
.name = "cpus_allowed_policy",
.lname = "CPUs allowed distribution policy",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(cpus_allowed_policy),
+ .off1 = offsetof(struct thread_options, cpus_allowed_policy),
.help = "Distribution policy for cpus_allowed",
.parent = "cpus_allowed",
.prio = 1,
.lname = "NUMA CPU Nodes",
.type = FIO_OPT_STR,
.cb = str_numa_cpunodes_cb,
- .off1 = td_var_offset(numa_cpunodes),
+ .off1 = offsetof(struct thread_options, numa_cpunodes),
.help = "NUMA CPU nodes bind",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_INVALID,
.lname = "NUMA Memory Policy",
.type = FIO_OPT_STR,
.cb = str_numa_mpol_cb,
- .off1 = td_var_offset(numa_memnodes),
+ .off1 = offsetof(struct thread_options, numa_memnodes),
.help = "NUMA memory policy setup",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_INVALID,
.name = "end_fsync",
.lname = "End fsync",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(end_fsync),
+ .off1 = offsetof(struct thread_options, end_fsync),
.help = "Include fsync at the end of job",
.def = "0",
.category = FIO_OPT_C_FILE,
.name = "fsync_on_close",
.lname = "Fsync on close",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(fsync_on_close),
+ .off1 = offsetof(struct thread_options, fsync_on_close),
.help = "fsync files on close",
.def = "0",
.category = FIO_OPT_C_FILE,
.name = "unlink",
.lname = "Unlink file",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(unlink),
+ .off1 = offsetof(struct thread_options, unlink),
.help = "Unlink created files after job has completed",
.def = "0",
.category = FIO_OPT_C_FILE,
.group = FIO_OPT_G_INVALID,
},
+ {
+ .name = "unlink_each_loop",
+ .lname = "Unlink file after each loop of a job",
+ .type = FIO_OPT_BOOL,
+ .off1 = offsetof(struct thread_options, unlink_each_loop),
+ .help = "Unlink created files after each loop in a job has completed",
+ .def = "0",
+ .category = FIO_OPT_C_FILE,
+ .group = FIO_OPT_G_INVALID,
+ },
{
.name = "exitall",
.lname = "Exit-all on terminate",
.name = "exitall_on_error",
.lname = "Exit-all on terminate in error",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(exitall_error),
+ .off1 = offsetof(struct thread_options, exitall_error),
.help = "Terminate all jobs when one exits in error",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_PROCESS,
.lname = "Wait for previous",
.alias = "wait_for_previous",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(stonewall),
+ .off1 = offsetof(struct thread_options, stonewall),
.help = "Insert a hard barrier between this job and previous",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_PROCESS,
.name = "new_group",
.lname = "New group",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(new_group),
+ .off1 = offsetof(struct thread_options, new_group),
.help = "Mark the start of a new group (for reporting)",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_PROCESS,
.name = "thread",
.lname = "Thread",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(use_thread),
+ .off1 = offsetof(struct thread_options, use_thread),
.help = "Use threads instead of processes",
#ifdef CONFIG_NO_SHM
.def = "1",
.name = "per_job_logs",
.lname = "Per Job Logs",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(per_job_logs),
+ .off1 = offsetof(struct thread_options, per_job_logs),
.help = "Include job number in generated log files or not",
.def = "1",
.category = FIO_OPT_C_LOG,
.name = "write_bw_log",
.lname = "Write bandwidth log",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(bw_log_file),
+ .off1 = offsetof(struct thread_options, bw_log_file),
.help = "Write log of bandwidth during run",
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
.name = "write_lat_log",
.lname = "Write latency log",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(lat_log_file),
+ .off1 = offsetof(struct thread_options, lat_log_file),
.help = "Write log of latency during run",
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
.name = "write_iops_log",
.lname = "Write IOPS log",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(iops_log_file),
+ .off1 = offsetof(struct thread_options, iops_log_file),
.help = "Write log of IOPS during run",
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
.name = "log_avg_msec",
.lname = "Log averaging (msec)",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(log_avg_msec),
+ .off1 = offsetof(struct thread_options, log_avg_msec),
.help = "Average bw/iops/lat logs over this period of time",
.def = "0",
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
},
+ {
+ .name = "log_hist_msec",
+ .lname = "Log histograms (msec)",
+ .type = FIO_OPT_INT,
+ .off1 = offsetof(struct thread_options, log_hist_msec),
+ .help = "Dump completion latency histograms at frequency of this time value",
+ .def = "0",
+ .category = FIO_OPT_C_LOG,
+ .group = FIO_OPT_G_INVALID,
+ },
+ {
+ .name = "log_hist_coarseness",
+ .lname = "Histogram logs coarseness",
+ .type = FIO_OPT_INT,
+ .off1 = offsetof(struct thread_options, log_hist_coarseness),
+ .help = "Integer in range [0,6]. Higher coarseness outputs"
+ " fewer histogram bins per sample. The number of bins for"
+ " these are [1216, 608, 304, 152, 76, 38, 19] respectively.",
+ .def = "0",
+ .category = FIO_OPT_C_LOG,
+ .group = FIO_OPT_G_INVALID,
+ },
+ {
+ .name = "write_hist_log",
+ .lname = "Write latency histogram logs",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct thread_options, hist_log_file),
+ .help = "Write log of latency histograms during run",
+ .category = FIO_OPT_C_LOG,
+ .group = FIO_OPT_G_INVALID,
+ },
{
.name = "log_max_value",
.lname = "Log maximum instead of average",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(log_max),
+ .off1 = offsetof(struct thread_options, log_max),
.help = "Log max sample in a window instead of average",
.def = "0",
.category = FIO_OPT_C_LOG,
.name = "log_offset",
.lname = "Log offset of IO",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(log_offset),
+ .off1 = offsetof(struct thread_options, log_offset),
.help = "Include offset of IO for each log entry",
.def = "0",
.category = FIO_OPT_C_LOG,
.name = "log_compression",
.lname = "Log compression",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(log_gz),
+ .off1 = offsetof(struct thread_options, log_gz),
.help = "Log in compressed chunks of this size",
.minval = 1024ULL,
.maxval = 512 * 1024 * 1024ULL,
.lname = "Log Compression CPUs",
.type = FIO_OPT_STR,
.cb = str_log_cpus_allowed_cb,
- .off1 = td_var_offset(log_gz_cpumask),
+ .off1 = offsetof(struct thread_options, log_gz_cpumask),
.parent = "log_compression",
.help = "Limit log compression to these CPUs",
.category = FIO_OPT_C_LOG,
.name = "log_store_compressed",
.lname = "Log store compressed",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(log_gz_store),
+ .off1 = offsetof(struct thread_options, log_gz_store),
.help = "Store logs in a compressed format",
.category = FIO_OPT_C_LOG,
.group = FIO_OPT_G_INVALID,
.help = "Install libz-dev(el) to get compression support",
},
#endif
+ {
+ .name = "log_unix_epoch",
+ .lname = "Log epoch unix",
+ .type = FIO_OPT_BOOL,
+ .off1 = offsetof(struct thread_options, log_unix_epoch),
+ .help = "Use Unix time in log files",
+ .category = FIO_OPT_C_LOG,
+ .group = FIO_OPT_G_INVALID,
+ },
{
.name = "block_error_percentiles",
.lname = "Block error percentiles",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(block_error_hist),
+ .off1 = offsetof(struct thread_options, block_error_hist),
.help = "Record trim block errors and make a histogram",
.def = "0",
.category = FIO_OPT_C_LOG,
.name = "bwavgtime",
.lname = "Bandwidth average time",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(bw_avg_time),
+ .off1 = offsetof(struct thread_options, bw_avg_time),
.help = "Time window over which to calculate bandwidth"
" (msec)",
.def = "500",
.name = "iopsavgtime",
.lname = "IOPS average time",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(iops_avg_time),
+ .off1 = offsetof(struct thread_options, iops_avg_time),
.help = "Time window over which to calculate IOPS (msec)",
.def = "500",
.parent = "write_iops_log",
.name = "group_reporting",
.lname = "Group reporting",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(group_reporting),
+ .off1 = offsetof(struct thread_options, group_reporting),
.help = "Do reporting on a per-group basis",
.category = FIO_OPT_C_STAT,
.group = FIO_OPT_G_INVALID,
.name = "zero_buffers",
.lname = "Zero I/O buffers",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(zero_buffers),
+ .off1 = offsetof(struct thread_options, zero_buffers),
.help = "Init IO buffers to all zeroes",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IO_BUF,
.name = "refill_buffers",
.lname = "Refill I/O buffers",
.type = FIO_OPT_STR_SET,
- .off1 = td_var_offset(refill_buffers),
+ .off1 = offsetof(struct thread_options, refill_buffers),
.help = "Refill IO buffers on every IO submit",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IO_BUF,
.name = "scramble_buffers",
.lname = "Scramble I/O buffers",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(scramble_buffers),
+ .off1 = offsetof(struct thread_options, scramble_buffers),
.help = "Slightly scramble buffers on every IO submit",
.def = "1",
.category = FIO_OPT_C_IO,
.lname = "Buffer pattern",
.type = FIO_OPT_STR,
.cb = str_buffer_pattern_cb,
- .off1 = td_var_offset(buffer_pattern),
+ .off1 = offsetof(struct thread_options, buffer_pattern),
.help = "Fill pattern for IO buffers",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IO_BUF,
.lname = "Buffer compression percentage",
.type = FIO_OPT_INT,
.cb = str_buffer_compress_cb,
- .off1 = td_var_offset(compress_percentage),
+ .off1 = offsetof(struct thread_options, compress_percentage),
.maxval = 100,
.minval = 0,
.help = "How compressible the buffer is (approximately)",
.name = "buffer_compress_chunk",
.lname = "Buffer compression chunk size",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(compress_chunk),
+ .off1 = offsetof(struct thread_options, compress_chunk),
.parent = "buffer_compress_percentage",
.hide = 1,
.help = "Size of compressible region in buffer",
.lname = "Dedupe percentage",
.type = FIO_OPT_INT,
.cb = str_dedupe_cb,
- .off1 = td_var_offset(dedupe_percentage),
+ .off1 = offsetof(struct thread_options, dedupe_percentage),
.maxval = 100,
.minval = 0,
.help = "Percentage of buffers that are dedupable",
.name = "clat_percentiles",
.lname = "Completion latency percentiles",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(clat_percentiles),
+ .off1 = offsetof(struct thread_options, clat_percentiles),
.help = "Enable the reporting of completion latency percentiles",
.def = "1",
.category = FIO_OPT_C_STAT,
.name = "percentile_list",
.lname = "Percentile list",
.type = FIO_OPT_FLOAT_LIST,
- .off1 = td_var_offset(percentile_list),
- .off2 = td_var_offset(percentile_precision),
+ .off1 = offsetof(struct thread_options, percentile_list),
+ .off2 = offsetof(struct thread_options, percentile_precision),
.help = "Specify a custom list of percentiles to report for "
"completion latency and block errors",
.def = "1:5:10:20:30:40:50:60:70:80:90:95:99:99.5:99.9:99.95:99.99",
.name = "disk_util",
.lname = "Disk utilization",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(do_disk_util),
+ .off1 = offsetof(struct thread_options, do_disk_util),
.help = "Log disk utilization statistics",
.def = "1",
.category = FIO_OPT_C_STAT,
.name = "disable_lat",
.lname = "Disable all latency stats",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(disable_lat),
+ .off1 = offsetof(struct thread_options, disable_lat),
.help = "Disable latency numbers",
.parent = "gtod_reduce",
.hide = 1,
.name = "disable_clat",
.lname = "Disable completion latency stats",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(disable_clat),
+ .off1 = offsetof(struct thread_options, disable_clat),
.help = "Disable completion latency numbers",
.parent = "gtod_reduce",
.hide = 1,
.name = "disable_slat",
.lname = "Disable submission latency stats",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(disable_slat),
+ .off1 = offsetof(struct thread_options, disable_slat),
.help = "Disable submission latency numbers",
.parent = "gtod_reduce",
.hide = 1,
.name = "disable_bw_measurement",
.lname = "Disable bandwidth stats",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(disable_bw),
+ .off1 = offsetof(struct thread_options, disable_bw),
.help = "Disable bandwidth logging",
.parent = "gtod_reduce",
.hide = 1,
.name = "gtod_cpu",
.lname = "Dedicated gettimeofday() CPU",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(gtod_cpu),
+ .off1 = offsetof(struct thread_options, gtod_cpu),
.help = "Set up dedicated gettimeofday() thread on this CPU",
.verify = gtod_cpu_verify,
.category = FIO_OPT_C_GENERAL,
.name = "unified_rw_reporting",
.lname = "Unified RW Reporting",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(unified_rw_rep),
+ .off1 = offsetof(struct thread_options, unified_rw_rep),
.help = "Unify reporting across data direction",
.def = "0",
.category = FIO_OPT_C_GENERAL,
.name = "continue_on_error",
.lname = "Continue on error",
.type = FIO_OPT_STR,
- .off1 = td_var_offset(continue_on_error),
+ .off1 = offsetof(struct thread_options, continue_on_error),
.help = "Continue on non-fatal errors during IO",
.def = "none",
.category = FIO_OPT_C_GENERAL,
.lname = "Ignore Error",
.type = FIO_OPT_STR,
.cb = str_ignore_error_cb,
- .off1 = td_var_offset(ignore_error_nr),
+ .off1 = offsetof(struct thread_options, ignore_error_nr),
.help = "Set a specific list of errors to ignore",
.parent = "rw",
.category = FIO_OPT_C_GENERAL,
.name = "error_dump",
.lname = "Error Dump",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(error_dump),
+ .off1 = offsetof(struct thread_options, error_dump),
.def = "0",
.help = "Dump info on each error",
.category = FIO_OPT_C_GENERAL,
.name = "profile",
.lname = "Profile",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(profile),
+ .off1 = offsetof(struct thread_options, profile),
.help = "Select a specific builtin performance test",
.category = FIO_OPT_C_PROFILE,
.group = FIO_OPT_G_INVALID,
.name = "cgroup",
.lname = "Cgroup",
.type = FIO_OPT_STR_STORE,
- .off1 = td_var_offset(cgroup),
+ .off1 = offsetof(struct thread_options, cgroup),
.help = "Add job to cgroup of this name",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CGROUP,
.name = "cgroup_nodelete",
.lname = "Cgroup no-delete",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(cgroup_nodelete),
+ .off1 = offsetof(struct thread_options, cgroup_nodelete),
.help = "Do not delete cgroups after job completion",
.def = "0",
.parent = "cgroup",
.name = "cgroup_weight",
.lname = "Cgroup weight",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(cgroup_weight),
+ .off1 = offsetof(struct thread_options, cgroup_weight),
.help = "Use given weight for cgroup",
.minval = 100,
.maxval = 1000,
.name = "uid",
.lname = "User ID",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(uid),
+ .off1 = offsetof(struct thread_options, uid),
.help = "Run job with this user ID",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CRED,
.name = "gid",
.lname = "Group ID",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(gid),
+ .off1 = offsetof(struct thread_options, gid),
.help = "Run job with this group ID",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_CRED,
.name = "kb_base",
.lname = "KB Base",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(kb_base),
+ .off1 = offsetof(struct thread_options, kb_base),
.prio = 1,
.def = "1024",
.posval = {
.name = "unit_base",
.lname = "Base unit for reporting (Bits or Bytes)",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(unit_base),
+ .off1 = offsetof(struct thread_options, unit_base),
.prio = 1,
.posval = {
{ .ival = "0",
.name = "hugepage-size",
.lname = "Hugepage size",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(hugepage_size),
+ .off1 = offsetof(struct thread_options, hugepage_size),
.help = "When using hugepages, specify size of each page",
.def = __fio_stringify(FIO_HUGE_PAGE),
.interval = 1024 * 1024,
.name = "flow_id",
.lname = "I/O flow ID",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(flow_id),
+ .off1 = offsetof(struct thread_options, flow_id),
.help = "The flow index ID to use",
.def = "0",
.category = FIO_OPT_C_IO,
.name = "flow",
.lname = "I/O flow weight",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(flow),
+ .off1 = offsetof(struct thread_options, flow),
.help = "Weight for flow control of this job",
.parent = "flow_id",
.hide = 1,
.name = "flow_watermark",
.lname = "I/O flow watermark",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(flow_watermark),
+ .off1 = offsetof(struct thread_options, flow_watermark),
.help = "High watermark for flow control. This option"
" should be set to the same value for all threads"
" with non-zero flow.",
.name = "flow_sleep",
.lname = "I/O flow sleep",
.type = FIO_OPT_INT,
- .off1 = td_var_offset(flow_sleep),
+ .off1 = offsetof(struct thread_options, flow_sleep),
.help = "How many microseconds to sleep after being held"
" back by the flow control mechanism",
.parent = "flow_id",
.name = "skip_bad",
.lname = "Skip operations against bad blocks",
.type = FIO_OPT_BOOL,
- .off1 = td_var_offset(skip_bad),
+ .off1 = offsetof(struct thread_options, skip_bad),
.help = "Skip operations against known bad blocks.",
.hide = 1,
.def = "0",
for (ret = 0, i = 0, unknown = 0; i < num_opts; i++) {
struct fio_option *o;
int newret = parse_option(opts_copy[i], opts[i], fio_options,
- &o, td, &td->opt_list);
+ &o, &td->o, &td->opt_list);
if (!newret && o)
fio_option_mark_set(&td->o, o);
{
int ret;
- ret = parse_cmd_option(opt, val, fio_options, td, &td->opt_list);
+ ret = parse_cmd_option(opt, val, fio_options, &td->o, &td->opt_list);
if (!ret) {
struct fio_option *o;
void fio_fill_default_options(struct thread_data *td)
{
td->o.magic = OPT_MAGIC;
- fill_default_options(td, fio_options);
+ fill_default_options(&td->o, fio_options);
}
int fio_show_option_help(const char *opt)
unsigned int fio_get_kb_base(void *data)
{
- struct thread_options *o = data;
+ struct thread_data *td = cb_data_to_td(data);
+ struct thread_options *o = &td->o;
unsigned int kb_base = 0;
/*
void fio_options_free(struct thread_data *td)
{
- options_free(fio_options, td);
+ options_free(fio_options, &td->o);
if (td->eo && td->io_ops && td->io_ops->options) {
options_free(td->io_ops->options, td->eo);
free(td->eo);
#include "flist.h"
#include "lib/types.h"
-#define td_var_offset(var) ((size_t) &((struct thread_options *)0)->var)
-
int add_option(struct fio_option *);
void invalidate_profile_options(const char *);
extern char *exec_profile;
void del_opt_posval(const char *, const char *);
struct thread_data;
void fio_options_free(struct thread_data *);
-char *get_name_idx(char *, int);
int set_name_idx(char *, size_t, char *, int, bool);
extern char client_sockaddr_str[]; /* used with --client option */
#define fio_option_is_set(__td, name) \
({ \
- const unsigned int off = td_var_offset(name); \
+ const unsigned int off = offsetof(struct thread_options, name); \
bool __r = __fio_option_is_set((__td), off); \
__r; \
})
return 0;
}
-int do_oob_op(libmtd_t desc, const struct mtd_dev_info *mtd, int fd,
+static int do_oob_op(libmtd_t desc, const struct mtd_dev_info *mtd, int fd,
uint64_t start, uint64_t length, void *data, unsigned int cmd64,
unsigned int cmd)
{
#include <unistd.h>
#include "../os/os.h"
+#include "oslib/linux-dev-lookup.h"
int blktrace_lookup_device(const char *redirect, char *path, unsigned int maj,
unsigned int min)
#include <string.h>
+#include "oslib/strlcat.h"
size_t strlcat(char *dst, const char *src, size_t size)
{
handle_option(o, o->def, data);
}
-void option_init(struct fio_option *o)
+static void option_init(struct fio_option *o)
{
if (o->type == FIO_OPT_DEPRECATED || o->type == FIO_OPT_UNSUPPORTED)
return;
int pow2; /* must be a power-of-2 */
};
-typedef int (str_cb_fn)(void *, char *);
-
extern int parse_option(char *, const char *, struct fio_option *, struct fio_option **, void *, struct flist_head *);
extern void sort_options(char **, struct fio_option *, int);
extern int parse_cmd_option(const char *t, const char *l, struct fio_option *, void *, struct flist_head *);
extern int show_cmd_help(struct fio_option *, const char *);
extern void fill_default_options(void *, struct fio_option *);
-extern void option_init(struct fio_option *);
extern void options_init(struct fio_option *);
extern void options_free(struct fio_option *, void *);
typedef int (fio_opt_str_fn)(void *, const char *);
typedef int (fio_opt_str_val_fn)(void *, long long *);
typedef int (fio_opt_int_fn)(void *, int *);
-typedef int (fio_opt_str_set_fn)(void *);
-
-#define __td_var(start, offset) ((char *) start + (offset))
struct thread_options;
+/*
+ * Resolve the storage address for an option value: profile-private
+ * storage (o->prof_opts) when the option belongs to a profile,
+ * otherwise the thread_options struct itself, plus the option's
+ * byte offset.
+ */
static inline void *td_var(struct thread_options *to, struct fio_option *o,
			   unsigned int offset)
{
+	void *ret;
+
	if (o->prof_opts)
-		return __td_var(o->prof_opts, offset);
+		ret = o->prof_opts;
+	else
+		ret = to;
-	return __td_var(to, offset);
+	return (char *) ret + offset;
}
static inline int parse_is_percent(unsigned long long val)
dprint(FD_RATE, "io_u %p queued by %u\n", io_u, gettid());
- io_u_set(io_u, IO_U_F_NO_FILE_PUT);
+ io_u_set(td, io_u, IO_U_F_NO_FILE_PUT);
td->cur_depth++;
ret = io_u_queued_complete(td, 1);
if (ret > 0)
td->cur_depth -= ret;
- io_u_clear(io_u, IO_U_F_FLIGHT);
+ io_u_clear(td, io_u, IO_U_F_FLIGHT);
} while (1);
dprint(FD_RATE, "io_u %p ret %d by %u\n", io_u, ret, gettid());
if (ioengine_load(td))
goto err;
- if (td->o.odirect)
- td->io_ops->flags |= FIO_RAWIO;
-
td->pid = gettid();
INIT_FLIST_HEAD(&td->io_log_list);
if (td_io_init(td))
goto err_io_init;
- fio_gettime(&td->epoch, NULL);
+ set_epoch_time(td, td->o.log_unix_epoch);
fio_getrusage(&td->ru_start);
clear_io_state(td, 1);
"LOAD_FILE",
"VTRIGGER",
"SENDFILE",
+ "JOB_OPT",
};
static void sk_lock(struct sk_out *sk_out)
{
dprint(FD_NET, "server: sending quit\n");
- return fio_net_queue_cmd(FIO_NET_CMD_QUIT, NULL, 0, 0, SK_F_SIMPLE);
+ return fio_net_queue_cmd(FIO_NET_CMD_QUIT, NULL, 0, NULL, SK_F_SIMPLE);
}
int fio_net_send_quit(int sk)
}
#ifdef CONFIG_ZLIB
+
+/*
+ * Queue whatever has been deflated into 'out_pdu' so far as a
+ * FIO_NET_CMD_IOLOG entry, linked at the tail of the reply list headed
+ * by 'first'. stream->avail_out counts the unused remainder of the PDU
+ * buffer, so the payload length is the full PDU size minus it. The new
+ * entry is returned through *last_entry.
+ */
+static inline void __fio_net_prep_tail(z_stream *stream, void *out_pdu,
+				       struct sk_entry **last_entry,
+				       struct sk_entry *first)
+{
+	unsigned int this_len = FIO_SERVER_MAX_FRAGMENT_PDU - stream->avail_out;
+
+	*last_entry = fio_net_prep_cmd(FIO_NET_CMD_IOLOG, out_pdu, this_len,
+				 NULL, SK_F_VEC | SK_F_INLINE | SK_F_FREE);
+	flist_add_tail(&(*last_entry)->list, &first->next);
+
+}
+
+/*
+ * Deflates the next input given, creating as many new packets in the
+ * linked list as necessary. Whenever the current output PDU fills up,
+ * it is queued via __fio_net_prep_tail() and a fresh PDU buffer is
+ * allocated into *out_pdu.
+ *
+ * Returns 0 on success. On a deflate() error the current output buffer
+ * is freed and 1 is returned; buffers already queued on 'first' remain
+ * on the list.
+ */
+static int __deflate_pdu_buffer(void *next_in, unsigned int next_sz, void **out_pdu,
+			       struct sk_entry **last_entry, z_stream *stream,
+			       struct sk_entry *first)
+{
+	int ret;
+
+	stream->next_in = next_in;
+	stream->avail_in = next_sz;
+	do {
+		/* Current output PDU is full: queue it and start a new one */
+		if (! stream->avail_out) {
+
+			__fio_net_prep_tail(stream, *out_pdu, last_entry, first);
+
+			*out_pdu = malloc(FIO_SERVER_MAX_FRAGMENT_PDU);
+
+			stream->avail_out = FIO_SERVER_MAX_FRAGMENT_PDU;
+			stream->next_out = *out_pdu;
+		}
+
+		ret = deflate(stream, Z_BLOCK);
+
+		if (ret < 0) {
+			free(*out_pdu);
+			return 1;
+		}
+	} while (stream->avail_in);
+
+	return 0;
+}
+
+/*
+ * Compress a histogram log's samples into one or more IOLOG packets
+ * appended after 'first'. Each sample's histogram is turned into a
+ * per-window delta by subtracting the previous snapshot in the plat
+ * entry list, and the consumed previous snapshot is then unlinked and
+ * freed. Returns 0 on success, non-zero on a deflate error (see
+ * __deflate_pdu_buffer()).
+ */
+static int __fio_append_iolog_gz_hist(struct sk_entry *first, struct io_log *log,
+                                      struct io_logs *cur_log, z_stream *stream)
+{
+	struct sk_entry *entry;
+	void *out_pdu;
+	int ret, i, j;
+	int sample_sz = log_entry_sz(log);
+
+	out_pdu = malloc(FIO_SERVER_MAX_FRAGMENT_PDU);
+	stream->avail_out = FIO_SERVER_MAX_FRAGMENT_PDU;
+	stream->next_out = out_pdu;
+
+	for (i = 0; i < cur_log->nr_samples; i++) {
+		struct io_sample *s;
+		struct io_u_plat_entry *cur_plat_entry, *prev_plat_entry;
+		unsigned int *cur_plat, *prev_plat;
+
+		s = get_sample(log, cur_log, i);
+		ret = __deflate_pdu_buffer(s, sample_sz, &out_pdu, &entry, stream, first);
+		if (ret)
+			return ret;
+
+		/*
+		 * Do the subtraction on the server side so that the client
+		 * doesn't have to reconstruct our linked list from packets.
+		 */
+		cur_plat_entry = s->plat_entry;
+		prev_plat_entry = flist_first_entry(&cur_plat_entry->list, struct io_u_plat_entry, list);
+		cur_plat = cur_plat_entry->io_u_plat;
+		prev_plat = prev_plat_entry->io_u_plat;
+
+		for (j = 0; j < FIO_IO_U_PLAT_NR; j++) {
+			cur_plat[j] -= prev_plat[j];
+		}
+
+		/* Previous snapshot has been folded in; drop it */
+		flist_del(&prev_plat_entry->list);
+		free(prev_plat_entry);
+
+		ret = __deflate_pdu_buffer(cur_plat_entry, sizeof(*cur_plat_entry),
+				&out_pdu, &entry, stream, first);
+
+		if (ret)
+			return ret;
+	}
+
+	/* Flush whatever is left in the final, partially-filled PDU */
+	__fio_net_prep_tail(stream, out_pdu, &entry, first);
+
+	return 0;
+}
+
static int __fio_append_iolog_gz(struct sk_entry *first, struct io_log *log,
struct io_logs *cur_log, z_stream *stream)
{
void *out_pdu;
int ret;
+ if (log->log_type == IO_LOG_TYPE_HIST)
+ return __fio_append_iolog_gz_hist(first, log, cur_log, stream);
+
stream->next_in = (void *) cur_log->log;
stream->avail_in = cur_log->nr_samples * log_entry_sz(log);
pdu.nr_samples = cpu_to_le64(iolog_nr_samples(log));
pdu.thread_number = cpu_to_le32(td->thread_number);
pdu.log_type = cpu_to_le32(log->log_type);
+ pdu.log_hist_coarseness = cpu_to_le32(log->hist_coarseness);
if (!flist_empty(&log->chunk_list))
pdu.compressed = __cpu_to_le32(STORE_COMPRESSED);
assert(sk_out->sk != -1);
- fio_net_queue_cmd(FIO_NET_CMD_SERVER_START, NULL, 0, 0, SK_F_SIMPLE);
+ fio_net_queue_cmd(FIO_NET_CMD_SERVER_START, NULL, 0, NULL, SK_F_SIMPLE);
}
int fio_server_get_verify_state(const char *name, int threadnumber,
};
enum {
- FIO_SERVER_VER = 54,
+ FIO_SERVER_VER = 57,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
uint32_t log_type;
uint32_t compressed;
uint32_t log_offset;
+ uint32_t log_hist_coarseness;
uint8_t name[FIO_NET_NAME_MAX];
struct io_sample samples[0];
};
free(ovals);
}
-int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
- double *mean, double *dev)
+bool calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
+ double *mean, double *dev)
{
double n = (double) is->samples;
if (n == 0)
- return 0;
+ return false;
*min = is->min_val;
*max = is->max_val;
else
*dev = 0;
- return 1;
+ return true;
}
void show_group_stats(struct group_run_stats *rs, struct buf_output *out)
const char *base = "(usec)";
char *minp, *maxp;
- if (!usec_to_msec(&min, &max, &mean, &dev))
+ if (usec_to_msec(&min, &max, &mean, &dev))
base = "(msec)";
minp = num2str(min, 6, 1, 0, 0);
log_buf(out, "\n");
}
-void json_add_job_opts(struct json_object *root, const char *name,
- struct flist_head *opt_list, bool num_jobs)
+static void json_add_job_opts(struct json_object *root, const char *name,
+ struct flist_head *opt_list, bool num_jobs)
{
struct json_object *dir_object;
struct flist_head *entry;
{
regrow_log(td->slat_log);
regrow_log(td->clat_log);
+ regrow_log(td->clat_hist_log);
regrow_log(td->lat_log);
regrow_log(td->bw_log);
regrow_log(td->iops_log);
s = get_sample(iolog, cur_log, cur_log->nr_samples);
s->val = val;
- s->time = t;
+ s->time = t + iolog->td->unix_epoch;
io_sample_set_ddir(iolog, s, ddir);
s->bs = bs;
* need to do.
*/
this_window = elapsed - iolog->avg_last;
- if (this_window < iolog->avg_msec) {
+ if (elapsed < iolog->avg_last)
+ return iolog->avg_last - elapsed;
+ else if (this_window < iolog->avg_msec) {
int diff = iolog->avg_msec - this_window;
if (inline_log(iolog) || diff > LOG_MSEC_SLACK)
void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
unsigned long usec, unsigned int bs, uint64_t offset)
{
+ unsigned long elapsed, this_window;
struct thread_stat *ts = &td->ts;
+ struct io_log *iolog = td->clat_hist_log;
td_io_u_lock(td);
if (ts->clat_percentiles)
add_clat_percentile_sample(ts, usec, ddir);
+ if (iolog && iolog->hist_msec) {
+ struct io_hist *hw = &iolog->hist_window[ddir];
+
+ hw->samples++;
+ elapsed = mtime_since_now(&td->epoch);
+ if (!hw->hist_last)
+ hw->hist_last = elapsed;
+ this_window = elapsed - hw->hist_last;
+
+ if (this_window >= iolog->hist_msec) {
+ unsigned int *io_u_plat;
+ struct io_u_plat_entry *dst;
+
+ /*
+ * Make a byte-for-byte copy of the latency histogram
+ * stored in td->ts.io_u_plat[ddir], recording it in a
+ * log sample. Note that the matching call to free() is
+ * located in iolog.c after printing this sample to the
+ * log file.
+ */
+ io_u_plat = (unsigned int *) td->ts.io_u_plat[ddir];
+ dst = malloc(sizeof(struct io_u_plat_entry));
+ memcpy(&(dst->io_u_plat), io_u_plat,
+ FIO_IO_U_PLAT_NR * sizeof(unsigned int));
+ flist_add(&dst->list, &hw->list);
+ __add_log_sample(iolog, (unsigned long)dst, ddir, bs,
+ elapsed, offset);
+
+ /*
+ * Update the last time we recorded as being now, minus
+ * any drift in time we encountered before actually
+ * making the record.
+ */
+ hw->hist_last = elapsed - (this_window - iolog->hist_msec);
+ hw->samples = 0;
+ }
+ }
+
td_io_u_unlock(td);
}
next = min(td->o.iops_avg_time, td->o.bw_avg_time);
continue;
}
- if (!per_unit_log(td->bw_log)) {
+ if (td->bw_log && !per_unit_log(td->bw_log)) {
tmp = add_bw_samples(td, &now);
if (tmp < next)
next = tmp;
}
- if (!per_unit_log(td->iops_log)) {
+ if (td->iops_log && !per_unit_log(td->iops_log)) {
tmp = add_iops_samples(td, &now);
if (tmp < next)
next = tmp;
uint8_t run_str[];
} __attribute__((packed));
+struct io_u_plat_entry {
+ struct flist_head list;
+ unsigned int io_u_plat[FIO_IO_U_PLAT_NR];
+};
+
extern struct fio_mutex *stat_mutex;
extern struct jobs_eta *get_jobs_eta(bool force, size_t *size);
extern struct json_object * show_thread_status(struct thread_stat *ts, struct group_run_stats *rs, struct flist_head *, struct buf_output *);
extern void show_group_stats(struct group_run_stats *rs, struct buf_output *);
-extern int calc_thread_status(struct jobs_eta *je, int force);
+extern bool calc_thread_status(struct jobs_eta *je, int force);
extern void display_thread_status(struct jobs_eta *je);
extern void show_run_stats(void);
extern void __show_run_stats(void);
extern void init_thread_stat(struct thread_stat *ts);
extern void init_group_run_stat(struct group_run_stats *gs);
extern void eta_to_str(char *str, unsigned long eta_sec);
-extern int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max, double *mean, double *dev);
+extern bool calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max, double *mean, double *dev);
extern unsigned int calc_clat_percentiles(unsigned int *io_u_plat, unsigned long nr, fio_fp64_t *plist, unsigned int **output, unsigned int *maxv, unsigned int *minv);
extern void stat_calc_lat_m(struct thread_stat *ts, double *io_u_lat);
extern void stat_calc_lat_u(struct thread_stat *ts, double *io_u_lat);
extern struct io_log *agg_io_log[DDIR_RWDIR_CNT];
extern int write_bw_log;
+/*
+ * Convert latency stats from usec to msec in place, but only when min,
+ * max, mean and stddev are all above 1000 (so the msec representation
+ * loses no meaningful precision). Returns true iff the values were
+ * converted.
+ *
+ * NOTE(review): the boolean sense is inverted relative to the old int
+ * version (which returned 0 on conversion) — the caller in
+ * show_thread_status was updated to match.
+ */
-static inline int usec_to_msec(unsigned long *min, unsigned long *max,
-			       double *mean, double *dev)
+static inline bool usec_to_msec(unsigned long *min, unsigned long *max,
+				double *mean, double *dev)
{
	if (*min > 1000 && *max > 1000 && *mean > 1000.0 && *dev > 1000.0) {
		*min /= 1000;
		*max /= 1000;
		*mean /= 1000.0;
		*dev /= 1000.0;
-		return 0;
+		return true;
	}
-	return 1;
+	return false;
}
/*
* Worst level condensing would be 1:5, so allow enough room for that
unsigned int verify_state_save;
unsigned int use_thread;
unsigned int unlink;
+ unsigned int unlink_each_loop;
unsigned int do_disk_util;
unsigned int override_sync;
unsigned int rand_repeatable;
unsigned long long rand_seed;
unsigned int dep_use_os_rand;
unsigned int log_avg_msec;
+ unsigned int log_hist_msec;
+ unsigned int log_hist_coarseness;
unsigned int log_max;
unsigned int log_offset;
unsigned int log_gz;
unsigned int log_gz_store;
+ unsigned int log_unix_epoch;
unsigned int norandommap;
unsigned int softrandommap;
unsigned int bs_unaligned;
char *bw_log_file;
char *lat_log_file;
char *iops_log_file;
+ char *hist_log_file;
char *replay_redirect;
/*
uint32_t verify_state_save;
uint32_t use_thread;
uint32_t unlink;
+ uint32_t unlink_each_loop;
uint32_t do_disk_util;
uint32_t override_sync;
uint32_t rand_repeatable;
uint64_t rand_seed;
uint32_t dep_use_os_rand;
uint32_t log_avg_msec;
+ uint32_t log_hist_msec;
+ uint32_t log_hist_coarseness;
uint32_t log_max;
uint32_t log_offset;
uint32_t log_gz;
uint32_t log_gz_store;
+ uint32_t log_unix_epoch;
uint32_t norandommap;
uint32_t softrandommap;
uint32_t bs_unaligned;
uint8_t bw_log_file[FIO_TOP_STR_MAX];
uint8_t lat_log_file[FIO_TOP_STR_MAX];
uint8_t iops_log_file[FIO_TOP_STR_MAX];
+ uint8_t hist_log_file[FIO_TOP_STR_MAX];
uint8_t replay_redirect[FIO_TOP_STR_MAX];
/*
fio_gettime(&genesis, NULL);
}
+/*
+ * Record the job's start time in td->epoch. When log_unix_epoch is set,
+ * also cache the current wall-clock Unix time in milliseconds in
+ * td->unix_epoch, which __add_log_sample() adds to sample times so log
+ * entries carry absolute timestamps.
+ */
+void set_epoch_time(struct thread_data *td, int log_unix_epoch)
+{
+	fio_gettime(&td->epoch, NULL);
+	if (log_unix_epoch) {
+		struct timeval tv;
+		gettimeofday(&tv, NULL);
+		td->unix_epoch = (unsigned long long)(tv.tv_sec) * 1000 +
+				(unsigned long long)(tv.tv_usec) / 1000;
+	}
+}
+
void fill_start_time(struct timeval *t)
{
memcpy(t, &genesis, sizeof(genesis));
--- /dev/null
+*.pyc
+*.ipynb
+.ipynb_checkpoints
--- /dev/null
+#!/usr/bin/env python2.7
+"""
+ Utility for converting *_clat_hist* files generated by fio into latency statistics.
+
+ Example usage:
+
+ $ fiologparser_hist.py *_clat_hist*
+ end-time, samples, min, avg, median, 90%, 95%, 99%, max
+ 1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
+ 2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
+ 4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+ ...
+
+ @author Karl Cronburg <karl.cronburg@gmail.com>
+"""
+import os
+import sys
+import pandas
+import numpy as np
+
+err = sys.stderr.write
+
+def weighted_percentile(percs, vs, ws):
+ """ Use linear interpolation to calculate the weighted percentile.
+
+ Value and weight arrays are first sorted by value. The cumulative
+ distribution function (cdf) is then computed, after which np.interp
+ finds the two values closest to our desired weighted percentile(s)
+ and linearly interpolates them.
+
+ percs :: List of percentiles we want to calculate
+ vs :: Array of values we are computing the percentile of
+ ws :: Array of weights for our corresponding values
+ return :: Array of percentiles
+ """
+ idx = np.argsort(vs)
+ vs, ws = vs[idx], ws[idx] # weights and values sorted by value
+ cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
+ return np.interp(percs, cdf, vs) # linear interpolation
+
+def weights(start_ts, end_ts, start, end):
+ """ Calculate weights based on fraction of sample falling in the
+ given interval [start,end]. Weights computed using vector / array
+ computation instead of for-loops.
+
+ Note that samples with zero time length are effectively ignored
+ (we set their weight to zero).
+
+ start_ts :: Array of start times for a set of samples
+ end_ts :: Array of end times for a set of samples
+ start :: int
+ end :: int
+ return :: Array of weights
+ """
+ sbounds = np.maximum(start_ts, start).astype(float)
+ ebounds = np.minimum(end_ts, end).astype(float)
+ ws = (ebounds - sbounds) / (end_ts - start_ts)
+ if np.any(np.isnan(ws)):
+ err("WARNING: zero-length sample(s) detected. Log file corrupt"
+ " / bad time values? Ignoring these samples.\n")
+ ws[np.where(np.isnan(ws))] = 0.0;
+ return ws
+
+def weighted_average(vs, ws):
+ return np.sum(vs * ws) / np.sum(ws)
+
+columns = ["end-time", "samples", "min", "avg", "median", "90%", "95%", "99%", "max"]
+percs = [50, 90, 95, 99]
+
+def fmt_float_list(ctx, num=1):
+ """ Return a comma separated list of float formatters to the required number
+ of decimal places. For instance:
+
+ fmt_float_list(ctx.decimals=4, num=3) == "%.4f, %.4f, %.4f"
+ """
+ return ', '.join(["%%.%df" % ctx.decimals] * num)
+
+# Default values - see beginning of main() for how we detect number columns in
+# the input files:
+__HIST_COLUMNS = 1216
+__NON_HIST_COLUMNS = 3
+__TOTAL_COLUMNS = __HIST_COLUMNS + __NON_HIST_COLUMNS
+
+def read_chunk(rdr, sz):
+ """ Read the next chunk of size sz from the given reader. """
+ try:
+ """ StopIteration occurs when the pandas reader is empty, and AttributeError
+ occurs if rdr is None due to the file being empty. """
+ new_arr = rdr.read().values
+ except (StopIteration, AttributeError):
+ return None
+
+ """ Extract array of just the times, and histograms matrix without times column. """
+ times, rws, szs = new_arr[:,0], new_arr[:,1], new_arr[:,2]
+ hists = new_arr[:,__NON_HIST_COLUMNS:]
+ times = times.reshape((len(times),1))
+ arr = np.append(times, hists, axis=1)
+
+ return arr
+
+def get_min(fps, arrs):
+    """ Find the file whose current first row has the smallest start time """
+    return min([fp for fp in fps if not arrs[fp] is None], key=lambda fp: arrs.get(fp)[0][0])
+
+def histogram_generator(ctx, fps, sz):
+
+ # Create a chunked pandas reader for each of the files:
+ rdrs = {}
+ for fp in fps:
+ try:
+ rdrs[fp] = pandas.read_csv(fp, dtype=int, header=None, chunksize=sz)
+ except ValueError as e:
+ if e.message == 'No columns to parse from file':
+ if ctx.warn: sys.stderr.write("WARNING: Empty input file encountered.\n")
+ rdrs[fp] = None
+ else:
+ raise(e)
+
+ # Initial histograms from disk:
+ arrs = {fp: read_chunk(rdr, sz) for fp,rdr in rdrs.items()}
+ while True:
+
+ try:
+ """ ValueError occurs when nothing more to read """
+ fp = get_min(fps, arrs)
+ except ValueError:
+ return
+ arr = arrs[fp]
+ yield np.insert(arr[0], 1, fps.index(fp))
+ arrs[fp] = arr[1:]
+
+ if arrs[fp].shape[0] == 0:
+ arrs[fp] = read_chunk(rdrs[fp], sz)
+
+def _plat_idx_to_val(idx, edge=0.5, FIO_IO_U_PLAT_BITS=6, FIO_IO_U_PLAT_VAL=64):
+ """ Taken from fio's stat.c for calculating the latency value of a bin
+ from that bin's index.
+
+ idx : the value of the index into the histogram bins
+ edge : fractional value in the range [0,1]** indicating how far into
+ the bin we wish to compute the latency value of.
+
+ ** edge = 0.0 and 1.0 computes the lower and upper latency bounds
+ respectively of the given bin index. """
+
+ # MSB <= (FIO_IO_U_PLAT_BITS-1), cannot be rounded off. Use
+ # all bits of the sample as index
+ if (idx < (FIO_IO_U_PLAT_VAL << 1)):
+ return idx
+
+ # Find the group and compute the minimum value of that group
+ error_bits = (idx >> FIO_IO_U_PLAT_BITS) - 1
+ base = 1 << (error_bits + FIO_IO_U_PLAT_BITS)
+
+    # Find the bucket number within the group
+    k = idx % FIO_IO_U_PLAT_VAL
+
+ # Return the mean (if edge=0.5) of the range of the bucket
+ return base + ((k + edge) * (1 << error_bits))
+
+def plat_idx_to_val_coarse(idx, coarseness, edge=0.5):
+ """ Converts the given *coarse* index into a non-coarse index as used by fio
+ in stat.h:plat_idx_to_val(), subsequently computing the appropriate
+ latency value for that bin.
+ """
+
+    # Multiply the index by the power-of-2 coarseness to get the non-coarse
+    # bin index, with a max of 1536 bins (FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h)
+ stride = 1 << coarseness
+ idx = idx * stride
+ lower = _plat_idx_to_val(idx, edge=0.0)
+ upper = _plat_idx_to_val(idx + stride, edge=1.0)
+ return lower + (upper - lower) * edge
+
+def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx):
+ ps = weighted_percentile(percs, vs, ws)
+
+ avg = weighted_average(vs, ws)
+ values = [mn, avg] + list(ps) + [mx]
+ row = [end, ss_cnt] + map(lambda x: float(x) / ctx.divisor, values)
+ fmt = "%d, %d, %d, " + fmt_float_list(ctx, 5) + ", %d"
+ print (fmt % tuple(row))
+
+def update_extreme(val, fncn, new_val):
+ """ Calculate min / max in the presence of None values """
+ if val is None: return new_val
+ else: return fncn(val, new_val)
+
+# See beginning of main() for how bin_vals are computed
+bin_vals = []
+lower_bin_vals = [] # lower edge of each bin
+upper_bin_vals = [] # upper edge of each bin
+
+def process_interval(ctx, samples, iStart, iEnd):
+ """ Construct the weighted histogram for the given interval by scanning
+ through all the histograms and figuring out which of their bins have
+ samples with latencies which overlap with the given interval
+ [iStart,iEnd].
+ """
+
+ times, files, hists = samples[:,0], samples[:,1], samples[:,2:]
+ iHist = np.zeros(__HIST_COLUMNS)
+ ss_cnt = 0 # number of samples affecting this interval
+ mn_bin_val, mx_bin_val = None, None
+
+ for end_time,file,hist in zip(times,files,hists):
+
+ # Only look at bins of the current histogram sample which
+ # started before the end of the current time interval [start,end]
+ start_times = (end_time - 0.5 * ctx.interval) - bin_vals / 1000.0
+ idx = np.where(start_times < iEnd)
+ s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx]
+
+ # Increment current interval histogram by weighted values of future histogram:
+ ws = hs * weights(s_ts, end_time, iStart, iEnd)
+ iHist[idx] += ws
+
+ # Update total number of samples affecting current interval histogram:
+ ss_cnt += np.sum(hs)
+
+ # Update min and max bin values seen if necessary:
+ idx = np.where(hs != 0)[0]
+ if idx.size > 0:
+ mn_bin_val = update_extreme(mn_bin_val, min, l_bvs[max(0, idx[0] - 1)])
+ mx_bin_val = update_extreme(mx_bin_val, max, u_bvs[min(len(hs) - 1, idx[-1] + 1)])
+
+ if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val)
+
+def guess_max_from_bins(ctx, hist_cols):
+ """ Try to guess the GROUP_NR from given # of histogram
+ columns seen in an input file """
+ max_coarse = 8
+ if ctx.group_nr < 19 or ctx.group_nr > 26:
+ bins = [ctx.group_nr * (1 << 6)]
+ else:
+ bins = [1216,1280,1344,1408,1472,1536,1600,1664]
+ coarses = range(max_coarse + 1)
+ fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else -10, coarses))
+
+ arr = np.transpose(list(map(fncn, bins)))
+ idx = np.where(arr == hist_cols)
+ if len(idx[1]) == 0:
+ table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array',' ')
+ err("Unable to determine bin values from input clat_hist files. Namely \n"
+ "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) +
+ "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n"
+ "This number needs to be equal to one of the following numbers:\n\n"
+ + table + "\n\n"
+ "Possible reasons and corresponding solutions:\n"
+ " - Input file(s) does not contain histograms.\n"
+ " - You recompiled fio with a different GROUP_NR. If so please specify this\n"
+ " new GROUP_NR on the command line with --group_nr\n")
+ exit(1)
+ return bins[idx[1][0]]
+
+def main(ctx):
+
+ if ctx.job_file:
+ try:
+ from configparser import SafeConfigParser, NoOptionError
+ except ImportError:
+ from ConfigParser import SafeConfigParser, NoOptionError
+
+ cp = SafeConfigParser(allow_no_value=True)
+ with open(ctx.job_file, 'r') as fp:
+ cp.readfp(fp)
+
+ if ctx.interval is None:
+ # Auto detect --interval value
+ for s in cp.sections():
+ try:
+ hist_msec = cp.get(s, 'log_hist_msec')
+ if hist_msec is not None:
+ ctx.interval = int(hist_msec)
+ except NoOptionError:
+ pass
+
+ if ctx.interval is None:
+ ctx.interval = 1000
+
+ # Automatically detect how many columns are in the input files,
+ # calculate the corresponding 'coarseness' parameter used to generate
+ # those files, and calculate the appropriate bin latency values:
+ with open(ctx.FILE[0], 'r') as fp:
+ global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
+ __TOTAL_COLUMNS = len(fp.readline().split(','))
+ __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
+
+ max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
+ coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
+ bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float)
+ lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float)
+ upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float)
+
+ fps = [open(f, 'r') for f in ctx.FILE]
+ gen = histogram_generator(ctx, fps, ctx.buff_size)
+
+ print(', '.join(columns))
+
+ try:
+ start, end = 0, ctx.interval
+ arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1))
+ more_data = True
+ while more_data or len(arr) > 0:
+
+ # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval.
+ while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end:
+ try:
+ new_arr = next(gen)
+ except StopIteration:
+ more_data = False
+ break
+ arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0)
+ arr = arr.astype(int)
+
+ if arr.size > 0:
+ # Jump immediately to the start of the input, rounding
+ # down to the nearest multiple of the interval (useful when --log_unix_epoch
+ # was used to create these histograms):
+ if start == 0 and arr[0][0] - ctx.max_latency > end:
+ start = arr[0][0] - ctx.max_latency
+ start = start - (start % ctx.interval)
+ end = start + ctx.interval
+
+ process_interval(ctx, arr, start, end)
+
+ # Update arr to throw away samples we no longer need - samples which
+ # end before the start of the next interval, i.e. the end of the
+ # current interval:
+ idx = np.where(arr[:,0] > end)
+ arr = arr[idx]
+
+ start += ctx.interval
+ end = start + ctx.interval
+ finally:
+ map(lambda f: f.close(), fps)
+
+
+if __name__ == '__main__':
+ import argparse
+ p = argparse.ArgumentParser()
+ arg = p.add_argument
+ arg("FILE", help='space separated list of latency log filenames', nargs='+')
+ arg('--buff_size',
+ default=10000,
+ type=int,
+ help='number of samples to buffer into numpy at a time')
+
+ arg('--max_latency',
+ default=20,
+ type=float,
+ help='number of seconds of data to process at a time')
+
+ arg('-i', '--interval',
+ type=int,
+ help='interval width (ms), default 1000 ms')
+
+ arg('-d', '--divisor',
+ required=False,
+ type=int,
+ default=1,
+ help='divide the results by this value.')
+
+ arg('--decimals',
+ default=3,
+ type=int,
+ help='number of decimal places to print floats to')
+
+ arg('--warn',
+ dest='warn',
+ action='store_true',
+ default=False,
+ help='print warning messages to stderr')
+
+ arg('--group_nr',
+ default=19,
+ type=int,
+ help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')
+
+ arg('--job-file',
+ default=None,
+ type=str,
+ help='Optional argument pointing to the job file used to create the '
+ 'given histogram files. Useful for auto-detecting --log_hist_msec and '
+ '--log_unix_epoch (in fio) values.')
+
+ main(p.parse_args())
+
--- /dev/null
+.TH fiologparser_hist.py 1 "August 18, 2016"
+.SH NAME
+fiologparser_hist.py \- Calculate statistics from fio histograms
+.SH SYNOPSIS
+.B fiologparser_hist.py
+[\fIoptions\fR] [clat_hist_files]...
+.SH DESCRIPTION
+.B fiologparser_hist.py
+is a utility for converting *_clat_hist* files
+generated by fio into a CSV of latency statistics including minimum,
+average, maximum latency, and 50th, 90th, 95th, and 99th percentiles.
+.SH EXAMPLES
+.PP
+.nf
+$ fiologparser_hist.py *_clat_hist*
+end-time, samples, min, avg, median, 90%, 95%, 99%, max
+1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
+2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
+4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+...
+.fi
+.PP
+
+.SH OPTIONS
+.TP
+.BR \-\-help
+Print these options.
+.TP
+.BR \-\-buff_size \fR=\fPint
+Number of samples to buffer into numpy at a time. Default is 10,000.
+This can be adjusted to help performance.
+.TP
+.BR \-\-max_latency \fR=\fPint
+Number of seconds of data to process at a time. Defaults to 20 seconds,
+in order to handle the 17 second upper bound on latency in histograms
+reported by fio. This should be increased if fio has been
+run with a larger maximum latency. Lowering this when a lower maximum
+latency is known can improve performance. See NOTES for more details.
+.TP
+.BR \-i ", " \-\-interval \fR=\fPint
+Interval at which statistics are reported. Defaults to 1000 ms. This
+should be set to a minimum of the value for \fBlog_hist_msec\fR as given
+to fio.
+.TP
+.BR \-d ", " \-\-divisor \fR=\fPint
+Divide statistics by this value. Defaults to 1. Useful if you want to
+convert latencies from milliseconds to seconds (\fBdivisor\fR=\fP1000\fR).
+.TP
+.BR \-\-warn
+Enables warning messages printed to stderr, useful for debugging.
+.TP
+.BR \-\-group_nr \fR=\fPint
+Set this to the value of \fIFIO_IO_U_PLAT_GROUP_NR\fR as defined in
+\fPstat.h\fR if fio has been recompiled. Defaults to 19, the
+current value used in fio. See NOTES for more details.
+
+.SH NOTES
+end-times are calculated to be uniform increments of the \fB\-\-interval\fR value given,
+regardless of when histogram samples are reported. Of note:
+
+.RS
+Intervals with no samples are omitted. In the example above this means
+"no statistics from 2 to 3 seconds" and "39 samples influenced the statistics
+of the interval from 3 to 4 seconds".
+.LP
+Intervals with a single sample will have the same value for all statistics
+.RE
+
+.PP
+The number of samples is unweighted, corresponding to the total number of samples
+which have any effect whatsoever on the interval.
+
+Min statistics are computed using value of the lower boundary of the first bin
+(in increasing bin order) with non-zero samples in it. Similarly for max,
+we take the upper boundary of the last bin with non-zero samples in it.
+This is semantically identical to taking the 0th and 100th percentiles with a
+50% bin-width buffer (because percentiles are computed using mid-points of
+the bins). This enforces the following nice properties:
+
+.RS
+min <= 50th <= 90th <= 95th <= 99th <= max
+.LP
+min and max are strict lower and upper bounds on the actual
+min / max seen by fio (and reported in *_clat.* with averaging turned off).
+.RE
+
+.PP
+Average statistics use a standard weighted arithmetic mean.
+
+Percentile statistics are computed using the weighted percentile method as
+described here: \fIhttps://en.wikipedia.org/wiki/Percentile#Weighted_percentile\fR.
+See weights() method for details on how weights are computed for individual
+samples. In process_interval() we further multiply by the height of each bin
+to get weighted histograms.
+
+We convert files given on the command line, assumed to be fio histogram files,
+into weighted latency statistics. An individual histogram file can contain the
+histograms for multiple different r/w directions (notably when \fB\-\-rw\fR=\fPrandrw\fR). This
+is accounted for by tracking each r/w direction separately. In the statistics
+reported we ultimately merge *all* histograms (regardless of r/w direction).
+
+The value of *_GROUP_NR in \fIstat.h\fR (and *_BITS) determines how many latency bins
+fio outputs when histogramming is enabled. Namely for the current default of
+GROUP_NR=19, we get 1,216 bins with a maximum latency of approximately 17
+seconds. For certain applications this may not be sufficient. With GROUP_NR=24
+we have 1,536 bins, giving us a maximum latency of 541 seconds (~ 9 minutes). If
+you expect your application to experience latencies greater than 17 seconds,
+you will need to recompile fio with a larger GROUP_NR, e.g. with:
+
+.RS
+.PP
+.nf
+sed -i.bak 's/^#define FIO_IO_U_PLAT_GROUP_NR 19\n/#define FIO_IO_U_PLAT_GROUP_NR 24/g' stat.h
+make fio
+.fi
+.PP
+.RE
+
+.PP
+Quick reference table for the max latency corresponding to a sampling of
+values for GROUP_NR:
+
+.RS
+.PP
+.nf
+GROUP_NR | # bins | max latency bin value
+19 | 1216 | 16.9 sec
+20 | 1280 | 33.8 sec
+21 | 1344 | 67.6 sec
+22 | 1408 | 2 min, 15 sec
+23 | 1472 | 4 min, 32 sec
+24 | 1536 | 9 min, 4 sec
+25 | 1600 | 18 min, 8 sec
+26 | 1664 | 36 min, 16 sec
+.fi
+.PP
+.RE
+
+.PP
+At present this program automatically detects the number of histogram bins in
+the log files, and adjusts the bin latency values accordingly. In particular if
+you use the \fB\-\-log_hist_coarseness\fR parameter of fio, you get output files with
+a number of bins according to the following table (note that the first
+row is identical to the table above):
+
+.RS
+.PP
+.nf
+coarse \\ GROUP_NR
+ 19 20 21 22 23 24 25 26
+ -------------------------------------------------------
+ 0 [[ 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664],
+ 1 [ 608, 640, 672, 704, 736, 768, 800, 832],
+ 2 [ 304, 320, 336, 352, 368, 384, 400, 416],
+ 3 [ 152, 160, 168, 176, 184, 192, 200, 208],
+ 4 [ 76, 80, 84, 88, 92, 96, 100, 104],
+ 5 [ 38, 40, 42, 44, 46, 48, 50, 52],
+ 6 [ 19, 20, 21, 22, 23, 24, 25, 26],
+ 7 [ N/A, 10, N/A, 11, N/A, 12, N/A, 13],
+ 8 [ N/A, 5, N/A, N/A, N/A, 6, N/A, N/A]]
+.fi
+.PP
+.RE
+
+.PP
+For other values of GROUP_NR and coarseness, this table can be computed like this:
+
+.RS
+.PP
+.nf
+bins = [1216,1280,1344,1408,1472,1536,1600,1664]
+max_coarse = 8
+fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else nan, range(max_coarse + 1)))
+np.transpose(list(map(fncn, bins)))
+.fi
+.PP
+.RE
+
+.PP
+If you have not adjusted GROUP_NR for your (high latency) application, then you
+will see the percentiles computed by this tool max out at the max latency bin
+value as in the first table above, and in this plot (where GROUP_NR=19 and thus we see
+a max latency of ~16.7 seconds in the red line):
+
+.RS
+\fIhttps://www.cronburg.com/fio/max_latency_bin_value_bug.png
+.RE
+
+.PP
+Motivation for, design decisions, and the implementation process are
+described in further detail here:
+
+.RS
+\fIhttps://www.cronburg.com/fio/cloud-latency-problem-measurement/
+.RE
+
+.SH AUTHOR
+.B fiologparser_hist.py
+and this manual page were written by Karl Cronburg <karl.cronburg@gmail.com>.
+.SH "REPORTING BUGS"
+Report bugs to the \fBfio\fR mailing list <fio@vger.kernel.org>.
--- /dev/null
+#!/usr/bin/env python2.7
+""" Cut the number bins in half in fio histogram output. Example usage:
+
+ $ half-bins.py -c 2 output_clat_hist.1.log > smaller_clat_hist.1.log
+
+ Which merges e.g. bins [0 .. 3], [4 .. 7], ..., [1212 .. 1215] resulting in
+ 304 = 1216 / (2**2) merged bins per histogram sample.
+
+ @author Karl Cronburg <karl.cronburg@gmail.com>
+"""
+import sys
+
+def main(ctx):
+ stride = 1 << ctx.coarseness
+ with open(ctx.FILENAME, 'r') as fp:
+ for line in fp.readlines():
+ vals = line.split(', ')
+ sys.stdout.write("%s, %s, %s, " % tuple(vals[:3]))
+
+ hist = list(map(int, vals[3:]))
+ for i in range(0, len(hist) - stride, stride):
+ sys.stdout.write("%d, " % sum(hist[i : i + stride],))
+ sys.stdout.write("%d\n" % sum(hist[len(hist) - stride:]))
+
+if __name__ == '__main__':
+ import argparse
+ p = argparse.ArgumentParser()
+ arg = p.add_argument
+ arg( 'FILENAME', help='clat_hist file for which we will reduce'
+ ' (by half or more) the number of bins.')
+ arg('-c', '--coarseness',
+ default=1,
+ type=int,
+ help='number of times to reduce number of bins by half, '
+ 'e.g. coarseness of 4 merges each 2^4 = 16 consecutive '
+ 'bins.')
+ main(p.parse_args())
+
#include "trim.h"
#ifdef FIO_HAVE_TRIM
-int get_next_trim(struct thread_data *td, struct io_u *io_u)
+bool get_next_trim(struct thread_data *td, struct io_u *io_u)
{
struct io_piece *ipo;
* this io_u is from a requeue, we already filled the offsets
*/
if (io_u->file)
- return 0;
+ return true;
if (flist_empty(&td->trim_list))
- return 1;
+ return false;
assert(td->trim_entries);
ipo = flist_first_entry(&td->trim_list, struct io_piece, trim_list);
if (r) {
dprint(FD_VERIFY, "failed file %s open\n",
io_u->file->file_name);
- return 1;
+ return false;
}
}
io_u->xfer_buflen = io_u->buflen;
dprint(FD_VERIFY, "get_next_trim: ret io_u %p\n", io_u);
- return 0;
+ return true;
}
-int io_u_should_trim(struct thread_data *td, struct io_u *io_u)
+bool io_u_should_trim(struct thread_data *td, struct io_u *io_u)
{
unsigned long long val;
uint64_t frand_max;
unsigned long r;
if (!td->o.trim_percentage)
- return 0;
+ return false;
frand_max = rand_max(&td->trim_state);
r = __rand(&td->trim_state);
#include "fio.h"
#ifdef FIO_HAVE_TRIM
-extern int __must_check get_next_trim(struct thread_data *td, struct io_u *io_u);
-extern int io_u_should_trim(struct thread_data *td, struct io_u *io_u);
+extern bool __must_check get_next_trim(struct thread_data *td, struct io_u *io_u);
+extern bool io_u_should_trim(struct thread_data *td, struct io_u *io_u);
/*
* Determine whether a given io_u should be logged for verify or
}
#else
-static inline int get_next_trim(struct thread_data *td, struct io_u *io_u)
+static inline bool get_next_trim(struct thread_data *td, struct io_u *io_u)
{
- return 1;
+ return false;
}
-static inline int io_u_should_trim(struct thread_data *td, struct io_u *io_u)
+static inline bool io_u_should_trim(struct thread_data *td, struct io_u *io_u)
{
- return 0;
+ return false;
}
static inline void remove_trim_entry(struct thread_data *td, struct io_piece *ipo)
{
#include <stdint.h>
#include <string.h>
+#include <limits.h>
struct thread_rand32_state {
uint32_t s[4];
(void)cpy_pattern(td->o.buffer_pattern, td->o.buffer_pattern_bytes, p, len);
}
-void __fill_buffer(struct thread_options *o, unsigned long seed, void *p,
- unsigned int len)
+static void __fill_buffer(struct thread_options *o, unsigned long seed, void *p,
+ unsigned int len)
{
__fill_random_buf_percentage(seed, p, o->compress_percentage, len, len, o->buffer_pattern, o->buffer_pattern_bytes);
}
-unsigned long fill_buffer(struct thread_data *td, void *p, unsigned int len)
+static unsigned long fill_buffer(struct thread_data *td, void *p,
+ unsigned int len)
{
struct frand_state *fs = &td->verify_state;
struct thread_options *o = &td->o;
if (io_u->flags & IO_U_F_IN_CUR_DEPTH) {
td->cur_depth--;
- io_u_clear(io_u, IO_U_F_IN_CUR_DEPTH);
+ io_u_clear(td, io_u, IO_U_F_IN_CUR_DEPTH);
}
flist_add_tail(&io_u->verify_list, &td->verify_list);
*io_u_ptr = NULL;
* If the IO engine is faking IO (like null), then just pretend
* we verified everything.
*/
- if (td->io_ops->flags & FIO_FAKEIO)
+ if (td_ioengine_flagged(td, FIO_FAKEIO))
return 0;
if (io_u->flags & IO_U_F_TRIMMED) {
io_u->buflen = ipo->len;
io_u->numberio = ipo->numberio;
io_u->file = ipo->file;
- io_u_set(io_u, IO_U_F_VER_LIST);
+ io_u_set(td, io_u, IO_U_F_VER_LIST);
if (ipo->flags & IP_F_TRIMMED)
- io_u_set(io_u, IO_U_F_TRIMMED);
+ io_u_set(td, io_u, IO_U_F_TRIMMED);
if (!fio_file_open(io_u->file)) {
int r = td_io_open_file(td, io_u->file);
io_u = flist_first_entry(&list, struct io_u, verify_list);
flist_del_init(&io_u->verify_list);
- io_u_set(io_u, IO_U_F_NO_FILE_PUT);
+ io_u_set(td, io_u, IO_U_F_NO_FILE_PUT);
ret = verify_io_u(td, &io_u);
put_io_u(td, io_u);