Merge branch 'master' into gfio
authorJens Axboe <axboe@kernel.dk>
Wed, 14 Nov 2012 21:25:31 +0000 (14:25 -0700)
committerJens Axboe <axboe@kernel.dk>
Wed, 14 Nov 2012 21:25:31 +0000 (14:25 -0700)
Conflicts:
Makefile
backend.c
client.c
fio.h
options.c
parse.c
parse.h
server.c
server.h

Signed-off-by: Jens Axboe <axboe@kernel.dk>
21 files changed:
1  2 
HOWTO
Makefile
backend.c
cconv.c
client.c
client.h
eta.c
filesetup.c
fio.1
fio.h
init.c
io_u.c
lib/rbtree.c
lib/rbtree.h
options.c
parse.c
parse.h
server.c
server.h
stat.c
thread_options.h

diff --combined HOWTO
index 1fb30db656e4c722dab850c8de6863ec62fe3f97,40fe65fe9bc6d65cfc14cbe2e5565189fee2ae9d..56118140621803d434264910e0329e196d7db2a4
--- 1/HOWTO
--- 2/HOWTO
+++ b/HOWTO
@@@ -780,6 -780,9 +780,9 @@@ rate_iops_min=int If fio doesn't meet t
                the job to exit. The same format as rate is used for read vs
                write seperation.
  
+ max_latency=int       If set, fio will exit the job if it exceeds this maximum
+               latency. It will exit with an ETIME error.
  ratecycle=int Average bandwidth for 'rate' and 'ratemin' over this number
                of milliseconds.
  
@@@ -799,6 -802,24 +802,24 @@@ cpus_allowed=str Controls the same opti
                allows a range of CPUs. Say you wanted a binding to CPUs
                1, 5, and 8-15, you would set cpus_allowed=1,5,8-15.
  
+ numa_cpu_nodes=str Set this job running on specified NUMA nodes' CPUs. The
+               arguments allow a comma delimited list of cpu numbers,
+               A-B ranges, or 'all'. Note, to enable numa options support,
+               export the following environment variables,
+                       export EXTFLAGS+=" -DFIO_HAVE_LIBNUMA "
+                       export EXTLIBS+=" -lnuma "
+ numa_mem_policy=str Set this job's memory policy and corresponding NUMA
+               nodes. Format of the arguments:
+                       <mode>[:<nodelist>]
+               `mode' is one of the following memory policies:
+                       default, prefer, bind, interleave, local
+               For `default' and `local' memory policy, no node
+               needs to be specified.
+               For `prefer', only one node is allowed.
+               For `bind' and `interleave', it allows a comma delimited
+               list of numbers, A-B ranges, or 'all'.
  startdelay=time       Start this job the specified number of seconds after fio
                has started. Only useful if the job file contains several
                jobs, and you want to delay starting some jobs to a certain
@@@ -1165,6 -1186,12 +1186,6 @@@ exec_postrun=str After the job complete
  ioscheduler=str       Attempt to switch the device hosting the file to the specified
                io scheduler before running.
  
 -cpuload=int   If the job is a CPU cycle eater, attempt to use the specified
 -              percentage of CPU cycles.
 -
 -cpuchunks=int If the job is a CPU cycle eater, split the load into
 -              cycles of the given time. In microseconds.
 -
  disk_util=bool        Generate disk utilization statistics, if the platform
                supports it. Defaults to on.
  
@@@ -1308,11 -1335,6 +1329,11 @@@ that defines them is selected
                enabled when polling for a minimum of 0 events (eg when
                iodepth_batch_complete=0).
  
 +[cpu] cpuload=int Attempt to use the specified percentage of CPU cycles.
 +
 +[cpu] cpuchunks=int Split the load into cycles of the given time. In
 +              microseconds.
 +
  [netsplice] hostname=str
  [net] hostname=str The host name or IP address to use for TCP or UDP based IO.
                If the job is a TCP listener or UDP reader, the hostname is not
@@@ -1363,7 -1385,7 +1384,7 @@@ Idle    Ru
  ----    ---
  P             Thread setup, but not started.
  C             Thread created.
- I             Thread initialized, waiting.
+ I             Thread initialized, waiting or generating necessary data.
        p       Thread running pre-reading file(s).
        R       Running, doing sequential reads.
        r       Running, doing random reads.
diff --combined Makefile
index d851640ac38eda2c39da25b9528fbab02268b06f,358977054dbe18b21173e0cc9d85ad15bf4c417b..b0b68574b7a40da8e36796b42ac96e7498d8996b
+++ b/Makefile
@@@ -1,24 -1,23 +1,26 @@@
- CC    ?= gcc
+ ifneq ($(origin CC), environment)
+ CC    = gcc
+ endif
  DEBUGFLAGS = -D_FORTIFY_SOURCE=2 -DFIO_INC_DEBUG
  CPPFLAGS= -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 \
        $(DEBUGFLAGS)
  OPTFLAGS= -O3 -fno-omit-frame-pointer -g $(EXTFLAGS)
  CFLAGS        = -std=gnu99 -Wwrite-strings -Wall $(OPTFLAGS)
 -LIBS  = -lm $(EXTLIBS)
 +LIBS  = -lm -lz $(EXTLIBS)
  PROGS = fio
  SCRIPTS = fio_generate_plots
  UNAME  := $(shell uname)
  
 -SOURCE := gettime.c fio.c ioengines.c init.c stat.c log.c time.c filesetup.c \
 +GTK_CFLAGS = `pkg-config --cflags gtk+-2.0 gthread-2.0`
 +GTK_LDFLAGS = `pkg-config --libs gtk+-2.0 gthread-2.0`
 +
 +SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \
                eta.c verify.c memory.c io_u.c parse.c mutex.c options.c \
 -              rbtree.c smalloc.c filehash.c profile.c debug.c lib/rand.c \
 +              lib/rbtree.c smalloc.c filehash.c profile.c debug.c lib/rand.c \
                lib/num2str.c lib/ieee754.c $(wildcard crc/*.c) engines/cpu.c \
                engines/mmap.c engines/sync.c engines/null.c engines/net.c \
                memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
-               cconv.c lib/prio_tree.c json.c
 -              json.c lib/zipf.c gettime-thread.c
++              cconv.c lib/prio_tree.c lib/zipf.c json.c gettime-thread.c
  
  ifeq ($(UNAME), Linux)
    SOURCE += diskutil.c fifo.c blktrace.c helpers.c cgroup.c trim.c \
@@@ -70,20 -69,26 +72,29 @@@ ifneq (,$(findstring CYGWIN,$(UNAME))
  endif
  
  OBJS = $(SOURCE:.c=.o)
 +FIO_OBJS = $(OBJS) fio.o
 +GFIO_OBJS = $(OBJS) gfio.o graph.o tickmarks.o ghelpers.o goptions.o gerror.o \
 +                      gclient.o gcompat.o cairo_text_helpers.o printing.o
  
  T_SMALLOC_OBJS = t/stest.o
- T_SMALLOC_OBJS += mutex.o smalloc.o t/log.o gettime.o time.o
+ T_SMALLOC_OBJS += gettime.o mutex.o smalloc.o t/log.o
  T_SMALLOC_PROGS = t/stest
  
  T_IEEE_OBJS = t/ieee754.o
  T_IEEE_OBJS += lib/ieee754.o
  T_IEEE_PROGS = t/ieee754
  
+ T_ZIPF_OBS = t/genzipf.o
+ T_ZIPF_OBJS += t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o
+ T_ZIPF_PROGS = t/genzipf
  T_OBJS = $(T_SMALLOC_OBJS)
  T_OBJS += $(T_IEEE_OBJS)
+ T_OBJS += $(T_ZIPF_OBJS)
+ T_PROGS = $(T_SMALLOC_PROGS)
+ T_PROGS += $(T_IEEE_PROGS)
+ T_PROGS += $(T_ZIPF_PROGS)
  
  ifneq ($(findstring $(MAKEFLAGS),s),s)
  ifndef V
@@@ -119,52 -124,25 +130,55 @@@ CFLAGS += -DFIO_VERSION='"$(FIO_VERSION
  init.o: FIO-VERSION-FILE
        $(QUIET_CC)$(CC) -o init.o -c $(CFLAGS) $(CPPFLAGS) -c init.c
  
 +gcompat.o: gcompat.c gcompat.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c gcompat.c
 +
 +goptions.o: goptions.c goptions.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c goptions.c
 +
 +ghelpers.o: ghelpers.c ghelpers.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c ghelpers.c
 +
 +gerror.o: gerror.c gerror.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c gerror.c
 +
 +gclient.o: gclient.c gclient.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c gclient.c
 +
 +gfio.o: gfio.c ghelpers.c
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c gfio.c
 +
 +graph.o: graph.c graph.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c graph.c
 +
 +cairo_text_helpers.o: cairo_text_helpers.c cairo_text_helpers.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c cairo_text_helpers.c
 +
 +printing.o: printing.c printing.h
 +      $(QUIET_CC)$(CC) $(CFLAGS) $(GTK_CFLAGS) $(CPPFLAGS) -c printing.c
 +
  t/stest: $(T_SMALLOC_OBJS)
        $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_SMALLOC_OBJS) $(LIBS) $(LDFLAGS)
  
  t/ieee754: $(T_IEEE_OBJS)
        $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_IEEE_OBJS) $(LIBS) $(LDFLAGS)
  
 -fio: $(OBJS)
 -      $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(OBJS) $(LIBS) $(LDFLAGS)
 -
 +fio: $(FIO_OBJS)
 +      $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(FIO_OBJS) $(LIBS) $(LDFLAGS)
 +
 +gfio: $(GFIO_OBJS)
 +      $(QUIET_CC)$(CC) $(LIBS) -o gfio $(GFIO_OBJS) $(LIBS) $(GTK_LDFLAGS)
 +
+ t/genzipf: $(T_ZIPF_OBJS)
+       $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_ZIPF_OBJS) $(LIBS) $(LDFLAGS)
  .depend: $(SOURCE)
        $(QUIET_DEP)$(CC) -MM $(CFLAGS) $(CPPFLAGS) $(SOURCE) 1> .depend
  
  $(PROGS): .depend
  
  clean: FORCE
 -      -rm -f .depend $(OBJS) $(T_OBJS) $(PROGS) $(T_PROGS) core.* core FIO-VERSION-FILE
 +      -rm -f .depend $(GFIO_OBJS )$(OBJS) $(T_OBJS) $(PROGS) $(T_PROGS) core.* core gfio FIO-VERSION-FILE
  
  cscope:
        @cscope -b -R
@@@ -179,5 -157,3 +193,5 @@@ install: $(PROGS) $(SCRIPTS) FORC
  ifneq ($(wildcard .depend),)
  include .depend
  endif
 +
 +
diff --combined backend.c
index 974384c17fb04d1dc10bea616835595496a29fe4,b80c9038fee4155a1728bdd8559b7ed8f1735dcf..39ef759f3187fcf89b18ee748bf97441a96e2df0
+++ b/backend.c
@@@ -57,13 -57,14 +57,14 @@@ static struct flist_head *cgroup_list
  static char *cgroup_mnt;
  static int exit_value;
  static volatile int fio_abort;
 +static unsigned int nr_process = 0;
 +static unsigned int nr_thread = 0;
  
  struct io_log *agg_io_log[DDIR_RWDIR_CNT];
  
  int groupid = 0;
  unsigned int thread_number = 0;
 -unsigned int nr_process = 0;
 -unsigned int nr_thread = 0;
+ unsigned int stat_number = 0;
  int shm_id = 0;
  int temp_stall_ts;
  unsigned long done_secs = 0;
@@@ -591,7 -592,7 +592,7 @@@ static void do_io(struct thread_data *t
                int ret2, full;
                enum fio_ddir ddir;
  
-               if (td->terminate)
+               if (td->terminate || td->done)
                        break;
  
                update_tv_cache(td);
@@@ -726,7 -727,7 +727,7 @@@ sync_done
  
                if (ret < 0)
                        break;
-               if (!ddir_rw_sum(bytes_done))
+               if (!ddir_rw_sum(bytes_done) && !(td->io_ops->flags & FIO_NOIO))
                        continue;
  
                if (!in_ramp_time(td) && should_check_rate(td, bytes_done)) {
@@@ -985,12 -986,10 +986,12 @@@ static void *thread_main(void *data
  {
        unsigned long long elapsed;
        struct thread_data *td = data;
 +      struct thread_options *o = &td->o;
        pthread_condattr_t attr;
        int clear_state;
 +      int ret;
  
 -      if (!td->o.use_thread) {
 +      if (!o->use_thread) {
                setsid();
                td->pid = getpid();
        } else
  
        dprint(FD_PROCESS, "jobs pid=%d started\n", (int) td->pid);
  
 +      if (is_backend)
 +              fio_server_send_start(td);
 +
        INIT_FLIST_HEAD(&td->io_u_freelist);
        INIT_FLIST_HEAD(&td->io_u_busylist);
        INIT_FLIST_HEAD(&td->io_u_requeues);
         * eating a file descriptor
         */
        fio_mutex_remove(td->mutex);
 +      td->mutex = NULL;
  
        /*
         * A new gid requires privilege, so we need to do this before setting
         * the uid.
         */
 -      if (td->o.gid != -1U && setgid(td->o.gid)) {
 +      if (o->gid != -1U && setgid(o->gid)) {
                td_verror(td, errno, "setgid");
                goto err;
        }
 -      if (td->o.uid != -1U && setuid(td->o.uid)) {
 +      if (o->uid != -1U && setuid(o->uid)) {
                td_verror(td, errno, "setuid");
                goto err;
        }
         * If we have a gettimeofday() thread, make sure we exclude that
         * thread from this job
         */
 -      if (td->o.gtod_cpu)
 -              fio_cpu_clear(&td->o.cpumask, td->o.gtod_cpu);
 +      if (o->gtod_cpu)
 +              fio_cpu_clear(&o->cpumask, o->gtod_cpu);
  
        /*
         * Set affinity first, in case it has an impact on the memory
         * allocations.
         */
 -      if (td->o.cpumask_set && fio_setaffinity(td->pid, td->o.cpumask) == -1) {
 -              td_verror(td, errno, "cpu_set_affinity");
 -              goto err;
 +      if (o->cpumask_set) {
 +              ret = fio_setaffinity(td->pid, o->cpumask);
 +              if (ret == -1) {
 +                      td_verror(td, errno, "cpu_set_affinity");
 +                      goto err;
 +              }
        }
  
 +      if (fio_pin_memory(td))
 +              goto err;
 +
+ #ifdef FIO_HAVE_LIBNUMA
+       /* numa node setup */
+       if (td->o.numa_cpumask_set || td->o.numa_memmask_set) {
+               int ret;
+               if (numa_available() < 0) {
+                       td_verror(td, errno, "Does not support NUMA API\n");
+                       goto err;
+               }
+               if (td->o.numa_cpumask_set) {
+                       ret = numa_run_on_node_mask(td->o.numa_cpunodesmask);
+                       if (ret == -1) {
+                               td_verror(td, errno, \
+                                       "numa_run_on_node_mask failed\n");
+                               goto err;
+                       }
+               }
+               if (td->o.numa_memmask_set) {
+                       switch (td->o.numa_mem_mode) {
+                       case MPOL_INTERLEAVE:
+                               numa_set_interleave_mask(td->o.numa_memnodesmask);
+                               break;
+                       case MPOL_BIND:
+                               numa_set_membind(td->o.numa_memnodesmask);
+                               break;
+                       case MPOL_LOCAL:
+                               numa_set_localalloc();
+                               break;
+                       case MPOL_PREFERRED:
+                               numa_set_preferred(td->o.numa_mem_prefer_node);
+                               break;
+                       case MPOL_DEFAULT:
+                       default:
+                               break;
+                       }
+               }
+       }
+ #endif
        /*
         * May alter parameters that init_io_u() will use, so we need to
         * do this first.
        if (init_io_u(td))
                goto err;
  
 -      if (td->o.verify_async && verify_async_init(td))
 +      if (o->verify_async && verify_async_init(td))
                goto err;
  
 -      if (td->ioprio_set) {
 -              if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
 +      if (o->ioprio) {
 +              ret = ioprio_set(IOPRIO_WHO_PROCESS, 0, o->ioprio_class, o->ioprio);
 +              if (ret == -1) {
                        td_verror(td, errno, "ioprio_set");
                        goto err;
                }
                goto err;
  
        errno = 0;
 -      if (nice(td->o.nice) == -1 && errno != 0) {
 +      if (nice(o->nice) == -1 && errno != 0) {
                td_verror(td, errno, "nice");
                goto err;
        }
  
 -      if (td->o.ioscheduler && switch_ioscheduler(td))
 +      if (o->ioscheduler && switch_ioscheduler(td))
                goto err;
  
 -      if (!td->o.create_serialize && setup_files(td))
 +      if (!o->create_serialize && setup_files(td))
                goto err;
  
        if (td_io_init(td))
        if (init_random_map(td))
                goto err;
  
 -      if (td->o.exec_prerun) {
 -              if (exec_string(td->o.exec_prerun))
 -                      goto err;
 -      }
 +      if (o->exec_prerun && exec_string(o->exec_prerun))
 +              goto err;
  
 -      if (td->o.pre_read) {
 +      if (o->pre_read) {
                if (pre_read_files(td) < 0)
                        goto err;
        }
  
 +      fio_verify_init(td);
 +
        fio_gettime(&td->epoch, NULL);
        getrusage(RUSAGE_SELF, &td->ru_start);
  
        td->ts.io_bytes[DDIR_WRITE] = td->io_bytes[DDIR_WRITE];
        td->ts.io_bytes[DDIR_TRIM] = td->io_bytes[DDIR_TRIM];
  
 +      fio_unpin_memory(td);
 +
        fio_mutex_down(writeout_mutex);
        if (td->bw_log) {
                if (td->o.bw_log_file) {
@@@ -1246,8 -1275,8 +1290,8 @@@ err
        cleanup_io_u(td);
        cgroup_shutdown(td, &cgroup_mnt);
  
 -      if (td->o.cpumask_set) {
 -              int ret = fio_cpuset_exit(&td->o.cpumask);
 +      if (o->cpumask_set) {
 +              int ret = fio_cpuset_exit(&o->cpumask);
  
                td_verror(td, ret, "fio_cpuset_exit");
        }
@@@ -1401,19 -1430,14 +1445,19 @@@ static void run_threads(void
        unsigned long spent;
        unsigned int i, todo, nr_running, m_rate, t_rate, nr_started;
  
 -      if (fio_pin_memory())
 -              return;
 -
        if (fio_gtod_offload && fio_start_gtod_thread())
                return;
  
        set_sig_handlers();
  
 +      nr_thread = nr_process = 0;
 +      for_each_td(td, i) {
 +              if (td->o.use_thread)
 +                      nr_thread++;
 +              else
 +                      nr_process++;
 +      }
 +
        if (output_format == FIO_OUTPUT_NORMAL) {
                log_info("Starting ");
                if (nr_thread)
  
                reap_threads(&nr_running, &t_rate, &m_rate);
  
 -              if (todo) {
 -                      if (is_backend)
 -                              fio_server_idle_loop();
 -                      else
 -                              usleep(100000);
 -              }
 +              if (todo)
 +                      usleep(100000);
        }
  
        while (nr_running) {
                reap_threads(&nr_running, &t_rate, &m_rate);
 -
 -              if (is_backend)
 -                      fio_server_idle_loop();
 -              else
 -                      usleep(10000);
 +              usleep(10000);
        }
  
        update_io_ticks();
 -      fio_unpin_memory();
  }
  
  void wait_for_disk_thread_exit(void)
@@@ -1700,9 -1733,9 +1744,9 @@@ int fio_backend(void
                return 0;
  
        if (write_bw_log) {
 -              setup_log(&agg_io_log[DDIR_READ], 0);
 -              setup_log(&agg_io_log[DDIR_WRITE], 0);
 -              setup_log(&agg_io_log[DDIR_TRIM], 0);
 +              setup_log(&agg_io_log[DDIR_READ], 0, IO_LOG_TYPE_BW);
 +              setup_log(&agg_io_log[DDIR_WRITE], 0, IO_LOG_TYPE_BW);
 +              setup_log(&agg_io_log[DDIR_TRIM], 0, IO_LOG_TYPE_BW);
        }
  
        startup_mutex = fio_mutex_init(FIO_MUTEX_LOCKED);
diff --combined cconv.c
index b023315f2bb1fbfeb0f00026e284fe3b9e34b203,0000000000000000000000000000000000000000..ca97c7379f8527d5255a60991f064b245e95119b
mode 100644,000000..100644
--- /dev/null
+++ b/cconv.c
@@@ -1,402 -1,0 +1,410 @@@
 +#include <string.h>
 +
 +#include "thread_options.h"
 +
 +static void string_to_cpu(char **dst, const uint8_t *src)
 +{
 +      const char *__src = (const char *) src;
 +
 +      if (strlen(__src))
 +              *dst = strdup(__src);
 +}
 +
 +static void string_to_net(uint8_t *dst, const char *src)
 +{
 +      if (src)
 +              strcpy((char *) dst, src);
 +      else
 +              dst[0] = '\0';
 +}
 +
 +void convert_thread_options_to_cpu(struct thread_options *o,
 +                                 struct thread_options_pack *top)
 +{
 +      int i, j;
 +
 +      string_to_cpu(&o->description, top->description);
 +      string_to_cpu(&o->name, top->name);
 +      string_to_cpu(&o->directory, top->directory);
 +      string_to_cpu(&o->filename, top->filename);
 +      string_to_cpu(&o->opendir, top->opendir);
 +      string_to_cpu(&o->ioengine, top->ioengine);
 +      string_to_cpu(&o->mmapfile, top->mmapfile);
 +      string_to_cpu(&o->read_iolog_file, top->read_iolog_file);
 +      string_to_cpu(&o->write_iolog_file, top->write_iolog_file);
 +      string_to_cpu(&o->bw_log_file, top->bw_log_file);
 +      string_to_cpu(&o->lat_log_file, top->lat_log_file);
 +      string_to_cpu(&o->iops_log_file, top->iops_log_file);
 +      string_to_cpu(&o->replay_redirect, top->replay_redirect);
 +      string_to_cpu(&o->exec_prerun, top->exec_prerun);
 +      string_to_cpu(&o->exec_postrun, top->exec_postrun);
 +      string_to_cpu(&o->ioscheduler, top->ioscheduler);
 +      string_to_cpu(&o->profile, top->profile);
 +      string_to_cpu(&o->cgroup, top->cgroup);
 +
 +      o->td_ddir = le32_to_cpu(top->td_ddir);
 +      o->rw_seq = le32_to_cpu(top->rw_seq);
 +      o->kb_base = le32_to_cpu(top->kb_base);
 +      o->ddir_seq_nr = le32_to_cpu(top->ddir_seq_nr);
 +      o->ddir_seq_add = le64_to_cpu(top->ddir_seq_add);
 +      o->iodepth = le32_to_cpu(top->iodepth);
 +      o->iodepth_low = le32_to_cpu(top->iodepth_low);
 +      o->iodepth_batch = le32_to_cpu(top->iodepth_batch);
 +      o->iodepth_batch_complete = le32_to_cpu(top->iodepth_batch_complete);
 +      o->size = le64_to_cpu(top->size);
 +      o->size_percent = le32_to_cpu(top->size_percent);
 +      o->fill_device = le32_to_cpu(top->fill_device);
 +      o->file_size_low = le64_to_cpu(top->file_size_low);
 +      o->file_size_high = le64_to_cpu(top->file_size_high);
 +      o->start_offset = le64_to_cpu(top->start_offset);
 +
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 +              o->bs[i] = le32_to_cpu(top->bs[i]);
 +              o->ba[i] = le32_to_cpu(top->ba[i]);
 +              o->min_bs[i] = le32_to_cpu(top->min_bs[i]);
 +              o->max_bs[i] = le32_to_cpu(top->max_bs[i]);
 +              o->bssplit_nr[i] = le32_to_cpu(top->bssplit_nr[i]);
 +
 +              if (o->bssplit_nr[i]) {
 +                      o->bssplit[i] = malloc(o->bssplit_nr[i] * sizeof(struct bssplit));
 +                      for (j = 0; j < o->bssplit_nr[i]; j++) {
 +                              o->bssplit[i][j].bs = le32_to_cpu(top->bssplit[i][j].bs);
 +                              o->bssplit[i][j].perc = le32_to_cpu(top->bssplit[i][j].perc);
 +                      }
 +              }
 +
 +              o->rwmix[i] = le32_to_cpu(top->rwmix[i]);
 +              o->rate[i] = le32_to_cpu(top->rate[i]);
 +              o->ratemin[i] = le32_to_cpu(top->ratemin[i]);
 +              o->rate_iops[i] = le32_to_cpu(top->rate_iops[i]);
 +              o->rate_iops_min[i] = le32_to_cpu(top->rate_iops_min[i]);
 +      }
 +
 +      o->ratecycle = le32_to_cpu(top->ratecycle);
 +      o->nr_files = le32_to_cpu(top->nr_files);
 +      o->open_files = le32_to_cpu(top->open_files);
 +      o->file_lock_mode = le32_to_cpu(top->file_lock_mode);
 +      o->lockfile_batch = le32_to_cpu(top->lockfile_batch);
 +      o->odirect = le32_to_cpu(top->odirect);
 +      o->invalidate_cache = le32_to_cpu(top->invalidate_cache);
 +      o->create_serialize = le32_to_cpu(top->create_serialize);
 +      o->create_fsync = le32_to_cpu(top->create_fsync);
 +      o->create_on_open = le32_to_cpu(top->create_on_open);
 +      o->create_only = le32_to_cpu(top->create_only);
 +      o->end_fsync = le32_to_cpu(top->end_fsync);
 +      o->pre_read = le32_to_cpu(top->pre_read);
 +      o->sync_io = le32_to_cpu(top->sync_io);
 +      o->verify = le32_to_cpu(top->verify);
 +      o->do_verify = le32_to_cpu(top->do_verify);
 +      o->verifysort = le32_to_cpu(top->verifysort);
 +      o->verify_interval = le32_to_cpu(top->verify_interval);
 +      o->verify_offset = le32_to_cpu(top->verify_offset);
 +
 +      memcpy(o->verify_pattern, top->verify_pattern, MAX_PATTERN_SIZE);
 +
 +      o->verify_pattern_bytes = le32_to_cpu(top->verify_pattern_bytes);
 +      o->verify_fatal = le32_to_cpu(top->verify_fatal);
 +      o->verify_dump = le32_to_cpu(top->verify_dump);
 +      o->verify_async = le32_to_cpu(top->verify_async);
 +      o->verify_batch = le32_to_cpu(top->verify_batch);
 +      o->use_thread = le32_to_cpu(top->use_thread);
 +      o->unlink = le32_to_cpu(top->unlink);
 +      o->do_disk_util = le32_to_cpu(top->do_disk_util);
 +      o->override_sync = le32_to_cpu(top->override_sync);
 +      o->rand_repeatable = le32_to_cpu(top->rand_repeatable);
 +      o->use_os_rand = le32_to_cpu(top->use_os_rand);
 +      o->log_avg_msec = le32_to_cpu(top->log_avg_msec);
 +      o->norandommap = le32_to_cpu(top->norandommap);
 +      o->softrandommap = le32_to_cpu(top->softrandommap);
 +      o->bs_unaligned = le32_to_cpu(top->bs_unaligned);
 +      o->fsync_on_close = le32_to_cpu(top->fsync_on_close);
++      o->random_distribution = le32_to_cpu(top->random_distribution);
++      o->zipf_theta.u.f = fio_uint64_to_double(le64_to_cpu(top->zipf_theta.u.i));
++      o->pareto_h.u.f = fio_uint64_to_double(le64_to_cpu(top->pareto_h.u.i));
 +      o->hugepage_size = le32_to_cpu(top->hugepage_size);
 +      o->rw_min_bs = le32_to_cpu(top->rw_min_bs);
 +      o->thinktime = le32_to_cpu(top->thinktime);
 +      o->thinktime_spin = le32_to_cpu(top->thinktime_spin);
 +      o->thinktime_blocks = le32_to_cpu(top->thinktime_blocks);
 +      o->fsync_blocks = le32_to_cpu(top->fsync_blocks);
 +      o->fdatasync_blocks = le32_to_cpu(top->fdatasync_blocks);
 +      o->barrier_blocks = le32_to_cpu(top->barrier_blocks);
 +
 +      o->verify_backlog = le64_to_cpu(top->verify_backlog);
 +      o->start_delay = le64_to_cpu(top->start_delay);
 +      o->timeout = le64_to_cpu(top->timeout);
 +      o->ramp_time = le64_to_cpu(top->ramp_time);
 +      o->zone_range = le64_to_cpu(top->zone_range);
 +      o->zone_size = le64_to_cpu(top->zone_size);
 +      o->zone_skip = le64_to_cpu(top->zone_skip);
 +      o->lockmem = le64_to_cpu(top->lockmem);
 +      o->offset_increment = le64_to_cpu(top->offset_increment);
 +
 +      o->overwrite = le32_to_cpu(top->overwrite);
 +      o->bw_avg_time = le32_to_cpu(top->bw_avg_time);
 +      o->iops_avg_time = le32_to_cpu(top->iops_avg_time);
 +      o->loops = le32_to_cpu(top->loops);
 +      o->mem_type = le32_to_cpu(top->mem_type);
 +      o->mem_align = le32_to_cpu(top->mem_align);
++      o->max_latency = le32_to_cpu(top->max_latency);
 +      o->stonewall = le32_to_cpu(top->stonewall);
 +      o->new_group = le32_to_cpu(top->new_group);
 +      o->numjobs = le32_to_cpu(top->numjobs);
 +      o->cpumask_set = le32_to_cpu(top->cpumask_set);
 +      o->verify_cpumask_set = le32_to_cpu(top->verify_cpumask_set);
 +      o->iolog = le32_to_cpu(top->iolog);
 +      o->rwmixcycle = le32_to_cpu(top->rwmixcycle);
 +      o->nice = le32_to_cpu(top->nice);
 +      o->ioprio = le32_to_cpu(top->ioprio);
 +      o->ioprio_class = le32_to_cpu(top->ioprio_class);
 +      o->file_service_type = le32_to_cpu(top->file_service_type);
 +      o->group_reporting = le32_to_cpu(top->group_reporting);
 +      o->fadvise_hint = le32_to_cpu(top->fadvise_hint);
 +      o->fallocate_mode = le32_to_cpu(top->fallocate_mode);
 +      o->zero_buffers = le32_to_cpu(top->zero_buffers);
 +      o->refill_buffers = le32_to_cpu(top->refill_buffers);
 +      o->scramble_buffers = le32_to_cpu(top->scramble_buffers);
 +      o->time_based = le32_to_cpu(top->time_based);
 +      o->disable_lat = le32_to_cpu(top->disable_lat);
 +      o->disable_clat = le32_to_cpu(top->disable_clat);
 +      o->disable_slat = le32_to_cpu(top->disable_slat);
 +      o->disable_bw = le32_to_cpu(top->disable_bw);
 +      o->gtod_reduce = le32_to_cpu(top->gtod_reduce);
 +      o->gtod_cpu = le32_to_cpu(top->gtod_cpu);
 +      o->gtod_offload = le32_to_cpu(top->gtod_offload);
 +      o->clocksource = le32_to_cpu(top->clocksource);
 +      o->no_stall = le32_to_cpu(top->no_stall);
 +      o->trim_percentage = le32_to_cpu(top->trim_percentage);
 +      o->trim_batch = le32_to_cpu(top->trim_batch);
 +      o->trim_zero = le32_to_cpu(top->trim_zero);
 +      o->clat_percentiles = le32_to_cpu(top->clat_percentiles);
 +      o->overwrite_plist = le32_to_cpu(top->overwrite_plist);
 +      o->continue_on_error = le32_to_cpu(top->continue_on_error);
 +      o->cgroup_weight = le32_to_cpu(top->cgroup_weight);
 +      o->cgroup_nodelete = le32_to_cpu(top->cgroup_nodelete);
 +      o->uid = le32_to_cpu(top->uid);
 +      o->gid = le32_to_cpu(top->gid);
 +      o->flow_id = __le32_to_cpu(top->flow_id);
 +      o->flow = __le32_to_cpu(top->flow);
 +      o->flow_watermark = __le32_to_cpu(top->flow_watermark);
 +      o->flow_sleep = le32_to_cpu(top->flow_sleep);
 +      o->sync_file_range = le32_to_cpu(top->sync_file_range);
 +      o->compress_percentage = le32_to_cpu(top->compress_percentage);
 +      o->compress_chunk = le32_to_cpu(top->compress_chunk);
 +
 +      o->trim_backlog = le64_to_cpu(top->trim_backlog);
 +
 +      for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
 +              o->percentile_list[i].u.f = fio_uint64_to_double(le64_to_cpu(top->percentile_list[i].u.i));
 +#if 0
 +      uint8_t cpumask[FIO_TOP_STR_MAX];
 +      uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 +#endif
 +}
 +
 +void convert_thread_options_to_net(struct thread_options_pack *top,
 +                                 struct thread_options *o)
 +{
 +      int i, j;
 +
 +      string_to_net(top->description, o->description);
 +      string_to_net(top->name, o->name);
 +      string_to_net(top->directory, o->directory);
 +      string_to_net(top->filename, o->filename);
 +      string_to_net(top->opendir, o->opendir);
 +      string_to_net(top->ioengine, o->ioengine);
 +      string_to_net(top->mmapfile, o->mmapfile);
 +      string_to_net(top->read_iolog_file, o->read_iolog_file);
 +      string_to_net(top->write_iolog_file, o->write_iolog_file);
 +      string_to_net(top->bw_log_file, o->bw_log_file);
 +      string_to_net(top->lat_log_file, o->lat_log_file);
 +      string_to_net(top->iops_log_file, o->iops_log_file);
 +      string_to_net(top->replay_redirect, o->replay_redirect);
 +      string_to_net(top->exec_prerun, o->exec_prerun);
 +      string_to_net(top->exec_postrun, o->exec_postrun);
 +      string_to_net(top->ioscheduler, o->ioscheduler);
 +      string_to_net(top->profile, o->profile);
 +      string_to_net(top->cgroup, o->cgroup);
 +
 +      top->td_ddir = cpu_to_le32(o->td_ddir);
 +      top->rw_seq = cpu_to_le32(o->rw_seq);
 +      top->kb_base = cpu_to_le32(o->kb_base);
 +      top->ddir_seq_nr = cpu_to_le32(o->ddir_seq_nr);
 +      top->iodepth = cpu_to_le32(o->iodepth);
 +      top->iodepth_low = cpu_to_le32(o->iodepth_low);
 +      top->iodepth_batch = cpu_to_le32(o->iodepth_batch);
 +      top->iodepth_batch_complete = cpu_to_le32(o->iodepth_batch_complete);
 +      top->size_percent = cpu_to_le32(o->size_percent);
 +      top->fill_device = cpu_to_le32(o->fill_device);
 +      top->ratecycle = cpu_to_le32(o->ratecycle);
 +      top->nr_files = cpu_to_le32(o->nr_files);
 +      top->open_files = cpu_to_le32(o->open_files);
 +      top->file_lock_mode = cpu_to_le32(o->file_lock_mode);
 +      top->lockfile_batch = cpu_to_le32(o->lockfile_batch);
 +      top->odirect = cpu_to_le32(o->odirect);
 +      top->invalidate_cache = cpu_to_le32(o->invalidate_cache);
 +      top->create_serialize = cpu_to_le32(o->create_serialize);
 +      top->create_fsync = cpu_to_le32(o->create_fsync);
 +      top->create_on_open = cpu_to_le32(o->create_on_open);
 +      top->create_only = cpu_to_le32(o->create_only);
 +      top->end_fsync = cpu_to_le32(o->end_fsync);
 +      top->pre_read = cpu_to_le32(o->pre_read);
 +      top->sync_io = cpu_to_le32(o->sync_io);
 +      top->verify = cpu_to_le32(o->verify);
 +      top->do_verify = cpu_to_le32(o->do_verify);
 +      top->verifysort = cpu_to_le32(o->verifysort);
 +      top->verify_interval = cpu_to_le32(o->verify_interval);
 +      top->verify_offset = cpu_to_le32(o->verify_offset);
 +      top->verify_pattern_bytes = cpu_to_le32(o->verify_pattern_bytes);
 +      top->verify_fatal = cpu_to_le32(o->verify_fatal);
 +      top->verify_dump = cpu_to_le32(o->verify_dump);
 +      top->verify_async = cpu_to_le32(o->verify_async);
 +      top->verify_batch = cpu_to_le32(o->verify_batch);
 +      top->use_thread = cpu_to_le32(o->use_thread);
 +      top->unlink = cpu_to_le32(o->unlink);
 +      top->do_disk_util = cpu_to_le32(o->do_disk_util);
 +      top->override_sync = cpu_to_le32(o->override_sync);
 +      top->rand_repeatable = cpu_to_le32(o->rand_repeatable);
 +      top->use_os_rand = cpu_to_le32(o->use_os_rand);
 +      top->log_avg_msec = cpu_to_le32(o->log_avg_msec);
 +      top->norandommap = cpu_to_le32(o->norandommap);
 +      top->softrandommap = cpu_to_le32(o->softrandommap);
 +      top->bs_unaligned = cpu_to_le32(o->bs_unaligned);
 +      top->fsync_on_close = cpu_to_le32(o->fsync_on_close);
++      top->random_distribution = cpu_to_le32(o->random_distribution);
++      top->zipf_theta.u.i = __cpu_to_le64(fio_double_to_uint64(o->zipf_theta.u.f));
++      top->pareto_h.u.i = __cpu_to_le64(fio_double_to_uint64(o->pareto_h.u.f));
 +      top->hugepage_size = cpu_to_le32(o->hugepage_size);
 +      top->rw_min_bs = cpu_to_le32(o->rw_min_bs);
 +      top->thinktime = cpu_to_le32(o->thinktime);
 +      top->thinktime_spin = cpu_to_le32(o->thinktime_spin);
 +      top->thinktime_blocks = cpu_to_le32(o->thinktime_blocks);
 +      top->fsync_blocks = cpu_to_le32(o->fsync_blocks);
 +      top->fdatasync_blocks = cpu_to_le32(o->fdatasync_blocks);
 +      top->barrier_blocks = cpu_to_le32(o->barrier_blocks);
 +      top->overwrite = cpu_to_le32(o->overwrite);
 +      top->bw_avg_time = cpu_to_le32(o->bw_avg_time);
 +      top->iops_avg_time = cpu_to_le32(o->iops_avg_time);
 +      top->loops = cpu_to_le32(o->loops);
 +      top->mem_type = cpu_to_le32(o->mem_type);
 +      top->mem_align = cpu_to_le32(o->mem_align);
++      top->max_latency = cpu_to_le32(o->max_latency);
 +      top->stonewall = cpu_to_le32(o->stonewall);
 +      top->new_group = cpu_to_le32(o->new_group);
 +      top->numjobs = cpu_to_le32(o->numjobs);
 +      top->cpumask_set = cpu_to_le32(o->cpumask_set);
 +      top->verify_cpumask_set = cpu_to_le32(o->verify_cpumask_set);
 +      top->iolog = cpu_to_le32(o->iolog);
 +      top->rwmixcycle = cpu_to_le32(o->rwmixcycle);
 +      top->nice = cpu_to_le32(o->nice);
 +      top->ioprio = cpu_to_le32(o->ioprio);
 +      top->ioprio_class = cpu_to_le32(o->ioprio_class);
 +      top->file_service_type = cpu_to_le32(o->file_service_type);
 +      top->group_reporting = cpu_to_le32(o->group_reporting);
 +      top->fadvise_hint = cpu_to_le32(o->fadvise_hint);
 +      top->fallocate_mode = cpu_to_le32(o->fallocate_mode);
 +      top->zero_buffers = cpu_to_le32(o->zero_buffers);
 +      top->refill_buffers = cpu_to_le32(o->refill_buffers);
 +      top->scramble_buffers = cpu_to_le32(o->scramble_buffers);
 +      top->time_based = cpu_to_le32(o->time_based);
 +      top->disable_lat = cpu_to_le32(o->disable_lat);
 +      top->disable_clat = cpu_to_le32(o->disable_clat);
 +      top->disable_slat = cpu_to_le32(o->disable_slat);
 +      top->disable_bw = cpu_to_le32(o->disable_bw);
 +      top->gtod_reduce = cpu_to_le32(o->gtod_reduce);
 +      top->gtod_cpu = cpu_to_le32(o->gtod_cpu);
 +      top->gtod_offload = cpu_to_le32(o->gtod_offload);
 +      top->clocksource = cpu_to_le32(o->clocksource);
 +      top->no_stall = cpu_to_le32(o->no_stall);
 +      top->trim_percentage = cpu_to_le32(o->trim_percentage);
 +      top->trim_batch = cpu_to_le32(o->trim_batch);
 +      top->trim_zero = cpu_to_le32(o->trim_zero);
 +      top->clat_percentiles = cpu_to_le32(o->clat_percentiles);
 +      top->overwrite_plist = cpu_to_le32(o->overwrite_plist);
 +      top->continue_on_error = cpu_to_le32(o->continue_on_error);
 +      top->cgroup_weight = cpu_to_le32(o->cgroup_weight);
 +      top->cgroup_nodelete = cpu_to_le32(o->cgroup_nodelete);
 +      top->uid = cpu_to_le32(o->uid);
 +      top->gid = cpu_to_le32(o->gid);
 +      top->flow_id = __cpu_to_le32(o->flow_id);
 +      top->flow = __cpu_to_le32(o->flow);
 +      top->flow_watermark = __cpu_to_le32(o->flow_watermark);
 +      top->flow_sleep = cpu_to_le32(o->flow_sleep);
 +      top->sync_file_range = cpu_to_le32(o->sync_file_range);
 +      top->compress_percentage = cpu_to_le32(o->compress_percentage);
 +      top->compress_chunk = cpu_to_le32(o->compress_chunk);
 +
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 +              top->bs[i] = cpu_to_le32(o->bs[i]);
 +              top->ba[i] = cpu_to_le32(o->ba[i]);
 +              top->min_bs[i] = cpu_to_le32(o->min_bs[i]);
 +              top->max_bs[i] = cpu_to_le32(o->max_bs[i]);
 +              top->bssplit_nr[i] = cpu_to_le32(o->bssplit_nr[i]);
 +
 +              if (o->bssplit_nr[i]) {
 +                      unsigned int bssplit_nr = o->bssplit_nr[i];
 +
 +                      if (bssplit_nr > BSSPLIT_MAX) {
 +                              log_err("fio: BSSPLIT_MAX is too small\n");
 +                              bssplit_nr = BSSPLIT_MAX;
 +                      }
 +                      for (j = 0; j < bssplit_nr; j++) {
 +                              top->bssplit[i][j].bs = cpu_to_le32(o->bssplit[i][j].bs);
 +                              top->bssplit[i][j].perc = cpu_to_le32(o->bssplit[i][j].perc);
 +                      }
 +              }
 +
 +              top->rwmix[i] = cpu_to_le32(o->rwmix[i]);
 +              top->rate[i] = cpu_to_le32(o->rate[i]);
 +              top->ratemin[i] = cpu_to_le32(o->ratemin[i]);
 +              top->rate_iops[i] = cpu_to_le32(o->rate_iops[i]);
 +              top->rate_iops_min[i] = cpu_to_le32(o->rate_iops_min[i]);
 +      }
 +
 +      memcpy(top->verify_pattern, o->verify_pattern, MAX_PATTERN_SIZE);
 +
 +      top->size = __cpu_to_le64(o->size);
 +      top->verify_backlog = __cpu_to_le64(o->verify_backlog);
 +      top->start_delay = __cpu_to_le64(o->start_delay);
 +      top->timeout = __cpu_to_le64(o->timeout);
 +      top->ramp_time = __cpu_to_le64(o->ramp_time);
 +      top->zone_range = __cpu_to_le64(o->zone_range);
 +      top->zone_size = __cpu_to_le64(o->zone_size);
 +      top->zone_skip = __cpu_to_le64(o->zone_skip);
 +      top->lockmem = __cpu_to_le64(o->lockmem);
 +      top->ddir_seq_add = __cpu_to_le64(o->ddir_seq_add);
 +      top->file_size_low = __cpu_to_le64(o->file_size_low);
 +      top->file_size_high = __cpu_to_le64(o->file_size_high);
 +      top->start_offset = __cpu_to_le64(o->start_offset);
 +      top->trim_backlog = __cpu_to_le64(o->trim_backlog);
 +      top->offset_increment = __cpu_to_le64(o->offset_increment);
 +
 +      for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
 +              top->percentile_list[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->percentile_list[i].u.f));
 +#if 0
 +      uint8_t cpumask[FIO_TOP_STR_MAX];
 +      uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 +#endif
 +
 +}
 +
 +/*
 + * Basic conversion test. We'd really need to fill in more of the options
 + * to have a thorough test. Even better, we should auto-generate the
 + * converter functions...
 + */
 +int fio_test_cconv(struct thread_options *__o)
 +{
 +      struct thread_options o;
 +      struct thread_options_pack top1, top2;
 +
 +      memset(&top1, 0, sizeof(top1));
 +      memset(&top2, 0, sizeof(top2));
 +
 +      convert_thread_options_to_net(&top1, __o);
 +      memset(&o, 0, sizeof(o));
 +      convert_thread_options_to_cpu(&o, &top1);
 +      convert_thread_options_to_net(&top2, &o);
 +
 +      return memcmp(&top1, &top2, sizeof(top1));
 +}
diff --combined client.c
index 7b8dc61e4365aa4fd191d4ad0adf084b950ba037,a483913062cfb527f64c718356366cd6d2266c33..9cbbcf66734877dd79d32fa284e2d7d3f4bb7a3d
+++ b/client.c
  #include <arpa/inet.h>
  #include <netdb.h>
  #include <signal.h>
 +#include <zlib.h>
  
  #include "fio.h"
 +#include "client.h"
  #include "server.h"
  #include "flist.h"
  #include "hash.h"
  
 -struct client_eta {
 -      struct jobs_eta eta;
 -      unsigned int pending;
 -};
 -
 -struct fio_client {
 -      struct flist_head list;
 -      struct flist_head hash_list;
 -      struct flist_head arg_list;
 -      union {
 -              struct sockaddr_in addr;
 -              struct sockaddr_in6 addr6;
 -              struct sockaddr_un addr_un;
 -      };
 -      char *hostname;
 -      int port;
 -      int fd;
 -      unsigned int refs;
 -
 -      char *name;
 -
 -      int state;
 -
 -      int skip_newline;
 -      int is_sock;
 -      int disk_stats_shown;
 -      unsigned int jobs;
 -      unsigned int nr_stat;
 -      int error;
 -      int ipv6;
 -      int sent_job;
 -      int did_stat;
 -
 -      struct flist_head eta_list;
 -      struct client_eta *eta_in_flight;
 -
 -      struct flist_head cmd_list;
 -
 -      uint16_t argc;
 -      char **argv;
 -
 -      char **ini_file;
 -      unsigned int nr_ini_file;
 +static void handle_du(struct fio_client *client, struct fio_net_cmd *cmd);
 +static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd);
 +static void handle_gs(struct fio_client *client, struct fio_net_cmd *cmd);
 +static void handle_probe(struct fio_client *client, struct fio_net_cmd *cmd);
 +static void handle_text(struct fio_client *client, struct fio_net_cmd *cmd);
 +static void handle_stop(struct fio_client *client, struct fio_net_cmd *cmd);
 +static void handle_start(struct fio_client *client, struct fio_net_cmd *cmd);
 +
 +struct client_ops fio_client_ops = {
 +      .text           = handle_text,
 +      .disk_util      = handle_du,
 +      .thread_status  = handle_ts,
 +      .group_stats    = handle_gs,
 +      .stop           = handle_stop,
 +      .start          = handle_start,
 +      .eta            = display_thread_status,
 +      .probe          = handle_probe,
 +      .eta_msec       = FIO_CLIENT_DEF_ETA_MSEC,
 +      .client_type    = FIO_CLIENT_TYPE_CLI,
  };
  
  static struct timeval eta_tv;
  
 -enum {
 -      Client_created          = 0,
 -      Client_connected        = 1,
 -      Client_started          = 2,
 -      Client_running          = 3,
 -      Client_stopped          = 4,
 -      Client_exited           = 5,
 -};
 -
  static FLIST_HEAD(client_list);
  static FLIST_HEAD(eta_list);
  
  static FLIST_HEAD(arg_list);
  
 -static struct thread_stat client_ts;
 -static struct group_run_stats client_gs;
 -static int sum_stat_clients = 0;
 +struct thread_stat client_ts;
 +struct group_run_stats client_gs;
 +int sum_stat_clients;
 +
  static int sum_stat_nr;
+ static int do_output_all_clients;
  
  #define FIO_CLIENT_HASH_BITS  7
  #define FIO_CLIENT_HASH_SZ    (1 << FIO_CLIENT_HASH_BITS)
  #define FIO_CLIENT_HASH_MASK  (FIO_CLIENT_HASH_SZ - 1)
  static struct flist_head client_hash[FIO_CLIENT_HASH_SZ];
  
 -static int handle_client(struct fio_client *client);
 -static void dec_jobs_eta(struct client_eta *eta);
 -
  static void fio_client_add_hash(struct fio_client *client)
  {
        int bucket = hash_long(client->fd, FIO_CLIENT_HASH_BITS);
@@@ -101,11 -135,23 +102,11 @@@ static struct fio_client *find_client_b
        return NULL;
  }
  
 -static void remove_client(struct fio_client *client)
 +void fio_put_client(struct fio_client *client)
  {
 -      assert(client->refs);
 -
        if (--client->refs)
                return;
  
 -      dprint(FD_NET, "client: removed <%s>\n", client->hostname);
 -      flist_del(&client->list);
 -
 -      fio_client_remove_hash(client);
 -
 -      if (!flist_empty(&client->eta_list)) {
 -              flist_del_init(&client->eta_list);
 -              dec_jobs_eta(client->eta_in_flight);
 -      }
 -
        free(client->hostname);
        if (client->argv)
                free(client->argv);
        if (client->ini_file)
                free(client->ini_file);
  
+       if (!client->did_stat)
+               sum_stat_clients -= client->nr_stat;
        free(client);
 +}
 +
 +static void remove_client(struct fio_client *client)
 +{
 +      assert(client->refs);
 +
 +      dprint(FD_NET, "client: removed <%s>\n", client->hostname);
 +
 +      if (!flist_empty(&client->list))
 +              flist_del_init(&client->list);
 +
 +      fio_client_remove_hash(client);
 +
 +      if (!flist_empty(&client->eta_list)) {
 +              flist_del_init(&client->eta_list);
 +              fio_client_dec_jobs_eta(client->eta_in_flight, client->ops->eta);
 +      }
 +
 +      close(client->fd);
 +      client->fd = -1;
 +
 +      if (client->ops->removed)
 +              client->ops->removed(client);
 +
        nr_clients--;
-       sum_stat_clients--;
 +      fio_put_client(client);
  }
  
 -static void put_client(struct fio_client *client)
 +struct fio_client *fio_get_client(struct fio_client *client)
  {
 -      remove_client(client);
 +      client->refs++;
 +      return client;
  }
  
  static void __fio_client_add_cmd_option(struct fio_client *client,
@@@ -184,53 -205,6 +186,53 @@@ void fio_client_add_cmd_option(void *co
        }
  }
  
 +struct fio_client *fio_client_add_explicit(struct client_ops *ops,
 +                                         const char *hostname, int type,
 +                                         int port)
 +{
 +      struct fio_client *client;
 +
 +      client = malloc(sizeof(*client));
 +      memset(client, 0, sizeof(*client));
 +
 +      INIT_FLIST_HEAD(&client->list);
 +      INIT_FLIST_HEAD(&client->hash_list);
 +      INIT_FLIST_HEAD(&client->arg_list);
 +      INIT_FLIST_HEAD(&client->eta_list);
 +      INIT_FLIST_HEAD(&client->cmd_list);
 +
 +      client->hostname = strdup(hostname);
 +
 +      if (type == Fio_client_socket)
 +              client->is_sock = 1;
 +      else {
 +              int ipv6;
 +
 +              ipv6 = type == Fio_client_ipv6;
 +              if (fio_server_parse_host(hostname, &ipv6,
 +                                              &client->addr.sin_addr,
 +                                              &client->addr6.sin6_addr))
 +                      goto err;
 +
 +              client->port = port;
 +      }
 +
 +      client->fd = -1;
 +      client->ops = ops;
 +      client->refs = 1;
 +      client->type = ops->client_type;
 +
 +      __fio_client_add_cmd_option(client, "fio");
 +
 +      flist_add(&client->list, &client_list);
 +      nr_clients++;
 +      dprint(FD_NET, "client: added <%s>\n", client->hostname);
 +      return client;
 +err:
 +      free(client);
 +      return NULL;
 +}
 +
  void fio_client_add_ini_file(void *cookie, const char *ini_file)
  {
        struct fio_client *client = cookie;
        client->nr_ini_file++;
  }
  
 -int fio_client_add(const char *hostname, void **cookie)
 +int fio_client_add(struct client_ops *ops, const char *hostname, void **cookie)
  {
        struct fio_client *existing = *cookie;
        struct fio_client *client;
                return -1;
  
        client->fd = -1;
 +      client->ops = ops;
        client->refs = 1;
 +      client->type = ops->client_type;
  
        __fio_client_add_cmd_option(client, "fio");
  
        return 0;
  }
  
 +static void probe_client(struct fio_client *client)
 +{
 +      dprint(FD_NET, "client: send probe\n");
 +
 +      fio_net_send_simple_cmd(client->fd, FIO_NET_CMD_PROBE, 0, &client->cmd_list);
 +}
 +
  static int fio_client_connect_ip(struct fio_client *client)
  {
        struct sockaddr *addr;
  
        fd = socket(domain, SOCK_STREAM, 0);
        if (fd < 0) {
 +              int ret = -errno;
 +
                log_err("fio: socket: %s\n", strerror(errno));
 -              return -1;
 +              return ret;
        }
  
        if (connect(fd, addr, socklen) < 0) {
 +              int ret = -errno;
 +
                log_err("fio: connect: %s\n", strerror(errno));
                log_err("fio: failed to connect to %s:%u\n", client->hostname,
                                                                client->port);
                close(fd);
 -              return -1;
 +              return ret;
        }
  
        return fd;
@@@ -352,25 -313,21 +354,25 @@@ static int fio_client_connect_sock(stru
  
        fd = socket(AF_UNIX, SOCK_STREAM, 0);
        if (fd < 0) {
 +              int ret = -errno;
 +
                log_err("fio: socket: %s\n", strerror(errno));
 -              return -1;
 +              return ret;
        }
  
        len = sizeof(addr->sun_family) + strlen(addr->sun_path) + 1;
        if (connect(fd, (struct sockaddr *) addr, len) < 0) {
 +              int ret = -errno;
 +
                log_err("fio: connect; %s\n", strerror(errno));
                close(fd);
 -              return -1;
 +              return ret;
        }
  
        return fd;
  }
  
 -static int fio_client_connect(struct fio_client *client)
 +int fio_client_connect(struct fio_client *client)
  {
        int fd;
  
        dprint(FD_NET, "client: %s connected %d\n", client->hostname, fd);
  
        if (fd < 0)
 -              return 1;
 +              return fd;
  
        client->fd = fd;
        fio_client_add_hash(client);
        client->state = Client_connected;
 +
 +      probe_client(client);
        return 0;
  }
  
 +int fio_client_terminate(struct fio_client *client)
 +{
 +      return fio_net_send_quit(client->fd);
 +}
 +
  void fio_clients_terminate(void)
  {
        struct flist_head *entry;
  
        flist_for_each(entry, &client_list) {
                client = flist_entry(entry, struct fio_client, list);
 -
 -              fio_net_send_simple_cmd(client->fd, FIO_NET_CMD_QUIT, 0, NULL);
 +              fio_client_terminate(client);
        }
  }
  
@@@ -451,6 -402,13 +453,6 @@@ static void client_signal_handler(void
        sigaction(SIGUSR1, &act, NULL);
  }
  
 -static void probe_client(struct fio_client *client)
 -{
 -      dprint(FD_NET, "client: send probe\n");
 -
 -      fio_net_send_simple_cmd(client->fd, FIO_NET_CMD_PROBE, 0, &client->cmd_list);
 -}
 -
  static int send_client_cmd_line(struct fio_client *client)
  {
        struct cmd_single_line_pdu *cslp;
  
        free(lens);
        clp->lines = cpu_to_le16(client->argc);
 -      ret = fio_net_send_cmd(client->fd, FIO_NET_CMD_JOBLINE, pdu, mem, 0);
 +      clp->client_type = __cpu_to_le16(client->type);
 +      ret = fio_net_send_cmd(client->fd, FIO_NET_CMD_JOBLINE, pdu, mem, NULL, NULL);
        free(pdu);
        return ret;
  }
@@@ -507,7 -464,7 +509,7 @@@ int fio_clients_connect(void
  
  #ifdef WIN32
        WSADATA wsd;
 -      WSAStartup(MAKEWORD(2,2), &wsd);
 +      WSAStartup(MAKEWORD(2, 2), &wsd);
  #endif
  
        dprint(FD_NET, "client: connect all\n");
                        continue;
                }
  
 -              probe_client(client);
 -
                if (client->argc > 1)
                        send_client_cmd_line(client);
        }
        return !nr_clients;
  }
  
 +int fio_start_client(struct fio_client *client)
 +{
 +      dprint(FD_NET, "client: start %s\n", client->hostname);
 +      return fio_net_send_simple_cmd(client->fd, FIO_NET_CMD_RUN, 0, NULL);
 +}
 +
 +int fio_start_all_clients(void)
 +{
 +      struct fio_client *client;
 +      struct flist_head *entry, *tmp;
 +      int ret;
 +
 +      dprint(FD_NET, "client: start all\n");
 +
 +      flist_for_each_safe(entry, tmp, &client_list) {
 +              client = flist_entry(entry, struct fio_client, list);
 +
 +              ret = fio_start_client(client);
 +              if (ret) {
 +                      remove_client(client);
 +                      continue;
 +              }
 +      }
 +
 +      return flist_empty(&client_list);
 +}
 +
  /*
   * Send file contents to server backend. We could use sendfile(), but to remain
   * more portable lets just read/write the darn thing.
   */
 -static int fio_client_send_ini(struct fio_client *client, const char *filename)
 +static int __fio_client_send_ini(struct fio_client *client, const char *filename)
  {
 +      struct cmd_job_pdu *pdu;
 +      size_t p_size;
        struct stat sb;
 -      char *p, *buf;
 +      char *p;
 +      void *buf;
        off_t len;
        int fd, ret;
  
  
        fd = open(filename, O_RDONLY);
        if (fd < 0) {
 +              int ret = -errno;
 +
                log_err("fio: job file <%s> open: %s\n", filename, strerror(errno));
 -              return 1;
 +              return ret;
        }
  
        if (fstat(fd, &sb) < 0) {
 +              int ret = -errno;
 +
                log_err("fio: job file stat: %s\n", strerror(errno));
                close(fd);
 -              return 1;
 +              return ret;
        }
  
 -      buf = malloc(sb.st_size);
 +      p_size = sb.st_size + sizeof(*pdu);
 +      pdu = malloc(p_size);
 +      buf = pdu->buf;
  
        len = sb.st_size;
        p = buf;
                return 1;
        }
  
 +      pdu->buf_len = __cpu_to_le32(sb.st_size);
 +      pdu->client_type = cpu_to_le32(client->type);
 +
        client->sent_job = 1;
 -      ret = fio_net_send_cmd(client->fd, FIO_NET_CMD_JOB, buf, sb.st_size, 0);
 -      free(buf);
 +      ret = fio_net_send_cmd(client->fd, FIO_NET_CMD_JOB, pdu, p_size, NULL, NULL);
 +      free(pdu);
        close(fd);
        return ret;
  }
  
 +int fio_client_send_ini(struct fio_client *client, const char *filename)
 +{
 +      int ret;
 +
 +      ret = __fio_client_send_ini(client, filename);
 +      if (!ret)
 +              client->sent_job = 1;
 +
 +      return ret;
 +}
 +
  int fio_clients_send_ini(const char *filename)
  {
        struct fio_client *client;
                        }
                } else if (!filename || fio_client_send_ini(client, filename))
                        remove_client(client);
 -
 -              client->sent_job = 1;
        }
  
        return !nr_clients;
  }
  
 +int fio_client_update_options(struct fio_client *client,
 +                            struct thread_options *o, uint64_t *tag)
 +{
 +      struct cmd_add_job_pdu pdu;
 +
 +      pdu.thread_number = cpu_to_le32(client->thread_number);
 +      pdu.groupid = cpu_to_le32(client->groupid);
 +      convert_thread_options_to_net(&pdu.top, o);
 +      
 +      return fio_net_send_cmd(client->fd, FIO_NET_CMD_UPDATE_JOB, &pdu, sizeof(pdu), tag, &client->cmd_list);
 +}
 +
  static void convert_io_stat(struct io_stat *dst, struct io_stat *src)
  {
        dst->max_val    = le64_to_cpu(src->max_val);
@@@ -692,13 -591,12 +694,13 @@@ static void convert_ts(struct thread_st
  {
        int i, j;
  
 -      dst->error      = le32_to_cpu(src->error);
 -      dst->groupid    = le32_to_cpu(src->groupid);
 -      dst->pid        = le32_to_cpu(src->pid);
 -      dst->members    = le32_to_cpu(src->members);
 +      dst->error              = le32_to_cpu(src->error);
 +      dst->thread_number      = le32_to_cpu(src->thread_number);
 +      dst->groupid            = le32_to_cpu(src->groupid);
 +      dst->pid                = le32_to_cpu(src->pid);
 +      dst->members            = le32_to_cpu(src->members);
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                convert_io_stat(&dst->clat_stat[i], &src->clat_stat[i]);
                convert_io_stat(&dst->slat_stat[i], &src->slat_stat[i]);
                convert_io_stat(&dst->lat_stat[i], &src->lat_stat[i]);
                dst->io_u_lat_m[i]      = le32_to_cpu(src->io_u_lat_m[i]);
        }
  
 -      for (i = 0; i < 2; i++)
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++)
                for (j = 0; j < FIO_IO_U_PLAT_NR; j++)
                        dst->io_u_plat[i][j] = le32_to_cpu(src->io_u_plat[i][j]);
  
        dst->total_submit       = le64_to_cpu(src->total_submit);
        dst->total_complete     = le64_to_cpu(src->total_complete);
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                dst->io_bytes[i]        = le64_to_cpu(src->io_bytes[i]);
                dst->runtime[i]         = le64_to_cpu(src->runtime[i]);
        }
@@@ -758,7 -656,7 +760,7 @@@ static void convert_gs(struct group_run
  {
        int i;
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                dst->max_run[i]         = le64_to_cpu(src->max_run[i]);
                dst->min_run[i]         = le64_to_cpu(src->min_run[i]);
                dst->max_bw[i]          = le64_to_cpu(src->max_bw[i]);
@@@ -775,16 -673,19 +777,17 @@@ static void handle_ts(struct fio_clien
  {
        struct cmd_ts_pdu *p = (struct cmd_ts_pdu *) cmd->payload;
  
 -      convert_ts(&p->ts, &p->ts);
 -      convert_gs(&p->rs, &p->rs);
 -
        show_thread_status(&p->ts, &p->rs);
+       client->did_stat = 1;
  
-       if (sum_stat_clients == 1)
+       if (!do_output_all_clients)
                return;
  
        sum_thread_stats(&client_ts, &p->ts, sum_stat_nr);
        sum_group_stats(&client_gs, &p->rs);
  
        client_ts.members++;
 +      client_ts.thread_number = p->ts.thread_number;
        client_ts.groupid = p->ts.groupid;
  
        if (++sum_stat_nr == sum_stat_clients) {
        }
  }
  
 -static void handle_gs(struct fio_net_cmd *cmd)
 +static void handle_gs(struct fio_client *client, struct fio_net_cmd *cmd)
  {
        struct group_run_stats *gs = (struct group_run_stats *) cmd->payload;
  
 -      convert_gs(gs, gs);
        show_group_stats(gs);
  }
  
 +static void handle_text(struct fio_client *client, struct fio_net_cmd *cmd)
 +{
 +      struct cmd_text_pdu *pdu = (struct cmd_text_pdu *) cmd->payload;
 +      const char *buf = (const char *) pdu->buf;
 +      const char *name;
 +      int fio_unused ret;
 +
 +      name = client->name ? client->name : client->hostname;
 +
 +      if (!client->skip_newline)
 +              fprintf(f_out, "<%s> ", name);
 +      ret = fwrite(buf, pdu->buf_len, 1, f_out);
 +      fflush(f_out);
 +      client->skip_newline = strchr(buf, '\n') == NULL;
 +}
 +
  static void convert_agg(struct disk_util_agg *agg)
  {
        int i;
@@@ -853,6 -739,9 +856,6 @@@ static void handle_du(struct fio_clien
  {
        struct cmd_du_pdu *du = (struct cmd_du_pdu *) cmd->payload;
  
 -      convert_dus(&du->dus);
 -      convert_agg(&du->agg);
 -
        if (!client->disk_stats_shown) {
                client->disk_stats_shown = 1;
                log_info("\nDisk stats (read/write):\n");
@@@ -869,23 -758,22 +872,23 @@@ static void convert_jobs_eta(struct job
        je->nr_ramp             = le32_to_cpu(je->nr_ramp);
        je->nr_pending          = le32_to_cpu(je->nr_pending);
        je->files_open          = le32_to_cpu(je->files_open);
 -      je->m_rate              = le32_to_cpu(je->m_rate);
 -      je->t_rate              = le32_to_cpu(je->t_rate);
 -      je->m_iops              = le32_to_cpu(je->m_iops);
 -      je->t_iops              = le32_to_cpu(je->t_iops);
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 +              je->m_rate[i]   = le32_to_cpu(je->m_rate[i]);
 +              je->t_rate[i]   = le32_to_cpu(je->t_rate[i]);
 +              je->m_iops[i]   = le32_to_cpu(je->m_iops[i]);
 +              je->t_iops[i]   = le32_to_cpu(je->t_iops[i]);
                je->rate[i]     = le32_to_cpu(je->rate[i]);
                je->iops[i]     = le32_to_cpu(je->iops[i]);
        }
  
        je->elapsed_sec         = le64_to_cpu(je->elapsed_sec);
        je->eta_sec             = le64_to_cpu(je->eta_sec);
 +      je->nr_threads          = le32_to_cpu(je->nr_threads);
        je->is_pow2             = le32_to_cpu(je->is_pow2);
  }
  
 -static void sum_jobs_eta(struct jobs_eta *dst, struct jobs_eta *je)
 +void fio_client_sum_jobs_eta(struct jobs_eta *dst, struct jobs_eta *je)
  {
        int i;
  
        dst->nr_ramp            += je->nr_ramp;
        dst->nr_pending         += je->nr_pending;
        dst->files_open         += je->files_open;
 -      dst->m_rate             += je->m_rate;
 -      dst->t_rate             += je->t_rate;
 -      dst->m_iops             += je->m_iops;
 -      dst->t_iops             += je->t_iops;
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 +              dst->m_rate[i]  += je->m_rate[i];
 +              dst->t_rate[i]  += je->t_rate[i];
 +              dst->m_iops[i]  += je->m_iops[i];
 +              dst->t_iops[i]  += je->t_iops[i];
                dst->rate[i]    += je->rate[i];
                dst->iops[i]    += je->iops[i];
        }
  
        if (je->eta_sec > dst->eta_sec)
                dst->eta_sec = je->eta_sec;
 +
 +      dst->nr_threads         += je->nr_threads;
 +      /* we need to handle je->run_str too ... */
  }
  
 -static void dec_jobs_eta(struct client_eta *eta)
 +void fio_client_dec_jobs_eta(struct client_eta *eta, client_eta_op eta_fn)
  {
        if (!--eta->pending) {
 -              display_thread_status(&eta->eta);
 +              eta_fn(&eta->eta);
                free(eta);
        }
  }
  
  static void remove_reply_cmd(struct fio_client *client, struct fio_net_cmd *cmd)
  {
 -      struct fio_net_int_cmd *icmd = NULL;
 +      struct fio_net_cmd_reply *reply = NULL;
        struct flist_head *entry;
  
        flist_for_each(entry, &client->cmd_list) {
 -              icmd = flist_entry(entry, struct fio_net_int_cmd, list);
 +              reply = flist_entry(entry, struct fio_net_cmd_reply, list);
  
 -              if (cmd->tag == (uintptr_t) icmd)
 +              if (cmd->tag == (uintptr_t) reply)
                        break;
  
 -              icmd = NULL;
 +              reply = NULL;
        }
  
 -      if (!icmd) {
 -              log_err("fio: client: unable to find matching tag\n");
 +      if (!reply) {
 +              log_err("fio: client: unable to find matching tag (%lx)\n", cmd->tag);
                return;
        }
  
 -      flist_del(&icmd->list);
 -      cmd->tag = icmd->saved_tag;
 -      free(icmd);
 +      flist_del(&reply->list);
 +      cmd->tag = reply->saved_tag;
 +      free(reply);
 +}
 +
 +int fio_client_wait_for_reply(struct fio_client *client, uint64_t tag)
 +{
 +      do {
 +              struct fio_net_cmd_reply *reply = NULL;
 +              struct flist_head *entry;
 +
 +              flist_for_each(entry, &client->cmd_list) {
 +                      reply = flist_entry(entry, struct fio_net_cmd_reply, list);
 +
 +                      if (tag == (uintptr_t) reply)
 +                              break;
 +
 +                      reply = NULL;
 +              }
 +
 +              if (!reply)
 +                      break;
 +
 +              usleep(1000);
 +      } while (1);
 +
 +      return 0;
  }
  
  static void handle_eta(struct fio_client *client, struct fio_net_cmd *cmd)
        client->eta_in_flight = NULL;
        flist_del_init(&client->eta_list);
  
 -      convert_jobs_eta(je);
 -      sum_jobs_eta(&eta->eta, je);
 -      dec_jobs_eta(eta);
 +      if (client->ops->jobs_eta)
 +              client->ops->jobs_eta(client, je);
 +
 +      fio_client_sum_jobs_eta(&eta->eta, je);
 +      fio_client_dec_jobs_eta(eta, client->ops->eta);
  }
  
  static void handle_probe(struct fio_client *client, struct fio_net_cmd *cmd)
@@@ -1016,112 -875,28 +1019,118 @@@ static void handle_start(struct fio_cli
        struct cmd_start_pdu *pdu = (struct cmd_start_pdu *) cmd->payload;
  
        client->state = Client_started;
-       client->jobs = pdu->jobs;
+       client->jobs = le32_to_cpu(pdu->jobs);
+       client->nr_stat = le32_to_cpu(pdu->stat_outputs);
+       if (sum_stat_clients > 1)
+               do_output_all_clients = 1;
+       sum_stat_clients += client->nr_stat;
  }
  
 +/*
 + * Log the client's error code if one is set; 'cmd' is not used here.
 + */
  static void handle_stop(struct fio_client *client, struct fio_net_cmd *cmd)
 +{
 +      if (client->error)
 +              log_info("client <%s>: exited with error %d\n", client->hostname, client->error);
 +}
 +
 +/*
 + * Convert the 'error' field of a stop (end) PDU from little-endian to host
 + * byte order, in place in the command payload. No other field is touched.
 + */
 +static void convert_stop(struct fio_net_cmd *cmd)
  {
        struct cmd_end_pdu *pdu = (struct cmd_end_pdu *) cmd->payload;
  
 -      client->state = Client_stopped;
 -      client->error = le32_to_cpu(pdu->error);
 +      pdu->error = le32_to_cpu(pdu->error);
 +}
  
 -      if (client->error)
 -              log_info("client <%s>: exited with error %d\n", client->hostname, client->error);
 +/*
 + * Convert the header fields of a text PDU (level, buf_len, log_sec and
 + * log_usec) from little-endian to host byte order, in place in the payload.
 + */
 +static void convert_text(struct fio_net_cmd *cmd)
 +{
 +      struct cmd_text_pdu *pdu = (struct cmd_text_pdu *) cmd->payload;
 +
 +      pdu->level      = le32_to_cpu(pdu->level);
 +      pdu->buf_len    = le32_to_cpu(pdu->buf_len);
 +      pdu->log_sec    = le64_to_cpu(pdu->log_sec);
 +      pdu->log_usec   = le64_to_cpu(pdu->log_usec);
 +}
 +
 +/*
 + * This has been compressed on the server side, since it can be big.
 + * Uncompress here.
 + */
 +static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd)
 +{
 +      struct cmd_iolog_pdu *pdu = (struct cmd_iolog_pdu *) cmd->payload;
 +      struct cmd_iolog_pdu *ret;
 +      uint32_t nr_samples;
 +      unsigned long total;
 +      z_stream stream;
 +      void *p;
 +      int i;
 +
 +      stream.zalloc = Z_NULL;
 +      stream.zfree = Z_NULL;
 +      stream.opaque = Z_NULL;
 +      stream.avail_in = 0;
 +      stream.next_in = Z_NULL;
 +
 +      if (inflateInit(&stream) != Z_OK)
 +              return NULL;
 +
 +      /*
 +       * Get header first, it's not compressed
 +       */
 +      nr_samples = le32_to_cpu(pdu->nr_samples);
 +
 +      total = nr_samples * sizeof(struct io_sample);
 +      ret = malloc(total + sizeof(*pdu));
 +      ret->thread_number = le32_to_cpu(pdu->thread_number);
 +      ret->nr_samples = nr_samples;
 +      ret->log_type = le32_to_cpu(pdu->log_type);
 +      strcpy((char *) ret->name, (char *) pdu->name);
 +
 +      p = (void *) ret + sizeof(*pdu);
 +
 +      stream.avail_in = cmd->pdu_len - sizeof(*pdu);
 +      stream.next_in = (void *) pdu + sizeof(*pdu);
 +      while (stream.avail_in) {
 +              unsigned int this_chunk = 65536;
 +              unsigned int this_len;
 +              int err;
 +
 +              if (this_chunk > total)
 +                      this_chunk = total;
 +
 +              stream.avail_out = this_chunk;
 +              stream.next_out = p;
 +              err = inflate(&stream, Z_NO_FLUSH);
 +              /* may be Z_OK, or Z_STREAM_END */
 +              if (err < 0) {
 +                      log_err("fio: inflate error %d\n", err);
 +                      free(ret);
 +                      ret = NULL;
 +                      goto out;
 +              }
 +
 +              this_len = this_chunk - stream.avail_out;
 +              p += this_len;
 +              total -= this_len;
 +      }
 +
 +      for (i = 0; i < ret->nr_samples; i++) {
 +              struct io_sample *s = &ret->samples[i];
 +
 +              s->time = le64_to_cpu(s->time);
 +              s->val  = le64_to_cpu(s->val);
 +              s->ddir = le32_to_cpu(s->ddir);
 +              s->bs   = le32_to_cpu(s->bs);
 +      }
 +
 +out:
 +      inflateEnd(&stream);
 +      return ret;
  }
  
 -static int handle_client(struct fio_client *client)
 +int fio_handle_client(struct fio_client *client)
  {
 +      struct client_ops *ops = client->ops;
        struct fio_net_cmd *cmd;
  
        dprint(FD_NET, "client: handle %s\n", client->hostname);
        if (!cmd)
                return 0;
  
 -      dprint(FD_NET, "client: got cmd op %s from %s\n",
 -                              fio_server_op(cmd->opcode), client->hostname);
 +      dprint(FD_NET, "client: got cmd op %s from %s (pdu=%u)\n",
 +              fio_server_op(cmd->opcode), client->hostname, cmd->pdu_len);
  
        switch (cmd->opcode) {
        case FIO_NET_CMD_QUIT:
 +              if (ops->quit)
 +                      ops->quit(client, cmd);
                remove_client(client);
                free(cmd);
                break;
 -      case FIO_NET_CMD_TEXT: {
 -              const char *buf = (const char *) cmd->payload;
 -              const char *name;
 -              int fio_unused ret;
 -
 -              name = client->name ? client->name : client->hostname;
 -
 -              if (!client->skip_newline)
 -                      fprintf(f_out, "<%s> ", name);
 -              ret = fwrite(buf, cmd->pdu_len, 1, f_out);
 -              fflush(f_out);
 -              client->skip_newline = strchr(buf, '\n') == NULL;
 +      case FIO_NET_CMD_TEXT:
 +              convert_text(cmd);
 +              ops->text(client, cmd);
                free(cmd);
                break;
 -              }
 -      case FIO_NET_CMD_DU:
 -              handle_du(client, cmd);
 +      case FIO_NET_CMD_DU: {
 +              struct cmd_du_pdu *du = (struct cmd_du_pdu *) cmd->payload;
 +
 +              convert_dus(&du->dus);
 +              convert_agg(&du->agg);
 +
 +              ops->disk_util(client, cmd);
                free(cmd);
                break;
 -      case FIO_NET_CMD_TS:
 -              handle_ts(client, cmd);
 +              }
 +      case FIO_NET_CMD_TS: {
 +              struct cmd_ts_pdu *p = (struct cmd_ts_pdu *) cmd->payload;
 +
 +              convert_ts(&p->ts, &p->ts);
 +              convert_gs(&p->rs, &p->rs);
 +
 +              ops->thread_status(client, cmd);
                free(cmd);
                break;
 -      case FIO_NET_CMD_GS:
 -              handle_gs(cmd);
 +              }
 +      case FIO_NET_CMD_GS: {
 +              struct group_run_stats *gs = (struct group_run_stats *) cmd->payload;
 +
 +              convert_gs(gs, gs);
 +
 +              ops->group_stats(client, cmd);
                free(cmd);
                break;
 -      case FIO_NET_CMD_ETA:
 +              }
 +      case FIO_NET_CMD_ETA: {
 +              struct jobs_eta *je = (struct jobs_eta *) cmd->payload;
 +
                remove_reply_cmd(client, cmd);
 +              convert_jobs_eta(je);
                handle_eta(client, cmd);
                free(cmd);
                break;
 +              }
        case FIO_NET_CMD_PROBE:
                remove_reply_cmd(client, cmd);
 -              handle_probe(client, cmd);
 +              ops->probe(client, cmd);
                free(cmd);
                break;
 -      case FIO_NET_CMD_RUN:
 +      case FIO_NET_CMD_SERVER_START:
                client->state = Client_running;
 +              if (ops->job_start)
 +                      ops->job_start(client, cmd);
 +              free(cmd);
 +              break;
 +      case FIO_NET_CMD_START: {
 +              struct cmd_start_pdu *pdu = (struct cmd_start_pdu *) cmd->payload;
 +
 +              pdu->jobs = le32_to_cpu(pdu->jobs);
 +              ops->start(client, cmd);
                free(cmd);
                break;
 -      case FIO_NET_CMD_START:
 -              handle_start(client, cmd);
 +              }
 +      case FIO_NET_CMD_STOP: {
 +              struct cmd_end_pdu *pdu = (struct cmd_end_pdu *) cmd->payload;
 +
 +              convert_stop(cmd);
 +              client->state = Client_stopped;
 +              client->error = le32_to_cpu(pdu->error);
 +              client->signal = le32_to_cpu(pdu->signal);
 +              ops->stop(client, cmd);
 +              free(cmd);
 +              break;
 +              }
 +      case FIO_NET_CMD_ADD_JOB: {
 +              struct cmd_add_job_pdu *pdu = (struct cmd_add_job_pdu *) cmd->payload;
 +
 +              client->thread_number = le32_to_cpu(pdu->thread_number);
 +              client->groupid = le32_to_cpu(pdu->groupid);
 +
 +              if (ops->add_job)
 +                      ops->add_job(client, cmd);
 +              free(cmd);
 +              break;
 +              }
 +      case FIO_NET_CMD_IOLOG:
 +              if (ops->iolog) {
 +                      struct cmd_iolog_pdu *pdu;
 +
 +                      pdu = convert_iolog(cmd);
 +                      ops->iolog(client, pdu);
 +              }
                free(cmd);
                break;
 -      case FIO_NET_CMD_STOP:
 -              handle_stop(client, cmd);
 +      case FIO_NET_CMD_UPDATE_JOB:
 +              ops->update_job(client, cmd);
 +              remove_reply_cmd(client, cmd);
                free(cmd);
                break;
        default:
        return 1;
  }
  
 -static void request_client_etas(void)
 +static void request_client_etas(struct client_ops *ops)
  {
        struct fio_client *client;
        struct flist_head *entry;
        }
  
        while (skipped--)
 -              dec_jobs_eta(eta);
 +              fio_client_dec_jobs_eta(eta, ops->eta);
  
        dprint(FD_NET, "client: requested eta tag %p\n", eta);
  }
 +/*
 + * Drop every entry on the client's cmd_list whose timestamp is at least
 + * FIO_NET_CLIENT_TIMEOUT older than 'now', logging each one. Returns non-zero
 + * only when at least one entry was dropped and the list ended up empty.
 + */
  static int client_check_cmd_timeout(struct fio_client *client,
                                    struct timeval *now)
  {
 -      struct fio_net_int_cmd *cmd;
 +      struct fio_net_cmd_reply *reply;
        struct flist_head *entry, *tmp;
        int ret = 0;
  
        flist_for_each_safe(entry, tmp, &client->cmd_list) {
 -              cmd = flist_entry(entry, struct fio_net_int_cmd, list);
 +              reply = flist_entry(entry, struct fio_net_cmd_reply, list);
  
 -              if (mtime_since(&cmd->tv, now) < FIO_NET_CLIENT_TIMEOUT)
 +              if (mtime_since(&reply->tv, now) < FIO_NET_CLIENT_TIMEOUT)
                        continue;
  
                log_err("fio: client %s, timeout on cmd %s\n", client->hostname,
 -                                              fio_server_op(cmd->cmd.opcode));
 -              flist_del(&cmd->list);
 -              free(cmd);
 +                                              fio_server_op(reply->opcode));
 +              flist_del(&reply->list);
 +              free(reply);
                ret = 1;
        }
  
        return flist_empty(&client->cmd_list) && ret;
  }
  
 -static int fio_client_timed_out(void)
 +static int fio_check_clients_timed_out(void)
  {
        struct fio_client *client;
        struct flist_head *entry, *tmp;
                if (!client_check_cmd_timeout(client, &tv))
                        continue;
  
 -              log_err("fio: client %s timed out\n", client->hostname);
 +              if (client->ops->timed_out)
 +                      client->ops->timed_out(client);
 +              else
 +                      log_err("fio: client %s timed out\n", client->hostname);
 +
                remove_client(client);
                ret = 1;
        }
        return ret;
  }
  
 -int fio_handle_clients(void)
 +int fio_handle_clients(struct client_ops *ops)
  {
        struct pollfd *pfds;
        int i, ret = 0, retval = 0;
  
        pfds = malloc(nr_clients * sizeof(struct pollfd));
  
-       sum_stat_clients = nr_clients;
        init_thread_stat(&client_ts);
        init_group_run_stat(&client_gs);
  
                flist_for_each_safe(entry, tmp, &client_list) {
                        client = flist_entry(entry, struct fio_client, list);
  
 -                      if (!client->sent_job &&
 +                      if (!client->sent_job && !client->ops->stay_connected &&
                            flist_empty(&client->cmd_list)) {
                                remove_client(client);
                                continue;
                        struct timeval tv;
  
                        gettimeofday(&tv, NULL);
 -                      if (mtime_since(&eta_tv, &tv) >= 900) {
 -                              request_client_etas();
 +                      if (mtime_since(&eta_tv, &tv) >= ops->eta_msec) {
 +                              request_client_etas(ops);
                                memcpy(&eta_tv, &tv, sizeof(tv));
  
 -                              if (fio_client_timed_out())
 +                              if (fio_check_clients_timed_out())
                                        break;
                        }
  
 -                      ret = poll(pfds, nr_clients, 100);
 +                      ret = poll(pfds, nr_clients, ops->eta_msec);
                        if (ret < 0) {
                                if (errno == EINTR)
                                        continue;
                                log_err("fio: unknown client fd %d\n", pfds[i].fd);
                                continue;
                        }
 -                      if (!handle_client(client)) {
 +                      if (!fio_handle_client(client)) {
                                log_info("client: host=%s disconnected\n",
                                                client->hostname);
                                remove_client(client);
                                retval = 1;
                        } else if (client->error)
                                retval = 1;
 -                      put_client(client);
 +                      fio_put_client(client);
                }
        }
  
diff --combined client.h
index 341d26069449a94f4b95ecaa69b0cb16e472ebed,0000000000000000000000000000000000000000..10d6ec36cac34f1de66e5cc0951cd191c9375e33
mode 100644,000000..100644
--- /dev/null
+++ b/client.h
@@@ -1,142 -1,0 +1,144 @@@
 +#ifndef CLIENT_H
 +#define CLIENT_H
 +
 +#include <sys/socket.h>
 +#include <sys/un.h>
 +#include <netinet/in.h>
 +#include <arpa/inet.h>
 +
 +#include "stat.h"
 +
 +struct fio_net_cmd;
 +struct client_ops;
 +
 +/*
 + * Values stored in fio_client.state.
 + */
 +enum {
 +      Client_created          = 0,
 +      Client_connected        = 1,
 +      Client_started          = 2,    /* set by handle_start() */
 +      Client_running          = 3,    /* set on FIO_NET_CMD_SERVER_START */
 +      Client_stopped          = 4,    /* set on FIO_NET_CMD_STOP */
 +      Client_exited           = 5,
 +};
 +
 +/*
 + * State kept for one client connection.
 + */
 +struct fio_client {
 +      struct flist_head list;         /* entry on client_list */
 +      struct flist_head hash_list;
 +      struct flist_head arg_list;
 +      union {
 +              struct sockaddr_in addr;
 +              struct sockaddr_in6 addr6;
 +              struct sockaddr_un addr_un;
 +      };
 +      char *hostname;                 /* used to identify the client in log output */
 +      int port;
 +      int fd;
 +      unsigned int refs;
 +
 +      char *name;
 +
 +      int state;                      /* one of the Client_* values */
 +
 +      int skip_newline;
 +      int is_sock;
 +      int disk_stats_shown;
 +      unsigned int jobs;              /* 'jobs' from the start PDU */
++      unsigned int nr_stat;           /* 'stat_outputs' from the start PDU */
 +      int error;                      /* 'error' from the stop PDU */
 +      int signal;                     /* 'signal' from the stop PDU */
 +      int ipv6;
 +      int sent_job;
++      int did_stat;
 +      uint32_t type;
 +
 +      uint32_t thread_number;         /* from the last add-job PDU */
 +      uint32_t groupid;               /* from the last add-job PDU */
 +
 +      struct flist_head eta_list;
 +      struct client_eta *eta_in_flight;       /* cleared by handle_eta() */
 +
 +      struct flist_head cmd_list;     /* queued struct fio_net_cmd_reply entries */
 +
 +      uint16_t argc;
 +      char **argv;
 +
 +      struct client_ops *ops;         /* callbacks invoked for incoming commands */
 +      void *client_data;
 +
 +      char **ini_file;
 +      unsigned int nr_ini_file;
 +};
 +
 +struct cmd_iolog_pdu;
 +typedef void (client_cmd_op)(struct fio_client *, struct fio_net_cmd *);
 +typedef void (client_eta_op)(struct jobs_eta *je);
 +typedef void (client_timed_out_op)(struct fio_client *);
 +typedef void (client_jobs_eta_op)(struct fio_client *client, struct jobs_eta *je);
 +typedef void (client_iolog_op)(struct fio_client *client, struct cmd_iolog_pdu *);
 +
 +/*
 + * Callback table through which a front end receives client events.
 + *
 + * fio_handle_client() calls text, disk_util, thread_status, group_stats,
 + * probe, start, stop and update_job without a NULL check, so those must be
 + * set. quit, job_start, add_job, iolog, jobs_eta and timed_out are only
 + * called when non-NULL.
 + */
 +struct client_ops {
 +      client_cmd_op           *text;
 +      client_cmd_op           *disk_util;
 +      client_cmd_op           *thread_status;
 +      client_cmd_op           *group_stats;
 +      client_jobs_eta_op      *jobs_eta;
 +      client_eta_op           *eta;   /* handed to fio_client_dec_jobs_eta() */
 +      client_cmd_op           *probe;
 +      client_cmd_op           *quit;
 +      client_cmd_op           *add_job;
 +      client_cmd_op           *update_job;
 +      client_timed_out_op     *timed_out;
 +      client_cmd_op           *stop;
 +      client_cmd_op           *start;
 +      client_cmd_op           *job_start;
 +      client_iolog_op         *iolog;
 +      client_timed_out_op     *removed;
 +
 +      unsigned int eta_msec;  /* ETA request interval and poll() timeout in fio_handle_clients() */
 +      int stay_connected;     /* if set, a client with no job sent and no queued commands is kept */
 +      uint32_t client_type;
 +};
 +
 +extern struct client_ops fio_client_ops;
 +
 +/*
 + * ETA accumulator: handle_eta() sums each client's jobs_eta into 'eta'.
 + * NOTE(review): 'pending' is presumably the number of replies still
 + * outstanding for this round - confirm against fio_client_dec_jobs_eta().
 + */
 +struct client_eta {
 +      struct jobs_eta eta;
 +      unsigned int pending;
 +};
 +
 +extern int fio_handle_client(struct fio_client *);
 +extern void fio_client_dec_jobs_eta(struct client_eta *eta, client_eta_op fn);
 +extern void fio_client_sum_jobs_eta(struct jobs_eta *dst, struct jobs_eta *je);
 +
 +/*
 + * Client address kinds.
 + * NOTE(review): presumably the 'type' passed to fio_client_add_explicit() -
 + * confirm against its definition.
 + */
 +enum {
 +      Fio_client_ipv4 = 1,
 +      Fio_client_ipv6,
 +      Fio_client_socket,
 +};
 +
 +extern int fio_client_connect(struct fio_client *);
 +extern int fio_clients_connect(void);
 +extern int fio_start_client(struct fio_client *);
 +extern int fio_start_all_clients(void);
 +extern int fio_client_send_ini(struct fio_client *, const char *);
 +extern int fio_clients_send_ini(const char *);
 +extern int fio_handle_clients(struct client_ops *);
 +extern int fio_client_add(struct client_ops *, const char *, void **);
 +extern struct fio_client *fio_client_add_explicit(struct client_ops *, const char *, int, int);
 +extern void fio_client_add_cmd_option(void *, const char *);
 +extern void fio_client_add_ini_file(void *, const char *);
 +extern int fio_client_terminate(struct fio_client *);
 +extern void fio_clients_terminate(void);
 +extern struct fio_client *fio_get_client(struct fio_client *);
 +extern void fio_put_client(struct fio_client *);
 +extern int fio_client_update_options(struct fio_client *, struct thread_options *, uint64_t *);
 +extern int fio_client_wait_for_reply(struct fio_client *, uint64_t);
 +
 +#define FIO_CLIENT_DEF_ETA_MSEC               900
 +
 +/*
 + * Front-end kinds.
 + * NOTE(review): presumably the values carried in client_ops.client_type -
 + * confirm against the users of that field.
 + */
 +enum {
 +      FIO_CLIENT_TYPE_CLI             = 1,
 +      FIO_CLIENT_TYPE_GUI             = 2,
 +};
 +
 +#endif
 +
diff --combined eta.c
index 600b046ced8c86e16ff90b1d2dd05ac59e637d5a,1f67301a902124e6b922ca980bc5796928525eb3..bcf0676a157d2094b2298bf69e8bffd96a322016
--- 1/eta.c
--- 2/eta.c
+++ b/eta.c
@@@ -78,6 -78,7 +78,7 @@@ static void check_str_update(struct thr
                c = 'C';
                break;
        case TD_INITIALIZED:
+       case TD_SETTING_UP:
                c = 'I';
                break;
        case TD_NOT_CREATED:
@@@ -93,7 -94,7 +94,7 @@@
  /*
   * Convert seconds to a printable string.
   */
 -static void eta_to_str(char *str, unsigned long eta_sec)
 +void eta_to_str(char *str, unsigned long eta_sec)
  {
        unsigned int d, h, m, s;
        int disp_hour = 0;
@@@ -296,29 -297,31 +297,31 @@@ int calc_thread_status(struct jobs_eta 
                    || td->runstate == TD_PRE_READING) {
                        je->nr_running++;
                        if (td_read(td)) {
 -                              je->t_rate += td->o.rate[DDIR_READ];
 -                              je->t_iops += td->o.rate_iops[DDIR_READ];
 -                              je->m_rate += td->o.ratemin[DDIR_READ];
 -                              je->m_iops += td->o.rate_iops_min[DDIR_READ];
 +                              je->t_rate[0] += td->o.rate[DDIR_READ];
 +                              je->t_iops[0] += td->o.rate_iops[DDIR_READ];
 +                              je->m_rate[0] += td->o.ratemin[DDIR_READ];
 +                              je->m_iops[0] += td->o.rate_iops_min[DDIR_READ];
                        }
                        if (td_write(td)) {
 -                              je->t_rate += td->o.rate[DDIR_WRITE];
 -                              je->t_iops += td->o.rate_iops[DDIR_WRITE];
 -                              je->m_rate += td->o.ratemin[DDIR_WRITE];
 -                              je->m_iops += td->o.rate_iops_min[DDIR_WRITE];
 +                              je->t_rate[1] += td->o.rate[DDIR_WRITE];
 +                              je->t_iops[1] += td->o.rate_iops[DDIR_WRITE];
 +                              je->m_rate[1] += td->o.ratemin[DDIR_WRITE];
 +                              je->m_iops[1] += td->o.rate_iops_min[DDIR_WRITE];
                        }
                        if (td_trim(td)) {
 -                              je->t_rate += td->o.rate[DDIR_TRIM];
 -                              je->t_iops += td->o.rate_iops[DDIR_TRIM];
 -                              je->m_rate += td->o.ratemin[DDIR_TRIM];
 -                              je->m_iops += td->o.rate_iops_min[DDIR_TRIM];
 +                              je->t_rate[2] += td->o.rate[DDIR_TRIM];
 +                              je->t_iops[2] += td->o.rate_iops[DDIR_TRIM];
 +                              je->m_rate[2] += td->o.ratemin[DDIR_TRIM];
 +                              je->m_iops[2] += td->o.rate_iops_min[DDIR_TRIM];
                        }
  
                        je->files_open += td->nr_open_files;
                } else if (td->runstate == TD_RAMP) {
                        je->nr_running++;
                        je->nr_ramp++;
-               } else if (td->runstate < TD_RUNNING)
+               } else if (td->runstate == TD_SETTING_UP)
+                       je->nr_running++;
+               else if (td->runstate < TD_RUNNING)
                        je->nr_pending++;
  
                if (je->elapsed_sec >= 3)
@@@ -401,19 -404,16 +404,19 @@@ void display_thread_status(struct jobs_
        }
  
        p += sprintf(p, "Jobs: %d (f=%d)", je->nr_running, je->files_open);
 -      if (je->m_rate || je->t_rate) {
 +      if (je->m_rate[0] || je->m_rate[1] || je->t_rate[0] || je->t_rate[1]) {
                char *tr, *mr;
  
 -              mr = num2str(je->m_rate, 4, 0, je->is_pow2);
 -              tr = num2str(je->t_rate, 4, 0, je->is_pow2);
 +              mr = num2str(je->m_rate[0] + je->m_rate[1], 4, 0, je->is_pow2);
 +              tr = num2str(je->t_rate[0] + je->t_rate[1], 4, 0, je->is_pow2);
                p += sprintf(p, ", CR=%s/%s KB/s", tr, mr);
                free(tr);
                free(mr);
 -      } else if (je->m_iops || je->t_iops)
 -              p += sprintf(p, ", CR=%d/%d IOPS", je->t_iops, je->m_iops);
 +      } else if (je->m_iops[0] || je->m_iops[1] || je->t_iops[0] || je->t_iops[1]) {
 +              p += sprintf(p, ", CR=%d/%d IOPS",
 +                                      je->t_iops[0] + je->t_iops[1],
 +                                      je->m_iops[0] + je->m_iops[1]);
 +      }
        if (je->eta_sec != INT_MAX && je->nr_running) {
                char perc_str[32];
                char *iops_str[DDIR_RWDIR_CNT];
diff --combined filesetup.c
index 79e29da872cb85ebbb8eab682c2f935c82ce0606,8636e16661b3bad68a70e28a6805e227c5e9e543..4a2383f1718ab390b9fc7ab4d3a05cb747d0ee7d
@@@ -12,6 -12,7 +12,7 @@@
  #include "smalloc.h"
  #include "filehash.h"
  #include "os/os.h"
+ #include "hash.h"
  
  #ifdef FIO_HAVE_LINUX_FALLOCATE
  #include <linux/falloc.h>
@@@ -94,9 -95,9 +95,9 @@@ static int extend_file(struct thread_da
  
                        r = fallocate(f->fd, FALLOC_FL_KEEP_SIZE, 0,
                                        f->real_file_size);
 -                      if (r != 0) {
 +                      if (r != 0)
                                td_verror(td, errno, "fallocate");
 -                      }
 +
                        break;
  #endif /* FIO_HAVE_LINUX_FALLOCATE */
                default:
@@@ -862,12 -863,50 +863,50 @@@ int pre_read_files(struct thread_data *
        return 1;
  }
  
 -              zipf_init(&f->zipf, nranges, td->o.zipf_theta, seed);
+ /*
+  * Seed one file's zipf or pareto generator. The range size is the smaller of
+  * the read and write minimum block sizes, the range count is the file size
+  * divided by that (rounded up), and the seed is a hash of the file name
+  * multiplied by the thread number. Always returns 1.
+  */
+ static int __init_rand_distribution(struct thread_data *td, struct fio_file *f)
+ {
+       unsigned int range_size, seed;
+       unsigned long nranges;
+       range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
+       nranges = (f->real_file_size + range_size - 1) / range_size;
+       seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
+       if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
 -              pareto_init(&f->zipf, nranges, td->o.pareto_h, seed);
++              zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, seed);
+       else
++              pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, seed);
+       return 1;
+ }
+ /*
+  * Initialize the per-file random distribution state for a job. Returns 0
+  * without doing anything when the distribution is FIO_RAND_DIST_RANDOM;
+  * otherwise runs __init_rand_distribution() on every file with the thread
+  * temporarily in TD_SETTING_UP, restores the previous run state, and
+  * returns 1.
+  */
+ static int init_rand_distribution(struct thread_data *td)
+ {
+       struct fio_file *f;
+       unsigned int i;
+       int state;
+       if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
+               return 0;
+       state = td->runstate;
+       td_set_runstate(td, TD_SETTING_UP);
+       for_each_file(td, f, i)
+               __init_rand_distribution(td, f);
+       td_set_runstate(td, state);
+       return 1;
+ }
  int init_random_map(struct thread_data *td)
  {
        unsigned long long blocks, num_maps;
        struct fio_file *f;
        unsigned int i;
  
+       if (init_rand_distribution(td))
+               return 0;
        if (td->o.norandommap || !td_random(td))
                return 0;
  
diff --combined fio.1
index 13abb94fd27715cae1377c250d643ba6c3e58953,08d6c0f4ba3da66191ba855de21f60a4ed0b838c..8d3fedf79a8fe034d4fb23cedd2730209498a949
--- 1/fio.1
--- 2/fio.1
+++ b/fio.1
@@@ -639,6 -639,10 +639,10 @@@ is used for read vs write seperation
  Average bandwidth for \fBrate\fR and \fBratemin\fR over this number of
  milliseconds.  Default: 1000ms.
  .TP
+ .BI max_latency \fR=\fPint
+ If set, fio will exit the job if it exceeds this maximum latency. It will exit
+ with an ETIME error.
+ .TP
  .BI cpumask \fR=\fPint
  Set CPU affinity for this job. \fIint\fR is a bitmask of allowed CPUs the job
  may run on.  See \fBsched_setaffinity\fR\|(2).
  .BI cpus_allowed \fR=\fPstr
  Same as \fBcpumask\fR, but allows a comma-delimited list of CPU numbers.
  .TP
+ .BI numa_cpu_nodes \fR=\fPstr
+ Set this job running on specified NUMA nodes' CPUs. The arguments allow
+ comma delimited list of cpu numbers, A-B ranges, or 'all'.
+ .TP
+ .BI numa_mem_policy \fR=\fPstr
+ Set this job's memory policy and corresponding NUMA nodes. Format of
+ the arguments:
+ .RS
+ .TP
+ .B <mode>[:<nodelist>]
+ .TP
+ .B mode
+ is one of the following memory policies:
+ .TP
+ .B default, prefer, bind, interleave, local
+ .TP
+ .RE
+ For \fBdefault\fR and \fBlocal\fR memory policy, no \fBnodelist\fR
+ needs to be specified. For \fBprefer\fR, only one node is
+ allowed. For \fBbind\fR and \fBinterleave\fR, \fBnodelist\fR allows
+ comma delimited list of numbers, A-B ranges, or 'all'.
+ .TP
  .BI startdelay \fR=\fPint
  Delay start of job for the specified number of seconds.
  .TP
@@@ -1048,12 -1074,6 +1074,12 @@@ Some parameters are only valid when a s
  used identically to normal parameters, with the caveat that when used on the
  command line, the must come after the ioengine that defines them is selected.
  .TP
 +.BI (cpu)cpuload \fR=\fPint
 +Attempt to use the specified percentage of CPU cycles.
 +.TP
 +.BI (cpu)cpuchunks \fR=\fPint
 +Split the load into cycles of the given time. In microseconds.
 +.TP
  .BI (libaio)userspace_reap
  Normally, with the libaio engine in use, fio will use
  the io_getevents system call to reap newly returned events.
diff --combined fio.h
index 7f11861d074bb6855d6690fc4c0a8b1f765ea7e9,f69de0d321bbcd6282c611f27e8b86483ff04b35..5022cdfd300b2cf48b26309ec20fca04d152206d
--- 1/fio.h
--- 2/fio.h
+++ b/fio.h
  struct thread_data;
  
  #include "compiler/compiler.h"
 +#include "thread_options.h"
  #include "flist.h"
  #include "fifo.h"
 -#include "rbtree.h"
 +#include "lib/rbtree.h"
  #include "arch/arch.h"
  #include "os/os.h"
  #include "mutex.h"
@@@ -37,7 -36,6 +37,7 @@@
  #include "gettime.h"
  #include "lib/getopt.h"
  #include "lib/rand.h"
 +#include "client.h"
  #include "server.h"
  #include "stat.h"
  #include "flow.h"
  #include <sys/asynch.h>
  #endif
  
 -/*
 - * What type of allocation to use for io buffers
 - */
 -enum fio_memtype {
 -      MEM_MALLOC = 0, /* ordinary malloc */
 -      MEM_SHM,        /* use shared memory segments */
 -      MEM_SHMHUGE,    /* use shared memory segments with huge pages */
 -      MEM_MMAP,       /* use anonynomous mmap */
 -      MEM_MMAPHUGE,   /* memory mapped huge file */
 -};
 -
+ #ifdef FIO_HAVE_LIBNUMA
+ #include <linux/mempolicy.h>
+ #include <numa.h>
+ /*
+  * "local" is pseudo-policy
+  */
+ #define MPOL_LOCAL MPOL_MAX
+ #endif
  /*
   * offset generator types
   */
@@@ -58,6 -77,236 +68,6 @@@ enum 
        RW_SEQ_IDENT,
  };
  
 -/*
 - * What type of errors to continue on when continue_on_error is used
 - */
 -enum error_type_bit {
 -      ERROR_TYPE_READ_BIT = 0,
 -      ERROR_TYPE_WRITE_BIT = 1,
 -      ERROR_TYPE_VERIFY_BIT = 2,
 -      ERROR_TYPE_CNT = 3,
 -};
 -
 -enum error_type {
 -        ERROR_TYPE_NONE = 0,
 -        ERROR_TYPE_READ = 1 << ERROR_TYPE_READ_BIT,
 -        ERROR_TYPE_WRITE = 1 << ERROR_TYPE_WRITE_BIT,
 -        ERROR_TYPE_VERIFY = 1 << ERROR_TYPE_VERIFY_BIT,
 -        ERROR_TYPE_ANY = 0xffff,
 -};
 -
 -struct bssplit {
 -      unsigned int bs;
 -      unsigned char perc;
 -};
 -
 -struct thread_options {
 -      int pad;
 -      char *description;
 -      char *name;
 -      char *directory;
 -      char *filename;
 -      char *opendir;
 -      char *ioengine;
 -      enum td_ddir td_ddir;
 -      unsigned int rw_seq;
 -      unsigned int kb_base;
 -      unsigned int ddir_seq_nr;
 -      long ddir_seq_add;
 -      unsigned int iodepth;
 -      unsigned int iodepth_low;
 -      unsigned int iodepth_batch;
 -      unsigned int iodepth_batch_complete;
 -
 -      unsigned long long size;
 -      unsigned int size_percent;
 -      unsigned int fill_device;
 -      unsigned long long file_size_low;
 -      unsigned long long file_size_high;
 -      unsigned long long start_offset;
 -
 -      unsigned int bs[DDIR_RWDIR_CNT];
 -      unsigned int ba[DDIR_RWDIR_CNT];
 -      unsigned int min_bs[DDIR_RWDIR_CNT];
 -      unsigned int max_bs[DDIR_RWDIR_CNT];
 -      struct bssplit *bssplit[DDIR_RWDIR_CNT];
 -      unsigned int bssplit_nr[DDIR_RWDIR_CNT];
 -
 -      int *ignore_error[ERROR_TYPE_CNT];
 -      unsigned int ignore_error_nr[ERROR_TYPE_CNT];
 -      unsigned int error_dump;
 -
 -      unsigned int nr_files;
 -      unsigned int open_files;
 -      enum file_lock_mode file_lock_mode;
 -      unsigned int lockfile_batch;
 -
 -      unsigned int odirect;
 -      unsigned int invalidate_cache;
 -      unsigned int create_serialize;
 -      unsigned int create_fsync;
 -      unsigned int create_on_open;
 -      unsigned int create_only;
 -      unsigned int end_fsync;
 -      unsigned int pre_read;
 -      unsigned int sync_io;
 -      unsigned int verify;
 -      unsigned int do_verify;
 -      unsigned int verifysort;
 -      unsigned int verify_interval;
 -      unsigned int verify_offset;
 -      char verify_pattern[MAX_PATTERN_SIZE];
 -      unsigned int verify_pattern_bytes;
 -      unsigned int verify_fatal;
 -      unsigned int verify_dump;
 -      unsigned int verify_async;
 -      unsigned long long verify_backlog;
 -      unsigned int verify_batch;
 -      unsigned int use_thread;
 -      unsigned int unlink;
 -      unsigned int do_disk_util;
 -      unsigned int override_sync;
 -      unsigned int rand_repeatable;
 -      unsigned int use_os_rand;
 -      unsigned int write_lat_log;
 -      unsigned int write_bw_log;
 -      unsigned int write_iops_log;
 -      unsigned int log_avg_msec;
 -      unsigned int norandommap;
 -      unsigned int softrandommap;
 -      unsigned int bs_unaligned;
 -      unsigned int fsync_on_close;
 -
 -      unsigned int random_distribution;
 -      double zipf_theta;
 -      double pareto_h;
 -
 -      unsigned int hugepage_size;
 -      unsigned int rw_min_bs;
 -      unsigned int thinktime;
 -      unsigned int thinktime_spin;
 -      unsigned int thinktime_blocks;
 -      unsigned int fsync_blocks;
 -      unsigned int fdatasync_blocks;
 -      unsigned int barrier_blocks;
 -      unsigned long long start_delay;
 -      unsigned long long timeout;
 -      unsigned long long ramp_time;
 -      unsigned int overwrite;
 -      unsigned int bw_avg_time;
 -      unsigned int iops_avg_time;
 -      unsigned int loops;
 -      unsigned long long zone_range;
 -      unsigned long long zone_size;
 -      unsigned long long zone_skip;
 -      enum fio_memtype mem_type;
 -      unsigned int mem_align;
 -
 -      unsigned int max_latency;
 -
 -      unsigned int stonewall;
 -      unsigned int new_group;
 -      unsigned int numjobs;
 -      os_cpu_mask_t cpumask;
 -      unsigned int cpumask_set;
 -      os_cpu_mask_t verify_cpumask;
 -      unsigned int verify_cpumask_set;
 -#ifdef FIO_HAVE_LIBNUMA
 -      struct bitmask *numa_cpunodesmask;
 -      unsigned int numa_cpumask_set;
 -      unsigned short numa_mem_mode;
 -      unsigned int numa_mem_prefer_node;
 -      struct bitmask *numa_memnodesmask;
 -      unsigned int numa_memmask_set;
 -#endif
 -      unsigned int iolog;
 -      unsigned int rwmixcycle;
 -      unsigned int rwmix[2];
 -      unsigned int nice;
 -      unsigned int file_service_type;
 -      unsigned int group_reporting;
 -      unsigned int fadvise_hint;
 -      enum fio_fallocate_mode fallocate_mode;
 -      unsigned int zero_buffers;
 -      unsigned int refill_buffers;
 -      unsigned int scramble_buffers;
 -      unsigned int compress_percentage;
 -      unsigned int compress_chunk;
 -      unsigned int time_based;
 -      unsigned int disable_lat;
 -      unsigned int disable_clat;
 -      unsigned int disable_slat;
 -      unsigned int disable_bw;
 -      unsigned int gtod_reduce;
 -      unsigned int gtod_cpu;
 -      unsigned int gtod_offload;
 -      enum fio_cs clocksource;
 -      unsigned int no_stall;
 -      unsigned int trim_percentage;
 -      unsigned int trim_batch;
 -      unsigned int trim_zero;
 -      unsigned long long trim_backlog;
 -      unsigned int clat_percentiles;
 -      unsigned int overwrite_plist;
 -      fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 -
 -      char *read_iolog_file;
 -      char *write_iolog_file;
 -      char *bw_log_file;
 -      char *lat_log_file;
 -      char *iops_log_file;
 -      char *replay_redirect;
 -
 -      /*
 -       * Pre-run and post-run shell
 -       */
 -      char *exec_prerun;
 -      char *exec_postrun;
 -
 -      unsigned int rate[DDIR_RWDIR_CNT];
 -      unsigned int ratemin[DDIR_RWDIR_CNT];
 -      unsigned int ratecycle;
 -      unsigned int rate_iops[DDIR_RWDIR_CNT];
 -      unsigned int rate_iops_min[DDIR_RWDIR_CNT];
 -
 -      char *ioscheduler;
 -
 -      /*
 -       * CPU "io" cycle burner
 -       */
 -      unsigned int cpuload;
 -      unsigned int cpucycle;
 -
 -      /*
 -       * I/O Error handling
 -       */
 -      enum error_type continue_on_error;
 -
 -      /*
 -       * Benchmark profile type
 -       */
 -      char *profile;
 -
 -      /*
 -       * blkio cgroup support
 -       */
 -      char *cgroup;
 -      unsigned int cgroup_weight;
 -      unsigned int cgroup_nodelete;
 -
 -      unsigned int uid;
 -      unsigned int gid;
 -
 -      int flow_id;
 -      int flow;
 -      int flow_watermark;
 -      unsigned int flow_sleep;
 -
 -      unsigned long long offset_increment;
 -
 -      unsigned int sync_file_range;
 -};
 -
  /*
   * This describes a single thread/process executing a fio job.
   */
@@@ -66,12 -315,10 +76,12 @@@ struct thread_data 
        void *eo;
        char verror[FIO_VERROR_SIZE];
        pthread_t thread;
 -      int thread_number;
 -      int groupid;
 +      unsigned int thread_number;
 +      unsigned int groupid;
        struct thread_stat ts;
  
 +      int client_type;
 +
        struct io_log *slat_log;
        struct io_log *clat_log;
        struct io_log *lat_log;
        size_t orig_buffer_size;
        volatile int terminate;
        volatile int runstate;
 -      unsigned int ioprio;
 -      unsigned int ioprio_set;
        unsigned int last_was_sync;
        enum fio_ddir last_ddir;
  
 -      char *mmapfile;
        int mmapfd;
  
        void *iolog_buf;
         */
        struct prof_io_ops prof_io_ops;
        void *prof_data;
 +
 +      void *pinned_mem;
  };
  
  /*
@@@ -310,10 -558,13 +320,11 @@@ enum 
  
  extern int exitall_on_terminate;
  extern unsigned int thread_number;
 -extern unsigned int nr_process, nr_thread;
+ extern unsigned int stat_number;
  extern int shm_id;
  extern int groupid;
  extern int output_format;
  extern int temp_stall_ts;
 -extern unsigned long long mlock_size;
  extern uintptr_t page_mask, page_size;
  extern int read_only;
  extern int eta_print;
@@@ -344,7 -595,7 +355,7 @@@ static inline void fio_ro_check(struct 
  
  #define REAL_MAX_JOBS         2048
  
- static inline enum error_type td_error_type(enum fio_ddir ddir, int err)
+ static inline enum error_type_bit td_error_type(enum fio_ddir ddir, int err)
  {
        if (err == EILSEQ)
                return ERROR_TYPE_VERIFY_BIT;
@@@ -394,10 -645,9 +405,10 @@@ static inline int should_fsync(struct t
  /*
   * Init/option functions
   */
 +extern int __must_check fio_init_options(void);
  extern int __must_check parse_options(int, char **);
 -extern int parse_jobs_ini(char *, int, int);
 -extern int parse_cmd_line(int, char **);
 +extern int parse_jobs_ini(char *, int, int, int);
 +extern int parse_cmd_line(int, char **, int);
  extern int fio_backend(void);
  extern void reset_fio_state(void);
  extern void clear_io_state(struct thread_data *);
@@@ -412,14 -662,10 +423,14 @@@ extern void fio_options_dup_and_init(st
  extern void fio_options_mem_dupe(struct thread_data *);
  extern void options_mem_dupe(void *data, struct fio_option *options);
  extern void td_fill_rand_seeds(struct thread_data *);
 -extern void add_job_opts(const char **);
 +extern void add_job_opts(const char **, int);
  extern char *num2str(unsigned long, int, int, int);
  extern int ioengine_load(struct thread_data *);
  
 +extern unsigned long page_mask;
 +extern unsigned long page_size;
 +extern int initialize_fio(char *envp[]);
 +
  #define FIO_GETOPT_JOB                0x89000000
  #define FIO_GETOPT_IOENGINE   0x98000000
  #define FIO_NR_OPTIONS                (FIO_MAX_OPTS + 128)
   */
  extern void print_thread_status(void);
  extern void print_status_init(int);
 +extern char *fio_uint_to_kmg(unsigned int val);
  
  /*
   * Thread life cycle. Once a thread has a runstate beyond TD_INITIALIZED, it
@@@ -441,6 -686,7 +452,7 @@@ enum 
        TD_NOT_CREATED = 0,
        TD_CREATED,
        TD_INITIALIZED,
+       TD_SETTING_UP,
        TD_RAMP,
        TD_RUNNING,
        TD_PRE_READING,
@@@ -457,11 -703,10 +469,11 @@@ extern void fio_terminate_threads(int)
  /*
   * Memory helpers
   */
 -extern int __must_check fio_pin_memory(void);
 -extern void fio_unpin_memory(void);
 +extern int __must_check fio_pin_memory(struct thread_data *);
 +extern void fio_unpin_memory(struct thread_data *);
  extern int __must_check allocate_io_mem(struct thread_data *);
  extern void free_io_mem(struct thread_data *);
 +extern void free_threads_shm(void);
  
  /*
   * Reset stats after ramp time completes
@@@ -570,12 -815,16 +582,18 @@@ static inline void td_io_u_free_notify(
  extern const char *fio_get_arch_string(int);
  extern const char *fio_get_os_string(int);
  
 +#define ARRAY_SIZE(x) (sizeof((x)) / (sizeof((x)[0])))
 +
  enum {
        FIO_OUTPUT_TERSE        = 0,
        FIO_OUTPUT_JSON,
        FIO_OUTPUT_NORMAL,
  };
  
+ enum {
+       FIO_RAND_DIST_RANDOM    = 0,
+       FIO_RAND_DIST_ZIPF,
+       FIO_RAND_DIST_PARETO,
+ };
  #endif
diff --combined init.c
index 488101bfa3b9098491fbac2dccb045951873615b,a682423e9801be03a120b6d038f0b8fd6c762c1c..bdee8a21bf8e43cbdba7b106a920b57e1cc6717d
--- 1/init.c
--- 2/init.c
+++ b/init.c
@@@ -38,6 -38,7 +38,6 @@@ struct thread_data *threads = NULL
  int exitall_on_terminate = 0;
  int output_format = FIO_OUTPUT_NORMAL;
  int eta_print;
 -unsigned long long mlock_size = 0;
  FILE *f_out = NULL;
  FILE *f_err = NULL;
  char **job_sections = NULL;
@@@ -209,7 -210,7 +209,7 @@@ static struct option l_opts[FIO_NR_OPTI
        },
  };
  
 -static void free_shm(void)
 +void free_threads_shm(void)
  {
        struct shmid_ds sbuf;
  
                void *tp = threads;
  
                threads = NULL;
 +              shmdt(tp);
 +              shmctl(shm_id, IPC_RMID, &sbuf);
 +              shm_id = -1;
 +      }
 +}
 +
 +void free_shm(void)
 +{
 +      if (threads) {
                file_hash_exit();
                flow_exit();
                fio_debug_jobp = NULL;
 -              shmdt(tp);
 -              shmctl(shm_id, IPC_RMID, &sbuf);
 +              free_threads_shm();
        }
  
        scleanup();
@@@ -324,6 -317,10 +324,10 @@@ static struct thread_data *get_new_job(
        profile_add_hooks(td);
  
        td->thread_number = thread_number;
+       if (!parent || !parent->o.group_reporting)
+               stat_number++;
        return td;
  }
  
@@@ -605,7 -602,7 +609,7 @@@ static int fixup_options(struct thread_
  /*
   * This function leaks the buffer
   */
 -static char *to_kmg(unsigned int val)
 +char *fio_uint_to_kmg(unsigned int val)
  {
        char *buf = malloc(32);
        char post[] = { 0, 'K', 'M', 'G', 'P', 'E', 0 };
@@@ -766,9 -763,11 +770,9 @@@ int ioengine_load(struct thread_data *t
   * to make sure we don't have conflicts, and initializes various
   * members of td.
   */
 -static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
 +static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
 +                 int recursed, int client_type)
  {
 -      const char *ddir_str[] = { NULL, "read", "write", "rw", NULL,
 -                                 "randread", "randwrite", "randrw",
 -                                 "trim", NULL, NULL, NULL, "randtrim" };
        unsigned int i;
        char fname[PATH_MAX];
        int numjobs, file_alloced;
                return 0;
        }
  
 +      td->client_type = client_type;
 +
        if (profile_td_init(td))
                goto err;
  
        if (ioengine_load(td))
                goto err;
  
 -      if (td->o.use_thread)
 -              nr_thread++;
 -      else
 -              nr_process++;
 -
        if (td->o.odirect)
                td->io_ops->flags |= FIO_RAWIO;
  
        if (setup_rate(td))
                goto err;
  
 -      if (td->o.write_lat_log) {
 -              setup_log(&td->lat_log, td->o.log_avg_msec);
 -              setup_log(&td->slat_log, td->o.log_avg_msec);
 -              setup_log(&td->clat_log, td->o.log_avg_msec);
 +      if (td->o.lat_log_file) {
 +              setup_log(&td->lat_log, td->o.log_avg_msec, IO_LOG_TYPE_LAT);
 +              setup_log(&td->slat_log, td->o.log_avg_msec, IO_LOG_TYPE_SLAT);
 +              setup_log(&td->clat_log, td->o.log_avg_msec, IO_LOG_TYPE_CLAT);
        }
 -      if (td->o.write_bw_log)
 -              setup_log(&td->bw_log, td->o.log_avg_msec);
 -      if (td->o.write_iops_log)
 -              setup_log(&td->iops_log, td->o.log_avg_msec);
 +      if (td->o.bw_log_file)
 +              setup_log(&td->bw_log, td->o.log_avg_msec, IO_LOG_TYPE_BW);
 +      if (td->o.iops_log_file)
 +              setup_log(&td->iops_log, td->o.log_avg_msec, IO_LOG_TYPE_IOPS);
  
        if (!td->o.name)
                td->o.name = strdup(jobname);
  
        if (output_format == FIO_OUTPUT_NORMAL) {
                if (!job_add_num) {
 -                      if (!strcmp(td->io_ops->name, "cpuio")) {
 -                              log_info("%s: ioengine=cpu, cpuload=%u,"
 -                                       " cpucycle=%u\n", td->o.name,
 -                                                      td->o.cpuload,
 -                                                      td->o.cpucycle);
 -                      } else {
 +                      if (is_backend && !recursed)
 +                              fio_server_send_add_job(td);
 +
 +                      if (!(td->io_ops->flags & FIO_NOIO)) {
                                char *c1, *c2, *c3, *c4, *c5, *c6;
  
 -                              c1 = to_kmg(td->o.min_bs[DDIR_READ]);
 -                              c2 = to_kmg(td->o.max_bs[DDIR_READ]);
 -                              c3 = to_kmg(td->o.min_bs[DDIR_WRITE]);
 -                              c4 = to_kmg(td->o.max_bs[DDIR_WRITE]);
 -                              c5 = to_kmg(td->o.min_bs[DDIR_TRIM]);
 -                              c6 = to_kmg(td->o.max_bs[DDIR_TRIM]);
 +                              c1 = fio_uint_to_kmg(td->o.min_bs[DDIR_READ]);
 +                              c2 = fio_uint_to_kmg(td->o.max_bs[DDIR_READ]);
 +                              c3 = fio_uint_to_kmg(td->o.min_bs[DDIR_WRITE]);
 +                              c4 = fio_uint_to_kmg(td->o.max_bs[DDIR_WRITE]);
 +                              c5 = fio_uint_to_kmg(td->o.min_bs[DDIR_TRIM]);
 +                              c6 = fio_uint_to_kmg(td->o.max_bs[DDIR_TRIM]);
  
                                log_info("%s: (g=%d): rw=%s, bs=%s-%s/%s-%s/%s-%s,"
                                         " ioengine=%s, iodepth=%u\n",
                                                td->o.name, td->groupid,
 -                                              ddir_str[td->o.td_ddir],
 +                                              ddir_str(td->o.td_ddir),
                                                c1, c2, c3, c4, c5, c6,
                                                td->io_ops->name,
                                                td->o.iodepth);
  
                job_add_num = numjobs - 1;
  
 -              if (add_job(td_new, jobname, job_add_num))
 +              if (add_job(td_new, jobname, job_add_num, 1, client_type))
                        goto err;
        }
  
@@@ -948,7 -952,7 +952,7 @@@ err
  /*
   * Parse as if 'o' was a command line
   */
 -void add_job_opts(const char **o)
 +void add_job_opts(const char **o, int client_type)
  {
        struct thread_data *td, *td_parent;
        int i, in_global = 1;
                if (!strncmp(o[i], "name", 4)) {
                        in_global = 0;
                        if (td)
 -                              add_job(td, jobname, 0);
 +                              add_job(td, jobname, 0, 0, client_type);
                        td = NULL;
                        sprintf(jobname, "%s", o[i] + 5);
                }
        }
  
        if (td)
 -              add_job(td, jobname, 0);
 +              add_job(td, jobname, 0, 0, client_type);
  }
  
  static int skip_this_section(const char *name)
@@@ -1017,7 -1021,7 +1021,7 @@@ static int is_empty_or_comment(char *li
  /*
   * This is our [ini] type file parser.
   */
 -int parse_jobs_ini(char *file, int is_buf, int stonewall_flag)
 +int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
  {
        unsigned int global;
        struct thread_data *td;
                                for (i = 0; i < num_opts; i++)
                                        log_info("--%s ", opts[i]);
  
 -                      ret = add_job(td, name, 0);
 +                      ret = add_job(td, name, 0, 0, type);
                } else {
                        log_err("fio: job %s dropped\n", name);
                        put_job(td);
@@@ -1247,62 -1251,20 +1251,62 @@@ static void usage(const char *name
  
  #ifdef FIO_INC_DEBUG
  struct debug_level debug_levels[] = {
 -      { .name = "process",    .shift = FD_PROCESS, },
 -      { .name = "file",       .shift = FD_FILE, },
 -      { .name = "io",         .shift = FD_IO, },
 -      { .name = "mem",        .shift = FD_MEM, },
 -      { .name = "blktrace",   .shift = FD_BLKTRACE },
 -      { .name = "verify",     .shift = FD_VERIFY },
 -      { .name = "random",     .shift = FD_RANDOM },
 -      { .name = "parse",      .shift = FD_PARSE },
 -      { .name = "diskutil",   .shift = FD_DISKUTIL },
 -      { .name = "job",        .shift = FD_JOB },
 -      { .name = "mutex",      .shift = FD_MUTEX },
 -      { .name = "profile",    .shift = FD_PROFILE },
 -      { .name = "time",       .shift = FD_TIME },
 -      { .name = "net",        .shift = FD_NET },
 +      { .name = "process",
 +        .help = "Process creation/exit logging",
 +        .shift = FD_PROCESS,
 +      },
 +      { .name = "file",
 +        .help = "File related action logging",
 +        .shift = FD_FILE,
 +      },
 +      { .name = "io",
 +        .help = "IO and IO engine action logging (offsets, queue, completions, etc)",
 +        .shift = FD_IO,
 +      },
 +      { .name = "mem",
 +        .help = "Memory allocation/freeing logging",
 +        .shift = FD_MEM,
 +      },
 +      { .name = "blktrace",
 +        .help = "blktrace action logging",
 +        .shift = FD_BLKTRACE,
 +      },
 +      { .name = "verify",
 +        .help = "IO verification action logging",
 +        .shift = FD_VERIFY,
 +      },
 +      { .name = "random",
 +        .help = "Random generation logging",
 +        .shift = FD_RANDOM,
 +      },
 +      { .name = "parse",
 +        .help = "Parser logging",
 +        .shift = FD_PARSE,
 +      },
 +      { .name = "diskutil",
 +        .help = "Disk utility logging actions",
 +        .shift = FD_DISKUTIL,
 +      },
 +      { .name = "job",
 +        .help = "Logging related to creating/destroying jobs",
 +        .shift = FD_JOB,
 +      },
 +      { .name = "mutex",
 +        .help = "Mutex logging",
 +        .shift = FD_MUTEX
 +      },
 +      { .name = "profile",
 +        .help = "Logging related to profiles",
 +        .shift = FD_PROFILE,
 +      },
 +      { .name = "time",
 +        .help = "Logging related to time keeping functions",
 +        .shift = FD_TIME,
 +      },
 +      { .name = "net",
 +        .help = "Network logging",
 +        .shift = FD_NET,
 +      },
        { .name = NULL, },
  };
  
@@@ -1409,7 -1371,7 +1413,7 @@@ void parse_cmd_client(void *client, cha
        fio_client_add_cmd_option(client, opt);
  }
  
 -int parse_cmd_line(int argc, char *argv[])
 +int parse_cmd_line(int argc, char *argv[], int client_type)
  {
        struct thread_data *td = NULL;
        int c, ini_idx = 0, lidx, ret = 0, do_exit = 0, exit_val = 0;
                        char *val = optarg;
  
                        if (!strncmp(opt, "name", 4) && td) {
 -                              ret = add_job(td, td->o.name ?: "fio", 0);
 +                              ret = add_job(td, td->o.name ?: "fio", 0, 0, client_type);
                                if (ret)
                                        return 0;
                                td = NULL;
                                exit_val = 1;
                                break;
                        }
 -                      if (fio_client_add(optarg, &cur_client)) {
 +                      if (fio_client_add(&fio_client_ops, optarg, &cur_client)) {
                                log_err("fio: failed adding client %s\n", optarg);
                                do_exit++;
                                exit_val = 1;
  
        if (td) {
                if (!ret)
 -                      ret = add_job(td, td->o.name ?: "fio", 0);
 +                      ret = add_job(td, td->o.name ?: "fio", 0, 0, client_type);
        }
  
        while (!ret && optind < argc) {
        return ini_idx;
  }
  
 -int parse_options(int argc, char *argv[])
 +int fio_init_options(void)
  {
 -      int job_files, i;
 -
        f_out = stdout;
        f_err = stderr;
  
        if (fill_def_thread())
                return 1;
  
 -      job_files = parse_cmd_line(argc, argv);
 +      return 0;
 +}
 +
 +extern int fio_check_options(struct thread_options *);
 +
 +int parse_options(int argc, char *argv[])
 +{
 +      const int type = FIO_CLIENT_TYPE_CLI;
 +      int job_files, i;
 +
 +      if (fio_init_options())
 +              return 1;
 +      if (fio_test_cconv(&def_thread.o))
 +              log_err("fio: failed internal cconv test\n");
 +
 +      job_files = parse_cmd_line(argc, argv, type);
  
        if (job_files > 0) {
                for (i = 0; i < job_files; i++) {
                                        return 1;
                                free(ini_file[i]);
                        } else if (!is_backend) {
 -                              if (parse_jobs_ini(ini_file[i], 0, i))
 +                              if (parse_jobs_ini(ini_file[i], 0, i, type))
                                        return 1;
                                free(ini_file[i]);
                        }
  
        return 0;
  }
 +
 +void options_default_fill(struct thread_options *o)
 +{
 +      memcpy(o, &def_thread.o, sizeof(*o));
 +}
diff --combined io_u.c
index 347e1159fa5d5d69e48faebe31965ea508aef60c,d81fefdeefe2269e2be9df49d976cc7c0c801c22..dcb56f1a5a854b1cb5e0c924ffa67072cd8adee0
--- 1/io_u.c
--- 2/io_u.c
+++ b/io_u.c
@@@ -78,7 -78,7 +78,7 @@@ static void mark_random_map(struct thre
                                mask = -1UL;
                        else
                                mask = ((1UL << this_blocks) - 1) << bit;
 -      
 +
                        if (!(f->file_map[idx] & mask))
                                break;
  
@@@ -157,8 -157,8 +157,8 @@@ static int get_next_free_block(struct t
        return 1;
  }
  
- static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
-                               enum fio_ddir ddir, unsigned long long *b)
+ static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
+                                 enum fio_ddir ddir, unsigned long long *b)
  {
        unsigned long long rmax, r, lastb;
        int loops = 5;
@@@ -234,6 -234,36 +234,36 @@@ ret
        return 0;
  }
  
+ static int __get_next_rand_offset_zipf(struct thread_data *td,
+                                      struct fio_file *f, enum fio_ddir ddir,
+                                      unsigned long long *b)
+ {
+       *b = zipf_next(&f->zipf);
+       return 0;
+ }
+ static int __get_next_rand_offset_pareto(struct thread_data *td,
+                                        struct fio_file *f, enum fio_ddir ddir,
+                                        unsigned long long *b)
+ {
+       *b = pareto_next(&f->zipf);
+       return 0;
+ }
+ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
+                               enum fio_ddir ddir, unsigned long long *b)
+ {
+       if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
+               return __get_next_rand_offset(td, f, ddir, b);
+       else if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
+               return __get_next_rand_offset_zipf(td, f, ddir, b);
+       else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
+               return __get_next_rand_offset_pareto(td, f, ddir, b);
+       log_err("fio: unknown random distribution: %d\n", td->o.random_distribution);
+       return 1;
+ }
  static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
                               enum fio_ddir ddir, unsigned long long *b)
  {
@@@ -310,7 -340,7 +340,7 @@@ static int get_next_block(struct thread
                        ret = 1;
                }
        }
 -      
 +
        if (!ret) {
                if (offset != -1ULL)
                        io_u->offset = offset;
@@@ -383,7 -413,7 +413,7 @@@ static inline int io_u_fits(struct thre
  static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u)
  {
        const int ddir = io_u->ddir;
-       unsigned int uninitialized_var(buflen);
+       unsigned int buflen = 0;
        unsigned int minbs, maxbs;
        unsigned long r, rand_max;
  
@@@ -1315,7 -1345,7 +1345,7 @@@ static void account_io_completion(struc
                                  struct io_completion_data *icd,
                                  const enum fio_ddir idx, unsigned int bytes)
  {
-       unsigned long uninitialized_var(lusec);
+       unsigned long lusec = 0;
  
        if (!td->o.disable_clat || !td->o.disable_bw)
                lusec = utime_since(&io_u->issue_time, &icd->time);
  
                tusec = utime_since(&io_u->start_time, &icd->time);
                add_lat_sample(td, idx, tusec, bytes);
+               if (td->o.max_latency && tusec > td->o.max_latency) {
+                       if (!td->error)
+                               log_err("fio: latency of %lu usec exceeds specified max (%u usec)\n", tusec, td->o.max_latency);
+                       td_verror(td, ETIMEDOUT, "max latency exceeded");
+                       icd->error = ETIMEDOUT;
+               }
        }
  
        if (!td->o.disable_clat) {
@@@ -1351,11 -1388,6 +1388,6 @@@ static long long usec_for_io(struct thr
  static void io_completed(struct thread_data *td, struct io_u *io_u,
                         struct io_completion_data *icd)
  {
-       /*
-        * Older gcc's are too dumb to realize that usec is always used
-        * initialized, silence that warning.
-        */
-       unsigned long uninitialized_var(usec);
        struct fio_file *f;
  
        dprint_io_u(io_u, "io complete");
diff --combined lib/rbtree.c
index 7cff649821e85260014efc0aca4916221575962b,0000000000000000000000000000000000000000..883bc7231d0905b114e45fb57c9a0b525e252b98
mode 100644,000000..100644
--- /dev/null
@@@ -1,302 -1,0 +1,333 @@@
 +/*
 +  Red Black Trees
 +  (C) 1999  Andrea Arcangeli <andrea@suse.de>
 +  (C) 2002  David Woodhouse <dwmw2@infradead.org>
 +  
 +  This program is free software; you can redistribute it and/or modify
 +  it under the terms of the GNU General Public License as published by
 +  the Free Software Foundation; either version 2 of the License, or
 +  (at your option) any later version.
 +
 +  This program is distributed in the hope that it will be useful,
 +  but WITHOUT ANY WARRANTY; without even the implied warranty of
 +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +  GNU General Public License for more details.
 +
 +  You should have received a copy of the GNU General Public License
 +  along with this program; if not, write to the Free Software
 +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 +
 +  linux/lib/rbtree.c
 +*/
 +
 +#include "rbtree.h"
 +
 +static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *right = node->rb_right;
 +      struct rb_node *parent = rb_parent(node);
 +
 +      if ((node->rb_right = right->rb_left))
 +              rb_set_parent(right->rb_left, node);
 +      right->rb_left = node;
 +
 +      rb_set_parent(right, parent);
 +
 +      if (parent)
 +      {
 +              if (node == parent->rb_left)
 +                      parent->rb_left = right;
 +              else
 +                      parent->rb_right = right;
 +      }
 +      else
 +              root->rb_node = right;
 +      rb_set_parent(node, right);
 +}
 +
 +static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *left = node->rb_left;
 +      struct rb_node *parent = rb_parent(node);
 +
 +      if ((node->rb_left = left->rb_right))
 +              rb_set_parent(left->rb_right, node);
 +      left->rb_right = node;
 +
 +      rb_set_parent(left, parent);
 +
 +      if (parent)
 +      {
 +              if (node == parent->rb_right)
 +                      parent->rb_right = left;
 +              else
 +                      parent->rb_left = left;
 +      }
 +      else
 +              root->rb_node = left;
 +      rb_set_parent(node, left);
 +}
 +
 +void rb_insert_color(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *parent, *gparent;
 +
 +      while ((parent = rb_parent(node)) && rb_is_red(parent))
 +      {
 +              gparent = rb_parent(parent);
 +
 +              if (parent == gparent->rb_left)
 +              {
 +                      {
 +                              register struct rb_node *uncle = gparent->rb_right;
 +                              if (uncle && rb_is_red(uncle))
 +                              {
 +                                      rb_set_black(uncle);
 +                                      rb_set_black(parent);
 +                                      rb_set_red(gparent);
 +                                      node = gparent;
 +                                      continue;
 +                              }
 +                      }
 +
 +                      if (parent->rb_right == node)
 +                      {
 +                              register struct rb_node *tmp;
 +                              __rb_rotate_left(parent, root);
 +                              tmp = parent;
 +                              parent = node;
 +                              node = tmp;
 +                      }
 +
 +                      rb_set_black(parent);
 +                      rb_set_red(gparent);
 +                      __rb_rotate_right(gparent, root);
 +              } else {
 +                      {
 +                              register struct rb_node *uncle = gparent->rb_left;
 +                              if (uncle && rb_is_red(uncle))
 +                              {
 +                                      rb_set_black(uncle);
 +                                      rb_set_black(parent);
 +                                      rb_set_red(gparent);
 +                                      node = gparent;
 +                                      continue;
 +                              }
 +                      }
 +
 +                      if (parent->rb_left == node)
 +                      {
 +                              register struct rb_node *tmp;
 +                              __rb_rotate_right(parent, root);
 +                              tmp = parent;
 +                              parent = node;
 +                              node = tmp;
 +                      }
 +
 +                      rb_set_black(parent);
 +                      rb_set_red(gparent);
 +                      __rb_rotate_left(gparent, root);
 +              }
 +      }
 +
 +      rb_set_black(root->rb_node);
 +}
 +
 +static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
 +                           struct rb_root *root)
 +{
 +      struct rb_node *other;
 +
 +      while ((!node || rb_is_black(node)) && node != root->rb_node)
 +      {
 +              if (parent->rb_left == node)
 +              {
 +                      other = parent->rb_right;
 +                      if (rb_is_red(other))
 +                      {
 +                              rb_set_black(other);
 +                              rb_set_red(parent);
 +                              __rb_rotate_left(parent, root);
 +                              other = parent->rb_right;
 +                      }
 +                      if ((!other->rb_left || rb_is_black(other->rb_left)) &&
 +                          (!other->rb_right || rb_is_black(other->rb_right)))
 +                      {
 +                              rb_set_red(other);
 +                              node = parent;
 +                              parent = rb_parent(node);
 +                      }
 +                      else
 +                      {
 +                              if (!other->rb_right || rb_is_black(other->rb_right))
 +                              {
 +                                      struct rb_node *o_left;
 +                                      if ((o_left = other->rb_left))
 +                                              rb_set_black(o_left);
 +                                      rb_set_red(other);
 +                                      __rb_rotate_right(other, root);
 +                                      other = parent->rb_right;
 +                              }
 +                              rb_set_color(other, rb_color(parent));
 +                              rb_set_black(parent);
 +                              if (other->rb_right)
 +                                      rb_set_black(other->rb_right);
 +                              __rb_rotate_left(parent, root);
 +                              node = root->rb_node;
 +                              break;
 +                      }
 +              }
 +              else
 +              {
 +                      other = parent->rb_left;
 +                      if (rb_is_red(other))
 +                      {
 +                              rb_set_black(other);
 +                              rb_set_red(parent);
 +                              __rb_rotate_right(parent, root);
 +                              other = parent->rb_left;
 +                      }
 +                      if ((!other->rb_left || rb_is_black(other->rb_left)) &&
 +                          (!other->rb_right || rb_is_black(other->rb_right)))
 +                      {
 +                              rb_set_red(other);
 +                              node = parent;
 +                              parent = rb_parent(node);
 +                      }
 +                      else
 +                      {
 +                              if (!other->rb_left || rb_is_black(other->rb_left))
 +                              {
 +                                      register struct rb_node *o_right;
 +                                      if ((o_right = other->rb_right))
 +                                              rb_set_black(o_right);
 +                                      rb_set_red(other);
 +                                      __rb_rotate_left(other, root);
 +                                      other = parent->rb_left;
 +                              }
 +                              rb_set_color(other, rb_color(parent));
 +                              rb_set_black(parent);
 +                              if (other->rb_left)
 +                                      rb_set_black(other->rb_left);
 +                              __rb_rotate_right(parent, root);
 +                              node = root->rb_node;
 +                              break;
 +                      }
 +              }
 +      }
 +      if (node)
 +              rb_set_black(node);
 +}
 +
 +void rb_erase(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *child, *parent;
 +      int color;
 +
 +      if (!node->rb_left)
 +              child = node->rb_right;
 +      else if (!node->rb_right)
 +              child = node->rb_left;
 +      else
 +      {
 +              struct rb_node *old = node, *left;
 +
 +              node = node->rb_right;
 +              while ((left = node->rb_left) != NULL)
 +                      node = left;
 +              child = node->rb_right;
 +              parent = rb_parent(node);
 +              color = rb_color(node);
 +
 +              if (child)
 +                      rb_set_parent(child, parent);
 +              if (parent == old) {
 +                      parent->rb_right = child;
 +                      parent = node;
 +              } else
 +                      parent->rb_left = child;
 +
 +              node->rb_parent_color = old->rb_parent_color;
 +              node->rb_right = old->rb_right;
 +              node->rb_left = old->rb_left;
 +
 +              if (rb_parent(old))
 +              {
 +                      if (rb_parent(old)->rb_left == old)
 +                              rb_parent(old)->rb_left = node;
 +                      else
 +                              rb_parent(old)->rb_right = node;
 +              } else
 +                      root->rb_node = node;
 +
 +              rb_set_parent(old->rb_left, node);
 +              if (old->rb_right)
 +                      rb_set_parent(old->rb_right, node);
 +              goto color;
 +      }
 +
 +      parent = rb_parent(node);
 +      color = rb_color(node);
 +
 +      if (child)
 +              rb_set_parent(child, parent);
 +      if (parent)
 +      {
 +              if (parent->rb_left == node)
 +                      parent->rb_left = child;
 +              else
 +                      parent->rb_right = child;
 +      }
 +      else
 +              root->rb_node = child;
 +
 + color:
 +      if (color == RB_BLACK)
 +              __rb_erase_color(child, parent, root);
 +}
 +
 +/*
 + * This function returns the first node (in sort order) of the tree.
 + */
 +struct rb_node *rb_first(struct rb_root *root)
 +{
 +      struct rb_node  *n;
 +
 +      n = root->rb_node;
 +      if (!n)
 +              return NULL;
 +      while (n->rb_left)
 +              n = n->rb_left;
 +      return n;
 +}
++
++struct rb_node *rb_next(const struct rb_node *node)
++{
++      struct rb_node *parent;
++
++      if (RB_EMPTY_NODE(node))
++              return NULL;
++
++      /*
++       * If we have a right-hand child, go down and then left as far
++       * as we can.
++       */
++      if (node->rb_right) {
++              node = node->rb_right; 
++              while (node->rb_left)
++                      node=node->rb_left;
++              return (struct rb_node *)node;
++      }
++
++      /*
++       * No right-hand children. Everything down and left is smaller than us,
++       * so any 'next' node must be in the general direction of our parent.
++       * Go up the tree; any time the ancestor is a right-hand child of its
++       * parent, keep going up. First time it's a left-hand child of its
++       * parent, said parent is our 'next' node.
++       */
++      while ((parent = rb_parent(node)) && node == parent->rb_right)
++              node = parent;
++
++      return parent;
++}
diff --combined lib/rbtree.h
index 7563725e51a90e37cff54e81c546ed30b151b28f,0000000000000000000000000000000000000000..c6cfe4a9384d8041c978d7cef4b31cc587125e65
mode 100644,000000..100644
--- /dev/null
@@@ -1,154 -1,0 +1,155 @@@
 +/*
 +  Red Black Trees
 +  (C) 1999  Andrea Arcangeli <andrea@suse.de>
 +  
 +  This program is free software; you can redistribute it and/or modify
 +  it under the terms of the GNU General Public License as published by
 +  the Free Software Foundation; either version 2 of the License, or
 +  (at your option) any later version.
 +
 +  This program is distributed in the hope that it will be useful,
 +  but WITHOUT ANY WARRANTY; without even the implied warranty of
 +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +  GNU General Public License for more details.
 +
 +  You should have received a copy of the GNU General Public License
 +  along with this program; if not, write to the Free Software
 +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 +
 +  linux/include/linux/rbtree.h
 +
 +  To use rbtrees you'll have to implement your own insert and search cores.
 +  This avoids the use of callbacks, which would reduce performance
 +  dramatically. I know it's not the cleanest way, but in C (unlike C++)
 +  it is the way to get both performance and genericity...
 +
 +  An example of insert and search follows here. The search is a plain,
 +  ordinary search over an ordered tree. The insert, however, must be done
 +  in two steps: first the code must insert the element in
 +  order as a red leaf in the tree, then the support library function
 +  rb_insert_color() must be called. That function will do the
 +  non-trivial work of rebalancing the rbtree if necessary.
 +
 +-----------------------------------------------------------------------
 +static inline struct page * rb_search_page_cache(struct inode * inode,
 +                                               unsigned long offset)
 +{
 +      struct rb_node * n = inode->i_rb_page_cache.rb_node;
 +      struct page * page;
 +
 +      while (n)
 +      {
 +              page = rb_entry(n, struct page, rb_page_cache);
 +
 +              if (offset < page->offset)
 +                      n = n->rb_left;
 +              else if (offset > page->offset)
 +                      n = n->rb_right;
 +              else
 +                      return page;
 +      }
 +      return NULL;
 +}
 +
 +static inline struct page * __rb_insert_page_cache(struct inode * inode,
 +                                                 unsigned long offset,
 +                                                 struct rb_node * node)
 +{
 +      struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
 +      struct rb_node * parent = NULL;
 +      struct page * page;
 +
 +      while (*p)
 +      {
 +              parent = *p;
 +              page = rb_entry(parent, struct page, rb_page_cache);
 +
 +              if (offset < page->offset)
 +                      p = &(*p)->rb_left;
 +              else if (offset > page->offset)
 +                      p = &(*p)->rb_right;
 +              else
 +                      return page;
 +      }
 +
 +      rb_link_node(node, parent, p);
 +
 +      return NULL;
 +}
 +
 +static inline struct page * rb_insert_page_cache(struct inode * inode,
 +                                               unsigned long offset,
 +                                               struct rb_node * node)
 +{
 +      struct page * ret;
 +      if ((ret = __rb_insert_page_cache(inode, offset, node)))
 +              goto out;
 +      rb_insert_color(node, &inode->i_rb_page_cache);
 + out:
 +      return ret;
 +}
 +-----------------------------------------------------------------------
 +*/
 +
 +#ifndef       _LINUX_RBTREE_H
 +#define       _LINUX_RBTREE_H
 +
 +#include <stdlib.h>
 +#include <inttypes.h>
 +
 +struct rb_node
 +{
 +      intptr_t rb_parent_color;
 +#define       RB_RED          0
 +#define       RB_BLACK        1
 +      struct rb_node *rb_right;
 +      struct rb_node *rb_left;
 +} __attribute__((aligned(sizeof(long))));
 +    /* The alignment might seem pointless, but allegedly CRIS needs it */
 +
 +struct rb_root
 +{
 +      struct rb_node *rb_node;
 +};
 +
 +
 +#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
 +#define rb_color(r)   ((r)->rb_parent_color & 1)
 +#define rb_is_red(r)   (!rb_color(r))
 +#define rb_is_black(r) rb_color(r)
 +#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
 +#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
 +
 +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
 +{
 +      rb->rb_parent_color = (rb->rb_parent_color & 3) | (uintptr_t)p;
 +}
 +static inline void rb_set_color(struct rb_node *rb, int color)
 +{
 +      rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
 +}
 +
 +#define RB_ROOT       (struct rb_root) { NULL, }
 +#define       rb_entry(ptr, type, member) container_of(ptr, type, member)
 +
 +#define RB_EMPTY_ROOT(root)   ((root)->rb_node == NULL)
 +#define RB_EMPTY_NODE(node)   (rb_parent(node) == node)
 +#define RB_CLEAR_NODE(node)   (rb_set_parent(node, node))
 +
 +extern void rb_insert_color(struct rb_node *, struct rb_root *);
 +extern void rb_erase(struct rb_node *, struct rb_root *);
 +
 +/* Find the first node and the logical next node in a tree */
 +extern struct rb_node *rb_first(struct rb_root *);
++extern struct rb_node *rb_next(const struct rb_node *);
 +
 +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
 +                              struct rb_node ** rb_link)
 +{
 +      node->rb_parent_color = (uintptr_t)parent;
 +      node->rb_left = node->rb_right = NULL;
 +
 +      *rb_link = node;
 +}
 +
 +#endif        /* _LINUX_RBTREE_H */
diff --combined options.c
index eb7c596a1b96d04f3e2d387fd3ee0eefc337cd66,ae8598825e048a783da985bf9c6d89b8fb2530a2..9ee606037402a0d855093d88a51f017acc8f75fb
+++ b/options.c
@@@ -37,7 -37,7 +37,7 @@@ static int converthexchartoint(char a
  {
        int base;
  
 -      switch(a) {
 +      switch (a) {
        case '0'...'9':
                base = '0';
                break;
@@@ -50,7 -50,7 +50,7 @@@
        default:
                base = 0;
        }
 -      return (a - base);
 +      return a - base;
  }
  
  static int bs_cmp(const void *p1, const void *p2)
@@@ -61,7 -61,7 +61,7 @@@
        return bsp1->perc < bsp2->perc;
  }
  
 -static int bssplit_ddir(struct thread_data *td, int ddir, char *str)
 +static int bssplit_ddir(struct thread_options *o, int ddir, char *str)
  {
        struct bssplit *bssplit;
        unsigned int i, perc, perc_missing;
@@@ -69,7 -69,7 +69,7 @@@
        long long val;
        char *fname;
  
 -      td->o.bssplit_nr[ddir] = 4;
 +      o->bssplit_nr[ddir] = 4;
        bssplit = malloc(4 * sizeof(struct bssplit));
  
        i = 0;
@@@ -84,9 -84,9 +84,9 @@@
                /*
                 * grow struct buffer, if needed
                 */
 -              if (i == td->o.bssplit_nr[ddir]) {
 -                      td->o.bssplit_nr[ddir] <<= 1;
 -                      bssplit = realloc(bssplit, td->o.bssplit_nr[ddir]
 +              if (i == o->bssplit_nr[ddir]) {
 +                      o->bssplit_nr[ddir] <<= 1;
 +                      bssplit = realloc(bssplit, o->bssplit_nr[ddir]
                                                  * sizeof(struct bssplit));
                }
  
                } else
                        perc = -1;
  
 -              if (str_to_decimal(fname, &val, 1, td)) {
 +              if (str_to_decimal(fname, &val, 1, o)) {
                        log_err("fio: bssplit conversion failed\n");
 -                      free(td->o.bssplit);
 +                      free(o->bssplit);
                        return 1;
                }
  
                i++;
        }
  
 -      td->o.bssplit_nr[ddir] = i;
 +      o->bssplit_nr[ddir] = i;
  
        /*
         * Now check if the percentages add up, and how much is missing
         */
        perc = perc_missing = 0;
 -      for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
 +      for (i = 0; i < o->bssplit_nr[ddir]; i++) {
                struct bssplit *bsp = &bssplit[i];
  
                if (bsp->perc == (unsigned char) -1)
         * them.
         */
        if (perc_missing) {
 -              for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
 +              for (i = 0; i < o->bssplit_nr[ddir]; i++) {
                        struct bssplit *bsp = &bssplit[i];
  
                        if (bsp->perc == (unsigned char) -1)
                }
        }
  
 -      td->o.min_bs[ddir] = min_bs;
 -      td->o.max_bs[ddir] = max_bs;
 +      o->min_bs[ddir] = min_bs;
 +      o->max_bs[ddir] = max_bs;
  
        /*
         * now sort based on percentages, for ease of lookup
         */
 -      qsort(bssplit, td->o.bssplit_nr[ddir], sizeof(struct bssplit), bs_cmp);
 -      td->o.bssplit[ddir] = bssplit;
 +      qsort(bssplit, o->bssplit_nr[ddir], sizeof(struct bssplit), bs_cmp);
 +      o->bssplit[ddir] = bssplit;
        return 0;
 -
  }
  
  static int str_bssplit_cb(void *data, const char *input)
        if (odir) {
                ddir = strchr(odir + 1, ',');
                if (ddir) {
 -                      ret = bssplit_ddir(td, DDIR_TRIM, ddir + 1);
 +                      ret = bssplit_ddir(&td->o, DDIR_TRIM, ddir + 1);
                        if (!ret)
                                *ddir = '\0';
                } else {
                        char *op;
  
                        op = strdup(odir + 1);
 -                      ret = bssplit_ddir(td, DDIR_TRIM, op);
 +                      ret = bssplit_ddir(&td->o, DDIR_TRIM, op);
  
                        free(op);
                }
 -              if (!ret) 
 -                      ret = bssplit_ddir(td, DDIR_WRITE, odir + 1);
 +              if (!ret)
 +                      ret = bssplit_ddir(&td->o, DDIR_WRITE, odir + 1);
                if (!ret) {
                        *odir = '\0';
 -                      ret = bssplit_ddir(td, DDIR_READ, str);
 +                      ret = bssplit_ddir(&td->o, DDIR_READ, str);
                }
        } else {
                char *op;
  
                op = strdup(str);
 -              ret = bssplit_ddir(td, DDIR_WRITE, op);
 +              ret = bssplit_ddir(&td->o, DDIR_WRITE, op);
                free(op);
  
                if (!ret) {
                        op = strdup(str);
 -                      ret = bssplit_ddir(td, DDIR_TRIM, op);
 +                      ret = bssplit_ddir(&td->o, DDIR_TRIM, op);
                        free(op);
                }
 -              ret = bssplit_ddir(td, DDIR_READ, str);
 +              ret = bssplit_ddir(&td->o, DDIR_READ, str);
        }
  
        free(p);
  
  static int str2error(char *str)
  {
 -      const char * err[] = {"EPERM", "ENOENT", "ESRCH", "EINTR", "EIO",
 +      const char *err[] = { "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO",
                            "ENXIO", "E2BIG", "ENOEXEC", "EBADF",
                            "ECHILD", "EAGAIN", "ENOMEM", "EACCES",
                            "EFAULT", "ENOTBLK", "EBUSY", "EEXIST",
                            "EXDEV", "ENODEV", "ENOTDIR", "EISDIR",
                            "EINVAL", "ENFILE", "EMFILE", "ENOTTY",
                            "ETXTBSY","EFBIG", "ENOSPC", "ESPIPE",
 -                          "EROFS","EMLINK", "EPIPE", "EDOM", "ERANGE"};
 +                          "EROFS","EMLINK", "EPIPE", "EDOM", "ERANGE" };
        int i = 0, num = sizeof(err) / sizeof(void *);
  
 -      while( i < num) {
 +      while (i < num) {
                if (!strcmp(err[i], str))
                        return i + 1;
                i++;
@@@ -311,27 -312,26 +311,27 @@@ static int str_ignore_error_cb(void *da
  static int str_rw_cb(void *data, const char *str)
  {
        struct thread_data *td = data;
 +      struct thread_options *o = &td->o;
        char *nr = get_opt_postfix(str);
  
 -      td->o.ddir_seq_nr = 1;
 -      td->o.ddir_seq_add = 0;
 +      o->ddir_seq_nr = 1;
 +      o->ddir_seq_add = 0;
  
        if (!nr)
                return 0;
  
        if (td_random(td))
 -              td->o.ddir_seq_nr = atoi(nr);
 +              o->ddir_seq_nr = atoi(nr);
        else {
                long long val;
  
 -              if (str_to_decimal(nr, &val, 1, td)) {
 +              if (str_to_decimal(nr, &val, 1, o)) {
                        log_err("fio: rw postfix parsing failed\n");
                        free(nr);
                        return 1;
                }
  
 -              td->o.ddir_seq_add = val;
 +              o->ddir_seq_add = val;
        }
  
        free(nr);
  static int str_mem_cb(void *data, const char *mem)
  {
        struct thread_data *td = data;
 +      struct thread_options *o = &td->o;
  
 -      if (td->o.mem_type == MEM_MMAPHUGE || td->o.mem_type == MEM_MMAP) {
 -              td->mmapfile = get_opt_postfix(mem);
 -              if (td->o.mem_type == MEM_MMAPHUGE && !td->mmapfile) {
 +      if (o->mem_type == MEM_MMAPHUGE || o->mem_type == MEM_MMAP) {
 +              o->mmapfile = get_opt_postfix(mem);
 +              if (o->mem_type == MEM_MMAPHUGE && !o->mmapfile) {
                        log_err("fio: mmaphuge:/path/to/file\n");
                        return 1;
                }
        return 0;
  }
  
 -static int str_verify_cb(void *data, const char *mem)
 -{
 -      struct thread_data *td = data;
 -
 -      if (td->o.verify == VERIFY_CRC32C_INTEL ||
 -          td->o.verify == VERIFY_CRC32C) {
 -              crc32c_intel_probe();
 -      }
 -
 -      return 0;
 -}
 -
  static int fio_clock_source_cb(void *data, const char *str)
  {
        struct thread_data *td = data;
        return 0;
  }
  
 -static int str_lockmem_cb(void fio_unused *data, unsigned long long *val)
 -{
 -      mlock_size = *val;
 -      return 0;
 -}
 -
  static int str_rwmix_read_cb(void *data, unsigned long long *val)
  {
        struct thread_data *td = data;
@@@ -381,6 -398,40 +381,6 @@@ static int str_rwmix_write_cb(void *dat
        return 0;
  }
  
 -#ifdef FIO_HAVE_IOPRIO
 -static int str_prioclass_cb(void *data, unsigned long long *val)
 -{
 -      struct thread_data *td = data;
 -      unsigned short mask;
 -
 -      /*
 -       * mask off old class bits, str_prio_cb() may have set a default class
 -       */
 -      mask = (1 << IOPRIO_CLASS_SHIFT) - 1;
 -      td->ioprio &= mask;
 -
 -      td->ioprio |= *val << IOPRIO_CLASS_SHIFT;
 -      td->ioprio_set = 1;
 -      return 0;
 -}
 -
 -static int str_prio_cb(void *data, unsigned long long *val)
 -{
 -      struct thread_data *td = data;
 -
 -      td->ioprio |= *val;
 -
 -      /*
 -       * If no class is set, assume BE
 -       */
 -      if ((td->ioprio >> IOPRIO_CLASS_SHIFT) == 0)
 -              td->ioprio |= IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT;
 -
 -      td->ioprio_set = 1;
 -      return 0;
 -}
 -#endif
 -
  static int str_exitall_cb(void)
  {
        exitall_on_terminate = 1;
@@@ -513,6 -564,140 +513,130 @@@ static int str_verify_cpus_allowed_cb(v
  }
  #endif
  
 -#ifdef FIO_HAVE_TRIM
 -static int str_verify_trim_cb(void *data, unsigned long long *val)
 -{
 -      struct thread_data *td = data;
 -
 -      td->o.trim_percentage = *val;
 -      return 0;
 -}
 -#endif
 -
+ #ifdef FIO_HAVE_LIBNUMA
+ static int str_numa_cpunodes_cb(void *data, char *input)
+ {
+       struct thread_data *td = data;
+       /* numa_parse_nodestring() parses a character string list
+        * of nodes into a bit mask. The bit mask is allocated by
+        * numa_allocate_nodemask(), so it should be freed by
+        * numa_free_nodemask().
+        */
+       td->o.numa_cpunodesmask = numa_parse_nodestring(input);
+       if (td->o.numa_cpunodesmask == NULL) {
+               log_err("fio: numa_parse_nodestring failed\n");
+               td_verror(td, 1, "str_numa_cpunodes_cb");
+               return 1;
+       }
+       td->o.numa_cpumask_set = 1;
+       return 0;
+ }
+ static int str_numa_mpol_cb(void *data, char *input)
+ {
+       struct thread_data *td = data;
+       const char * const policy_types[] =
+               { "default", "prefer", "bind", "interleave", "local" };
+       int i;
+       char *nodelist = strchr(input, ':');
+       if (nodelist) {
+               /* NUL-terminate mode */
+               *nodelist++ = '\0';
+       }
+       for (i = 0; i <= MPOL_LOCAL; i++) {
+               if (!strcmp(input, policy_types[i])) {
+                       td->o.numa_mem_mode = i;
+                       break;
+               }
+       }
+       if (i > MPOL_LOCAL) {
+               log_err("fio: memory policy should be: default, prefer, bind, interleave, local\n");
+               goto out;
+       }
+       switch (td->o.numa_mem_mode) {
+       case MPOL_PREFERRED:
+               /*
+                * Insist on a nodelist of one node only
+                */
+               if (nodelist) {
+                       char *rest = nodelist;
+                       while (isdigit(*rest))
+                               rest++;
+                       if (*rest) {
+                               log_err("fio: one node only for \'prefer\'\n");
+                               goto out;
+                       }
+               } else {
+                       log_err("fio: one node is needed for \'prefer\'\n");
+                       goto out;
+               }
+               break;
+       case MPOL_INTERLEAVE:
+               /*
+                * Default to online nodes with memory if no nodelist
+                */
+               if (!nodelist)
+                       nodelist = strdup("all");
+               break;
+       case MPOL_LOCAL:
+       case MPOL_DEFAULT:
+               /*
+                * Don't allow a nodelist
+                */
+               if (nodelist) {
+                       log_err("fio: NO nodelist for \'local\'\n");
+                       goto out;
+               }
+               break;
+       case MPOL_BIND:
+               /*
+                * Insist on a nodelist
+                */
+               if (!nodelist) {
+                       log_err("fio: a nodelist is needed for \'bind\'\n");
+                       goto out;
+               }
+               break;
+       }
+       /* numa_parse_nodestring() parses a character string list
+        * of nodes into a bit mask. The bit mask is allocated by
+        * numa_allocate_nodemask(), so it should be freed by
+        * numa_free_nodemask().
+        */
+       switch (td->o.numa_mem_mode) {
+       case MPOL_PREFERRED:
+               td->o.numa_mem_prefer_node = atoi(nodelist);
+               break;
+       case MPOL_INTERLEAVE:
+       case MPOL_BIND:
+               td->o.numa_memnodesmask = numa_parse_nodestring(nodelist);
+               if (td->o.numa_memnodesmask == NULL) {
+                       log_err("fio: numa_parse_nodestring failed\n");
+                       td_verror(td, 1, "str_numa_memnodes_cb");
+                       return 1;
+               }
+               break;
+       case MPOL_LOCAL:
+       case MPOL_DEFAULT:
+       default:
+               break;
+       }
+       td->o.numa_memmask_set = 1;
+       return 0;
+ out:
+       return 1;
+ }
+ #endif
  static int str_fst_cb(void *data, const char *str)
  {
        struct thread_data *td = data;
@@@ -543,6 -728,84 +667,45 @@@ static int str_sfr_cb(void *data, cons
  }
  #endif
  
 -              td->o.zipf_theta = val;
+ static int str_random_distribution_cb(void *data, const char *str)
+ {  /* .cb of the "random_distribution" option; returns 0 on success, 1 on a bad postfix value */
+       struct thread_data *td = data;
+       double val;
+       char *nr;  /* result of get_opt_postfix(str); released with free() below */
+       if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
+               val = 1.1;  /* zipf value kept when get_opt_postfix() returns NULL */
+       else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
+               val = 0.2;  /* pareto value kept when get_opt_postfix() returns NULL */
+       else
+               return 0;  /* neither zipf nor pareto: nothing to parse or store */
+       nr = get_opt_postfix(str);
+       if (nr && !str_to_float(nr, &val)) {
+               log_err("fio: random postfix parsing failed\n");
+               free(nr);
+               return 1;
+       }
+       free(nr);  /* nr may be NULL here; free(NULL) is a no-op */
+       if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) {
+               if (val == 1.00) {  /* only exactly 1.0 is refused for zipf */
+                       log_err("fio: zipf theta must different than 1.0\n");
+                       return 1;
+               }
 -              td->o.pareto_h = val;
 -      }
 -
 -      return 0;
 -}
 -
 -static int check_dir(struct thread_data *td, char *fname)
 -{
 -#if 0
 -      char file[PATH_MAX], *dir;
 -      int elen = 0;
 -
 -      if (td->o.directory) {
 -              strcpy(file, td->o.directory);
 -              strcat(file, "/");
 -              elen = strlen(file);
 -      }
 -
 -      sprintf(file + elen, "%s", fname);
 -      dir = dirname(file);
 -
 -      {
 -      struct stat sb;
 -      /*
 -       * We can't do this on FIO_DISKLESSIO engines. The engine isn't loaded
 -       * yet, so we can't do this check right here...
 -       */
 -      if (lstat(dir, &sb) < 0) {
 -              int ret = errno;
 -
 -              log_err("fio: %s is not a directory\n", dir);
 -              td_verror(td, ret, "lstat");
 -              return 1;
 -      }
 -
 -      if (!S_ISDIR(sb.st_mode)) {
 -              log_err("fio: %s is not a directory\n", dir);
 -              return 1;
 -      }
++              td->o.zipf_theta.u.f = val;
+       } else {
+               if (val <= 0.00 || val >= 1.00) {  /* pareto value must lie strictly between 0 and 1 */
+                       log_err("fio: pareto input out of range (0 < input < 1.0)\n");
+                       return 1;
+               }
 -#endif
++              td->o.pareto_h.u.f = val;
+       }
+       return 0;
+ }
  /*
   * Return next file in the string. Files are separated with ':'. If the ':'
   * is escaped with a '\', then that ':' is part of the filename and does not
@@@ -605,6 -868,10 +768,6 @@@ static int str_filename_cb(void *data, 
        while ((fname = get_next_file_name(&str)) != NULL) {
                if (!strlen(fname))
                        break;
 -              if (check_dir(td, fname)) {
 -                      free(p);
 -                      return 1;
 -              }
                add_file(td, fname);
                td->o.nr_files++;
        }
@@@ -643,12 -910,25 +806,12 @@@ static int str_opendir_cb(void *data, c
        return add_dir_files(td, td->o.opendir);
  }
  
 -static int str_verify_offset_cb(void *data, unsigned long long *off)
 -{
 -      struct thread_data *td = data;
 -
 -      if (*off && *off < sizeof(struct verify_header)) {
 -              log_err("fio: verify_offset too small\n");
 -              return 1;
 -      }
 -
 -      td->o.verify_offset = *off;
 -      return 0;
 -}
 -
  static int str_verify_pattern_cb(void *data, const char *input)
  {
        struct thread_data *td = data;
        long off;
        int i = 0, j = 0, len, k, base = 10;
 -      char* loc1, * loc2;
 +      char *loc1, *loc2;
  
        loc1 = strstr(input, "0x");
        loc2 = strstr(input, "0X");
@@@ -724,6 -1004,39 +887,6 @@@ static int str_lockfile_cb(void *data, 
        return 0;
  }
  
 -static int str_write_bw_log_cb(void *data, const char *str)
 -{
 -      struct thread_data *td = data;
 -
 -      if (str)
 -              td->o.bw_log_file = strdup(str);
 -
 -      td->o.write_bw_log = 1;
 -      return 0;
 -}
 -
 -static int str_write_lat_log_cb(void *data, const char *str)
 -{
 -      struct thread_data *td = data;
 -
 -      if (str)
 -              td->o.lat_log_file = strdup(str);
 -
 -      td->o.write_lat_log = 1;
 -      return 0;
 -}
 -
 -static int str_write_iops_log_cb(void *data, const char *str)
 -{
 -      struct thread_data *td = data;
 -
 -      if (str)
 -              td->o.iops_log_file = strdup(str);
 -
 -      td->o.write_iops_log = 1;
 -      return 0;
 -}
 -
  static int str_gtod_reduce_cb(void *data, int *il)
  {
        struct thread_data *td = data;
@@@ -805,216 -1118,54 +968,216 @@@ static int kb_base_verify(struct fio_op
        return 0;
  }
  
 +/*
 + * Option grouping: top-level option categories. Each entry pairs a name
 + * with its FIO_OPT_C_* mask bit. The entry whose name is NULL terminates
 + * the table; __opt_group_from_mask() stops scanning there.
 + */
 +static struct opt_group fio_opt_groups[] = {
 +      {
 +              .name   = "General",
 +              .mask   = FIO_OPT_C_GENERAL,
 +      },
 +      {
 +              .name   = "I/O",
 +              .mask   = FIO_OPT_C_IO,
 +      },
 +      {
 +              .name   = "File",
 +              .mask   = FIO_OPT_C_FILE,
 +      },
 +      {
 +              .name   = "Statistics",
 +              .mask   = FIO_OPT_C_STAT,
 +      },
 +      {
 +              .name   = "Logging",
 +              .mask   = FIO_OPT_C_LOG,
 +      },
 +      {
 +              .name   = "Profiles",
 +              .mask   = FIO_OPT_C_PROFILE,
 +      },
 +      {
 +              .name   = NULL,
 +      },
 +};
 +/*
 + * Find the first entry of 'ogs' (a table terminated by a NULL .name) whose
 + * mask shares a bit with *mask. On a hit, that entry's bits are cleared from
 + * *mask and the entry is returned, so repeated calls with the same mask
 + * variable hand out one matching group per call. Returns NULL when *mask is
 + * zero, equals 'inv_mask', or matches no entry.
 + */
 +static struct opt_group *__opt_group_from_mask(struct opt_group *ogs, unsigned int *mask,
 +                                             unsigned int inv_mask)
 +{
 +      struct opt_group *og;
 +      int i;
 +
 +      if (*mask == inv_mask || !*mask)
 +              return NULL;
 +
 +      for (i = 0; ogs[i].name; i++) {
 +              og = &ogs[i];
 +
 +              if (*mask & og->mask) {
 +                      *mask &= ~(og->mask);
 +                      return og;
 +              }
 +      }
 +
 +      return NULL;
 +}
 +/*
 + * Return the next category from fio_opt_groups whose bit is set in *mask,
 + * clearing that bit from *mask. Returns NULL when *mask is zero, equals
 + * FIO_OPT_C_INVALID, or matches no table entry.
 + */
 +struct opt_group *opt_group_from_mask(unsigned int *mask)
 +{
 +      return __opt_group_from_mask(fio_opt_groups, mask, FIO_OPT_C_INVALID);
 +}
 +/*
 + * Option sub-groups: each entry pairs a name with its FIO_OPT_G_* mask bit.
 + * The entry whose name is NULL terminates the table.
 + */
 +static struct opt_group fio_opt_cat_groups[] = {
 +      {
 +              .name   = "Rate",
 +              .mask   = FIO_OPT_G_RATE,
 +      },
 +      {
 +              .name   = "Zone",
 +              .mask   = FIO_OPT_G_ZONE,
 +      },
 +      {
 +              .name   = "Read/write mix",
 +              .mask   = FIO_OPT_G_RWMIX,
 +      },
 +      {
 +              .name   = "Verify",
 +              .mask   = FIO_OPT_G_VERIFY,
 +      },
 +      {
 +              .name   = "Trim",
 +              .mask   = FIO_OPT_G_TRIM,
 +      },
 +      {
 +              .name   = "I/O Logging",
 +              .mask   = FIO_OPT_G_IOLOG,
 +      },
 +      {
 +              .name   = "I/O Depth",
 +              .mask   = FIO_OPT_G_IO_DEPTH,
 +      },
 +      {
 +              .name   = "I/O Flow",
 +              .mask   = FIO_OPT_G_IO_FLOW,
 +      },
 +      {
 +              .name   = "Description",
 +              .mask   = FIO_OPT_G_DESC,
 +      },
 +      {
 +              .name   = "Filename",
 +              .mask   = FIO_OPT_G_FILENAME,
 +      },
 +      {
 +              .name   = "General I/O",
 +              .mask   = FIO_OPT_G_IO_BASIC,
 +      },
 +      {
 +              .name   = "Cgroups",
 +              .mask   = FIO_OPT_G_CGROUP,
 +      },
 +      {
 +              .name   = "Runtime",
 +              .mask   = FIO_OPT_G_RUNTIME,
 +      },
 +      {
 +              .name   = "Process",
 +              .mask   = FIO_OPT_G_PROCESS,
 +      },
 +      {
 +              .name   = "Job credentials / priority",
 +              .mask   = FIO_OPT_G_CRED,
 +      },
 +      {
 +              .name   = "Clock settings",
 +              .mask   = FIO_OPT_G_CLOCK,
 +      },
 +      {
 +              .name   = "I/O Type",
 +              .mask   = FIO_OPT_G_IO_TYPE,
 +      },
 +      {
 +              .name   = "I/O Thinktime",
 +              .mask   = FIO_OPT_G_THINKTIME,
 +      },
 +      {
 +              .name   = "Randomizations",
 +              .mask   = FIO_OPT_G_RANDOM,
 +      },
 +      {
 +              .name   = "I/O buffers",
 +              .mask   = FIO_OPT_G_IO_BUF,
 +      },
 +      {
 +              .name   = "Tiobench profile",
 +              .mask   = FIO_OPT_G_TIOBENCH,
 +      },
 +
 +      {
 +              .name   = NULL,
 +      }
 +};
 +/*
 + * Return the next sub-group from fio_opt_cat_groups whose bit is set in
 + * *mask, clearing that bit from *mask. Returns NULL when *mask is zero,
 + * equals FIO_OPT_G_INVALID, or matches no table entry.
 + */
 +struct opt_group *opt_group_cat_from_mask(unsigned int *mask)
 +{
 +      return __opt_group_from_mask(fio_opt_cat_groups, mask, FIO_OPT_G_INVALID);
 +}
 +
  /*
   * Map of job/command line options
   */
 -static struct fio_option options[FIO_MAX_OPTS] = {
 +struct fio_option fio_options[FIO_MAX_OPTS] = {
        {
                .name   = "description",
 +              .lname  = "Description of job",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(description),
                .help   = "Text job description",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_DESC,
        },
        {
                .name   = "name",
 +              .lname  = "Job name",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(name),
                .help   = "Name of this job",
 -      },
 -      {
 -              .name   = "directory",
 -              .type   = FIO_OPT_STR_STORE,
 -              .off1   = td_var_offset(directory),
 -              .cb     = str_directory_cb,
 -              .help   = "Directory to store files in",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_DESC,
        },
        {
                .name   = "filename",
 +              .lname  = "Filename(s)",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(filename),
                .cb     = str_filename_cb,
                .prio   = -1, /* must come after "directory" */
                .help   = "File(s) to use for the workload",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_FILENAME,
        },
        {
 -              .name   = "kb_base",
 -              .type   = FIO_OPT_INT,
 -              .off1   = td_var_offset(kb_base),
 -              .verify = kb_base_verify,
 -              .prio   = 1,
 -              .def    = "1024",
 -              .help   = "How many bytes per KB for reporting (1000 or 1024)",
 +              .name   = "directory",
 +              .lname  = "Directory",
 +              .type   = FIO_OPT_STR_STORE,
 +              .off1   = td_var_offset(directory),
 +              .cb     = str_directory_cb,
 +              .help   = "Directory to store files in",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_FILENAME,
        },
        {
                .name   = "lockfile",
 +              .lname  = "Lockfile",
                .type   = FIO_OPT_STR,
                .cb     = str_lockfile_cb,
                .off1   = td_var_offset(file_lock_mode),
                .help   = "Lock file when doing IO to it",
                .parent = "filename",
 +              .hide   = 0,
                .def    = "none",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_FILENAME,
                .posval = {
                          { .ival = "none",
                            .oval = FILE_LOCK_NONE,
        },
        {
                .name   = "opendir",
 +              .lname  = "Open directory",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(opendir),
                .cb     = str_opendir_cb,
                .help   = "Recursively add files from this directory and down",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_FILENAME,
        },
        {
                .name   = "rw",
 +              .lname  = "Read/write",
                .alias  = "readwrite",
                .type   = FIO_OPT_STR,
                .cb     = str_rw_cb,
                .help   = "IO direction",
                .def    = "read",
                .verify = rw_verify,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BASIC,
                .posval = {
                          { .ival = "read",
                            .oval = TD_DDIR_READ,
        },
        {
                .name   = "rw_sequencer",
 +              .lname  = "RW Sequencer",
                .type   = FIO_OPT_STR,
                .off1   = td_var_offset(rw_seq),
                .help   = "IO offset generator modifier",
                .def    = "sequential",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BASIC,
                .posval = {
                          { .ival = "sequential",
                            .oval = RW_SEQ_SEQ,
  
        {
                .name   = "ioengine",
 +              .lname  = "IO Engine",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(ioengine),
                .help   = "IO engine to use",
                .def    = FIO_PREFERRED_ENGINE,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BASIC,
                .posval = {
                          { .ival = "sync",
                            .help = "Use read/write",
        },
        {
                .name   = "iodepth",
 +              .lname  = "IO Depth",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(iodepth),
                .help   = "Number of IO buffers to keep in flight",
                .minval = 1,
 +              .interval = 1,
                .def    = "1",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BASIC,
        },
        {
                .name   = "iodepth_batch",
 +              .lname  = "IO Depth batch",
                .alias  = "iodepth_batch_submit",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(iodepth_batch),
                .help   = "Number of IO buffers to submit in one go",
                .parent = "iodepth",
 +              .hide   = 1,
                .minval = 1,
 +              .interval = 1,
                .def    = "1",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BASIC,
        },
        {
                .name   = "iodepth_batch_complete",
 +              .lname  = "IO Depth batch complete",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(iodepth_batch_complete),
                .help   = "Number of IO buffers to retrieve in one go",
                .parent = "iodepth",
 +              .hide   = 1,
                .minval = 0,
 +              .interval = 1,
                .def    = "1",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BASIC,
        },
        {
                .name   = "iodepth_low",
 +              .lname  = "IO Depth batch low",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(iodepth_low),
                .help   = "Low water mark for queuing depth",
                .parent = "iodepth",
 +              .hide   = 1,
 +              .interval = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BASIC,
        },
        {
                .name   = "size",
 +              .lname  = "Size",
                .type   = FIO_OPT_STR_VAL,
                .cb     = str_size_cb,
                .help   = "Total size of device or files",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "fill_device",
 +              .lname  = "Fill device",
                .alias  = "fill_fs",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(fill_device),
                .help   = "Write until an ENOSPC error occurs",
                .def    = "0",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "filesize",
 +              .lname  = "File size",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(file_size_low),
                .off2   = td_var_offset(file_size_high),
                .minval = 1,
                .help   = "Size of individual files",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "offset",
 +              .lname  = "IO offset",
                .alias  = "fileoffset",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(start_offset),
                .help   = "Start IO from this offset",
                .def    = "0",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "offset_increment",
 +              .lname  = "IO offset increment",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(offset_increment),
                .help   = "What is the increment from one offset to the next",
                .parent = "offset",
 +              .hide   = 1,
                .def    = "0",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "bs",
 +              .lname  = "Block size",
                .alias  = "blocksize",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(bs[DDIR_READ]),
                .help   = "Block size unit",
                .def    = "4k",
                .parent = "rw",
 +              .hide   = 1,
 +              .interval = 512,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "ba",
 +              .lname  = "Block size align",
                .alias  = "blockalign",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(ba[DDIR_READ]),
                .minval = 1,
                .help   = "IO block offset alignment",
                .parent = "rw",
 +              .hide   = 1,
 +              .interval = 512,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "bsrange",
 +              .lname  = "Block size range",
                .alias  = "blocksize_range",
                .type   = FIO_OPT_RANGE,
                .off1   = td_var_offset(min_bs[DDIR_READ]),
                .minval = 1,
                .help   = "Set block size range (in more detail than bs)",
                .parent = "rw",
 +              .hide   = 1,
 +              .interval = 4096,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "bssplit",
 +              .lname  = "Block size split",
                .type   = FIO_OPT_STR,
                .cb     = str_bssplit_cb,
                .help   = "Set a specific mix of block sizes",
                .parent = "rw",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "bs_unaligned",
 +              .lname  = "Block size unaligned",
                .alias  = "blocksize_unaligned",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(bs_unaligned),
                .help   = "Don't sector align IO buffer sizes",
                .parent = "rw",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "randrepeat",
 +              .lname  = "Random repeatable",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(rand_repeatable),
                .help   = "Use repeatable random IO pattern",
                .def    = "1",
                .parent = "rw",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RANDOM,
        },
        {
                .name   = "use_os_rand",
 +              .lname  = "Use OS random",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(use_os_rand),
                .help   = "Set to use OS random generator",
                .def    = "0",
                .parent = "rw",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RANDOM,
        },
        {
                .name   = "norandommap",
 +              .lname  = "No randommap",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(norandommap),
                .help   = "Accept potential duplicate random blocks",
                .parent = "rw",
 +              .hide   = 1,
 +              .hide_on_set = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RANDOM,
        },
        {
                .name   = "softrandommap",
 +              .lname  = "Soft randommap",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(softrandommap),
                .help   = "Set norandommap if randommap allocation fails",
                .parent = "norandommap",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RANDOM,
        },
+       {
+               .name   = "random_distribution",
+               .type   = FIO_OPT_STR,
+               .off1   = td_var_offset(random_distribution),
+               .cb     = str_random_distribution_cb,
+               .help   = "Random offset distribution generator",
+               .def    = "random",
+               .posval = {
+                         { .ival = "random",
+                           .oval = FIO_RAND_DIST_RANDOM,
+                           .help = "Completely random",
+                         },
+                         { .ival = "zipf",
+                           .oval = FIO_RAND_DIST_ZIPF,
+                           .help = "Zipf distribution",
+                         },
+                         { .ival = "pareto",
+                           .oval = FIO_RAND_DIST_PARETO,
+                           .help = "Pareto distribution",
+                         },
+               },
+       },
        {
                .name   = "nrfiles",
 +              .lname  = "Number of files",
                .alias  = "nr_files",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(nr_files),
                .help   = "Split job workload between this number of files",
                .def    = "1",
 +              .interval = 1,
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "openfiles",
 +              .lname  = "Number of open files",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(open_files),
                .help   = "Number of files to keep open at the same time",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "file_service_type",
 +              .lname  = "File service type",
                .type   = FIO_OPT_STR,
                .cb     = str_fst_cb,
                .off1   = td_var_offset(file_service_type),
                .help   = "How to select which file to service next",
                .def    = "roundrobin",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
                .posval = {
                          { .ival = "random",
                            .oval = FIO_FSERVICE_RANDOM,
                          },
                },
                .parent = "nrfiles",
 +              .hide   = 1,
        },
  #ifdef FIO_HAVE_FALLOCATE
        {
                .name   = "fallocate",
 +              .lname  = "Fallocate",
                .type   = FIO_OPT_STR,
                .off1   = td_var_offset(fallocate_mode),
                .help   = "Whether pre-allocation is performed when laying out files",
                .def    = "posix",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
                .posval = {
                          { .ival = "none",
                            .oval = FIO_FALLOCATE_NONE,
  #endif        /* FIO_HAVE_FALLOCATE */
        {
                .name   = "fadvise_hint",
 +              .lname  = "Fadvise hint",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(fadvise_hint),
                .help   = "Use fadvise() to advise the kernel on IO pattern",
                .def    = "1",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "fsync",
 +              .lname  = "Fsync",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(fsync_blocks),
                .help   = "Issue fsync for writes every given number of blocks",
                .def    = "0",
 +              .interval = 1,
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "fdatasync",
 +              .lname  = "Fdatasync",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(fdatasync_blocks),
                .help   = "Issue fdatasync for writes every given number of blocks",
                .def    = "0",
 +              .interval = 1,
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "write_barrier",
 +              .lname  = "Write barrier",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(barrier_blocks),
                .help   = "Make every Nth write a barrier write",
                .def    = "0",
 +              .interval = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
  #ifdef FIO_HAVE_SYNC_FILE_RANGE
        {
                .name   = "sync_file_range",
 +              .lname  = "Sync file range",
                .posval = {
                          { .ival = "wait_before",
                            .oval = SYNC_FILE_RANGE_WAIT_BEFORE,
                .cb     = str_sfr_cb,
                .off1   = td_var_offset(sync_file_range),
                .help   = "Use sync_file_range()",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
  #endif
        {
                .name   = "direct",
 +              .lname  = "Direct I/O",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(odirect),
                .help   = "Use O_DIRECT IO (negates buffered)",
                .def    = "0",
 +              .inverse = "buffered",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_TYPE,
        },
        {
                .name   = "buffered",
 +              .lname  = "Buffered I/O",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(odirect),
                .neg    = 1,
                .help   = "Use buffered IO (negates direct)",
                .def    = "1",
 +              .inverse = "direct",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_TYPE,
        },
        {
                .name   = "overwrite",
 +              .lname  = "Overwrite",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(overwrite),
                .help   = "When writing, set whether to overwrite current data",
                .def    = "0",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "loops",
 +              .lname  = "Loops",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(loops),
                .help   = "Number of times to run the job",
                .def    = "1",
 +              .interval = 1,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_RUNTIME,
        },
        {
                .name   = "numjobs",
 +              .lname  = "Number of jobs",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(numjobs),
                .help   = "Duplicate this job this many times",
                .def    = "1",
 +              .interval = 1,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_RUNTIME,
        },
        {
                .name   = "startdelay",
 +              .lname  = "Start delay",
                .type   = FIO_OPT_STR_VAL_TIME,
                .off1   = td_var_offset(start_delay),
                .help   = "Only start job when this period has passed",
                .def    = "0",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_RUNTIME,
        },
        {
                .name   = "runtime",
 +              .lname  = "Runtime",
                .alias  = "timeout",
                .type   = FIO_OPT_STR_VAL_TIME,
                .off1   = td_var_offset(timeout),
                .help   = "Stop workload when this amount of time has passed",
                .def    = "0",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_RUNTIME,
        },
        {
                .name   = "time_based",
 +              .lname  = "Time based",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(time_based),
                .help   = "Keep running until runtime/timeout is met",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_RUNTIME,
        },
        {
                .name   = "ramp_time",
 +              .lname  = "Ramp time",
                .type   = FIO_OPT_STR_VAL_TIME,
                .off1   = td_var_offset(ramp_time),
                .help   = "Ramp up time before measuring performance",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_RUNTIME,
        },
        {
                .name   = "clocksource",
 +              .lname  = "Clock source",
                .type   = FIO_OPT_STR,
                .cb     = fio_clock_source_cb,
                .off1   = td_var_offset(clocksource),
                .help   = "What type of timing source to use",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CLOCK,
                .posval = {
                          { .ival = "gettimeofday",
                            .oval = CS_GTOD,
        {
                .name   = "mem",
                .alias  = "iomem",
 +              .lname  = "I/O Memory",
                .type   = FIO_OPT_STR,
                .cb     = str_mem_cb,
                .off1   = td_var_offset(mem_type),
                .help   = "Backing type for IO buffers",
                .def    = "malloc",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
                .posval = {
                          { .ival = "malloc",
                            .oval = MEM_MALLOC,
        {
                .name   = "iomem_align",
                .alias  = "mem_align",
 +              .lname  = "I/O memory alignment",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(mem_align),
                .minval = 0,
                .help   = "IO memory buffer offset alignment",
                .def    = "0",
                .parent = "iomem",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "verify",
 +              .lname  = "Verify",
                .type   = FIO_OPT_STR,
                .off1   = td_var_offset(verify),
                .help   = "Verify data written",
 -              .cb     = str_verify_cb,
                .def    = "0",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
                .posval = {
                          { .ival = "0",
                            .oval = VERIFY_NONE,
        },
        {
                .name   = "do_verify",
 +              .lname  = "Perform verify step",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(do_verify),
                .help   = "Run verification stage after write",
                .def    = "1",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verifysort",
 +              .lname  = "Verify sort",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(verifysort),
                .help   = "Sort written verify blocks for read back",
                .def    = "1",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_interval",
 +              .lname  = "Verify interval",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(verify_interval),
                .minval = 2 * sizeof(struct verify_header),
                .help   = "Store verify buffer header every N bytes",
                .parent = "verify",
 +              .hide   = 1,
 +              .interval = 2 * sizeof(struct verify_header),
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_offset",
 +              .lname  = "Verify offset",
                .type   = FIO_OPT_INT,
                .help   = "Offset verify header location by N bytes",
 -              .def    = "0",
 -              .cb     = str_verify_offset_cb,
 +              .off1   = td_var_offset(verify_offset),
 +              .minval = sizeof(struct verify_header),
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_pattern",
 +              .lname  = "Verify pattern",
                .type   = FIO_OPT_STR,
                .cb     = str_verify_pattern_cb,
                .help   = "Fill pattern for IO buffers",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_fatal",
 +              .lname  = "Verify fatal",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(verify_fatal),
                .def    = "0",
                .help   = "Exit on a single verify failure, don't continue",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_dump",
 +              .lname  = "Verify dump",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(verify_dump),
                .def    = "0",
                .help   = "Dump contents of good and bad blocks on failure",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_async",
 +              .lname  = "Verify asynchronously",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(verify_async),
                .def    = "0",
                .help   = "Number of async verifier threads to use",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_backlog",
 +              .lname  = "Verify backlog",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(verify_backlog),
                .help   = "Verify after this number of blocks are written",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
        {
                .name   = "verify_backlog_batch",
 +              .lname  = "Verify backlog batch",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(verify_batch),
                .help   = "Verify this number of IO blocks",
                .parent = "verify",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
  #ifdef FIO_HAVE_CPU_AFFINITY
        {
                .name   = "verify_async_cpus",
 +              .lname  = "Async verify CPUs",
                .type   = FIO_OPT_STR,
                .cb     = str_verify_cpus_allowed_cb,
                .help   = "Set CPUs allowed for async verify threads",
                .parent = "verify_async",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_VERIFY,
        },
  #endif
  #ifdef FIO_HAVE_TRIM
        {
                .name   = "trim_percentage",
 +              .lname  = "Trim percentage",
                .type   = FIO_OPT_INT,
 -              .cb     = str_verify_trim_cb,
 +              .off1   = td_var_offset(trim_percentage),
 +              .minval = 0,
                .maxval = 100,
                .help   = "Number of verify blocks to discard/trim",
                .parent = "verify",
                .def    = "0",
 +              .interval = 1,
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_TRIM,
        },
        {
                .name   = "trim_verify_zero",
 -              .type   = FIO_OPT_INT,
 +              .lname  = "Verify trim zero",
 +              .type   = FIO_OPT_BOOL,
                .help   = "Verify that trim/discarded blocks are returned as zeroes",
                .off1   = td_var_offset(trim_zero),
                .parent = "trim_percentage",
 +              .hide   = 1,
                .def    = "1",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_TRIM,
        },
        {
                .name   = "trim_backlog",
 +              .lname  = "Trim backlog",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(trim_backlog),
                .help   = "Trim after this number of blocks are written",
                .parent = "trim_percentage",
 +              .hide   = 1,
 +              .interval = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_TRIM,
        },
        {
                .name   = "trim_backlog_batch",
 +              .lname  = "Trim backlog batch",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(trim_batch),
                .help   = "Trim this number of IO blocks",
                .parent = "trim_percentage",
 +              .hide   = 1,
 +              .interval = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_TRIM,
        },
  #endif
        {
                .name   = "write_iolog",
 +              .lname  = "Write I/O log",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(write_iolog_file),
                .help   = "Store IO pattern to file",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IOLOG,
        },
        {
                .name   = "read_iolog",
 +              .lname  = "Read I/O log",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(read_iolog_file),
                .help   = "Playback IO pattern from file",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IOLOG,
        },
        {
                .name   = "replay_no_stall",
 -              .type   = FIO_OPT_INT,
 +              .lname  = "Don't stall on replay",
 +              .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(no_stall),
                .def    = "0",
                .parent = "read_iolog",
 +              .hide   = 1,
                .help   = "Playback IO pattern file as fast as possible without stalls",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IOLOG,
        },
        {
                .name   = "replay_redirect",
 +              .lname  = "Redirect device for replay",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(replay_redirect),
                .parent = "read_iolog",
 +              .hide   = 1,
                .help   = "Replay all I/O onto this device, regardless of trace device",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IOLOG,
        },
        {
                .name   = "exec_prerun",
 +              .lname  = "Pre-execute runnable",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(exec_prerun),
                .help   = "Execute this file prior to running job",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "exec_postrun",
 +              .lname  = "Post-execute runnable",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(exec_postrun),
                .help   = "Execute this file after running job",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_INVALID,
        },
  #ifdef FIO_HAVE_IOSCHED_SWITCH
        {
                .name   = "ioscheduler",
 +              .lname  = "I/O scheduler",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(ioscheduler),
                .help   = "Use this IO scheduler on the backing device",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
  #endif
        {
                .name   = "zonesize",
 +              .lname  = "Zone size",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(zone_size),
                .help   = "Amount of data to read per zone",
                .def    = "0",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_ZONE,
        },
        {
                .name   = "zonerange",
 +              .lname  = "Zone range",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(zone_range),
                .help   = "Give size of an IO zone",
                .def    = "0",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_ZONE,
        },
        {
                .name   = "zoneskip",
 +              .lname  = "Zone skip",
                .type   = FIO_OPT_STR_VAL,
                .off1   = td_var_offset(zone_skip),
                .help   = "Space between IO zones",
                .def    = "0",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_ZONE,
        },
        {
                .name   = "lockmem",
 +              .lname  = "Lock memory",
                .type   = FIO_OPT_STR_VAL,
 -              .cb     = str_lockmem_cb,
 +              .off1   = td_var_offset(lockmem),
                .help   = "Lock down this amount of memory",
                .def    = "0",
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "rwmixread",
 +              .lname  = "Read/write mix read",
                .type   = FIO_OPT_INT,
                .cb     = str_rwmix_read_cb,
                .maxval = 100,
                .help   = "Percentage of mixed workload that is reads",
                .def    = "50",
 +              .interval = 5,
 +              .inverse = "rwmixwrite",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RWMIX,
        },
        {
                .name   = "rwmixwrite",
 +              .lname  = "Read/write mix write",
                .type   = FIO_OPT_INT,
                .cb     = str_rwmix_write_cb,
                .maxval = 100,
                .help   = "Percentage of mixed workload that is writes",
                .def    = "50",
 +              .interval = 5,
 +              .inverse = "rwmixread",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RWMIX,
        },
        {
                .name   = "rwmixcycle",
 +              .lname  = "Read/write mix cycle",
                .type   = FIO_OPT_DEPRECATED,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RWMIX,
        },
        {
                .name   = "nice",
 +              .lname  = "Nice",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(nice),
                .help   = "Set job CPU nice value",
                .minval = -19,
                .maxval = 20,
                .def    = "0",
 +              .interval = 1,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
        },
  #ifdef FIO_HAVE_IOPRIO
        {
                .name   = "prio",
 +              .lname  = "I/O nice priority",
                .type   = FIO_OPT_INT,
 -              .cb     = str_prio_cb,
 +              .off1   = td_var_offset(ioprio),
                .help   = "Set job IO priority value",
                .minval = 0,
                .maxval = 7,
 +              .interval = 1,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
        },
        {
                .name   = "prioclass",
 +              .lname  = "I/O nice priority class",
                .type   = FIO_OPT_INT,
 -              .cb     = str_prioclass_cb,
 +              .off1   = td_var_offset(ioprio_class),
                .help   = "Set job IO priority class",
                .minval = 0,
                .maxval = 3,
 +              .interval = 1,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
        },
  #endif
        {
                .name   = "thinktime",
 +              .lname  = "Thinktime",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(thinktime),
                .help   = "Idle time between IO buffers (usec)",
                .def    = "0",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_THINKTIME,
        },
        {
                .name   = "thinktime_spin",
 +              .lname  = "Thinktime spin",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(thinktime_spin),
                .help   = "Start think time by spinning this amount (usec)",
                .def    = "0",
                .parent = "thinktime",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_THINKTIME,
        },
        {
                .name   = "thinktime_blocks",
 +              .lname  = "Thinktime blocks",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(thinktime_blocks),
                .help   = "IO buffer period between 'thinktime'",
                .def    = "1",
                .parent = "thinktime",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_THINKTIME,
        },
        {
                .name   = "rate",
 +              .lname  = "I/O rate",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(rate[DDIR_READ]),
                .off2   = td_var_offset(rate[DDIR_WRITE]),
                .off3   = td_var_offset(rate[DDIR_TRIM]),
                .help   = "Set bandwidth rate",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RATE,
        },
        {
                .name   = "ratemin",
 +              .lname  = "I/O min rate",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(ratemin[DDIR_READ]),
                .off2   = td_var_offset(ratemin[DDIR_WRITE]),
                .off3   = td_var_offset(ratemin[DDIR_TRIM]),
                .help   = "Job must meet this rate or it will be shutdown",
                .parent = "rate",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RATE,
        },
        {
                .name   = "rate_iops",
 +              .lname  = "I/O rate IOPS",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(rate_iops[DDIR_READ]),
                .off2   = td_var_offset(rate_iops[DDIR_WRITE]),
                .off3   = td_var_offset(rate_iops[DDIR_TRIM]),
                .help   = "Limit IO used to this number of IO operations/sec",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RATE,
        },
        {
                .name   = "rate_iops_min",
 +              .lname  = "I/O min rate IOPS",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(rate_iops_min[DDIR_READ]),
                .off2   = td_var_offset(rate_iops_min[DDIR_WRITE]),
                .off3   = td_var_offset(rate_iops_min[DDIR_TRIM]),
                .help   = "Job must meet this rate or it will be shut down",
                .parent = "rate_iops",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RATE,
        },
        {
                .name   = "ratecycle",
 +              .lname  = "I/O rate cycle",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(ratecycle),
                .help   = "Window average for rate limits (msec)",
                .def    = "1000",
                .parent = "rate",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RATE,
        },
+       {
+               .name   = "max_latency",
++              /*
++               * Latency cap in usec. Per the HOWTO, the job exits with
++               * ETIME once this is exceeded. Gets a long name like the
++               * other categorised options.
++               */
++              .lname  = "Max latency",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(max_latency),
+               .help   = "Maximum tolerated IO latency (usec)",
++              .category = FIO_OPT_C_IO,
++              .group = FIO_OPT_G_RATE,
+       },
        {
                .name   = "invalidate",
 +              .lname  = "Cache invalidate",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(invalidate_cache),
                .help   = "Invalidate buffer/page cache prior to running job",
                .def    = "1",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_TYPE,
        },
        {
                .name   = "sync",
 +              .lname  = "Synchronous I/O",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(sync_io),
                .help   = "Use O_SYNC for buffered writes",
                .def    = "0",
                .parent = "buffered",
 -      },
 -      {
 -              .name   = "bwavgtime",
 -              .type   = FIO_OPT_INT,
 -              .off1   = td_var_offset(bw_avg_time),
 -              .help   = "Time window over which to calculate bandwidth"
 -                        " (msec)",
 -              .def    = "500",
 -              .parent = "write_bw_log",
 -      },
 -      {
 -              .name   = "iopsavgtime",
 -              .type   = FIO_OPT_INT,
 -              .off1   = td_var_offset(iops_avg_time),
 -              .help   = "Time window over which to calculate IOPS (msec)",
 -              .def    = "500",
 -              .parent = "write_iops_log",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_TYPE,
        },
        {
                .name   = "create_serialize",
 +              .lname  = "Create serialize",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(create_serialize),
                .help   = "Serialize creating of job files",
                .def    = "1",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "create_fsync",
 +              .lname  = "Create fsync",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(create_fsync),
                .help   = "fsync file after creation",
                .def    = "1",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "create_on_open",
 +              .lname  = "Create on open",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(create_on_open),
                .help   = "Create files when they are opened for IO",
                .def    = "0",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "create_only",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(create_only),
                .help   = "Only perform file creation phase",
 +              .category = FIO_OPT_C_FILE,
                .def    = "0",
        },
        {
                .name   = "pre_read",
 +              .lname  = "Pre-read files",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(pre_read),
                .help   = "Pre-read files before starting official testing",
                .def    = "0",
 -      },
 -      {
 -              .name   = "cpuload",
 -              .type   = FIO_OPT_INT,
 -              .off1   = td_var_offset(cpuload),
 -              .help   = "Use this percentage of CPU",
 -      },
 -      {
 -              .name   = "cpuchunks",
 -              .type   = FIO_OPT_INT,
 -              .off1   = td_var_offset(cpucycle),
 -              .help   = "Length of the CPU burn cycles (usecs)",
 -              .def    = "50000",
 -              .parent = "cpuload",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
  #ifdef FIO_HAVE_CPU_AFFINITY
        {
                .name   = "cpumask",
 +              .lname  = "CPU mask",
                .type   = FIO_OPT_INT,
                .cb     = str_cpumask_cb,
                .help   = "CPU affinity mask",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
        },
        {
                .name   = "cpus_allowed",
 +              .lname  = "CPUs allowed",
                .type   = FIO_OPT_STR,
                .cb     = str_cpus_allowed_cb,
                .help   = "Set CPUs allowed",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
        },
+ #endif
+ #ifdef FIO_HAVE_LIBNUMA
+       {
+               .name   = "numa_cpu_nodes",
++              /*
++               * Per the HOWTO: run the job on the CPUs of the given NUMA
++               * nodes; accepts a comma delimited list, A-B ranges, or
++               * 'all'. Parsing is done by the callback.
++               */
++              .lname  = "NUMA CPU nodes",
+               .type   = FIO_OPT_STR,
+               .cb     = str_numa_cpunodes_cb,
+               .help   = "NUMA CPU nodes bind",
++              .category = FIO_OPT_C_GENERAL,
++              .group  = FIO_OPT_G_INVALID,
+       },
+       {
+               .name   = "numa_mem_policy",
++              /*
++               * Per the HOWTO: memory policy and NUMA nodes for the job,
++               * given as <mode>[:<nodelist>]. Parsing is done by the
++               * callback.
++               */
++              .lname  = "NUMA memory policy",
+               .type   = FIO_OPT_STR,
+               .cb     = str_numa_mpol_cb,
+               .help   = "NUMA memory policy setup",
++              .category = FIO_OPT_C_GENERAL,
++              .group  = FIO_OPT_G_INVALID,
+       },
  #endif
        {
                .name   = "end_fsync",
 +              .lname  = "End fsync",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(end_fsync),
                .help   = "Include fsync at the end of job",
                .def    = "0",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "fsync_on_close",
 +              .lname  = "Fsync on close",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(fsync_on_close),
                .help   = "fsync files on close",
                .def    = "0",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "unlink",
 +              .lname  = "Unlink file",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(unlink),
                .help   = "Unlink created files after job has completed",
                .def    = "0",
 +              .category = FIO_OPT_C_FILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "exitall",
 +              .lname  = "Exit-all on terminate",
                .type   = FIO_OPT_STR_SET,
                .cb     = str_exitall_cb,
                .help   = "Terminate all jobs when one exits",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_PROCESS,
        },
        {
                .name   = "stonewall",
 +              .lname  = "Wait for previous",
                .alias  = "wait_for_previous",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(stonewall),
                .help   = "Insert a hard barrier between this job and previous",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_PROCESS,
        },
        {
                .name   = "new_group",
 +              .lname  = "New group",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(new_group),
                .help   = "Mark the start of a new group (for reporting)",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_PROCESS,
        },
        {
                .name   = "thread",
 +              .lname  = "Thread",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(use_thread),
 -              .help   = "Use threads instead of forks",
 +              .help   = "Use threads instead of processes",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_PROCESS,
        },
        {
                .name   = "write_bw_log",
 -              .type   = FIO_OPT_STR,
 -              .off1   = td_var_offset(write_bw_log),
 -              .cb     = str_write_bw_log_cb,
 +              .lname  = "Write bandwidth log",
 +              .type   = FIO_OPT_STR_STORE,
 +              .off1   = td_var_offset(bw_log_file),
                .help   = "Write log of bandwidth during run",
 +              .category = FIO_OPT_C_LOG,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "write_lat_log",
 -              .type   = FIO_OPT_STR,
 -              .off1   = td_var_offset(write_lat_log),
 -              .cb     = str_write_lat_log_cb,
 +              .lname  = "Write latency log",
 +              .type   = FIO_OPT_STR_STORE,
 +              .off1   = td_var_offset(lat_log_file),
                .help   = "Write log of latency during run",
 +              .category = FIO_OPT_C_LOG,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "write_iops_log",
 +              .lname  = "Write IOPS log",
                .type   = FIO_OPT_STR,
 -              .off1   = td_var_offset(write_iops_log),
 -              .cb     = str_write_iops_log_cb,
 +              .off1   = td_var_offset(iops_log_file),
                .help   = "Write log of IOPS during run",
 +              .category = FIO_OPT_C_LOG,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "log_avg_msec",
 +              .lname  = "Log averaging (msec)",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(log_avg_msec),
                .help   = "Average bw/iops/lat logs over this period of time",
                .def    = "0",
 +              .category = FIO_OPT_C_LOG,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
 -              .name   = "hugepage-size",
 +              .name   = "bwavgtime",
 +              .lname  = "Bandwidth average time",
                .type   = FIO_OPT_INT,
 -              .off1   = td_var_offset(hugepage_size),
 -              .help   = "When using hugepages, specify size of each page",
 -              .def    = __fio_stringify(FIO_HUGE_PAGE),
 +              .off1   = td_var_offset(bw_avg_time),
 +              .help   = "Time window over which to calculate bandwidth"
 +                        " (msec)",
 +              .def    = "500",
 +              .parent = "write_bw_log",
 +              .hide   = 1,
 +              .interval = 100,
 +              .category = FIO_OPT_C_LOG,
 +              .group  = FIO_OPT_G_INVALID,
 +      },
 +      {
 +              .name   = "iopsavgtime",
 +              .lname  = "IOPS average time",
 +              .type   = FIO_OPT_INT,
 +              .off1   = td_var_offset(iops_avg_time),
 +              .help   = "Time window over which to calculate IOPS (msec)",
 +              .def    = "500",
 +              .parent = "write_iops_log",
 +              .hide   = 1,
 +              .interval = 100,
 +              .category = FIO_OPT_C_LOG,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "group_reporting",
 -              .type   = FIO_OPT_STR_SET,
 +              .lname  = "Group reporting",
 +              .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(group_reporting),
                .help   = "Do reporting on a per-group basis",
 +              .def    = "1",
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "zero_buffers",
 +              .lname  = "Zero I/O buffers",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(zero_buffers),
                .help   = "Init IO buffers to all zeroes",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BUF,
        },
        {
                .name   = "refill_buffers",
 +              .lname  = "Refill I/O buffers",
                .type   = FIO_OPT_STR_SET,
                .off1   = td_var_offset(refill_buffers),
                .help   = "Refill IO buffers on every IO submit",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BUF,
        },
        {
                .name   = "scramble_buffers",
 +              .lname  = "Scramble I/O buffers",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(scramble_buffers),
                .help   = "Slightly scramble buffers on every IO submit",
                .def    = "1",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BUF,
        },
        {
                .name   = "buffer_compress_percentage",
 +              .lname  = "Buffer compression percentage",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(compress_percentage),
                .maxval = 100,
                .minval = 1,
                .help   = "How compressible the buffer is (approximately)",
 +              .interval = 5,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BUF,
        },
        {
                .name   = "buffer_compress_chunk",
 +              .lname  = "Buffer compression chunk size",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(compress_chunk),
                .parent = "buffer_compress_percentage",
 +              .hide   = 1,
                .help   = "Size of compressible region in buffer",
 +              .interval = 256,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_BUF,
        },
        {
                .name   = "clat_percentiles",
 +              .lname  = "Completion latency percentiles",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(clat_percentiles),
                .help   = "Enable the reporting of completion latency percentiles",
                .def    = "1",
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "percentile_list",
 +              .lname  = "Completion latency percentile list",
                .type   = FIO_OPT_FLOAT_LIST,
                .off1   = td_var_offset(percentile_list),
                .off2   = td_var_offset(overwrite_plist),
                .maxlen = FIO_IO_U_LIST_MAX_LEN,
                .minfp  = 0.0,
                .maxfp  = 100.0,
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
  
  #ifdef FIO_HAVE_DISK_UTIL
        {
                .name   = "disk_util",
 +              .lname  = "Disk utilization",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(do_disk_util),
                .help   = "Log disk utilization statistics",
                .def    = "1",
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
  #endif
        {
                .name   = "gtod_reduce",
 +              .lname  = "Reduce gettimeofday() calls",
                .type   = FIO_OPT_BOOL,
                .help   = "Greatly reduce number of gettimeofday() calls",
                .cb     = str_gtod_reduce_cb,
                .def    = "0",
 +              .hide_on_set = 1,
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "disable_lat",
 +              .lname  = "Disable all latency stats",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(disable_lat),
                .help   = "Disable latency numbers",
                .parent = "gtod_reduce",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "disable_clat",
 +              .lname  = "Disable completion latency stats",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(disable_clat),
                .help   = "Disable completion latency numbers",
                .parent = "gtod_reduce",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "disable_slat",
 +              .lname  = "Disable submission latency stats",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(disable_slat),
                .help   = "Disable submission latency numbers",
                .parent = "gtod_reduce",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "disable_bw_measurement",
 +              .lname  = "Disable bandwidth stats",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(disable_bw),
                .help   = "Disable bandwidth logging",
                .parent = "gtod_reduce",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_STAT,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "gtod_cpu",
 +              .lname  = "Dedicated gettimeofday() CPU",
                .type   = FIO_OPT_INT,
                .cb     = str_gtod_cpu_cb,
                .help   = "Set up dedicated gettimeofday() thread on this CPU",
                .verify = gtod_cpu_verify,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CLOCK,
        },
        {
                .name   = "continue_on_error",
 +              .lname  = "Continue on error",
                .type   = FIO_OPT_STR,
                .off1   = td_var_offset(continue_on_error),
                .help   = "Continue on non-fatal errors during IO",
                .def    = "none",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_ERR,
                .posval = {
                          { .ival = "none",
                            .oval = ERROR_TYPE_NONE,
                .cb     = str_ignore_error_cb,
                .help   = "Set a specific list of errors to ignore",
                .parent = "rw",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_ERR,
        },
        {
                .name   = "error_dump",
                .off1   = td_var_offset(error_dump),
                .def    = "0",
                .help   = "Dump info on each error",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_ERR,
        },
 -
        {
                .name   = "profile",
 +              .lname  = "Profile",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(profile),
                .help   = "Select a specific builtin performance test",
 +              .category = FIO_OPT_C_PROFILE,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "cgroup",
 +              .lname  = "Cgroup",
                .type   = FIO_OPT_STR_STORE,
                .off1   = td_var_offset(cgroup),
                .help   = "Add job to cgroup of this name",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CGROUP,
 +      },
 +      {
 +              .name   = "cgroup_nodelete",
 +              .lname  = "Cgroup no-delete",
 +              .type   = FIO_OPT_BOOL,
 +              .off1   = td_var_offset(cgroup_nodelete),
 +              .help   = "Do not delete cgroups after job completion",
 +              .def    = "0",
 +              .parent = "cgroup",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CGROUP,
        },
        {
                .name   = "cgroup_weight",
 +              .lname  = "Cgroup weight",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(cgroup_weight),
                .help   = "Use given weight for cgroup",
                .minval = 100,
                .maxval = 1000,
 -      },
 -      {
 -              .name   = "cgroup_nodelete",
 -              .type   = FIO_OPT_BOOL,
 -              .off1   = td_var_offset(cgroup_nodelete),
 -              .help   = "Do not delete cgroups after job completion",
 -              .def    = "0",
 +              .parent = "cgroup",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CGROUP,
        },
        {
                .name   = "uid",
 +              .lname  = "User ID",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(uid),
                .help   = "Run job with this user ID",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
        },
        {
                .name   = "gid",
 +              .lname  = "Group ID",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(gid),
                .help   = "Run job with this group ID",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
 +      },
 +      {
 +              .name   = "kb_base",
 +              .lname  = "KB Base",
 +              .type   = FIO_OPT_INT,
 +              .off1   = td_var_offset(kb_base),
 +              .verify = kb_base_verify,
 +              .prio   = 1,
 +              .def    = "1024",
 +              .help   = "How many bytes per KB for reporting (1000 or 1024)",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_INVALID,
 +      },
 +      {
 +              .name   = "hugepage-size",
 +              .lname  = "Hugepage size",
 +              .type   = FIO_OPT_INT,
 +              .off1   = td_var_offset(hugepage_size),
 +              .help   = "When using hugepages, specify size of each page",
 +              .def    = __fio_stringify(FIO_HUGE_PAGE),
 +              .interval = 1024 * 1024,
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_INVALID,
        },
        {
                .name   = "flow_id",
 +              .lname  = "I/O flow ID",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(flow_id),
                .help   = "The flow index ID to use",
                .def    = "0",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_FLOW,
        },
        {
                .name   = "flow",
 +              .lname  = "I/O flow weight",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(flow),
                .help   = "Weight for flow control of this job",
                .parent = "flow_id",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_FLOW,
        },
        {
                .name   = "flow_watermark",
 +              .lname  = "I/O flow watermark",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(flow_watermark),
                .help   = "High watermark for flow control. This option"
                        " should be set to the same value for all threads"
                        " with non-zero flow.",
                .parent = "flow_id",
 +              .hide   = 1,
                .def    = "1024",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_FLOW,
        },
        {
                .name   = "flow_sleep",
 +              .lname  = "I/O flow sleep",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(flow_sleep),
                .help   = "How many microseconds to sleep after being held"
                        " back by the flow control mechanism",
                .parent = "flow_id",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_IO_FLOW,
        },
        {
                .name = NULL,
@@@ -2954,13 -2659,13 +3161,13 @@@ void fio_options_dup_and_init(struct op
  {
        unsigned int i;
  
 -      options_init(options);
 +      options_init(fio_options);
  
        i = 0;
        while (long_options[i].name)
                i++;
  
 -      options_to_lopts(options, long_options, i, FIO_GETOPT_JOB);
 +      options_to_lopts(fio_options, long_options, i, FIO_GETOPT_JOB);
  }
  
  struct fio_keyword {
@@@ -3043,12 -2748,14 +3250,12 @@@ static char *bc_calc(char *str
  
        sprintf(buf, "echo '%s' | %s", tmp, BC_APP);
        f = popen(buf, "r");
 -      if (!f) {
 +      if (!f)
                return NULL;
 -      }
  
        ret = fread(&buf[tmp - str], 1, 128 - (tmp - str), f);
 -      if (ret <= 0) {
 +      if (ret <= 0)
                return NULL;
 -      }
  
        pclose(f);
        buf[(tmp - str) + ret - 1] = '\0';
@@@ -3181,13 -2888,13 +3388,13 @@@ int fio_options_parse(struct thread_dat
        int i, ret, unknown;
        char **opts_copy;
  
 -      sort_options(opts, options, num_opts);
 +      sort_options(opts, fio_options, num_opts);
        opts_copy = dup_and_sub_options(opts, num_opts);
  
        for (ret = 0, i = 0, unknown = 0; i < num_opts; i++) {
                struct fio_option *o;
 -              int newret = parse_option(opts_copy[i], opts[i], options, &o,
 -                                        td);
 +              int newret = parse_option(opts_copy[i], opts[i], fio_options,
 +                                              &o, td);
  
                if (opts_copy[i]) {
                        if (newret && !o) {
  
  int fio_cmd_option_parse(struct thread_data *td, const char *opt, char *val)
  {
 -      return parse_cmd_option(opt, val, options, td);
 +      return parse_cmd_option(opt, val, fio_options, td);
  }
  
  int fio_cmd_ioengine_option_parse(struct thread_data *td, const char *opt,
  
  void fio_fill_default_options(struct thread_data *td)
  {
 -      fill_default_options(td, options);
 +      fill_default_options(td, fio_options);
  }
  
  int fio_show_option_help(const char *opt)
  {
 -      return show_cmd_help(options, opt);
 +      return show_cmd_help(fio_options, opt);
  }
  
  void options_mem_dupe(void *data, struct fio_option *options)
   */
  void fio_options_mem_dupe(struct thread_data *td)
  {
 -      options_mem_dupe(&td->o, options);
 +      options_mem_dupe(&td->o, fio_options);
  
        if (td->eo && td->io_ops) {
                void *oldeo = td->eo;
  
  unsigned int fio_get_kb_base(void *data)
  {
 -      struct thread_data *td = data;
 +      struct thread_options *o = data;
        unsigned int kb_base = 0;
  
 -      if (td)
 -              kb_base = td->o.kb_base;
 +      if (o)
 +              kb_base = o->kb_base;
        if (!kb_base)
                kb_base = 1024;
  
@@@ -3301,13 -3008,13 +3508,13 @@@ int add_option(struct fio_option *o
        struct fio_option *__o;
        int opt_index = 0;
  
 -      __o = options;
 +      __o = fio_options;
        while (__o->name) {
                opt_index++;
                __o++;
        }
  
 -      memcpy(&options[opt_index], o, sizeof(*o));
 +      memcpy(&fio_options[opt_index], o, sizeof(*o));
        return 0;
  }
  
@@@ -3315,7 -3022,7 +3522,7 @@@ void invalidate_profile_options(const c
  {
        struct fio_option *o;
  
 -      o = options;
 +      o = fio_options;
        while (o->name) {
                if (o->prof_name && !strcmp(o->prof_name, prof_name)) {
                        o->type = FIO_OPT_INVALID;
@@@ -3330,7 -3037,7 +3537,7 @@@ void add_opt_posval(const char *optname
        struct fio_option *o;
        unsigned int i;
  
 -      o = find_option(options, optname);
 +      o = find_option(fio_options, optname);
        if (!o)
                return;
  
@@@ -3349,7 -3056,7 +3556,7 @@@ void del_opt_posval(const char *optname
        struct fio_option *o;
        unsigned int i;
  
 -      o = find_option(options, optname);
 +      o = find_option(fio_options, optname);
        if (!o)
                return;
  
  
  void fio_options_free(struct thread_data *td)
  {
 -      options_free(options, td);
 +      options_free(fio_options, td);
        if (td->eo && td->io_ops && td->io_ops->options) {
                options_free(td->io_ops->options, td->eo);
                free(td->eo);
                td->eo = NULL;
        }
  }
 +
 +struct fio_option *fio_option_find(const char *name)
 +{
 +      return find_option(fio_options, name);
 +}
 +
diff --combined parse.c
index 9a6494f32db811746ba46303b5f300a83e1ddfc6,0bbb0b30dafd5c8b57ac842e28bdaeba789c6022..ffe2dc0feb28aef5dc6560a2f894a5915b202f9f
+++ b/parse.c
  #include "parse.h"
  #include "debug.h"
  #include "options.h"
+ #include "minmax.h"
  
 -static struct fio_option *fio_options;
 -extern unsigned int fio_get_kb_base(void *);
 +static struct fio_option *__fio_options;
  
  static int vp_cmp(const void *p1, const void *p2)
  {
@@@ -46,7 -48,7 +47,7 @@@ static void posval_sort(struct fio_opti
  static void show_option_range(struct fio_option *o,
                                int (*logger)(const char *format, ...))
  {
 -      if (o->type == FIO_OPT_FLOAT_LIST){
 +      if (o->type == FIO_OPT_FLOAT_LIST) {
                if (isnan(o->minfp) && isnan(o->maxfp))
                        return;
  
@@@ -152,32 -154,32 +153,32 @@@ static unsigned long long __get_mult_by
        for (i = 0; i < strlen(c); i++)
                c[i] = tolower(c[i]);
  
 -      if (!strcmp("pib", c)) {
 +      if (!strncmp("pib", c, 3)) {
                pow = 5;
                mult = 1000;
 -      } else if (!strcmp("tib", c)) {
 +      } else if (!strncmp("tib", c, 3)) {
                pow = 4;
                mult = 1000;
 -      } else if (!strcmp("gib", c)) {
 +      } else if (!strncmp("gib", c, 3)) {
                pow = 3;
                mult = 1000;
 -      } else if (!strcmp("mib", c)) {
 +      } else if (!strncmp("mib", c, 3)) {
                pow = 2;
                mult = 1000;
 -      } else if (!strcmp("kib", c)) {
 +      } else if (!strncmp("kib", c, 3)) {
                pow = 1;
                mult = 1000;
 -      } else if (!strcmp("p", c) || !strcmp("pb", c))
 +      } else if (!strncmp("p", c, 1) || !strncmp("pb", c, 2))
                pow = 5;
 -      else if (!strcmp("t", c) || !strcmp("tb", c))
 +      else if (!strncmp("t", c, 1) || !strncmp("tb", c, 2))
                pow = 4;
 -      else if (!strcmp("g", c) || !strcmp("gb", c))
 +      else if (!strncmp("g", c, 1) || !strncmp("gb", c, 2))
                pow = 3;
 -      else if (!strcmp("m", c) || !strcmp("mb", c))
 +      else if (!strncmp("m", c, 1) || !strncmp("mb", c, 2))
                pow = 2;
 -      else if (!strcmp("k", c) || !strcmp("kb", c))
 +      else if (!strncmp("k", c, 1) || !strncmp("kb", c, 2))
                pow = 1;
 -      else if (!strcmp("%", c)) {
 +      else if (!strncmp("%", c, 1)) {
                *percent = 1;
                free(c);
                return ret;
@@@ -219,7 -221,7 +220,7 @@@ static unsigned long long get_mult_byte
  /*
   * Convert string into a floating number. Return 1 for success and 0 otherwise.
   */
- static int str_to_float(const char *str, double *val)
+ int str_to_float(const char *str, double *val)
  {
        return (1 == sscanf(str, "%lf", val));
  }
@@@ -259,7 -261,7 +260,7 @@@ int str_to_decimal(const char *str, lon
        return 0;
  }
  
 -static int check_str_bytes(const char *p, long long *val, void *data)
 +int check_str_bytes(const char *p, long long *val, void *data)
  {
        return str_to_decimal(p, val, 1, data);
  }
@@@ -355,7 -357,7 +356,7 @@@ static int __handle_option(struct fio_o
                           int first, int more, int curr)
  {
        int il, *ilp;
 -      double* flp;
 +      double *flp;
        long long ull, *ullp;
        long ul1, ul2;
        double uf;
                                        o->maxlen);
                        return 1;
                }
 -              if (!str_to_float(ptr, &uf)){
 +              if (!str_to_float(ptr, &uf)) {
                        log_err("not a floating point value: %s\n", ptr);
                        return 1;
                }
@@@ -826,14 -828,14 +827,14 @@@ static int opt_cmp(const void *p1, cons
  
        if (*(char **)p1) {
                s = strdup(*((char **) p1));
 -              o = get_option(s, fio_options, &foo);
 +              o = get_option(s, __fio_options, &foo);
                if (o)
                        prio1 = o->prio;
                free(s);
        }
        if (*(char **)p2) {
                s = strdup(*((char **) p2));
 -              o = get_option(s, fio_options, &foo);
 +              o = get_option(s, __fio_options, &foo);
                if (o)
                        prio2 = o->prio;
                free(s);
  
  void sort_options(char **opts, struct fio_option *options, int num_opts)
  {
 -      fio_options = options;
 +      __fio_options = options;
        qsort(opts, num_opts, sizeof(char *), opt_cmp);
 -      fio_options = NULL;
 +      __fio_options = NULL;
  }
  
  int parse_cmd_option(const char *opt, const char *val,
@@@ -889,8 -891,9 +890,8 @@@ int parse_option(char *opt, const char 
                return 1;
        }
  
 -      if (!handle_option(*o, post, data)) {
 +      if (!handle_option(*o, post, data))
                return 0;
 -      }
  
        log_err("fio: failed parsing %s\n", input);
        return 1;
@@@ -1106,10 -1109,6 +1107,10 @@@ void option_init(struct fio_option *o
                      (o->roff1 || o->roff2 || o->roff3 || o->roff4))) {
                log_err("Option %s: both cb and offset given\n", o->name);
        }
 +      if (!o->category) {
 +              log_info("Options %s: no category defined. Setting to misc\n", o->name);
 +              o->category = FIO_OPT_C_GENERAL;
 +      }
  }
  
  /*
@@@ -1122,11 -1121,8 +1123,11 @@@ void options_init(struct fio_option *op
  
        dprint(FD_PARSE, "init options\n");
  
 -      for (o = &options[0]; o->name; o++)
 +      for (o = &options[0]; o->name; o++) {
                option_init(o);
 +              if (o->inverse)
 +                      o->inv_opt = find_option(options, o->inverse);
 +      }
  }
  
  void options_free(struct fio_option *options, void *data)
diff --combined parse.h
index 7fee4fa423d900a21296a4af37c96a014dcb983f,b2f9e5a0969439bd1ffe7d4e303ed3af88f0727f..b9da7b950c2813a0bb53457179b2abd7e1257ef8
+++ b/parse.h
@@@ -40,7 -40,6 +40,7 @@@ struct value_pair 
   */
  struct fio_option {
        const char *name;               /* option name */
 +      const char *lname;              /* long option name */
        const char *alias;              /* possible old allowed name */
        enum fio_opt_type type;         /* option type */
        unsigned int off1;              /* potential parameters */
@@@ -54,7 -53,6 +54,7 @@@
        int minval;
        double maxfp;                   /* max and min floating value */
        double minfp;
 +      unsigned int interval;          /* client hint for suitable interval */
        unsigned int maxlen;            /* max length */
        int neg;                        /* negate value stored */
        int prio;
        const char *def;                /* default setting */
        struct value_pair posval[PARSE_MAX_VP];/* possible values */
        const char *parent;             /* parent option */
 +      int hide;                       /* hide if parent isn't set */
 +      int hide_on_set;                /* hide on set, not on unset */
 +      const char *inverse;            /* if set, apply opposite action to this option */
 +      struct fio_option *inv_opt;     /* cached lookup */
        int (*verify)(struct fio_option *, void *);
        const char *prof_name;          /* only valid for specific profile */
 +      unsigned int category;          /* what type of option */
 +      unsigned int group;             /* who to group with */
 +      void *gui_data;
  };
  
  typedef int (str_cb_fn)(void *, char *);
@@@ -88,7 -79,7 +88,8 @@@ extern void options_free(struct fio_opt
  extern void strip_blank_front(char **);
  extern void strip_blank_end(char *);
  extern int str_to_decimal(const char *, long long *, int, void *);
 +extern int check_str_bytes(const char *p, long long *val, void *data);
+ extern int str_to_float(const char *str, double *val);
  
  /*
   * Handlers for the options
@@@ -100,13 -91,6 +101,6 @@@ typedef int (fio_opt_str_set_fn)(void *
  
  #define td_var(start, offset) ((void *) start + (offset))
  
- #ifndef min
- #define min(a, b)     ((a) < (b) ? (a) : (b))
- #endif
- #ifndef max
- #define max(a, b)     ((a) > (b) ? (a) : (b))
- #endif
  static inline int parse_is_percent(unsigned long long val)
  {
        return val <= -1ULL && val >= (-1ULL - 100ULL);
diff --combined server.c
index d120c523d779bed1e228e99af7f461ce5ac1b1c1,33b80d68620d103eb52feb93aafca7b765d9ef98..64475913a8c36c395f7b6b7a99f576a8362ca9d0
+++ b/server.c
  #include <netdb.h>
  #include <syslog.h>
  #include <signal.h>
 +#include <zlib.h>
  
  #include "fio.h"
  #include "server.h"
  #include "crc/crc16.h"
  #include "lib/ieee754.h"
  
 -int fio_net_port = 8765;
 +int fio_net_port = FIO_NET_PORT;
  
  int exit_backend = 0;
  
@@@ -32,22 -31,9 +32,22 @@@ static char *fio_server_arg
  static char *bind_sock;
  static struct sockaddr_in saddr_in;
  static struct sockaddr_in6 saddr_in6;
 -static int first_cmd_check;
  static int use_ipv6;
  
 +struct fio_fork_item {
 +      struct flist_head list;
 +      int exitval;
 +      int signal;
 +      int exited;
 +      pid_t pid;
 +};
 +
 +/* Created on fork on new connection */
 +static FLIST_HEAD(conn_list);
 +
 +/* Created on job fork from connection */
 +static FLIST_HEAD(job_list);
 +
  static const char *fio_server_ops[FIO_NET_CMD_NR] = {
        "",
        "QUIT",
        "START",
        "STOP",
        "DISK_UTIL",
 -      "RUN",
 +      "SERVER_START",
 +      "ADD_JOB",
 +      "CMD_RUN"
 +      "CMD_IOLOG",
  };
  
  const char *fio_server_op(unsigned int op)
        return buf;
  }
  
 -int fio_send_data(int sk, const void *p, unsigned int len)
 +static ssize_t iov_total_len(const struct iovec *iov, int count)
  {
 -      assert(len <= sizeof(struct fio_net_cmd) + FIO_SERVER_MAX_PDU);
 +      ssize_t ret = 0;
  
 -      do {
 -              int ret = send(sk, p, len, 0);
 +      while (count--) {
 +              ret += iov->iov_len;
 +              iov++;
 +      }
 +
 +      return ret;
 +}
 +
 +static int fio_sendv_data(int sk, struct iovec *iov, int count)
 +{
 +      ssize_t total_len = iov_total_len(iov, count);
 +      ssize_t ret;
  
 +      do {
 +              ret = writev(sk, iov, count);
                if (ret > 0) {
 -                      len -= ret;
 -                      if (!len)
 +                      total_len -= ret;
 +                      if (!total_len)
                                break;
 -                      p += ret;
 -                      continue;
 +
 +                      while (ret) {
 +                              if (ret >= iov->iov_len) {
 +                                      ret -= iov->iov_len;
 +                                      iov++;
 +                                      continue;
 +                              }
 +                              iov->iov_base += ret;
 +                              iov->iov_len -= ret;
 +                              ret = 0;
 +                      }
                } else if (!ret)
                        break;
                else if (errno == EAGAIN || errno == EINTR)
                        break;
        } while (!exit_backend);
  
 -      if (!len)
 +      if (!total_len)
                return 0;
  
 +      if (errno)
 +              return -errno;
 +
        return 1;
  }
  
 +int fio_send_data(int sk, const void *p, unsigned int len)
 +{
 +      struct iovec iov = { .iov_base = (void *) p, .iov_len = len };
 +
 +      assert(len <= sizeof(struct fio_net_cmd) + FIO_SERVER_MAX_FRAGMENT_PDU);
 +
 +      return fio_sendv_data(sk, &iov, 1);
 +}
 +
  int fio_recv_data(int sk, void *p, unsigned int len)
  {
        do {
@@@ -193,7 -143,7 +193,7 @@@ static int verify_convert_cmd(struct fi
                return 1;
        }
  
 -      if (cmd->pdu_len > FIO_SERVER_MAX_PDU) {
 +      if (cmd->pdu_len > FIO_SERVER_MAX_FRAGMENT_PDU) {
                log_err("fio: command payload too large: %u\n", cmd->pdu_len);
                return 1;
        }
@@@ -269,21 -219,12 +269,21 @@@ struct fio_net_cmd *fio_net_recv_cmd(in
                cmdret = NULL;
        } else if (cmdret) {
                /* zero-terminate text input */
 -              if (cmdret->pdu_len && (cmdret->opcode == FIO_NET_CMD_TEXT ||
 -                  cmdret->opcode == FIO_NET_CMD_JOB)) {
 -                      char *buf = (char *) cmdret->payload;
 -
 -                      buf[cmdret->pdu_len ] = '\0';
 +              if (cmdret->pdu_len) {
 +                      if (cmdret->opcode == FIO_NET_CMD_TEXT) {
 +                              struct cmd_text_pdu *pdu = (struct cmd_text_pdu *) cmdret->payload;
 +                              char *buf = (char *) pdu->buf;
 +
 +                              buf[pdu->buf_len] = '\0';
 +                      } else if (cmdret->opcode == FIO_NET_CMD_JOB) {
 +                              struct cmd_job_pdu *pdu = (struct cmd_job_pdu *) cmdret->payload;
 +                              char *buf = (char *) pdu->buf;
 +                              int len = le32_to_cpu(pdu->buf_len);
 +
 +                              buf[len] = '\0';
 +                      }
                }
 +
                /* frag flag is internal */
                cmdret->flags &= ~FIO_NET_CMD_F_MORE;
        }
        return cmdret;
  }
  
 -void fio_net_cmd_crc(struct fio_net_cmd *cmd)
 +static void add_reply(uint64_t tag, struct flist_head *list)
 +{
 +      struct fio_net_cmd_reply *reply = (struct fio_net_cmd_reply *) tag;
 +
 +      flist_add_tail(&reply->list, list);
 +}
 +
 +static uint64_t alloc_reply(uint64_t tag, uint16_t opcode)
 +{
 +      struct fio_net_cmd_reply *reply;
 +
 +      reply = calloc(1, sizeof(*reply));
 +      INIT_FLIST_HEAD(&reply->list);
 +      gettimeofday(&reply->tv, NULL);
 +      reply->saved_tag = tag;
 +      reply->opcode = opcode;
 +
 +      return (uintptr_t) reply;
 +}
 +
 +static void free_reply(uint64_t tag)
 +{
 +      struct fio_net_cmd_reply *reply = (struct fio_net_cmd_reply *) tag;
 +
 +      free(reply);
 +}
 +
 +void fio_net_cmd_crc_pdu(struct fio_net_cmd *cmd, const void *pdu)
  {
        uint32_t pdu_len;
  
        cmd->cmd_crc16 = __cpu_to_le16(fio_crc16(cmd, FIO_NET_CMD_CRC_SZ));
  
        pdu_len = le32_to_cpu(cmd->pdu_len);
 -      if (pdu_len)
 -              cmd->pdu_crc16 = __cpu_to_le16(fio_crc16(cmd->payload, pdu_len));
 +      cmd->pdu_crc16 = __cpu_to_le16(fio_crc16(pdu, pdu_len));
 +}
 +
 +void fio_net_cmd_crc(struct fio_net_cmd *cmd)
 +{
 +      fio_net_cmd_crc_pdu(cmd, cmd->payload);
  }
  
  int fio_net_send_cmd(int fd, uint16_t opcode, const void *buf, off_t size,
 -                   uint64_t tag)
 +                   uint64_t *tagptr, struct flist_head *list)
  {
        struct fio_net_cmd *cmd = NULL;
        size_t this_len, cur_len = 0;
 +      uint64_t tag;
        int ret;
  
 +      if (list) {
 +              assert(tagptr);
 +              tag = *tagptr = alloc_reply(*tagptr, opcode);
 +      } else
 +              tag = tagptr ? *tagptr : 0;
 +
        do {
                this_len = size;
 -              if (this_len > FIO_SERVER_MAX_PDU)
 -                      this_len = FIO_SERVER_MAX_PDU;
 +              if (this_len > FIO_SERVER_MAX_FRAGMENT_PDU)
 +                      this_len = FIO_SERVER_MAX_FRAGMENT_PDU;
  
                if (!cmd || cur_len < sizeof(*cmd) + this_len) {
                        if (cmd)
                buf += this_len;
        } while (!ret && size);
  
 +      if (list) {
 +              if (ret)
 +                      free_reply(tag);
 +              else
 +                      add_reply(tag, list);
 +      }
 +
        if (cmd)
                free(cmd);
  
@@@ -402,173 -298,63 +402,176 @@@ static int fio_net_send_simple_stack_cm
  int fio_net_send_simple_cmd(int sk, uint16_t opcode, uint64_t tag,
                            struct flist_head *list)
  {
 -      struct fio_net_int_cmd *cmd;
        int ret;
  
 -      if (!list)
 -              return fio_net_send_simple_stack_cmd(sk, opcode, tag);
 +      if (list)
 +              tag = alloc_reply(tag, opcode);
  
 -      cmd = malloc(sizeof(*cmd));
 -
 -      fio_init_net_cmd(&cmd->cmd, opcode, NULL, 0, (uintptr_t) cmd);
 -      fio_net_cmd_crc(&cmd->cmd);
 -
 -      INIT_FLIST_HEAD(&cmd->list);
 -      gettimeofday(&cmd->tv, NULL);
 -      cmd->saved_tag = tag;
 -
 -      ret = fio_send_data(sk, &cmd->cmd, sizeof(cmd->cmd));
 +      ret = fio_net_send_simple_stack_cmd(sk, opcode, tag);
        if (ret) {
 -              free(cmd);
 +              if (list)
 +                      free_reply(tag);
 +
                return ret;
        }
  
 -      flist_add_tail(&cmd->list, list);
 +      if (list)
 +              add_reply(tag, list);
 +
        return 0;
  }
  
 -static int fio_server_send_quit_cmd(void)
 +int fio_net_send_quit(int sk)
  {
        dprint(FD_NET, "server: sending quit\n");
 -      return fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_QUIT, 0, NULL);
 +
 +      return fio_net_send_simple_cmd(sk, FIO_NET_CMD_QUIT, 0, NULL);
  }
  
 -static int handle_job_cmd(struct fio_net_cmd *cmd)
 +static int fio_net_send_ack(int sk, struct fio_net_cmd *cmd, int error,
 +                          int signal)
  {
 -      char *buf = (char *) cmd->payload;
 -      struct cmd_start_pdu spdu;
        struct cmd_end_pdu epdu;
 +      uint64_t tag = 0;
 +
 +      if (cmd)
 +              tag = cmd->tag;
 +
 +      epdu.error = __cpu_to_le32(error);
 +      epdu.signal = __cpu_to_le32(signal);
 +      return fio_net_send_cmd(sk, FIO_NET_CMD_STOP, &epdu, sizeof(epdu), &tag, NULL);
 +}
 +
 +int fio_net_send_stop(int sk, int error, int signal)
 +{
 +      dprint(FD_NET, "server: sending stop (%d, %d)\n", error, signal);
 +      return fio_net_send_ack(sk, NULL, error, signal);
 +}
 +
 +static void fio_server_add_fork_item(pid_t pid, struct flist_head *list)
 +{
 +      struct fio_fork_item *ffi;
 +
 +      ffi = malloc(sizeof(*ffi));
 +      ffi->exitval = 0;
 +      ffi->signal = 0;
 +      ffi->exited = 0;
 +      ffi->pid = pid;
 +      flist_add_tail(&ffi->list, list);
 +}
 +
 +static void fio_server_add_conn_pid(pid_t pid)
 +{
 +      dprint(FD_NET, "server: forked off connection job (pid=%u)\n", pid);
 +      fio_server_add_fork_item(pid, &conn_list);
 +}
 +
 +static void fio_server_add_job_pid(pid_t pid)
 +{
 +      dprint(FD_NET, "server: forked off job job (pid=%u)\n", pid);
 +      fio_server_add_fork_item(pid, &job_list);
 +}
 +
 +static void fio_server_check_fork_item(struct fio_fork_item *ffi)
 +{
 +      int ret, status;
 +
 +      ret = waitpid(ffi->pid, &status, WNOHANG);
 +      if (ret < 0) {
 +              if (errno == ECHILD) {
 +                      log_err("fio: connection pid %u disappeared\n", ffi->pid);
 +                      ffi->exited = 1;
 +              } else
 +                      log_err("fio: waitpid: %s\n", strerror(errno));
 +      } else if (ret == ffi->pid) {
 +              if (WIFSIGNALED(status)) {
 +                      ffi->signal = WTERMSIG(status);
 +                      ffi->exited = 1;
 +              }
 +              if (WIFEXITED(status)) {
 +                      if (WEXITSTATUS(status))
 +                              ffi->exitval = WEXITSTATUS(status);
 +                      ffi->exited = 1;
 +              }
 +      }
 +}
 +
 +static void fio_server_fork_item_done(struct fio_fork_item *ffi)
 +{
 +      dprint(FD_NET, "pid %u exited, sig=%u, exitval=%d\n", ffi->pid, ffi->signal, ffi->exitval);
 +
 +      /*
 +       * Fold STOP and QUIT...
 +       */
 +      fio_net_send_stop(server_fd, ffi->exitval, ffi->signal);
 +      fio_net_send_quit(server_fd);
 +      flist_del(&ffi->list);
 +      free(ffi);
 +}
 +
 +static void fio_server_check_fork_items(struct flist_head *list)
 +{
 +      struct flist_head *entry, *tmp;
 +      struct fio_fork_item *ffi;
 +
 +      flist_for_each_safe(entry, tmp, list) {
 +              ffi = flist_entry(entry, struct fio_fork_item, list);
 +
 +              fio_server_check_fork_item(ffi);
 +
 +              if (ffi->exited)
 +                      fio_server_fork_item_done(ffi);
 +      }
 +}
 +
 +static void fio_server_check_jobs(void)
 +{
 +      fio_server_check_fork_items(&job_list);
 +}
 +
 +static void fio_server_check_conns(void)
 +{
 +      fio_server_check_fork_items(&conn_list);
 +}
 +
 +static int handle_run_cmd(struct fio_net_cmd *cmd)
 +{
 +      pid_t pid;
        int ret;
  
 -      if (parse_jobs_ini(buf, 1, 0)) {
 -              fio_server_send_quit_cmd();
 +      set_genesis_time();
 +
 +      pid = fork();
 +      if (pid) {
 +              fio_server_add_job_pid(pid);
 +              return 0;
 +      }
 +
 +      ret = fio_backend();
 +      free_threads_shm();
 +      _exit(ret);
 +}
 +
 +static int handle_job_cmd(struct fio_net_cmd *cmd)
 +{
 +      struct cmd_job_pdu *pdu = (struct cmd_job_pdu *) cmd->payload;
 +      void *buf = pdu->buf;
 +      struct cmd_start_pdu spdu;
 +
 +      pdu->buf_len = le32_to_cpu(pdu->buf_len);
 +      pdu->client_type = le32_to_cpu(pdu->client_type);
 +
+       stat_number = 0;
 +      if (parse_jobs_ini(buf, 1, 0, pdu->client_type)) {
 +              fio_net_send_quit(server_fd);
                return -1;
        }
  
        spdu.jobs = cpu_to_le32(thread_number);
 -      fio_net_send_cmd(server_fd, FIO_NET_CMD_START, &spdu, sizeof(spdu), 0);
 -
 -      ret = fio_backend();
 -
 -      epdu.error = ret;
 -      fio_net_send_cmd(server_fd, FIO_NET_CMD_STOP, &epdu, sizeof(epdu), 0);
 -
 -      fio_server_send_quit_cmd();
 -      reset_fio_state();
 -      return ret;
+       spdu.stat_outputs = cpu_to_le32(stat_number);
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_START, &spdu, sizeof(spdu), NULL, NULL);
 +      return 0;
  }
  
  static int handle_jobline_cmd(struct fio_net_cmd *cmd)
        struct cmd_single_line_pdu *cslp;
        struct cmd_line_pdu *clp;
        unsigned long offset;
 +      struct cmd_start_pdu spdu;
        char **argv;
 -      int ret, i;
 +      int i;
  
        clp = pdu;
        clp->lines = le16_to_cpu(clp->lines);
 +      clp->client_type = le16_to_cpu(clp->client_type);
        argv = malloc(clp->lines * sizeof(char *));
        offset = sizeof(*clp);
  
                dprint(FD_NET, "server: %d: %s\n", i, argv[i]);
        }
  
 -      if (parse_cmd_line(clp->lines, argv)) {
 -              fio_server_send_quit_cmd();
++      stat_number = 0;
++
 +      if (parse_cmd_line(clp->lines, argv, clp->client_type)) {
 +              fio_net_send_quit(server_fd);
                free(argv);
                return -1;
        }
  
        free(argv);
  
 -      fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_START, 0, NULL);
 -
 -      ret = fio_backend();
 -      fio_server_send_quit_cmd();
 -      reset_fio_state();
 -      return ret;
 +      spdu.jobs = cpu_to_le32(thread_number);
++      spdu.stat_outputs = cpu_to_le32(stat_number);
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_START, &spdu, sizeof(spdu), NULL, NULL);
 +      return 0;
  }
  
  static int handle_probe_cmd(struct fio_net_cmd *cmd)
  {
        struct cmd_probe_pdu probe;
 +      uint64_t tag = cmd->tag;
  
        dprint(FD_NET, "server: sending probe reply\n");
  
  
        probe.os        = FIO_OS;
        probe.arch      = FIO_ARCH;
 -
        probe.bpp       = sizeof(void *);
 +      probe.cpus      = __cpu_to_le32(cpus_online());
 +      probe.flags     = 0;
  
 -      return fio_net_send_cmd(server_fd, FIO_NET_CMD_PROBE, &probe, sizeof(probe), cmd->tag);
 +      return fio_net_send_cmd(server_fd, FIO_NET_CMD_PROBE, &probe, sizeof(probe), &tag, NULL);
  }
  
  static int handle_send_eta_cmd(struct fio_net_cmd *cmd)
  {
        struct jobs_eta *je;
        size_t size;
 +      uint64_t tag = cmd->tag;
        int i;
  
        if (!thread_number)
        je->nr_ramp             = cpu_to_le32(je->nr_ramp);
        je->nr_pending          = cpu_to_le32(je->nr_pending);
        je->files_open          = cpu_to_le32(je->files_open);
 -      je->m_rate              = cpu_to_le32(je->m_rate);
 -      je->t_rate              = cpu_to_le32(je->t_rate);
 -      je->m_iops              = cpu_to_le32(je->m_iops);
 -      je->t_iops              = cpu_to_le32(je->t_iops);
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 +              je->m_rate[i]   = cpu_to_le32(je->m_rate[i]);
 +              je->t_rate[i]   = cpu_to_le32(je->t_rate[i]);
 +              je->m_iops[i]   = cpu_to_le32(je->m_iops[i]);
 +              je->t_iops[i]   = cpu_to_le32(je->t_iops[i]);
                je->rate[i]     = cpu_to_le32(je->rate[i]);
                je->iops[i]     = cpu_to_le32(je->iops[i]);
        }
  
        je->elapsed_sec         = cpu_to_le64(je->elapsed_sec);
        je->eta_sec             = cpu_to_le64(je->eta_sec);
 +      je->nr_threads          = cpu_to_le32(je->nr_threads);
        je->is_pow2             = cpu_to_le32(je->is_pow2);
  
 -      fio_net_send_cmd(server_fd, FIO_NET_CMD_ETA, je, size, cmd->tag);
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_ETA, je, size, &tag, NULL);
        free(je);
        return 0;
  }
  
 +static int send_update_job_reply(int fd, uint64_t __tag, int error)
 +{
 +      uint64_t tag = __tag;
 +      uint32_t pdu_error;
 +
 +      pdu_error = __cpu_to_le32(error);
 +      return fio_net_send_cmd(fd, FIO_NET_CMD_UPDATE_JOB, &pdu_error, sizeof(pdu_error), &tag, NULL);
 +}
 +
 +static int handle_update_job_cmd(struct fio_net_cmd *cmd)
 +{
 +      struct cmd_add_job_pdu *pdu = (struct cmd_add_job_pdu *) cmd->payload;
 +      struct thread_data *td;
 +      uint32_t tnumber;
 +
 +      tnumber = le32_to_cpu(pdu->thread_number);
 +
 +      dprint(FD_NET, "server: updating options for job %u\n", tnumber);
 +
 +      if (!tnumber || tnumber > thread_number) {
 +              send_update_job_reply(server_fd, cmd->tag, ENODEV);
 +              return 0;
 +      }
 +
 +      td = &threads[tnumber - 1];
 +      convert_thread_options_to_cpu(&td->o, &pdu->top);
 +      send_update_job_reply(server_fd, cmd->tag, 0);
 +      return 0;
 +}
 +
  static int handle_command(struct fio_net_cmd *cmd)
  {
        int ret;
        case FIO_NET_CMD_SEND_ETA:
                ret = handle_send_eta_cmd(cmd);
                break;
 +      case FIO_NET_CMD_RUN:
 +              ret = handle_run_cmd(cmd);
 +              break;
 +      case FIO_NET_CMD_UPDATE_JOB:
 +              ret = handle_update_job_cmd(cmd);
 +              break;
        default:
 -              log_err("fio: unknown opcode: %s\n",fio_server_op(cmd->opcode));
 +              log_err("fio: unknown opcode: %s\n", fio_server_op(cmd->opcode));
                ret = 1;
        }
  
        return ret;
  }
  
 -static int handle_connection(int sk, int block)
 +static int handle_connection(int sk)
  {
        struct fio_net_cmd *cmd = NULL;
        int ret = 0;
  
 +      reset_fio_state();
 +      INIT_FLIST_HEAD(&job_list);
 +      server_fd = sk;
 +
        /* read forever */
        while (!exit_backend) {
                struct pollfd pfd = {
  
                ret = 0;
                do {
 -                      ret = poll(&pfd, 1, 100);
 +                      int timeout = 1000;
 +
 +                      if (!flist_empty(&job_list))
 +                              timeout = 100;
 +
 +                      ret = poll(&pfd, 1, timeout);
                        if (ret < 0) {
                                if (errno == EINTR)
                                        break;
                                log_err("fio: poll: %s\n", strerror(errno));
                                break;
                        } else if (!ret) {
 -                              if (!block)
 -                                      return 0;
 +                              fio_server_check_jobs();
                                continue;
                        }
  
                        }
                } while (!exit_backend);
  
 +              fio_server_check_jobs();
 +
                if (ret < 0)
                        break;
  
        if (cmd)
                free(cmd);
  
 -      return ret;
 -}
 -
 -void fio_server_idle_loop(void)
 -{
 -      if (!first_cmd_check)
 -              fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_RUN, 0, NULL);
 -      if (server_fd != -1)
 -              handle_connection(server_fd, 0);
 +      close(sk);
 +      _exit(ret);
  }
  
  static int accept_loop(int listen_sk)
        struct sockaddr_in addr;
        fio_socklen_t len = sizeof(addr);
        struct pollfd pfd;
 -      int ret, sk, flags, exitval = 0;
 +      int ret = 0, sk, flags, exitval = 0;
  
        dprint(FD_NET, "server enter accept loop\n");
  
        flags = fcntl(listen_sk, F_GETFL);
        flags |= O_NONBLOCK;
        fcntl(listen_sk, F_SETFL, flags);
 -again:
 -      pfd.fd = listen_sk;
 -      pfd.events = POLLIN;
 -      do {
 -              ret = poll(&pfd, 1, 100);
 -              if (ret < 0) {
 -                      if (errno == EINTR)
 -                              break;
 -                      log_err("fio: poll: %s\n", strerror(errno));
 -                      goto out;
 -              } else if (!ret)
 -                      continue;
  
 -              if (pfd.revents & POLLIN)
 -                      break;
 -      } while (!exit_backend);
 +      while (!exit_backend) {
 +              pid_t pid;
  
 -      if (exit_backend)
 -              goto out;
 +              pfd.fd = listen_sk;
 +              pfd.events = POLLIN;
 +              do {
 +                      int timeout = 1000;
  
 -      sk = accept(listen_sk, (struct sockaddr *) &addr, &len);
 -      if (sk < 0) {
 -              log_err("fio: accept: %s\n", strerror(errno));
 -              return -1;
 -      }
 +                      if (!flist_empty(&conn_list))
 +                              timeout = 100;
  
 -      dprint(FD_NET, "server: connect from %s\n", inet_ntoa(addr.sin_addr));
 +                      ret = poll(&pfd, 1, timeout);
 +                      if (ret < 0) {
 +                              if (errno == EINTR)
 +                                      break;
 +                              log_err("fio: poll: %s\n", strerror(errno));
 +                              break;
 +                      } else if (!ret) {
 +                              fio_server_check_conns();
 +                              continue;
 +                      }
  
 -      server_fd = sk;
 +                      if (pfd.revents & POLLIN)
 +                              break;
 +              } while (!exit_backend);
  
 -      exitval = handle_connection(sk, 1);
 +              fio_server_check_conns();
  
 -      server_fd = -1;
 -      close(sk);
 +              if (exit_backend || ret < 0)
 +                      break;
  
 -      if (!exit_backend)
 -              goto again;
 +              sk = accept(listen_sk, (struct sockaddr *) &addr, &len);
 +              if (sk < 0) {
 +                      log_err("fio: accept: %s\n", strerror(errno));
 +                      return -1;
 +              }
 +
 +              dprint(FD_NET, "server: connect from %s\n", inet_ntoa(addr.sin_addr));
 +
 +              pid = fork();
 +              if (pid) {
 +                      close(sk);
 +                      fio_server_add_conn_pid(pid);
 +                      continue;
 +              }
 +
 +              /* exits */
 +              handle_connection(sk);
 +      }
  
 -out:
        return exitval;
  }
  
 -int fio_server_text_output(const char *buf, size_t len)
 +int fio_server_text_output(int level, const char *buf, size_t len)
  {
 -      if (server_fd != -1)
 -              return fio_net_send_cmd(server_fd, FIO_NET_CMD_TEXT, buf, len, 0);
 +      struct cmd_text_pdu *pdu;
 +      unsigned int tlen;
 +      struct timeval tv;
 +
 +      if (server_fd == -1)
 +              return log_local_buf(buf, len);
 +
 +      tlen = sizeof(*pdu) + len;
 +      pdu = malloc(tlen);
 +
 +      pdu->level      = __cpu_to_le32(level);
 +      pdu->buf_len    = __cpu_to_le32(len);
  
 -      return log_local_buf(buf, len);
 +      gettimeofday(&tv, NULL);
 +      pdu->log_sec    = __cpu_to_le64(tv.tv_sec);
 +      pdu->log_usec   = __cpu_to_le64(tv.tv_usec);
 +
 +      memcpy(pdu->buf, buf, len);
 +
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_TEXT, pdu, tlen, NULL, NULL);
 +      free(pdu);
 +      return len;
  }
  
  static void convert_io_stat(struct io_stat *dst, struct io_stat *src)
@@@ -925,7 -639,7 +931,7 @@@ static void convert_gs(struct group_run
  {
        int i;
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                dst->max_run[i]         = cpu_to_le64(src->max_run[i]);
                dst->min_run[i]         = cpu_to_le64(src->min_run[i]);
                dst->max_bw[i]          = cpu_to_le64(src->max_bw[i]);
@@@ -955,13 -669,12 +961,13 @@@ void fio_server_send_ts(struct thread_s
        strcpy(p.ts.verror, ts->verror);
        strcpy(p.ts.description, ts->description);
  
 -      p.ts.error      = cpu_to_le32(ts->error);
 -      p.ts.groupid    = cpu_to_le32(ts->groupid);
 -      p.ts.pid        = cpu_to_le32(ts->pid);
 -      p.ts.members    = cpu_to_le32(ts->members);
 +      p.ts.error              = cpu_to_le32(ts->error);
 +      p.ts.thread_number      = cpu_to_le32(ts->thread_number);
 +      p.ts.groupid            = cpu_to_le32(ts->groupid);
 +      p.ts.pid                = cpu_to_le32(ts->pid);
 +      p.ts.members            = cpu_to_le32(ts->members);
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                convert_io_stat(&p.ts.clat_stat[i], &ts->clat_stat[i]);
                convert_io_stat(&p.ts.slat_stat[i], &ts->slat_stat[i]);
                convert_io_stat(&p.ts.lat_stat[i], &ts->lat_stat[i]);
                p.ts.io_u_lat_m[i]      = cpu_to_le32(ts->io_u_lat_m[i]);
        }
  
 -      for (i = 0; i < 2; i++)
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++)
                for (j = 0; j < FIO_IO_U_PLAT_NR; j++)
                        p.ts.io_u_plat[i][j] = cpu_to_le32(ts->io_u_plat[i][j]);
  
        p.ts.total_submit       = cpu_to_le64(ts->total_submit);
        p.ts.total_complete     = cpu_to_le64(ts->total_complete);
  
 -      for (i = 0; i < 2; i++) {
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
                p.ts.io_bytes[i]        = cpu_to_le64(ts->io_bytes[i]);
                p.ts.runtime[i]         = cpu_to_le64(ts->runtime[i]);
        }
  
        convert_gs(&p.rs, rs);
  
 -      fio_net_send_cmd(server_fd, FIO_NET_CMD_TS, &p, sizeof(p), 0);
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_TS, &p, sizeof(p), NULL, NULL);
  }
  
  void fio_server_send_gs(struct group_run_stats *rs)
        dprint(FD_NET, "server sending group run stats\n");
  
        convert_gs(&gs, rs);
 -      fio_net_send_cmd(server_fd, FIO_NET_CMD_GS, &gs, sizeof(gs), 0);
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_GS, &gs, sizeof(gs), NULL, NULL);
  }
  
  static void convert_agg(struct disk_util_agg *dst, struct disk_util_agg *src)
@@@ -1082,125 -795,23 +1088,125 @@@ void fio_server_send_du(void
                convert_dus(&pdu.dus, &du->dus);
                convert_agg(&pdu.agg, &du->agg);
  
 -              fio_net_send_cmd(server_fd, FIO_NET_CMD_DU, &pdu, sizeof(pdu), 0);
 +              fio_net_send_cmd(server_fd, FIO_NET_CMD_DU, &pdu, sizeof(pdu), NULL, NULL);
        }
  }
  
 -int fio_server_log(const char *format, ...)
 +/*
 + * Send a command with a separate PDU, not inlined in the command
 + */
 +static int fio_send_cmd_ext_pdu(int sk, uint16_t opcode, const void *buf,
 +                              off_t size, uint64_t tag, uint32_t flags)
  {
 -      char buffer[1024];
 -      va_list args;
 -      size_t len;
 +      struct fio_net_cmd cmd;
 +      struct iovec iov[2];
  
 -      dprint(FD_NET, "server log\n");
 +      iov[0].iov_base = &cmd;
 +      iov[0].iov_len = sizeof(cmd);
 +      iov[1].iov_base = (void *) buf;
 +      iov[1].iov_len = size;
  
 -      va_start(args, format);
 -      len = vsnprintf(buffer, sizeof(buffer), format, args);
 -      va_end(args);
 +      __fio_init_net_cmd(&cmd, opcode, size, tag);
 +      cmd.flags = __cpu_to_le32(flags);
 +      fio_net_cmd_crc_pdu(&cmd, buf);
  
 -      return fio_server_text_output(buffer, len);
 +      return fio_sendv_data(sk, iov, 2);
 +}
 +
 +int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
 +{
 +      struct cmd_iolog_pdu pdu;
 +      z_stream stream;
 +      void *out_pdu;
 +      int i, ret = 0;
 +
 +      pdu.thread_number = cpu_to_le32(td->thread_number);
 +      pdu.nr_samples = __cpu_to_le32(log->nr_samples);
 +      pdu.log_type = cpu_to_le32(log->log_type);
 +      strcpy((char *) pdu.name, name);
 +
 +      for (i = 0; i < log->nr_samples; i++) {
 +              struct io_sample *s = &log->log[i];
 +
 +              s->time = cpu_to_le64(s->time);
 +              s->val  = cpu_to_le64(s->val);
 +              s->ddir = cpu_to_le32(s->ddir);
 +              s->bs   = cpu_to_le32(s->bs);
 +      }
 +
 +      /*
 +       * Dirty - since the log is potentially huge, compress it into
 +       * FIO_SERVER_MAX_FRAGMENT_PDU chunks and let the receiving
 +       * side defragment it.
 +       */
 +      out_pdu = malloc(FIO_SERVER_MAX_FRAGMENT_PDU);
 +
 +      stream.zalloc = Z_NULL;
 +      stream.zfree = Z_NULL;
 +      stream.opaque = Z_NULL;
 +
 +      if (deflateInit(&stream, Z_DEFAULT_COMPRESSION) != Z_OK) {
 +              ret = 1;
 +              goto err;
 +      }
 +
 +      /*
 +       * Send header first, it's not compressed.
 +       */
 +      ret = fio_send_cmd_ext_pdu(server_fd, FIO_NET_CMD_IOLOG, &pdu,
 +                                      sizeof(pdu), 0, FIO_NET_CMD_F_MORE);
 +      if (ret)
 +              goto err_zlib;
 +
 +      stream.next_in = (void *) log->log;
 +      stream.avail_in = log->nr_samples * sizeof(struct io_sample);
 +
 +      do {
 +              unsigned int this_len, flags = 0;
 +              int ret;
 +
 +              stream.avail_out = FIO_SERVER_MAX_FRAGMENT_PDU;
 +              stream.next_out = out_pdu;
 +              ret = deflate(&stream, Z_FINISH);
 +              /* may be Z_OK, or Z_STREAM_END */
 +              if (ret < 0)
 +                      goto err_zlib;
 +
 +              this_len = FIO_SERVER_MAX_FRAGMENT_PDU - stream.avail_out;
 +
 +              if (stream.avail_in)
 +                      flags = FIO_NET_CMD_F_MORE;
 +
 +              ret = fio_send_cmd_ext_pdu(server_fd, FIO_NET_CMD_IOLOG,
 +                                         out_pdu, this_len, 0, flags);
 +              if (ret)
 +                      goto err_zlib;
 +      } while (stream.avail_in);
 +
 +err_zlib:
 +      deflateEnd(&stream);
 +err:
 +      free(out_pdu);
 +      return ret;
 +}
 +
 +void fio_server_send_add_job(struct thread_data *td)
 +{
 +      struct cmd_add_job_pdu pdu;
 +
 +      memset(&pdu, 0, sizeof(pdu));
 +      pdu.thread_number = cpu_to_le32(td->thread_number);
 +      pdu.groupid = cpu_to_le32(td->groupid);
 +      convert_thread_options_to_net(&pdu.top, &td->o);
 +
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_ADD_JOB, &pdu, sizeof(pdu), NULL, NULL);
 +}
 +
 +void fio_server_send_start(struct thread_data *td)
 +{
 +      assert(server_fd != -1);
 +
 +      fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_SERVER_START, 0, NULL);
  }
  
  static int fio_init_server_ip(void)
@@@ -1332,46 -943,6 +1338,46 @@@ static int fio_init_server_connection(v
        return sk;
  }
  
 +int fio_server_parse_host(const char *host, int *ipv6, struct in_addr *inp,
 +                        struct in6_addr *inp6)
 +
 +{
 +      int ret = 0;
 +
 +      if (*ipv6)
 +              ret = inet_pton(AF_INET6, host, inp6);
 +      else
 +              ret = inet_pton(AF_INET, host, inp);
 +
 +      if (ret != 1) {
 +              struct hostent *hent;
 +
 +              hent = gethostbyname(host);
 +              if (!hent) {
 +                      log_err("fio: failed to resolve <%s>\n", host);
 +                      return 0;
 +              }
 +
 +              if (*ipv6) {
 +                      if (hent->h_addrtype != AF_INET6) {
 +                              log_info("fio: falling back to IPv4\n");
 +                              *ipv6 = 0;
 +                      } else
 +                              memcpy(inp6, hent->h_addr_list[0], 16);
 +              }
 +              if (!*ipv6) {
 +                      if (hent->h_addrtype != AF_INET) {
 +                              log_err("fio: lookup type mismatch\n");
 +                              return 0;
 +                      }
 +                      memcpy(inp, hent->h_addr_list[0], 4);
 +              }
 +              ret = 1;
 +      }
 +
 +      return !(ret == 1);
 +}
 +
  /*
   * Parse a host/ip/port string. Reads from 'str'.
   *
@@@ -1390,7 -961,7 +1396,7 @@@ int fio_server_parse_string(const char 
  {
        const char *host = str;
        char *portp;
 -      int ret, lport = 0;
 +      int lport = 0;
  
        *ptr = NULL;
        *is_sock = 0;
  
        *ptr = strdup(host);
  
 -      if (*ipv6)
 -              ret = inet_pton(AF_INET6, host, inp6);
 -      else
 -              ret = inet_pton(AF_INET, host, inp);
 -
 -      if (ret != 1) {
 -              struct hostent *hent;
 -
 -              hent = gethostbyname(host);
 -              if (!hent) {
 -                      log_err("fio: failed to resolve <%s>\n", host);
 -                      free(*ptr);
 -                      *ptr = NULL;
 -                      return 1;
 -              }
 -
 -              if (*ipv6) {
 -                      if (hent->h_addrtype != AF_INET6) {
 -                              log_info("fio: falling back to IPv4\n");
 -                              *ipv6 = 0;
 -                      } else
 -                              memcpy(inp6, hent->h_addr_list[0], 16);
 -              }
 -              if (!*ipv6) {
 -                      if (hent->h_addrtype != AF_INET) {
 -                              log_err("fio: lookup type mismatch\n");
 -                              free(*ptr);
 -                              *ptr = NULL;
 -                              return 1;
 -                      }
 -                      memcpy(inp, hent->h_addr_list[0], 4);
 -              }
 +      if (fio_server_parse_host(*ptr, ipv6, inp, inp6)) {
 +              free(*ptr);
 +              *ptr = NULL;
 +              return 1;
        }
  
        if (*port == 0)
@@@ -1590,7 -1189,7 +1596,7 @@@ int fio_start_server(char *pidfile
  
  #if defined(WIN32)
        WSADATA wsd;
 -      WSAStartup(MAKEWORD(2,2), &wsd);
 +      WSAStartup(MAKEWORD(2, 2), &wsd);
  #endif
  
        if (!pidfile)
diff --combined server.h
index 5273fd17cff55b45b7a20ccf45b042a3070f936a,624e4c0a49dfed37b9bd8e7f82304dcc4ad06a37..201e62d67c943a367ebb4005a45182ed6d52ab79
+++ b/server.h
@@@ -10,8 -10,6 +10,8 @@@
  #include "os/os.h"
  #include "diskutil.h"
  
 +#define FIO_NET_PORT 8765
 +
  /*
   * On-wire encoding is little endian
   */
@@@ -30,47 -28,41 +30,47 @@@ struct fio_net_cmd 
        uint8_t payload[0];     /* payload */
  };
  
 -struct fio_net_int_cmd {
 -      struct fio_net_cmd cmd;
 +struct fio_net_cmd_reply {
        struct flist_head list;
        struct timeval tv;
        uint64_t saved_tag;
 +      uint16_t opcode;
  };
  
  enum {
-       FIO_SERVER_VER                  = 18,
 -      FIO_SERVER_VER          = 9,
 -
 -      FIO_SERVER_MAX_PDU      = 1024,
 -
 -      FIO_NET_CMD_QUIT        = 1,
 -      FIO_NET_CMD_EXIT        = 2,
 -      FIO_NET_CMD_JOB         = 3,
 -      FIO_NET_CMD_JOBLINE     = 4,
 -      FIO_NET_CMD_TEXT        = 5,
 -      FIO_NET_CMD_TS          = 6,
 -      FIO_NET_CMD_GS          = 7,
 -      FIO_NET_CMD_SEND_ETA    = 8,
 -      FIO_NET_CMD_ETA         = 9,
 -      FIO_NET_CMD_PROBE       = 10,
 -      FIO_NET_CMD_START       = 11,
 -      FIO_NET_CMD_STOP        = 12,
 -      FIO_NET_CMD_DU          = 13,
 -      FIO_NET_CMD_RUN         = 14,
 -      FIO_NET_CMD_NR          = 15,
 -
 -      FIO_NET_CMD_F_MORE      = 1UL << 0,
++      FIO_SERVER_VER                  = 19,
 +
 +      FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
 +
 +      FIO_NET_CMD_QUIT                = 1,
 +      FIO_NET_CMD_EXIT                = 2,
 +      FIO_NET_CMD_JOB                 = 3,
 +      FIO_NET_CMD_JOBLINE             = 4,
 +      FIO_NET_CMD_TEXT                = 5,
 +      FIO_NET_CMD_TS                  = 6,
 +      FIO_NET_CMD_GS                  = 7,
 +      FIO_NET_CMD_SEND_ETA            = 8,
 +      FIO_NET_CMD_ETA                 = 9,
 +      FIO_NET_CMD_PROBE               = 10,
 +      FIO_NET_CMD_START               = 11,
 +      FIO_NET_CMD_STOP                = 12,
 +      FIO_NET_CMD_DU                  = 13,
 +      FIO_NET_CMD_SERVER_START        = 14,
 +      FIO_NET_CMD_ADD_JOB             = 15,
 +      FIO_NET_CMD_RUN                 = 16,
 +      FIO_NET_CMD_IOLOG               = 17,
 +      FIO_NET_CMD_UPDATE_JOB          = 18,
 +      FIO_NET_CMD_NR                  = 19,
 +
 +      FIO_NET_CMD_F_MORE              = 1UL << 0,
  
        /* crc does not include the crc fields */
 -      FIO_NET_CMD_CRC_SZ      = sizeof(struct fio_net_cmd) -
 -                                      2 * sizeof(uint16_t),
 +      FIO_NET_CMD_CRC_SZ              = sizeof(struct fio_net_cmd) -
 +                                              2 * sizeof(uint16_t),
 +
 +      FIO_NET_NAME_MAX                = 256,
  
 -      FIO_NET_CLIENT_TIMEOUT  = 5000,
 +      FIO_NET_CLIENT_TIMEOUT          = 5000,
  };
  
  struct cmd_ts_pdu {
@@@ -90,8 -82,6 +90,8 @@@ struct cmd_probe_pdu 
        uint8_t os;
        uint8_t arch;
        uint8_t bpp;
 +      uint32_t cpus;
 +      uint64_t flags;
  };
  
  struct cmd_single_line_pdu {
  
  struct cmd_line_pdu {
        uint16_t lines;
 +      uint16_t client_type;
        struct cmd_single_line_pdu options[0];
  };
  
 +struct cmd_job_pdu {
 +      uint32_t buf_len;       /* little endian on the wire; handle_job_cmd() converts it with le32_to_cpu() */
 +      uint32_t client_type;   /* little endian on the wire; handle_job_cmd() passes it to parse_jobs_ini() */
 +      uint8_t buf[0];         /* job data that handle_job_cmd() hands to parse_jobs_ini() */
 +};
 +
  struct cmd_start_pdu {
        uint32_t jobs;
+       uint32_t stat_outputs;
  };
  
  struct cmd_end_pdu {
        uint32_t error;
 +      uint32_t signal;
 +};
 +
 +struct cmd_add_job_pdu {
 +      uint32_t thread_number; /* little endian; 1-based job number (the server indexes threads[] with it minus one) */
 +      uint32_t groupid;       /* little endian; td->groupid when sent by fio_server_send_add_job() */
 +      struct thread_options_pack top; /* job options, packed by convert_thread_options_to_net() */
 +};
 +
 +struct cmd_text_pdu {
 +      uint32_t level;         /* little endian; the 'level' argument of fio_server_text_output() */
 +      uint32_t buf_len;       /* little endian; number of text bytes in buf */
 +      uint64_t log_sec;       /* little endian; gettimeofday() seconds taken when the text is sent */
 +      uint64_t log_usec;      /* little endian; gettimeofday() microseconds, pairs with log_sec */
 +      uint8_t buf[0];         /* the text itself, buf_len bytes copied from the caller's buffer */
 +};
 +
 +struct cmd_iolog_pdu {
 +      uint32_t thread_number;
 +      uint32_t nr_samples;
 +      uint32_t log_type;
 +      uint8_t name[FIO_NET_NAME_MAX];
 +      struct io_sample samples[0];
  };
  
  extern int fio_start_server(char *);
 -extern int fio_server_text_output(const char *, size_t);
 -extern int fio_server_log(const char *format, ...);
 -extern int fio_net_send_cmd(int, uint16_t, const void *, off_t, uint64_t);
 +extern int fio_server_text_output(int, const char *, size_t);
 +extern int fio_net_send_cmd(int, uint16_t, const void *, off_t, uint64_t *, struct flist_head *);
  extern int fio_net_send_simple_cmd(int, uint16_t, uint64_t, struct flist_head *);
  extern void fio_server_set_arg(const char *);
  extern int fio_server_parse_string(const char *, char **, int *, int *, struct in_addr *, struct in6_addr *, int *);
 +extern int fio_server_parse_host(const char *, int *, struct in_addr *, struct in6_addr *);
  extern const char *fio_server_op(unsigned int);
  extern void fio_server_got_signal(int);
  
@@@ -161,44 -122,33 +162,44 @@@ extern void fio_server_idle_loop(void)
  
  extern int fio_clients_connect(void);
  extern int fio_clients_send_ini(const char *);
 -extern int fio_handle_clients(void);
 -extern int fio_client_add(const char *, void **);
  extern void fio_client_add_cmd_option(void *, const char *);
  extern void fio_client_add_ini_file(void *, const char *);
  
  extern int fio_recv_data(int sk, void *p, unsigned int len);
  extern int fio_send_data(int sk, const void *p, unsigned int len);
  extern void fio_net_cmd_crc(struct fio_net_cmd *);
 +extern void fio_net_cmd_crc_pdu(struct fio_net_cmd *, const void *);
  extern struct fio_net_cmd *fio_net_recv_cmd(int sk);
  
 +extern int fio_send_iolog(struct thread_data *, struct io_log *, const char *);
 +extern void fio_server_send_add_job(struct thread_data *);
 +extern void fio_server_send_start(struct thread_data *);
 +extern int fio_net_send_stop(int sk, int error, int signal);
 +extern int fio_net_send_quit(int sk);
 +
  extern int exit_backend;
  extern int fio_net_port;
  
 -static inline void fio_init_net_cmd(struct fio_net_cmd *cmd, uint16_t opcode,
 -                                  const void *pdu, uint32_t pdu_len,
 -                                  uint64_t tag)
 +static inline void __fio_init_net_cmd(struct fio_net_cmd *cmd, uint16_t opcode,
 +                                    uint32_t pdu_len, uint64_t tag)
  {
        memset(cmd, 0, sizeof(*cmd));
  
        cmd->version    = __cpu_to_le16(FIO_SERVER_VER);
        cmd->opcode     = cpu_to_le16(opcode);
        cmd->tag        = cpu_to_le64(tag);
 +      cmd->pdu_len    = cpu_to_le32(pdu_len);
 +}
 +
 +
 +static inline void fio_init_net_cmd(struct fio_net_cmd *cmd, uint16_t opcode,
 +                                  const void *pdu, uint32_t pdu_len,
 +                                  uint64_t tag)
 +{
 +      __fio_init_net_cmd(cmd, opcode, pdu_len, tag);
  
 -      if (pdu) {
 -              cmd->pdu_len    = cpu_to_le32(pdu_len);
 +      if (pdu)
                memcpy(&cmd->payload, pdu, pdu_len);
 -      }
  }
  
  #endif
diff --combined stat.c
index 4a881d4fedf75b22b6283cffa2fdd0236cf82dda,522901a993e89f70d6f075ac9e5c2ebd52e77eae..2e7824295600385430d724db60c4dfd574870907
--- 1/stat.c
--- 2/stat.c
+++ b/stat.c
@@@ -16,7 -16,11 +16,11 @@@ void update_rusage_stat(struct thread_d
  {
        struct thread_stat *ts = &td->ts;
  
+ #ifdef RUSAGE_THREAD
+       getrusage(RUSAGE_THREAD, &td->ru_end);
+ #else
        getrusage(RUSAGE_SELF, &td->ru_end);
+ #endif
  
        ts->usr_time += mtime_since(&td->ru_start.ru_utime,
                                        &td->ru_end.ru_utime);
@@@ -64,12 -68,12 +68,12 @@@ static unsigned int plat_val_to_idx(uns
  
        /*
         * Discard the error bits and apply the mask to find the
 -         * index for the buckets in the group
 +       * index for the buckets in the group
         */
        offset = (FIO_IO_U_PLAT_VAL - 1) & (val >> error_bits);
  
        /* Make sure the index does not exceed (array size - 1) */
 -      idx = (base + offset) < (FIO_IO_U_PLAT_NR - 1)?
 +      idx = (base + offset) < (FIO_IO_U_PLAT_NR - 1) ?
                (base + offset) : (FIO_IO_U_PLAT_NR - 1);
  
        return idx;
@@@ -87,11 -91,11 +91,11 @@@ static unsigned int plat_idx_to_val(uns
  
        /* MSB <= (FIO_IO_U_PLAT_BITS-1), cannot be rounded off. Use
         * all bits of the sample as index */
 -      if (idx < (FIO_IO_U_PLAT_VAL << 1) )
 +      if (idx < (FIO_IO_U_PLAT_VAL << 1))
                return idx;
  
        /* Find the group and compute the minimum value of that group */
 -      error_bits = (idx >> FIO_IO_U_PLAT_BITS) -1;
 +      error_bits = (idx >> FIO_IO_U_PLAT_BITS) - 1;
        base = 1 << (error_bits + FIO_IO_U_PLAT_BITS);
  
        /* Find its bucket number of the group */
@@@ -115,9 -119,11 +119,9 @@@ static int double_cmp(const void *a, co
        return cmp;
  }
  
 -static unsigned int calc_clat_percentiles(unsigned int *io_u_plat,
 -                                        unsigned long nr, fio_fp64_t *plist,
 -                                        unsigned int **output,
 -                                        unsigned int *maxv,
 -                                        unsigned int *minv)
 +unsigned int calc_clat_percentiles(unsigned int *io_u_plat, unsigned long nr,
 +                                 fio_fp64_t *plist, unsigned int **output,
 +                                 unsigned int *maxv, unsigned int *minv)
  {
        unsigned long sum = 0;
        unsigned int len, i, j = 0;
         * isn't a worry. Also note that this does not work for NaN values.
         */
        if (len > 1)
 -              qsort((void*)plist, len, sizeof(plist[0]), double_cmp);
 +              qsort((void *)plist, len, sizeof(plist[0]), double_cmp);
  
        /*
         * Calculate bucket values, note down max and min values
@@@ -233,8 -239,8 +237,8 @@@ out
                free(ovals);
  }
  
 -static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
 -                  double *mean, double *dev)
 +int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
 +           double *mean, double *dev)
  {
        double n = is->samples;
  
@@@ -286,7 -292,12 +290,7 @@@ void show_group_stats(struct group_run_
        }
  }
  
 -#define ts_total_io_u(ts)     \
 -      ((ts)->total_io_u[DDIR_READ] + (ts)->total_io_u[DDIR_WRITE] +\
 -              (ts)->total_io_u[DDIR_TRIM])
 -
 -static void stat_calc_dist(unsigned int *map, unsigned long total,
 -                         double *io_u_dist)
 +void stat_calc_dist(unsigned int *map, unsigned long total, double *io_u_dist)
  {
        int i;
  
  static void stat_calc_lat(struct thread_stat *ts, double *dst,
                          unsigned int *src, int nr)
  {
 -      unsigned long total = ts_total_io_u(ts);
 +      unsigned long total = ddir_rw_sum(ts->total_io_u);
        int i;
  
        /*
        }
  }
  
 -static void stat_calc_lat_u(struct thread_stat *ts, double *io_u_lat)
 +void stat_calc_lat_u(struct thread_stat *ts, double *io_u_lat)
  {
        stat_calc_lat(ts, io_u_lat, ts->io_u_lat_u, FIO_IO_U_LAT_U_NR);
  }
  
 -static void stat_calc_lat_m(struct thread_stat *ts, double *io_u_lat)
 +void stat_calc_lat_m(struct thread_stat *ts, double *io_u_lat)
  {
        stat_calc_lat(ts, io_u_lat, ts->io_u_lat_m, FIO_IO_U_LAT_M_NR);
  }
  
 -static int usec_to_msec(unsigned long *min, unsigned long *max, double *mean,
 -                      double *dev)
 +static void display_lat(const char *name, unsigned long min, unsigned long max,
 +                      double mean, double dev)
  {
 -      if (*min > 1000 && *max > 1000 && *mean > 1000.0 && *dev > 1000.0) {
 -              *min /= 1000;
 -              *max /= 1000;
 -              *mean /= 1000.0;
 -              *dev /= 1000.0;
 -              return 0;
 -      }
 +      const char *base = "(usec)";
 +      char *minp, *maxp;
  
 -      return 1;
 +      if (!usec_to_msec(&min, &max, &mean, &dev))
 +              base = "(msec)";
 +
 +      minp = num2str(min, 6, 1, 0);
 +      maxp = num2str(max, 6, 1, 0);
 +
 +      log_info("    %s %s: min=%s, max=%s, avg=%5.02f,"
 +               " stdev=%5.02f\n", name, base, minp, maxp, mean, dev);
 +
 +      free(minp);
 +      free(maxp);
  }
  
  static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
        free(bw_p);
        free(iops_p);
  
 -      if (calc_lat(&ts->slat_stat[ddir], &min, &max, &mean, &dev)) {
 -              const char *base = "(usec)";
 -              char *minp, *maxp;
 -
 -              if (!usec_to_msec(&min, &max, &mean, &dev))
 -                      base = "(msec)";
 -
 -              minp = num2str(min, 6, 1, 0);
 -              maxp = num2str(max, 6, 1, 0);
 -
 -              log_info("    slat %s: min=%s, max=%s, avg=%5.02f,"
 -                       " stdev=%5.02f\n", base, minp, maxp, mean, dev);
 -
 -              free(minp);
 -              free(maxp);
 -      }
 -      if (calc_lat(&ts->clat_stat[ddir], &min, &max, &mean, &dev)) {
 -              const char *base = "(usec)";
 -              char *minp, *maxp;
 -
 -              if (!usec_to_msec(&min, &max, &mean, &dev))
 -                      base = "(msec)";
 -
 -              minp = num2str(min, 6, 1, 0);
 -              maxp = num2str(max, 6, 1, 0);
 -
 -              log_info("    clat %s: min=%s, max=%s, avg=%5.02f,"
 -                       " stdev=%5.02f\n", base, minp, maxp, mean, dev);
 -
 -              free(minp);
 -              free(maxp);
 -      }
 -      if (calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev)) {
 -              const char *base = "(usec)";
 -              char *minp, *maxp;
 -
 -              if (!usec_to_msec(&min, &max, &mean, &dev))
 -                      base = "(msec)";
 -
 -              minp = num2str(min, 6, 1, 0);
 -              maxp = num2str(max, 6, 1, 0);
 -
 -              log_info("     lat %s: min=%s, max=%s, avg=%5.02f,"
 -                       " stdev=%5.02f\n", base, minp, maxp, mean, dev);
 +      if (calc_lat(&ts->slat_stat[ddir], &min, &max, &mean, &dev))
 +              display_lat("slat", min, max, mean, dev);
 +      if (calc_lat(&ts->clat_stat[ddir], &min, &max, &mean, &dev))
 +              display_lat("clat", min, max, mean, dev);
 +      if (calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev))
 +              display_lat(" lat", min, max, mean, dev);
  
 -              free(minp);
 -              free(maxp);
 -      }
        if (ts->clat_percentiles) {
                show_clat_percentiles(ts->io_u_plat[ddir],
                                        ts->clat_stat[ddir].samples,
@@@ -469,14 -516,8 +473,14 @@@ static void show_lat_m(double *io_u_lat
        show_lat(io_u_lat_m, FIO_IO_U_LAT_M_NR, ranges, "msec");
  }
  
 -static void show_latencies(double *io_u_lat_u, double *io_u_lat_m)
 +static void show_latencies(struct thread_stat *ts)
  {
 +      double io_u_lat_u[FIO_IO_U_LAT_U_NR];
 +      double io_u_lat_m[FIO_IO_U_LAT_M_NR];
 +
 +      stat_calc_lat_u(ts, io_u_lat_u);
 +      stat_calc_lat_m(ts, io_u_lat_m);
 +
        show_lat_u(io_u_lat_u);
        show_lat_m(io_u_lat_m);
  }
@@@ -486,6 -527,8 +490,6 @@@ void show_thread_status(struct thread_s
        double usr_cpu, sys_cpu;
        unsigned long runtime;
        double io_u_dist[FIO_IO_U_MAP_NR];
 -      double io_u_lat_u[FIO_IO_U_LAT_U_NR];
 -      double io_u_lat_m[FIO_IO_U_LAT_M_NR];
        time_t time_p;
        char time_buf[64];
  
        if (ts->io_bytes[DDIR_TRIM])
                show_ddir_status(rs, ts, DDIR_TRIM);
  
 -      stat_calc_lat_u(ts, io_u_lat_u);
 -      stat_calc_lat_m(ts, io_u_lat_m);
 -      show_latencies(io_u_lat_u, io_u_lat_m);
 +      show_latencies(ts);
  
        runtime = ts->total_run_time;
        if (runtime) {
        log_info("  cpu          : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu, majf=%lu,"
                 " minf=%lu\n", usr_cpu, sys_cpu, ts->ctx, ts->majf, ts->minf);
  
 -      stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
 +      stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist);
        log_info("  IO depths    : 1=%3.1f%%, 2=%3.1f%%, 4=%3.1f%%, 8=%3.1f%%,"
                 " 16=%3.1f%%, 32=%3.1f%%, >=64=%3.1f%%\n", io_u_dist[0],
                                        io_u_dist[1], io_u_dist[2],
@@@ -742,7 -787,7 +746,7 @@@ static void add_ddir_status_json(struc
  }
  
  static void show_thread_status_terse_v2(struct thread_stat *ts,
 -                                      struct group_run_stats *rs)
 +                                      struct group_run_stats *rs)
  {
        double io_u_dist[FIO_IO_U_MAP_NR];
        double io_u_lat_u[FIO_IO_U_LAT_U_NR];
                                                                ts->minf);
  
        /* Calc % distribution of IO depths, usecond, msecond latency */
 -      stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
 +      stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist);
        stat_calc_lat_u(ts, io_u_lat_u);
        stat_calc_lat_m(ts, io_u_lat_m);
  
@@@ -836,7 -881,7 +840,7 @@@ static void show_thread_status_terse_v3
                                                                ts->minf);
  
        /* Calc % distribution of IO depths, usecond, msecond latency */
 -      stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
 +      stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist);
        stat_calc_lat_u(ts, io_u_lat_u);
        stat_calc_lat_m(ts, io_u_lat_m);
  
@@@ -903,7 -948,7 +907,7 @@@ static struct json_object *show_thread_
  
  
        /* Calc % distribution of IO depths, usecond, msecond latency */
 -      stat_calc_dist(ts->io_u_map, ts_total_io_u(ts), io_u_dist);
 +      stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist);
        stat_calc_lat_u(ts, io_u_lat_u);
        stat_calc_lat_m(ts, io_u_lat_m);
  
@@@ -1161,11 -1206,6 +1165,11 @@@ void show_run_stats(void
                        else
                                memset(ts->description, 0, FIO_JOBNAME_SIZE);
  
 +                      /*
 +                       * If multiple entries in this group, this is
 +                       * the first member.
 +                       */
 +                      ts->thread_number = td->thread_number;
                        ts->groupid = td->groupid;
  
                        /*
diff --combined thread_options.h
index 9b9079659801a9e9d94684b1c6abcdfbe49ff0c0,0000000000000000000000000000000000000000..9975af17d4f24b078f78bfea23c81475c5ba5cf6
mode 100644,000000..100644
--- /dev/null
@@@ -1,429 -1,0 +1,449 @@@
 +#ifndef FIO_THREAD_OPTIONS_H
 +#define FIO_THREAD_OPTIONS_H
 +
 +#include "arch/arch.h"
 +#include "os/os.h"
 +#include "stat.h"
 +#include "gettime.h"
 +
 +/*
 + * What type of allocation to use for io buffers
 + */
 +enum fio_memtype {
 +      MEM_MALLOC = 0, /* ordinary malloc */
 +      MEM_SHM,        /* use shared memory segments */
 +      MEM_SHMHUGE,    /* use shared memory segments with huge pages */
 +      MEM_MMAP,       /* use anonymous mmap */
 +      MEM_MMAPHUGE,   /* memory mapped huge file */
 +};
 +
 +/*
 + * What type of errors to continue on when continue_on_error is used
 + */
 +enum error_type_bit {
 +      ERROR_TYPE_READ_BIT = 0,
 +      ERROR_TYPE_WRITE_BIT = 1,
 +      ERROR_TYPE_VERIFY_BIT = 2,
 +      ERROR_TYPE_CNT = 3,
 +};
 +
 +#define ERROR_STR_MAX 128
 +
 +enum error_type {
 +        ERROR_TYPE_NONE = 0,
 +        ERROR_TYPE_READ = 1 << ERROR_TYPE_READ_BIT,
 +        ERROR_TYPE_WRITE = 1 << ERROR_TYPE_WRITE_BIT,
 +        ERROR_TYPE_VERIFY = 1 << ERROR_TYPE_VERIFY_BIT,
 +        ERROR_TYPE_ANY = 0xffff,
 +};
 +
 +#define BSSPLIT_MAX   64
 +
 +struct bssplit {
 +      uint32_t bs;
 +      uint32_t perc;
 +};
 +
 +struct thread_options {
 +      int pad;
 +      char *description;
 +      char *name;
 +      char *directory;
 +      char *filename;
 +      char *opendir;
 +      char *ioengine;
 +      char *mmapfile;
 +      enum td_ddir td_ddir;
 +      unsigned int rw_seq;
 +      unsigned int kb_base;
 +      unsigned int ddir_seq_nr;
 +      long ddir_seq_add;
 +      unsigned int iodepth;
 +      unsigned int iodepth_low;
 +      unsigned int iodepth_batch;
 +      unsigned int iodepth_batch_complete;
 +
 +      unsigned long long size;
 +      unsigned int size_percent;
 +      unsigned int fill_device;
 +      unsigned long long file_size_low;
 +      unsigned long long file_size_high;
 +      unsigned long long start_offset;
 +
 +      unsigned int bs[DDIR_RWDIR_CNT];
 +      unsigned int ba[DDIR_RWDIR_CNT];
 +      unsigned int min_bs[DDIR_RWDIR_CNT];
 +      unsigned int max_bs[DDIR_RWDIR_CNT];
 +      struct bssplit *bssplit[DDIR_RWDIR_CNT];
 +      unsigned int bssplit_nr[DDIR_RWDIR_CNT];
 +
 +      int *ignore_error[ERROR_TYPE_CNT];
 +      unsigned int ignore_error_nr[ERROR_TYPE_CNT];
 +      unsigned int error_dump;
 +
 +      unsigned int nr_files;
 +      unsigned int open_files;
 +      enum file_lock_mode file_lock_mode;
 +      unsigned int lockfile_batch;
 +
 +      unsigned int odirect;
 +      unsigned int invalidate_cache;
 +      unsigned int create_serialize;
 +      unsigned int create_fsync;
 +      unsigned int create_on_open;
 +      unsigned int create_only;
 +      unsigned int end_fsync;
 +      unsigned int pre_read;
 +      unsigned int sync_io;
 +      unsigned int verify;
 +      unsigned int do_verify;
 +      unsigned int verifysort;
 +      unsigned int verify_interval;
 +      unsigned int verify_offset;
 +      char verify_pattern[MAX_PATTERN_SIZE];
 +      unsigned int verify_pattern_bytes;
 +      unsigned int verify_fatal;
 +      unsigned int verify_dump;
 +      unsigned int verify_async;
 +      unsigned long long verify_backlog;
 +      unsigned int verify_batch;
 +      unsigned int use_thread;
 +      unsigned int unlink;
 +      unsigned int do_disk_util;
 +      unsigned int override_sync;
 +      unsigned int rand_repeatable;
 +      unsigned int use_os_rand;
 +      unsigned int log_avg_msec;
 +      unsigned int norandommap;
 +      unsigned int softrandommap;
 +      unsigned int bs_unaligned;
 +      unsigned int fsync_on_close;
 +
++      unsigned int random_distribution;
++      fio_fp64_t zipf_theta;
++      fio_fp64_t pareto_h;
++
 +      unsigned int hugepage_size;
 +      unsigned int rw_min_bs;
 +      unsigned int thinktime;
 +      unsigned int thinktime_spin;
 +      unsigned int thinktime_blocks;
 +      unsigned int fsync_blocks;
 +      unsigned int fdatasync_blocks;
 +      unsigned int barrier_blocks;
 +      unsigned long long start_delay;
 +      unsigned long long timeout;
 +      unsigned long long ramp_time;
 +      unsigned int overwrite;
 +      unsigned int bw_avg_time;
 +      unsigned int iops_avg_time;
 +      unsigned int loops;
 +      unsigned long long zone_range;
 +      unsigned long long zone_size;
 +      unsigned long long zone_skip;
 +      unsigned long long lockmem;
 +      enum fio_memtype mem_type;
 +      unsigned int mem_align;
 +
++      unsigned max_latency;
++
 +      unsigned int stonewall;
 +      unsigned int new_group;
 +      unsigned int numjobs;
 +      os_cpu_mask_t cpumask;
 +      unsigned int cpumask_set;
 +      os_cpu_mask_t verify_cpumask;
 +      unsigned int verify_cpumask_set;
++#ifdef FIO_HAVE_LIBNUMA
++      struct bitmask *numa_cpunodesmask;
++      unsigned int numa_cpumask_set;
++      unsigned short numa_mem_mode;
++      unsigned int numa_mem_prefer_node;
++      struct bitmask *numa_memnodesmask;
++      unsigned int numa_memmask_set;
++#endif
 +      unsigned int iolog;
 +      unsigned int rwmixcycle;
 +      unsigned int rwmix[2];
 +      unsigned int nice;
 +      unsigned int ioprio;
 +      unsigned int ioprio_class;
 +      unsigned int file_service_type;
 +      unsigned int group_reporting;
 +      unsigned int fadvise_hint;
 +      enum fio_fallocate_mode fallocate_mode;
 +      unsigned int zero_buffers;
 +      unsigned int refill_buffers;
 +      unsigned int scramble_buffers;
 +      unsigned int compress_percentage;
 +      unsigned int compress_chunk;
 +      unsigned int time_based;
 +      unsigned int disable_lat;
 +      unsigned int disable_clat;
 +      unsigned int disable_slat;
 +      unsigned int disable_bw;
 +      unsigned int gtod_reduce;
 +      unsigned int gtod_cpu;
 +      unsigned int gtod_offload;
 +      enum fio_cs clocksource;
 +      unsigned int no_stall;
 +      unsigned int trim_percentage;
 +      unsigned int trim_batch;
 +      unsigned int trim_zero;
 +      unsigned long long trim_backlog;
 +      unsigned int clat_percentiles;
 +      unsigned int overwrite_plist;
 +      fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 +
 +      char *read_iolog_file;
 +      char *write_iolog_file;
 +      char *bw_log_file;
 +      char *lat_log_file;
 +      char *iops_log_file;
 +      char *replay_redirect;
 +
 +      /*
 +       * Pre-run and post-run shell
 +       */
 +      char *exec_prerun;
 +      char *exec_postrun;
 +
 +      unsigned int rate[DDIR_RWDIR_CNT];
 +      unsigned int ratemin[DDIR_RWDIR_CNT];
 +      unsigned int ratecycle;
 +      unsigned int rate_iops[DDIR_RWDIR_CNT];
 +      unsigned int rate_iops_min[DDIR_RWDIR_CNT];
 +
 +      char *ioscheduler;
 +
 +      /*
 +       * I/O Error handling
 +       */
 +      enum error_type continue_on_error;
 +
 +      /*
 +       * Benchmark profile type
 +       */
 +      char *profile;
 +
 +      /*
 +       * blkio cgroup support
 +       */
 +      char *cgroup;
 +      unsigned int cgroup_weight;
 +      unsigned int cgroup_nodelete;
 +
 +      unsigned int uid;
 +      unsigned int gid;
 +
 +      int flow_id;
 +      int flow;
 +      int flow_watermark;
 +      unsigned int flow_sleep;
 +
 +      unsigned long long offset_increment;
 +
 +      unsigned int sync_file_range;
 +};
 +
 +#define FIO_TOP_STR_MAX               256
 +
 +struct thread_options_pack {
 +      uint8_t description[FIO_TOP_STR_MAX];
 +      uint8_t name[FIO_TOP_STR_MAX];
 +      uint8_t directory[FIO_TOP_STR_MAX];
 +      uint8_t filename[FIO_TOP_STR_MAX];
 +      uint8_t opendir[FIO_TOP_STR_MAX];
 +      uint8_t ioengine[FIO_TOP_STR_MAX];
 +      uint8_t mmapfile[FIO_TOP_STR_MAX];
 +      uint32_t td_ddir;
 +      uint32_t rw_seq;
 +      uint32_t kb_base;
 +      uint32_t ddir_seq_nr;
 +      uint64_t ddir_seq_add;
 +      uint32_t iodepth;
 +      uint32_t iodepth_low;
 +      uint32_t iodepth_batch;
 +      uint32_t iodepth_batch_complete;
 +
 +      uint64_t size;
 +      uint32_t size_percent;
 +      uint32_t fill_device;
 +      uint64_t file_size_low;
 +      uint64_t file_size_high;
 +      uint64_t start_offset;
 +
 +      uint32_t bs[DDIR_RWDIR_CNT];
 +      uint32_t ba[DDIR_RWDIR_CNT];
 +      uint32_t min_bs[DDIR_RWDIR_CNT];
 +      uint32_t max_bs[DDIR_RWDIR_CNT];
 +      struct bssplit bssplit[DDIR_RWDIR_CNT][BSSPLIT_MAX];
 +      uint32_t bssplit_nr[DDIR_RWDIR_CNT];
 +
 +      uint32_t ignore_error[ERROR_TYPE_CNT][ERROR_STR_MAX];
 +      uint32_t ignore_error_nr[ERROR_TYPE_CNT];
 +      uint32_t error_dump;
 +
 +      uint32_t nr_files;
 +      uint32_t open_files;
 +      uint32_t file_lock_mode;
 +      uint32_t lockfile_batch;
 +
 +      uint32_t odirect;
 +      uint32_t invalidate_cache;
 +      uint32_t create_serialize;
 +      uint32_t create_fsync;
 +      uint32_t create_on_open;
 +      uint32_t create_only;
 +      uint32_t end_fsync;
 +      uint32_t pre_read;
 +      uint32_t sync_io;
 +      uint32_t verify;
 +      uint32_t do_verify;
 +      uint32_t verifysort;
 +      uint32_t verify_interval;
 +      uint32_t verify_offset;
 +      uint8_t verify_pattern[MAX_PATTERN_SIZE];
 +      uint32_t verify_pattern_bytes;
 +      uint32_t verify_fatal;
 +      uint32_t verify_dump;
 +      uint32_t verify_async;
 +      uint64_t verify_backlog;
 +      uint32_t verify_batch;
 +      uint32_t use_thread;
 +      uint32_t unlink;
 +      uint32_t do_disk_util;
 +      uint32_t override_sync;
 +      uint32_t rand_repeatable;
 +      uint32_t use_os_rand;
 +      uint32_t log_avg_msec;
 +      uint32_t norandommap;
 +      uint32_t softrandommap;
 +      uint32_t bs_unaligned;
 +      uint32_t fsync_on_close;
 +
++      uint32_t random_distribution;
++      fio_fp64_t zipf_theta;
++      fio_fp64_t pareto_h;
++
 +      uint32_t hugepage_size;
 +      uint32_t rw_min_bs;
 +      uint32_t thinktime;
 +      uint32_t thinktime_spin;
 +      uint32_t thinktime_blocks;
 +      uint32_t fsync_blocks;
 +      uint32_t fdatasync_blocks;
 +      uint32_t barrier_blocks;
 +      uint64_t start_delay;
 +      uint64_t timeout;
 +      uint64_t ramp_time;
 +      uint32_t overwrite;
 +      uint32_t bw_avg_time;
 +      uint32_t iops_avg_time;
 +      uint32_t loops;
 +      uint64_t zone_range;
 +      uint64_t zone_size;
 +      uint64_t zone_skip;
 +      uint64_t lockmem;
 +      uint32_t mem_type;
 +      uint32_t mem_align;
 +
++      uint32_t max_latency;
++
 +      uint32_t stonewall;
 +      uint32_t new_group;
 +      uint32_t numjobs;
 +      uint8_t cpumask[FIO_TOP_STR_MAX];
 +      uint32_t cpumask_set;
 +      uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 +      uint32_t verify_cpumask_set;
 +      uint32_t iolog;
 +      uint32_t rwmixcycle;
 +      uint32_t rwmix[2];
 +      uint32_t nice;
 +      uint32_t ioprio;
 +      uint32_t ioprio_class;
 +      uint32_t file_service_type;
 +      uint32_t group_reporting;
 +      uint32_t fadvise_hint;
 +      uint32_t fallocate_mode;
 +      uint32_t zero_buffers;
 +      uint32_t refill_buffers;
 +      uint32_t scramble_buffers;
 +      unsigned int compress_percentage;
 +      unsigned int compress_chunk;
 +      uint32_t time_based;
 +      uint32_t disable_lat;
 +      uint32_t disable_clat;
 +      uint32_t disable_slat;
 +      uint32_t disable_bw;
 +      uint32_t gtod_reduce;
 +      uint32_t gtod_cpu;
 +      uint32_t gtod_offload;
 +      uint32_t clocksource;
 +      uint32_t no_stall;
 +      uint32_t trim_percentage;
 +      uint32_t trim_batch;
 +      uint32_t trim_zero;
 +      uint64_t trim_backlog;
 +      uint32_t clat_percentiles;
 +      uint32_t overwrite_plist;
 +      fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 +
 +      uint8_t read_iolog_file[FIO_TOP_STR_MAX];
 +      uint8_t write_iolog_file[FIO_TOP_STR_MAX];
 +      uint8_t bw_log_file[FIO_TOP_STR_MAX];
 +      uint8_t lat_log_file[FIO_TOP_STR_MAX];
 +      uint8_t iops_log_file[FIO_TOP_STR_MAX];
 +      uint8_t replay_redirect[FIO_TOP_STR_MAX];
 +
 +      /*
 +       * Pre-run and post-run shell
 +       */
 +      uint8_t exec_prerun[FIO_TOP_STR_MAX];
 +      uint8_t exec_postrun[FIO_TOP_STR_MAX];
 +
 +      uint32_t rate[DDIR_RWDIR_CNT];
 +      uint32_t ratemin[DDIR_RWDIR_CNT];
 +      uint32_t ratecycle;
 +      uint32_t rate_iops[DDIR_RWDIR_CNT];
 +      uint32_t rate_iops_min[DDIR_RWDIR_CNT];
 +
 +      uint8_t ioscheduler[FIO_TOP_STR_MAX];
 +
 +      /*
 +       * I/O Error handling
 +       */
 +      uint32_t continue_on_error;
 +
 +      /*
 +       * Benchmark profile type
 +       */
 +      uint8_t profile[FIO_TOP_STR_MAX];
 +
 +      /*
 +       * blkio cgroup support
 +       */
 +      uint8_t cgroup[FIO_TOP_STR_MAX];
 +      uint32_t cgroup_weight;
 +      uint32_t cgroup_nodelete;
 +
 +      uint32_t uid;
 +      uint32_t gid;
 +
 +      int32_t flow_id;
 +      int32_t flow;
 +      int32_t flow_watermark;
 +      uint32_t flow_sleep;
 +
 +      uint64_t offset_increment;
 +
 +      uint32_t sync_file_range;
 +} __attribute__((packed));
 +
 +extern void convert_thread_options_to_cpu(struct thread_options *o, struct thread_options_pack *top);
 +extern void convert_thread_options_to_net(struct thread_options_pack *top, struct thread_options *);
 +extern int fio_test_cconv(struct thread_options *);
 +extern void options_default_fill(struct thread_options *o);
 +
 +#endif