Merge branch 'master' into gfio
author Jens Axboe <axboe@kernel.dk>
Wed, 14 Nov 2012 21:25:31 +0000 (14:25 -0700)
committer Jens Axboe <axboe@kernel.dk>
Wed, 14 Nov 2012 21:25:31 +0000 (14:25 -0700)
Conflicts:
Makefile
backend.c
client.c
fio.h
options.c
parse.c
parse.h
server.c
server.h

Signed-off-by: Jens Axboe <axboe@kernel.dk>
21 files changed:
1  2 
HOWTO
Makefile
backend.c
cconv.c
client.c
client.h
eta.c
filesetup.c
fio.1
fio.h
init.c
io_u.c
lib/rbtree.c
lib/rbtree.h
options.c
parse.c
parse.h
server.c
server.h
stat.c
thread_options.h

diff --cc HOWTO
Simple merge
diff --cc Makefile
+++ b/Makefile
@@@ -18,7 -17,7 +20,7 @@@ SOURCE := gettime.c ioengines.c init.c 
                lib/num2str.c lib/ieee754.c $(wildcard crc/*.c) engines/cpu.c \
                engines/mmap.c engines/sync.c engines/null.c engines/net.c \
                memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
-               cconv.c lib/prio_tree.c json.c
 -              json.c lib/zipf.c gettime-thread.c
++              cconv.c lib/prio_tree.c lib/zipf.c json.c gettime-thread.c
  
  ifeq ($(UNAME), Linux)
    SOURCE += diskutil.c fifo.c blktrace.c helpers.c cgroup.c trim.c \
@@@ -70,12 -69,9 +72,12 @@@ ifneq (,$(findstring CYGWIN,$(UNAME))
  endif
  
  OBJS = $(SOURCE:.c=.o)
 +FIO_OBJS = $(OBJS) fio.o
 +GFIO_OBJS = $(OBJS) gfio.o graph.o tickmarks.o ghelpers.o goptions.o gerror.o \
 +                      gclient.o gcompat.o cairo_text_helpers.o printing.o
  
  T_SMALLOC_OBJS = t/stest.o
- T_SMALLOC_OBJS += mutex.o smalloc.o t/log.o gettime.o time.o
+ T_SMALLOC_OBJS += gettime.o mutex.o smalloc.o t/log.o
  T_SMALLOC_PROGS = t/stest
  
  T_IEEE_OBJS = t/ieee754.o
@@@ -152,12 -130,12 +163,15 @@@ t/stest: $(T_SMALLOC_OBJS
  t/ieee754: $(T_IEEE_OBJS)
        $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_IEEE_OBJS) $(LIBS) $(LDFLAGS)
  
 -fio: $(OBJS)
 -      $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(OBJS) $(LIBS) $(LDFLAGS)
 -
 +fio: $(FIO_OBJS)
 +      $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(FIO_OBJS) $(LIBS) $(LDFLAGS)
 +
 +gfio: $(GFIO_OBJS)
 +      $(QUIET_CC)$(CC) $(LIBS) -o gfio $(GFIO_OBJS) $(LIBS) $(GTK_LDFLAGS)
 +
+ t/genzipf: $(T_ZIPF_OBJS)
+       $(QUIET_CC)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_ZIPF_OBJS) $(LIBS) $(LDFLAGS)
  .depend: $(SOURCE)
        $(QUIET_DEP)$(CC) -MM $(CFLAGS) $(CPPFLAGS) $(SOURCE) 1> .depend
  
diff --cc backend.c
+++ b/backend.c
@@@ -64,6 -62,9 +64,7 @@@ struct io_log *agg_io_log[DDIR_RWDIR_CN
  
  int groupid = 0;
  unsigned int thread_number = 0;
 -unsigned int nr_process = 0;
 -unsigned int nr_thread = 0;
+ unsigned int stat_number = 0;
  int shm_id = 0;
  int temp_stall_ts;
  unsigned long done_secs = 0;
@@@ -1053,17 -1048,54 +1054,60 @@@ static void *thread_main(void *data
         * Set affinity first, in case it has an impact on the memory
         * allocations.
         */
 -      if (td->o.cpumask_set && fio_setaffinity(td->pid, td->o.cpumask) == -1) {
 -              td_verror(td, errno, "cpu_set_affinity");
 -              goto err;
 +      if (o->cpumask_set) {
 +              ret = fio_setaffinity(td->pid, o->cpumask);
 +              if (ret == -1) {
 +                      td_verror(td, errno, "cpu_set_affinity");
 +                      goto err;
 +              }
        }
  
 +      if (fio_pin_memory(td))
 +              goto err;
 +
+ #ifdef FIO_HAVE_LIBNUMA
+       /* numa node setup */
+       if (td->o.numa_cpumask_set || td->o.numa_memmask_set) {
+               int ret;
+               if (numa_available() < 0) {
+                       td_verror(td, errno, "Does not support NUMA API\n");
+                       goto err;
+               }
+               if (td->o.numa_cpumask_set) {
+                       ret = numa_run_on_node_mask(td->o.numa_cpunodesmask);
+                       if (ret == -1) {
+                               td_verror(td, errno, \
+                                       "numa_run_on_node_mask failed\n");
+                               goto err;
+                       }
+               }
+               if (td->o.numa_memmask_set) {
+                       switch (td->o.numa_mem_mode) {
+                       case MPOL_INTERLEAVE:
+                               numa_set_interleave_mask(td->o.numa_memnodesmask);
+                               break;
+                       case MPOL_BIND:
+                               numa_set_membind(td->o.numa_memnodesmask);
+                               break;
+                       case MPOL_LOCAL:
+                               numa_set_localalloc();
+                               break;
+                       case MPOL_PREFERRED:
+                               numa_set_preferred(td->o.numa_mem_prefer_node);
+                               break;
+                       case MPOL_DEFAULT:
+                       default:
+                               break;
+                       }
+               }
+       }
+ #endif
        /*
         * May alter parameters that init_io_u() will use, so we need to
         * do this first.
diff --cc cconv.c
index b023315,0000000..ca97c73
mode 100644,000000..100644
--- /dev/null
+++ b/cconv.c
@@@ -1,402 -1,0 +1,410 @@@
 +#include <string.h>
 +
 +#include "thread_options.h"
 +
 +static void string_to_cpu(char **dst, const uint8_t *src)
 +{
 +      const char *__src = (const char *) src;
 +
 +      if (strlen(__src))
 +              *dst = strdup(__src);
 +}
 +
 +static void string_to_net(uint8_t *dst, const char *src)
 +{
 +      if (src)
 +              strcpy((char *) dst, src);
 +      else
 +              dst[0] = '\0';
 +}
 +
 +void convert_thread_options_to_cpu(struct thread_options *o,
 +                                 struct thread_options_pack *top)
 +{
 +      int i, j;
 +
 +      string_to_cpu(&o->description, top->description);
 +      string_to_cpu(&o->name, top->name);
 +      string_to_cpu(&o->directory, top->directory);
 +      string_to_cpu(&o->filename, top->filename);
 +      string_to_cpu(&o->opendir, top->opendir);
 +      string_to_cpu(&o->ioengine, top->ioengine);
 +      string_to_cpu(&o->mmapfile, top->mmapfile);
 +      string_to_cpu(&o->read_iolog_file, top->read_iolog_file);
 +      string_to_cpu(&o->write_iolog_file, top->write_iolog_file);
 +      string_to_cpu(&o->bw_log_file, top->bw_log_file);
 +      string_to_cpu(&o->lat_log_file, top->lat_log_file);
 +      string_to_cpu(&o->iops_log_file, top->iops_log_file);
 +      string_to_cpu(&o->replay_redirect, top->replay_redirect);
 +      string_to_cpu(&o->exec_prerun, top->exec_prerun);
 +      string_to_cpu(&o->exec_postrun, top->exec_postrun);
 +      string_to_cpu(&o->ioscheduler, top->ioscheduler);
 +      string_to_cpu(&o->profile, top->profile);
 +      string_to_cpu(&o->cgroup, top->cgroup);
 +
 +      o->td_ddir = le32_to_cpu(top->td_ddir);
 +      o->rw_seq = le32_to_cpu(top->rw_seq);
 +      o->kb_base = le32_to_cpu(top->kb_base);
 +      o->ddir_seq_nr = le32_to_cpu(top->ddir_seq_nr);
 +      o->ddir_seq_add = le64_to_cpu(top->ddir_seq_add);
 +      o->iodepth = le32_to_cpu(top->iodepth);
 +      o->iodepth_low = le32_to_cpu(top->iodepth_low);
 +      o->iodepth_batch = le32_to_cpu(top->iodepth_batch);
 +      o->iodepth_batch_complete = le32_to_cpu(top->iodepth_batch_complete);
 +      o->size = le64_to_cpu(top->size);
 +      o->size_percent = le32_to_cpu(top->size_percent);
 +      o->fill_device = le32_to_cpu(top->fill_device);
 +      o->file_size_low = le64_to_cpu(top->file_size_low);
 +      o->file_size_high = le64_to_cpu(top->file_size_high);
 +      o->start_offset = le64_to_cpu(top->start_offset);
 +
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 +              o->bs[i] = le32_to_cpu(top->bs[i]);
 +              o->ba[i] = le32_to_cpu(top->ba[i]);
 +              o->min_bs[i] = le32_to_cpu(top->min_bs[i]);
 +              o->max_bs[i] = le32_to_cpu(top->max_bs[i]);
 +              o->bssplit_nr[i] = le32_to_cpu(top->bssplit_nr[i]);
 +
 +              if (o->bssplit_nr[i]) {
 +                      o->bssplit[i] = malloc(o->bssplit_nr[i] * sizeof(struct bssplit));
 +                      for (j = 0; j < o->bssplit_nr[i]; j++) {
 +                              o->bssplit[i][j].bs = le32_to_cpu(top->bssplit[i][j].bs);
 +                              o->bssplit[i][j].perc = le32_to_cpu(top->bssplit[i][j].perc);
 +                      }
 +              }
 +
 +              o->rwmix[i] = le32_to_cpu(top->rwmix[i]);
 +              o->rate[i] = le32_to_cpu(top->rate[i]);
 +              o->ratemin[i] = le32_to_cpu(top->ratemin[i]);
 +              o->rate_iops[i] = le32_to_cpu(top->rate_iops[i]);
 +              o->rate_iops_min[i] = le32_to_cpu(top->rate_iops_min[i]);
 +      }
 +
 +      o->ratecycle = le32_to_cpu(top->ratecycle);
 +      o->nr_files = le32_to_cpu(top->nr_files);
 +      o->open_files = le32_to_cpu(top->open_files);
 +      o->file_lock_mode = le32_to_cpu(top->file_lock_mode);
 +      o->lockfile_batch = le32_to_cpu(top->lockfile_batch);
 +      o->odirect = le32_to_cpu(top->odirect);
 +      o->invalidate_cache = le32_to_cpu(top->invalidate_cache);
 +      o->create_serialize = le32_to_cpu(top->create_serialize);
 +      o->create_fsync = le32_to_cpu(top->create_fsync);
 +      o->create_on_open = le32_to_cpu(top->create_on_open);
 +      o->create_only = le32_to_cpu(top->create_only);
 +      o->end_fsync = le32_to_cpu(top->end_fsync);
 +      o->pre_read = le32_to_cpu(top->pre_read);
 +      o->sync_io = le32_to_cpu(top->sync_io);
 +      o->verify = le32_to_cpu(top->verify);
 +      o->do_verify = le32_to_cpu(top->do_verify);
 +      o->verifysort = le32_to_cpu(top->verifysort);
 +      o->verify_interval = le32_to_cpu(top->verify_interval);
 +      o->verify_offset = le32_to_cpu(top->verify_offset);
 +
 +      memcpy(o->verify_pattern, top->verify_pattern, MAX_PATTERN_SIZE);
 +
 +      o->verify_pattern_bytes = le32_to_cpu(top->verify_pattern_bytes);
 +      o->verify_fatal = le32_to_cpu(top->verify_fatal);
 +      o->verify_dump = le32_to_cpu(top->verify_dump);
 +      o->verify_async = le32_to_cpu(top->verify_async);
 +      o->verify_batch = le32_to_cpu(top->verify_batch);
 +      o->use_thread = le32_to_cpu(top->use_thread);
 +      o->unlink = le32_to_cpu(top->unlink);
 +      o->do_disk_util = le32_to_cpu(top->do_disk_util);
 +      o->override_sync = le32_to_cpu(top->override_sync);
 +      o->rand_repeatable = le32_to_cpu(top->rand_repeatable);
 +      o->use_os_rand = le32_to_cpu(top->use_os_rand);
 +      o->log_avg_msec = le32_to_cpu(top->log_avg_msec);
 +      o->norandommap = le32_to_cpu(top->norandommap);
 +      o->softrandommap = le32_to_cpu(top->softrandommap);
 +      o->bs_unaligned = le32_to_cpu(top->bs_unaligned);
 +      o->fsync_on_close = le32_to_cpu(top->fsync_on_close);
++      o->random_distribution = le32_to_cpu(top->random_distribution);
++      o->zipf_theta.u.f = fio_uint64_to_double(le64_to_cpu(top->zipf_theta.u.i));
++      o->pareto_h.u.f = fio_uint64_to_double(le64_to_cpu(top->pareto_h.u.i));
 +      o->hugepage_size = le32_to_cpu(top->hugepage_size);
 +      o->rw_min_bs = le32_to_cpu(top->rw_min_bs);
 +      o->thinktime = le32_to_cpu(top->thinktime);
 +      o->thinktime_spin = le32_to_cpu(top->thinktime_spin);
 +      o->thinktime_blocks = le32_to_cpu(top->thinktime_blocks);
 +      o->fsync_blocks = le32_to_cpu(top->fsync_blocks);
 +      o->fdatasync_blocks = le32_to_cpu(top->fdatasync_blocks);
 +      o->barrier_blocks = le32_to_cpu(top->barrier_blocks);
 +
 +      o->verify_backlog = le64_to_cpu(top->verify_backlog);
 +      o->start_delay = le64_to_cpu(top->start_delay);
 +      o->timeout = le64_to_cpu(top->timeout);
 +      o->ramp_time = le64_to_cpu(top->ramp_time);
 +      o->zone_range = le64_to_cpu(top->zone_range);
 +      o->zone_size = le64_to_cpu(top->zone_size);
 +      o->zone_skip = le64_to_cpu(top->zone_skip);
 +      o->lockmem = le64_to_cpu(top->lockmem);
 +      o->offset_increment = le64_to_cpu(top->offset_increment);
 +
 +      o->overwrite = le32_to_cpu(top->overwrite);
 +      o->bw_avg_time = le32_to_cpu(top->bw_avg_time);
 +      o->iops_avg_time = le32_to_cpu(top->iops_avg_time);
 +      o->loops = le32_to_cpu(top->loops);
 +      o->mem_type = le32_to_cpu(top->mem_type);
 +      o->mem_align = le32_to_cpu(top->mem_align);
++      o->max_latency = le32_to_cpu(top->max_latency);
 +      o->stonewall = le32_to_cpu(top->stonewall);
 +      o->new_group = le32_to_cpu(top->new_group);
 +      o->numjobs = le32_to_cpu(top->numjobs);
 +      o->cpumask_set = le32_to_cpu(top->cpumask_set);
 +      o->verify_cpumask_set = le32_to_cpu(top->verify_cpumask_set);
 +      o->iolog = le32_to_cpu(top->iolog);
 +      o->rwmixcycle = le32_to_cpu(top->rwmixcycle);
 +      o->nice = le32_to_cpu(top->nice);
 +      o->ioprio = le32_to_cpu(top->ioprio);
 +      o->ioprio_class = le32_to_cpu(top->ioprio_class);
 +      o->file_service_type = le32_to_cpu(top->file_service_type);
 +      o->group_reporting = le32_to_cpu(top->group_reporting);
 +      o->fadvise_hint = le32_to_cpu(top->fadvise_hint);
 +      o->fallocate_mode = le32_to_cpu(top->fallocate_mode);
 +      o->zero_buffers = le32_to_cpu(top->zero_buffers);
 +      o->refill_buffers = le32_to_cpu(top->refill_buffers);
 +      o->scramble_buffers = le32_to_cpu(top->scramble_buffers);
 +      o->time_based = le32_to_cpu(top->time_based);
 +      o->disable_lat = le32_to_cpu(top->disable_lat);
 +      o->disable_clat = le32_to_cpu(top->disable_clat);
 +      o->disable_slat = le32_to_cpu(top->disable_slat);
 +      o->disable_bw = le32_to_cpu(top->disable_bw);
 +      o->gtod_reduce = le32_to_cpu(top->gtod_reduce);
 +      o->gtod_cpu = le32_to_cpu(top->gtod_cpu);
 +      o->gtod_offload = le32_to_cpu(top->gtod_offload);
 +      o->clocksource = le32_to_cpu(top->clocksource);
 +      o->no_stall = le32_to_cpu(top->no_stall);
 +      o->trim_percentage = le32_to_cpu(top->trim_percentage);
 +      o->trim_batch = le32_to_cpu(top->trim_batch);
 +      o->trim_zero = le32_to_cpu(top->trim_zero);
 +      o->clat_percentiles = le32_to_cpu(top->clat_percentiles);
 +      o->overwrite_plist = le32_to_cpu(top->overwrite_plist);
 +      o->continue_on_error = le32_to_cpu(top->continue_on_error);
 +      o->cgroup_weight = le32_to_cpu(top->cgroup_weight);
 +      o->cgroup_nodelete = le32_to_cpu(top->cgroup_nodelete);
 +      o->uid = le32_to_cpu(top->uid);
 +      o->gid = le32_to_cpu(top->gid);
 +      o->flow_id = __le32_to_cpu(top->flow_id);
 +      o->flow = __le32_to_cpu(top->flow);
 +      o->flow_watermark = __le32_to_cpu(top->flow_watermark);
 +      o->flow_sleep = le32_to_cpu(top->flow_sleep);
 +      o->sync_file_range = le32_to_cpu(top->sync_file_range);
 +      o->compress_percentage = le32_to_cpu(top->compress_percentage);
 +      o->compress_chunk = le32_to_cpu(top->compress_chunk);
 +
 +      o->trim_backlog = le64_to_cpu(top->trim_backlog);
 +
 +      for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
 +              o->percentile_list[i].u.f = fio_uint64_to_double(le64_to_cpu(top->percentile_list[i].u.i));
 +#if 0
 +      uint8_t cpumask[FIO_TOP_STR_MAX];
 +      uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 +#endif
 +}
 +
 +void convert_thread_options_to_net(struct thread_options_pack *top,
 +                                 struct thread_options *o)
 +{
 +      int i, j;
 +
 +      string_to_net(top->description, o->description);
 +      string_to_net(top->name, o->name);
 +      string_to_net(top->directory, o->directory);
 +      string_to_net(top->filename, o->filename);
 +      string_to_net(top->opendir, o->opendir);
 +      string_to_net(top->ioengine, o->ioengine);
 +      string_to_net(top->mmapfile, o->mmapfile);
 +      string_to_net(top->read_iolog_file, o->read_iolog_file);
 +      string_to_net(top->write_iolog_file, o->write_iolog_file);
 +      string_to_net(top->bw_log_file, o->bw_log_file);
 +      string_to_net(top->lat_log_file, o->lat_log_file);
 +      string_to_net(top->iops_log_file, o->iops_log_file);
 +      string_to_net(top->replay_redirect, o->replay_redirect);
 +      string_to_net(top->exec_prerun, o->exec_prerun);
 +      string_to_net(top->exec_postrun, o->exec_postrun);
 +      string_to_net(top->ioscheduler, o->ioscheduler);
 +      string_to_net(top->profile, o->profile);
 +      string_to_net(top->cgroup, o->cgroup);
 +
 +      top->td_ddir = cpu_to_le32(o->td_ddir);
 +      top->rw_seq = cpu_to_le32(o->rw_seq);
 +      top->kb_base = cpu_to_le32(o->kb_base);
 +      top->ddir_seq_nr = cpu_to_le32(o->ddir_seq_nr);
 +      top->iodepth = cpu_to_le32(o->iodepth);
 +      top->iodepth_low = cpu_to_le32(o->iodepth_low);
 +      top->iodepth_batch = cpu_to_le32(o->iodepth_batch);
 +      top->iodepth_batch_complete = cpu_to_le32(o->iodepth_batch_complete);
 +      top->size_percent = cpu_to_le32(o->size_percent);
 +      top->fill_device = cpu_to_le32(o->fill_device);
 +      top->ratecycle = cpu_to_le32(o->ratecycle);
 +      top->nr_files = cpu_to_le32(o->nr_files);
 +      top->open_files = cpu_to_le32(o->open_files);
 +      top->file_lock_mode = cpu_to_le32(o->file_lock_mode);
 +      top->lockfile_batch = cpu_to_le32(o->lockfile_batch);
 +      top->odirect = cpu_to_le32(o->odirect);
 +      top->invalidate_cache = cpu_to_le32(o->invalidate_cache);
 +      top->create_serialize = cpu_to_le32(o->create_serialize);
 +      top->create_fsync = cpu_to_le32(o->create_fsync);
 +      top->create_on_open = cpu_to_le32(o->create_on_open);
 +      top->create_only = cpu_to_le32(o->create_only);
 +      top->end_fsync = cpu_to_le32(o->end_fsync);
 +      top->pre_read = cpu_to_le32(o->pre_read);
 +      top->sync_io = cpu_to_le32(o->sync_io);
 +      top->verify = cpu_to_le32(o->verify);
 +      top->do_verify = cpu_to_le32(o->do_verify);
 +      top->verifysort = cpu_to_le32(o->verifysort);
 +      top->verify_interval = cpu_to_le32(o->verify_interval);
 +      top->verify_offset = cpu_to_le32(o->verify_offset);
 +      top->verify_pattern_bytes = cpu_to_le32(o->verify_pattern_bytes);
 +      top->verify_fatal = cpu_to_le32(o->verify_fatal);
 +      top->verify_dump = cpu_to_le32(o->verify_dump);
 +      top->verify_async = cpu_to_le32(o->verify_async);
 +      top->verify_batch = cpu_to_le32(o->verify_batch);
 +      top->use_thread = cpu_to_le32(o->use_thread);
 +      top->unlink = cpu_to_le32(o->unlink);
 +      top->do_disk_util = cpu_to_le32(o->do_disk_util);
 +      top->override_sync = cpu_to_le32(o->override_sync);
 +      top->rand_repeatable = cpu_to_le32(o->rand_repeatable);
 +      top->use_os_rand = cpu_to_le32(o->use_os_rand);
 +      top->log_avg_msec = cpu_to_le32(o->log_avg_msec);
 +      top->norandommap = cpu_to_le32(o->norandommap);
 +      top->softrandommap = cpu_to_le32(o->softrandommap);
 +      top->bs_unaligned = cpu_to_le32(o->bs_unaligned);
 +      top->fsync_on_close = cpu_to_le32(o->fsync_on_close);
++      top->random_distribution = cpu_to_le32(o->random_distribution);
++      top->zipf_theta.u.i = __cpu_to_le64(fio_double_to_uint64(o->zipf_theta.u.f));
++      top->pareto_h.u.i = __cpu_to_le64(fio_double_to_uint64(o->pareto_h.u.f));
 +      top->hugepage_size = cpu_to_le32(o->hugepage_size);
 +      top->rw_min_bs = cpu_to_le32(o->rw_min_bs);
 +      top->thinktime = cpu_to_le32(o->thinktime);
 +      top->thinktime_spin = cpu_to_le32(o->thinktime_spin);
 +      top->thinktime_blocks = cpu_to_le32(o->thinktime_blocks);
 +      top->fsync_blocks = cpu_to_le32(o->fsync_blocks);
 +      top->fdatasync_blocks = cpu_to_le32(o->fdatasync_blocks);
 +      top->barrier_blocks = cpu_to_le32(o->barrier_blocks);
 +      top->overwrite = cpu_to_le32(o->overwrite);
 +      top->bw_avg_time = cpu_to_le32(o->bw_avg_time);
 +      top->iops_avg_time = cpu_to_le32(o->iops_avg_time);
 +      top->loops = cpu_to_le32(o->loops);
 +      top->mem_type = cpu_to_le32(o->mem_type);
 +      top->mem_align = cpu_to_le32(o->mem_align);
++      top->max_latency = cpu_to_le32(o->max_latency);
 +      top->stonewall = cpu_to_le32(o->stonewall);
 +      top->new_group = cpu_to_le32(o->new_group);
 +      top->numjobs = cpu_to_le32(o->numjobs);
 +      top->cpumask_set = cpu_to_le32(o->cpumask_set);
 +      top->verify_cpumask_set = cpu_to_le32(o->verify_cpumask_set);
 +      top->iolog = cpu_to_le32(o->iolog);
 +      top->rwmixcycle = cpu_to_le32(o->rwmixcycle);
 +      top->nice = cpu_to_le32(o->nice);
 +      top->ioprio = cpu_to_le32(o->ioprio);
 +      top->ioprio_class = cpu_to_le32(o->ioprio_class);
 +      top->file_service_type = cpu_to_le32(o->file_service_type);
 +      top->group_reporting = cpu_to_le32(o->group_reporting);
 +      top->fadvise_hint = cpu_to_le32(o->fadvise_hint);
 +      top->fallocate_mode = cpu_to_le32(o->fallocate_mode);
 +      top->zero_buffers = cpu_to_le32(o->zero_buffers);
 +      top->refill_buffers = cpu_to_le32(o->refill_buffers);
 +      top->scramble_buffers = cpu_to_le32(o->scramble_buffers);
 +      top->time_based = cpu_to_le32(o->time_based);
 +      top->disable_lat = cpu_to_le32(o->disable_lat);
 +      top->disable_clat = cpu_to_le32(o->disable_clat);
 +      top->disable_slat = cpu_to_le32(o->disable_slat);
 +      top->disable_bw = cpu_to_le32(o->disable_bw);
 +      top->gtod_reduce = cpu_to_le32(o->gtod_reduce);
 +      top->gtod_cpu = cpu_to_le32(o->gtod_cpu);
 +      top->gtod_offload = cpu_to_le32(o->gtod_offload);
 +      top->clocksource = cpu_to_le32(o->clocksource);
 +      top->no_stall = cpu_to_le32(o->no_stall);
 +      top->trim_percentage = cpu_to_le32(o->trim_percentage);
 +      top->trim_batch = cpu_to_le32(o->trim_batch);
 +      top->trim_zero = cpu_to_le32(o->trim_zero);
 +      top->clat_percentiles = cpu_to_le32(o->clat_percentiles);
 +      top->overwrite_plist = cpu_to_le32(o->overwrite_plist);
 +      top->continue_on_error = cpu_to_le32(o->continue_on_error);
 +      top->cgroup_weight = cpu_to_le32(o->cgroup_weight);
 +      top->cgroup_nodelete = cpu_to_le32(o->cgroup_nodelete);
 +      top->uid = cpu_to_le32(o->uid);
 +      top->gid = cpu_to_le32(o->gid);
 +      top->flow_id = __cpu_to_le32(o->flow_id);
 +      top->flow = __cpu_to_le32(o->flow);
 +      top->flow_watermark = __cpu_to_le32(o->flow_watermark);
 +      top->flow_sleep = cpu_to_le32(o->flow_sleep);
 +      top->sync_file_range = cpu_to_le32(o->sync_file_range);
 +      top->compress_percentage = cpu_to_le32(o->compress_percentage);
 +      top->compress_chunk = cpu_to_le32(o->compress_chunk);
 +
 +      for (i = 0; i < DDIR_RWDIR_CNT; i++) {
 +              top->bs[i] = cpu_to_le32(o->bs[i]);
 +              top->ba[i] = cpu_to_le32(o->ba[i]);
 +              top->min_bs[i] = cpu_to_le32(o->min_bs[i]);
 +              top->max_bs[i] = cpu_to_le32(o->max_bs[i]);
 +              top->bssplit_nr[i] = cpu_to_le32(o->bssplit_nr[i]);
 +
 +              if (o->bssplit_nr[i]) {
 +                      unsigned int bssplit_nr = o->bssplit_nr[i];
 +
 +                      if (bssplit_nr > BSSPLIT_MAX) {
 +                              log_err("fio: BSSPLIT_MAX is too small\n");
 +                              bssplit_nr = BSSPLIT_MAX;
 +                      }
 +                      for (j = 0; j < bssplit_nr; j++) {
 +                              top->bssplit[i][j].bs = cpu_to_le32(o->bssplit[i][j].bs);
 +                              top->bssplit[i][j].perc = cpu_to_le32(o->bssplit[i][j].perc);
 +                      }
 +              }
 +
 +              top->rwmix[i] = cpu_to_le32(o->rwmix[i]);
 +              top->rate[i] = cpu_to_le32(o->rate[i]);
 +              top->ratemin[i] = cpu_to_le32(o->ratemin[i]);
 +              top->rate_iops[i] = cpu_to_le32(o->rate_iops[i]);
 +              top->rate_iops_min[i] = cpu_to_le32(o->rate_iops_min[i]);
 +      }
 +
 +      memcpy(top->verify_pattern, o->verify_pattern, MAX_PATTERN_SIZE);
 +
 +      top->size = __cpu_to_le64(o->size);
 +      top->verify_backlog = __cpu_to_le64(o->verify_backlog);
 +      top->start_delay = __cpu_to_le64(o->start_delay);
 +      top->timeout = __cpu_to_le64(o->timeout);
 +      top->ramp_time = __cpu_to_le64(o->ramp_time);
 +      top->zone_range = __cpu_to_le64(o->zone_range);
 +      top->zone_size = __cpu_to_le64(o->zone_size);
 +      top->zone_skip = __cpu_to_le64(o->zone_skip);
 +      top->lockmem = __cpu_to_le64(o->lockmem);
 +      top->ddir_seq_add = __cpu_to_le64(o->ddir_seq_add);
 +      top->file_size_low = __cpu_to_le64(o->file_size_low);
 +      top->file_size_high = __cpu_to_le64(o->file_size_high);
 +      top->start_offset = __cpu_to_le64(o->start_offset);
 +      top->trim_backlog = __cpu_to_le64(o->trim_backlog);
 +      top->offset_increment = __cpu_to_le64(o->offset_increment);
 +
 +      for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
 +              top->percentile_list[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->percentile_list[i].u.f));
 +#if 0
 +      uint8_t cpumask[FIO_TOP_STR_MAX];
 +      uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 +#endif
 +
 +}
 +
 +/*
 + * Basic conversion test. We'd really need to fill in more of the options
 + * to have a thorough test. Even better, we should auto-generate the
 + * converter functions...
 + */
 +int fio_test_cconv(struct thread_options *__o)
 +{
 +      struct thread_options o;
 +      struct thread_options_pack top1, top2;
 +
 +      memset(&top1, 0, sizeof(top1));
 +      memset(&top2, 0, sizeof(top2));
 +
 +      convert_thread_options_to_net(&top1, __o);
 +      memset(&o, 0, sizeof(o));
 +      convert_thread_options_to_cpu(&o, &top1);
 +      convert_thread_options_to_net(&top2, &o);
 +
 +      return memcmp(&top1, &top2, sizeof(top1));
 +}
diff --cc client.c
+++ b/client.c
@@@ -50,11 -81,11 +50,12 @@@ static FLIST_HEAD(eta_list)
  
  static FLIST_HEAD(arg_list);
  
 -static struct thread_stat client_ts;
 -static struct group_run_stats client_gs;
 -static int sum_stat_clients = 0;
 +struct thread_stat client_ts;
 +struct group_run_stats client_gs;
 +int sum_stat_clients;
 +
  static int sum_stat_nr;
+ static int do_output_all_clients;
  
  #define FIO_CLIENT_HASH_BITS  7
  #define FIO_CLIENT_HASH_SZ    (1 << FIO_CLIENT_HASH_BITS)
@@@ -116,41 -162,16 +117,42 @@@ void fio_put_client(struct fio_client *
        if (client->ini_file)
                free(client->ini_file);
  
+       if (!client->did_stat)
+               sum_stat_clients -= client->nr_stat;
        free(client);
 +}
 +
 +static void remove_client(struct fio_client *client)
 +{
 +      assert(client->refs);
 +
 +      dprint(FD_NET, "client: removed <%s>\n", client->hostname);
 +
 +      if (!flist_empty(&client->list))
 +              flist_del_init(&client->list);
 +
 +      fio_client_remove_hash(client);
 +
 +      if (!flist_empty(&client->eta_list)) {
 +              flist_del_init(&client->eta_list);
 +              fio_client_dec_jobs_eta(client->eta_in_flight, client->ops->eta);
 +      }
 +
 +      close(client->fd);
 +      client->fd = -1;
 +
 +      if (client->ops->removed)
 +              client->ops->removed(client);
 +
        nr_clients--;
-       sum_stat_clients--;
 +      fio_put_client(client);
  }
  
 -static void put_client(struct fio_client *client)
 +struct fio_client *fio_get_client(struct fio_client *client)
  {
 -      remove_client(client);
 +      client->refs++;
 +      return client;
  }
  
  static void __fio_client_add_cmd_option(struct fio_client *client,
@@@ -775,9 -673,13 +777,10 @@@ static void handle_ts(struct fio_clien
  {
        struct cmd_ts_pdu *p = (struct cmd_ts_pdu *) cmd->payload;
  
 -      convert_ts(&p->ts, &p->ts);
 -      convert_gs(&p->rs, &p->rs);
 -
        show_thread_status(&p->ts, &p->rs);
+       client->did_stat = 1;
  
-       if (sum_stat_clients == 1)
+       if (!do_output_all_clients)
                return;
  
        sum_thread_stats(&client_ts, &p->ts, sum_stat_nr);
diff --cc client.h
index 341d260,0000000..10d6ec3
mode 100644,000000..100644
--- /dev/null
+++ b/client.h
@@@ -1,142 -1,0 +1,144 @@@
 +#ifndef CLIENT_H
 +#define CLIENT_H
 +
 +#include <sys/socket.h>
 +#include <sys/un.h>
 +#include <netinet/in.h>
 +#include <arpa/inet.h>
 +
 +#include "stat.h"
 +
 +struct fio_net_cmd;
 +struct client_ops;
 +
 +enum {
 +      Client_created          = 0,
 +      Client_connected        = 1,
 +      Client_started          = 2,
 +      Client_running          = 3,
 +      Client_stopped          = 4,
 +      Client_exited           = 5,
 +};
 +
 +struct fio_client {
 +      struct flist_head list;
 +      struct flist_head hash_list;
 +      struct flist_head arg_list;
 +      union {
 +              struct sockaddr_in addr;
 +              struct sockaddr_in6 addr6;
 +              struct sockaddr_un addr_un;
 +      };
 +      char *hostname;
 +      int port;
 +      int fd;
 +      unsigned int refs;
 +
 +      char *name;
 +
 +      int state;
 +
 +      int skip_newline;
 +      int is_sock;
 +      int disk_stats_shown;
 +      unsigned int jobs;
++      unsigned int nr_stat;
 +      int error;
 +      int signal;
 +      int ipv6;
 +      int sent_job;
++      int did_stat;
 +      uint32_t type;
 +
 +      uint32_t thread_number;
 +      uint32_t groupid;
 +
 +      struct flist_head eta_list;
 +      struct client_eta *eta_in_flight;
 +
 +      struct flist_head cmd_list;
 +
 +      uint16_t argc;
 +      char **argv;
 +
 +      struct client_ops *ops;
 +      void *client_data;
 +
 +      char **ini_file;
 +      unsigned int nr_ini_file;
 +};
 +
 +struct cmd_iolog_pdu;
 +typedef void (client_cmd_op)(struct fio_client *, struct fio_net_cmd *);
 +typedef void (client_eta_op)(struct jobs_eta *je);
 +typedef void (client_timed_out_op)(struct fio_client *);
 +typedef void (client_jobs_eta_op)(struct fio_client *client, struct jobs_eta *je);
 +typedef void (client_iolog_op)(struct fio_client *client, struct cmd_iolog_pdu *);
 +
 +struct client_ops {
 +      client_cmd_op           *text;
 +      client_cmd_op           *disk_util;
 +      client_cmd_op           *thread_status;
 +      client_cmd_op           *group_stats;
 +      client_jobs_eta_op      *jobs_eta;
 +      client_eta_op           *eta;
 +      client_cmd_op           *probe;
 +      client_cmd_op           *quit;
 +      client_cmd_op           *add_job;
 +      client_cmd_op           *update_job;
 +      client_timed_out_op     *timed_out;
 +      client_cmd_op           *stop;
 +      client_cmd_op           *start;
 +      client_cmd_op           *job_start;
 +      client_iolog_op         *iolog;
 +      client_timed_out_op     *removed;
 +
 +      unsigned int eta_msec;
 +      int stay_connected;
 +      uint32_t client_type;
 +};
 +
 +extern struct client_ops fio_client_ops;
 +
 +struct client_eta {
 +      struct jobs_eta eta;
 +      unsigned int pending;
 +};
 +
 +extern int fio_handle_client(struct fio_client *);
 +extern void fio_client_dec_jobs_eta(struct client_eta *eta, client_eta_op fn);
 +extern void fio_client_sum_jobs_eta(struct jobs_eta *dst, struct jobs_eta *je);
 +
 +enum {
 +      Fio_client_ipv4 = 1,
 +      Fio_client_ipv6,
 +      Fio_client_socket,
 +};
 +
 +extern int fio_client_connect(struct fio_client *);
 +extern int fio_clients_connect(void);
 +extern int fio_start_client(struct fio_client *);
 +extern int fio_start_all_clients(void);
 +extern int fio_client_send_ini(struct fio_client *, const char *);
 +extern int fio_clients_send_ini(const char *);
 +extern int fio_handle_clients(struct client_ops *);
 +extern int fio_client_add(struct client_ops *, const char *, void **);
 +extern struct fio_client *fio_client_add_explicit(struct client_ops *, const char *, int, int);
 +extern void fio_client_add_cmd_option(void *, const char *);
 +extern void fio_client_add_ini_file(void *, const char *);
 +extern int fio_client_terminate(struct fio_client *);
 +extern void fio_clients_terminate(void);
 +extern struct fio_client *fio_get_client(struct fio_client *);
 +extern void fio_put_client(struct fio_client *);
 +extern int fio_client_update_options(struct fio_client *, struct thread_options *, uint64_t *);
 +extern int fio_client_wait_for_reply(struct fio_client *, uint64_t);
 +
 +#define FIO_CLIENT_DEF_ETA_MSEC               900
 +
 +enum {
 +      FIO_CLIENT_TYPE_CLI             = 1,
 +      FIO_CLIENT_TYPE_GUI             = 2,
 +};
 +
 +#endif
 +
diff --cc eta.c
Simple merge
diff --cc filesetup.c
@@@ -862,6 -863,42 +863,42 @@@ int pre_read_files(struct thread_data *
        return 1;
  }
  
 -              zipf_init(&f->zipf, nranges, td->o.zipf_theta, seed);
+ static int __init_rand_distribution(struct thread_data *td, struct fio_file *f)
+ {     /*
+        * Initialise the zipf or pareto generator state of file 'f' (f->zipf)
+        * according to td->o.random_distribution. Always returns 1.
+        */
+       unsigned int range_size, seed;
+       unsigned long nranges;
+       range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]); /* smaller of the read/write minimum block sizes */
+       nranges = (f->real_file_size + range_size - 1) / range_size; /* file size in range_size units, rounded up; NOTE(review): divides by zero if range_size is 0 -- confirm min_bs is validated elsewhere */
+       seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number; /* seed derived from the file name hash and the thread number */
+       if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
 -              pareto_init(&f->zipf, nranges, td->o.pareto_h, seed);
++              zipf_init(&f->zipf, nranges, td->o.zipf_theta.u.f, seed);
+       else
++              pareto_init(&f->zipf, nranges, td->o.pareto_h.u.f, seed);
+       return 1;
+ }
+ static int init_rand_distribution(struct thread_data *td)
+ {     /*
+        * Run __init_rand_distribution() on every file of 'td'. Returns 0
+        * without touching anything when the distribution is
+        * FIO_RAND_DIST_RANDOM, 1 otherwise.
+        */
+       struct fio_file *f;
+       unsigned int i;
+       int state;
+       if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
+               return 0;
+       state = td->runstate; /* remember the current run state ... */
+       td_set_runstate(td, TD_SETTING_UP);
+       for_each_file(td, f, i)
+               __init_rand_distribution(td, f);
+       td_set_runstate(td, state); /* ... and restore it once all files are set up */
+       return 1;
+ }
  int init_random_map(struct thread_data *td)
  {
        unsigned long long blocks, num_maps;
diff --cc fio.1
Simple merge
diff --cc fio.h
--- 1/fio.h
--- 2/fio.h
+++ b/fio.h
@@@ -50,6 -48,27 +50,16 @@@ struct thread_data
  #include <sys/asynch.h>
  #endif
  
 -/*
 - * What type of allocation to use for io buffers
 - */
 -enum fio_memtype {
 -      MEM_MALLOC = 0, /* ordinary malloc */
 -      MEM_SHM,        /* use shared memory segments */
 -      MEM_SHMHUGE,    /* use shared memory segments with huge pages */
 -      MEM_MMAP,       /* use anonynomous mmap */
 -      MEM_MMAPHUGE,   /* memory mapped huge file */
 -};
 -
+ #ifdef FIO_HAVE_LIBNUMA
+ #include <linux/mempolicy.h>
+ #include <numa.h>
+ /*
+  * "local" is pseudo-policy
+  */
+ #define MPOL_LOCAL MPOL_MAX
+ #endif
  /*
   * offset generator types
   */
@@@ -310,6 -558,8 +320,7 @@@ enum 
  
  extern int exitall_on_terminate;
  extern unsigned int thread_number;
 -extern unsigned int nr_process, nr_thread;
+ extern unsigned int stat_number;
  extern int shm_id;
  extern int groupid;
  extern int output_format;
diff --cc init.c
Simple merge
diff --cc io_u.c
Simple merge
diff --cc lib/rbtree.c
index 7cff649,0000000..883bc72
mode 100644,000000..100644
--- /dev/null
@@@ -1,302 -1,0 +1,333 @@@
 +/*
 +  Red Black Trees
 +  (C) 1999  Andrea Arcangeli <andrea@suse.de>
 +  (C) 2002  David Woodhouse <dwmw2@infradead.org>
 +  
 +  This program is free software; you can redistribute it and/or modify
 +  it under the terms of the GNU General Public License as published by
 +  the Free Software Foundation; either version 2 of the License, or
 +  (at your option) any later version.
 +
 +  This program is distributed in the hope that it will be useful,
 +  but WITHOUT ANY WARRANTY; without even the implied warranty of
 +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +  GNU General Public License for more details.
 +
 +  You should have received a copy of the GNU General Public License
 +  along with this program; if not, write to the Free Software
 +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 +
 +  linux/lib/rbtree.c
 +*/
 +
 +#include "rbtree.h"
 +/*
 + * Left rotation around 'node'. node's right child takes node's place under
 + * node's former parent (or becomes root->rb_node when node had no parent),
 + * node becomes that child's left child, and the child's previous left
 + * subtree is re-attached as node's right subtree.
 + *
 + * Parent links are rewritten with rb_set_parent(), which keeps the colour
 + * bits of the node it updates, so no colour changes here.
 + * node->rb_right is dereferenced unconditionally and must not be NULL.
 + */
 +static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *right = node->rb_right;
 +      struct rb_node *parent = rb_parent(node);
 +
 +      if ((node->rb_right = right->rb_left)) /* hand right's left subtree over to node */
 +              rb_set_parent(right->rb_left, node);
 +      right->rb_left = node;
 +
 +      rb_set_parent(right, parent);
 +
 +      if (parent)
 +      {
 +              if (node == parent->rb_left)
 +                      parent->rb_left = right;
 +              else
 +                      parent->rb_right = right;
 +      }
 +      else
 +              root->rb_node = right; /* node was the root */
 +      rb_set_parent(node, right);
 +}
 +/*
 + * Right rotation around 'node'; the mirror image of __rb_rotate_left().
 + * node's left child takes node's place under node's former parent (or
 + * becomes root->rb_node), node becomes that child's right child, and the
 + * child's previous right subtree is re-attached as node's left subtree.
 + *
 + * Colour bits are left untouched. node->rb_left is dereferenced
 + * unconditionally and must not be NULL.
 + */
 +static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *left = node->rb_left;
 +      struct rb_node *parent = rb_parent(node);
 +
 +      if ((node->rb_left = left->rb_right)) /* hand left's right subtree over to node */
 +              rb_set_parent(left->rb_right, node);
 +      left->rb_right = node;
 +
 +      rb_set_parent(left, parent);
 +
 +      if (parent)
 +      {
 +              if (node == parent->rb_right)
 +                      parent->rb_right = left;
 +              else
 +                      parent->rb_left = left;
 +      }
 +      else
 +              root->rb_node = left; /* node was the root */
 +      rb_set_parent(node, left);
 +}
 +/*
 + * Rebalance and recolour the tree after 'node' has been linked in (see
 + * rb_link_node()). The loop runs while node has a red parent; each pass
 + * either recolours and moves two levels up (red uncle) or finishes with at
 + * most two rotations (black or missing uncle). The root is forced black
 + * before returning.
 + */
 +void rb_insert_color(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *parent, *gparent;
 +
 +      while ((parent = rb_parent(node)) && rb_is_red(parent))
 +      {
 +              gparent = rb_parent(parent);
 +
 +              if (parent == gparent->rb_left)
 +              {
 +                      {
 +                              register struct rb_node *uncle = gparent->rb_right;
 +                              if (uncle && rb_is_red(uncle))
 +                              {       /* red uncle: recolour and retry from the grandparent */
 +                                      rb_set_black(uncle);
 +                                      rb_set_black(parent);
 +                                      rb_set_red(gparent);
 +                                      node = gparent;
 +                                      continue;
 +                              }
 +                      }
 +
 +                      if (parent->rb_right == node)
 +                      {       /* node is the inner child: rotate it to the outside first */
 +                              register struct rb_node *tmp;
 +                              __rb_rotate_left(parent, root);
 +                              tmp = parent;
 +                              parent = node;
 +                              node = tmp;
 +                      }
 +
 +                      rb_set_black(parent);
 +                      rb_set_red(gparent);
 +                      __rb_rotate_right(gparent, root);
 +              } else {
 +                      {       /* mirror image: parent is the right child of gparent */
 +                              register struct rb_node *uncle = gparent->rb_left;
 +                              if (uncle && rb_is_red(uncle))
 +                              {
 +                                      rb_set_black(uncle);
 +                                      rb_set_black(parent);
 +                                      rb_set_red(gparent);
 +                                      node = gparent;
 +                                      continue;
 +                              }
 +                      }
 +
 +                      if (parent->rb_left == node)
 +                      {
 +                              register struct rb_node *tmp;
 +                              __rb_rotate_right(parent, root);
 +                              tmp = parent;
 +                              parent = node;
 +                              node = tmp;
 +                      }
 +
 +                      rb_set_black(parent);
 +                      rb_set_red(gparent);
 +                      __rb_rotate_left(gparent, root);
 +              }
 +      }
 +
 +      rb_set_black(root->rb_node);
 +}
 +/*
 + * Restore the red-black colouring after rb_erase() has unlinked a black
 + * node. 'node' is the child that took the unlinked node's place (it may be
 + * NULL) and 'parent' is that child's parent.
 + *
 + * The loop runs while node is NULL or black and is not the root. In each
 + * pass 'other' is node's sibling; it is dereferenced without a NULL check.
 + * On exit node, if not NULL, is coloured black.
 + */
 +static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
 +                           struct rb_root *root)
 +{
 +      struct rb_node *other;
 +
 +      while ((!node || rb_is_black(node)) && node != root->rb_node)
 +      {
 +              if (parent->rb_left == node)
 +              {
 +                      other = parent->rb_right;
 +                      if (rb_is_red(other))
 +                      {       /* red sibling: rotate so that node gets a black sibling */
 +                              rb_set_black(other);
 +                              rb_set_red(parent);
 +                              __rb_rotate_left(parent, root);
 +                              other = parent->rb_right;
 +                      }
 +                      if ((!other->rb_left || rb_is_black(other->rb_left)) &&
 +                          (!other->rb_right || rb_is_black(other->rb_right)))
 +                      {       /* sibling has no red child: colour it red and move one level up */
 +                              rb_set_red(other);
 +                              node = parent;
 +                              parent = rb_parent(node);
 +                      }
 +                      else
 +                      {
 +                              if (!other->rb_right || rb_is_black(other->rb_right))
 +                              {       /* only the near (left) child is red: rotate it into the far position */
 +                                      struct rb_node *o_left;
 +                                      if ((o_left = other->rb_left))
 +                                              rb_set_black(o_left);
 +                                      rb_set_red(other);
 +                                      __rb_rotate_right(other, root);
 +                                      other = parent->rb_right;
 +                              }
 +                              rb_set_color(other, rb_color(parent));
 +                              rb_set_black(parent);
 +                              if (other->rb_right)
 +                                      rb_set_black(other->rb_right);
 +                              __rb_rotate_left(parent, root);
 +                              node = root->rb_node; /* done: the final rb_set_black() below applies to the root */
 +                              break;
 +                      }
 +              }
 +              else
 +              {       /* mirror image: node is the right child of parent */
 +                      other = parent->rb_left;
 +                      if (rb_is_red(other))
 +                      {
 +                              rb_set_black(other);
 +                              rb_set_red(parent);
 +                              __rb_rotate_right(parent, root);
 +                              other = parent->rb_left;
 +                      }
 +                      if ((!other->rb_left || rb_is_black(other->rb_left)) &&
 +                          (!other->rb_right || rb_is_black(other->rb_right)))
 +                      {
 +                              rb_set_red(other);
 +                              node = parent;
 +                              parent = rb_parent(node);
 +                      }
 +                      else
 +                      {
 +                              if (!other->rb_left || rb_is_black(other->rb_left))
 +                              {
 +                                      register struct rb_node *o_right;
 +                                      if ((o_right = other->rb_right))
 +                                              rb_set_black(o_right);
 +                                      rb_set_red(other);
 +                                      __rb_rotate_left(other, root);
 +                                      other = parent->rb_left;
 +                              }
 +                              rb_set_color(other, rb_color(parent));
 +                              rb_set_black(parent);
 +                              if (other->rb_left)
 +                                      rb_set_black(other->rb_left);
 +                              __rb_rotate_right(parent, root);
 +                              node = root->rb_node;
 +                              break;
 +                      }
 +              }
 +      }
 +      if (node)
 +              rb_set_black(node);
 +}
 +/*
 + * Unlink 'node' from the tree anchored at 'root'.
 + *
 + * With at most one child, that child (possibly NULL) simply replaces node.
 + * With two children, the leftmost node of the right subtree is unlinked
 + * from its own position and then takes over node's place, children and
 + * rb_parent_color word (parent pointer and colour).
 + *
 + * 'color' is the colour of the node that was physically unlinked; when it
 + * was black, __rb_erase_color() is called on the replacing child to
 + * rebalance. The links stored inside the erased node itself are not reset.
 + */
 +void rb_erase(struct rb_node *node, struct rb_root *root)
 +{
 +      struct rb_node *child, *parent;
 +      int color;
 +
 +      if (!node->rb_left)
 +              child = node->rb_right;
 +      else if (!node->rb_right)
 +              child = node->rb_left;
 +      else
 +      {
 +              struct rb_node *old = node, *left;
 +
 +              node = node->rb_right;
 +              while ((left = node->rb_left) != NULL)
 +                      node = left; /* node ends up as the leftmost node of old's right subtree */
 +              child = node->rb_right;
 +              parent = rb_parent(node);
 +              color = rb_color(node);
 +
 +              if (child)
 +                      rb_set_parent(child, parent);
 +              if (parent == old) {
 +                      parent->rb_right = child;
 +                      parent = node; /* node replaces old, so it becomes child's parent */
 +              } else
 +                      parent->rb_left = child;
 +
 +              node->rb_parent_color = old->rb_parent_color;
 +              node->rb_right = old->rb_right;
 +              node->rb_left = old->rb_left;
 +
 +              if (rb_parent(old))
 +              {
 +                      if (rb_parent(old)->rb_left == old)
 +                              rb_parent(old)->rb_left = node;
 +                      else
 +                              rb_parent(old)->rb_right = node;
 +              } else
 +                      root->rb_node = node;
 +
 +              rb_set_parent(old->rb_left, node); /* old->rb_left is non-NULL in this branch */
 +              if (old->rb_right) /* may have been set to a NULL child above when parent == old */
 +                      rb_set_parent(old->rb_right, node);
 +              goto color;
 +      }
 +
 +      parent = rb_parent(node);
 +      color = rb_color(node);
 +
 +      if (child)
 +              rb_set_parent(child, parent);
 +      if (parent)
 +      {
 +              if (parent->rb_left == node)
 +                      parent->rb_left = child;
 +              else
 +                      parent->rb_right = child;
 +      }
 +      else
 +              root->rb_node = child;
 +
 + color:
 +      if (color == RB_BLACK)
 +              __rb_erase_color(child, parent, root);
 +}
 +
 +/*
 + * Return the first node (in sort order) of the tree, i.e. the node reached
 + * by following rb_left from the root until there is no left child.
 + * Returns NULL when the tree is empty.
 + */
 +struct rb_node *rb_first(struct rb_root *root)
 +{
 +      struct rb_node  *n;
 +
 +      n = root->rb_node;
 +      if (!n)
 +              return NULL;
 +      while (n->rb_left)
 +              n = n->rb_left;
 +      return n;
 +}
++/*
++ * Return the node that follows 'node' in sort order, or NULL when there is
++ * none. NULL is also returned when RB_EMPTY_NODE(node) holds, i.e. when the
++ * node's parent pointer refers to the node itself (the state set up by
++ * RB_CLEAR_NODE()). The const qualifier of the argument is cast away on the
++ * returned pointer.
++ */
++struct rb_node *rb_next(const struct rb_node *node)
++{
++      struct rb_node *parent;
++
++      if (RB_EMPTY_NODE(node))
++              return NULL;
++
++      /*
++       * If we have a right-hand child, go down and then left as far
++       * as we can.
++       */
++      if (node->rb_right) {
++              node = node->rb_right; 
++              while (node->rb_left)
++                      node=node->rb_left;
++              return (struct rb_node *)node;
++      }
++
++      /*
++       * No right-hand children. Everything down and left is smaller than us,
++       * so any 'next' node must be in the general direction of our parent.
++       * Go up the tree; any time the ancestor is a right-hand child of its
++       * parent, keep going up. First time it's a left-hand child of its
++       * parent, said parent is our 'next' node.
++       */
++      while ((parent = rb_parent(node)) && node == parent->rb_right)
++              node = parent;
++
++      return parent; /* NULL when the climb ran past the root: node was the last one */
++}
diff --cc lib/rbtree.h
index 7563725,0000000..c6cfe4a
mode 100644,000000..100644
--- /dev/null
@@@ -1,154 -1,0 +1,155 @@@
 +/*
 +  Red Black Trees
 +  (C) 1999  Andrea Arcangeli <andrea@suse.de>
 +  
 +  This program is free software; you can redistribute it and/or modify
 +  it under the terms of the GNU General Public License as published by
 +  the Free Software Foundation; either version 2 of the License, or
 +  (at your option) any later version.
 +
 +  This program is distributed in the hope that it will be useful,
 +  but WITHOUT ANY WARRANTY; without even the implied warranty of
 +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +  GNU General Public License for more details.
 +
 +  You should have received a copy of the GNU General Public License
 +  along with this program; if not, write to the Free Software
 +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 +
 +  linux/include/linux/rbtree.h
 +
 +  To use rbtrees you'll have to implement your own insert and search cores.
 +  This avoids the use of callbacks, which would dramatically reduce
 +  performance. I know it's not the cleanest way, but in C (not in C++) it
 +  is how you get both performance and genericity...
 +
 +  An example of insert and search follows. The search is a plain
 +  search over an ordered tree. The insert, however, must be implemented
 +  in two steps: first the code must insert the element in
 +  order as a red leaf in the tree, then the support library function
 +  rb_insert_color() must be called. That function does the
 +  non-trivial work of rebalancing the rbtree if necessary.
 +
 +-----------------------------------------------------------------------
 +static inline struct page * rb_search_page_cache(struct inode * inode,
 +                                               unsigned long offset)
 +{
 +      struct rb_node * n = inode->i_rb_page_cache.rb_node;
 +      struct page * page;
 +
 +      while (n)
 +      {
 +              page = rb_entry(n, struct page, rb_page_cache);
 +
 +              if (offset < page->offset)
 +                      n = n->rb_left;
 +              else if (offset > page->offset)
 +                      n = n->rb_right;
 +              else
 +                      return page;
 +      }
 +      return NULL;
 +}
 +
 +static inline struct page * __rb_insert_page_cache(struct inode * inode,
 +                                                 unsigned long offset,
 +                                                 struct rb_node * node)
 +{
 +      struct rb_node ** p = &inode->i_rb_page_cache.rb_node;
 +      struct rb_node * parent = NULL;
 +      struct page * page;
 +
 +      while (*p)
 +      {
 +              parent = *p;
 +              page = rb_entry(parent, struct page, rb_page_cache);
 +
 +              if (offset < page->offset)
 +                      p = &(*p)->rb_left;
 +              else if (offset > page->offset)
 +                      p = &(*p)->rb_right;
 +              else
 +                      return page;
 +      }
 +
 +      rb_link_node(node, parent, p);
 +
 +      return NULL;
 +}
 +
 +static inline struct page * rb_insert_page_cache(struct inode * inode,
 +                                               unsigned long offset,
 +                                               struct rb_node * node)
 +{
 +      struct page * ret;
 +      if ((ret = __rb_insert_page_cache(inode, offset, node)))
 +              goto out;
 +      rb_insert_color(node, &inode->i_rb_page_cache);
 + out:
 +      return ret;
 +}
 +-----------------------------------------------------------------------
 +*/
 +
 +#ifndef       _LINUX_RBTREE_H
 +#define       _LINUX_RBTREE_H
 +
 +#include <stdlib.h>
 +#include <inttypes.h>
 +/*
 + * Tree node, meant to be embedded in the structure being indexed (see
 + * rb_entry()). rb_parent_color packs two things into one word: the address
 + * of the parent node, recovered by rb_parent() by masking off the two low
 + * bits, and the node colour in bit 0 (RB_RED or RB_BLACK, read by
 + * rb_color()). This relies on the low bits of a node address being zero.
 + */
 +struct rb_node
 +{
 +      intptr_t rb_parent_color;
 +#define       RB_RED          0
 +#define       RB_BLACK        1
 +      struct rb_node *rb_right;
 +      struct rb_node *rb_left;
 +} __attribute__((aligned(sizeof(long))));
 +    /* The alignment might seem pointless, but allegedly CRIS needs it */
 +/* Anchor of a tree: rb_node is the root node, NULL for an empty tree (see RB_EMPTY_ROOT()). */
 +struct rb_root
 +{
 +      struct rb_node *rb_node;
 +};
 +
 +/*
 + * Accessors for the packed rb_parent_color word of a node 'r':
 + *   rb_parent()     parent pointer (the two low bits masked off)
 + *   rb_color()      colour bit, RB_RED (0) or RB_BLACK (1)
 + *   rb_is_red()     non-zero when the colour bit is clear
 + *   rb_is_black()   non-zero when the colour bit is set
 + *   rb_set_red()    clear the colour bit, parent pointer unchanged
 + *   rb_set_black()  set the colour bit, parent pointer unchanged
 + */
 +#define rb_parent(r)   ((struct rb_node *)((r)->rb_parent_color & ~3))
 +#define rb_color(r)   ((r)->rb_parent_color & 1)
 +#define rb_is_red(r)   (!rb_color(r))
 +#define rb_is_black(r) rb_color(r)
 +#define rb_set_red(r)  do { (r)->rb_parent_color &= ~1; } while (0)
 +#define rb_set_black(r)  do { (r)->rb_parent_color |= 1; } while (0)
 +/* Make 'p' the parent of 'rb'; the two low bits of rb_parent_color (which include the colour) are kept. */
 +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
 +{
 +      rb->rb_parent_color = (rb->rb_parent_color & 3) | (uintptr_t)p;
 +}
 +static inline void rb_set_color(struct rb_node *rb, int color)
 +{     /* Replace bit 0 of rb_parent_color with 'color' (RB_RED or RB_BLACK); the other bits are kept. */
 +      rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
 +}
 +/* RB_ROOT: compound literal for an empty tree. rb_entry(): map a node pointer back to its containing structure via container_of(). */
 +#define RB_ROOT       (struct rb_root) { NULL, }
 +#define       rb_entry(ptr, type, member) container_of(ptr, type, member)
 +/* RB_CLEAR_NODE() makes a node its own parent; RB_EMPTY_NODE() tests for exactly that state. */
 +#define RB_EMPTY_ROOT(root)   ((root)->rb_node == NULL)
 +#define RB_EMPTY_NODE(node)   (rb_parent(node) == node)
 +#define RB_CLEAR_NODE(node)   (rb_set_parent(node, node))
 +
 +extern void rb_insert_color(struct rb_node *, struct rb_root *);
 +extern void rb_erase(struct rb_node *, struct rb_root *);
 +
 +/* Find logical next and previous nodes in a tree */
 +extern struct rb_node *rb_first(struct rb_root *);
++extern struct rb_node *rb_next(const struct rb_node *);
 +/*
 + * Attach 'node' as a leaf below 'parent' by storing it in the link slot
 + * '*rb_link' (the rb_left or rb_right field found by the caller's search,
 + * or the root pointer). The node gets no children, and rb_parent_color is
 + * set to the bare parent address, which leaves the colour bit at RB_RED
 + * for an aligned parent. No rebalancing is done here; the usage notes at
 + * the top of this file call rb_insert_color() afterwards.
 + */
 +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
 +                              struct rb_node ** rb_link)
 +{
 +      node->rb_parent_color = (uintptr_t)parent;
 +      node->rb_left = node->rb_right = NULL;
 +
 +      *rb_link = node;
 +}
 +
 +#endif        /* _LINUX_RBTREE_H */
diff --cc options.c
+++ b/options.c
@@@ -513,6 -564,140 +513,130 @@@ static int str_verify_cpus_allowed_cb(v
  }
  #endif
  
 -#ifdef FIO_HAVE_TRIM
 -static int str_verify_trim_cb(void *data, unsigned long long *val)
 -{
 -      struct thread_data *td = data;
 -
 -      td->o.trim_percentage = *val;
 -      return 0;
 -}
 -#endif
 -
+ #ifdef FIO_HAVE_LIBNUMA
+ static int str_numa_cpunodes_cb(void *data, char *input)
+ {
+       struct thread_data *td = data;
+       /* Callback of the "numa_cpu_nodes" option: 'input' is the node
+        * list given by the user. Returns 0 on success, 1 on failure.
+        *
+        * numa_parse_nodestring() parses a character string list
+        * of nodes into a bit mask. The bit mask is allocated by
+        * numa_allocate_nodemask(), so it should be freed by
+        * numa_free_nodemask().
+        */
+       td->o.numa_cpunodesmask = numa_parse_nodestring(input);
+       if (td->o.numa_cpunodesmask == NULL) {
+               log_err("fio: numa_parse_nodestring failed\n");
+               td_verror(td, 1, "str_numa_cpunodes_cb");
+               return 1;
+       }
+       td->o.numa_cpumask_set = 1; /* flag that a CPU node mask was supplied */
+       return 0;
+ }
+ /*
+  * Callback of the "numa_mem_policy" option. 'input' has the form
+  * "<mode>[:<nodelist>]", where <mode> is one of default, prefer, bind,
+  * interleave or local. 'input' is modified in place: the ':' separator is
+  * overwritten with a NUL.
+  *
+  * The index of the mode name in policy_types[] is stored in
+  * td->o.numa_mem_mode and then compared against the MPOL_* constants, so
+  * the table order has to match their numbering.
+  * Depending on the mode, the node list ends up in
+  * td->o.numa_mem_prefer_node (prefer) or td->o.numa_memnodesmask
+  * (interleave, bind).
+  *
+  * Returns 0 on success and 1 on any error.
+  */
+ static int str_numa_mpol_cb(void *data, char *input)
+ {
+       struct thread_data *td = data;
+       const char * const policy_types[] =
+               { "default", "prefer", "bind", "interleave", "local" };
+       const int nr_policies = sizeof(policy_types) / sizeof(policy_types[0]);
+       /* writable default node list; avoids a heap copy that was never freed */
+       char all_nodes[] = "all";
+       int i;
+       char *nodelist = strchr(input, ':');
+       if (nodelist) {
+               /* NUL-terminate mode */
+               *nodelist++ = '\0';
+       }
+       /* bound the search by the table itself so it can never be overrun */
+       for (i = 0; i < nr_policies; i++) {
+               if (!strcmp(input, policy_types[i])) {
+                       td->o.numa_mem_mode = i;
+                       break;
+               }
+       }
+       if (i == nr_policies) {
+               log_err("fio: memory policy should be: default, prefer, bind, interleave, local\n");
+               goto out;
+       }
+       switch (td->o.numa_mem_mode) {
+       case MPOL_PREFERRED:
+               /*
+                * Insist on a nodelist of one node only
+                */
+               if (nodelist) {
+                       char *rest = nodelist;
+                       /* <ctype.h> functions need an unsigned char value */
+                       while (isdigit((unsigned char) *rest))
+                               rest++;
+                       if (*rest) {
+                               log_err("fio: one node only for \'prefer\'\n");
+                               goto out;
+                       }
+                       if (rest == nodelist) {
+                               /* "prefer:" with nothing after the colon */
+                               log_err("fio: one node is needed for \'prefer\'\n");
+                               goto out;
+                       }
+               } else {
+                       log_err("fio: one node is needed for \'prefer\'\n");
+                       goto out;
+               }
+               break;
+       case MPOL_INTERLEAVE:
+               /*
+                * Default to online nodes with memory if no nodelist
+                */
+               if (!nodelist)
+                       nodelist = all_nodes;
+               break;
+       case MPOL_LOCAL:
+       case MPOL_DEFAULT:
+               /*
+                * Don't allow a nodelist
+                */
+               if (nodelist) {
+                       log_err("fio: NO nodelist for \'local\'\n");
+                       goto out;
+               }
+               break;
+       case MPOL_BIND:
+               /*
+                * Insist on a nodelist
+                */
+               if (!nodelist) {
+                       log_err("fio: a nodelist is needed for \'bind\'\n");
+                       goto out;
+               }
+               break;
+       }
+       /* numa_parse_nodestring() parses a character string list
+        * of nodes into a bit mask. The bit mask is allocated by
+        * numa_allocate_nodemask(), so it should be freed by
+        * numa_free_nodemask().
+        */
+       switch (td->o.numa_mem_mode) {
+       case MPOL_PREFERRED:
+               /* nodelist is a non-empty run of digits at this point */
+               td->o.numa_mem_prefer_node = atoi(nodelist);
+               break;
+       case MPOL_INTERLEAVE:
+       case MPOL_BIND:
+               td->o.numa_memnodesmask = numa_parse_nodestring(nodelist);
+               if (td->o.numa_memnodesmask == NULL) {
+                       log_err("fio: numa_parse_nodestring failed\n");
+                       td_verror(td, 1, "str_numa_memnodes_cb");
+                       return 1;
+               }
+               break;
+       case MPOL_LOCAL:
+       case MPOL_DEFAULT:
+       default:
+               break;
+       }
+       td->o.numa_memmask_set = 1;
+       return 0;
+ out:
+       return 1;
+ }
+ #endif
  static int str_fst_cb(void *data, const char *str)
  {
        struct thread_data *td = data;
@@@ -543,6 -728,84 +667,45 @@@ static int str_sfr_cb(void *data, cons
  }
  #endif
  
 -              td->o.zipf_theta = val;
+ static int str_random_distribution_cb(void *data, const char *str)
+ {     /*
+        * Callback of the "random_distribution" option: stores the zipf theta
+        * or pareto h parameter taken from the postfix of 'str'. Returns 0 on
+        * success (and immediately for any other distribution), 1 on error.
+        */
+       struct thread_data *td = data;
+       double val;
+       char *nr;
+       if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
+               val = 1.1; /* value used when 'str' carries no postfix */
+       else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
+               val = 0.2; /* value used when 'str' carries no postfix */
+       else
+               return 0;
+       nr = get_opt_postfix(str);
+       if (nr && !str_to_float(nr, &val)) {
+               log_err("fio: random postfix parsing failed\n");
+               free(nr);
+               return 1;
+       }
+       free(nr);
+       if (td->o.random_distribution == FIO_RAND_DIST_ZIPF) {
+               if (val == 1.00) {
+                       log_err("fio: zipf theta must different than 1.0\n");
+                       return 1;
+               }
 -              td->o.pareto_h = val;
 -      }
 -
 -      return 0;
 -}
 -
 -static int check_dir(struct thread_data *td, char *fname)
 -{
 -#if 0
 -      char file[PATH_MAX], *dir;
 -      int elen = 0;
 -
 -      if (td->o.directory) {
 -              strcpy(file, td->o.directory);
 -              strcat(file, "/");
 -              elen = strlen(file);
 -      }
 -
 -      sprintf(file + elen, "%s", fname);
 -      dir = dirname(file);
 -
 -      {
 -      struct stat sb;
 -      /*
 -       * We can't do this on FIO_DISKLESSIO engines. The engine isn't loaded
 -       * yet, so we can't do this check right here...
 -       */
 -      if (lstat(dir, &sb) < 0) {
 -              int ret = errno;
 -
 -              log_err("fio: %s is not a directory\n", dir);
 -              td_verror(td, ret, "lstat");
 -              return 1;
 -      }
 -
 -      if (!S_ISDIR(sb.st_mode)) {
 -              log_err("fio: %s is not a directory\n", dir);
 -              return 1;
 -      }
++              td->o.zipf_theta.u.f = val;
+       } else {
+               if (val <= 0.00 || val >= 1.00) {
+                       log_err("fio: pareto input out of range (0 < input < 1.0)\n");
+                       return 1;
+               }
 -#endif
++              td->o.pareto_h.u.f = val;
+       }
+       return 0;
+ }
  /*
   * Return next file in the string. Files are separated with ':'. If the ':'
   * is escaped with a '\', then that ':' is part of the filename and does not
@@@ -1446,14 -1509,32 +1609,36 @@@ struct fio_option fio_options[FIO_MAX_O
                .off1   = td_var_offset(softrandommap),
                .help   = "Set norandommap if randommap allocation fails",
                .parent = "norandommap",
 +              .hide   = 1,
                .def    = "0",
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RANDOM,
        },
+       {
+               .name   = "random_distribution",
+               .type   = FIO_OPT_STR,
+               .off1   = td_var_offset(random_distribution),
+               .cb     = str_random_distribution_cb,
+               .help   = "Random offset distribution generator",
+               .def    = "random",
+               .posval = {
+                         { .ival = "random",
+                           .oval = FIO_RAND_DIST_RANDOM,
+                           .help = "Completely random",
+                         },
+                         { .ival = "zipf",
+                           .oval = FIO_RAND_DIST_ZIPF,
+                           .help = "Zipf distribution",
+                         },
+                         { .ival = "pareto",
+                           .oval = FIO_RAND_DIST_PARETO,
+                           .help = "Pareto distribution",
+                         },
+               },
+       },
        {
                .name   = "nrfiles",
 +              .lname  = "Number of files",
                .alias  = "nr_files",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(nr_files),
                .help   = "Window average for rate limits (msec)",
                .def    = "1000",
                .parent = "rate",
 +              .hide   = 1,
 +              .category = FIO_OPT_C_IO,
 +              .group  = FIO_OPT_G_RATE,
        },
+       {
+               .name   = "max_latency",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(max_latency),
+               .help   = "Maximum tolerated IO latency (usec)",
++              .category = FIO_OPT_C_IO,
++              .group = FIO_OPT_G_RATE,
+       },
        {
                .name   = "invalidate",
 +              .lname  = "Cache invalidate",
                .type   = FIO_OPT_BOOL,
                .off1   = td_var_offset(invalidate_cache),
                .help   = "Invalidate buffer/page cache prior to running job",
                .type   = FIO_OPT_STR,
                .cb     = str_cpus_allowed_cb,
                .help   = "Set CPUs allowed",
 +              .category = FIO_OPT_C_GENERAL,
 +              .group  = FIO_OPT_G_CRED,
        },
+ #endif
+ #ifdef FIO_HAVE_LIBNUMA
+       {
+               .name   = "numa_cpu_nodes",
+               .type   = FIO_OPT_STR,
+               .cb     = str_numa_cpunodes_cb,
+               .help   = "NUMA CPU nodes bind",
+       },
+       {
+               .name   = "numa_mem_policy",
+               .type   = FIO_OPT_STR,
+               .cb     = str_numa_mpol_cb,
+               .help   = "NUMA memory policy setup",
+       },
  #endif
        {
                .name   = "end_fsync",
diff --cc parse.c
+++ b/parse.c
  #include "parse.h"
  #include "debug.h"
  #include "options.h"
+ #include "minmax.h"
  
 -static struct fio_option *fio_options;
 -extern unsigned int fio_get_kb_base(void *);
 +static struct fio_option *__fio_options;
  
  static int vp_cmp(const void *p1, const void *p2)
  {
diff --cc parse.h
+++ b/parse.h
@@@ -88,7 -79,7 +88,8 @@@ extern void options_free(struct fio_opt
  extern void strip_blank_front(char **);
  extern void strip_blank_end(char *);
  extern int str_to_decimal(const char *, long long *, int, void *);
 +extern int check_str_bytes(const char *p, long long *val, void *data);
+ extern int str_to_float(const char *str, double *val);
  
  /*
   * Handlers for the options
diff --cc server.c
+++ b/server.c
@@@ -421,154 -323,38 +421,157 @@@ int fio_net_send_simple_cmd(int sk, uin
        return 0;
  }
  
 -static int fio_server_send_quit_cmd(void)
 +/*
 + * Send a FIO_NET_CMD_QUIT simple command on socket 'sk'. The return value
 + * is whatever fio_net_send_simple_cmd() returns.
 + */
 +int fio_net_send_quit(int sk)
  {
        dprint(FD_NET, "server: sending quit\n");
 -      return fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_QUIT, 0, NULL);
 +
 +      return fio_net_send_simple_cmd(sk, FIO_NET_CMD_QUIT, 0, NULL);
  }
  
 -static int handle_job_cmd(struct fio_net_cmd *cmd)
 +/*
 + * Send a FIO_NET_CMD_STOP on socket 'sk' whose payload holds 'error' and
 + * 'signal' converted to little-endian 32-bit values. When 'cmd' is non-NULL
 + * its tag is passed along with the reply; otherwise a tag of 0 is used.
 + * Returns the result of fio_net_send_cmd().
 + */
 +static int fio_net_send_ack(int sk, struct fio_net_cmd *cmd, int error,
 +                          int signal)
  {
 -      char *buf = (char *) cmd->payload;
 -      struct cmd_start_pdu spdu;
        struct cmd_end_pdu epdu;
 +      uint64_t tag = 0;
 +
 +      if (cmd)
 +              tag = cmd->tag;
 +
 +      epdu.error = __cpu_to_le32(error);
 +      epdu.signal = __cpu_to_le32(signal);
 +      return fio_net_send_cmd(sk, FIO_NET_CMD_STOP, &epdu, sizeof(epdu), &tag, NULL);
 +}
 +
 +/*
 + * Send a stop notification with the given 'error' and 'signal' on socket
 + * 'sk'. No originating command is supplied, so fio_net_send_ack() is called
 + * with a NULL command. Returns the result of fio_net_send_ack().
 + */
 +int fio_net_send_stop(int sk, int error, int signal)
 +{
 +      dprint(FD_NET, "server: sending stop (%d, %d)\n", error, signal);
 +      return fio_net_send_ack(sk, NULL, error, signal);
 +}
 +
 +static void fio_server_add_fork_item(pid_t pid, struct flist_head *list)
 +{
 +      struct fio_fork_item *ffi;
 +
 +      ffi = malloc(sizeof(*ffi));
 +      ffi->exitval = 0;
 +      ffi->signal = 0;
 +      ffi->exited = 0;
 +      ffi->pid = pid;
 +      flist_add_tail(&ffi->list, list);
 +}
 +
 +/*
 + * Record the forked child 'pid' on conn_list.
 + */
 +static void fio_server_add_conn_pid(pid_t pid)
 +{
 +      dprint(FD_NET, "server: forked off connection job (pid=%u)\n", pid);
 +      fio_server_add_fork_item(pid, &conn_list);
 +}
 +
 +/*
 + * Record the forked child 'pid' on job_list.
 + */
 +static void fio_server_add_job_pid(pid_t pid)
 +{
 +      dprint(FD_NET, "server: forked off job job (pid=%u)\n", pid);
 +      fio_server_add_fork_item(pid, &job_list);
 +}
 +
 +static void fio_server_check_fork_item(struct fio_fork_item *ffi)
 +{
 +      int ret, status;
 +
 +      ret = waitpid(ffi->pid, &status, WNOHANG);
 +      if (ret < 0) {
 +              if (errno == ECHILD) {
 +                      log_err("fio: connection pid %u disappeared\n", ffi->pid);
 +                      ffi->exited = 1;
 +              } else
 +                      log_err("fio: waitpid: %s\n", strerror(errno));
 +      } else if (ret == ffi->pid) {
 +              if (WIFSIGNALED(status)) {
 +                      ffi->signal = WTERMSIG(status);
 +                      ffi->exited = 1;
 +              }
 +              if (WIFEXITED(status)) {
 +                      if (WEXITSTATUS(status))
 +                              ffi->exitval = WEXITSTATUS(status);
 +                      ffi->exited = 1;
 +              }
 +      }
 +}
 +
 +/*
 + * Report a finished child and drop its tracking entry: send a stop carrying
 + * the recorded exit value and signal, then a quit, both on server_fd, and
 + * finally unlink 'ffi' from its list and free it. 'ffi' must not be used
 + * after this returns.
 + */
 +static void fio_server_fork_item_done(struct fio_fork_item *ffi)
 +{
 +      dprint(FD_NET, "pid %u exited, sig=%u, exitval=%d\n", ffi->pid, ffi->signal, ffi->exitval);
 +
 +      /*
 +       * Fold STOP and QUIT...
 +       */
 +      fio_net_send_stop(server_fd, ffi->exitval, ffi->signal);
 +      fio_net_send_quit(server_fd);
 +      flist_del(&ffi->list);
 +      free(ffi);
 +}
 +
 +static void fio_server_check_fork_items(struct flist_head *list)
 +{
 +      struct flist_head *entry, *tmp;
 +      struct fio_fork_item *ffi;
 +
 +      flist_for_each_safe(entry, tmp, list) {
 +              ffi = flist_entry(entry, struct fio_fork_item, list);
 +
 +              fio_server_check_fork_item(ffi);
 +
 +              if (ffi->exited)
 +                      fio_server_fork_item_done(ffi);
 +      }
 +}
 +
 +/*
 + * Poll every child tracked on job_list.
 + */
 +static void fio_server_check_jobs(void)
 +{
 +      fio_server_check_fork_items(&job_list);
 +}
 +
 +/*
 + * Poll every child tracked on conn_list.
 + */
 +static void fio_server_check_conns(void)
 +{
 +      fio_server_check_fork_items(&conn_list);
 +}
 +
 +static int handle_run_cmd(struct fio_net_cmd *cmd)
 +{
 +      pid_t pid;
        int ret;
  
 -      if (parse_jobs_ini(buf, 1, 0)) {
 -              fio_server_send_quit_cmd();
 +      set_genesis_time();
 +
 +      pid = fork();
 +      if (pid) {
 +              fio_server_add_job_pid(pid);
 +              return 0;
 +      }
 +
 +      ret = fio_backend();
 +      free_threads_shm();
 +      _exit(ret);
 +}
 +
 +/*
 + * Handle a job command: convert the pdu header fields from little-endian
 + * in place, reset stat_number and parse the job data with parse_jobs_ini().
 + * On parse failure a quit is sent on server_fd and -1 is returned.
 + * Otherwise a FIO_NET_CMD_START carrying thread_number and stat_number is
 + * sent and 0 is returned; the jobs are not run from here.
 + *
 + * NOTE(review): buf_len is converted but not otherwise used in this
 + * function -- confirm the payload length is validated elsewhere.
 + */
 +static int handle_job_cmd(struct fio_net_cmd *cmd)
 +{
 +      struct cmd_job_pdu *pdu = (struct cmd_job_pdu *) cmd->payload;
 +      void *buf = pdu->buf;
 +      struct cmd_start_pdu spdu;
 +
 +      pdu->buf_len = le32_to_cpu(pdu->buf_len);
 +      pdu->client_type = le32_to_cpu(pdu->client_type);
 +
+       stat_number = 0;
 +      if (parse_jobs_ini(buf, 1, 0, pdu->client_type)) {
 +              fio_net_send_quit(server_fd);
                return -1;
        }
  
        spdu.jobs = cpu_to_le32(thread_number);
 -      fio_net_send_cmd(server_fd, FIO_NET_CMD_START, &spdu, sizeof(spdu), 0);
 -
 -      ret = fio_backend();
 -
 -      epdu.error = ret;
 -      fio_net_send_cmd(server_fd, FIO_NET_CMD_STOP, &epdu, sizeof(epdu), 0);
 -
 -      fio_server_send_quit_cmd();
 -      reset_fio_state();
 -      return ret;
+       spdu.stat_outputs = cpu_to_le32(stat_number);
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_START, &spdu, sizeof(spdu), NULL, NULL);
 +      return 0;
  }
  
  static int handle_jobline_cmd(struct fio_net_cmd *cmd)
                dprint(FD_NET, "server: %d: %s\n", i, argv[i]);
        }
  
 -      if (parse_cmd_line(clp->lines, argv)) {
 -              fio_server_send_quit_cmd();
++      stat_number = 0;
++
 +      if (parse_cmd_line(clp->lines, argv, clp->client_type)) {
 +              fio_net_send_quit(server_fd);
                free(argv);
                return -1;
        }
  
        free(argv);
  
 -      fio_net_send_simple_cmd(server_fd, FIO_NET_CMD_START, 0, NULL);
 -
 -      ret = fio_backend();
 -      fio_server_send_quit_cmd();
 -      reset_fio_state();
 -      return ret;
 +      spdu.jobs = cpu_to_le32(thread_number);
++      spdu.stat_outputs = cpu_to_le32(stat_number);
 +      fio_net_send_cmd(server_fd, FIO_NET_CMD_START, &spdu, sizeof(spdu), NULL, NULL);
 +      return 0;
  }
  
  static int handle_probe_cmd(struct fio_net_cmd *cmd)
diff --cc server.h
+++ b/server.h
@@@ -38,39 -36,33 +38,39 @@@ struct fio_net_cmd_reply 
  };
  
 +/*
 + * Constants for the client/server protocol: the protocol version, the
 + * fragment size limit, the command opcodes (FIO_NET_CMD_*, with
 + * FIO_NET_CMD_NR one past the last opcode), the command flag bits, the
 + * number of header bytes covered by the crc, and assorted limits.
 + */
  enum {
-       FIO_SERVER_VER                  = 18,
 -      FIO_SERVER_VER          = 9,
 -
 -      FIO_SERVER_MAX_PDU      = 1024,
 -
 -      FIO_NET_CMD_QUIT        = 1,
 -      FIO_NET_CMD_EXIT        = 2,
 -      FIO_NET_CMD_JOB         = 3,
 -      FIO_NET_CMD_JOBLINE     = 4,
 -      FIO_NET_CMD_TEXT        = 5,
 -      FIO_NET_CMD_TS          = 6,
 -      FIO_NET_CMD_GS          = 7,
 -      FIO_NET_CMD_SEND_ETA    = 8,
 -      FIO_NET_CMD_ETA         = 9,
 -      FIO_NET_CMD_PROBE       = 10,
 -      FIO_NET_CMD_START       = 11,
 -      FIO_NET_CMD_STOP        = 12,
 -      FIO_NET_CMD_DU          = 13,
 -      FIO_NET_CMD_RUN         = 14,
 -      FIO_NET_CMD_NR          = 15,
 -
 -      FIO_NET_CMD_F_MORE      = 1UL << 0,
++      FIO_SERVER_VER                  = 19,
 +
 +      FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
 +
 +      FIO_NET_CMD_QUIT                = 1,
 +      FIO_NET_CMD_EXIT                = 2,
 +      FIO_NET_CMD_JOB                 = 3,
 +      FIO_NET_CMD_JOBLINE             = 4,
 +      FIO_NET_CMD_TEXT                = 5,
 +      FIO_NET_CMD_TS                  = 6,
 +      FIO_NET_CMD_GS                  = 7,
 +      FIO_NET_CMD_SEND_ETA            = 8,
 +      FIO_NET_CMD_ETA                 = 9,
 +      FIO_NET_CMD_PROBE               = 10,
 +      FIO_NET_CMD_START               = 11,
 +      FIO_NET_CMD_STOP                = 12,
 +      FIO_NET_CMD_DU                  = 13,
 +      FIO_NET_CMD_SERVER_START        = 14,
 +      FIO_NET_CMD_ADD_JOB             = 15,
 +      FIO_NET_CMD_RUN                 = 16,
 +      FIO_NET_CMD_IOLOG               = 17,
 +      FIO_NET_CMD_UPDATE_JOB          = 18,
 +      FIO_NET_CMD_NR                  = 19,
 +
 +      FIO_NET_CMD_F_MORE              = 1UL << 0,
  
        /* crc does not include the crc fields */
 -      FIO_NET_CMD_CRC_SZ      = sizeof(struct fio_net_cmd) -
 -                                      2 * sizeof(uint16_t),
 +      FIO_NET_CMD_CRC_SZ              = sizeof(struct fio_net_cmd) -
 +                                              2 * sizeof(uint16_t),
 +
 +      FIO_NET_NAME_MAX                = 256,
  
 -      FIO_NET_CLIENT_TIMEOUT  = 5000,
 +      FIO_NET_CLIENT_TIMEOUT          = 5000,
  };
  
  struct cmd_ts_pdu {
@@@ -105,14 -94,9 +105,15 @@@ struct cmd_line_pdu 
        struct cmd_single_line_pdu options[0];
  };
  
 +/*
 + * Payload of a job command: a small header followed by trailing
 + * variable-length data in 'buf'. handle_job_cmd() converts both header
 + * fields from little-endian and hands 'buf' to parse_jobs_ini().
 + * NOTE(review): buf_len is presumably the length of 'buf' -- confirm
 + * against the sender.
 + */
 +struct cmd_job_pdu {
 +      uint32_t buf_len;
 +      uint32_t client_type;
 +      uint8_t buf[0];
 +};
 +
 +/*
 + * Payload of FIO_NET_CMD_START. The server fills 'jobs' from thread_number
 + * and 'stat_outputs' from stat_number, both converted to little-endian.
 + */
  struct cmd_start_pdu {
        uint32_t jobs;
+       uint32_t stat_outputs;
  };
  
  struct cmd_end_pdu {
diff --cc stat.c
Simple merge
index 9b90796,0000000..9975af1
mode 100644,000000..100644
--- /dev/null
@@@ -1,429 -1,0 +1,449 @@@
 +#ifndef FIO_THREAD_OPTIONS_H
 +#define FIO_THREAD_OPTIONS_H
 +
 +#include "arch/arch.h"
 +#include "os/os.h"
 +#include "stat.h"
 +#include "gettime.h"
 +
 +/*
 + * What type of allocation to use for io buffers
 + */
 +enum fio_memtype {
 +      MEM_MALLOC = 0, /* ordinary malloc */
 +      MEM_SHM,        /* use shared memory segments */
 +      MEM_SHMHUGE,    /* use shared memory segments with huge pages */
 +      MEM_MMAP,       /* use anonymous mmap */
 +      MEM_MMAPHUGE,   /* memory mapped huge file */
 +};
 +
 +/*
 + * What type of errors to continue on when continue_on_error is used.
 + * These are bit positions; enum error_type holds the matching masks.
 + * ERROR_TYPE_CNT is the number of positions and sizes the per-type
 + * ignore_error arrays in the option structures.
 + */
 +enum error_type_bit {
 +      ERROR_TYPE_READ_BIT = 0,
 +      ERROR_TYPE_WRITE_BIT = 1,
 +      ERROR_TYPE_VERIFY_BIT = 2,
 +      ERROR_TYPE_CNT = 3,
 +};
 +
 +#define ERROR_STR_MAX 128
 +
 +/*
 + * Bit masks built from the enum error_type_bit positions. ERROR_TYPE_ANY
 + * sets the low 16 bits, which covers every mask defined here.
 + */
 +enum error_type {
 +        ERROR_TYPE_NONE = 0,
 +        ERROR_TYPE_READ = 1 << ERROR_TYPE_READ_BIT,
 +        ERROR_TYPE_WRITE = 1 << ERROR_TYPE_WRITE_BIT,
 +        ERROR_TYPE_VERIFY = 1 << ERROR_TYPE_VERIFY_BIT,
 +        ERROR_TYPE_ANY = 0xffff,
 +};
 +
 +#define BSSPLIT_MAX   64
 +
 +/*
 + * One entry of a block size split table. The option structures hold up to
 + * BSSPLIT_MAX of these per data direction in their packed form.
 + * NOTE(review): presumably 'bs' is a block size and 'perc' the percentage
 + * of IO issued at that size -- confirm against the bssplit option parser.
 + */
 +struct bssplit {
 +      uint32_t bs;
 +      uint32_t perc;
 +};
 +
 +/*
 + * Job option values in host-native form: strings are heap pointers and
 + * integers use native C types. struct thread_options_pack declares a
 + * packed, fixed-width counterpart with the same field names;
 + * convert_thread_options_to_cpu() and convert_thread_options_to_net() take
 + * one of each, presumably to translate between the two forms.
 + *
 + * NOTE(review): the FIO_HAVE_LIBNUMA fields below have no counterpart in
 + * struct thread_options_pack -- confirm whether they are meant to be
 + * carried across the conversion.
 + */
 +struct thread_options {
 +      int pad;
 +      char *description;
 +      char *name;
 +      char *directory;
 +      char *filename;
 +      char *opendir;
 +      char *ioengine;
 +      char *mmapfile;
 +      enum td_ddir td_ddir;
 +      unsigned int rw_seq;
 +      unsigned int kb_base;
 +      unsigned int ddir_seq_nr;
 +      long ddir_seq_add;
 +      unsigned int iodepth;
 +      unsigned int iodepth_low;
 +      unsigned int iodepth_batch;
 +      unsigned int iodepth_batch_complete;
 +
 +      unsigned long long size;
 +      unsigned int size_percent;
 +      unsigned int fill_device;
 +      unsigned long long file_size_low;
 +      unsigned long long file_size_high;
 +      unsigned long long start_offset;
 +
 +      unsigned int bs[DDIR_RWDIR_CNT];
 +      unsigned int ba[DDIR_RWDIR_CNT];
 +      unsigned int min_bs[DDIR_RWDIR_CNT];
 +      unsigned int max_bs[DDIR_RWDIR_CNT];
 +      struct bssplit *bssplit[DDIR_RWDIR_CNT];
 +      unsigned int bssplit_nr[DDIR_RWDIR_CNT];
 +
 +      int *ignore_error[ERROR_TYPE_CNT];
 +      unsigned int ignore_error_nr[ERROR_TYPE_CNT];
 +      unsigned int error_dump;
 +
 +      unsigned int nr_files;
 +      unsigned int open_files;
 +      enum file_lock_mode file_lock_mode;
 +      unsigned int lockfile_batch;
 +
 +      unsigned int odirect;
 +      unsigned int invalidate_cache;
 +      unsigned int create_serialize;
 +      unsigned int create_fsync;
 +      unsigned int create_on_open;
 +      unsigned int create_only;
 +      unsigned int end_fsync;
 +      unsigned int pre_read;
 +      unsigned int sync_io;
 +      unsigned int verify;
 +      unsigned int do_verify;
 +      unsigned int verifysort;
 +      unsigned int verify_interval;
 +      unsigned int verify_offset;
 +      char verify_pattern[MAX_PATTERN_SIZE];
 +      unsigned int verify_pattern_bytes;
 +      unsigned int verify_fatal;
 +      unsigned int verify_dump;
 +      unsigned int verify_async;
 +      unsigned long long verify_backlog;
 +      unsigned int verify_batch;
 +      unsigned int use_thread;
 +      unsigned int unlink;
 +      unsigned int do_disk_util;
 +      unsigned int override_sync;
 +      unsigned int rand_repeatable;
 +      unsigned int use_os_rand;
 +      unsigned int log_avg_msec;
 +      unsigned int norandommap;
 +      unsigned int softrandommap;
 +      unsigned int bs_unaligned;
 +      unsigned int fsync_on_close;
 +
++      unsigned int random_distribution;
++      fio_fp64_t zipf_theta;
++      fio_fp64_t pareto_h;
++
 +      unsigned int hugepage_size;
 +      unsigned int rw_min_bs;
 +      unsigned int thinktime;
 +      unsigned int thinktime_spin;
 +      unsigned int thinktime_blocks;
 +      unsigned int fsync_blocks;
 +      unsigned int fdatasync_blocks;
 +      unsigned int barrier_blocks;
 +      unsigned long long start_delay;
 +      unsigned long long timeout;
 +      unsigned long long ramp_time;
 +      unsigned int overwrite;
 +      unsigned int bw_avg_time;
 +      unsigned int iops_avg_time;
 +      unsigned int loops;
 +      unsigned long long zone_range;
 +      unsigned long long zone_size;
 +      unsigned long long zone_skip;
 +      unsigned long long lockmem;
 +      enum fio_memtype mem_type;
 +      unsigned int mem_align;
 +
++      unsigned max_latency;
++
 +      unsigned int stonewall;
 +      unsigned int new_group;
 +      unsigned int numjobs;
 +      os_cpu_mask_t cpumask;
 +      unsigned int cpumask_set;
 +      os_cpu_mask_t verify_cpumask;
 +      unsigned int verify_cpumask_set;
++#ifdef FIO_HAVE_LIBNUMA
++      struct bitmask *numa_cpunodesmask;
++      unsigned int numa_cpumask_set;
++      unsigned short numa_mem_mode;
++      unsigned int numa_mem_prefer_node;
++      struct bitmask *numa_memnodesmask;
++      unsigned int numa_memmask_set;
++#endif
 +      unsigned int iolog;
 +      unsigned int rwmixcycle;
 +      unsigned int rwmix[2];
 +      unsigned int nice;
 +      unsigned int ioprio;
 +      unsigned int ioprio_class;
 +      unsigned int file_service_type;
 +      unsigned int group_reporting;
 +      unsigned int fadvise_hint;
 +      enum fio_fallocate_mode fallocate_mode;
 +      unsigned int zero_buffers;
 +      unsigned int refill_buffers;
 +      unsigned int scramble_buffers;
 +      unsigned int compress_percentage;
 +      unsigned int compress_chunk;
 +      unsigned int time_based;
 +      unsigned int disable_lat;
 +      unsigned int disable_clat;
 +      unsigned int disable_slat;
 +      unsigned int disable_bw;
 +      unsigned int gtod_reduce;
 +      unsigned int gtod_cpu;
 +      unsigned int gtod_offload;
 +      enum fio_cs clocksource;
 +      unsigned int no_stall;
 +      unsigned int trim_percentage;
 +      unsigned int trim_batch;
 +      unsigned int trim_zero;
 +      unsigned long long trim_backlog;
 +      unsigned int clat_percentiles;
 +      unsigned int overwrite_plist;
 +      fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 +
 +      char *read_iolog_file;
 +      char *write_iolog_file;
 +      char *bw_log_file;
 +      char *lat_log_file;
 +      char *iops_log_file;
 +      char *replay_redirect;
 +
 +      /*
 +       * Pre-run and post-run shell
 +       */
 +      char *exec_prerun;
 +      char *exec_postrun;
 +
 +      unsigned int rate[DDIR_RWDIR_CNT];
 +      unsigned int ratemin[DDIR_RWDIR_CNT];
 +      unsigned int ratecycle;
 +      unsigned int rate_iops[DDIR_RWDIR_CNT];
 +      unsigned int rate_iops_min[DDIR_RWDIR_CNT];
 +
 +      char *ioscheduler;
 +
 +      /*
 +       * I/O Error handling
 +       */
 +      enum error_type continue_on_error;
 +
 +      /*
 +       * Benchmark profile type
 +       */
 +      char *profile;
 +
 +      /*
 +       * blkio cgroup support
 +       */
 +      char *cgroup;
 +      unsigned int cgroup_weight;
 +      unsigned int cgroup_nodelete;
 +
 +      unsigned int uid;
 +      unsigned int gid;
 +
 +      int flow_id;
 +      int flow;
 +      int flow_watermark;
 +      unsigned int flow_sleep;
 +
 +      unsigned long long offset_increment;
 +
 +      unsigned int sync_file_range;
 +};
 +
 +#define FIO_TOP_STR_MAX               256
 +
 +struct thread_options_pack {
 +      uint8_t description[FIO_TOP_STR_MAX];
 +      uint8_t name[FIO_TOP_STR_MAX];
 +      uint8_t directory[FIO_TOP_STR_MAX];
 +      uint8_t filename[FIO_TOP_STR_MAX];
 +      uint8_t opendir[FIO_TOP_STR_MAX];
 +      uint8_t ioengine[FIO_TOP_STR_MAX];
 +      uint8_t mmapfile[FIO_TOP_STR_MAX];
 +      uint32_t td_ddir;
 +      uint32_t rw_seq;
 +      uint32_t kb_base;
 +      uint32_t ddir_seq_nr;
 +      uint64_t ddir_seq_add;
 +      uint32_t iodepth;
 +      uint32_t iodepth_low;
 +      uint32_t iodepth_batch;
 +      uint32_t iodepth_batch_complete;
 +
 +      uint64_t size;
 +      uint32_t size_percent;
 +      uint32_t fill_device;
 +      uint64_t file_size_low;
 +      uint64_t file_size_high;
 +      uint64_t start_offset;
 +
 +      uint32_t bs[DDIR_RWDIR_CNT];
 +      uint32_t ba[DDIR_RWDIR_CNT];
 +      uint32_t min_bs[DDIR_RWDIR_CNT];
 +      uint32_t max_bs[DDIR_RWDIR_CNT];
 +      struct bssplit bssplit[DDIR_RWDIR_CNT][BSSPLIT_MAX];
 +      uint32_t bssplit_nr[DDIR_RWDIR_CNT];
 +
 +      uint32_t ignore_error[ERROR_TYPE_CNT][ERROR_STR_MAX];
 +      uint32_t ignore_error_nr[ERROR_TYPE_CNT];
 +      uint32_t error_dump;
 +
 +      uint32_t nr_files;
 +      uint32_t open_files;
 +      uint32_t file_lock_mode;
 +      uint32_t lockfile_batch;
 +
 +      uint32_t odirect;
 +      uint32_t invalidate_cache;
 +      uint32_t create_serialize;
 +      uint32_t create_fsync;
 +      uint32_t create_on_open;
 +      uint32_t create_only;
 +      uint32_t end_fsync;
 +      uint32_t pre_read;
 +      uint32_t sync_io;
 +      uint32_t verify;
 +      uint32_t do_verify;
 +      uint32_t verifysort;
 +      uint32_t verify_interval;
 +      uint32_t verify_offset;
 +      uint8_t verify_pattern[MAX_PATTERN_SIZE];
 +      uint32_t verify_pattern_bytes;
 +      uint32_t verify_fatal;
 +      uint32_t verify_dump;
 +      uint32_t verify_async;
 +      uint64_t verify_backlog;
 +      uint32_t verify_batch;
 +      uint32_t use_thread;
 +      uint32_t unlink;
 +      uint32_t do_disk_util;
 +      uint32_t override_sync;
 +      uint32_t rand_repeatable;
 +      uint32_t use_os_rand;
 +      uint32_t log_avg_msec;
 +      uint32_t norandommap;
 +      uint32_t softrandommap;
 +      uint32_t bs_unaligned;
 +      uint32_t fsync_on_close;
 +
++      uint32_t random_distribution;
++      fio_fp64_t zipf_theta;
++      fio_fp64_t pareto_h;
++
 +      uint32_t hugepage_size;
 +      uint32_t rw_min_bs;
 +      uint32_t thinktime;
 +      uint32_t thinktime_spin;
 +      uint32_t thinktime_blocks;
 +      uint32_t fsync_blocks;
 +      uint32_t fdatasync_blocks;
 +      uint32_t barrier_blocks;
 +      uint64_t start_delay;
 +      uint64_t timeout;
 +      uint64_t ramp_time;
 +      uint32_t overwrite;
 +      uint32_t bw_avg_time;
 +      uint32_t iops_avg_time;
 +      uint32_t loops;
 +      uint64_t zone_range;
 +      uint64_t zone_size;
 +      uint64_t zone_skip;
 +      uint64_t lockmem;
 +      uint32_t mem_type;
 +      uint32_t mem_align;
 +
++      uint32_t max_latency;
++
 +      uint32_t stonewall;
 +      uint32_t new_group;
 +      uint32_t numjobs;
 +      uint8_t cpumask[FIO_TOP_STR_MAX];
 +      uint32_t cpumask_set;
 +      uint8_t verify_cpumask[FIO_TOP_STR_MAX];
 +      uint32_t verify_cpumask_set;
 +      uint32_t iolog;
 +      uint32_t rwmixcycle;
 +      uint32_t rwmix[2];
 +      uint32_t nice;
 +      uint32_t ioprio;
 +      uint32_t ioprio_class;
 +      uint32_t file_service_type;
 +      uint32_t group_reporting;
 +      uint32_t fadvise_hint;
 +      uint32_t fallocate_mode;
 +      uint32_t zero_buffers;
 +      uint32_t refill_buffers;
 +      uint32_t scramble_buffers;
 +      unsigned int compress_percentage;
 +      unsigned int compress_chunk;
 +      uint32_t time_based;
 +      uint32_t disable_lat;
 +      uint32_t disable_clat;
 +      uint32_t disable_slat;
 +      uint32_t disable_bw;
 +      uint32_t gtod_reduce;
 +      uint32_t gtod_cpu;
 +      uint32_t gtod_offload;
 +      uint32_t clocksource;
 +      uint32_t no_stall;
 +      uint32_t trim_percentage;
 +      uint32_t trim_batch;
 +      uint32_t trim_zero;
 +      uint64_t trim_backlog;
 +      uint32_t clat_percentiles;
 +      uint32_t overwrite_plist;
 +      fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 +
 +      uint8_t read_iolog_file[FIO_TOP_STR_MAX];
 +      uint8_t write_iolog_file[FIO_TOP_STR_MAX];
 +      uint8_t bw_log_file[FIO_TOP_STR_MAX];
 +      uint8_t lat_log_file[FIO_TOP_STR_MAX];
 +      uint8_t iops_log_file[FIO_TOP_STR_MAX];
 +      uint8_t replay_redirect[FIO_TOP_STR_MAX];
 +
 +      /*
 +       * Pre-run and post-run shell
 +       */
 +      uint8_t exec_prerun[FIO_TOP_STR_MAX];
 +      uint8_t exec_postrun[FIO_TOP_STR_MAX];
 +
 +      uint32_t rate[DDIR_RWDIR_CNT];
 +      uint32_t ratemin[DDIR_RWDIR_CNT];
 +      uint32_t ratecycle;
 +      uint32_t rate_iops[DDIR_RWDIR_CNT];
 +      uint32_t rate_iops_min[DDIR_RWDIR_CNT];
 +
 +      uint8_t ioscheduler[FIO_TOP_STR_MAX];
 +
 +      /*
 +       * I/O Error handling
 +       */
 +      uint32_t continue_on_error;
 +
 +      /*
 +       * Benchmark profile type
 +       */
 +      uint8_t profile[FIO_TOP_STR_MAX];
 +
 +      /*
 +       * blkio cgroup support
 +       */
 +      uint8_t cgroup[FIO_TOP_STR_MAX];
 +      uint32_t cgroup_weight;
 +      uint32_t cgroup_nodelete;
 +
 +      uint32_t uid;
 +      uint32_t gid;
 +
 +      int32_t flow_id;
 +      int32_t flow;
 +      int32_t flow_watermark;
 +      uint32_t flow_sleep;
 +
 +      uint64_t offset_increment;
 +
 +      uint32_t sync_file_range;
 +} __attribute__((packed));
 +
 +extern void convert_thread_options_to_cpu(struct thread_options *o, struct thread_options_pack *top);
 +extern void convert_thread_options_to_net(struct thread_options_pack *top, struct thread_options *);
 +extern int fio_test_cconv(struct thread_options *);
 +extern void options_default_fill(struct thread_options *o);
 +
 +#endif