Merge branch 'fix-init-read-iolog' of https://github.com/aclamk/fio
author Jens Axboe <axboe@kernel.dk>
Fri, 19 Oct 2018 17:06:51 +0000 (11:06 -0600)
committer Jens Axboe <axboe@kernel.dk>
Fri, 19 Oct 2018 17:06:51 +0000 (11:06 -0600)
* 'fix-init-read-iolog' of https://github.com/aclamk/fio:
  iolog: Fix problem with setup() not invoked when read_iolog is used.

77 files changed:
FIO-VERSION-GEN
HOWTO
Makefile
backend.c
blktrace.c
blktrace.h
cconv.c
client.c
client.h
configure
engines/cpu.c
engines/fusion-aw.c [deleted file]
engines/http.c
engines/rados.c
eta.c
examples/cross-stripe-verify.fio [new file with mode: 0644]
examples/fio-rand-RW.fio [new file with mode: 0644]
examples/fio-rand-RW.job [deleted file]
examples/fio-rand-read.fio [new file with mode: 0644]
examples/fio-rand-read.job [deleted file]
examples/fio-rand-write.fio [new file with mode: 0644]
examples/fio-rand-write.job [deleted file]
examples/fio-seq-RW.fio [new file with mode: 0644]
examples/fio-seq-RW.job [deleted file]
examples/fio-seq-read.fio [new file with mode: 0644]
examples/fio-seq-read.job [deleted file]
examples/fio-seq-write.fio [new file with mode: 0644]
examples/fio-seq-write.job [deleted file]
examples/fusion-aw-sync.fio [deleted file]
file.h
filesetup.c
fio.1
fio.h
gettime.c
gfio.c
init.c
ioengines.c
iolog.c
lib/axmap.c
lib/axmap.h
lib/lfsr.c
lib/num2str.c
lib/rand.c
lib/zipf.c
lib/zipf.h
options.c
options.h
os/os-aix.h
os/os-android.h
os/os-dragonfly.h
os/os-freebsd.h
os/os-hpux.h
os/os-linux.h
os/os-mac.h
os/os-netbsd.h
os/os-openbsd.h
os/os-solaris.h
os/os-windows.h
os/os.h
os/windows/examples.wxs
os/windows/posix.c
parse.c
rate-submit.c
server.c
server.h
stat.c
stat.h
t/axmap.c
t/jobs/t0010-b7aae4ba.fio [new file with mode: 0644]
t/jobs/t0011-5d2788d5.fio [new file with mode: 0644]
t/zbd/functions
t/zbd/test-zbd-support
thread_options.h
time.c
tools/hist/fio-histo-log-pctiles.py
zbd.c
zbd.h

index 99261fba75f340df23e16bcd7439cb9dc2a87ec9..17b215de188bfe8a2232d0244220902f339f18fb 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.8
+DEF_VER=fio-3.11
 
 LF='
 '
diff --git a/HOWTO b/HOWTO
index 0c5b7109df30c55c35bbc2d8441be0ee97d3ecb8..72ef8725818c91105be33fe44568ed3e55558b59 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -100,6 +100,10 @@ Command line options
 
        Parse options only, don't start any I/O.
 
+.. option:: --merge-blktrace-only
+
+       Merge blktraces only, don't start any I/O.
+
 .. option:: --output=filename
 
        Write output to file `filename`.
@@ -2335,8 +2339,13 @@ I/O depth
        ``serialize_overlap`` tells fio to avoid provoking this behavior by explicitly
        serializing in-flight I/Os that have a non-zero overlap. Note that setting
        this option can reduce both performance and the :option:`iodepth` achieved.
-       Additionally this option does not work when :option:`io_submit_mode` is set to
-       offload. Default: false.
+
+       This option only applies to I/Os issued for a single job except when it is
+       enabled along with :option:`io_submit_mode`=offload. In offload mode, fio
+       will check for overlap among all I/Os submitted by offload jobs with :option:`serialize_overlap`
+       enabled. Threads must be used for all such jobs.
+
+       Default: false.
 
 .. option:: io_submit_mode=str
 
@@ -2491,6 +2500,33 @@ I/O replay
        will be read at once. If selected true, input from iolog will be read
        gradually. Useful when iolog is very large, or it is generated.
 
+.. option:: merge_blktrace_file=str
+
+       When specified, rather than replaying the logs passed to :option:`read_iolog`,
+       the logs go through a merge phase which aggregates them into a single
+       blktrace. The resulting file is then passed on as the :option:`read_iolog`
+       parameter. The intention here is to make the order of events consistent.
+       This limits the influence of the scheduler compared to replaying multiple
+       blktraces via concurrent jobs.
+
+.. option:: merge_blktrace_scalars=float_list
+
+       This is a percentage based option that is index paired with the list of
+       files passed to :option:`read_iolog`. When merging is performed, scale
+       the time of each event by the corresponding amount. For example,
+       ``--merge_blktrace_scalars="50:100"`` runs the first trace in halftime
+       and the second trace in realtime. This knob is separately tunable from
+       :option:`replay_time_scale` which scales the trace during runtime and
+       does not change the output of the merge unlike this option.
+
+.. option:: merge_blktrace_iters=float_list
+
+       This is a whole number option that is index paired with the list of files
+       passed to :option:`read_iolog`. When merging is performed, run each trace
+       for the specified number of iterations. For example,
+       ``--merge_blktrace_iters="2:1"`` runs the first trace for two iterations
+       and the second trace for one iteration.
+
 .. option:: replay_no_stall=bool
 
        When replaying I/O with :option:`read_iolog` the default behavior is to
@@ -2528,12 +2564,13 @@ I/O replay
 
 .. option:: replay_align=int
 
-       Force alignment of I/O offsets and lengths in a trace to this power of 2
-       value.
+       Force alignment of the byte offsets in a trace to this value. The value
+       must be a power of 2.
 
 .. option:: replay_scale=int
 
-       Scale sector offsets down by this factor when replaying traces.
+       Scale byte offsets down by this factor when replaying traces. Should most
+       likely use :option:`replay_align` as well.
 
 .. option:: replay_skip=str
 
@@ -2969,6 +3006,10 @@ Steady state
        data from the rolling collection window. Threshold limits can be expressed
        as a fixed value or as a percentage of the mean in the collection window.
 
+       When using this feature, most jobs should include the :option:`time_based`
+       and :option:`runtime` options or the :option:`loops` option so that fio does not
+       stop running after it has covered the full size of the specified file(s) or device(s).
+
                **iops**
                        Collect IOPS data. Stop the job if all individual IOPS measurements
                        are within the specified limit of the mean IOPS (e.g., ``iops:2``
@@ -3839,6 +3880,46 @@ given in bytes. The `action` can be one of these:
 **trim**
           Trim the given file from the given `offset` for `length` bytes.
 
+
+I/O Replay - Merging Traces
+---------------------------
+
+Colocation is a common practice used to get the most out of a machine.
+Knowing which workloads play nicely with each other and which ones don't is
+a much harder task. While fio can replay workloads concurrently via multiple
+jobs, it leaves some variability up to the scheduler making results harder to
+reproduce. Merging is a way to make the order of events consistent.
+
+Merging is integrated into I/O replay and is performed when
+:option:`merge_blktrace_file` is specified. The files passed to
+:option:`read_iolog` go through the merge process, which writes a single
+merged trace to the specified file. The output file is passed on as if it were
+the only file passed to :option:`read_iolog`. An example would look like::
+
+       $ fio --read_iolog="<file1>:<file2>" --merge_blktrace_file="<output_file>"
+
+Creating only the merged file can be done by passing the command line argument
+:option:`--merge-blktrace-only`.
+
+Scaling traces can be done to see the relative impact of any particular trace
+being slowed down or sped up. :option:`merge_blktrace_scalars` takes in a colon
+separated list of percentage scalars. It is index paired with the files passed
+to :option:`read_iolog`.
+
+With scaling, it may be desirable to match the running time of all traces.
+This can be done with :option:`merge_blktrace_iters`. It is index paired with
+:option:`read_iolog` just like :option:`merge_blktrace_scalars`.
+
+As an example, consider two traces, A and B, each 60s long. To see the
+impact of trace A issuing IOs twice as fast, repeating trace A over the
+runtime of trace B, the following can be done::
+
+       $ fio --read_iolog="<trace_a>:<trace_b>" --merge_blktrace_file="<output_file>" --merge_blktrace_scalars="50:100" --merge_blktrace_iters="2:1"
+
+This runs trace A at 2x the speed twice for approximately the same runtime as
+a single run of trace B.
+
+
 CPU idleness profiling
 ----------------------
 
index 7e87b2fd6c1eca0cdfce8881e631b753e492787f..4721b789be273ec70fdbc50f567c2bab771cddf0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -86,9 +86,6 @@ endif
 ifdef CONFIG_GUASI
   SOURCE += engines/guasi.c
 endif
-ifdef CONFIG_FUSION_AW
-  SOURCE += engines/fusion-aw.c
-endif
 ifdef CONFIG_SOLARISAIO
   SOURCE += engines/solarisaio.c
 endif
@@ -201,7 +198,7 @@ endif
 ifneq (,$(findstring CYGWIN,$(CONFIG_TARGET_OS)))
   SOURCE += os/windows/posix.c
   LIBS  += -lpthread -lpsapi -lws2_32
-  CFLAGS += -DPSAPI_VERSION=1 -Ios/windows/posix/include -Wno-format -static
+  CFLAGS += -DPSAPI_VERSION=1 -Ios/windows/posix/include -Wno-format
 endif
 
 OBJS := $(SOURCE:.c=.o)
index 8fec1ce3141f4f5b3f910784c75f5f6a200c88d9..cc3c4e78e9d8ae6b8fd9cdc245272fc395369f34 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -29,6 +29,7 @@
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <math.h>
+#include <pthread.h>
 
 #include "fio.h"
 #include "smalloc.h"
@@ -53,7 +54,7 @@ static struct fio_sem *startup_sem;
 static struct flist_head *cgroup_list;
 static struct cgroup_mnt *cgroup_mnt;
 static int exit_value;
-static volatile int fio_abort;
+static volatile bool fio_abort;
 static unsigned int nr_process = 0;
 static unsigned int nr_thread = 0;
 
@@ -65,6 +66,7 @@ unsigned int stat_number = 0;
 int shm_id = 0;
 int temp_stall_ts;
 unsigned long done_secs = 0;
+pthread_mutex_t overlap_check = PTHREAD_MUTEX_INITIALIZER;
 
 #define JOB_START_TIMEOUT      (5 * 1000)
 
@@ -567,7 +569,7 @@ static int unlink_all_files(struct thread_data *td)
 /*
  * Check if io_u will overlap an in-flight IO in the queue
  */
-static bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u)
+bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u)
 {
        bool overlap;
        struct io_u *check_io_u;
@@ -1872,7 +1874,11 @@ static void *thread_main(void *data)
                         "perhaps try --debug=io option for details?\n",
                         td->o.name, td->io_ops->name);
 
+       if (td->o.serialize_overlap && td->o.io_submit_mode == IO_MODE_OFFLOAD)
+               pthread_mutex_lock(&overlap_check);
        td_set_runstate(td, TD_FINISHING);
+       if (td->o.serialize_overlap && td->o.io_submit_mode == IO_MODE_OFFLOAD)
+               pthread_mutex_unlock(&overlap_check);
 
        update_rusage_stat(td);
        td->ts.total_run_time = mtime_since_now(&td->epoch);
@@ -2213,18 +2219,22 @@ static void run_threads(struct sk_out *sk_out)
        }
 
        if (output_format & FIO_OUTPUT_NORMAL) {
-               log_info("Starting ");
+               struct buf_output out;
+
+               buf_output_init(&out);
+               __log_buf(&out, "Starting ");
                if (nr_thread)
-                       log_info("%d thread%s", nr_thread,
+                       __log_buf(&out, "%d thread%s", nr_thread,
                                                nr_thread > 1 ? "s" : "");
                if (nr_process) {
                        if (nr_thread)
-                               log_info(" and ");
-                       log_info("%d process%s", nr_process,
+                               __log_buf(&out, " and ");
+                       __log_buf(&out, "%d process%s", nr_process,
                                                nr_process > 1 ? "es" : "");
                }
-               log_info("\n");
-               log_info_flush();
+               __log_buf(&out, "\n");
+               log_info_buf(out.buf, out.buflen);
+               buf_output_free(&out);
        }
 
        todo = thread_number;
@@ -2367,7 +2377,7 @@ reap:
                        if (fio_sem_down_timeout(startup_sem, 10000)) {
                                log_err("fio: job startup hung? exiting.\n");
                                fio_terminate_threads(TERMINATE_ALL);
-                               fio_abort = 1;
+                               fio_abort = true;
                                nr_started--;
                                free(fd);
                                break;
index 36a7180930d0f23bf036922820a7901f165a15c9..efe9ce248a8c7f861a6aa5dbec9163f6d51ca90d 100644 (file)
@@ -3,8 +3,7 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include <sys/ioctl.h>
-#include <linux/fs.h>
+#include <unistd.h>
 
 #include "flist.h"
 #include "fio.h"
@@ -128,37 +127,17 @@ static void trace_add_open_close_event(struct thread_data *td, int fileno, enum
        flist_add_tail(&ipo->list, &td->io_log_list);
 }
 
-static int get_dev_blocksize(const char *dev, unsigned int *bs)
+static int trace_add_file(struct thread_data *td, __u32 device)
 {
-       int fd;
-
-       fd = open(dev, O_RDONLY);
-       if (fd < 0)
-               return 1;
-
-       if (ioctl(fd, BLKSSZGET, bs) < 0) {
-               close(fd);
-               return 1;
-       }
-
-       close(fd);
-       return 0;
-}
-
-static int trace_add_file(struct thread_data *td, __u32 device,
-                         unsigned int *bs)
-{
-       static unsigned int last_maj, last_min, last_fileno, last_bs;
+       static unsigned int last_maj, last_min, last_fileno;
        unsigned int maj = FMAJOR(device);
        unsigned int min = FMINOR(device);
        struct fio_file *f;
-       unsigned int i;
        char dev[256];
+       unsigned int i;
 
-       if (last_maj == maj && last_min == min) {
-               *bs = last_bs;
+       if (last_maj == maj && last_min == min)
                return last_fileno;
-       }
 
        last_maj = maj;
        last_min = min;
@@ -166,17 +145,14 @@ static int trace_add_file(struct thread_data *td, __u32 device,
        /*
         * check for this file in our list
         */
-       for_each_file(td, f, i) {
+       for_each_file(td, f, i)
                if (f->major == maj && f->minor == min) {
                        last_fileno = f->fileno;
-                       last_bs = f->bs;
-                       goto out;
+                       return last_fileno;
                }
-       }
 
        strcpy(dev, "/dev");
        if (blktrace_lookup_device(td->o.replay_redirect, dev, maj, min)) {
-               unsigned int this_bs;
                int fileno;
 
                if (td->o.replay_redirect)
@@ -188,22 +164,13 @@ static int trace_add_file(struct thread_data *td, __u32 device,
 
                dprint(FD_BLKTRACE, "add devices %s\n", dev);
                fileno = add_file_exclusive(td, dev);
-
-               if (get_dev_blocksize(dev, &this_bs))
-                       this_bs = 512;
-
                td->o.open_files++;
                td->files[fileno]->major = maj;
                td->files[fileno]->minor = min;
-               td->files[fileno]->bs = this_bs;
                trace_add_open_close_event(td, fileno, FIO_LOG_OPEN_FILE);
-
                last_fileno = fileno;
-               last_bs = this_bs;
        }
 
-out:
-       *bs = last_bs;
        return last_fileno;
 }
 
@@ -220,14 +187,14 @@ static void t_bytes_align(struct thread_options *o, struct blk_io_trace *t)
  */
 static void store_ipo(struct thread_data *td, unsigned long long offset,
                      unsigned int bytes, int rw, unsigned long long ttime,
-                     int fileno, unsigned int bs)
+                     int fileno)
 {
        struct io_piece *ipo;
 
        ipo = calloc(1, sizeof(*ipo));
        init_ipo(ipo);
 
-       ipo->offset = offset * bs;
+       ipo->offset = offset * 512;
        if (td->o.replay_scale)
                ipo->offset = ipo->offset / td->o.replay_scale;
        ipo_bytes_align(td->o.replay_align, ipo);
@@ -267,10 +234,9 @@ static void handle_trace_notify(struct blk_io_trace *t)
 static void handle_trace_discard(struct thread_data *td,
                                 struct blk_io_trace *t,
                                 unsigned long long ttime,
-                                unsigned long *ios, unsigned int *rw_bs)
+                                unsigned long *ios, unsigned int *bs)
 {
        struct io_piece *ipo;
-       unsigned int bs;
        int fileno;
 
        if (td->o.replay_skip & (1u << DDIR_TRIM))
@@ -278,17 +244,17 @@ static void handle_trace_discard(struct thread_data *td,
 
        ipo = calloc(1, sizeof(*ipo));
        init_ipo(ipo);
-       fileno = trace_add_file(td, t->device, &bs);
+       fileno = trace_add_file(td, t->device);
 
        ios[DDIR_TRIM]++;
-       if (t->bytes > rw_bs[DDIR_TRIM])
-               rw_bs[DDIR_TRIM] = t->bytes;
+       if (t->bytes > bs[DDIR_TRIM])
+               bs[DDIR_TRIM] = t->bytes;
 
        td->o.size += t->bytes;
 
        INIT_FLIST_HEAD(&ipo->list);
 
-       ipo->offset = t->sector * bs;
+       ipo->offset = t->sector * 512;
        if (td->o.replay_scale)
                ipo->offset = ipo->offset / td->o.replay_scale;
        ipo_bytes_align(td->o.replay_align, ipo);
@@ -310,13 +276,12 @@ static void dump_trace(struct blk_io_trace *t)
 
 static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
                            unsigned long long ttime, unsigned long *ios,
-                           unsigned int *rw_bs)
+                           unsigned int *bs)
 {
-       unsigned int bs;
        int rw;
        int fileno;
 
-       fileno = trace_add_file(td, t->device, &bs);
+       fileno = trace_add_file(td, t->device);
 
        rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
 
@@ -334,19 +299,18 @@ static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
                return;
        }
 
-       if (t->bytes > rw_bs[rw])
-               rw_bs[rw] = t->bytes;
+       if (t->bytes > bs[rw])
+               bs[rw] = t->bytes;
 
        ios[rw]++;
        td->o.size += t->bytes;
-       store_ipo(td, t->sector, t->bytes, rw, ttime, fileno, bs);
+       store_ipo(td, t->sector, t->bytes, rw, ttime, fileno);
 }
 
 static void handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
                               unsigned long long ttime, unsigned long *ios)
 {
        struct io_piece *ipo;
-       unsigned int bs;
        int fileno;
 
        if (td->o.replay_skip & (1u << DDIR_SYNC))
@@ -354,7 +318,7 @@ static void handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
 
        ipo = calloc(1, sizeof(*ipo));
        init_ipo(ipo);
-       fileno = trace_add_file(td, t->device, &bs);
+       fileno = trace_add_file(td, t->device);
 
        ipo->delay = ttime / 1000;
        ipo->ddir = DDIR_SYNC;
@@ -613,3 +577,213 @@ err:
        fifo_free(fifo);
        return false;
 }
+
+static int init_merge_param_list(fio_fp64_t *vals, struct blktrace_cursor *bcs,
+                                int nr_logs, int def, size_t off)
+{
+       int i = 0, len = 0;
+
+       while (len < FIO_IO_U_LIST_MAX_LEN && vals[len].u.f != 0.0)
+               len++;
+
+       if (len && len != nr_logs)
+               return len;
+
+       for (i = 0; i < nr_logs; i++) {
+               int *val = (int *)((char *)&bcs[i] + off);
+               *val = def;
+               if (len)
+                       *val = (int)vals[i].u.f;
+       }
+
+       return 0;
+
+}
+
+static int find_earliest_io(struct blktrace_cursor *bcs, int nr_logs)
+{
+       __u64 time = ~(__u64)0;
+       int idx = 0, i;
+
+       for (i = 0; i < nr_logs; i++) {
+               if (bcs[i].t.time < time) {
+                       time = bcs[i].t.time;
+                       idx = i;
+               }
+       }
+
+       return idx;
+}
+
+static void merge_finish_file(struct blktrace_cursor *bcs, int i, int *nr_logs)
+{
+       bcs[i].iter++;
+       if (bcs[i].iter < bcs[i].nr_iter) {
+               lseek(bcs[i].fd, 0, SEEK_SET);
+               return;
+       }
+
+       *nr_logs -= 1;
+
+       /* close file */
+       fifo_free(bcs[i].fifo);
+       close(bcs[i].fd);
+
+       /* keep active files contiguous */
+       memmove(&bcs[i], &bcs[*nr_logs], sizeof(bcs[i]));
+}
+
+static int read_trace(struct thread_data *td, struct blktrace_cursor *bc)
+{
+       int ret = 0;
+       struct blk_io_trace *t = &bc->t;
+
+read_skip:
+       /* read an io trace */
+       ret = trace_fifo_get(td, bc->fifo, bc->fd, t, sizeof(*t));
+       if (ret < 0) {
+               return ret;
+       } else if (!ret) {
+               if (!bc->length)
+                       bc->length = bc->t.time;
+               return ret;
+       } else if (ret < (int) sizeof(*t)) {
+               log_err("fio: short fifo get\n");
+               return -1;
+       }
+
+       if (bc->swap)
+               byteswap_trace(t);
+
+       /* skip over actions that fio does not care about */
+       if ((t->action & 0xffff) != __BLK_TA_QUEUE ||
+           t_get_ddir(t) == DDIR_INVAL) {
+               ret = discard_pdu(td, bc->fifo, bc->fd, t);
+               if (ret < 0) {
+                       td_verror(td, ret, "blktrace lseek");
+                       return ret;
+               } else if (t->pdu_len != ret) {
+                       log_err("fio: discarded %d of %d\n", ret,
+                               t->pdu_len);
+                       return -1;
+               }
+               goto read_skip;
+       }
+
+       t->time = (t->time + bc->iter * bc->length) * bc->scalar / 100;
+
+       return ret;
+}
+
+static int write_trace(FILE *fp, struct blk_io_trace *t)
+{
+       /* pdu is not used so just write out only the io trace */
+       t->pdu_len = 0;
+       return fwrite((void *)t, sizeof(*t), 1, fp);
+}
+
+int merge_blktrace_iologs(struct thread_data *td)
+{
+       int nr_logs = get_max_str_idx(td->o.read_iolog_file);
+       struct blktrace_cursor *bcs = malloc(sizeof(struct blktrace_cursor) *
+                                            nr_logs);
+       struct blktrace_cursor *bc;
+       FILE *merge_fp;
+       char *str, *ptr, *name, *merge_buf;
+       int i, ret;
+
+       ret = init_merge_param_list(td->o.merge_blktrace_scalars, bcs, nr_logs,
+                                   100, offsetof(struct blktrace_cursor,
+                                                 scalar));
+       if (ret) {
+               log_err("fio: merge_blktrace_scalars(%d) != nr_logs(%d)\n",
+                       ret, nr_logs);
+               goto err_param;
+       }
+
+       ret = init_merge_param_list(td->o.merge_blktrace_iters, bcs, nr_logs,
+                                   1, offsetof(struct blktrace_cursor,
+                                               nr_iter));
+       if (ret) {
+               log_err("fio: merge_blktrace_iters(%d) != nr_logs(%d)\n",
+                       ret, nr_logs);
+               goto err_param;
+       }
+
+       /* setup output file */
+       merge_fp = fopen(td->o.merge_blktrace_file, "w");
+       merge_buf = malloc(128 * 1024);
+       ret = setvbuf(merge_fp, merge_buf, _IOFBF, 128 * 1024);
+       if (ret)
+               goto err_out_file;
+
+       /* setup input files */
+       str = ptr = strdup(td->o.read_iolog_file);
+       nr_logs = 0;
+       for (i = 0; (name = get_next_str(&ptr)) != NULL; i++) {
+               bcs[i].fd = open(name, O_RDONLY);
+               if (bcs[i].fd < 0) {
+                       log_err("fio: could not open file: %s\n", name);
+                       ret = bcs[i].fd;
+                       goto err_file;
+               }
+               bcs[i].fifo = fifo_alloc(TRACE_FIFO_SIZE);
+               nr_logs++;
+
+               if (!is_blktrace(name, &bcs[i].swap)) {
+                       log_err("fio: file is not a blktrace: %s\n", name);
+                       goto err_file;
+               }
+
+               ret = read_trace(td, &bcs[i]);
+               if (ret < 0) {
+                       goto err_file;
+               } else if (!ret) {
+                       merge_finish_file(bcs, i, &nr_logs);
+                       i--;
+               }
+       }
+       free(str);
+
+       /* merge files */
+       while (nr_logs) {
+               i = find_earliest_io(bcs, nr_logs);
+               bc = &bcs[i];
+               /* skip over the pdu */
+               ret = discard_pdu(td, bc->fifo, bc->fd, &bc->t);
+               if (ret < 0) {
+                       td_verror(td, ret, "blktrace lseek");
+                       goto err_file;
+               } else if (bc->t.pdu_len != ret) {
+                       log_err("fio: discarded %d of %d\n", ret,
+                               bc->t.pdu_len);
+                       goto err_file;
+               }
+
+               ret = write_trace(merge_fp, &bc->t);
+               ret = read_trace(td, bc);
+               if (ret < 0)
+                       goto err_file;
+               else if (!ret)
+                       merge_finish_file(bcs, i, &nr_logs);
+       }
+
+       /* set iolog file to read from the newly merged file */
+       td->o.read_iolog_file = td->o.merge_blktrace_file;
+       ret = 0;
+
+err_file:
+       /* cleanup */
+       for (i = 0; i < nr_logs; i++) {
+               fifo_free(bcs[i].fifo);
+               close(bcs[i].fd);
+       }
+err_out_file:
+       fflush(merge_fp);
+       fclose(merge_fp);
+       free(merge_buf);
+err_param:
+       free(bcs);
+
+       return ret;
+}
index 096993eda81fbc68ad49dfb704c21c0e35de2653..a0e82faa05eed81bb3e8819a200208089a6009d7 100644 (file)
@@ -1,10 +1,27 @@
 #ifndef FIO_BLKTRACE_H
 #define FIO_BLKTRACE_H
 
+
 #ifdef FIO_HAVE_BLKTRACE
 
+#include <asm/types.h>
+
+#include "blktrace_api.h"
+
+struct blktrace_cursor {
+       struct fifo             *fifo;  // fifo queue for reading
+       int                     fd;     // blktrace file
+       __u64                   length; // length of trace
+       struct blk_io_trace     t;      // current io trace
+       int                     swap;   // bitwise reverse required
+       int                     scalar; // scale percentage
+       int                     iter;   // current iteration
+       int                     nr_iter; // number of iterations to run
+};
+
 bool is_blktrace(const char *, int *);
 bool load_blktrace(struct thread_data *, const char *, int);
+int merge_blktrace_iologs(struct thread_data *td);
 
 #else
 
@@ -19,5 +36,10 @@ static inline bool load_blktrace(struct thread_data *td, const char *fname,
        return false;
 }
 
+static inline int merge_blktrace_iologs(struct thread_data *td)
+{
+       return false;
+}
+
 #endif
 #endif
diff --git a/cconv.c b/cconv.c
index 1d7f6f22350327e99ef070acb3a3764ace470711..50e45c63a636bf0be66299a8f20a0a9e75d264ae 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -37,6 +37,7 @@ static void free_thread_options_to_cpu(struct thread_options *o)
        free(o->mmapfile);
        free(o->read_iolog_file);
        free(o->write_iolog_file);
+       free(o->merge_blktrace_file);
        free(o->bw_log_file);
        free(o->lat_log_file);
        free(o->iops_log_file);
@@ -73,6 +74,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        string_to_cpu(&o->mmapfile, top->mmapfile);
        string_to_cpu(&o->read_iolog_file, top->read_iolog_file);
        string_to_cpu(&o->write_iolog_file, top->write_iolog_file);
+       string_to_cpu(&o->merge_blktrace_file, top->merge_blktrace_file);
        string_to_cpu(&o->bw_log_file, top->bw_log_file);
        string_to_cpu(&o->lat_log_file, top->lat_log_file);
        string_to_cpu(&o->iops_log_file, top->iops_log_file);
@@ -304,6 +306,12 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
                o->percentile_list[i].u.f = fio_uint64_to_double(le64_to_cpu(top->percentile_list[i].u.i));
+
+       for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+               o->merge_blktrace_scalars[i].u.f = fio_uint64_to_double(le64_to_cpu(top->merge_blktrace_scalars[i].u.i));
+
+       for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+               o->merge_blktrace_iters[i].u.f = fio_uint64_to_double(le64_to_cpu(top->merge_blktrace_iters[i].u.i));
 #if 0
        uint8_t cpumask[FIO_TOP_STR_MAX];
        uint8_t verify_cpumask[FIO_TOP_STR_MAX];
@@ -330,6 +338,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        string_to_net(top->mmapfile, o->mmapfile);
        string_to_net(top->read_iolog_file, o->read_iolog_file);
        string_to_net(top->write_iolog_file, o->write_iolog_file);
+       string_to_net(top->merge_blktrace_file, o->merge_blktrace_file);
        string_to_net(top->bw_log_file, o->bw_log_file);
        string_to_net(top->lat_log_file, o->lat_log_file);
        string_to_net(top->iops_log_file, o->iops_log_file);
@@ -565,6 +574,12 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
                top->percentile_list[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->percentile_list[i].u.f));
+
+       for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+               top->merge_blktrace_scalars[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->merge_blktrace_scalars[i].u.f));
+
+       for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+               top->merge_blktrace_iters[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->merge_blktrace_iters[i].u.f));
 #if 0
        uint8_t cpumask[FIO_TOP_STR_MAX];
        uint8_t verify_cpumask[FIO_TOP_STR_MAX];
index 31c7c6495790e91018cface694803ff9bfcefa1e..3248906756db77d9586fb2a71513943c9e57b0b6 100644 (file)
--- a/client.c
+++ b/client.c
@@ -198,14 +198,23 @@ static void fio_client_json_init(void)
 
 static void fio_client_json_fini(void)
 {
-       if (!(output_format & FIO_OUTPUT_JSON))
+       struct buf_output out;
+
+       if (!root)
                return;
 
-       log_info("\n");
-       json_print_object(root, NULL);
-       log_info("\n");
+       buf_output_init(&out);
+
+       __log_buf(&out, "\n");
+       json_print_object(root, &out);
+       __log_buf(&out, "\n");
+       log_info_buf(out.buf, out.buflen);
+
+       buf_output_free(&out);
+
        json_free_object(root);
        root = NULL;
+       job_opt_object = NULL;
        clients_array = NULL;
        du_array = NULL;
 }
@@ -233,6 +242,9 @@ void fio_put_client(struct fio_client *client)
        if (--client->refs)
                return;
 
+       log_info_buf(client->buf.buf, client->buf.buflen);
+       buf_output_free(&client->buf);
+
        free(client->hostname);
        if (client->argv)
                free(client->argv);
@@ -351,9 +363,7 @@ void fio_client_add_cmd_option(void *cookie, const char *opt)
        }
 }
 
-struct fio_client *fio_client_add_explicit(struct client_ops *ops,
-                                          const char *hostname, int type,
-                                          int port)
+static struct fio_client *get_new_client(void)
 {
        struct fio_client *client;
 
@@ -366,6 +376,19 @@ struct fio_client *fio_client_add_explicit(struct client_ops *ops,
        INIT_FLIST_HEAD(&client->eta_list);
        INIT_FLIST_HEAD(&client->cmd_list);
 
+       buf_output_init(&client->buf);
+
+       return client;
+}
+
+struct fio_client *fio_client_add_explicit(struct client_ops *ops,
+                                          const char *hostname, int type,
+                                          int port)
+{
+       struct fio_client *client;
+
+       client = get_new_client();
+
        client->hostname = strdup(hostname);
 
        if (type == Fio_client_socket)
@@ -441,14 +464,7 @@ int fio_client_add(struct client_ops *ops, const char *hostname, void **cookie)
                }
        }
 
-       client = malloc(sizeof(*client));
-       memset(client, 0, sizeof(*client));
-
-       INIT_FLIST_HEAD(&client->list);
-       INIT_FLIST_HEAD(&client->hash_list);
-       INIT_FLIST_HEAD(&client->arg_list);
-       INIT_FLIST_HEAD(&client->eta_list);
-       INIT_FLIST_HEAD(&client->cmd_list);
+       client = get_new_client();
 
        if (fio_server_parse_string(hostname, &client->hostname,
                                        &client->is_sock, &client->port,
@@ -1059,13 +1075,10 @@ static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd)
        struct flist_head *opt_list = NULL;
        struct json_object *tsobj;
 
-       if (output_format & FIO_OUTPUT_TERSE)
-               return;
-
        if (client->opt_lists && p->ts.thread_number <= client->jobs)
                opt_list = &client->opt_lists[p->ts.thread_number - 1];
 
-       tsobj = show_thread_status(&p->ts, &p->rs, opt_list, NULL);
+       tsobj = show_thread_status(&p->ts, &p->rs, opt_list, &client->buf);
        client->did_stat = true;
        if (tsobj) {
                json_object_add_client_info(tsobj, client);
@@ -1086,7 +1099,7 @@ static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd)
 
        if (++sum_stat_nr == sum_stat_clients) {
                strcpy(client_ts.name, "All clients");
-               tsobj = show_thread_status(&client_ts, &client_gs, NULL, NULL);
+               tsobj = show_thread_status(&client_ts, &client_gs, NULL, &client->buf);
                if (tsobj) {
                        json_object_add_client_info(tsobj, client);
                        json_array_add_value_object(clients_array, tsobj);
@@ -1098,11 +1111,8 @@ static void handle_gs(struct fio_client *client, struct fio_net_cmd *cmd)
 {
        struct group_run_stats *gs = (struct group_run_stats *) cmd->payload;
 
-       if (output_format & FIO_OUTPUT_TERSE)
-               return;
-
        if (output_format & FIO_OUTPUT_NORMAL)
-               show_group_stats(gs, NULL);
+               show_group_stats(gs, &client->buf);
 }
 
 static void handle_job_opt(struct fio_client *client, struct fio_net_cmd *cmd)
@@ -1144,13 +1154,17 @@ static void handle_text(struct fio_client *client, struct fio_net_cmd *cmd)
        const char *buf = (const char *) pdu->buf;
        const char *name;
        int fio_unused ret;
+       struct buf_output out;
+
+       buf_output_init(&out);
 
        name = client->name ? client->name : client->hostname;
 
        if (!client->skip_newline && !(output_format & FIO_OUTPUT_TERSE))
-               fprintf(f_out, "<%s> ", name);
-       ret = fwrite(buf, pdu->buf_len, 1, f_out);
-       fflush(f_out);
+               __log_buf(&out, "<%s> ", name);
+       __log_buf(&out, "%s", buf);
+       log_info_buf(out.buf, out.buflen);
+       buf_output_free(&out);
        client->skip_newline = strchr(buf, '\n') == NULL;
 }
 
@@ -1191,23 +1205,21 @@ static void handle_du(struct fio_client *client, struct fio_net_cmd *cmd)
 {
        struct cmd_du_pdu *du = (struct cmd_du_pdu *) cmd->payload;
 
-       if (output_format & FIO_OUTPUT_TERSE)
-               return;
-
-       if (!client->disk_stats_shown) {
+       if (!client->disk_stats_shown)
                client->disk_stats_shown = true;
-               if (!(output_format & FIO_OUTPUT_JSON))
-                       log_info("\nDisk stats (read/write):\n");
-       }
 
        if (output_format & FIO_OUTPUT_JSON) {
                struct json_object *duobj;
+
                json_array_add_disk_util(&du->dus, &du->agg, du_array);
                duobj = json_array_last_value_object(du_array);
                json_object_add_client_info(duobj, client);
+       } else if (output_format & FIO_OUTPUT_TERSE)
+               print_disk_util(&du->dus, &du->agg, 1, &client->buf);
+       else if (output_format & FIO_OUTPUT_NORMAL) {
+               __log_buf(&client->buf, "\nDisk stats (read/write):\n");
+               print_disk_util(&du->dus, &du->agg, 0, &client->buf);
        }
-       if (output_format & FIO_OUTPUT_NORMAL)
-               print_disk_util(&du->dus, &du->agg, 0, NULL);
 }
 
 static void convert_jobs_eta(struct jobs_eta *je)
@@ -1465,9 +1477,6 @@ static void handle_probe(struct fio_client *client, struct fio_net_cmd *cmd)
        const char *os, *arch;
        char bit[16];
 
-       if (output_format & FIO_OUTPUT_TERSE)
-               return;
-
        os = fio_get_os_string(probe->os);
        if (!os)
                os = "unknown";
@@ -1479,10 +1488,11 @@ static void handle_probe(struct fio_client *client, struct fio_net_cmd *cmd)
        sprintf(bit, "%d-bit", probe->bpp * 8);
        probe->flags = le64_to_cpu(probe->flags);
 
-       if (!(output_format & FIO_OUTPUT_JSON))
+       if (output_format & FIO_OUTPUT_NORMAL) {
                log_info("hostname=%s, be=%u, %s, os=%s, arch=%s, fio=%s, flags=%lx\n",
                        probe->hostname, probe->bigendian, bit, os, arch,
                        probe->fio_version, (unsigned long) probe->flags);
+       }
 
        if (!client->name)
                client->name = strdup((char *) probe->hostname);
index a597449dcbb218cff12dc36da09001e9bd5bc084..8033325ed0a94371643cb5ebf8c1f69b029e8889 100644 (file)
--- a/client.h
+++ b/client.h
@@ -74,6 +74,8 @@ struct fio_client {
 
        struct client_file *files;
        unsigned int nr_files;
+
+       struct buf_output buf;
 };
 
 typedef void (client_cmd_op)(struct fio_client *, struct fio_net_cmd *);
index 5e11195d60b66cb003d7e5eeff6d7a0ed7355299..5490e26ea70f2db55f86c2e49ec2f9f362568767 100755 (executable)
--- a/configure
+++ b/configure
@@ -361,6 +361,7 @@ CYGWIN*)
   output_sym "CONFIG_WINDOWSAIO"
   # We now take the regular configuration path without having exit 0 here.
   # Flags below are still necessary mostly for MinGW.
+  build_static="yes"
   socklen_t="yes"
   rusage_thread="yes"
   fdatasync="yes"
@@ -1148,28 +1149,6 @@ if compile_prog "" "" "guasi"; then
 fi
 print_config "GUASI" "$guasi"
 
-##########################################
-# fusion-aw probe
-if test "$fusion_aw" != "yes" ; then
-  fusion_aw="no"
-fi
-cat > $TMPC << EOF
-#include <nvm/nvm_primitives.h>
-int main(int argc, char **argv)
-{
-  nvm_version_t ver_info;
-  nvm_handle_t handle;
-
-  handle = nvm_get_handle(0, &ver_info);
-  return nvm_atomic_write(handle, 0, 0, 0);
-}
-EOF
-if compile_prog "" "-L/usr/lib/fio -L/usr/lib/nvm -lnvm-primitives -ldl -lpthread" "fusion-aw"; then
-  LIBS="-L/usr/lib/fio -L/usr/lib/nvm -lnvm-primitives -ldl -lpthread $LIBS"
-  fusion_aw="yes"
-fi
-print_config "Fusion-io atomic engine" "$fusion_aw"
-
 ##########################################
 # libnuma probe
 if test "$libnuma" != "yes" ; then
@@ -2405,9 +2384,6 @@ fi
 if test "$guasi" = "yes" ; then
   output_sym "CONFIG_GUASI"
 fi
-if test "$fusion_aw" = "yes" ; then
-  output_sym "CONFIG_FUSION_AW"
-fi
 if test "$libnuma_v2" = "yes" ; then
   output_sym "CONFIG_LIBNUMA"
 fi
index 09872508e12129d1408040eadb40055c0ebb8181..4d572b441a54921541e5d7e27ae8151c1dc4a2ce 100644 (file)
@@ -85,7 +85,7 @@ static int fio_cpuio_init(struct thread_data *td)
         */
        o->thinktime_blocks = 1;
        o->thinktime_spin = 0;
-       o->thinktime = (co->cpucycle * (100 - co->cpuload)) / co->cpuload;
+       o->thinktime = ((unsigned long long) co->cpucycle * (100 - co->cpuload)) / co->cpuload;
 
        o->nr_files = o->open_files = 1;
 
diff --git a/engines/fusion-aw.c b/engines/fusion-aw.c
deleted file mode 100644 (file)
index eb5fdf5..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Custom fio(1) engine that submits synchronous atomic writes to file.
- *
- * Copyright (C) 2013 Fusion-io, Inc.
- * Author: Santhosh Kumar Koundinya (skoundinya@fusionio.com).
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License version
- * 2 for more details.
- *
- * You should have received a copy of the GNU General Public License Version 2
- * along with this program; if not see <http://www.gnu.org/licenses/>
- */
-
-#include <stdlib.h>
-#include <stdint.h>
-
-#include "../fio.h"
-
-#include <nvm/nvm_primitives.h>
-
-#define NUM_ATOMIC_CAPABILITIES (5)
-
-struct fas_data {
-       nvm_handle_t nvm_handle;
-       size_t xfer_buf_align;
-       size_t xfer_buflen_align;
-       size_t xfer_buflen_max;
-       size_t sector_size;
-};
-
-static enum fio_q_status queue(struct thread_data *td, struct io_u *io_u)
-{
-       struct fas_data *d = FILE_ENG_DATA(io_u->file);
-       int rc;
-
-       if (io_u->ddir != DDIR_WRITE) {
-               td_vmsg(td, EINVAL, "only writes supported", "io_u->ddir");
-               rc = -EINVAL;
-               goto out;
-       }
-
-       if ((size_t) io_u->xfer_buf % d->xfer_buf_align) {
-               td_vmsg(td, EINVAL, "unaligned data buffer", "io_u->xfer_buf");
-               rc = -EINVAL;
-               goto out;
-       }
-
-       if (io_u->xfer_buflen % d->xfer_buflen_align) {
-               td_vmsg(td, EINVAL, "unaligned data size", "io_u->xfer_buflen");
-               rc = -EINVAL;
-               goto out;
-       }
-
-       if (io_u->xfer_buflen > d->xfer_buflen_max) {
-               td_vmsg(td, EINVAL, "data too big", "io_u->xfer_buflen");
-               rc = -EINVAL;
-               goto out;
-       }
-
-       rc = nvm_atomic_write(d->nvm_handle, (uint64_t) io_u->xfer_buf,
-               io_u->xfer_buflen, io_u->offset / d->sector_size);
-       if (rc == -1) {
-               td_verror(td, errno, "nvm_atomic_write");
-               rc = -errno;
-               goto out;
-       }
-       rc = FIO_Q_COMPLETED;
-out:
-       if (rc < 0)
-               io_u->error = -rc;
-
-       return rc;
-}
-
-static int open_file(struct thread_data *td, struct fio_file *f)
-{
-       int rc;
-       int fio_unused close_file_rc;
-       struct fas_data *d;
-       nvm_version_t nvm_version;
-       nvm_capability_t nvm_capability[NUM_ATOMIC_CAPABILITIES];
-
-
-       d = malloc(sizeof(*d));
-       if (!d) {
-               td_verror(td, ENOMEM, "malloc");
-               rc = ENOMEM;
-               goto error;
-       }
-       d->nvm_handle = -1;
-       FILE_SET_ENG_DATA(f, d);
-
-       rc = generic_open_file(td, f);
-
-       if (rc)
-               goto free_engine_data;
-
-       /* Set the version of the library as seen when engine is compiled */
-       nvm_version.major = NVM_PRIMITIVES_API_MAJOR;
-       nvm_version.minor = NVM_PRIMITIVES_API_MINOR;
-       nvm_version.micro = NVM_PRIMITIVES_API_MICRO;
-
-       d->nvm_handle = nvm_get_handle(f->fd, &nvm_version);
-       if (d->nvm_handle == -1) {
-               td_vmsg(td, errno, "nvm_get_handle failed", "nvm_get_handle");
-               rc = errno;
-               goto close_file;
-       }
-
-       nvm_capability[0].cap_id = NVM_CAP_ATOMIC_WRITE_START_ALIGN_ID;
-       nvm_capability[1].cap_id = NVM_CAP_ATOMIC_WRITE_MULTIPLICITY_ID;
-       nvm_capability[2].cap_id = NVM_CAP_ATOMIC_WRITE_MAX_VECTOR_SIZE_ID;
-       nvm_capability[3].cap_id = NVM_CAP_SECTOR_SIZE_ID;
-       nvm_capability[4].cap_id = NVM_CAP_ATOMIC_MAX_IOV_ID;
-       rc = nvm_get_capabilities(d->nvm_handle, nvm_capability,
-                                  NUM_ATOMIC_CAPABILITIES, false);
-       if (rc == -1) {
-               td_vmsg(td, errno, "error in getting atomic write capabilities", "nvm_get_capabilities");
-               rc = errno;
-               goto close_file;
-       } else if (rc < NUM_ATOMIC_CAPABILITIES) {
-               td_vmsg(td, EINVAL, "couldn't get all the atomic write capabilities" , "nvm_get_capabilities");
-               rc = ECANCELED;
-               goto close_file;
-       }
-       /* Reset rc to 0 because we got all capabilities we needed */
-       rc = 0;
-       d->xfer_buf_align = nvm_capability[0].cap_value;
-       d->xfer_buflen_align = nvm_capability[1].cap_value;
-       d->xfer_buflen_max = d->xfer_buflen_align * nvm_capability[2].cap_value * nvm_capability[4].cap_value;
-       d->sector_size = nvm_capability[3].cap_value;
-
-out:
-       return rc;
-close_file:
-       close_file_rc = generic_close_file(td, f);
-free_engine_data:
-       free(d);
-error:
-       f->fd = -1;
-       FILE_SET_ENG_DATA(f, NULL);
-       goto out;
-}
-
-static int close_file(struct thread_data *td, struct fio_file *f)
-{
-       struct fas_data *d = FILE_ENG_DATA(f);
-
-       if (d) {
-               if (d->nvm_handle != -1)
-                       nvm_release_handle(d->nvm_handle);
-               free(d);
-               FILE_SET_ENG_DATA(f, NULL);
-       }
-
-       return generic_close_file(td, f);
-}
-
-static struct ioengine_ops ioengine = {
-       .name = "fusion-aw-sync",
-       .version = FIO_IOOPS_VERSION,
-       .queue = queue,
-       .open_file = open_file,
-       .close_file = close_file,
-       .get_file_size = generic_get_file_size,
-       .flags = FIO_SYNCIO | FIO_RAWIO | FIO_MEMALIGN
-};
-
-static void fio_init fio_fusion_aw_init(void)
-{
-       register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_fusion_aw_exit(void)
-{
-       unregister_ioengine(&ioengine);
-}
index 93fcd0d8e44a5528a4e36879bbd75af9afe9904e..d81e4288ff7bfdec000781240cea3aafa687ca88 100644 (file)
@@ -273,6 +273,8 @@ static void _hmac(unsigned char *md, void *key, int key_len, char *data) {
        ctx = HMAC_CTX_new();
 #else
        ctx = &_ctx;
+       /* work-around crash in certain versions of libssl */
+       HMAC_CTX_init(ctx);
 #endif
        HMAC_Init_ex(ctx, key, key_len, EVP_sha256(), NULL);
        HMAC_Update(ctx, (unsigned char*)data, strlen(data));
index c6aec730e3ac1eb125a5953af019221df90961a6..86100dc4918c586c29d6da8914afc6fbea913e8c 100644 (file)
@@ -21,8 +21,6 @@ struct fio_rados_iou {
 struct rados_data {
        rados_t cluster;
        rados_ioctx_t io_ctx;
-       char **objects;
-       size_t object_count;
        struct io_u **aio_events;
        bool connected;
 };
@@ -96,18 +94,11 @@ static int _fio_setup_rados_data(struct thread_data *td,
        rados->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *));
        if (!rados->aio_events)
                goto failed;
-
-       rados->object_count = td->o.nr_files;
-       rados->objects = calloc(rados->object_count, sizeof(char*));
-       if (!rados->objects)
-               goto failed;
-
        *rados_data_ptr = rados;
        return 0;
 
 failed:
        if (rados) {
-               rados->object_count = 0;
                if (rados->aio_events)
                        free(rados->aio_events);
                free(rados);
@@ -115,15 +106,12 @@ failed:
        return 1;
 }
 
-static void _fio_rados_rm_objects(struct rados_data *rados)
+static void _fio_rados_rm_objects(struct thread_data *td, struct rados_data *rados)
 {
        size_t i;
-       for (i = 0; i < rados->object_count; ++i) {
-               if (rados->objects[i]) {
-                       rados_remove(rados->io_ctx, rados->objects[i]);
-                       free(rados->objects[i]);
-                       rados->objects[i] = NULL;
-               }
+       for (i = 0; i < td->o.nr_files; i++) {
+               struct fio_file *f = td->files[i];
+               rados_remove(rados->io_ctx, f->file_name);
        }
 }
 
@@ -136,7 +124,6 @@ static int _fio_rados_connect(struct thread_data *td)
                td->o.size / (td->o.nr_files ? td->o.nr_files : 1u);
        struct fio_file *f;
        uint32_t i;
-       size_t oname_len = 0;
 
        if (o->cluster_name) {
                char *client_name = NULL;
@@ -165,6 +152,11 @@ static int _fio_rados_connect(struct thread_data *td)
        } else
                r = rados_create(&rados->cluster, o->client_name);
 
+       if (o->pool_name == NULL) {
+               log_err("rados pool name must be provided.\n");
+               goto failed_early;
+       }
+
        if (r < 0) {
                log_err("rados_create failed.\n");
                goto failed_early;
@@ -188,30 +180,18 @@ static int _fio_rados_connect(struct thread_data *td)
                goto failed_shutdown;
        }
 
-       for (i = 0; i < rados->object_count; i++) {
+       for (i = 0; i < td->o.nr_files; i++) {
                f = td->files[i];
                f->real_file_size = file_size;
-               f->engine_pos = i;
-
-               oname_len = strlen(f->file_name) + 32;
-               rados->objects[i] = malloc(oname_len);
-               /* vary objects for different jobs */
-               snprintf(rados->objects[i], oname_len - 1,
-                       "fio_rados_bench.%s.%x",
-                       f->file_name, td->thread_number);
-               r = rados_write(rados->io_ctx, rados->objects[i], "", 0, 0);
+               r = rados_write(rados->io_ctx, f->file_name, "", 0, 0);
                if (r < 0) {
-                       free(rados->objects[i]);
-                       rados->objects[i] = NULL;
-                       log_err("error creating object.\n");
                        goto failed_obj_create;
                }
        }
-
-  return 0;
+       return 0;
 
 failed_obj_create:
-       _fio_rados_rm_objects(rados);
+       _fio_rados_rm_objects(td, rados);
        rados_ioctx_destroy(rados->io_ctx);
        rados->io_ctx = NULL;
 failed_shutdown:
@@ -226,8 +206,6 @@ static void _fio_rados_disconnect(struct rados_data *rados)
        if (!rados)
                return;
 
-       _fio_rados_rm_objects(rados);
-
        if (rados->io_ctx) {
                rados_ioctx_destroy(rados->io_ctx);
                rados->io_ctx = NULL;
@@ -244,8 +222,8 @@ static void fio_rados_cleanup(struct thread_data *td)
        struct rados_data *rados = td->io_ops_data;
 
        if (rados) {
+               _fio_rados_rm_objects(td, rados);
                _fio_rados_disconnect(rados);
-               free(rados->objects);
                free(rados->aio_events);
                free(rados);
        }
@@ -256,7 +234,7 @@ static enum fio_q_status fio_rados_queue(struct thread_data *td,
 {
        struct rados_data *rados = td->io_ops_data;
        struct fio_rados_iou *fri = io_u->engine_data;
-       char *object = rados->objects[io_u->file->engine_pos];
+       char *object = io_u->file->file_name;
        int r = -1;
 
        fio_ro_check(td, io_u);
diff --git a/eta.c b/eta.c
index 970a67dfd0ac8d6672758b99d528f32526f26e4e..b69dd19439712f3e597696dfffb0cdc57dcf5ff9 100644 (file)
--- a/eta.c
+++ b/eta.c
@@ -177,12 +177,27 @@ static unsigned long thread_eta(struct thread_data *td)
                bytes_total = td->fill_device_size;
        }
 
-       if (td->o.zone_size && td->o.zone_skip && bytes_total) {
+       /*
+        * If io_size is set, bytes_total is an exact value that does not need
+        * adjustment.
+        */
+       if (td->o.zone_size && td->o.zone_skip && bytes_total &&
+           !fio_option_is_set(&td->o, io_size)) {
                unsigned int nr_zones;
                uint64_t zone_bytes;
 
-               zone_bytes = bytes_total + td->o.zone_size + td->o.zone_skip;
-               nr_zones = (zone_bytes - 1) / (td->o.zone_size + td->o.zone_skip);
+               /*
+                * Calculate the upper bound of the number of zones that will
+                * be processed, including skipped bytes between zones. If this
+                * is larger than total_io_size (e.g. when --io_size or --size
+                * specify a small value), use the lower bound to avoid
+                * adjustments to a negative value that would result in a very
+                * large bytes_total and an incorrect eta.
+                */
+               zone_bytes = td->o.zone_size + td->o.zone_skip;
+               nr_zones = (bytes_total + zone_bytes - 1) / zone_bytes;
+               if (bytes_total < nr_zones * td->o.zone_skip)
+                       nr_zones = bytes_total / zone_bytes;
                bytes_total -= nr_zones * td->o.zone_skip;
        }
 
diff --git a/examples/cross-stripe-verify.fio b/examples/cross-stripe-verify.fio
new file mode 100644 (file)
index 0000000..68664ed
--- /dev/null
@@ -0,0 +1,25 @@
+# Example of how to split a drive up into sections, manually, and perform
+# verify from a bunch of jobs. This example is special in that it assumes
+# the drive is at around 30 * 124G in size, so with the below settings, we'll
+# cover most of the drive. It's also special in that it doesn't write
+# everything, it just writes 16k at a specific boundary, for every 128k.
+# This is done to exercise the split path for Intel NVMe devices, most of
+# which have a 128k stripe size and require IOs to be split if they cross
+# the stripe boundary.
+#
+[global]
+bs=16k
+direct=1
+rw=write:112k
+verify=crc32c
+filename=/dev/nvme0n1
+verify_backlog=1
+offset_increment=124g
+io_size=120g
+offset=120k
+group_reporting=1
+verify_dump=1
+loops=2
+
+[write-verify]
+numjobs=30
diff --git a/examples/fio-rand-RW.fio b/examples/fio-rand-RW.fio
new file mode 100644 (file)
index 0000000..0df0bc1
--- /dev/null
@@ -0,0 +1,18 @@
+; fio-rand-RW.job for fiotest
+
+[global]
+name=fio-rand-RW
+filename=fio-rand-RW
+rw=randrw
+rwmixread=60
+rwmixwrite=40
+bs=4K
+direct=0
+numjobs=4
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
diff --git a/examples/fio-rand-RW.job b/examples/fio-rand-RW.job
deleted file mode 100644 (file)
index 0df0bc1..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-; fio-rand-RW.job for fiotest
-
-[global]
-name=fio-rand-RW
-filename=fio-rand-RW
-rw=randrw
-rwmixread=60
-rwmixwrite=40
-bs=4K
-direct=0
-numjobs=4
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
diff --git a/examples/fio-rand-read.fio b/examples/fio-rand-read.fio
new file mode 100644 (file)
index 0000000..bc15466
--- /dev/null
@@ -0,0 +1,16 @@
+; fio-rand-read.job for fiotest
+
+[global]
+name=fio-rand-read
+filename=fio-rand-read
+rw=randread
+bs=4K
+direct=0
+numjobs=1
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
diff --git a/examples/fio-rand-read.job b/examples/fio-rand-read.job
deleted file mode 100644 (file)
index bc15466..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-; fio-rand-read.job for fiotest
-
-[global]
-name=fio-rand-read
-filename=fio-rand-read
-rw=randread
-bs=4K
-direct=0
-numjobs=1
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
diff --git a/examples/fio-rand-write.fio b/examples/fio-rand-write.fio
new file mode 100644 (file)
index 0000000..bd1b73a
--- /dev/null
@@ -0,0 +1,16 @@
+; fio-rand-write.job for fiotest
+
+[global]
+name=fio-rand-write
+filename=fio-rand-write
+rw=randwrite
+bs=4K
+direct=0
+numjobs=4
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
diff --git a/examples/fio-rand-write.job b/examples/fio-rand-write.job
deleted file mode 100644 (file)
index bd1b73a..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-; fio-rand-write.job for fiotest
-
-[global]
-name=fio-rand-write
-filename=fio-rand-write
-rw=randwrite
-bs=4K
-direct=0
-numjobs=4
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
diff --git a/examples/fio-seq-RW.fio b/examples/fio-seq-RW.fio
new file mode 100644 (file)
index 0000000..8f7090f
--- /dev/null
@@ -0,0 +1,18 @@
+; fio-seq-RW.job for fiotest
+
+[global]
+name=fio-seq-RW
+filename=fio-seq-RW
+rw=rw
+rwmixread=60
+rwmixwrite=40
+bs=256K
+direct=0
+numjobs=4
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
diff --git a/examples/fio-seq-RW.job b/examples/fio-seq-RW.job
deleted file mode 100644 (file)
index 8f7090f..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-; fio-seq-RW.job for fiotest
-
-[global]
-name=fio-seq-RW
-filename=fio-seq-RW
-rw=rw
-rwmixread=60
-rwmixwrite=40
-bs=256K
-direct=0
-numjobs=4
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
diff --git a/examples/fio-seq-read.fio b/examples/fio-seq-read.fio
new file mode 100644 (file)
index 0000000..28de93c
--- /dev/null
@@ -0,0 +1,14 @@
+[global]
+name=fio-seq-reads
+filename=fio-seq-reads
+rw=read
+bs=256K
+direct=1
+numjobs=1
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
diff --git a/examples/fio-seq-read.job b/examples/fio-seq-read.job
deleted file mode 100644 (file)
index 28de93c..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-[global]
-name=fio-seq-reads
-filename=fio-seq-reads
-rw=read
-bs=256K
-direct=1
-numjobs=1
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
diff --git a/examples/fio-seq-write.fio b/examples/fio-seq-write.fio
new file mode 100644 (file)
index 0000000..b291a15
--- /dev/null
@@ -0,0 +1,16 @@
+; fio-seq-write.job for fiotest
+
+[global]
+name=fio-seq-write
+filename=fio-seq-write
+rw=write
+bs=256K
+direct=0
+numjobs=1
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
diff --git a/examples/fio-seq-write.job b/examples/fio-seq-write.job
deleted file mode 100644 (file)
index b291a15..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-; fio-seq-write.job for fiotest
-
-[global]
-name=fio-seq-write
-filename=fio-seq-write
-rw=write
-bs=256K
-direct=0
-numjobs=1
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
diff --git a/examples/fusion-aw-sync.fio b/examples/fusion-aw-sync.fio
deleted file mode 100644 (file)
index f2ca313..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-# Example Job File that randomly writes 8k worth of data atomically for
-# 60 seconds.
-[rw_aw_file_sync]
-rw=randwrite
-ioengine=fusion-aw-sync
-blocksize=8k
-blockalign=8k
-
-# if file system supports atomic write
-filename=/mnt/fs/file
-# or test on a direct block device instead
-#filename=/dev/fioa
-randrepeat=1
-fallocate=none
-direct=1
-invalidate=0
-runtime=60
-time_based
diff --git a/file.h b/file.h
index 446a1fbeb967cf614547b40272c1cec1d4e19058..e50c0f9c3fac752455bfbe74701fd86b04b844ad 100644 (file)
--- a/file.h
+++ b/file.h
@@ -89,7 +89,6 @@ struct fio_file {
         */
        unsigned int major, minor;
        int fileno;
-       unsigned long long bs;
        char *file_name;
 
        /*
index 25dd907f978054bf0cb63f6dba5b77d3ad6e626a..457d4c1223394969347ff4c56fbc6b73700f6b6b 100644 (file)
@@ -331,7 +331,7 @@ unsigned long long get_rand_file_size(struct thread_data *td)
 {
        unsigned long long ret, sized;
        uint64_t frand_max;
-       unsigned long r;
+       uint64_t r;
 
        frand_max = rand_max(&td->file_size_state);
        r = __rand(&td->file_size_state);
@@ -1192,13 +1192,13 @@ bool pre_read_files(struct thread_data *td)
 static void __init_rand_distribution(struct thread_data *td, struct fio_file *f)
 {
        unsigned int range_size, seed;
-       unsigned long nranges;
+       uint64_t nranges;
        uint64_t fsize;
 
        range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
        fsize = min(f->real_file_size, f->io_size);
 
-       nranges = (fsize + range_size - 1) / range_size;
+       nranges = (fsize + range_size - 1ULL) / range_size;
 
        seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
        if (!td->o.rand_repeatable)
diff --git a/fio.1 b/fio.1
index 593f4db1cdcfd86550c12eea6c18c97774ff1dab..7691b2b13daa7e489e109e43fd947e8834ca466d 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -20,6 +20,9 @@ file and memory debugging). `help' will list all available tracing options.
 .BI \-\-parse\-only
 Parse options only, don't start any I/O.
 .TP
+.BI \-\-merge\-blktrace\-only
+Merge blktraces only, don't start any I/O.
+.TP
 .BI \-\-output \fR=\fPfilename
 Write output to \fIfilename\fR.
 .TP
@@ -2067,8 +2070,15 @@ changing data and the overlapping region has a non-zero size. Setting
 \fBserialize_overlap\fR tells fio to avoid provoking this behavior by explicitly
 serializing in-flight I/Os that have a non-zero overlap. Note that setting
 this option can reduce both performance and the \fBiodepth\fR achieved.
-Additionally this option does not work when \fBio_submit_mode\fR is set to
-offload. Default: false.
+.RS
+.P
+This option only applies to I/Os issued for a single job except when it is
+enabled along with \fBio_submit_mode\fR=offload. In offload mode, fio
+will check for overlap among all I/Os submitted by offload jobs with \fBserialize_overlap\fR
+enabled. Threads must be used for all such jobs.
+.P
+Default: false.
+.RE
 .TP
 .BI io_submit_mode \fR=\fPstr
 This option controls how fio submits the I/O to the I/O engine. The default
@@ -2198,6 +2208,30 @@ Determines how iolog is read. If false (default) entire \fBread_iolog\fR will
 be read at once. If selected true, input from iolog will be read gradually.
 Useful when iolog is very large, or it is generated.
 .TP
+.BI merge_blktrace_file \fR=\fPstr
+When specified, rather than replaying the logs passed to \fBread_iolog\fR,
+the logs go through a merge phase which aggregates them into a single blktrace.
+The resulting file is then passed on as the \fBread_iolog\fR parameter. The
+intention here is to make the order of events consistent. This limits the
+influence of the scheduler compared to replaying multiple blktraces via
+concurrent jobs.
+.TP
+.BI merge_blktrace_scalars \fR=\fPfloat_list
+This is a percentage based option that is index paired with the list of files
+passed to \fBread_iolog\fR. When merging is performed, scale the time of each
+event by the corresponding amount. For example,
+`\-\-merge_blktrace_scalars="50:100"' runs the first trace in halftime and the
+second trace in realtime. This knob is separately tunable from
+\fBreplay_time_scale\fR which scales the trace during runtime and will not
+change the output of the merge unlike this option.
+.TP
+.BI merge_blktrace_iters \fR=\fPfloat_list
+This is a whole number option that is index paired with the list of files
+passed to \fBread_iolog\fR. When merging is performed, run each trace for
+the specified number of iterations. For example,
+`\-\-merge_blktrace_iters="2:1"' runs the first trace for two iterations
+and the second trace for one iteration.
+.TP
 .BI replay_no_stall \fR=\fPbool
 When replaying I/O with \fBread_iolog\fR the default behavior is to
 attempt to respect the timestamps within the log and replay them with the
@@ -2230,11 +2264,12 @@ Unfortunately this also breaks the strict time ordering between multiple
 device accesses.
 .TP
 .BI replay_align \fR=\fPint
-Force alignment of I/O offsets and lengths in a trace to this power of 2
-value.
+Force alignment of the byte offsets in a trace to this value. The value
+must be a power of 2.
 .TP
 .BI replay_scale \fR=\fPint
-Scale sector offsets down by this factor when replaying traces.
+Scale byte offsets down by this factor when replaying traces. Should most
+likely use \fBreplay_align\fR as well.
 .SS "Threads, processes and job synchronization"
 .TP
 .BI replay_skip \fR=\fPstr
@@ -2644,6 +2679,12 @@ steady state assessment criteria. All assessments are carried out using only
 data from the rolling collection window. Threshold limits can be expressed
 as a fixed value or as a percentage of the mean in the collection window.
 .RS
+.P
+When using this feature, most jobs should include the \fBtime_based\fR
+and \fBruntime\fR options or the \fBloops\fR option so that fio does not
+stop running after it has covered the full size of the specified file(s)
+or device(s).
+.RS
 .RS
 .TP
 .B iops
@@ -3531,6 +3572,45 @@ Write `length' bytes beginning from `offset'.
 Trim the given file from the given `offset' for `length' bytes.
 .RE
 .RE
+.SH I/O REPLAY \- MERGING TRACES
+Colocation is a common practice used to get the most out of a machine.
+Knowing which workloads play nicely with each other and which ones don't is
+a much harder task. While fio can replay workloads concurrently via multiple
+jobs, it leaves some variability up to the scheduler making results harder to
+reproduce. Merging is a way to make the order of events consistent.
+.P
+Merging is integrated into I/O replay and done when a \fBmerge_blktrace_file\fR
+is specified. The files passed to \fBread_iolog\fR go through the merge
+process, which writes a single merged trace to the specified file. The output file is
+passed on as if it were the only file passed to \fBread_iolog\fR. An example would
+look like:
+.RS
+.P
+$ fio \-\-read_iolog="<file1>:<file2>" \-\-merge_blktrace_file="<output_file>"
+.RE
+.P
+Creating only the merged file can be done by passing the command line argument
+\fB\-\-merge\-blktrace\-only\fR.
+.P
+Scaling traces can be done to see the relative impact of any particular trace
+being slowed down or sped up. \fBmerge_blktrace_scalars\fR takes in a colon
+separated list of percentage scalars. It is index paired with the files passed
+to \fBread_iolog\fR.
+.P
+With scaling, it may be desirable to match the running time of all traces.
+This can be done with \fBmerge_blktrace_iters\fR. It is index paired with
+\fBread_iolog\fR just like \fBmerge_blktrace_scalars\fR.
+.P
+As an example, consider two traces, A and B, each 60s long. If we want to see
+the impact of trace A issuing IOs twice as fast and repeat trace A over the
+runtime of trace B, the following can be done:
+.RS
+.P
+$ fio \-\-read_iolog="<trace_a>:<trace_b>" \-\-merge_blktrace_file="<output_file>" \-\-merge_blktrace_scalars="50:100" \-\-merge_blktrace_iters="2:1"
+.RE
+.P
+This runs trace A at 2x the speed twice for approximately the same runtime as
+a single run of trace B.
 .SH CPU IDLENESS PROFILING
 In some cases, we want to understand CPU overhead in a test. For example, we
 test patches for the specific goodness of whether they reduce CPU usage.
diff --git a/fio.h b/fio.h
index 9e99da194f4f53850ec4299027f88e8bd8048e0d..e394e165078a4fa3da53d397cae2791e8e9fe195 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -57,7 +57,9 @@
 /*
  * "local" is pseudo-policy
  */
-#define MPOL_LOCAL MPOL_MAX
+#ifndef MPOL_LOCAL
+#define MPOL_LOCAL 4
+#endif
 #endif
 
 #ifdef CONFIG_CUDA
@@ -503,7 +505,7 @@ enum {
 #define __fio_stringify_1(x)   #x
 #define __fio_stringify(x)     __fio_stringify_1(x)
 
-extern int exitall_on_terminate;
+extern bool exitall_on_terminate;
 extern unsigned int thread_number;
 extern unsigned int stat_number;
 extern int shm_id;
@@ -512,7 +514,7 @@ extern int output_format;
 extern int append_terse_output;
 extern int temp_stall_ts;
 extern uintptr_t page_mask, page_size;
-extern int read_only;
+extern bool read_only;
 extern int eta_print;
 extern int eta_new_line;
 extern unsigned int eta_interval_msec;
@@ -523,10 +525,10 @@ extern enum fio_cs fio_clock_source;
 extern int fio_clock_source_set;
 extern int warnings_fatal;
 extern int terse_version;
-extern int is_backend;
-extern int is_local_backend;
+extern bool is_backend;
+extern bool is_local_backend;
 extern int nr_clients;
-extern int log_syslog;
+extern bool log_syslog;
 extern int status_interval;
 extern const char fio_version_string[];
 extern char *trigger_file;
@@ -850,4 +852,7 @@ enum {
 extern void exec_trigger(const char *);
 extern void check_trigger_file(void);
 
+extern bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u);
+extern pthread_mutex_t overlap_check;
+
 #endif
index c0f26382721d0be7e01d413b8a33bcca6ac04921..7702193dc0b7eced79b5ade5b9a9d07553b9a10e 100644 (file)
--- a/gettime.c
+++ b/gettime.c
@@ -237,12 +237,11 @@ static unsigned long get_cycles_per_msec(void)
        c_s = get_cpu_clock();
        do {
                __fio_gettime(&e);
+               c_e = get_cpu_clock();
 
                elapsed = utime_since(&s, &e);
-               if (elapsed >= 1280) {
-                       c_e = get_cpu_clock();
+               if (elapsed >= 1280)
                        break;
-               }
        } while (1);
 
        fio_clock_source = old_cs;
diff --git a/gfio.c b/gfio.c
index f59238cd29c816e80b5952333e6794d377c47460..28053968515f99340f51667aaffa337bdf733ebb 100644 (file)
--- a/gfio.c
+++ b/gfio.c
@@ -38,7 +38,7 @@
 #include "gclient.h"
 #include "graph.h"
 
-static int gfio_server_running;
+static bool gfio_server_running;
 static unsigned int gfio_graph_limit = 100;
 
 GdkColor gfio_color_white;
@@ -461,10 +461,10 @@ static int send_job_file(struct gui_entry *ge)
 static void *server_thread(void *arg)
 {
        fio_server_create_sk_key();
-       is_backend = 1;
-       gfio_server_running = 1;
+       is_backend = true;
+       gfio_server_running = true;
        fio_start_server(NULL);
-       gfio_server_running = 0;
+       gfio_server_running = false;
        fio_server_destroy_sk_key();
        return NULL;
 }
@@ -472,7 +472,7 @@ static void *server_thread(void *arg)
 static void gfio_start_server(struct gui *ui)
 {
        if (!gfio_server_running) {
-               gfio_server_running = 1;
+               gfio_server_running = true;
                pthread_create(&ui->server_t, NULL, server_thread, NULL);
                pthread_detach(ui->server_t);
        }
diff --git a/init.c b/init.c
index 09f58a3538d4c4b28974bfc64b384ab111420e19..a2b70c4acb4ba51a7916eda28f90bbcf3cdb10a9 100644 (file)
--- a/init.c
+++ b/init.c
@@ -30,6 +30,7 @@
 #include "idletime.h"
 #include "filelock.h"
 #include "steadystate.h"
+#include "blktrace.h"
 
 #include "oslib/getopt.h"
 #include "oslib/strcasestr.h"
@@ -44,15 +45,16 @@ const char fio_version_string[] = FIO_VERSION;
 
 static char **ini_file;
 static int max_jobs = FIO_MAX_JOBS;
-static int dump_cmdline;
-static int parse_only;
+static bool dump_cmdline;
+static bool parse_only;
+static bool merge_blktrace_only;
 
 static struct thread_data def_thread;
 struct thread_data *threads = NULL;
 static char **job_sections;
 static int nr_job_sections;
 
-int exitall_on_terminate = 0;
+bool exitall_on_terminate = false;
 int output_format = FIO_OUTPUT_NORMAL;
 int eta_print = FIO_ETA_AUTO;
 unsigned int eta_interval_msec = 1000;
@@ -62,13 +64,13 @@ FILE *f_err = NULL;
 char *exec_profile = NULL;
 int warnings_fatal = 0;
 int terse_version = 3;
-int is_backend = 0;
-int is_local_backend = 0;
+bool is_backend = false;
+bool is_local_backend = false;
 int nr_clients = 0;
-int log_syslog = 0;
+bool log_syslog = false;
 
-int write_bw_log = 0;
-int read_only = 0;
+bool write_bw_log = false;
+bool read_only = false;
 int status_interval = 0;
 
 char *trigger_file = NULL;
@@ -286,6 +288,11 @@ static struct option l_opts[FIO_NR_OPTIONS] = {
                .has_arg        = required_argument,
                .val            = 'K',
        },
+       {
+               .name           = (char *) "merge-blktrace-only",
+               .has_arg        = no_argument,
+               .val            = 'A' | FIO_CLIENT_FLAG,
+       },
        {
                .name           = NULL,
        },
@@ -737,19 +744,12 @@ static int fixup_options(struct thread_data *td)
        /*
         * There's no need to check for in-flight overlapping IOs if the job
         * isn't changing data or the maximum iodepth is guaranteed to be 1
+        * when we are not in offload mode
         */
        if (o->serialize_overlap && !(td->flags & TD_F_READ_IOLOG) &&
-           (!(td_write(td) || td_trim(td)) || o->iodepth == 1))
+           (!(td_write(td) || td_trim(td)) || o->iodepth == 1) &&
+           o->io_submit_mode != IO_MODE_OFFLOAD)
                o->serialize_overlap = 0;
-       /*
-        * Currently can't check for overlaps in offload mode
-        */
-       if (o->serialize_overlap && o->io_submit_mode == IO_MODE_OFFLOAD) {
-               log_err("fio: checking for in-flight overlaps when the "
-                       "io_submit_mode is offload is not supported\n");
-               o->serialize_overlap = 0;
-               ret |= warnings_fatal;
-       }
 
        if (o->nr_files > td->files_index)
                o->nr_files = td->files_index;
@@ -1681,6 +1681,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                                char *c1, *c2, *c3, *c4;
                                char *c5 = NULL, *c6 = NULL;
                                int i2p = is_power_of_2(o->kb_base);
+                               struct buf_output out;
 
                                c1 = num2str(o->min_bs[DDIR_READ], o->sig_figs, 1, i2p, N2S_BYTE);
                                c2 = num2str(o->max_bs[DDIR_READ], o->sig_figs, 1, i2p, N2S_BYTE);
@@ -1692,19 +1693,22 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                                        c6 = num2str(o->max_bs[DDIR_TRIM], o->sig_figs, 1, i2p, N2S_BYTE);
                                }
 
-                               log_info("%s: (g=%d): rw=%s, ", td->o.name,
+                               buf_output_init(&out);
+                               __log_buf(&out, "%s: (g=%d): rw=%s, ", td->o.name,
                                                        td->groupid,
                                                        ddir_str(o->td_ddir));
 
                                if (o->bs_is_seq_rand)
-                                       log_info("bs=(R) %s-%s, (W) %s-%s, bs_is_seq_rand, ",
+                                       __log_buf(&out, "bs=(R) %s-%s, (W) %s-%s, bs_is_seq_rand, ",
                                                        c1, c2, c3, c4);
                                else
-                                       log_info("bs=(R) %s-%s, (W) %s-%s, (T) %s-%s, ",
+                                       __log_buf(&out, "bs=(R) %s-%s, (W) %s-%s, (T) %s-%s, ",
                                                        c1, c2, c3, c4, c5, c6);
 
-                               log_info("ioengine=%s, iodepth=%u\n",
+                               __log_buf(&out, "ioengine=%s, iodepth=%u\n",
                                                td->io_ops->name, o->iodepth);
+                               log_info_buf(out.buf, out.buflen);
+                               buf_output_free(&out);
 
                                free(c1);
                                free(c2);
@@ -1720,6 +1724,14 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
        if (td_steadystate_init(td))
                goto err;
 
+       if (o->merge_blktrace_file && !merge_blktrace_iologs(td))
+               goto err;
+
+       if (merge_blktrace_only) {
+               put_job(td);
+               return 0;
+       }
+
        /*
         * recurse add identical jobs, clear numjobs and stonewall options
         * as they don't apply to sub-jobs
@@ -2169,6 +2181,7 @@ static void usage(const char *name)
        printf("  --debug=options\tEnable debug logging. May be one/more of:\n");
        show_debug_categories();
        printf("  --parse-only\t\tParse options only, don't start any IO\n");
+       printf("  --merge-blktrace-only\tMerge blktraces only, don't start any IO\n");
        printf("  --output\t\tWrite output to file\n");
        printf("  --bandwidth-log\tGenerate aggregate bandwidth logs\n");
        printf("  --minimal\t\tMinimal (terse) output\n");
@@ -2467,7 +2480,7 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
        char *ostr = cmd_optstr;
        char *pid_file = NULL;
        void *cur_client = NULL;
-       int backend = 0;
+       bool backend = false;
 
        /*
         * Reset optind handling, since we may call this multiple times
@@ -2493,7 +2506,7 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
                        exit_val = 1;
                        break;
                case 'b':
-                       write_bw_log = 1;
+                       write_bw_log = true;
                        break;
                case 'o': {
                        FILE *tmp;
@@ -2548,7 +2561,7 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
                        break;
                case 's':
                        did_arg = true;
-                       dump_cmdline = 1;
+                       dump_cmdline = true;
                        break;
                case 'r':
                        read_only = 1;
@@ -2614,7 +2627,7 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
                        break;
                case 'P':
                        did_arg = true;
-                       parse_only = 1;
+                       parse_only = true;
                        break;
                case 'x': {
                        size_t new_size;
@@ -2739,8 +2752,8 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
                        }
                        if (optarg)
                                fio_server_set_arg(optarg);
-                       is_backend = 1;
-                       backend = 1;
+                       is_backend = true;
+                       backend = true;
 #else
                        log_err("fio: client/server requires SHM support\n");
                        do_exit++;
@@ -2885,6 +2898,11 @@ int parse_cmd_line(int argc, char *argv[], int client_type)
                        }
                        trigger_timeout /= 1000000;
                        break;
+
+               case 'A':
+                       did_arg = true;
+                       merge_blktrace_only = true;
+                       break;
                case '?':
                        log_err("%s: unrecognized option '%s'\n", argv[0],
                                                        argv[optind - 1]);
index ba02952b1f0f8d0c8ae9bebe31e091a0302046aa..47f606a75409c9a2679443ad02ba7f818e6f6ed4 100644 (file)
@@ -288,6 +288,8 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
 
        assert((io_u->flags & IO_U_F_FLIGHT) == 0);
        io_u_set(td, io_u, IO_U_F_FLIGHT);
+       if (td->o.serialize_overlap && td->o.io_submit_mode == IO_MODE_OFFLOAD)
+               pthread_mutex_unlock(&overlap_check);
 
        assert(fio_file_open(io_u->file));
 
diff --git a/iolog.c b/iolog.c
index f3eedb56252c9b177c2f75d2d50e9a99a2b59696..b72dcf9791cfa242ceace8b4080a03934e9fa554 100644 (file)
--- a/iolog.c
+++ b/iolog.c
@@ -566,7 +566,9 @@ static bool read_iolog2(struct thread_data *td)
 static bool is_socket(const char *path)
 {
        struct stat buf;
-       int r = stat(path, &buf);
+       int r;
+
+       r = stat(path, &buf);
        if (r == -1)
                return false;
 
@@ -575,16 +577,25 @@ static bool is_socket(const char *path)
 
 static int open_socket(const char *path)
 {
-       int fd = socket(AF_UNIX, SOCK_STREAM, 0);
        struct sockaddr_un addr;
+       int ret, fd;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
        if (fd < 0)
                return fd;
+
        addr.sun_family = AF_UNIX;
-       strncpy(addr.sun_path, path, sizeof(addr.sun_path));
-       if (connect(fd, (const struct sockaddr *)&addr, strlen(path) + sizeof(addr.sun_family)) == 0)
+       if (snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path) >=
+           sizeof(addr.sun_path)) {
+               log_err("%s: path name %s is too long for a Unix socket\n",
+                       __func__, path);
+       }
+
+       ret = connect(fd, (const struct sockaddr *)&addr, strlen(path) + sizeof(addr.sun_family));
+       if (!ret)
                return fd;
-       else
-               close(fd);
+
+       close(fd);
        return -1;
 }
 
@@ -593,20 +604,23 @@ static int open_socket(const char *path)
  */
 static bool init_iolog_read(struct thread_data *td)
 {
-       char buffer[256], *p;
+       char buffer[256], *p, *fname;
        FILE *f = NULL;
-       bool ret;
-       char* fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
+
+       fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
        dprint(FD_IO, "iolog: name=%s\n", fname);
 
        if (is_socket(fname)) {
-               int fd = open_socket(fname);
-               if (fd >= 0) {
+               int fd;
+
+               fd = open_socket(fname);
+               if (fd >= 0)
                        f = fdopen(fd, "r");
-               }
        } else
                f = fopen(fname, "r");
+
        free(fname);
+
        if (!f) {
                perror("fopen read iolog");
                return false;
@@ -619,21 +633,20 @@ static bool init_iolog_read(struct thread_data *td)
                fclose(f);
                return false;
        }
-       td->io_log_rfile = f;
+
        /*
         * version 2 of the iolog stores a specific string as the
         * first line, check for that
         */
        if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2))) {
                free_release_files(td);
-               ret = read_iolog2(td);
-       }
-       else {
-               log_err("fio: iolog version 1 is no longer supported\n");
-               ret = false;
+               td->io_log_rfile = f;
+               return read_iolog2(td);
        }
 
-       return ret;
+       log_err("fio: iolog version 1 is no longer supported\n");
+       fclose(f);
+       return false;
 }
 
 /*
index 03e712f53be7a7623f90b12a3eecf57d73fc6f62..27301bd8465848d3cf8af57fb4c28b1078084742 100644 (file)
@@ -110,7 +110,7 @@ void axmap_free(struct axmap *axmap)
 }
 
 /* Allocate memory for a set that can store the numbers 0 .. @nr_bits - 1. */
-struct axmap *axmap_new(unsigned long nr_bits)
+struct axmap *axmap_new(uint64_t nr_bits)
 {
        struct axmap *axmap;
        unsigned int i, levels;
@@ -135,13 +135,14 @@ struct axmap *axmap_new(unsigned long nr_bits)
        for (i = 0; i < axmap->nr_levels; i++) {
                struct axmap_level *al = &axmap->levels[i];
 
+               nr_bits = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+
                al->level = i;
-               al->map_size = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+               al->map_size = nr_bits;
                al->map = malloc(al->map_size * sizeof(unsigned long));
                if (!al->map)
                        goto free_levels;
 
-               nr_bits = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
        }
 
        axmap_reset(axmap);
@@ -164,7 +165,7 @@ free_axmap:
  * returns true.
  */
 static bool axmap_handler(struct axmap *axmap, uint64_t bit_nr,
-                         bool (*func)(struct axmap_level *, unsigned long, unsigned int,
+                         bool (*func)(struct axmap_level *, uint64_t, unsigned int,
                          void *), void *data)
 {
        struct axmap_level *al;
@@ -193,12 +194,12 @@ static bool axmap_handler(struct axmap *axmap, uint64_t bit_nr,
  * returns true.
  */
 static bool axmap_handler_topdown(struct axmap *axmap, uint64_t bit_nr,
-       bool (*func)(struct axmap_level *, unsigned long, unsigned int, void *))
+       bool (*func)(struct axmap_level *, uint64_t, unsigned int, void *))
 {
        int i;
 
        for (i = axmap->nr_levels - 1; i >= 0; i--) {
-               unsigned long index = bit_nr >> (UNIT_SHIFT * i);
+               uint64_t index = bit_nr >> (UNIT_SHIFT * i);
                unsigned long offset = index >> UNIT_SHIFT;
                unsigned int bit = index & BLOCKS_PER_UNIT_MASK;
 
@@ -219,7 +220,7 @@ struct axmap_set_data {
  * the boundary of the element at offset @offset. Return the number of bits
  * that have been set in @__data->set_bits if @al->level == 0.
  */
-static bool axmap_set_fn(struct axmap_level *al, unsigned long offset,
+static bool axmap_set_fn(struct axmap_level *al, uint64_t offset,
                         unsigned int bit, void *__data)
 {
        struct axmap_set_data *data = __data;
@@ -321,10 +322,10 @@ unsigned int axmap_set_nr(struct axmap *axmap, uint64_t bit_nr,
        return set_bits;
 }
 
-static bool axmap_isset_fn(struct axmap_level *al, unsigned long offset,
+static bool axmap_isset_fn(struct axmap_level *al, uint64_t offset,
                           unsigned int bit, void *unused)
 {
-       return (al->map[offset] & (1UL << bit)) != 0;
+       return (al->map[offset] & (1ULL << bit)) != 0;
 }
 
 bool axmap_isset(struct axmap *axmap, uint64_t bit_nr)
index 55349d8731f2e4edfcc01f7aad025e309782acf6..aa5976898c114fb8870865db753200a6140cc978 100644 (file)
@@ -5,7 +5,7 @@
 #include "types.h"
 
 struct axmap;
-struct axmap *axmap_new(unsigned long nr_bits);
+struct axmap *axmap_new(uint64_t nr_bits);
 void axmap_free(struct axmap *bm);
 
 void axmap_set(struct axmap *axmap, uint64_t bit_nr);
index a4f1fb13b64f7b1087970bba0e9c58afafd002db..49e34a8cfa9e3227d75342438fefbaae8804ae85 100644 (file)
@@ -78,7 +78,7 @@ static uint8_t lfsr_taps[64][FIO_MAX_TAPS] =
 
 #define __LFSR_NEXT(__fl, __v)                                         \
        __v = ((__v >> 1) | __fl->cached_bit) ^                 \
-                       (((__v & 1UL) - 1UL) & __fl->xormask);
+                       (((__v & 1ULL) - 1ULL) & __fl->xormask);
 
 static inline void __lfsr_next(struct fio_lfsr *fl, unsigned int spin)
 {
@@ -146,7 +146,7 @@ static uint64_t lfsr_create_xormask(uint8_t *taps)
        uint64_t xormask = 0;
 
        for(i = 0; i < FIO_MAX_TAPS && taps[i] != 0; i++)
-               xormask |= 1UL << (taps[i] - 1);
+               xormask |= 1ULL << (taps[i] - 1);
 
        return xormask;
 }
@@ -161,7 +161,7 @@ static uint8_t *find_lfsr(uint64_t size)
         * take that into account.
         */
        for (i = 3; i < 64; i++)
-               if ((1UL << i) > size)
+               if ((1ULL << i) > size)
                        return lfsr_taps[i];
 
        return NULL;
@@ -241,7 +241,7 @@ int lfsr_init(struct fio_lfsr *fl, uint64_t nums, unsigned long seed,
 
        fl->max_val = nums - 1;
        fl->xormask = lfsr_create_xormask(taps);
-       fl->cached_bit = 1UL << (taps[0] - 1);
+       fl->cached_bit = 1ULL << (taps[0] - 1);
 
        if (prepare_spin(fl, spin))
                return 1;
index 40fb3aec923d3ce6199b64d0cf4c791c92e2151a..1abe22f33794c0ccf6b724e74aef7a1f62271166 100644 (file)
@@ -30,7 +30,7 @@ char *num2str(uint64_t num, int maxlen, int base, int pow2, enum n2s_unit units)
                [N2S_BYTEPERSEC]= "B/s",
                [N2S_BITPERSEC] = "bit/s"
        };
-       const unsigned int thousand[] = { 1000, 1024 };
+       const unsigned int thousand = pow2 ? 1024 : 1000;
        unsigned int modulo;
        int post_index, carry = 0;
        char tmp[32], fmt[32];
@@ -49,7 +49,7 @@ char *num2str(uint64_t num, int maxlen, int base, int pow2, enum n2s_unit units)
                unitprefix = sistr;
 
        for (post_index = 0; base > 1; post_index++)
-               base /= thousand[!!pow2];
+               base /= thousand;
 
        switch (units) {
        case N2S_NONE:
@@ -72,14 +72,14 @@ char *num2str(uint64_t num, int maxlen, int base, int pow2, enum n2s_unit units)
         * Divide by K/Ki until string length of num <= maxlen.
         */
        modulo = -1U;
-       while (post_index < sizeof(sistr)) {
+       while (post_index < ARRAY_SIZE(sistr)) {
                sprintf(tmp, "%llu", (unsigned long long) num);
                if (strlen(tmp) <= maxlen)
                        break;
 
-               modulo = num % thousand[!!pow2];
-               num /= thousand[!!pow2];
-               carry = modulo >= thousand[!!pow2] / 2;
+               modulo = num % thousand;
+               num /= thousand;
+               carry = modulo >= thousand / 2;
                post_index++;
        }
 
@@ -110,9 +110,9 @@ done:
         * Fill in everything and return the result.
         */
        assert(maxlen - strlen(tmp) - 1 > 0);
-       assert(modulo < thousand[!!pow2]);
+       assert(modulo < thousand);
        sprintf(fmt, "%%.%df", (int)(maxlen - strlen(tmp) - 1));
-       sprintf(tmp, fmt, (double)modulo / (double)thousand[!!pow2]);
+       sprintf(tmp, fmt, (double)modulo / (double)thousand);
 
        sprintf(buf, "%llu.%s%s%s", (unsigned long long) num, &tmp[2],
                        unitprefix[post_index], unitstr[units]);
index 46ffe4fb0c6b9108685254cbe7f110983eb27088..99846a8d0fdd553e9fa3faea39f73c6971da13ac 100644 (file)
@@ -156,7 +156,7 @@ void __fill_random_buf_percentage(unsigned long seed, void *buf,
                /*
                 * Fill random chunk
                 */
-               this_len = (segment * (100 - percentage)) / 100;
+               this_len = ((unsigned long long)segment * (100 - percentage)) / 100;
                if (this_len > len)
                        this_len = len;
 
index 1ff8568094dcecc34ad8350b9f364051de756dd5..321a4fb9645e8995e1145758ec75530aeea75a16 100644 (file)
@@ -8,7 +8,7 @@
 
 static void zipf_update(struct zipf_state *zs)
 {
-       unsigned long to_gen;
+       uint64_t to_gen;
        unsigned int i;
 
        /*
@@ -22,7 +22,7 @@ static void zipf_update(struct zipf_state *zs)
                zs->zetan += pow(1.0 / (double) (i + 1), zs->theta);
 }
 
-static void shared_rand_init(struct zipf_state *zs, unsigned long nranges,
+static void shared_rand_init(struct zipf_state *zs, uint64_t nranges,
                             unsigned int seed)
 {
        memset(zs, 0, sizeof(*zs));
@@ -32,7 +32,7 @@ static void shared_rand_init(struct zipf_state *zs, unsigned long nranges,
        zs->rand_off = __rand(&zs->rand);
 }
 
-void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta,
+void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta,
               unsigned int seed)
 {
        shared_rand_init(zs, nranges, seed);
@@ -43,7 +43,7 @@ void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta,
        zipf_update(zs);
 }
 
-unsigned long long zipf_next(struct zipf_state *zs)
+uint64_t zipf_next(struct zipf_state *zs)
 {
        double alpha, eta, rand_uni, rand_z;
        unsigned long long n = zs->nranges;
@@ -70,14 +70,14 @@ unsigned long long zipf_next(struct zipf_state *zs)
        return (val + zs->rand_off) % zs->nranges;
 }
 
-void pareto_init(struct zipf_state *zs, unsigned long nranges, double h,
+void pareto_init(struct zipf_state *zs, uint64_t nranges, double h,
                 unsigned int seed)
 {
        shared_rand_init(zs, nranges, seed);
        zs->pareto_pow = log(h) / log(1.0 - h);
 }
 
-unsigned long long pareto_next(struct zipf_state *zs)
+uint64_t pareto_next(struct zipf_state *zs)
 {
        double rand = (double) __rand(&zs->rand) / (double) FRAND32_MAX;
        unsigned long long n;
index a4aa163c80bc1dde7ad46e2f8ae85f32935d3456..16b65f57f146f0247581f52f2aee8fcf455b1694 100644 (file)
@@ -16,11 +16,11 @@ struct zipf_state {
        bool disable_hash;
 };
 
-void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta, unsigned int seed);
-unsigned long long zipf_next(struct zipf_state *zs);
+void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta, unsigned int seed);
+uint64_t zipf_next(struct zipf_state *zs);
 
-void pareto_init(struct zipf_state *zs, unsigned long nranges, double h, unsigned int seed);
-unsigned long long pareto_next(struct zipf_state *zs);
+void pareto_init(struct zipf_state *zs, uint64_t nranges, double h, unsigned int seed);
+uint64_t pareto_next(struct zipf_state *zs);
 void zipf_disable_hash(struct zipf_state *zs);
 
 #endif
index 534233bdbc297251d040f605a84b37d21d1b51be..98187def98fbe842e90ae6d4a85a9fb3f358fd10 100644 (file)
--- a/options.c
+++ b/options.c
@@ -482,7 +482,7 @@ static int str_rwmix_write_cb(void *data, unsigned long long *val)
 
 static int str_exitall_cb(void)
 {
-       exitall_on_terminate = 1;
+       exitall_on_terminate = true;
        return 0;
 }
 
@@ -1155,7 +1155,7 @@ static int str_steadystate_cb(void *data, const char *str)
  * is escaped with a '\', then that ':' is part of the filename and does not
  * indicate a new file.
  */
-static char *get_next_name(char **ptr)
+char *get_next_str(char **ptr)
 {
        char *str = *ptr;
        char *p, *start;
@@ -1197,14 +1197,14 @@ static char *get_next_name(char **ptr)
 }
 
 
-static int get_max_name_idx(char *input)
+int get_max_str_idx(char *input)
 {
        unsigned int cur_idx;
        char *str, *p;
 
        p = str = strdup(input);
        for (cur_idx = 0; ; cur_idx++)
-               if (get_next_name(&str) == NULL)
+               if (get_next_str(&str) == NULL)
                        break;
 
        free(p);
@@ -1224,9 +1224,9 @@ int set_name_idx(char *target, size_t tlen, char *input, int index,
 
        p = str = strdup(input);
 
-       index %= get_max_name_idx(input);
+       index %= get_max_str_idx(input);
        for (cur_idx = 0; cur_idx <= index; cur_idx++)
-               fname = get_next_name(&str);
+               fname = get_next_str(&str);
 
        if (client_sockaddr_str[0] && unique_filename) {
                len = snprintf(target, tlen, "%s/%s.", fname,
@@ -1247,9 +1247,9 @@ char* get_name_by_idx(char *input, int index)
 
        p = str = strdup(input);
 
-       index %= get_max_name_idx(input);
+       index %= get_max_str_idx(input);
        for (cur_idx = 0; cur_idx <= index; cur_idx++)
-               fname = get_next_name(&str);
+               fname = get_next_str(&str);
 
        fname = strdup(fname);
        free(p);
@@ -1273,7 +1273,7 @@ static int str_filename_cb(void *data, const char *input)
        if (!td->files_index)
                td->o.nr_files = 0;
 
-       while ((fname = get_next_name(&str)) != NULL) {
+       while ((fname = get_next_str(&str)) != NULL) {
                if (!strlen(fname))
                        break;
                add_file(td, fname, 0, 1);
@@ -1294,7 +1294,7 @@ static int str_directory_cb(void *data, const char fio_unused *unused)
                return 0;
 
        p = str = strdup(td->o.directory);
-       while ((dirname = get_next_name(&str)) != NULL) {
+       while ((dirname = get_next_str(&str)) != NULL) {
                if (lstat(dirname, &sb) < 0) {
                        ret = errno;
 
@@ -1828,11 +1828,6 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                            .help = "RDMA IO engine",
                          },
 #endif
-#ifdef CONFIG_FUSION_AW
-                         { .ival = "fusion-aw-sync",
-                           .help = "Fusion-io atomic write engine",
-                         },
-#endif
 #ifdef CONFIG_LINUX_EXT4_MOVE_EXTENT
                          { .ival = "e4defrag",
                            .help = "ext4 defrag engine",
@@ -2207,14 +2202,6 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_RANDOM,
        },
-       {
-               .name   = "use_os_rand",
-               .lname  = "Use OS random",
-               .type   = FIO_OPT_DEPRECATED,
-               .off1   = offsetof(struct thread_options, dep_use_os_rand),
-               .category = FIO_OPT_C_IO,
-               .group  = FIO_OPT_G_RANDOM,
-       },
        {
                .name   = "norandommap",
                .lname  = "No randommap",
@@ -3203,6 +3190,35 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_IOLOG,
        },
+       {
+               .name   = "merge_blktrace_file",
+               .lname  = "Merged blktrace output filename",
+               .type   = FIO_OPT_STR_STORE,
+               .off1   = offsetof(struct thread_options, merge_blktrace_file),
+               .help   = "Merged blktrace output filename",
+               .category = FIO_OPT_C_IO,
+               .group = FIO_OPT_G_IOLOG,
+       },
+       {
+               .name   = "merge_blktrace_scalars",
+               .lname  = "Percentage to scale each trace",
+               .type   = FIO_OPT_FLOAT_LIST,
+               .off1   = offsetof(struct thread_options, merge_blktrace_scalars),
+               .maxlen = FIO_IO_U_LIST_MAX_LEN,
+               .help   = "Percentage to scale each trace",
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_IOLOG,
+       },
+       {
+               .name   = "merge_blktrace_iters",
+               .lname  = "Number of iterations to run per trace",
+               .type   = FIO_OPT_FLOAT_LIST,
+               .off1   = offsetof(struct thread_options, merge_blktrace_iters),
+               .maxlen = FIO_IO_U_LIST_MAX_LEN,
+               .help   = "Number of iterations to run per trace",
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_IOLOG,
+       },
        {
                .name   = "exec_prerun",
                .lname  = "Pre-execute runnable",
index 8fdd1363277500169b087ed79e06fc74d52fa24e..5276f31e6818673a338fcbc3ef18997263b0abd0 100644 (file)
--- a/options.h
+++ b/options.h
@@ -16,6 +16,8 @@ void add_opt_posval(const char *, const char *, const char *);
 void del_opt_posval(const char *, const char *);
 struct thread_data;
 void fio_options_free(struct thread_data *);
+char *get_next_str(char **ptr);
+int get_max_str_idx(char *input);
 char* get_name_by_idx(char *input, int index);
 int set_name_idx(char *, size_t, char *, int, bool);
 
index e204d6f27f270f1e63a06809fb4fdaffb2cbe1e4..1aab96e08d40ad9656eaa41ba1626a348bfa0d52 100644 (file)
@@ -11,7 +11,6 @@
 #include "../file.h"
 
 #define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
 
 #define OS_MAP_ANON            MAP_ANON
index 1483275e9e631aa751c690995308085a3a47fbca..3c050776249c426a75834e3cb244f3a844cc09aa 100644 (file)
@@ -201,23 +201,6 @@ static inline unsigned long long os_phys_mem(void)
        return (unsigned long long) pages * (unsigned long long) pagesize;
 }
 
-typedef struct { unsigned short r[3]; } os_random_state_t;
-
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
-       rs->r[0] = seed & 0xffff;
-       seed >>= 16;
-       rs->r[1] = seed & 0xffff;
-       seed >>= 16;
-       rs->r[2] = seed & 0xffff;
-       seed48(rs->r);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
-       return nrand48(rs->r);
-}
-
 #ifdef O_NOATIME
 #define FIO_O_NOATIME  O_NOATIME
 #else
index e80ad8cd906ea72b4bde0b66b7bc7f2a462487ba..eb92521fa5ed4edab58134f1dbdff17c88fc26a2 100644 (file)
@@ -25,7 +25,6 @@
 #include "../lib/types.h"
 
 #define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
 #define FIO_HAVE_FS_STAT
 #define FIO_HAVE_TRIM
index 97bc8ae5f4e2658d99d590d90f2919098df7ed96..789da178afa1671bd788be57964c4368ef46ee23 100644 (file)
@@ -16,7 +16,6 @@
 #include "../file.h"
 
 #define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
 #define FIO_HAVE_CHARDEV_SIZE
 #define FIO_HAVE_FS_STAT
index 515a5256878a1d57f6f2fb651f05c2fa66b2af7b..c1dafe42ee55c0bf2317304daa828e138691e312 100644 (file)
@@ -20,7 +20,6 @@
 #include "../file.h"
 
 #define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
 #define FIO_HAVE_CHARDEV_SIZE
 
index 6b63d123956afa6f28f20b3e7ff32e583f652e99..ba58bf7d962380413f15441d259eea8573f3c26e 100644 (file)
@@ -60,8 +60,6 @@
 
 typedef cpu_set_t os_cpu_mask_t;
 
-typedef struct drand48_data os_random_state_t;
-
 #ifdef CONFIG_3ARG_AFFINITY
 #define fio_setaffinity(pid, cpumask)          \
        sched_setaffinity((pid), sizeof(cpumask), &(cpumask))
@@ -170,19 +168,6 @@ static inline unsigned long long os_phys_mem(void)
        return (unsigned long long) pages * (unsigned long long) pagesize;
 }
 
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
-       srand48_r(seed, rs);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
-       long val;
-
-       lrand48_r(rs, &val);
-       return val;
-}
-
 static inline int fio_lookup_raw(dev_t dev, int *majdev, int *mindev)
 {
        struct raw_config_request rq;
index 92a60ee98766f0fc1526cd440e38a687e6cc02ae..0b9c870761df96677a90a80f183a1c850fec2a36 100644 (file)
@@ -16,7 +16,6 @@
 
 #include "../file.h"
 
-#define FIO_USE_GENERIC_RAND
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
 #define FIO_HAVE_GETTID
 #define FIO_HAVE_CHARDEV_SIZE
index 682a11c95f1ccb91df421e424923dbd2c9703761..c06261d4a2a8407d7d7640df6c19d38a4e9f60a3 100644 (file)
@@ -21,7 +21,6 @@
 #include "../file.h"
 
 #define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
 #define FIO_HAVE_FS_STAT
 #define FIO_HAVE_GETTID
index b4c02c9bf236803227cdb053f35975bb2b6f26d6..70f58b49ce3572a9f24b414a998ed79e6ab32a49 100644 (file)
@@ -20,7 +20,6 @@
 
 #include "../file.h"
 
-#define FIO_USE_GENERIC_RAND
 #define FIO_USE_GENERIC_INIT_RANDOM_STATE
 #define FIO_HAVE_FS_STAT
 #define FIO_HAVE_GETTID
index 2425ab9e0efd9b265e15bc24fa575fe78609ec26..1a411af664e8de947fc0497f59a12648e8d60fe0 100644 (file)
@@ -47,7 +47,6 @@ struct solaris_rand_seed {
 #define FIO_OS_HAS_CTIME_R
 
 typedef psetid_t os_cpu_mask_t;
-typedef struct solaris_rand_seed os_random_state_t;
 
 static inline int chardev_size(struct fio_file *f, unsigned long long *bytes)
 {
@@ -92,21 +91,6 @@ static inline unsigned long long get_fs_free_size(const char *path)
        return ret;
 }
 
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
-       rs->r[0] = seed & 0xffff;
-       seed >>= 16;
-       rs->r[1] = seed & 0xffff;
-       seed >>= 16;
-       rs->r[2] = seed & 0xffff;
-       seed48(rs->r);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
-       return nrand48(rs->r);
-}
-
 #define FIO_OS_DIRECTIO
 extern int directio(int, int);
 static inline int fio_set_odirect(struct fio_file *f)
index aad446e7b71da5f6c8a5846cfe9e8eae5386e5f7..ef955dc36f4fd18fe1cab5f7ffc38da750e1352b 100644 (file)
@@ -35,7 +35,6 @@ int rand_r(unsigned *);
 #define FIO_HAVE_CPU_AFFINITY
 #define FIO_HAVE_CHARDEV_SIZE
 #define FIO_HAVE_GETTID
-#define FIO_USE_GENERIC_RAND
 
 #define FIO_PREFERRED_ENGINE           "windowsaio"
 #define FIO_PREFERRED_CLOCK_SOURCE     CS_CGETTIME
diff --git a/os/os.h b/os/os.h
index becc41033e4a8c8db674abf78b2c9ed42f003c69..0b182c4a6058a5e4e881538ddbb0de55f02a7ca7 100644 (file)
--- a/os/os.h
+++ b/os/os.h
@@ -298,23 +298,6 @@ static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
 }
 #endif
 
-#ifdef FIO_USE_GENERIC_RAND
-typedef unsigned int os_random_state_t;
-
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
-       srand(seed);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
-       long val;
-
-       val = rand_r(rs);
-       return val;
-}
-#endif
-
 #ifdef FIO_USE_GENERIC_INIT_RANDOM_STATE
 static inline int init_random_seeds(unsigned long *rand_seeds, int size)
 {
index e8580d91c6db9203b7576f5f953e20a8368b8458..9308ba8be829c62b88cb06470a068cc2aef3f7dc 100755 (executable)
                   <File Source="..\..\examples\filecreate-ioengine.fio" />
                 </Component>
                 <Component>
-                  <File Source="..\..\examples\fio-rand-read.job" />
+                  <File Source="..\..\examples\fio-rand-read.fio" />
                 </Component>
                 <Component>
-                  <File Source="..\..\examples\fio-rand-RW.job" />
+                  <File Source="..\..\examples\fio-rand-RW.fio" />
                 </Component>
                 <Component>
-                  <File Source="..\..\examples\fio-rand-write.job" />
+                  <File Source="..\..\examples\fio-rand-write.fio" />
                 </Component>
                 <Component>
-                  <File Source="..\..\examples\fio-seq-read.job" />
+                  <File Source="..\..\examples\fio-seq-read.fio" />
                 </Component>
                 <Component>
-                  <File Source="..\..\examples\fio-seq-RW.job" />
+                  <File Source="..\..\examples\fio-seq-RW.fio" />
                 </Component>
                 <Component>
-                  <File Source="..\..\examples\fio-seq-write.job" />
+                  <File Source="..\..\examples\fio-seq-write.fio" />
                 </Component>
                 <Component>
                   <File Source="..\..\examples\fixed-rate-submission.fio" />
@@ -74,9 +74,6 @@
                 <Component>
                   <File Source="..\..\examples\ftruncate.fio" />
                 </Component>
-                <Component>
-                  <File Source="..\..\examples\fusion-aw-sync.fio" />
-                </Component>
                 <Component>
                   <File Source="..\..\examples\gfapi.fio" />
                 </Component>
             <ComponentRef Id="enospc_pressure.fio" />
             <ComponentRef Id="falloc.fio" />
             <ComponentRef Id="filecreate_ioengine.fio"/>
-            <ComponentRef Id="fio_rand_read.job"/>
-            <ComponentRef Id="fio_rand_RW.job"/>
-            <ComponentRef Id="fio_rand_write.job"/>
-            <ComponentRef Id="fio_seq_read.job"/>
-            <ComponentRef Id="fio_seq_RW.job"/>
-            <ComponentRef Id="fio_seq_write.job"/>
+            <ComponentRef Id="fio_rand_read.fio"/>
+            <ComponentRef Id="fio_rand_RW.fio"/>
+            <ComponentRef Id="fio_rand_write.fio"/>
+            <ComponentRef Id="fio_seq_read.fio"/>
+            <ComponentRef Id="fio_seq_RW.fio"/>
+            <ComponentRef Id="fio_seq_write.fio"/>
             <ComponentRef Id="fixed_rate_submission.fio" />
             <ComponentRef Id="flow.fio" />
             <ComponentRef Id="fsx.fio" />
             <ComponentRef Id="ftruncate.fio"/>
-            <ComponentRef Id="fusion_aw_sync.fio" />
             <ComponentRef Id="gfapi.fio" />
             <ComponentRef Id="gpudirect_rdmaio_client.fio"/>
             <ComponentRef Id="gpudirect_rdmaio_server.fio"/>
index d33250de0f85ab9f0c1ccb6b0df36feb1c805b17..fd1d5582f1b33c73b84c21f4fffec4c8aafd79b9 100644 (file)
@@ -29,87 +29,151 @@ extern unsigned long mtime_since_now(struct timespec *);
 extern void fio_gettime(struct timespec *, void *);
 
 /* These aren't defined in the MinGW headers */
-HRESULT WINAPI StringCchCopyA(
-  char *pszDest,
-  size_t cchDest,
-  const char *pszSrc);
-
-HRESULT WINAPI StringCchPrintfA(
-  char *pszDest,
-  size_t cchDest,
-  const char *pszFormat,
-  ...);
+HRESULT WINAPI StringCchCopyA(char *pszDest, size_t cchDest, const char *pszSrc);
+HRESULT WINAPI StringCchPrintfA(char *pszDest, size_t cchDest, const char *pszFormat, ...);
 
 int win_to_posix_error(DWORD winerr)
 {
-       switch (winerr)
-       {
-       case ERROR_FILE_NOT_FOUND:              return ENOENT;
-       case ERROR_PATH_NOT_FOUND:              return ENOENT;
-       case ERROR_ACCESS_DENIED:               return EACCES;
-       case ERROR_INVALID_HANDLE:              return EBADF;
-       case ERROR_NOT_ENOUGH_MEMORY:   return ENOMEM;
-       case ERROR_INVALID_DATA:                return EINVAL;
-       case ERROR_OUTOFMEMORY:                 return ENOMEM;
-       case ERROR_INVALID_DRIVE:               return ENODEV;
-       case ERROR_NOT_SAME_DEVICE:             return EXDEV;
-       case ERROR_WRITE_PROTECT:               return EROFS;
-       case ERROR_BAD_UNIT:                    return ENODEV;
-       case ERROR_SHARING_VIOLATION:   return EACCES;
-       case ERROR_LOCK_VIOLATION:              return EACCES;
-       case ERROR_SHARING_BUFFER_EXCEEDED:     return ENOLCK;
-       case ERROR_HANDLE_DISK_FULL:    return ENOSPC;
-       case ERROR_NOT_SUPPORTED:               return ENOSYS;
-       case ERROR_FILE_EXISTS:                 return EEXIST;
-       case ERROR_CANNOT_MAKE:                 return EPERM;
-       case ERROR_INVALID_PARAMETER:   return EINVAL;
-       case ERROR_NO_PROC_SLOTS:               return EAGAIN;
-       case ERROR_BROKEN_PIPE:                 return EPIPE;
-       case ERROR_OPEN_FAILED:                 return EIO;
-       case ERROR_NO_MORE_SEARCH_HANDLES:      return ENFILE;
-       case ERROR_CALL_NOT_IMPLEMENTED:        return ENOSYS;
-       case ERROR_INVALID_NAME:                return ENOENT;
-       case ERROR_WAIT_NO_CHILDREN:    return ECHILD;
-       case ERROR_CHILD_NOT_COMPLETE:  return EBUSY;
-       case ERROR_DIR_NOT_EMPTY:               return ENOTEMPTY;
-       case ERROR_SIGNAL_REFUSED:              return EIO;
-       case ERROR_BAD_PATHNAME:                return ENOENT;
-       case ERROR_SIGNAL_PENDING:              return EBUSY;
-       case ERROR_MAX_THRDS_REACHED:   return EAGAIN;
-       case ERROR_BUSY:                                return EBUSY;
-       case ERROR_ALREADY_EXISTS:              return EEXIST;
-       case ERROR_NO_SIGNAL_SENT:              return EIO;
-       case ERROR_FILENAME_EXCED_RANGE:        return EINVAL;
-       case ERROR_META_EXPANSION_TOO_LONG:     return EINVAL;
-       case ERROR_INVALID_SIGNAL_NUMBER:       return EINVAL;
-       case ERROR_THREAD_1_INACTIVE:   return EINVAL;
-       case ERROR_BAD_PIPE:                    return EINVAL;
-       case ERROR_PIPE_BUSY:                   return EBUSY;
-       case ERROR_NO_DATA:                             return EPIPE;
-       case ERROR_MORE_DATA:                   return EAGAIN;
-       case ERROR_DIRECTORY:                   return ENOTDIR;
-       case ERROR_PIPE_CONNECTED:              return EBUSY;
-       case ERROR_NO_TOKEN:                    return EINVAL;
-       case ERROR_PROCESS_ABORTED:             return EFAULT;
-       case ERROR_BAD_DEVICE:                  return ENODEV;
-       case ERROR_BAD_USERNAME:                return EINVAL;
-       case ERROR_OPEN_FILES:                  return EAGAIN;
-       case ERROR_ACTIVE_CONNECTIONS:  return EAGAIN;
-       case ERROR_DEVICE_IN_USE:               return EAGAIN;
-       case ERROR_INVALID_AT_INTERRUPT_TIME:   return EINTR;
-       case ERROR_IO_DEVICE:                   return EIO;
-       case ERROR_NOT_OWNER:                   return EPERM;
-       case ERROR_END_OF_MEDIA:                return ENOSPC;
-       case ERROR_EOM_OVERFLOW:                return ENOSPC;
-       case ERROR_BEGINNING_OF_MEDIA:  return ESPIPE;
-       case ERROR_SETMARK_DETECTED:    return ESPIPE;
-       case ERROR_NO_DATA_DETECTED:    return ENOSPC;
-       case ERROR_POSSIBLE_DEADLOCK:   return EDEADLOCK;
-       case ERROR_CRC:                                 return EIO;
-       case ERROR_NEGATIVE_SEEK:               return EINVAL;
-       case ERROR_DISK_FULL:                   return ENOSPC;
-       case ERROR_NOACCESS:                    return EFAULT;
-       case ERROR_FILE_INVALID:                return ENXIO;
+       switch (winerr) {
+       case ERROR_SUCCESS:
+               return 0;
+       case ERROR_FILE_NOT_FOUND:
+               return ENOENT;
+       case ERROR_PATH_NOT_FOUND:
+               return ENOENT;
+       case ERROR_ACCESS_DENIED:
+               return EACCES;
+       case ERROR_INVALID_HANDLE:
+               return EBADF;
+       case ERROR_NOT_ENOUGH_MEMORY:
+               return ENOMEM;
+       case ERROR_INVALID_DATA:
+               return EINVAL;
+       case ERROR_OUTOFMEMORY:
+               return ENOMEM;
+       case ERROR_INVALID_DRIVE:
+               return ENODEV;
+       case ERROR_NOT_SAME_DEVICE:
+               return EXDEV;
+       case ERROR_WRITE_PROTECT:
+               return EROFS;
+       case ERROR_BAD_UNIT:
+               return ENODEV;
+       case ERROR_NOT_READY:
+               return EAGAIN;
+       case ERROR_SHARING_VIOLATION:
+               return EACCES;
+       case ERROR_LOCK_VIOLATION:
+               return EACCES;
+       case ERROR_SHARING_BUFFER_EXCEEDED:
+               return ENOLCK;
+       case ERROR_HANDLE_DISK_FULL:
+               return ENOSPC;
+       case ERROR_NOT_SUPPORTED:
+               return ENOSYS;
+       case ERROR_FILE_EXISTS:
+               return EEXIST;
+       case ERROR_CANNOT_MAKE:
+               return EPERM;
+       case ERROR_INVALID_PARAMETER:
+               return EINVAL;
+       case ERROR_NO_PROC_SLOTS:
+               return EAGAIN;
+       case ERROR_BROKEN_PIPE:
+               return EPIPE;
+       case ERROR_OPEN_FAILED:
+               return EIO;
+       case ERROR_NO_MORE_SEARCH_HANDLES:
+               return ENFILE;
+       case ERROR_CALL_NOT_IMPLEMENTED:
+               return ENOSYS;
+       case ERROR_INVALID_NAME:
+               return ENOENT;
+       case ERROR_WAIT_NO_CHILDREN:
+               return ECHILD;
+       case ERROR_CHILD_NOT_COMPLETE:
+               return EBUSY;
+       case ERROR_DIR_NOT_EMPTY:
+               return ENOTEMPTY;
+       case ERROR_SIGNAL_REFUSED:
+               return EIO;
+       case ERROR_BAD_PATHNAME:
+               return ENOENT;
+       case ERROR_SIGNAL_PENDING:
+               return EBUSY;
+       case ERROR_MAX_THRDS_REACHED:
+               return EAGAIN;
+       case ERROR_BUSY:
+               return EBUSY;
+       case ERROR_ALREADY_EXISTS:
+               return EEXIST;
+       case ERROR_NO_SIGNAL_SENT:
+               return EIO;
+       case ERROR_FILENAME_EXCED_RANGE:
+               return EINVAL;
+       case ERROR_META_EXPANSION_TOO_LONG:
+               return EINVAL;
+       case ERROR_INVALID_SIGNAL_NUMBER:
+               return EINVAL;
+       case ERROR_THREAD_1_INACTIVE:
+               return EINVAL;
+       case ERROR_BAD_PIPE:
+               return EINVAL;
+       case ERROR_PIPE_BUSY:
+               return EBUSY;
+       case ERROR_NO_DATA:
+               return EPIPE;
+       case ERROR_MORE_DATA:
+               return EAGAIN;
+       case ERROR_DIRECTORY:
+               return ENOTDIR;
+       case ERROR_PIPE_CONNECTED:
+               return EBUSY;
+       case ERROR_NO_TOKEN:
+               return EINVAL;
+       case ERROR_PROCESS_ABORTED:
+               return EFAULT;
+       case ERROR_BAD_DEVICE:
+               return ENODEV;
+       case ERROR_BAD_USERNAME:
+               return EINVAL;
+       case ERROR_OPEN_FILES:
+               return EAGAIN;
+       case ERROR_ACTIVE_CONNECTIONS:
+               return EAGAIN;
+       case ERROR_DEVICE_IN_USE:
+               return EBUSY;
+       case ERROR_INVALID_AT_INTERRUPT_TIME:
+               return EINTR;
+       case ERROR_IO_DEVICE:
+               return EIO;
+       case ERROR_NOT_OWNER:
+               return EPERM;
+       case ERROR_END_OF_MEDIA:
+               return ENOSPC;
+       case ERROR_EOM_OVERFLOW:
+               return ENOSPC;
+       case ERROR_BEGINNING_OF_MEDIA:
+               return ESPIPE;
+       case ERROR_SETMARK_DETECTED:
+               return ESPIPE;
+       case ERROR_NO_DATA_DETECTED:
+               return ENOSPC;
+       case ERROR_POSSIBLE_DEADLOCK:
+               return EDEADLOCK;
+       case ERROR_CRC:
+               return EIO;
+       case ERROR_NEGATIVE_SEEK:
+               return EINVAL;
+       case ERROR_DISK_FULL:
+               return ENOSPC;
+       case ERROR_NOACCESS:
+               return EFAULT;
+       case ERROR_FILE_INVALID:
+               return ENXIO;
+       default:
+               log_err("fio: windows error %d not handled\n", winerr);
+               return EIO;
        }
 
        return winerr;
@@ -138,8 +202,7 @@ int GetNumLogicalProcessors(void)
                }
        }
 
-       for (i = 0; i < len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); i++)
-       {
+       for (i = 0; i < len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); i++) {
                if (processor_info[i].Relationship == RelationProcessorCore)
                        num_processors += hweight64(processor_info[i].ProcessorMask);
        }
@@ -155,8 +218,7 @@ long sysconf(int name)
        SYSTEM_INFO sysInfo;
        MEMORYSTATUSEX status;
 
-       switch (name)
-       {
+       switch (name) {
        case _SC_NPROCESSORS_ONLN:
                val = GetNumLogicalProcessors();
                if (val == -1)
@@ -226,29 +288,36 @@ char *dlerror(void)
 /* Copied from http://blogs.msdn.com/b/joshpoley/archive/2007/12/19/date-time-formats-and-conversions.aspx */
 void Time_tToSystemTime(time_t dosTime, SYSTEMTIME *systemTime)
 {
-    FILETIME utcFT;
-    LONGLONG jan1970;
+       FILETIME utcFT;
+       LONGLONG jan1970;
        SYSTEMTIME tempSystemTime;
 
-    jan1970 = Int32x32To64(dosTime, 10000000) + 116444736000000000;
-    utcFT.dwLowDateTime = (DWORD)jan1970;
-    utcFT.dwHighDateTime = jan1970 >> 32;
+       jan1970 = Int32x32To64(dosTime, 10000000) + 116444736000000000;
+       utcFT.dwLowDateTime = (DWORD)jan1970;
+       utcFT.dwHighDateTime = jan1970 >> 32;
 
-    FileTimeToSystemTime((FILETIME*)&utcFT, &tempSystemTime);
+       FileTimeToSystemTime((FILETIME*)&utcFT, &tempSystemTime);
        SystemTimeToTzSpecificLocalTime(NULL, &tempSystemTime, systemTime);
 }
 
-char* ctime_r(const time_t *t, char *buf)
+char *ctime_r(const time_t *t, char *buf)
 {
-    SYSTEMTIME systime;
-    const char * const dayOfWeek[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
-    const char * const monthOfYear[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
-
-    Time_tToSystemTime(*t, &systime);
-    /* We don't know how long `buf` is, but assume it's rounded up from the minimum of 25 to 32 */
-    StringCchPrintfA(buf, 31, "%s %s %d %02d:%02d:%02d %04d\n", dayOfWeek[systime.wDayOfWeek % 7], monthOfYear[(systime.wMonth - 1) % 12],
-                                                                                systime.wDay, systime.wHour, systime.wMinute, systime.wSecond, systime.wYear);
-    return buf;
+       SYSTEMTIME systime;
+       const char * const dayOfWeek[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+       const char * const monthOfYear[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+       Time_tToSystemTime(*t, &systime);
+
+       /*
+        * We don't know how long `buf` is, but assume it's rounded up from
+        * the minimum of 25 to 32
+        */
+       StringCchPrintfA(buf, 31, "%s %s %d %02d:%02d:%02d %04d\n",
+                               dayOfWeek[systime.wDayOfWeek % 7],
+                               monthOfYear[(systime.wMonth - 1) % 12],
+                               systime.wDay, systime.wHour, systime.wMinute,
+                               systime.wSecond, systime.wYear);
+       return buf;
 }
 
 int gettimeofday(struct timeval *restrict tp, void *restrict tzp)
@@ -275,8 +344,7 @@ int gettimeofday(struct timeval *restrict tp, void *restrict tzp)
        return 0;
 }
 
-int sigaction(int sig, const struct sigaction *act,
-               struct sigaction *oact)
+int sigaction(int sig, const struct sigaction *act, struct sigaction *oact)
 {
        int rc = 0;
        void (*prev_handler)(int);
@@ -291,13 +359,12 @@ int sigaction(int sig, const struct sigaction *act,
        return rc;
 }
 
-int lstat(const char * path, struct stat * buf)
+int lstat(const char *path, struct stat *buf)
 {
        return stat(path, buf);
 }
 
-void *mmap(void *addr, size_t len, int prot, int flags,
-               int fildes, off_t off)
+void *mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
 {
        DWORD vaProt = 0;
        DWORD mapAccess = 0;
@@ -323,25 +390,20 @@ void *mmap(void *addr, size_t len, int prot, int flags,
        lenhigh = len >> 16;
        /* If the low DWORD is zero and the high DWORD is non-zero, `CreateFileMapping`
           will return ERROR_INVALID_PARAMETER. To avoid this, set both to zero. */
-       if (lenlow == 0) {
+       if (lenlow == 0)
                lenhigh = 0;
-       }
 
-       if (flags & MAP_ANON || flags & MAP_ANONYMOUS)
-       {
+       if (flags & MAP_ANON || flags & MAP_ANONYMOUS) {
                allocAddr = VirtualAlloc(addr, len, MEM_COMMIT, vaProt);
                if (allocAddr == NULL)
                        errno = win_to_posix_error(GetLastError());
-       }
-       else
-       {
-               hMap = CreateFileMapping((HANDLE)_get_osfhandle(fildes), NULL, vaProt, lenhigh, lenlow, NULL);
+       } else {
+               hMap = CreateFileMapping((HANDLE)_get_osfhandle(fildes), NULL,
+                                               vaProt, lenhigh, lenlow, NULL);
 
                if (hMap != NULL)
-               {
-                       allocAddr = MapViewOfFile(hMap, mapAccess, off >> 16, off & 0xFFFF, len);
-               }
-
+                       allocAddr = MapViewOfFile(hMap, mapAccess, off >> 16,
+                                                       off & 0xFFFF, len);
                if (hMap == NULL || allocAddr == NULL)
                        errno = win_to_posix_error(GetLastError());
 
@@ -360,9 +422,7 @@ int munmap(void *addr, size_t len)
        success = UnmapViewOfFile(addr);
 
        if (!success)
-       {
                success = VirtualFree(addr, 0, MEM_RELEASE);
-       }
 
        return !success;
 }
@@ -390,8 +450,12 @@ static HANDLE log_file = INVALID_HANDLE_VALUE;
 
 void openlog(const char *ident, int logopt, int facility)
 {
-       if (log_file == INVALID_HANDLE_VALUE)
-               log_file = CreateFileA("syslog.txt", GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, 0, NULL);
+       if (log_file != INVALID_HANDLE_VALUE)
+               return;
+
+       log_file = CreateFileA("syslog.txt", GENERIC_WRITE,
+                               FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+                               OPEN_ALWAYS, 0, NULL);
 }
 
 void closelog(void)
@@ -408,7 +472,9 @@ void syslog(int priority, const char *message, ... /* argument */)
        DWORD bytes_written;
 
        if (log_file == INVALID_HANDLE_VALUE) {
-               log_file = CreateFileA("syslog.txt", GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, 0, NULL);
+               log_file = CreateFileA("syslog.txt", GENERIC_WRITE,
+                                       FILE_SHARE_READ | FILE_SHARE_WRITE,
+                                       NULL, OPEN_ALWAYS, 0, NULL);
        }
 
        if (log_file == INVALID_HANDLE_VALUE) {
@@ -483,8 +549,7 @@ int clock_gettime(clockid_t clock_id, struct timespec *tp)
 {
        int rc = 0;
 
-       if (clock_id == CLOCK_MONOTONIC)
-       {
+       if (clock_id == CLOCK_MONOTONIC) {
                static LARGE_INTEGER freq = {{0,0}};
                LARGE_INTEGER counts;
                uint64_t t;
@@ -503,9 +568,7 @@ int clock_gettime(clockid_t clock_id, struct timespec *tp)
                 * and then divide by the frequency. */
                t *= 1000000000;
                tp->tv_nsec = t / freq.QuadPart;
-       }
-       else if (clock_id == CLOCK_REALTIME)
-       {
+       } else if (clock_id == CLOCK_REALTIME) {
                /* clock_gettime(CLOCK_REALTIME,...) is just an alias for gettimeofday with a
                 * higher-precision field. */
                struct timeval tv;
@@ -552,6 +615,7 @@ int mlock(const void * addr, size_t len)
 int munlock(const void * addr, size_t len)
 {
        BOOL success = VirtualUnlock((LPVOID)addr, len);
+
        if (!success) {
                errno = win_to_posix_error(GetLastError());
                return -1;
@@ -611,22 +675,26 @@ int shmget(key_t key, size_t size, int shmflg)
        int mapid = -1;
        uint32_t size_low = size & 0xFFFFFFFF;
        uint32_t size_high = ((uint64_t)size) >> 32;
-       HANDLE hMapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, (PAGE_EXECUTE_READWRITE | SEC_RESERVE), size_high, size_low, NULL);
+       HANDLE hMapping;
+
+       hMapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
+                                       PAGE_EXECUTE_READWRITE | SEC_RESERVE,
+                                       size_high, size_low, NULL);
        if (hMapping != NULL) {
                fileMappings[nFileMappings] = hMapping;
                mapid = nFileMappings;
                nFileMappings++;
-       } else {
+       } else
                errno = ENOSYS;
-       }
 
        return mapid;
 }
 
 void *shmat(int shmid, const void *shmaddr, int shmflg)
 {
-       void* mapAddr;
+       void *mapAddr;
        MEMORY_BASIC_INFORMATION memInfo;
+
        mapAddr = MapViewOfFile(fileMappings[shmid], FILE_MAP_ALL_ACCESS, 0, 0, 0);
        if (mapAddr == NULL) {
                errno = win_to_posix_error(GetLastError());
@@ -662,9 +730,9 @@ int shmctl(int shmid, int cmd, struct shmid_ds *buf)
        if (cmd == IPC_RMID) {
                fileMappings[shmid] = INVALID_HANDLE_VALUE;
                return 0;
-       } else {
-               log_err("%s is not implemented\n", __func__);
        }
+
+       log_err("%s is not implemented\n", __func__);
        errno = ENOSYS;
        return -1;
 }
@@ -753,6 +821,7 @@ ssize_t pwrite(int fildes, const void *buf, size_t nbyte,
 {
        int64_t pos = _telli64(fildes);
        ssize_t len = _write(fildes, buf, nbyte);
+
        _lseeki64(fildes, pos, SEEK_SET);
        return len;
 }
@@ -761,6 +830,7 @@ ssize_t pread(int fildes, void *buf, size_t nbyte, off_t offset)
 {
        int64_t pos = _telli64(fildes);
        ssize_t len = read(fildes, buf, nbyte);
+
        _lseeki64(fildes, pos, SEEK_SET);
        return len;
 }
@@ -776,11 +846,12 @@ ssize_t writev(int fildes, const struct iovec *iov, int iovcnt)
 {
        int i;
        DWORD bytes_written = 0;
-       for (i = 0; i < iovcnt; i++)
-       {
-               int len = send((SOCKET)fildes, iov[i].iov_base, iov[i].iov_len, 0);
-               if (len == SOCKET_ERROR)
-               {
+
+       for (i = 0; i < iovcnt; i++) {
+               int len;
+
+               len = send((SOCKET)fildes, iov[i].iov_base, iov[i].iov_len, 0);
+               if (len == SOCKET_ERROR) {
                        DWORD err = GetLastError();
                        errno = win_to_posix_error(err);
                        bytes_written = -1;
@@ -792,8 +863,7 @@ ssize_t writev(int fildes, const struct iovec *iov, int iovcnt)
        return bytes_written;
 }
 
-long long strtoll(const char *restrict str, char **restrict endptr,
-               int base)
+long long strtoll(const char *restrict str, char **restrict endptr, int base)
 {
        return _strtoi64(str, endptr, base);
 }
@@ -816,8 +886,7 @@ int poll(struct pollfd fds[], nfds_t nfds, int timeout)
        FD_ZERO(&writefds);
        FD_ZERO(&exceptfds);
 
-       for (i = 0; i < nfds; i++)
-       {
+       for (i = 0; i < nfds; i++) {
                if (fds[i].fd < 0) {
                        fds[i].revents = 0;
                        continue;
@@ -834,11 +903,9 @@ int poll(struct pollfd fds[], nfds_t nfds, int timeout)
        rc = select(nfds, &readfds, &writefds, &exceptfds, to);
 
        if (rc != SOCKET_ERROR) {
-               for (i = 0; i < nfds; i++)
-               {
-                       if (fds[i].fd < 0) {
+               for (i = 0; i < nfds; i++) {
+                       if (fds[i].fd < 0)
                                continue;
-                       }
 
                        if ((fds[i].events & POLLIN) && FD_ISSET(fds[i].fd, &readfds))
                                fds[i].revents |= POLLIN;
@@ -884,9 +951,11 @@ int nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
 DIR *opendir(const char *dirname)
 {
        struct dirent_ctx *dc = NULL;
+       HANDLE file;
 
        /* See if we can open it. If not, we'll return an error here */
-       HANDLE file = CreateFileA(dirname, 0, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
+       file = CreateFileA(dirname, 0, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+                               OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
        if (file != INVALID_HANDLE_VALUE) {
                CloseHandle(file);
                dc = (struct dirent_ctx*)malloc(sizeof(struct dirent_ctx));
@@ -929,6 +998,7 @@ struct dirent *readdir(DIR *dirp)
 
        if (dirp->find_handle == INVALID_HANDLE_VALUE) {
                char search_pattern[MAX_PATH];
+
                StringCchPrintfA(search_pattern, MAX_PATH-1, "%s\\*", dirp->dirname);
                dirp->find_handle = FindFirstFileA(search_pattern, &find_data);
                if (dirp->find_handle == INVALID_HANDLE_VALUE)
@@ -960,8 +1030,8 @@ in_addr_t inet_network(const char *cp)
 }
 
 #ifdef CONFIG_WINDOWS_XP
-const char* inet_ntop(int af, const void *restrict src,
-               char *restrict dst, socklen_t size)
+const char *inet_ntop(int af, const void *restrict src, char *restrict dst,
+                     socklen_t size)
 {
        INT status = SOCKET_ERROR;
        WSADATA wsd;
@@ -977,6 +1047,7 @@ const char* inet_ntop(int af, const void *restrict src,
        if (af == AF_INET) {
                struct sockaddr_in si;
                DWORD len = size;
+
                memset(&si, 0, sizeof(si));
                si.sin_family = af;
                memcpy(&si.sin_addr, src, sizeof(si.sin_addr));
@@ -984,6 +1055,7 @@ const char* inet_ntop(int af, const void *restrict src,
        } else if (af == AF_INET6) {
                struct sockaddr_in6 si6;
                DWORD len = size;
+
                memset(&si6, 0, sizeof(si6));
                si6.sin6_family = af;
                memcpy(&si6.sin6_addr, src, sizeof(si6.sin6_addr));
@@ -1016,6 +1088,7 @@ int inet_pton(int af, const char *restrict src, void *restrict dst)
        if (af == AF_INET) {
                struct sockaddr_in si;
                INT len = sizeof(si);
+
                memset(&si, 0, sizeof(si));
                si.sin_family = af;
                status = WSAStringToAddressA((char*)src, af, NULL, (struct sockaddr*)&si, &len);
@@ -1024,6 +1097,7 @@ int inet_pton(int af, const char *restrict src, void *restrict dst)
        } else if (af == AF_INET6) {
                struct sockaddr_in6 si6;
                INT len = sizeof(si6);
+
                memset(&si6, 0, sizeof(si6));
                si6.sin6_family = af;
                status = WSAStringToAddressA((char*)src, af, NULL, (struct sockaddr*)&si6, &len);
diff --git a/parse.c b/parse.c
index 5d88d910e0ce6437ff302b469cb382eeffdb1e28..a7d4516e47028b9373c7012bd8b56877c8f493cf 100644 (file)
--- a/parse.c
+++ b/parse.c
@@ -506,6 +506,33 @@ static const char *opt_type_name(const struct fio_option *o)
        return "OPT_UNKNOWN?";
 }
 
+static bool val_too_large(const struct fio_option *o, unsigned long long val,
+                         bool is_uint)
+{
+       if (!o->maxval)         /* maxval of 0: no upper limit is enforced */
+               return false;
+
+       if (is_uint) {          /* caller passes true for FIO_OPT_INT options */
+               if ((int) val < 0)      /* negative once converted to int: compare as signed */
+                       return (int) val > (int) o->maxval;
+               return (unsigned int) val > o->maxval;  /* otherwise compare val converted to unsigned int */
+       }
+
+       return val > o->maxval; /* other option types: compare the full unsigned long long */
+}
+
+static bool val_too_small(const struct fio_option *o, unsigned long long val,
+                         bool is_uint)
+{
+       if (!o->minval)         /* minval of 0: no lower limit is enforced */
+               return false;
+
+       if (is_uint)            /* convert to int first so negative inputs compare as negative */
+               return (int) val < o->minval;
+
+       return val < o->minval; /* NOTE(review): val is unsigned here; confirm minval is never negative for these option types */
+}
+
 static int __handle_option(const struct fio_option *o, const char *ptr,
                           void *data, int first, int more, int curr)
 {
@@ -595,14 +622,14 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
                        return 1;
                }
 
-               if (o->maxval && ull > o->maxval) {
-                       log_err("max value out of range: %llu"
-                                       " (%llu max)\n", ull, o->maxval);
+               if (val_too_large(o, ull, o->type == FIO_OPT_INT)) {
+                       log_err("%s: max value out of range: %llu"
+                               " (%llu max)\n", o->name, ull, o->maxval);
                        return 1;
                }
-               if (o->minval && ull < o->minval) {
-                       log_err("min value out of range: %lld"
-                                       " (%d min)\n", ull, o->minval);
+               if (val_too_small(o, ull, o->type == FIO_OPT_INT)) {
+                       log_err("%s: min value out of range: %lld"
+                               " (%d min)\n", o->name, ull, o->minval);
                        return 1;
                }
                if (o->posval[0].ival) {
index 5c77a4e8ecb83089ad2fb6a0322aafcbcab2bc4c..68ad755d948056c0ff9fabc0a26a14961f410a05 100644 (file)
@@ -9,6 +9,36 @@
 #include "lib/getrusage.h"
 #include "rate-submit.h"
 
+static void check_overlap(struct io_u *io_u)
+{
+       int i;
+       struct thread_data *td;
+       bool overlap = false;
+
+       do {
+               /*
+                * Allow only one thread to check for overlap at a
+                * time to prevent two threads from thinking the coast
+                * is clear and then submitting IOs that overlap with
+                * each other
+                */
+               pthread_mutex_lock(&overlap_check);
+               for_each_td(td, i) {
+                       if (td->runstate <= TD_SETTING_UP ||
+                               td->runstate >= TD_FINISHING ||
+                               !td->o.serialize_overlap ||
+                               td->o.io_submit_mode != IO_MODE_OFFLOAD)
+                               continue;       /* only threads between setup and finish using serialized offload submission are checked */
+
+                       overlap = in_flight_overlap(&td->io_u_all, io_u);
+                       if (overlap) {
+                               pthread_mutex_unlock(&overlap_check);   /* release before retrying the whole scan */
+                               break;
+                       }
+               }
+       } while (overlap);      /* NOTE(review): returns with overlap_check still locked when no overlap was found; presumably released later on the submit path -- confirm */
+}
+
 static int io_workqueue_fn(struct submit_worker *sw,
                           struct workqueue_work *work)
 {
@@ -17,6 +47,9 @@ static int io_workqueue_fn(struct submit_worker *sw,
        struct thread_data *td = sw->priv;
        int ret;
 
+       if (td->o.serialize_overlap)
+               check_overlap(io_u);
+
        dprint(FD_RATE, "io_u %p queued by %u\n", io_u, gettid());
 
        io_u_set(td, io_u, IO_U_F_NO_FILE_PUT);
@@ -50,10 +83,6 @@ static int io_workqueue_fn(struct submit_worker *sw,
                ret = io_u_queued_complete(td, min_evts);
                if (ret > 0)
                        td->cur_depth -= ret;
-       } else if (ret == FIO_Q_BUSY) {
-               ret = io_u_queued_complete(td, td->cur_depth);
-               if (ret > 0)
-                       td->cur_depth -= ret;
        }
 
        return 0;
@@ -126,7 +155,7 @@ static int io_workqueue_init_worker_fn(struct submit_worker *sw)
        clear_io_state(td, 1);
 
        td_set_runstate(td, TD_RUNNING);
-       td->flags |= TD_F_CHILD;
+       td->flags |= TD_F_CHILD | TD_F_NEED_LOCK;
        td->parent = parent;
        return 0;
 
index b966c66cbfe342d476b8054e2665932004ed41ba..90d3396b62fd8a557304cdbef929c6a428febf33 100644 (file)
--- a/server.c
+++ b/server.c
@@ -28,7 +28,7 @@
 
 int fio_net_port = FIO_NET_PORT;
 
-int exit_backend = 0;
+bool exit_backend = false;
 
 enum {
        SK_F_FREE       = 1,
@@ -296,6 +296,8 @@ static int verify_convert_cmd(struct fio_net_cmd *cmd)
        if (crc != cmd->cmd_crc16) {
                log_err("fio: server bad crc on command (got %x, wanted %x)\n",
                                cmd->cmd_crc16, crc);
+               fprintf(f_err, "fio: server bad crc on command (got %x, wanted %x)\n",
+                               cmd->cmd_crc16, crc);
                return 1;
        }
 
@@ -310,6 +312,8 @@ static int verify_convert_cmd(struct fio_net_cmd *cmd)
                break;
        default:
                log_err("fio: bad server cmd version %d\n", cmd->version);
+               fprintf(f_err, "fio: client/server version mismatch (%d != %d)\n",
+                               cmd->version, FIO_SERVER_VER);
                return 1;
        }
 
@@ -991,7 +995,7 @@ static int handle_command(struct sk_out *sk_out, struct flist_head *job_list,
                ret = 0;
                break;
        case FIO_NET_CMD_EXIT:
-               exit_backend = 1;
+               exit_backend = true;
                return -1;
        case FIO_NET_CMD_LOAD_FILE:
                ret = handle_load_file_cmd(cmd);
@@ -2488,7 +2492,7 @@ void fio_server_got_signal(int signal)
                sk_out->sk = -1;
        else {
                log_info("\nfio: terminating on signal %d\n", signal);
-               exit_backend = 1;
+               exit_backend = true;
        }
 }
 
@@ -2570,7 +2574,7 @@ int fio_start_server(char *pidfile)
 
        setsid();
        openlog("fio", LOG_NDELAY|LOG_NOWAIT|LOG_PID, LOG_USER);
-       log_syslog = 1;
+       log_syslog = true;
        close(STDIN_FILENO);
        close(STDOUT_FILENO);
        close(STDERR_FILENO);
index 37d2f76a96560dbfa49e1ec23e5cc0c9c17ca493..371e51ea24e85206ac857a1f62bf623b8b044e9f 100644 (file)
--- a/server.h
+++ b/server.h
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-       FIO_SERVER_VER                  = 74,
+       FIO_SERVER_VER                  = 77,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
@@ -232,7 +232,7 @@ extern int fio_net_send_quit(int sk);
 extern int fio_server_create_sk_key(void);
 extern void fio_server_destroy_sk_key(void);
 
-extern int exit_backend;
+extern bool exit_backend;
 extern int fio_net_port;
 
 #endif
diff --git a/stat.c b/stat.c
index 1a9c553b12b744d72a1f25c28f6b7f1f53accfa0..ef9c4af2d81878a1ec6ec427cc804c18f7b07752 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -1059,10 +1059,16 @@ static void add_ddir_status_json(struct thread_stat *ts,
 
        if (ts->clat_percentiles || ts->lat_percentiles) {
                if (ddir_rw(ddir)) {
+                       uint64_t samples;
+
+                       if (ts->clat_percentiles)
+                               samples = ts->clat_stat[ddir].samples;
+                       else
+                               samples = ts->lat_stat[ddir].samples;
+
                        len = calc_clat_percentiles(ts->io_u_plat[ddir],
-                                       ts->clat_stat[ddir].samples,
-                                       ts->percentile_list, &ovals, &maxv,
-                                       &minv);
+                                       samples, ts->percentile_list, &ovals,
+                                       &maxv, &minv);
                } else {
                        len = calc_clat_percentiles(ts->io_u_sync_plat,
                                        ts->sync_stat.samples,
@@ -1928,8 +1934,6 @@ void __show_run_stats(void)
                if (is_backend) {
                        fio_server_send_job_options(opt_lists[i], i);
                        fio_server_send_ts(ts, rs);
-                       if (output_format & FIO_OUTPUT_TERSE)
-                               show_thread_status_terse(ts, rs, &output[__FIO_OUTPUT_TERSE]);
                } else {
                        if (output_format & FIO_OUTPUT_TERSE)
                                show_thread_status_terse(ts, rs, &output[__FIO_OUTPUT_TERSE]);
diff --git a/stat.h b/stat.h
index 98de281e2e1188387cb0ae9e887940c9abb7491b..b4ba71e3b0df3b9131e46132622e5cc4c88de78b 100644 (file)
--- a/stat.h
+++ b/stat.h
@@ -326,7 +326,7 @@ extern void add_sync_clat_sample(struct thread_stat *ts,
 extern int calc_log_samples(void);
 
 extern struct io_log *agg_io_log[DDIR_RWDIR_CNT];
-extern int write_bw_log;
+extern bool write_bw_log;
 
 static inline bool nsec_to_usec(unsigned long long *min,
                                unsigned long long *max, double *mean,
index a2e6fd6210636fbe1509764999e70963fb805094..9d6bdee5e8b7f7c18e3388caae096fe879bf1bb3 100644 (file)
--- a/t/axmap.c
+++ b/t/axmap.c
@@ -5,7 +5,7 @@
 #include "../lib/lfsr.h"
 #include "../lib/axmap.h"
 
-static int test_regular(size_t size, int seed)
+static int test_regular(uint64_t size, int seed)
 {
        struct fio_lfsr lfsr;
        struct axmap *map;
@@ -61,11 +61,11 @@ static int check_next_free(struct axmap *map, uint64_t start, uint64_t expected)
        return 0;
 }
 
-static int test_next_free(size_t size, int seed)
+static int test_next_free(uint64_t size, int seed)
 {
        struct fio_lfsr lfsr;
        struct axmap *map;
-       size_t osize;
+       uint64_t osize;
        uint64_t ff, lastfree;
        int err, i;
 
@@ -196,7 +196,7 @@ static int test_next_free(size_t size, int seed)
        return 0;
 }
 
-static int test_multi(size_t size, unsigned int bit_off)
+static int test_multi(uint64_t size, unsigned int bit_off)
 {
        unsigned int map_size = size;
        struct axmap *map;
@@ -395,7 +395,7 @@ static int test_overlap(void)
 
 int main(int argc, char *argv[])
 {
-       size_t size = (1UL << 23) - 200;
+       uint64_t size = (1ULL << 23) - 200;
        int seed = 1;
 
        if (argc > 1) {
diff --git a/t/jobs/t0010-b7aae4ba.fio b/t/jobs/t0010-b7aae4ba.fio
new file mode 100644 (file)
index 0000000..0223770
--- /dev/null
@@ -0,0 +1,8 @@
+# Expected result: fio runs and completes the job
+# Buggy result: fio segfaults
+#
+[test]
+ioengine=null
+size=10g
+io_submit_mode=offload
+iodepth=16
diff --git a/t/jobs/t0011-5d2788d5.fio b/t/jobs/t0011-5d2788d5.fio
new file mode 100644 (file)
index 0000000..09861f7
--- /dev/null
@@ -0,0 +1,18 @@
+# Expected results: no parse warnings, and the run shows roughly a 1/8 iops
+#                      ratio between the two jobs.
+# Buggy result: parse warning on flow value overflow, no 1/8 division between
+#                      jobs.
+#
+[global]
+bs=4k
+ioengine=null
+size=100g
+runtime=3
+flow_id=1
+
+[flow1]
+flow=-8
+rate_iops=1000
+
+[flow2]
+flow=1
index 95f9bf456b9a190ebde9461aebf378457bf316cd..173f0ca64a421dcb863e124b2effe5b42c0bd08c 100644 (file)
@@ -102,5 +102,8 @@ fio_written() {
 }
 
 fio_reset_count() {
-    sed -n 's/^.*write:[^;]*; \([0-9]*\) zone resets$/\1/p'
+    local count
+
+    count=$(sed -n 's/^.*write:[^;]*; \([0-9]*\) zone resets$/\1/p')    # reads stdin; captures the number before "zone resets"
+    echo "${count:-0}"    # print 0 when no matching line produced a count
 }
index 6ee5055b57cfed94a00ddf764639898289da5b73..2d7279109af9ab77e534e0ec4ebfdb3fff713dd5 100755 (executable)
@@ -81,13 +81,14 @@ is_scsi_device() {
 }
 
 run_fio() {
-    local fio
+    local fio opts
 
     fio=$(dirname "$0")/../../fio
 
-    { echo; echo "fio $*"; echo; } >>"${logfile}.${test_number}"
+    opts=("--aux-path=/tmp" "--allow_file_create=0" "$@")    # fixed options first, then the caller's arguments
+    { echo; echo "fio ${opts[*]}"; echo; } >>"${logfile}.${test_number}"    # log the full option list that is passed to fio
 
-    "${dynamic_analyzer[@]}" "$fio" "$@"
+    "${dynamic_analyzer[@]}" "$fio" "${opts[@]}"
 }
 
 run_one_fio_job() {
@@ -113,7 +114,7 @@ run_fio_on_seq() {
 # Check whether buffered writes are refused.
 test1() {
     run_fio --name=job1 --filename="$dev" --rw=write --direct=0 --bs=4K        \
-           --size="${zone_size}"                                       \
+           --size="${zone_size}" --thread=1                            \
            --zonemode=zbd --zonesize="${zone_size}" 2>&1 |
        tee -a "${logfile}.${test_number}" |
        grep -q 'Using direct I/O is mandatory for writing to ZBD drives'
@@ -800,18 +801,26 @@ fi
 
 logfile=$0.log
 
+passed=0
+failed=0
 rc=0
 for test_number in "${tests[@]}"; do
     rm -f "${logfile}.${test_number}"
     echo -n "Running test $test_number ... "
     if eval "test$test_number"; then
        status="PASS"
+       ((passed++))
     else
        status="FAIL"
+       ((failed++))
        rc=1
     fi
     echo "$status"
     echo "$status" >> "${logfile}.${test_number}"
 done
 
+echo "$passed tests passed"
+if [ $failed -gt 0 ]; then
+    echo " and $failed tests failed"
+fi
 exit $rc
index 393158340e96a52a8068cd39d0d8c3c6c4cc4da3..14c6969f3acb4bd741e1d33a5b70b1aac8d68b4a 100644 (file)
@@ -140,7 +140,6 @@ struct thread_options {
        unsigned int rand_repeatable;
        unsigned int allrand_repeatable;
        unsigned long long rand_seed;
-       unsigned int dep_use_os_rand;
        unsigned int log_avg_msec;
        unsigned int log_hist_msec;
        unsigned int log_hist_coarseness;
@@ -173,7 +172,6 @@ struct thread_options {
 
        unsigned int hugepage_size;
        unsigned long long rw_min_bs;
-       unsigned int pad2;
        unsigned int thinktime;
        unsigned int thinktime_spin;
        unsigned int thinktime_blocks;
@@ -258,6 +256,9 @@ struct thread_options {
        char *read_iolog_file;
        bool read_iolog_chunked;
        char *write_iolog_file;
+       char *merge_blktrace_file;
+       fio_fp64_t merge_blktrace_scalars[FIO_IO_U_LIST_MAX_LEN];
+       fio_fp64_t merge_blktrace_iters[FIO_IO_U_LIST_MAX_LEN];
 
        unsigned int write_bw_log;
        unsigned int write_lat_log;
@@ -427,8 +428,8 @@ struct thread_options_pack {
        uint32_t override_sync;
        uint32_t rand_repeatable;
        uint32_t allrand_repeatable;
+       uint32_t pad;
        uint64_t rand_seed;
-       uint32_t dep_use_os_rand;
        uint32_t log_avg_msec;
        uint32_t log_hist_msec;
        uint32_t log_hist_coarseness;
@@ -540,6 +541,9 @@ struct thread_options_pack {
 
        uint8_t read_iolog_file[FIO_TOP_STR_MAX];
        uint8_t write_iolog_file[FIO_TOP_STR_MAX];
+       uint8_t merge_blktrace_file[FIO_TOP_STR_MAX];
+       fio_fp64_t merge_blktrace_scalars[FIO_IO_U_LIST_MAX_LEN];
+       fio_fp64_t merge_blktrace_iters[FIO_IO_U_LIST_MAX_LEN];
 
        uint32_t write_bw_log;
        uint32_t write_lat_log;
@@ -566,7 +570,7 @@ struct thread_options_pack {
        uint32_t rate_iops_min[DDIR_RWDIR_CNT];
        uint32_t rate_process;
        uint32_t rate_ign_think;
-       uint32_t pad;
+       uint32_t pad3;
 
        uint8_t ioscheduler[FIO_TOP_STR_MAX];
 
diff --git a/time.c b/time.c
index c8876829a367ede755453f6b255b2f4139ee8a74..1999969955b2828d6a9d38b425147b5ac3b5babf 100644 (file)
--- a/time.c
+++ b/time.c
@@ -118,6 +118,7 @@ bool ramp_time_over(struct thread_data *td)
        if (utime_since_now(&td->epoch) >= td->o.ramp_time) {
                td->ramp_time_over = true;
                reset_all_stats(td);
+               reset_io_stats(td);
                td_set_runstate(td, TD_RAMP);
 
                /*
index c398113c12bd5423859bd3e01710fb5bd04f5570..7f08f6e32d15cb64946ec53bf42db816ad741669 100755 (executable)
 # if you do this, don't pass normal CLI parameters to it
 # otherwise it runs the CLI
 
-import sys, os, math, copy
+import sys, os, math, copy, time
 from copy import deepcopy
 import argparse
-import unittest2
+
+unittest2_imported = True
+try:
+    import unittest2
+except ImportError:
+    unittest2_imported = False
 
 msec_per_sec = 1000
 nsec_per_usec = 1000
+direction_read = 0
+direction_write = 1
 
 class FioHistoLogExc(Exception):
     pass
@@ -52,13 +59,20 @@ def exception_suffix( record_num, pathname ):
 
 # log file parser raises FioHistoLogExc exceptions
 # it returns histogram buckets in whatever unit fio uses
-
-def parse_hist_file(logfn, buckets_per_interval):
-    max_timestamp_ms = 0.0
-    
+# inputs:
+#  logfn: pathname to histogram log file
+#  buckets_per_interval - how many histogram buckets to expect
+#  log_hist_msec - if not None, expected time interval between histogram records
+
+def parse_hist_file(logfn, buckets_per_interval, log_hist_msec):
+    previous_ts_ms_read = -1
+    previous_ts_ms_write = -1
     with open(logfn, 'r') as f:
         records = [ l.strip() for l in f.readlines() ]
     intervals = []
+    last_time_ms = -1
+    last_direction = -1
     for k, r in enumerate(records):
         if r == '':
             continue
@@ -75,14 +89,20 @@ def parse_hist_file(logfn, buckets_per_interval):
         if len(int_tokens) < 3:
             raise FioHistoLogExc('too few numbers %s' % exception_suffix(k+1, logfn))
 
-        time_ms = int_tokens[0]
-        if time_ms > max_timestamp_ms:
-            max_timestamp_ms = time_ms
-
         direction = int_tokens[1]
-        if direction != 0 and direction != 1:
+        if direction != direction_read and direction != direction_write:
             raise FioHistoLogExc('invalid I/O direction %s' % exception_suffix(k+1, logfn))
 
+        time_ms = int_tokens[0]
+        if direction == direction_read:
+            if time_ms < previous_ts_ms_read:
+                raise FioHistoLogExc('read timestamp in column 1 decreased %s' % exception_suffix(k+1, logfn))
+            previous_ts_ms_read = time_ms
+        elif direction == direction_write:
+            if time_ms < previous_ts_ms_write:
+                raise FioHistoLogExc('write timestamp in column 1 decreased %s' % exception_suffix(k+1, logfn))
+            previous_ts_ms_write = time_ms
+
         bsz = int_tokens[2]
         if bsz > (1 << 24):
             raise FioHistoLogExc('block size too large %s' % exception_suffix(k+1, logfn))
@@ -91,10 +111,31 @@ def parse_hist_file(logfn, buckets_per_interval):
         if len(buckets) != buckets_per_interval:
             raise FioHistoLogExc('%d buckets per interval but %d expected in %s' % 
                     (len(buckets), buckets_per_interval, exception_suffix(k+1, logfn)))
+
+        # hack to filter out records with the same timestamp
+        # we should not have to do this if fio logs histogram records correctly
+
+        if time_ms == last_time_ms and direction == last_direction:
+            continue
+        last_time_ms = time_ms
+        last_direction = direction
+
         intervals.append((time_ms, direction, bsz, buckets))
     if len(intervals) == 0:
         raise FioHistoLogExc('no records in %s' % logfn)
-    return (intervals, max_timestamp_ms)
+    (first_timestamp, _, _, _) = intervals[0]
+    if first_timestamp < 1000000:
+        start_time = 0    # assume log_unix_epoch = 0
+    elif log_hist_msec != None:
+        start_time = first_timestamp - log_hist_msec
+    elif len(intervals) > 1:
+        (second_timestamp, _, _, _) = intervals[1]
+        start_time = first_timestamp - (second_timestamp - first_timestamp)
+    else:
+        raise FioHistoLogExc('no way to estimate test start time')
+    (end_timestamp, _, _, _) = intervals[-1]
+
+    return (intervals, start_time, end_timestamp)
 
 
 # compute time range for each bucket index in histogram record
@@ -123,12 +164,13 @@ def time_ranges(groups, counters_per_group, fio_version=3):
 
 # compute number of time quantum intervals in the test
 
-def get_time_intervals(time_quantum, max_timestamp_ms):
+def get_time_intervals(time_quantum, min_timestamp_ms, max_timestamp_ms):
     # round down to nearest second
     max_timestamp = max_timestamp_ms // msec_per_sec
+    min_timestamp = min_timestamp_ms // msec_per_sec
     # round up to nearest whole multiple of time_quantum
-    time_interval_count = (max_timestamp + time_quantum) // time_quantum
-    end_time = time_interval_count * time_quantum
+    time_interval_count = ((max_timestamp - min_timestamp) + time_quantum) // time_quantum  # at least 1 when max >= min and time_quantum > 0
+    end_time = min_timestamp + (time_interval_count * time_quantum)  # in seconds, offset from min_timestamp rather than from zero
     return (end_time, time_interval_count)
 
 # align raw histogram log data to time quantum so 
@@ -146,17 +188,17 @@ def get_time_intervals(time_quantum, max_timestamp_ms):
 # so the contribution of this bucket to this time quantum is
 # 515 x 0.99 = 509.85
 
-def align_histo_log(raw_histogram_log, time_quantum, bucket_count, max_timestamp_ms):
+def align_histo_log(raw_histogram_log, time_quantum, bucket_count, min_timestamp_ms, max_timestamp_ms):
 
     # slice up test time int intervals of time_quantum seconds
 
-    (end_time, time_interval_count) = get_time_intervals(time_quantum, max_timestamp_ms)
+    (end_time, time_interval_count) = get_time_intervals(time_quantum, min_timestamp_ms, max_timestamp_ms)
     time_qtm_ms = time_quantum * msec_per_sec
     end_time_ms = end_time * msec_per_sec
     aligned_intervals = []
     for j in range(0, time_interval_count):
         aligned_intervals.append((
-            j * time_qtm_ms,
+            min_timestamp_ms + (j * time_qtm_ms),
             [ 0.0 for j in range(0, bucket_count) ] ))
 
     log_record_count = len(raw_histogram_log)
@@ -189,14 +231,20 @@ def align_histo_log(raw_histogram_log, time_quantum, bucket_count, max_timestamp
 
         # calculate first quantum that overlaps this histogram record 
 
-        qtm_start_ms = (time_msec // time_qtm_ms) * time_qtm_ms
-        qtm_end_ms = ((time_msec + time_qtm_ms) // time_qtm_ms) * time_qtm_ms
-        qtm_index = qtm_start_ms // time_qtm_ms
+        offset_from_min_ts = time_msec - min_timestamp_ms
+        qtm_start_ms = min_timestamp_ms + (offset_from_min_ts // time_qtm_ms) * time_qtm_ms
+        qtm_end_ms = min_timestamp_ms + ((offset_from_min_ts + time_qtm_ms) // time_qtm_ms) * time_qtm_ms
+        qtm_index = offset_from_min_ts // time_qtm_ms
 
         # for each quantum that overlaps this histogram record's time interval
 
         while qtm_start_ms < time_msec_end:  # while quantum overlaps record
 
+            # some histogram logs may be longer than others
+
+            if len(aligned_intervals) <= qtm_index:
+                break
+
             # calculate fraction of time that this quantum 
             # overlaps histogram record's time interval
             
@@ -316,6 +364,9 @@ def compute_percentiles_from_logs():
     parser.add_argument("--time-quantum", dest="time_quantum", 
         default="1", type=int,
         help="time quantum in seconds (default=1)")
+    parser.add_argument("--log-hist-msec", dest="log_hist_msec", 
+        type=int, default=None,
+        help="log_hist_msec value in fio job file")
     parser.add_argument("--output-unit", dest="output_unit", 
         default="usec", type=str,
         help="Latency percentile output unit: msec|usec|nsec (default usec)")
@@ -339,30 +390,24 @@ def compute_percentiles_from_logs():
     buckets_per_interval = buckets_per_group * args.bucket_groups
     print('buckets per interval = %d ' % buckets_per_interval)
     bucket_index_range = range(0, buckets_per_interval)
+    if args.log_hist_msec != None:
+        print('log_hist_msec = %d' % args.log_hist_msec)
     if args.time_quantum == 0:
         print('ERROR: time-quantum must be a positive number of seconds')
     print('output unit = ' + args.output_unit)
     if args.output_unit == 'msec':
-        time_divisor = 1000.0
+        time_divisor = float(msec_per_sec)
     elif args.output_unit == 'usec':
         time_divisor = 1.0
 
-    # calculate response time interval associated with each histogram bucket
-
-    bucket_times = time_ranges(args.bucket_groups, buckets_per_group, fio_version=args.fio_version)
-
     # construct template for each histogram bucket array with buckets all zeroes
     # we just copy this for each new histogram
 
     zeroed_buckets = [ 0.0 for r in bucket_index_range ]
 
-    # print CSV header just like fiologparser_hist does
+    # calculate response time interval associated with each histogram bucket
 
-    header = 'msec, '
-    for p in args.pctiles_wanted:
-        header += '%3.1f, ' % p
-    print('time (millisec), percentiles in increasing order with values in ' + args.output_unit)
-    print(header)
+    bucket_times = time_ranges(args.bucket_groups, buckets_per_group, fio_version=args.fio_version)
 
     # parse the histogram logs
     # assumption: each bucket has a monotonically increasing time
@@ -370,33 +415,52 @@ def compute_percentiles_from_logs():
     # (exception: if randrw workload, then there is a read and a write 
     # record for the same time interval)
 
-    max_timestamp_all_logs = 0
+    test_start_time = 0
+    test_end_time = 1.0e18
     hist_files = {}
     for fn in args.file_list:
         try:
-            (hist_files[fn], max_timestamp_ms)  = parse_hist_file(fn, buckets_per_interval)
+            (hist_files[fn], log_start_time, log_end_time)  = parse_hist_file(fn, buckets_per_interval, args.log_hist_msec)
         except FioHistoLogExc as e:
             myabort(str(e))
-        max_timestamp_all_logs = max(max_timestamp_all_logs, max_timestamp_ms)
-
-    (end_time, time_interval_count) = get_time_intervals(args.time_quantum, max_timestamp_all_logs)
+        # we consider the test started when all threads have started logging
+        test_start_time = max(test_start_time, log_start_time)
+        # we consider the test over when one of the logs has ended
+        test_end_time = min(test_end_time, log_end_time)
+
+    if test_start_time >= test_end_time:
+        raise FioHistoLogExc('no time interval when all threads logs overlapped')
+    if test_start_time > 0:
+        print('all threads running as of unix epoch time %d = %s' % (
+               test_start_time/float(msec_per_sec), 
+               time.ctime(test_start_time/1000.0)))
+
+    (end_time, time_interval_count) = get_time_intervals(args.time_quantum, test_start_time, test_end_time)
     all_threads_histograms = [ ((j*args.time_quantum*msec_per_sec), deepcopy(zeroed_buckets))
-                                for j in range(0, time_interval_count) ]
+                               for j in range(0, time_interval_count) ]
 
     for logfn in hist_files.keys():
         aligned_per_thread = align_histo_log(hist_files[logfn], 
                                              args.time_quantum, 
                                              buckets_per_interval, 
-                                             max_timestamp_all_logs)
+                                             test_start_time,
+                                             test_end_time)
         for t in range(0, time_interval_count):
             (_, all_threads_histo_t) = all_threads_histograms[t]
             (_, log_histo_t) = aligned_per_thread[t]
             add_to_histo_from( all_threads_histo_t, log_histo_t )
 
     # calculate percentiles across aggregate histogram for all threads
+    # print CSV header just like fiologparser_hist does
+
+    header = 'msec-since-start, '
+    for p in args.pctiles_wanted:
+        header += '%3.1f, ' % p
+    print('time (millisec), percentiles in increasing order with values in ' + args.output_unit)
+    print(header)
 
     for (t_msec, all_threads_histo_t) in all_threads_histograms:
-        record = '%d, ' % t_msec
+        record = '%8d, ' % t_msec
         pct = get_pctiles(all_threads_histo_t, args.pctiles_wanted, bucket_times)
         if not pct:
             for w in args.pctiles_wanted:
@@ -412,14 +476,14 @@ def compute_percentiles_from_logs():
 #end of MAIN PROGRAM
 
 
-
 ##### below are unit tests ##############
 
-import tempfile, shutil
-from os.path import join
-should_not_get_here = False
+if unittest2_imported:
+  import tempfile, shutil
+  from os.path import join
+  should_not_get_here = False
 
-class Test(unittest2.TestCase):
+  class Test(unittest2.TestCase):
     tempdir = None
 
     # a little less typing please
@@ -455,8 +519,9 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             f.write('1234, 0, 4096, 1, 2, 3, 4\n')
             f.write('5678,1,16384,5,6,7,8 \n')
-        (raw_histo_log, max_timestamp) = parse_hist_file(self.fn, 4) # 4 buckets per interval
-        self.A(len(raw_histo_log) == 2 and max_timestamp == 5678)
+        (raw_histo_log, min_timestamp, max_timestamp) = parse_hist_file(self.fn, 4, None) # 4 buckets per interval
+        # if not log_unix_epoch=1, then min_timestamp will always be set to zero
+        self.A(len(raw_histo_log) == 2 and min_timestamp == 0 and max_timestamp == 5678)
         (time_ms, direction, bsz, histo) = raw_histo_log[0]
         self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ])
         (time_ms, direction, bsz, histo) = raw_histo_log[1]
@@ -466,7 +531,7 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             pass
         try:
-            (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+            (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
             self.A(should_not_get_here)
         except FioHistoLogExc as e:
             self.A(str(e).startswith('no records'))
@@ -477,7 +542,7 @@ class Test(unittest2.TestCase):
             f.write('1234, 0, 4096, 1, 2, 3, 4\n')
             f.write('5678,1,16384,5,6,7,8 \n')
             f.write('\n')
-        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+        (raw_histo_log, _, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
         self.A(len(raw_histo_log) == 2 and max_timestamp_ms == 5678)
         (time_ms, direction, bsz, histo) = raw_histo_log[0]
         self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ])
@@ -488,7 +553,7 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             f.write('12, 0, 4096, 1a, 2, 3, 4\n')
         try:
-            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
             self.A(False)
         except FioHistoLogExc as e:
             self.A(str(e).startswith('non-integer'))
@@ -497,7 +562,7 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             f.write('-12, 0, 4096, 1, 2, 3, 4\n')
         try:
-            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
             self.A(False)
         except FioHistoLogExc as e:
             self.A(str(e).startswith('negative integer'))
@@ -506,7 +571,7 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             f.write('0, 0\n')
         try:
-            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
             self.A(False)
         except FioHistoLogExc as e:
             self.A(str(e).startswith('too few numbers'))
@@ -515,7 +580,7 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             f.write('100, 2, 4096, 1, 2, 3, 4\n')
         try:
-            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
             self.A(False)
         except FioHistoLogExc as e:
             self.A(str(e).startswith('invalid I/O direction'))
@@ -523,11 +588,11 @@ class Test(unittest2.TestCase):
     def test_b8_parse_bsz_too_big(self):
         with open(self.fn+'_good', 'w') as f:
             f.write('100, 1, %d, 1, 2, 3, 4\n' % (1<<24))
-        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn+'_good', 4)
+        (raw_histo_log, _, _) = parse_hist_file(self.fn+'_good', 4, None)
         with open(self.fn+'_bad', 'w') as f:
             f.write('100, 1, 20000000, 1, 2, 3, 4\n')
         try:
-            (raw_histo_log, _) = parse_hist_file(self.fn+'_bad', 4)
+            (raw_histo_log, _, _) = parse_hist_file(self.fn+'_bad', 4, None)
             self.A(False)
         except FioHistoLogExc as e:
             self.A(str(e).startswith('block size too large'))
@@ -536,7 +601,7 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             f.write('100, 1, %d, 1, 2, 3, 4, 5\n' % (1<<24))
         try:
-            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
             self.A(False)
         except FioHistoLogExc as e:
             self.A(str(e).__contains__('buckets per interval'))
@@ -565,12 +630,44 @@ class Test(unittest2.TestCase):
     def test_d1_align_histo_log_1_quantum(self):
         with open(self.fn, 'w') as f:
             f.write('100, 1, 4096, 1, 2, 3, 4')
-        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
-        self.A(max_timestamp_ms == 100)
-        aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms)
+        (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+        self.A(min_timestamp_ms == 0 and max_timestamp_ms == 100)
+        aligned_log = align_histo_log(raw_histo_log, 5, 4, min_timestamp_ms, max_timestamp_ms)
         self.A(len(aligned_log) == 1)
         (time_ms0, h) = aligned_log[0]
-        self.A(time_ms0 == 0 and h == [1.0, 2.0, 3.0, 4.0])
+        self.A(time_ms0 == 0 and h == [1., 2., 3., 4.])
+
+    # handle case with log_unix_epoch=1 timestamps, 1-second time quantum
+    # here both records will be separated into 2 aligned intervals
+
+    def test_d1a_align_2rec_histo_log_epoch_1_quantum_1sec(self):
+        with open(self.fn, 'w') as f:
+            f.write('1536504002123, 1, 4096, 1, 2, 3, 4\n')
+            f.write('1536504003123, 1, 4096, 4, 3, 2, 1\n')
+        (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+        self.A(min_timestamp_ms == 1536504001123 and max_timestamp_ms == 1536504003123)
+        aligned_log = align_histo_log(raw_histo_log, 1, 4, min_timestamp_ms, max_timestamp_ms)
+        self.A(len(aligned_log) == 3)
+        (time_ms0, h) = aligned_log[0]
+        self.A(time_ms0 == 1536504001123 and h == [0., 0., 0., 0.])
+        (time_ms1, h) = aligned_log[1]
+        self.A(time_ms1 == 1536504002123 and h == [1., 2., 3., 4.])
+        (time_ms2, h) = aligned_log[2]
+        self.A(time_ms2 == 1536504003123 and h == [4., 3., 2., 1.])
+
+    # handle case with log_unix_epoch=1 timestamps, 5-second time quantum
+    # here both records will be merged into a single aligned time interval
+
+    def test_d1b_align_2rec_histo_log_epoch_1_quantum_5sec(self):
+        with open(self.fn, 'w') as f:
+            f.write('1536504002123, 1, 4096, 1, 2, 3, 4\n')
+            f.write('1536504003123, 1, 4096, 4, 3, 2, 1\n')
+        (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+        self.A(min_timestamp_ms == 1536504001123 and max_timestamp_ms == 1536504003123)
+        aligned_log = align_histo_log(raw_histo_log, 5, 4, min_timestamp_ms, max_timestamp_ms)
+        self.A(len(aligned_log) == 1)
+        (time_ms0, h) = aligned_log[0]
+        self.A(time_ms0 == 1536504001123 and h == [5., 5., 5., 5.])
 
     # we need this to compare 2 lists of floating point numbers for equality
     # because of floating-point imprecision
@@ -592,11 +689,11 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             f.write('2000, 1, 4096, 1, 2, 3, 4\n')
             f.write('7000, 1, 4096, 1, 2, 3, 4\n')
-        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
-        self.A(max_timestamp_ms == 7000)
+        (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+        self.A(min_timestamp_ms == 0 and max_timestamp_ms == 7000)
         (_, _, _, raw_buckets1) = raw_histo_log[0]
         (_, _, _, raw_buckets2) = raw_histo_log[1]
-        aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms)
+        aligned_log = align_histo_log(raw_histo_log, 5, 4, min_timestamp_ms, max_timestamp_ms)
         self.A(len(aligned_log) == 2)
         (time_ms1, h1) = aligned_log[0]
         (time_ms2, h2) = aligned_log[1]
@@ -614,9 +711,9 @@ class Test(unittest2.TestCase):
         with open(self.fn, 'w') as f:
             buckets = [ 100 for j in range(0, 128) ]
             f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets]))
-        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 128)
-        self.A(max_timestamp_ms == 9000)
-        aligned_log = align_histo_log(raw_histo_log, 5, 128, max_timestamp_ms)
+        (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 128, None)
+        self.A(min_timestamp_ms == 0 and max_timestamp_ms == 9000)
+        aligned_log = align_histo_log(raw_histo_log, 5, 128, min_timestamp_ms, max_timestamp_ms)
         time_intervals = time_ranges(4, 32)
         # since buckets are all equal, then median is halfway through time_intervals
         # and max latency interval is at end of time_intervals
@@ -638,9 +735,9 @@ class Test(unittest2.TestCase):
             # add one I/O request to last bucket
             buckets[-1] = 1
             f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets]))
-        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, fio_v3_bucket_count)
-        self.A(max_timestamp_ms == 9000)
-        aligned_log = align_histo_log(raw_histo_log, 5, fio_v3_bucket_count, max_timestamp_ms)
+        (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, fio_v3_bucket_count, None)
+        self.A(min_timestamp_ms == 0 and max_timestamp_ms == 9000)
+        aligned_log = align_histo_log(raw_histo_log, 5, fio_v3_bucket_count, min_timestamp_ms, max_timestamp_ms)
         (time_ms, histo) = aligned_log[1]
         time_intervals = time_ranges(29, 64)
         expected_pctiles = { 100.0:(64*(1<<28))/1000.0 }
@@ -651,7 +748,10 @@ class Test(unittest2.TestCase):
 
 if __name__ == '__main__':
     if os.getenv('UNITTEST'):
-        sys.exit(unittest2.main())
+        if unittest2_imported:
+            sys.exit(unittest2.main())
+        else:
+            raise Exception('you must install unittest2 module to run unit test')
     else:
         compute_percentiles_from_logs()
 
diff --git a/zbd.c b/zbd.c
index 0f3636a525d835a1514de71a9f652729105cfaff..8acda1f62f32bcf38c458f3eb67110e54262da7c 100644 (file)
--- a/zbd.c
+++ b/zbd.c
@@ -128,9 +128,9 @@ static bool zbd_verify_sizes(void)
                                                 f->file_name);
                                        return false;
                                }
-                               log_info("%s: rounded up offset from %lu to %lu\n",
-                                        f->file_name, f->file_offset,
-                                        new_offset);
+                               log_info("%s: rounded up offset from %llu to %llu\n",
+                                        f->file_name, (unsigned long long) f->file_offset,
+                                        (unsigned long long) new_offset);
                                f->io_size -= (new_offset - f->file_offset);
                                f->file_offset = new_offset;
                        }
@@ -143,9 +143,9 @@ static bool zbd_verify_sizes(void)
                                                 f->file_name);
                                        return false;
                                }
-                               log_info("%s: rounded down io_size from %lu to %lu\n",
-                                        f->file_name, f->io_size,
-                                        new_end - f->file_offset);
+                               log_info("%s: rounded down io_size from %llu to %llu\n",
+                                        f->file_name, (unsigned long long) f->io_size,
+                                        (unsigned long long) new_end - f->file_offset);
                                f->io_size = new_end - f->file_offset;
                        }
                }
@@ -357,14 +357,15 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
        if (td->o.zone_size == 0) {
                td->o.zone_size = zone_size;
        } else if (td->o.zone_size != zone_size) {
-               log_info("fio: %s job parameter zonesize %lld does not match disk zone size %ld.\n",
-                        f->file_name, td->o.zone_size, zone_size);
+               log_info("fio: %s job parameter zonesize %llu does not match disk zone size %llu.\n",
+                        f->file_name, (unsigned long long) td->o.zone_size,
+                       (unsigned long long) zone_size);
                ret = -EINVAL;
                goto close;
        }
 
-       dprint(FD_ZBD, "Device %s has %d zones of size %lu KB\n", f->file_name,
-              nr_zones, zone_size / 1024);
+       dprint(FD_ZBD, "Device %s has %d zones of size %llu KB\n", f->file_name,
+              nr_zones, (unsigned long long) zone_size / 1024);
 
        zbd_info = scalloc(1, sizeof(*zbd_info) +
                           (nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
@@ -407,8 +408,8 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
                        break;
                ret = read_zone_info(fd, start_sector, buf, bufsz);
                if (ret < 0) {
-                       log_info("fio: BLKREPORTZONE(%lu) failed for %s (%d).\n",
-                                start_sector, f->file_name, -ret);
+                       log_info("fio: BLKREPORTZONE(%llu) failed for %s (%d).\n",
+                                (unsigned long long) start_sector, f->file_name, -ret);
                        goto close;
                }
        }
@@ -602,6 +603,12 @@ static int zbd_reset_range(struct thread_data *td, const struct fio_file *f,
        return ret;
 }
 
+static unsigned int zbd_zone_nr(struct zoned_block_device_info *zbd_info,
+                               struct fio_zone_info *zone)
+{
+       return zone - zbd_info->zone_info;
+}
+
 /**
  * zbd_reset_zone - reset the write pointer of a single zone
  * @td: FIO thread data.
@@ -613,12 +620,10 @@ static int zbd_reset_range(struct thread_data *td, const struct fio_file *f,
 static int zbd_reset_zone(struct thread_data *td, const struct fio_file *f,
                          struct fio_zone_info *z)
 {
-       int ret;
+       dprint(FD_ZBD, "%s: resetting wp of zone %u.\n", f->file_name,
+               zbd_zone_nr(f->zbd_info, z));
 
-       dprint(FD_ZBD, "%s: resetting wp of zone %lu.\n", f->file_name,
-              z - f->zbd_info->zone_info);
-       ret = zbd_reset_range(td, f, z->start, (z+1)->start - z->start);
-       return ret;
+       return zbd_reset_range(td, f, z->start, (z+1)->start - z->start);
 }
 
 /*
@@ -639,8 +644,8 @@ static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
        bool reset_wp;
        int res = 0;
 
-       dprint(FD_ZBD, "%s: examining zones %lu .. %lu\n", f->file_name,
-              zb - f->zbd_info->zone_info, ze - f->zbd_info->zone_info);
+       dprint(FD_ZBD, "%s: examining zones %u .. %u\n", f->file_name,
+               zbd_zone_nr(f->zbd_info, zb), zbd_zone_nr(f->zbd_info, ze));
        assert(f->fd != -1);
        for (z = zb; z < ze; z++) {
                pthread_mutex_lock(&z->mutex);
@@ -653,10 +658,10 @@ static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
                                start_z = z;
                        } else if (start_z < ze && !reset_wp) {
                                dprint(FD_ZBD,
-                                      "%s: resetting zones %lu .. %lu\n",
+                                      "%s: resetting zones %u .. %u\n",
                                       f->file_name,
-                                      start_z - f->zbd_info->zone_info,
-                                      z - f->zbd_info->zone_info);
+                                       zbd_zone_nr(f->zbd_info, start_z),
+                                       zbd_zone_nr(f->zbd_info, z));
                                if (zbd_reset_range(td, f, start_z->start,
                                                z->start - start_z->start) < 0)
                                        res = 1;
@@ -666,9 +671,9 @@ static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
                default:
                        if (start_z == ze)
                                break;
-                       dprint(FD_ZBD, "%s: resetting zones %lu .. %lu\n",
-                              f->file_name, start_z - f->zbd_info->zone_info,
-                              z - f->zbd_info->zone_info);
+                       dprint(FD_ZBD, "%s: resetting zones %u .. %u\n",
+                              f->file_name, zbd_zone_nr(f->zbd_info, start_z),
+                              zbd_zone_nr(f->zbd_info, z));
                        if (zbd_reset_range(td, f, start_z->start,
                                            z->start - start_z->start) < 0)
                                res = 1;
@@ -677,9 +682,9 @@ static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
                }
        }
        if (start_z < ze) {
-               dprint(FD_ZBD, "%s: resetting zones %lu .. %lu\n", f->file_name,
-                      start_z - f->zbd_info->zone_info,
-                      z - f->zbd_info->zone_info);
+               dprint(FD_ZBD, "%s: resetting zones %u .. %u\n", f->file_name,
+                       zbd_zone_nr(f->zbd_info, start_z),
+                       zbd_zone_nr(f->zbd_info, z));
                if (zbd_reset_range(td, f, start_z->start,
                                    z->start - start_z->start) < 0)
                        res = 1;
@@ -721,34 +726,68 @@ static bool zbd_dec_and_reset_write_cnt(const struct thread_data *td,
        return write_cnt == 0;
 }
 
-/* Check whether the value of zbd_info.sectors_with_data is correct. */
-static void check_swd(const struct thread_data *td, const struct fio_file *f)
+enum swd_action {
+       CHECK_SWD,
+       SET_SWD,
+};
+
+/* Calculate the number of sectors with data (swd) and perform action 'a' */
+static uint64_t zbd_process_swd(const struct fio_file *f, enum swd_action a)
 {
-#if 0
        struct fio_zone_info *zb, *ze, *z;
-       uint64_t swd;
+       uint64_t swd = 0;
 
        zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)];
        ze = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset +
                                                  f->io_size)];
-       swd = 0;
        for (z = zb; z < ze; z++) {
                pthread_mutex_lock(&z->mutex);
                swd += z->wp - z->start;
        }
        pthread_mutex_lock(&f->zbd_info->mutex);
-       assert(f->zbd_info->sectors_with_data == swd);
+       switch (a) {
+       case CHECK_SWD:
+               assert(f->zbd_info->sectors_with_data == swd);
+               break;
+       case SET_SWD:
+               f->zbd_info->sectors_with_data = swd;
+               break;
+       }
        pthread_mutex_unlock(&f->zbd_info->mutex);
        for (z = zb; z < ze; z++)
                pthread_mutex_unlock(&z->mutex);
-#endif
+
+       return swd;
+}
+
+/*
+ * The swd check is useful for debugging but takes too much time to leave
+ * it enabled all the time. Hence it is disabled by default.
+ */
+static const bool enable_check_swd = false;
+
+/* Check whether the value of zbd_info.sectors_with_data is correct. */
+static void zbd_check_swd(const struct fio_file *f)
+{
+       if (!enable_check_swd)
+               return;
+
+       zbd_process_swd(f, CHECK_SWD);
+}
+
+static void zbd_init_swd(struct fio_file *f)
+{
+       uint64_t swd;
+
+       swd = zbd_process_swd(f, SET_SWD);
+       dprint(FD_ZBD, "%s(%s): swd = %" PRIu64 "\n", __func__, f->file_name,
+              swd);
 }
 
 void zbd_file_reset(struct thread_data *td, struct fio_file *f)
 {
-       struct fio_zone_info *zb, *ze, *z;
+       struct fio_zone_info *zb, *ze;
        uint32_t zone_idx_e;
-       uint64_t swd = 0;
 
        if (!f->zbd_info)
                return;
@@ -756,16 +795,7 @@ void zbd_file_reset(struct thread_data *td, struct fio_file *f)
        zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)];
        zone_idx_e = zbd_zone_idx(f, f->file_offset + f->io_size);
        ze = &f->zbd_info->zone_info[zone_idx_e];
-       for (z = zb ; z < ze; z++) {
-               pthread_mutex_lock(&z->mutex);
-               swd += z->wp - z->start;
-       }
-       pthread_mutex_lock(&f->zbd_info->mutex);
-       f->zbd_info->sectors_with_data = swd;
-       pthread_mutex_unlock(&f->zbd_info->mutex);
-       for (z = zb ; z < ze; z++)
-               pthread_mutex_unlock(&z->mutex);
-       dprint(FD_ZBD, "%s(%s): swd = %ld\n", __func__, f->file_name, swd);
+       zbd_init_swd(f);
        /*
         * If data verification is enabled reset the affected zones before
         * writing any data to avoid that a zone reset has to be issued while
@@ -995,8 +1025,8 @@ static struct fio_zone_info *zbd_replay_write_order(struct thread_data *td,
        }
 
        if (z->verify_block * min_bs >= f->zbd_info->zone_size)
-               log_err("%s: %d * %d >= %ld\n", f->file_name, z->verify_block,
-                       min_bs, f->zbd_info->zone_size);
+               log_err("%s: %d * %d >= %llu\n", f->file_name, z->verify_block,
+                       min_bs, (unsigned long long) f->zbd_info->zone_size);
        io_u->offset = z->start + z->verify_block++ * min_bs;
        return z;
 }
@@ -1094,6 +1124,8 @@ static void zbd_post_submit(const struct io_u *io_u, bool success)
        }
 unlock:
        pthread_mutex_unlock(&z->mutex);
+
+       zbd_check_swd(io_u->file);
 }
 
 bool zbd_unaligned_write(int error_code)
@@ -1146,6 +1178,8 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
            io_u->ddir == DDIR_READ && td->o.read_beyond_wp)
                return io_u_accept;
 
+       zbd_check_swd(f);
+
        pthread_mutex_lock(&zb->mutex);
        switch (io_u->ddir) {
        case DDIR_READ:
@@ -1219,7 +1253,6 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
                }
                /* Check whether the zone reset threshold has been exceeded */
                if (td->o.zrf.u.f) {
-                       check_swd(td, f);
                        if (f->zbd_info->sectors_with_data >=
                            f->io_size * td->o.zrt.u.f &&
                            zbd_dec_and_reset_write_cnt(td, f)) {
@@ -1240,7 +1273,6 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
                        zb->reset_zone = 0;
                        if (zbd_reset_zone(td, f, zb) < 0)
                                goto eof;
-                       check_swd(td, f);
                }
                /* Make writes occur at the write pointer */
                assert(!zbd_zone_full(f, zb, min_bs));
@@ -1301,7 +1333,7 @@ char *zbd_write_status(const struct thread_stat *ts)
 {
        char *res;
 
-       if (asprintf(&res, "; %ld zone resets", ts->nr_zone_resets) < 0)
+       if (asprintf(&res, "; %llu zone resets", (unsigned long long) ts->nr_zone_resets) < 0)
                return NULL;
        return res;
 }
diff --git a/zbd.h b/zbd.h
index d750b67ecea45317f939f38d04f599046aa56e2d..33e6d8bd4146e2f7d3f983994399ba1c64d08a8f 100644 (file)
--- a/zbd.h
+++ b/zbd.h
@@ -31,8 +31,8 @@ enum io_u_action {
 
 /**
  * struct fio_zone_info - information about a single ZBD zone
- * @start: zone start in 512 byte units
- * @wp: zone write pointer location in 512 byte units
+ * @start: zone start location (bytes)
+ * @wp: zone write pointer location (bytes)
  * @verify_block: number of blocks that have been verified for this zone
  * @mutex: protects the modifiable members in this structure
  * @type: zone type (BLK_ZONE_TYPE_*)