Merge branch 'read_iolog-from-unix-socket' of https://github.com/aclamk/fio
authorJens Axboe <axboe@kernel.dk>
Fri, 3 Aug 2018 15:16:10 +0000 (09:16 -0600)
committerJens Axboe <axboe@kernel.dk>
Fri, 3 Aug 2018 15:16:10 +0000 (09:16 -0600)
* 'read_iolog-from-unix-socket' of https://github.com/aclamk/fio:
  iolog: allow to read_iolog from unix socket

45 files changed:
FIO-VERSION-GEN
HOWTO
backend.c
cconv.c
client.c
compiler/compiler.h
doc/fio-histo-log-pctiles.pdf [new file with mode: 0644]
engines/glusterfs_sync.c
engines/libaio.c
engines/libpmem.c
engines/sg.c
engines/solarisaio.c
file.h
filesetup.c
fio.1
fio.h
flist.h
gclient.c
gerror.c
gfio.c
goptions.c
graph.c
init.c
io_u.c
io_u.h
ioengines.c
iolog.c
iolog.h
lib/axmap.c
minmax.h
options.c
os/windows/posix.c [changed mode: 0755->0644]
oslib/libmtd_common.h
parse.c
parse.h
server.c
server.h
stat.c
stat.h
t/sgunmap-perf.py [new file with mode: 0755]
t/sgunmap-test.py [new file with mode: 0755]
thread_options.h
tickmarks.c
tools/hist/fio-histo-log-pctiles.py [new file with mode: 0755]
verify.c

index b28a1f3..99261fb 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.7
+DEF_VER=fio-3.8
 
 LF='
 '
diff --git a/HOWTO b/HOWTO
index 70eed28..804d93e 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -991,13 +991,15 @@ I/O type
                **write**
                                Sequential writes.
                **trim**
-                               Sequential trims (Linux block devices only).
+                               Sequential trims (Linux block devices and SCSI
+                               character devices only).
                **randread**
                                Random reads.
                **randwrite**
                                Random writes.
                **randtrim**
-                               Random trims (Linux block devices only).
+                               Random trims (Linux block devices and SCSI
+                               character devices only).
                **rw,readwrite**
                                Sequential mixed reads and writes.
                **randrw**
@@ -1748,7 +1750,7 @@ I/O engine
                        ioctl, or if the target is an sg character device we use
                        :manpage:`read(2)` and :manpage:`write(2)` for asynchronous
                        I/O. Requires :option:`filename` option to specify either block or
-                       character devices.
+                       character devices. This engine supports trim operations.
                        The sg engine includes engine specific options.
 
                **null**
@@ -2082,6 +2084,7 @@ with the caveat that when used on the command line, they must come after the
        the force unit access (fua) flag. Default is 0.
 
 .. option:: sg_write_mode=str : [sg]
+
        Specify the type of write commands to issue. This option can take three values:
 
        **write**
index a7e9184..3c45e78 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -454,7 +454,7 @@ int io_queue_event(struct thread_data *td, struct io_u *io_u, int *ret,
                        *ret = -io_u->error;
                        clear_io_u(td, io_u);
                } else if (io_u->resid) {
-                       int bytes = io_u->xfer_buflen - io_u->resid;
+                       long long bytes = io_u->xfer_buflen - io_u->resid;
                        struct fio_file *f = io_u->file;
 
                        if (bytes_issued)
@@ -583,7 +583,7 @@ static bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u)
 
                        if (x1 < y2 && y1 < x2) {
                                overlap = true;
-                               dprint(FD_IO, "in-flight overlap: %llu/%lu, %llu/%lu\n",
+                               dprint(FD_IO, "in-flight overlap: %llu/%llu, %llu/%llu\n",
                                                x1, io_u->buflen,
                                                y1, check_io_u->buflen);
                                break;
@@ -1033,7 +1033,7 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done)
                        log_io_piece(td, io_u);
 
                if (td->o.io_submit_mode == IO_MODE_OFFLOAD) {
-                       const unsigned long blen = io_u->xfer_buflen;
+                       const unsigned long long blen = io_u->xfer_buflen;
                        const enum fio_ddir __ddir = acct_ddir(io_u);
 
                        if (td->error)
@@ -1199,7 +1199,7 @@ static void cleanup_io_u(struct thread_data *td)
 static int init_io_u(struct thread_data *td)
 {
        struct io_u *io_u;
-       unsigned int max_bs, min_write;
+       unsigned long long max_bs, min_write;
        int cl_align, i, max_units;
        int data_xfer = 1, err;
        char *p;
@@ -1234,7 +1234,7 @@ static int init_io_u(struct thread_data *td)
                td->orig_buffer_size += page_mask + td->o.mem_align;
 
        if (td->o.mem_type == MEM_SHMHUGE || td->o.mem_type == MEM_MMAPHUGE) {
-               unsigned long bs;
+               unsigned long long bs;
 
                bs = td->orig_buffer_size + td->o.hugepage_size - 1;
                td->orig_buffer_size = bs & ~(td->o.hugepage_size - 1);
diff --git a/cconv.c b/cconv.c
index bfd699d..534bfb0 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -110,16 +110,16 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->start_offset_percent = le32_to_cpu(top->start_offset_percent);
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
-               o->bs[i] = le32_to_cpu(top->bs[i]);
-               o->ba[i] = le32_to_cpu(top->ba[i]);
-               o->min_bs[i] = le32_to_cpu(top->min_bs[i]);
-               o->max_bs[i] = le32_to_cpu(top->max_bs[i]);
+               o->bs[i] = le64_to_cpu(top->bs[i]);
+               o->ba[i] = le64_to_cpu(top->ba[i]);
+               o->min_bs[i] = le64_to_cpu(top->min_bs[i]);
+               o->max_bs[i] = le64_to_cpu(top->max_bs[i]);
                o->bssplit_nr[i] = le32_to_cpu(top->bssplit_nr[i]);
 
                if (o->bssplit_nr[i]) {
                        o->bssplit[i] = malloc(o->bssplit_nr[i] * sizeof(struct bssplit));
                        for (j = 0; j < o->bssplit_nr[i]; j++) {
-                               o->bssplit[i][j].bs = le32_to_cpu(top->bssplit[i][j].bs);
+                               o->bssplit[i][j].bs = le64_to_cpu(top->bssplit[i][j].bs);
                                o->bssplit[i][j].perc = le32_to_cpu(top->bssplit[i][j].perc);
                        }
                }
@@ -203,7 +203,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->gauss_dev.u.f = fio_uint64_to_double(le64_to_cpu(top->gauss_dev.u.i));
        o->random_generator = le32_to_cpu(top->random_generator);
        o->hugepage_size = le32_to_cpu(top->hugepage_size);
-       o->rw_min_bs = le32_to_cpu(top->rw_min_bs);
+       o->rw_min_bs = le64_to_cpu(top->rw_min_bs);
        o->thinktime = le32_to_cpu(top->thinktime);
        o->thinktime_spin = le32_to_cpu(top->thinktime_spin);
        o->thinktime_blocks = le32_to_cpu(top->thinktime_blocks);
@@ -410,7 +410,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->gauss_dev.u.i = __cpu_to_le64(fio_double_to_uint64(o->gauss_dev.u.f));
        top->random_generator = cpu_to_le32(o->random_generator);
        top->hugepage_size = cpu_to_le32(o->hugepage_size);
-       top->rw_min_bs = cpu_to_le32(o->rw_min_bs);
+       top->rw_min_bs = __cpu_to_le64(o->rw_min_bs);
        top->thinktime = cpu_to_le32(o->thinktime);
        top->thinktime_spin = cpu_to_le32(o->thinktime_spin);
        top->thinktime_blocks = cpu_to_le32(o->thinktime_blocks);
@@ -488,10 +488,10 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->write_hist_log = cpu_to_le32(o->write_hist_log);
 
        for (i = 0; i < DDIR_RWDIR_CNT; i++) {
-               top->bs[i] = cpu_to_le32(o->bs[i]);
-               top->ba[i] = cpu_to_le32(o->ba[i]);
-               top->min_bs[i] = cpu_to_le32(o->min_bs[i]);
-               top->max_bs[i] = cpu_to_le32(o->max_bs[i]);
+               top->bs[i] = __cpu_to_le64(o->bs[i]);
+               top->ba[i] = __cpu_to_le64(o->ba[i]);
+               top->min_bs[i] = __cpu_to_le64(o->min_bs[i]);
+               top->max_bs[i] = __cpu_to_le64(o->max_bs[i]);
                top->bssplit_nr[i] = cpu_to_le32(o->bssplit_nr[i]);
 
                if (o->bssplit_nr[i]) {
@@ -502,7 +502,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
                                bssplit_nr = BSSPLIT_MAX;
                        }
                        for (j = 0; j < bssplit_nr; j++) {
-                               top->bssplit[i][j].bs = cpu_to_le32(o->bssplit[i][j].bs);
+                               top->bssplit[i][j].bs = cpu_to_le64(o->bssplit[i][j].bs);
                                top->bssplit[i][j].perc = cpu_to_le32(o->bssplit[i][j].perc);
                        }
                }
index 2a86ea9..e2525c8 100644 (file)
--- a/client.c
+++ b/client.c
@@ -1357,8 +1357,8 @@ static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *sample
                entry = s->data.plat_entry;
                io_u_plat = entry->io_u_plat;
 
-               fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time,
-                                               io_sample_ddir(s), s->bs);
+               fprintf(f, "%lu, %u, %llu, ", (unsigned long) s->time,
+                                               io_sample_ddir(s), (unsigned long long) s->bs);
                for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) {
                        fprintf(f, "%llu, ", (unsigned long long)hist_sum(j, stride, io_u_plat, NULL));
                }
@@ -1647,7 +1647,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
                s->time         = le64_to_cpu(s->time);
                s->data.val     = le64_to_cpu(s->data.val);
                s->__ddir       = le32_to_cpu(s->__ddir);
-               s->bs           = le32_to_cpu(s->bs);
+               s->bs           = le64_to_cpu(s->bs);
 
                if (ret->log_offset) {
                        struct io_sample_offset *so = (void *) s;
index dacb737..ddfbcc1 100644 (file)
@@ -28,7 +28,7 @@
  */
 #define typecheck(type,x) \
 ({     type __dummy; \
-       typeof(x) __dummy2; \
+       __typeof__(x) __dummy2; \
        (void)(&__dummy == &__dummy2); \
        1; \
 })
@@ -70,7 +70,7 @@
 
 #ifdef FIO_INTERNAL
 #define ARRAY_SIZE(x)    (sizeof((x)) / (sizeof((x)[0])))
-#define FIELD_SIZE(s, f) (sizeof(((typeof(s))0)->f))
+#define FIELD_SIZE(s, f) (sizeof(((__typeof__(s))0)->f))
 #endif
 
 #endif
diff --git a/doc/fio-histo-log-pctiles.pdf b/doc/fio-histo-log-pctiles.pdf
new file mode 100644 (file)
index 0000000..069ab99
Binary files /dev/null and b/doc/fio-histo-log-pctiles.pdf differ
index a10e0ed..099a5af 100644 (file)
@@ -34,7 +34,7 @@ static enum fio_q_status fio_gf_queue(struct thread_data *td, struct io_u *io_u)
        struct gf_data *g = td->io_ops_data;
        int ret = 0;
 
-       dprint(FD_FILE, "fio queue len %lu\n", io_u->xfer_buflen);
+       dprint(FD_FILE, "fio queue len %llu\n", io_u->xfer_buflen);
        fio_ro_check(td, io_u);
 
        if (io_u->ddir == DDIR_READ)
@@ -50,7 +50,7 @@ static enum fio_q_status fio_gf_queue(struct thread_data *td, struct io_u *io_u)
                io_u->error = EINVAL;
                return FIO_Q_COMPLETED;
        }
-       dprint(FD_FILE, "fio len %lu ret %d\n", io_u->xfer_buflen, ret);
+       dprint(FD_FILE, "fio len %llu ret %d\n", io_u->xfer_buflen, ret);
        if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir))
                LAST_POS(io_u->file) = io_u->offset + ret;
 
index dae2a70..7ac36b2 100644 (file)
@@ -207,6 +207,8 @@ static enum fio_q_status fio_libaio_queue(struct thread_data *td,
                        return FIO_Q_BUSY;
 
                do_io_u_trim(td, io_u);
+               io_u_mark_submit(td, 1);
+               io_u_mark_complete(td, 1);
                return FIO_Q_COMPLETED;
        }
 
index 21ff4f6..4ef3094 100644 (file)
@@ -499,7 +499,7 @@ static int fio_libpmem_init(struct thread_data *td)
 {
        struct thread_options *o = &td->o;
 
-       dprint(FD_IO,"o->rw_min_bs %d \n o->fsync_blocks %d \n o->fdatasync_blocks %d \n",
+       dprint(FD_IO,"o->rw_min_bs %llu \n o->fsync_blocks %d \n o->fdatasync_blocks %d \n",
                        o->rw_min_bs,o->fsync_blocks,o->fdatasync_blocks);
        dprint(FD_IO, "DEBUG fio_libpmem_init\n");
 
index 06cd194..7741f83 100644 (file)
@@ -3,6 +3,51 @@
  *
  * IO engine that uses the Linux SG v3 interface to talk to SCSI devices
  *
+ * This ioengine can operate in two modes:
+ *     sync    with block devices (/dev/sdX) or
+ *             with character devices (/dev/sgY) with direct=1 or sync=1
+ *     async   with character devices with direct=0 and sync=0
+ *
+ * What value does queue() return for the different cases?
+ *                             queue() return value
+ * In sync mode:
+ *  /dev/sdX           RWT     FIO_Q_COMPLETED
+ *  /dev/sgY           RWT     FIO_Q_COMPLETED
+ *   with direct=1 or sync=1
+ *
+ * In async mode:
+ *  /dev/sgY           RWT     FIO_Q_QUEUED
+ *   direct=0 and sync=0
+ *
+ * Because FIO_SYNCIO is set for this ioengine td_io_queue() will fill in
+ * issue_time *before* each IO is sent to queue()
+ *
+ * Where are the IO counting functions called for the different cases?
+ *
+ * In sync mode:
+ *  /dev/sdX (commit==NULL)
+ *   RWT
+ *    io_u_mark_depth()                        called in td_io_queue()
+ *    io_u_mark_submit/complete()      called in td_io_queue()
+ *    issue_time                       set in td_io_queue()
+ *
+ *  /dev/sgY with direct=1 or sync=1 (commit does nothing)
+ *   RWT
+ *    io_u_mark_depth()                        called in td_io_queue()
+ *    io_u_mark_submit/complete()      called in queue()
+ *    issue_time                       set in td_io_queue()
+ *  
+ * In async mode:
+ *  /dev/sgY with direct=0 and sync=0
+ *   RW: read and write operations are submitted in queue()
+ *    io_u_mark_depth()                        called in td_io_commit()
+ *    io_u_mark_submit()               called in queue()
+ *    issue_time                       set in td_io_queue()
+ *   T: trim operations are queued in queue() and submitted in commit()
+ *    io_u_mark_depth()                        called in td_io_commit()
+ *    io_u_mark_submit()               called in commit()
+ *    issue_time                       set in commit()
+ *
  */
 #include <stdio.h>
 #include <stdlib.h>
@@ -81,6 +126,9 @@ static struct fio_option options[] = {
 #define MAX_10B_LBA  0xFFFFFFFFULL
 #define SCSI_TIMEOUT_MS 30000   // 30 second timeout; currently no method to override
 #define MAX_SB 64               // sense block maximum return size
+/*
+#define FIO_SGIO_DEBUG
+*/
 
 struct sgio_cmd {
        unsigned char cdb[16];      // enhanced from 10 to support 16 byte commands
@@ -88,6 +136,12 @@ struct sgio_cmd {
        int nr;
 };
 
+struct sgio_trim {     /* one pending unmap command and the io_us merged into it */
+       char *unmap_param;              /* unmap parameter list; used as hdr->dxferp */
+       unsigned int unmap_range_count; /* number of ranges queued so far */
+       struct io_u **trim_io_us;       /* io_us queued here; [0] carries the sg_io_hdr */
+};
+
 struct sgio_data {
        struct sgio_cmd *cmds;
        struct io_u **events;
@@ -96,8 +150,18 @@ struct sgio_data {
        void *sgbuf;
        unsigned int bs;
        int type_checked;
+       struct sgio_trim **trim_queues;
+       int current_queue;
+#ifdef FIO_SGIO_DEBUG
+       unsigned int *trim_queue_map;
+#endif
 };
 
+static inline bool sgio_unbuffered(struct thread_data *td)
+{
+       return td->o.sync_io || td->o.odirect; /* either setting forces the synchronous path */
+}
+
 static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
                          struct io_u *io_u, int fs)
 {
@@ -113,6 +177,7 @@ static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
        hdr->mx_sb_len = sizeof(sc->sb);
        hdr->pack_id = io_u->index;
        hdr->usr_ptr = io_u;
+       hdr->timeout = SCSI_TIMEOUT_MS;
 
        if (fs) {
                hdr->dxferp = io_u->xfer_buf;
@@ -165,10 +230,11 @@ static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
                              const struct timespec fio_unused *t)
 {
        struct sgio_data *sd = td->io_ops_data;
-       int left = max, eventNum, ret, r = 0;
+       int left = max, eventNum, ret, r = 0, trims = 0;
        void *buf = sd->sgbuf;
-       unsigned int i, events;
+       unsigned int i, j, events;
        struct fio_file *f;
+       struct io_u *io_u;
 
        /*
         * Fill in the file descriptors
@@ -186,10 +252,20 @@ static int fio_sgio_getevents(struct thread_data *td, unsigned int min,
                sd->pfds[i].events = POLLIN;
        }
 
-       while (left) {
+       /*
+       ** There are two counters here:
+       **  - number of SCSI commands completed
+       **  - number of io_us completed
+       **
+       ** These are the same with reads and writes, but
+       ** could differ with trim/unmap commands because
+       ** a single unmap can include multiple io_us
+       */
+
+       while (left > 0) {
                char *p;
 
-               dprint(FD_IO, "sgio_getevents: sd %p: left=%d\n", sd, left);
+               dprint(FD_IO, "sgio_getevents: sd %p: min=%d, max=%d, left=%d\n", sd, min, max, left);
 
                do {
                        if (!min)
@@ -217,15 +293,21 @@ re_read:
                for_each_file(td, f, i) {
                        for (eventNum = 0; eventNum < left; eventNum++) {
                                ret = sg_fd_read(f->fd, p, sizeof(struct sg_io_hdr));
-                               dprint(FD_IO, "sgio_getevents: ret: %d\n", ret);
+                               dprint(FD_IO, "sgio_getevents: sg_fd_read ret: %d\n", ret);
                                if (ret) {
                                        r = -ret;
                                        td_verror(td, r, "sg_read");
                                        break;
                                }
+                               io_u = ((struct sg_io_hdr *)p)->usr_ptr;
+                               if (io_u->ddir == DDIR_TRIM) {
+                                       events += sd->trim_queues[io_u->index]->unmap_range_count;
+                                       eventNum += sd->trim_queues[io_u->index]->unmap_range_count - 1;
+                               } else
+                                       events++;
+
                                p += sizeof(struct sg_io_hdr);
-                               events++;
-                               dprint(FD_IO, "sgio_getevents: events: %d\n", events);
+                               dprint(FD_IO, "sgio_getevents: events: %d, eventNum: %d, left: %d\n", events, eventNum, left);
                        }
                }
 
@@ -241,14 +323,38 @@ re_read:
 
                for (i = 0; i < events; i++) {
                        struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
-                       sd->events[i] = hdr->usr_ptr;
+                       sd->events[i + trims] = hdr->usr_ptr;
+                       io_u = (struct io_u *)(hdr->usr_ptr);
 
-                       /* record if an io error occurred, ignore resid */
                        if (hdr->info & SG_INFO_CHECK) {
-                               struct io_u *io_u;
-                               io_u = (struct io_u *)(hdr->usr_ptr);
+                               /* record if an io error occurred, ignore resid */
                                memcpy(&io_u->hdr, hdr, sizeof(struct sg_io_hdr));
-                               sd->events[i]->error = EIO;
+                               sd->events[i + trims]->error = EIO;
+                       }
+
+                       if (io_u->ddir == DDIR_TRIM) {
+                               struct sgio_trim *st = sd->trim_queues[io_u->index];
+#ifdef FIO_SGIO_DEBUG
+                               assert(st->trim_io_us[0] == io_u);
+                               assert(sd->trim_queue_map[io_u->index] == io_u->index);
+                               dprint(FD_IO, "sgio_getevents: reaping %d io_us from trim queue %d\n", st->unmap_range_count, io_u->index);
+                               dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", io_u->index, i+trims);
+#endif
+                               for (j = 1; j < st->unmap_range_count; j++) {
+                                       ++trims;
+                                       sd->events[i + trims] = st->trim_io_us[j];
+#ifdef FIO_SGIO_DEBUG
+                                       dprint(FD_IO, "sgio_getevents: reaped io_u %d and stored in events[%d]\n", st->trim_io_us[j]->index, i+trims);
+                                       assert(sd->trim_queue_map[st->trim_io_us[j]->index] == io_u->index);
+#endif
+                                       if (hdr->info & SG_INFO_CHECK) {
+                                               /* record if an io error occurred, ignore resid */
+                                               memcpy(&st->trim_io_us[j]->hdr, hdr, sizeof(struct sg_io_hdr));
+                                               sd->events[i + trims]->error = EIO;
+                                       }
+                               }
+                               events -= st->unmap_range_count - 1;
+                               st->unmap_range_count = 0;
                        }
                }
        }
@@ -287,7 +393,8 @@ static enum fio_q_status fio_sgio_ioctl_doio(struct thread_data *td,
        return FIO_Q_COMPLETED;
 }
 
-static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync)
+static enum fio_q_status fio_sgio_rw_doio(struct fio_file *f,
+                                         struct io_u *io_u, int do_sync)
 {
        struct sg_io_hdr *hdr = &io_u->hdr;
        int ret;
@@ -311,10 +418,11 @@ static int fio_sgio_rw_doio(struct fio_file *f, struct io_u *io_u, int do_sync)
        return FIO_Q_QUEUED;
 }
 
-static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync)
+static enum fio_q_status fio_sgio_doio(struct thread_data *td,
+                                      struct io_u *io_u, int do_sync)
 {
        struct fio_file *f = io_u->file;
-       int ret;
+       enum fio_q_status ret;
 
        if (f->filetype == FIO_TYPE_BLOCK) {
                ret = fio_sgio_ioctl_doio(td, f, io_u);
@@ -328,12 +436,41 @@ static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int do_sync)
        return ret;
 }
 
+/* Store lba/nr_blocks big-endian in the cdb: 32+16 bits below MAX_10B_LBA, else 64+32 bits. */
+static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba,
+                           unsigned long long nr_blocks)
+{
+       if (lba < MAX_10B_LBA) {
+               hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
+               hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
+               hdr->cmdp[4] = (unsigned char) ((lba >>  8) & 0xff);
+               hdr->cmdp[5] = (unsigned char) (lba & 0xff);
+               hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
+               hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
+       } else {
+               hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
+               hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
+               hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
+               hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
+               hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
+               hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
+               hdr->cmdp[8] = (unsigned char) ((lba >>  8) & 0xff);
+               hdr->cmdp[9] = (unsigned char) (lba & 0xff);
+               /* length is a 4-byte field (cmdp[10..13]); its top byte is bits 31..24, not 39..32 */
+               hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 24) & 0xff);
+               hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
+               hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
+               hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
+       }
+}
+
 static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
 {
        struct sg_io_hdr *hdr = &io_u->hdr;
        struct sg_options *o = td->eo;
        struct sgio_data *sd = td->io_ops_data;
-       long long nr_blocks, lba;
+       unsigned long long nr_blocks, lba;
+       int offset;
 
        if (io_u->xfer_buflen & (sd->bs - 1)) {
                log_err("read/write not sector aligned\n");
@@ -355,6 +492,8 @@ static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
                if (o->readfua)
                        hdr->cmdp[1] |= 0x08;
 
+               fio_sgio_rw_lba(hdr, lba, nr_blocks);
+
        } else if (io_u->ddir == DDIR_WRITE) {
                sgio_hdr_init(sd, hdr, io_u, 1);
 
@@ -383,58 +522,111 @@ static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
                                hdr->cmdp[0] = 0x93; // write same(16)
                        break;
                };
-       } else {
+
+               fio_sgio_rw_lba(hdr, lba, nr_blocks);
+
+       } else if (io_u->ddir == DDIR_TRIM) {
+               struct sgio_trim *st;
+
+               if (sd->current_queue == -1) {
+                       sgio_hdr_init(sd, hdr, io_u, 0);
+
+                       hdr->cmd_len = 10;
+                       hdr->dxfer_direction = SG_DXFER_TO_DEV;
+                       hdr->cmdp[0] = 0x42; // unmap
+                       sd->current_queue = io_u->index;
+                       st = sd->trim_queues[sd->current_queue];
+                       hdr->dxferp = st->unmap_param;
+#ifdef FIO_SGIO_DEBUG
+                       assert(sd->trim_queues[io_u->index]->unmap_range_count == 0);
+                       dprint(FD_IO, "sg: creating new queue based on io_u %d\n", io_u->index);
+#endif
+               }
+               else
+                       st = sd->trim_queues[sd->current_queue];
+
+               dprint(FD_IO, "sg: adding io_u %d to trim queue %d\n", io_u->index, sd->current_queue);
+               st->trim_io_us[st->unmap_range_count] = io_u;
+#ifdef FIO_SGIO_DEBUG
+               sd->trim_queue_map[io_u->index] = sd->current_queue;
+#endif
+
+               offset = 8 + 16 * st->unmap_range_count;
+               st->unmap_param[offset] = (unsigned char) ((lba >> 56) & 0xff);
+               st->unmap_param[offset+1] = (unsigned char) ((lba >> 48) & 0xff);
+               st->unmap_param[offset+2] = (unsigned char) ((lba >> 40) & 0xff);
+               st->unmap_param[offset+3] = (unsigned char) ((lba >> 32) & 0xff);
+               st->unmap_param[offset+4] = (unsigned char) ((lba >> 24) & 0xff);
+               st->unmap_param[offset+5] = (unsigned char) ((lba >> 16) & 0xff);
+               st->unmap_param[offset+6] = (unsigned char) ((lba >>  8) & 0xff);
+               st->unmap_param[offset+7] = (unsigned char) (lba & 0xff);
+               st->unmap_param[offset+8] = (unsigned char) ((nr_blocks >> 32) & 0xff);
+               st->unmap_param[offset+9] = (unsigned char) ((nr_blocks >> 16) & 0xff);
+               st->unmap_param[offset+10] = (unsigned char) ((nr_blocks >> 8) & 0xff);
+               st->unmap_param[offset+11] = (unsigned char) (nr_blocks & 0xff);
+
+               st->unmap_range_count++;
+
+       } else if (ddir_sync(io_u->ddir)) {
                sgio_hdr_init(sd, hdr, io_u, 0);
                hdr->dxfer_direction = SG_DXFER_NONE;
                if (lba < MAX_10B_LBA)
                        hdr->cmdp[0] = 0x35; // synccache(10)
                else
                        hdr->cmdp[0] = 0x91; // synccache(16)
-       }
+       } else
+               assert(0);
 
-       /*
-        * for synccache, we leave lba and length to 0 to sync all
-        * blocks on medium.
-        */
-       if (hdr->dxfer_direction != SG_DXFER_NONE) {
-               if (lba < MAX_10B_LBA) {
-                       hdr->cmdp[2] = (unsigned char) ((lba >> 24) & 0xff);
-                       hdr->cmdp[3] = (unsigned char) ((lba >> 16) & 0xff);
-                       hdr->cmdp[4] = (unsigned char) ((lba >>  8) & 0xff);
-                       hdr->cmdp[5] = (unsigned char) (lba & 0xff);
-                       hdr->cmdp[7] = (unsigned char) ((nr_blocks >> 8) & 0xff);
-                       hdr->cmdp[8] = (unsigned char) (nr_blocks & 0xff);
-               } else {
-                       hdr->cmdp[2] = (unsigned char) ((lba >> 56) & 0xff);
-                       hdr->cmdp[3] = (unsigned char) ((lba >> 48) & 0xff);
-                       hdr->cmdp[4] = (unsigned char) ((lba >> 40) & 0xff);
-                       hdr->cmdp[5] = (unsigned char) ((lba >> 32) & 0xff);
-                       hdr->cmdp[6] = (unsigned char) ((lba >> 24) & 0xff);
-                       hdr->cmdp[7] = (unsigned char) ((lba >> 16) & 0xff);
-                       hdr->cmdp[8] = (unsigned char) ((lba >>  8) & 0xff);
-                       hdr->cmdp[9] = (unsigned char) (lba & 0xff);
-                       hdr->cmdp[10] = (unsigned char) ((nr_blocks >> 32) & 0xff);
-                       hdr->cmdp[11] = (unsigned char) ((nr_blocks >> 16) & 0xff);
-                       hdr->cmdp[12] = (unsigned char) ((nr_blocks >> 8) & 0xff);
-                       hdr->cmdp[13] = (unsigned char) (nr_blocks & 0xff);
-               }
-       }
-
-       hdr->timeout = SCSI_TIMEOUT_MS;
        return 0;
 }
 
+/* Fill in the unmap cdb length and the parameter-list header from the queued range count. */
+static void fio_sgio_unmap_setup(struct sg_io_hdr *hdr, struct sgio_trim *st)
+{
+       const unsigned int desc_len = 16 * st->unmap_range_count; /* 16 bytes per range */
+       const unsigned int list_len = desc_len + 8;              /* plus 8-byte header */
+       hdr->dxfer_len = list_len;
+       hdr->cmdp[7] = (unsigned char) ((list_len >> 8) & 0xff);
+       hdr->cmdp[8] = (unsigned char) (list_len & 0xff);
+       st->unmap_param[0] = (unsigned char) (((desc_len + 6) >> 8) & 0xff);
+       st->unmap_param[1] = (unsigned char) ((desc_len + 6) & 0xff);
+       st->unmap_param[2] = (unsigned char) ((desc_len >> 8) & 0xff);
+       st->unmap_param[3] = (unsigned char) (desc_len & 0xff);
+}
+
 static enum fio_q_status fio_sgio_queue(struct thread_data *td,
                                        struct io_u *io_u)
 {
        struct sg_io_hdr *hdr = &io_u->hdr;
+       struct sgio_data *sd = td->io_ops_data;
        int ret, do_sync = 0;
 
        fio_ro_check(td, io_u);
 
-       if (td->o.sync_io || td->o.odirect || ddir_sync(io_u->ddir))
+       if (sgio_unbuffered(td) || ddir_sync(io_u->ddir))
                do_sync = 1;
 
+       if (io_u->ddir == DDIR_TRIM) {
+               if (do_sync || io_u->file->filetype == FIO_TYPE_BLOCK) {
+                       struct sgio_trim *st = sd->trim_queues[sd->current_queue];
+
+                       /* finish cdb setup for unmap because we are
+                       ** doing unmap commands synchronously */
+#ifdef FIO_SGIO_DEBUG
+                       assert(st->unmap_range_count == 1);
+                       assert(io_u == st->trim_io_us[0]);
+#endif
+                       hdr = &io_u->hdr;
+
+                       fio_sgio_unmap_setup(hdr, st);
+
+                       st->unmap_range_count = 0;
+                       sd->current_queue = -1;
+               } else
+                       /* queue up trim ranges and submit in commit() */
+                       return FIO_Q_QUEUED;
+       }
+
        ret = fio_sgio_doio(td, io_u, do_sync);
 
        if (ret < 0)
@@ -442,6 +634,14 @@ static enum fio_q_status fio_sgio_queue(struct thread_data *td,
        else if (hdr->status) {
                io_u->resid = hdr->resid;
                io_u->error = EIO;
+       } else if (td->io_ops->commit != NULL) {
+               if (do_sync && !ddir_sync(io_u->ddir)) {
+                       io_u_mark_submit(td, 1);
+                       io_u_mark_complete(td, 1);
+               } else if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
+                       io_u_mark_submit(td, 1);
+                       io_u_queued(td, io_u);
+               }
        }
 
        if (io_u->error) {
@@ -452,6 +652,61 @@ static enum fio_q_status fio_sgio_queue(struct thread_data *td,
        return ret;
 }
 
+static int fio_sgio_commit(struct thread_data *td)
+{ /* Submit the trim ranges queued up by fio_sgio_queue() with one fio_sgio_rw_doio() call. */
+       struct sgio_data *sd = td->io_ops_data;
+       struct sgio_trim *st;
+       struct io_u *io_u;
+       struct sg_io_hdr *hdr;
+       struct timespec now;
+       unsigned int i;
+       int ret;
+
+       if (sd->current_queue == -1) /* -1 means no trim queue is currently selected */
+               return 0; /* nothing to submit */
+
+       st = sd->trim_queues[sd->current_queue];
+       io_u = st->trim_io_us[0]; /* the first io_u of the batch supplies the header and file */
+       hdr = &io_u->hdr;
+
+       fio_sgio_unmap_setup(hdr, st); /* finish the unmap command setup from the queued ranges */
+
+       sd->current_queue = -1; /* mark the queue as no longer current before issuing */
+
+       ret = fio_sgio_rw_doio(io_u->file, io_u, 0); /* NOTE(review): last arg 0 is presumably "not sync" - confirm */
+
+       if (ret < 0) /* submission failed: record errno on every io_u in the batch */
+               for (i = 0; i < st->unmap_range_count; i++)
+                       st->trim_io_us[i]->error = errno;
+       else if (hdr->status) /* non-zero header status: flag every io_u with EIO and the residual */
+               for (i = 0; i < st->unmap_range_count; i++) {
+                       st->trim_io_us[i]->resid = hdr->resid;
+                       st->trim_io_us[i]->error = EIO;
+               }
+       else {
+               if (fio_fill_issue_time(td)) { /* io_u_queued() is only called when issue times are wanted */
+                       fio_gettime(&now, NULL); /* one timestamp shared by the whole batch */
+                       for (i = 0; i < st->unmap_range_count; i++) {
+                               struct io_u *io_u = st->trim_io_us[i]; /* NOTE(review): shadows the outer io_u */
+
+                               memcpy(&io_u->issue_time, &now, sizeof(now));
+                               io_u_queued(td, io_u);
+                       }
+               }
+               io_u_mark_submit(td, st->unmap_range_count); /* account for all ranges as submitted */
+       }
+
+       if (io_u->error) { /* outer io_u (trim_io_us[0]); both error paths above set every entry */
+               td_verror(td, io_u->error, "xfer");
+               return 0; /* error is reported via td_verror(); commit itself still returns 0 */
+       }
+
+       if (ret == FIO_Q_QUEUED) /* a queued status from fio_sgio_rw_doio() is reported as 0 */
+               return 0;
+       else
+               return ret;
+}
+
 static struct io_u *fio_sgio_event(struct thread_data *td, int event)
 {
        struct sgio_data *sd = td->io_ops_data;
@@ -553,6 +808,7 @@ static int fio_sgio_read_capacity(struct thread_data *td, unsigned int *bs,
 static void fio_sgio_cleanup(struct thread_data *td)
 {
        struct sgio_data *sd = td->io_ops_data;
+       int i;
 
        if (sd) {
                free(sd->events);
@@ -560,6 +816,17 @@ static void fio_sgio_cleanup(struct thread_data *td)
                free(sd->fd_flags);
                free(sd->pfds);
                free(sd->sgbuf);
+#ifdef FIO_SGIO_DEBUG
+               free(sd->trim_queue_map);
+#endif
+
+               for (i = 0; i < td->o.iodepth; i++) {
+                       free(sd->trim_queues[i]->unmap_param);
+                       free(sd->trim_queues[i]->trim_io_us);
+                       free(sd->trim_queues[i]);
+               }
+
+               free(sd->trim_queues);
                free(sd);
        }
 }
@@ -567,20 +834,30 @@ static void fio_sgio_cleanup(struct thread_data *td)
 static int fio_sgio_init(struct thread_data *td)
 {
        struct sgio_data *sd;
+       struct sgio_trim *st;
+       int i;
 
-       sd = malloc(sizeof(*sd));
-       memset(sd, 0, sizeof(*sd));
-       sd->cmds = malloc(td->o.iodepth * sizeof(struct sgio_cmd));
-       memset(sd->cmds, 0, td->o.iodepth * sizeof(struct sgio_cmd));
-       sd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
-       memset(sd->events, 0, td->o.iodepth * sizeof(struct io_u *));
-       sd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
-       memset(sd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);
-       sd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
-       memset(sd->fd_flags, 0, sizeof(int) * td->o.nr_files);
-       sd->sgbuf = malloc(sizeof(struct sg_io_hdr) * td->o.iodepth);
-       memset(sd->sgbuf, 0, sizeof(struct sg_io_hdr) * td->o.iodepth);
+       sd = calloc(1, sizeof(*sd));
+       sd->cmds = calloc(td->o.iodepth, sizeof(struct sgio_cmd));
+       sd->sgbuf = calloc(td->o.iodepth, sizeof(struct sg_io_hdr));
+       sd->events = calloc(td->o.iodepth, sizeof(struct io_u *));
+       sd->pfds = calloc(td->o.nr_files, sizeof(struct pollfd));
+       sd->fd_flags = calloc(td->o.nr_files, sizeof(int));
        sd->type_checked = 0;
+
+       sd->trim_queues = calloc(td->o.iodepth, sizeof(struct sgio_trim *));
+       sd->current_queue = -1;
+#ifdef FIO_SGIO_DEBUG
+       sd->trim_queue_map = calloc(td->o.iodepth, sizeof(int));
+#endif
+       for (i = 0; i < td->o.iodepth; i++) {
+               sd->trim_queues[i] = calloc(1, sizeof(struct sgio_trim));
+               st = sd->trim_queues[i];
+               st->unmap_param = calloc(td->o.iodepth + 1, sizeof(char[16]));
+               st->unmap_range_count = 0;
+               st->trim_io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
+       }
+
        td->io_ops_data = sd;
 
        /*
@@ -632,6 +909,12 @@ static int fio_sgio_type_check(struct thread_data *td, struct fio_file *f)
        if (f->filetype == FIO_TYPE_BLOCK) {
                td->io_ops->getevents = NULL;
                td->io_ops->event = NULL;
+               td->io_ops->commit = NULL;
+               /*
+               ** Setting these functions to null may cause problems
+               ** with filename=/dev/sda:/dev/sg0 since we are only
+               ** considering a single file
+               */
        }
        sd->type_checked = 1;
 
@@ -848,6 +1131,23 @@ static char *fio_sgio_errdetails(struct io_u *io_u)
                        snprintf(msgchunk, MAXMSGCHUNK, "SG Driver: %d bytes out of %d not transferred. ", hdr->resid, hdr->dxfer_len);
                        strlcat(msg, msgchunk, MAXERRDETAIL);
                }
+               if (hdr->cmdp) {
+                       strlcat(msg, "cdb:", MAXERRDETAIL);
+                       for (i = 0; i < hdr->cmd_len; i++) {
+                               snprintf(msgchunk, MAXMSGCHUNK, " %02x", hdr->cmdp[i]);
+                               strlcat(msg, msgchunk, MAXERRDETAIL);
+                       }
+                       strlcat(msg, ". ", MAXERRDETAIL);
+                       if (io_u->ddir == DDIR_TRIM) {
+                               unsigned char *param_list = hdr->dxferp;
+                               strlcat(msg, "dxferp:", MAXERRDETAIL);
+                               for (i = 0; i < hdr->dxfer_len; i++) {
+                                       snprintf(msgchunk, MAXMSGCHUNK, " %02x", param_list[i]);
+                                       strlcat(msg, msgchunk, MAXERRDETAIL);
+                               }
+                               strlcat(msg, ". ", MAXERRDETAIL);
+                       }
+               }
        }
 
        if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg))
@@ -906,6 +1206,7 @@ static struct ioengine_ops ioengine = {
        .init           = fio_sgio_init,
        .prep           = fio_sgio_prep,
        .queue          = fio_sgio_queue,
+       .commit         = fio_sgio_commit,
        .getevents      = fio_sgio_getevents,
        .errdetails     = fio_sgio_errdetails,
        .event          = fio_sgio_event,
index 151f31d..21e9593 100644 (file)
@@ -105,7 +105,7 @@ static struct io_u *fio_solarisaio_event(struct thread_data *td, int event)
        return sd->aio_events[event];
 }
 
-static int fio_solarisaio_queue(struct thread_data fio_unused *td,
+static enum fio_q_status fio_solarisaio_queue(struct thread_data fio_unused *td,
                              struct io_u *io_u)
 {
        struct solarisaio_data *sd = td->io_ops_data;
diff --git a/file.h b/file.h
index 8fd34b1..c0a547e 100644 (file)
--- a/file.h
+++ b/file.h
@@ -86,7 +86,7 @@ struct fio_file {
         */
        unsigned int major, minor;
        int fileno;
-       int bs;
+       unsigned long long bs;
        char *file_name;
 
        /*
index a2427a1..accb67a 100644 (file)
@@ -107,7 +107,7 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
 {
        int new_layout = 0, unlink_file = 0, flags;
        unsigned long long left;
-       unsigned int bs;
+       unsigned long long bs;
        char *b = NULL;
 
        if (read_only) {
@@ -260,7 +260,7 @@ static bool pre_read_file(struct thread_data *td, struct fio_file *f)
 {
        int r, did_open = 0, old_runstate;
        unsigned long long left;
-       unsigned int bs;
+       unsigned long long bs;
        bool ret = true;
        char *b;
 
@@ -900,7 +900,7 @@ int setup_files(struct thread_data *td)
        unsigned int i, nr_fs_extra = 0;
        int err = 0, need_extend;
        int old_state;
-       const unsigned int bs = td_min_bs(td);
+       const unsigned long long bs = td_min_bs(td);
        uint64_t fs = 0;
 
        dprint(FD_FILE, "setup files\n");
diff --git a/fio.1 b/fio.1
index 6d2eba6..a446aba 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -757,7 +757,7 @@ Sequential reads.
 Sequential writes.
 .TP
 .B trim
-Sequential trims (Linux block devices only).
+Sequential trims (Linux block devices and SCSI character devices only).
 .TP
 .B randread
 Random reads.
@@ -766,7 +766,7 @@ Random reads.
 Random writes.
 .TP
 .B randtrim
-Random trims (Linux block devices only).
+Random trims (Linux block devices and SCSI character devices only).
 .TP
 .B rw,readwrite
 Sequential mixed reads and writes.
@@ -1524,7 +1524,8 @@ SCSI generic sg v3 I/O. May either be synchronous using the SG_IO
 ioctl, or if the target is an sg character device we use
 \fBread\fR\|(2) and \fBwrite\fR\|(2) for asynchronous
 I/O. Requires \fBfilename\fR option to specify either block or
-character devices. The sg engine includes engine specific options.
+character devices. This engine supports trim operations. The
+sg engine includes engine specific options.
 .TP
 .B null
 Doesn't transfer any data, just pretends to. This is mainly used to
diff --git a/fio.h b/fio.h
index 3ac552b..685aab1 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -736,17 +736,17 @@ static inline bool should_check_rate(struct thread_data *td)
        return ddir_rw_sum(td->bytes_done) != 0;
 }
 
-static inline unsigned int td_max_bs(struct thread_data *td)
+static inline unsigned long long td_max_bs(struct thread_data *td)
 {
-       unsigned int max_bs;
+       unsigned long long max_bs;
 
        max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
        return max(td->o.max_bs[DDIR_TRIM], max_bs);
 }
 
-static inline unsigned int td_min_bs(struct thread_data *td)
+static inline unsigned long long td_min_bs(struct thread_data *td)
 {
-       unsigned int min_bs;
+       unsigned long long min_bs;
 
        min_bs = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
        return min(td->o.min_bs[DDIR_TRIM], min_bs);
diff --git a/flist.h b/flist.h
index 2ca3d77..5437cd8 100644 (file)
--- a/flist.h
+++ b/flist.h
@@ -4,8 +4,8 @@
 #include <stdlib.h>
 #include <stddef.h>
 
-#define container_of(ptr, type, member) ({                     \
-       const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+#define container_of(ptr, type, member)  ({                    \
+       const __typeof__( ((type *)0)->member ) *__mptr = (ptr);        \
        (type *)( (char *)__mptr - offsetof(type,member) );})
 
 /*
index bcd7a88..7e5071d 100644 (file)
--- a/gclient.c
+++ b/gclient.c
@@ -1,4 +1,4 @@
-#include <malloc.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include <glib.h>
index 43bdaba..1ebcb27 100644 (file)
--- a/gerror.c
+++ b/gerror.c
@@ -1,5 +1,5 @@
 #include <locale.h>
-#include <malloc.h>
+#include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 
diff --git a/gfio.c b/gfio.c
index d222a1c..f59238c 100644 (file)
--- a/gfio.c
+++ b/gfio.c
@@ -22,8 +22,9 @@
  *
  */
 #include <locale.h>
-#include <malloc.h>
+#include <stdlib.h>
 #include <string.h>
+#include <libgen.h>
 
 #include <glib.h>
 #include <cairo.h>
index 16938ed..f44254b 100644 (file)
@@ -1,5 +1,5 @@
 #include <locale.h>
-#include <malloc.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include <glib.h>
diff --git a/graph.c b/graph.c
index f82b52a..7a17417 100644 (file)
--- a/graph.c
+++ b/graph.c
@@ -21,7 +21,7 @@
  *
  */
 #include <string.h>
-#include <malloc.h>
+#include <stdlib.h>
 #include <math.h>
 #include <assert.h>
 #include <stdlib.h>
diff --git a/init.c b/init.c
index af4cc6b..ede0a8b 100644 (file)
--- a/init.c
+++ b/init.c
@@ -531,7 +531,7 @@ static void put_job(struct thread_data *td)
 
 static int __setup_rate(struct thread_data *td, enum fio_ddir ddir)
 {
-       unsigned int bs = td->o.min_bs[ddir];
+       unsigned long long bs = td->o.min_bs[ddir];
 
        assert(ddir_rw(ddir));
 
@@ -891,7 +891,7 @@ static int fixup_options(struct thread_data *td)
         * If size is set but less than the min block size, complain
         */
        if (o->size && o->size < td_min_bs(td)) {
-               log_err("fio: size too small, must not be less than minimum block size: %llu < %u\n",
+               log_err("fio: size too small, must not be less than minimum block size: %llu < %llu\n",
                        (unsigned long long) o->size, td_min_bs(td));
                ret |= 1;
        }
@@ -2158,7 +2158,7 @@ static void usage(const char *name)
        printf("  --showcmd\t\tTurn a job file into command line options\n");
        printf("  --eta=when\t\tWhen ETA estimate should be printed\n");
        printf("            \t\tMay be \"always\", \"never\" or \"auto\"\n");
-       printf("  --eta-newline=time\tForce a new line for every 'time'");
+       printf("  --eta-newline=t\tForce a new line for every 't'");
        printf(" period passed\n");
        printf("  --status-interval=t\tForce full status dump every");
        printf(" 't' period passed\n");
diff --git a/io_u.c b/io_u.c
index 5221a78..c58dcf0 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -33,9 +33,9 @@ static bool random_map_free(struct fio_file *f, const uint64_t block)
  */
 static void mark_random_map(struct thread_data *td, struct io_u *io_u)
 {
-       unsigned int min_bs = td->o.min_bs[io_u->ddir];
+       unsigned long long min_bs = td->o.min_bs[io_u->ddir];
        struct fio_file *f = io_u->file;
-       unsigned int nr_blocks;
+       unsigned long long nr_blocks;
        uint64_t block;
 
        block = (io_u->offset - f->file_offset) / (uint64_t) min_bs;
@@ -503,19 +503,19 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u,
 }
 
 static inline bool io_u_fits(struct thread_data *td, struct io_u *io_u,
-                            unsigned int buflen)
+                            unsigned long long buflen)
 {
        struct fio_file *f = io_u->file;
 
        return io_u->offset + buflen <= f->io_size + get_start_offset(td, f);
 }
 
-static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u,
+static unsigned long long get_next_buflen(struct thread_data *td, struct io_u *io_u,
                                    bool is_random)
 {
        int ddir = io_u->ddir;
-       unsigned int buflen = 0;
-       unsigned int minbs, maxbs;
+       unsigned long long buflen = 0;
+       unsigned long long minbs, maxbs;
        uint64_t frand_max, r;
        bool power_2;
 
@@ -541,7 +541,7 @@ static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u,
                r = __rand(&td->bsrange_state[ddir]);
 
                if (!td->o.bssplit_nr[ddir]) {
-                       buflen = minbs + (unsigned int) ((double) maxbs *
+                       buflen = minbs + (unsigned long long) ((double) maxbs *
                                        (r / (frand_max + 1.0)));
                } else {
                        long long perc = 0;
@@ -891,7 +891,7 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
        }
 
        if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
-               dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%lx exceeds file size=0x%llx\n",
+               dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%llx exceeds file size=0x%llx\n",
                        io_u,
                        (unsigned long long) io_u->offset, io_u->buflen,
                        (unsigned long long) io_u->file->real_file_size);
@@ -1582,7 +1582,7 @@ static bool check_get_verify(struct thread_data *td, struct io_u *io_u)
  */
 static void small_content_scramble(struct io_u *io_u)
 {
-       unsigned int i, nr_blocks = io_u->buflen >> 9;
+       unsigned long long i, nr_blocks = io_u->buflen >> 9;
        unsigned int offset;
        uint64_t boffset, *iptr;
        char *p;
@@ -1726,7 +1726,7 @@ static void __io_u_log_error(struct thread_data *td, struct io_u *io_u)
        if (td_non_fatal_error(td, eb, io_u->error) && !td->o.error_dump)
                return;
 
-       log_err("fio: io_u error%s%s: %s: %s offset=%llu, buflen=%lu\n",
+       log_err("fio: io_u error%s%s: %s: %s offset=%llu, buflen=%llu\n",
                io_u->file ? " on file " : "",
                io_u->file ? io_u->file->file_name : "",
                strerror(io_u->error),
@@ -1892,7 +1892,7 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
        td->last_ddir = ddir;
 
        if (!io_u->error && ddir_rw(ddir)) {
-               unsigned int bytes = io_u->buflen - io_u->resid;
+               unsigned long long bytes = io_u->buflen - io_u->resid;
                int ret;
 
                td->io_blocks[ddir]++;
@@ -2082,8 +2082,8 @@ static void save_buf_state(struct thread_data *td, struct frand_state *rs)
                frand_copy(&td->buf_state_prev, rs);
 }
 
-void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
-                   unsigned int max_bs)
+void fill_io_buffer(struct thread_data *td, void *buf, unsigned long long min_write,
+                   unsigned long long max_bs)
 {
        struct thread_options *o = &td->o;
 
@@ -2093,8 +2093,8 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
        if (o->compress_percentage || o->dedupe_percentage) {
                unsigned int perc = td->o.compress_percentage;
                struct frand_state *rs;
-               unsigned int left = max_bs;
-               unsigned int this_write;
+               unsigned long long left = max_bs;
+               unsigned long long this_write;
 
                do {
                        rs = get_buf_state(td);
@@ -2103,7 +2103,7 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
 
                        if (perc) {
                                this_write = min_not_zero(min_write,
-                                                       td->o.compress_chunk);
+                                                       (unsigned long long) td->o.compress_chunk);
 
                                fill_random_buf_percentage(rs, buf, perc,
                                        this_write, this_write,
@@ -2130,7 +2130,7 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
  * "randomly" fill the buffer contents
  */
 void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
-                     unsigned int min_write, unsigned int max_bs)
+                     unsigned long long min_write, unsigned long long max_bs)
 {
        io_u->buf_filled_len = 0;
        fill_io_buffer(td, io_u->buf, min_write, max_bs);
diff --git a/io_u.h b/io_u.h
index 4f433c3..9a423b2 100644 (file)
--- a/io_u.h
+++ b/io_u.h
@@ -51,7 +51,7 @@ struct io_u {
        /*
         * Allocated/set buffer and length
         */
-       unsigned long buflen;
+       unsigned long long buflen;
        unsigned long long offset;
        void *buf;
 
@@ -65,13 +65,13 @@ struct io_u {
         * partial transfers / residual data counts
         */
        void *xfer_buf;
-       unsigned long xfer_buflen;
+       unsigned long long xfer_buflen;
 
        /*
         * Parameter related to pre-filled buffers and
         * their size to handle variable block sizes.
         */
-       unsigned long buf_filled_len;
+       unsigned long long buf_filled_len;
 
        struct io_piece *ipo;
 
@@ -134,8 +134,8 @@ extern void io_u_queued(struct thread_data *, struct io_u *);
 extern int io_u_quiesce(struct thread_data *);
 extern void io_u_log_error(struct thread_data *, struct io_u *);
 extern void io_u_mark_depth(struct thread_data *, unsigned int);
-extern void fill_io_buffer(struct thread_data *, void *, unsigned int, unsigned int);
-extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned int, unsigned int);
+extern void fill_io_buffer(struct thread_data *, void *, unsigned long long, unsigned long long);
+extern void io_u_fill_buffer(struct thread_data *td, struct io_u *, unsigned long long, unsigned long long);
 void io_u_mark_complete(struct thread_data *, unsigned int);
 void io_u_mark_submit(struct thread_data *, unsigned int);
 bool queue_full(const struct thread_data *);
@@ -149,13 +149,13 @@ static inline void dprint_io_u(struct io_u *io_u, const char *p)
        struct fio_file *f = io_u->file;
 
        if (f)
-               dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%lx,ddir=%d,file=%s\n",
+               dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%llx,ddir=%d,file=%s\n",
                                p, io_u,
                                (unsigned long long) io_u->offset,
                                io_u->buflen, io_u->ddir,
                                f->file_name);
        else
-               dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%lx,ddir=%d\n",
+               dprint(FD_IO, "%s: io_u %p: off=0x%llx,len=0x%llx,ddir=%d\n",
                                p, io_u,
                                (unsigned long long) io_u->offset,
                                io_u->buflen, io_u->ddir);
index d579682..e5fbcd4 100644 (file)
@@ -279,7 +279,7 @@ out:
 enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
 {
        const enum fio_ddir ddir = acct_ddir(io_u);
-       unsigned long buflen = io_u->xfer_buflen;
+       unsigned long long buflen = io_u->xfer_buflen;
        enum fio_q_status ret;
 
        dprint_io_u(io_u, "queue");
@@ -350,7 +350,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
                         "invalid block size. Try setting direct=0.\n");
        }
 
-       if (!td->io_ops->commit || io_u->ddir == DDIR_TRIM) {
+       if (!td->io_ops->commit) {
                io_u_mark_submit(td, 1);
                io_u_mark_complete(td, 1);
        }
diff --git a/iolog.c b/iolog.c
index 3b04195..eb38027 100644 (file)
--- a/iolog.c
+++ b/iolog.c
@@ -42,7 +42,7 @@ void log_io_u(const struct thread_data *td, const struct io_u *io_u)
        if (!td->o.write_iolog_file)
                return;
 
-       fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
+       fprintf(td->iolog_f, "%s %s %llu %llu\n", io_u->file->file_name,
                                                io_ddir_name(io_u->ddir),
                                                io_u->offset, io_u->buflen);
 }
@@ -168,7 +168,7 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u)
                        io_u->buflen = ipo->len;
                        io_u->file = td->files[ipo->fileno];
                        get_file(io_u->file);
-                       dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
+                       dprint(FD_IO, "iolog: get %llu/%llu/%s\n", io_u->offset,
                                                io_u->buflen, io_u->file->file_name);
                        if (ipo->delay)
                                iolog_delay(td, ipo->delay);
@@ -774,8 +774,8 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
                entry_before = flist_first_entry(&entry->list, struct io_u_plat_entry, list);
                io_u_plat_before = entry_before->io_u_plat;
 
-               fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time,
-                                               io_sample_ddir(s), s->bs);
+               fprintf(f, "%lu, %u, %llu, ", (unsigned long) s->time,
+                                               io_sample_ddir(s), (unsigned long long) s->bs);
                for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) {
                        fprintf(f, "%llu, ", (unsigned long long)
                                hist_sum(j, stride, io_u_plat, io_u_plat_before));
@@ -807,17 +807,17 @@ void flush_samples(FILE *f, void *samples, uint64_t sample_size)
                s = __get_sample(samples, log_offset, i);
 
                if (!log_offset) {
-                       fprintf(f, "%lu, %" PRId64 ", %u, %u\n",
+                       fprintf(f, "%lu, %" PRId64 ", %u, %llu\n",
                                        (unsigned long) s->time,
                                        s->data.val,
-                                       io_sample_ddir(s), s->bs);
+                                       io_sample_ddir(s), (unsigned long long) s->bs);
                } else {
                        struct io_sample_offset *so = (void *) s;
 
-                       fprintf(f, "%lu, %" PRId64 ", %u, %u, %llu\n",
+                       fprintf(f, "%lu, %" PRId64 ", %u, %llu, %llu\n",
                                        (unsigned long) s->time,
                                        s->data.val,
-                                       io_sample_ddir(s), s->bs,
+                                       io_sample_ddir(s), (unsigned long long) s->bs,
                                        (unsigned long long) so->offset);
                }
        }
diff --git a/iolog.h b/iolog.h
index a4e335a..3b8c901 100644 (file)
--- a/iolog.h
+++ b/iolog.h
@@ -42,7 +42,7 @@ struct io_sample {
        uint64_t time;
        union io_sample_data data;
        uint32_t __ddir;
-       uint32_t bs;
+       uint64_t bs;
 };
 
 struct io_sample_offset {
index 4047f23..454af0b 100644 (file)
@@ -156,10 +156,10 @@ static bool axmap_handler(struct axmap *axmap, uint64_t bit_nr,
                          void *), void *data)
 {
        struct axmap_level *al;
+       uint64_t index = bit_nr;
        int i;
 
        for (i = 0; i < axmap->nr_levels; i++) {
-               unsigned long index = ulog64(bit_nr, i);
                unsigned long offset = index >> UNIT_SHIFT;
                unsigned int bit = index & BLOCKS_PER_UNIT_MASK;
 
@@ -167,6 +167,9 @@ static bool axmap_handler(struct axmap *axmap, uint64_t bit_nr,
 
                if (func(al, offset, bit, data))
                        return true;
+
+               if (index)
+                       index >>= UNIT_SHIFT;
        }
 
        return false;
index afc78f0..ec0848c 100644 (file)
--- a/minmax.h
+++ b/minmax.h
@@ -3,23 +3,23 @@
 
 #ifndef min
 #define min(x,y) ({ \
-       typeof(x) _x = (x);     \
-       typeof(y) _y = (y);     \
+       __typeof__(x) _x = (x); \
+       __typeof__(y) _y = (y); \
        (void) (&_x == &_y);            \
        _x < _y ? _x : _y; })
 #endif
 
 #ifndef max
 #define max(x,y) ({ \
-       typeof(x) _x = (x);     \
-       typeof(y) _y = (y);     \
+       __typeof__(x) _x = (x); \
+       __typeof__(y) _y = (y); \
        (void) (&_x == &_y);            \
        _x > _y ? _x : _y; })
 #endif
 
 #define min_not_zero(x, y) ({          \
-       typeof(x) __x = (x);            \
-       typeof(y) __y = (y);            \
+       __typeof__(x) __x = (x);                \
+       __typeof__(y) __y = (y);                \
        __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
 
 #endif
index a174e2c..4b46402 100644 (file)
--- a/options.c
+++ b/options.c
@@ -52,7 +52,7 @@ static int bs_cmp(const void *p1, const void *p2)
 
 struct split {
        unsigned int nr;
-       unsigned int val1[ZONESPLIT_MAX];
+       unsigned long long val1[ZONESPLIT_MAX];
        unsigned long long val2[ZONESPLIT_MAX];
 };
 
@@ -119,7 +119,7 @@ static int bssplit_ddir(struct thread_options *o, enum fio_ddir ddir, char *str,
                        bool data)
 {
        unsigned int i, perc, perc_missing;
-       unsigned int max_bs, min_bs;
+       unsigned long long max_bs, min_bs;
        struct split split;
 
        memset(&split, 0, sizeof(split));
@@ -2112,7 +2112,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .name   = "bs",
                .lname  = "Block size",
                .alias  = "blocksize",
-               .type   = FIO_OPT_INT,
+               .type   = FIO_OPT_ULL,
                .off1   = offsetof(struct thread_options, bs[DDIR_READ]),
                .off2   = offsetof(struct thread_options, bs[DDIR_WRITE]),
                .off3   = offsetof(struct thread_options, bs[DDIR_TRIM]),
@@ -2129,7 +2129,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .name   = "ba",
                .lname  = "Block size align",
                .alias  = "blockalign",
-               .type   = FIO_OPT_INT,
+               .type   = FIO_OPT_ULL,
                .off1   = offsetof(struct thread_options, ba[DDIR_READ]),
                .off2   = offsetof(struct thread_options, ba[DDIR_WRITE]),
                .off3   = offsetof(struct thread_options, ba[DDIR_TRIM]),
@@ -2163,7 +2163,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
        {
                .name   = "bssplit",
                .lname  = "Block size split",
-               .type   = FIO_OPT_STR,
+               .type   = FIO_OPT_STR_ULL,
                .cb     = str_bssplit_cb,
                .off1   = offsetof(struct thread_options, bssplit),
                .help   = "Set a specific mix of block sizes",
old mode 100755 (executable)
new mode 100644 (file)
index 87f93b6..4ed9f0b 100644 (file)
@@ -49,18 +49,18 @@ extern "C" {
 #define min(a, b) MIN(a, b) /* glue for linux kernel source */
 #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
 
-#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1)
+#define ALIGN(x,a) __ALIGN_MASK(x,(__typeof__(x))(a)-1)
 #define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
 
 #define min_t(t,x,y) ({ \
-       typeof((x)) _x = (x); \
-       typeof((y)) _y = (y); \
+       __typeof__((x)) _x = (x); \
+       __typeof__((y)) _y = (y); \
        (_x < _y) ? _x : _y; \
 })
 
 #define max_t(t,x,y) ({ \
-       typeof((x)) _x = (x); \
-       typeof((y)) _y = (y); \
+       __typeof__((x)) _x = (x); \
+       __typeof__((y)) _y = (y); \
        (_x > _y) ? _x : _y; \
 })
 
diff --git a/parse.c b/parse.c
index 6261fca..194ad59 100644 (file)
--- a/parse.c
+++ b/parse.c
 static const char *opt_type_names[] = {
        "OPT_INVALID",
        "OPT_STR",
+       "OPT_STR_ULL",
        "OPT_STR_MULTI",
        "OPT_STR_VAL",
        "OPT_STR_VAL_TIME",
        "OPT_STR_STORE",
        "OPT_RANGE",
        "OPT_INT",
+       "OPT_ULL",
        "OPT_BOOL",
        "OPT_FLOAT_LIST",
        "OPT_STR_SET",
@@ -438,7 +440,7 @@ void strip_blank_end(char *p)
        *(s + 1) = '\0';
 }
 
-static int check_range_bytes(const char *str, long *val, void *data)
+static int check_range_bytes(const char *str, long long *val, void *data)
 {
        long long __val;
 
@@ -507,7 +509,8 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
        int il=0, *ilp;
        fio_fp64_t *flp;
        long long ull, *ullp;
-       long ul1, ul2;
+       long ul2;
+       long long ull1, ull2;
        double uf;
        char **cp = NULL;
        int ret = 0, is_time = 0;
@@ -525,6 +528,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
 
        switch (o->type) {
        case FIO_OPT_STR:
+       case FIO_OPT_STR_ULL:
        case FIO_OPT_STR_MULTI: {
                fio_opt_str_fn *fn = o->cb;
 
@@ -540,7 +544,11 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
                                break;
                        if (!strncmp(vp->ival, ptr, str_match_len(vp, ptr))) {
                                ret = 0;
-                               if (o->off1)
+                               if (!o->off1)
+                                       continue;
+                               if (o->type == FIO_OPT_STR_ULL)
+                                       val_store(ullp, vp->oval, o->off1, vp->orval, data, o);
+                               else
                                        val_store(ilp, vp->oval, o->off1, vp->orval, data, o);
                                continue;
                        }
@@ -554,6 +562,8 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
        }
        case FIO_OPT_STR_VAL_TIME:
                is_time = 1;
+               /* fall through */
+       case FIO_OPT_ULL:
        case FIO_OPT_INT:
        case FIO_OPT_STR_VAL: {
                fio_opt_str_val_fn *fn = o->cb;
@@ -584,7 +594,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
 
                if (o->maxval && ull > o->maxval) {
                        log_err("max value out of range: %llu"
-                                       " (%u max)\n", ull, o->maxval);
+                                       " (%llu max)\n", ull, o->maxval);
                        return 1;
                }
                if (o->minval && ull < o->minval) {
@@ -636,6 +646,27 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
                                                        val_store(ilp, ull, o->off3, 0, data, o);
                                        }
                                }
+                       } else if (o->type == FIO_OPT_ULL) {
+                               if (first)
+                                       val_store(ullp, ull, o->off1, 0, data, o);
+                               if (curr == 1) {
+                                       if (o->off2)
+                                               val_store(ullp, ull, o->off2, 0, data, o);
+                               }
+                               if (curr == 2) {
+                                       if (o->off3)
+                                               val_store(ullp, ull, o->off3, 0, data, o);
+                               }
+                               if (!more) {
+                                       if (curr < 1) {
+                                               if (o->off2)
+                                                       val_store(ullp, ull, o->off2, 0, data, o);
+                                       }
+                                       if (curr < 2) {
+                                               if (o->off3)
+                                                       val_store(ullp, ull, o->off3, 0, data, o);
+                                       }
+                               }
                        } else {
                                if (first)
                                        val_store(ullp, ull, o->off1, 0, data, o);
@@ -790,43 +821,43 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
                p1 = tmp;
 
                ret = 1;
-               if (!check_range_bytes(p1, &ul1, data) &&
-                   !check_range_bytes(p2, &ul2, data)) {
+               if (!check_range_bytes(p1, &ull1, data) &&
+                       !check_range_bytes(p2, &ull2, data)) {
                        ret = 0;
-                       if (ul1 > ul2) {
-                               unsigned long foo = ul1;
+                       if (ull1 > ull2) {
+                               unsigned long long foo = ull1;
 
-                               ul1 = ul2;
-                               ul2 = foo;
+                               ull1 = ull2;
+                               ull2 = foo;
                        }
 
                        if (first) {
-                               val_store(ilp, ul1, o->off1, 0, data, o);
-                               val_store(ilp, ul2, o->off2, 0, data, o);
+                               val_store(ullp, ull1, o->off1, 0, data, o);
+                               val_store(ullp, ull2, o->off2, 0, data, o);
                        }
                        if (curr == 1) {
                                if (o->off3 && o->off4) {
-                                       val_store(ilp, ul1, o->off3, 0, data, o);
-                                       val_store(ilp, ul2, o->off4, 0, data, o);
+                                       val_store(ullp, ull1, o->off3, 0, data, o);
+                                       val_store(ullp, ull2, o->off4, 0, data, o);
                                }
                        }
                        if (curr == 2) {
                                if (o->off5 && o->off6) {
-                                       val_store(ilp, ul1, o->off5, 0, data, o);
-                                       val_store(ilp, ul2, o->off6, 0, data, o);
+                                       val_store(ullp, ull1, o->off5, 0, data, o);
+                                       val_store(ullp, ull2, o->off6, 0, data, o);
                                }
                        }
                        if (!more) {
                                if (curr < 1) {
                                        if (o->off3 && o->off4) {
-                                               val_store(ilp, ul1, o->off3, 0, data, o);
-                                               val_store(ilp, ul2, o->off4, 0, data, o);
+                                               val_store(ullp, ull1, o->off3, 0, data, o);
+                                               val_store(ullp, ull2, o->off4, 0, data, o);
                                        }
                                }
                                if (curr < 2) {
                                        if (o->off5 && o->off6) {
-                                               val_store(ilp, ul1, o->off5, 0, data, o);
-                                               val_store(ilp, ul2, o->off6, 0, data, o);
+                                               val_store(ullp, ull1, o->off5, 0, data, o);
+                                               val_store(ullp, ull2, o->off6, 0, data, o);
                                        }
                                }
                        }
@@ -851,7 +882,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
                        break;
 
                if (o->maxval && il > (int) o->maxval) {
-                       log_err("max value out of range: %d (%d max)\n",
+                       log_err("max value out of range: %d (%llu max)\n",
                                                                il, o->maxval);
                        return 1;
                }
@@ -878,6 +909,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
        }
        case FIO_OPT_DEPRECATED:
                ret = 1;
+               /* fall through */
        case FIO_OPT_SOFT_DEPRECATED:
                log_info("Option %s is deprecated\n", o->name);
                break;
@@ -1325,6 +1357,10 @@ static void option_init(struct fio_option *o)
                if (!o->maxval)
                        o->maxval = UINT_MAX;
        }
+       if (o->type == FIO_OPT_ULL) {
+               if (!o->maxval)
+                       o->maxval = ULLONG_MAX;
+       }
        if (o->type == FIO_OPT_STR_SET && o->def && !o->no_warn_def) {
                log_err("Option %s: string set option with"
                                " default will always be true\n", o->name);
diff --git a/parse.h b/parse.h
index 4de5e77..b47a02c 100644 (file)
--- a/parse.h
+++ b/parse.h
 enum fio_opt_type {
        FIO_OPT_INVALID = 0,
        FIO_OPT_STR,
+       FIO_OPT_STR_ULL,
        FIO_OPT_STR_MULTI,
        FIO_OPT_STR_VAL,
        FIO_OPT_STR_VAL_TIME,
        FIO_OPT_STR_STORE,
        FIO_OPT_RANGE,
        FIO_OPT_INT,
+       FIO_OPT_ULL,
        FIO_OPT_BOOL,
        FIO_OPT_FLOAT_LIST,
        FIO_OPT_STR_SET,
@@ -29,7 +31,7 @@ enum fio_opt_type {
  */
 struct value_pair {
        const char *ival;               /* string option */
-       unsigned int oval;              /* output value */
+       unsigned long long oval;/* output value */
        const char *help;               /* help text for sub option */
        int orval;                      /* OR value */
        void *cb;                       /* sub-option callback */
@@ -52,7 +54,7 @@ struct fio_option {
        unsigned int off4;
        unsigned int off5;
        unsigned int off6;
-       unsigned int maxval;            /* max and min value */
+       unsigned long long maxval;              /* max and min value */
        int minval;
        double maxfp;                   /* max and min floating value */
        double minfp;
index 7e7ffed..b966c66 100644 (file)
--- a/server.c
+++ b/server.c
@@ -1985,7 +1985,7 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
                        s->time         = cpu_to_le64(s->time);
                        s->data.val     = cpu_to_le64(s->data.val);
                        s->__ddir       = cpu_to_le32(s->__ddir);
-                       s->bs           = cpu_to_le32(s->bs);
+                       s->bs           = cpu_to_le64(s->bs);
 
                        if (log->log_offset) {
                                struct io_sample_offset *so = (void *) s;
index b48bbe1..37d2f76 100644 (file)
--- a/server.h
+++ b/server.h
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-       FIO_SERVER_VER                  = 73,
+       FIO_SERVER_VER                  = 74,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
diff --git a/stat.c b/stat.c
index a308eb8..82e79df 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -619,8 +619,8 @@ static int block_state_category(int block_state)
 
 static int compare_block_infos(const void *bs1, const void *bs2)
 {
-       uint32_t block1 = *(uint32_t *)bs1;
-       uint32_t block2 = *(uint32_t *)bs2;
+       uint64_t block1 = *(uint64_t *)bs1;
+       uint64_t block2 = *(uint64_t *)bs2;
        int state1 = BLOCK_INFO_STATE(block1);
        int state2 = BLOCK_INFO_STATE(block2);
        int bscat1 = block_state_category(state1);
@@ -1295,13 +1295,8 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
        json_object_add_value_int(root, "majf", ts->majf);
        json_object_add_value_int(root, "minf", ts->minf);
 
-
-       /* Calc % distribution of IO depths, usecond, msecond latency */
+       /* Calc % distribution of IO depths */
        stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist);
-       stat_calc_lat_n(ts, io_u_lat_n);
-       stat_calc_lat_u(ts, io_u_lat_u);
-       stat_calc_lat_m(ts, io_u_lat_m);
-
        tmp = json_create_object();
        json_object_add_value_object(root, "iodepth_level", tmp);
        /* Only show fixed 7 I/O depth levels*/
@@ -1314,6 +1309,44 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
                json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]);
        }
 
+       /* Calc % distribution of submit IO depths */
+       stat_calc_dist(ts->io_u_submit, ts->total_submit, io_u_dist);
+       tmp = json_create_object();
+       json_object_add_value_object(root, "iodepth_submit", tmp);
+       /* Only show fixed 7 I/O depth levels*/
+       for (i = 0; i < 7; i++) {
+               char name[20];
+               if (i == 0)
+                       snprintf(name, 20, "0");
+               else if (i < 6)
+                       snprintf(name, 20, "%d", 1 << (i+1));
+               else
+                       snprintf(name, 20, ">=%d", 1 << i);
+               json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]);
+       }
+
+       /* Calc % distribution of completion IO depths */
+       stat_calc_dist(ts->io_u_complete, ts->total_complete, io_u_dist);
+       tmp = json_create_object();
+       json_object_add_value_object(root, "iodepth_complete", tmp);
+       /* Only show fixed 7 I/O depth levels*/
+       for (i = 0; i < 7; i++) {
+               char name[20];
+               if (i == 0)
+                       snprintf(name, 20, "0");
+               else if (i < 6)
+                       snprintf(name, 20, "%d", 1 << (i+1));
+               else
+                       snprintf(name, 20, ">=%d", 1 << i);
+               json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]);
+       }
+
+       /* Calc % distribution of nsecond, usecond, msecond latency */
+       stat_calc_dist(ts->io_u_map, ddir_rw_sum(ts->total_io_u), io_u_dist);
+       stat_calc_lat_n(ts, io_u_lat_n);
+       stat_calc_lat_u(ts, io_u_lat_u);
+       stat_calc_lat_m(ts, io_u_lat_m);
+
        /* Nanosecond latency */
        tmp = json_create_object();
        json_object_add_value_object(root, "latency_ns", tmp);
@@ -2220,7 +2253,7 @@ static struct io_logs *get_cur_log(struct io_log *iolog)
 }
 
 static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
-                            enum fio_ddir ddir, unsigned int bs,
+                            enum fio_ddir ddir, unsigned long long bs,
                             unsigned long t, uint64_t offset)
 {
        struct io_logs *cur_log;
@@ -2338,7 +2371,7 @@ static void _add_stat_to_log(struct io_log *iolog, unsigned long elapsed,
 static unsigned long add_log_sample(struct thread_data *td,
                                    struct io_log *iolog,
                                    union io_sample_data data,
-                                   enum fio_ddir ddir, unsigned int bs,
+                                   enum fio_ddir ddir, unsigned long long bs,
                                    uint64_t offset)
 {
        unsigned long elapsed, this_window;
@@ -2400,7 +2433,7 @@ void finalize_logs(struct thread_data *td, bool unit_logs)
                _add_stat_to_log(td->iops_log, elapsed, td->o.log_max != 0);
 }
 
-void add_agg_sample(union io_sample_data data, enum fio_ddir ddir, unsigned int bs)
+void add_agg_sample(union io_sample_data data, enum fio_ddir ddir, unsigned long long bs)
 {
        struct io_log *iolog;
 
@@ -2430,7 +2463,8 @@ static void add_clat_percentile_sample(struct thread_stat *ts,
 }
 
 void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
-                    unsigned long long nsec, unsigned int bs, uint64_t offset)
+                    unsigned long long nsec, unsigned long long bs,
+                    uint64_t offset)
 {
        unsigned long elapsed, this_window;
        struct thread_stat *ts = &td->ts;
@@ -2489,7 +2523,7 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
 }
 
 void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
-                    unsigned long usec, unsigned int bs, uint64_t offset)
+                    unsigned long usec, unsigned long long bs, uint64_t offset)
 {
        struct thread_stat *ts = &td->ts;
 
@@ -2507,7 +2541,8 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
 }
 
 void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
-                   unsigned long long nsec, unsigned int bs, uint64_t offset)
+                   unsigned long long nsec, unsigned long long bs,
+                   uint64_t offset)
 {
        struct thread_stat *ts = &td->ts;
 
@@ -2590,7 +2625,7 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv,
                add_stat_sample(&stat[ddir], rate);
 
                if (log) {
-                       unsigned int bs = 0;
+                       unsigned long long bs = 0;
 
                        if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
                                bs = td->o.min_bs[ddir];
diff --git a/stat.h b/stat.h
index c5b8185..5dcaae0 100644 (file)
--- a/stat.h
+++ b/stat.h
@@ -308,12 +308,12 @@ extern void update_rusage_stat(struct thread_data *);
 extern void clear_rusage_stat(struct thread_data *);
 
 extern void add_lat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
-                               unsigned int, uint64_t);
+                               unsigned long long, uint64_t);
 extern void add_clat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
-                               unsigned int, uint64_t);
+                               unsigned long long, uint64_t);
 extern void add_slat_sample(struct thread_data *, enum fio_ddir, unsigned long,
-                               unsigned int, uint64_t);
-extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned int);
+                               unsigned long long, uint64_t);
+extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned long long);
 extern void add_iops_sample(struct thread_data *, struct io_u *,
                                unsigned int);
 extern void add_bw_sample(struct thread_data *, struct io_u *,
diff --git a/t/sgunmap-perf.py b/t/sgunmap-perf.py
new file mode 100755 (executable)
index 0000000..fadbb85
--- /dev/null
@@ -0,0 +1,115 @@
+#!/usr/bin/python2.7
+#
+# sgunmap-perf.py
+#
+# Basic performance testing using fio's sg ioengine
+#
+# USAGE
+# sgunmap-perf.py char-device block-device fio-candidate fio-reference
+#
+# EXAMPLE
+# t/sgunmap-perf.py /dev/sg1 /dev/sdb ./fio ./fio-reference
+#
+# REQUIREMENTS
+# Python 2.7+ (argparse) and the six module
+#
+#
+
+from __future__ import absolute_import
+from __future__ import print_function
+import sys
+import json
+import argparse
+import subprocess
+from six.moves import range
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('cdev',
+                        help='character device target (e.g., /dev/sg0)')
+    parser.add_argument('bdev',
+                        help='block device target (e.g., /dev/sda)')
+    parser.add_argument('fioc',
+                        help='path to candidate fio executable (e.g., ./fio)')
+    parser.add_argument('fior',
+                        help='path to reference fio executable (e.g., ./fio)')
+    args = parser.parse_args()
+
+    return args
+
+
+def fulldevice(fio, dev, ioengine='psync', rw='trim', bs='1M'):
+    parameters = ["--name=test",
+                  "--output-format=json",
+                  "--random_generator=lfsr",
+                  "--bs={0}".format(bs),
+                  "--rw={0}".format(rw),
+                  "--ioengine={0}".format(ioengine),
+                  "--filename={0}".format(dev)]
+
+    output = subprocess.check_output([fio] + parameters)
+    jsondata = json.loads(output)
+    jobdata = jsondata['jobs'][0]
+    return jobdata
+
+
+def runtest(fio, dev, rw, qd, batch, bs='512', runtime='30s'):
+    parameters = ["--name=test",
+                  "--random_generator=tausworthe64",
+                  "--time_based",
+                  "--runtime={0}".format(runtime),
+                  "--output-format=json",
+                  "--ioengine=sg",
+                  "--blocksize={0}".format(bs),
+                  "--rw={0}".format(rw),
+                  "--filename={0}".format(dev),
+                  "--iodepth={0}".format(qd),
+                  "--iodepth_batch={0}".format(batch)]
+
+    output = subprocess.check_output([fio] + parameters)
+    jsondata = json.loads(output)
+    jobdata = jsondata['jobs'][0]
+#    print(parameters)
+
+    return jobdata
+
+
+def runtests(fio, dev, qd, batch, rw, bs='512', trials=5):
+    iops = []
+    for x in range(trials):
+        jd = runtest(fio, dev, rw, qd, batch, bs=bs)
+        total = jd['read']['iops'] + jd['write']['iops'] + jd['trim']['iops']
+#       print(total)
+        iops.extend([total])
+    return iops, (sum(iops) / trials)
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    print("Trimming full device {0}".format(args.cdev))
+    fulldevice(args.fior, args.cdev, ioengine='sg')
+
+    print("Running rand read tests on {0}"
+        " with fio candidate build {1}".format(args.cdev, args.fioc))
+    randread, rrmean = runtests(args.fioc, args.cdev, 16, 1, 'randread',
+        trials=5)
+    print("IOPS mean {0}, trials {1}".format(rrmean, randread))
+
+    print("Running rand read tests on {0}"
+        " with fio reference build {1}".format(args.cdev, args.fior))
+    randread, rrmean = runtests(args.fior, args.cdev, 16, 1, 'randread',
+        trials=5)
+    print("IOPS mean {0}, trials {1}".format(rrmean, randread))
+
+    print("Running rand write tests on {0}"
+        " with fio candidate build {1}".format(args.cdev, args.fioc))
+    randwrite, rwmean = runtests(args.fioc, args.cdev, 16, 1, 'randwrite',
+        trials=5)
+    print("IOPS mean {0}, trials {1}".format(rwmean, randwrite))
+
+    print("Running rand write tests on {0}"
+        " with fio reference build {1}".format(args.cdev, args.fior))
+    randwrite, rwmean = runtests(args.fior, args.cdev, 16, 1, 'randwrite',
+        trials=5)
+    print("IOPS mean {0}, trials {1}".format(rwmean, randwrite))
diff --git a/t/sgunmap-test.py b/t/sgunmap-test.py
new file mode 100755 (executable)
index 0000000..d2caa5f
--- /dev/null
@@ -0,0 +1,173 @@
+#!/usr/bin/python2.7
+# Note: this script is python2 and python 3 compatible.
+#
+# sgunmap-test.py
+#
+# Limited functionality test for trim workloads using fio's sg ioengine
+# This checks only the three sets of reported iodepths
+#
+# !!!WARNING!!!
+# This script carries out destructive tests. Be sure that
+# there is no data you want to keep on the supplied devices.
+#
+# USAGE
+# sgunmap-test.py char-device block-device fio-executable
+#
+# EXAMPLE
+# t/sgunmap-test.py /dev/sg1 /dev/sdb ./fio
+#
+# REQUIREMENTS
+# Python 2.7+ (argparse) and the six module
+#
+# TEST MATRIX
+# For both char-dev and block-dev these are the expected
+# submit/complete IO depths
+#
+#                       blockdev                chardev
+#                       iodepth                 iodepth
+# R QD1                 sub/comp: 1-4=100%      sub/comp: 1-4=100%
+# W QD1                 sub/comp: 1-4=100%      sub/comp: 1-4=100%
+# T QD1                 sub/comp: 1-4=100%      sub/comp: 1-4=100%
+#
+# R QD16, batch8        sub/comp: 1-4=100%      sub/comp: 1-4=100%
+# W QD16, batch8        sub/comp: 1-4=100%      sub/comp: 1-4=100%
+# T QD16, batch8        sub/comp: 1-4=100%      sub/comp: 5-8=100%
+#
+# R QD16, batch16       sub/comp: 1-4=100%      sub/comp: 1-4=100%
+# W QD16, batch16       sub/comp: 1-4=100%      sub/comp: 1-4=100%
+# T QD16, batch16       sub/comp: 1-4=100%      sub/comp: 9-16=100%
+#
+
+from __future__ import absolute_import
+from __future__ import print_function
+import sys
+import json
+import argparse
+import traceback
+import subprocess
+from six.moves import range
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('chardev',
+                        help='character device target (e.g., /dev/sg0)')
+    parser.add_argument('blockdev',
+                        help='block device target (e.g., /dev/sda)')
+    parser.add_argument('fio',
+                        help='path to fio executable (e.g., ./fio)')
+    args = parser.parse_args()
+
+    return args
+
+#
+# With block devices,
+#     iodepth = 1 always
+#     submit = complete = 1-4 always
+# With character devices,
+# RW
+#     iodepth = qd
+#     submit = 1-4
+#     complete = 1-4 except for the IOs in flight
+#                when the job is ending
+# T
+#     iodepth = qd
+#     submit = qdbatch
+#     complete = qdbatch except for the IOs in flight
+#                when the job is ending
+#
+
+
+def check(jsondata, parameters, block, qd, qdbatch, rw):
+    iodepth = jsondata['iodepth_level']
+    submit = jsondata['iodepth_submit']
+    complete = jsondata['iodepth_complete']
+
+    try:
+        if block:
+            assert iodepth['1'] == 100.0
+            assert submit['4'] == 100.0
+            assert complete['4'] == 100.0
+        elif 'read' in rw or 'write' in rw:
+            assert iodepth[str(qd)] > 99.9
+            assert submit['4'] == 100.0
+            assert complete['4'] > 99.9
+        else:
+            if qdbatch <= 4:
+                batchkey = '4'
+            elif qdbatch > 64:
+                batchkey = '>=64'
+            else:
+                batchkey = str(qdbatch)
+            if qd >= 64:
+                qdkey = ">=64"
+            else:
+                qdkey = str(qd)
+            assert iodepth[qdkey] > 99
+            assert submit[batchkey] == 100.0
+            assert complete[batchkey] > 99
+    except AssertionError:
+        print("Assertion failed")
+        traceback.print_exc()
+        print(jsondata)
+        return
+
+    print("**********passed*********")
+
+
+def runalltests(args, qd, batch):
+    block = False
+    for dev in [args.chardev, args.blockdev]:
+        for rw in ["randread", "randwrite", "randtrim"]:
+            parameters = ["--name=test",
+                           "--time_based",
+                           "--runtime=30s",
+                           "--output-format=json",
+                           "--ioengine=sg",
+                           "--rw={0}".format(rw),
+                           "--filename={0}".format(dev),
+                           "--iodepth={0}".format(qd),
+                           "--iodepth_batch={0}".format(batch)]
+
+            print(parameters)
+            output = subprocess.check_output([args.fio] + parameters)
+            jsondata = json.loads(output)
+            jobdata = jsondata['jobs'][0]
+            check(jobdata, parameters, block, qd, batch, rw)
+        block = True
+
+
+def runcdevtrimtest(args, qd, batch):
+    parameters = ["--name=test",
+                   "--time_based",
+                   "--runtime=30s",
+                   "--output-format=json",
+                   "--ioengine=sg",
+                   "--rw=randtrim",
+                   "--filename={0}".format(args.chardev),
+                   "--iodepth={0}".format(qd),
+                   "--iodepth_batch={0}".format(batch)]
+
+    print(parameters)
+    output = subprocess.check_output([args.fio] + parameters)
+    jsondata = json.loads(output)
+    jobdata = jsondata['jobs'][0]
+    check(jobdata, parameters, False, qd, batch, "randtrim")
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    runcdevtrimtest(args, 32, 2)
+    runcdevtrimtest(args, 32, 4)
+    runcdevtrimtest(args, 32, 8)
+    runcdevtrimtest(args, 64, 4)
+    runcdevtrimtest(args, 64, 8)
+    runcdevtrimtest(args, 64, 16)
+    runcdevtrimtest(args, 128, 8)
+    runcdevtrimtest(args, 128, 16)
+    runcdevtrimtest(args, 128, 32)
+
+    runalltests(args, 1, 1)
+    runalltests(args, 16, 2)
+    runalltests(args, 16, 16)
index 8d13b79..8adba48 100644 (file)
@@ -29,7 +29,7 @@ enum fio_memtype {
 #define ZONESPLIT_MAX  256
 
 struct bssplit {
-       uint32_t bs;
+       uint64_t bs;
        uint32_t perc;
 };
 
@@ -82,10 +82,10 @@ struct thread_options {
        unsigned long long start_offset;
        unsigned long long start_offset_align;
 
-       unsigned int bs[DDIR_RWDIR_CNT];
-       unsigned int ba[DDIR_RWDIR_CNT];
-       unsigned int min_bs[DDIR_RWDIR_CNT];
-       unsigned int max_bs[DDIR_RWDIR_CNT];
+       unsigned long long bs[DDIR_RWDIR_CNT];
+       unsigned long long ba[DDIR_RWDIR_CNT];
+       unsigned long long min_bs[DDIR_RWDIR_CNT];
+       unsigned long long max_bs[DDIR_RWDIR_CNT];
        struct bssplit *bssplit[DDIR_RWDIR_CNT];
        unsigned int bssplit_nr[DDIR_RWDIR_CNT];
 
@@ -164,7 +164,8 @@ struct thread_options {
        unsigned int perc_rand[DDIR_RWDIR_CNT];
 
        unsigned int hugepage_size;
-       unsigned int rw_min_bs;
+       unsigned long long rw_min_bs;
+       unsigned int pad2;
        unsigned int thinktime;
        unsigned int thinktime_spin;
        unsigned int thinktime_blocks;
@@ -363,10 +364,10 @@ struct thread_options_pack {
        uint64_t start_offset;
        uint64_t start_offset_align;
 
-       uint32_t bs[DDIR_RWDIR_CNT];
-       uint32_t ba[DDIR_RWDIR_CNT];
-       uint32_t min_bs[DDIR_RWDIR_CNT];
-       uint32_t max_bs[DDIR_RWDIR_CNT];
+       uint64_t bs[DDIR_RWDIR_CNT];
+       uint64_t ba[DDIR_RWDIR_CNT];
+       uint64_t min_bs[DDIR_RWDIR_CNT];
+       uint64_t max_bs[DDIR_RWDIR_CNT];
        struct bssplit bssplit[DDIR_RWDIR_CNT][BSSPLIT_MAX];
        uint32_t bssplit_nr[DDIR_RWDIR_CNT];
 
@@ -443,7 +444,8 @@ struct thread_options_pack {
        uint32_t perc_rand[DDIR_RWDIR_CNT];
 
        uint32_t hugepage_size;
-       uint32_t rw_min_bs;
+       uint64_t rw_min_bs;
+       uint32_t pad2;
        uint32_t thinktime;
        uint32_t thinktime_spin;
        uint32_t thinktime_blocks;
index 808de67..88bace0 100644 (file)
@@ -1,6 +1,6 @@
 #include <stdio.h>
 #include <math.h>
-#include <malloc.h>
+#include <stdlib.h>
 #include <string.h>
 
 /*
diff --git a/tools/hist/fio-histo-log-pctiles.py b/tools/hist/fio-histo-log-pctiles.py
new file mode 100755 (executable)
index 0000000..c398113
--- /dev/null
@@ -0,0 +1,657 @@
+#!/usr/bin/env python
+
+# module to parse fio histogram log files, not using pandas
+# runs in python v2 or v3
+# to get help with the CLI: $ python fio-histo-log-pctiles.py -h
+# this can be run standalone as a script but is callable
+# assumes all threads run for same time duration
+# assumes all threads are doing the same thing for the entire run
+
+# percentiles:
+#  0 - min latency
+#  50 - median
+#  100 - max latency
+
+# TO-DO: 
+#   separate read and write stats for randrw mixed workload
+#   report average latency if needed
+#   prove that it works (partially done with unit tests)
+
+# to run unit tests, set UNITTEST environment variable to anything
+# if you do this, don't pass normal CLI parameters to it
+# otherwise it runs the CLI
+
+import sys, os, math, copy
+from copy import deepcopy
+import argparse
+import unittest2
+
+msec_per_sec = 1000
+nsec_per_usec = 1000
+
+class FioHistoLogExc(Exception):
+    """Error type raised when a histogram log cannot be parsed."""
+
+# if there is an error, print message, and exit with error status
+
+def myabort(msg):
+    """Write 'ERROR: ' followed by msg to stdout, then exit with status 1."""
+    sys.stdout.write('ERROR: ' + msg + '\n')
+    raise SystemExit(1)
+
+# convert histogram log file into a list of
+# (time_ms, direction, bsz, buckets) tuples where
+# - time_ms is the time in msec at which the log record was written
+# - direction is 0 (read) or 1 (write)
+# - bsz is block size (not used)
+# - buckets is a CSV list of counters that make up the histogram
+# caller decides if the expected number of counters are present
+
+
+def exception_suffix( record_num, pathname ):
+    """Return the location text appended to parse error messages.
+
+    The record number shown in the text is record_num + 1.
+    """
+    shown_record = record_num + 1
+    return 'in histogram record %d file %s' % (shown_record, pathname)
+
+# log file parser raises FioHistoLogExc exceptions
+# it returns histogram buckets in whatever unit fio uses
+
+def parse_hist_file(logfn, buckets_per_interval):
+    """Parse one fio histogram log file.
+
+    Every non-blank line must be a comma-separated list of non-negative
+    integers: time_ms, direction (0 or 1), block size (at most 1 << 24),
+    then exactly buckets_per_interval bucket counters.
+
+    Returns (intervals, max_timestamp_ms): intervals is a list of
+    (time_ms, direction, bsz, buckets) tuples in file order and
+    max_timestamp_ms is the largest time_ms seen.
+
+    Raises FioHistoLogExc if a line is malformed or the file has no records.
+    """
+    # start from an integer so the result stays an int even when every
+    # timestamp is 0; interval counts derived from it are passed to range()
+    max_timestamp_ms = 0
+
+    with open(logfn, 'r') as f:
+        records = [ l.strip() for l in f.readlines() ]
+    intervals = []
+    for k, r in enumerate(records):
+        if r == '':
+            continue
+        # exception_suffix() itself turns the 0-based index k into a
+        # 1-based record number, so k is passed unmodified
+        where = exception_suffix(k, logfn)
+        try:
+            int_tokens = [ int(t) for t in r.split(',') ]
+        except ValueError:
+            raise FioHistoLogExc('non-integer value %s' % where)
+
+        if any(tk < 0 for tk in int_tokens):
+            raise FioHistoLogExc('negative integer value %s' % where)
+
+        if len(int_tokens) < 3:
+            raise FioHistoLogExc('too few numbers %s' % where)
+
+        time_ms = int_tokens[0]
+        if time_ms > max_timestamp_ms:
+            max_timestamp_ms = time_ms
+
+        direction = int_tokens[1]
+        if direction != 0 and direction != 1:
+            raise FioHistoLogExc('invalid I/O direction %s' % where)
+
+        bsz = int_tokens[2]
+        if bsz > (1 << 24):
+            raise FioHistoLogExc('block size too large %s' % where)
+
+        buckets = int_tokens[3:]
+        if len(buckets) != buckets_per_interval:
+            raise FioHistoLogExc('%d buckets per interval but %d expected in %s' %
+                    (len(buckets), buckets_per_interval, where))
+        intervals.append((time_ms, direction, bsz, buckets))
+    if len(intervals) == 0:
+        raise FioHistoLogExc('no records in %s' % logfn)
+    return (intervals, max_timestamp_ms)
+
+
+# compute time range for each bucket index in histogram record
+# see comments in https://github.com/axboe/fio/blob/master/stat.h
+# for description of bucket groups and buckets
+# fio v3 bucket ranges are in nanosec (since response times are measured in nanosec)
+# but we convert fio v3 nanosecs to floating-point microseconds
+
+def time_ranges(groups, counters_per_group, fio_version=3):
+    """Return the [min, max] time range covered by each histogram bucket.
+
+    Buckets are generated group by group; the first two groups use a width
+    of 1 and the width doubles for every later group.  When fio_version is 3
+    both ends of every range are divided by nsec_per_usec.
+    """
+    ranges = []
+    width = 1
+    base = 0
+    for group in range(groups):
+        for _ in range(counters_per_group):
+            low = float(base)
+            high = low + width
+            if fio_version == 3:
+                low, high = low / nsec_per_usec, high / nsec_per_usec
+            ranges.append( [low, high] )
+            base += width
+        # group 0 and group 1 share the same width
+        if group != 0:
+            width *= 2
+    return ranges
+
+
+# compute number of time quantum intervals in the test
+
+def get_time_intervals(time_quantum, max_timestamp_ms):
+    """Return (end_time, time_interval_count) for a run.
+
+    max_timestamp_ms is first floored to whole seconds; the interval count
+    is then (seconds + time_quantum) // time_quantum and end_time is that
+    count multiplied by time_quantum (in seconds).
+    """
+    whole_seconds = max_timestamp_ms // msec_per_sec
+    interval_count = (whole_seconds + time_quantum) // time_quantum
+    return (interval_count * time_quantum, interval_count)
+
+# align raw histogram log data to time quantum so 
+# we can then combine histograms from different threads with addition
+# for randrw workload we count both reads and writes in same output bucket
+# but we separate reads and writes for purposes of calculating
+# end time for histogram record.
+# this requires us to weight a raw histogram bucket by the
+# fraction of the record's time interval that overlaps the current
+# time quantum interval
+# for example, if we have a bucket with 515 samples for time interval
+# [ 1010, 2014 ] msec since start of test, and time quantum is 1 sec, then
+# for time quantum interval [ 1000, 2000 ] msec, the overlap is
+# (2000 - 1010) / (2014 - 1010) = 0.986 (approximately)
+# so the contribution of this bucket to this time quantum is
+# 515 x 0.986 = 507.8 (approximately)
+
+def align_histo_log(raw_histogram_log, time_quantum, bucket_count, max_timestamp_ms):
+    """Redistribute one log's histogram records onto fixed time quanta.
+
+    raw_histogram_log: list of (time_ms, direction, bsz, buckets) tuples
+    time_quantum: quantum length in seconds
+    bucket_count: number of counters in each histogram
+    max_timestamp_ms: timestamp used to size the output (see get_time_intervals)
+
+    Returns a list of (quantum_start_ms, histogram) tuples, one per quantum,
+    where histogram is a list of bucket_count floats.  Each record's counters
+    are split across the quanta it overlaps, weighted by the fraction of the
+    record's own time interval that falls inside each quantum.
+    """
+
+    # slice up test time into intervals of time_quantum seconds
+
+    (end_time, time_interval_count) = get_time_intervals(time_quantum, max_timestamp_ms)
+    time_qtm_ms = time_quantum * msec_per_sec
+    end_time_ms = end_time * msec_per_sec
+    aligned_intervals = [ (j * time_qtm_ms, [ 0.0 ] * bucket_count)
+                          for j in range(0, time_interval_count) ]
+
+    log_record_count = len(raw_histogram_log)
+    for k, record in enumerate(raw_histogram_log):
+
+        (time_msec, direction, sz, interval_buckets) = record
+
+        # find next record with same direction to get end-time
+        # for fio randrw workload,
+        # we have read and write records on same time interval
+        # sometimes read and write records are in opposite order
+        # so look at up to 3 following records; running off the end of
+        # the log means this record lasts until the end of the test
+
+        for lookahead in range(1, 4):
+            if k + lookahead >= log_record_count:
+                time_msec_end = end_time_ms
+                break
+            (time_msec_end, direction2, _, _) = raw_histogram_log[k + lookahead]
+            if direction2 == direction:
+                break
+        else:
+            # next read/write record can be separated
+            # by at most 2 other records
+            assert False, 'no record with same direction within next 3 records'
+
+        # calculate first quantum that overlaps this histogram record
+
+        qtm_start_ms = (time_msec // time_qtm_ms) * time_qtm_ms
+        qtm_end_ms = ((time_msec + time_qtm_ms) // time_qtm_ms) * time_qtm_ms
+        qtm_index = qtm_start_ms // time_qtm_ms
+
+        # a record whose end time equals its own timestamp has no duration
+        # to weight by (that would be a division by zero), so credit all of
+        # its samples to the quantum that contains its timestamp
+
+        if time_msec_end == time_msec:
+            (_, aligned_histogram) = aligned_intervals[qtm_index]
+            for bx, b in enumerate(interval_buckets):
+                aligned_histogram[bx] += b
+            continue
+
+        # for each quantum that overlaps this histogram record's time interval
+
+        while qtm_start_ms < time_msec_end:  # while quantum overlaps record
+
+            # calculate fraction of the record's time interval
+            # that falls inside this quantum
+
+            overlap_start = max(qtm_start_ms, time_msec)
+            overlap_end = min(qtm_end_ms, time_msec_end)
+            weight = float(overlap_end - overlap_start)
+            weight /= (time_msec_end - time_msec)
+            (_,aligned_histogram) = aligned_intervals[qtm_index]
+            for bx, b in enumerate(interval_buckets):
+                weighted_bucket = weight * b
+                aligned_histogram[bx] += weighted_bucket
+
+            # advance to the next time quantum
+
+            qtm_start_ms += time_qtm_ms
+            qtm_end_ms += time_qtm_ms
+            qtm_index += 1
+
+    return aligned_intervals
+
+# add histogram in "source" to histogram in "target"
+# it is assumed that the 2 histograms are precisely time-aligned
+
+def add_to_histo_from( target, source ):
+    """Add every counter of source to the same-index counter of target, in place."""
+    for idx, count in enumerate(source):
+        target[idx] += count
+
+# compute percentiles
+# inputs:
+#   buckets: histogram bucket array 
+#   wanted: list of floating-pt percentiles to calculate
+#   time_ranges: [tmin,tmax) time interval for each bucket
+# returns None if no I/O reported.
+# otherwise we would be dividing by zero
+# think of buckets as probability distribution function
+# and this loop is integrating to get cumulative distribution function
+
+def get_pctiles(buckets, wanted, time_ranges):
+    """Compute latency percentiles from one histogram.
+
+    buckets: histogram bucket counters
+    wanted: percentiles to compute, in increasing order
+    time_ranges: [tmin, tmax] pair for each bucket index
+
+    Returns None if the histogram holds no I/O, otherwise a dictionary
+    mapping each wanted percentile to a time interpolated linearly inside
+    the bucket where the cumulative count crosses that percentile.
+    An empty wanted list yields an empty dictionary.
+    """
+
+    # get total of IO requests done
+    total_ios = sum(buckets)
+
+    # don't return percentiles if no I/O was done during interval
+    if total_ios == 0.0:
+        return None
+
+    pctile_count = len(wanted)
+
+    # results returned as dictionary keyed by percentile
+    pctile_result = {}
+
+    # nothing was asked for, so there is no first percentile to look up
+    if pctile_count == 0:
+        return pctile_result
+
+    # index of next percentile in list
+    pctile_index = 0
+
+    # next percentile
+    next_pctile = wanted[pctile_index]
+
+    # no one is interested in percentiles bigger than this but not 100.0
+    # this prevents floating-point error from preventing loop exit
+    almost_100 = 99.9999
+
+    # pct is the percentile corresponding to
+    # all I/O requests up through bucket b
+    pct = 0.0
+    total_so_far = 0
+    for b, io_count in enumerate(buckets):
+        if io_count == 0:
+            continue
+        total_so_far += io_count
+        # last_pct is the percentile corresponding to
+        # all I/O requests up to, but not including, bucket b
+        last_pct = pct
+        pct = 100.0 * float(total_so_far) / total_ios
+        # a single bucket could satisfy multiple pctiles
+        # so this must be a while loop
+        # for 100-percentile (max latency) case, no bucket exceeds it
+        # so we must stop there.
+        while ((next_pctile == 100.0 and pct >= almost_100) or
+               (next_pctile < 100.0  and pct > next_pctile)):
+            # interpolate between min and max time for bucket time interval
+            # we keep the time_ranges access inside this loop,
+            # even though it could be above the loop,
+            # because in many cases we will not be even entering
+            # the loop so we optimize out these accesses
+            range_max_time = time_ranges[b][1]
+            range_min_time = time_ranges[b][0]
+            offset_frac = (next_pctile - last_pct)/(pct - last_pct)
+            interpolation = range_min_time + (offset_frac*(range_max_time - range_min_time))
+            pctile_result[next_pctile] = interpolation
+            pctile_index += 1
+            if pctile_index == pctile_count:
+                break
+            next_pctile = wanted[pctile_index]
+        if pctile_index == pctile_count:
+            break
+    assert pctile_index == pctile_count
+    return pctile_result
+
+
+# this is really the main program
+
+def compute_percentiles_from_logs():
+    """Command-line entry point.
+
+    Parses the command line, reads every histogram log named on it,
+    aligns each log to the time quantum, sums the aligned histograms
+    across logs and prints one CSV row of percentiles per time quantum.
+    Invalid parameters or an unparsable log end the program via myabort().
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--fio-version", dest="fio_version",
+        default=3, choices=[2,3], type=int,
+        help="fio version (default=3)")
+    # no fixed default here: the default depends on --fio-version, and an
+    # explicit value given by the user must not be overridden
+    parser.add_argument("--bucket-groups", dest="bucket_groups", default=None, type=int,
+        help="fio histogram bucket groups (default=29, or 19 for fio version 2)")
+    parser.add_argument("--bucket-bits", dest="bucket_bits",
+        default=6, type=int,
+        help="fio histogram buckets-per-group bits (default=6 means 64 buckets/group)")
+    parser.add_argument("--percentiles", dest="pctiles_wanted",
+        default=[ 0., 50., 95., 99., 100.], type=float, nargs='+',
+        help="percentiles to report, each between 0 and 100 (default=0 50 95 99 100)")
+    parser.add_argument("--time-quantum", dest="time_quantum",
+        default=1, type=int,
+        help="time quantum in seconds (default=1)")
+    parser.add_argument("--output-unit", dest="output_unit",
+        default="usec", type=str, choices=['msec', 'usec', 'nsec'],
+        help="Latency percentile output unit: msec|usec|nsec (default usec)")
+    parser.add_argument("file_list", nargs='+',
+        help='list of files, preceded by " -- " if necessary')
+    args = parser.parse_args()
+
+    # default changes based on fio version
+    if args.bucket_groups is None:
+        if args.fio_version == 2:
+            args.bucket_groups = 19
+        else:
+            args.bucket_groups = 29
+
+    # get_pctiles() needs percentiles in increasing order and cannot
+    # satisfy anything outside 0..100
+    for p in args.pctiles_wanted:
+        if p < 0.0 or p > 100.0:
+            myabort('percentiles must be between 0 and 100')
+    args.pctiles_wanted = sorted(args.pctiles_wanted)
+
+    # a non-positive quantum would lead to division by zero below
+    if args.time_quantum <= 0:
+        myabort('time-quantum must be a positive number of seconds')
+
+    # print parameters
+
+    print('fio version = %d' % args.fio_version)
+    print('bucket groups = %d' % args.bucket_groups)
+    print('bucket bits = %d' % args.bucket_bits)
+    print('time quantum = %d sec' % args.time_quantum)
+    print('percentiles = %s' % ','.join([ str(p) for p in args.pctiles_wanted ]))
+    buckets_per_group = 1 << args.bucket_bits
+    print('buckets per group = %d' % buckets_per_group)
+    buckets_per_interval = buckets_per_group * args.bucket_groups
+    print('buckets per interval = %d ' % buckets_per_interval)
+    print('output unit = ' + args.output_unit)
+
+    # percentiles are computed in usec; pick the conversion to the output unit
+    if args.output_unit == 'msec':
+        to_output_unit = lambda usec: usec / 1000.0
+    elif args.output_unit == 'nsec':
+        to_output_unit = lambda usec: usec * float(nsec_per_usec)
+    else:
+        to_output_unit = lambda usec: usec / 1.0
+
+    # calculate response time interval associated with each histogram bucket
+
+    bucket_times = time_ranges(args.bucket_groups, buckets_per_group, fio_version=args.fio_version)
+
+    # construct template for each histogram bucket array with buckets all zeroes
+    # we just copy this for each new histogram
+
+    zeroed_buckets = [ 0.0 ] * buckets_per_interval
+
+    # print CSV header just like fiologparser_hist does
+
+    header = 'msec, ' + ''.join([ '%3.1f, ' % p for p in args.pctiles_wanted ])
+    print('time (millisec), percentiles in increasing order with values in ' + args.output_unit)
+    print(header)
+
+    # parse the histogram logs
+    # assumption: each bucket has a monotonically increasing time
+    # assumption: time ranges do not overlap for a single thread's records
+    # (exception: if randrw workload, then there is a read and a write
+    # record for the same time interval)
+
+    max_timestamp_all_logs = 0
+    hist_files = {}
+    for fn in args.file_list:
+        try:
+            (hist_files[fn], max_timestamp_ms)  = parse_hist_file(fn, buckets_per_interval)
+        except FioHistoLogExc as e:
+            myabort(str(e))
+        max_timestamp_all_logs = max(max_timestamp_all_logs, max_timestamp_ms)
+
+    (end_time, time_interval_count) = get_time_intervals(args.time_quantum, max_timestamp_all_logs)
+    all_threads_histograms = [ ((j*args.time_quantum*msec_per_sec), deepcopy(zeroed_buckets))
+                                for j in range(0, time_interval_count) ]
+
+    for logfn in hist_files.keys():
+        aligned_per_thread = align_histo_log(hist_files[logfn],
+                                             args.time_quantum,
+                                             buckets_per_interval,
+                                             max_timestamp_all_logs)
+        for t in range(0, time_interval_count):
+            (_, all_threads_histo_t) = all_threads_histograms[t]
+            (_, log_histo_t) = aligned_per_thread[t]
+            add_to_histo_from( all_threads_histo_t, log_histo_t )
+
+    # calculate percentiles across aggregate histogram for all threads
+
+    for (t_msec, all_threads_histo_t) in all_threads_histograms:
+        record = '%d, ' % t_msec
+        pct = get_pctiles(all_threads_histo_t, args.pctiles_wanted, bucket_times)
+        if not pct:
+            # no I/O in this quantum: emit empty columns
+            for w in args.pctiles_wanted:
+                record += ', '
+        else:
+            # walk the requested list (already sorted) so that the columns
+            # always line up with the header, even for repeated percentiles
+            pct_values = [ str(to_output_unit(pct[wanted])) for wanted in args.pctiles_wanted ]
+            record += ', '.join(pct_values)
+        print(record)
+
+
+
+#end of MAIN PROGRAM
+
+
+
+##### below are unit tests ##############
+
+import tempfile, shutil
+from os.path import join
+should_not_get_here = False
+
+# Unit tests for the parsing, alignment and percentile helpers in this module.
+# Log files used by the tests are written into one temporary directory that
+# is shared by all test methods.
+class Test(unittest2.TestCase):
+    # temporary directory shared by all tests, created in setUpClass
+    tempdir = None
+
+    # a little less typing please
+    def A(self, boolean_val):
+        self.assertTrue(boolean_val)
+
+    # initialize unit test environment
+
+    @classmethod
+    def setUpClass(cls):
+        d = tempfile.mkdtemp()
+        Test.tempdir = d
+
+    # remove anything left by unit test environment
+    # unless user sets UNITTEST_LEAVE_FILES environment variable
+
+    @classmethod
+    def tearDownClass(cls):
+        if not os.getenv("UNITTEST_LEAVE_FILES"):
+            shutil.rmtree(cls.tempdir)
+
+    # each test gets its own log file path, named after the test id
+    def setUp(self):
+        self.fn = join(Test.tempdir, self.id())
+
+    # adding histograms changes the target in place and leaves the source alone
+    def test_a_add_histos(self):
+        a = [ 1.0, 2.0 ]
+        b = [ 1.5, 2.5 ]
+        add_to_histo_from( a, b )
+        self.A(a == [2.5, 4.5])
+        self.A(b == [1.5, 2.5])
+
+    # well-formed log, with and without whitespace around the commas
+    def test_b1_parse_log(self):
+        with open(self.fn, 'w') as f:
+            f.write('1234, 0, 4096, 1, 2, 3, 4\n')
+            f.write('5678,1,16384,5,6,7,8 \n')
+        (raw_histo_log, max_timestamp) = parse_hist_file(self.fn, 4) # 4 buckets per interval
+        self.A(len(raw_histo_log) == 2 and max_timestamp == 5678)
+        (time_ms, direction, bsz, histo) = raw_histo_log[0]
+        self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ])
+        (time_ms, direction, bsz, histo) = raw_histo_log[1]
+        self.A(time_ms == 5678 and direction == 1 and bsz == 16384 and histo == [ 5, 6, 7, 8 ])
+
+    # an empty file must be rejected with a 'no records' error
+    def test_b2_parse_empty_log(self):
+        with open(self.fn, 'w') as f:
+            pass
+        try:
+            (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+            self.A(should_not_get_here)
+        except FioHistoLogExc as e:
+            self.A(str(e).startswith('no records'))
+
+    # blank lines before and after the records are ignored
+    def test_b3_parse_empty_records(self):
+        with open(self.fn, 'w') as f:
+            f.write('\n')
+            f.write('1234, 0, 4096, 1, 2, 3, 4\n')
+            f.write('5678,1,16384,5,6,7,8 \n')
+            f.write('\n')
+        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+        self.A(len(raw_histo_log) == 2 and max_timestamp_ms == 5678)
+        (time_ms, direction, bsz, histo) = raw_histo_log[0]
+        self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ])
+        (time_ms, direction, bsz, histo) = raw_histo_log[1]
+        self.A(time_ms == 5678 and direction == 1 and bsz == 16384 and histo == [ 5, 6, 7, 8 ])
+
+    # a token that is not an integer ('1a') is rejected
+    def test_b4_parse_non_int(self):
+        with open(self.fn, 'w') as f:
+            f.write('12, 0, 4096, 1a, 2, 3, 4\n')
+        try:
+            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            self.A(False)
+        except FioHistoLogExc as e:
+            self.A(str(e).startswith('non-integer'))
+
+    # a negative value is rejected
+    def test_b5_parse_neg_int(self):
+        with open(self.fn, 'w') as f:
+            f.write('-12, 0, 4096, 1, 2, 3, 4\n')
+        try:
+            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            self.A(False)
+        except FioHistoLogExc as e:
+            self.A(str(e).startswith('negative integer'))
+
+    # a record with fewer than 3 fields is rejected
+    def test_b6_parse_too_few_int(self):
+        with open(self.fn, 'w') as f:
+            f.write('0, 0\n')
+        try:
+            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            self.A(False)
+        except FioHistoLogExc as e:
+            self.A(str(e).startswith('too few numbers'))
+
+    # a direction other than 0 or 1 is rejected
+    def test_b7_parse_invalid_direction(self):
+        with open(self.fn, 'w') as f:
+            f.write('100, 2, 4096, 1, 2, 3, 4\n')
+        try:
+            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            self.A(False)
+        except FioHistoLogExc as e:
+            self.A(str(e).startswith('invalid I/O direction'))
+
+    # block size of exactly 1 << 24 is accepted, anything larger is rejected
+    def test_b8_parse_bsz_too_big(self):
+        with open(self.fn+'_good', 'w') as f:
+            f.write('100, 1, %d, 1, 2, 3, 4\n' % (1<<24))
+        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn+'_good', 4)
+        with open(self.fn+'_bad', 'w') as f:
+            f.write('100, 1, 20000000, 1, 2, 3, 4\n')
+        try:
+            (raw_histo_log, _) = parse_hist_file(self.fn+'_bad', 4)
+            self.A(False)
+        except FioHistoLogExc as e:
+            self.A(str(e).startswith('block size too large'))
+
+    # 5 buckets in the record when 4 are expected is rejected
+    def test_b9_parse_wrong_bucket_count(self):
+        with open(self.fn, 'w') as f:
+            f.write('100, 1, %d, 1, 2, 3, 4, 5\n' % (1<<24))
+        try:
+            (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+            self.A(False)
+        except FioHistoLogExc as e:
+            self.A(str(e).__contains__('buckets per interval'))
+
+    # bucket time ranges for small and for full-size histograms
+    def test_c1_time_ranges(self):
+        ranges = time_ranges(3, 2)  # fio_version defaults to 3
+        expected_ranges = [ # fio_version 3 ranges are divided by nsec_per_usec
+                [0.000, 0.001], [0.001, 0.002],   # first group
+                [0.002, 0.003], [0.003, 0.004],   # second group same width
+                [0.004, 0.006], [0.006, 0.008]]   # subsequent groups double width
+        self.A(ranges == expected_ranges)
+        ranges = time_ranges(3, 2, fio_version=3)
+        self.A(ranges == expected_ranges)
+        ranges = time_ranges(3, 2, fio_version=2)
+        # version 2 ranges are not divided, so they are 1000x the version 3 ones
+        expected_ranges_v2 = [ [ 1000.0 * min_or_max for min_or_max in time_range ] 
+                               for time_range in expected_ranges ]
+        self.A(ranges == expected_ranges_v2)
+        # see fio V3 stat.h for why 29 groups and 2^6 buckets/group
+        normal_ranges_v3 = time_ranges(29, 64)
+        # for v3, bucket time intervals are measured in nanoseconds
+        self.A(len(normal_ranges_v3) == 29 * 64 and normal_ranges_v3[-1][1] == 64*(1<<(29-1))/1000.0)
+        normal_ranges_v2 = time_ranges(19, 64, fio_version=2)
+        # for v2, bucket time intervals are measured in microseconds so we have fewer buckets
+        self.A(len(normal_ranges_v2) == 19 * 64 and normal_ranges_v2[-1][1] == 64*(1<<(19-1)))
+
+    # a single record that fits inside one 5-second quantum keeps its counts
+    def test_d1_align_histo_log_1_quantum(self):
+        with open(self.fn, 'w') as f:
+            f.write('100, 1, 4096, 1, 2, 3, 4')
+        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+        self.A(max_timestamp_ms == 100)
+        aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms)
+        self.A(len(aligned_log) == 1)
+        (time_ms0, h) = aligned_log[0]
+        self.A(time_ms0 == 0 and h == [1.0, 2.0, 3.0, 4.0])
+
+    # we need this to compare 2 lists of floating point numbers for equality
+    # because of floating-point imprecision
+
+    # absolute tolerance when either value is 0.0, otherwise tolerance relative to x
+    def compare_2_floats(self, x, y):
+        if x == 0.0 or y == 0.0:
+            return (x+y) < 0.0000001
+        else:
+            return (math.fabs(x-y)/x) < 0.00001
+                
+    # True if both lists have the same length and every pair of elements is close
+    def is_close(self, buckets, buckets_expected):
+        if len(buckets) != len(buckets_expected):
+            return False
+        compare_buckets = lambda k: self.compare_2_floats(buckets[k], buckets_expected[k])
+        indices_close = list(filter(compare_buckets, range(0, len(buckets))))
+        return len(indices_close) == len(buckets)
+
+    # a record spanning two 5-second quanta is split between them by overlap
+    def test_d2_align_histo_log_2_quantum(self):
+        with open(self.fn, 'w') as f:
+            f.write('2000, 1, 4096, 1, 2, 3, 4\n')
+            f.write('7000, 1, 4096, 1, 2, 3, 4\n')
+        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+        self.A(max_timestamp_ms == 7000)
+        (_, _, _, raw_buckets1) = raw_histo_log[0]
+        (_, _, _, raw_buckets2) = raw_histo_log[1]
+        aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms)
+        self.A(len(aligned_log) == 2)
+        (time_ms1, h1) = aligned_log[0]
+        (time_ms2, h2) = aligned_log[1]
+        # because first record is from time interval [2000, 7000]
+        # we weight it accordingly: 3 of its 5 seconds fall in the first
+        # quantum and 2 in the second
+        expect1 = [float(b) * 0.6 for b in raw_buckets1]
+        expect2 = [float(b) * 0.4 for b in raw_buckets1]
+        for e in range(0, len(expect2)):
+            expect2[e] += raw_buckets2[e]
+        self.A(time_ms1 == 0    and self.is_close(h1, expect1))
+        self.A(time_ms2 == 5000 and self.is_close(h2, expect2))
+
+    # what to expect if histogram buckets are all equal
+    def test_e1_get_pctiles_flat_histo(self):
+        with open(self.fn, 'w') as f:
+            buckets = [ 100 for j in range(0, 128) ]
+            f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets]))
+        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 128)
+        self.A(max_timestamp_ms == 9000)
+        aligned_log = align_histo_log(raw_histo_log, 5, 128, max_timestamp_ms)
+        time_intervals = time_ranges(4, 32)
+        # since buckets are all equal, then median is halfway through time_intervals
+        # and max latency interval is at end of time_intervals
+        self.A(time_intervals[64][1] == 0.066 and time_intervals[127][1] == 0.256)
+        pctiles_wanted = [ 0, 50, 100 ]
+        pct_vs_time = []
+        for (time_ms, histo) in aligned_log:
+            pct_vs_time.append(get_pctiles(histo, pctiles_wanted, time_intervals))
+        self.A(pct_vs_time[0] == None)  # no I/O in this time interval
+        expected_pctiles = { 0:0.000, 50:0.064, 100:0.256 }
+        self.A(pct_vs_time[1] == expected_pctiles)
+
+    # what to expect if just the highest histogram bucket is used
+    def test_e2_get_pctiles_highest_pct(self):
+        fio_v3_bucket_count = 29 * 64
+        with open(self.fn, 'w') as f:
+            # make a empty fio v3 histogram
+            buckets = [ 0 for j in range(0, fio_v3_bucket_count) ]
+            # add one I/O request to last bucket
+            buckets[-1] = 1
+            f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets]))
+        (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, fio_v3_bucket_count)
+        self.A(max_timestamp_ms == 9000)
+        aligned_log = align_histo_log(raw_histo_log, 5, fio_v3_bucket_count, max_timestamp_ms)
+        # the record starts at 9000 msec, so it lands in the second 5-second quantum
+        (time_ms, histo) = aligned_log[1]
+        time_intervals = time_ranges(29, 64)
+        expected_pctiles = { 100.0:(64*(1<<28))/1000.0 }
+        pct = get_pctiles( histo, [ 100.0 ], time_intervals )
+        self.A(pct == expected_pctiles)
+
+# we are using this module as a standalone program
+
+if __name__ == '__main__':
+    # with UNITTEST set in the environment run the unit tests,
+    # otherwise behave as the command-line tool
+    if not os.getenv('UNITTEST'):
+        compute_percentiles_from_logs()
+    else:
+        sys.exit(unittest2.main())
+
index 40d484b..01492f2 100644 (file)
--- a/verify.c
+++ b/verify.c
@@ -801,7 +801,7 @@ static int verify_trimmed_io_u(struct thread_data *td, struct io_u *io_u)
 
        mem_is_zero_slow(io_u->buf, io_u->buflen, &offset);
 
-       log_err("trim: verify failed at file %s offset %llu, length %lu"
+       log_err("trim: verify failed at file %s offset %llu, length %llu"
                ", block offset %lu\n",
                        io_u->file->file_name, io_u->offset, io_u->buflen,
                        (unsigned long) offset);
@@ -1517,7 +1517,7 @@ int paste_blockoff(char *buf, unsigned int len, void *priv)
        struct io_u *io = priv;
        unsigned long long off;
 
-       typecheck(typeof(off), io->offset);
+       typecheck(__typeof__(off), io->offset);
        off = cpu_to_le64((uint64_t)io->offset);
        len = min(len, (unsigned int)sizeof(off));
        memcpy(buf, &off, len);