Add support for doing total latency percentiles
author Jens Axboe <axboe@kernel.dk>
Thu, 14 Sep 2017 04:07:31 +0000 (22:07 -0600)
committer Jens Axboe <axboe@kernel.dk>
Thu, 14 Sep 2017 04:07:31 +0000 (22:07 -0600)
By default, fio does completion latency percentiles. Sometimes
what you want is total IO latency percentiles, including the
submission part as well. If that's your thing, then set

lat_percentiles=1

and get that instead of the default completion latency percentiles.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
12 files changed:
HOWTO
cconv.c
client.c
fio.1
gclient.c
init.c
options.c
server.c
server.h
stat.c
stat.h
thread_options.h

diff --git a/HOWTO b/HOWTO
index 2a70b7c629b6d6470625ac98e02dad555571f1ed..bfaa054a5de0e84e1d914d2b2375c041c00eb57b 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -2860,7 +2860,15 @@ Measurements and reporting
 
 .. option:: clat_percentiles=bool
 
 
 .. option:: clat_percentiles=bool
 
-       Enable the reporting of percentiles of completion latencies.
+       Enable the reporting of percentiles of completion latencies.  This
+       option is mutually exclusive with :option:`lat_percentiles`.
+
+.. option:: lat_percentiles=bool
+
+       Enable the reporting of percentiles of IO latencies. This is similar
+       to :option:`clat_percentiles`, except that this includes the
+       submission latency. This option is mutually exclusive with
+       :option:`clat_percentiles`.
 
 .. option:: percentile_list=float_list
 
 
 .. option:: percentile_list=float_list
 
diff --git a/cconv.c b/cconv.c
index ac58705dabf40e17089e612f980c5f63db1d0e1a..f809fd5197521ba58432100d694cb902538a1382 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -267,6 +267,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->trim_batch = le32_to_cpu(top->trim_batch);
        o->trim_zero = le32_to_cpu(top->trim_zero);
        o->clat_percentiles = le32_to_cpu(top->clat_percentiles);
        o->trim_batch = le32_to_cpu(top->trim_batch);
        o->trim_zero = le32_to_cpu(top->trim_zero);
        o->clat_percentiles = le32_to_cpu(top->clat_percentiles);
+       o->lat_percentiles = le32_to_cpu(top->lat_percentiles);
        o->percentile_precision = le32_to_cpu(top->percentile_precision);
        o->continue_on_error = le32_to_cpu(top->continue_on_error);
        o->cgroup_weight = le32_to_cpu(top->cgroup_weight);
        o->percentile_precision = le32_to_cpu(top->percentile_precision);
        o->continue_on_error = le32_to_cpu(top->continue_on_error);
        o->cgroup_weight = le32_to_cpu(top->cgroup_weight);
@@ -454,6 +455,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->trim_batch = cpu_to_le32(o->trim_batch);
        top->trim_zero = cpu_to_le32(o->trim_zero);
        top->clat_percentiles = cpu_to_le32(o->clat_percentiles);
        top->trim_batch = cpu_to_le32(o->trim_batch);
        top->trim_zero = cpu_to_le32(o->trim_zero);
        top->clat_percentiles = cpu_to_le32(o->clat_percentiles);
+       top->lat_percentiles = cpu_to_le32(o->lat_percentiles);
        top->percentile_precision = cpu_to_le32(o->percentile_precision);
        top->continue_on_error = cpu_to_le32(o->continue_on_error);
        top->cgroup_weight = cpu_to_le32(o->cgroup_weight);
        top->percentile_precision = cpu_to_le32(o->percentile_precision);
        top->continue_on_error = cpu_to_le32(o->continue_on_error);
        top->cgroup_weight = cpu_to_le32(o->cgroup_weight);
diff --git a/client.c b/client.c
index 281d853f14a771f1ecb36b62fedc5f5ca565848b..09e810afc6120bb84b28e12164b09bb084661f59 100644 (file)
--- a/client.c
+++ b/client.c
@@ -893,7 +893,8 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
        dst->ctx                = le64_to_cpu(src->ctx);
        dst->minf               = le64_to_cpu(src->minf);
        dst->majf               = le64_to_cpu(src->majf);
        dst->ctx                = le64_to_cpu(src->ctx);
        dst->minf               = le64_to_cpu(src->minf);
        dst->majf               = le64_to_cpu(src->majf);
-       dst->clat_percentiles   = le64_to_cpu(src->clat_percentiles);
+       dst->clat_percentiles   = le32_to_cpu(src->clat_percentiles);
+       dst->lat_percentiles    = le32_to_cpu(src->lat_percentiles);
        dst->percentile_precision = le64_to_cpu(src->percentile_precision);
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++) {
        dst->percentile_precision = le64_to_cpu(src->percentile_precision);
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++) {
diff --git a/fio.1 b/fio.1
index 97133dacbb846d286c8f2219a7f5178b42cc38f5..63e1c2e585bd65f37e9887bc673bb3de4bd174b7 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -2543,7 +2543,13 @@ Disable measurements of throughput/bandwidth numbers. See
 \fBdisable_lat\fR.
 .TP
 .BI clat_percentiles \fR=\fPbool
 \fBdisable_lat\fR.
 .TP
 .BI clat_percentiles \fR=\fPbool
-Enable the reporting of percentiles of completion latencies.
+Enable the reporting of percentiles of completion latencies. This option is
+mutually exclusive with \fBlat_percentiles\fR.
+.TP
+.BI lat_percentiles \fR=\fPbool
+Enable the reporting of percentiles of IO latencies. This is similar to
+\fBclat_percentiles\fR, except that this includes the submission latency.
+This option is mutually exclusive with \fBclat_percentiles\fR.
 .TP
 .BI percentile_list \fR=\fPfloat_list
 Overwrite the default list of percentiles for completion latencies and the
 .TP
 .BI percentile_list \fR=\fPfloat_list
 Overwrite the default list of percentiles for completion latencies and the
diff --git a/gclient.c b/gclient.c
index 4eb99a0896ee85c5a9c62c5591b1c928ee5b7a71..43c8a0891818ac0d28d36d770e507960eb29bd4a 100644 (file)
--- a/gclient.c
+++ b/gclient.c
@@ -1127,7 +1127,11 @@ static void gfio_show_clat_percentiles(struct gfio_client *gc,
                base = "nsec";
         }
 
                base = "nsec";
         }
 
-       sprintf(tmp, "Completion percentiles (%s)", base);
+       if (ts->clat_percentiles)
+               sprintf(tmp, "Completion percentiles (%s)", base);
+       else
+               sprintf(tmp, "Latency percentiles (%s)", base);
+
        tree_view = gfio_output_clat_percentiles(ovals, plist, len, base, scale_down);
        ge->clat_graph = setup_clat_graph(tmp, ovals, plist, len, 700.0, 300.0);
 
        tree_view = gfio_output_clat_percentiles(ovals, plist, len, base, scale_down);
        ge->clat_graph = setup_clat_graph(tmp, ovals, plist, len, 700.0, 300.0);
 
diff --git a/init.c b/init.c
index 834c868e33f080cf34c321860e772c0d42b27b0f..6ac5212916b5609f1c2ee1c9f8b165a7640d25e6 100644 (file)
--- a/init.c
+++ b/init.c
@@ -909,6 +909,20 @@ static int fixup_options(struct thread_data *td)
                ret = 1;
        }
 
                ret = 1;
        }
 
+       if (fio_option_is_set(o, clat_percentiles) &&
+           !fio_option_is_set(o, lat_percentiles)) {
+               o->lat_percentiles = !o->clat_percentiles;
+       } else if (fio_option_is_set(o, lat_percentiles) &&
+                  !fio_option_is_set(o, clat_percentiles)) {
+               o->clat_percentiles = !o->lat_percentiles;
+       } else if (fio_option_is_set(o, lat_percentiles) &&
+                  fio_option_is_set(o, clat_percentiles) &&
+                  o->lat_percentiles && o->clat_percentiles) {
+               log_err("fio: lat_percentiles and clat_percentiles are "
+                       "mutually exclusive\n");
+               ret = 1;
+       }
+
        return ret;
 }
 
        return ret;
 }
 
@@ -1401,6 +1415,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
        td->mutex = fio_mutex_init(FIO_MUTEX_LOCKED);
 
        td->ts.clat_percentiles = o->clat_percentiles;
        td->mutex = fio_mutex_init(FIO_MUTEX_LOCKED);
 
        td->ts.clat_percentiles = o->clat_percentiles;
+       td->ts.lat_percentiles = o->lat_percentiles;
        td->ts.percentile_precision = o->percentile_precision;
        memcpy(td->ts.percentile_list, o->percentile_list, sizeof(o->percentile_list));
 
        td->ts.percentile_precision = o->percentile_precision;
        memcpy(td->ts.percentile_list, o->percentile_list, sizeof(o->percentile_list));
 
diff --git a/options.c b/options.c
index 54fa4eef7ac5965792ff0fae3aeeaad85470a7c5..5c1abe91817dc7c3ba62fab0108fbac041f3c032 100644 (file)
--- a/options.c
+++ b/options.c
@@ -4076,6 +4076,18 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .off1   = offsetof(struct thread_options, clat_percentiles),
                .help   = "Enable the reporting of completion latency percentiles",
                .def    = "1",
                .off1   = offsetof(struct thread_options, clat_percentiles),
                .help   = "Enable the reporting of completion latency percentiles",
                .def    = "1",
+               .inverse = "lat_percentiles",
+               .category = FIO_OPT_C_STAT,
+               .group  = FIO_OPT_G_INVALID,
+       },
+       {
+               .name   = "lat_percentiles",
+               .lname  = "IO latency percentiles",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct thread_options, lat_percentiles),
+               .help   = "Enable the reporting of IO latency percentiles",
+               .def    = "0",
+               .inverse = "clat_percentiles",
                .category = FIO_OPT_C_STAT,
                .group  = FIO_OPT_G_INVALID,
        },
                .category = FIO_OPT_C_STAT,
                .group  = FIO_OPT_G_INVALID,
        },
diff --git a/server.c b/server.c
index 2c08c3e12b26ea3bbe19a15f1cac95cfaba134d4..0469cea7221f063c26fcc0526cc6a2f8928ae873 100644 (file)
--- a/server.c
+++ b/server.c
@@ -1484,7 +1484,8 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
        p.ts.ctx                = cpu_to_le64(ts->ctx);
        p.ts.minf               = cpu_to_le64(ts->minf);
        p.ts.majf               = cpu_to_le64(ts->majf);
        p.ts.ctx                = cpu_to_le64(ts->ctx);
        p.ts.minf               = cpu_to_le64(ts->minf);
        p.ts.majf               = cpu_to_le64(ts->majf);
-       p.ts.clat_percentiles   = cpu_to_le64(ts->clat_percentiles);
+       p.ts.clat_percentiles   = cpu_to_le32(ts->clat_percentiles);
+       p.ts.lat_percentiles    = cpu_to_le32(ts->lat_percentiles);
        p.ts.percentile_precision = cpu_to_le64(ts->percentile_precision);
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++) {
        p.ts.percentile_precision = cpu_to_le64(ts->percentile_precision);
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++) {
diff --git a/server.h b/server.h
index f63a5185d2a0f832cbe4adfab47933d9e422cb4f..ba3abfeb32287e777eb93e867a6f0e72a13a34c8 100644 (file)
--- a/server.h
+++ b/server.h
@@ -49,7 +49,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
 };
 
 enum {
-       FIO_SERVER_VER                  = 65,
+       FIO_SERVER_VER                  = 66,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
diff --git a/stat.c b/stat.c
index 63353cc0cd2bb496de27d6bda796ab130cb285d5..9828d153de8f16edd3d577487de3c4d5fd68cfcc 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -200,12 +200,13 @@ unsigned int calc_clat_percentiles(unsigned int *io_u_plat, unsigned long nr,
  */
 static void show_clat_percentiles(unsigned int *io_u_plat, unsigned long nr,
                                  fio_fp64_t *plist, unsigned int precision,
  */
 static void show_clat_percentiles(unsigned int *io_u_plat, unsigned long nr,
                                  fio_fp64_t *plist, unsigned int precision,
-                                 struct buf_output *out)
+                                 bool is_clat, struct buf_output *out)
 {
        unsigned int divisor, len, i, j = 0;
        unsigned long long minv, maxv;
        unsigned long long *ovals;
        int is_last, per_line, scale_down, time_width;
 {
        unsigned int divisor, len, i, j = 0;
        unsigned long long minv, maxv;
        unsigned long long *ovals;
        int is_last, per_line, scale_down, time_width;
+       const char *pre = is_clat ? "clat" : " lat";
        char fmt[32];
 
        len = calc_clat_percentiles(io_u_plat, nr, plist, &ovals, &maxv, &minv);
        char fmt[32];
 
        len = calc_clat_percentiles(io_u_plat, nr, plist, &ovals, &maxv, &minv);
@@ -219,15 +220,15 @@ static void show_clat_percentiles(unsigned int *io_u_plat, unsigned long nr,
        if (minv > 2000000 && maxv > 99999999ULL) {
                scale_down = 2;
                divisor = 1000000;
        if (minv > 2000000 && maxv > 99999999ULL) {
                scale_down = 2;
                divisor = 1000000;
-               log_buf(out, "    clat percentiles (msec):\n     |");
+               log_buf(out, "    %s percentiles (msec):\n     |", pre);
        } else if (minv > 2000 && maxv > 99999) {
                scale_down = 1;
                divisor = 1000;
        } else if (minv > 2000 && maxv > 99999) {
                scale_down = 1;
                divisor = 1000;
-               log_buf(out, "    clat percentiles (usec):\n     |");
+               log_buf(out, "    %s percentiles (usec):\n     |", pre);
        } else {
                scale_down = 0;
                divisor = 1;
        } else {
                scale_down = 0;
                divisor = 1;
-               log_buf(out, "    clat percentiles (nsec):\n     |");
+               log_buf(out, "    %s percentiles (nsec):\n     |", pre);
        }
 
 
        }
 
 
@@ -457,11 +458,12 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
        if (calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev))
                display_lat(" lat", min, max, mean, dev, out);
 
        if (calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev))
                display_lat(" lat", min, max, mean, dev, out);
 
-       if (ts->clat_percentiles) {
+       if (ts->clat_percentiles || ts->lat_percentiles) {
                show_clat_percentiles(ts->io_u_plat[ddir],
                                        ts->clat_stat[ddir].samples,
                                        ts->percentile_list,
                show_clat_percentiles(ts->io_u_plat[ddir],
                                        ts->clat_stat[ddir].samples,
                                        ts->percentile_list,
-                                       ts->percentile_precision, out);
+                                       ts->percentile_precision,
+                                       ts->clat_percentiles, out);
        }
        if (calc_lat(&ts->bw_stat[ddir], &min, &max, &mean, &dev)) {
                double p_of_agg = 100.0, fkb_base = (double)rs->kb_base;
        }
        if (calc_lat(&ts->bw_stat[ddir], &min, &max, &mean, &dev)) {
                double p_of_agg = 100.0, fkb_base = (double)rs->kb_base;
@@ -896,7 +898,7 @@ static void show_ddir_status_terse(struct thread_stat *ts,
        else
                log_buf(out, ";%llu;%llu;%f;%f", 0ULL, 0ULL, 0.0, 0.0);
 
        else
                log_buf(out, ";%llu;%llu;%f;%f", 0ULL, 0ULL, 0.0, 0.0);
 
-       if (ts->clat_percentiles) {
+       if (ts->clat_percentiles || ts->lat_percentiles) {
                len = calc_clat_percentiles(ts->io_u_plat[ddir],
                                        ts->clat_stat[ddir].samples,
                                        ts->percentile_list, &ovals, &maxv,
                len = calc_clat_percentiles(ts->io_u_plat[ddir],
                                        ts->clat_stat[ddir].samples,
                                        ts->percentile_list, &ovals, &maxv,
@@ -1011,7 +1013,7 @@ static void add_ddir_status_json(struct thread_stat *ts,
        json_object_add_value_float(tmp_object, "mean", mean);
        json_object_add_value_float(tmp_object, "stddev", dev);
 
        json_object_add_value_float(tmp_object, "mean", mean);
        json_object_add_value_float(tmp_object, "stddev", dev);
 
-       if (ts->clat_percentiles) {
+       if (ts->clat_percentiles || ts->lat_percentiles) {
                len = calc_clat_percentiles(ts->io_u_plat[ddir],
                                        ts->clat_stat[ddir].samples,
                                        ts->percentile_list, &ovals, &maxv,
                len = calc_clat_percentiles(ts->io_u_plat[ddir],
                                        ts->clat_stat[ddir].samples,
                                        ts->percentile_list, &ovals, &maxv,
@@ -1645,6 +1647,7 @@ void __show_run_stats(void)
                ts = &threadstats[j];
 
                ts->clat_percentiles = td->o.clat_percentiles;
                ts = &threadstats[j];
 
                ts->clat_percentiles = td->o.clat_percentiles;
+               ts->lat_percentiles = td->o.lat_percentiles;
                ts->percentile_precision = td->o.percentile_precision;
                memcpy(ts->percentile_list, td->o.percentile_list, sizeof(td->o.percentile_list));
                opt_lists[j] = &td->opt_list;
                ts->percentile_precision = td->o.percentile_precision;
                memcpy(ts->percentile_list, td->o.percentile_list, sizeof(td->o.percentile_list));
                opt_lists[j] = &td->opt_list;
@@ -2437,6 +2440,9 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
                add_log_sample(td, td->lat_log, sample_val(nsec), ddir, bs,
                               offset);
 
                add_log_sample(td, td->lat_log, sample_val(nsec), ddir, bs,
                               offset);
 
+       if (ts->lat_percentiles)
+               add_clat_percentile_sample(ts, nsec, ddir);
+
        td_io_u_unlock(td);
 }
 
        td_io_u_unlock(td);
 }
 
diff --git a/stat.h b/stat.h
index 132dee3cd876dcda01d18e60b576049a66f00eb1..848331bb5e47fef2438cf19a910fed5c02b712bc 100644 (file)
--- a/stat.h
+++ b/stat.h
@@ -172,7 +172,8 @@ struct thread_stat {
        /*
         * IO depth and latency stats
         */
        /*
         * IO depth and latency stats
         */
-       uint64_t clat_percentiles;
+       uint32_t clat_percentiles;
+       uint32_t lat_percentiles;
        uint64_t percentile_precision;
        fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 
        uint64_t percentile_precision;
        fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 
diff --git a/thread_options.h b/thread_options.h
index fd6576e252c75ae3e294b6cf6f5c449486ca2d90..1813cdc706a89a5a493386e7f7d0345abf3fae3a 100644 (file)
--- a/thread_options.h
+++ b/thread_options.h
@@ -240,6 +240,7 @@ struct thread_options {
        unsigned int trim_zero;
        unsigned long long trim_backlog;
        unsigned int clat_percentiles;
        unsigned int trim_zero;
        unsigned long long trim_backlog;
        unsigned int clat_percentiles;
+       unsigned int lat_percentiles;
        unsigned int percentile_precision;      /* digits after decimal for percentiles */
        fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 
        unsigned int percentile_precision;      /* digits after decimal for percentiles */
        fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
 
@@ -343,7 +344,7 @@ struct thread_options_pack {
        uint32_t iodepth_batch_complete_min;
        uint32_t iodepth_batch_complete_max;
        uint32_t serialize_overlap;
        uint32_t iodepth_batch_complete_min;
        uint32_t iodepth_batch_complete_max;
        uint32_t serialize_overlap;
-       uint32_t pad3;
+       uint32_t lat_percentiles;
 
        uint64_t size;
        uint64_t io_size;
 
        uint64_t size;
        uint64_t io_size;