Add support for latency probing over an interval of load

author Jens Axboe <axboe@kernel.dk>

Wed, 24 Oct 2018 11:01:43 +0000 (05:01 -0600)

committer Jens Axboe <axboe@kernel.dk>

Wed, 24 Oct 2018 11:01:43 +0000 (05:01 -0600)
author Jens Axboe <axboe@kernel.dk>
Wed, 24 Oct 2018 11:01:43 +0000 (05:01 -0600)
committer Jens Axboe <axboe@kernel.dk>
Wed, 24 Oct 2018 11:01:43 +0000 (05:01 -0600)
diff --git a/Makefile b/Makefile

index 4721b789be273ec70fdbc50f567c2bab771cddf0..62c6ddfd432ecaee7658de4e76ad36bb843642f6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -50,7 +50,7 @@ SOURCE :=     $(sort $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/crc/*.c)) \
                 gettime-thread.c helpers.c json.c idletime.c td_error.c \
                 profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
                 workqueue.c rate-submit.c optgroup.c helper_thread.c \
-               steadystate.c zone-dist.c
+               steadystate.c zone-dist.c target.c
  
  ifdef CONFIG_LIBHDFS
    HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
diff --git a/backend.c b/backend.c

index d6450baf1efdef1da5945445e7f9325c2e7f0efd..3c734f0d6baf6eadd2d53deb7d963f6fe3ead385 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -49,6 +49,7 @@
  #include "helper_thread.h"
  #include "pshared.h"
  #include "zone-dist.h"
+#include "target.h"
  
  static struct fio_sem *startup_sem;
  static struct flist_head *cgroup_list;
@@ -1090,8 +1091,8 @@ reap:
                                 break;
                         }
                 }
-               if (!in_ramp_time(td) && td->o.latency_target)
-                       lat_target_check(td);
+               if (!in_ramp_time(td) && lat_target_check(td))
+                       break;
  
                 if (ddir_rw(ddir) && td->o.thinktime)
                         handle_thinktime(td, ddir);
@@ -1867,7 +1868,8 @@ static void *thread_main(void *data)
          * (Are we not missing other flags that can be ignored ?)
          */
         if ((td->o.size || td->o.io_size) && !ddir_rw_sum(bytes_done) &&
-           !did_some_io && !td->o.create_only &&
+           !did_some_io && (td->o.iodepth_mode != IOD_STEPPED) &&
+           !td->o.create_only &&
             !(td_ioengine_flagged(td, FIO_NOIO) ||
               td_ioengine_flagged(td, FIO_DISKLESSIO)))
                 log_err("%s: No I/O performed by %s, "
diff --git a/cconv.c b/cconv.c

index 50e45c63a636bf0be66299a8f20a0a9e75d264ae..4040be28f497cae6dcc57f96287fb29c987f0c89 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -100,6 +100,12 @@ void convert_thread_options_to_cpu(struct thread_options *o,
         o->iodepth_batch_complete_min = le32_to_cpu(top->iodepth_batch_complete_min);
         o->iodepth_batch_complete_max = le32_to_cpu(top->iodepth_batch_complete_max);
         o->serialize_overlap = le32_to_cpu(top->serialize_overlap);
+       o->iodepth_mode = le32_to_cpu(top->iodepth_mode);
+       o->lat_step_low = le32_to_cpu(top->lat_step_low);
+       o->lat_step_high = le32_to_cpu(top->lat_step_high);
+       o->lat_step_inc = le32_to_cpu(top->lat_step_inc);
+       o->lat_step_ramp = le32_to_cpu(top->lat_step_ramp);
+       o->lat_step_run = le32_to_cpu(top->lat_step_run);
         o->size = le64_to_cpu(top->size);
         o->io_size = le64_to_cpu(top->io_size);
         o->size_percent = le32_to_cpu(top->size_percent);
@@ -363,6 +369,12 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
         top->iodepth_batch_complete_min = cpu_to_le32(o->iodepth_batch_complete_min);
         top->iodepth_batch_complete_max = cpu_to_le32(o->iodepth_batch_complete_max);
         top->serialize_overlap = cpu_to_le32(o->serialize_overlap);
+       top->iodepth_mode = cpu_to_le32(o->iodepth_mode);
+       top->lat_step_low = cpu_to_le32(o->lat_step_low);
+       top->lat_step_high = cpu_to_le32(o->lat_step_high);
+       top->lat_step_inc = cpu_to_le32(o->lat_step_inc);
+       top->lat_step_ramp = cpu_to_le32(o->lat_step_ramp);
+       top->lat_step_run = cpu_to_le32(o->lat_step_run);
         top->size_percent = cpu_to_le32(o->size_percent);
         top->fill_device = cpu_to_le32(o->fill_device);
         top->file_append = cpu_to_le32(o->file_append);
diff --git a/client.c b/client.c

index 3248906756db77d9586fb2a71513943c9e57b0b6..0c87eb541e6ef87d16f35ef6507f0faed6858f17 100644 (file)
--- a/client.c
+++ b/client.c
@@ -1024,6 +1024,15 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src)
         for (i = 0; i < dst->nr_block_infos; i++)
                 dst->block_infos[i] = le32_to_cpu(src->block_infos[i]);
  
+       for (i = 0; i < ARRAY_SIZE(dst->step_stats); i++) {
+               struct lat_step_stats *ls = &src->step_stats[i];
+
+               for (j = 0; j < DDIR_RWDIR_CNT; j++) {
+                       dst->step_stats[i].iops[j] = le64_to_cpu(ls->iops[j]);
+                       dst->step_stats[i].avg[j].u.f = fio_uint64_to_double(le64_to_cpu(ls->avg[j].u.i));
+               }
+       }
+
         dst->ss_dur             = le64_to_cpu(src->ss_dur);
         dst->ss_state           = le32_to_cpu(src->ss_state);
         dst->ss_head            = le32_to_cpu(src->ss_head);
diff --git a/examples/iodepth_mode_stepped.fio b/examples/iodepth_mode_stepped.fio

new file mode 100644 (file)

index 0000000..fc2b9f4
--- /dev/null
+++ b/examples/iodepth_mode_stepped.fio
@@ -0,0 +1,19 @@
+# Job demonstrating how to use the iodepth_mode=stepped feature
+#
+[step]
+ioengine=libaio
+# iodepth / step_high (130% here) must be high enough to saturate performance
+iodepth=64
+numjobs=1
+direct=1
+# Step from 10% to 130%, in 5% intervals. For each step, use a ramp time
+# of 5s, then 30 seconds of runtime
+iodepth_mode=stepped:10-130/5,5/30
+rw=randread
+norandommap
+filename=/dev/nvme0n1p9
+runtime=1h
+time_based=1
+numjobs=2
+group_reporting=1
+cpus_allowed=0,2
diff --git a/fio.h b/fio.h

index b3ba5db2e95ac9e16cd0633bc8912b125c08e333..081998bf70cf9ef86cce2ab74b2284c0409a99e8 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -155,6 +155,11 @@ enum {
         F_ADV_SEQUENTIAL,
  };
  
+/*
+ * Values of the iodepth_mode option, stored in thread_options->iodepth_mode
+ */
+enum {
+       IOD_NONE = 0,           /* "none": no depth modification */
+       IOD_STEPPED,            /* "stepped": stepped IO depth probing */
+};
+
  /*
   * Per-thread/process specific data. Only used for the network client
   * for now.
@@ -374,9 +379,14 @@ struct thread_data {
         unsigned int latency_qd;
         unsigned int latency_qd_high;
         unsigned int latency_qd_low;
+       unsigned int latency_qd_step;
         unsigned int latency_failed;
-       uint64_t latency_ios;
+       unsigned int latency_state;
+       unsigned int latency_iops[DDIR_RWDIR_CNT];
+       unsigned int latency_step;
+       uint64_t latency_ios[DDIR_RWDIR_CNT];
         int latency_end_run;
+       unsigned int nr_lat_stats;
  
         /*
          * read/write mixed workload state
@@ -687,13 +697,6 @@ extern int io_queue_event(struct thread_data *td, struct io_u *io_u, int *ret,
                    enum fio_ddir ddir, uint64_t *bytes_issued, int from_verify,
                    struct timespec *comp_time);
  
-/*
- * Latency target helpers
- */
-extern void lat_target_check(struct thread_data *);
-extern void lat_target_init(struct thread_data *);
-extern void lat_target_reset(struct thread_data *);
-
  /*
   * Iterates all threads/processes within all the defined jobs
   */
@@ -751,6 +754,8 @@ static inline bool should_check_rate(struct thread_data *td)
         return ddir_rw_sum(td->bytes_done) != 0;
  }
  
+int setup_rate(struct thread_data *td);
+
  static inline unsigned long long td_max_bs(struct thread_data *td)
  {
         unsigned long long max_bs;
diff --git a/init.c b/init.c

index a2b70c4acb4ba51a7916eda28f90bbcf3cdb10a9..691248034405f8d2a98adeb48806c227a2d318d1 100644 (file)
--- a/init.c
+++ b/init.c
@@ -559,7 +559,7 @@ static int __setup_rate(struct thread_data *td, enum fio_ddir ddir)
         return 0;
  }
  
-static int setup_rate(struct thread_data *td)
+int setup_rate(struct thread_data *td)
  {
         int ret = 0;
  
diff --git a/io_u.c b/io_u.c

index 56abe6fd598ef2d8ecb0c7565b9552e0755e2ad5..e1ac2097b051a1f658b692f8aaf40d38928e0919 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -11,6 +11,7 @@
  #include "lib/pow2.h"
  #include "minmax.h"
  #include "zbd.h"
+#include "target.h"
  
  struct io_completion_data {
         int nr;                         /* input */
@@ -1356,146 +1357,6 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
         return 0;
  }
  
-static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
-                     unsigned long long tnsec, unsigned long long max_nsec)
-{
-       if (!td->error)
-               log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
-       td_verror(td, ETIMEDOUT, "max latency exceeded");
-       icd->error = ETIMEDOUT;
-}
-
-static void lat_new_cycle(struct thread_data *td)
-{
-       fio_gettime(&td->latency_ts, NULL);
-       td->latency_ios = ddir_rw_sum(td->io_blocks);
-       td->latency_failed = 0;
-}
-
-/*
- * We had an IO outside the latency target. Reduce the queue depth. If we
- * are at QD=1, then it's time to give up.
- */
-static bool __lat_target_failed(struct thread_data *td)
-{
-       if (td->latency_qd == 1)
-               return true;
-
-       td->latency_qd_high = td->latency_qd;
-
-       if (td->latency_qd == td->latency_qd_low)
-               td->latency_qd_low--;
-
-       td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2;
-
-       dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
-
-       /*
-        * When we ramp QD down, quiesce existing IO to prevent
-        * a storm of ramp downs due to pending higher depth.
-        */
-       io_u_quiesce(td);
-       lat_new_cycle(td);
-       return false;
-}
-
-static bool lat_target_failed(struct thread_data *td)
-{
-       if (td->o.latency_percentile.u.f == 100.0)
-               return __lat_target_failed(td);
-
-       td->latency_failed++;
-       return false;
-}
-
-void lat_target_init(struct thread_data *td)
-{
-       td->latency_end_run = 0;
-
-       if (td->o.latency_target) {
-               dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target);
-               fio_gettime(&td->latency_ts, NULL);
-               td->latency_qd = 1;
-               td->latency_qd_high = td->o.iodepth;
-               td->latency_qd_low = 1;
-               td->latency_ios = ddir_rw_sum(td->io_blocks);
-       } else
-               td->latency_qd = td->o.iodepth;
-}
-
-void lat_target_reset(struct thread_data *td)
-{
-       if (!td->latency_end_run)
-               lat_target_init(td);
-}
-
-static void lat_target_success(struct thread_data *td)
-{
-       const unsigned int qd = td->latency_qd;
-       struct thread_options *o = &td->o;
-
-       td->latency_qd_low = td->latency_qd;
-
-       /*
-        * If we haven't failed yet, we double up to a failing value instead
-        * of bisecting from highest possible queue depth. If we have set
-        * a limit other than td->o.iodepth, bisect between that.
-        */
-       if (td->latency_qd_high != o->iodepth)
-               td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2;
-       else
-               td->latency_qd *= 2;
-
-       if (td->latency_qd > o->iodepth)
-               td->latency_qd = o->iodepth;
-
-       dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
-
-       /*
-        * Same as last one, we are done. Let it run a latency cycle, so
-        * we get only the results from the targeted depth.
-        */
-       if (td->latency_qd == qd) {
-               if (td->latency_end_run) {
-                       dprint(FD_RATE, "We are done\n");
-                       td->done = 1;
-               } else {
-                       dprint(FD_RATE, "Quiesce and final run\n");
-                       io_u_quiesce(td);
-                       td->latency_end_run = 1;
-                       reset_all_stats(td);
-                       reset_io_stats(td);
-               }
-       }
-
-       lat_new_cycle(td);
-}
-
-/*
- * Check if we can bump the queue depth
- */
-void lat_target_check(struct thread_data *td)
-{
-       uint64_t usec_window;
-       uint64_t ios;
-       double success_ios;
-
-       usec_window = utime_since_now(&td->latency_ts);
-       if (usec_window < td->o.latency_window)
-               return;
-
-       ios = ddir_rw_sum(td->io_blocks) - td->latency_ios;
-       success_ios = (double) (ios - td->latency_failed) / (double) ios;
-       success_ios *= 100.0;
-
-       dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f);
-
-       if (success_ios >= td->o.latency_percentile.u.f)
-               lat_target_success(td);
-       else
-               __lat_target_failed(td);
-}
-
  /*
   * If latency target is enabled, we might be ramping up or down and not
   * using the full queue depth available.
@@ -1506,7 +1367,7 @@ bool queue_full(const struct thread_data *td)
  
         if (qempty)
                 return true;
-       if (!td->o.latency_target)
+       /*
+        * td->latency_qd is driven both by latency_target and by
+        * iodepth_mode=stepped. Only skip the depth cap below if neither
+        * of the two is in use.
+        */
+       if (!td->o.latency_target && td->o.iodepth_mode != IOD_STEPPED)
                 return false;
  
         return td->cur_depth >= td->latency_qd;
@@ -1837,11 +1698,15 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
                                 icd->error = ops->io_u_lat(td, tnsec);
                 }
  
-               if (td->o.max_latency && tnsec > td->o.max_latency)
-                       lat_fatal(td, icd, tnsec, td->o.max_latency);
+               if (td->o.max_latency && tnsec > td->o.max_latency) {
+                       icd->error = ETIMEDOUT;
+                       lat_fatal(td, tnsec, td->o.max_latency);
+               }
                 if (td->o.latency_target && tnsec > td->o.latency_target) {
-                       if (lat_target_failed(td))
-                               lat_fatal(td, icd, tnsec, td->o.latency_target);
+                       if (lat_target_failed(td)) {
+                               icd->error = ETIMEDOUT;
+                               lat_fatal(td, tnsec, td->o.latency_target);
+                       }
                 }
         }
  
@@ -1887,8 +1752,8 @@ static void file_log_write_comp(const struct thread_data *td, struct fio_file *f
  
  static bool should_account(struct thread_data *td)
  {
-       return ramp_time_over(td) && (td->runstate == TD_RUNNING ||
-                                          td->runstate == TD_VERIFYING);
+       return lat_step_account(td) && ramp_time_over(td) &&
+               (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING);
  }
  
  static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
diff --git a/libfio.c b/libfio.c

index 674bc1dc0a9ed37216bf86ae25acef05e33dd4f4..a672dd559969d9a73b38f717091f9577a2f43a63 100644 (file)
--- a/libfio.c
+++ b/libfio.c
@@ -34,6 +34,7 @@
  #include "filelock.h"
  #include "helper_thread.h"
  #include "filehash.h"
+#include "target.h"
  
  FLIST_HEAD(disk_list);
  
diff --git a/options.c b/options.c

index 98187def98fbe842e90ae6d4a85a9fb3f358fd10..52acf978129d6c900e45635579e672d8b1bb542b 100644 (file)
--- a/options.c
+++ b/options.c
@@ -13,6 +13,7 @@
  #include "lib/pattern.h"
  #include "options.h"
  #include "optgroup.h"
+#include "target.h"
  
  char client_sockaddr_str[INET6_ADDRSTRLEN] = { 0 };
  
@@ -480,6 +481,51 @@ static int str_rwmix_write_cb(void *data, unsigned long long *val)
         return 0;
  }
  
+/*
+ * Parse the argument given to iodepth_mode. Nothing is parsed for
+ * iodepth_mode=none or on a dry run. Otherwise the expected format is
+ * 'mode:low-high/inc[,ramp/run]'. If the ',ramp/run' part is left out,
+ * IOD_STEPPED_DEF_RAMP and IOD_STEPPED_DEF_RUN are used.
+ *
+ * The lat_step_* options are only written once the whole string has been
+ * validated. Returns 0 on success and 1 (after logging) on a parse error.
+ */
+static int str_iodepth_mode_cb(void *data, const char *input)
+{
+       struct thread_data *td = cb_data_to_td(data);
+       struct thread_options *o = &td->o;
+       unsigned int low, high, inc, ramp, run;
+       char *str, *p, *n;
+       int nr, ret = 1;
+
+       if (o->iodepth_mode == IOD_NONE)
+               return 0;
+
+       if (parse_dryrun())
+               return 0;
+
+       /*
+        * 'p' remembers the start of the allocation, strip_blank_front()
+        * may move 'str' forward.
+        */
+       p = str = strdup(input);
+       if (!str)
+               goto err;
+
+       strip_blank_front(&str);
+       strip_blank_end(str);
+
+       n = strchr(str, ':');
+       if (!n)
+               goto err;
+
+       *n++ = '\0';
+
+       /* what follows the ':' is 'low-high/inc[,ramp/run]' */
+       nr = sscanf(n, "%u-%u/%u,%u/%u", &low, &high, &inc, &ramp, &run);
+       if (nr == 5) {
+               /* user supplied ramp/run are scaled by 1000 */
+               ramp *= 1000;
+               run *= 1000;
+       } else if (nr == 3) {
+               ramp = IOD_STEPPED_DEF_RAMP;
+               run = IOD_STEPPED_DEF_RUN;
+       } else
+               goto err;
+
+       /* with a zero increment the stepping would never reach 'high' */
+       if (!inc)
+               goto err;
+
+       o->lat_step_low = low;
+       o->lat_step_high = high;
+       o->lat_step_inc = inc;
+       o->lat_step_ramp = ramp;
+       o->lat_step_run = run;
+       ret = 0;
+err:
+       if (ret)
+               log_err("fio: failed parsing <%s>\n", input);
+       /* free the pointer strdup() returned, not the advanced 'str' */
+       free(p);
+       return ret;
+}
+
  static int str_exitall_cb(void)
  {
         exitall_on_terminate = true;
@@ -1959,6 +2005,30 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                 .category = FIO_OPT_C_IO,
                 .group  = FIO_OPT_G_IO_BASIC,
         },
+       /*
+        * iodepth_mode: the part after "stepped:" is parsed by
+        * str_iodepth_mode_cb() as low-high/inc[,ramp/run].
+        */
+       {
+               .name   = "iodepth_mode",
+               .lname  = "IO Depth Mode",
+               .type   = FIO_OPT_STR,
+               .off1   = offsetof(struct thread_options, iodepth_mode),
+               .cb     = str_iodepth_mode_cb,
+               .help   = "How to vary the queue depth",
+               .parent = "iodepth",
+               .hide   = 1,
+               .interval = 1,
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_IO_BASIC,
+               .posval = {
+                         { .ival = "none",
+                           .oval = IOD_NONE,
+                           .help = "No depth modification",
+                         },
+                         { .ival = "stepped",
+                           .oval = IOD_STEPPED,
+                           .help = "Stepped IO depth:lo-hi/inc,ramp/run",
+                         },
+               },
+       },
+
         {
                 .name   = "serialize_overlap",
                 .lname  = "Serialize overlap",
diff --git a/server.c b/server.c

index 90d3396b62fd8a557304cdbef929c6a428febf33..a636f2724347c256984a509751b7103835568996 100644 (file)
--- a/server.c
+++ b/server.c
@@ -1550,6 +1550,15 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
  
         p.ts.sig_figs           = cpu_to_le32(ts->sig_figs);
  
+       for (i = 0; i < ARRAY_SIZE(ts->step_stats); i++) {
+               struct lat_step_stats *ls = &ts->step_stats[i];
+
+               for (j = 0; j < DDIR_RWDIR_CNT; j++) {
+                       p.ts.step_stats[i].iops[j] = cpu_to_le64(ls->iops[j]);
+                       p.ts.step_stats[i].avg[j].u.i = cpu_to_le64(fio_double_to_uint64(ls->avg[j].u.f));
+               }
+       }
+
         p.ts.nr_block_infos     = cpu_to_le64(ts->nr_block_infos);
         for (i = 0; i < p.ts.nr_block_infos; i++)
                 p.ts.block_infos[i] = cpu_to_le32(ts->block_infos[i]);
diff --git a/server.h b/server.h

index 371e51ea24e85206ac857a1f62bf623b8b044e9f..abb23bad490d155d6e7935d7cd165e331376f6e7 100644 (file)
--- a/server.h
+++ b/server.h
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
  };
  
  enum {
-       FIO_SERVER_VER                  = 77,
+       FIO_SERVER_VER                  = 78,
  
         FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
         FIO_SERVER_MAX_CMD_MB           = 2048,
diff --git a/stat.c b/stat.c

index 331abf676c7bfae00c349208e894f7049f5f6ad5..26125fad6908b5d5291c239a6daed84d3ae9bbf6 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -15,6 +15,7 @@
  #include "helper_thread.h"
  #include "smalloc.h"
  #include "zbd.h"
+#include "target.h"
  
  #define LOG_MSEC_SLACK 1
  
@@ -391,7 +392,7 @@ void stat_calc_lat_m(struct thread_stat *ts, double *io_u_lat)
         stat_calc_lat(ts, io_u_lat, ts->io_u_lat_m, FIO_IO_U_LAT_M_NR);
  }
  
-static void display_lat(const char *name, unsigned long long min,
+void display_lat(const char *name, unsigned long long min,
                         unsigned long long max, double mean, double dev,
                         struct buf_output *out)
  {
@@ -887,6 +888,11 @@ static void show_thread_status_normal(struct thread_stat *ts,
  
         if (ts->ss_dur)
                 show_ss_normal(ts, out);
+
+       if (lat_ts_has_stats(ts)) {
+               log_buf(out, "  Stepped latency report\n");
+               lat_step_report(ts, out);
+       }
  }
  
  static void show_ddir_status_terse(struct thread_stat *ts,
@@ -1264,7 +1270,7 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
         double io_u_lat_u[FIO_IO_U_LAT_U_NR];
         double io_u_lat_m[FIO_IO_U_LAT_M_NR];
         double usr_cpu, sys_cpu;
-       int i;
+       int i, j;
         size_t size;
  
         root = json_create_object();
@@ -1488,6 +1494,32 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
                 json_object_add_value_array(data, "bw", bw);
         }
  
+       if (lat_ts_has_stats(ts)) {
+               tmp = json_create_object();
+               json_object_add_value_object(root, "lat_step", tmp);
+       }
+
+       for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+               struct json_object *val;
+
+               if (!__lat_ts_has_stats(ts, i))
+                       continue;
+
+               val = json_create_object();
+               json_object_add_value_object(tmp, io_ddir_name(i), val);
+
+               for (j = 0; j < ARRAY_SIZE(ts->step_stats); j++) {
+                       struct lat_step_stats *ls = &ts->step_stats[j];
+                       char name[32];
+
+                       if (!ls->iops[i])
+                               continue;
+
+                       sprintf(name, "%llu", (unsigned long long) ls->iops[i]);
+                       json_object_add_value_float(val, name, ls->avg[i].u.f);
+               }
+       }
+
         return root;
  }
  
@@ -1553,6 +1585,25 @@ static void sum_stat(struct io_stat *dst, struct io_stat *src, bool first)
         dst->S.u.f = S;
  }
  
+/*
+ * Fold the step stats of 'src' into 'dst'. For every data direction that
+ * has IOPS on at least one side, the average becomes the IOPS weighted
+ * mean of both sides (or a plain copy of the 'src' average if 'first' is
+ * set), and the IOPS are added up.
+ */
+static void sum_lat_step_stats(struct lat_step_stats *dst,
+                              struct lat_step_stats *src, bool first)
+{
+       int ddir;
+
+       for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+               const uint64_t src_iops = src->iops[ddir];
+               const uint64_t dst_iops = dst->iops[ddir];
+               double avg;
+
+               /* neither side has anything for this direction */
+               if (!dst_iops && !src_iops)
+                       continue;
+
+               if (first)
+                       avg = src->avg[ddir].u.f;
+               else
+                       avg = ((src->avg[ddir].u.f * src_iops) +
+                               (dst->avg[ddir].u.f * dst_iops)) /
+                               (dst_iops + src_iops);
+
+               dst->avg[ddir].u.f = avg;
+               dst->iops[ddir] = dst_iops + src_iops;
+       }
+}
+
  void sum_group_stats(struct group_run_stats *dst, struct group_run_stats *src)
  {
         int i;
@@ -1665,6 +1716,9 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src,
         dst->total_submit += src->total_submit;
         dst->total_complete += src->total_complete;
         dst->nr_zone_resets += src->nr_zone_resets;
+
+       for (l = 0; l < ARRAY_SIZE(dst->step_stats); l++)
+               sum_lat_step_stats(&dst->step_stats[l], &src->step_stats[l], first);
  }
  
  void init_group_run_stat(struct group_run_stats *gs)
@@ -1711,6 +1765,9 @@ void __show_run_stats(void)
         for (i = 0; i < groupid + 1; i++)
                 init_group_run_stat(&runstats[i]);
  
+       for (i = 0; i < FIO_OUTPUT_NR; i++)
+               buf_output_init(&output[i]);
+
         /*
          * find out how many threads stats we need. if group reporting isn't
          * enabled, it's one-per-td.
@@ -1887,9 +1944,6 @@ void __show_run_stats(void)
                 }
         }
  
-       for (i = 0; i < FIO_OUTPUT_NR; i++)
-               buf_output_init(&output[i]);
-
         /*
          * don't overwrite last signal output
          */
diff --git a/stat.h b/stat.h

index b4ba71e3b0df3b9131e46132622e5cc4c88de78b..8a165b78efdfc8e5895bb68e0bae6308fbe68ab5 100644 (file)
--- a/stat.h
+++ b/stat.h
@@ -4,6 +4,11 @@
  #include "iolog.h"
  #include "lib/output_buffer.h"
  
+/*
+ * Result of a single latency probing step, one slot per data direction
+ */
+struct lat_step_stats {
+       uint64_t iops[DDIR_RWDIR_CNT];          /* IOPS seen during the step */
+       fio_fp64_t avg[DDIR_RWDIR_CNT];         /* mean of clat_stat for the step */
+};
+
  struct group_run_stats {
         uint64_t max_run[DDIR_RWDIR_CNT], min_run[DDIR_RWDIR_CNT];
         uint64_t max_bw[DDIR_RWDIR_CNT], min_bw[DDIR_RWDIR_CNT];
@@ -145,6 +150,8 @@ enum block_info_state {
  #define FIO_JOBDESC_SIZE       256
  #define FIO_VERROR_SIZE                128
  
+#define MAX_STEP_STATS         64
+
  struct thread_stat {
         char name[FIO_JOBNAME_SIZE];
         char verror[FIO_VERROR_SIZE];
@@ -227,6 +234,9 @@ struct thread_stat {
         uint64_t latency_window;
  
         uint32_t sig_figs;
+       uint32_t pad4;
+
+       struct lat_step_stats step_stats[MAX_STEP_STATS];
  
         uint64_t ss_dur;
         uint32_t ss_state;
@@ -239,12 +249,12 @@ struct thread_stat {
  
         union {
                 uint64_t *ss_iops_data;
-               uint64_t pad4;
+               uint64_t pad5;
         };
  
         union {
                 uint64_t *ss_bw_data;
-               uint64_t pad5;
+               uint64_t pad6;
         };
  } __attribute__((packed));
  
diff --git a/target.c b/target.c

new file mode 100644 (file)

index 0000000..d372ff1
--- /dev/null
+++ b/target.c
@@ -0,0 +1,375 @@
+#include <unistd.h>
+
+#include "fio.h"
+#include "target.h"
+#include "smalloc.h"
+#include "stat.h"
+
+/*
+ * An IO took 'tnsec' nsec, which is above the allowed 'max_nsec'. Record
+ * ETIMEDOUT on the thread through td_verror(). The log line is only
+ * printed if the thread had no error set before.
+ */
+void lat_fatal(struct thread_data *td, unsigned long long tnsec,
+              unsigned long long max_nsec)
+{
+       const bool had_error = td->error != 0;
+
+       if (!had_error) {
+               log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
+       }
+
+       td_verror(td, ETIMEDOUT, "max latency exceeded");
+}
+
+/*
+ * Snapshot the per-direction io_blocks counters into latency_ios
+ */
+static void lat_ios_note(struct thread_data *td)
+{
+       int ddir = 0;
+
+       while (ddir < DDIR_RWDIR_CNT) {
+               td->latency_ios[ddir] = td->io_blocks[ddir];
+               ddir++;
+       }
+}
+
+/*
+ * Begin a new measurement window: no failures counted yet, fresh start
+ * time and a fresh snapshot of the IO counters.
+ */
+static void lat_new_cycle(struct thread_data *td)
+{
+       td->latency_failed = 0;
+       fio_gettime(&td->latency_ts, NULL);
+       lat_ios_note(td);
+}
+
+/*
+ * The latency target was missed. Lower the queue depth to the midpoint
+ * between the current depth and the low mark, and start a new cycle.
+ *
+ * Returns true if the depth already is 1 and can't be reduced any further,
+ * false after ramping down.
+ */
+static bool __lat_target_failed(struct thread_data *td)
+{
+       const unsigned int qd = td->latency_qd;
+
+       /* QD=1 is the floor, give up */
+       if (qd == 1)
+               return true;
+
+       /* the depth that just failed is the new upper bound */
+       td->latency_qd_high = qd;
+
+       if (td->latency_qd_low == qd)
+               td->latency_qd_low--;
+
+       td->latency_qd = (qd + td->latency_qd_low) / 2;
+
+       dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
+
+       /*
+        * Quiesce what is still pending from the higher depth, so those
+        * IOs don't trigger a storm of further ramp downs.
+        */
+       io_u_quiesce(td);
+       lat_new_cycle(td);
+       return false;
+}
+
+/*
+ * Account one IO that missed the latency target. Unless the percentile is
+ * exactly 100.0 the miss is only counted in latency_failed and false is
+ * returned. At 100.0 the queue depth is ramped down right away and the
+ * result of __lat_target_failed() is passed on.
+ */
+bool lat_target_failed(struct thread_data *td)
+{
+       if (td->o.latency_percentile.u.f != 100.0) {
+               td->latency_failed++;
+               return false;
+       }
+
+       return __lat_target_failed(td);
+}
+
+/*
+ * Set up stepped probing: step 0, PROBE_RAMP state, the queue depth at
+ * the configured iodepth and the timestamp at now.
+ */
+static void lat_step_init(struct thread_data *td)
+{
+       struct thread_options *o = &td->o;
+
+       td->latency_step = 0;
+       td->latency_state = IOD_STATE_PROBE_RAMP;
+       td->latency_qd = o->iodepth;
+       fio_gettime(&td->latency_ts, NULL);
+
+       dprint(FD_RATE, "Stepped: %d-%d/%d,%d/%d\n", o->lat_step_low,
+                               o->lat_step_high, o->lat_step_inc,
+                               o->lat_step_ramp, o->lat_step_run);
+}
+
+/*
+ * Initialize the queue depth control of a thread. A latency target takes
+ * precedence and starts probing at QD=1. Otherwise stepped mode is set up
+ * if it was asked for. With neither, the depth is the plain iodepth.
+ */
+void lat_target_init(struct thread_data *td)
+{
+       struct thread_options *o = &td->o;
+
+       td->latency_end_run = 0;
+
+       if (!o->latency_target) {
+               if (o->iodepth_mode == IOD_STEPPED)
+                       lat_step_init(td);
+               else
+                       td->latency_qd = o->iodepth;
+               return;
+       }
+
+       dprint(FD_RATE, "Latency target=%llu\n", o->latency_target);
+       fio_gettime(&td->latency_ts, NULL);
+       td->latency_qd = 1;
+       td->latency_qd_low = 1;
+       td->latency_qd_high = o->iodepth;
+       lat_ios_note(td);
+}
+
+/*
+ * Restart latency target probing, unless no latency target is set or the
+ * final run has already begun.
+ */
+void lat_target_reset(struct thread_data *td)
+{
+       if (!td->o.latency_target)
+               return;
+       if (td->latency_end_run)
+               return;
+
+       lat_target_init(td);
+}
+
+/*
+ * The latency target was met for the past window, try a higher queue
+ * depth. If the depth can't move anymore, either start the final run or,
+ * if that one has been done, mark the thread as done.
+ */
+static void lat_target_success(struct thread_data *td)
+{
+       const unsigned int prev_qd = td->latency_qd;
+       const unsigned int max_qd = td->o.iodepth;
+       unsigned int new_qd;
+
+       td->latency_qd_low = prev_qd;
+
+       /*
+        * As long as the high mark still is the full iodepth we have not
+        * failed yet, so keep doubling. After that, bisect between the
+        * current depth and the high mark.
+        */
+       if (td->latency_qd_high == max_qd)
+               new_qd = prev_qd * 2;
+       else
+               new_qd = (prev_qd + td->latency_qd_high) / 2;
+
+       if (new_qd > max_qd)
+               new_qd = max_qd;
+
+       td->latency_qd = new_qd;
+
+       dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
+
+       /*
+        * The depth did not change, so this is the depth to report on.
+        * Run one more cycle with clean stats, then finish.
+        */
+       if (new_qd == prev_qd) {
+               if (!td->latency_end_run) {
+                       dprint(FD_RATE, "Quiesce and final run\n");
+                       io_u_quiesce(td);
+                       td->latency_end_run = 1;
+                       reset_all_stats(td);
+                       reset_io_stats(td);
+               } else {
+                       dprint(FD_RATE, "We are done\n");
+                       td->done = 1;
+               }
+       }
+
+       lat_new_cycle(td);
+}
+
+/*
+ * Once latency_window usec have passed since latency_ts, compute the
+ * percentage of IOs in that window that were not counted in
+ * latency_failed. Ramp the queue depth up if that meets
+ * latency_percentile, down otherwise.
+ */
+void __lat_target_check(struct thread_data *td)
+{
+       const uint64_t elapsed = utime_since_now(&td->latency_ts);
+       uint64_t done;
+       double pct_ok;
+
+       /* window not over yet */
+       if (elapsed < td->o.latency_window)
+               return;
+
+       done = ddir_rw_sum(td->io_blocks) - ddir_rw_sum(td->latency_ios);
+       pct_ok = (double) (done - td->latency_failed) / (double) done;
+       pct_ok *= 100.0;
+
+       dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", pct_ok, td->o.latency_percentile.u.f);
+
+       if (!(pct_ok >= td->o.latency_percentile.u.f))
+               __lat_target_failed(td);
+       else
+               lat_target_success(td);
+}
+
+/*
+ * Drop any IOPS rate limit: zero rate_iops for every data direction and
+ * clear TD_F_CHECK_RATE.
+ */
+static void lat_clear_rate(struct thread_data *td)
+{
+       int ddir;
+
+       for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
+               td->o.rate_iops[ddir] = 0;
+
+       td->flags &= ~TD_F_CHECK_RATE;
+}
+
+/*
+ * Set up the load for the next step, which runs at
+ * (latency_step + 1) * lat_step_inc percent.
+ *
+ * Below 100% the load is limited through rate_iops, as that percentage of
+ * td->latency_iops. From 100% on the rate limit is dropped and the queue
+ * depth is scaled by perc / lat_step_high instead.
+ *
+ * Returns true if we're done stepping
+ */
+static bool lat_step_recalc(struct thread_data *td)
+{
+       struct thread_options *o = &td->o;
+       unsigned int cur, perc, qd;
+
+       cur = td->latency_step * o->lat_step_inc;
+       if (cur >= o->lat_step_high)
+               return true;
+
+       perc = (td->latency_step + 1) * o->lat_step_inc;
+       if (perc < 100) {
+               int i;
+
+               for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+                       uint64_t this_iops;
+
+                       /* widen first, perc * iops may not fit 32 bits */
+                       this_iops = (uint64_t) perc * td->latency_iops[i];
+                       o->rate_iops[i] = this_iops / 100;
+               }
+               setup_rate(td);
+               td->flags |= TD_F_CHECK_RATE;
+               qd = o->iodepth * 100 / o->lat_step_high;
+       } else {
+               qd = o->iodepth * perc / o->lat_step_high;
+               lat_clear_rate(td);
+       }
+
+       /*
+        * The scaled depth rounds down to 0 for a small iodepth, and goes
+        * past iodepth if lat_step_high is below 100 or not a multiple of
+        * lat_step_inc. Keep it within 1..iodepth.
+        */
+       if (qd > o->iodepth)
+               qd = o->iodepth;
+       if (!qd)
+               qd = 1;
+       td->latency_qd = qd;
+
+       dprint(FD_RATE, "Stepped: step=%d, perc=%d, qd=%d\n", td->latency_step,
+                                               perc, td->latency_qd);
+       return false;
+}
+
+/*
+ * Zero one completion latency stat entry by hand, drop any rate limit, and
+ * reset the thread's stats through reset_all_stats() and reset_io_stats().
+ *
+ * NOTE(review): the entry cleared by hand is clat_stat[DDIR_RWDIR_CNT], not
+ * one of the per-direction slots 0..DDIR_RWDIR_CNT-1 -- confirm the array
+ * really has an element at that index.
+ */
+static void lat_step_reset(struct thread_data *td)
+{
+       struct io_stat *is = &td->ts.clat_stat[DDIR_RWDIR_CNT];
+
+       is->samples = 0;
+       is->min_val = 0;
+       is->max_val = 0;
+       is->S.u.f = 0;
+       is->mean.u.f = 0;
+
+       lat_clear_rate(td);
+       reset_all_stats(td);
+       reset_io_stats(td);
+}
+
+/*
+ * Rate of IO for @ddir over the last @msec: the growth of
+ * td->io_blocks[ddir] over td->latency_ios[ddir], multiplied by 1000 and
+ * divided by @msec (i.e. per second when @msec is in milliseconds).
+ *
+ * Returns 0 when @msec is 0, so no division by zero takes place.
+ */
+static uint64_t lat_iops_since(struct thread_data *td, uint64_t msec,
+                              enum fio_ddir ddir)
+{
+       uint64_t delta;
+
+       if (!msec)
+               return 0;
+
+       delta = td->io_blocks[ddir] - td->latency_ios[ddir];
+       return (delta * 1000) / msec;
+}
+
+/*
+ * Store the result of the step that just finished in the next free slot of
+ * td->ts.step_stats: for each data direction, the IO rate over the last
+ * @msec (see lat_iops_since()) and the mean reported by calc_lat() for that
+ * direction's clat_stat entry.
+ *
+ * If every slot is already in use the sample is dropped and an error is
+ * logged; td->nr_lat_stats is only advanced when a sample was stored.
+ */
+static void lat_step_add_sample(struct thread_data *td, uint64_t msec)
+{
+       struct thread_stat *ts = &td->ts;
+       unsigned long long min, max;
+       struct lat_step_stats *ls;
+       double dev;
+       int i;
+
+       if (td->nr_lat_stats >= ARRAY_SIZE(ts->step_stats)) {
+               log_err("fio: ts->step_stats too small, dropping entries\n");
+               return;
+       }
+
+       ls = &ts->step_stats[td->nr_lat_stats];
+
+       for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+               /*
+                * calc_lat()'s return value is not checked, so start from a
+                * defined value: a direction it does not fill in is stored
+                * as 0 rather than as an indeterminate stack value.
+                */
+               double mean = 0.0;
+
+               /* min, max and dev are required outputs that go unused here */
+               calc_lat(&ts->clat_stat[i], &min, &max, &mean, &dev);
+
+               ls->iops[i] = lat_iops_since(td, msec, i);
+               ls->avg[i].u.f = mean;
+       }
+
+       td->nr_lat_stats++;
+}
+
+/*
+ * Returns true if any entry of ts->step_stats holds a non-zero iops value
+ * for direction @ddir, false otherwise.
+ */
+bool __lat_ts_has_stats(struct thread_stat *ts, enum fio_ddir ddir)
+{
+       bool found = false;
+       int slot;
+
+       /* stop scanning at the first slot with data */
+       for (slot = 0; !found && slot < ARRAY_SIZE(ts->step_stats); slot++)
+               found = ts->step_stats[slot].iops[ddir] != 0;
+
+       return found;
+}
+
+/*
+ * Returns true if __lat_ts_has_stats() finds step data for at least one
+ * data direction in @ts.
+ */
+bool lat_ts_has_stats(struct thread_stat *ts)
+{
+       int ddir;
+
+       for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+               if (!__lat_ts_has_stats(ts, ddir))
+                       continue;
+
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Write one line to @out for every (step, direction) pair in
+ * ts->step_stats that has a non-zero iops value, showing the direction
+ * name, the iops and the stored average latency.
+ */
+void lat_step_report(struct thread_stat *ts, struct buf_output *out)
+{
+       int step, ddir;
+
+       for (step = 0; step < ARRAY_SIZE(ts->step_stats); step++) {
+               struct lat_step_stats *ls = &ts->step_stats[step];
+
+               for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+                       /* directions without recorded IO are not reported */
+                       if (ls->iops[ddir]) {
+                               __log_buf(out, "    %s: iops=%llu, lat=%.1f nsec\n",
+                                               io_ddir_name(ddir),
+                                               (unsigned long long) ls->iops[ddir],
+                                               ls->avg[ddir].u.f);
+                       }
+               }
+       }
+}
+
+/*
+ * Switch to @new_state and restart the state timer: td->latency_ts is set
+ * to the current time.
+ */
+static void lat_next_state(struct thread_data *td, int new_state)
+{
+       fio_gettime(&td->latency_ts, NULL);
+       td->latency_state = new_state;
+}
+
+/*
+ * Advance the stepped latency probing state machine.
+ *
+ * Each state lasts until the time since td->latency_ts (taken from
+ * mtime_since_now(), presumably milliseconds) reaches lat_step_ramp or
+ * lat_step_run; until then the call changes nothing.
+ *
+ * Returns true once the state is IOD_STATE_DONE, false otherwise.
+ */
+bool lat_step_check(struct thread_data *td)
+{
+       struct thread_options *o = &td->o;
+       uint64_t msec;
+
+       msec = mtime_since_now(&td->latency_ts);
+
+       switch (td->latency_state) {
+       case IOD_STATE_PROBE_RAMP:
+               if (msec < o->lat_step_ramp)
+                       break;
+
+               /* ramp is over: reset stats and note the IO counts before probing */
+               lat_step_reset(td);
+               lat_ios_note(td);
+
+               lat_next_state(td, IOD_STATE_PROBE_RUN);
+               break;
+       case IOD_STATE_PROBE_RUN: {
+               int i;
+
+               if (msec < o->lat_step_run)
+                       break;
+
+               io_u_quiesce(td);
+
+               /* per-direction rate seen during the probe run; the steps scale from it */
+               for (i = 0; i < DDIR_RWDIR_CNT; i++)
+                       td->latency_iops[i] = lat_iops_since(td, msec, i);
+
+               lat_step_reset(td);
+               /* sets up the first step; the "done" return value is ignored here */
+               lat_step_recalc(td);
+
+               io_u_quiesce(td);
+               lat_next_state(td, IOD_STATE_RAMP);
+               break;
+               }
+       case IOD_STATE_RAMP:
+               if (msec < o->lat_step_ramp)
+                       break;
+
+               lat_ios_note(td);
+               lat_next_state(td, IOD_STATE_RUN);
+               break;
+       case IOD_STATE_RUN:
+               if (msec < o->lat_step_run)
+                       break;
+
+               io_u_quiesce(td);
+               /* restart the timer for the next step */
+               fio_gettime(&td->latency_ts, NULL);
+               td->latency_step++;
+
+               /* msec was sampled on entry, before the timer restart above */
+               lat_step_add_sample(td, msec);
+               lat_step_reset(td);
+
+               /*
+                * More steps to go: the state is left at IOD_STATE_RUN.
+                * NOTE(review): there is no transition back to IOD_STATE_RAMP
+                * and no lat_ios_note() between steps -- confirm intended.
+                */
+               if (!lat_step_recalc(td))
+                       break;
+
+               td->done = 1;
+               lat_next_state(td, IOD_STATE_DONE);
+               break;
+       };
+
+       return td->latency_state == IOD_STATE_DONE;
+}
diff --git a/target.h b/target.h

new file mode 100644 (file)

index 0000000..a794285
--- /dev/null
+++ b/target.h
@@ -0,0 +1,58 @@
+#ifndef FIO_LAT_TARGET_H
+#define FIO_LAT_TARGET_H
+
+#include "fio.h"
+
+/*
+ * Default ramp and run values for stepped mode.
+ * NOTE(review): presumably durations in milliseconds used as defaults for
+ * the lat_step_ramp / lat_step_run options -- confirm where they are applied.
+ */
+enum {
+       IOD_STEPPED_DEF_RAMP    = 5000,
+       IOD_STEPPED_DEF_RUN     = 30000,
+};
+
+/*
+ * States of the stepped latency probing state machine, stored in
+ * td->latency_state and advanced by lat_step_check().
+ *
+ * Starts out as PROBE_RAMP -> PROBE_RUN, then iterations of
+ * RAMP -> RUN with various iops limiting settings
+ */
+enum {
+       /* initial ramp, ends after lat_step_ramp */
+       IOD_STATE_PROBE_RAMP = 1,
+       /* probe run, ends after lat_step_run; per-direction iops are recorded */
+       IOD_STATE_PROBE_RUN,
+       /* ramp before a step, ends after lat_step_ramp */
+       IOD_STATE_RAMP,
+       /* step being measured; a sample is stored after lat_step_run */
+       IOD_STATE_RUN,
+       /* stepping finished; lat_step_check() returns true */
+       IOD_STATE_DONE,
+};
+
+/*
+ * Latency target helpers
+ */
+void lat_target_init(struct thread_data *);
+void lat_target_reset(struct thread_data *);
+bool lat_target_failed(struct thread_data *td);
+/* Log iops and average latency for each recorded step that has non-zero iops */
+void lat_step_report(struct thread_stat *ts, struct buf_output *out);
+/* True if any data direction has step stats recorded in ts */
+bool lat_ts_has_stats(struct thread_stat *ts);
+/* True if the given data direction has step stats recorded in ts */
+bool __lat_ts_has_stats(struct thread_stat *ts, enum fio_ddir);
+
+void lat_fatal(struct thread_data *td, unsigned long long tnsec,
+               unsigned long long max_nsec);
+
+/* Advance the stepped probing state machine; true once it has reached DONE */
+bool lat_step_check(struct thread_data *td);
+/* Evaluate the latency target once the latency window has elapsed */
+void __lat_target_check(struct thread_data *td);
+
+/*
+ * Run whichever periodic latency check applies to this thread.
+ *
+ * With a latency target set, __lat_target_check() is run and false is
+ * returned. Otherwise, in stepped iodepth mode, the result of
+ * lat_step_check() is returned. In every other case the result is false.
+ */
+static inline bool lat_target_check(struct thread_data *td)
+{
+       if (td->o.latency_target) {
+               __lat_target_check(td);
+               return false;
+       }
+
+       if (td->o.iodepth_mode != IOD_STEPPED)
+               return false;
+
+       return lat_step_check(td);
+}
+
+/*
+ * Returns true when iodepth_mode is not IOD_STEPPED, or when it is and the
+ * state machine is in IOD_STATE_RUN; false in the other stepped states.
+ * NOTE(review): presumably callers use this to decide whether to account
+ * stats for the current IO -- confirm against the call sites.
+ */
+static inline bool lat_step_account(struct thread_data *td)
+{
+       return td->o.iodepth_mode != IOD_STEPPED ||
+               td->latency_state == IOD_STATE_RUN;
+}
+
+#endif
diff --git a/thread_options.h b/thread_options.h

index 14c6969f3acb4bd741e1d33a5b70b1aac8d68b4a..e062fa6ffd66cb33ec314b4a64b8d85f4946b219 100644 (file)
--- a/thread_options.h
+++ b/thread_options.h
@@ -77,6 +77,13 @@ struct thread_options {
         unsigned int iodepth_batch_complete_min;
         unsigned int iodepth_batch_complete_max;
         unsigned int serialize_overlap;
+       unsigned int iodepth_mode;
+
+       unsigned int lat_step_low;
+       unsigned int lat_step_high;
+       unsigned int lat_step_inc;
+       unsigned int lat_step_ramp;
+       unsigned int lat_step_run;
  
         unsigned int unique_filename;
  
@@ -361,6 +368,7 @@ struct thread_options_pack {
         uint32_t kb_base;
         uint32_t unit_base;
         uint32_t ddir_seq_nr;
+       uint32_t pad;
         uint64_t ddir_seq_add;
         uint32_t iodepth;
         uint32_t iodepth_low;
@@ -368,6 +376,15 @@ struct thread_options_pack {
         uint32_t iodepth_batch_complete_min;
         uint32_t iodepth_batch_complete_max;
         uint32_t serialize_overlap;
+
+       uint32_t iodepth_mode;
+       uint32_t lat_step_low;
+       uint32_t lat_step_high;
+       uint32_t lat_step_inc;
+       uint32_t lat_step_ramp;
+       uint32_t lat_step_run;
+
+       uint32_t pad2;
         uint32_t lat_percentiles;
  
         uint64_t size;
@@ -416,6 +433,7 @@ struct thread_options_pack {
         uint32_t verify_fatal;
         uint32_t verify_dump;
         uint32_t verify_async;
+       uint32_t pad3;
         uint64_t verify_backlog;
         uint32_t verify_batch;
         uint32_t experimental_verify;
@@ -428,7 +446,7 @@ struct thread_options_pack {
         uint32_t override_sync;
         uint32_t rand_repeatable;
         uint32_t allrand_repeatable;
-       uint32_t pad;
+       uint32_t pad4;
         uint64_t rand_seed;
         uint32_t log_avg_msec;
         uint32_t log_hist_msec;
@@ -451,6 +469,7 @@ struct thread_options_pack {
  
         struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
         uint32_t zone_split_nr[DDIR_RWDIR_CNT];
+       uint32_t pad5;
  
         fio_fp64_t zipf_theta;
         fio_fp64_t pareto_h;
@@ -459,10 +478,10 @@ struct thread_options_pack {
         uint32_t random_generator;
  
         uint32_t perc_rand[DDIR_RWDIR_CNT];
+       uint32_t pad6;
  
         uint32_t hugepage_size;
         uint64_t rw_min_bs;
-       uint32_t pad2;
         uint32_t thinktime;
         uint32_t thinktime_spin;
         uint32_t thinktime_blocks;
@@ -476,6 +495,7 @@ struct thread_options_pack {
         uint64_t ss_dur;
         uint64_t ss_ramp_time;
         uint32_t ss_state;
+       uint32_t pad7;
         fio_fp64_t ss_limit;
         uint32_t overwrite;
         uint32_t bw_avg_time;
@@ -534,6 +554,7 @@ struct thread_options_pack {
         uint32_t trim_percentage;
         uint32_t trim_batch;
         uint32_t trim_zero;
+       uint32_t pad8;
         uint64_t trim_backlog;
         uint32_t clat_percentiles;
         uint32_t percentile_precision;
@@ -570,7 +591,6 @@ struct thread_options_pack {
         uint32_t rate_iops_min[DDIR_RWDIR_CNT];
         uint32_t rate_process;
         uint32_t rate_ign_think;
-       uint32_t pad3;
  
         uint8_t ioscheduler[FIO_TOP_STR_MAX];
  
@@ -598,6 +618,7 @@ struct thread_options_pack {
         int32_t flow;
         int32_t flow_watermark;
         uint32_t flow_sleep;
+       uint32_t pad9;
  
         uint64_t offset_increment;
         uint64_t number_ios;
author	Jens Axboe <axboe@kernel.dk>
	Wed, 24 Oct 2018 11:01:43 +0000 (05:01 -0600)
committer	Jens Axboe <axboe@kernel.dk>
	Wed, 24 Oct 2018 11:01:43 +0000 (05:01 -0600)
Makefile		patch \| blob \| blame \| history
backend.c		patch \| blob \| blame \| history
cconv.c		patch \| blob \| blame \| history
client.c		patch \| blob \| blame \| history
examples/iodepth_mode_stepped.fio	[new file with mode: 0644]	patch \| blob
fio.h		patch \| blob \| blame \| history
init.c		patch \| blob \| blame \| history
io_u.c		patch \| blob \| blame \| history
libfio.c		patch \| blob \| blame \| history
options.c		patch \| blob \| blame \| history
server.c		patch \| blob \| blame \| history
server.h		patch \| blob \| blame \| history
stat.c		patch \| blob \| blame \| history
stat.h		patch \| blob \| blame \| history
target.c	[new file with mode: 0644]	patch \| blob
target.h	[new file with mode: 0644]	patch \| blob
thread_options.h		patch \| blob \| blame \| history