Add support for latency probing over an interval of load

[fio.git] / io_u.c
diff --git a/io_u.c b/io_u.c

index c58dcf0493bb50347367f8a1d6886ded60ec7748..e1ac2097b051a1f658b692f8aaf40d38928e0919 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -10,6 +10,8 @@
  #include "err.h"
  #include "lib/pow2.h"
  #include "minmax.h"
+#include "zbd.h"
+#include "target.h"
  
  struct io_completion_data {
         int nr;                         /* input */
@@ -31,21 +33,27 @@ static bool random_map_free(struct fio_file *f, const uint64_t block)
  /*
   * Mark a given offset as used in the map.
   */
-static void mark_random_map(struct thread_data *td, struct io_u *io_u)
+static uint64_t mark_random_map(struct thread_data *td, struct io_u *io_u,
+                               uint64_t offset, uint64_t buflen)
  {
         unsigned long long min_bs = td->o.min_bs[io_u->ddir];
         struct fio_file *f = io_u->file;
         unsigned long long nr_blocks;
         uint64_t block;
  
-       block = (io_u->offset - f->file_offset) / (uint64_t) min_bs;
-       nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
+       block = (offset - f->file_offset) / (uint64_t) min_bs;
+       nr_blocks = (buflen + min_bs - 1) / min_bs;
+       assert(nr_blocks > 0);
  
-       if (!(io_u->flags & IO_U_F_BUSY_OK))
+       if (!(io_u->flags & IO_U_F_BUSY_OK)) {
                 nr_blocks = axmap_set_nr(f->io_axmap, block, nr_blocks);
+               assert(nr_blocks > 0);
+       }
+
+       if ((nr_blocks * min_bs) < buflen)
+               buflen = nr_blocks * min_bs;
  
-       if ((nr_blocks * min_bs) < io_u->buflen)
-               io_u->buflen = nr_blocks * min_bs;
+       return buflen;
  }
  
  static uint64_t last_block(struct thread_data *td, struct fio_file *f,
@@ -64,7 +72,7 @@ static uint64_t last_block(struct thread_data *td, struct fio_file *f,
         if (max_size > f->real_file_size)
                 max_size = f->real_file_size;
  
-       if (td->o.zone_range)
+       if (td->o.zone_mode == ZONE_MODE_STRIDED && td->o.zone_range)
                 max_size = td->o.zone_range;
  
         if (td->o.min_bs[ddir] > td->o.ba[ddir])
@@ -761,10 +769,18 @@ void put_file_log(struct thread_data *td, struct fio_file *f)
  
  void put_io_u(struct thread_data *td, struct io_u *io_u)
  {
+       const bool needs_lock = td_async_processing(td);
+
+       if (io_u->post_submit) {
+               io_u->post_submit(io_u, io_u->error == 0);
+               io_u->post_submit = NULL;
+       }
+
         if (td->parent)
                 td = td->parent;
  
-       td_io_u_lock(td);
+       if (needs_lock)
+               __td_io_u_lock(td);
  
         if (io_u->file && !(io_u->flags & IO_U_F_NO_FILE_PUT))
                 put_file_log(td, io_u->file);
@@ -778,7 +794,9 @@ void put_io_u(struct thread_data *td, struct io_u *io_u)
         }
         io_u_qpush(&td->io_u_freelist, io_u);
         td_io_u_free_notify(td);
-       td_io_u_unlock(td);
+
+       if (needs_lock)
+               __td_io_u_unlock(td);
  }
  
  void clear_io_u(struct thread_data *td, struct io_u *io_u)
@@ -789,6 +807,7 @@ void clear_io_u(struct thread_data *td, struct io_u *io_u)
  
  void requeue_io_u(struct thread_data *td, struct io_u **io_u)
  {
+       const bool needs_lock = td_async_processing(td);
         struct io_u *__io_u = *io_u;
         enum fio_ddir ddir = acct_ddir(__io_u);
  
@@ -797,7 +816,8 @@ void requeue_io_u(struct thread_data *td, struct io_u **io_u)
         if (td->parent)
                 td = td->parent;
  
-       td_io_u_lock(td);
+       if (needs_lock)
+               __td_io_u_lock(td);
  
         io_u_set(td, __io_u, IO_U_F_FREE);
         if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(ddir))
@@ -811,14 +831,21 @@ void requeue_io_u(struct thread_data *td, struct io_u **io_u)
  
         io_u_rpush(&td->io_u_requeues, __io_u);
         td_io_u_free_notify(td);
-       td_io_u_unlock(td);
+
+       if (needs_lock)
+               __td_io_u_unlock(td);
+
         *io_u = NULL;
  }
  
-static void __fill_io_u_zone(struct thread_data *td, struct io_u *io_u)
+static void setup_strided_zone_mode(struct thread_data *td, struct io_u *io_u)
  {
         struct fio_file *f = io_u->file;
  
+       assert(td->o.zone_mode == ZONE_MODE_STRIDED);
+       assert(td->o.zone_size);
+       assert(td->o.zone_range);
+
         /*
          * See if it's time to switch to a new zone
          */
@@ -857,6 +884,8 @@ static void __fill_io_u_zone(struct thread_data *td, struct io_u *io_u)
  static int fill_io_u(struct thread_data *td, struct io_u *io_u)
  {
         bool is_random;
+       uint64_t offset;
+       enum io_u_action ret;
  
         if (td_ioengine_flagged(td, FIO_NOIO))
                 goto out;
@@ -869,11 +898,8 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
         if (!ddir_rw(io_u->ddir))
                 goto out;
  
-       /*
-        * When file is zoned zone_range is always positive
-        */
-       if (td->o.zone_range)
-               __fill_io_u_zone(td, io_u);
+       if (td->o.zone_mode == ZONE_MODE_STRIDED)
+               setup_strided_zone_mode(td, io_u);
  
         /*
          * No log, let the seq/rand engine retrieve the next buflen and
@@ -890,6 +916,13 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
                 return 1;
         }
  
+       offset = io_u->offset;
+       if (td->o.zone_mode == ZONE_MODE_ZBD) {
+               ret = zbd_adjust_block(td, io_u);
+               if (ret == io_u_eof)
+                       return 1;
+       }
+
         if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
                 dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%llx exceeds file size=0x%llx\n",
                         io_u,
@@ -902,7 +935,7 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
          * mark entry before potentially trimming io_u
          */
         if (td_random(td) && file_randommap(td, io_u->file))
-               mark_random_map(td, io_u);
+               io_u->buflen = mark_random_map(td, io_u, offset, io_u->buflen);
  
  out:
         dprint_io_u(io_u, "fill");
@@ -1303,6 +1336,11 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
                 if (!fill_io_u(td, io_u))
                         break;
  
+               if (io_u->post_submit) {
+                       io_u->post_submit(io_u, false);
+                       io_u->post_submit = NULL;
+               }
+
                 put_file_log(td, f);
                 td_io_close_file(td, f);
                 io_u->file = NULL;
@@ -1319,146 +1357,6 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
         return 0;
  }
  
-static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
-                     unsigned long long tnsec, unsigned long long max_nsec)
-{
-       if (!td->error)
-               log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
-       td_verror(td, ETIMEDOUT, "max latency exceeded");
-       icd->error = ETIMEDOUT;
-}
-
-static void lat_new_cycle(struct thread_data *td)
-{
-       fio_gettime(&td->latency_ts, NULL);
-       td->latency_ios = ddir_rw_sum(td->io_blocks);
-       td->latency_failed = 0;
-}
-
-/*
- * We had an IO outside the latency target. Reduce the queue depth. If we
- * are at QD=1, then it's time to give up.
- */
-static bool __lat_target_failed(struct thread_data *td)
-{
-       if (td->latency_qd == 1)
-               return true;
-
-       td->latency_qd_high = td->latency_qd;
-
-       if (td->latency_qd == td->latency_qd_low)
-               td->latency_qd_low--;
-
-       td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2;
-
-       dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
-
-       /*
-        * When we ramp QD down, quiesce existing IO to prevent
-        * a storm of ramp downs due to pending higher depth.
-        */
-       io_u_quiesce(td);
-       lat_new_cycle(td);
-       return false;
-}
-
-static bool lat_target_failed(struct thread_data *td)
-{
-       if (td->o.latency_percentile.u.f == 100.0)
-               return __lat_target_failed(td);
-
-       td->latency_failed++;
-       return false;
-}
-
-void lat_target_init(struct thread_data *td)
-{
-       td->latency_end_run = 0;
-
-       if (td->o.latency_target) {
-               dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target);
-               fio_gettime(&td->latency_ts, NULL);
-               td->latency_qd = 1;
-               td->latency_qd_high = td->o.iodepth;
-               td->latency_qd_low = 1;
-               td->latency_ios = ddir_rw_sum(td->io_blocks);
-       } else
-               td->latency_qd = td->o.iodepth;
-}
-
-void lat_target_reset(struct thread_data *td)
-{
-       if (!td->latency_end_run)
-               lat_target_init(td);
-}
-
-static void lat_target_success(struct thread_data *td)
-{
-       const unsigned int qd = td->latency_qd;
-       struct thread_options *o = &td->o;
-
-       td->latency_qd_low = td->latency_qd;
-
-       /*
-        * If we haven't failed yet, we double up to a failing value instead
-        * of bisecting from highest possible queue depth. If we have set
-        * a limit other than td->o.iodepth, bisect between that.
-        */
-       if (td->latency_qd_high != o->iodepth)
-               td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2;
-       else
-               td->latency_qd *= 2;
-
-       if (td->latency_qd > o->iodepth)
-               td->latency_qd = o->iodepth;
-
-       dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
-
-       /*
-        * Same as last one, we are done. Let it run a latency cycle, so
-        * we get only the results from the targeted depth.
-        */
-       if (td->latency_qd == qd) {
-               if (td->latency_end_run) {
-                       dprint(FD_RATE, "We are done\n");
-                       td->done = 1;
-               } else {
-                       dprint(FD_RATE, "Quiesce and final run\n");
-                       io_u_quiesce(td);
-                       td->latency_end_run = 1;
-                       reset_all_stats(td);
-                       reset_io_stats(td);
-               }
-       }
-
-       lat_new_cycle(td);
-}
-
-/*
- * Check if we can bump the queue depth
- */
-void lat_target_check(struct thread_data *td)
-{
-       uint64_t usec_window;
-       uint64_t ios;
-       double success_ios;
-
-       usec_window = utime_since_now(&td->latency_ts);
-       if (usec_window < td->o.latency_window)
-               return;
-
-       ios = ddir_rw_sum(td->io_blocks) - td->latency_ios;
-       success_ios = (double) (ios - td->latency_failed) / (double) ios;
-       success_ios *= 100.0;
-
-       dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f);
-
-       if (success_ios >= td->o.latency_percentile.u.f)
-               lat_target_success(td);
-       else
-               __lat_target_failed(td);
-}
-
  /*
   * If latency target is enabled, we might be ramping up or down and not
   * using the full queue depth available.
@@ -1469,7 +1367,7 @@ bool queue_full(const struct thread_data *td)
  
         if (qempty)
                 return true;
-       if (!td->o.latency_target)
+       if (!td->o.latency_target || td->o.iodepth_mode != IOD_STEPPED)
                 return false;
  
         return td->cur_depth >= td->latency_qd;
@@ -1477,13 +1375,15 @@ bool queue_full(const struct thread_data *td)
  
  struct io_u *__get_io_u(struct thread_data *td)
  {
+       const bool needs_lock = td_async_processing(td);
         struct io_u *io_u = NULL;
         int ret;
  
         if (td->stop_io)
                 return NULL;
  
-       td_io_u_lock(td);
+       if (needs_lock)
+               __td_io_u_lock(td);
  
  again:
         if (!io_u_rempty(&td->io_u_requeues))
@@ -1520,7 +1420,9 @@ again:
                 goto again;
         }
  
-       td_io_u_unlock(td);
+       if (needs_lock)
+               __td_io_u_unlock(td);
+
         return io_u;
  }
  
@@ -1757,6 +1659,16 @@ static inline bool gtod_reduce(struct thread_data *td)
                         || td->o.gtod_reduce;
  }
  
+static void trim_block_info(struct thread_data *td, struct io_u *io_u)
+{
+       uint32_t *info = io_u_block_info(td, io_u);
+
+       if (BLOCK_INFO_STATE(*info) >= BLOCK_STATE_TRIM_FAILURE)
+               return;
+
+       *info = BLOCK_INFO(BLOCK_STATE_TRIMMED, BLOCK_INFO_TRIMS(*info) + 1);
+}
+
  static void account_io_completion(struct thread_data *td, struct io_u *io_u,
                                   struct io_completion_data *icd,
                                   const enum fio_ddir idx, unsigned int bytes)
@@ -1786,11 +1698,15 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
                                 icd->error = ops->io_u_lat(td, tnsec);
                 }
  
-               if (td->o.max_latency && tnsec > td->o.max_latency)
-                       lat_fatal(td, icd, tnsec, td->o.max_latency);
+               if (td->o.max_latency && tnsec > td->o.max_latency) {
+                       icd->error = ETIMEDOUT;
+                       lat_fatal(td, tnsec, td->o.max_latency);
+               }
                 if (td->o.latency_target && tnsec > td->o.latency_target) {
-                       if (lat_target_failed(td))
-                               lat_fatal(td, icd, tnsec, td->o.latency_target);
+                       if (lat_target_failed(td)) {
+                               icd->error = ETIMEDOUT;
+                               lat_fatal(td, tnsec, td->o.latency_target);
+                       }
                 }
         }
  
@@ -1808,18 +1724,8 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
         } else if (ddir_sync(idx) && !td->o.disable_clat)
                 add_sync_clat_sample(&td->ts, llnsec);
  
-       if (td->ts.nr_block_infos && io_u->ddir == DDIR_TRIM) {
-               uint32_t *info = io_u_block_info(td, io_u);
-               if (BLOCK_INFO_STATE(*info) < BLOCK_STATE_TRIM_FAILURE) {
-                       if (io_u->ddir == DDIR_TRIM) {
-                               *info = BLOCK_INFO(BLOCK_STATE_TRIMMED,
-                                               BLOCK_INFO_TRIMS(*info) + 1);
-                       } else if (io_u->ddir == DDIR_WRITE) {
-                               *info = BLOCK_INFO_SET_STATE(BLOCK_STATE_WRITTEN,
-                                                               *info);
-                       }
-               }
-       }
+       if (td->ts.nr_block_infos && io_u->ddir == DDIR_TRIM)
+               trim_block_info(td, io_u);
  }
  
  static void file_log_write_comp(const struct thread_data *td, struct fio_file *f,
@@ -1846,8 +1752,8 @@ static void file_log_write_comp(const struct thread_data *td, struct fio_file *f
  
  static bool should_account(struct thread_data *td)
  {
-       return ramp_time_over(td) && (td->runstate == TD_RUNNING ||
-                                          td->runstate == TD_VERIFYING);
+       return lat_step_account(td) && ramp_time_over(td) &&
+               (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING);
  }
  
  static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,