Add option to ignore thinktime for rated IO
authorJens Axboe <axboe@kernel.dk>
Wed, 6 Dec 2017 19:27:07 +0000 (12:27 -0700)
committerJens Axboe <axboe@kernel.dk>
Wed, 6 Dec 2017 19:27:07 +0000 (12:27 -0700)
By default, fio will ignore thinktime when calculating the next
time to issue an IO, if rated IO is specified. This leads to
fio entering a catch-up type of mode after doing the specified
sleep. For some workloads, that may not be useful. If someone
asks for a specific amount of IOPS and sets a thinktime, they
may want to exclude the sleep time.

Fixes: https://github.com/axboe/fio/issues/497
Signed-off-by: Jens Axboe <axboe@kernel.dk>
HOWTO
backend.c
cconv.c
fio.1
io_u.c
options.c
server.h
thread_options.h

diff --git a/HOWTO b/HOWTO
index 4caaf54..563ca93 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -2208,6 +2208,13 @@ I/O rate
        (https://en.wikipedia.org/wiki/Poisson_point_process). The lambda will be
        10^6 / IOPS for the given workload.
 
+.. option:: rate_ignore_thinktime=bool
+
+       By default, fio will attempt to catch up to the specified rate setting,
+       if any kind of thinktime setting was used. If this option is set, then
+       fio will ignore the thinktime and continue doing IO at the specified
+       rate, instead of entering a catch-up mode after thinktime is done.
+
 
 I/O latency
 ~~~~~~~~~~~
index 6c805c7..69f03dc 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -844,14 +844,13 @@ static bool io_complete_bytes_exceeded(struct thread_data *td)
  */
 static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
 {
-       uint64_t secs, remainder, bps, bytes, iops;
+       uint64_t bps = td->rate_bps[ddir];
 
        assert(!(td->flags & TD_F_CHILD));
-       bytes = td->rate_io_issue_bytes[ddir];
-       bps = td->rate_bps[ddir];
 
        if (td->o.rate_process == RATE_PROCESS_POISSON) {
-               uint64_t val;
+               uint64_t val, iops;
+
                iops = bps / td->o.bs[ddir];
                val = (int64_t) (1000000 / iops) *
                                -logf(__rand_0_1(&td->poisson_state[ddir]));
@@ -863,14 +862,44 @@ static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
                td->last_usec[ddir] += val;
                return td->last_usec[ddir];
        } else if (bps) {
-               secs = bytes / bps;
-               remainder = bytes % bps;
+               uint64_t bytes = td->rate_io_issue_bytes[ddir];
+               uint64_t secs = bytes / bps;
+               uint64_t remainder = bytes % bps;
+
                return remainder * 1000000 / bps + secs * 1000000;
        }
 
        return 0;
 }
 
+/*
+ * Apply the configured thinktime. Nothing happens unless
+ * ddir_rw_sum(td->io_blocks) is a multiple of thinktime_blocks; otherwise
+ * pending IO is quiesced, we spin for thinktime_spin and then sleep for
+ * whatever part of thinktime is left.
+ *
+ * If rate_ignore_thinktime is set and a rate is configured for 'ddir', the
+ * rate accounting is advanced by the bytes the rate would have covered
+ * during the pause, so no catch-up follows the thinktime.
+ */
+static void handle_thinktime(struct thread_data *td, enum fio_ddir ddir)
+{
+       unsigned long long b;
+       uint64_t total;
+
+       b = ddir_rw_sum(td->io_blocks);
+       if (b % td->o.thinktime_blocks)
+               return;
+
+       io_u_quiesce(td);
+
+       /*
+        * 'total' accumulates what usec_spin()/usec_sleep() return, which is
+        * presumably the time actually spent in usec - confirm against the
+        * helpers.
+        */
+       total = 0;
+       if (td->o.thinktime_spin)
+               total = usec_spin(td->o.thinktime_spin);
+
+       /*
+        * Only sleep if the spin did not already cover the thinktime.
+        * Comparing before subtracting avoids handing a wrapped/negative
+        * value to usec_sleep() when the spin overshoots.
+        */
+       if (total < td->o.thinktime)
+               total += usec_sleep(td, td->o.thinktime - total);
+
+       /*
+        * If we're ignoring thinktime for the rate, add the number of bytes
+        * we would have done while sleeping: bytes/sec * usec / 1000000.
+        */
+       if (total && td->rate_bps[ddir] && td->o.rate_ign_think)
+               td->rate_io_issue_bytes[ddir] += (td->rate_bps[ddir] * total) / 1000000;
+}
+
 /*
  * Main IO worker function. It retrieves io_u's to process and queues
  * and reaps them, checking for rate and errors along the way.
@@ -955,6 +984,7 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done)
                        int err = PTR_ERR(io_u);
 
                        io_u = NULL;
+                       ddir = DDIR_INVAL;
                        if (err == -EBUSY) {
                                ret = FIO_Q_BUSY;
                                goto reap;
@@ -1062,23 +1092,8 @@ reap:
                if (!in_ramp_time(td) && td->o.latency_target)
                        lat_target_check(td);
 
-               if (td->o.thinktime) {
-                       unsigned long long b;
-
-                       b = ddir_rw_sum(td->io_blocks);
-                       if (!(b % td->o.thinktime_blocks)) {
-                               int left;
-
-                               io_u_quiesce(td);
-
-                               if (td->o.thinktime_spin)
-                                       usec_spin(td->o.thinktime_spin);
-
-                               left = td->o.thinktime - td->o.thinktime_spin;
-                               if (left)
-                                       usec_sleep(td, left);
-                       }
-               }
+               if (ddir_rw(ddir) && td->o.thinktime)
+                       handle_thinktime(td, ddir);
        }
 
        check_update_rusage(td);
diff --git a/cconv.c b/cconv.c
index 5ed4640..92996b1 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -298,6 +298,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 
        o->trim_backlog = le64_to_cpu(top->trim_backlog);
        o->rate_process = le32_to_cpu(top->rate_process);
+       o->rate_ign_think = le32_to_cpu(top->rate_ign_think);
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
                o->percentile_list[i].u.f = fio_uint64_to_double(le64_to_cpu(top->percentile_list[i].u.i));
@@ -557,6 +558,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->offset_increment = __cpu_to_le64(o->offset_increment);
        top->number_ios = __cpu_to_le64(o->number_ios);
        top->rate_process = cpu_to_le32(o->rate_process);
+       top->rate_ign_think = cpu_to_le32(o->rate_ign_think);
 
        for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
                top->percentile_list[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->percentile_list[i].u.f));
diff --git a/fio.1 b/fio.1
index 54d1b0f..80abc14 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -1955,6 +1955,12 @@ I/Os that gets adjusted based on I/O completion rates. If this is set to
 flow, known as the Poisson process
 (\fIhttps://en.wikipedia.org/wiki/Poisson_point_process\fR). The lambda will be
 10^6 / IOPS for the given workload.
+.TP
+.BI rate_ignore_thinktime \fR=\fPbool
+By default, fio will attempt to catch up to the specified rate setting, if any
+kind of thinktime setting was used. If this option is set, then fio will
+ignore the thinktime and continue doing IO at the specified rate, instead of
+entering a catch-up mode after thinktime is done.
 .SS "I/O latency"
 .TP
 .BI latency_target \fR=\fPtime
diff --git a/io_u.c b/io_u.c
index 44933a1..7ccbd31 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -775,8 +775,7 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
        if (td->o.io_submit_mode == IO_MODE_INLINE)
                io_u_quiesce(td);
 
-       usec = usec_sleep(td, usec);
-
+       usec_sleep(td, usec);
        return ddir;
 }
 
index 3fa646c..9a3431d 100644 (file)
--- a/options.c
+++ b/options.c
@@ -3459,6 +3459,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_RATE,
        },
+       {
+               .name   = "rate_ignore_thinktime",
+               .lname  = "Rate ignore thinktime",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct thread_options, rate_ign_think),
+               .help   = "Rated IO ignores thinktime settings",
+               .parent = "rate",
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_RATE,
+       },
        {
                .name   = "max_latency",
                .lname  = "Max Latency (usec)",
index 438a6c3..1a9b650 100644 (file)
--- a/server.h
+++ b/server.h
@@ -49,7 +49,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-       FIO_SERVER_VER                  = 68,
+       FIO_SERVER_VER                  = 69,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
index 793df8a..dc290b0 100644 (file)
@@ -273,6 +273,7 @@ struct thread_options {
        unsigned int rate_iops[DDIR_RWDIR_CNT];
        unsigned int rate_iops_min[DDIR_RWDIR_CNT];
        unsigned int rate_process;
+       unsigned int rate_ign_think;
 
        char *ioscheduler;
 
@@ -547,6 +548,8 @@ struct thread_options_pack {
        uint32_t rate_iops[DDIR_RWDIR_CNT];
        uint32_t rate_iops_min[DDIR_RWDIR_CNT];
        uint32_t rate_process;
+       uint32_t rate_ign_think;
+       uint32_t pad;
 
        uint8_t ioscheduler[FIO_TOP_STR_MAX];