From 1a9bf8146d9842d268bcb01f4286325a8cfccc21 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 6 Dec 2017 12:27:07 -0700
Subject: [PATCH] Add option to ignore thinktime for rated IO

By default, fio will ignore thinktime when calculating the next
time to issue an IO, if rated IO is specified. This leads to
fio entering a catch-up type of mode after doing the specified
sleep. For some workloads, that may not be useful. If someone
asks for a specific amount of IOPS and sets a thinktime, they
may want to exclude the sleep time.

Fixes: https://github.com/axboe/fio/issues/497
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 HOWTO            |  7 ++++++
 backend.c        | 61 ++++++++++++++++++++++++++++++------------------
 cconv.c          |  2 ++
 fio.1            |  6 +++++
 io_u.c           |  3 +--
 options.c        | 10 ++++++++
 server.h         |  2 +-
 thread_options.h |  3 +++
 8 files changed, 68 insertions(+), 26 deletions(-)

diff --git a/HOWTO b/HOWTO
index 4caaf542..563ca933 100644
--- a/HOWTO
+++ b/HOWTO
@@ -2208,6 +2208,13 @@ I/O rate
 	(https://en.wikipedia.org/wiki/Poisson_point_process). The lambda will be
 	10^6 / IOPS for the given workload.
 
+.. option:: rate_ignore_thinktime=bool
+
+	By default, fio will attempt to catch up to the specified rate setting,
+	if any kind of thinktime setting was used. If this option is set, then
+	fio will ignore the thinktime and continue doing IO at the specified
+	rate, instead of entering a catch-up mode after thinktime is done.
+
 
 I/O latency
 ~~~~~~~~~~~
diff --git a/backend.c b/backend.c
index 6c805c7f..69f03dcf 100644
--- a/backend.c
+++ b/backend.c
@@ -844,14 +844,13 @@ static bool io_complete_bytes_exceeded(struct thread_data *td)
  */
 static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
 {
-	uint64_t secs, remainder, bps, bytes, iops;
+	uint64_t bps = td->rate_bps[ddir];
 
 	assert(!(td->flags & TD_F_CHILD));
-	bytes = td->rate_io_issue_bytes[ddir];
-	bps = td->rate_bps[ddir];
 
 	if (td->o.rate_process == RATE_PROCESS_POISSON) {
-		uint64_t val;
+		uint64_t val, iops;
+
 		iops = bps / td->o.bs[ddir];
 		val = (int64_t) (1000000 / iops) *
 				-logf(__rand_0_1(&td->poisson_state[ddir]));
@@ -863,14 +862,44 @@ static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
 		td->last_usec[ddir] += val;
 		return td->last_usec[ddir];
 	} else if (bps) {
-		secs = bytes / bps;
-		remainder = bytes % bps;
+		uint64_t bytes = td->rate_io_issue_bytes[ddir];
+		uint64_t secs = bytes / bps;
+		uint64_t remainder = bytes % bps;
+
 		return remainder * 1000000 / bps + secs * 1000000;
 	}
 
 	return 0;
 }
 
+/*
+ * Quiesce, then spin/sleep for thinktime once every thinktime_blocks blocks.
+ */
+static void handle_thinktime(struct thread_data *td, enum fio_ddir ddir)
+{
+	uint64_t total = 0;
+	int left;
+
+	if (ddir_rw_sum(td->io_blocks) % td->o.thinktime_blocks)
+		return;
+
+	io_u_quiesce(td);
+
+	if (td->o.thinktime_spin)
+		total = usec_spin(td->o.thinktime_spin);
+
+	left = td->o.thinktime - total;
+	if (left)
+		total += usec_sleep(td, left);
+
+	/*
+	 * If we're ignoring thinktime for the rate, add the number of bytes
+	 * we would have done while stalled: rate_bps * usecs / 10^6.
+	 */
+	if (total && td->rate_bps[ddir] && td->o.rate_ign_think)
+		td->rate_io_issue_bytes[ddir] += (td->rate_bps[ddir] * total) / 1000000;
+}
+
 /*
  * Main IO worker function. It retrieves io_u's to process and queues
  * and reaps them, checking for rate and errors along the way.
@@ -955,6 +984,7 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done)
 			int err = PTR_ERR(io_u);
 
 			io_u = NULL;
+			ddir = DDIR_INVAL;
 			if (err == -EBUSY) {
 				ret = FIO_Q_BUSY;
 				goto reap;
@@ -1062,23 +1092,8 @@ reap:
 		if (!in_ramp_time(td) && td->o.latency_target)
 			lat_target_check(td);
 
-		if (td->o.thinktime) {
-			unsigned long long b;
-
-			b = ddir_rw_sum(td->io_blocks);
-			if (!(b % td->o.thinktime_blocks)) {
-				int left;
-
-				io_u_quiesce(td);
-
-				if (td->o.thinktime_spin)
-					usec_spin(td->o.thinktime_spin);
-
-				left = td->o.thinktime - td->o.thinktime_spin;
-				if (left)
-					usec_sleep(td, left);
-			}
-		}
+		if (ddir_rw(ddir) && td->o.thinktime)
+			handle_thinktime(td, ddir);
 	}
 
 	check_update_rusage(td);
diff --git a/cconv.c b/cconv.c
index 5ed46406..92996b1e 100644
--- a/cconv.c
+++ b/cconv.c
@@ -298,6 +298,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 
 	o->trim_backlog = le64_to_cpu(top->trim_backlog);
 	o->rate_process = le32_to_cpu(top->rate_process);
+	o->rate_ign_think = le32_to_cpu(top->rate_ign_think);
 
 	for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
 		o->percentile_list[i].u.f = fio_uint64_to_double(le64_to_cpu(top->percentile_list[i].u.i));
@@ -557,6 +558,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
 	top->offset_increment = __cpu_to_le64(o->offset_increment);
 	top->number_ios = __cpu_to_le64(o->number_ios);
 	top->rate_process = cpu_to_le32(o->rate_process);
+	top->rate_ign_think = cpu_to_le32(o->rate_ign_think);
 
 	for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
 		top->percentile_list[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->percentile_list[i].u.f));
diff --git a/fio.1 b/fio.1
index 54d1b0f3..80abc14f 100644
--- a/fio.1
+++ b/fio.1
@@ -1955,6 +1955,12 @@ I/Os that gets adjusted based on I/O completion rates. If this is set to
 flow, known as the Poisson process
 (\fIhttps://en.wikipedia.org/wiki/Poisson_point_process\fR). The lambda will be
 10^6 / IOPS for the given workload.
+.TP
+.BI rate_ignore_thinktime \fR=\fPbool
+By default, fio will attempt to catch up to the specified rate setting, if any
+kind of thinktime setting was used. If this option is set, then fio will
+ignore the thinktime and continue doing IO at the specified rate, instead of
+entering a catch-up mode after thinktime is done.
 .SS "I/O latency"
 .TP
 .BI latency_target \fR=\fPtime
diff --git a/io_u.c b/io_u.c
index 44933a18..7ccbd312 100644
--- a/io_u.c
+++ b/io_u.c
@@ -775,8 +775,7 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
 	if (td->o.io_submit_mode == IO_MODE_INLINE)
 		io_u_quiesce(td);
 
-	usec = usec_sleep(td, usec);
-
+	usec_sleep(td, usec);
 	return ddir;
 }
 
diff --git a/options.c b/options.c
index 3fa646c0..9a3431d8 100644
--- a/options.c
+++ b/options.c
@@ -3459,6 +3459,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_RATE,
 	},
+	{
+		.name	= "rate_ignore_thinktime",
+		.lname	= "Rate ignore thinktime",
+		.type	= FIO_OPT_BOOL,
+		.off1	= offsetof(struct thread_options, rate_ign_think),
+		.help	= "Rated IO ignores thinktime settings",
+		.parent = "rate",
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_RATE,
+	},
 	{
 		.name	= "max_latency",
 		.lname	= "Max Latency (usec)",
diff --git a/server.h b/server.h
index 438a6c3e..1a9b650d 100644
--- a/server.h
+++ b/server.h
@@ -49,7 +49,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-	FIO_SERVER_VER			= 68,
+	FIO_SERVER_VER			= 69,
 
 	FIO_SERVER_MAX_FRAGMENT_PDU	= 1024,
 	FIO_SERVER_MAX_CMD_MB		= 2048,
diff --git a/thread_options.h b/thread_options.h
index 793df8a5..dc290b0b 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -273,6 +273,7 @@ struct thread_options {
 	unsigned int rate_iops[DDIR_RWDIR_CNT];
 	unsigned int rate_iops_min[DDIR_RWDIR_CNT];
 	unsigned int rate_process;
+	unsigned int rate_ign_think;
 
 	char *ioscheduler;
 
@@ -547,6 +548,8 @@ struct thread_options_pack {
 	uint32_t rate_iops[DDIR_RWDIR_CNT];
 	uint32_t rate_iops_min[DDIR_RWDIR_CNT];
 	uint32_t rate_process;
+	uint32_t rate_ign_think;
+	uint32_t pad;
 
 	uint8_t ioscheduler[FIO_TOP_STR_MAX];
 
-- 
2.25.1