For iodepth > 1, the null ioengine is an asynchronous ioengine, yet it
does not report submission latency. This patch adds code for setting
issue_time in order to calculate submission latency.
The extra work does reduce performance for iodepth > 1 but this
reduction can be mitigated with options like gtod_reduce.
- there are no differences at qd 1 (5837K vs 5864K)
- default options at qd 256 show an 8% IOPS reduction (3583K vs 3303K).
- gtod_reduce=1 at qd 256 shows a 3% IOPS reduction (5119K vs 4966K).
$ ../fio-canonical/fio --name=test --ioengine=null --size=1T --norandommap --ramp_time=5s --runtime=15s --cpus_allowed=1
test: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=null, iodepth=1
fio-3.30-73-gd622
<snip>
read: IOPS=5837k, BW=22.3GiB/s (23.9GB/s)(334GiB/15001msec)
<snip>
$ ./fio --name=test --ioengine=null --size=1T --norandommap --ramp_time=5s --runtime=15s --cpus_allowed=1
test: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=null, iodepth=1
fio-3.30-74-gcfbc2
<snip>
read: IOPS=5864k, BW=22.4GiB/s (24.0GB/s)(336GiB/15001msec)
<snip>
$ ../fio-canonical/fio --name=test --ioengine=null --size=1T --norandommap --iodepth=256 --ramp_time=5s --runtime=15s --cpus_allowed=1
test: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=null, iodepth=256
fio-3.30-73-gd622
<snip>
read: IOPS=3583k, BW=13.7GiB/s (14.7GB/s)(205GiB/15001msec)
<snip>
$ ./fio --name=test --ioengine=null --size=1T --norandommap --iodepth=256 --ramp_time=5s --runtime=15s --cpus_allowed=1
test: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=null, iodepth=256
fio-3.30-74-gcfbc2
<snip>
read: IOPS=3303k, BW=12.6GiB/s (13.5GB/s)(189GiB/15001msec)
<snip>
$ ../fio-canonical/fio --name=test --ioengine=null --size=1T --norandommap --iodepth=256 --ramp_time=5s --runtime=15s --cpus_allowed=1 --gtod_reduce=1
test: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=null, iodepth=256
fio-3.30-73-gd622
<snip>
read: IOPS=5119k, BW=19.5GiB/s (21.0GB/s)(293GiB/15001msec)
<snip>
$ ./fio --name=test --ioengine=null --size=1T --norandommap --iodepth=256 --ramp_time=5s --runtime=15s --cpus_allowed=1 --gtod_reduce=1
test: (g=0): rw=read, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=null, iodepth=256
fio-3.30-74-gcfbc2
<snip>
read: IOPS=4966k, BW=18.9GiB/s (20.3GB/s)(284GiB/15001msec)
<snip>
Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
return ret;
}
+/*
+ * Stamp all currently queued io_us with one shared issue time and hand
+ * each of them to io_u_queued(), so that submission latency can be
+ * accounted for. The clock is read once for the whole batch rather than
+ * once per io_u. Nothing is done when fio_fill_issue_time() returns
+ * false for this thread.
+ */
+static void null_queued(struct thread_data *td, struct null_data *nd)
+{
+	struct timespec stamp;
+	int idx;
+
+	if (fio_fill_issue_time(td)) {
+		fio_gettime(&stamp, NULL);
+
+		for (idx = 0; idx < nd->queued; idx++) {
+			struct io_u *cur = nd->io_us[idx];
+
+			/* every io_u in the batch gets the same timestamp */
+			memcpy(&cur->issue_time, &stamp, sizeof(stamp));
+			io_u_queued(td, cur);
+		}
+	}
+}
+
static int null_commit(struct thread_data *td, struct null_data *nd)
{
if (!nd->events) {
+ null_queued(td, nd);
+
#ifndef FIO_EXTERNAL_ENGINE
io_u_mark_submit(td, nd->queued);
#endif