From 997b5680d139ce82c2034ba3a0d602cfd778b89b Mon Sep 17 00:00:00 2001 From: Sitsofe Wheeler Date: Sun, 23 Apr 2017 22:54:54 +0100 Subject: [PATCH] fio: add serialize_overlap option If this isn't set (the default) fio can submit write I/Os that overlap other in-flight I/Os leading to potential data races. For example the following job frequently fails at the verification stage: ./fio --random_distribution=zipf:1.6 --direct=1 --filename \ /tmp/fiofile --ioengine=posixaio --iodepth=32 --size=20M --bs=4k \ --rw=randwrite --verify=crc32c --name=verifyoverlap When serialize_overlap=1 fio avoids creating such races. Thanks to Rachel Lunnon (StorMagic) for helping me debug the initial version of this! Fixes: https://github.com/axboe/fio/issues/335 v2: Fix merge conflict and add missing conversion. v3: Add man page, fix serialize_overlap disabling, improve commit message. Tested-by: Jeff Furlong Signed-off-by: Sitsofe Wheeler --- HOWTO | 16 +++++++++++++++- backend.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- cconv.c | 2 ++ fio.1 | 14 ++++++++++++++ init.c | 17 +++++++++++++++++ options.c | 11 +++++++++++ thread_options.h | 3 +++ 7 files changed, 107 insertions(+), 3 deletions(-) diff --git a/HOWTO b/HOWTO index fc173f02..8a7cb1aa 100644 --- a/HOWTO +++ b/HOWTO @@ -2030,6 +2030,21 @@ I/O depth 16 requests, it will let the depth drain down to 4 before starting to fill it again. +.. option:: serialize_overlap=bool + + Serialize in-flight I/Os that might otherwise cause or suffer from data races. + When two or more I/Os are submitted simultaneously, there is no guarantee that + the I/Os will be processed or completed in the submitted order. Further, if + two or more of those I/Os are writes, any overlapping region between them can + become indeterminate/undefined on certain storage. These issues can cause + verification to fail erratically when at least one of the racing I/Os is + changing data and the overlapping region has a non-zero size. 
Setting + ``serialize_overlap`` tells fio to avoid provoking this behavior by explicitly + serializing in-flight I/Os that have a non-zero overlap. Note that setting + this option can reduce both performance and the :option:`iodepth` achieved. + Additionally this option does not work when :option:`io_submit_mode` is set to + offload. Default: false. + .. option:: io_submit_mode=str This option controls how fio submits the I/O to the I/O engine. The default @@ -2605,7 +2620,6 @@ Verification Enable experimental verification. - Steady state ~~~~~~~~~~~~ diff --git a/backend.c b/backend.c index fe159970..f003761d 100644 --- a/backend.c +++ b/backend.c @@ -586,6 +586,37 @@ static int unlink_all_files(struct thread_data *td) return ret; } +/* + * Check if io_u will overlap an in-flight IO in the queue + */ +static bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u) +{ + bool overlap; + struct io_u *check_io_u; + unsigned long long x1, x2, y1, y2; + int i; + + x1 = io_u->offset; + x2 = io_u->offset + io_u->buflen; + overlap = false; + io_u_qiter(q, check_io_u, i) { + if (check_io_u->flags & IO_U_F_FLIGHT) { + y1 = check_io_u->offset; + y2 = check_io_u->offset + check_io_u->buflen; + + if (x1 < y2 && y1 < x2) { + overlap = true; + dprint(FD_IO, "in-flight overlap: %llu/%lu, %llu/%lu\n", + x1, io_u->buflen, + y1, check_io_u->buflen); + break; + } + } + } + + return overlap; +} + /* * The main verify engine. Runs over the writes we previously submitted, * reads the blocks back in, and checks the crc/md5 of the data. 
@@ -716,7 +747,13 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes) if (!td->o.disable_slat) fio_gettime(&io_u->start_time, NULL); - ret = td_io_queue(td, io_u); + if (td->o.serialize_overlap && td->cur_depth > 0) { + if (in_flight_overlap(&td->io_u_all, io_u)) + ret = FIO_Q_BUSY; + else + ret = td_io_queue(td, io_u); + } else + ret = td_io_queue(td, io_u); if (io_queue_event(td, io_u, &ret, ddir, NULL, 1, NULL)) break; @@ -983,7 +1020,13 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done) td->rate_next_io_time[ddir] = usec_for_io(td, ddir); } else { - ret = td_io_queue(td, io_u); + if (td->o.serialize_overlap && td->cur_depth > 0) { + if (in_flight_overlap(&td->io_u_all, io_u)) + ret = FIO_Q_BUSY; + else + ret = td_io_queue(td, io_u); + } else + ret = td_io_queue(td, io_u); if (should_check_rate(td)) td->rate_next_io_time[ddir] = usec_for_io(td, ddir); diff --git a/cconv.c b/cconv.c index f9f2b306..ac58705d 100644 --- a/cconv.c +++ b/cconv.c @@ -96,6 +96,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->iodepth_batch = le32_to_cpu(top->iodepth_batch); o->iodepth_batch_complete_min = le32_to_cpu(top->iodepth_batch_complete_min); o->iodepth_batch_complete_max = le32_to_cpu(top->iodepth_batch_complete_max); + o->serialize_overlap = le32_to_cpu(top->serialize_overlap); o->size = le64_to_cpu(top->size); o->io_size = le64_to_cpu(top->io_size); o->size_percent = le32_to_cpu(top->size_percent); @@ -346,6 +347,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->iodepth_batch = cpu_to_le32(o->iodepth_batch); top->iodepth_batch_complete_min = cpu_to_le32(o->iodepth_batch_complete_min); top->iodepth_batch_complete_max = cpu_to_le32(o->iodepth_batch_complete_max); + top->serialize_overlap = cpu_to_le32(o->serialize_overlap); top->size_percent = cpu_to_le32(o->size_percent); top->fill_device = cpu_to_le32(o->fill_device); top->file_append = cpu_to_le32(o->file_append); diff --git a/fio.1 b/fio.1 
index a3fba650..14359e60 100644 --- a/fio.1 +++ b/fio.1 @@ -1044,6 +1044,20 @@ we simply do polling. Low watermark indicating when to start filling the queue again. Default: \fBiodepth\fR. .TP +.BI serialize_overlap \fR=\fPbool +Serialize in-flight I/Os that might otherwise cause or suffer from data races. +When two or more I/Os are submitted simultaneously, there is no guarantee that +the I/Os will be processed or completed in the submitted order. Further, if +two or more of those I/Os are writes, any overlapping region between them can +become indeterminate/undefined on certain storage. These issues can cause +verification to fail erratically when at least one of the racing I/Os is +changing data and the overlapping region has a non-zero size. Setting +\fBserialize_overlap\fR tells fio to avoid provoking this behavior by explicitly +serializing in-flight I/Os that have a non-zero overlap. Note that setting +this option can reduce both performance and the \fBiodepth\fR achieved. +Additionally this option does not work when \fBio_submit_mode\fR is set to +offload. Default: false. +.TP .BI io_submit_mode \fR=\fPstr This option controls how fio submits the IO to the IO engine. The default is \fBinline\fR, which means that the fio job threads submit and reap IO directly. 
diff --git a/init.c b/init.c index 42e71071..164e411c 100644 --- a/init.c +++ b/init.c @@ -698,6 +698,23 @@ static int fixup_options(struct thread_data *td) if (o->iodepth_batch_complete_min > o->iodepth_batch_complete_max) o->iodepth_batch_complete_max = o->iodepth_batch_complete_min; + /* + * There's no need to check for in-flight overlapping IOs if the job + * isn't changing data or the maximum iodepth is guaranteed to be 1 + */ + if (o->serialize_overlap && !(td->flags & TD_F_READ_IOLOG) && + (!(td_write(td) || td_trim(td)) || o->iodepth == 1)) + o->serialize_overlap = 0; + /* + * Currently can't check for overlaps in offload mode + */ + if (o->serialize_overlap && o->io_submit_mode == IO_MODE_OFFLOAD) { + log_err("fio: checking for in-flight overlaps when the " + "io_submit_mode is offload is not supported\n"); + o->serialize_overlap = 0; + ret = warnings_fatal; + } + if (o->nr_files > td->files_index) o->nr_files = td->files_index; diff --git a/options.c b/options.c index f2b2bb9c..443791ab 100644 --- a/options.c +++ b/options.c @@ -1881,6 +1881,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_IO, .group = FIO_OPT_G_IO_BASIC, }, + { + .name = "serialize_overlap", + .lname = "Serialize overlap", + .off1 = offsetof(struct thread_options, serialize_overlap), + .type = FIO_OPT_BOOL, + .help = "Wait for in-flight IOs that collide to complete", + .parent = "iodepth", + .def = "0", + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_IO_BASIC, + }, { .name = "io_submit_mode", .lname = "IO submit mode", diff --git a/thread_options.h b/thread_options.h index f3dfd42f..26a3e0e6 100644 --- a/thread_options.h +++ b/thread_options.h @@ -65,6 +65,7 @@ struct thread_options { unsigned int iodepth_batch; unsigned int iodepth_batch_complete_min; unsigned int iodepth_batch_complete_max; + unsigned int serialize_overlap; unsigned int unique_filename; @@ -340,6 +341,8 @@ struct thread_options_pack { uint32_t iodepth_batch; uint32_t 
iodepth_batch_complete_min; uint32_t iodepth_batch_complete_max; + uint32_t serialize_overlap; + uint32_t pad3; uint64_t size; uint64_t io_size; -- 2.25.1