From: Jens Axboe Date: Mon, 14 Aug 2017 14:32:56 +0000 (-0600) Subject: Merge branch 'ci' of https://github.com/sitsofe/fio X-Git-Tag: fio-3.0~11 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=e97412c38bedc4f365bdf4abe1ff8f6694587a86;hp=a397a400d44c0dd5d38eab3edaae3ff368cae277 Merge branch 'ci' of https://github.com/sitsofe/fio --- diff --git a/HOWTO b/HOWTO index fc173f02..8a7cb1aa 100644 --- a/HOWTO +++ b/HOWTO @@ -2030,6 +2030,21 @@ I/O depth 16 requests, it will let the depth drain down to 4 before starting to fill it again. +.. option:: serialize_overlap=bool + + Serialize in-flight I/Os that might otherwise cause or suffer from data races. + When two or more I/Os are submitted simultaneously, there is no guarantee that + the I/Os will be processed or completed in the submitted order. Further, if + two or more of those I/Os are writes, any overlapping region between them can + become indeterminate/undefined on certain storage. These issues can cause + verification to fail erratically when at least one of the racing I/Os is + changing data and the overlapping region has a non-zero size. Setting + ``serialize_overlap`` tells fio to avoid provoking this behavior by explicitly + serializing in-flight I/Os that have a non-zero overlap. Note that setting + this option can reduce both performance and the :option:`iodepth` achieved. + Additionally this option does not work when :option:`io_submit_mode` is set to + offload. Default: false. + .. option:: io_submit_mode=str This option controls how fio submits the I/O to the I/O engine. The default @@ -2605,7 +2620,6 @@ Verification Enable experimental verification. 
- Steady state ~~~~~~~~~~~~ diff --git a/backend.c b/backend.c index fe159970..d2675b43 100644 --- a/backend.c +++ b/backend.c @@ -586,6 +586,50 @@ static int unlink_all_files(struct thread_data *td) return ret; } +/* + * Check if io_u will overlap an in-flight IO in the queue + */ +static bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u) +{ + bool overlap; + struct io_u *check_io_u; + unsigned long long x1, x2, y1, y2; + int i; + + x1 = io_u->offset; + x2 = io_u->offset + io_u->buflen; + overlap = false; + io_u_qiter(q, check_io_u, i) { + if (check_io_u->flags & IO_U_F_FLIGHT) { + y1 = check_io_u->offset; + y2 = check_io_u->offset + check_io_u->buflen; + + if (x1 < y2 && y1 < x2) { + overlap = true; + dprint(FD_IO, "in-flight overlap: %llu/%lu, %llu/%lu\n", + x1, io_u->buflen, + y1, check_io_u->buflen); + break; + } + } + } + + return overlap; +} + +static int io_u_submit(struct thread_data *td, struct io_u *io_u) +{ + /* + * Check for overlap if the user asked us to, and we have + * at least one IO in flight besides this one. + */ + if (td->o.serialize_overlap && td->cur_depth > 1 && + in_flight_overlap(&td->io_u_all, io_u)) + return FIO_Q_BUSY; + + return td_io_queue(td, io_u); +} + /* * The main verify engine. Runs over the writes we previously submitted, * reads the blocks back in, and checks the crc/md5 of the data. 
@@ -716,7 +760,7 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes) if (!td->o.disable_slat) fio_gettime(&io_u->start_time, NULL); - ret = td_io_queue(td, io_u); + ret = io_u_submit(td, io_u); if (io_queue_event(td, io_u, &ret, ddir, NULL, 1, NULL)) break; @@ -983,7 +1027,7 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done) td->rate_next_io_time[ddir] = usec_for_io(td, ddir); } else { - ret = td_io_queue(td, io_u); + ret = io_u_submit(td, io_u); if (should_check_rate(td)) td->rate_next_io_time[ddir] = usec_for_io(td, ddir); diff --git a/cconv.c b/cconv.c index f9f2b306..ac58705d 100644 --- a/cconv.c +++ b/cconv.c @@ -96,6 +96,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->iodepth_batch = le32_to_cpu(top->iodepth_batch); o->iodepth_batch_complete_min = le32_to_cpu(top->iodepth_batch_complete_min); o->iodepth_batch_complete_max = le32_to_cpu(top->iodepth_batch_complete_max); + o->serialize_overlap = le32_to_cpu(top->serialize_overlap); o->size = le64_to_cpu(top->size); o->io_size = le64_to_cpu(top->io_size); o->size_percent = le32_to_cpu(top->size_percent); @@ -346,6 +347,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->iodepth_batch = cpu_to_le32(o->iodepth_batch); top->iodepth_batch_complete_min = cpu_to_le32(o->iodepth_batch_complete_min); top->iodepth_batch_complete_max = cpu_to_le32(o->iodepth_batch_complete_max); + top->serialize_overlap = cpu_to_le32(o->serialize_overlap); top->size_percent = cpu_to_le32(o->size_percent); top->fill_device = cpu_to_le32(o->fill_device); top->file_append = cpu_to_le32(o->file_append); diff --git a/fio.1 b/fio.1 index a3fba650..14359e60 100644 --- a/fio.1 +++ b/fio.1 @@ -1044,6 +1044,20 @@ we simply do polling. Low watermark indicating when to start filling the queue again. Default: \fBiodepth\fR. .TP +.BI serialize_overlap \fR=\fPbool +Serialize in-flight I/Os that might otherwise cause or suffer from data races. 
+When two or more I/Os are submitted simultaneously, there is no guarantee that +the I/Os will be processed or completed in the submitted order. Further, if +two or more of those I/Os are writes, any overlapping region between them can +become indeterminate/undefined on certain storage. These issues can cause +verification to fail erratically when at least one of the racing I/Os is +changing data and the overlapping region has a non-zero size. Setting +\fBserialize_overlap\fR tells fio to avoid provoking this behavior by explicitly +serializing in-flight I/Os that have a non-zero overlap. Note that setting +this option can reduce both performance and the \fBiodepth\fR achieved. +Additionally this option does not work when \fBio_submit_mode\fR is set to +offload. Default: false. +.TP .BI io_submit_mode \fR=\fPstr This option controls how fio submits the IO to the IO engine. The default is \fBinline\fR, which means that the fio job threads submit and reap IO directly. diff --git a/init.c b/init.c index 42e71071..164e411c 100644 --- a/init.c +++ b/init.c @@ -698,6 +698,23 @@ static int fixup_options(struct thread_data *td) if (o->iodepth_batch_complete_min > o->iodepth_batch_complete_max) o->iodepth_batch_complete_max = o->iodepth_batch_complete_min; + /* + * There's no need to check for in-flight overlapping IOs if the job + * isn't changing data or the maximum iodepth is guaranteed to be 1 + */ + if (o->serialize_overlap && !(td->flags & TD_F_READ_IOLOG) && + (!(td_write(td) || td_trim(td)) || o->iodepth == 1)) + o->serialize_overlap = 0; + /* + * Currently can't check for overlaps in offload mode + */ + if (o->serialize_overlap && o->io_submit_mode == IO_MODE_OFFLOAD) { + log_err("fio: checking for in-flight overlaps when the " + "io_submit_mode is offload is not supported\n"); + o->serialize_overlap = 0; + ret = warnings_fatal; + } + if (o->nr_files > td->files_index) o->nr_files = td->files_index; diff --git a/iolog.c b/iolog.c index 27c14eb3..760d7b0a 100644 --- 
a/iolog.c +++ b/iolog.c @@ -227,21 +227,16 @@ void log_io_piece(struct thread_data *td, struct io_u *io_u) } /* - * We don't need to sort the entries, if: + * We don't need to sort the entries if we only performed sequential + * writes. In this case, just reading back data in the order we wrote + * it out is faster but still safe. * - * Sequential writes, or - * Random writes that lay out the file as it goes along - * - * For both these cases, just reading back data in the order we - * wrote it out is the fastest. - * - * One exception is if we don't have a random map AND we are doing - * verifies, in that case we need to check for duplicate blocks and - * drop the old one, which we rely on the rb insert/lookup for - * handling. + * One exception is if we don't have a random map in which case we need + * to check for duplicate blocks and drop the old one, which we rely on + * the rb insert/lookup for handling. */ - if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) && - (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) { + if (((!td->o.verifysort) || !td_random(td)) && + file_randommap(td, ipo->file)) { INIT_FLIST_HEAD(&ipo->list); flist_add_tail(&ipo->list, &td->io_hist_list); ipo->flags |= IP_F_ONLIST; @@ -284,7 +279,8 @@ restart: td->io_hist_len--; rb_erase(parent, &td->io_hist_tree); remove_trim_entry(td, __ipo); - free(__ipo); + if (!(__ipo->flags & IP_F_IN_FLIGHT)) + free(__ipo); goto restart; } } diff --git a/options.c b/options.c index f2b2bb9c..443791ab 100644 --- a/options.c +++ b/options.c @@ -1881,6 +1881,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_IO, .group = FIO_OPT_G_IO_BASIC, }, + { + .name = "serialize_overlap", + .lname = "Serialize overlap", + .off1 = offsetof(struct thread_options, serialize_overlap), + .type = FIO_OPT_BOOL, + .help = "Wait for in-flight IOs that collide to complete", + .parent = "iodepth", + .def = "0", + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_IO_BASIC, + 
}, { .name = "io_submit_mode", .lname = "IO submit mode", diff --git a/thread_options.h b/thread_options.h index f3dfd42f..26a3e0e6 100644 --- a/thread_options.h +++ b/thread_options.h @@ -65,6 +65,7 @@ struct thread_options { unsigned int iodepth_batch; unsigned int iodepth_batch_complete_min; unsigned int iodepth_batch_complete_max; + unsigned int serialize_overlap; unsigned int unique_filename; @@ -340,6 +341,8 @@ struct thread_options_pack { uint32_t iodepth_batch; uint32_t iodepth_batch_complete_min; uint32_t iodepth_batch_complete_max; + uint32_t serialize_overlap; + uint32_t pad3; uint64_t size; uint64_t io_size;