Merge branch 'ci' of https://github.com/sitsofe/fio
author Jens Axboe <axboe@kernel.dk>
Mon, 14 Aug 2017 14:32:56 +0000 (08:32 -0600)
committer Jens Axboe <axboe@kernel.dk>
Mon, 14 Aug 2017 14:32:56 +0000 (08:32 -0600)
HOWTO
backend.c
cconv.c
fio.1
init.c
iolog.c
options.c
thread_options.h

diff --git a/HOWTO b/HOWTO
index fc173f02876faab1b0049b4e8f4b6c99acd5b3a5..8a7cb1aac380973900a32884f08c64df3cb41a02 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -2030,6 +2030,21 @@ I/O depth
        16 requests, it will let the depth drain down to 4 before starting to fill
        it again.
 
+.. option:: serialize_overlap=bool
+
+       Serialize in-flight I/Os that might otherwise cause or suffer from data races.
+       When two or more I/Os are submitted simultaneously, there is no guarantee that
+       the I/Os will be processed or completed in the submitted order. Further, if
+       two or more of those I/Os are writes, any overlapping region between them can
+       become indeterminate/undefined on certain storage. These issues can cause
+       verification to fail erratically when at least one of the racing I/Os is
+       changing data and the overlapping region has a non-zero size. Setting
+       ``serialize_overlap`` tells fio to avoid provoking this behavior by explicitly
+       serializing in-flight I/Os that have a non-zero overlap. Note that setting
+       this option can reduce both performance and the :option:`iodepth` achieved.
+       Additionally this option does not work when :option:`io_submit_mode` is set to
+       offload. Default: false.
+
 .. option:: io_submit_mode=str
 
        This option controls how fio submits the I/O to the I/O engine. The default
@@ -2605,7 +2620,6 @@ Verification
 
        Enable experimental verification.
 
-
 Steady state
 ~~~~~~~~~~~~
 
diff --git a/backend.c b/backend.c
index fe1599706fcdcb32b0990b1e840b3259448396ab..d2675b43cfd3b9900636ecb76ea8c96f82817352 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -586,6 +586,50 @@ static int unlink_all_files(struct thread_data *td)
        return ret;
 }
 
+/*
+ * Check if io_u will overlap an in-flight IO in the queue
+ */
+static bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u)
+{
+       bool overlap;
+       struct io_u *check_io_u;
+       unsigned long long x1, x2, y1, y2;
+       int i;
+
+       x1 = io_u->offset;
+       x2 = io_u->offset + io_u->buflen;
+       overlap = false;
+       io_u_qiter(q, check_io_u, i) {
+               if (check_io_u->flags & IO_U_F_FLIGHT) {
+                       y1 = check_io_u->offset;
+                       y2 = check_io_u->offset + check_io_u->buflen;
+
+                       if (x1 < y2 && y1 < x2) {
+                               overlap = true;
+                               dprint(FD_IO, "in-flight overlap: %llu/%lu, %llu/%lu\n",
+                                               x1, io_u->buflen,
+                                               y1, check_io_u->buflen);
+                               break;
+                       }
+               }
+       }
+
+       return overlap;
+}
+
+static int io_u_submit(struct thread_data *td, struct io_u *io_u)
+{
+       /*
+        * Check for overlap if the user asked us to, and we have
+        * at least one IO in flight besides this one.
+        */
+       if (td->o.serialize_overlap && td->cur_depth > 1 &&
+           in_flight_overlap(&td->io_u_all, io_u))
+               return FIO_Q_BUSY;
+
+       return td_io_queue(td, io_u);
+}
+
 /*
  * The main verify engine. Runs over the writes we previously submitted,
  * reads the blocks back in, and checks the crc/md5 of the data.
@@ -716,7 +760,7 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes)
                if (!td->o.disable_slat)
                        fio_gettime(&io_u->start_time, NULL);
 
-               ret = td_io_queue(td, io_u);
+               ret = io_u_submit(td, io_u);
 
                if (io_queue_event(td, io_u, &ret, ddir, NULL, 1, NULL))
                        break;
@@ -983,7 +1027,7 @@ static void do_io(struct thread_data *td, uint64_t *bytes_done)
                                td->rate_next_io_time[ddir] = usec_for_io(td, ddir);
 
                } else {
-                       ret = td_io_queue(td, io_u);
+                       ret = io_u_submit(td, io_u);
 
                        if (should_check_rate(td))
                                td->rate_next_io_time[ddir] = usec_for_io(td, ddir);
diff --git a/cconv.c b/cconv.c
index f9f2b306166a8aaf12089bba627914115477f170..ac58705dabf40e17089e612f980c5f63db1d0e1a 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -96,6 +96,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->iodepth_batch = le32_to_cpu(top->iodepth_batch);
        o->iodepth_batch_complete_min = le32_to_cpu(top->iodepth_batch_complete_min);
        o->iodepth_batch_complete_max = le32_to_cpu(top->iodepth_batch_complete_max);
+       o->serialize_overlap = le32_to_cpu(top->serialize_overlap);
        o->size = le64_to_cpu(top->size);
        o->io_size = le64_to_cpu(top->io_size);
        o->size_percent = le32_to_cpu(top->size_percent);
@@ -346,6 +347,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->iodepth_batch = cpu_to_le32(o->iodepth_batch);
        top->iodepth_batch_complete_min = cpu_to_le32(o->iodepth_batch_complete_min);
        top->iodepth_batch_complete_max = cpu_to_le32(o->iodepth_batch_complete_max);
+       top->serialize_overlap = cpu_to_le32(o->serialize_overlap);
        top->size_percent = cpu_to_le32(o->size_percent);
        top->fill_device = cpu_to_le32(o->fill_device);
        top->file_append = cpu_to_le32(o->file_append);
diff --git a/fio.1 b/fio.1
index a3fba650f1c42d988ea5d1c9d5e8042e9d5e03b5..14359e609a961eab7a1bce506bdd01a2cc5a67aa 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -1044,6 +1044,20 @@ we simply do polling.
 Low watermark indicating when to start filling the queue again.  Default:
 \fBiodepth\fR.
 .TP
+.BI serialize_overlap \fR=\fPbool
+Serialize in-flight I/Os that might otherwise cause or suffer from data races.
+When two or more I/Os are submitted simultaneously, there is no guarantee that
+the I/Os will be processed or completed in the submitted order. Further, if
+two or more of those I/Os are writes, any overlapping region between them can
+become indeterminate/undefined on certain storage. These issues can cause
+verification to fail erratically when at least one of the racing I/Os is
+changing data and the overlapping region has a non-zero size. Setting
+\fBserialize_overlap\fR tells fio to avoid provoking this behavior by explicitly
+serializing in-flight I/Os that have a non-zero overlap. Note that setting
+this option can reduce both performance and the \fBiodepth\fR achieved.
+Additionally this option does not work when \fBio_submit_mode\fR is set to
+offload. Default: false.
+.TP
 .BI io_submit_mode \fR=\fPstr
 This option controls how fio submits the IO to the IO engine. The default is
 \fBinline\fR, which means that the fio job threads submit and reap IO directly.
diff --git a/init.c b/init.c
index 42e710715a250e2eca8f2a6a8cd7c609e6571455..164e411c6c8d4a9f576434422dce837edac3a988 100644 (file)
--- a/init.c
+++ b/init.c
@@ -698,6 +698,23 @@ static int fixup_options(struct thread_data *td)
        if (o->iodepth_batch_complete_min > o->iodepth_batch_complete_max)
                o->iodepth_batch_complete_max = o->iodepth_batch_complete_min;
 
+       /*
+        * There's no need to check for in-flight overlapping IOs if the job
+        * isn't changing data or the maximum iodepth is guaranteed to be 1
+        */
+       if (o->serialize_overlap && !(td->flags & TD_F_READ_IOLOG) &&
+           (!(td_write(td) || td_trim(td)) || o->iodepth == 1))
+               o->serialize_overlap = 0;
+       /*
+        * Currently can't check for overlaps in offload mode
+        */
+       if (o->serialize_overlap && o->io_submit_mode == IO_MODE_OFFLOAD) {
+               log_err("fio: checking for in-flight overlaps when the "
+                       "io_submit_mode is offload is not supported\n");
+               o->serialize_overlap = 0;
+               ret = warnings_fatal;
+       }
+
        if (o->nr_files > td->files_index)
                o->nr_files = td->files_index;
 
diff --git a/iolog.c b/iolog.c
index 27c14eb3630fda9179722d17bf0229374190e042..760d7b0a43d97962bb7d065c6e464c7b89a7f5f5 100644 (file)
--- a/iolog.c
+++ b/iolog.c
@@ -227,21 +227,16 @@ void log_io_piece(struct thread_data *td, struct io_u *io_u)
        }
 
        /*
-        * We don't need to sort the entries, if:
+        * We don't need to sort the entries if we only performed sequential
+        * writes. In this case, just reading back data in the order we wrote
+        * it out is faster but still safe.
         *
-        *      Sequential writes, or
-        *      Random writes that lay out the file as it goes along
-        *
-        * For both these cases, just reading back data in the order we
-        * wrote it out is the fastest.
-        *
-        * One exception is if we don't have a random map AND we are doing
-        * verifies, in that case we need to check for duplicate blocks and
-        * drop the old one, which we rely on the rb insert/lookup for
-        * handling.
+        * One exception is if we don't have a random map in which case we need
+        * to check for duplicate blocks and drop the old one, which we rely on
+        * the rb insert/lookup for handling.
         */
-       if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
-             (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
+       if (((!td->o.verifysort) || !td_random(td)) &&
+             file_randommap(td, ipo->file)) {
                INIT_FLIST_HEAD(&ipo->list);
                flist_add_tail(&ipo->list, &td->io_hist_list);
                ipo->flags |= IP_F_ONLIST;
@@ -284,7 +279,8 @@ restart:
                        td->io_hist_len--;
                        rb_erase(parent, &td->io_hist_tree);
                        remove_trim_entry(td, __ipo);
-                       free(__ipo);
+                       if (!(__ipo->flags & IP_F_IN_FLIGHT))
+                               free(__ipo);
                        goto restart;
                }
        }
diff --git a/options.c b/options.c
index f2b2bb9ce0a5c3806553cd5589367f2ad86c4f0c..443791abc6b1dc46400ec057abc0c03e83ea8023 100644 (file)
--- a/options.c
+++ b/options.c
@@ -1881,6 +1881,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_IO_BASIC,
        },
+       {
+               .name   = "serialize_overlap",
+               .lname  = "Serialize overlap",
+               .off1   = offsetof(struct thread_options, serialize_overlap),
+               .type   = FIO_OPT_BOOL,
+               .help   = "Wait for in-flight IOs that collide to complete",
+               .parent = "iodepth",
+               .def    = "0",
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_IO_BASIC,
+       },
        {
                .name   = "io_submit_mode",
                .lname  = "IO submit mode",
diff --git a/thread_options.h b/thread_options.h
index f3dfd42f7fbfa5df55e775606ad41f5f62c6a882..26a3e0e67c4968098319bca119a9c26d7ec5efa8 100644 (file)
--- a/thread_options.h
+++ b/thread_options.h
@@ -65,6 +65,7 @@ struct thread_options {
        unsigned int iodepth_batch;
        unsigned int iodepth_batch_complete_min;
        unsigned int iodepth_batch_complete_max;
+       unsigned int serialize_overlap;
 
        unsigned int unique_filename;
 
@@ -340,6 +341,8 @@ struct thread_options_pack {
        uint32_t iodepth_batch;
        uint32_t iodepth_batch_complete_min;
        uint32_t iodepth_batch_complete_max;
+       uint32_t serialize_overlap;
+       uint32_t pad3;
 
        uint64_t size;
        uint64_t io_size;