Add iodepth_batch_complete control
author Jens Axboe <jens.axboe@oracle.com>
Thu, 5 Jun 2008 07:03:30 +0000 (09:03 +0200)
committer Jens Axboe <jens.axboe@oracle.com>
Thu, 5 Jun 2008 07:03:30 +0000 (09:03 +0200)
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
HOWTO
fio.c
fio.h
io_u.c
ioengines.c
options.c

diff --git a/HOWTO b/HOWTO
index 12974f3..50ca467 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -433,11 +433,21 @@ iodepth=int       This defines how many io units to keep in flight against
                job, can be overridden with a larger value for higher
                concurrency.
 
+iodepth_batch_submit=int
 iodepth_batch=int This defines how many pieces of IO to submit at once.
                It defaults to 1 which means that we submit each IO
                as soon as it is available, but can be raised to submit
                bigger batches of IO at the time.
 
+iodepth_batch_complete=int This defines how many pieces of IO to retrieve
+               at once. It defaults to 1 which means that we'll ask
+               for a minimum of 1 IO in the retrieval process from
+               the kernel. The IO retrieval will go on until we
+               hit the limit set by iodepth_low. If this variable is
+               set to 0, then fio will always check for completed
+               events before queuing more IO. This helps reduce
+               IO latency, at the cost of more retrieval system calls.
+
 iodepth_low=int        The low water mark indicating when to start filling
                the queue again. Defaults to the same as iodepth, meaning
                that fio will attempt to keep the queue full at all times.
diff --git a/fio.c b/fio.c
index f0566ed..cedbfb0 100644 (file)
--- a/fio.c
+++ b/fio.c
@@ -355,7 +355,7 @@ static void do_verify(struct thread_data *td)
 
        io_u = NULL;
        while (!td->terminate) {
-               int ret2;
+               int ret2, full;
 
                io_u = __get_io_u(td);
                if (!io_u)
@@ -435,19 +435,25 @@ sync_done:
                 * if we can queue more, do so. but check if there are
                 * completed io_u's first.
                 */
-               min_events = 0;
-               if (queue_full(td) || ret == FIO_Q_BUSY) {
-                       if (td->cur_depth >= td->o.iodepth_low)
-                               min_events = td->cur_depth - td->o.iodepth_low;
-                       if (!min_events)
+               full = queue_full(td) || ret == FIO_Q_BUSY;
+               if (full || !td->o.iodepth_batch_complete) {
+                       min_events = td->o.iodepth_batch_complete;
+                       if (full && !min_events)
                                min_events = 1;
-               }
 
-               /*
-                * Reap required number of io units, if any, and do the
-                * verification on them through the callback handler
-                */
-               if (io_u_queued_complete(td, min_events) < 0)
+                       do {
+                               /*
+                                * Reap required number of io units, if any,
+                                * and do the verification on them through
+                                * the callback handler
+                                */
+                               if (io_u_queued_complete(td, min_events) < 0) {
+                                       ret = -1;
+                                       break;
+                               }
+                       } while (full && (td->cur_depth > td->o.iodepth_low));
+               }
+               if (ret < 0)
                        break;
        }
 
@@ -480,7 +486,7 @@ static void do_io(struct thread_data *td)
                long bytes_done = 0;
                int min_evts = 0;
                struct io_u *io_u;
-               int ret2;
+               int ret2, full;
 
                if (td->terminate)
                        break;
@@ -570,18 +576,25 @@ sync_done:
                /*
                 * See if we need to complete some commands
                 */
-               if (queue_full(td) || ret == FIO_Q_BUSY) {
-                       min_evts = 0;
-                       if (td->cur_depth >= td->o.iodepth_low)
-                               min_evts = td->cur_depth - td->o.iodepth_low;
-                       if (!min_evts)
+               full = queue_full(td) || ret == FIO_Q_BUSY;
+               if (full || !td->o.iodepth_batch_complete) {
+                       min_evts = td->o.iodepth_batch_complete;
+                       if (full && !min_evts)
                                min_evts = 1;
+
                        fio_gettime(&comp_time, NULL);
-                       bytes_done = io_u_queued_complete(td, min_evts);
-                       if (bytes_done < 0)
-                               break;
+
+                       do {
+                               ret = io_u_queued_complete(td, min_evts);
+                               if (ret <= 0)
+                                       break;
+
+                               bytes_done += ret;
+                       } while (full && (td->cur_depth > td->o.iodepth_low));
                }
 
+               if (ret < 0)
+                       break;
                if (!bytes_done)
                        continue;
 
diff --git a/fio.h b/fio.h
index 9d80237..207b2ec 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -418,6 +418,7 @@ struct thread_options {
        unsigned int iodepth;
        unsigned int iodepth_low;
        unsigned int iodepth_batch;
+       unsigned int iodepth_batch_complete;
 
        unsigned long long size;
        unsigned int fill_device;
diff --git a/io_u.c b/io_u.c
index 3f71367..92b7076 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -971,19 +971,19 @@ long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
 /*
  * Called to complete min_events number of io for the async engines.
  */
-long io_u_queued_complete(struct thread_data *td, int min_events)
+long io_u_queued_complete(struct thread_data *td, int min_evts)
 {
        struct io_completion_data icd;
        struct timespec *tvp = NULL;
        int ret;
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
 
-       dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_events);
+       dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);
 
-       if (!min_events)
+       if (!min_evts)
                tvp = &ts;
 
-       ret = td_io_getevents(td, min_events, td->cur_depth, tvp);
+       ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
        if (ret < 0) {
                td_verror(td, -ret, "td_io_getevents");
                return ret;
diff --git a/ioengines.c b/ioengines.c
index 8975591..e447539 100644 (file)
--- a/ioengines.c
+++ b/ioengines.c
@@ -195,9 +195,13 @@ int td_io_getevents(struct thread_data *td, unsigned int min, unsigned int max,
                if (r < 0)
                        goto out;
        }
+       if (max > td->cur_depth)
+               max = td->cur_depth;
+       if (min > max)
+               max = min;
 
        r = 0;
-       if (td->io_ops->getevents)
+       if (max && td->io_ops->getevents)
                r = td->io_ops->getevents(td, min, max, t);
 out:
        if (r >= 0)
diff --git a/options.c b/options.c
index b398695..18787f8 100644 (file)
--- a/options.c
+++ b/options.c
@@ -612,6 +612,7 @@ static struct fio_option options[] = {
        },
        {
                .name   = "iodepth_batch",
+               .alias  = "iodepth_batch_submit",
                .type   = FIO_OPT_INT,
                .off1   = td_var_offset(iodepth_batch),
                .help   = "Number of IO to submit in one go",
@@ -619,6 +620,15 @@ static struct fio_option options[] = {
                .minval = 1,
                .def    = "1",
        },
+       {
+               .name   = "iodepth_batch_complete",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(iodepth_batch_complete),
+               .help   = "Number of IO to retrieve in one go",
+               .parent = "iodepth",
+               .minval = 0,
+               .def    = "1",
+       },
        {
                .name   = "iodepth_low",
                .type   = FIO_OPT_INT,