Introduce new option: iodepth_batch_complete_max

author Roman Pen <r.peniaev@gmail.com>

Sun, 27 Sep 2015 19:24:55 +0000 (21:24 +0200)

committer Jens Axboe <axboe@fb.com>

Thu, 1 Oct 2015 06:54:04 +0000 (08:54 +0200)
author Roman Pen <r.peniaev@gmail.com>
Sun, 27 Sep 2015 19:24:55 +0000 (21:24 +0200)
committer Jens Axboe <axboe@fb.com>
Thu, 1 Oct 2015 06:54:04 +0000 (08:54 +0200)
diff --git a/HOWTO b/HOWTO

index 3049316a29aee80f28d0fd89ed686c943b704cb6..40233bd90e5c1cf43a47541b8367506a46327dc5 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -815,6 +815,7 @@ iodepth_batch=int This defines how many pieces of IO to submit at once.
                 bigger batches of IO at the time. If it is set to 0 the iodepth
                 value will be used.
  
+iodepth_batch_complete_min=int
  iodepth_batch_complete=int This defines how many pieces of IO to retrieve
                 at once. It defaults to 1 which means that we'll ask
                 for a minimum of 1 IO in the retrieval process from
@@ -824,6 +825,31 @@ iodepth_batch_complete=int This defines how many pieces of IO to retrieve
                 events before queuing more IO. This helps reduce
                 IO latency, at the cost of more retrieval system calls.
  
+iodepth_batch_complete_max=int This defines the maximum number of IOs to
+               retrieve at once. This option should be used along with
+               iodepth_batch_complete_min=int to specify the range of the
+               min and max number of IOs to retrieve. By default
+               it is equal to the iodepth_batch_complete_min value.
+
+               Example #1:
+
+               iodepth_batch_complete_min=1
+               iodepth_batch_complete_max=<iodepth>
+
+               which means that we will retrieve at least 1 IO and up to the
+               whole submitted queue depth. If no IO has completed yet, we
+               will wait.
+
+               Example #2:
+
+               iodepth_batch_complete_min=0
+               iodepth_batch_complete_max=<iodepth>
+
+               which means that we can retrieve up to the whole submitted
+               queue depth, but if no IO has completed yet, we will
+               NOT wait and will immediately exit the system call. In this
+               example we simply do polling.
+
  iodepth_low=int        The low water mark indicating when to start filling
                 the queue again. Defaults to the same as iodepth, meaning
                 that fio will attempt to keep the queue full at all times.
diff --git a/backend.c b/backend.c

index dec0d55b0dbf3382cad2bfdfcfad9d4683d912a9..b1477df8e4672fed634b73e677655acc110dede5 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -446,8 +446,8 @@ static int wait_for_completions(struct thread_data *td, struct timeval *time)
         /*
          * if the queue is full, we MUST reap at least 1 event
          */
-       min_evts = min(td->o.iodepth_batch_complete, td->cur_depth);
-    if ((full && !min_evts) || !td->o.iodepth_batch_complete)
+       min_evts = min(td->o.iodepth_batch_complete_min, td->cur_depth);
+    if ((full && !min_evts) || !td->o.iodepth_batch_complete_min)
                 min_evts = 1;
  
         if (time && (__should_check_rate(td, DDIR_READ) ||
@@ -551,6 +551,12 @@ sync_done:
         return 0;
  }
  
+static inline int io_in_polling(struct thread_data *td)
+{
+       return !td->o.iodepth_batch_complete_min &&
+                  !td->o.iodepth_batch_complete_max;
+}
+
  /*
   * The main verify engine. Runs over the writes we previously submitted,
   * reads the blocks back in, and checks the crc/md5 of the data.
@@ -684,7 +690,7 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes)
                  */
  reap:
                 full = queue_full(td) || (ret == FIO_Q_BUSY && td->cur_depth);
-               if (full || !td->o.iodepth_batch_complete)
+               if (full || io_in_polling(td))
                         ret = wait_for_completions(td, NULL);
  
                 if (ret < 0)
@@ -932,7 +938,7 @@ static uint64_t do_io(struct thread_data *td)
  reap:
                         full = queue_full(td) ||
                                 (ret == FIO_Q_BUSY && td->cur_depth);
-                       if (full || !td->o.iodepth_batch_complete)
+                       if (full || io_in_polling(td))
                                 ret = wait_for_completions(td, &comp_time);
                 }
                 if (ret < 0)
diff --git a/cconv.c b/cconv.c

index 44f17dab3c7c213e3aed97362b2db0c0f5954e60..fde8c6de6e8dc9baa5de2bc89f1f21b54ffd336b 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -83,7 +83,8 @@ void convert_thread_options_to_cpu(struct thread_options *o,
         o->iodepth = le32_to_cpu(top->iodepth);
         o->iodepth_low = le32_to_cpu(top->iodepth_low);
         o->iodepth_batch = le32_to_cpu(top->iodepth_batch);
-       o->iodepth_batch_complete = le32_to_cpu(top->iodepth_batch_complete);
+       o->iodepth_batch_complete_min = le32_to_cpu(top->iodepth_batch_complete_min);
+       o->iodepth_batch_complete_max = le32_to_cpu(top->iodepth_batch_complete_max);
         o->size = le64_to_cpu(top->size);
         o->io_limit = le64_to_cpu(top->io_limit);
         o->size_percent = le32_to_cpu(top->size_percent);
@@ -300,7 +301,8 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
         top->iodepth = cpu_to_le32(o->iodepth);
         top->iodepth_low = cpu_to_le32(o->iodepth_low);
         top->iodepth_batch = cpu_to_le32(o->iodepth_batch);
-       top->iodepth_batch_complete = cpu_to_le32(o->iodepth_batch_complete);
+       top->iodepth_batch_complete_min = cpu_to_le32(o->iodepth_batch_complete_min);
+       top->iodepth_batch_complete_max = cpu_to_le32(o->iodepth_batch_complete_max);
         top->size_percent = cpu_to_le32(o->size_percent);
         top->fill_device = cpu_to_le32(o->fill_device);
         top->file_append = cpu_to_le32(o->file_append);
diff --git a/engines/libaio.c b/engines/libaio.c

index 9685c99d84ec9d1705f4f8515ec3906dd4a1c2e3..60dc49d3352a950fa522931ccdfe52a601628975 100644 (file)
--- a/engines/libaio.c
+++ b/engines/libaio.c
@@ -146,7 +146,7 @@ static int fio_libaio_getevents(struct thread_data *td, unsigned int min,
  {
         struct libaio_data *ld = td->io_ops->data;
         struct libaio_options *o = td->eo;
-       unsigned actual_min = td->o.iodepth_batch_complete == 0 ? 0 : min;
+       unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min;
         struct timespec __lt, *lt = NULL;
         int r, events = 0;
  
diff --git a/fio.1 b/fio.1

index aea9f348512b2529fdd848459b76afd3e1908739..b04979089062daac1793d4126408edeb56174008 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -704,7 +704,7 @@ which means that we submit each IO as soon as it is available, but can
  be raised to submit bigger batches of IO at the time. If it is set to 0
  the \fBiodepth\fR value will be used.
  .TP
-.BI iodepth_batch_complete \fR=\fPint
+.BI iodepth_batch_complete_min \fR=\fPint "\fR,\fP iodepth_batch_complete" \fR=\fPint
  This defines how many pieces of IO to retrieve at once. It defaults to 1 which
   means that we'll ask for a minimum of 1 IO in the retrieval process from the
  kernel. The IO retrieval will go on until we hit the limit set by
@@ -712,6 +712,38 @@ kernel. The IO retrieval will go on until we hit the limit set by
  completed events before queuing more IO. This helps reduce IO latency, at the
  cost of more retrieval system calls.
  .TP
+.BI iodepth_batch_complete_max \fR=\fPint
+This defines the maximum number of IOs to
+retrieve at once. This option should be used along with
+\fBiodepth_batch_complete_min\fR=int to specify the range
+of the min and max number of IOs to retrieve. By default
+it is equal to the \fBiodepth_batch_complete_min\fR value.
+
+Example #1:
+.RS
+.RS
+\fBiodepth_batch_complete_min\fR=1
+.LP
+\fBiodepth_batch_complete_max\fR=<iodepth>
+.RE
+
+which means that we will retrieve at least 1 IO and up to the
+whole submitted queue depth. If no IO has completed yet, we
+will wait.
+
+Example #2:
+.RS
+\fBiodepth_batch_complete_min\fR=0
+.LP
+\fBiodepth_batch_complete_max\fR=<iodepth>
+.RE
+
+which means that we can retrieve up to the whole submitted
+queue depth, but if no IO has completed yet, we will
+NOT wait and will immediately exit the system call. In this
+example we simply do polling.
+.RE
+.TP
  .BI iodepth_low \fR=\fPint
  Low watermark indicating when to start filling the queue again.  Default:
  \fBiodepth\fR. 
diff --git a/init.c b/init.c

index 684cd60042809ff473843f1f846b3bff3565d00e..3f72b36688b132932211e3c9ea162ab9d9fe4cd6 100644 (file)
--- a/init.c
+++ b/init.c
@@ -630,6 +630,13 @@ static int fixup_options(struct thread_data *td)
         if (o->iodepth_batch > o->iodepth || !o->iodepth_batch)
                 o->iodepth_batch = o->iodepth;
  
+       /*
+        * If max batch complete number isn't set or set incorrectly,
+        * default to the same as iodepth_batch_complete_min
+        */
+       if (o->iodepth_batch_complete_min > o->iodepth_batch_complete_max)
+               o->iodepth_batch_complete_max = o->iodepth_batch_complete_min;
+
         if (o->nr_files > td->files_index)
                 o->nr_files = td->files_index;
  
diff --git a/io_u.c b/io_u.c

index ac55b2fd6805125791e7df1ab65ba5eee789ca52..6dda5790a7f1d494fd46758ecc9b3b6406e4a11a 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -1836,7 +1836,9 @@ int io_u_queued_complete(struct thread_data *td, int min_evts)
         else if (min_evts > td->cur_depth)
                 min_evts = td->cur_depth;
  
-       ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
+       /* No worries, td_io_getevents fixes min and max if they are
+        * set incorrectly */
+       ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete_max, tvp);
         if (ret < 0) {
                 td_verror(td, -ret, "td_io_getevents");
                 return ret;
diff --git a/options.c b/options.c

index 1868dfdd0115ec9b2fae05a9b794a8f33f8cf5ab..0169ca2974329bda89c1444246ddfebade1ac462 100644 (file)
--- a/options.c
+++ b/options.c
@@ -1504,11 +1504,12 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                 .group  = FIO_OPT_G_IO_BASIC,
         },
         {
-               .name   = "iodepth_batch_complete",
-               .lname  = "IO Depth batch complete",
+               .name   = "iodepth_batch_complete_min",
+               .lname  = "Min IO depth batch complete",
+               .alias  = "iodepth_batch_complete",
                 .type   = FIO_OPT_INT,
-               .off1   = td_var_offset(iodepth_batch_complete),
-               .help   = "Number of IO buffers to retrieve in one go",
+               .off1   = td_var_offset(iodepth_batch_complete_min),
+               .help   = "Min number of IO buffers to retrieve in one go",
                 .parent = "iodepth",
                 .hide   = 1,
                 .minval = 0,
@@ -1517,6 +1518,19 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                 .category = FIO_OPT_C_IO,
                 .group  = FIO_OPT_G_IO_BASIC,
         },
+       {
+               .name   = "iodepth_batch_complete_max",
+               .lname  = "Max IO depth batch complete",
+               .type   = FIO_OPT_INT,
+               .off1   = td_var_offset(iodepth_batch_complete_max),
+               .help   = "Max number of IO buffers to retrieve in one go",
+               .parent = "iodepth",
+               .hide   = 1,
+               .minval = 0,
+               .interval = 1,
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_IO_BASIC,
+       },
         {
                 .name   = "iodepth_low",
                 .lname  = "IO Depth batch low",
diff --git a/thread_options.h b/thread_options.h

index 38936e9a9e811e8e5d092a9de08eaefecff5f199..5ef560ef157d02e58f7c42f74f5a0780a9feefc0 100644 (file)
--- a/thread_options.h
+++ b/thread_options.h
@@ -54,7 +54,8 @@ struct thread_options {
         unsigned int iodepth;
         unsigned int iodepth_low;
         unsigned int iodepth_batch;
-       unsigned int iodepth_batch_complete;
+       unsigned int iodepth_batch_complete_min;
+       unsigned int iodepth_batch_complete_max;
  
         unsigned long long size;
         unsigned long long io_limit;
@@ -299,7 +300,9 @@ struct thread_options_pack {
         uint32_t iodepth;
         uint32_t iodepth_low;
         uint32_t iodepth_batch;
-       uint32_t iodepth_batch_complete;
+       uint32_t iodepth_batch_complete_min;
+       uint32_t iodepth_batch_complete_max;
+       uint32_t __proper_alignment_for_64b;
  
         uint64_t size;
         uint64_t io_limit;
author	Roman Pen <r.peniaev@gmail.com>
	Sun, 27 Sep 2015 19:24:55 +0000 (21:24 +0200)
committer	Jens Axboe <axboe@fb.com>
	Thu, 1 Oct 2015 06:54:04 +0000 (08:54 +0200)
HOWTO		patch \| blob \| blame \| history
backend.c		patch \| blob \| blame \| history
cconv.c		patch \| blob \| blame \| history
engines/libaio.c		patch \| blob \| blame \| history
fio.1		patch \| blob \| blame \| history
init.c		patch \| blob \| blame \| history
io_u.c		patch \| blob \| blame \| history
options.c		patch \| blob \| blame \| history
thread_options.h		patch \| blob \| blame \| history