Fix thread hang when using async engines (libaio, etc.) when too low an iops rate...
author     Ryan Marchand <rmarchan@amazon.com>
           Tue, 31 Jan 2012 13:05:32 +0000 (14:05 +0100)
committer  Jens Axboe <axboe@kernel.dk>
           Tue, 31 Jan 2012 13:05:32 +0000 (14:05 +0100)
Rate limiting logic was using thread_data->cur_depth to decide the
min_evts number to ask for during its "flush" prior to sleeping.
td->cur_depth, however, does not properly track in-flight IOs submitted
to the async engines.  Add a field to the thread_data structure and use
that instead to track IOs currently in flight.

Signed-off-by: Ryan Marchand <rmarchan@amazon.com>
Signed-off-by: Steven Noonan <snoonan@amazon.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fio.h
io_u.c
ioengines.c
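
A minimal standalone sketch of the accounting pattern the diff below introduces, for orientation only: it is not fio code, and the names (engine_state, commit_io, reap_events, rate_limit_sleep) are hypothetical stand-ins for td_io_commit(), td_io_getevents() and the flush performed in rate_ddir().

/*
 * Sketch only: "queued" mirrors td->io_u_queued and "in_flight" mirrors
 * the td->io_u_in_flight counter added by this commit.
 */
#include <stdio.h>

struct engine_state {
        unsigned int queued;     /* io_u's prepared but not yet submitted */
        unsigned int in_flight;  /* io_u's submitted to the async engine  */
};

/* Submit everything queued to the (imaginary) async engine. */
static void commit_io(struct engine_state *s)
{
        /* ...the engine's commit hook would run here... */
        s->in_flight += s->queued;
        s->queued = 0;
}

/* Reap 'n' completions from the (imaginary) async engine. */
static void reap_events(struct engine_state *s, unsigned int n)
{
        /* ...the engine's getevents hook would run here... */
        s->in_flight -= n;
}

/*
 * Before sleeping to honor the iops rate, wait only for what is actually
 * in flight.  Waiting on cur_depth instead can hang, because cur_depth
 * also counts io_u's that were never submitted and so can never complete.
 */
static void rate_limit_sleep(struct engine_state *s)
{
        if (s->in_flight)
                reap_events(s, s->in_flight);
        /* the rate-limiting sleep would follow here */
}

int main(void)
{
        struct engine_state s = { .queued = 4, .in_flight = 0 };

        commit_io(&s);         /* 4 io_u's are now truly in flight */
        rate_limit_sleep(&s);  /* waits on those 4, not on a stale depth */
        printf("in flight after flush: %u\n", s.in_flight);
        return 0;
}

The point of the separate counter is that only submitted IOs can ever be reaped, so a flush keyed on io_u_in_flight cannot wait forever, while one keyed on cur_depth could.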

diff --git a/fio.h b/fio.h
index a9286b86729d6231c899ab533b4ab13110513266..c8b94f6a22deccdf2110c6d3afa33a7eee3958fb 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -347,10 +347,23 @@ struct thread_data {
        struct ioengine_ops *io_ops;
 
        /*
-        * Current IO depth and list of free and busy io_u's.
+        * Queue depth of io_u's that fio MIGHT do
         */
        unsigned int cur_depth;
+
+       /*
+        * io_u's about to be committed
+        */
        unsigned int io_u_queued;
+
+       /*
+        * io_u's submitted but not completed yet
+        */
+       unsigned int io_u_in_flight;
+
+       /*
+        * List of free and busy io_u's
+        */
        struct flist_head io_u_freelist;
        struct flist_head io_u_busylist;
        struct flist_head io_u_requeues;
diff --git a/io_u.c b/io_u.c
index 703d1e85e45ad570715d237bd3dc96fccff6f020..428b312c9b34fea3cd1db6d38b375fd1a14376e6 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -496,12 +496,17 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
 
        /*
         * We are going to sleep, ensure that we flush anything pending as
-        * not to skew our latency numbers
+        * not to skew our latency numbers.
+        *
+        * Changed to only monitor 'in flight' requests here instead of the
+        * td->cur_depth, b/c td->cur_depth does not accurately represent
+        * io's that have been actually submitted to an async engine,
+        * and cur_depth is meaningless for sync engines.
         */
-       if (td->cur_depth) {
+       if (td->io_u_in_flight) {
                int fio_unused ret;
 
-               ret = io_u_queued_complete(td, td->cur_depth, NULL);
+               ret = io_u_queued_complete(td, td->io_u_in_flight, NULL);
        }
 
        fio_gettime(&t, NULL);
diff --git a/ioengines.c b/ioengines.c
index e8ed871d2d96d641014cbd79e7e4df27c6b472a3..4c609f2b88ae4c537dc90489241808acd711fa1f 100644 (file)
--- a/ioengines.c
+++ b/ioengines.c
@@ -222,9 +222,14 @@ int td_io_getevents(struct thread_data *td, unsigned int min, unsigned int max,
        if (max && td->io_ops->getevents)
                r = td->io_ops->getevents(td, min, max, t);
 out:
-       if (r >= 0)
+       if (r >= 0) {
+               /*
+                * Reflect that our submitted requests were retrieved with
+                * whatever OS async calls are in the underlying engine.
+                */
+               td->io_u_in_flight -= r;
                io_u_mark_complete(td, r);
-       else
+       } else
                td_verror(td, r, "get_events");
 
        dprint(FD_IO, "getevents: %d\n", r);
@@ -344,14 +349,19 @@ int td_io_commit(struct thread_data *td)
        if (!td->cur_depth || !td->io_u_queued)
                return 0;
 
-       io_u_mark_depth(td, td->io_u_queued);
-       td->io_u_queued = 0;
+       io_u_mark_depth(td, td->io_u_queued);
 
        if (td->io_ops->commit) {
                ret = td->io_ops->commit(td);
                if (ret)
                        td_verror(td, -ret, "io commit");
        }
+
+       /*
+        * Reflect that events were submitted as async IO requests.
+        */
+       td->io_u_in_flight += td->io_u_queued;
+       td->io_u_queued = 0;
 
        return 0;
 }