workqueue: ensure we see deferred error for IOs
author Jens Axboe <axboe@kernel.dk>
Thu, 15 Nov 2018 22:21:39 +0000 (15:21 -0700)
committer Jens Axboe <axboe@kernel.dk>
Thu, 15 Nov 2018 22:21:39 +0000 (15:21 -0700)
If a workqueue worker gets an error, for instance while doing
io_getevents(), the parent never learns that this happened. The
parent then hangs waiting for IO to complete, which will never happen.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
backend.c
io_u.c
rate-submit.c

diff --git a/backend.c b/backend.c
index d6450baf1efdef1da5945445e7f9325c2e7f0efd..12f1b2b909b95cd0df148a86b1a62a706d479c99 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -237,6 +237,9 @@ static void cleanup_pending_aio(struct thread_data *td)
 {
        int r;
 
+       if (td->error)
+               return;
+
        /*
         * get immediately available events, if any
         */
diff --git a/io_u.c b/io_u.c
index 56abe6fd598ef2d8ecb0c7565b9552e0755e2ad5..d28d368f694916683ad05ad34f7fa446d3c11e89 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -604,7 +604,7 @@ static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
 
 int io_u_quiesce(struct thread_data *td)
 {
-       int completed = 0;
+       int ret = 0, completed = 0;
 
        /*
         * We are going to sleep, ensure that we flush anything pending as
@@ -619,17 +619,20 @@ int io_u_quiesce(struct thread_data *td)
                td_io_commit(td);
 
        while (td->io_u_in_flight) {
-               int ret;
-
                ret = io_u_queued_complete(td, 1);
                if (ret > 0)
                        completed += ret;
+               else if (ret < 0)
+                       break;
        }
 
        if (td->flags & TD_F_REGROW_LOGS)
                regrow_logs(td);
 
-       return completed;
+       if (completed)
+               return completed;
+
+       return ret;
 }
 
 static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
@@ -1556,6 +1559,8 @@ again:
                assert(!(td->flags & TD_F_CHILD));
                ret = pthread_cond_wait(&td->free_cond, &td->io_u_lock);
                assert(ret == 0);
+               if (td->error)
+                       return NULL;
                goto again;
        }
 
diff --git a/rate-submit.c b/rate-submit.c
index e5c6204351db00304db32fa376837faa9c420cc9..b07a207273b9d68223f506e4fb0b3f824d999fbd 100644 (file)
--- a/rate-submit.c
+++ b/rate-submit.c
@@ -53,7 +53,7 @@ static int io_workqueue_fn(struct submit_worker *sw,
        struct io_u *io_u = container_of(work, struct io_u, work);
        const enum fio_ddir ddir = io_u->ddir;
        struct thread_data *td = sw->priv;
-       int ret;
+       int ret, error;
 
        if (td->o.serialize_overlap)
                check_overlap(io_u);
@@ -71,12 +71,14 @@ static int io_workqueue_fn(struct submit_worker *sw,
                ret = io_u_queued_complete(td, 1);
                if (ret > 0)
                        td->cur_depth -= ret;
+               else if (ret < 0)
+                       break;
                io_u_clear(td, io_u, IO_U_F_FLIGHT);
        } while (1);
 
        dprint(FD_RATE, "io_u %p ret %d by %u\n", io_u, ret, gettid());
 
-       io_queue_event(td, io_u, &ret, ddir, NULL, 0, NULL);
+       error = io_queue_event(td, io_u, &ret, ddir, NULL, 0, NULL);
 
        if (ret == FIO_Q_COMPLETED)
                td->cur_depth--;
@@ -93,6 +95,9 @@ static int io_workqueue_fn(struct submit_worker *sw,
                        td->cur_depth -= ret;
        }
 
+       if (error || td->error)
+               pthread_cond_signal(&td->parent->free_cond);
+
        return 0;
 }
 
@@ -100,6 +105,8 @@ static bool io_workqueue_pre_sleep_flush_fn(struct submit_worker *sw)
 {
        struct thread_data *td = sw->priv;
 
+       if (td->error)
+               return false;
        if (td->io_u_queued || td->cur_depth || td->io_u_in_flight)
                return true;