[PATCH] Catch processes/threads that unexpectedly exited
author Jens Axboe <jens.axboe@oracle.com>
Sat, 17 Feb 2007 05:19:24 +0000 (06:19 +0100)
committer Jens Axboe <jens.axboe@oracle.com>
Sat, 17 Feb 2007 05:19:24 +0000 (06:19 +0100)
Fio will no longer stall waiting for dead children.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
fio.c

diff --git a/fio.c b/fio.c
index e386f53c0259726cabfb388909d0e85c6e6a8a49..774b160c969337b2ad28b8c852479d4cf6e2f0ae 100644 (file)
--- a/fio.c
+++ b/fio.c
@@ -793,7 +793,7 @@ static int fork_main(int shmid, int offset)
 static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
 {
        struct thread_data *td;
-       int i, cputhreads, pending;
+       int i, cputhreads, pending, status, ret;
 
        /*
         * reap exited threads (TD_EXITED -> TD_REAPED)
@@ -807,6 +807,22 @@ static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
                if (td->io_ops && td->io_ops->flags & FIO_CPUIO)
                        cputhreads++;
 
+               if (td->runstate < TD_EXITED) {
+                       /*
+                        * check if someone quit or got killed in an unusual way
+                        */
+                       ret = waitpid(td->pid, &status, WNOHANG);
+                       if (ret < 0)
+                               perror("waitpid");
+                       else if ((ret == td->pid) && WIFSIGNALED(status)) {
+                               int sig = WTERMSIG(status);
+
+                               log_err("fio: pid=%d, got signal=%d\n", td->pid, sig);
+                               td_set_runstate(td, TD_REAPED);
+                               goto reaped;
+                       }
+               }
+
                if (td->runstate != TD_EXITED) {
                        if (td->runstate < TD_RUNNING)
                                pending++;
@@ -827,13 +843,16 @@ static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
                } else {
                        int status;
 
-                       waitpid(td->pid, &status, 0);
+                       ret = waitpid(td->pid, &status, 0);
+                       if (ret < 0)
+                               perror("waitpid");
                        if (WIFEXITED(status) && WEXITSTATUS(status)) {
                                if (!exit_value)
                                        exit_value++;
                        }
                }
 
+reaped:
                (*nr_running)--;
                (*m_rate) -= td->ratemin;
                (*t_rate) -= td->rate;