Add code to detect a task that exited prior to up'ing the startup mutex
authorJens Axboe <jens.axboe@oracle.com>
Wed, 1 Jul 2009 21:02:10 +0000 (23:02 +0200)
committerJens Axboe <jens.axboe@oracle.com>
Wed, 1 Jul 2009 21:02:10 +0000 (23:02 +0200)
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
fio.c
mutex.c
mutex.h

diff --git a/fio.c b/fio.c
index 63d9a1b7ab434ca4eba416ab2fe71bbc9e71385a..fa5deabb6842ed0c527c07e76f68c9c93560e5cc 100644 (file)
--- a/fio.c
+++ b/fio.c
@@ -1492,7 +1492,13 @@ static void run_threads(void)
                                        *fio_debug_jobp = pid;
                        }
                        dprint(FD_MUTEX, "wait on startup_mutex\n");
-                       fio_mutex_down(startup_mutex);
+                       if (fio_mutex_down_timeout(startup_mutex, 10)) {
+                               log_err("fio: job startup hung? exiting.\n");
+                               terminate_threads(TERMINATE_ALL);
+                               fio_abort = 1;
+                               nr_started--;
+                               break;
+                       }
                        dprint(FD_MUTEX, "done waiting on startup_mutex\n");
                }
 
diff --git a/mutex.c b/mutex.c
index 1538f62c4b1a286ebe015c299031ffede126e2c4..a44743745ab43d18b8342a67598b4977c1e43557 100644 (file)
--- a/mutex.c
+++ b/mutex.c
@@ -3,6 +3,7 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <fcntl.h>
+#include <time.h>
 #include <pthread.h>
 #include <sys/mman.h>
 
@@ -88,6 +89,42 @@ err:
        return NULL;
 }
 
+/*
+ * Take the mutex, waiting at most 'seconds' seconds for its value to
+ * become non-zero.
+ *
+ * Returns 0 when the value was decremented, otherwise the non-zero
+ * error from pthread_cond_timedwait() (ETIMEDOUT once the deadline has
+ * passed). The internal lock is never held on return.
+ */
+int fio_mutex_down_timeout(struct fio_mutex *mutex, unsigned int seconds)
+{
+       struct timespec t;
+       int ret = 0;
+
+       /* pthread_cond_timedwait() takes an absolute deadline */
+       clock_gettime(CLOCK_REALTIME, &t);
+       t.tv_sec += seconds;
+
+       pthread_mutex_lock(&mutex->lock);
+
+       while (!mutex->value && !ret) {
+               mutex->waiters++;
+               ret = pthread_cond_timedwait(&mutex->cond, &mutex->lock, &t);
+               mutex->waiters--;
+       }
+
+       if (!ret)
+               mutex->value--;
+
+       /*
+        * pthread_cond_timedwait() re-acquires the lock before it
+        * returns, on timeout as well, so drop it on every path.
+        */
+       pthread_mutex_unlock(&mutex->lock);
+       return ret;
+}
+
 void fio_mutex_down(struct fio_mutex *mutex)
 {
        pthread_mutex_lock(&mutex->lock);
diff --git a/mutex.h b/mutex.h
index 7be0ab1b44b7c96fa4d7be645a9ff95cc9e8a0bb..ac79dc63a6fe536f5acecc9ffd35d34bd95813b5 100644 (file)
--- a/mutex.h
+++ b/mutex.h
@@ -15,6 +15,7 @@ struct fio_mutex {
 extern struct fio_mutex *fio_mutex_init(int);
 extern void fio_mutex_remove(struct fio_mutex *);
 extern void fio_mutex_down(struct fio_mutex *);
+extern int fio_mutex_down_timeout(struct fio_mutex *, unsigned int);
 extern void fio_mutex_down_read(struct fio_mutex *);
 extern void fio_mutex_down_write(struct fio_mutex *);
 extern void fio_mutex_up(struct fio_mutex *);