stat: avoid hang with race in stat thread exit and rusage update
[fio.git] / backend.c
index 8303dc626a5c69bbfb999c046cd16958566deaf2..90d998dc18653630d7a6d6979085901166a7f52d 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -56,7 +56,6 @@
 #include "lib/tp.h"
 
 static pthread_t disk_util_thread;
-static struct fio_mutex *disk_thread_mutex;
 static pthread_cond_t du_cond;
 static pthread_mutex_t du_lock;
 
@@ -1565,13 +1564,17 @@ err:
        if (o->write_iolog_file)
                write_iolog_close(td);
 
-       fio_mutex_remove(td->rusage_sem);
-       td->rusage_sem = NULL;
-
        fio_mutex_remove(td->mutex);
        td->mutex = NULL;
 
        td_set_runstate(td, TD_EXITED);
+
+       /*
+        * Do this last after setting our runstate to exited, so we
+        * know that the stat thread is signaled.
+        */
+       check_update_rusage(td);
+
        return (void *) (uintptr_t) td->error;
 }
 
@@ -2010,10 +2013,8 @@ static void *disk_thread_main(void *data)
                }
 
                ret = pthread_cond_timedwait(&du_cond, &du_lock, &ts);
-               if (ret != ETIMEDOUT) {
-                       printf("disk thread should exit %d\n", ret);
+               if (ret != ETIMEDOUT)
                        break;
-               }
 
                ret = update_io_ticks();
 
@@ -2030,14 +2031,11 @@ static int create_disk_util_thread(void)
 
        setup_disk_util();
 
-       disk_thread_mutex = fio_mutex_init(FIO_MUTEX_LOCKED);
-
        pthread_cond_init(&du_cond, NULL);
        pthread_mutex_init(&du_lock, NULL);
 
        ret = pthread_create(&disk_util_thread, NULL, disk_thread_main, NULL);
        if (ret) {
-               fio_mutex_remove(disk_thread_mutex);
                log_err("Can't create disk util thread: %s\n", strerror(ret));
                return 1;
        }
@@ -2079,6 +2077,7 @@ int fio_backend(void)
        set_genesis_time();
        stat_init();
        create_disk_util_thread();
+       create_status_interval_thread();
 
        cgroup_list = smalloc(sizeof(*cgroup_list));
        INIT_FLIST_HEAD(cgroup_list);
@@ -2086,6 +2085,7 @@ int fio_backend(void)
        run_threads();
 
        wait_for_disk_thread_exit();
+       wait_for_status_interval_thread_exit();
 
        if (!fio_abort) {
                __show_run_stats();
@@ -2101,8 +2101,11 @@ int fio_backend(void)
                }
        }
 
-       for_each_td(td, i)
+       for_each_td(td, i) {
                fio_options_free(td);
+               fio_mutex_remove(td->rusage_sem);
+               td->rusage_sem = NULL;
+       }
 
        free_disk_util();
        cgroup_kill(cgroup_list);
@@ -2110,7 +2113,6 @@ int fio_backend(void)
        sfree(cgroup_mnt);
 
        fio_mutex_remove(startup_mutex);
-       fio_mutex_remove(disk_thread_mutex);
        stat_exit();
        return exit_value;
 }