Revert "fio: Simplify forking of processes"
[fio.git] / backend.c
index 1723b8f423190b6bb2bd8d812692d55f8017d1ca..bb0200bb2b0fdb29c80aacdc03ca1b026385191e 100644 (file)
--- a/backend.c
+++ b/backend.c
 #include "workqueue.h"
 #include "lib/mountcheck.h"
 #include "rate-submit.h"
-
-static pthread_t helper_thread;
-static pthread_mutex_t helper_lock;
-pthread_cond_t helper_cond;
-int helper_do_stat = 0;
+#include "helper_thread.h"
 
 static struct fio_mutex *startup_mutex;
 static struct flist_head *cgroup_list;
@@ -79,7 +75,6 @@ unsigned int stat_number = 0;
 int shm_id = 0;
 int temp_stall_ts;
 unsigned long done_secs = 0;
-volatile int helper_exit = 0;
 
 #define PAGE_ALIGN(buf)        \
        (char *) (((uintptr_t) (buf) + page_mask) & ~page_mask)
@@ -446,6 +441,12 @@ static int wait_for_completions(struct thread_data *td, struct timeval *time)
        int min_evts = 0;
        int ret;
 
+       if (td->flags & TD_F_REGROW_LOGS) {
+               ret = io_u_quiesce(td);
+               regrow_logs(td);
+               return ret;
+       }
+
        /*
         * if the queue is full, we MUST reap at least 1 event
         */
@@ -523,6 +524,9 @@ sync_done:
                                break;
                }
 
+               if (td->flags & TD_F_REGROW_LOGS)
+                       regrow_logs(td);
+
                /*
                 * when doing I/O (not when verifying),
                 * check for any errors that are to be ignored
@@ -1424,7 +1428,6 @@ static void *thread_main(void *data)
        struct thread_data *td = fd->td;
        struct thread_options *o = &td->o;
        struct sk_out *sk_out = fd->sk_out;
-       pthread_condattr_t attr;
        int clear_state;
        int ret;
 
@@ -1449,12 +1452,18 @@ static void *thread_main(void *data)
        INIT_FLIST_HEAD(&td->verify_list);
        INIT_FLIST_HEAD(&td->trim_list);
        INIT_FLIST_HEAD(&td->next_rand_list);
-       pthread_mutex_init(&td->io_u_lock, NULL);
        td->io_hist_tree = RB_ROOT;
 
-       pthread_condattr_init(&attr);
-       pthread_cond_init(&td->verify_cond, &attr);
-       pthread_cond_init(&td->free_cond, &attr);
+       ret = mutex_cond_init_pshared(&td->io_u_lock, &td->free_cond);
+       if (ret) {
+               td_verror(td, ret, "mutex_cond_init_pshared");
+               goto err;
+       }
+       ret = cond_init_pshared(&td->verify_cond);
+       if (ret) {
+               td_verror(td, ret, "mutex_cond_pshared");
+               goto err;
+       }
 
        td_set_runstate(td, TD_INITIALIZED);
        dprint(FD_MUTEX, "up startup_mutex\n");
@@ -1476,6 +1485,14 @@ static void *thread_main(void *data)
                goto err;
        }
 
+       /*
+        * Do this early, we don't want the compress threads to be limited
+        * to the same CPUs as the IO workers. So do this before we set
+        * any potential CPU affinity
+        */
+       if (iolog_compress_init(td, sk_out))
+               goto err;
+
        /*
         * If we have a gettimeofday() thread, make sure we exclude that
         * thread from this job
@@ -1610,9 +1627,6 @@ static void *thread_main(void *data)
                        goto err;
        }
 
-       if (iolog_compress_init(td, sk_out))
-               goto err;
-
        fio_verify_init(td);
 
        if (rate_submit_init(td, sk_out))
@@ -1710,6 +1724,8 @@ static void *thread_main(void *data)
                        break;
        }
 
+       td_set_runstate(td, TD_FINISHING);
+
        update_rusage_stat(td);
        td->ts.total_run_time = mtime_since_now(&td->epoch);
        td->ts.io_bytes[DDIR_READ] = td->io_bytes[DDIR_READ];
@@ -1722,7 +1738,7 @@ static void *thread_main(void *data)
 
        fio_unpin_memory(td);
 
-       fio_writeout_logs(td);
+       td_writeout_logs(td, true);
 
        iolog_compress_exit(td);
        rate_submit_exit(td);
@@ -1818,8 +1834,9 @@ static int fork_main(struct sk_out *sk_out, int shmid, int offset)
 
 static void dump_td_info(struct thread_data *td)
 {
-       log_err("fio: job '%s' hasn't exited in %lu seconds, it appears to "
-               "be stuck. Doing forceful exit of this job.\n", td->o.name,
+       log_err("fio: job '%s' (state=%d) hasn't exited in %lu seconds, it "
+               "appears to be stuck. Doing forceful exit of this job.\n",
+                       td->o.name, td->runstate,
                        (unsigned long) time_since_now(&td->terminate_time));
 }
 
@@ -1905,6 +1922,7 @@ static void reap_threads(unsigned int *nr_running, unsigned int *t_rate,
                 * move on.
                 */
                if (td->terminate &&
+                   td->runstate < TD_FSYNCING &&
                    time_since_now(&td->terminate_time) >= FIO_REAP_TIMEOUT) {
                        dump_td_info(td);
                        td_set_runstate(td, TD_REAPED);
@@ -2319,82 +2337,10 @@ reap:
        update_io_ticks();
 }
 
-static void wait_for_helper_thread_exit(void)
-{
-       void *ret;
-
-       helper_exit = 1;
-       pthread_cond_signal(&helper_cond);
-       pthread_join(helper_thread, &ret);
-}
-
 static void free_disk_util(void)
 {
        disk_util_prune_entries();
-
-       pthread_cond_destroy(&helper_cond);
-}
-
-static void *helper_thread_main(void *data)
-{
-       struct sk_out *sk_out = data;
-       int ret = 0;
-
-       sk_out_assign(sk_out);
-
-       fio_mutex_up(startup_mutex);
-
-       while (!ret) {
-               uint64_t sec = DISK_UTIL_MSEC / 1000;
-               uint64_t nsec = (DISK_UTIL_MSEC % 1000) * 1000000;
-               struct timespec ts;
-               struct timeval tv;
-
-               gettimeofday(&tv, NULL);
-               ts.tv_sec = tv.tv_sec + sec;
-               ts.tv_nsec = (tv.tv_usec * 1000) + nsec;
-
-               if (ts.tv_nsec >= 1000000000ULL) {
-                       ts.tv_nsec -= 1000000000ULL;
-                       ts.tv_sec++;
-               }
-
-               pthread_cond_timedwait(&helper_cond, &helper_lock, &ts);
-
-               ret = update_io_ticks();
-
-               if (helper_do_stat) {
-                       helper_do_stat = 0;
-                       __show_running_run_stats();
-               }
-
-               if (!is_backend)
-                       print_thread_status();
-       }
-
-       sk_out_drop();
-       return NULL;
-}
-
-static int create_helper_thread(struct sk_out *sk_out)
-{
-       int ret;
-
-       setup_disk_util();
-
-       pthread_cond_init(&helper_cond, NULL);
-       pthread_mutex_init(&helper_lock, NULL);
-
-       ret = pthread_create(&helper_thread, NULL, helper_thread_main, sk_out);
-       if (ret) {
-               log_err("Can't create helper thread: %s\n", strerror(ret));
-               return 1;
-       }
-
-       dprint(FD_MUTEX, "wait on startup_mutex\n");
-       fio_mutex_down(startup_mutex);
-       dprint(FD_MUTEX, "done waiting on startup_mutex\n");
-       return 0;
+       helper_thread_destroy();
 }
 
 int fio_backend(struct sk_out *sk_out)
@@ -2427,14 +2373,14 @@ int fio_backend(struct sk_out *sk_out)
 
        set_genesis_time();
        stat_init();
-       create_helper_thread(sk_out);
+       helper_thread_create(startup_mutex, sk_out);
 
        cgroup_list = smalloc(sizeof(*cgroup_list));
        INIT_FLIST_HEAD(cgroup_list);
 
        run_threads(sk_out);
 
-       wait_for_helper_thread_exit();
+       helper_thread_exit();
 
        if (!fio_abort) {
                __show_run_stats();