helper_thread: fix inconsistent status intervals
authorVincent Fu <vincent.fu@wdc.com>
Tue, 28 Apr 2020 18:16:46 +0000 (12:16 -0600)
committerVincent Fu <vincent.fu@wdc.com>
Wed, 29 Apr 2020 11:23:21 +0000 (05:23 -0600)
The signal handler safety changes to the helper thread have resulted in
inconsistent status-interval intervals. Consider the following:

$ ./fio-canonical/fio --name=test --rw=randwrite --ioengine=libaio --direct=1 --runtime=180 --time_based --filename=/dev/fioa --output=write-canonical.out --minimal --status-interval=1
$ cut -d ';' -f 50 < write-canonical.out | awk 'NR>1{print $1-p} {p=$1}' | sort -n | tail
1002
1002
1002
1002
1002
1042
1046
1251
1252
1252

Several of the status-interval output lines are ~1250ms apart.

This patch moves code for triggering the status-interval output from the main
fio process to the helper thread. The resulting intervals are much closer to
the desired 1000ms.

$ ./fio/fio --name=test --rw=randwrite --ioengine=libaio --direct=1 --runtime=180 --time_based --filename=/dev/fioa --minimal --status-interval=1 --output=write-test.out
$ cut -d ';' -f 50 < write-test.out | awk 'NR>1{print $1-p} {p=$1}' | sort -n | tail
1001
1001
1001
1001
1001
1001
1001
1001
1001
1001

Reported-by: <nate.rivers@wdc.com>
Fixes: 31eca641ad91 ("Fix a potential deadlock in helper_do_stat()")
Signed-off-by: Vincent Fu <vincent.fu@wdc.com>
helper_thread.c
stat.c

index 90bfab5c0ce978039a289c145f248a31d6289652..dc74225362e56b97f9e44d8230c040ea7ef66723 100644 (file)
@@ -131,8 +131,9 @@ void helper_thread_exit(void)
 static void *helper_thread_main(void *data)
 {
        struct helper_data *hd = data;
-       unsigned int msec_to_next_event, next_log, next_ss = STEADYSTATE_MSEC;
-       struct timespec ts, last_du, last_ss;
+       unsigned int msec_to_next_event, next_log, next_si;
+       unsigned int next_ss = STEADYSTATE_MSEC;
+       struct timespec ts, last_du, last_ss, last_si;
        char action;
        int ret = 0;
 
@@ -157,12 +158,13 @@ static void *helper_thread_main(void *data)
 #endif
        memcpy(&last_du, &ts, sizeof(ts));
        memcpy(&last_ss, &ts, sizeof(ts));
+       memcpy(&last_si, &ts, sizeof(ts));
 
        fio_sem_up(hd->startup_sem);
 
        msec_to_next_event = DISK_UTIL_MSEC;
        while (!ret && !hd->exit) {
-               uint64_t since_du, since_ss = 0;
+               uint64_t since_du, since_si, since_ss = 0;
                struct timeval timeout = {
                        .tv_sec  = msec_to_next_event / 1000,
                        .tv_usec = (msec_to_next_event % 1000) * 1000,
@@ -207,6 +209,20 @@ static void *helper_thread_main(void *data)
                if (action == A_DO_STAT)
                        __show_running_run_stats();
 
+               if (status_interval) {
+                       since_si = mtime_since(&last_si, &ts);
+                       if (since_si >= status_interval || status_interval - since_si < 10) {
+                               __show_running_run_stats();
+                               timespec_add_msec(&last_si, since_si);
+                               if (since_si > status_interval)
+                                       next_si = status_interval - (since_si - status_interval);
+                               else
+                                       next_si = status_interval;
+                       } else
+                               next_si = status_interval - since_si;
+                       msec_to_next_event = min(next_si, msec_to_next_event);
+               }
+
                next_log = calc_log_samples();
                if (!next_log)
                        next_log = DISK_UTIL_MSEC;
diff --git a/stat.c b/stat.c
index efa811d2b70e5c23dd48ace87d19eb925fc31916..2cf11947d99adac33ef3195a08f8039d3399492b 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -2354,8 +2354,6 @@ void __show_running_run_stats(void)
        fio_sem_up(stat_sem);
 }
 
-static bool status_interval_init;
-static struct timespec status_time;
 static bool status_file_disabled;
 
 #define FIO_STATUS_FILE                "fio-dump-status"
@@ -2398,16 +2396,6 @@ static int check_status_file(void)
 
 void check_for_running_stats(void)
 {
-       if (status_interval) {
-               if (!status_interval_init) {
-                       fio_gettime(&status_time, NULL);
-                       status_interval_init = true;
-               } else if (mtime_since_now(&status_time) >= status_interval) {
-                       show_running_run_stats();
-                       fio_gettime(&status_time, NULL);
-                       return;
-               }
-       }
        if (check_status_file()) {
                show_running_run_stats();
                return;