Steady state detection: enhance reporting of results, change memory allocation point
author Vincent Fu <Vincent.Fu@sandisk.com>
Thu, 30 Jun 2016 18:06:30 +0000 (14:06 -0400)
committer Jens Axboe <axboe@fb.com>
Mon, 15 Aug 2016 15:44:52 +0000 (09:44 -0600)
1. Always record and report both BW and IOPS for steady state window (useful when multiple block sizes are used)
2. Always report both slope and maximum deviation even though one is always unused
3. Report mean BW and mean IOPS in JSON output
4. Allocate memory for ring buffers in steadystate_setup() instead of add_job(). The benefit is that only one set of buffers is now allocated for each group. Previously each thread had its own set of buffers even though only one set in each reporting group was actually used.
5. Update test script to accommodate new output

backend.c
fio.h
init.c
stat.c
steadystate.c
steadystate.h
unit_tests/steadystate_tests.py

index b55a5274f4ad56e698cdea9d154a1ef7935b36d6..04067ebc0f7048fd9a8810362086fbbad4d0c31f 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -2411,8 +2411,12 @@ int fio_backend(struct sk_out *sk_out)
        }
 
        for_each_td(td, i) {
-               if (td->ss.dur)
-                       free(td->ss.cache);
+               if (td->ss.dur) {
+                       if (td->ss.iops_data != NULL) {
+                               free(td->ss.iops_data);
+                               free(td->ss.bw_data);
+                       }
+               }
                fio_options_free(td);
                if (td->rusage_sem) {
                        fio_mutex_remove(td->rusage_sem);
diff --git a/fio.h b/fio.h
index 2d0327c58706850f9a79e9cfe1394d25dd08a227..ed2abe79618e210b3a316ec5edd0c94feacfd55b 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -140,9 +140,12 @@ struct steadystate_data {
 
        unsigned int head;
        unsigned int tail;
-       unsigned long *cache;
+       unsigned long *iops_data;
+       unsigned long *bw_data;
 
+       double slope;
        double criterion;
+       double deviation;
 
        unsigned long long sum_y;
        unsigned long long sum_x;
diff --git a/init.c b/init.c
index 7f91c04819153a8a1159d47e94db719e920611f7..1b8f08275503fac07b6d45bfea3f116735b1c8b0 100644 (file)
--- a/init.c
+++ b/init.c
@@ -1603,18 +1603,8 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                else
                        td->ss.check_iops = false;
 
-
-               /* when group reporting is enabled only the cache allocated for the final td is actually used */
-               td->ss.cache = malloc(o->ss_dur * sizeof(*(td->ss.cache)));
-               if (td->ss.cache == NULL)
-               {
-                       log_err("fio: unable to allocate memory for steadystate cache\n");
-                       goto err;
-               }
-               for (i = 0; i < td->ss.dur; i++)
-                       td->ss.cache[i] = 0;
-               /* initialize so that it is obvious if the cache is not full in the output */
-
+               td->ss.bw_data = NULL;
+               td->ss.iops_data = NULL;
                td->ss.ramp_time_over = (td->ss.ramp_time == 0);
                td->ss.attained = 0;
                td->ss.last_in_group = 0;
@@ -1627,6 +1617,8 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                td->ss.sum_y = 0;
                td->ss.oldest_y = 0;
                td->ss.criterion = 0.0;
+               td->ss.slope = 0.0;
+               td->ss.deviation = 0.0;
                td->ts.ss = &td->ss;
        }
        else
diff --git a/stat.c b/stat.c
index f51735e7dc4b4d84462541c7b97dcf451a98edac..42fe0faf37c3a504854c18b703d3d90732993528 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -1257,8 +1257,10 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
 
        /* steady state detection; move this behind json+? */
        if (ts->ss) {
-               struct json_array *cache;
+               struct json_object *data;
+               struct json_array *iops, *bw;
                struct steadystate_data *ss = ts->ss;
+               double mean_iops = 0.0, mean_bw = 0.0;
                int i, x;
                char ss_option[64];
 
@@ -1275,13 +1277,26 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
                json_object_add_value_int(tmp, "steadystate_ramptime", ss->ramp_time / 1000000L);
                json_object_add_value_int(tmp, "attained", ss->attained);
                json_object_add_value_float(tmp, "criterion", ss->pct ? ss->criterion / 100 : ss->criterion);
-
-               cache = json_create_array();
-               json_object_add_value_array(tmp, "data", cache);
+               json_object_add_value_float(tmp, "max_deviation", ss->deviation);
+               json_object_add_value_float(tmp, "slope", ss->slope);
+
+               data = json_create_object();
+               json_object_add_value_object(tmp, "data", data);
+               bw = json_create_array();
+               iops = json_create_array();
+               json_object_add_value_array(data, "iops", iops);
+               json_object_add_value_array(data, "bw", bw);
                for (i = 0; i < ss->dur; i++) {
                        x = (ss->head + i) % ss->dur;
-                       json_array_add_value_int(cache, ss->cache[x]);
+                       mean_bw += (double) ss->bw_data[x];
+                       mean_iops += (double) ss->iops_data[x];
+                       json_array_add_value_int(bw, ss->bw_data[x]);
+                       json_array_add_value_int(iops, ss->iops_data[x]);
                }
+               mean_bw /= ss->dur;
+               mean_iops /= ss->dur;
+               json_object_add_value_float(data, "bw_mean", mean_bw);
+               json_object_add_value_float(data, "iops_mean", mean_iops);
        }
 
        return root;
index e0dcc22e465c68f80bfeef71704415c0077cdfae..c12ea245aa7863981c70535c1e17f3871d6d0086 100644 (file)
@@ -1,3 +1,5 @@
+#include <stdlib.h>
+
 #include "fio.h"
 #include "steadystate.h"
 #include "helper_thread.h"
@@ -18,19 +20,39 @@ void steadystate_setup()
        prev_groupid = -1;
        prev_td = NULL;
        for_each_td(td, i) {
-               if (!td->o.group_reporting)
+               if (td->ts.ss == NULL)
+                       continue;
+
+               if (!td->o.group_reporting) {
+                       steadystate_alloc(td);
                        continue;
+               }
 
                if (prev_groupid != td->groupid) {
-                       if (prev_td != NULL)
+                       if (prev_td != NULL) {
                                prev_td->ss.last_in_group = 1;
+                               steadystate_alloc(prev_td);
+                       }
                        prev_groupid = td->groupid;
                }
                prev_td = td;
        }
 
-       if (prev_td != NULL && prev_td->o.group_reporting)
+       if (prev_td != NULL && prev_td->o.group_reporting) {
                prev_td->ss.last_in_group = 1;
+               steadystate_alloc(prev_td);
+       }
+}
+
+void steadystate_alloc(struct thread_data *td)
+{
+       int i;
+
+       td->ss.bw_data = malloc(td->ss.dur * sizeof(unsigned long));
+       td->ss.iops_data = malloc(td->ss.dur * sizeof(unsigned long));
+       /* initialize so that it is obvious if the cache is not full in the output */
+       for (i = 0; i < td->ss.dur; i++)
+               td->ss.iops_data[i] = td->ss.bw_data[i] = 0;
 }
 
 void steadystate_check()
@@ -123,12 +145,13 @@ void steadystate_check()
 
 bool steadystate_slope(unsigned long iops, unsigned long bw, struct thread_data *td)
 {
-       int i, x;
+       int i, j;
        double result;
-       double slope;
        struct steadystate_data *ss = &td->ss;
+       unsigned long new_val = ss->check_iops ? iops : bw;
 
-       ss->cache[ss->tail] = ss->check_iops ? iops : bw;
+       ss->bw_data[ss->tail] = bw;
+       ss->iops_data[ss->tail] = iops;
 
        if (ss->tail < ss->head || (ss->tail - ss->head == ss->dur - 1))
        {
@@ -136,19 +159,19 @@ bool steadystate_slope(unsigned long iops, unsigned long bw, struct thread_data
                {
                        for(i = 0; i < ss->dur; i++)
                        {
-                               ss->sum_y += ss->cache[i];
-                               x = ss->head + i;
-                               if (x >= ss->dur)
-                                       x -= ss->dur;
-                               ss->sum_xy += ss->cache[x] * i;
+                               ss->sum_y += ss->check_iops ? ss->iops_data[i] : ss->bw_data[i];
+                               j = ss->head + i;
+                               if (j >= ss->dur)
+                                       j -= ss->dur;
+                               ss->sum_xy += (ss->check_iops ? ss->iops_data[j] : ss->bw_data[j]) * i;
                        }
                } else {                /* easy to update the sums */
                        ss->sum_y -= ss->oldest_y;
-                       ss->sum_y += ss->cache[ss->tail];
-                       ss->sum_xy = ss->sum_xy - ss->sum_y + ss->dur * ss->cache[ss->tail];
+                       ss->sum_y += new_val;
+                       ss->sum_xy = ss->sum_xy - ss->sum_y + ss->dur * new_val;
                }
 
-               ss->oldest_y = ss->cache[ss->head];
+               ss->oldest_y = ss->check_iops ? ss->iops_data[ss->head] : ss->bw_data[ss->head];
 
                /*
                 * calculate slope as (sum_xy - sum_x * sum_y / n) / (sum_(x^2) - (sum_x)^2 / n)
@@ -156,11 +179,11 @@ bool steadystate_slope(unsigned long iops, unsigned long bw, struct thread_data
                 * off by a few milliseconds. This assumption greatly simplifies the
                 * calculations.
                 */
-               slope = (ss->sum_xy - (double) ss->sum_x * ss->sum_y / ss->dur) / (ss->sum_x_sq - (double) ss->sum_x * ss->sum_x / ss->dur);
-               ss->criterion = ss->pct ? slope / (ss->sum_y / ss->dur) * 100.0: slope;
+               ss->slope = (ss->sum_xy - (double) ss->sum_x * ss->sum_y / ss->dur) / (ss->sum_x_sq - (double) ss->sum_x * ss->sum_x / ss->dur);
+               ss->criterion = ss->pct ? ss->slope / (ss->sum_y / ss->dur) * 100.0: ss->slope;
 
                dprint(FD_STEADYSTATE, "sum_y: %llu, sum_xy: %llu, slope: %f, criterion: %f, limit: %f\n",
-                       ss->sum_y, ss->sum_xy, slope, ss->criterion, ss->limit);
+                       ss->sum_y, ss->sum_xy, ss->slope, ss->criterion, ss->limit);
 
                result = ss->criterion * (ss->criterion < 0.0 ? -1 : 1);
                if (result < ss->limit)
@@ -178,36 +201,36 @@ bool steadystate_deviation(unsigned long iops, unsigned long bw, struct thread_d
        int i;
        double diff;
        double mean;
-       double deviation;
 
        struct steadystate_data *ss = &td->ss;
 
-       ss->cache[ss->tail] = ss->check_iops ? iops : bw;
+       ss->bw_data[ss->tail] = bw;
+       ss->iops_data[ss->tail] = iops;
 
        if (ss->tail < ss->head || (ss->tail - ss->head == ss->dur - 1))
        {
                if (ss->sum_y == 0)     /* first time through */
                {
                        for(i = 0; i < ss->dur; i++)
-                               ss->sum_y += ss->cache[i];
+                               ss->sum_y += ss->check_iops ? ss->iops_data[i] : ss->bw_data[i];
                } else {                /* easy to update the sum */
                        ss->sum_y -= ss->oldest_y;
-                       ss->sum_y += ss->cache[ss->tail];
+                       ss->sum_y += ss->check_iops ? ss->iops_data[ss->tail] : ss->bw_data[ss->tail];
                }
 
-               ss->oldest_y = ss->cache[ss->head];
+               ss->oldest_y = ss->check_iops ? ss->iops_data[ss->head] : ss->bw_data[ss->head];
                mean = (double) ss->sum_y / ss->dur;
-               deviation = 0.0;
+               ss->deviation = 0.0;
 
                for (i = 0; i < ss->dur; i++)
                {       
-                       diff = (double) ss->cache[i] - mean;
-                       deviation = max(deviation, diff * (diff < 0.0 ? -1 : 1));
+                       diff = (double) (ss->check_iops ? ss->iops_data[i] : ss->bw_data[i]) - mean;
+                       ss->deviation = max(ss->deviation, diff * (diff < 0.0 ? -1 : 1));
                }
 
-               ss->criterion = ss->pct ? deviation / mean * 100.0 : deviation;
+               ss->criterion = ss->pct ? ss->deviation / mean * 100.0 : ss->deviation;
 
-               dprint(FD_STEADYSTATE, "sum_y: %llu, mean: %f, max diff: %f, objective: %f, limit: %f\n", ss->sum_y, mean, deviation, ss->criterion, ss->limit);
+               dprint(FD_STEADYSTATE, "sum_y: %llu, mean: %f, max diff: %f, objective: %f, limit: %f\n", ss->sum_y, mean, ss->deviation, ss->criterion, ss->limit);
 
                if (ss->criterion < ss->limit)
                        return true;
index 039ffc900acbe37085f65962d8190ae3408043c8..31e5c3fc42f27ff67ca6a06771e2f9fca0f0c41a 100644 (file)
@@ -3,6 +3,7 @@
 
 extern void steadystate_check(void);
 extern void steadystate_setup(void);
+extern void steadystate_alloc(struct thread_data *);
 extern bool steadystate_deviation(unsigned long, unsigned long, struct thread_data *);
 extern bool steadystate_slope(unsigned long, unsigned long, struct thread_data *);
 #endif
index 02b2b0d63d729b1abcac8a0c2c6bec0dee3bd9fc..e4a5d19b719d40531c6b99f01b88f5c0ac57af23 100755 (executable)
@@ -39,13 +39,15 @@ def parse_args():
 
 
 def check(data, iops, slope, pct, limit, dur, criterion):
+    measurement = 'iops' if iops else 'bw'
+    data = data[measurement]
     mean = sum(data) / len(data)
     if slope:
         x = range(len(data))
         m, intercept, r_value, p_value, std_err = stats.linregress(x,data)
         m = abs(m)
         if pct:
-            target = m / mean * 100
+            target = m / mean
         else:
             target = m
     else:
@@ -53,11 +55,11 @@ def check(data, iops, slope, pct, limit, dur, criterion):
         for x in data:
             maxdev = max(abs(mean-x), maxdev)
         if pct:
-            target = maxdev / mean * 100
+            target = maxdev / mean
         else:
             target = maxdev
 
-    return (abs(target - criterion) / criterion < 0.001), target < limit, mean, target
+    return (abs(target - criterion) / criterion < 0.005), target < limit, mean, target
 
 
 if __name__ == '__main__':
@@ -158,12 +160,12 @@ if __name__ == '__main__':
                             dur=suite[jobnum]['ss_dur'],
                             criterion=job['steadystate']['criterion'])
                         if not objsame:
-                            line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate']['data'])
+                            line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate'])
                         else:
                             if met:
-                                line = 'PASSED ' + line + ' target {0} < limit {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'PASSED ' + line + ' target {0} < limit {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
                             else:
-                                line = 'FAILED ' + line + ' target {0} < limit {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'FAILED ' + line + ' target {0} < limit {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
                     
                 else:
                     # check runtime, confirm criterion calculation, and confirm that criterion was not met
@@ -182,14 +184,14 @@ if __name__ == '__main__':
                             criterion=job['steadystate']['criterion'])
                         if not objsame:
                             if actual > (suite[jobnum]['ss_dur'] + suite[jobnum]['ss_ramp'])*1000:
-                                line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate']['data'])
+                                line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate'])
                             else:
-                                line = 'PASSED ' + line + ' fio criterion {0} == 0.0 since ss_dur + ss_ramp has not elapsed, data: {1} '.format(job['steadystate']['criterion'], job['steadystate']['data'])
+                                line = 'PASSED ' + line + ' fio criterion {0} == 0.0 since ss_dur + ss_ramp has not elapsed, data: {1} '.format(job['steadystate']['criterion'], job['steadystate'])
                         else:
                             if met:
-                                line = 'FAILED ' + line + ' target {0} < threshold {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'FAILED ' + line + ' target {0} < threshold {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
                             else:
-                                line = 'PASSED ' + line + ' criterion {0} > threshold {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'PASSED ' + line + ' criterion {0} > threshold {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
             else:
                 expected = suite[jobnum]['timeout'] * 1000
                 actual = job['read']['runtime']