Steady state detection: enhance reporting of results, change memory allocation point
author Vincent Fu <Vincent.Fu@sandisk.com>
Thu, 30 Jun 2016 18:06:30 +0000 (14:06 -0400)
committer Jens Axboe <axboe@fb.com>
Mon, 15 Aug 2016 15:44:52 +0000 (09:44 -0600)
1. Always record and report both BW and IOPS for steady state window (useful when multiple block sizes are used)
2. Always report both slope and maximum deviation even though one is always unused
3. Report mean BW and mean IOPS in JSON output
4. Allocate memory for ring buffers in steadystate_setup() instead of add_job(). The benefit is that only one set of buffers is now allocated for each group. Previously each thread had its own set of buffers even though only one set in each reporting group was actually used.
5. Update test script to accommodate new output

backend.c
fio.h
init.c
stat.c
steadystate.c
steadystate.h
unit_tests/steadystate_tests.py

index b55a527..04067eb 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -2411,8 +2411,12 @@ int fio_backend(struct sk_out *sk_out)
        }
 
        for_each_td(td, i) {
-               if (td->ss.dur)
-                       free(td->ss.cache);
+               if (td->ss.dur) {
+                       if (td->ss.iops_data != NULL) {
+                               free(td->ss.iops_data);
+                               free(td->ss.bw_data);
+                       }
+               }
                fio_options_free(td);
                if (td->rusage_sem) {
                        fio_mutex_remove(td->rusage_sem);
diff --git a/fio.h b/fio.h
index 2d0327c..ed2abe7 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -140,9 +140,12 @@ struct steadystate_data {
 
        unsigned int head;
        unsigned int tail;
-       unsigned long *cache;
+       unsigned long *iops_data;
+       unsigned long *bw_data;
 
+       double slope;
        double criterion;
+       double deviation;
 
        unsigned long long sum_y;
        unsigned long long sum_x;
diff --git a/init.c b/init.c
index 7f91c04..1b8f082 100644 (file)
--- a/init.c
+++ b/init.c
@@ -1603,18 +1603,8 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                else
                        td->ss.check_iops = false;
 
-
-               /* when group reporting is enabled only the cache allocated for the final td is actually used */
-               td->ss.cache = malloc(o->ss_dur * sizeof(*(td->ss.cache)));
-               if (td->ss.cache == NULL)
-               {
-                       log_err("fio: unable to allocate memory for steadystate cache\n");
-                       goto err;
-               }
-               for (i = 0; i < td->ss.dur; i++)
-                       td->ss.cache[i] = 0;
-               /* initialize so that it is obvious if the cache is not full in the output */
-
+               td->ss.bw_data = NULL;
+               td->ss.iops_data = NULL;
                td->ss.ramp_time_over = (td->ss.ramp_time == 0);
                td->ss.attained = 0;
                td->ss.last_in_group = 0;
@@ -1627,6 +1617,8 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                td->ss.sum_y = 0;
                td->ss.oldest_y = 0;
                td->ss.criterion = 0.0;
+               td->ss.slope = 0.0;
+               td->ss.deviation = 0.0;
                td->ts.ss = &td->ss;
        }
        else
diff --git a/stat.c b/stat.c
index f51735e..42fe0fa 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -1257,8 +1257,10 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
 
        /* steady state detection; move this behind json+? */
        if (ts->ss) {
-               struct json_array *cache;
+               struct json_object *data;
+               struct json_array *iops, *bw;
                struct steadystate_data *ss = ts->ss;
+               double mean_iops = 0.0, mean_bw = 0.0;
                int i, x;
                char ss_option[64];
 
@@ -1275,13 +1277,26 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts,
                json_object_add_value_int(tmp, "steadystate_ramptime", ss->ramp_time / 1000000L);
                json_object_add_value_int(tmp, "attained", ss->attained);
                json_object_add_value_float(tmp, "criterion", ss->pct ? ss->criterion / 100 : ss->criterion);
-
-               cache = json_create_array();
-               json_object_add_value_array(tmp, "data", cache);
+               json_object_add_value_float(tmp, "max_deviation", ss->deviation);
+               json_object_add_value_float(tmp, "slope", ss->slope);
+
+               data = json_create_object();
+               json_object_add_value_object(tmp, "data", data);
+               bw = json_create_array();
+               iops = json_create_array();
+               json_object_add_value_array(data, "iops", iops);
+               json_object_add_value_array(data, "bw", bw);
                for (i = 0; i < ss->dur; i++) {
                        x = (ss->head + i) % ss->dur;
-                       json_array_add_value_int(cache, ss->cache[x]);
+                       mean_bw += (double) ss->bw_data[x];
+                       mean_iops += (double) ss->iops_data[x];
+                       json_array_add_value_int(bw, ss->bw_data[x]);
+                       json_array_add_value_int(iops, ss->iops_data[x]);
                }
+               mean_bw /= ss->dur;
+               mean_iops /= ss->dur;
+               json_object_add_value_float(data, "bw_mean", mean_bw);
+               json_object_add_value_float(data, "iops_mean", mean_iops);
        }
 
        return root;
index e0dcc22..c12ea24 100644 (file)
@@ -1,3 +1,5 @@
+#include <stdlib.h>
+
 #include "fio.h"
 #include "steadystate.h"
 #include "helper_thread.h"
@@ -18,19 +20,39 @@ void steadystate_setup()
        prev_groupid = -1;
        prev_td = NULL;
        for_each_td(td, i) {
-               if (!td->o.group_reporting)
+               if (td->ts.ss == NULL)
+                       continue;
+
+               if (!td->o.group_reporting) {
+                       steadystate_alloc(td);
                        continue;
+               }
 
                if (prev_groupid != td->groupid) {
-                       if (prev_td != NULL)
+                       if (prev_td != NULL) {
                                prev_td->ss.last_in_group = 1;
+                               steadystate_alloc(prev_td);
+                       }
                        prev_groupid = td->groupid;
                }
                prev_td = td;
        }
 
-       if (prev_td != NULL && prev_td->o.group_reporting)
+       if (prev_td != NULL && prev_td->o.group_reporting) {
                prev_td->ss.last_in_group = 1;
+               steadystate_alloc(prev_td);
+       }
+}
+
+void steadystate_alloc(struct thread_data *td)
+{
+       int i;
+
+       td->ss.bw_data = malloc(td->ss.dur * sizeof(unsigned long));
+       td->ss.iops_data = malloc(td->ss.dur * sizeof(unsigned long));
+       /* initialize so that it is obvious if the cache is not full in the output */
+       for (i = 0; i < td->ss.dur; i++)
+               td->ss.iops_data[i] = td->ss.bw_data[i] = 0;
 }
 
 void steadystate_check()
@@ -123,12 +145,13 @@ void steadystate_check()
 
 bool steadystate_slope(unsigned long iops, unsigned long bw, struct thread_data *td)
 {
-       int i, x;
+       int i, j;
        double result;
-       double slope;
        struct steadystate_data *ss = &td->ss;
+       unsigned long new_val = ss->check_iops ? iops : bw;
 
-       ss->cache[ss->tail] = ss->check_iops ? iops : bw;
+       ss->bw_data[ss->tail] = bw;
+       ss->iops_data[ss->tail] = iops;
 
        if (ss->tail < ss->head || (ss->tail - ss->head == ss->dur - 1))
        {
@@ -136,19 +159,19 @@ bool steadystate_slope(unsigned long iops, unsigned long bw, struct thread_data
                {
                        for(i = 0; i < ss->dur; i++)
                        {
-                               ss->sum_y += ss->cache[i];
-                               x = ss->head + i;
-                               if (x >= ss->dur)
-                                       x -= ss->dur;
-                               ss->sum_xy += ss->cache[x] * i;
+                               ss->sum_y += ss->check_iops ? ss->iops_data[i] : ss->bw_data[i];
+                               j = ss->head + i;
+                               if (j >= ss->dur)
+                                       j -= ss->dur;
+                               ss->sum_xy += (ss->check_iops ? ss->iops_data[j] : ss->bw_data[j]) * i;
                        }
                } else {                /* easy to update the sums */
                        ss->sum_y -= ss->oldest_y;
-                       ss->sum_y += ss->cache[ss->tail];
-                       ss->sum_xy = ss->sum_xy - ss->sum_y + ss->dur * ss->cache[ss->tail];
+                       ss->sum_y += new_val;
+                       ss->sum_xy = ss->sum_xy - ss->sum_y + ss->dur * new_val;
                }
 
-               ss->oldest_y = ss->cache[ss->head];
+               ss->oldest_y = ss->check_iops ? ss->iops_data[ss->head] : ss->bw_data[ss->head];
 
                /*
                 * calculate slope as (sum_xy - sum_x * sum_y / n) / (sum_(x^2) - (sum_x)^2 / n)
@@ -156,11 +179,11 @@ bool steadystate_slope(unsigned long iops, unsigned long bw, struct thread_data
                 * off by a few milliseconds. This assumption greatly simplifies the
                 * calculations.
                 */
-               slope = (ss->sum_xy - (double) ss->sum_x * ss->sum_y / ss->dur) / (ss->sum_x_sq - (double) ss->sum_x * ss->sum_x / ss->dur);
-               ss->criterion = ss->pct ? slope / (ss->sum_y / ss->dur) * 100.0: slope;
+               ss->slope = (ss->sum_xy - (double) ss->sum_x * ss->sum_y / ss->dur) / (ss->sum_x_sq - (double) ss->sum_x * ss->sum_x / ss->dur);
+               ss->criterion = ss->pct ? ss->slope / (ss->sum_y / ss->dur) * 100.0: ss->slope;
 
                dprint(FD_STEADYSTATE, "sum_y: %llu, sum_xy: %llu, slope: %f, criterion: %f, limit: %f\n",
-                       ss->sum_y, ss->sum_xy, slope, ss->criterion, ss->limit);
+                       ss->sum_y, ss->sum_xy, ss->slope, ss->criterion, ss->limit);
 
                result = ss->criterion * (ss->criterion < 0.0 ? -1 : 1);
                if (result < ss->limit)
@@ -178,36 +201,36 @@ bool steadystate_deviation(unsigned long iops, unsigned long bw, struct thread_d
        int i;
        double diff;
        double mean;
-       double deviation;
 
        struct steadystate_data *ss = &td->ss;
 
-       ss->cache[ss->tail] = ss->check_iops ? iops : bw;
+       ss->bw_data[ss->tail] = bw;
+       ss->iops_data[ss->tail] = iops;
 
        if (ss->tail < ss->head || (ss->tail - ss->head == ss->dur - 1))
        {
                if (ss->sum_y == 0)     /* first time through */
                {
                        for(i = 0; i < ss->dur; i++)
-                               ss->sum_y += ss->cache[i];
+                               ss->sum_y += ss->check_iops ? ss->iops_data[i] : ss->bw_data[i];
                } else {                /* easy to update the sum */
                        ss->sum_y -= ss->oldest_y;
-                       ss->sum_y += ss->cache[ss->tail];
+                       ss->sum_y += ss->check_iops ? ss->iops_data[ss->tail] : ss->bw_data[ss->tail];
                }
 
-               ss->oldest_y = ss->cache[ss->head];
+               ss->oldest_y = ss->check_iops ? ss->iops_data[ss->head] : ss->bw_data[ss->head];
                mean = (double) ss->sum_y / ss->dur;
-               deviation = 0.0;
+               ss->deviation = 0.0;
 
                for (i = 0; i < ss->dur; i++)
                {       
-                       diff = (double) ss->cache[i] - mean;
-                       deviation = max(deviation, diff * (diff < 0.0 ? -1 : 1));
+                       diff = (double) (ss->check_iops ? ss->iops_data[i] : ss->bw_data[i]) - mean;
+                       ss->deviation = max(ss->deviation, diff * (diff < 0.0 ? -1 : 1));
                }
 
-               ss->criterion = ss->pct ? deviation / mean * 100.0 : deviation;
+               ss->criterion = ss->pct ? ss->deviation / mean * 100.0 : ss->deviation;
 
-               dprint(FD_STEADYSTATE, "sum_y: %llu, mean: %f, max diff: %f, objective: %f, limit: %f\n", ss->sum_y, mean, deviation, ss->criterion, ss->limit);
+               dprint(FD_STEADYSTATE, "sum_y: %llu, mean: %f, max diff: %f, objective: %f, limit: %f\n", ss->sum_y, mean, ss->deviation, ss->criterion, ss->limit);
 
                if (ss->criterion < ss->limit)
                        return true;
index 039ffc9..31e5c3f 100644 (file)
@@ -3,6 +3,7 @@
 
 extern void steadystate_check(void);
 extern void steadystate_setup(void);
+extern void steadystate_alloc(struct thread_data *);
 extern bool steadystate_deviation(unsigned long, unsigned long, struct thread_data *);
 extern bool steadystate_slope(unsigned long, unsigned long, struct thread_data *);
 #endif
index 02b2b0d..e4a5d19 100755 (executable)
@@ -39,13 +39,15 @@ def parse_args():
 
 
 def check(data, iops, slope, pct, limit, dur, criterion):
+    measurement = 'iops' if iops else 'bw'
+    data = data[measurement]
     mean = sum(data) / len(data)
     if slope:
         x = range(len(data))
         m, intercept, r_value, p_value, std_err = stats.linregress(x,data)
         m = abs(m)
         if pct:
-            target = m / mean * 100
+            target = m / mean
         else:
             target = m
     else:
@@ -53,11 +55,11 @@ def check(data, iops, slope, pct, limit, dur, criterion):
         for x in data:
             maxdev = max(abs(mean-x), maxdev)
         if pct:
-            target = maxdev / mean * 100
+            target = maxdev / mean
         else:
             target = maxdev
 
-    return (abs(target - criterion) / criterion < 0.001), target < limit, mean, target
+    return (abs(target - criterion) / criterion < 0.005), target < limit, mean, target
 
 
 if __name__ == '__main__':
@@ -158,12 +160,12 @@ if __name__ == '__main__':
                             dur=suite[jobnum]['ss_dur'],
                             criterion=job['steadystate']['criterion'])
                         if not objsame:
-                            line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate']['data'])
+                            line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate'])
                         else:
                             if met:
-                                line = 'PASSED ' + line + ' target {0} < limit {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'PASSED ' + line + ' target {0} < limit {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
                             else:
-                                line = 'FAILED ' + line + ' target {0} < limit {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'FAILED ' + line + ' target {0} < limit {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
                     
                 else:
                     # check runtime, confirm criterion calculation, and confirm that criterion was not met
@@ -182,14 +184,14 @@ if __name__ == '__main__':
                             criterion=job['steadystate']['criterion'])
                         if not objsame:
                             if actual > (suite[jobnum]['ss_dur'] + suite[jobnum]['ss_ramp'])*1000:
-                                line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate']['data'])
+                                line = 'FAILED ' + line + ' fio criterion {0} != calculated criterion {1}, data: {2} '.format(job['steadystate']['criterion'], target, job['steadystate'])
                             else:
-                                line = 'PASSED ' + line + ' fio criterion {0} == 0.0 since ss_dur + ss_ramp has not elapsed, data: {1} '.format(job['steadystate']['criterion'], job['steadystate']['data'])
+                                line = 'PASSED ' + line + ' fio criterion {0} == 0.0 since ss_dur + ss_ramp has not elapsed, data: {1} '.format(job['steadystate']['criterion'], job['steadystate'])
                         else:
                             if met:
-                                line = 'FAILED ' + line + ' target {0} < threshold {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'FAILED ' + line + ' target {0} < threshold {1} but fio reports ss not attained, data: {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
                             else:
-                                line = 'PASSED ' + line + ' criterion {0} > threshold {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate']['data'])
+                                line = 'PASSED ' + line + ' criterion {0} > threshold {1}, data {2}'.format(target, suite[jobnum]['ss_limit'], job['steadystate'])
             else:
                 expected = suite[jobnum]['timeout'] * 1000
                 actual = job['read']['runtime']