add rsp. time samples as column 2, use meaningful pctile names
authorBen England <bengland@redhat.com>
Tue, 16 Oct 2018 16:19:39 +0000 (12:19 -0400)
committerBen England <bengland@redhat.com>
Tue, 16 Oct 2018 16:19:39 +0000 (12:19 -0400)
percentiles on a small sample count are suspect.
0th percentile is min, 50th is median, 100th is max.
sample count also lets you know which fio jobs
are contributing the most.

tools/hist/fio-histo-log-pctiles.py

index 7f08f6e..f9df2a3 100755 (executable)
@@ -272,6 +272,13 @@ def add_to_histo_from( target, source ):
     for b in range(0, len(source)):
         target[b] += source[b]
 
+
+# calculate total samples in the histogram buckets
+
+def get_samples(buckets):
+    return reduce( lambda x,y: x + y, buckets)
+
+
 # compute percentiles
 # inputs:
 #   buckets: histogram bucket array 
@@ -453,14 +460,24 @@ def compute_percentiles_from_logs():
     # calculate percentiles across aggregate histogram for all threads
     # print CSV header just like fiologparser_hist does
 
-    header = 'msec-since-start, '
+    header = 'msec-since-start, samples, '
     for p in args.pctiles_wanted:
-        header += '%3.1f, ' % p
+        if p == 0.:
+            next_pctile_header = 'min'
+        elif p == 100.:
+            next_pctile_header = 'max'
+        elif p == 50.:
+            next_pctile_header = 'median'
+        else:
+            next_pctile_header = '%3.1f' % p
+        header += '%s, ' % next_pctile_header
+
     print('time (millisec), percentiles in increasing order with values in ' + args.output_unit)
     print(header)
 
     for (t_msec, all_threads_histo_t) in all_threads_histograms:
-        record = '%8d, ' % t_msec
+        samples = get_samples(all_threads_histo_t)
+        record = '%8d, %8d, ' % (t_msec, samples)
         pct = get_pctiles(all_threads_histo_t, args.pctiles_wanted, bucket_times)
         if not pct:
             for w in args.pctiles_wanted: