tools/fiologparser.py

   1 #!/usr/bin/env python3
   2 # Note: this script is python2 and python 3 compatible.
   3 #
   4 # fiologparser.py
   5 #
# This tool lets you parse multiple fio log files and look at interval
   7 # statistics even when samples are non-uniform.  For instance:
   8 #
   9 # fiologparser.py -s *bw*
  10 #
  11 # to see per-interval sums for all bandwidth logs or:
  12 #
  13 # fiologparser.py -a *clat*
  14 #
  15 # to see per-interval average completion latency.
  16
  17 from __future__ import absolute_import
  18 from __future__ import print_function
  19 import argparse
  20 import math
  21 from functools import reduce
  22
  23 def parse_args():
  24     parser = argparse.ArgumentParser()
  25     parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in seconds.')
  26     parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
  27     parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
  28     parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
  29                         help='print all stats for each interval.')
  30     parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
  31     parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
  32     parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
  33     args = parser.parse_args()
  34
  35     return args
  36
  37 def get_ftime(series):
  38     ftime = 0
  39     for ts in series:
  40         if ftime == 0 or ts.last.end < ftime:
  41             ftime = ts.last.end
  42     return ftime
  43
  44 def print_full(ctx, series):
  45     ftime = get_ftime(series)
  46     start = 0
  47     end = ctx.interval
  48
  49     while (start < ftime):
  50         end = ftime if ftime < end else end
  51         results = [ts.get_value(start, end) for ts in series]
  52         print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
  53         start += ctx.interval
  54         end += ctx.interval
  55
  56 def print_sums(ctx, series):
  57     ftime = get_ftime(series)
  58     start = 0
  59     end = ctx.interval
  60
  61     while (start < ftime):
  62         end = ftime if ftime < end else end
  63         results = [ts.get_value(start, end) for ts in series]
  64         print("%s, %0.3f" % (end, sum(results)))
  65         start += ctx.interval
  66         end += ctx.interval
  67
  68 def print_averages(ctx, series):
  69     ftime = get_ftime(series)
  70     start = 0
  71     end = ctx.interval
  72
  73     while (start < ftime):
  74         end = ftime if ftime < end else end
  75         results = [ts.get_value(start, end) for ts in series]
  76         print("%s, %0.3f" % (end, float(sum(results))/len(results)))
  77         start += ctx.interval
  78         end += ctx.interval
  79
  80 # FIXME: this routine is computationally inefficient
  81 # and has O(N^2) behavior
  82 # it would be better to make one pass through samples
  83 # to segment them into a series of time intervals, and
  84 # then compute stats on each time interval instead.
  85 # to debug this routine, use
  86 #   # sort -n -t ',' -k 2 small.log
  87 # on your input.
  88
  89 def my_extend( vlist, val ):
  90     vlist.extend(val)
  91     return vlist
  92
  93 array_collapser = lambda vlist, val:  my_extend(vlist, val)
  94
  95 def print_all_stats(ctx, series):
  96     ftime = get_ftime(series)
  97     start = 0
  98     end = ctx.interval
  99     print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
 100     while (start < ftime):  # for each time interval
 101         end = ftime if ftime < end else end
 102         sample_arrays = [ s.get_samples(start, end) for s in series ]
 103         samplevalue_arrays = []
 104         for sample_array in sample_arrays:
 105             samplevalue_arrays.append(
 106                 [ sample.value for sample in sample_array ] )
 107         # collapse list of lists of sample values into list of sample values
 108         samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
 109         # compute all stats and print them
 110         mymin = min(samplevalues)
 111         myavg = sum(samplevalues) / float(len(samplevalues))
 112         mymedian = median(samplevalues)
 113         my90th = percentile(samplevalues, 0.90)
 114         my95th = percentile(samplevalues, 0.95)
 115         my99th = percentile(samplevalues, 0.99)
 116         mymax = max(samplevalues)
 117         print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
 118             start, len(samplevalues),
 119             mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
 120
 121         # advance to next interval
 122         start += ctx.interval
 123         end += ctx.interval
 124
 125 def median(values):
 126     s=sorted(values)
 127     return float(s[(len(s)-1)/2]+s[(len(s)/2)])/2
 128
 129 def percentile(values, p):
 130     s = sorted(values)
 131     k = (len(s)-1) * p
 132     f = math.floor(k)
 133     c = math.ceil(k)
 134     if f == c:
 135         return s[int(k)]
 136     return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
 137
 138 def print_default(ctx, series):
 139     ftime = get_ftime(series)
 140     start = 0
 141     end = ctx.interval
 142     averages = []
 143     weights = []
 144
 145     while (start < ftime):
 146         end = ftime if ftime < end else end
 147         results = [ts.get_value(start, end) for ts in series]
 148         averages.append(sum(results))
 149         weights.append(end-start)
 150         start += ctx.interval
 151         end += ctx.interval
 152
 153     total = 0
 154     for i in range(0, len(averages)):
 155         total += averages[i]*weights[i]
 156     print('%0.3f' % (total/sum(weights)))
 157
 158 class TimeSeries(object):
 159     def __init__(self, ctx, fn):
 160         self.ctx = ctx
 161         self.last = None
 162         self.samples = []
 163         self.read_data(fn)
 164
 165     def read_data(self, fn):
 166         f = open(fn, 'r')
 167         p_time = 0
 168         for line in f:
 169             (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
 170             self.add_sample(p_time, int(time), int(value))
 171             p_time = int(time)
 172
 173     def add_sample(self, start, end, value):
 174         sample = Sample(ctx, start, end, value)
 175         if not self.last or self.last.end < end:
 176             self.last = sample
 177         self.samples.append(sample)
 178
 179     def get_samples(self, start, end):
 180         sample_list = []
 181         for s in self.samples:
 182             if s.start >= start and s.end <= end:
 183                 sample_list.append(s)
 184         return sample_list
 185
 186     def get_value(self, start, end):
 187         value = 0
 188         for sample in self.samples:
 189             value += sample.get_contribution(start, end)
 190         return value
 191
 192 class Sample(object):
 193     def __init__(self, ctx, start, end, value):
 194        self.ctx = ctx
 195        self.start = start
 196        self.end = end
 197        self.value = value
 198
 199     def get_contribution(self, start, end):
 200        # short circuit if not within the bound
 201        if (end < self.start or start > self.end):
 202            return 0
 203
 204        sbound = self.start if start < self.start else start
 205        ebound = self.end if end > self.end else end
 206        ratio = float(ebound-sbound) / (end-start)
 207        return self.value*ratio/ctx.divisor
 208
 209
 210 if __name__ == '__main__':
 211     ctx = parse_args()
 212     series = []
 213     for fn in ctx.FILE:
 214        series.append(TimeSeries(ctx, fn))
 215     if ctx.sum:
 216         print_sums(ctx, series)
 217     elif ctx.average:
 218         print_averages(ctx, series)
 219     elif ctx.full:
 220         print_full(ctx, series)
 221     elif ctx.allstats:
 222         print_all_stats(ctx, series)
 223     else:
 224         print_default(ctx, series)