tools/fiologparser.py

   1 #!/usr/bin/python2.7
   2 # Note: this script is python2 and python 3 compatible.
   3 #
   4 # fiologparser.py
   5 #
   6 # This tool lets you parse multiple fio log files and look at interval
   7 # statistics even when samples are non-uniform.  For instance:
   8 #
   9 # fiologparser.py -s *bw*
  10 #
  11 # to see per-interval sums for all bandwidth logs or:
  12 #
  13 # fiologparser.py -a *clat*
  14 #
  15 # to see per-interval average completion latency.
  16
  17 from __future__ import absolute_import
  18 from __future__ import print_function
  19 import argparse
  20 import math
  21
  22 def parse_args():
  23     parser = argparse.ArgumentParser()
  24     parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in seconds.')
  25     parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
  26     parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
  27     parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
  28                         help='print all stats for each interval.')
  29     parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
  30     parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
  31     parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
  32     args = parser.parse_args()
  33
  34     return args
  35
  36 def get_ftime(series):
  37     ftime = 0
  38     for ts in series:
  39         if ftime == 0 or ts.last.end < ftime:
  40             ftime = ts.last.end
  41     return ftime
  42
  43 def print_full(ctx, series):
  44     ftime = get_ftime(series)
  45     start = 0
  46     end = ctx.interval
  47
  48     while (start < ftime):
  49         end = ftime if ftime < end else end
  50         results = [ts.get_value(start, end) for ts in series]
  51         print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
  52         start += ctx.interval
  53         end += ctx.interval
  54
  55 def print_sums(ctx, series):
  56     ftime = get_ftime(series)
  57     start = 0
  58     end = ctx.interval
  59
  60     while (start < ftime):
  61         end = ftime if ftime < end else end
  62         results = [ts.get_value(start, end) for ts in series]
  63         print("%s, %0.3f" % (end, sum(results)))
  64         start += ctx.interval
  65         end += ctx.interval
  66
  67 def print_averages(ctx, series):
  68     ftime = get_ftime(series)
  69     start = 0
  70     end = ctx.interval
  71
  72     while (start < ftime):
  73         end = ftime if ftime < end else end
  74         results = [ts.get_value(start, end) for ts in series]
  75         print("%s, %0.3f" % (end, float(sum(results))/len(results)))
  76         start += ctx.interval
  77         end += ctx.interval
  78
  79 # FIXME: this routine is computationally inefficient
  80 # and has O(N^2) behavior
  81 # it would be better to make one pass through samples
  82 # to segment them into a series of time intervals, and
  83 # then compute stats on each time interval instead.
  84 # to debug this routine, use
  85 #   # sort -n -t ',' -k 2 small.log
  86 # on your input.
  87
  88 def my_extend( vlist, val ):
  89     vlist.extend(val)
  90     return vlist
  91
  92 array_collapser = lambda vlist, val:  my_extend(vlist, val)
  93
  94 def print_all_stats(ctx, series):
  95     ftime = get_ftime(series)
  96     start = 0
  97     end = ctx.interval
  98     print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
  99     while (start < ftime):  # for each time interval
 100         end = ftime if ftime < end else end
 101         sample_arrays = [ s.get_samples(start, end) for s in series ]
 102         samplevalue_arrays = []
 103         for sample_array in sample_arrays:
 104             samplevalue_arrays.append(
 105                 [ sample.value for sample in sample_array ] )
 106         # collapse list of lists of sample values into list of sample values
 107         samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
 108         # compute all stats and print them
 109         mymin = min(samplevalues)
 110         myavg = sum(samplevalues) / float(len(samplevalues))
 111         mymedian = median(samplevalues)
 112         my90th = percentile(samplevalues, 0.90)
 113         my95th = percentile(samplevalues, 0.95)
 114         my99th = percentile(samplevalues, 0.99)
 115         mymax = max(samplevalues)
 116         print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
 117             start, len(samplevalues),
 118             mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
 119
 120         # advance to next interval
 121         start += ctx.interval
 122         end += ctx.interval
 123
 124 def median(values):
 125     s=sorted(values)
 126     return float(s[(len(s)-1)/2]+s[(len(s)/2)])/2
 127
 128 def percentile(values, p):
 129     s = sorted(values)
 130     k = (len(s)-1) * p
 131     f = math.floor(k)
 132     c = math.ceil(k)
 133     if f == c:
 134         return s[int(k)]
 135     return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
 136
 137 def print_default(ctx, series):
 138     ftime = get_ftime(series)
 139     start = 0
 140     end = ctx.interval
 141     averages = []
 142     weights = []
 143
 144     while (start < ftime):
 145         end = ftime if ftime < end else end
 146         results = [ts.get_value(start, end) for ts in series]
 147         averages.append(sum(results))
 148         weights.append(end-start)
 149         start += ctx.interval
 150         end += ctx.interval
 151
 152     total = 0
 153     for i in range(0, len(averages)):
 154         total += averages[i]*weights[i]
 155     print('%0.3f' % (total/sum(weights)))
 156
 157 class TimeSeries(object):
 158     def __init__(self, ctx, fn):
 159         self.ctx = ctx
 160         self.last = None
 161         self.samples = []
 162         self.read_data(fn)
 163
 164     def read_data(self, fn):
 165         f = open(fn, 'r')
 166         p_time = 0
 167         for line in f:
 168             (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
 169             self.add_sample(p_time, int(time), int(value))
 170             p_time = int(time)
 171
 172     def add_sample(self, start, end, value):
 173         sample = Sample(ctx, start, end, value)
 174         if not self.last or self.last.end < end:
 175             self.last = sample
 176         self.samples.append(sample)
 177
 178     def get_samples(self, start, end):
 179         sample_list = []
 180         for s in self.samples:
 181             if s.start >= start and s.end <= end:
 182                 sample_list.append(s)
 183         return sample_list
 184
 185     def get_value(self, start, end):
 186         value = 0
 187         for sample in self.samples:
 188             value += sample.get_contribution(start, end)
 189         return value
 190
 191 class Sample(object):
 192     def __init__(self, ctx, start, end, value):
 193        self.ctx = ctx
 194        self.start = start
 195        self.end = end
 196        self.value = value
 197
 198     def get_contribution(self, start, end):
 199        # short circuit if not within the bound
 200        if (end < self.start or start > self.end):
 201            return 0
 202
 203        sbound = self.start if start < self.start else start
 204        ebound = self.end if end > self.end else end
 205        ratio = float(ebound-sbound) / (end-start)
 206        return self.value*ratio/ctx.divisor
 207
 208
 209 if __name__ == '__main__':
 210     ctx = parse_args()
 211     series = []
 212     for fn in ctx.FILE:
 213        series.append(TimeSeries(ctx, fn))
 214     if ctx.sum:
 215         print_sums(ctx, series)
 216     elif ctx.average:
 217         print_averages(ctx, series)
 218     elif ctx.full:
 219         print_full(ctx, series)
 220     elif ctx.allstats:
 221         print_all_stats(ctx, series)
 222     else:
 223         print_default(ctx, series)