tools/fiologparser.py

   1 #!/usr/bin/python
   2 #
   3 # fiologparser.py
   4 #
   5 # This tool lets you parse multiple fio log files and look at interval
   6 # statistics even when samples are non-uniform.  For instance:
   7 #
   8 # fiologparser.py -s *bw*
   9 #
  10 # to see per-interval sums for all bandwidth logs or:
  11 #
  12 # fiologparser.py -a *clat*
  13 #
  14 # to see per-interval average completion latency.
  15
  16 import argparse
  17 import numpy
  18 import scipy
  19
  20 def parse_args():
  21     parser = argparse.ArgumentParser()
  22     parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in seconds.')
  23     parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
  24     parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
  25     parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
  26                         help='print all stats for each interval.')
  27     parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
  28     parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
  29     parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
  30     args = parser.parse_args()
  31
  32     return args
  33
  34 def get_ftime(series):
  35     ftime = 0
  36     for ts in series:
  37         if ftime == 0 or ts.last.end < ftime:
  38             ftime = ts.last.end
  39     return ftime
  40
  41 def print_full(ctx, series):
  42     ftime = get_ftime(series)
  43     start = 0
  44     end = ctx.interval
  45
  46     while (start < ftime):
  47         end = ftime if ftime < end else end
  48         results = [ts.get_value(start, end) for ts in series]
  49         print "%s, %s" % (end, ', '.join(["%0.3f" % i for i in results]))
  50         start += ctx.interval
  51         end += ctx.interval
  52
  53 def print_sums(ctx, series):
  54     ftime = get_ftime(series)
  55     start = 0
  56     end = ctx.interval
  57
  58     while (start < ftime):
  59         end = ftime if ftime < end else end
  60         results = [ts.get_value(start, end) for ts in series]
  61         print "%s, %0.3f" % (end, sum(results))
  62         start += ctx.interval
  63         end += ctx.interval
  64
  65 def print_averages(ctx, series):
  66     ftime = get_ftime(series)
  67     start = 0
  68     end = ctx.interval
  69
  70     while (start < ftime):
  71         end = ftime if ftime < end else end
  72         results = [ts.get_value(start, end) for ts in series]
  73         print "%s, %0.3f" % (end, float(sum(results))/len(results))
  74         start += ctx.interval
  75         end += ctx.interval
  76
  77 # FIXME: this routine is computationally inefficient
  78 # and has O(N^2) behavior
  79 # it would be better to make one pass through samples
  80 # to segment them into a series of time intervals, and
  81 # then compute stats on each time interval instead.
  82 # to debug this routine, use
  83 #   # sort -n -t ',' -k 2 small.log
  84 # on your input.
  85 # Sometimes scipy interpolates between two values to get a percentile
  86
  87 def my_extend( vlist, val ):
  88     vlist.extend(val)
  89     return vlist
  90
  91 array_collapser = lambda vlist, val:  my_extend(vlist, val)
  92
  93 def print_all_stats(ctx, series):
  94     ftime = get_ftime(series)
  95     start = 0
  96     end = ctx.interval
  97     print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
  98     while (start < ftime):  # for each time interval
  99         end = ftime if ftime < end else end
 100         sample_arrays = [ s.get_samples(start, end) for s in series ]
 101         samplevalue_arrays = []
 102         for sample_array in sample_arrays:
 103             samplevalue_arrays.append(
 104                 [ sample.value for sample in sample_array ] )
 105         #print('samplevalue_arrays len: %d' % len(samplevalue_arrays))
 106         #print('samplevalue_arrays elements len: ' + \
 107                #str(map( lambda l: len(l), samplevalue_arrays)))
 108         # collapse list of lists of sample values into list of sample values
 109         samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
 110         #print('samplevalues: ' + str(sorted(samplevalues)))
 111         # compute all stats and print them
 112         myarray = scipy.fromiter(samplevalues, float)
 113         mymin = scipy.amin(myarray)
 114         myavg = scipy.average(myarray)
 115         mymedian = scipy.median(myarray)
 116         my90th = scipy.percentile(myarray, 90)
 117         my95th = scipy.percentile(myarray, 95)
 118         my99th = scipy.percentile(myarray, 99)
 119         mymax = scipy.amax(myarray)
 120         print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
 121             start, len(samplevalues),
 122             mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
 123
 124         # advance to next interval
 125         start += ctx.interval
 126         end += ctx.interval
 127
 128
 129 def print_default(ctx, series):
 130     ftime = get_ftime(series)
 131     start = 0
 132     end = ctx.interval
 133     averages = []
 134     weights = []
 135
 136     while (start < ftime):
 137         end = ftime if ftime < end else end
 138         results = [ts.get_value(start, end) for ts in series]
 139         averages.append(sum(results))
 140         weights.append(end-start)
 141         start += ctx.interval
 142         end += ctx.interval
 143
 144     total = 0
 145     for i in xrange(0, len(averages)):
 146         total += averages[i]*weights[i]
 147     print '%0.3f' % (total/sum(weights))
 148
 149 class TimeSeries():
 150     def __init__(self, ctx, fn):
 151         self.ctx = ctx
 152         self.last = None
 153         self.samples = []
 154         self.read_data(fn)
 155
 156     def read_data(self, fn):
 157         f = open(fn, 'r')
 158         p_time = 0
 159         for line in f:
 160             (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
 161             self.add_sample(p_time, int(time), int(value))
 162             p_time = int(time)
 163
 164     def add_sample(self, start, end, value):
 165         sample = Sample(ctx, start, end, value)
 166         if not self.last or self.last.end < end:
 167             self.last = sample
 168         self.samples.append(sample)
 169
 170     def get_samples(self, start, end):
 171         sample_list = []
 172         for s in self.samples:
 173             if s.start >= start and s.end <= end:
 174                 sample_list.append(s)
 175         return sample_list
 176
 177     def get_value(self, start, end):
 178         value = 0
 179         for sample in self.samples:
 180             value += sample.get_contribution(start, end)
 181         return value
 182
 183 class Sample():
 184     def __init__(self, ctx, start, end, value):
 185        self.ctx = ctx
 186        self.start = start
 187        self.end = end
 188        self.value = value
 189
 190     def get_contribution(self, start, end):
 191        # short circuit if not within the bound
 192        if (end < self.start or start > self.end):
 193            return 0
 194
 195        sbound = self.start if start < self.start else start
 196        ebound = self.end if end > self.end else end
 197        ratio = float(ebound-sbound) / (end-start)
 198        return self.value*ratio/ctx.divisor
 199
 200
 201 if __name__ == '__main__':
 202     ctx = parse_args()
 203     series = []
 204     for fn in ctx.FILE:
 205        series.append(TimeSeries(ctx, fn))
 206     if ctx.sum:
 207         print_sums(ctx, series)
 208     elif ctx.average:
 209         print_averages(ctx, series)
 210     elif ctx.full:
 211         print_full(ctx, series)
 212     elif ctx.allstats:
 213         print_all_stats(ctx, series)
 214     else:
 215         print_default(ctx, series)
 216