2 # Note: this script is python2 and python 3 compatible.
# This tool lets you parse multiple fio log files and look at interval
7 # statistics even when samples are non-uniform. For instance:
9 # fiologparser.py -s *bw*
11 # to see per-interval sums for all bandwidth logs or:
13 # fiologparser.py -a *clat*
15 # to see per-interval average completion latency.
from __future__ import absolute_import
from __future__ import print_function

import argparse
import math

from functools import reduce
24 parser = argparse.ArgumentParser()
25 parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in seconds.')
26 parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
27 parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
28 parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
29 help='print all stats for each interval.')
30 parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
31 parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
32 parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
33 args = parser.parse_args()
37 def get_ftime(series):
40 if ftime == 0 or ts.last.end < ftime:
44 def print_full(ctx, series):
45 ftime = get_ftime(series)
49 while (start < ftime):
50 end = ftime if ftime < end else end
51 results = [ts.get_value(start, end) for ts in series]
52 print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
56 def print_sums(ctx, series):
57 ftime = get_ftime(series)
61 while (start < ftime):
62 end = ftime if ftime < end else end
63 results = [ts.get_value(start, end) for ts in series]
64 print("%s, %0.3f" % (end, sum(results)))
68 def print_averages(ctx, series):
69 ftime = get_ftime(series)
73 while (start < ftime):
74 end = ftime if ftime < end else end
75 results = [ts.get_value(start, end) for ts in series]
76 print("%s, %0.3f" % (end, float(sum(results))/len(results)))
80 # FIXME: this routine is computationally inefficient
81 # and has O(N^2) behavior
82 # it would be better to make one pass through samples
83 # to segment them into a series of time intervals, and
84 # then compute stats on each time interval instead.
85 # to debug this routine, use
86 # # sort -n -t ',' -k 2 small.log
89 def my_extend( vlist, val ):
93 array_collapser = lambda vlist, val: my_extend(vlist, val)
95 def print_all_stats(ctx, series):
96 ftime = get_ftime(series)
99 print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
100 while (start < ftime): # for each time interval
101 end = ftime if ftime < end else end
102 sample_arrays = [ s.get_samples(start, end) for s in series ]
103 samplevalue_arrays = []
104 for sample_array in sample_arrays:
105 samplevalue_arrays.append(
106 [ sample.value for sample in sample_array ] )
107 # collapse list of lists of sample values into list of sample values
108 samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
109 # compute all stats and print them
110 mymin = min(samplevalues)
111 myavg = sum(samplevalues) / float(len(samplevalues))
112 mymedian = median(samplevalues)
113 my90th = percentile(samplevalues, 0.90)
114 my95th = percentile(samplevalues, 0.95)
115 my99th = percentile(samplevalues, 0.99)
116 mymax = max(samplevalues)
117 print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
118 start, len(samplevalues),
119 mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
121 # advance to next interval
122 start += ctx.interval
127 return float(s[(len(s)-1)/2]+s[(len(s)/2)])/2
129 def percentile(values, p):
136 return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
138 def print_default(ctx, series):
139 ftime = get_ftime(series)
145 while (start < ftime):
146 end = ftime if ftime < end else end
147 results = [ts.get_value(start, end) for ts in series]
148 averages.append(sum(results))
149 weights.append(end-start)
150 start += ctx.interval
154 for i in range(0, len(averages)):
155 total += averages[i]*weights[i]
156 print('%0.3f' % (total/sum(weights)))
158 class TimeSeries(object):
159 def __init__(self, ctx, fn):
165 def read_data(self, fn):
169 (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
170 self.add_sample(p_time, int(time), int(value))
173 def add_sample(self, start, end, value):
174 sample = Sample(ctx, start, end, value)
175 if not self.last or self.last.end < end:
177 self.samples.append(sample)
179 def get_samples(self, start, end):
181 for s in self.samples:
182 if s.start >= start and s.end <= end:
183 sample_list.append(s)
186 def get_value(self, start, end):
188 for sample in self.samples:
189 value += sample.get_contribution(start, end)
192 class Sample(object):
193 def __init__(self, ctx, start, end, value):
199 def get_contribution(self, start, end):
200 # short circuit if not within the bound
201 if (end < self.start or start > self.end):
204 sbound = self.start if start < self.start else start
205 ebound = self.end if end > self.end else end
206 ratio = float(ebound-sbound) / (end-start)
207 return self.value*ratio/ctx.divisor
210 if __name__ == '__main__':
214 series.append(TimeSeries(ctx, fn))
216 print_sums(ctx, series)
218 print_averages(ctx, series)
220 print_full(ctx, series)
222 print_all_stats(ctx, series)
224 print_default(ctx, series)