Spelling fix. Reported by Debian's lintian.
[fio.git] / tools / fiologparser.py
#!/usr/bin/python
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interval
# statistics even when samples are non-uniform. For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.
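#
# Similarly,
#
# fiologparser.py -A *clat*
#
# prints per-interval min, average, median, 90th/95th/99th percentile and
# max values (see print_all_stats below).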

import argparse
import numpy
import scipy

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in milliseconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
29 parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
    args = parser.parse_args()

    return args

def get_ftime(series):
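    # "Final" time: the earliest last-sample end time across all series, so
    # that every interval printed below is covered by data from every log file.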
    ftime = 0
    for ts in series:
        if ftime == 0 or ts.last.end < ftime:
            ftime = ts.last.end
    return ftime

def print_full(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print "%s, %s" % (end, ', '.join(["%0.3f" % i for i in results]))
        start += ctx.interval
        end += ctx.interval

def print_sums(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print "%s, %0.3f" % (end, sum(results))
        start += ctx.interval
        end += ctx.interval

def print_averages(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print "%s, %0.3f" % (end, float(sum(results))/len(results))
        start += ctx.interval
        end += ctx.interval

# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
# # sort -n -t ',' -k 2 small.log
# on your input.
# Sometimes scipy interpolates between two values to get a percentile
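# An illustrative one-pass alternative (a sketch only, not used by this
# script) would be to bucket each sample value by the interval containing
# its end time and then compute the stats per bucket, e.g.:
def bucket_samples_by_interval(series, interval):
    # maps interval index -> list of sample values whose end time falls in
    # that interval; note this assigns each sample wholly to one interval,
    # which differs slightly from get_samples() used below
    buckets = {}
    for ts in series:
        for s in ts.samples:
            buckets.setdefault(s.end // interval, []).append(s.value)
    return buckets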

def my_extend( vlist, val ):
    vlist.extend(val)
    return vlist

array_collapser = lambda vlist, val: my_extend(vlist, val)

def print_all_stats(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while (start < ftime): # for each time interval
        end = ftime if ftime < end else end
        sample_arrays = [ s.get_samples(start, end) for s in series ]
        samplevalue_arrays = []
        for sample_array in sample_arrays:
            samplevalue_arrays.append(
                [ sample.value for sample in sample_array ] )
        #print('samplevalue_arrays len: %d' % len(samplevalue_arrays))
        #print('samplevalue_arrays elements len: ' + \
        #str(map( lambda l: len(l), samplevalue_arrays)))
        # collapse list of lists of sample values into list of sample values
        samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
        #print('samplevalues: ' + str(sorted(samplevalues)))
        # compute all stats and print them
        myarray = scipy.fromiter(samplevalues, float)
        mymin = scipy.amin(myarray)
        myavg = scipy.average(myarray)
        mymedian = scipy.median(myarray)
        my90th = scipy.percentile(myarray, 90)
        my95th = scipy.percentile(myarray, 95)
        my99th = scipy.percentile(myarray, 99)
        mymax = scipy.amax(myarray)
        print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            start, len(samplevalues),
            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval


def print_default(ctx, series):
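    # Default output: print a single number, the time-weighted average of the
    # per-interval sums over the whole run (weights are the interval lengths).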
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    averages = []
    weights = []

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        averages.append(sum(results))
        weights.append(end-start)
        start += ctx.interval
        end += ctx.interval

    total = 0
    for i in xrange(0, len(averages)):
        total += averages[i]*weights[i]
    print '%0.3f' % (total/sum(weights))

class TimeSeries():
    def __init__(self, ctx, fn):
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        f = open(fn, 'r')
        p_time = 0
        for line in f:
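            # each log line holds four comma-separated fields; only the first
            # two (end time in msec and the sampled value) are used here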
            (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
            self.add_sample(p_time, int(time), int(value))
            p_time = int(time)

    def add_sample(self, start, end, value):
        sample = Sample(self.ctx, start, end, value)
        if not self.last or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        sample_list = []
        for s in self.samples:
            if s.start >= start and s.end <= end:
                sample_list.append(s)
        return sample_list

    def get_value(self, start, end):
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value

class Sample():
    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        # short circuit if not within the bound
        if (end < self.start or start > self.end):
            return 0

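        # scale the sample's value by the fraction of the queried window
        # [start, end] that overlaps this sample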
        sbound = self.start if start < self.start else start
        ebound = self.end if end > self.end else end
        ratio = float(ebound-sbound) / (end-start)
        return self.value*ratio/self.ctx.divisor


if __name__ == '__main__':
    ctx = parse_args()
    series = []
    for fn in ctx.FILE:
        series.append(TimeSeries(ctx, fn))
    if ctx.sum:
        print_sums(ctx, series)
    elif ctx.average:
        print_averages(ctx, series)
    elif ctx.full:
        print_full(ctx, series)
    elif ctx.allstats:
        print_all_stats(ctx, series)
    else:
        print_default(ctx, series)