Fio 3.15
[fio.git] / tools / fiologparser.py
CommitLineData
60023ade 1#!/usr/bin/python2.7
5eac3b00 2# Note: this script is python2 and python 3 compatible.
34831ee3
MN
3#
4# fiologparser.py
5#
6# This tool lets you parse multiple fio log files and look at interval
7# statistics even when samples are non-uniform. For instance:
8#
9# fiologparser.py -s *bw*
10#
11# to see per-interval sums for all bandwidth logs or:
12#
13# fiologparser.py -a *clat*
14#
15# to see per-interval average completion latency.
16
5eac3b00
BD
17from __future__ import absolute_import
18from __future__ import print_function
34831ee3 19import argparse
f4042e41 20import math
34831ee3
MN
21
def parse_args(argv=None):
    """Parse the command line and return the resulting argparse namespace.

    argv: optional list of argument strings; when None (the default)
          argparse falls back to sys.argv[1:], as before.

    The returned namespace carries: interval, divisor, full, allstats,
    average, sum and FILE (a list with at least one log file name).
    Exits via parser.error() when --interval is not positive or --divisor
    is zero, because the interval loops would never terminate and the
    per-sample division would fail.
    """
    parser = argparse.ArgumentParser()
    # fio log timestamps are in milliseconds, so the interval is too.
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000,
                        help='interval of time in milliseconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1,
                        help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False,
                        help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False,
                        help='print the sum for each interval.')
    parser.add_argument("FILE", help="fio log output files to parse", nargs="+")
    args = parser.parse_args(argv)

    # Reject values that would hang or crash the reporting loops later on.
    if args.interval <= 0:
        parser.error('--interval must be a positive integer')
    if args.divisor == 0:
        parser.error('--divisor must be non-zero')

    return args
35
def get_ftime(series):
    """Return the smallest final sample end time across all series.

    series: iterable of objects exposing a `last` attribute that is either
            None (no samples recorded) or an object with an `end` time.

    Series without any sample are ignored. Returns 0 when there is no
    series with samples, so callers' `while start < ftime` loops simply do
    not run. The result does not depend on the order of `series`.
    """
    end_times = [ts.last.end for ts in series if ts.last is not None]
    if not end_times:
        return 0
    return min(end_times)
42
def print_full(ctx, series):
    """Print one line per interval with the value of every series.

    Each line is the interval end time followed by the comma-separated
    per-series values (three decimals), in the order of `series`.
    Intervals are ctx.interval wide and stop at get_ftime(series).
    """
    horizon = get_ftime(series)
    lo = 0
    hi = ctx.interval

    while lo < horizon:
        # The final interval may be shorter than ctx.interval: clamp it.
        if horizon < hi:
            hi = horizon
        values = [ts.get_value(lo, hi) for ts in series]
        columns = ', '.join("%0.3f" % v for v in values)
        print("%s, %s" % (hi, columns))
        lo += ctx.interval
        hi += ctx.interval
54
def print_sums(ctx, series):
    """Print one line per interval with the sum over all series.

    Each line is the interval end time followed by the summed value
    (three decimals). Intervals are ctx.interval wide and stop at
    get_ftime(series).
    """
    horizon = get_ftime(series)
    lo = 0
    hi = ctx.interval

    while lo < horizon:
        # The final interval may be shorter than ctx.interval: clamp it.
        if horizon < hi:
            hi = horizon
        values = [ts.get_value(lo, hi) for ts in series]
        interval_total = sum(values)
        print("%s, %0.3f" % (hi, interval_total))
        lo += ctx.interval
        hi += ctx.interval
66
def print_averages(ctx, series):
    """Print one line per interval with the mean value across series.

    Each line is the interval end time followed by the per-series values
    averaged over the number of series (three decimals). Intervals are
    ctx.interval wide and stop at get_ftime(series).
    """
    horizon = get_ftime(series)
    lo = 0
    hi = ctx.interval

    while lo < horizon:
        # The final interval may be shorter than ctx.interval: clamp it.
        if horizon < hi:
            hi = horizon
        values = [ts.get_value(lo, hi) for ts in series]
        mean = float(sum(values)) / len(values)
        print("%s, %0.3f" % (hi, mean))
        lo += ctx.interval
        hi += ctx.interval
78
8c4693e2
BE
79# FIXME: this routine is computationally inefficient
80# and has O(N^2) behavior
81# it would be better to make one pass through samples
82# to segment them into a series of time intervals, and
83# then compute stats on each time interval instead.
84# to debug this routine, use
85# # sort -n -t ',' -k 2 small.log
86# on your input.
8c4693e2
BE
87
def my_extend(vlist, val):
    """Append every element of `val` to `vlist` in place and return `vlist`.

    list.extend() returns None, so this wrapper exists to make the
    operation usable where the extended list itself is needed.
    """
    vlist.extend(val)
    return vlist


def array_collapser(vlist, val):
    """Reducer step that folds the list `val` into the accumulator `vlist`.

    Written as a named function rather than a lambda bound to a name so it
    has a docstring and a useful name in tracebacks.
    """
    return my_extend(vlist, val)
93
def print_all_stats(ctx, series):
    """Print min/avg/median/percentile/max statistics for every interval.

    A header line is printed first, then one line per interval with:
    interval start time, number of samples, min, avg, median, 90th, 95th
    and 99th percentile, and max. The samples of all series that lie
    entirely inside the interval (per TimeSeries.get_samples) are pooled.
    Intervals are ctx.interval wide and stop at get_ftime(series).

    An interval that contains no sample is reported with a sample count of
    0 and 'nan' for every statistic instead of aborting the whole report.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    undefined = (float('nan'),) * 7
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        # The final interval may be shorter than ctx.interval: clamp it.
        end = min(end, ftime)

        # Pool the sample values of all series, keeping series order.
        # (A plain comprehension avoids reduce(), which is not a builtin
        # on Python 3.)
        samplevalues = [sample.value
                        for ts in series
                        for sample in ts.get_samples(start, end)]

        # compute all stats and print them
        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # min()/max() and the average are undefined without samples.
            stats = undefined
        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
123
f4042e41
MN
def median(values):
    """Return the median of `values` as a float.

    For an odd number of values this is the middle element of the sorted
    values; for an even number it is the mean of the two middle elements.
    Raises IndexError when `values` is empty.
    """
    s = sorted(values)
    # Floor division keeps both indices integers on Python 3, where `/`
    # yields a float that cannot be used as a list index.
    lower = s[(len(s) - 1) // 2]
    upper = s[len(s) // 2]
    return float(lower + upper) / 2
127
def percentile(values, p):
    """Return the p-th percentile of `values`, with p given as a fraction.

    The fractional rank (len - 1) * p is located in the sorted values.
    When the rank is a whole number the element at that position is
    returned; otherwise the two neighbouring elements are combined,
    weighted by how close the rank is to each of them.
    """
    ordered = sorted(values)
    rank = (len(ordered) - 1) * p
    below = math.floor(rank)
    above = math.ceil(rank)
    if below != above:
        # Rank falls between two elements: blend them linearly.
        low_part = ordered[int(below)] * (above - rank)
        high_part = ordered[int(above)] * (rank - below)
        return low_part + high_part
    return ordered[int(rank)]
34831ee3
MN
136
def print_default(ctx, series):
    """Print a single time-weighted average of the per-interval sums.

    For every interval (ctx.interval wide, stopping at get_ftime(series))
    the values of all series are summed; those sums are then averaged
    using each interval's length as its weight, so a shorter final
    interval counts proportionally less. The result is printed with three
    decimals.

    When no interval is covered at all (get_ftime() returned 0) there is
    nothing to weight, and 0.000 is printed instead of dividing by zero.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    weighted_total = 0
    total_weight = 0

    while start < ftime:
        # The final interval may be shorter than ctx.interval: clamp it.
        end = min(end, ftime)
        interval_sum = sum([ts.get_value(start, end) for ts in series])
        weight = end - start
        weighted_total += interval_sum * weight
        total_weight += weight
        start += ctx.interval
        end += ctx.interval

    if total_weight == 0:
        print('%0.3f' % 0.0)
    else:
        print('%0.3f' % (weighted_total / total_weight))
34831ee3 156
class TimeSeries(object):
    """The samples read from one fio log file.

    Each log line yields one Sample that spans from the previous line's
    time (0 for the first line) to the line's own time.
    """

    def __init__(self, ctx, fn):
        """Read the log file `fn`; `ctx` is the parsed command-line namespace."""
        # ctx: options namespace, handed on to every Sample.
        self.ctx = ctx
        # last: the Sample with the greatest end time seen so far, or None.
        self.last = None
        # samples: all Samples in file order.
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Parse the log file `fn` and add one sample per non-blank line.

        Only the first two comma-separated columns (time, value) are used;
        any further columns are ignored, so logs with a different number
        of trailing columns parse as well. Blank lines are skipped.
        Raises ValueError for a line with fewer than two columns or with
        non-integer time/value.
        """
        p_time = 0
        # `with` guarantees the file is closed even if a line is malformed.
        with open(fn, 'r') as f:
            for lineno, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                fields = line.split(',')
                if len(fields) < 2:
                    raise ValueError('%s:%d: expected "time, value, ..." but got %r'
                                     % (fn, lineno, line))
                # int() tolerates the blank that follows each comma.
                end = int(fields[0])
                value = int(fields[1])
                self.add_sample(p_time, end, value)
                p_time = end

    def add_sample(self, start, end, value):
        """Append a Sample covering [start, end] and track the latest one."""
        # Use the instance's own ctx, not a module-level global, so the
        # class also works when this file is imported as a module.
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely within [start, end], in order."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the summed contribution of all samples to [start, end]."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value
190
class Sample(object):
    """One log value together with the time span [start, end] it covers."""

    def __init__(self, ctx, start, end, value):
        # ctx: options namespace; its `divisor` scales every contribution.
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the window [start, end].

        The share is the sample value scaled by the fraction of the window
        that overlaps the sample's own span, divided by ctx.divisor.
        Returns 0 when the sample lies outside the window or when the
        window is empty (end <= start).
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0
        # An empty window has no length to take a fraction of.
        if end <= start:
            return 0

        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Use the instance's own ctx, not a module-level global, so the
        # class also works when this file is imported as a module.
        return self.value * ratio / self.ctx.divisor
207
208
if __name__ == '__main__':
    # Script entry point: parse options, load every log file, then run the
    # first report whose flag is set (sum, average, full, all-stats); fall
    # back to the single weighted average when none is set.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    # Ordered by precedence: the first enabled flag wins.
    modes = (
        (ctx.sum, print_sums),
        (ctx.average, print_averages),
        (ctx.full, print_full),
        (ctx.allstats, print_all_stats),
    )
    for enabled, printer in modes:
        if enabled:
            printer(ctx, series)
            break
    else:
        print_default(ctx, series)