Merge branch 'master' of https://github.com/celestinechen/fio
[fio.git] / tools / fiologparser.py
CommitLineData
8629f5f5 1#!/usr/bin/env python3
5eac3b00 2# Note: this script is python2 and python 3 compatible.
34831ee3
MN
3#
4# fiologparser.py
5#
6# This tool lets you parse multiple fio log files and look at interval
7# statistics even when samples are non-uniform. For instance:
8#
9# fiologparser.py -s *bw*
10#
11# to see per-interval sums for all bandwidth logs or:
12#
13# fiologparser.py -a *clat*
14#
15# to see per-interval average completion latency.
16
5eac3b00
BD
17from __future__ import absolute_import
18from __future__ import print_function
34831ee3 19import argparse
f4042e41 20import math
8629f5f5 21from functools import reduce
34831ee3
MN
22
def parse_args(argv=None):
    """Parse the command line and return the resulting argparse namespace.

    argv -- optional list of argument strings; when None (the default)
            argparse reads sys.argv[1:], exactly as before.

    Exits through parser.error() when --interval is not positive or
    --divisor is zero, because the reporting loops step by the interval
    and every sample contribution is divided by the divisor.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000,
                        help='interval of time in milliseconds (the unit of fio log timestamps).')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1,
                        help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False,
                        help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False,
                        help='print the sum for each interval.')
    parser.add_argument("FILE", help="fio log files to parse", nargs="+")
    args = parser.parse_args(argv)

    # A non-positive interval would never advance past the end of the logs.
    if args.interval <= 0:
        parser.error('--interval must be a positive integer')
    # Every sample contribution is divided by this value.
    if args.divisor == 0:
        parser.error('--divisor must not be zero')

    return args
36
def get_ftime(series):
    """Return the smallest `last.end` found across the given series.

    Zero doubles as the "nothing chosen yet" marker, so an empty input
    yields 0 and a running value of 0 is always replaced by the next
    series' end time.
    """
    earliest = 0
    for ts in series:
        final = ts.last.end
        # Keep the current pick unless it is unset or this series ends sooner.
        if earliest != 0 and final >= earliest:
            continue
        earliest = final
    return earliest
43
def print_full(ctx, series):
    """Print one line per interval: the interval end, then each series' value.

    Intervals are ctx.interval wide, start at 0 and stop at the time
    returned by get_ftime(); the final interval is clipped to that time.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    start = 0

    while start < ftime:
        # Clip the last window so it never extends past ftime.
        end = min(start + step, ftime)
        columns = []
        for ts in series:
            columns.append("%0.3f" % ts.get_value(start, end))
        print("%s, %s" % (end, ', '.join(columns)))
        start += step
55
def print_sums(ctx, series):
    """Print, for every interval, its end time and the sum of all series values.

    Intervals are ctx.interval wide, start at 0 and stop at the time
    returned by get_ftime(); the final interval is clipped to that time.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    start = 0

    while start < ftime:
        # Clip the last window so it never extends past ftime.
        end = min(start + step, ftime)
        per_series = [ts.get_value(start, end) for ts in series]
        total = sum(per_series)
        print("%s, %0.3f" % (end, total))
        start += step
67
def print_averages(ctx, series):
    """Print, for every interval, its end time and the mean of the series values.

    The mean is taken across the series (one value per series), not
    across individual samples. Intervals are ctx.interval wide, start
    at 0 and the final one is clipped to the time from get_ftime().
    """
    ftime = get_ftime(series)
    step = ctx.interval
    start = 0

    while start < ftime:
        # Clip the last window so it never extends past ftime.
        end = min(start + step, ftime)
        per_series = [ts.get_value(start, end) for ts in series]
        mean = float(sum(per_series)) / len(per_series)
        print("%s, %0.3f" % (end, mean))
        start += step
79
8c4693e2
BE
80# FIXME: this routine is computationally inefficient
81# and has O(N^2) behavior
82# it would be better to make one pass through samples
83# to segment them into a series of time intervals, and
84# then compute stats on each time interval instead.
85# to debug this routine, use
86# # sort -n -t ',' -k 2 small.log
87# on your input.
8c4693e2
BE
88
def my_extend(vlist, val):
    """Append every item of val to vlist in place and return vlist.

    Returning the list lets the function be used as the folding step
    of reduce().
    """
    vlist.extend(val)
    return vlist


# Two-argument reducer used to flatten a list of lists.
array_collapser = my_extend
94
def print_all_stats(ctx, series):
    """Print min/avg/median/percentile/max of the raw samples per interval.

    For each ctx.interval-wide window (the last one clipped to the time
    from get_ftime()), the values of all samples that get_samples()
    reports for that window are pooled across every series and
    summarised on one output line, keyed by the window start time.

    A window that contains no complete sample is still reported, with a
    sample count of 0 and 'nan' in every statistic column, instead of
    aborting the whole run when min() is handed an empty list.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        end = ftime if ftime < end else end
        # Pool the sample values of every series for this window.
        samplevalues = [sample.value
                        for s in series
                        for sample in s.get_samples(start, end)]

        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # Nothing to summarise; keep the column count stable.
            stats = (float('nan'),) * 7

        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
124
f4042e41
MN
def median(values):
    """Return the median of values as a float.

    For an even number of values the two middle elements are averaged.
    Floor division keeps the indices integral on Python 3, where `/`
    yields a float that cannot be used as a list index.

    Raises IndexError when values is empty.
    """
    s = sorted(values)
    lower = s[(len(s) - 1) // 2]
    upper = s[len(s) // 2]
    return float(lower + upper) / 2
128
def percentile(values, p):
    """Return the p-th percentile of values, with p given as a 0..1 fraction.

    The rank (len - 1) * p is located in the sorted data; when it falls
    between two positions the result is interpolated linearly between
    the two neighbouring elements.
    """
    ordered = sorted(values)
    rank = (len(ordered) - 1) * p
    below = math.floor(rank)
    above = math.ceil(rank)
    if below != above:
        # Weight each neighbour by its distance from the opposite side.
        low_part = ordered[int(below)] * (above - rank)
        high_part = ordered[int(above)] * (rank - below)
        return low_part + high_part
    return ordered[int(rank)]
34831ee3
MN
137
def print_default(ctx, series):
    """Print a single number: the duration-weighted mean of the interval sums.

    Each ctx.interval-wide window (the last one clipped to the time
    from get_ftime()) contributes the sum of all series values for that
    window, weighted by the window's length.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    interval_sums = []
    durations = []

    start = 0
    while start < ftime:
        # Clip the last window so it never extends past ftime.
        end = min(start + step, ftime)
        interval_sums.append(sum(ts.get_value(start, end) for ts in series))
        durations.append(end - start)
        start += step

    total = 0
    for value, duration in zip(interval_sums, durations):
        total += value * duration
    print('%0.3f' % (total / sum(durations)))
34831ee3 157
class TimeSeries(object):
    """The samples read from one log file.

    Attributes:
      ctx     -- options object handed to every Sample created here.
      last    -- the Sample with the greatest end time seen so far, or
                 None while no sample has been added.
      samples -- every Sample, in file order.
    """

    def __init__(self, ctx, fn):
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Load samples from the log file named fn.

        The first two ', '-separated fields of each line are read as an
        integer end time and an integer value; a sample spans from the
        previous line's time (0 for the first line) to its own time.
        Blank lines are skipped. The file is closed even if parsing
        fails.
        """
        p_time = 0
        with open(fn, 'r') as f:
            for line in f:
                line = line.rstrip('\r\n')
                if not line.strip():
                    continue
                (time, value) = line.split(', ')[:2]
                end = int(time)
                self.add_sample(p_time, end, int(value))
                p_time = end

    def add_sample(self, start, end, value):
        """Record a sample covering start..end and update self.last."""
        # Use the context stored on the instance, not a module global, so
        # the class also works when this file is imported.
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely inside start..end."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the summed contribution of all samples to start..end."""
        return sum(sample.get_contribution(start, end)
                   for sample in self.samples)


class Sample(object):
    """One logged value together with the time span it covers."""

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the window start..end.

        The value is scaled by the fraction of the window that the
        sample's span overlaps, then divided by ctx.divisor. A sample
        that does not touch the window contributes 0.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Read the divisor from the stored context, not a module global.
        return self.value * ratio / self.ctx.divisor
208
209
if __name__ == '__main__':
    # Parse the options, load every log file, then run the first report
    # mode that was requested (sum, average, full, all stats), falling
    # back to the single-number default report.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    if ctx.sum:
        report = print_sums
    elif ctx.average:
        report = print_averages
    elif ctx.full:
        report = print_full
    elif ctx.allstats:
        report = print_all_stats
    else:
        report = print_default

    report(ctx, series)