Commit | Line | Data |
---|---|---|
642483db | 1 | #!/usr/bin/python |
34831ee3 MN |
2 | # |
3 | # fiologparser.py | |
4 | # | |
5 | # This tool lets you parse multiple fio log files and look at interval | |
6 | # statistics even when samples are non-uniform. For instance: | |
7 | # | |
8 | # fiologparser.py -s *bw* | |
9 | # | |
10 | # to see per-interval sums for all bandwidth logs or: | |
11 | # | |
12 | # fiologparser.py -a *clat* | |
13 | # | |
14 | # to see per-interval average completion latency. | |
15 | ||
16 | import argparse | |
8c4693e2 BE |
17 | import numpy |
18 | import scipy | |
34831ee3 MN |
19 | |
def parse_args():
    """Parse the command line and return the argparse namespace.

    The namespace carries ``interval`` and ``divisor`` (ints), the boolean
    mode flags ``full``, ``allstats``, ``average`` and ``sum``, and ``FILE``,
    a list of one or more log file paths.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-i', '--interval', required=False, type=int,
                     default=1000, help='interval of time in seconds.')
    cli.add_argument('-d', '--divisor', required=False, type=int,
                     default=1, help='divide the results by this value.')
    cli.add_argument('-f', '--full', dest='full', action='store_true',
                     default=False, help='print full output.')
    cli.add_argument('-A', '--all', dest='allstats', action='store_true',
                     default=False,
                     help='print all stats for each interval.')
    cli.add_argument('-a', '--average', dest='average', action='store_true',
                     default=False,
                     help='print the average for each interval.')
    cli.add_argument('-s', '--sum', dest='sum', action='store_true',
                     default=False, help='print the sum for each interval.')
    cli.add_argument("FILE", help="collectl log output files to parse",
                     nargs="+")
    return cli.parse_args()
33 | ||
def get_ftime(series):
    """Return the smallest ``last.end`` found among the given series.

    Returns 0 for an empty ``series``. Because 0 also serves as the
    "nothing seen yet" marker, a running minimum of 0 is simply replaced by
    the next series' end value.
    """
    earliest = 0
    for ts in series:
        last_end = ts.last.end
        if earliest == 0 or last_end < earliest:
            earliest = last_end
    return earliest
40 | ||
def print_full(ctx, series):
    """Print, for every interval, the interval end and each series' value.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the value
    returned by ``get_ftime(series)``; the last interval is clamped to that
    value. Each output line is ``<end>, <v1>, <v2>, ...`` with the values
    formatted to three decimals, one value per series in input order.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while start < ftime:
        # Never report past the common end of the logs.
        end = min(end, ftime)
        results = [ts.get_value(start, end) for ts in series]
        # Single parenthesised argument: same output on Python 2 and 3.
        print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
        start += ctx.interval
        end += ctx.interval
52 | ||
def print_sums(ctx, series):
    """Print, for every interval, the interval end and the sum over series.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the value
    returned by ``get_ftime(series)``; the last interval is clamped to that
    value. Each output line is ``<end>, <sum>`` where ``<sum>`` adds up
    ``get_value(start, end)`` of every series, formatted to three decimals.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while start < ftime:
        # Never report past the common end of the logs.
        end = min(end, ftime)
        results = [ts.get_value(start, end) for ts in series]
        # Single parenthesised argument: same output on Python 2 and 3.
        print("%s, %0.3f" % (end, sum(results)))
        start += ctx.interval
        end += ctx.interval
64 | ||
def print_averages(ctx, series):
    """Print, for every interval, the interval end and the mean over series.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the value
    returned by ``get_ftime(series)``; the last interval is clamped to that
    value. Each output line is ``<end>, <avg>`` where ``<avg>`` is the sum of
    ``get_value(start, end)`` over all series divided by the number of
    series, formatted to three decimals.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while start < ftime:
        # Never report past the common end of the logs.
        end = min(end, ftime)
        results = [ts.get_value(start, end) for ts in series]
        # float() keeps this a true division on Python 2 as well.
        mean = float(sum(results)) / len(results)
        # Single parenthesised argument: same output on Python 2 and 3.
        print("%s, %0.3f" % (end, mean))
        start += ctx.interval
        end += ctx.interval
76 | ||
8c4693e2 BE |
77 | # FIXME: this routine is computationally inefficient |
78 | # and has O(N^2) behavior | |
79 | # it would be better to make one pass through samples | |
80 | # to segment them into a series of time intervals, and | |
81 | # then compute stats on each time interval instead. | |
82 | # to debug this routine, use | |
83 | # # sort -n -t ',' -k 2 small.log | |
84 | # on your input. | |
85 | # Sometimes scipy interpolates between two values to get a percentile | |
86 | ||
def my_extend(vlist, val):
    """Append every item of ``val`` to ``vlist`` in place; return ``vlist``."""
    vlist.extend(val)
    return vlist


def array_collapser(vlist, val):
    """Folding step that merges ``val`` into the accumulator ``vlist``.

    Thin wrapper around ``my_extend`` with the (accumulator, item) signature
    expected by ``reduce``.
    """
    return my_extend(vlist, val)
92 | ||
def print_all_stats(ctx, series):
    """Print per-interval statistics over the raw sample values.

    Prints a header line, then one line per interval containing the interval
    start, the number of samples, and min, average, median, 90th, 95th and
    99th percentile, and max of the sample values. Only samples returned by
    ``get_samples(start, end)`` of each series are considered; the values of
    all series are pooled together.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the value
    returned by ``get_ftime(series)``; the last interval is clamped to that
    value. An interval without any sample is reported with a count of 0 and
    ``nan`` for every statistic instead of aborting the whole report.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        end = min(end, ftime)

        # Pool the sample values of every series into one flat list.
        samplevalues = [sample.value
                        for s in series
                        for sample in s.get_samples(start, end)]

        if samplevalues:
            # NumPy is called directly: the same functions exposed through
            # the top-level scipy namespace are deprecated aliases.
            myarray = numpy.fromiter(samplevalues, float)
            my90th, my95th, my99th = numpy.percentile(myarray, [90, 95, 99])
            stats = (numpy.min(myarray), numpy.average(myarray),
                     numpy.median(myarray), my90th, my95th, my99th,
                     numpy.max(myarray))
        else:
            # min/max of an empty array raise ValueError; report the gap.
            stats = (float('nan'),) * 7

        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
127 | ||
34831ee3 MN |
128 | |
def print_default(ctx, series):
    """Print a single interval-length-weighted average of per-interval sums.

    For every interval the ``get_value(start, end)`` results of all series
    are summed; each sum is weighted by the interval's length (``end -
    start``, which is shorter for a clamped last interval) and the weighted
    mean is printed with three decimals.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the value
    returned by ``get_ftime(series)``.

    Raises:
        ZeroDivisionError: if there is no interval at all, i.e.
            ``get_ftime(series)`` is 0.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    weighted_total = 0
    total_weight = 0

    while start < ftime:
        end = min(end, ftime)
        interval_sum = sum([ts.get_value(start, end) for ts in series])
        weight = end - start
        weighted_total += interval_sum * weight
        total_weight += weight
        start += ctx.interval
        end += ctx.interval

    # Single parenthesised argument: same output on Python 2 and 3.
    print('%0.3f' % (weighted_total / total_weight))
148 | ||
class TimeSeries(object):
    """Samples loaded from one log file.

    Attributes:
        ctx: the parsed command-line namespace handed to every Sample.
        last: the Sample with the greatest ``end`` seen so far, or None.
        samples: all Samples in file order.
    """

    def __init__(self, ctx, fn):
        """Create the series and immediately load the file ``fn``."""
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Read ``fn`` line by line and add one Sample per line.

        Each line is split on ``', '``; the first field is the sample's end
        time and the second its value (both parsed as ints). Any further
        fields are ignored. A sample starts where the previous line's sample
        ended (0 for the first line).
        """
        p_time = 0
        # The context manager closes the file even if a line fails to parse.
        with open(fn, 'r') as f:
            for line in f:
                fields = line.rstrip('\r\n').split(', ')
                t_end = int(fields[0])
                value = int(fields[1])
                self.add_sample(p_time, t_end, value)
                p_time = t_end

    def add_sample(self, start, end, value):
        """Append a Sample covering ``start``..``end`` and track the latest."""
        # Use the context stored on the instance, not a module-level global,
        # so the class also works when this file is imported.
        sample = Sample(self.ctx, start, end, value)
        if not self.last or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely within ``start``..``end``."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the summed contribution of all samples to the window."""
        return sum([s.get_contribution(start, end) for s in self.samples])
182 | ||
class Sample(object):
    """One logged value covering the time span ``start``..``end``.

    Attributes:
        ctx: the parsed command-line namespace; ``ctx.divisor`` scales
            every contribution.
        start, end: bounds of the span this sample covers.
        value: the logged value.
    """

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the window ``start``..``end``.

        The share is ``value`` multiplied by the fraction of the window that
        overlaps the sample's own span, divided by ``ctx.divisor``. Returns 0
        when the window and the sample do not overlap.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Read the divisor from the stored context, not a module-level
        # global, so the class also works when this file is imported.
        return self.value * ratio / self.ctx.divisor
199 | ||
200 | ||
if __name__ == '__main__':
    # Script entry point: load every log file named on the command line and
    # run exactly one report.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    # When several mode flags are given the first match wins, in the order
    # sum, average, full, all; with no flag the default report is printed.
    if ctx.sum:
        report = print_sums
    elif ctx.average:
        report = print_averages
    elif ctx.full:
        report = print_full
    elif ctx.allstats:
        report = print_all_stats
    else:
        report = print_default
    report(ctx, series)
216 |