Commit | Line | Data |
---|---|---|
642483db | 1 | #!/usr/bin/python |
34831ee3 MN |
2 | # |
3 | # fiologparser.py | |
4 | # | |
5 | # This tool lets you parse multiple fio log files and look at interval | |
6 | # statistics even when samples are non-uniform. For instance: | |
7 | # | |
8 | # fiologparser.py -s *bw* | |
9 | # | |
10 | # to see per-interval sums for all bandwidth logs or: | |
11 | # | |
12 | # fiologparser.py -a *clat* | |
13 | # | |
14 | # to see per-interval average completion latency. | |
15 | ||
16 | import argparse | |
f4042e41 | 17 | import math |
34831ee3 MN |
18 | |
def parse_args(argv=None):
    """Parse the command line options of fiologparser.

    Args:
        argv: Optional list of argument strings. When None (the default,
            which is what existing callers rely on) argparse reads the
            process command line.

    Returns:
        The argparse namespace with the attributes ``interval``,
        ``divisor``, ``full``, ``allstats``, ``average``, ``sum`` and
        ``FILE`` (a list with at least one path).

    Raises:
        SystemExit: raised by argparse for invalid arguments, including a
            non-positive interval or a zero divisor.
    """

    def positive_int(text):
        # The reporting loops step forward by `interval` until they pass
        # the end of the data, so zero or a negative value never finishes.
        value = int(text)
        if value <= 0:
            raise argparse.ArgumentTypeError(
                '%r is not a positive integer' % text)
        return value

    def nonzero_int(text):
        # Every sample contribution is divided by this value.
        value = int(text)
        if value == 0:
            raise argparse.ArgumentTypeError('the divisor must not be 0')
        return value

    parser = argparse.ArgumentParser()
    # NOTE(review): the help text says seconds while the default is 1000;
    # the interval is compared directly against the first column of the
    # log files, so its real unit is whatever that column uses -- confirm.
    parser.add_argument('-i', '--interval', required=False, type=positive_int,
                        default=1000, help='interval of time in seconds.')
    parser.add_argument('-d', '--divisor', required=False, type=nonzero_int,
                        default=1, help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true',
                        default=False, help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true',
                        default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average',
                        action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true',
                        default=False, help='print the sum for each interval.')
    parser.add_argument("FILE", help="fio log files to parse", nargs="+")
    return parser.parse_args(argv)
32 | ||
def get_ftime(series):
    """Return the earliest final sample end time among all series.

    The reporting functions use this as the common cut-off so that every
    interval they print is covered by every series.

    Args:
        series: Iterable of objects whose ``last`` attribute is either None
            (no samples were read) or an object with an ``end`` attribute.

    Returns:
        The smallest ``last.end`` found, or 0 when no series has samples.
    """
    # Series without any sample impose no bound and are skipped instead of
    # failing on ``None.end``.
    end_times = [ts.last.end for ts in series if ts.last is not None]
    # min() instead of a "0 means unset" sentinel: with the sentinel a
    # series that legitimately ends at 0 was overwritten by the next one.
    return min(end_times) if end_times else 0
39 | ||
def print_full(ctx, series):
    """Print, for every interval, the value of each series side by side.

    Each output line is ``<interval end>, <v1>, <v2>, ...`` with one value
    per series, formatted with three decimals.

    Args:
        ctx: Parsed options; only ``ctx.interval`` is read here.
        series: List of objects providing ``get_value(start, end)``.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while start < ftime:
        # The last interval is clipped to the common end of the data.
        end = min(end, ftime)
        results = [ts.get_value(start, end) for ts in series]
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("%s, %s" % (end, ', '.join("%0.3f" % value for value in results)))
        start += ctx.interval
        end += ctx.interval
51 | ||
def print_sums(ctx, series):
    """Print, for every interval, the sum of the values of all series.

    Each output line is ``<interval end>, <sum>`` with the sum formatted
    with three decimals.

    Args:
        ctx: Parsed options; only ``ctx.interval`` is read here.
        series: List of objects providing ``get_value(start, end)``.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while start < ftime:
        # The last interval is clipped to the common end of the data.
        end = min(end, ftime)
        results = [ts.get_value(start, end) for ts in series]
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("%s, %0.3f" % (end, sum(results)))
        start += ctx.interval
        end += ctx.interval
63 | ||
def print_averages(ctx, series):
    """Print, for every interval, the mean of the values across series.

    Each output line is ``<interval end>, <mean>`` where the mean is the
    sum of the per-series values divided by the number of series.

    Args:
        ctx: Parsed options; only ``ctx.interval`` is read here.
        series: List of objects providing ``get_value(start, end)``.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while start < ftime:
        # The last interval is clipped to the common end of the data.
        end = min(end, ftime)
        results = [ts.get_value(start, end) for ts in series]
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("%s, %0.3f" % (end, float(sum(results)) / len(results)))
        start += ctx.interval
        end += ctx.interval
75 | ||
8c4693e2 BE |
76 | # FIXME: this routine is computationally inefficient |
77 | # and has O(N^2) behavior | |
78 | # it would be better to make one pass through samples | |
79 | # to segment them into a series of time intervals, and | |
80 | # then compute stats on each time interval instead. | |
81 | # to debug this routine, use | |
82 | # # sort -n -t ',' -k 2 small.log | |
83 | # on your input. | |
8c4693e2 BE |
84 | |
def my_extend(vlist, val):
    """Append every item of ``val`` to ``vlist`` in place and return ``vlist``.

    Returning the list (``list.extend`` itself returns None) makes the
    function usable as the folding step of a reduce().
    """
    vlist.extend(val)
    return vlist


# Kept as a separate name for existing users; it is the same function, so
# no forwarding lambda is needed.
array_collapser = my_extend
90 | ||
def print_all_stats(ctx, series):
    """Print count, min, mean, median, percentiles and max per interval.

    For each interval the raw sample values of all series that fall
    completely inside it (as reported by ``get_samples``) are pooled and
    summarised on one line, after a header line naming the columns.

    An interval without any such sample is reported with a count of 0 and
    ``nan`` for every statistic, so the rows stay evenly spaced in time.

    Args:
        ctx: Parsed options; only ``ctx.interval`` is read here.
        series: List of objects providing ``get_samples(start, end)``
            returning objects with a ``value`` attribute.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        # The last interval is clipped to the common end of the data.
        end = min(end, ftime)
        # Flatten the per-series sample lists into one list of values.
        # A comprehension avoids reduce(), which is not a builtin on
        # Python 3.
        samplevalues = [sample.value
                        for s in series
                        for sample in s.get_samples(start, end)]
        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # min()/max() and the mean are undefined for an empty list.
            stats = (float('nan'),) * 7
        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
120 | ||
f4042e41 MN |
def median(values):
    """Return the median of ``values`` as a float.

    For an even number of items the two middle items are averaged.

    Args:
        values: Non-empty iterable of numbers; it is not modified.

    Raises:
        IndexError: if ``values`` is empty.
    """
    ordered = sorted(values)
    count = len(ordered)
    # Floor division keeps the indices integers on Python 3 as well; for an
    # odd count both indices refer to the same middle element.
    lower = ordered[(count - 1) // 2]
    upper = ordered[count // 2]
    return float(lower + upper) / 2
124 | ||
def percentile(values, p):
    """Return the ``p`` quantile of ``values`` using linear interpolation.

    The position ``(len(values) - 1) * p`` is located in the sorted data;
    when it falls between two items the result is interpolated between
    them, otherwise the item at that position is returned unchanged.

    Args:
        values: Non-empty iterable of numbers; it is not modified.
        p: Fraction between 0 and 1 inclusive (0.95 for the 95th
            percentile).

    Raises:
        ValueError: if ``p`` is outside [0, 1]. Without this check a
            negative ``p`` would index from the end of the list and
            return an unrelated value.
        IndexError: if ``values`` is empty.
    """
    if not 0 <= p <= 1:
        raise ValueError('p must be between 0 and 1, got %r' % (p,))
    ordered = sorted(values)
    position = (len(ordered) - 1) * p
    below = math.floor(position)
    above = math.ceil(position)
    if below == above:
        # The position is exactly on an item; no interpolation needed.
        return ordered[int(position)]
    # Weight each neighbour by its closeness to the position.
    return (ordered[int(below)] * (above - position)
            + ordered[int(above)] * (position - below))
34831ee3 MN |
133 | |
def print_default(ctx, series):
    """Print one number: the time-weighted mean of the per-interval sums.

    For every interval the values of all series are summed; those sums are
    then averaged, each weighted by the length of its interval (the last
    interval may be shorter than ``ctx.interval``).

    Nothing is printed when there is no interval to report, because the
    weighted mean is undefined without data.

    Args:
        ctx: Parsed options; only ``ctx.interval`` is read here.
        series: List of objects providing ``get_value(start, end)``.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    weighted_total = 0.0
    total_weight = 0

    while start < ftime:
        # The last interval is clipped to the common end of the data.
        end = min(end, ftime)
        weight = end - start
        interval_sum = sum([ts.get_value(start, end) for ts in series])
        weighted_total += interval_sum * weight
        total_weight += weight
        start += ctx.interval
        end += ctx.interval

    if total_weight == 0:
        # No interval was processed; avoid dividing by zero.
        return
    # A single parenthesised argument prints identically on Python 2 and
    # Python 3.
    print('%0.3f' % (weighted_total / total_weight))
153 | ||
class TimeSeries():
    """The samples read from one log file.

    Attributes are plain instance fields:
      ctx     -- the parsed options handed to every Sample
      last    -- the sample with the greatest end time, or None when empty
      samples -- all samples in file order
    """

    def __init__(self, ctx, fn):
        """Read the log file ``fn`` and build its samples."""
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Parse ``fn`` and add one sample per non-blank line.

        The first two comma-separated fields of a line are read as integer
        time and value; any further fields are ignored. A sample spans from
        the previous line's time (0 for the first line) to its own time.

        Raises:
            ValueError: if a non-blank line has fewer than two fields or a
                field is not an integer.
        """
        p_time = 0
        # The with-block closes the file even when a line fails to parse.
        with open(fn, 'r') as f:
            for line in f:
                if not line.strip():
                    continue
                # Only the leading two columns are used, so logs with a
                # different number of trailing columns are accepted too.
                time_field, value_field = line.split(',')[:2]
                time = int(time_field)
                self.add_sample(p_time, time, int(value_field))
                p_time = time

    def add_sample(self, start, end, value):
        """Append a sample covering ``start``..``end`` and track the latest."""
        # Use the context given to this series rather than a global that
        # only exists when the file is run as a script.
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying completely inside ``start``..``end``."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the summed contribution of all samples to the window."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value
187 | ||
class Sample():
    """One logged value together with the time span it covers."""

    def __init__(self, ctx, start, end, value):
        """Store the options object, the covered span and the value."""
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the window ``start``..``end``.

        The value is scaled by the fraction of the window that overlaps
        the sample's own span and then divided by ``ctx.divisor``.

        Returns:
            0 when the window is empty or does not touch the sample,
            otherwise the scaled value as a float.
        """
        # An empty or inverted window has no length to take a share of.
        if end <= start:
            return 0

        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Read the divisor from the context stored on the instance rather
        # than from a global that only exists when run as a script.
        return self.value * ratio / self.ctx.divisor
204 | ||
205 | ||
if __name__ == '__main__':
    # ctx and series are bound at module level on purpose: other code in
    # this file may refer to ctx as a global.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    # The first selected option wins, in this order of precedence.
    report_modes = (
        (ctx.sum, print_sums),
        (ctx.average, print_averages),
        (ctx.full, print_full),
        (ctx.allstats, print_all_stats),
    )
    for selected, report in report_modes:
        if selected:
            report(ctx, series)
            break
    else:
        # No reporting option given: print the single overall figure.
        print_default(ctx, series)
221 |