Commit | Line | Data |
---|---|---|
60023ade | 1 | #!/usr/bin/python2.7 |
5eac3b00 | 2 | # Note: this script is python2 and python 3 compatible. |
34831ee3 MN |
3 | # |
4 | # fiologparser.py | |
5 | # | |
6 | # This tool lets you parse multiple fio log files and look at interval | |
7 | # statistics even when samples are non-uniform. For instance: | |
8 | # | |
9 | # fiologparser.py -s *bw* | |
10 | # | |
11 | # to see per-interval sums for all bandwidth logs or: | |
12 | # | |
13 | # fiologparser.py -a *clat* | |
14 | # | |
15 | # to see per-interval average completion latency. | |
16 | ||
5eac3b00 BD |
17 | from __future__ import absolute_import |
18 | from __future__ import print_function | |
34831ee3 | 19 | import argparse |
f4042e41 | 20 | import math |
34831ee3 MN |
21 | |
def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    The namespace carries: ``interval`` and ``divisor`` (ints), the boolean
    flags ``full``, ``allstats``, ``average`` and ``sum``, and ``FILE``
    (a list with one or more file names).
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # integer tunables
    add('-i', '--interval', required=False, type=int, default=1000,
        help='interval of time in seconds.')
    add('-d', '--divisor', required=False, type=int, default=1,
        help='divide the results by this value.')

    # output-mode switches; all are plain store_true flags defaulting to False
    flags = (
        ('-f', '--full', 'full', 'print full output.'),
        ('-A', '--all', 'allstats', 'print all stats for each interval.'),
        ('-a', '--average', 'average', 'print the average for each interval.'),
        ('-s', '--sum', 'sum', 'print the sum for each interval.'),
    )
    for short_opt, long_opt, dest, text in flags:
        add(short_opt, long_opt, dest=dest, action='store_true',
            default=False, help=text)

    # one or more positional log files
    add("FILE", help="collectl log output files to parse", nargs="+")

    return parser.parse_args()
35 | ||
def get_ftime(series):
    """Return the smallest ``ts.last.end`` found across ``series``.

    The running value starts at 0 and 0 is treated as "not set yet", so a
    running value of 0 is always overwritten by the next series' end time.
    Returns 0 when ``series`` is empty.
    """
    earliest = 0
    for ts in series:
        last_end = ts.last.end
        # keep the current candidate only if it is set and not later
        if earliest != 0 and last_end >= earliest:
            continue
        earliest = last_end
    return earliest
42 | ||
def print_full(ctx, series):
    """Print one line per interval: the interval end, then every series' value.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the time
    returned by ``get_ftime(series)``; the final interval is clipped to it.
    """
    step = ctx.interval
    last_time = get_ftime(series)
    lo, hi = 0, step

    while lo < last_time:
        # clip the last window so it does not run past the common end time
        hi = min(hi, last_time)
        values = [ts.get_value(lo, hi) for ts in series]
        columns = ', '.join("%0.3f" % v for v in values)
        print("%s, %s" % (hi, columns))
        lo += step
        hi += step
54 | ||
def print_sums(ctx, series):
    """Print one line per interval: the interval end and the sum over all series.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the time
    returned by ``get_ftime(series)``; the final interval is clipped to it.
    """
    step = ctx.interval
    last_time = get_ftime(series)
    lo, hi = 0, step

    while lo < last_time:
        # clip the last window so it does not run past the common end time
        hi = min(hi, last_time)
        per_series = [ts.get_value(lo, hi) for ts in series]
        print("%s, %0.3f" % (hi, sum(per_series)))
        lo += step
        hi += step
66 | ||
def print_averages(ctx, series):
    """Print one line per interval: the interval end and the mean over all series.

    Intervals are ``ctx.interval`` wide, start at 0 and stop at the time
    returned by ``get_ftime(series)``; the final interval is clipped to it.
    """
    step = ctx.interval
    last_time = get_ftime(series)
    lo, hi = 0, step

    while lo < last_time:
        # clip the last window so it does not run past the common end time
        hi = min(hi, last_time)
        per_series = [ts.get_value(lo, hi) for ts in series]
        mean = float(sum(per_series)) / len(per_series)
        print("%s, %0.3f" % (hi, mean))
        lo += step
        hi += step
78 | ||
8c4693e2 BE |
79 | # FIXME: this routine is computationally inefficient |
80 | # and has O(N^2) behavior | |
81 | # it would be better to make one pass through samples | |
82 | # to segment them into a series of time intervals, and | |
83 | # then compute stats on each time interval instead. | |
84 | # to debug this routine, use | |
85 | # # sort -n -t ',' -k 2 small.log | |
86 | # on your input. | |
8c4693e2 BE |
87 | |
def my_extend(vlist, val):
    """Append every item of ``val`` to ``vlist`` in place and return ``vlist``.

    Unlike ``list.extend`` (which returns None) this hands the same list
    object back, so it can be used as a folding function.
    """
    vlist.extend(val)
    return vlist
91 | ||
def array_collapser(vlist, val):
    """Folding step: extend ``vlist`` with ``val`` via ``my_extend`` and return it."""
    return my_extend(vlist, val)
93 | ||
def print_all_stats(ctx, series):
    """Print min/avg/median/percentile/max of the raw samples in each interval.

    A header line is printed first, then one line per interval holding the
    interval start time, the number of samples and the seven statistics.
    Intervals are ``ctx.interval`` wide, start at 0 and stop at the time
    returned by ``get_ftime(series)``; the final interval is clipped to it.

    Only samples lying entirely inside an interval are counted (that is what
    ``get_samples`` returns).  An interval with no such samples is reported
    with a sample count of 0 and ``nan`` for every statistic instead of
    raising from ``min()`` on an empty list.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    nan = float('nan')
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        end = ftime if ftime < end else end

        # Flatten the per-series sample lists into one list of values.
        # A nested comprehension is used instead of reduce(), which is not
        # a builtin on Python 3.
        samplevalues = [sample.value
                        for s in series
                        for sample in s.get_samples(start, end)]

        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # nothing fell entirely inside this interval
            stats = (nan,) * 7

        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
123 | ||
f4042e41 MN |
def median(values):
    """Return the median of ``values`` as a float.

    For an even number of items the two middle items are averaged.  Raises
    IndexError when ``values`` is empty.
    """
    s = sorted(values)
    n = len(s)
    # Floor division keeps the indexes integral on Python 3, where "/" on
    # ints yields a float that cannot be used as a list index.
    return float(s[(n - 1) // 2] + s[n // 2]) / 2
127 | ||
def percentile(values, p):
    """Return the ``p`` quantile (``p`` in 0..1) of ``values``.

    The rank ``(len - 1) * p`` is located in the sorted data; when it falls
    between two positions the result is linearly interpolated between the
    neighbouring items.
    """
    ordered = sorted(values)
    rank = (len(ordered) - 1) * p
    lower = math.floor(rank)
    upper = math.ceil(rank)
    if lower != upper:
        # fractional rank: weight each neighbour by its distance to the rank
        return (ordered[int(lower)] * (upper - rank)) + (ordered[int(upper)] * (rank - lower))
    # rank is a whole number: that element is the answer
    return ordered[int(rank)]
34831ee3 MN |
136 | |
def print_default(ctx, series):
    """Print a single number: the interval sums averaged with time weighting.

    For every ``ctx.interval``-wide interval (the last one clipped to the
    time returned by ``get_ftime(series)``) the values of all series are
    summed; those sums are then averaged using each interval's length as
    its weight.
    """
    step = ctx.interval
    last_time = get_ftime(series)
    lo, hi = 0, step
    interval_sums = []
    interval_lengths = []

    while lo < last_time:
        # clip the last window so it does not run past the common end time
        hi = min(hi, last_time)
        interval_sums.append(sum(ts.get_value(lo, hi) for ts in series))
        interval_lengths.append(hi - lo)
        lo += step
        hi += step

    weighted_total = 0
    for value, length in zip(interval_sums, interval_lengths):
        weighted_total += value * length
    print('%0.3f' % (weighted_total / sum(interval_lengths)))
34831ee3 | 156 | |
class TimeSeries(object):
    """The samples read from one log file.

    Attributes:
        ctx: the parsed command-line options, handed on to every Sample.
        last: the Sample with the greatest end time seen so far (None until
            the first sample is added).
        samples: all Samples in file order.
    """

    def __init__(self, ctx, fn):
        """Create the series and immediately load the file ``fn``."""
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Read ``fn`` and add one sample per line.

        Each line is split on ``', '``; the first field is the sample's end
        time and the second its value, both parsed as ints.  Any further
        fields are ignored, so lines with extra trailing columns are
        accepted as well.  A sample's start time is the previous line's
        time (0 for the first line).
        """
        p_time = 0
        # "with" guarantees the file is closed, also when a line fails to parse
        with open(fn, 'r') as f:
            for line in f:
                fields = line.rstrip('\r\n').split(', ')
                time = int(fields[0])
                value = int(fields[1])
                self.add_sample(p_time, time, value)
                p_time = time

    def add_sample(self, start, end, value):
        """Append a Sample covering ``start``..``end`` and update ``self.last``."""
        # Use the context stored on the instance rather than a module-level
        # global, so the class also works when this file is imported.
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely within ``start``..``end``."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the sum of every sample's contribution to ``start``..``end``."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value
190 | ||
class Sample(object):
    """One logged value covering the time span ``start``..``end``.

    Attributes:
        ctx: options object; ``ctx.divisor`` scales every contribution.
        start: start of the span this sample covers.
        end: end of the span this sample covers.
        value: the logged value.
    """

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the window ``start``..``end``.

        The result is ``value`` scaled by the fraction of the window that
        the sample overlaps, divided by ``ctx.divisor``.  Returns 0 when the
        sample lies completely outside the window.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        # overlap of the sample's span with the requested window
        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Read the divisor from the stored context instead of a module-level
        # global, so the class also works when this file is imported.
        return self.value * ratio / self.ctx.divisor
207 | ||
208 | ||
if __name__ == '__main__':
    # NOTE: keep `ctx` bound at module scope (not inside a function); code
    # elsewhere in this file may look it up as a global.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    # Exactly one report is produced; when several flags are given the
    # precedence is: sum, average, full, all-stats, then the default.
    if ctx.sum:
        report = print_sums
    elif ctx.average:
        report = print_averages
    elif ctx.full:
        report = print_full
    elif ctx.allstats:
        report = print_all_stats
    else:
        report = print_default
    report(ctx, series)