Commit | Line | Data |
---|---|---|
8629f5f5 | 1 | #!/usr/bin/env python3 |
5eac3b00 | 2 | # Note: this script is python2 and python 3 compatible. |
34831ee3 MN |
3 | # |
4 | # fiologparser.py | |
5 | # | |
6 | # This tool lets you parse multiple fio log files and look at interval | |
7 | # statistics even when samples are non-uniform. For instance: | |
8 | # | |
9 | # fiologparser.py -s *bw* | |
10 | # | |
11 | # to see per-interval sums for all bandwidth logs or: | |
12 | # | |
13 | # fiologparser.py -a *clat* | |
14 | # | |
15 | # to see per-interval average completion latency. | |
16 | ||
5eac3b00 BD |
17 | from __future__ import absolute_import |
18 | from __future__ import print_function | |
34831ee3 | 19 | import argparse |
f4042e41 | 20 | import math |
8629f5f5 | 21 | from functools import reduce |
34831ee3 MN |
22 | |
def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: optional list of argument strings. When None (the default)
            argparse falls back to sys.argv[1:], which is what the script
            entry point relies on.

    Returns:
        The argparse namespace with the attributes interval, divisor, full,
        allstats, average, sum and FILE (a non-empty list of paths).
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): the help text says "seconds" while the default is 1000;
    # presumably the unit is whatever the log timestamps use - confirm.
    parser.add_argument('-i', '--interval', required=False, type=int,
                        default=1000, help='interval of time in seconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int,
                        default=1, help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true',
                        default=False, help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true',
                        default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average',
                        action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true',
                        default=False,
                        help='print the sum for each interval.')
    # This tool parses fio logs; the help text used to mention collectl.
    parser.add_argument("FILE", help="fio log output files to parse",
                        nargs="+")
    return parser.parse_args(argv)
36 | ||
def get_ftime(series):
    """Return the smallest `last.end` found among the given time series.

    A running result of 0 is treated as "nothing recorded yet", so it is
    always overwritten by the next series' value. An empty `series` yields 0.
    """
    earliest = 0
    for ts in series:
        finish = ts.last.end
        if earliest == 0 or finish < earliest:
            earliest = finish
    return earliest
43 | ||
def print_full(ctx, series):
    """Print, for every interval, the interval end and each series' value.

    Intervals are ctx.interval wide, start at 0 and stop at the time returned
    by get_ftime(); the final interval is clipped to that time.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    lo, hi = 0, step

    while lo < ftime:
        # Never let the window run past the common end time.
        hi = min(hi, ftime)
        columns = ', '.join(["%0.3f" % ts.get_value(lo, hi) for ts in series])
        print("%s, %s" % (hi, columns))
        lo += step
        hi += step
55 | ||
def print_sums(ctx, series):
    """Print, for every interval, the interval end and the sum over series.

    Intervals are ctx.interval wide, start at 0 and stop at the time returned
    by get_ftime(); the final interval is clipped to that time.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    lo, hi = 0, step

    while lo < ftime:
        # Never let the window run past the common end time.
        hi = min(hi, ftime)
        interval_total = sum([ts.get_value(lo, hi) for ts in series])
        print("%s, %0.3f" % (hi, interval_total))
        lo += step
        hi += step
67 | ||
def print_averages(ctx, series):
    """Print, for every interval, the interval end and the mean over series.

    Intervals are ctx.interval wide, start at 0 and stop at the time returned
    by get_ftime(); the final interval is clipped to that time.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    lo, hi = 0, step

    while lo < ftime:
        # Never let the window run past the common end time.
        hi = min(hi, ftime)
        values = [ts.get_value(lo, hi) for ts in series]
        mean = float(sum(values)) / len(values)
        print("%s, %0.3f" % (hi, mean))
        lo += step
        hi += step
79 | ||
8c4693e2 BE |
80 | # FIXME: this routine is computationally inefficient |
81 | # and has O(N^2) behavior | |
82 | # it would be better to make one pass through samples | |
83 | # to segment them into a series of time intervals, and | |
84 | # then compute stats on each time interval instead. | |
85 | # to debug this routine, use | |
86 | # # sort -n -t ',' -k 2 small.log | |
87 | # on your input. | |
8c4693e2 BE |
88 | |
def my_extend(vlist, val):
    """Extend `vlist` in place with the items of `val` and return `vlist`."""
    vlist.extend(val)
    return vlist


def array_collapser(vlist, val):
    """Reducer step: fold the list `val` into the accumulator `vlist`."""
    return my_extend(vlist, val)
94 | ||
def print_all_stats(ctx, series):
    """Print min/avg/median/percentile/max of the samples in each interval.

    Intervals are ctx.interval wide, start at 0 and stop at the time returned
    by get_ftime(); the final interval is clipped to that time. Only samples
    returned by each series' get_samples(start, end) are counted.

    An interval with no such samples is reported with a sample count of 0
    and NaN for every statistic, instead of raising from min()/max() or
    dividing by zero.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        end = min(end, ftime)
        # Flatten the per-series sample lists into one list of values,
        # keeping series order and then sample order.
        samplevalues = [sample.value
                        for ts in series
                        for sample in ts.get_samples(start, end)]
        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # Nothing to summarise: keep the row so the time grid stays
            # regular, but mark every statistic as undefined.
            stats = (float('nan'),) * 7
        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
124 | ||
f4042e41 MN |
def median(values):
    """Return the median of `values` as a float.

    For an even number of values the two middle elements are averaged.

    Raises:
        ValueError: if `values` is empty.
    """
    ordered = sorted(values)
    if not ordered:
        raise ValueError("median() requires at least one value")
    count = len(ordered)
    # Floor division keeps the indices integral on Python 3 as well; with
    # true division they become floats and list indexing raises TypeError.
    return float(ordered[(count - 1) // 2] + ordered[count // 2]) / 2
128 | ||
def percentile(values, p):
    """Return the p-th quantile of `values`, interpolating linearly.

    `p` is a fraction (0.90 for the 90th percentile). The rank
    (len(values) - 1) * p is looked up in the sorted values; a fractional
    rank blends the two neighbouring elements by their distance to it.
    """
    ordered = sorted(values)
    rank = (len(ordered) - 1) * p
    below = math.floor(rank)
    above = math.ceil(rank)
    if below == above:
        # The rank falls exactly on an element.
        return ordered[int(rank)]
    low_part = ordered[int(below)] * (above - rank)
    high_part = ordered[int(above)] * (rank - below)
    return low_part + high_part
34831ee3 MN |
137 | |
def print_default(ctx, series):
    """Print one number: the interval sums averaged by interval width.

    For every ctx.interval-wide interval up to get_ftime() the values of all
    series are summed; each sum is then weighted by the width of its
    (possibly clipped) interval and the weighted mean is printed.
    """
    ftime = get_ftime(series)
    step = ctx.interval
    lo, hi = 0, step
    interval_sums = []
    widths = []

    while lo < ftime:
        # Never let the window run past the common end time.
        hi = min(hi, ftime)
        interval_sums.append(sum([ts.get_value(lo, hi) for ts in series]))
        widths.append(hi - lo)
        lo += step
        hi += step

    weighted_total = 0
    for interval_sum, width in zip(interval_sums, widths):
        weighted_total += interval_sum * width
    print('%0.3f' % (weighted_total / sum(widths)))
34831ee3 | 157 | |
class TimeSeries(object):
    """The samples read from one log file.

    Attributes:
        ctx: the parsed options object, passed on to every Sample.
        last: the Sample with the greatest end time added so far, or None
            when no sample has been added.
        samples: every Sample, in the order it was read.
    """

    def __init__(self, ctx, fn):
        """Remember `ctx` and load all samples from the file named `fn`."""
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Parse the log file `fn` and add one sample per line.

        Each line is split on ', '; the first field is the sample's end time
        and the second its value, both integers. Any further columns are
        ignored. A sample starts where the previous line's sample ended
        (0 for the first line).

        Raises:
            ValueError: if a time or value field is not an integer.
            IndexError: if a line has fewer than two fields.
        """
        p_time = 0
        # The context manager closes the file even when parsing fails.
        with open(fn, 'r') as f:
            for line in f:
                fields = line.rstrip('\r\n').split(', ')
                time = int(fields[0])
                value = int(fields[1])
                self.add_sample(p_time, time, value)
                p_time = time

    def add_sample(self, start, end, value):
        """Append a Sample for [start, end] and track the latest-ending one."""
        # Use the instance's ctx rather than a module-level global so the
        # class also works when this file is imported as a module.
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples that lie entirely within [start, end]."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the sum of every sample's contribution to [start, end]."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value
191 | ||
class Sample(object):
    """A single log entry: a value covering the time span [start, end].

    Attributes:
        ctx: the parsed options object; only `divisor` is read here.
        start: start of the span covered by the sample.
        end: end of the span covered by the sample.
        value: the logged value.
    """

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the interval [start, end].

        The share is value * (overlap width / interval width) / divisor,
        where the overlap is the part of [start, end] covered by the sample.
        Returns 0 when the sample lies completely outside the interval.

        Raises:
            ZeroDivisionError: if start == end and the sample overlaps it,
                or if the divisor is 0.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Read the divisor from the instance's ctx, not a module global, so
        # the class does not depend on the script's __main__ block.
        return self.value * ratio / self.ctx.divisor
208 | ||
209 | ||
if __name__ == '__main__':
    # Keep `ctx` as a module-level name: other code in this file may look
    # it up as a global.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    # First enabled flag wins, in this order; with no flag set the single
    # weighted-average figure is printed.
    reporters = (
        (ctx.sum, print_sums),
        (ctx.average, print_averages),
        (ctx.full, print_full),
        (ctx.allstats, print_all_stats),
    )
    for enabled, report in reporters:
        if enabled:
            break
    else:
        report = print_default
    report(ctx, series)