Commit | Line | Data |
---|---|---|
60023ade | 1 | #!/usr/bin/python2.7 |
1e613c9c KC |
2 | """ |
3 | Utility for converting *_clat_hist* files generated by fio into latency statistics. | |
4 | ||
5 | Example usage: | |
6 | ||
7 | $ fiologparser_hist.py *_clat_hist* | |
8 | end-time, samples, min, avg, median, 90%, 95%, 99%, max | |
9 | 1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000 | |
10 | 2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000 | |
11 | 4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744 | |
12 | ... | |
13 | ||
1e613c9c KC |
14 | @author Karl Cronburg <karl.cronburg@gmail.com> |
15 | """ | |
16 | import os | |
17 | import sys | |
18 | import pandas | |
4a11e9d7 | 19 | import re |
1e613c9c KC |
20 | import numpy as np |
21 | ||
# True when this file is executed as a script (set in the __main__ block);
# guess_max_from_bins() uses it to decide between exit(1) and raising.
runascmd = False

# Shorthand for writing warning text to stderr.
err = sys.stderr.write
25 | ||
class HistFileRdr():
    """ Read a histogram log file one row at a time, keeping the most
    recently parsed row buffered so that the next timestamp can be
    previewed before consuming it.
    Note: this is not a generator; callers advance explicitly with
    nextData().
    """
    def __init__(self, file):
        self.fp = open(file, 'r')
        self.data = self.nextData()

    def close(self):
        """ Close and forget the underlying file handle. """
        self.fp.close()
        self.fp = None

    def nextData(self):
        """ Advance to the next row; returns the parsed int list, or
        None once the file is exhausted (the file is closed then). """
        self.data = None
        if self.fp is not None:
            raw = self.fp.readline()
            if raw:
                fields = raw.replace(' ', '').rstrip().split(',')
                self.data = [int(f) for f in fields]
            else:
                self.close()
        return self.data

    @property
    def curTS(self):
        """ Timestamp of the buffered row, or None at EOF. """
        return self.data[0] if self.data else None

    @property
    def curDir(self):
        """ Direction field of the buffered row, or None at EOF. """
        return self.data[1] if self.data else None

    @property
    def curBins(self):
        """ Histogram bin counts of the buffered row. """
        return self.data[3:]
1e613c9c KC |
def weighted_percentile(percs, vs, ws):
    """ Compute weighted percentile(s) via linear interpolation.

    Values and weights are first ordered by value; the weighted cdf is
    then built, and np.interp locates the two cdf points bracketing each
    requested percentile and interpolates between their values.

    percs  :: list of percentiles to compute
    vs     :: array of values
    ws     :: array of weights, one per value
    return :: array of interpolated percentile values
    """
    order = np.argsort(vs)
    vs = vs[order]
    ws = ws[order]
    cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
    return np.interp(percs, cdf, vs)  # linear interpolation
87 | ||
def weights(start_ts, end_ts, start, end):
    """ Compute, per sample, the fraction of the sample that overlaps
    the interval [start, end], using vectorized numpy operations.

    Samples of zero length produce NaN from the division and are
    effectively ignored by forcing their weight to zero.

    start_ts :: array of sample start times
    end_ts   :: array of sample end times
    start    :: int, interval start
    end      :: int, interval end
    return   :: array of weights in [0, 1]
    """
    sbounds = np.maximum(start_ts, start).astype(float)
    ebounds = np.minimum(end_ts, end).astype(float)
    ws = (ebounds - sbounds) / (end_ts - start_ts)
    nans = np.isnan(ws)
    if nans.any():
        err("WARNING: zero-length sample(s) detected. Log file corrupt"
            " / bad time values? Ignoring these samples.\n")
        ws[nans] = 0.0
    return ws
110 | ||
def weighted_average(vs, ws):
    """ Return the average of vs weighted by ws. """
    return np.average(vs, weights=ws)
113 | ||
4a11e9d7 | 114 | |
# Module-level output configuration, filled in by gen_output_columns().
percs = None
columns = None

def gen_output_columns(ctx):
    """ Populate the module-level `percs` and `columns` globals from
    ctx.percentiles (comma- or colon-separated) and ctx.directions. """
    global percs, columns
    strpercs = re.split('[,:]', ctx.percentiles)
    percs = [50.0]  # always print 50% in 'median' column
    percs += [float(p) for p in strpercs]
    if ctx.directions:
        columns = ["end-time", "dir", "samples", "min", "avg", "median"]
    else:
        columns = ["end-time", "samples", "min", "avg", "median"]
    columns += [p + '%' for p in strpercs]
    columns.append("max")
1e613c9c KC |
129 | |
def fmt_float_list(ctx, num=1):
    """ Build a comma separated list of float format specifiers using
    ctx.decimals decimal places. For instance:

        fmt_float_list(ctx.decimals=4, num=3) == "%.4f, %.4f, %.4f"
    """
    spec = "%%.%df" % ctx.decimals
    return ', '.join([spec] * num)
137 | ||
# Default values - see beginning of main() for how we detect number columns in
# the input files:
__HIST_COLUMNS = 1216      # histogram-bin columns per input row
__NON_HIST_COLUMNS = 3     # leading columns before the bins (time, direction, block size)
__TOTAL_COLUMNS = __HIST_COLUMNS + __NON_HIST_COLUMNS
4a11e9d7 | 143 | |
65a4d15c KC |
def read_chunk(rdr, sz):
    """ Read the next chunk of size sz from the given pandas reader.

    rdr    :: chunked pandas reader (or None for an empty input file)
    sz     :: requested chunk size (rows)
    return :: numpy array of raw rows (time, direction and size columns
              included), or None when there is nothing left to read
    """
    try:
        # StopIteration occurs when the pandas reader is empty, and
        # AttributeError occurs if rdr is None due to the file being empty.
        new_arr = rdr.read().values
    except (StopIteration, AttributeError):
        return None

    # Leave the array as is; later code ignores the block-size column.
    return new_arr
1e613c9c KC |
163 | |
def get_min(fps, arrs):
    """ Return the file whose current first row has the smallest start
    time, ignoring files whose buffer is exhausted (None).
    Raises ValueError when every buffer is exhausted. """
    candidates = [fp for fp in fps if arrs[fp] is not None]
    return min(candidates, key=lambda fp: arrs[fp][0][0])
1e613c9c KC |
167 | |
def histogram_generator(ctx, fps, sz):
    """ Yield histogram rows merged across all input files in ascending
    start-time order.  Each yielded row is the original row with the
    originating file's index inserted at position 1.

    ctx :: parsed command-line context (uses ctx.warn)
    fps :: list of open input file objects
    sz  :: pandas reader chunk size
    """
    # Create a chunked pandas reader for each of the files:
    rdrs = {}
    for fp in fps:
        try:
            rdrs[fp] = pandas.read_csv(fp, dtype=int, header=None, chunksize=sz)
        except ValueError as e:
            # BUGFIX: exceptions have no .message attribute on Python 3;
            # compare the stringified exception instead.
            if str(e) == 'No columns to parse from file':
                if ctx.warn: sys.stderr.write("WARNING: Empty input file encountered.\n")
                rdrs[fp] = None
            else:
                raise(e)

    # Initial histograms from disk:
    arrs = {fp: read_chunk(rdr, sz) for fp, rdr in rdrs.items()}
    while True:

        try:
            # ValueError occurs when nothing more to read (all buffers None)
            fp = get_min(fps, arrs)
        except ValueError:
            return
        arr = arrs[fp]
        # Tag the row with the index of its source file and hand it out:
        arri = np.insert(arr[0], 1, fps.index(fp))
        yield arri
        arrs[fp] = arr[1:]

        # Refill this file's buffer once its current chunk is consumed:
        if arrs[fp].shape[0] == 0:
            arrs[fp] = read_chunk(rdrs[fp], sz)
1e613c9c KC |
198 | |
199 | def _plat_idx_to_val(idx, edge=0.5, FIO_IO_U_PLAT_BITS=6, FIO_IO_U_PLAT_VAL=64): | |
200 | """ Taken from fio's stat.c for calculating the latency value of a bin | |
201 | from that bin's index. | |
202 | ||
203 | idx : the value of the index into the histogram bins | |
204 | edge : fractional value in the range [0,1]** indicating how far into | |
205 | the bin we wish to compute the latency value of. | |
206 | ||
207 | ** edge = 0.0 and 1.0 computes the lower and upper latency bounds | |
208 | respectively of the given bin index. """ | |
209 | ||
210 | # MSB <= (FIO_IO_U_PLAT_BITS-1), cannot be rounded off. Use | |
211 | # all bits of the sample as index | |
212 | if (idx < (FIO_IO_U_PLAT_VAL << 1)): | |
213 | return idx | |
214 | ||
215 | # Find the group and compute the minimum value of that group | |
216 | error_bits = (idx >> FIO_IO_U_PLAT_BITS) - 1 | |
217 | base = 1 << (error_bits + FIO_IO_U_PLAT_BITS) | |
218 | ||
219 | # Find its bucket number of the group | |
220 | k = idx % FIO_IO_U_PLAT_VAL | |
221 | ||
222 | # Return the mean (if edge=0.5) of the range of the bucket | |
223 | return base + ((k + edge) * (1 << error_bits)) | |
224 | ||
def plat_idx_to_val_coarse(idx, coarseness, edge=0.5):
    """ Convert a *coarse* bin index into the corresponding fine-grained
    index used by fio's stat.h:plat_idx_to_val(), then compute the
    latency value `edge` of the way through that coarse bin's range.
    """
    # A coarse bin spans `stride` consecutive fine-grained bins
    # (max 1536 fine bins; FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h).
    stride = 1 << coarseness
    fine_idx = idx * stride
    lower = _plat_idx_to_val(fine_idx, edge=0.0)
    upper = _plat_idx_to_val(fine_idx + stride, edge=1.0)
    return lower + (upper - lower) * edge
238 | ||
def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx, dir=dir):
    """ Print one output row: end time, optional direction, sample count,
    min, weighted average, the configured percentiles and max.

    Uses the module-level `percs` list set up by gen_output_columns().
    Note: `dir=dir` defaults the parameter to the `dir` builtin — a
    placeholder; callers pass dir= explicitly when direction output is on.
    """
    ps = weighted_percentile(percs, vs, ws)

    avg = weighted_average(vs, ws)
    values = [mn, avg] + list(ps) + [mx]
    if ctx.directions:
        row = [end, dir, ss_cnt]
        fmt = "%d, %s, %d, "
    else:
        row = [end, ss_cnt]
        fmt = "%d, %d, "
    # All value columns are scaled by the user-supplied divisor:
    row = row + [float(x) / ctx.divisor for x in values]
    if ctx.divisor > 1:
        fmt = fmt + fmt_float_list(ctx, len(percs)+3)
    else:
        # max and min are decimal values if no divisor
        fmt = fmt + "%d, " + fmt_float_list(ctx, len(percs)+1) + ", %d"

    print (fmt % tuple(row))
258 | ||
def update_extreme(val, fncn, new_val):
    """ Apply fncn (min or max) while tolerating a None current value. """
    return new_val if val is None else fncn(val, new_val)
263 | ||
# See beginning of main() for how bin_vals are computed
bin_vals = []        # latency value per bin (edge=0.5, i.e. mid-bin)
lower_bin_vals = []  # lower edge of each bin
upper_bin_vals = []  # upper edge of each bin
268 | ||
def process_interval(ctx, iHist, iEnd, dir):
    """ Print estimated percentile statistics for one merged interval
    histogram (the --noweight path).

    iHist : per-bin counts accumulated for this interval
    iEnd  : interval end time used as the row label
    dir   : direction character ('r', 'w', 't' or 'm') for the row
    """
    ss_cnt = 0 # number of samples affecting this interval
    mn_bin_val, mx_bin_val = None, None

    # Update total number of samples affecting current interval histogram:
    ss_cnt += np.sum(iHist)

    # Update min and max bin values from the first/last non-empty bin
    idxs = np.nonzero(iHist != 0)[0]
    if idxs.size > 0:
        mn_bin_val = bin_vals[idxs[0]]
        mx_bin_val = bin_vals[idxs[-1]]

    # Nothing is printed for intervals with no samples
    if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val, dir=dir)
285 | ||
286 | ||
dir_map = ['r', 'w', 't'] # map of directional value in log to textual representation
def process_weighted_interval(ctx, samples, iStart, iEnd, printdirs):
    """ Construct the weighted histogram for the given interval by scanning
    through all the histograms and figuring out which of their bins have
    samples with latencies which overlap with the given interval
    [iStart,iEnd].

    samples   : rows of (end-time, file-index, direction, size, bins...)
    printdirs : set of direction characters to accumulate and print
    """

    # NOTE: `sizes` is unpacked but not used below.
    times, files, dirs, sizes, hists = samples[:,0], samples[:,1], samples[:,2], samples[:,3], samples[:,4:]
    # Per-direction accumulators:
    iHist={}; ss_cnt = {}; mn_bin_val={}; mx_bin_val={}
    for dir in printdirs:
        iHist[dir] = np.zeros(__HIST_COLUMNS, dtype=float)
        ss_cnt[dir] = 0 # number of samples affecting this interval
        mn_bin_val[dir] = None
        mx_bin_val[dir] = None

    for end_time,file,dir,hist in zip(times,files,dirs,hists):

        # Only look at bins of the current histogram sample which
        # started before the end of the current time interval [start,end]
        start_times = (end_time - 0.5 * ctx.interval) - bin_vals / ctx.time_divisor
        idx = np.where(start_times < iEnd)
        s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx]

        # Increment current interval histogram by weighted values of future histogram
        # total number of samples
        # and min and max values as necessary
        textdir = dir_map[dir]
        ws = hs * weights(s_ts, end_time, iStart, iEnd)
        mmidx = np.where(hs != 0)[0]
        # Mixed ('m') totals accumulate every sample regardless of direction;
        # min/max are widened by one bin on each side of the non-zero range.
        if 'm' in printdirs:
            iHist['m'][idx] += ws
            ss_cnt['m'] += np.sum(hs)
            if mmidx.size > 0:
                mn_bin_val['m'] = update_extreme(mn_bin_val['m'], min, l_bvs[max(0, mmidx[0] - 1)])
                mx_bin_val['m'] = update_extreme(mx_bin_val['m'], max, u_bvs[min(len(hs) - 1, mmidx[-1] + 1)])
        # Per-direction totals only accumulate matching samples:
        if textdir in printdirs:
            iHist[textdir][idx] += ws
            ss_cnt[textdir] += np.sum(hs) # Update total number of samples affecting current interval histogram:
            if mmidx.size > 0:
                mn_bin_val[textdir] = update_extreme(mn_bin_val[textdir], min, l_bvs[max(0, mmidx[0] - 1)])
                mx_bin_val[textdir] = update_extreme(mx_bin_val[textdir], max, u_bvs[min(len(hs) - 1, mmidx[-1] + 1)])

    # One output row per requested direction, in sorted order:
    for textdir in sorted(printdirs):
        if ss_cnt[textdir] > 0: print_all_stats(ctx, iEnd, mn_bin_val[textdir], ss_cnt[textdir], bin_vals, iHist[textdir], mx_bin_val[textdir], dir=textdir)
1e613c9c KC |
332 | |
def guess_max_from_bins(ctx, hist_cols):
    """ Try to guess the GROUP_NR from given # of histogram
        columns seen in an input file """
    max_coarse = 8
    # With an unusual --group_nr, assume exactly that many groups of 64
    # bins; otherwise try the standard fio bin counts.
    if ctx.group_nr < 19 or ctx.group_nr > 26:
        bins = [ctx.group_nr * (1 << 6)]
    else:
        bins = [1216,1280,1344,1408,1472,1536,1600,1664]
    coarses = range(max_coarse + 1)
    # For each candidate bin count, the column count produced by each
    # coarseness level (-10 marks a non-divisible, impossible combination).
    fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else -10, coarses))

    arr = np.transpose(list(map(fncn, bins)))
    idx = np.where(arr == hist_cols)
    if len(idx[1]) == 0:
        table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array',' ')
        errmsg = ("Unable to determine bin values from input clat_hist files. Namely \n"
            "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) +
            "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n"
            "This number needs to be equal to one of the following numbers:\n\n"
            + table + "\n\n"
            "Possible reasons and corresponding solutions:\n"
            " - Input file(s) does not contain histograms.\n"
            " - You recompiled fio with a different GROUP_NR. If so please specify this\n"
            " new GROUP_NR on the command line with --group_nr\n")
        # Print-and-exit when run as a command, raise when used as a library:
        if runascmd:
            err(errmsg)
            exit(1)
        else:
            raise RuntimeError(errmsg)

    return bins[idx[1][0]]
364 | ||
def output_weighted_interval_data(ctx,printdirs):
    """ Default output path: merge all input files and print one row per
    interval, weighting samples by their overlap with each interval. """

    fps = [open(f, 'r') for f in ctx.FILE]
    gen = histogram_generator(ctx, fps, ctx.buff_size)

    print(', '.join(columns))

    try:
        start, end = 0, ctx.interval
        # Sliding window of rows; +1 column holds the inserted file index.
        arr = np.empty(shape=(0,__TOTAL_COLUMNS + 1),dtype=int)
        more_data = True
        while more_data or len(arr) > 0:

            # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval.
            while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end:
                try:
                    new_arr = next(gen)
                except StopIteration:
                    more_data = False
                    break
                nashape = new_arr.reshape((1,__TOTAL_COLUMNS + 1))
                arr = np.append(arr, nashape, axis=0)
                #arr = arr.astype(int)

            if arr.size > 0:
                # Jump immediately to the start of the input, rounding
                # down to the nearest multiple of the interval (useful when --log_unix_epoch
                # was used to create these histograms):
                if start == 0 and arr[0][0] - ctx.max_latency > end:
                    start = arr[0][0] - ctx.max_latency
                    start = start - (start % ctx.interval)
                    end = start + ctx.interval

                process_weighted_interval(ctx, arr, start, end, printdirs)

                # Update arr to throw away samples we no longer need - samples which
                # end before the start of the next interval, i.e. the end of the
                # current interval:
                idx = np.where(arr[:,0] > end)
                arr = arr[idx]

            start += ctx.interval
            end = start + ctx.interval
    finally:
        # Always release the input file handles, even on error:
        for fp in fps:
            fp.close()
1e613c9c | 411 | |
def output_interval_data(ctx,directions):
    """ --noweight output path: sum raw bins per interval (no sample
    weighting), merging rows from all input files. """
    fps = [HistFileRdr(f) for f in ctx.FILE]

    print(', '.join(columns))

    start = 0
    end = ctx.interval
    while True:

        more_data = False

        # add bins from all files in target intervals
        arr = None
        numSamples = 0
        while True:
            foundSamples = False
            for fp in fps:
                ts = fp.curTS
                if ts and ts+10 < end: # shift sample time when very close to an end time
                    curdirect = fp.curDir
                    numSamples += 1
                    foundSamples = True
                    # Lazily create one accumulator per requested direction:
                    if arr is None:
                        arr = {}
                        for d in directions:
                            arr[d] = np.zeros(shape=(__HIST_COLUMNS), dtype=int)
                    # 'm' (mixed) accumulates every sample; r/w/t only
                    # samples whose direction field matches (0/1/2):
                    if 'm' in arr:
                        arr['m'] = np.add(arr['m'], fp.curBins)
                    if 'r' in arr and curdirect == 0:
                        arr['r'] = np.add(arr['r'], fp.curBins)
                    if 'w' in arr and curdirect == 1:
                        arr['w'] = np.add(arr['w'], fp.curBins)
                    if 't' in arr and curdirect == 2:
                        arr['t'] = np.add(arr['t'], fp.curBins)

                    more_data = True
                    fp.nextData()
                elif ts:
                    more_data = True

            # reached end of all files
            # or gone through all files without finding sample in interval
            if not more_data or not foundSamples:
                break

        if arr is not None:
            #print("{} size({}) samples({}) nonzero({}):".format(end, arr.size, numSamples, np.count_nonzero(arr)), str(arr), )
            for d in sorted(arr.keys()):
                aval = arr[d]
                process_interval(ctx, aval, end, d)

        # reach end of all files
        if not more_data:
            break

        start += ctx.interval
        end = start + ctx.interval
469 | ||
def main(ctx):
    """ Entry point: resolve defaults from the optional job file and the
    first input file, then dispatch to the weighted or unweighted
    output routine. """

    if ctx.job_file:
        # NOTE(review): SafeConfigParser and readfp() are deprecated and
        # removed in Python 3.12 — consider ConfigParser / read_file().
        try:
            from configparser import SafeConfigParser, NoOptionError
        except ImportError:
            from ConfigParser import SafeConfigParser, NoOptionError

        cp = SafeConfigParser(allow_no_value=True)
        with open(ctx.job_file, 'r') as fp:
            cp.readfp(fp)

        if ctx.interval is None:
            # Auto detect --interval value
            for s in cp.sections():
                try:
                    hist_msec = cp.get(s, 'log_hist_msec')
                    if hist_msec is not None:
                        ctx.interval = int(hist_msec)
                except NoOptionError:
                    pass

    # Defensive default when called programmatically without --percentiles:
    if not hasattr(ctx, 'percentiles'):
        ctx.percentiles = "90,95,99"

    if ctx.directions:
        ctx.directions = ctx.directions.lower()

    if ctx.interval is None:
        ctx.interval = 1000

    if ctx.usbin:
        ctx.time_divisor = 1000.0 # bins are in us
    else:
        ctx.time_divisor = 1000000.0 # bins are in ns

    gen_output_columns(ctx)


    # Automatically detect how many columns are in the input files,
    # calculate the corresponding 'coarseness' parameter used to generate
    # those files, and calculate the appropriate bin latency values:
    with open(ctx.FILE[0], 'r') as fp:
        global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
        __TOTAL_COLUMNS = len(fp.readline().split(','))
        __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS

        max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
        coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
        bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness) for x in np.arange(__HIST_COLUMNS)], dtype=float)
        lower_bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness, 0.0) for x in np.arange(__HIST_COLUMNS)], dtype=float)
        upper_bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness, 1.0) for x in np.arange(__HIST_COLUMNS)], dtype=float)

    # indicate which directions to output (read(0), write(1), trim(2), mixed(3))
    directions = set()
    if not ctx.directions or 'm' in ctx.directions: directions.add('m')
    if ctx.directions and 'r' in ctx.directions: directions.add('r')
    if ctx.directions and 'w' in ctx.directions: directions.add('w')
    if ctx.directions and 't' in ctx.directions: directions.add('t')

    if ctx.noweight:
        output_interval_data(ctx, directions)
    else:
        output_weighted_interval_data(ctx, directions)
534 | ||
1e613c9c KC |
535 | |
if __name__ == '__main__':
    # Command-line entry point: build the argument parser and run main().
    import argparse
    runascmd = True
    p = argparse.ArgumentParser()
    arg = p.add_argument
    arg("FILE", help='space separated list of latency log filenames', nargs='+')
    arg('--buff_size',
        default=10000,
        type=int,
        help='number of samples to buffer into numpy at a time')

    arg('--max_latency',
        default=20,
        type=float,
        help='number of seconds of data to process at a time')

    arg('-i', '--interval',
        type=int,
        help='interval width (ms), default 1000 ms')

    arg('--noweight',
        action='store_true',
        default=False,
        help='do not perform weighting of samples between output intervals')

    arg('-d', '--divisor',
        required=False,
        type=int,
        default=1,
        help='divide the results by this value.')

    arg('--decimals',
        default=3,
        type=int,
        help='number of decimal places to print floats to')

    arg('--warn',
        dest='warn',
        action='store_true',
        default=False,
        help='print warning messages to stderr')

    arg('--group_nr',
        default=29,
        type=int,
        help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')

    arg('--job-file',
        default=None,
        type=str,
        help='Optional argument pointing to the job file used to create the '
             'given histogram files. Useful for auto-detecting --log_hist_msec and '
             '--log_unix_epoch (in fio) values.')

    arg('--percentiles',
        default="90:95:99",
        type=str,
        help='Optional argument of comma or colon separated percentiles to print. '
             'The default is "90.0:95.0:99.0". min, median(50%%) and max percentiles are always printed')

    arg('--usbin',
        default=False,
        action='store_true',
        help='histogram bin latencies are in us (fio versions < 2.99. fio uses ns for version >= 2.99')

    arg('--directions',
        default=None,
        type=str,
        help='Optionally split results output by reads, writes, trims or mixed. '
             'Value may be any combination of "rwtm" characters. '
             'By default, only "mixed" results are output without a "dir" field. '
             'But, specifying the --directions option '
             'adds a "dir" field to the output content, and separate rows for each of the indicated '
             'directions.')

    main(p.parse_args())
612 |