| 1 | #!/usr/bin/env python3 |
| 2 | """ |
| 3 | Utility for converting *_clat_hist* files generated by fio into latency statistics. |
| 4 | |
| 5 | Example usage: |
| 6 | |
| 7 | $ fiologparser_hist.py *_clat_hist* |
| 8 | end-time, samples, min, avg, median, 90%, 95%, 99%, max |
| 9 | 1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000 |
| 10 | 2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000 |
| 11 | 4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744 |
| 12 | ... |
| 13 | |
| 14 | @author Karl Cronburg <karl.cronburg@gmail.com> |
| 15 | """ |
| 16 | import os |
| 17 | import sys |
| 18 | import pandas |
| 19 | import re |
| 20 | import numpy as np |
| 21 | |
| 22 | runascmd = False |
| 23 | |
| 24 | err = sys.stderr.write |
| 25 | |
| 26 | class HistFileRdr(): |
| 27 | """ Class to read a hist file line by line, buffering |
| 28 | a value array for the latest line, and allowing a preview |
| 29 | of the next timestamp in next line |
| 30 | Note: this does not follow a generator pattern, but must explicitly |
| 31 | get next bin array. |
| 32 | """ |
| 33 | def __init__(self, file): |
| 34 | self.fp = open(file, 'r') |
| 35 | self.data = self.nextData() |
| 36 | |
| 37 | def close(self): |
| 38 | self.fp.close() |
| 39 | self.fp = None |
| 40 | |
| 41 | def nextData(self): |
| 42 | self.data = None |
| 43 | if self.fp: |
| 44 | line = self.fp.readline() |
| 45 | if line == "": |
| 46 | self.close() |
| 47 | else: |
| 48 | self.data = [int(x) for x in line.replace(' ', '').rstrip().split(',')] |
| 49 | |
| 50 | return self.data |
| 51 | |
| 52 | @property |
| 53 | def curTS(self): |
| 54 | ts = None |
| 55 | if self.data: |
| 56 | ts = self.data[0] |
| 57 | return ts |
| 58 | |
| 59 | @property |
| 60 | def curDir(self): |
| 61 | d = None |
| 62 | if self.data: |
| 63 | d = self.data[1] |
| 64 | return d |
| 65 | |
| 66 | @property |
| 67 | def curBins(self): |
| 68 | return self.data[3:] |
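
# A minimal usage sketch for HistFileRdr (hypothetical log file name):
#
#   rdr = HistFileRdr('job_clat_hist.1.log')
#   while rdr.curTS is not None:
#       ts, direction, bins = rdr.curTS, rdr.curDir, rdr.curBins
#       rdr.nextData()   # advance to the next buffered sample; the file is closed at EOF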
| 69 | |
| 70 | def weighted_percentile(percs, vs, ws): |
| 71 | """ Use linear interpolation to calculate the weighted percentile. |
| 72 | |
| 73 | Value and weight arrays are first sorted by value. The cumulative |
| 74 | distribution function (cdf) is then computed, after which np.interp |
| 75 | finds the two values closest to our desired weighted percentile(s) |
| 76 | and linearly interpolates them. |
| 77 | |
| 78 | percs :: List of percentiles we want to calculate |
| 79 | vs :: Array of values we are computing the percentile of |
| 80 | ws :: Array of weights for our corresponding values |
| 81 | return :: Array of percentiles |
| 82 | """ |
| 83 | idx = np.argsort(vs) |
| 84 | vs, ws = vs[idx], ws[idx] # weights and values sorted by value |
| 85 | cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum() |
| 86 | return np.interp(percs, cdf, vs) # linear interpolation |
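
# Worked example with hypothetical values: for vs = [10, 20, 30] and weights
# ws = [1, 1, 2], the cdf above evaluates to [12.5, 37.5, 75.0], so the
# weighted 50th percentile interpolates between 20 and 30:
#
#   weighted_percentile([50.0], np.array([10., 20., 30.]), np.array([1., 1., 2.]))
#   # -> array([23.33333333])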
| 87 | |
| 88 | def weights(start_ts, end_ts, start, end): |
| 89 | """ Calculate weights based on fraction of sample falling in the |
| 90 | given interval [start,end]. Weights computed using vector / array |
| 91 | computation instead of for-loops. |
| 92 | |
| 93 | Note that samples with zero time length are effectively ignored |
| 94 | (we set their weight to zero). |
| 95 | |
| 96 | start_ts :: Array of start times for a set of samples |
| 97 | end_ts :: Array of end times for a set of samples |
| 98 | start :: int |
| 99 | end :: int |
| 100 | return :: Array of weights |
| 101 | """ |
| 102 | sbounds = np.maximum(start_ts, start).astype(float) |
| 103 | ebounds = np.minimum(end_ts, end).astype(float) |
| 104 | ws = (ebounds - sbounds) / (end_ts - start_ts) |
| 105 | if np.any(np.isnan(ws)): |
| 106 | err("WARNING: zero-length sample(s) detected. Log file corrupt" |
| 107 | " / bad time values? Ignoring these samples.\n") |
    ws[np.isnan(ws)] = 0.0
| 109 | return ws |
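
# Worked example with hypothetical values: two samples spanning [0, 1000] and
# [1800, 2300] msec, weighted against the interval [1000, 2000]. The first
# sample ends exactly at the interval start (weight 0.0); 200 of the second
# sample's 500 msec fall inside the interval (weight 0.4):
#
#   weights(np.array([0, 1800]), np.array([1000, 2300]), 1000, 2000)
#   # -> array([0. , 0.4])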
| 110 | |
| 111 | def weighted_average(vs, ws): |
| 112 | return np.sum(vs * ws) / np.sum(ws) |
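
# For example, weighted_average(np.array([10., 20., 30.]), np.array([1., 1., 2.]))
# returns (10 + 20 + 60) / 4 == 22.5.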
| 113 | |
| 114 | |
| 115 | percs = None |
| 116 | columns = None |
| 117 | |
| 118 | def gen_output_columns(ctx): |
| 119 | global percs,columns |
| 120 | strpercs = re.split('[,:]', ctx.percentiles) |
| 121 | percs = [50.0] # always print 50% in 'median' column |
| 122 | percs.extend(list(map(float,strpercs))) |
| 123 | if ctx.directions: |
| 124 | columns = ["end-time", "dir", "samples", "min", "avg", "median"] |
| 125 | else: |
| 126 | columns = ["end-time", "samples", "min", "avg", "median"] |
| 127 | columns.extend(list([x+'%' for x in strpercs])) |
| 128 | columns.append("max") |
| 129 | |
| 130 | def fmt_float_list(ctx, num=1): |
| 131 | """ Return a comma separated list of float formatters to the required number |
| 132 | of decimal places. For instance: |
| 133 | |
          with ctx.decimals == 4, fmt_float_list(ctx, num=3) == "%.4f, %.4f, %.4f"
| 135 | """ |
| 136 | return ', '.join(["%%.%df" % ctx.decimals] * num) |
| 137 | |
| 138 | # Default values - see beginning of main() for how we detect number columns in |
| 139 | # the input files: |
| 140 | __HIST_COLUMNS = 1216 |
| 141 | __NON_HIST_COLUMNS = 3 |
| 142 | __TOTAL_COLUMNS = __HIST_COLUMNS + __NON_HIST_COLUMNS |
| 143 | |
| 144 | def read_chunk(rdr, sz): |
| 145 | """ Read the next chunk of size sz from the given reader. """ |
| 146 | try: |
| 147 | """ StopIteration occurs when the pandas reader is empty, and AttributeError |
| 148 | occurs if rdr is None due to the file being empty. """ |
| 149 | new_arr = rdr.read().values |
| 150 | except (StopIteration, AttributeError): |
| 151 | return None |
| 152 | |
| 153 | # Let's leave the array as is, and let later code ignore the block size |
| 154 | return new_arr |
| 155 | |
| 156 | #""" Extract array of the times, directions wo times, and histograms matrix without times column. """ |
| 157 | #times, rws, szs = new_arr[:,0], new_arr[:,1], new_arr[:,2] |
| 158 | #hists = new_arr[:,__NON_HIST_COLUMNS:] |
| 159 | #times = times.reshape((len(times),1)) |
| 160 | #dirs = rws.reshape((len(rws),1)) |
| 161 | #arr = np.append(times, hists, axis=1) |
| 162 | #return arr |
| 163 | |
| 164 | def get_min(fps, arrs): |
| 165 | """ Find the file with the current first row with the smallest start time """ |
| 166 | return min([fp for fp in fps if not arrs[fp] is None], key=lambda fp: arrs.get(fp)[0][0]) |
| 167 | |
| 168 | def histogram_generator(ctx, fps, sz): |
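    """ Merge rows from all input files in increasing timestamp order
        (a k-way merge over the per-file streams).

        Each yielded row is the next raw log row with the index of its source
        file spliced in at position 1, i.e.:
            [timestamp, file_index, direction, blocksize, bin_0, ..., bin_N]
    """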
| 169 | |
| 170 | # Create a chunked pandas reader for each of the files: |
| 171 | rdrs = {} |
| 172 | for fp in fps: |
| 173 | try: |
| 174 | rdrs[fp] = pandas.read_csv(fp, dtype=int, header=None, chunksize=sz) |
        except ValueError as e:
            if str(e) == 'No columns to parse from file':
                if ctx.warn: sys.stderr.write("WARNING: Empty input file encountered.\n")
                rdrs[fp] = None
            else:
                raise
| 181 | |
| 182 | # Initial histograms from disk: |
| 183 | arrs = {fp: read_chunk(rdr, sz) for fp,rdr in rdrs.items()} |
| 184 | while True: |
| 185 | |
| 186 | try: |
| 187 | """ ValueError occurs when nothing more to read """ |
| 188 | fp = get_min(fps, arrs) |
| 189 | except ValueError: |
| 190 | return |
| 191 | arr = arrs[fp] |
| 192 | arri = np.insert(arr[0], 1, fps.index(fp)) |
| 193 | yield arri |
| 194 | arrs[fp] = arr[1:] |
| 195 | |
| 196 | if arrs[fp].shape[0] == 0: |
| 197 | arrs[fp] = read_chunk(rdrs[fp], sz) |
| 198 | |
| 199 | def _plat_idx_to_val(idx, edge=0.5, FIO_IO_U_PLAT_BITS=6, FIO_IO_U_PLAT_VAL=64): |
| 200 | """ Taken from fio's stat.c for calculating the latency value of a bin |
| 201 | from that bin's index. |
| 202 | |
| 203 | idx : the value of the index into the histogram bins |
| 204 | edge : fractional value in the range [0,1]** indicating how far into |
| 205 | the bin we wish to compute the latency value of. |
| 206 | |
| 207 | ** edge = 0.0 and 1.0 computes the lower and upper latency bounds |
| 208 | respectively of the given bin index. """ |
| 209 | |
| 210 | # MSB <= (FIO_IO_U_PLAT_BITS-1), cannot be rounded off. Use |
| 211 | # all bits of the sample as index |
| 212 | if (idx < (FIO_IO_U_PLAT_VAL << 1)): |
| 213 | return idx |
| 214 | |
| 215 | # Find the group and compute the minimum value of that group |
| 216 | error_bits = (idx >> FIO_IO_U_PLAT_BITS) - 1 |
| 217 | base = 1 << (error_bits + FIO_IO_U_PLAT_BITS) |
| 218 | |
| 219 | # Find its bucket number of the group |
| 220 | k = idx % FIO_IO_U_PLAT_VAL |
| 221 | |
| 222 | # Return the mean (if edge=0.5) of the range of the bucket |
| 223 | return base + ((k + edge) * (1 << error_bits)) |
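
# Worked example with the default parameters: idx = 128 falls in error group 1
# (error_bits = 1, base = 128, k = 0), so its midpoint latency value is
# 128 + 0.5 * 2 == 129.0, while edge=0.0 and edge=1.0 give the bin bounds
# 128.0 and 130.0 respectively.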
| 224 | |
| 225 | def plat_idx_to_val_coarse(idx, coarseness, edge=0.5): |
| 226 | """ Converts the given *coarse* index into a non-coarse index as used by fio |
| 227 | in stat.h:plat_idx_to_val(), subsequently computing the appropriate |
| 228 | latency value for that bin. |
| 229 | """ |
| 230 | |
    # Multiply the index by 2^coarseness to get the fine-grained bin index,
    # with a max of 1536 bins (FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h)
| 233 | stride = 1 << coarseness |
| 234 | idx = idx * stride |
| 235 | lower = _plat_idx_to_val(idx, edge=0.0) |
| 236 | upper = _plat_idx_to_val(idx + stride, edge=1.0) |
| 237 | return lower + (upper - lower) * edge |
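
# Worked example with hypothetical inputs: with coarseness=1 each coarse bin
# spans two fine bins, so coarse index 100 maps to fine bins 200-201:
#   lower = _plat_idx_to_val(200, edge=0.0) == 288.0
#   upper = _plat_idx_to_val(202, edge=1.0) == 300.0
# and plat_idx_to_val_coarse(100, 1) returns their midpoint, 294.0.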
| 238 | |
def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx, dir=None):
| 240 | ps = weighted_percentile(percs, vs, ws) |
| 241 | |
| 242 | avg = weighted_average(vs, ws) |
| 243 | values = [mn, avg] + list(ps) + [mx] |
| 244 | if ctx.directions: |
| 245 | row = [end, dir, ss_cnt] |
| 246 | fmt = "%d, %s, %d, " |
| 247 | else: |
| 248 | row = [end, ss_cnt] |
| 249 | fmt = "%d, %d, " |
| 250 | row = row + [float(x) / ctx.divisor for x in values] |
| 251 | if ctx.divisor > 1: |
| 252 | fmt = fmt + fmt_float_list(ctx, len(percs)+3) |
| 253 | else: |
        # min and max are printed as integers when no divisor is given
| 255 | fmt = fmt + "%d, " + fmt_float_list(ctx, len(percs)+1) + ", %d" |
| 256 | |
| 257 | print (fmt % tuple(row)) |
| 258 | |
| 259 | def update_extreme(val, fncn, new_val): |
| 260 | """ Calculate min / max in the presence of None values """ |
| 261 | if val is None: return new_val |
| 262 | else: return fncn(val, new_val) |
| 263 | |
| 264 | # See beginning of main() for how bin_vals are computed |
| 265 | bin_vals = [] |
| 266 | lower_bin_vals = [] # lower edge of each bin |
| 267 | upper_bin_vals = [] # upper edge of each bin |
| 268 | |
| 269 | def process_interval(ctx, iHist, iEnd, dir): |
| 270 | """ print estimated percentages for the given merged sample |
| 271 | """ |
| 272 | ss_cnt = 0 # number of samples affecting this interval |
| 273 | mn_bin_val, mx_bin_val = None, None |
| 274 | |
| 275 | # Update total number of samples affecting current interval histogram: |
| 276 | ss_cnt += np.sum(iHist) |
| 277 | |
| 278 | # Update min and max bin values |
| 279 | idxs = np.nonzero(iHist != 0)[0] |
| 280 | if idxs.size > 0: |
| 281 | mn_bin_val = bin_vals[idxs[0]] |
| 282 | mx_bin_val = bin_vals[idxs[-1]] |
| 283 | |
| 284 | if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val, dir=dir) |
| 285 | |
| 286 | |
| 287 | dir_map = ['r', 'w', 't'] # map of directional value in log to textual representation |
| 288 | def process_weighted_interval(ctx, samples, iStart, iEnd, printdirs): |
| 289 | """ Construct the weighted histogram for the given interval by scanning |
| 290 | through all the histograms and figuring out which of their bins have |
| 291 | samples with latencies which overlap with the given interval |
| 292 | [iStart,iEnd]. |
| 293 | """ |
| 294 | |
| 295 | times, files, dirs, sizes, hists = samples[:,0], samples[:,1], samples[:,2], samples[:,3], samples[:,4:] |
| 296 | iHist={}; ss_cnt = {}; mn_bin_val={}; mx_bin_val={} |
| 297 | for dir in printdirs: |
| 298 | iHist[dir] = np.zeros(__HIST_COLUMNS, dtype=float) |
| 299 | ss_cnt[dir] = 0 # number of samples affecting this interval |
| 300 | mn_bin_val[dir] = None |
| 301 | mx_bin_val[dir] = None |
| 302 | |
| 303 | for end_time,file,dir,hist in zip(times,files,dirs,hists): |
| 304 | |
| 305 | # Only look at bins of the current histogram sample which |
| 306 | # started before the end of the current time interval [start,end] |
| 307 | start_times = (end_time - 0.5 * ctx.interval) - bin_vals / ctx.time_divisor |
| 308 | idx = np.where(start_times < iEnd) |
| 309 | s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx] |
| 310 | |
        # Increment the current interval histogram by this sample's weighted bin
        # counts, and update the total sample count and min/max bin values as
        # necessary.
| 314 | textdir = dir_map[dir] |
| 315 | ws = hs * weights(s_ts, end_time, iStart, iEnd) |
| 316 | mmidx = np.where(hs != 0)[0] |
| 317 | if 'm' in printdirs: |
| 318 | iHist['m'][idx] += ws |
| 319 | ss_cnt['m'] += np.sum(hs) |
| 320 | if mmidx.size > 0: |
| 321 | mn_bin_val['m'] = update_extreme(mn_bin_val['m'], min, l_bvs[max(0, mmidx[0] - 1)]) |
| 322 | mx_bin_val['m'] = update_extreme(mx_bin_val['m'], max, u_bvs[min(len(hs) - 1, mmidx[-1] + 1)]) |
| 323 | if textdir in printdirs: |
| 324 | iHist[textdir][idx] += ws |
| 325 | ss_cnt[textdir] += np.sum(hs) # Update total number of samples affecting current interval histogram: |
| 326 | if mmidx.size > 0: |
| 327 | mn_bin_val[textdir] = update_extreme(mn_bin_val[textdir], min, l_bvs[max(0, mmidx[0] - 1)]) |
| 328 | mx_bin_val[textdir] = update_extreme(mx_bin_val[textdir], max, u_bvs[min(len(hs) - 1, mmidx[-1] + 1)]) |
| 329 | |
| 330 | for textdir in sorted(printdirs): |
| 331 | if ss_cnt[textdir] > 0: print_all_stats(ctx, iEnd, mn_bin_val[textdir], ss_cnt[textdir], bin_vals, iHist[textdir], mx_bin_val[textdir], dir=textdir) |
| 332 | |
| 333 | def guess_max_from_bins(ctx, hist_cols): |
| 334 | """ Try to guess the GROUP_NR from given # of histogram |
| 335 | columns seen in an input file """ |
| 336 | max_coarse = 8 |
| 337 | if ctx.group_nr < 19 or ctx.group_nr > 26: |
| 338 | bins = [ctx.group_nr * (1 << 6)] |
| 339 | else: |
| 340 | bins = [1216,1280,1344,1408,1472,1536,1600,1664] |
| 341 | coarses = range(max_coarse + 1) |
    fncn = lambda z: [z // 2**x if z % 2**x == 0 else -10 for x in coarses]
| 343 | |
| 344 | arr = np.transpose(list(map(fncn, bins))) |
| 345 | idx = np.where(arr == hist_cols) |
| 346 | if len(idx[1]) == 0: |
| 347 | table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array',' ') |
| 348 | errmsg = ("Unable to determine bin values from input clat_hist files. Namely \n" |
| 349 | "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) + |
| 350 | "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n" |
| 351 | "This number needs to be equal to one of the following numbers:\n\n" |
| 352 | + table + "\n\n" |
| 353 | "Possible reasons and corresponding solutions:\n" |
| 354 | " - Input file(s) does not contain histograms.\n" |
| 355 | " - You recompiled fio with a different GROUP_NR. If so please specify this\n" |
| 356 | " new GROUP_NR on the command line with --group_nr\n") |
| 357 | if runascmd: |
| 358 | err(errmsg) |
| 359 | exit(1) |
| 360 | else: |
| 361 | raise RuntimeError(errmsg) |
| 362 | |
| 363 | return bins[idx[1][0]] |
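
# For example, with the default --group_nr of 29 the only candidate bin count
# is 29 * 64 == 1856, so an input file carrying 928 histogram columns matches
# at coarseness 1; main() then computes coarseness = log2(1856 / 928) = 1 when
# building the bin latency values.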
| 364 | |
| 365 | def output_weighted_interval_data(ctx,printdirs): |
| 366 | |
| 367 | fps = [open(f, 'r') for f in ctx.FILE] |
| 368 | gen = histogram_generator(ctx, fps, ctx.buff_size) |
| 369 | |
| 370 | print(', '.join(columns)) |
| 371 | |
| 372 | try: |
| 373 | start, end = 0, ctx.interval |
| 374 | arr = np.empty(shape=(0,__TOTAL_COLUMNS + 1),dtype=int) |
| 375 | more_data = True |
| 376 | while more_data or len(arr) > 0: |
| 377 | |
| 378 | # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval. |
| 379 | while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end: |
| 380 | try: |
| 381 | new_arr = next(gen) |
| 382 | except StopIteration: |
| 383 | more_data = False |
| 384 | break |
| 385 | nashape = new_arr.reshape((1,__TOTAL_COLUMNS + 1)) |
| 386 | arr = np.append(arr, nashape, axis=0) |
| 387 | #arr = arr.astype(int) |
| 388 | |
| 389 | if arr.size > 0: |
| 390 | # Jump immediately to the start of the input, rounding |
| 391 | # down to the nearest multiple of the interval (useful when --log_unix_epoch |
| 392 | # was used to create these histograms): |
                # (ctx.max_latency is in seconds; log timestamps are in msec)
                if start == 0 and arr[0][0] - ctx.max_latency * 1000 > end:
                    start = arr[0][0] - ctx.max_latency * 1000
| 395 | start = start - (start % ctx.interval) |
| 396 | end = start + ctx.interval |
| 397 | |
| 398 | process_weighted_interval(ctx, arr, start, end, printdirs) |
| 399 | |
| 400 | # Update arr to throw away samples we no longer need - samples which |
| 401 | # end before the start of the next interval, i.e. the end of the |
| 402 | # current interval: |
| 403 | idx = np.where(arr[:,0] > end) |
| 404 | arr = arr[idx] |
| 405 | |
| 406 | start += ctx.interval |
| 407 | end = start + ctx.interval |
| 408 | finally: |
| 409 | for fp in fps: |
| 410 | fp.close() |
| 411 | |
| 412 | def output_interval_data(ctx,directions): |
| 413 | fps = [HistFileRdr(f) for f in ctx.FILE] |
| 414 | |
| 415 | print(', '.join(columns)) |
| 416 | |
| 417 | start = 0 |
| 418 | end = ctx.interval |
| 419 | while True: |
| 420 | |
| 421 | more_data = False |
| 422 | |
| 423 | # add bins from all files in target intervals |
| 424 | arr = None |
| 425 | numSamples = 0 |
| 426 | while True: |
| 427 | foundSamples = False |
| 428 | for fp in fps: |
| 429 | ts = fp.curTS |
                if ts and ts+10 < end: # samples within 10 msec of the interval end are deferred to the next interval
| 431 | curdirect = fp.curDir |
| 432 | numSamples += 1 |
| 433 | foundSamples = True |
| 434 | if arr is None: |
| 435 | arr = {} |
| 436 | for d in directions: |
| 437 | arr[d] = np.zeros(shape=(__HIST_COLUMNS), dtype=int) |
| 438 | if 'm' in arr: |
| 439 | arr['m'] = np.add(arr['m'], fp.curBins) |
| 440 | if 'r' in arr and curdirect == 0: |
| 441 | arr['r'] = np.add(arr['r'], fp.curBins) |
| 442 | if 'w' in arr and curdirect == 1: |
| 443 | arr['w'] = np.add(arr['w'], fp.curBins) |
| 444 | if 't' in arr and curdirect == 2: |
| 445 | arr['t'] = np.add(arr['t'], fp.curBins) |
| 446 | |
| 447 | more_data = True |
| 448 | fp.nextData() |
| 449 | elif ts: |
| 450 | more_data = True |
| 451 | |
            # stop when the end of all files has been reached, or when a full
            # pass over the files found no sample in the current interval
| 454 | if not more_data or not foundSamples: |
| 455 | break |
| 456 | |
| 457 | if arr is not None: |
| 458 | #print("{} size({}) samples({}) nonzero({}):".format(end, arr.size, numSamples, np.count_nonzero(arr)), str(arr), ) |
| 459 | for d in sorted(arr.keys()): |
| 460 | aval = arr[d] |
| 461 | process_interval(ctx, aval, end, d) |
| 462 | |
        # reached the end of all files
| 464 | if not more_data: |
| 465 | break |
| 466 | |
| 467 | start += ctx.interval |
| 468 | end = start + ctx.interval |
| 469 | |
| 470 | def main(ctx): |
| 471 | |
| 472 | if ctx.job_file: |
        from configparser import ConfigParser, NoOptionError

        cp = ConfigParser(allow_no_value=True)
        with open(ctx.job_file, 'r') as fp:
            cp.read_file(fp)
| 478 | |
| 479 | if ctx.interval is None: |
| 480 | # Auto detect --interval value |
| 481 | for s in cp.sections(): |
| 482 | try: |
| 483 | hist_msec = cp.get(s, 'log_hist_msec') |
| 484 | if hist_msec is not None: |
| 485 | ctx.interval = int(hist_msec) |
| 486 | except NoOptionError: |
| 487 | pass |
| 488 | |
| 489 | if not hasattr(ctx, 'percentiles'): |
| 490 | ctx.percentiles = "90,95,99" |
| 491 | |
| 492 | if ctx.directions: |
| 493 | ctx.directions = ctx.directions.lower() |
| 494 | |
| 495 | if ctx.interval is None: |
| 496 | ctx.interval = 1000 |
| 497 | |
| 498 | if ctx.usbin: |
| 499 | ctx.time_divisor = 1000.0 # bins are in us |
| 500 | else: |
| 501 | ctx.time_divisor = 1000000.0 # bins are in ns |
| 502 | |
| 503 | gen_output_columns(ctx) |
| 504 | |
| 505 | |
| 506 | # Automatically detect how many columns are in the input files, |
| 507 | # calculate the corresponding 'coarseness' parameter used to generate |
| 508 | # those files, and calculate the appropriate bin latency values: |
| 509 | with open(ctx.FILE[0], 'r') as fp: |
| 510 | global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS |
| 511 | __TOTAL_COLUMNS = len(fp.readline().split(',')) |
| 512 | __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS |
| 513 | |
| 514 | max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS) |
| 515 | coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS)) |
| 516 | bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness) for x in np.arange(__HIST_COLUMNS)], dtype=float) |
| 517 | lower_bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness, 0.0) for x in np.arange(__HIST_COLUMNS)], dtype=float) |
| 518 | upper_bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness, 1.0) for x in np.arange(__HIST_COLUMNS)], dtype=float) |
| 519 | |
    # indicate which directions to output: read(0) -> 'r', write(1) -> 'w', trim(2) -> 't', plus 'm' for all directions combined
| 521 | directions = set() |
| 522 | if not ctx.directions or 'm' in ctx.directions: directions.add('m') |
| 523 | if ctx.directions and 'r' in ctx.directions: directions.add('r') |
| 524 | if ctx.directions and 'w' in ctx.directions: directions.add('w') |
| 525 | if ctx.directions and 't' in ctx.directions: directions.add('t') |
| 526 | |
| 527 | if ctx.noweight: |
| 528 | output_interval_data(ctx, directions) |
| 529 | else: |
| 530 | output_weighted_interval_data(ctx, directions) |
| 531 | |
| 532 | |
| 533 | if __name__ == '__main__': |
| 534 | import argparse |
| 535 | runascmd = True |
| 536 | p = argparse.ArgumentParser() |
| 537 | arg = p.add_argument |
| 538 | arg("FILE", help='space separated list of latency log filenames', nargs='+') |
| 539 | arg('--buff_size', |
| 540 | default=10000, |
| 541 | type=int, |
| 542 | help='number of samples to buffer into numpy at a time') |
| 543 | |
| 544 | arg('--max_latency', |
| 545 | default=20, |
| 546 | type=float, |
        help='maximum assumed latency in seconds; data up to this far past each '
             'interval end is read so long-latency samples can be weighted into '
             'earlier intervals')
| 548 | |
| 549 | arg('-i', '--interval', |
| 550 | type=int, |
| 551 | help='interval width (ms), default 1000 ms') |
| 552 | |
| 553 | arg('--noweight', |
| 554 | action='store_true', |
| 555 | default=False, |
| 556 | help='do not perform weighting of samples between output intervals') |
| 557 | |
| 558 | arg('-d', '--divisor', |
| 559 | required=False, |
| 560 | type=int, |
| 561 | default=1, |
| 562 | help='divide the results by this value.') |
| 563 | |
| 564 | arg('--decimals', |
| 565 | default=3, |
| 566 | type=int, |
| 567 | help='number of decimal places to print floats to') |
| 568 | |
| 569 | arg('--warn', |
| 570 | dest='warn', |
| 571 | action='store_true', |
| 572 | default=False, |
| 573 | help='print warning messages to stderr') |
| 574 | |
| 575 | arg('--group_nr', |
| 576 | default=29, |
| 577 | type=int, |
| 578 | help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h') |
| 579 | |
| 580 | arg('--job-file', |
| 581 | default=None, |
| 582 | type=str, |
| 583 | help='Optional argument pointing to the job file used to create the ' |
| 584 | 'given histogram files. Useful for auto-detecting --log_hist_msec and ' |
| 585 | '--log_unix_epoch (in fio) values.') |
| 586 | |
| 587 | arg('--percentiles', |
| 588 | default="90:95:99", |
| 589 | type=str, |
| 590 | help='Optional argument of comma or colon separated percentiles to print. ' |
| 591 | 'The default is "90.0:95.0:99.0". min, median(50%%) and max percentiles are always printed') |
| 592 | |
| 593 | arg('--usbin', |
| 594 | default=False, |
| 595 | action='store_true', |
        help='histogram bin latencies are in microseconds (fio versions < 2.99); fio uses nanoseconds for versions >= 2.99')
| 597 | |
| 598 | arg('--directions', |
| 599 | default=None, |
| 600 | type=str, |
| 601 | help='Optionally split results output by reads, writes, trims or mixed. ' |
| 602 | 'Value may be any combination of "rwtm" characters. ' |
| 603 | 'By default, only "mixed" results are output without a "dir" field. ' |
| 604 | 'But, specifying the --directions option ' |
| 605 | 'adds a "dir" field to the output content, and separate rows for each of the indicated ' |
| 606 | 'directions.') |
| 607 | |
| 608 | main(p.parse_args()) |
| 609 | |