tools/hist/fiologparser_hist.py

   1 #!/usr/bin/env python2.7
   2 """
   3     Utility for converting *_clat_hist* files generated by fio into latency statistics.
   4
   5     Example usage:
   6
   7             $ fiologparser_hist.py *_clat_hist*
   8             end-time, samples, min, avg, median, 90%, 95%, 99%, max
   9             1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888.000
  10             2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888.000
  11             4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
  12             ...
  13
  14     @author Karl Cronburg <karl.cronburg@gmail.com>
  15 """
  16 import os
  17 import sys
  18 import pandas
  19 import numpy as np
  20
  21 err = sys.stderr.write
  22
  23 def weighted_percentile(percs, vs, ws):
  24     """ Use linear interpolation to calculate the weighted percentile.
  25
  26         Value and weight arrays are first sorted by value. The cumulative
  27         distribution function (cdf) is then computed, after which np.interp
  28         finds the two values closest to our desired weighted percentile(s)
  29         and linearly interpolates them.
  30
  31         percs  :: List of percentiles we want to calculate
  32         vs     :: Array of values we are computing the percentile of
  33         ws     :: Array of weights for our corresponding values
  34         return :: Array of percentiles
  35     """
  36     idx = np.argsort(vs)
  37     vs, ws = vs[idx], ws[idx] # weights and values sorted by value
  38     cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
  39     return np.interp(percs, cdf, vs) # linear interpolation
  40
  41 def weights(start_ts, end_ts, start, end):
  42     """ Calculate weights based on fraction of sample falling in the
  43         given interval [start,end]. Weights computed using vector / array
  44         computation instead of for-loops.
  45
  46         Note that samples with zero time length are effectively ignored
  47         (we set their weight to zero).
  48
  49         start_ts :: Array of start times for a set of samples
  50         end_ts   :: Array of end times for a set of samples
  51         start    :: int
  52         end      :: int
  53         return   :: Array of weights
  54     """
  55     sbounds = np.maximum(start_ts, start).astype(float)
  56     ebounds = np.minimum(end_ts,   end).astype(float)
  57     ws = (ebounds - sbounds) / (end_ts - start_ts)
  58     if np.any(np.isnan(ws)):
  59       err("WARNING: zero-length sample(s) detected. Log file corrupt"
  60           " / bad time values? Ignoring these samples.\n")
  61     ws[np.where(np.isnan(ws))] = 0.0;
  62     return ws
  63
  64 def weighted_average(vs, ws):
  65     return np.sum(vs * ws) / np.sum(ws)
  66
  67 columns = ["end-time", "samples", "min", "avg", "median", "90%", "95%", "99%", "max"]
  68 percs   = [50, 90, 95, 99]
  69
  70 def fmt_float_list(ctx, num=1):
  71   """ Return a comma separated list of float formatters to the required number
  72       of decimal places. For instance:
  73
  74         fmt_float_list(ctx.decimals=4, num=3) == "%.4f, %.4f, %.4f"
  75   """
  76   return ', '.join(["%%.%df" % ctx.decimals] * num)
  77
  78 # Default values - see beginning of main() for how we detect number columns in
  79 # the input files:
  80 __HIST_COLUMNS = 1216
  81 __NON_HIST_COLUMNS = 3
  82 __TOTAL_COLUMNS = __HIST_COLUMNS + __NON_HIST_COLUMNS
  83
  84 def read_chunk(rdr, sz):
  85     """ Read the next chunk of size sz from the given reader. """
  86     try:
  87         """ StopIteration occurs when the pandas reader is empty, and AttributeError
  88             occurs if rdr is None due to the file being empty. """
  89         new_arr = rdr.read().values
  90     except (StopIteration, AttributeError):
  91         return None
  92
  93     """ Extract array of just the times, and histograms matrix without times column. """
  94     times, rws, szs = new_arr[:,0], new_arr[:,1], new_arr[:,2]
  95     hists = new_arr[:,__NON_HIST_COLUMNS:]
  96     times = times.reshape((len(times),1))
  97     arr = np.append(times, hists, axis=1)
  98
  99     return arr
 100
 101 def get_min(fps, arrs):
 102     """ Find the file with the current first row with the smallest start time """
 103     return min([fp for fp in fps if not arrs[fp] is None], key=lambda fp: arrs.get(fp)[0][0])
 104
 105 def histogram_generator(ctx, fps, sz):
 106
 107     # Create a chunked pandas reader for each of the files:
 108     rdrs = {}
 109     for fp in fps:
 110         try:
 111             rdrs[fp] = pandas.read_csv(fp, dtype=int, header=None, chunksize=sz)
 112         except ValueError as e:
 113             if e.message == 'No columns to parse from file':
 114                 if ctx.warn: sys.stderr.write("WARNING: Empty input file encountered.\n")
 115                 rdrs[fp] = None
 116             else:
 117                 raise(e)
 118
 119     # Initial histograms from disk:
 120     arrs = {fp: read_chunk(rdr, sz) for fp,rdr in rdrs.items()}
 121     while True:
 122
 123         try:
 124             """ ValueError occurs when nothing more to read """
 125             fp = get_min(fps, arrs)
 126         except ValueError:
 127             return
 128         arr = arrs[fp]
 129         yield np.insert(arr[0], 1, fps.index(fp))
 130         arrs[fp] = arr[1:]
 131
 132         if arrs[fp].shape[0] == 0:
 133             arrs[fp] = read_chunk(rdrs[fp], sz)
 134
 135 def _plat_idx_to_val(idx, edge=0.5, FIO_IO_U_PLAT_BITS=6, FIO_IO_U_PLAT_VAL=64):
 136     """ Taken from fio's stat.c for calculating the latency value of a bin
 137         from that bin's index.
 138
 139             idx  : the value of the index into the histogram bins
 140             edge : fractional value in the range [0,1]** indicating how far into
 141             the bin we wish to compute the latency value of.
 142
 143         ** edge = 0.0 and 1.0 computes the lower and upper latency bounds
 144            respectively of the given bin index. """
 145
 146     # MSB <= (FIO_IO_U_PLAT_BITS-1), cannot be rounded off. Use
 147     # all bits of the sample as index
 148     if (idx < (FIO_IO_U_PLAT_VAL << 1)):
 149         return idx
 150
 151     # Find the group and compute the minimum value of that group
 152     error_bits = (idx >> FIO_IO_U_PLAT_BITS) - 1
 153     base = 1 << (error_bits + FIO_IO_U_PLAT_BITS)
 154
 155     # Find its bucket number of the group
 156     k = idx % FIO_IO_U_PLAT_VAL
 157
 158     # Return the mean (if edge=0.5) of the range of the bucket
 159     return base + ((k + edge) * (1 << error_bits))
 160
 161 def plat_idx_to_val_coarse(idx, coarseness, edge=0.5):
 162     """ Converts the given *coarse* index into a non-coarse index as used by fio
 163         in stat.h:plat_idx_to_val(), subsequently computing the appropriate
 164         latency value for that bin.
 165         """
 166
 167     # Multiply the index by the power of 2 coarseness to get the bin
 168     # bin index with a max of 1536 bins (FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h)
 169     stride = 1 << coarseness
 170     idx = idx * stride
 171     lower = _plat_idx_to_val(idx, edge=0.0)
 172     upper = _plat_idx_to_val(idx + stride, edge=1.0)
 173     return lower + (upper - lower) * edge
 174
 175 def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx):
 176     ps = weighted_percentile(percs, vs, ws)
 177
 178     avg = weighted_average(vs, ws)
 179     values = [mn, avg] + list(ps) + [mx]
 180     row = [end, ss_cnt] + map(lambda x: float(x) / ctx.divisor, values)
 181     fmt = "%d, %d, %d, " + fmt_float_list(ctx, 5) + ", %d"
 182     print (fmt % tuple(row))
 183
 184 def update_extreme(val, fncn, new_val):
 185     """ Calculate min / max in the presence of None values """
 186     if val is None: return new_val
 187     else: return fncn(val, new_val)
 188
 189 # See beginning of main() for how bin_vals are computed
 190 bin_vals = []
 191 lower_bin_vals = [] # lower edge of each bin
 192 upper_bin_vals = [] # upper edge of each bin
 193
 194 def process_interval(ctx, samples, iStart, iEnd):
 195     """ Construct the weighted histogram for the given interval by scanning
 196         through all the histograms and figuring out which of their bins have
 197         samples with latencies which overlap with the given interval
 198         [iStart,iEnd].
 199     """
 200
 201     times, files, hists = samples[:,0], samples[:,1], samples[:,2:]
 202     iHist = np.zeros(__HIST_COLUMNS)
 203     ss_cnt = 0 # number of samples affecting this interval
 204     mn_bin_val, mx_bin_val = None, None
 205
 206     for end_time,file,hist in zip(times,files,hists):
 207
 208         # Only look at bins of the current histogram sample which
 209         # started before the end of the current time interval [start,end]
 210         start_times = (end_time - 0.5 * ctx.interval) - bin_vals / 1000.0
 211         idx = np.where(start_times < iEnd)
 212         s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx]
 213
 214         # Increment current interval histogram by weighted values of future histogram:
 215         ws = hs * weights(s_ts, end_time, iStart, iEnd)
 216         iHist[idx] += ws
 217
 218         # Update total number of samples affecting current interval histogram:
 219         ss_cnt += np.sum(hs)
 220
 221         # Update min and max bin values seen if necessary:
 222         idx = np.where(hs != 0)[0]
 223         if idx.size > 0:
 224             mn_bin_val = update_extreme(mn_bin_val, min, l_bvs[max(0,           idx[0]  - 1)])
 225             mx_bin_val = update_extreme(mx_bin_val, max, u_bvs[min(len(hs) - 1, idx[-1] + 1)])
 226
 227     if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val)
 228
 229 def guess_max_from_bins(ctx, hist_cols):
 230     """ Try to guess the GROUP_NR from given # of histogram
 231         columns seen in an input file """
 232     max_coarse = 8
 233     if ctx.group_nr < 19 or ctx.group_nr > 26:
 234         bins = [ctx.group_nr * (1 << 6)]
 235     else:
 236         bins = [1216,1280,1344,1408,1472,1536,1600,1664]
 237     coarses = range(max_coarse + 1)
 238     fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else -10, coarses))
 239
 240     arr = np.transpose(list(map(fncn, bins)))
 241     idx = np.where(arr == hist_cols)
 242     if len(idx[1]) == 0:
 243         table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array','     ')
 244         err("Unable to determine bin values from input clat_hist files. Namely \n"
 245             "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) +
 246             "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n"
 247             "This number needs to be equal to one of the following numbers:\n\n"
 248             + table + "\n\n"
 249             "Possible reasons and corresponding solutions:\n"
 250             "  - Input file(s) does not contain histograms.\n"
 251             "  - You recompiled fio with a different GROUP_NR. If so please specify this\n"
 252             "    new GROUP_NR on the command line with --group_nr\n")
 253         exit(1)
 254     return bins[idx[1][0]]
 255
 256 def main(ctx):
 257
 258     # Automatically detect how many columns are in the input files,
 259     # calculate the corresponding 'coarseness' parameter used to generate
 260     # those files, and calculate the appropriate bin latency values:
 261     with open(ctx.FILE[0], 'r') as fp:
 262         global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
 263         __TOTAL_COLUMNS = len(fp.readline().split(','))
 264         __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
 265
 266         max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
 267         coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
 268         bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float)
 269         lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float)
 270         upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float)
 271
 272     fps = [open(f, 'r') for f in ctx.FILE]
 273     gen = histogram_generator(ctx, fps, ctx.buff_size)
 274
 275     print(', '.join(columns))
 276
 277     try:
 278         start, end = 0, ctx.interval
 279         arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1))
 280         more_data = True
 281         while more_data or len(arr) > 0:
 282
 283             # Read up to ctx.max_latency (default 20 seconds) of data from end of current interval.
 284             while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end:
 285                 try:
 286                     new_arr = next(gen)
 287                 except StopIteration:
 288                     more_data = False
 289                     break
 290                 arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0)
 291             arr = arr.astype(int)
 292
 293             if arr.size > 0:
 294                 process_interval(ctx, arr, start, end)
 295
 296                 # Update arr to throw away samples we no longer need - samples which
 297                 # end before the start of the next interval, i.e. the end of the
 298                 # current interval:
 299                 idx = np.where(arr[:,0] > end)
 300                 arr = arr[idx]
 301
 302             start += ctx.interval
 303             end = start + ctx.interval
 304     finally:
 305         map(lambda f: f.close(), fps)
 306
 307
 308 if __name__ == '__main__':
 309     import argparse
 310     p = argparse.ArgumentParser()
 311     arg = p.add_argument
 312     arg("FILE", help='space separated list of latency log filenames', nargs='+')
 313     arg('--buff_size',
 314         default=10000,
 315         type=int,
 316         help='number of samples to buffer into numpy at a time')
 317
 318     arg('--max_latency',
 319         default=20,
 320         type=float,
 321         help='number of seconds of data to process at a time')
 322
 323     arg('-i', '--interval',
 324         default=1000,
 325         type=int,
 326         help='interval width (ms)')
 327
 328     arg('-d', '--divisor',
 329         required=False,
 330         type=int,
 331         default=1,
 332         help='divide the results by this value.')
 333
 334     arg('--decimals',
 335         default=3,
 336         type=int,
 337         help='number of decimal places to print floats to')
 338
 339     arg('--warn',
 340         dest='warn',
 341         action='store_true',
 342         default=False,
 343         help='print warning messages to stderr')
 344
 345     arg('--group_nr',
 346         default=19,
 347         type=int,
 348         help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')
 349
 350     main(p.parse_args())
 351