--- /dev/null
+#!/usr/bin/env python2.7
+"""
+ Utility for converting *_clat_hist* files generated by fio into latency statistics.
+
+ Example usage:
+
+ $ fiologparser_hist.py *_clat_hist*
+ end-time, samples, min, avg, median, 90%, 95%, 99%, max
+ 1000, 15, 192, 1678.107, 1788.859, 1856.076, 1880.040, 1899.208, 1888
+ 2000, 43, 152, 1642.368, 1714.099, 1816.659, 1845.552, 1888.131, 1888
+ 4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
+ ...
+
+ Notes:
+
+ * end-times are calculated to be uniform increments of the --interval value given,
+ regardless of when histogram samples are reported. Of note:
+
+ * Intervals with no samples are omitted. In the example above this means
+ that there are no statistics for the interval from 2 to 3 seconds, and that
+ 39 samples influenced the statistics of the interval from 3 to 4 seconds.
+
+ * Intervals with a single sample will have the same value for all statistics.
+
+ * The number of samples is unweighted, corresponding to the total number of samples
+ which have any effect whatsoever on the interval.
+
+ * Min statistics are computed using the value of the lower boundary of the first bin
+ (in increasing bin order) with non-zero samples in it. Similarly for max,
+ we take the upper boundary of the last bin with non-zero samples in it.
+ This is semantically identical to taking the 0th and 100th percentiles with a
+ 50% bin-width buffer (because percentiles are computed using mid-points of
+ the bins). This enforces the following nice properties:
+
+ * min <= 50th <= 90th <= 95th <= 99th <= max
+
+ * min and max are strict lower and upper bounds on the actual
+ min / max seen by fio (and reported in *_clat.* with averaging turned off).
+
+ * Average statistics use a standard weighted arithmetic mean.
+
+ * Percentile statistics are computed using the weighted percentile method as
+ described here: https://en.wikipedia.org/wiki/Percentile#Weighted_percentile
+ See the weights() function for details on how weights are computed for individual
+ samples. In process_interval() we further multiply by the height of each bin
+ to get weighted histograms.
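+
+ As a toy illustration of this computation (using the weighted_percentile()
+ helper defined below; the numbers here are made up, not fio output):
+
+ vs = np.array([10., 20., 30.]); ws = np.array([1., 2., 1.])
+ # cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum() = [12.5, 50.0, 87.5]
+ weighted_percentile([50], vs, ws) # -> array([ 20.])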
+
+ * Files given on the command line are assumed to be fio histogram files and are
+ converted on-the-fly into their corresponding differenced files, i.e. non-cumulative
+ histograms, because fio outputs cumulative histograms but we want histograms
+ corresponding to individual time intervals (see the example below). An individual
+ histogram file can contain the cumulative histograms for multiple r/w directions
+ (notably when --rw=randrw). This is accounted for by tracking each r/w direction
+ separately. In the statistics reported we ultimately merge *all* histograms,
+ regardless of r/w direction.
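+
+ For example (with made-up counts): if a log reports cumulative counts of
+ 1, 4, 9 for some bin across three consecutive samples of the same r/w
+ direction, the differenced per-sample counts used by this script are 1, 3, 5.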
+
+ * The value of *_GROUP_NR in stat.h (and *_BITS) determines how many latency bins
+ fio outputs when histogramming is enabled. Namely for the current default of
+ GROUP_NR=19, we get 1,216 bins with a maximum latency of approximately 17
+ seconds. For certain applications this may not be sufficient. With GROUP_NR=24
+ we have 1,536 bins, giving us a maximum latency of roughly 9 minutes. If
+ you expect your application to experience latencies greater than 17 seconds,
+ you will need to recompile fio with a larger GROUP_NR, e.g. with:
+
+ sed -i.bak 's/^#define FIO_IO_U_PLAT_GROUP_NR 19$/#define FIO_IO_U_PLAT_GROUP_NR 24/' stat.h
+ make fio
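+
+ You can verify that the change took effect with, for example:
+
+ grep PLAT_GROUP_NR stat.h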
+
+ Quick reference table for the max latency corresponding to a sampling of
+ values for GROUP_NR:
+
+ GROUP_NR | # bins | max latency bin value
+ 19 | 1216 | 16.9 sec
+ 20 | 1280 | 33.8 sec
+ 21 | 1344 | 67.6 sec
+ 22 | 1408 | 2 min, 15 sec
+ 23 | 1472 | 4 min, 32 sec
+ 24 | 1536 | 9 min, 4 sec
+ 25 | 1600 | 18 min, 8 sec
+ 26 | 1664 | 36 min, 16 sec
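+
+ Each row follows from #bins = 64 * GROUP_NR (2^FIO_IO_U_PLAT_BITS = 64 bins
+ per group with the default FIO_IO_U_PLAT_BITS=6), and each additional group
+ approximately doubles the maximum latency bin value.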
+
+ * At present this program automatically detects the number of histogram bins in
+ the log files, and adjusts the bin latency values accordingly. In particular if
+ you use the --log_hist_coarseness parameter of fio, you get output files with
+ a number of bins according to the following table (note that the first
+ row is identical to the table above):
+
+ coarse \ GROUP_NR
+ 19 20 21 22 23 24 25 26
+ -------------------------------------------------------
+ 0 [[ 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664],
+ 1 [ 608, 640, 672, 704, 736, 768, 800, 832],
+ 2 [ 304, 320, 336, 352, 368, 384, 400, 416],
+ 3 [ 152, 160, 168, 176, 184, 192, 200, 208],
+ 4 [ 76, 80, 84, 88, 92, 96, 100, 104],
+ 5 [ 38, 40, 42, 44, 46, 48, 50, 52],
+ 6 [ 19, 20, 21, 22, 23, 24, 25, 26],
+ 7 [ N/A, 10, N/A, 11, N/A, 12, N/A, 13],
+ 8 [ N/A, 5, N/A, N/A, N/A, 6, N/A, N/A]]
+
+ For other values of GROUP_NR and coarseness, this table can be computed like this:
+
+ import numpy as np
+ bins = [1216,1280,1344,1408,1472,1536,1600,1664]
+ max_coarse = 8
+ fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else np.nan, range(max_coarse + 1)))
+ np.transpose(list(map(fncn, bins)))
+
+ Also note that you can achieve the same downsampling / log file size reduction
+ by pre-processing (before inputting into this script) with half_bins.py.
+
+ * If you have not adjusted GROUP_NR for your (high latency) application, then you
+ will see the percentiles computed by this tool max out at the max latency bin
+ value as in the first table above, and in this plot (where GROUP_NR=19 and thus we see
+ a max latency of ~16.7 seconds in the red line):
+
+ https://www.cronburg.com/fio/max_latency_bin_value_bug.png
+
+ * Motivation for, design decisions, and the implementation process are
+ described in further detail here:
+
+ https://www.cronburg.com/fio/cloud-latency-problem-measurement/
+
+ @author Karl Cronburg <karl.cronburg@gmail.com>
+"""
+import sys
+import pandas
+import numpy as np
+
+err = sys.stderr.write
+
+def weighted_percentile(percs, vs, ws):
+ """ Use linear interpolation to calculate the weighted percentile.
+
+ Value and weight arrays are first sorted by value. The cumulative
+ distribution function (cdf) is then computed, after which np.interp
+ finds the two values closest to our desired weighted percentile(s)
+ and linearly interpolates them.
+
+ percs :: List of percentiles we want to calculate
+ vs :: Array of values we are computing the percentile of
+ ws :: Array of weights for our corresponding values
+ return :: Array of percentiles
+ """
+ idx = np.argsort(vs)
+ vs, ws = vs[idx], ws[idx] # weights and values sorted by value
+ cdf = 100 * (ws.cumsum() - ws / 2.0) / ws.sum()
+ return np.interp(percs, cdf, vs) # linear interpolation
+
+def weights(start_ts, end_ts, start, end):
+ """ Calculate weights based on fraction of sample falling in the
+ given interval [start,end]. Weights computed using vector / array
+ computation instead of for-loops.
+
+ Note that samples with zero time length are effectively ignored
+ (we set their weight to zero).
+
+ start_ts :: Array of start times for a set of samples
+ end_ts :: Array of end times for a set of samples
+ start :: int
+ end :: int
+ return :: Array of weights
+ """
+ sbounds = np.maximum(start_ts, start).astype(float)
+ ebounds = np.minimum(end_ts, end).astype(float)
+ ws = (ebounds - sbounds) / (end_ts - start_ts)
+ if np.any(np.isnan(ws)):
+ err("WARNING: zero-length sample(s) detected. Log file corrupt"
+ " / bad time values? Ignoring these samples.\n")
+ ws[np.isnan(ws)] = 0.0
+ return ws
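+
+# Illustrative example (made-up numbers): a sample spanning [500, 1500] ms
+# overlaps the interval [1000, 2000] ms for half of its duration, so
+#   weights(np.array([500]), np.array([1500]), 1000, 2000)  ->  array([ 0.5])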
+
+def weighted_average(vs, ws):
+ return np.sum(vs * ws) / np.sum(ws)
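+
+# e.g. weighted_average(np.array([10., 20.]), np.array([1., 3.])) == 17.5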
+
+columns = ["end-time", "samples", "min", "avg", "median", "90%", "95%", "99%", "max"]
+percs = [50, 90, 95, 99]
+
+def fmt_float_list(ctx, num=1):
+ """ Return a comma separated list of float formatters to the required number
+ of decimal places. For instance:
+
+ with ctx.decimals == 4: fmt_float_list(ctx, num=3) == "%.4f, %.4f, %.4f"
+ """
+ return ', '.join(["%%.%df" % ctx.decimals] * num)
+
+# Default values - see beginning of main() for how we detect the number of
+# columns in the input files:
+__HIST_COLUMNS = 1216
+__NON_HIST_COLUMNS = 3
+__TOTAL_COLUMNS = __HIST_COLUMNS + __NON_HIST_COLUMNS
+
+def sequential_diffs(head_row, times, rws, hists):
+ """ Take the difference of sequential (in time) histograms with the same
+ r/w direction, returning a new array of differenced histograms. """
+ result = np.empty(shape=(0, __HIST_COLUMNS))
+ result_times = np.empty(shape=(1, 0))
+ for i in range(8):
+ idx = np.where(rws == i)
+ diff = np.diff(np.append(head_row[i], hists[idx], axis=0), axis=0).astype(int)
+ result = np.append(diff, result, axis=0)
+ result_times = np.append(times[idx], result_times)
+ idx = np.argsort(result_times)
+ return result[idx]
+
+def read_chunk(head_row, rdr, sz):
+ """ Read the next chunk of size sz from the given reader, computing the
+ differences across neighboring histogram samples.
+ """
+ try:
+ """ StopIteration occurs when the pandas reader is empty, and AttributeError
+ occurs if rdr is None due to the file being empty. """
+ new_arr = rdr.read().values
+ except (StopIteration, AttributeError):
+ return None
+
+ """ Extract array of just the times, and histograms matrix without times column.
+ Then, take the sequential difference of each of the rows in the histogram
+ matrix. This is necessary because fio outputs *cumulative* histograms as
+ opposed to histograms with counts just for a particular interval. """
+ times, rws, szs = new_arr[:,0], new_arr[:,1], new_arr[:,2]
+ hists = new_arr[:,__NON_HIST_COLUMNS:]
+ hists_diff = sequential_diffs(head_row, times, rws, hists)
+ times = times.reshape((len(times),1))
+ arr = np.append(times, hists_diff, axis=1)
+
+ """ hists[-1] will be the row we need to start our differencing with the
+ next time we call read_chunk() on the same rdr """
+ return arr, hists[-1]
+
+def get_min(fps, arrs):
+ """ Find the file with the current first row with the smallest start time """
+ return min([fp for fp in fps if not arrs[fp] is None], key=lambda fp: arrs.get(fp)[0][0][0])
+
+def histogram_generator(ctx, fps, sz):
+
+ """ head_row for a particular file keeps track of the last (cumulative)
+ histogram we read so that we have a reference point to subtract off
+ when computing sequential differences. """
+ head_row = np.zeros(shape=(1, __HIST_COLUMNS))
+ head_rows = {fp: {i: head_row for i in range(8)} for fp in fps}
+
+ # Create a chunked pandas reader for each of the files:
+ rdrs = {}
+ for fp in fps:
+ try:
+ rdrs[fp] = pandas.read_csv(fp, dtype=int, header=None, chunksize=sz)
+ except ValueError as e:
+ if e.message == 'No columns to parse from file':
+ if ctx.warn: sys.stderr.write("WARNING: Empty input file encountered.\n")
+ rdrs[fp] = None
+ else:
+ raise(e)
+
+ # Initial histograms and corresponding head_rows:
+ arrs = {fp: read_chunk(head_rows[fp], rdr, sz) for fp,rdr in rdrs.items()}
+ while True:
+
+ try:
+ """ ValueError occurs when nothing more to read """
+ fp = get_min(fps, arrs)
+ except ValueError:
+ return
+ arr, head_row = arrs[fp]
+ yield np.insert(arr[0], 1, fps.index(fp))
+ arrs[fp] = arr[1:], head_row
+ head_rows[fp] = head_row
+
+ if arrs[fp][0].shape[0] == 0:
+ arrs[fp] = read_chunk(head_rows[fp], rdrs[fp], sz)
+
+def _plat_idx_to_val(idx, edge=0.5, FIO_IO_U_PLAT_BITS=6, FIO_IO_U_PLAT_VAL=64):
+ """ Taken from fio's stat.c for calculating the latency value of a bin
+ from that bin's index.
+
+ idx : the value of the index into the histogram bins
+ edge : fractional value in the range [0,1]** indicating how far into
+ the bin we wish to compute the latency value of.
+
+ ** edge = 0.0 and 1.0 computes the lower and upper latency bounds
+ respectively of the given bin index. """
+
+ # MSB <= (FIO_IO_U_PLAT_BITS-1), cannot be rounded off. Use
+ # all bits of the sample as index
+ if (idx < (FIO_IO_U_PLAT_VAL << 1)):
+ return idx
+
+ # Find the group and compute the minimum value of that group
+ error_bits = (idx >> FIO_IO_U_PLAT_BITS) - 1
+ base = 1 << (error_bits + FIO_IO_U_PLAT_BITS)
+
+ # Find the bucket number within the group
+ k = idx % FIO_IO_U_PLAT_VAL
+
+ # Return the mean (if edge=0.5) of the range of the bucket
+ return base + ((k + edge) * (1 << error_bits))
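+
+# Illustrative example with the default constants: for idx=200 we get
+# error_bits = (200 >> 6) - 1 = 2, base = 1 << 8 = 256, k = 200 % 64 = 8,
+# so _plat_idx_to_val(200) == 256 + (8 + 0.5) * 4 == 290.0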
+
+def plat_idx_to_val_coarse(idx, coarseness, edge=0.5):
+ """ Converts the given *coarse* index into a non-coarse index as used by fio
+ in stat.h:plat_idx_to_val(), subsequently computing the appropriate
+ latency value for that bin.
+ """
+
+ # Multiply the index by the power of 2 coarseness to get the non-coarse
+ # bin index, with a max of 1536 bins (FIO_IO_U_PLAT_GROUP_NR = 24 in stat.h)
+ stride = 1 << coarseness
+ idx = idx * stride
+ lower = _plat_idx_to_val(idx, edge=0.0)
+ upper = _plat_idx_to_val(idx + stride, edge=1.0)
+ return lower + (upper - lower) * edge
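+
+# Illustrative example: with coarseness=1 and idx=10 we have stride=2, so we
+# take the lower edge of fine bin 20 (= 20) and the upper edge of fine bin 21
+# (= 22), giving plat_idx_to_val_coarse(10, 1) == 20 + (22 - 20) * 0.5 == 21.0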
+
+def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx):
+ ps = weighted_percentile(percs, vs, ws)
+
+ avg = weighted_average(vs, ws)
+ values = [mn, avg] + list(ps) + [mx]
+ row = [end, ss_cnt] + map(lambda x: float(x) / ctx.divisor, values)
+ fmt = "%d, %d, %d, " + fmt_float_list(ctx, 5) + ", %d"
+ print (fmt % tuple(row))
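+
+# With the default --decimals of 3, the row format assembled above is:
+#   "%d, %d, %d, %.3f, %.3f, %.3f, %.3f, %.3f, %d"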
+
+def update_extreme(val, fncn, new_val):
+ """ Calculate min / max in the presence of None values """
+ if val is None: return new_val
+ else: return fncn(val, new_val)
+
+# See beginning of main() for how bin_vals are computed
+bin_vals = []
+lower_bin_vals = [] # lower edge of each bin
+upper_bin_vals = [] # upper edge of each bin
+
+def process_interval(ctx, samples, iStart, iEnd):
+ """ Construct the weighted histogram for the given interval by scanning
+ through all the histograms and figuring out which of their bins have
+ samples with latencies which overlap with the given interval
+ [iStart,iEnd].
+ """
+
+ times, files, hists = samples[:,0], samples[:,1], samples[:,2:]
+ iHist = np.zeros(__HIST_COLUMNS)
+ ss_cnt = 0 # number of samples affecting this interval
+ mn_bin_val, mx_bin_val = None, None
+
+ for end_time,file,hist in zip(times,files,hists):
+
+ # Only look at bins of the current histogram sample which
+ # started before the end of the current time interval [iStart,iEnd]
+ start_times = (end_time - 0.5 * ctx.interval) - bin_vals / 1000.0
+ idx = np.where(start_times < iEnd)
+ s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx]
+
+ # Increment current interval histogram by weighted values of future histogram:
+ ws = hs * weights(s_ts, end_time, iStart, iEnd)
+ iHist[idx] += ws
+
+ # Update total number of samples affecting current interval histogram:
+ ss_cnt += np.sum(hs)
+
+ # Update min and max bin values seen if necessary:
+ idx = np.where(hs != 0)[0]
+ if idx.size > 0:
+ mn_bin_val = update_extreme(mn_bin_val, min, l_bvs[max(0, idx[0] - 1)])
+ mx_bin_val = update_extreme(mx_bin_val, max, u_bvs[min(len(hs) - 1, idx[-1] + 1)])
+
+ if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val)
+
+def guess_max_from_bins(ctx, hist_cols):
+ """ Try to guess the GROUP_NR from given # of histogram
+ columns seen in an input file """
+ max_coarse = 8
+ if ctx.group_nr < 19 or ctx.group_nr > 26:
+ bins = [ctx.group_nr * (1 << 6)]
+ else:
+ bins = [1216,1280,1344,1408,1472,1536,1600,1664]
+ coarses = range(max_coarse + 1)
+ fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else -10, coarses))
+
+ arr = np.transpose(list(map(fncn, bins)))
+ idx = np.where(arr == hist_cols)
+ if len(idx[1]) == 0:
+ table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array',' ')
+ err("Unable to determine bin values from input clat_hist files. Namely \n"
+ "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) +
+ "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n"
+ "This number needs to be equal to one of the following numbers:\n\n"
+ + table + "\n\n"
+ "Possible reasons and corresponding solutions:\n"
+ " - Input file(s) does not contain histograms.\n"
+ " - You recompiled fio with a different GROUP_NR. If so please specify this\n"
+ " new GROUP_NR on the command line with --group_nr\n")
+ exit(1)
+ return bins[idx[1][0]]
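+
+# Illustrative example: an input file with 608 histogram columns matches
+# 1216 / 2**1 in the table above, so guess_max_from_bins() returns 1216
+# (i.e. GROUP_NR=19 logged at coarseness 1).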
+
+def main(ctx):
+
+ # Automatically detect how many columns are in the input files,
+ # calculate the corresponding 'coarseness' parameter used to generate
+ # those files, and calculate the appropriate bin latency values:
+ with open(ctx.FILE[0], 'r') as fp:
+ global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
+ __TOTAL_COLUMNS = len(fp.readline().split(','))
+ __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
+
+ max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
+ coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
+ bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float)
+ lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float)
+ upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float)
+
+ fps = [open(f, 'r') for f in ctx.FILE]
+ gen = histogram_generator(ctx, fps, ctx.buff_size)
+
+ print(', '.join(columns))
+
+ try:
+ start, end = 0, ctx.interval
+ arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1))
+ more_data = True
+ while more_data or len(arr) > 0:
+
+ # Read in samples until the last sample's end time is at least
+ # ctx.max_latency (default 20 seconds) past the end of the current interval:
+ while len(arr) == 0 or arr[-1][0] < ctx.max_latency * 1000 + end:
+ try:
+ new_arr = next(gen)
+ except StopIteration:
+ more_data = False
+ break
+ arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0)
+ arr = arr.astype(int)
+
+ if arr.size > 0:
+ process_interval(ctx, arr, start, end)
+
+ # Update arr to throw away samples we no longer need - samples which
+ # end before the start of the next interval, i.e. the end of the
+ # current interval:
+ idx = np.where(arr[:,0] > end)
+ arr = arr[idx]
+
+ start += ctx.interval
+ end = start + ctx.interval
+ finally:
+ map(lambda f: f.close(), fps)
+
+
+if __name__ == '__main__':
+ import argparse
+ p = argparse.ArgumentParser()
+ arg = p.add_argument
+ arg("FILE", help='space separated list of latency log filenames', nargs='+')
+ arg('--buff_size',
+ default=10000,
+ type=int,
+ help='number of samples to buffer into numpy at a time')
+
+ arg('--max_latency',
+ default=20,
+ type=float,
+ help='maximum latency (seconds) expected in the data; samples are read this far past the end of each interval before its statistics are computed')
+
+ arg('-i', '--interval',
+ default=1000,
+ type=int,
+ help='interval width (ms)')
+
+ arg('-d', '--divisor',
+ required=False,
+ type=int,
+ default=1,
+ help='divide the results by this value.')
+
+ arg('--decimals',
+ default=3,
+ type=int,
+ help='number of decimal places to print floats to')
+
+ arg('--nowarn',
+ dest='warn',
+ action='store_false',
+ default=True,
+ help='do not print any warning messages to stderr')
+
+ arg('--group_nr',
+ default=19,
+ type=int,
+ help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')
+
+ main(p.parse_args())
+