Expand fiologparser_hist operations with new options

[fio.git] / tools / hist / fiologparser_hist.py
diff --git a/tools/hist/fiologparser_hist.py b/tools/hist/fiologparser_hist.py

index ce98d2ecd75581bc66e8c3de3b94da91690fe1c1..f26db44948a85d1187087f1c638f79fd6596fb24 100755 (executable)
--- a/tools/hist/fiologparser_hist.py
+++ b/tools/hist/fiologparser_hist.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2.7
+#!/usr/bin/python2.7
  """ 
      Utility for converting *_clat_hist* files generated by fio into latency statistics.
      
@@ -11,122 +11,62 @@
              4000, 39, 1152, 1546.962, 1545.785, 1627.192, 1640.019, 1691.204, 1744
              ...
      
-    Notes:
-
-    * end-times are calculated to be uniform increments of the --interval value given,
-      regardless of when histogram samples are reported. Of note:
-        
-        * Intervals with no samples are omitted. In the example above this means
-          "no statistics from 2 to 3 seconds" and "39 samples influenced the statistics
-          of the interval from 3 to 4 seconds".
-        
-        * Intervals with a single sample will have the same value for all statistics
-        
-    * The number of samples is unweighted, corresponding to the total number of samples
-      which have any effect whatsoever on the interval.
-
-    * Min statistics are computed using value of the lower boundary of the first bin
-      (in increasing bin order) with non-zero samples in it. Similarly for max,
-      we take the upper boundary of the last bin with non-zero samples in it.
-      This is semantically identical to taking the 0th and 100th percentiles with a
-      50% bin-width buffer (because percentiles are computed using mid-points of
-      the bins). This enforces the following nice properties:
-
-        * min <= 50th <= 90th <= 95th <= 99th <= max
-
-        * min and max are strict lower and upper bounds on the actual
-          min / max seen by fio (and reported in *_clat.* with averaging turned off).
-
-    * Average statistics use a standard weighted arithmetic mean.
-
-    * Percentile statistics are computed using the weighted percentile method as
-      described here: https://en.wikipedia.org/wiki/Percentile#Weighted_percentile
-      See weights() method for details on how weights are computed for individual
-      samples. In process_interval() we further multiply by the height of each bin
-      to get weighted histograms.
-    
-    * We convert files given on the command line, assumed to be fio histogram files,
-      on-the-fly into their corresponding differenced files i.e. non-cumulative histograms
-      because fio outputs cumulative histograms, but we want histograms corresponding
-      to individual time intervals. An individual histogram file can contain the cumulative
-      histograms for multiple different r/w directions (notably when --rw=randrw). This
-      is accounted for by tracking each r/w direction separately. In the statistics
-      reported we ultimately merge *all* histograms (regardless of r/w direction).
-
-    * The value of *_GROUP_NR in stat.h (and *_BITS) determines how many latency bins
-      fio outputs when histogramming is enabled. Namely for the current default of
-      GROUP_NR=19, we get 1,216 bins with a maximum latency of approximately 17
-      seconds. For certain applications this may not be sufficient. With GROUP_NR=24
-      we have 1,536 bins, giving us a maximum latency of 541 seconds (~ 9 minutes). If
-      you expect your application to experience latencies greater than 17 seconds,
-      you will need to recompile fio with a larger GROUP_NR, e.g. with:
-        
-            sed -i.bak 's/^#define FIO_IO_U_PLAT_GROUP_NR 19\n/#define FIO_IO_U_PLAT_GROUP_NR 24/g' stat.h
-            make fio
-            
-      Quick reference table for the max latency corresponding to a sampling of
-      values for GROUP_NR:
-            
-            GROUP_NR | # bins | max latency bin value
-            19       | 1216   | 16.9 sec
-            20       | 1280   | 33.8 sec
-            21       | 1344   | 67.6 sec
-            22       | 1408   | 2  min, 15 sec
-            23       | 1472   | 4  min, 32 sec
-            24       | 1536   | 9  min, 4  sec
-            25       | 1600   | 18 min, 8  sec
-            26       | 1664   | 36 min, 16 sec
-      
-    * At present this program automatically detects the number of histogram bins in
-      the log files, and adjusts the bin latency values accordingly. In particular if
-      you use the --log_hist_coarseness parameter of fio, you get output files with
-      a number of bins according to the following table (note that the first
-      row is identical to the table above):
-
-      coarse \ GROUP_NR
-                  19     20    21     22     23     24     25     26
-             -------------------------------------------------------
-            0  [[ 1216,  1280,  1344,  1408,  1472,  1536,  1600,  1664],
-            1   [  608,   640,   672,   704,   736,   768,   800,   832],
-            2   [  304,   320,   336,   352,   368,   384,   400,   416],
-            3   [  152,   160,   168,   176,   184,   192,   200,   208],
-            4   [   76,    80,    84,    88,    92,    96,   100,   104],
-            5   [   38,    40,    42,    44,    46,    48,    50,    52],
-            6   [   19,    20,    21,    22,    23,    24,    25,    26],
-            7   [  N/A,    10,   N/A,    11,   N/A,    12,   N/A,    13],
-            8   [  N/A,     5,   N/A,   N/A,   N/A,     6,   N/A,   N/A]]
-
-      For other values of GROUP_NR and coarseness, this table can be computed like this:    
-        
-            bins = [1216,1280,1344,1408,1472,1536,1600,1664]
-            max_coarse = 8
-            fncn = lambda z: list(map(lambda x: z/2**x if z % 2**x == 0 else nan, range(max_coarse + 1)))
-            np.transpose(list(map(fncn, bins)))
-      
-      Also note that you can achieve the same downsampling / log file size reduction
-      by pre-processing (before inputting into this script) with half_bins.py.
-
-    * If you have not adjusted GROUP_NR for your (high latency) application, then you
-      will see the percentiles computed by this tool max out at the max latency bin
-      value as in the first table above, and in this plot (where GROUP_NR=19 and thus we see
-      a max latency of ~16.7 seconds in the red line):
-
-            https://www.cronburg.com/fio/max_latency_bin_value_bug.png
-    
-    * Motivation for, design decisions, and the implementation process are
-      described in further detail here:
-
-            https://www.cronburg.com/fio/cloud-latency-problem-measurement/
-
      @author Karl Cronburg <karl.cronburg@gmail.com>
  """
  import os
  import sys
  import pandas
+import re
  import numpy as np
  
+runascmd = False
+
  err = sys.stderr.write
  
+class HistFileRdr():
+    """ Class to read a hist file line by line, buffering 
+        a value array for the latest line, and allowing a preview
+        of the next timestamp in next line
+        Note: this does not follow a generator pattern, but must explicitly
+        get next bin array.
+    """
+    def __init__(self, file):
+        self.fp = open(file, 'r')
+        self.data = self.nextData()
+        
+    def close(self):
+        self.fp.close()
+        self.fp = None
+        
+    def nextData(self):
+        self.data = None
+        if self.fp: 
+            line = self.fp.readline()
+            if line == "":
+                self.close()
+            else:
+                self.data = [int(x) for x in line.replace(' ', '').rstrip().split(',')]
+                
+        return self.data
+ 
+    @property
+    def curTS(self):
+        ts = None
+        if self.data:
+            ts = self.data[0]
+        return ts
+             
+    @property
+    def curDir(self):
+        d = None
+        if self.data:
+            d = self.data[1]
+        return d
+             
+    @property
+    def curBins(self):
+        return self.data[3:]
+
  def weighted_percentile(percs, vs, ws):
      """ Use linear interpolation to calculate the weighted percentile.
          
@@ -149,7 +89,7 @@ def weights(start_ts, end_ts, start, end):
      """ Calculate weights based on fraction of sample falling in the
          given interval [start,end]. Weights computed using vector / array
          computation instead of for-loops.
-    
+
          Note that samples with zero time length are effectively ignored
          (we set their weight to zero).
  
@@ -171,8 +111,21 @@ def weights(start_ts, end_ts, start, end):
  def weighted_average(vs, ws):
      return np.sum(vs * ws) / np.sum(ws)
  
-columns = ["end-time", "samples", "min", "avg", "median", "90%", "95%", "99%", "max"]
-percs   = [50, 90, 95, 99]
+
+percs = None
+columns = None
+
+def gen_output_columns(ctx):
+    global percs,columns
+    strpercs = re.split('[,:]', ctx.percentiles)
+    percs = [50.0]  # always print 50% in 'median' column
+    percs.extend(list(map(float,strpercs)))
+    if ctx.directions:
+        columns = ["end-time", "dir", "samples", "min", "avg", "median"]
+    else:
+        columns = ["end-time", "samples", "min", "avg", "median"]
+    columns.extend(list(map(lambda x: x+'%', strpercs)))
+    columns.append("max")
  
  def fmt_float_list(ctx, num=1):
    """ Return a comma separated list of float formatters to the required number
@@ -187,57 +140,33 @@ def fmt_float_list(ctx, num=1):
  __HIST_COLUMNS = 1216
  __NON_HIST_COLUMNS = 3
  __TOTAL_COLUMNS = __HIST_COLUMNS + __NON_HIST_COLUMNS
-    
-def sequential_diffs(head_row, times, rws, hists):
-    """ Take the difference of sequential (in time) histograms with the same
-        r/w direction, returning a new array of differenced histograms.  """
-    result = np.empty(shape=(0, __HIST_COLUMNS))
-    result_times = np.empty(shape=(1, 0))
-    for i in range(8):
-        idx = np.where(rws == i)
-        diff = np.diff(np.append(head_row[i], hists[idx], axis=0), axis=0).astype(int)
-        result = np.append(diff, result, axis=0)
-        result_times = np.append(times[idx], result_times)
-    idx = np.argsort(result_times)
-    return result[idx]
-
-def read_chunk(head_row, rdr, sz):
-    """ Read the next chunk of size sz from the given reader, computing the
-        differences across neighboring histogram samples.
-    """
+
+def read_chunk(rdr, sz):
+    """ Read the next chunk of size sz from the given reader. """
      try:
          """ StopIteration occurs when the pandas reader is empty, and AttributeError
              occurs if rdr is None due to the file being empty. """
          new_arr = rdr.read().values
      except (StopIteration, AttributeError):
-        return None    
-
-    """ Extract array of just the times, and histograms matrix without times column.
-        Then, take the sequential difference of each of the rows in the histogram
-        matrix. This is necessary because fio outputs *cumulative* histograms as
-        opposed to histograms with counts just for a particular interval. """
-    times, rws, szs = new_arr[:,0], new_arr[:,1], new_arr[:,2]
-    hists = new_arr[:,__NON_HIST_COLUMNS:]
-    hists_diff   = sequential_diffs(head_row, times, rws, hists)
-    times = times.reshape((len(times),1))
-    arr = np.append(times, hists_diff, axis=1)
-
-    """ hists[-1] will be the row we need to start our differencing with the
-        next time we call read_chunk() on the same rdr """
-    return arr, hists[-1]
+        return None
+
+    # Let's leave the array as is, and let later code ignore the block size
+    return new_arr
+
+    #""" Extract array of the times, directions wo times, and histograms matrix without times column. """
+    #times, rws, szs = new_arr[:,0], new_arr[:,1], new_arr[:,2]
+    #hists = new_arr[:,__NON_HIST_COLUMNS:]
+    #times = times.reshape((len(times),1))
+    #dirs  = rws.reshape((len(rws),1))
+    #arr = np.append(times, hists, axis=1)
+    #return arr
  
  def get_min(fps, arrs):
      """ Find the file with the current first row with the smallest start time """
-    return min([fp for fp in fps if not arrs[fp] is None], key=lambda fp: arrs.get(fp)[0][0][0])
+    return min([fp for fp in fps if not arrs[fp] is None], key=lambda fp: arrs.get(fp)[0][0])
  
  def histogram_generator(ctx, fps, sz):
      
-    """ head_row for a particular file keeps track of the last (cumulative)
-        histogram we read so that we have a reference point to subtract off
-        when computing sequential differences. """
-    head_row  = np.zeros(shape=(1, __HIST_COLUMNS))
-    head_rows = {fp: {i: head_row for i in range(8)} for fp in fps}
-
      # Create a chunked pandas reader for each of the files:
      rdrs = {}
      for fp in fps:
@@ -245,13 +174,13 @@ def histogram_generator(ctx, fps, sz):
              rdrs[fp] = pandas.read_csv(fp, dtype=int, header=None, chunksize=sz)
          except ValueError as e:
              if e.message == 'No columns to parse from file':
-                if not ctx.nowarn: sys.stderr.write("WARNING: Empty input file encountered.\n")
+                if ctx.warn: sys.stderr.write("WARNING: Empty input file encountered.\n")
                  rdrs[fp] = None
              else:
                  raise(e)
  
-    # Initial histograms and corresponding head_rows:
-    arrs = {fp: read_chunk(head_rows[fp], rdr, sz) for fp,rdr in rdrs.items()}
+    # Initial histograms from disk:
+    arrs = {fp: read_chunk(rdr, sz) for fp,rdr in rdrs.items()}
      while True:
  
          try:
@@ -259,13 +188,13 @@ def histogram_generator(ctx, fps, sz):
              fp = get_min(fps, arrs)
          except ValueError:
              return
-        arr, head_row = arrs[fp]
-        yield np.insert(arr[0], 1, fps.index(fp))
-        arrs[fp] = arr[1:], head_row
-        head_rows[fp] = head_row
+        arr = arrs[fp]
+        arri = np.insert(arr[0], 1, fps.index(fp))
+        yield arri
+        arrs[fp] = arr[1:]
  
-        if arrs[fp][0].shape[0] == 0:
-            arrs[fp] = read_chunk(head_rows[fp], rdrs[fp], sz)
+        if arrs[fp].shape[0] == 0:
+            arrs[fp] = read_chunk(rdrs[fp], sz)
  
  def _plat_idx_to_val(idx, edge=0.5, FIO_IO_U_PLAT_BITS=6, FIO_IO_U_PLAT_VAL=64):
      """ Taken from fio's stat.c for calculating the latency value of a bin
@@ -307,13 +236,24 @@ def plat_idx_to_val_coarse(idx, coarseness, edge=0.5):
      upper = _plat_idx_to_val(idx + stride, edge=1.0)
      return lower + (upper - lower) * edge
  
-def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx):
+def print_all_stats(ctx, end, mn, ss_cnt, vs, ws, mx, dir=dir):
      ps = weighted_percentile(percs, vs, ws)
  
      avg = weighted_average(vs, ws)
      values = [mn, avg] + list(ps) + [mx]
-    row = [end, ss_cnt] + map(lambda x: float(x) / ctx.divisor, values)
-    fmt = "%d, %d, %d, " + fmt_float_list(ctx, 5) + ", %d"
+    if ctx.directions:
+        row = [end, dir, ss_cnt]
+        fmt = "%d, %s, %d, "
+    else:
+        row = [end, ss_cnt]
+        fmt = "%d, %d, "
+    row = row + [float(x) / ctx.divisor for x in values]
+    if ctx.divisor > 1:
+        fmt = fmt + fmt_float_list(ctx, len(percs)+3)
+    else:
+        # max and min are decimal values if no divisor
+        fmt = fmt + "%d, " + fmt_float_list(ctx, len(percs)+1) + ", %d"
+        
      print (fmt % tuple(row))
  
  def update_extreme(val, fncn, new_val):
@@ -326,40 +266,69 @@ bin_vals = []
  lower_bin_vals = [] # lower edge of each bin
  upper_bin_vals = [] # upper edge of each bin 
  
-def process_interval(ctx, samples, iStart, iEnd):
+def process_interval(ctx, iHist, iEnd, dir):
+    """ print estimated percentages for the given merged sample
+    """
+    ss_cnt = 0 # number of samples affecting this interval
+    mn_bin_val, mx_bin_val = None, None
+   
+    # Update total number of samples affecting current interval histogram:
+    ss_cnt += np.sum(iHist)
+        
+    # Update min and max bin values
+    idxs = np.nonzero(iHist != 0)[0]
+    if idxs.size > 0:
+        mn_bin_val = bin_vals[idxs[0]]
+        mx_bin_val = bin_vals[idxs[-1]]
+
+    if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val, dir=dir)
+
+
+dir_map = ['r', 'w', 't']  # map of directional value in log to textual representation
+def process_weighted_interval(ctx, samples, iStart, iEnd, printdirs):
      """ Construct the weighted histogram for the given interval by scanning
          through all the histograms and figuring out which of their bins have
          samples with latencies which overlap with the given interval
          [iStart,iEnd].
      """
-    
-    times, files, hists = samples[:,0], samples[:,1], samples[:,2:]
-    iHist = np.zeros(__HIST_COLUMNS)
-    ss_cnt = 0 # number of samples affecting this interval
-    mn_bin_val, mx_bin_val = None, None
  
-    for end_time,file,hist in zip(times,files,hists):
-            
+    times, files, dirs, sizes, hists = samples[:,0], samples[:,1], samples[:,2], samples[:,3], samples[:,4:]
+    iHist={}; ss_cnt = {}; mn_bin_val={}; mx_bin_val={}
+    for dir in printdirs:
+        iHist[dir] = np.zeros(__HIST_COLUMNS, dtype=float)
+        ss_cnt[dir] = 0 # number of samples affecting this interval
+        mn_bin_val[dir] = None
+        mx_bin_val[dir] = None
+
+    for end_time,file,dir,hist in zip(times,files,dirs,hists):
+
          # Only look at bins of the current histogram sample which
          # started before the end of the current time interval [start,end]
-        start_times = (end_time - 0.5 * ctx.interval) - bin_vals / 1000.0
+        start_times = (end_time - 0.5 * ctx.interval) - bin_vals / ctx.time_divisor
          idx = np.where(start_times < iEnd)
          s_ts, l_bvs, u_bvs, hs = start_times[idx], lower_bin_vals[idx], upper_bin_vals[idx], hist[idx]
  
-        # Increment current interval histogram by weighted values of future histogram:
+        # Increment current interval histogram by weighted values of future histogram
+        # total number of samples
+        # and min and max values as necessary
+        textdir = dir_map[dir]
          ws = hs * weights(s_ts, end_time, iStart, iEnd)
-        iHist[idx] += ws
-    
-        # Update total number of samples affecting current interval histogram:
-        ss_cnt += np.sum(hs)
-        
-        # Update min and max bin values seen if necessary:
-        idx = np.where(hs != 0)[0]
-        if idx.size > 0:
-            mn_bin_val = update_extreme(mn_bin_val, min, l_bvs[max(0,           idx[0]  - 1)])
-            mx_bin_val = update_extreme(mx_bin_val, max, u_bvs[min(len(hs) - 1, idx[-1] + 1)])
-
-    if ss_cnt > 0: print_all_stats(ctx, iEnd, mn_bin_val, ss_cnt, bin_vals, iHist, mx_bin_val)
+        mmidx = np.where(hs != 0)[0]
+        if 'm' in printdirs:
+            iHist['m'][idx] += ws
+            ss_cnt['m'] += np.sum(hs)
+            if mmidx.size > 0:
+                mn_bin_val['m'] = update_extreme(mn_bin_val['m'], min, l_bvs[max(0,           mmidx[0]  - 1)])
+                mx_bin_val['m'] = update_extreme(mx_bin_val['m'], max, u_bvs[min(len(hs) - 1, mmidx[-1] + 1)])
+        if textdir in printdirs:
+            iHist[textdir][idx] += ws
+            ss_cnt[textdir] += np.sum(hs)  # Update total number of samples affecting current interval histogram:
+            if mmidx.size > 0:
+                mn_bin_val[textdir] = update_extreme(mn_bin_val[textdir], min, l_bvs[max(0,           mmidx[0]  - 1)])
+                mx_bin_val[textdir] = update_extreme(mx_bin_val[textdir], max, u_bvs[min(len(hs) - 1, mmidx[-1] + 1)])
+
+    for textdir in sorted(printdirs):            
+        if ss_cnt[textdir] > 0: print_all_stats(ctx, iEnd, mn_bin_val[textdir], ss_cnt[textdir], bin_vals, iHist[textdir], mx_bin_val[textdir], dir=textdir)
  
  def guess_max_from_bins(ctx, hist_cols):
      """ Try to guess the GROUP_NR from given # of histogram
@@ -376,7 +345,7 @@ def guess_max_from_bins(ctx, hist_cols):
      idx = np.where(arr == hist_cols)
      if len(idx[1]) == 0:
          table = repr(arr.astype(int)).replace('-10', 'N/A').replace('array','     ')
-        err("Unable to determine bin values from input clat_hist files. Namely \n"
+        errmsg = ("Unable to determine bin values from input clat_hist files. Namely \n"
              "the first line of file '%s' " % ctx.FILE[0] + "has %d \n" % (__TOTAL_COLUMNS,) +
              "columns of which we assume %d " % (hist_cols,) + "correspond to histogram bins. \n"
              "This number needs to be equal to one of the following numbers:\n\n"
@@ -385,24 +354,15 @@ def guess_max_from_bins(ctx, hist_cols):
              "  - Input file(s) does not contain histograms.\n"
              "  - You recompiled fio with a different GROUP_NR. If so please specify this\n"
              "    new GROUP_NR on the command line with --group_nr\n")
-        exit(1)
+        if runascmd:
+            err(errmsg)
+            exit(1)
+        else:
+            raise RuntimeError(errmsg) 
+        
      return bins[idx[1][0]]
  
-def main(ctx):
-
-    # Automatically detect how many columns are in the input files,
-    # calculate the corresponding 'coarseness' parameter used to generate
-    # those files, and calculate the appropriate bin latency values:
-    with open(ctx.FILE[0], 'r') as fp:
-        global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
-        __TOTAL_COLUMNS = len(fp.readline().split(','))
-        __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
-
-        max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
-        coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
-        bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness), np.arange(__HIST_COLUMNS)), dtype=float)
-        lower_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 0.0), np.arange(__HIST_COLUMNS)), dtype=float)
-        upper_bin_vals = np.array(map(lambda x: plat_idx_to_val_coarse(x, coarseness, 1.0), np.arange(__HIST_COLUMNS)), dtype=float)
+def output_weighted_interval_data(ctx,printdirs):
  
      fps = [open(f, 'r') for f in ctx.FILE]
      gen = histogram_generator(ctx, fps, ctx.buff_size)
@@ -411,7 +371,7 @@ def main(ctx):
  
      try:
          start, end = 0, ctx.interval
-        arr = np.empty(shape=(0,__TOTAL_COLUMNS - 1))
+        arr = np.empty(shape=(0,__TOTAL_COLUMNS + 1),dtype=int)
          more_data = True
          while more_data or len(arr) > 0:
              
@@ -422,11 +382,20 @@ def main(ctx):
                  except StopIteration:
                      more_data = False
                      break
-                arr = np.append(arr, new_arr.reshape((1,__TOTAL_COLUMNS - 1)), axis=0)
-            arr = arr.astype(int)
+                nashape  = new_arr.reshape((1,__TOTAL_COLUMNS + 1))
+                arr = np.append(arr, nashape, axis=0)
+            #arr = arr.astype(int)
              
              if arr.size > 0:
-                process_interval(ctx, arr, start, end)
+                # Jump immediately to the start of the input, rounding
+                # down to the nearest multiple of the interval (useful when --log_unix_epoch
+                # was used to create these histograms):
+                if start == 0 and arr[0][0] - ctx.max_latency > end:
+                    start = arr[0][0] - ctx.max_latency
+                    start = start - (start % ctx.interval)
+                    end = start + ctx.interval
+
+                process_weighted_interval(ctx, arr, start, end, printdirs)
                  
                  # Update arr to throw away samples we no longer need - samples which
                  # end before the start of the next interval, i.e. the end of the
@@ -437,11 +406,136 @@ def main(ctx):
              start += ctx.interval
              end = start + ctx.interval
      finally:
-        map(lambda f: f.close(), fps)
+        for fp in fps:
+            fp.close()
+
+def output_interval_data(ctx,directions):
+    fps = [HistFileRdr(f) for f in ctx.FILE]
+
+    print(', '.join(columns))
+
+    start = 0
+    end = ctx.interval
+    while True:
+        
+        more_data = False
+        
+        # add bins from all files in target intervals
+        arr = None
+        numSamples = 0
+        while True:
+            foundSamples = False
+            for fp in fps:
+                ts = fp.curTS
+                if ts and ts+10 < end:  # shift sample time when very close to an end time
+                    curdirect = fp.curDir
+                    numSamples += 1
+                    foundSamples = True
+                    if arr is None:
+                        arr = {}
+                        for d in directions:
+                            arr[d] = np.zeros(shape=(__HIST_COLUMNS), dtype=int)
+                    if 'm' in arr: 
+                        arr['m'] = np.add(arr['m'], fp.curBins)
+                    if 'r' in arr and curdirect == 0: 
+                        arr['r'] = np.add(arr['r'], fp.curBins)
+                    if 'w' in arr and curdirect == 1: 
+                        arr['w'] = np.add(arr['w'], fp.curBins)
+                    if 't' in arr and curdirect == 2: 
+                        arr['t'] = np.add(arr['t'], fp.curBins)
+                        
+                    more_data = True
+                    fp.nextData()
+                elif ts:
+                    more_data = True
+            
+            # reached end of all files
+            # or gone through all files without finding sample in interval 
+            if not more_data or not foundSamples:
+                break
+        
+        if arr is not None:
+            #print("{} size({}) samples({}) nonzero({}):".format(end, arr.size, numSamples, np.count_nonzero(arr)), str(arr), )
+            for d in sorted(arr.keys()):
+                aval = arr[d]
+                process_interval(ctx, aval, end, d)
+        
+        # reach end of all files
+        if not more_data:
+            break
+            
+        start += ctx.interval
+        end = start + ctx.interval
+
+def main(ctx):
+
+    if ctx.job_file:
+        try:
+            from configparser import SafeConfigParser, NoOptionError
+        except ImportError:
+            from ConfigParser import SafeConfigParser, NoOptionError
+
+        cp = SafeConfigParser(allow_no_value=True)
+        with open(ctx.job_file, 'r') as fp:
+            cp.readfp(fp)
+
+        if ctx.interval is None:
+            # Auto detect --interval value
+            for s in cp.sections():
+                try:
+                    hist_msec = cp.get(s, 'log_hist_msec')
+                    if hist_msec is not None:
+                        ctx.interval = int(hist_msec)
+                except NoOptionError:
+                    pass
+    
+    if not hasattr(ctx, 'percentiles'):
+        ctx.percentiles = "90,95,99"
+
+    if ctx.directions: 
+        ctx.directions = ctx.directions.lower() 
+                
+    if ctx.interval is None:
+        ctx.interval = 1000
+
+    if ctx.usbin:
+        ctx.time_divisor = 1000.0        # bins are in us
+    else:
+        ctx.time_divisor = 1000000.0     # bins are in ns
+
+    gen_output_columns(ctx)
+
+
+    # Automatically detect how many columns are in the input files,
+    # calculate the corresponding 'coarseness' parameter used to generate
+    # those files, and calculate the appropriate bin latency values:
+    with open(ctx.FILE[0], 'r') as fp:
+        global bin_vals,lower_bin_vals,upper_bin_vals,__HIST_COLUMNS,__TOTAL_COLUMNS
+        __TOTAL_COLUMNS = len(fp.readline().split(','))
+        __HIST_COLUMNS = __TOTAL_COLUMNS - __NON_HIST_COLUMNS
+
+        max_cols = guess_max_from_bins(ctx, __HIST_COLUMNS)
+        coarseness = int(np.log2(float(max_cols) / __HIST_COLUMNS))
+        bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness) for x in np.arange(__HIST_COLUMNS)], dtype=float)
+        lower_bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness, 0.0) for x in np.arange(__HIST_COLUMNS)], dtype=float)
+        upper_bin_vals = np.array([plat_idx_to_val_coarse(x, coarseness, 1.0) for x in np.arange(__HIST_COLUMNS)], dtype=float)
+
+    # indicate which directions to output (read(0), write(1), trim(2), mixed(3))
+    directions = set()
+    if not ctx.directions or 'm' in ctx.directions: directions.add('m')
+    if ctx.directions and 'r' in ctx.directions:    directions.add('r')
+    if ctx.directions and 'w' in ctx.directions:    directions.add('w')
+    if ctx.directions and 't' in ctx.directions:    directions.add('t')
+
+    if ctx.noweight:
+        output_interval_data(ctx, directions)
+    else:
+        output_weighted_interval_data(ctx, directions)
  
  
  if __name__ == '__main__':
      import argparse
+    runascmd = True
      p = argparse.ArgumentParser()
      arg = p.add_argument
      arg("FILE", help='space separated list of latency log filenames', nargs='+')
@@ -456,9 +550,13 @@ if __name__ == '__main__':
          help='number of seconds of data to process at a time')
  
      arg('-i', '--interval',
-        default=1000,
          type=int,
-        help='interval width (ms)')
+        help='interval width (ms), default 1000 ms')
+
+    arg('--noweight',
+        action='store_true',
+        default=False,
+        help='do not perform weighting of samples between output intervals')
  
      arg('-d', '--divisor',
          required=False,
@@ -471,16 +569,44 @@ if __name__ == '__main__':
          type=int,
          help='number of decimal places to print floats to')
  
-    arg('--nowarn',
-        dest='nowarn',
-        action='store_false',
-        default=True,
-        help='do not print any warning messages to stderr')
+    arg('--warn',
+        dest='warn',
+        action='store_true',
+        default=False,
+        help='print warning messages to stderr')
  
      arg('--group_nr',
-        default=19,
+        default=29,
          type=int,
          help='FIO_IO_U_PLAT_GROUP_NR as defined in stat.h')
  
+    arg('--job-file',
+        default=None,
+        type=str,
+        help='Optional argument pointing to the job file used to create the '
+             'given histogram files. Useful for auto-detecting --log_hist_msec and '
+             '--log_unix_epoch (in fio) values.')
+
+    arg('--percentiles',
+        default="90:95:99",
+        type=str,
+        help='Optional argument of comma or colon separated percentiles to print. '
+             'The default is "90.0:95.0:99.0".  min, median(50%%) and max percentiles are always printed')
+
+    arg('--usbin',
+        default=False,
+        action='store_true',
+        help='histogram bin latencies are in us (fio versions < 2.99. fio uses ns for version >= 2.99')      
+
+    arg('--directions',
+        default=None,
+        type=str,
+        help='Optionally split results output by reads, writes, trims or mixed. '
+             'Value may be any combination of "rwtm" characters. '  
+             'By default, only "mixed" results are output without a "dir" field. '
+             'But, specifying the --directions option '
+             'adds a "dir" field to the output content, and separate rows for each of the indicated '
+             'directions.')
+
      main(p.parse_args())