tools/fio_jsonplus_clat2csv

   1 #!/usr/bin/env python3
   2 # Note: this script is python2 and python3 compatible.
   3
   4 """
   5 fio_jsonplus_clat2csv
   6
   7 This script converts fio's json+ latency data to CSV format.
   8
   9 For example:
  10
  11 Run the following fio jobs:
  12 $ fio --output=fio-jsonplus.output --output-format=json+ --ioengine=null \
  13     --time_based --runtime=3s --size=1G --slat_percentiles=1 \
  14     --clat_percentiles=1 --lat_percentiles=1 \
  15     --name=test1 --rw=randrw \
  16     --name=test2 --rw=read \
  17     --name=test3 --rw=write
  18
  19 Then run:
  20 $ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv
  21
  22 You will end up with the following 3 files:
  23
  24 -rw-r--r-- 1 root root 77547 Mar 24 15:17 fio-jsonplus_job0.csv
  25 -rw-r--r-- 1 root root 65413 Mar 24 15:17 fio-jsonplus_job1.csv
  26 -rw-r--r-- 1 root root 63291 Mar 24 15:17 fio-jsonplus_job2.csv
  27
  28 fio-jsonplus_job0.csv will look something like:
  29
  30 nsec, read_slat_ns_count, read_slat_ns_cumulative, read_slat_ns_percentile, read_clat_ns_count, read_clat_ns_cumulative, read_clat_ns_percentile, read_lat_ns_count, read_lat_ns_cumulative, read_lat_ns_percentile, write_slat_ns_count, write_slat_ns_cumulative, write_slat_ns_percentile, write_clat_ns_count, write_clat_ns_cumulative, write_clat_ns_percentile, write_lat_ns_count, write_lat_ns_cumulative, write_lat_ns_percentile, trim_slat_ns_count, trim_slat_ns_cumulative, trim_slat_ns_percentile, trim_clat_ns_count, trim_clat_ns_cumulative, trim_clat_ns_percentile, trim_lat_ns_count, trim_lat_ns_cumulative, trim_lat_ns_percentile,
  31 12, , , , 3, 3, 6.11006798673e-07, , , , , , , 2, 2, 4.07580840603e-07, , , , , , , , , , , , ,
  32 13, , , , 1364, 1367, 0.000278415431262, , , , , , , 1776, 1778, 0.000362339367296, , , , , , , , , , , , ,
  33 14, , , , 181872, 183239, 0.037320091594, , , , , , , 207436, 209214, 0.0426358089929, , , , , , , , , , , , ,
  34 15, , , , 1574811, 1758050, 0.358060167469, , , , , , , 1661435, 1870649, 0.381220345946, , , , , , , , , , , , ,
  35 16, , , , 2198478, 3956528, 0.805821835713, , , , , , , 2154571, 4025220, 0.820301275606, , , , , , , , , , , , ,
  36 17, , , , 724335, 4680863, 0.953346372218, , , , , , , 645351, 4670571, 0.951817627138, , , , , , , , , , , , ,
  37 18, , , , 71837, 4752700, 0.96797733735, , , , , , , 61084, 4731655, 0.964265961171, , , , , , , , , , , , ,
  38 19, , , , 15915, 4768615, 0.971218728417, , , , , , , 18419, 4750074, 0.968019576923, , , , , , , , , , , , ,
  39 20, , , , 12651, 4781266, 0.973795344087, , , , , , , 14176, 4764250, 0.970908509921, , , , , , , , , , , , ,
  40 ...
  41 168960, , , , , , , , , , , , , 1, 4906999, 0.999999388629, 1, 4906997, 0.999998981048, , , , , , , , , ,
  42 177152, , , , , , , , , , , , , 1, 4907000, 0.999999592419, 1, 4906998, 0.999999184838, , , , , , , , , ,
  43 183296, , , , , , , , , , , , , 1, 4907001, 0.99999979621, 1, 4906999, 0.999999388629, , , , , , , , , ,
  44 189440, , , , , , , 1, 4909925, 0.999999185324, , , , , , , , , , , , , , , , , , ,
  45 214016, , , , 1, 4909928, 0.999999796331, 2, 4909927, 0.999999592662, , , , , , , , , , , , , , , , , , ,
  46 246784, , , , , , , , , , , , , , , , 1, 4907000, 0.999999592419, , , , , , , , , ,
  47 272384, , , , 1, 4909929, 1.0, 1, 4909928, 0.999999796331, , , , , , , , , , , , , , , , , , ,
  48 329728, , , , , , , , , , , , , 1, 4907002, 1.0, 1, 4907001, 0.99999979621, , , , , , , , , ,
  49 1003520, , , , , , , , , , , , , , , , 1, 4907002, 1.0, , , , , , , , , ,
  50 1089536, , , , , , , 1, 4909929, 1.0, , , , , , , , , , , , , , , , , , ,
  51
  52 The first line says that there were three read IOs with 12ns clat,
  53 the cumulative number of read IOs at or below 12ns was three, and
  54 12ns was the 0.0000611th percentile for read latency. There were
  55 two write IOs with 12ns clat, the cumulative number of write IOs
  56 at or below 12ns was two, and 12ns was the 0.0000408th percentile
  57 for write latency.
  58
  59 The job had one write IO complete at 168960ns and 4906999 write IOs
  60 completed at or below this duration. Also this duration was the
  61 99.99994th percentile for write latency. There was one write IO
  62 with a total latency of 168960ns, this duration had a cumulative
  63 frequency of 4906997 write IOs and was the 99.9998981048th percentile
  64 for write total latency.
  65
  66 The last line says that one read IO had 1089536ns total latency, this
  67 duration had a cumulative frequency of 4909929 and represented the 100th
  68 percentile for read total latency.
  69
  70 Running the following:
  71
  72 $ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv --validate
  73 fio-jsonplus_job0.csv validated
  74 fio-jsonplus_job1.csv validated
  75 fio-jsonplus_job2.csv validated
  76
  77 will check the CSV data against the json+ output to confirm that the CSV
  78 data matches.
  79 """
  80
  81 from __future__ import absolute_import
  82 from __future__ import print_function
  83 import os
  84 import json
  85 import argparse
  86 import itertools
  87 import six
  88
# I/O directions handled by this script. Each name is used as a key into a
# job's json+ data and as the leading part of the CSV column labels.
DDIR_LIST = ['read', 'write', 'trim']
# Latency categories looked up under each direction. Combined with a
# direction they form column label stems such as 'read_clat_ns'.
LAT_LIST = ['slat_ns', 'clat_ns', 'lat_ns']
  91
  92 def parse_args():
  93     """Parse command-line arguments."""
  94
  95     parser = argparse.ArgumentParser()
  96     parser.add_argument('source',
  97                         help='fio json+ output file containing completion '
  98                              'latency data')
  99     parser.add_argument('dest',
 100                         help='destination file stub for latency data in CSV '
 101                              'format. job number will be appended to filename')
 102     parser.add_argument('--debug', '-d', action='store_true',
 103                         help='enable debug prints')
 104     parser.add_argument('--validate', action='store_true',
 105                         help='validate CSV against JSON output')
 106     args = parser.parse_args()
 107
 108     return args
 109
 110
 111 def percentile(idx, run_total):
 112     """Return a percentile for a specified index based on a running total.
 113
 114     Parameters:
 115         idx         index for which to generate percentile.
 116         run_total   list of cumulative sums.
 117
 118     Returns:
 119         Percentile represented by the specified index.
 120     """
 121
 122     total = run_total[len(run_total)-1]
 123     if total == 0:
 124         return 0
 125
 126     return float(run_total[idx]) / total
 127
 128
 129 def more_bins(indices, bins):
 130     """Determine whether we have more bins to process.
 131
 132     Parameters:
 133         indices     a dict containing the last index processed in each bin.
 134         bins        a dict contaiing a set of bins to process.
 135
 136     Returns:
 137         True if the indices do not yet point to the end of each bin in bins.
 138         False if the indices point beyond their respective bins.
 139     """
 140
 141     for key, value in six.iteritems(indices):
 142         if value < len(bins[key]):
 143             return True
 144
 145     return False
 146
 147
 148 def debug_print(debug, *args):
 149     """Print debug messages.
 150
 151     Parameters:
 152         debug       emit messages if True.
 153         *args       arguments for print().
 154     """
 155
 156     if debug:
 157         print(*args)
 158
 159
 160 def get_csvfile(dest, jobnum):
 161     """Generate CSV filename from command-line arguments and job numbers.
 162
 163     Parameters:
 164         dest        file specification for CSV filename.
 165         jobnum      job number.
 166
 167     Returns:
 168         A string that is a new filename that incorporates the job number.
 169     """
 170
 171     stub, ext = os.path.splitext(dest)
 172     return stub + '_job' + str(jobnum) + ext
 173
 174
 175 def validate(args, jsondata, col_labels):
 176     """Validate CSV data against json+ output.
 177
 178     This function checks the CSV data to make sure that it was correctly
 179     generated from the original json+ output. json+ 'bins' objects are
 180     constructed from the CSV data and then compared to the corresponding
 181     objects in the json+ data. An AssertionError will appear if a mismatch
 182     is found.
 183
 184     Percentiles and cumulative counts are not checked.
 185
 186     Parameters:
 187         args        command-line arguments for this script.
 188         jsondata    json+ output to compare against.
 189         col_labels  column labels for CSV data.
 190
 191     Returns
 192         0 if no mismatches found.
 193     """
 194
 195     colnames = [c.strip() for c in col_labels.split(',')]
 196
 197     for jobnum in range(len(jsondata['jobs'])):
 198         job_data = jsondata['jobs'][jobnum]
 199         csvfile = get_csvfile(args.dest, jobnum)
 200
 201         with open(csvfile, 'r') as csvsource:
 202             csvlines = csvsource.read().split('\n')
 203
 204         assert csvlines[0] == col_labels
 205         debug_print(args.debug, 'col_labels match for', csvfile)
 206
 207         # create 'bins' objects from the CSV data
 208         counts = {}
 209         for ddir in DDIR_LIST:
 210             counts[ddir] = {}
 211             for lat in LAT_LIST:
 212                 counts[ddir][lat] = {}
 213
 214         csvlines.pop(0)
 215         for line in csvlines:
 216             if line.strip() == "":
 217                 continue
 218             values = line.split(',')
 219             nsec = values[0]
 220             for col in colnames:
 221                 if 'count' in col:
 222                     val = values[colnames.index(col)]
 223                     if val.strip() != "":
 224                         count = int(val)
 225                         ddir, lat, _, _ = col.split('_')
 226                         lat = lat + '_ns'
 227                         counts[ddir][lat][nsec] = count
 228                         try:
 229                             assert count == job_data[ddir][lat]['bins'][nsec]
 230                         except Exception:
 231                             print("mismatch:", csvfile, ddir, lat, nsec, "ns")
 232                             return 1
 233
 234         # compare 'bins' objects created from the CSV data
 235         # with corresponding 'bins' objects in the json+ output
 236         for ddir in DDIR_LIST:
 237             for lat in LAT_LIST:
 238                 if lat in job_data[ddir] and 'bins' in job_data[ddir][lat]:
 239                     assert job_data[ddir][lat]['bins'] == counts[ddir][lat]
 240                     debug_print(args.debug, csvfile, ddir, lat, "bins match")
 241                 else:
 242                     assert counts[ddir][lat] == {}
 243                     debug_print(args.debug, csvfile, ddir, lat, "bins empty")
 244
 245         print(csvfile, "validated")
 246
 247     return 0
 248
 249
 250 def main():
 251     """Starting point for this script.
 252
 253     In standard mode, this script will generate CSV data from fio json+ output.
 254     In validation mode it will check to make sure that counts in CSV files
 255     match the counts in the json+ data.
 256     """
 257
 258     args = parse_args()
 259
 260     with open(args.source, 'r') as source:
 261         jsondata = json.loads(source.read())
 262
 263     ddir_lat_list = list(ddir + '_' + lat for ddir, lat in itertools.product(DDIR_LIST, LAT_LIST))
 264     debug_print(args.debug, 'ddir_lat_list: ', ddir_lat_list)
 265     col_labels = 'nsec, '
 266     for ddir_lat in ddir_lat_list:
 267         col_labels += "{0}_count, {0}_cumulative, {0}_percentile, ".format(ddir_lat)
 268     debug_print(args.debug, 'col_labels: ', col_labels)
 269
 270     if args.validate:
 271         return validate(args, jsondata, col_labels)
 272
 273     for jobnum in range(0, len(jsondata['jobs'])):
 274         bins = {}
 275         run_total = {}
 276
 277         for ddir in DDIR_LIST:
 278             ddir_data = jsondata['jobs'][jobnum][ddir]
 279             for lat in LAT_LIST:
 280                 ddir_lat = ddir + '_' + lat
 281                 if lat not in ddir_data or 'bins' not in ddir_data[lat]:
 282                     bins[ddir_lat] = []
 283                     debug_print(args.debug, 'job', jobnum, ddir_lat, 'not found')
 284                     continue
 285
 286                 debug_print(args.debug, 'job', jobnum, ddir_lat, 'processing')
 287                 bins[ddir_lat] = [[int(key), value] for key, value in
 288                                   six.iteritems(ddir_data[lat]['bins'])]
 289                 bins[ddir_lat] = sorted(bins[ddir_lat], key=lambda bin: bin[0])
 290
 291                 run_total[ddir_lat] = [0 for x in range(0, len(bins[ddir_lat]))]
 292                 run_total[ddir_lat][0] = bins[ddir_lat][0][1]
 293                 for index in range(1, len(bins[ddir_lat])):
 294                     run_total[ddir_lat][index] = run_total[ddir_lat][index-1] + \
 295                         bins[ddir_lat][index][1]
 296
 297         csvfile = get_csvfile(args.dest, jobnum)
 298         with open(csvfile, 'w') as output:
 299             output.write(col_labels + "\n")
 300
 301 #
 302 # Have a counter for each ddir_lat pairing
 303 # In each round, pick the shortest remaining duration
 304 # and output a line with any values for that duration
 305 #
 306             indices = {x: 0 for x in ddir_lat_list}
 307             while more_bins(indices, bins):
 308                 debug_print(args.debug, 'indices: ', indices)
 309                 min_lat = 17112760320
 310                 for ddir_lat in ddir_lat_list:
 311                     if indices[ddir_lat] < len(bins[ddir_lat]):
 312                         min_lat = min(bins[ddir_lat][indices[ddir_lat]][0], min_lat)
 313
 314                 output.write("{0}, ".format(min_lat))
 315
 316                 for ddir_lat in ddir_lat_list:
 317                     if indices[ddir_lat] < len(bins[ddir_lat]) and \
 318                        min_lat == bins[ddir_lat][indices[ddir_lat]][0]:
 319                         count = bins[ddir_lat][indices[ddir_lat]][1]
 320                         cumulative = run_total[ddir_lat][indices[ddir_lat]]
 321                         ptile = percentile(indices[ddir_lat], run_total[ddir_lat])
 322                         output.write("{0}, {1}, {2}, ".format(count, cumulative, ptile))
 323                         indices[ddir_lat] += 1
 324                     else:
 325                         output.write(", , , ")
 326                 output.write("\n")
 327
 328             print("{0} generated".format(csvfile))
 329
 330
 331 if __name__ == '__main__':
 332     main()