| 1 | #!/usr/bin/python2.7 |
| 2 | # Note: this script is python2 and python3 compatible. |
| 3 | |
| 4 | """ |
| 5 | fio_jsonplus_clat2csv |
| 6 | |
| 7 | This script converts fio's json+ latency data to CSV format. |
| 8 | |
| 9 | For example: |
| 10 | |
| 11 | Run the following fio jobs: |
| 12 | $ fio --output=fio-jsonplus.output --output-format=json+ --ioengine=null \ |
| 13 | --time_based --runtime=3s --size=1G --slat_percentiles=1 \ |
| 14 | --clat_percentiles=1 --lat_percentiles=1 \ |
| 15 | --name=test1 --rw=randrw \ |
| 16 | --name=test2 --rw=read \ |
| 17 | --name=test3 --rw=write |
| 18 | |
| 19 | Then run: |
| 20 | $ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv |
| 21 | |
| 22 | You will end up with the following 3 files: |
| 23 | |
| 24 | -rw-r--r-- 1 root root 77547 Mar 24 15:17 fio-jsonplus_job0.csv |
| 25 | -rw-r--r-- 1 root root 65413 Mar 24 15:17 fio-jsonplus_job1.csv |
| 26 | -rw-r--r-- 1 root root 63291 Mar 24 15:17 fio-jsonplus_job2.csv |
| 27 | |
| 28 | fio-jsonplus_job0.csv will look something like: |
| 29 | |
| 30 | nsec, read_slat_ns_count, read_slat_ns_cumulative, read_slat_ns_percentile, read_clat_ns_count, read_clat_ns_cumulative, read_clat_ns_percentile, read_lat_ns_count, read_lat_ns_cumulative, read_lat_ns_percentile, write_slat_ns_count, write_slat_ns_cumulative, write_slat_ns_percentile, write_clat_ns_count, write_clat_ns_cumulative, write_clat_ns_percentile, write_lat_ns_count, write_lat_ns_cumulative, write_lat_ns_percentile, trim_slat_ns_count, trim_slat_ns_cumulative, trim_slat_ns_percentile, trim_clat_ns_count, trim_clat_ns_cumulative, trim_clat_ns_percentile, trim_lat_ns_count, trim_lat_ns_cumulative, trim_lat_ns_percentile, |
| 31 | 12, , , , 3, 3, 6.11006798673e-07, , , , , , , 2, 2, 4.07580840603e-07, , , , , , , , , , , , , |
| 32 | 13, , , , 1364, 1367, 0.000278415431262, , , , , , , 1776, 1778, 0.000362339367296, , , , , , , , , , , , , |
| 33 | 14, , , , 181872, 183239, 0.037320091594, , , , , , , 207436, 209214, 0.0426358089929, , , , , , , , , , , , , |
| 34 | 15, , , , 1574811, 1758050, 0.358060167469, , , , , , , 1661435, 1870649, 0.381220345946, , , , , , , , , , , , , |
| 35 | 16, , , , 2198478, 3956528, 0.805821835713, , , , , , , 2154571, 4025220, 0.820301275606, , , , , , , , , , , , , |
| 36 | 17, , , , 724335, 4680863, 0.953346372218, , , , , , , 645351, 4670571, 0.951817627138, , , , , , , , , , , , , |
| 37 | 18, , , , 71837, 4752700, 0.96797733735, , , , , , , 61084, 4731655, 0.964265961171, , , , , , , , , , , , , |
| 38 | 19, , , , 15915, 4768615, 0.971218728417, , , , , , , 18419, 4750074, 0.968019576923, , , , , , , , , , , , , |
| 39 | 20, , , , 12651, 4781266, 0.973795344087, , , , , , , 14176, 4764250, 0.970908509921, , , , , , , , , , , , , |
| 40 | ... |
| 41 | 168960, , , , , , , , , , , , , 1, 4906999, 0.999999388629, 1, 4906997, 0.999998981048, , , , , , , , , , |
| 42 | 177152, , , , , , , , , , , , , 1, 4907000, 0.999999592419, 1, 4906998, 0.999999184838, , , , , , , , , , |
| 43 | 183296, , , , , , , , , , , , , 1, 4907001, 0.99999979621, 1, 4906999, 0.999999388629, , , , , , , , , , |
| 44 | 189440, , , , , , , 1, 4909925, 0.999999185324, , , , , , , , , , , , , , , , , , , |
| 45 | 214016, , , , 1, 4909928, 0.999999796331, 2, 4909927, 0.999999592662, , , , , , , , , , , , , , , , , , , |
| 46 | 246784, , , , , , , , , , , , , , , , 1, 4907000, 0.999999592419, , , , , , , , , , |
| 47 | 272384, , , , 1, 4909929, 1.0, 1, 4909928, 0.999999796331, , , , , , , , , , , , , , , , , , , |
| 48 | 329728, , , , , , , , , , , , , 1, 4907002, 1.0, 1, 4907001, 0.99999979621, , , , , , , , , , |
| 49 | 1003520, , , , , , , , , , , , , , , , 1, 4907002, 1.0, , , , , , , , , , |
| 50 | 1089536, , , , , , , 1, 4909929, 1.0, , , , , , , , , , , , , , , , , , , |
| 51 | |
The first line says that there were three read IOs with 12ns clat,
the cumulative number of read IOs at or below 12ns was three, and
12ns was the 0.0000611th percentile for read latency. There were
two write IOs with 12ns clat, the cumulative number of write IOs
at or below 12ns was two, and 12ns was the 0.0000408th percentile
for write latency.
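
In each case the percentile column is simply the cumulative count divided
by the total number of IOs of that type: for the read clat example above,
3 / 4909929 ~= 6.11e-07. Note that the CSV stores these values as fractions
between 0 and 1 rather than as percentages.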
| 58 | |
The 168960ns line says that one write IO completed with a 168960ns clat,
that 4906999 write IOs completed at or below this duration, and that this
duration was the 99.99994th percentile for write completion latency. One
write IO had a total latency of 168960ns; this duration had a cumulative
frequency of 4906997 write IOs and was the 99.9998981048th percentile for
write total latency.
| 65 | |
The last line says that one read IO had 1089536ns total latency, that this
duration had a cumulative frequency of 4909929 read IOs, and that it
represented the 100th percentile for read total latency.
| 69 | |
| 70 | Running the following: |
| 71 | |
| 72 | $ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv --validate |
| 73 | fio-jsonplus_job0.csv validated |
| 74 | fio-jsonplus_job1.csv validated |
| 75 | fio-jsonplus_job2.csv validated |
| 76 | |
| 77 | will check the CSV data against the json+ output to confirm that the CSV |
| 78 | data matches. |
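
The per-job CSV files can be post-processed with the standard csv module.
Below is a minimal sketch, not part of this tool, that assumes the job0
filename from the example above; the columns printed are illustrative only:

    import csv

    with open('fio-jsonplus_job0.csv') as csvsource:
        for row in csv.DictReader(csvsource, skipinitialspace=True):
            # empty cells mean no IOs completed in that bin for that column
            if row['read_clat_ns_count']:
                print(row['nsec'], row['read_clat_ns_count'],
                      row['read_clat_ns_percentile'])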
| 79 | """ |
| 80 | |
| 81 | from __future__ import absolute_import |
| 82 | from __future__ import print_function |
| 83 | import os |
| 84 | import json |
| 85 | import argparse |
| 86 | import itertools |
| 87 | import six |
| 88 | |
| 89 | DDIR_LIST = ['read', 'write', 'trim'] |
| 90 | LAT_LIST = ['slat_ns', 'clat_ns', 'lat_ns'] |
| 91 | |
| 92 | def parse_args(): |
| 93 | """Parse command-line arguments.""" |
| 94 | |
| 95 | parser = argparse.ArgumentParser() |
| 96 | parser.add_argument('source', |
| 97 | help='fio json+ output file containing completion ' |
| 98 | 'latency data') |
| 99 | parser.add_argument('dest', |
                        help='destination file stub for latency data in CSV '
                             'format; the job number will be appended to the '
                             'filename')
| 102 | parser.add_argument('--debug', '-d', action='store_true', |
| 103 | help='enable debug prints') |
| 104 | parser.add_argument('--validate', action='store_true', |
| 105 | help='validate CSV against JSON output') |
| 106 | args = parser.parse_args() |
| 107 | |
| 108 | return args |
| 109 | |
| 110 | |
| 111 | def percentile(idx, run_total): |
| 112 | """Return a percentile for a specified index based on a running total. |
| 113 | |
| 114 | Parameters: |
| 115 | idx index for which to generate percentile. |
| 116 | run_total list of cumulative sums. |
| 117 | |
| 118 | Returns: |
| 119 | Percentile represented by the specified index. |
| 120 | """ |
| 121 | |
    total = run_total[-1]
| 123 | if total == 0: |
| 124 | return 0 |
| 125 | |
| 126 | return float(run_total[idx]) / total |
| 127 | |
| 128 | |
| 129 | def more_bins(indices, bins): |
| 130 | """Determine whether we have more bins to process. |
| 131 | |
| 132 | Parameters: |
| 133 | indices a dict containing the last index processed in each bin. |
        bins        a dict containing a set of bins to process.
| 135 | |
| 136 | Returns: |
| 137 | True if the indices do not yet point to the end of each bin in bins. |
        False if the indices point beyond their respective bins.
| 139 | """ |
| 140 | |
| 141 | for key, value in six.iteritems(indices): |
| 142 | if value < len(bins[key]): |
| 143 | return True |
| 144 | |
| 145 | return False |
| 146 | |
| 147 | |
| 148 | def debug_print(debug, *args): |
| 149 | """Print debug messages. |
| 150 | |
| 151 | Parameters: |
| 152 | debug emit messages if True. |
| 153 | *args arguments for print(). |
| 154 | """ |
| 155 | |
| 156 | if debug: |
| 157 | print(*args) |
| 158 | |
| 159 | |
| 160 | def get_csvfile(dest, jobnum): |
| 161 | """Generate CSV filename from command-line arguments and job numbers. |
| 162 | |
    Parameters:
| 164 | dest file specification for CSV filename. |
| 165 | jobnum job number. |
| 166 | |
| 167 | Returns: |
| 168 | A string that is a new filename that incorporates the job number. |
| 169 | """ |
| 170 | |
| 171 | stub, ext = os.path.splitext(dest) |
| 172 | return stub + '_job' + str(jobnum) + ext |
| 173 | |
| 174 | |
| 175 | def validate(args, jsondata, col_labels): |
| 176 | """Validate CSV data against json+ output. |
| 177 | |
| 178 | This function checks the CSV data to make sure that it was correctly |
| 179 | generated from the original json+ output. json+ 'bins' objects are |
| 180 | constructed from the CSV data and then compared to the corresponding |
| 181 | objects in the json+ data. An AssertionError will appear if a mismatch |
| 182 | is found. |
| 183 | |
| 184 | Percentiles and cumulative counts are not checked. |
| 185 | |
| 186 | Parameters: |
| 187 | args command-line arguments for this script. |
| 188 | jsondata json+ output to compare against. |
| 189 | col_labels column labels for CSV data. |
| 190 | |
    Returns:
        0 if no mismatches found, 1 if a mismatch was found.
| 193 | """ |
| 194 | |
| 195 | colnames = [c.strip() for c in col_labels.split(',')] |
| 196 | |
| 197 | for jobnum in range(len(jsondata['jobs'])): |
| 198 | job_data = jsondata['jobs'][jobnum] |
| 199 | csvfile = get_csvfile(args.dest, jobnum) |
| 200 | |
| 201 | with open(csvfile, 'r') as csvsource: |
| 202 | csvlines = csvsource.read().split('\n') |
| 203 | |
| 204 | assert csvlines[0] == col_labels |
| 205 | debug_print(args.debug, 'col_labels match for', csvfile) |
| 206 | |
| 207 | # create 'bins' objects from the CSV data |
| 208 | counts = {} |
| 209 | for ddir in DDIR_LIST: |
| 210 | counts[ddir] = {} |
| 211 | for lat in LAT_LIST: |
| 212 | counts[ddir][lat] = {} |
| 213 | |
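        # drop the header row; it was already checked against col_labels above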
| 214 | csvlines.pop(0) |
| 215 | for line in csvlines: |
| 216 | if line.strip() == "": |
| 217 | continue |
| 218 | values = line.split(',') |
| 219 | nsec = values[0] |
            for col_idx, col in enumerate(colnames):
                if 'count' in col:
                    val = values[col_idx]
| 223 | if val.strip() != "": |
| 224 | count = int(val) |
| 225 | ddir, lat, _, _ = col.split('_') |
| 226 | lat = lat + '_ns' |
| 227 | counts[ddir][lat][nsec] = count |
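                        # a failed assertion or a missing key below means the
                        # CSV count does not match the json+ 'bins' object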
| 228 | try: |
| 229 | assert count == job_data[ddir][lat]['bins'][nsec] |
                        except (AssertionError, KeyError):
| 231 | print("mismatch:", csvfile, ddir, lat, nsec, "ns") |
| 232 | return 1 |
| 233 | |
| 234 | # compare 'bins' objects created from the CSV data |
| 235 | # with corresponding 'bins' objects in the json+ output |
| 236 | for ddir in DDIR_LIST: |
| 237 | for lat in LAT_LIST: |
| 238 | if lat in job_data[ddir] and 'bins' in job_data[ddir][lat]: |
| 239 | assert job_data[ddir][lat]['bins'] == counts[ddir][lat] |
| 240 | debug_print(args.debug, csvfile, ddir, lat, "bins match") |
| 241 | else: |
| 242 | assert counts[ddir][lat] == {} |
| 243 | debug_print(args.debug, csvfile, ddir, lat, "bins empty") |
| 244 | |
| 245 | print(csvfile, "validated") |
| 246 | |
| 247 | return 0 |
| 248 | |
| 249 | |
| 250 | def main(): |
| 251 | """Starting point for this script. |
| 252 | |
| 253 | In standard mode, this script will generate CSV data from fio json+ output. |
| 254 | In validation mode it will check to make sure that counts in CSV files |
| 255 | match the counts in the json+ data. |
| 256 | """ |
| 257 | |
| 258 | args = parse_args() |
| 259 | |
    with open(args.source, 'r') as source:
        jsondata = json.load(source)
| 262 | |
    ddir_lat_list = [ddir + '_' + lat for ddir, lat in
                     itertools.product(DDIR_LIST, LAT_LIST)]
| 264 | debug_print(args.debug, 'ddir_lat_list: ', ddir_lat_list) |
| 265 | col_labels = 'nsec, ' |
| 266 | for ddir_lat in ddir_lat_list: |
| 267 | col_labels += "{0}_count, {0}_cumulative, {0}_percentile, ".format(ddir_lat) |
| 268 | debug_print(args.debug, 'col_labels: ', col_labels) |
| 269 | |
| 270 | if args.validate: |
| 271 | return validate(args, jsondata, col_labels) |
| 272 | |
| 273 | for jobnum in range(0, len(jsondata['jobs'])): |
| 274 | bins = {} |
| 275 | run_total = {} |
| 276 | |
| 277 | for ddir in DDIR_LIST: |
| 278 | ddir_data = jsondata['jobs'][jobnum][ddir] |
| 279 | for lat in LAT_LIST: |
| 280 | ddir_lat = ddir + '_' + lat |
| 281 | if lat not in ddir_data or 'bins' not in ddir_data[lat]: |
| 282 | bins[ddir_lat] = [] |
| 283 | debug_print(args.debug, 'job', jobnum, ddir_lat, 'not found') |
| 284 | continue |
| 285 | |
| 286 | debug_print(args.debug, 'job', jobnum, ddir_lat, 'processing') |
| 287 | bins[ddir_lat] = [[int(key), value] for key, value in |
| 288 | six.iteritems(ddir_data[lat]['bins'])] |
| 289 | bins[ddir_lat] = sorted(bins[ddir_lat], key=lambda bin: bin[0]) |
| 290 | |
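                # running total of bin counts; this supplies the cumulative
                # column and the denominator used for the percentile column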
                run_total[ddir_lat] = [0] * len(bins[ddir_lat])
| 292 | run_total[ddir_lat][0] = bins[ddir_lat][0][1] |
| 293 | for index in range(1, len(bins[ddir_lat])): |
| 294 | run_total[ddir_lat][index] = run_total[ddir_lat][index-1] + \ |
| 295 | bins[ddir_lat][index][1] |
| 296 | |
| 297 | csvfile = get_csvfile(args.dest, jobnum) |
| 298 | with open(csvfile, 'w') as output: |
| 299 | output.write(col_labels + "\n") |
| 300 | |
| 301 | # |
| 302 | # Have a counter for each ddir_lat pairing |
| 303 | # In each round, pick the shortest remaining duration |
| 304 | # and output a line with any values for that duration |
| 305 | # |
| 306 | indices = {x: 0 for x in ddir_lat_list} |
| 307 | while more_bins(indices, bins): |
| 308 | debug_print(args.debug, 'indices: ', indices) |
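                # sentinel assumed to be larger than any latency bin fio
                # reports, so the first real bin value wins the min() below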
| 309 | min_lat = 17112760320 |
| 310 | for ddir_lat in ddir_lat_list: |
| 311 | if indices[ddir_lat] < len(bins[ddir_lat]): |
| 312 | min_lat = min(bins[ddir_lat][indices[ddir_lat]][0], min_lat) |
| 313 | |
| 314 | output.write("{0}, ".format(min_lat)) |
| 315 | |
| 316 | for ddir_lat in ddir_lat_list: |
| 317 | if indices[ddir_lat] < len(bins[ddir_lat]) and \ |
| 318 | min_lat == bins[ddir_lat][indices[ddir_lat]][0]: |
| 319 | count = bins[ddir_lat][indices[ddir_lat]][1] |
| 320 | cumulative = run_total[ddir_lat][indices[ddir_lat]] |
| 321 | ptile = percentile(indices[ddir_lat], run_total[ddir_lat]) |
| 322 | output.write("{0}, {1}, {2}, ".format(count, cumulative, ptile)) |
| 323 | indices[ddir_lat] += 1 |
| 324 | else: |
| 325 | output.write(", , , ") |
| 326 | output.write("\n") |
| 327 | |
| 328 | print("{0} generated".format(csvfile)) |
| 329 | |
| 330 | |
| 331 | if __name__ == '__main__': |
| 332 | main() |