t/latency_percentiles.py

   1 #!/usr/bin/env python3
   2 # SPDX-License-Identifier: GPL-2.0-only
   3 #
   4 # Copyright (c) 2020 Western Digital Corporation or its affiliates.
   5 #
   6 """
   7 # latency_percentiles.py
   8 #
   9 # Test the code that produces latency percentiles
  10 # This is mostly to test the code changes to allow reporting
  11 # of slat, clat, and lat percentiles
  12 #
  13 # USAGE
# python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
  15 #
  16 #
  17 # Test scenarios:
  18 #
  19 # - DONE json
  20 #   unified rw reporting
  21 #   compare with latency log
  22 #   try various combinations of the ?lat_percentile options
  23 #   null, aio
  24 #   r, w, t
  25 # - DONE json+
  26 #   check presence of latency bins
  27 #   if the json percentiles match those from the raw data
  28 #   then the latency bin values and counts are probably ok
  29 # - DONE terse
  30 #   produce both terse, JSON output and confirm that they match
  31 #   lat only; both lat and clat
  32 # - DONE sync_lat
  33 #   confirm that sync_lat data appears
  34 # - MANUAL TESTING normal output:
  35 #       null ioengine
  36 #           enable all, but only clat and lat appear
  37 #           enable subset of latency types
  38 #           read, write, trim, unified
  39 #       libaio ioengine
  40 #           enable all latency types
  41 #           enable subset of latency types
  42 #           read, write, trim, unified
  43 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  44 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  45 # echo confirm that clat and lat percentiles appear
  46 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  47 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
  48 # echo confirm that only lat percentiles appear
  49 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  50 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
  51 # echo confirm that only clat percentiles appear
  52 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  53 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  54 # echo confirm that slat, clat, lat percentiles appear
  55 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  56 #       --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
  57 # echo confirm that clat and lat percentiles appear
  58 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#       --ioengine=libaio --rw=randrw
  60 # echo confirm that clat percentiles appear for reads and writes
  61 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  62 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
  63 # echo confirm that slat percentiles appear for both reads and writes
  64 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  65 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  66 #       --rw=randrw --unified_rw_reporting=1
  67 # echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
  68 #./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  69 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  70 #       --rw=randrw --fsync=32
  71 # echo confirm that fsync latencies appear
  72 """
  73
  74 import os
  75 import csv
  76 import sys
  77 import json
  78 import math
  79 import time
  80 import argparse
  81 import platform
  82 import subprocess
  83 from pathlib import Path
  84
  85
  86 class FioLatTest():
  87     """fio latency percentile test."""
  88
  89     def __init__(self, artifact_root, test_options, debug):
  90         """
  91         artifact_root   root directory for artifacts (subdirectory will be created under here)
  92         test            test specification
  93         """
  94         self.artifact_root = artifact_root
  95         self.test_options = test_options
  96         self.debug = debug
  97         self.filename = None
  98         self.json_data = None
  99         self.terse_data = None
 100
 101         self.test_dir = os.path.join(self.artifact_root,
 102                                      "{:03d}".format(self.test_options['test_id']))
 103         if not os.path.exists(self.test_dir):
 104             os.mkdir(self.test_dir)
 105
 106         self.filename = "latency{:03d}".format(self.test_options['test_id'])
 107
 108     def run_fio(self, fio_path):
 109         """Run a test."""
 110
 111         fio_args = [
 112             "--name=latency",
 113             "--randrepeat=0",
 114             "--norandommap",
 115             "--time_based",
 116             "--size=16M",
 117             "--rwmixread=50",
 118             "--group_reporting=1",
 119             "--write_lat_log={0}".format(self.filename),
 120             "--output={0}.out".format(self.filename),
 121             "--ioengine={ioengine}".format(**self.test_options),
 122             "--rw={rw}".format(**self.test_options),
 123             "--runtime={runtime}".format(**self.test_options),
 124             "--output-format={output-format}".format(**self.test_options),
 125         ]
 126         for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
 127                     'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs', 'cmdprio_percentage']:
 128             if opt in self.test_options:
 129                 option = '--{0}={{{0}}}'.format(opt)
 130                 fio_args.append(option.format(**self.test_options))
 131
 132         command = [fio_path] + fio_args
 133         with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
 134                 command_file:
 135             command_file.write("%s\n" % command)
 136
 137         passed = True
 138         stdout_file = open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)), "w+")
 139         stderr_file = open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)), "w+")
 140         exitcode_file = open(os.path.join(self.test_dir,
 141                                           "{0}.exitcode".format(self.filename)), "w+")
 142         try:
 143             proc = None
 144             # Avoid using subprocess.run() here because when a timeout occurs,
 145             # fio will be stopped with SIGKILL. This does not give fio a
 146             # chance to clean up and means that child processes may continue
 147             # running and submitting IO.
 148             proc = subprocess.Popen(command,
 149                                     stdout=stdout_file,
 150                                     stderr=stderr_file,
 151                                     cwd=self.test_dir,
 152                                     universal_newlines=True)
 153             proc.communicate(timeout=300)
 154             exitcode_file.write('{0}\n'.format(proc.returncode))
 155             passed &= (proc.returncode == 0)
 156         except subprocess.TimeoutExpired:
 157             proc.terminate()
 158             proc.communicate()
 159             assert proc.poll()
 160             print("Timeout expired")
 161             passed = False
 162         except Exception:
 163             if proc:
 164                 if not proc.poll():
 165                     proc.terminate()
 166                     proc.communicate()
 167             print("Exception: %s" % sys.exc_info())
 168             passed = False
 169         finally:
 170             stdout_file.close()
 171             stderr_file.close()
 172             exitcode_file.close()
 173
 174         if passed:
 175             if 'json' in self.test_options['output-format']:
 176                 if not self.get_json():
 177                     print('Unable to decode JSON data')
 178                     passed = False
 179             if 'terse' in self.test_options['output-format']:
 180                 if not self.get_terse():
 181                     print('Unable to decode terse data')
 182                     passed = False
 183
 184         return passed
 185
 186     def get_json(self):
 187         """Convert fio JSON output into a python JSON object"""
 188
 189         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 190         with open(filename, 'r') as file:
 191             file_data = file.read()
 192
 193         #
 194         # Sometimes fio informational messages are included at the top of the
 195         # JSON output, especially under Windows. Try to decode output as JSON
 196         # data, lopping off up to the first four lines
 197         #
 198         lines = file_data.splitlines()
 199         for i in range(5):
 200             file_data = '\n'.join(lines[i:])
 201             try:
 202                 self.json_data = json.loads(file_data)
 203             except json.JSONDecodeError:
 204                 continue
 205             else:
 206                 return True
 207
 208         return False
 209
 210     def get_terse(self):
 211         """Read fio output and return terse format data."""
 212
 213         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 214         with open(filename, 'r') as file:
 215             file_data = file.read()
 216
 217         #
 218         # Read the first few lines and see if any of them begin with '3;fio-'
 219         # If so, the line is probably terse output. Obviously, this only
 220         # works for fio terse version 3 and it does not work for
 221         # multi-line terse output
 222         #
 223         lines = file_data.splitlines()
 224         for i in range(8):
 225             file_data = lines[i]
 226             if file_data.startswith('3;fio-'):
 227                 self.terse_data = file_data.split(';')
 228                 return True
 229
 230         return False
 231
 232     def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
 233                         unified=False):
 234         """Check fio latency data.
 235
 236         ddir                data direction to check (0=read, 1=write, 2=trim)
 237         slat                True if submission latency data available to check
 238         clat                True if completion latency data available to check
 239         tlat                True of total latency data available to check
 240         plus                True if we actually have json+ format data where additional checks can
 241                             be carried out
 242         unified             True if fio is reporting unified r/w data
 243         """
 244
 245         types = {
 246             'slat': slat,
 247             'clat': clat,
 248             'lat': tlat
 249         }
 250
 251         retval = True
 252
 253         for lat in ['slat', 'clat', 'lat']:
 254             this_iter = True
 255             if not types[lat]:
 256                 if 'percentile' in jsondata[lat+'_ns']:
 257                     this_iter = False
 258                     print('unexpected %s percentiles found' % lat)
 259                 else:
 260                     print("%s percentiles skipped" % lat)
 261                 continue
 262             else:
 263                 if 'percentile' not in jsondata[lat+'_ns']:
 264                     this_iter = False
 265                     print('%s percentiles not found in fio output' % lat)
 266
 267             #
 268             # Check only for the presence/absence of json+
 269             # latency bins. Future work can check the
 270             # accurracy of the bin values and counts.
 271             #
 272             # Because the latency percentiles are based on
 273             # the bins, we can be confident that the bin
 274             # values and counts are correct if fio's
 275             # latency percentiles match what we compute
 276             # from the raw data.
 277             #
 278             if plus:
 279                 if 'bins' not in jsondata[lat+'_ns']:
 280                     print('bins not found with json+ output format')
 281                     this_iter = False
 282                 else:
 283                     if not self.check_jsonplus(jsondata[lat+'_ns']):
 284                         this_iter = False
 285             else:
 286                 if 'bins' in jsondata[lat+'_ns']:
 287                     print('json+ bins found with json output format')
 288                     this_iter = False
 289
 290             latencies = []
 291             for i in range(10):
 292                 lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
 293                 if not os.path.exists(lat_file):
 294                     break
 295                 with open(lat_file, 'r', newline='') as file:
 296                     reader = csv.reader(file)
 297                     for line in reader:
 298                         if unified or int(line[2]) == ddir:
 299                             latencies.append(int(line[1]))
 300
 301             if int(jsondata['total_ios']) != len(latencies):
 302                 this_iter = False
 303                 print('%s: total_ios = %s, latencies logged = %d' % \
 304                         (lat, jsondata['total_ios'], len(latencies)))
 305             elif self.debug:
 306                 print("total_ios %s match latencies logged" % jsondata['total_ios'])
 307
 308             latencies.sort()
 309             ptiles = jsondata[lat+'_ns']['percentile']
 310
 311             for percentile in ptiles.keys():
 312                 #
 313                 # numpy.percentile(latencies, float(percentile),
 314                 #       interpolation='higher')
 315                 # produces values that mostly match what fio reports
 316                 # however, in the tails of the distribution, the values produced
 317                 # by fio's and numpy.percentile's algorithms are occasionally off
 318                 # by one latency measurement. So instead of relying on the canned
 319                 # numpy.percentile routine, implement here fio's algorithm
 320                 #
 321                 rank = math.ceil(float(percentile)/100 * len(latencies))
 322                 if rank > 0:
 323                     index = rank - 1
 324                 else:
 325                     index = 0
 326                 value = latencies[int(index)]
 327                 fio_val = int(ptiles[percentile])
 328                 # The theory in stat.h says that the proportional error will be
 329                 # less than 1/128
 330                 if not self.similar(fio_val, value):
 331                     delta = abs(fio_val - value) / value
 332                     print("Error with %s %sth percentile: "
 333                           "fio: %d, expected: %d, proportional delta: %f" %
 334                           (lat, percentile, fio_val, value, delta))
 335                     print("Rank: %d, index: %d" % (rank, index))
 336                     this_iter = False
 337                 elif self.debug:
 338                     print('%s %sth percentile values match: %d, %d' %
 339                           (lat, percentile, fio_val, value))
 340
 341             if this_iter:
 342                 print("%s percentiles match" % lat)
 343             else:
 344                 retval = False
 345
 346         return retval
 347
 348     @staticmethod
 349     def check_empty(job):
 350         """
 351         Make sure JSON data is empty.
 352
 353         Some data structures should be empty. This function makes sure that they are.
 354
 355         job         JSON object that we need to check for emptiness
 356         """
 357
 358         return job['total_ios'] == 0 and \
 359                 job['slat_ns']['N'] == 0 and \
 360                 job['clat_ns']['N'] == 0 and \
 361                 job['lat_ns']['N'] == 0
 362
 363     def check_nocmdprio_lat(self, job):
 364         """
 365         Make sure no high/low priority latencies appear.
 366
 367         job         JSON object to check
 368         """
 369
 370         for ddir in ['read', 'write', 'trim']:
 371             if ddir in job:
 372                 if 'lat_high_prio' in job[ddir] or 'lat_low_prio' in job[ddir] or \
 373                     'clat_high_prio' in job[ddir] or 'clat_low_prio' in job[ddir]:
 374                     print("Unexpected high/low priority latencies found in %s output" % ddir)
 375                     return False
 376
 377         if self.debug:
 378             print("No high/low priority latencies found")
 379
 380         return True
 381
 382     @staticmethod
 383     def similar(approximation, actual):
 384         """
 385         Check whether the approximate values recorded by fio are within the theoretical bound.
 386
 387         Since it is impractical to store exact latency measurements for each and every IO, fio
 388         groups similar latency measurements into variable-sized bins. The theory in stat.h says
 389         that the proportional error will be less than 1/128. This function checks whether this
 390         is true.
 391
 392         TODO This test will fail when comparing a value from the largest latency bin against its
 393         actual measurement. Find some way to detect this and avoid failing.
 394
 395         approximation   value of the bin used by fio to store a given latency
 396         actual          actual latency value
 397         """
 398
 399         # Avoid a division by zero. The smallest latency values have no error.
 400         if actual == 0:
 401             return approximation == 0
 402
 403         delta = abs(approximation - actual) / actual
 404         return delta <= 1/128
 405
 406     def check_jsonplus(self, jsondata):
 407         """Check consistency of json+ data
 408
 409         When we have json+ data we can check the min value, max value, and
 410         sample size reported by fio
 411
 412         jsondata            json+ data that we need to check
 413         """
 414
 415         retval = True
 416
 417         keys = [int(k) for k in jsondata['bins'].keys()]
 418         values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
 419         smallest = min(keys)
 420         biggest = max(keys)
 421         sampsize = sum(values)
 422
 423         if not self.similar(jsondata['min'], smallest):
 424             retval = False
 425             print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
 426         elif self.debug:
 427             print('json+ min values match: %d' % jsondata['min'])
 428
 429         if not self.similar(jsondata['max'], biggest):
 430             retval = False
 431             print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
 432         elif self.debug:
 433             print('json+ max values match: %d' % jsondata['max'])
 434
 435         if sampsize != jsondata['N']:
 436             retval = False
 437             print('reported sample size %d does not match json+ total count %d' % \
 438                     (jsondata['N'], sampsize))
 439         elif self.debug:
 440             print('json+ sample sizes match: %d' % sampsize)
 441
 442         return retval
 443
 444     def check_sync_lat(self, jsondata, plus=False):
 445         """Check fsync latency percentile data.
 446
 447         All we can check is that some percentiles are reported, unless we have json+ data.
 448         If we actually have json+ data then we can do more checking.
 449
 450         jsondata        JSON data for fsync operations
 451         plus            True if we actually have json+ data
 452         """
 453         retval = True
 454
 455         if 'percentile' not in jsondata['lat_ns']:
 456             print("Sync percentile data not found")
 457             return False
 458
 459         if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
 460             retval = False
 461             print('Mismatch between total_ios and lat_ns sample size')
 462         elif self.debug:
 463             print('sync sample sizes match: %d' % jsondata['total_ios'])
 464
 465         if not plus:
 466             if 'bins' in jsondata['lat_ns']:
 467                 print('Unexpected json+ bin data found')
 468                 return False
 469
 470         if not self.check_jsonplus(jsondata['lat_ns']):
 471             retval = False
 472
 473         return retval
 474
 475     def check_terse(self, terse, jsondata):
 476         """Compare terse latencies with JSON latencies.
 477
 478         terse           terse format data for checking
 479         jsondata        JSON format data for checking
 480         """
 481
 482         retval = True
 483
 484         for lat in terse:
 485             split = lat.split('%')
 486             pct = split[0]
 487             terse_val = int(split[1][1:])
 488             json_val = math.floor(jsondata[pct]/1000)
 489             if terse_val != json_val:
 490                 retval = False
 491                 print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
 492                         (pct, jsondata[pct], json_val, terse_val))
 493             elif self.debug:
 494                 print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))
 495
 496         return retval
 497
 498     def check_prio_latencies(self, jsondata, clat=True, plus=False):
 499         """Check consistency of high/low priority latencies.
 500
 501         clat                True if we should check clat data; other check lat data
 502         plus                True if we have json+ format data where additional checks can
 503                             be carried out
 504         unified             True if fio is reporting unified r/w data
 505         """
 506
 507         if clat:
 508             high = 'clat_high_prio'
 509             low = 'clat_low_prio'
 510             combined = 'clat_ns'
 511         else:
 512             high = 'lat_high_prio'
 513             low = 'lat_low_prio'
 514             combined = 'lat_ns'
 515
 516         if not high in jsondata or not low in jsondata or not combined in jsondata:
 517             print("Error identifying high/low priority latencies")
 518             return False
 519
 520         if jsondata[high]['N'] + jsondata[low]['N'] != jsondata[combined]['N']:
 521             print("High %d + low %d != combined sample size %d" % \
 522                     (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
 523             return False
 524         elif self.debug:
 525             print("High %d + low %d == combined sample size %d" % \
 526                     (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
 527
 528         if min(jsondata[high]['min'], jsondata[low]['min']) != jsondata[combined]['min']:
 529             print("Min of high %d, low %d min latencies does not match min %d from combined data" % \
 530                     (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
 531             return False
 532         elif self.debug:
 533             print("Min of high %d, low %d min latencies matches min %d from combined data" % \
 534                     (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
 535
 536         if max(jsondata[high]['max'], jsondata[low]['max']) != jsondata[combined]['max']:
 537             print("Max of high %d, low %d max latencies does not match max %d from combined data" % \
 538                     (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
 539             return False
 540         elif self.debug:
 541             print("Max of high %d, low %d max latencies matches max %d from combined data" % \
 542                     (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
 543
 544         weighted_avg = (jsondata[high]['mean'] * jsondata[high]['N'] + \
 545                         jsondata[low]['mean'] * jsondata[low]['N']) / jsondata[combined]['N']
 546         delta = abs(weighted_avg - jsondata[combined]['mean'])
 547         if (delta / jsondata[combined]['mean']) > 0.0001:
 548             print("Difference between weighted average %f of high, low means "
 549                   "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
 550             return False
 551         elif self.debug:
 552             print("Weighted average %f of high, low means matches actual mean %f" % \
 553                     (weighted_avg, jsondata[combined]['mean']))
 554
 555         if plus:
 556             if not self.check_jsonplus(jsondata[high]):
 557                 return False
 558             if not self.check_jsonplus(jsondata[low]):
 559                 return False
 560
 561             bins = {**jsondata[high]['bins'], **jsondata[low]['bins']}
 562             for duration in bins.keys():
 563                 if duration in jsondata[high]['bins'] and duration in jsondata[low]['bins']:
 564                     bins[duration] = jsondata[high]['bins'][duration] + \
 565                             jsondata[low]['bins'][duration]
 566
 567             if len(bins) != len(jsondata[combined]['bins']):
 568                 print("Number of combined high/low bins does not match number of overall bins")
 569                 return False
 570             elif self.debug:
 571                 print("Number of bins from merged high/low data matches number of overall bins")
 572
 573             for duration in bins.keys():
 574                 if bins[duration] != jsondata[combined]['bins'][duration]:
 575                     print("Merged high/low count does not match overall count for duration %d" \
 576                             % duration)
 577                     return False
 578
 579         print("Merged high/low priority latency data match combined latency data")
 580         return True
 581
 582     def check(self):
 583         """Check test output."""
 584
 585         raise NotImplementedError()
 586
 587
 588 class Test001(FioLatTest):
 589     """Test object for Test 1."""
 590
 591     def check(self):
 592         """Check Test 1 output."""
 593
 594         job = self.json_data['jobs'][0]
 595
 596         retval = True
 597         if not self.check_empty(job['write']):
 598             print("Unexpected write data found in output")
 599             retval = False
 600         if not self.check_empty(job['trim']):
 601             print("Unexpected trim data found in output")
 602             retval = False
 603         if not self.check_nocmdprio_lat(job):
 604             print("Unexpected high/low priority latencies found")
 605             retval = False
 606
 607         retval &= self.check_latencies(job['read'], 0, slat=False)
 608
 609         return retval
 610
 611
 612 class Test002(FioLatTest):
 613     """Test object for Test 2."""
 614
 615     def check(self):
 616         """Check Test 2 output."""
 617
 618         job = self.json_data['jobs'][0]
 619
 620         retval = True
 621         if not self.check_empty(job['read']):
 622             print("Unexpected read data found in output")
 623             retval = False
 624         if not self.check_empty(job['trim']):
 625             print("Unexpected trim data found in output")
 626             retval = False
 627         if not self.check_nocmdprio_lat(job):
 628             print("Unexpected high/low priority latencies found")
 629             retval = False
 630
 631         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False)
 632
 633         return retval
 634
 635
 636 class Test003(FioLatTest):
 637     """Test object for Test 3."""
 638
 639     def check(self):
 640         """Check Test 3 output."""
 641
 642         job = self.json_data['jobs'][0]
 643
 644         retval = True
 645         if not self.check_empty(job['read']):
 646             print("Unexpected read data found in output")
 647             retval = False
 648         if not self.check_empty(job['write']):
 649             print("Unexpected write data found in output")
 650             retval = False
 651         if not self.check_nocmdprio_lat(job):
 652             print("Unexpected high/low priority latencies found")
 653             retval = False
 654
 655         retval &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)
 656
 657         return retval
 658
 659
 660 class Test004(FioLatTest):
 661     """Test object for Tests 4, 13."""
 662
 663     def check(self):
 664         """Check Test 4, 13 output."""
 665
 666         job = self.json_data['jobs'][0]
 667
 668         retval = True
 669         if not self.check_empty(job['write']):
 670             print("Unexpected write data found in output")
 671             retval = False
 672         if not self.check_empty(job['trim']):
 673             print("Unexpected trim data found in output")
 674             retval = False
 675         if not self.check_nocmdprio_lat(job):
 676             print("Unexpected high/low priority latencies found")
 677             retval = False
 678
 679         retval &= self.check_latencies(job['read'], 0, plus=True)
 680
 681         return retval
 682
 683
 684 class Test005(FioLatTest):
 685     """Test object for Test 5."""
 686
 687     def check(self):
 688         """Check Test 5 output."""
 689
 690         job = self.json_data['jobs'][0]
 691
 692         retval = True
 693         if not self.check_empty(job['read']):
 694             print("Unexpected read data found in output")
 695             retval = False
 696         if not self.check_empty(job['trim']):
 697             print("Unexpected trim data found in output")
 698             retval = False
 699         if not self.check_nocmdprio_lat(job):
 700             print("Unexpected high/low priority latencies found")
 701             retval = False
 702
 703         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 704
 705         return retval
 706
 707
 708 class Test006(FioLatTest):
 709     """Test object for Test 6."""
 710
 711     def check(self):
 712         """Check Test 6 output."""
 713
 714         job = self.json_data['jobs'][0]
 715
 716         retval = True
 717         if not self.check_empty(job['write']):
 718             print("Unexpected write data found in output")
 719             retval = False
 720         if not self.check_empty(job['trim']):
 721             print("Unexpected trim data found in output")
 722             retval = False
 723         if not self.check_nocmdprio_lat(job):
 724             print("Unexpected high/low priority latencies found")
 725             retval = False
 726
 727         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 728
 729         return retval
 730
 731
 732 class Test007(FioLatTest):
 733     """Test object for Test 7."""
 734
 735     def check(self):
 736         """Check Test 7 output."""
 737
 738         job = self.json_data['jobs'][0]
 739
 740         retval = True
 741         if not self.check_empty(job['trim']):
 742             print("Unexpected trim data found in output")
 743             retval = False
 744         if not self.check_nocmdprio_lat(job):
 745             print("Unexpected high/low priority latencies found")
 746             retval = False
 747
 748         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 749         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 750
 751         return retval
 752
 753
 754 class Test008(FioLatTest):
 755     """Test object for Tests 8, 14."""
 756
 757     def check(self):
 758         """Check Test 8, 14 output."""
 759
 760         job = self.json_data['jobs'][0]
 761
 762         retval = True
 763         if 'read' in job or 'write'in job or 'trim' in job:
 764             print("Unexpected data direction found in fio output")
 765             retval = False
 766         if not self.check_nocmdprio_lat(job):
 767             print("Unexpected high/low priority latencies found")
 768             retval = False
 769
 770         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 771
 772         return retval
 773
 774
 775 class Test009(FioLatTest):
 776     """Test object for Test 9."""
 777
 778     def check(self):
 779         """Check Test 9 output."""
 780
 781         job = self.json_data['jobs'][0]
 782
 783         retval = True
 784         if not self.check_empty(job['read']):
 785             print("Unexpected read data found in output")
 786             retval = False
 787         if not self.check_empty(job['trim']):
 788             print("Unexpected trim data found in output")
 789             retval = False
 790         if not self.check_sync_lat(job['sync'], plus=True):
 791             print("Error checking fsync latency data")
 792             retval = False
 793         if not self.check_nocmdprio_lat(job):
 794             print("Unexpected high/low priority latencies found")
 795             retval = False
 796
 797         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 798
 799         return retval
 800
 801
 802 class Test010(FioLatTest):
 803     """Test object for Test 10."""
 804
 805     def check(self):
 806         """Check Test 10 output."""
 807
 808         job = self.json_data['jobs'][0]
 809
 810         retval = True
 811         if not self.check_empty(job['trim']):
 812             print("Unexpected trim data found in output")
 813             retval = False
 814         if not self.check_nocmdprio_lat(job):
 815             print("Unexpected high/low priority latencies found")
 816             retval = False
 817
 818         retval &= self.check_latencies(job['read'], 0, plus=True)
 819         retval &= self.check_latencies(job['write'], 1, plus=True)
 820         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 821         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 822         # Terse data checking only works for default percentiles.
 823         # This needs to be changed if something other than the default is ever used.
 824
 825         return retval
 826
 827
 828 class Test011(FioLatTest):
 829     """Test object for Test 11."""
 830
 831     def check(self):
 832         """Check Test 11 output."""
 833
 834         job = self.json_data['jobs'][0]
 835
 836         retval = True
 837         if not self.check_empty(job['trim']):
 838             print("Unexpected trim data found in output")
 839             retval = False
 840         if not self.check_nocmdprio_lat(job):
 841             print("Unexpected high/low priority latencies found")
 842             retval = False
 843
 844         retval &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
 845         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
 846         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 847         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 848         # Terse data checking only works for default percentiles.
 849         # This needs to be changed if something other than the default is ever used.
 850
 851         return retval
 852
 853
 854 class Test015(FioLatTest):
 855     """Test object for Test 15."""
 856
 857     def check(self):
 858         """Check Test 15 output."""
 859
 860         job = self.json_data['jobs'][0]
 861
 862         retval = True
 863         if not self.check_empty(job['write']):
 864             print("Unexpected write data found in output")
 865             retval = False
 866         if not self.check_empty(job['trim']):
 867             print("Unexpected trim data found in output")
 868             retval = False
 869
 870         retval &= self.check_latencies(job['read'], 0, plus=True)
 871         retval &= self.check_prio_latencies(job['read'], clat=False, plus=True)
 872
 873         return retval
 874
 875
 876 class Test016(FioLatTest):
 877     """Test object for Test 16."""
 878
 879     def check(self):
 880         """Check Test 16 output."""
 881
 882         job = self.json_data['jobs'][0]
 883
 884         retval = True
 885         if not self.check_empty(job['read']):
 886             print("Unexpected read data found in output")
 887             retval = False
 888         if not self.check_empty(job['trim']):
 889             print("Unexpected trim data found in output")
 890             retval = False
 891
 892         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 893         retval &= self.check_prio_latencies(job['write'], clat=False, plus=True)
 894
 895         return retval
 896
 897
 898 class Test017(FioLatTest):
 899     """Test object for Test 17."""
 900
 901     def check(self):
 902         """Check Test 17 output."""
 903
 904         job = self.json_data['jobs'][0]
 905
 906         retval = True
 907         if not self.check_empty(job['write']):
 908             print("Unexpected write data found in output")
 909             retval = False
 910         if not self.check_empty(job['trim']):
 911             print("Unexpected trim data found in output")
 912             retval = False
 913
 914         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 915         retval &= self.check_prio_latencies(job['read'], plus=True)
 916
 917         return retval
 918
 919
 920 class Test018(FioLatTest):
 921     """Test object for Test 18."""
 922
 923     def check(self):
 924         """Check Test 18 output."""
 925
 926         job = self.json_data['jobs'][0]
 927
 928         retval = True
 929         if not self.check_empty(job['trim']):
 930             print("Unexpected trim data found in output")
 931             retval = False
 932
 933         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 934         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 935
 936         # We actually have json+ data but setting plus=False below avoids checking the
 937         # json+ bins which did not exist for clat and lat because this job is run with
 938         # clat_percentiles=0, lat_percentiles=0, However, we can still check the summary
 939         # statistics
 940         retval &= self.check_prio_latencies(job['write'], plus=False)
 941         retval &= self.check_prio_latencies(job['read'], plus=False)
 942
 943         return retval
 944
 945
 946 class Test019(FioLatTest):
 947     """Test object for Tests 19, 20."""
 948
 949     def check(self):
 950         """Check Test 19, 20 output."""
 951
 952         job = self.json_data['jobs'][0]
 953
 954         retval = True
 955         if 'read' in job or 'write'in job or 'trim' in job:
 956             print("Unexpected data direction found in fio output")
 957             retval = False
 958
 959         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 960         retval &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)
 961
 962         return retval
 963
 964
 965 def parse_args():
 966     """Parse command-line arguments."""
 967
 968     parser = argparse.ArgumentParser()
 969     parser.add_argument('-f', '--fio', help='path to file executable (e.g., ./fio)')
 970     parser.add_argument('-a', '--artifact-root', help='artifact root directory')
 971     parser.add_argument('-d', '--debug', help='enable debug output', action='store_true')
 972     parser.add_argument('-s', '--skip', nargs='+', type=int,
 973                         help='list of test(s) to skip')
 974     parser.add_argument('-o', '--run-only', nargs='+', type=int,
 975                         help='list of test(s) to run, skipping all others')
 976     args = parser.parse_args()
 977
 978     return args
 979
 980
 981 def main():
 982     """Run tests of fio latency percentile reporting"""
 983
 984     args = parse_args()
 985
 986     artifact_root = args.artifact_root if args.artifact_root else \
 987         "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
 988     os.mkdir(artifact_root)
 989     print("Artifact directory is %s" % artifact_root)
 990
 991     if args.fio:
 992         fio = str(Path(args.fio).absolute())
 993     else:
 994         fio = 'fio'
 995     print("fio path is %s" % fio)
 996
 997     if platform.system() == 'Linux':
 998         aio = 'libaio'
 999     elif platform.system() == 'Windows':
1000         aio = 'windowsaio'
1001     else:
1002         aio = 'posixaio'
1003
1004     test_list = [
1005         {
1006             # randread, null
1007             # enable slat, clat, lat
1008             # only clat and lat will appear because
1009             # because the null ioengine is syncrhonous
1010             "test_id": 1,
1011             "runtime": 2,
1012             "output-format": "json",
1013             "slat_percentiles": 1,
1014             "clat_percentiles": 1,
1015             "lat_percentiles": 1,
1016             "ioengine": 'null',
1017             'rw': 'randread',
1018             "test_obj": Test001,
1019         },
1020         {
1021             # randwrite, null
1022             # enable lat only
1023             "test_id": 2,
1024             "runtime": 2,
1025             "output-format": "json",
1026             "slat_percentiles": 0,
1027             "clat_percentiles": 0,
1028             "lat_percentiles": 1,
1029             "ioengine": 'null',
1030             'rw': 'randwrite',
1031             "test_obj": Test002,
1032         },
1033         {
1034             # randtrim, null
1035             # enable clat only
1036             "test_id": 3,
1037             "runtime": 2,
1038             "output-format": "json",
1039             "slat_percentiles": 0,
1040             "clat_percentiles": 1,
1041             "lat_percentiles": 0,
1042             "ioengine": 'null',
1043             'rw': 'randtrim',
1044             "test_obj": Test003,
1045         },
1046         {
1047             # randread, aio
1048             # enable slat, clat, lat
1049             # all will appear because liaio is asynchronous
1050             "test_id": 4,
1051             "runtime": 5,
1052             "output-format": "json+",
1053             "slat_percentiles": 1,
1054             "clat_percentiles": 1,
1055             "lat_percentiles": 1,
1056             "ioengine": aio,
1057             'rw': 'randread',
1058             "test_obj": Test004,
1059         },
1060         {
1061             # randwrite, aio
1062             # enable only clat, lat
1063             "test_id": 5,
1064             "runtime": 5,
1065             "output-format": "json+",
1066             "slat_percentiles": 0,
1067             "clat_percentiles": 1,
1068             "lat_percentiles": 1,
1069             "ioengine": aio,
1070             'rw': 'randwrite',
1071             "test_obj": Test005,
1072         },
1073         {
1074             # randread, aio
1075             # by default only clat should appear
1076             "test_id": 6,
1077             "runtime": 5,
1078             "output-format": "json+",
1079             "ioengine": aio,
1080             'rw': 'randread',
1081             "test_obj": Test006,
1082         },
1083         {
1084             # 50/50 r/w, aio
1085             # enable only slat
1086             "test_id": 7,
1087             "runtime": 5,
1088             "output-format": "json+",
1089             "slat_percentiles": 1,
1090             "clat_percentiles": 0,
1091             "lat_percentiles": 0,
1092             "ioengine": aio,
1093             'rw': 'randrw',
1094             "test_obj": Test007,
1095         },
1096         {
1097             # 50/50 r/w, aio, unified_rw_reporting
1098             # enable slat, clat, lat
1099             "test_id": 8,
1100             "runtime": 5,
1101             "output-format": "json+",
1102             "slat_percentiles": 1,
1103             "clat_percentiles": 1,
1104             "lat_percentiles": 1,
1105             "ioengine": aio,
1106             'rw': 'randrw',
1107             'unified_rw_reporting': 1,
1108             "test_obj": Test008,
1109         },
1110         {
1111             # randwrite, null
1112             # enable slat, clat, lat
1113             # fsync
1114             "test_id": 9,
1115             "runtime": 2,
1116             "output-format": "json+",
1117             "slat_percentiles": 1,
1118             "clat_percentiles": 1,
1119             "lat_percentiles": 1,
1120             "ioengine": 'null',
1121             'rw': 'randwrite',
1122             'fsync': 32,
1123             "test_obj": Test009,
1124         },
1125         {
1126             # 50/50 r/w, aio
1127             # enable slat, clat, lat
1128             "test_id": 10,
1129             "runtime": 5,
1130             "output-format": "terse,json+",
1131             "slat_percentiles": 1,
1132             "clat_percentiles": 1,
1133             "lat_percentiles": 1,
1134             "ioengine": aio,
1135             'rw': 'randrw',
1136             "test_obj": Test010,
1137         },
1138         {
1139             # 50/50 r/w, aio
1140             # enable only lat
1141             "test_id": 11,
1142             "runtime": 5,
1143             "output-format": "terse,json+",
1144             "slat_percentiles": 0,
1145             "clat_percentiles": 0,
1146             "lat_percentiles": 1,
1147             "ioengine": aio,
1148             'rw': 'randrw',
1149             "test_obj": Test011,
1150         },
1151         {
1152             # randread, null
1153             # enable slat, clat, lat
1154             # only clat and lat will appear because
1155             # because the null ioengine is syncrhonous
1156             # same as Test 1 except
1157             # numjobs = 4 to test sum_thread_stats() changes
1158             "test_id": 12,
1159             "runtime": 2,
1160             "output-format": "json",
1161             "slat_percentiles": 1,
1162             "clat_percentiles": 1,
1163             "lat_percentiles": 1,
1164             "ioengine": 'null',
1165             'rw': 'randread',
1166             'numjobs': 4,
1167             "test_obj": Test001,
1168         },
1169         {
1170             # randread, aio
1171             # enable slat, clat, lat
1172             # all will appear because liaio is asynchronous
1173             # same as Test 4 except
1174             # numjobs = 4 to test sum_thread_stats() changes
1175             "test_id": 13,
1176             "runtime": 5,
1177             "output-format": "json+",
1178             "slat_percentiles": 1,
1179             "clat_percentiles": 1,
1180             "lat_percentiles": 1,
1181             "ioengine": aio,
1182             'rw': 'randread',
1183             'numjobs': 4,
1184             "test_obj": Test004,
1185         },
1186         {
1187             # 50/50 r/w, aio, unified_rw_reporting
1188             # enable slat, clat, lata
1189             # same as Test 8 except
1190             # numjobs = 4 to test sum_thread_stats() changes
1191             "test_id": 14,
1192             "runtime": 5,
1193             "output-format": "json+",
1194             "slat_percentiles": 1,
1195             "clat_percentiles": 1,
1196             "lat_percentiles": 1,
1197             "ioengine": aio,
1198             'rw': 'randrw',
1199             'unified_rw_reporting': 1,
1200             'numjobs': 4,
1201             "test_obj": Test008,
1202         },
1203         {
1204             # randread, aio
1205             # enable slat, clat, lat
1206             # all will appear because liaio is asynchronous
1207             # same as Test 4 except add cmdprio_percentage
1208             "test_id": 15,
1209             "runtime": 5,
1210             "output-format": "json+",
1211             "slat_percentiles": 1,
1212             "clat_percentiles": 1,
1213             "lat_percentiles": 1,
1214             "ioengine": aio,
1215             'rw': 'randread',
1216             'cmdprio_percentage': 50,
1217             "test_obj": Test015,
1218         },
1219         {
1220             # randwrite, aio
1221             # enable only clat, lat
1222             # same as Test 5 except add cmdprio_percentage
1223             "test_id": 16,
1224             "runtime": 5,
1225             "output-format": "json+",
1226             "slat_percentiles": 0,
1227             "clat_percentiles": 1,
1228             "lat_percentiles": 1,
1229             "ioengine": aio,
1230             'rw': 'randwrite',
1231             'cmdprio_percentage': 50,
1232             "test_obj": Test016,
1233         },
1234         {
1235             # randread, aio
1236             # by default only clat should appear
1237             # same as Test 6 except add cmdprio_percentage
1238             "test_id": 17,
1239             "runtime": 5,
1240             "output-format": "json+",
1241             "ioengine": aio,
1242             'rw': 'randread',
1243             'cmdprio_percentage': 50,
1244             "test_obj": Test017,
1245         },
1246         {
1247             # 50/50 r/w, aio
1248             # enable only slat
1249             # same as Test 7 except add cmdprio_percentage
1250             "test_id": 18,
1251             "runtime": 5,
1252             "output-format": "json+",
1253             "slat_percentiles": 1,
1254             "clat_percentiles": 0,
1255             "lat_percentiles": 0,
1256             "ioengine": aio,
1257             'rw': 'randrw',
1258             'cmdprio_percentage': 50,
1259             "test_obj": Test018,
1260         },
1261         {
1262             # 50/50 r/w, aio, unified_rw_reporting
1263             # enable slat, clat, lat
1264             # same as Test 8 except add cmdprio_percentage
1265             "test_id": 19,
1266             "runtime": 5,
1267             "output-format": "json+",
1268             "slat_percentiles": 1,
1269             "clat_percentiles": 1,
1270             "lat_percentiles": 1,
1271             "ioengine": aio,
1272             'rw': 'randrw',
1273             'unified_rw_reporting': 1,
1274             'cmdprio_percentage': 50,
1275             "test_obj": Test019,
1276         },
1277         {
1278             # 50/50 r/w, aio, unified_rw_reporting
1279             # enable slat, clat, lat
1280             # same as Test 19 except
1281             # add numjobs = 4 to test sum_thread_stats() changes
1282             "test_id": 20,
1283             "runtime": 5,
1284             "output-format": "json+",
1285             "slat_percentiles": 1,
1286             "clat_percentiles": 1,
1287             "lat_percentiles": 1,
1288             "ioengine": aio,
1289             'rw': 'randrw',
1290             'unified_rw_reporting': 1,
1291             'cmdprio_percentage': 50,
1292             'numjobs': 4,
1293             "test_obj": Test019,
1294         },
1295     ]
1296
1297     passed = 0
1298     failed = 0
1299     skipped = 0
1300
1301     for test in test_list:
1302         if (args.skip and test['test_id'] in args.skip) or \
1303            (args.run_only and test['test_id'] not in args.run_only):
1304             skipped = skipped + 1
1305             outcome = 'SKIPPED (User request)'
1306         elif platform.system() != 'Linux' and 'cmdprio_percentage' in test:
1307             skipped = skipped + 1
1308             outcome = 'SKIPPED (Linux required for cmdprio_percentage tests)'
1309         else:
1310             test_obj = test['test_obj'](artifact_root, test, args.debug)
1311             status = test_obj.run_fio(fio)
1312             if status:
1313                 status = test_obj.check()
1314             if status:
1315                 passed = passed + 1
1316                 outcome = 'PASSED'
1317             else:
1318                 failed = failed + 1
1319                 outcome = 'FAILED'
1320
1321         print("**********Test {0} {1}**********".format(test['test_id'], outcome))
1322
1323     print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))
1324
1325     sys.exit(failed)
1326
1327
# Run the test suite only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()