t/latency_percentiles.py

   1 #!/usr/bin/env python3
   2 # SPDX-License-Identifier: GPL-2.0-only
   3 #
   4 # Copyright (c) 2020 Western Digital Corporation or its affiliates.
   5 #
   6 """
   7 # latency_percentiles.py
   8 #
   9 # Test the code that produces latency percentiles
  10 # This is mostly to test the code changes to allow reporting
  11 # of slat, clat, and lat percentiles
  12 #
  13 # USAGE
# python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
  15 #
  16 #
  17 # Test scenarios:
  18 #
  19 # - DONE json
  20 #   unified rw reporting
  21 #   compare with latency log
  22 #   try various combinations of the ?lat_percentile options
  23 #   null, aio
  24 #   r, w, t
  25 # - DONE json+
  26 #   check presence of latency bins
  27 #   if the json percentiles match those from the raw data
  28 #   then the latency bin values and counts are probably ok
  29 # - DONE terse
  30 #   produce both terse, JSON output and confirm that they match
  31 #   lat only; both lat and clat
  32 # - DONE sync_lat
  33 #   confirm that sync_lat data appears
  34 # - MANUAL TESTING normal output:
  35 #       null ioengine
  36 #           enable all, but only clat and lat appear
  37 #           enable subset of latency types
  38 #           read, write, trim, unified
  39 #       libaio ioengine
  40 #           enable all latency types
  41 #           enable subset of latency types
  42 #           read, write, trim, unified
  43 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  44 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  45 # echo confirm that clat and lat percentiles appear
  46 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  47 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
  48 # echo confirm that only lat percentiles appear
  49 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  50 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
  51 # echo confirm that only clat percentiles appear
  52 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  53 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  54 # echo confirm that slat, clat, lat percentiles appear
  55 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  56 #       --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
  57 # echo confirm that clat and lat percentiles appear
  58 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#       --ioengine=libaio --rw=randrw
  60 # echo confirm that clat percentiles appear for reads and writes
  61 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  62 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
  63 # echo confirm that slat percentiles appear for both reads and writes
  64 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  65 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  66 #       --rw=randrw --unified_rw_reporting=1
  67 # echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
  68 #./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  69 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  70 #       --rw=randrw --fsync=32
  71 # echo confirm that fsync latencies appear
  72 """
  73
  74 import os
  75 import csv
  76 import sys
  77 import json
  78 import math
  79 import time
  80 import argparse
  81 import platform
  82 import subprocess
  83 from pathlib import Path
  84
  85
  86 class FioLatTest():
  87     """fio latency percentile test."""
  88
  89     def __init__(self, artifact_root, test_options, debug):
  90         """
  91         artifact_root   root directory for artifacts (subdirectory will be created under here)
  92         test            test specification
  93         """
  94         self.artifact_root = artifact_root
  95         self.test_options = test_options
  96         self.debug = debug
  97         self.filename = None
  98         self.json_data = None
  99         self.terse_data = None
 100
 101         self.test_dir = os.path.join(self.artifact_root,
 102                                      "{:03d}".format(self.test_options['test_id']))
 103         if not os.path.exists(self.test_dir):
 104             os.mkdir(self.test_dir)
 105
 106         self.filename = "latency{:03d}".format(self.test_options['test_id'])
 107
 108     def run_fio(self, fio_path):
 109         """Run a test."""
 110
 111         fio_args = [
 112             "--name=latency",
 113             "--randrepeat=0",
 114             "--norandommap",
 115             "--time_based",
 116             "--size=16M",
 117             "--rwmixread=50",
 118             "--group_reporting=1",
 119             "--write_lat_log={0}".format(self.filename),
 120             "--output={0}.out".format(self.filename),
 121             "--ioengine={ioengine}".format(**self.test_options),
 122             "--rw={rw}".format(**self.test_options),
 123             "--runtime={runtime}".format(**self.test_options),
 124             "--output-format={output-format}".format(**self.test_options),
 125         ]
 126         for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
 127                     'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs', 'cmdprio_percentage']:
 128             if opt in self.test_options:
 129                 option = '--{0}={{{0}}}'.format(opt)
 130                 fio_args.append(option.format(**self.test_options))
 131
 132         command = [fio_path] + fio_args
 133         with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
 134                 command_file:
 135             command_file.write("%s\n" % command)
 136
 137         passed = True
 138         stdout_file = open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)), "w+")
 139         stderr_file = open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)), "w+")
 140         exitcode_file = open(os.path.join(self.test_dir,
 141                                           "{0}.exitcode".format(self.filename)), "w+")
 142         try:
 143             proc = None
 144             # Avoid using subprocess.run() here because when a timeout occurs,
 145             # fio will be stopped with SIGKILL. This does not give fio a
 146             # chance to clean up and means that child processes may continue
 147             # running and submitting IO.
 148             proc = subprocess.Popen(command,
 149                                     stdout=stdout_file,
 150                                     stderr=stderr_file,
 151                                     cwd=self.test_dir,
 152                                     universal_newlines=True)
 153             proc.communicate(timeout=300)
 154             exitcode_file.write('{0}\n'.format(proc.returncode))
 155             passed &= (proc.returncode == 0)
 156         except subprocess.TimeoutExpired:
 157             proc.terminate()
 158             proc.communicate()
 159             assert proc.poll()
 160             print("Timeout expired")
 161             passed = False
 162         except Exception:
 163             if proc:
 164                 if not proc.poll():
 165                     proc.terminate()
 166                     proc.communicate()
 167             print("Exception: %s" % sys.exc_info())
 168             passed = False
 169         finally:
 170             stdout_file.close()
 171             stderr_file.close()
 172             exitcode_file.close()
 173
 174         if passed:
 175             if 'json' in self.test_options['output-format']:
 176                 if not self.get_json():
 177                     print('Unable to decode JSON data')
 178                     passed = False
 179             if 'terse' in self.test_options['output-format']:
 180                 if not self.get_terse():
 181                     print('Unable to decode terse data')
 182                     passed = False
 183
 184         return passed
 185
 186     def get_json(self):
 187         """Convert fio JSON output into a python JSON object"""
 188
 189         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 190         with open(filename, 'r') as file:
 191             file_data = file.read()
 192
 193         #
 194         # Sometimes fio informational messages are included at the top of the
 195         # JSON output, especially under Windows. Try to decode output as JSON
 196         # data, lopping off up to the first four lines
 197         #
 198         lines = file_data.splitlines()
 199         for i in range(5):
 200             file_data = '\n'.join(lines[i:])
 201             try:
 202                 self.json_data = json.loads(file_data)
 203             except json.JSONDecodeError:
 204                 continue
 205             else:
 206                 return True
 207
 208         return False
 209
 210     def get_terse(self):
 211         """Read fio output and return terse format data."""
 212
 213         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 214         with open(filename, 'r') as file:
 215             file_data = file.read()
 216
 217         #
 218         # Read the first few lines and see if any of them begin with '3;fio-'
 219         # If so, the line is probably terse output. Obviously, this only
 220         # works for fio terse version 3 and it does not work for
 221         # multi-line terse output
 222         #
 223         lines = file_data.splitlines()
 224         for i in range(8):
 225             file_data = lines[i]
 226             if file_data.startswith('3;fio-'):
 227                 self.terse_data = file_data.split(';')
 228                 return True
 229
 230         return False
 231
 232     def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
 233                         unified=False):
 234         """Check fio latency data.
 235
 236         ddir                data direction to check (0=read, 1=write, 2=trim)
 237         slat                True if submission latency data available to check
 238         clat                True if completion latency data available to check
 239         tlat                True of total latency data available to check
 240         plus                True if we actually have json+ format data where additional checks can
 241                             be carried out
 242         unified             True if fio is reporting unified r/w data
 243         """
 244
 245         types = {
 246             'slat': slat,
 247             'clat': clat,
 248             'lat': tlat
 249         }
 250
 251         retval = True
 252
 253         for lat in ['slat', 'clat', 'lat']:
 254             this_iter = True
 255             if not types[lat]:
 256                 if 'percentile' in jsondata[lat+'_ns']:
 257                     this_iter = False
 258                     print('unexpected %s percentiles found' % lat)
 259                 else:
 260                     print("%s percentiles skipped" % lat)
 261                 continue
 262             else:
 263                 if 'percentile' not in jsondata[lat+'_ns']:
 264                     this_iter = False
 265                     print('%s percentiles not found in fio output' % lat)
 266
 267             #
 268             # Check only for the presence/absence of json+
 269             # latency bins. Future work can check the
 270             # accurracy of the bin values and counts.
 271             #
 272             # Because the latency percentiles are based on
 273             # the bins, we can be confident that the bin
 274             # values and counts are correct if fio's
 275             # latency percentiles match what we compute
 276             # from the raw data.
 277             #
 278             if plus:
 279                 if 'bins' not in jsondata[lat+'_ns']:
 280                     print('bins not found with json+ output format')
 281                     this_iter = False
 282                 else:
 283                     if not self.check_jsonplus(jsondata[lat+'_ns']):
 284                         this_iter = False
 285             else:
 286                 if 'bins' in jsondata[lat+'_ns']:
 287                     print('json+ bins found with json output format')
 288                     this_iter = False
 289
 290             latencies = []
 291             for i in range(10):
 292                 lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
 293                 if not os.path.exists(lat_file):
 294                     break
 295                 with open(lat_file, 'r', newline='') as file:
 296                     reader = csv.reader(file)
 297                     for line in reader:
 298                         if unified or int(line[2]) == ddir:
 299                             latencies.append(int(line[1]))
 300
 301             if int(jsondata['total_ios']) != len(latencies):
 302                 this_iter = False
 303                 print('%s: total_ios = %s, latencies logged = %d' % \
 304                         (lat, jsondata['total_ios'], len(latencies)))
 305             elif self.debug:
 306                 print("total_ios %s match latencies logged" % jsondata['total_ios'])
 307
 308             latencies.sort()
 309             ptiles = jsondata[lat+'_ns']['percentile']
 310
 311             for percentile in ptiles.keys():
 312                 #
 313                 # numpy.percentile(latencies, float(percentile),
 314                 #       interpolation='higher')
 315                 # produces values that mostly match what fio reports
 316                 # however, in the tails of the distribution, the values produced
 317                 # by fio's and numpy.percentile's algorithms are occasionally off
 318                 # by one latency measurement. So instead of relying on the canned
 319                 # numpy.percentile routine, implement here fio's algorithm
 320                 #
 321                 rank = math.ceil(float(percentile)/100 * len(latencies))
 322                 if rank > 0:
 323                     index = rank - 1
 324                 else:
 325                     index = 0
 326                 value = latencies[int(index)]
 327                 fio_val = int(ptiles[percentile])
 328                 # The theory in stat.h says that the proportional error will be
 329                 # less than 1/128
 330                 if not self.similar(fio_val, value):
 331                     delta = abs(fio_val - value) / value
 332                     print("Error with %s %sth percentile: "
 333                           "fio: %d, expected: %d, proportional delta: %f" %
 334                           (lat, percentile, fio_val, value, delta))
 335                     print("Rank: %d, index: %d" % (rank, index))
 336                     this_iter = False
 337                 elif self.debug:
 338                     print('%s %sth percentile values match: %d, %d' %
 339                           (lat, percentile, fio_val, value))
 340
 341             if this_iter:
 342                 print("%s percentiles match" % lat)
 343             else:
 344                 retval = False
 345
 346         return retval
 347
 348     @staticmethod
 349     def check_empty(job):
 350         """
 351         Make sure JSON data is empty.
 352
 353         Some data structures should be empty. This function makes sure that they are.
 354
 355         job         JSON object that we need to check for emptiness
 356         """
 357
 358         return job['total_ios'] == 0 and \
 359                 job['slat_ns']['N'] == 0 and \
 360                 job['clat_ns']['N'] == 0 and \
 361                 job['lat_ns']['N'] == 0
 362
 363     def check_nocmdprio_lat(self, job):
 364         """
 365         Make sure no high/low priority latencies appear.
 366
 367         job         JSON object to check
 368         """
 369
 370         for ddir in ['read', 'write', 'trim']:
 371             if ddir in job:
 372                 if 'lat_high_prio' in job[ddir] or 'lat_low_prio' in job[ddir] or \
 373                     'clat_high_prio' in job[ddir] or 'clat_low_prio' in job[ddir]:
 374                     print("Unexpected high/low priority latencies found in %s output" % ddir)
 375                     return False
 376
 377         if self.debug:
 378             print("No high/low priority latencies found")
 379
 380         return True
 381
 382     @staticmethod
 383     def similar(approximation, actual):
 384         """
 385         Check whether the approximate values recorded by fio are within the theoretical bound.
 386
 387         Since it is impractical to store exact latency measurements for each and every IO, fio
 388         groups similar latency measurements into variable-sized bins. The theory in stat.h says
 389         that the proportional error will be less than 1/128. This function checks whether this
 390         is true.
 391
 392         TODO This test will fail when comparing a value from the largest latency bin against its
 393         actual measurement. Find some way to detect this and avoid failing.
 394
 395         approximation   value of the bin used by fio to store a given latency
 396         actual          actual latency value
 397         """
 398         delta = abs(approximation - actual) / actual
 399         return delta <= 1/128
 400
 401     def check_jsonplus(self, jsondata):
 402         """Check consistency of json+ data
 403
 404         When we have json+ data we can check the min value, max value, and
 405         sample size reported by fio
 406
 407         jsondata            json+ data that we need to check
 408         """
 409
 410         retval = True
 411
 412         keys = [int(k) for k in jsondata['bins'].keys()]
 413         values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
 414         smallest = min(keys)
 415         biggest = max(keys)
 416         sampsize = sum(values)
 417
 418         if not self.similar(jsondata['min'], smallest):
 419             retval = False
 420             print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
 421         elif self.debug:
 422             print('json+ min values match: %d' % jsondata['min'])
 423
 424         if not self.similar(jsondata['max'], biggest):
 425             retval = False
 426             print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
 427         elif self.debug:
 428             print('json+ max values match: %d' % jsondata['max'])
 429
 430         if sampsize != jsondata['N']:
 431             retval = False
 432             print('reported sample size %d does not match json+ total count %d' % \
 433                     (jsondata['N'], sampsize))
 434         elif self.debug:
 435             print('json+ sample sizes match: %d' % sampsize)
 436
 437         return retval
 438
 439     def check_sync_lat(self, jsondata, plus=False):
 440         """Check fsync latency percentile data.
 441
 442         All we can check is that some percentiles are reported, unless we have json+ data.
 443         If we actually have json+ data then we can do more checking.
 444
 445         jsondata        JSON data for fsync operations
 446         plus            True if we actually have json+ data
 447         """
 448         retval = True
 449
 450         if 'percentile' not in jsondata['lat_ns']:
 451             print("Sync percentile data not found")
 452             return False
 453
 454         if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
 455             retval = False
 456             print('Mismatch between total_ios and lat_ns sample size')
 457         elif self.debug:
 458             print('sync sample sizes match: %d' % jsondata['total_ios'])
 459
 460         if not plus:
 461             if 'bins' in jsondata['lat_ns']:
 462                 print('Unexpected json+ bin data found')
 463                 return False
 464
 465         if not self.check_jsonplus(jsondata['lat_ns']):
 466             retval = False
 467
 468         return retval
 469
 470     def check_terse(self, terse, jsondata):
 471         """Compare terse latencies with JSON latencies.
 472
 473         terse           terse format data for checking
 474         jsondata        JSON format data for checking
 475         """
 476
 477         retval = True
 478
 479         for lat in terse:
 480             split = lat.split('%')
 481             pct = split[0]
 482             terse_val = int(split[1][1:])
 483             json_val = math.floor(jsondata[pct]/1000)
 484             if terse_val != json_val:
 485                 retval = False
 486                 print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
 487                         (pct, jsondata[pct], json_val, terse_val))
 488             elif self.debug:
 489                 print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))
 490
 491         return retval
 492
 493     def check_prio_latencies(self, jsondata, clat=True, plus=False):
 494         """Check consistency of high/low priority latencies.
 495
 496         clat                True if we should check clat data; other check lat data
 497         plus                True if we have json+ format data where additional checks can
 498                             be carried out
 499         unified             True if fio is reporting unified r/w data
 500         """
 501
 502         if clat:
 503             high = 'clat_high_prio'
 504             low = 'clat_low_prio'
 505             combined = 'clat_ns'
 506         else:
 507             high = 'lat_high_prio'
 508             low = 'lat_low_prio'
 509             combined = 'lat_ns'
 510
 511         if not high in jsondata or not low in jsondata or not combined in jsondata:
 512             print("Error identifying high/low priority latencies")
 513             return False
 514
 515         if jsondata[high]['N'] + jsondata[low]['N'] != jsondata[combined]['N']:
 516             print("High %d + low %d != combined sample size %d" % \
 517                     (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
 518             return False
 519         elif self.debug:
 520             print("High %d + low %d == combined sample size %d" % \
 521                     (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
 522
 523         if min(jsondata[high]['min'], jsondata[low]['min']) != jsondata[combined]['min']:
 524             print("Min of high %d, low %d min latencies does not match min %d from combined data" % \
 525                     (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
 526             return False
 527         elif self.debug:
 528             print("Min of high %d, low %d min latencies matches min %d from combined data" % \
 529                     (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
 530
 531         if max(jsondata[high]['max'], jsondata[low]['max']) != jsondata[combined]['max']:
 532             print("Max of high %d, low %d max latencies does not match max %d from combined data" % \
 533                     (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
 534             return False
 535         elif self.debug:
 536             print("Max of high %d, low %d max latencies matches max %d from combined data" % \
 537                     (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
 538
 539         weighted_avg = (jsondata[high]['mean'] * jsondata[high]['N'] + \
 540                         jsondata[low]['mean'] * jsondata[low]['N']) / jsondata[combined]['N']
 541         delta = abs(weighted_avg - jsondata[combined]['mean'])
 542         if (delta / jsondata[combined]['mean']) > 0.0001:
 543             print("Difference between weighted average %f of high, low means "
 544                   "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
 545             return False
 546         elif self.debug:
 547             print("Weighted average %f of high, low means matches actual mean %f" % \
 548                     (weighted_avg, jsondata[combined]['mean']))
 549
 550         if plus:
 551             if not self.check_jsonplus(jsondata[high]):
 552                 return False
 553             if not self.check_jsonplus(jsondata[low]):
 554                 return False
 555
 556             bins = {**jsondata[high]['bins'], **jsondata[low]['bins']}
 557             for duration in bins.keys():
 558                 if duration in jsondata[high]['bins'] and duration in jsondata[low]['bins']:
 559                     bins[duration] = jsondata[high]['bins'][duration] + \
 560                             jsondata[low]['bins'][duration]
 561
 562             if len(bins) != len(jsondata[combined]['bins']):
 563                 print("Number of combined high/low bins does not match number of overall bins")
 564                 return False
 565             elif self.debug:
 566                 print("Number of bins from merged high/low data matches number of overall bins")
 567
 568             for duration in bins.keys():
 569                 if bins[duration] != jsondata[combined]['bins'][duration]:
 570                     print("Merged high/low count does not match overall count for duration %d" \
 571                             % duration)
 572                     return False
 573
 574         print("Merged high/low priority latency data match combined latency data")
 575         return True
 576
 577     def check(self):
 578         """Check test output."""
 579
 580         raise NotImplementedError()
 581
 582
 583 class Test001(FioLatTest):
 584     """Test object for Test 1."""
 585
 586     def check(self):
 587         """Check Test 1 output."""
 588
 589         job = self.json_data['jobs'][0]
 590
 591         retval = True
 592         if not self.check_empty(job['write']):
 593             print("Unexpected write data found in output")
 594             retval = False
 595         if not self.check_empty(job['trim']):
 596             print("Unexpected trim data found in output")
 597             retval = False
 598         if not self.check_nocmdprio_lat(job):
 599             print("Unexpected high/low priority latencies found")
 600             retval = False
 601
 602         retval &= self.check_latencies(job['read'], 0, slat=False)
 603
 604         return retval
 605
 606
 607 class Test002(FioLatTest):
 608     """Test object for Test 2."""
 609
 610     def check(self):
 611         """Check Test 2 output."""
 612
 613         job = self.json_data['jobs'][0]
 614
 615         retval = True
 616         if not self.check_empty(job['read']):
 617             print("Unexpected read data found in output")
 618             retval = False
 619         if not self.check_empty(job['trim']):
 620             print("Unexpected trim data found in output")
 621             retval = False
 622         if not self.check_nocmdprio_lat(job):
 623             print("Unexpected high/low priority latencies found")
 624             retval = False
 625
 626         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False)
 627
 628         return retval
 629
 630
 631 class Test003(FioLatTest):
 632     """Test object for Test 3."""
 633
 634     def check(self):
 635         """Check Test 3 output."""
 636
 637         job = self.json_data['jobs'][0]
 638
 639         retval = True
 640         if not self.check_empty(job['read']):
 641             print("Unexpected read data found in output")
 642             retval = False
 643         if not self.check_empty(job['write']):
 644             print("Unexpected write data found in output")
 645             retval = False
 646         if not self.check_nocmdprio_lat(job):
 647             print("Unexpected high/low priority latencies found")
 648             retval = False
 649
 650         retval &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)
 651
 652         return retval
 653
 654
 655 class Test004(FioLatTest):
 656     """Test object for Tests 4, 13."""
 657
 658     def check(self):
 659         """Check Test 4, 13 output."""
 660
 661         job = self.json_data['jobs'][0]
 662
 663         retval = True
 664         if not self.check_empty(job['write']):
 665             print("Unexpected write data found in output")
 666             retval = False
 667         if not self.check_empty(job['trim']):
 668             print("Unexpected trim data found in output")
 669             retval = False
 670         if not self.check_nocmdprio_lat(job):
 671             print("Unexpected high/low priority latencies found")
 672             retval = False
 673
 674         retval &= self.check_latencies(job['read'], 0, plus=True)
 675
 676         return retval
 677
 678
 679 class Test005(FioLatTest):
 680     """Test object for Test 5."""
 681
 682     def check(self):
 683         """Check Test 5 output."""
 684
 685         job = self.json_data['jobs'][0]
 686
 687         retval = True
 688         if not self.check_empty(job['read']):
 689             print("Unexpected read data found in output")
 690             retval = False
 691         if not self.check_empty(job['trim']):
 692             print("Unexpected trim data found in output")
 693             retval = False
 694         if not self.check_nocmdprio_lat(job):
 695             print("Unexpected high/low priority latencies found")
 696             retval = False
 697
 698         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 699
 700         return retval
 701
 702
 703 class Test006(FioLatTest):
 704     """Test object for Test 6."""
 705
 706     def check(self):
 707         """Check Test 6 output."""
 708
 709         job = self.json_data['jobs'][0]
 710
 711         retval = True
 712         if not self.check_empty(job['write']):
 713             print("Unexpected write data found in output")
 714             retval = False
 715         if not self.check_empty(job['trim']):
 716             print("Unexpected trim data found in output")
 717             retval = False
 718         if not self.check_nocmdprio_lat(job):
 719             print("Unexpected high/low priority latencies found")
 720             retval = False
 721
 722         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 723
 724         return retval
 725
 726
 727 class Test007(FioLatTest):
 728     """Test object for Test 7."""
 729
 730     def check(self):
 731         """Check Test 7 output."""
 732
 733         job = self.json_data['jobs'][0]
 734
 735         retval = True
 736         if not self.check_empty(job['trim']):
 737             print("Unexpected trim data found in output")
 738             retval = False
 739         if not self.check_nocmdprio_lat(job):
 740             print("Unexpected high/low priority latencies found")
 741             retval = False
 742
 743         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 744         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 745
 746         return retval
 747
 748
 749 class Test008(FioLatTest):
 750     """Test object for Tests 8, 14."""
 751
 752     def check(self):
 753         """Check Test 8, 14 output."""
 754
 755         job = self.json_data['jobs'][0]
 756
 757         retval = True
 758         if 'read' in job or 'write'in job or 'trim' in job:
 759             print("Unexpected data direction found in fio output")
 760             retval = False
 761         if not self.check_nocmdprio_lat(job):
 762             print("Unexpected high/low priority latencies found")
 763             retval = False
 764
 765         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 766
 767         return retval
 768
 769
 770 class Test009(FioLatTest):
 771     """Test object for Test 9."""
 772
 773     def check(self):
 774         """Check Test 9 output."""
 775
 776         job = self.json_data['jobs'][0]
 777
 778         retval = True
 779         if not self.check_empty(job['read']):
 780             print("Unexpected read data found in output")
 781             retval = False
 782         if not self.check_empty(job['trim']):
 783             print("Unexpected trim data found in output")
 784             retval = False
 785         if not self.check_sync_lat(job['sync'], plus=True):
 786             print("Error checking fsync latency data")
 787             retval = False
 788         if not self.check_nocmdprio_lat(job):
 789             print("Unexpected high/low priority latencies found")
 790             retval = False
 791
 792         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 793
 794         return retval
 795
 796
 797 class Test010(FioLatTest):
 798     """Test object for Test 10."""
 799
 800     def check(self):
 801         """Check Test 10 output."""
 802
 803         job = self.json_data['jobs'][0]
 804
 805         retval = True
 806         if not self.check_empty(job['trim']):
 807             print("Unexpected trim data found in output")
 808             retval = False
 809         if not self.check_nocmdprio_lat(job):
 810             print("Unexpected high/low priority latencies found")
 811             retval = False
 812
 813         retval &= self.check_latencies(job['read'], 0, plus=True)
 814         retval &= self.check_latencies(job['write'], 1, plus=True)
 815         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 816         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 817         # Terse data checking only works for default percentiles.
 818         # This needs to be changed if something other than the default is ever used.
 819
 820         return retval
 821
 822
 823 class Test011(FioLatTest):
 824     """Test object for Test 11."""
 825
 826     def check(self):
 827         """Check Test 11 output."""
 828
 829         job = self.json_data['jobs'][0]
 830
 831         retval = True
 832         if not self.check_empty(job['trim']):
 833             print("Unexpected trim data found in output")
 834             retval = False
 835         if not self.check_nocmdprio_lat(job):
 836             print("Unexpected high/low priority latencies found")
 837             retval = False
 838
 839         retval &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
 840         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
 841         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 842         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 843         # Terse data checking only works for default percentiles.
 844         # This needs to be changed if something other than the default is ever used.
 845
 846         return retval
 847
 848
 849 class Test015(FioLatTest):
 850     """Test object for Test 15."""
 851
 852     def check(self):
 853         """Check Test 15 output."""
 854
 855         job = self.json_data['jobs'][0]
 856
 857         retval = True
 858         if not self.check_empty(job['write']):
 859             print("Unexpected write data found in output")
 860             retval = False
 861         if not self.check_empty(job['trim']):
 862             print("Unexpected trim data found in output")
 863             retval = False
 864
 865         retval &= self.check_latencies(job['read'], 0, plus=True)
 866         retval &= self.check_prio_latencies(job['read'], clat=False, plus=True)
 867
 868         return retval
 869
 870
 871 class Test016(FioLatTest):
 872     """Test object for Test 16."""
 873
 874     def check(self):
 875         """Check Test 16 output."""
 876
 877         job = self.json_data['jobs'][0]
 878
 879         retval = True
 880         if not self.check_empty(job['read']):
 881             print("Unexpected read data found in output")
 882             retval = False
 883         if not self.check_empty(job['trim']):
 884             print("Unexpected trim data found in output")
 885             retval = False
 886
 887         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 888         retval &= self.check_prio_latencies(job['write'], clat=False, plus=True)
 889
 890         return retval
 891
 892
 893 class Test017(FioLatTest):
 894     """Test object for Test 17."""
 895
 896     def check(self):
 897         """Check Test 17 output."""
 898
 899         job = self.json_data['jobs'][0]
 900
 901         retval = True
 902         if not self.check_empty(job['write']):
 903             print("Unexpected write data found in output")
 904             retval = False
 905         if not self.check_empty(job['trim']):
 906             print("Unexpected trim data found in output")
 907             retval = False
 908
 909         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 910         retval &= self.check_prio_latencies(job['read'], plus=True)
 911
 912         return retval
 913
 914
 915 class Test018(FioLatTest):
 916     """Test object for Test 18."""
 917
 918     def check(self):
 919         """Check Test 18 output."""
 920
 921         job = self.json_data['jobs'][0]
 922
 923         retval = True
 924         if not self.check_empty(job['trim']):
 925             print("Unexpected trim data found in output")
 926             retval = False
 927
 928         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 929         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 930
 931         # We actually have json+ data but setting plus=False below avoids checking the
 932         # json+ bins which did not exist for clat and lat because this job is run with
 933         # clat_percentiles=0, lat_percentiles=0, However, we can still check the summary
 934         # statistics
 935         retval &= self.check_prio_latencies(job['write'], plus=False)
 936         retval &= self.check_prio_latencies(job['read'], plus=False)
 937
 938         return retval
 939
 940
 941 class Test019(FioLatTest):
 942     """Test object for Tests 19, 20."""
 943
 944     def check(self):
 945         """Check Test 19, 20 output."""
 946
 947         job = self.json_data['jobs'][0]
 948
 949         retval = True
 950         if 'read' in job or 'write'in job or 'trim' in job:
 951             print("Unexpected data direction found in fio output")
 952             retval = False
 953
 954         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 955         retval &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)
 956
 957         return retval
 958
 959
 960 def parse_args():
 961     """Parse command-line arguments."""
 962
 963     parser = argparse.ArgumentParser()
 964     parser.add_argument('-f', '--fio', help='path to file executable (e.g., ./fio)')
 965     parser.add_argument('-a', '--artifact-root', help='artifact root directory')
 966     parser.add_argument('-d', '--debug', help='enable debug output', action='store_true')
 967     parser.add_argument('-s', '--skip', nargs='+', type=int,
 968                         help='list of test(s) to skip')
 969     parser.add_argument('-o', '--run-only', nargs='+', type=int,
 970                         help='list of test(s) to run, skipping all others')
 971     args = parser.parse_args()
 972
 973     return args
 974
 975
 976 def main():
 977     """Run tests of fio latency percentile reporting"""
 978
 979     args = parse_args()
 980
 981     artifact_root = args.artifact_root if args.artifact_root else \
 982         "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
 983     os.mkdir(artifact_root)
 984     print("Artifact directory is %s" % artifact_root)
 985
 986     if args.fio:
 987         fio = str(Path(args.fio).absolute())
 988     else:
 989         fio = 'fio'
 990     print("fio path is %s" % fio)
 991
 992     if platform.system() == 'Linux':
 993         aio = 'libaio'
 994     elif platform.system() == 'Windows':
 995         aio = 'windowsaio'
 996     else:
 997         aio = 'posixaio'
 998
 999     test_list = [
1000         {
1001             # randread, null
1002             # enable slat, clat, lat
1003             # only clat and lat will appear because
1004             # because the null ioengine is syncrhonous
1005             "test_id": 1,
1006             "runtime": 2,
1007             "output-format": "json",
1008             "slat_percentiles": 1,
1009             "clat_percentiles": 1,
1010             "lat_percentiles": 1,
1011             "ioengine": 'null',
1012             'rw': 'randread',
1013             "test_obj": Test001,
1014         },
1015         {
1016             # randwrite, null
1017             # enable lat only
1018             "test_id": 2,
1019             "runtime": 2,
1020             "output-format": "json",
1021             "slat_percentiles": 0,
1022             "clat_percentiles": 0,
1023             "lat_percentiles": 1,
1024             "ioengine": 'null',
1025             'rw': 'randwrite',
1026             "test_obj": Test002,
1027         },
1028         {
1029             # randtrim, null
1030             # enable clat only
1031             "test_id": 3,
1032             "runtime": 2,
1033             "output-format": "json",
1034             "slat_percentiles": 0,
1035             "clat_percentiles": 1,
1036             "lat_percentiles": 0,
1037             "ioengine": 'null',
1038             'rw': 'randtrim',
1039             "test_obj": Test003,
1040         },
1041         {
1042             # randread, aio
1043             # enable slat, clat, lat
1044             # all will appear because liaio is asynchronous
1045             "test_id": 4,
1046             "runtime": 5,
1047             "output-format": "json+",
1048             "slat_percentiles": 1,
1049             "clat_percentiles": 1,
1050             "lat_percentiles": 1,
1051             "ioengine": aio,
1052             'rw': 'randread',
1053             "test_obj": Test004,
1054         },
1055         {
1056             # randwrite, aio
1057             # enable only clat, lat
1058             "test_id": 5,
1059             "runtime": 5,
1060             "output-format": "json+",
1061             "slat_percentiles": 0,
1062             "clat_percentiles": 1,
1063             "lat_percentiles": 1,
1064             "ioengine": aio,
1065             'rw': 'randwrite',
1066             "test_obj": Test005,
1067         },
1068         {
1069             # randread, aio
1070             # by default only clat should appear
1071             "test_id": 6,
1072             "runtime": 5,
1073             "output-format": "json+",
1074             "ioengine": aio,
1075             'rw': 'randread',
1076             "test_obj": Test006,
1077         },
1078         {
1079             # 50/50 r/w, aio
1080             # enable only slat
1081             "test_id": 7,
1082             "runtime": 5,
1083             "output-format": "json+",
1084             "slat_percentiles": 1,
1085             "clat_percentiles": 0,
1086             "lat_percentiles": 0,
1087             "ioengine": aio,
1088             'rw': 'randrw',
1089             "test_obj": Test007,
1090         },
1091         {
1092             # 50/50 r/w, aio, unified_rw_reporting
1093             # enable slat, clat, lat
1094             "test_id": 8,
1095             "runtime": 5,
1096             "output-format": "json+",
1097             "slat_percentiles": 1,
1098             "clat_percentiles": 1,
1099             "lat_percentiles": 1,
1100             "ioengine": aio,
1101             'rw': 'randrw',
1102             'unified_rw_reporting': 1,
1103             "test_obj": Test008,
1104         },
1105         {
1106             # randwrite, null
1107             # enable slat, clat, lat
1108             # fsync
1109             "test_id": 9,
1110             "runtime": 2,
1111             "output-format": "json+",
1112             "slat_percentiles": 1,
1113             "clat_percentiles": 1,
1114             "lat_percentiles": 1,
1115             "ioengine": 'null',
1116             'rw': 'randwrite',
1117             'fsync': 32,
1118             "test_obj": Test009,
1119         },
1120         {
1121             # 50/50 r/w, aio
1122             # enable slat, clat, lat
1123             "test_id": 10,
1124             "runtime": 5,
1125             "output-format": "terse,json+",
1126             "slat_percentiles": 1,
1127             "clat_percentiles": 1,
1128             "lat_percentiles": 1,
1129             "ioengine": aio,
1130             'rw': 'randrw',
1131             "test_obj": Test010,
1132         },
1133         {
1134             # 50/50 r/w, aio
1135             # enable only lat
1136             "test_id": 11,
1137             "runtime": 5,
1138             "output-format": "terse,json+",
1139             "slat_percentiles": 0,
1140             "clat_percentiles": 0,
1141             "lat_percentiles": 1,
1142             "ioengine": aio,
1143             'rw': 'randrw',
1144             "test_obj": Test011,
1145         },
1146         {
1147             # randread, null
1148             # enable slat, clat, lat
1149             # only clat and lat will appear because
1150             # because the null ioengine is syncrhonous
1151             # same as Test 1 except
1152             # numjobs = 4 to test sum_thread_stats() changes
1153             "test_id": 12,
1154             "runtime": 2,
1155             "output-format": "json",
1156             "slat_percentiles": 1,
1157             "clat_percentiles": 1,
1158             "lat_percentiles": 1,
1159             "ioengine": 'null',
1160             'rw': 'randread',
1161             'numjobs': 4,
1162             "test_obj": Test001,
1163         },
1164         {
1165             # randread, aio
1166             # enable slat, clat, lat
1167             # all will appear because liaio is asynchronous
1168             # same as Test 4 except
1169             # numjobs = 4 to test sum_thread_stats() changes
1170             "test_id": 13,
1171             "runtime": 5,
1172             "output-format": "json+",
1173             "slat_percentiles": 1,
1174             "clat_percentiles": 1,
1175             "lat_percentiles": 1,
1176             "ioengine": aio,
1177             'rw': 'randread',
1178             'numjobs': 4,
1179             "test_obj": Test004,
1180         },
1181         {
1182             # 50/50 r/w, aio, unified_rw_reporting
1183             # enable slat, clat, lata
1184             # same as Test 8 except
1185             # numjobs = 4 to test sum_thread_stats() changes
1186             "test_id": 14,
1187             "runtime": 5,
1188             "output-format": "json+",
1189             "slat_percentiles": 1,
1190             "clat_percentiles": 1,
1191             "lat_percentiles": 1,
1192             "ioengine": aio,
1193             'rw': 'randrw',
1194             'unified_rw_reporting': 1,
1195             'numjobs': 4,
1196             "test_obj": Test008,
1197         },
1198         {
1199             # randread, aio
1200             # enable slat, clat, lat
1201             # all will appear because liaio is asynchronous
1202             # same as Test 4 except add cmdprio_percentage
1203             "test_id": 15,
1204             "runtime": 5,
1205             "output-format": "json+",
1206             "slat_percentiles": 1,
1207             "clat_percentiles": 1,
1208             "lat_percentiles": 1,
1209             "ioengine": aio,
1210             'rw': 'randread',
1211             'cmdprio_percentage': 50,
1212             "test_obj": Test015,
1213         },
1214         {
1215             # randwrite, aio
1216             # enable only clat, lat
1217             # same as Test 5 except add cmdprio_percentage
1218             "test_id": 16,
1219             "runtime": 5,
1220             "output-format": "json+",
1221             "slat_percentiles": 0,
1222             "clat_percentiles": 1,
1223             "lat_percentiles": 1,
1224             "ioengine": aio,
1225             'rw': 'randwrite',
1226             'cmdprio_percentage': 50,
1227             "test_obj": Test016,
1228         },
1229         {
1230             # randread, aio
1231             # by default only clat should appear
1232             # same as Test 6 except add cmdprio_percentage
1233             "test_id": 17,
1234             "runtime": 5,
1235             "output-format": "json+",
1236             "ioengine": aio,
1237             'rw': 'randread',
1238             'cmdprio_percentage': 50,
1239             "test_obj": Test017,
1240         },
1241         {
1242             # 50/50 r/w, aio
1243             # enable only slat
1244             # same as Test 7 except add cmdprio_percentage
1245             "test_id": 18,
1246             "runtime": 5,
1247             "output-format": "json+",
1248             "slat_percentiles": 1,
1249             "clat_percentiles": 0,
1250             "lat_percentiles": 0,
1251             "ioengine": aio,
1252             'rw': 'randrw',
1253             'cmdprio_percentage': 50,
1254             "test_obj": Test018,
1255         },
1256         {
1257             # 50/50 r/w, aio, unified_rw_reporting
1258             # enable slat, clat, lat
1259             # same as Test 8 except add cmdprio_percentage
1260             "test_id": 19,
1261             "runtime": 5,
1262             "output-format": "json+",
1263             "slat_percentiles": 1,
1264             "clat_percentiles": 1,
1265             "lat_percentiles": 1,
1266             "ioengine": aio,
1267             'rw': 'randrw',
1268             'unified_rw_reporting': 1,
1269             'cmdprio_percentage': 50,
1270             "test_obj": Test019,
1271         },
1272         {
1273             # 50/50 r/w, aio, unified_rw_reporting
1274             # enable slat, clat, lat
1275             # same as Test 19 except
1276             # add numjobs = 4 to test sum_thread_stats() changes
1277             "test_id": 20,
1278             "runtime": 5,
1279             "output-format": "json+",
1280             "slat_percentiles": 1,
1281             "clat_percentiles": 1,
1282             "lat_percentiles": 1,
1283             "ioengine": aio,
1284             'rw': 'randrw',
1285             'unified_rw_reporting': 1,
1286             'cmdprio_percentage': 50,
1287             'numjobs': 4,
1288             "test_obj": Test019,
1289         },
1290     ]
1291
1292     passed = 0
1293     failed = 0
1294     skipped = 0
1295
1296     for test in test_list:
1297         if (args.skip and test['test_id'] in args.skip) or \
1298            (args.run_only and test['test_id'] not in args.run_only):
1299             skipped = skipped + 1
1300             outcome = 'SKIPPED (User request)'
1301         elif platform.system() != 'Linux' and 'cmdprio_percentage' in test:
1302             skipped = skipped + 1
1303             outcome = 'SKIPPED (Linux required for cmdprio_percentage tests)'
1304         else:
1305             test_obj = test['test_obj'](artifact_root, test, args.debug)
1306             status = test_obj.run_fio(fio)
1307             if status:
1308                 status = test_obj.check()
1309             if status:
1310                 passed = passed + 1
1311                 outcome = 'PASSED'
1312             else:
1313                 failed = failed + 1
1314                 outcome = 'FAILED'
1315
1316         print("**********Test {0} {1}**********".format(test['test_id'], outcome))
1317
1318     print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))
1319
1320     sys.exit(failed)
1321
1322
# Run the tests only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()