t/latency_percentiles.py

   1 #!/usr/bin/env python3
   2 # SPDX-License-Identifier: GPL-2.0-only
   3 #
   4 # Copyright (c) 2020 Western Digital Corporation or its affiliates.
   5 #
   6 """
   7 # latency_percentiles.py
   8 #
   9 # Test the code that produces latency percentiles
  10 # This is mostly to test the code changes to allow reporting
  11 # of slat, clat, and lat percentiles
  12 #
  13 # USAGE
   14 # python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
  15 #
  16 #
  17 # Test scenarios:
  18 #
  19 # - DONE json
  20 #   unified rw reporting
  21 #   compare with latency log
  22 #   try various combinations of the ?lat_percentile options
  23 #   null, aio
  24 #   r, w, t
  25 # - DONE json+
  26 #   check presence of latency bins
  27 #   if the json percentiles match those from the raw data
  28 #   then the latency bin values and counts are probably ok
  29 # - DONE terse
  30 #   produce both terse, JSON output and confirm that they match
  31 #   lat only; both lat and clat
  32 # - DONE sync_lat
  33 #   confirm that sync_lat data appears
  34 # - MANUAL TESTING normal output:
  35 #       null ioengine
  36 #           enable all, but only clat and lat appear
  37 #           enable subset of latency types
  38 #           read, write, trim, unified
  39 #       libaio ioengine
  40 #           enable all latency types
  41 #           enable subset of latency types
  42 #           read, write, trim, unified
  43 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  44 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  45 # echo confirm that clat and lat percentiles appear
  46 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  47 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
  48 # echo confirm that only lat percentiles appear
  49 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  50 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
  51 # echo confirm that only clat percentiles appear
  52 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  53 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  54 # echo confirm that slat, clat, lat percentiles appear
  55 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  56 #       --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
  57 # echo confirm that clat and lat percentiles appear
  58 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
   59 #       --ioengine=libaio --rw=randrw
  60 # echo confirm that clat percentiles appear for reads and writes
  61 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  62 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
  63 # echo confirm that slat percentiles appear for both reads and writes
  64 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  65 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  66 #       --rw=randrw --unified_rw_reporting=1
  67 # echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
  68 #./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  69 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  70 #       --rw=randrw --fsync=32
  71 # echo confirm that fsync latencies appear
  72 """
  73
  74 import os
  75 import csv
  76 import sys
  77 import json
  78 import math
  79 import time
  80 import argparse
  81 import platform
  82 import subprocess
  83 from pathlib import Path
  84
  85
  86 class FioLatTest():
  87     """fio latency percentile test."""
  88
  89     def __init__(self, artifact_root, test_options, debug):
  90         """
  91         artifact_root   root directory for artifacts (subdirectory will be created under here)
  92         test            test specification
  93         """
  94         self.artifact_root = artifact_root
  95         self.test_options = test_options
  96         self.debug = debug
  97         self.filename = None
  98         self.json_data = None
  99         self.terse_data = None
 100
 101         self.test_dir = os.path.join(self.artifact_root,
 102                                      "{:03d}".format(self.test_options['test_id']))
 103         if not os.path.exists(self.test_dir):
 104             os.mkdir(self.test_dir)
 105
 106         self.filename = "latency{:03d}".format(self.test_options['test_id'])
 107
 108     def run_fio(self, fio_path):
 109         """Run a test."""
 110
 111         fio_args = [
 112             "--max-jobs=16",
 113             "--name=latency",
 114             "--randrepeat=0",
 115             "--norandommap",
 116             "--time_based",
 117             "--size=16M",
 118             "--rwmixread=50",
 119             "--group_reporting=1",
 120             "--write_lat_log={0}".format(self.filename),
 121             "--output={0}.out".format(self.filename),
 122             "--ioengine={ioengine}".format(**self.test_options),
 123             "--rw={rw}".format(**self.test_options),
 124             "--runtime={runtime}".format(**self.test_options),
 125             "--output-format={output-format}".format(**self.test_options),
 126         ]
 127         for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
 128                     'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs', 'cmdprio_percentage']:
 129             if opt in self.test_options:
 130                 option = '--{0}={{{0}}}'.format(opt)
 131                 fio_args.append(option.format(**self.test_options))
 132
 133         command = [fio_path] + fio_args
 134         with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
 135                 command_file:
 136             command_file.write("%s\n" % command)
 137
 138         passed = True
 139         stdout_file = open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)), "w+")
 140         stderr_file = open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)), "w+")
 141         exitcode_file = open(os.path.join(self.test_dir,
 142                                           "{0}.exitcode".format(self.filename)), "w+")
 143         try:
 144             proc = None
 145             # Avoid using subprocess.run() here because when a timeout occurs,
 146             # fio will be stopped with SIGKILL. This does not give fio a
 147             # chance to clean up and means that child processes may continue
 148             # running and submitting IO.
 149             proc = subprocess.Popen(command,
 150                                     stdout=stdout_file,
 151                                     stderr=stderr_file,
 152                                     cwd=self.test_dir,
 153                                     universal_newlines=True)
 154             proc.communicate(timeout=300)
 155             exitcode_file.write('{0}\n'.format(proc.returncode))
 156             passed &= (proc.returncode == 0)
 157         except subprocess.TimeoutExpired:
 158             proc.terminate()
 159             proc.communicate()
 160             assert proc.poll()
 161             print("Timeout expired")
 162             passed = False
 163         except Exception:
 164             if proc:
 165                 if not proc.poll():
 166                     proc.terminate()
 167                     proc.communicate()
 168             print("Exception: %s" % sys.exc_info())
 169             passed = False
 170         finally:
 171             stdout_file.close()
 172             stderr_file.close()
 173             exitcode_file.close()
 174
 175         if passed:
 176             if 'json' in self.test_options['output-format']:
 177                 if not self.get_json():
 178                     print('Unable to decode JSON data')
 179                     passed = False
 180             if 'terse' in self.test_options['output-format']:
 181                 if not self.get_terse():
 182                     print('Unable to decode terse data')
 183                     passed = False
 184
 185         return passed
 186
 187     def get_json(self):
 188         """Convert fio JSON output into a python JSON object"""
 189
 190         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 191         with open(filename, 'r') as file:
 192             file_data = file.read()
 193
 194         #
 195         # Sometimes fio informational messages are included at the top of the
 196         # JSON output, especially under Windows. Try to decode output as JSON
 197         # data, lopping off up to the first four lines
 198         #
 199         lines = file_data.splitlines()
 200         for i in range(5):
 201             file_data = '\n'.join(lines[i:])
 202             try:
 203                 self.json_data = json.loads(file_data)
 204             except json.JSONDecodeError:
 205                 continue
 206             else:
 207                 return True
 208
 209         return False
 210
 211     def get_terse(self):
 212         """Read fio output and return terse format data."""
 213
 214         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 215         with open(filename, 'r') as file:
 216             file_data = file.read()
 217
 218         #
 219         # Read the first few lines and see if any of them begin with '3;'
 220         # If so, the line is probably terse output. Obviously, this only
 221         # works for fio terse version 3 and it does not work for
 222         # multi-line terse output
 223         #
 224         lines = file_data.splitlines()
 225         for i in range(8):
 226             file_data = lines[i]
 227             if file_data.startswith('3;'):
 228                 self.terse_data = file_data.split(';')
 229                 return True
 230
 231         return False
 232
 233     def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
 234                         unified=False):
 235         """Check fio latency data.
 236
 237         ddir                data direction to check (0=read, 1=write, 2=trim)
 238         slat                True if submission latency data available to check
 239         clat                True if completion latency data available to check
 240         tlat                True of total latency data available to check
 241         plus                True if we actually have json+ format data where additional checks can
 242                             be carried out
 243         unified             True if fio is reporting unified r/w data
 244         """
 245
 246         types = {
 247             'slat': slat,
 248             'clat': clat,
 249             'lat': tlat
 250         }
 251
 252         retval = True
 253
 254         for lat in ['slat', 'clat', 'lat']:
 255             this_iter = True
 256             if not types[lat]:
 257                 if 'percentile' in jsondata[lat+'_ns']:
 258                     this_iter = False
 259                     print('unexpected %s percentiles found' % lat)
 260                 else:
 261                     print("%s percentiles skipped" % lat)
 262                 continue
 263             else:
 264                 if 'percentile' not in jsondata[lat+'_ns']:
 265                     this_iter = False
 266                     print('%s percentiles not found in fio output' % lat)
 267
 268             #
 269             # Check only for the presence/absence of json+
 270             # latency bins. Future work can check the
 271             # accurracy of the bin values and counts.
 272             #
 273             # Because the latency percentiles are based on
 274             # the bins, we can be confident that the bin
 275             # values and counts are correct if fio's
 276             # latency percentiles match what we compute
 277             # from the raw data.
 278             #
 279             if plus:
 280                 if 'bins' not in jsondata[lat+'_ns']:
 281                     print('bins not found with json+ output format')
 282                     this_iter = False
 283                 else:
 284                     if not self.check_jsonplus(jsondata[lat+'_ns']):
 285                         this_iter = False
 286             else:
 287                 if 'bins' in jsondata[lat+'_ns']:
 288                     print('json+ bins found with json output format')
 289                     this_iter = False
 290
 291             latencies = []
 292             for i in range(10):
 293                 lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
 294                 if not os.path.exists(lat_file):
 295                     break
 296                 with open(lat_file, 'r', newline='') as file:
 297                     reader = csv.reader(file)
 298                     for line in reader:
 299                         if unified or int(line[2]) == ddir:
 300                             latencies.append(int(line[1]))
 301
 302             if int(jsondata['total_ios']) != len(latencies):
 303                 this_iter = False
 304                 print('%s: total_ios = %s, latencies logged = %d' % \
 305                         (lat, jsondata['total_ios'], len(latencies)))
 306             elif self.debug:
 307                 print("total_ios %s match latencies logged" % jsondata['total_ios'])
 308
 309             latencies.sort()
 310             ptiles = jsondata[lat+'_ns']['percentile']
 311
 312             for percentile in ptiles.keys():
 313                 #
 314                 # numpy.percentile(latencies, float(percentile),
 315                 #       interpolation='higher')
 316                 # produces values that mostly match what fio reports
 317                 # however, in the tails of the distribution, the values produced
 318                 # by fio's and numpy.percentile's algorithms are occasionally off
 319                 # by one latency measurement. So instead of relying on the canned
 320                 # numpy.percentile routine, implement here fio's algorithm
 321                 #
 322                 rank = math.ceil(float(percentile)/100 * len(latencies))
 323                 if rank > 0:
 324                     index = rank - 1
 325                 else:
 326                     index = 0
 327                 value = latencies[int(index)]
 328                 fio_val = int(ptiles[percentile])
 329                 # The theory in stat.h says that the proportional error will be
 330                 # less than 1/128
 331                 if not self.similar(fio_val, value):
 332                     delta = abs(fio_val - value) / value
 333                     print("Error with %s %sth percentile: "
 334                           "fio: %d, expected: %d, proportional delta: %f" %
 335                           (lat, percentile, fio_val, value, delta))
 336                     print("Rank: %d, index: %d" % (rank, index))
 337                     this_iter = False
 338                 elif self.debug:
 339                     print('%s %sth percentile values match: %d, %d' %
 340                           (lat, percentile, fio_val, value))
 341
 342             if this_iter:
 343                 print("%s percentiles match" % lat)
 344             else:
 345                 retval = False
 346
 347         return retval
 348
 349     @staticmethod
 350     def check_empty(job):
 351         """
 352         Make sure JSON data is empty.
 353
 354         Some data structures should be empty. This function makes sure that they are.
 355
 356         job         JSON object that we need to check for emptiness
 357         """
 358
 359         return job['total_ios'] == 0 and \
 360                 job['slat_ns']['N'] == 0 and \
 361                 job['clat_ns']['N'] == 0 and \
 362                 job['lat_ns']['N'] == 0
 363
 364     def check_nocmdprio_lat(self, job):
 365         """
 366         Make sure no high/low priority latencies appear.
 367
 368         job         JSON object to check
 369         """
 370
 371         for ddir in ['read', 'write', 'trim']:
 372             if ddir in job:
 373                 if 'lat_high_prio' in job[ddir] or 'lat_low_prio' in job[ddir] or \
 374                     'clat_high_prio' in job[ddir] or 'clat_low_prio' in job[ddir]:
 375                     print("Unexpected high/low priority latencies found in %s output" % ddir)
 376                     return False
 377
 378         if self.debug:
 379             print("No high/low priority latencies found")
 380
 381         return True
 382
 383     @staticmethod
 384     def similar(approximation, actual):
 385         """
 386         Check whether the approximate values recorded by fio are within the theoretical bound.
 387
 388         Since it is impractical to store exact latency measurements for each and every IO, fio
 389         groups similar latency measurements into variable-sized bins. The theory in stat.h says
 390         that the proportional error will be less than 1/128. This function checks whether this
 391         is true.
 392
 393         TODO This test will fail when comparing a value from the largest latency bin against its
 394         actual measurement. Find some way to detect this and avoid failing.
 395
 396         approximation   value of the bin used by fio to store a given latency
 397         actual          actual latency value
 398         """
 399
 400         # Avoid a division by zero. The smallest latency values have no error.
 401         if actual == 0:
 402             return approximation == 0
 403
 404         delta = abs(approximation - actual) / actual
 405         return delta <= 1/128
 406
 407     def check_jsonplus(self, jsondata):
 408         """Check consistency of json+ data
 409
 410         When we have json+ data we can check the min value, max value, and
 411         sample size reported by fio
 412
 413         jsondata            json+ data that we need to check
 414         """
 415
 416         retval = True
 417
 418         keys = [int(k) for k in jsondata['bins'].keys()]
 419         values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
 420         smallest = min(keys)
 421         biggest = max(keys)
 422         sampsize = sum(values)
 423
 424         if not self.similar(jsondata['min'], smallest):
 425             retval = False
 426             print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
 427         elif self.debug:
 428             print('json+ min values match: %d' % jsondata['min'])
 429
 430         if not self.similar(jsondata['max'], biggest):
 431             retval = False
 432             print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
 433         elif self.debug:
 434             print('json+ max values match: %d' % jsondata['max'])
 435
 436         if sampsize != jsondata['N']:
 437             retval = False
 438             print('reported sample size %d does not match json+ total count %d' % \
 439                     (jsondata['N'], sampsize))
 440         elif self.debug:
 441             print('json+ sample sizes match: %d' % sampsize)
 442
 443         return retval
 444
 445     def check_sync_lat(self, jsondata, plus=False):
 446         """Check fsync latency percentile data.
 447
 448         All we can check is that some percentiles are reported, unless we have json+ data.
 449         If we actually have json+ data then we can do more checking.
 450
 451         jsondata        JSON data for fsync operations
 452         plus            True if we actually have json+ data
 453         """
 454         retval = True
 455
 456         if 'percentile' not in jsondata['lat_ns']:
 457             print("Sync percentile data not found")
 458             return False
 459
 460         if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
 461             retval = False
 462             print('Mismatch between total_ios and lat_ns sample size')
 463         elif self.debug:
 464             print('sync sample sizes match: %d' % jsondata['total_ios'])
 465
 466         if not plus:
 467             if 'bins' in jsondata['lat_ns']:
 468                 print('Unexpected json+ bin data found')
 469                 return False
 470
 471         if not self.check_jsonplus(jsondata['lat_ns']):
 472             retval = False
 473
 474         return retval
 475
 476     def check_terse(self, terse, jsondata):
 477         """Compare terse latencies with JSON latencies.
 478
 479         terse           terse format data for checking
 480         jsondata        JSON format data for checking
 481         """
 482
 483         retval = True
 484
 485         for lat in terse:
 486             split = lat.split('%')
 487             pct = split[0]
 488             terse_val = int(split[1][1:])
 489             json_val = math.floor(jsondata[pct]/1000)
 490             if terse_val != json_val:
 491                 retval = False
 492                 print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
 493                         (pct, jsondata[pct], json_val, terse_val))
 494             elif self.debug:
 495                 print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))
 496
 497         return retval
 498
 499     def check_prio_latencies(self, jsondata, clat=True, plus=False):
 500         """Check consistency of high/low priority latencies.
 501
 502         clat                True if we should check clat data; other check lat data
 503         plus                True if we have json+ format data where additional checks can
 504                             be carried out
 505         unified             True if fio is reporting unified r/w data
 506         """
 507
 508         if clat:
 509             high = 'clat_high_prio'
 510             low = 'clat_low_prio'
 511             combined = 'clat_ns'
 512         else:
 513             high = 'lat_high_prio'
 514             low = 'lat_low_prio'
 515             combined = 'lat_ns'
 516
 517         if not high in jsondata or not low in jsondata or not combined in jsondata:
 518             print("Error identifying high/low priority latencies")
 519             return False
 520
 521         if jsondata[high]['N'] + jsondata[low]['N'] != jsondata[combined]['N']:
 522             print("High %d + low %d != combined sample size %d" % \
 523                     (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
 524             return False
 525         elif self.debug:
 526             print("High %d + low %d == combined sample size %d" % \
 527                     (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
 528
 529         if min(jsondata[high]['min'], jsondata[low]['min']) != jsondata[combined]['min']:
 530             print("Min of high %d, low %d min latencies does not match min %d from combined data" % \
 531                     (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
 532             return False
 533         elif self.debug:
 534             print("Min of high %d, low %d min latencies matches min %d from combined data" % \
 535                     (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
 536
 537         if max(jsondata[high]['max'], jsondata[low]['max']) != jsondata[combined]['max']:
 538             print("Max of high %d, low %d max latencies does not match max %d from combined data" % \
 539                     (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
 540             return False
 541         elif self.debug:
 542             print("Max of high %d, low %d max latencies matches max %d from combined data" % \
 543                     (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
 544
 545         weighted_avg = (jsondata[high]['mean'] * jsondata[high]['N'] + \
 546                         jsondata[low]['mean'] * jsondata[low]['N']) / jsondata[combined]['N']
 547         delta = abs(weighted_avg - jsondata[combined]['mean'])
 548         if (delta / jsondata[combined]['mean']) > 0.0001:
 549             print("Difference between weighted average %f of high, low means "
 550                   "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
 551             return False
 552         elif self.debug:
 553             print("Weighted average %f of high, low means matches actual mean %f" % \
 554                     (weighted_avg, jsondata[combined]['mean']))
 555
 556         if plus:
 557             if not self.check_jsonplus(jsondata[high]):
 558                 return False
 559             if not self.check_jsonplus(jsondata[low]):
 560                 return False
 561
 562             bins = {**jsondata[high]['bins'], **jsondata[low]['bins']}
 563             for duration in bins.keys():
 564                 if duration in jsondata[high]['bins'] and duration in jsondata[low]['bins']:
 565                     bins[duration] = jsondata[high]['bins'][duration] + \
 566                             jsondata[low]['bins'][duration]
 567
 568             if len(bins) != len(jsondata[combined]['bins']):
 569                 print("Number of combined high/low bins does not match number of overall bins")
 570                 return False
 571             elif self.debug:
 572                 print("Number of bins from merged high/low data matches number of overall bins")
 573
 574             for duration in bins.keys():
 575                 if bins[duration] != jsondata[combined]['bins'][duration]:
 576                     print("Merged high/low count does not match overall count for duration %d" \
 577                             % duration)
 578                     return False
 579
 580         print("Merged high/low priority latency data match combined latency data")
 581         return True
 582
 583     def check(self):
 584         """Check test output."""
 585
 586         raise NotImplementedError()
 587
 588
 589 class Test001(FioLatTest):
 590     """Test object for Test 1."""
 591
 592     def check(self):
 593         """Check Test 1 output."""
 594
 595         job = self.json_data['jobs'][0]
 596
 597         retval = True
 598         if not self.check_empty(job['write']):
 599             print("Unexpected write data found in output")
 600             retval = False
 601         if not self.check_empty(job['trim']):
 602             print("Unexpected trim data found in output")
 603             retval = False
 604         if not self.check_nocmdprio_lat(job):
 605             print("Unexpected high/low priority latencies found")
 606             retval = False
 607
 608         retval &= self.check_latencies(job['read'], 0, slat=False)
 609
 610         return retval
 611
 612
 613 class Test002(FioLatTest):
 614     """Test object for Test 2."""
 615
 616     def check(self):
 617         """Check Test 2 output."""
 618
 619         job = self.json_data['jobs'][0]
 620
 621         retval = True
 622         if not self.check_empty(job['read']):
 623             print("Unexpected read data found in output")
 624             retval = False
 625         if not self.check_empty(job['trim']):
 626             print("Unexpected trim data found in output")
 627             retval = False
 628         if not self.check_nocmdprio_lat(job):
 629             print("Unexpected high/low priority latencies found")
 630             retval = False
 631
 632         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False)
 633
 634         return retval
 635
 636
 637 class Test003(FioLatTest):
 638     """Test object for Test 3."""
 639
 640     def check(self):
 641         """Check Test 3 output."""
 642
 643         job = self.json_data['jobs'][0]
 644
 645         retval = True
 646         if not self.check_empty(job['read']):
 647             print("Unexpected read data found in output")
 648             retval = False
 649         if not self.check_empty(job['write']):
 650             print("Unexpected write data found in output")
 651             retval = False
 652         if not self.check_nocmdprio_lat(job):
 653             print("Unexpected high/low priority latencies found")
 654             retval = False
 655
 656         retval &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)
 657
 658         return retval
 659
 660
 661 class Test004(FioLatTest):
 662     """Test object for Tests 4, 13."""
 663
 664     def check(self):
 665         """Check Test 4, 13 output."""
 666
 667         job = self.json_data['jobs'][0]
 668
 669         retval = True
 670         if not self.check_empty(job['write']):
 671             print("Unexpected write data found in output")
 672             retval = False
 673         if not self.check_empty(job['trim']):
 674             print("Unexpected trim data found in output")
 675             retval = False
 676         if not self.check_nocmdprio_lat(job):
 677             print("Unexpected high/low priority latencies found")
 678             retval = False
 679
 680         retval &= self.check_latencies(job['read'], 0, plus=True)
 681
 682         return retval
 683
 684
 685 class Test005(FioLatTest):
 686     """Test object for Test 5."""
 687
 688     def check(self):
 689         """Check Test 5 output."""
 690
 691         job = self.json_data['jobs'][0]
 692
 693         retval = True
 694         if not self.check_empty(job['read']):
 695             print("Unexpected read data found in output")
 696             retval = False
 697         if not self.check_empty(job['trim']):
 698             print("Unexpected trim data found in output")
 699             retval = False
 700         if not self.check_nocmdprio_lat(job):
 701             print("Unexpected high/low priority latencies found")
 702             retval = False
 703
 704         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 705
 706         return retval
 707
 708
 709 class Test006(FioLatTest):
 710     """Test object for Test 6."""
 711
 712     def check(self):
 713         """Check Test 6 output."""
 714
 715         job = self.json_data['jobs'][0]
 716
 717         retval = True
 718         if not self.check_empty(job['write']):
 719             print("Unexpected write data found in output")
 720             retval = False
 721         if not self.check_empty(job['trim']):
 722             print("Unexpected trim data found in output")
 723             retval = False
 724         if not self.check_nocmdprio_lat(job):
 725             print("Unexpected high/low priority latencies found")
 726             retval = False
 727
 728         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 729
 730         return retval
 731
 732
 733 class Test007(FioLatTest):
 734     """Test object for Test 7."""
 735
 736     def check(self):
 737         """Check Test 7 output."""
 738
 739         job = self.json_data['jobs'][0]
 740
 741         retval = True
 742         if not self.check_empty(job['trim']):
 743             print("Unexpected trim data found in output")
 744             retval = False
 745         if not self.check_nocmdprio_lat(job):
 746             print("Unexpected high/low priority latencies found")
 747             retval = False
 748
 749         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 750         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 751
 752         return retval
 753
 754
 755 class Test008(FioLatTest):
 756     """Test object for Tests 8, 14."""
 757
 758     def check(self):
 759         """Check Test 8, 14 output."""
 760
 761         job = self.json_data['jobs'][0]
 762
 763         retval = True
 764         if 'read' in job or 'write'in job or 'trim' in job:
 765             print("Unexpected data direction found in fio output")
 766             retval = False
 767         if not self.check_nocmdprio_lat(job):
 768             print("Unexpected high/low priority latencies found")
 769             retval = False
 770
 771         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 772
 773         return retval
 774
 775
 776 class Test009(FioLatTest):
 777     """Test object for Test 9."""
 778
 779     def check(self):
 780         """Check Test 9 output."""
 781
 782         job = self.json_data['jobs'][0]
 783
 784         retval = True
 785         if not self.check_empty(job['read']):
 786             print("Unexpected read data found in output")
 787             retval = False
 788         if not self.check_empty(job['trim']):
 789             print("Unexpected trim data found in output")
 790             retval = False
 791         if not self.check_sync_lat(job['sync'], plus=True):
 792             print("Error checking fsync latency data")
 793             retval = False
 794         if not self.check_nocmdprio_lat(job):
 795             print("Unexpected high/low priority latencies found")
 796             retval = False
 797
 798         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 799
 800         return retval
 801
 802
 803 class Test010(FioLatTest):
 804     """Test object for Test 10."""
 805
 806     def check(self):
 807         """Check Test 10 output."""
 808
 809         job = self.json_data['jobs'][0]
 810
 811         retval = True
 812         if not self.check_empty(job['trim']):
 813             print("Unexpected trim data found in output")
 814             retval = False
 815         if not self.check_nocmdprio_lat(job):
 816             print("Unexpected high/low priority latencies found")
 817             retval = False
 818
 819         retval &= self.check_latencies(job['read'], 0, plus=True)
 820         retval &= self.check_latencies(job['write'], 1, plus=True)
 821         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 822         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 823         # Terse data checking only works for default percentiles.
 824         # This needs to be changed if something other than the default is ever used.
 825
 826         return retval
 827
 828
 829 class Test011(FioLatTest):
 830     """Test object for Test 11."""
 831
 832     def check(self):
 833         """Check Test 11 output."""
 834
 835         job = self.json_data['jobs'][0]
 836
 837         retval = True
 838         if not self.check_empty(job['trim']):
 839             print("Unexpected trim data found in output")
 840             retval = False
 841         if not self.check_nocmdprio_lat(job):
 842             print("Unexpected high/low priority latencies found")
 843             retval = False
 844
 845         retval &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
 846         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
 847         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 848         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 849         # Terse data checking only works for default percentiles.
 850         # This needs to be changed if something other than the default is ever used.
 851
 852         return retval
 853
 854
 855 class Test015(FioLatTest):
 856     """Test object for Test 15."""
 857
 858     def check(self):
 859         """Check Test 15 output."""
 860
 861         job = self.json_data['jobs'][0]
 862
 863         retval = True
 864         if not self.check_empty(job['write']):
 865             print("Unexpected write data found in output")
 866             retval = False
 867         if not self.check_empty(job['trim']):
 868             print("Unexpected trim data found in output")
 869             retval = False
 870
 871         retval &= self.check_latencies(job['read'], 0, plus=True)
 872         retval &= self.check_prio_latencies(job['read'], clat=False, plus=True)
 873
 874         return retval
 875
 876
 877 class Test016(FioLatTest):
 878     """Test object for Test 16."""
 879
 880     def check(self):
 881         """Check Test 16 output."""
 882
 883         job = self.json_data['jobs'][0]
 884
 885         retval = True
 886         if not self.check_empty(job['read']):
 887             print("Unexpected read data found in output")
 888             retval = False
 889         if not self.check_empty(job['trim']):
 890             print("Unexpected trim data found in output")
 891             retval = False
 892
 893         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 894         retval &= self.check_prio_latencies(job['write'], clat=False, plus=True)
 895
 896         return retval
 897
 898
 899 class Test017(FioLatTest):
 900     """Test object for Test 17."""
 901
 902     def check(self):
 903         """Check Test 17 output."""
 904
 905         job = self.json_data['jobs'][0]
 906
 907         retval = True
 908         if not self.check_empty(job['write']):
 909             print("Unexpected write data found in output")
 910             retval = False
 911         if not self.check_empty(job['trim']):
 912             print("Unexpected trim data found in output")
 913             retval = False
 914
 915         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 916         retval &= self.check_prio_latencies(job['read'], plus=True)
 917
 918         return retval
 919
 920
 921 class Test018(FioLatTest):
 922     """Test object for Test 18."""
 923
 924     def check(self):
 925         """Check Test 18 output."""
 926
 927         job = self.json_data['jobs'][0]
 928
 929         retval = True
 930         if not self.check_empty(job['trim']):
 931             print("Unexpected trim data found in output")
 932             retval = False
 933
 934         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 935         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 936
 937         # We actually have json+ data but setting plus=False below avoids checking the
 938         # json+ bins which did not exist for clat and lat because this job is run with
 939         # clat_percentiles=0, lat_percentiles=0, However, we can still check the summary
 940         # statistics
 941         retval &= self.check_prio_latencies(job['write'], plus=False)
 942         retval &= self.check_prio_latencies(job['read'], plus=False)
 943
 944         return retval
 945
 946
 947 class Test019(FioLatTest):
 948     """Test object for Tests 19, 20."""
 949
 950     def check(self):
 951         """Check Test 19, 20 output."""
 952
 953         job = self.json_data['jobs'][0]
 954
 955         retval = True
 956         if 'read' in job or 'write'in job or 'trim' in job:
 957             print("Unexpected data direction found in fio output")
 958             retval = False
 959
 960         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 961         retval &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)
 962
 963         return retval
 964
 965
 966 def parse_args():
 967     """Parse command-line arguments."""
 968
 969     parser = argparse.ArgumentParser()
 970     parser.add_argument('-f', '--fio', help='path to file executable (e.g., ./fio)')
 971     parser.add_argument('-a', '--artifact-root', help='artifact root directory')
 972     parser.add_argument('-d', '--debug', help='enable debug output', action='store_true')
 973     parser.add_argument('-s', '--skip', nargs='+', type=int,
 974                         help='list of test(s) to skip')
 975     parser.add_argument('-o', '--run-only', nargs='+', type=int,
 976                         help='list of test(s) to run, skipping all others')
 977     args = parser.parse_args()
 978
 979     return args
 980
 981
 982 def main():
 983     """Run tests of fio latency percentile reporting"""
 984
 985     args = parse_args()
 986
 987     artifact_root = args.artifact_root if args.artifact_root else \
 988         "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
 989     os.mkdir(artifact_root)
 990     print("Artifact directory is %s" % artifact_root)
 991
 992     if args.fio:
 993         fio = str(Path(args.fio).absolute())
 994     else:
 995         fio = 'fio'
 996     print("fio path is %s" % fio)
 997
 998     if platform.system() == 'Linux':
 999         aio = 'libaio'
1000     elif platform.system() == 'Windows':
1001         aio = 'windowsaio'
1002     else:
1003         aio = 'posixaio'
1004
1005     test_list = [
1006         {
1007             # randread, null
1008             # enable slat, clat, lat
1009             # only clat and lat will appear because
1010             # because the null ioengine is syncrhonous
1011             "test_id": 1,
1012             "runtime": 2,
1013             "output-format": "json",
1014             "slat_percentiles": 1,
1015             "clat_percentiles": 1,
1016             "lat_percentiles": 1,
1017             "ioengine": 'null',
1018             'rw': 'randread',
1019             "test_obj": Test001,
1020         },
1021         {
1022             # randwrite, null
1023             # enable lat only
1024             "test_id": 2,
1025             "runtime": 2,
1026             "output-format": "json",
1027             "slat_percentiles": 0,
1028             "clat_percentiles": 0,
1029             "lat_percentiles": 1,
1030             "ioengine": 'null',
1031             'rw': 'randwrite',
1032             "test_obj": Test002,
1033         },
1034         {
1035             # randtrim, null
1036             # enable clat only
1037             "test_id": 3,
1038             "runtime": 2,
1039             "output-format": "json",
1040             "slat_percentiles": 0,
1041             "clat_percentiles": 1,
1042             "lat_percentiles": 0,
1043             "ioengine": 'null',
1044             'rw': 'randtrim',
1045             "test_obj": Test003,
1046         },
1047         {
1048             # randread, aio
1049             # enable slat, clat, lat
1050             # all will appear because liaio is asynchronous
1051             "test_id": 4,
1052             "runtime": 5,
1053             "output-format": "json+",
1054             "slat_percentiles": 1,
1055             "clat_percentiles": 1,
1056             "lat_percentiles": 1,
1057             "ioengine": aio,
1058             'rw': 'randread',
1059             "test_obj": Test004,
1060         },
1061         {
1062             # randwrite, aio
1063             # enable only clat, lat
1064             "test_id": 5,
1065             "runtime": 5,
1066             "output-format": "json+",
1067             "slat_percentiles": 0,
1068             "clat_percentiles": 1,
1069             "lat_percentiles": 1,
1070             "ioengine": aio,
1071             'rw': 'randwrite',
1072             "test_obj": Test005,
1073         },
1074         {
1075             # randread, aio
1076             # by default only clat should appear
1077             "test_id": 6,
1078             "runtime": 5,
1079             "output-format": "json+",
1080             "ioengine": aio,
1081             'rw': 'randread',
1082             "test_obj": Test006,
1083         },
1084         {
1085             # 50/50 r/w, aio
1086             # enable only slat
1087             "test_id": 7,
1088             "runtime": 5,
1089             "output-format": "json+",
1090             "slat_percentiles": 1,
1091             "clat_percentiles": 0,
1092             "lat_percentiles": 0,
1093             "ioengine": aio,
1094             'rw': 'randrw',
1095             "test_obj": Test007,
1096         },
1097         {
1098             # 50/50 r/w, aio, unified_rw_reporting
1099             # enable slat, clat, lat
1100             "test_id": 8,
1101             "runtime": 5,
1102             "output-format": "json+",
1103             "slat_percentiles": 1,
1104             "clat_percentiles": 1,
1105             "lat_percentiles": 1,
1106             "ioengine": aio,
1107             'rw': 'randrw',
1108             'unified_rw_reporting': 1,
1109             "test_obj": Test008,
1110         },
1111         {
1112             # randwrite, null
1113             # enable slat, clat, lat
1114             # fsync
1115             "test_id": 9,
1116             "runtime": 2,
1117             "output-format": "json+",
1118             "slat_percentiles": 1,
1119             "clat_percentiles": 1,
1120             "lat_percentiles": 1,
1121             "ioengine": 'null',
1122             'rw': 'randwrite',
1123             'fsync': 32,
1124             "test_obj": Test009,
1125         },
1126         {
1127             # 50/50 r/w, aio
1128             # enable slat, clat, lat
1129             "test_id": 10,
1130             "runtime": 5,
1131             "output-format": "terse,json+",
1132             "slat_percentiles": 1,
1133             "clat_percentiles": 1,
1134             "lat_percentiles": 1,
1135             "ioengine": aio,
1136             'rw': 'randrw',
1137             "test_obj": Test010,
1138         },
1139         {
1140             # 50/50 r/w, aio
1141             # enable only lat
1142             "test_id": 11,
1143             "runtime": 5,
1144             "output-format": "terse,json+",
1145             "slat_percentiles": 0,
1146             "clat_percentiles": 0,
1147             "lat_percentiles": 1,
1148             "ioengine": aio,
1149             'rw': 'randrw',
1150             "test_obj": Test011,
1151         },
1152         {
1153             # randread, null
1154             # enable slat, clat, lat
1155             # only clat and lat will appear because
1156             # because the null ioengine is syncrhonous
1157             # same as Test 1 except
1158             # numjobs = 4 to test sum_thread_stats() changes
1159             "test_id": 12,
1160             "runtime": 2,
1161             "output-format": "json",
1162             "slat_percentiles": 1,
1163             "clat_percentiles": 1,
1164             "lat_percentiles": 1,
1165             "ioengine": 'null',
1166             'rw': 'randread',
1167             'numjobs': 4,
1168             "test_obj": Test001,
1169         },
1170         {
1171             # randread, aio
1172             # enable slat, clat, lat
1173             # all will appear because liaio is asynchronous
1174             # same as Test 4 except
1175             # numjobs = 4 to test sum_thread_stats() changes
1176             "test_id": 13,
1177             "runtime": 5,
1178             "output-format": "json+",
1179             "slat_percentiles": 1,
1180             "clat_percentiles": 1,
1181             "lat_percentiles": 1,
1182             "ioengine": aio,
1183             'rw': 'randread',
1184             'numjobs': 4,
1185             "test_obj": Test004,
1186         },
1187         {
1188             # 50/50 r/w, aio, unified_rw_reporting
1189             # enable slat, clat, lata
1190             # same as Test 8 except
1191             # numjobs = 4 to test sum_thread_stats() changes
1192             "test_id": 14,
1193             "runtime": 5,
1194             "output-format": "json+",
1195             "slat_percentiles": 1,
1196             "clat_percentiles": 1,
1197             "lat_percentiles": 1,
1198             "ioengine": aio,
1199             'rw': 'randrw',
1200             'unified_rw_reporting': 1,
1201             'numjobs': 4,
1202             "test_obj": Test008,
1203         },
1204         {
1205             # randread, aio
1206             # enable slat, clat, lat
1207             # all will appear because liaio is asynchronous
1208             # same as Test 4 except add cmdprio_percentage
1209             "test_id": 15,
1210             "runtime": 5,
1211             "output-format": "json+",
1212             "slat_percentiles": 1,
1213             "clat_percentiles": 1,
1214             "lat_percentiles": 1,
1215             "ioengine": aio,
1216             'rw': 'randread',
1217             'cmdprio_percentage': 50,
1218             "test_obj": Test015,
1219         },
1220         {
1221             # randwrite, aio
1222             # enable only clat, lat
1223             # same as Test 5 except add cmdprio_percentage
1224             "test_id": 16,
1225             "runtime": 5,
1226             "output-format": "json+",
1227             "slat_percentiles": 0,
1228             "clat_percentiles": 1,
1229             "lat_percentiles": 1,
1230             "ioengine": aio,
1231             'rw': 'randwrite',
1232             'cmdprio_percentage': 50,
1233             "test_obj": Test016,
1234         },
1235         {
1236             # randread, aio
1237             # by default only clat should appear
1238             # same as Test 6 except add cmdprio_percentage
1239             "test_id": 17,
1240             "runtime": 5,
1241             "output-format": "json+",
1242             "ioengine": aio,
1243             'rw': 'randread',
1244             'cmdprio_percentage': 50,
1245             "test_obj": Test017,
1246         },
1247         {
1248             # 50/50 r/w, aio
1249             # enable only slat
1250             # same as Test 7 except add cmdprio_percentage
1251             "test_id": 18,
1252             "runtime": 5,
1253             "output-format": "json+",
1254             "slat_percentiles": 1,
1255             "clat_percentiles": 0,
1256             "lat_percentiles": 0,
1257             "ioengine": aio,
1258             'rw': 'randrw',
1259             'cmdprio_percentage': 50,
1260             "test_obj": Test018,
1261         },
1262         {
1263             # 50/50 r/w, aio, unified_rw_reporting
1264             # enable slat, clat, lat
1265             # same as Test 8 except add cmdprio_percentage
1266             "test_id": 19,
1267             "runtime": 5,
1268             "output-format": "json+",
1269             "slat_percentiles": 1,
1270             "clat_percentiles": 1,
1271             "lat_percentiles": 1,
1272             "ioengine": aio,
1273             'rw': 'randrw',
1274             'unified_rw_reporting': 1,
1275             'cmdprio_percentage': 50,
1276             "test_obj": Test019,
1277         },
1278         {
1279             # 50/50 r/w, aio, unified_rw_reporting
1280             # enable slat, clat, lat
1281             # same as Test 19 except
1282             # add numjobs = 4 to test sum_thread_stats() changes
1283             "test_id": 20,
1284             "runtime": 5,
1285             "output-format": "json+",
1286             "slat_percentiles": 1,
1287             "clat_percentiles": 1,
1288             "lat_percentiles": 1,
1289             "ioengine": aio,
1290             'rw': 'randrw',
1291             'unified_rw_reporting': 1,
1292             'cmdprio_percentage': 50,
1293             'numjobs': 4,
1294             "test_obj": Test019,
1295         },
1296     ]
1297
1298     passed = 0
1299     failed = 0
1300     skipped = 0
1301
1302     for test in test_list:
1303         if (args.skip and test['test_id'] in args.skip) or \
1304            (args.run_only and test['test_id'] not in args.run_only):
1305             skipped = skipped + 1
1306             outcome = 'SKIPPED (User request)'
1307         elif (platform.system() != 'Linux' or os.geteuid() != 0) and 'cmdprio_percentage' in test:
1308             skipped = skipped + 1
1309             outcome = 'SKIPPED (Linux root required for cmdprio_percentage tests)'
1310         else:
1311             test_obj = test['test_obj'](artifact_root, test, args.debug)
1312             status = test_obj.run_fio(fio)
1313             if status:
1314                 status = test_obj.check()
1315             if status:
1316                 passed = passed + 1
1317                 outcome = 'PASSED'
1318             else:
1319                 failed = failed + 1
1320                 outcome = 'FAILED'
1321
1322         print("**********Test {0} {1}**********".format(test['test_id'], outcome))
1323
1324     print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))
1325
1326     sys.exit(failed)
1327
1328
# Entry point: run the tests only when this file is executed as a script.
if __name__ == '__main__':
    main()