t/latency_percentiles.py

   1 #!/usr/bin/env python3
   2 # SPDX-License-Identifier: GPL-2.0-only
   3 #
   4 # Copyright (c) 2020 Western Digital Corporation or its affiliates.
   5 #
   6 """
   7 # latency_percentiles.py
   8 #
   9 # Test the code that produces latency percentiles
  10 # This is mostly to test the code changes to allow reporting
  11 # of slat, clat, and lat percentiles
  12 #
  13 # USAGE
  14 # python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
  15 #
  16 #
  17 # Test scenarios:
  18 #
  19 # - DONE json
  20 #   unified rw reporting
  21 #   compare with latency log
  22 #   try various combinations of the ?lat_percentile options
  23 #   null, aio
  24 #   r, w, t
  25 # - DONE json+
  26 #   check presence of latency bins
  27 #   if the json percentiles match those from the raw data
  28 #   then the latency bin values and counts are probably ok
  29 # - DONE terse
  30 #   produce both terse, JSON output and confirm that they match
  31 #   lat only; both lat and clat
  32 # - DONE sync_lat
  33 #   confirm that sync_lat data appears
  34 # - MANUAL TESTING normal output:
  35 #       null ioengine
  36 #           enable all, but only clat and lat appear
  37 #           enable subset of latency types
  38 #           read, write, trim, unified
  39 #       libaio ioengine
  40 #           enable all latency types
  41 #           enable subset of latency types
  42 #           read, write, trim, unified
  43 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  44 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  45 # echo confirm that clat and lat percentiles appear
  46 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  47 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
  48 # echo confirm that only lat percentiles appear
  49 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  50 #       --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
  51 # echo confirm that only clat percentiles appear
  52 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  53 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
  54 # echo confirm that slat, clat, lat percentiles appear
  55 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  56 #       --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
  57 # echo confirm that clat and lat percentiles appear
  58 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  59 #       --ioengine=libaio --rw=randrw
  60 # echo confirm that clat percentiles appear for reads and writes
  61 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  62 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
  63 # echo confirm that slat percentiles appear for both reads and writes
  64 # ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  65 #       --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  66 #       --rw=randrw --unified_rw_reporting=1
  67 # echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
  68 #./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
  69 #       --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
  70 #       --rw=randrw --fsync=32
  71 # echo confirm that fsync latencies appear
  72 """
  73
  74 import os
  75 import csv
  76 import sys
  77 import json
  78 import math
  79 import time
  80 import argparse
  81 import platform
  82 import subprocess
  83 from collections import Counter
  84 from pathlib import Path
  85
  86
  87 class FioLatTest():
  88     """fio latency percentile test."""
  89
  90     def __init__(self, artifact_root, test_options, debug):
  91         """
  92         artifact_root   root directory for artifacts (subdirectory will be created under here)
  93         test            test specification
  94         """
  95         self.artifact_root = artifact_root
  96         self.test_options = test_options
  97         self.debug = debug
  98         self.filename = None
  99         self.json_data = None
 100         self.terse_data = None
 101
 102         self.test_dir = os.path.join(self.artifact_root,
 103                                      "{:03d}".format(self.test_options['test_id']))
 104         if not os.path.exists(self.test_dir):
 105             os.mkdir(self.test_dir)
 106
 107         self.filename = "latency{:03d}".format(self.test_options['test_id'])
 108
 109     def run_fio(self, fio_path):
 110         """Run a test."""
 111
 112         fio_args = [
 113             "--max-jobs=16",
 114             "--name=latency",
 115             "--randrepeat=0",
 116             "--norandommap",
 117             "--time_based",
 118             "--size=16M",
 119             "--rwmixread=50",
 120             "--group_reporting=1",
 121             "--write_lat_log={0}".format(self.filename),
 122             "--output={0}.out".format(self.filename),
 123             "--ioengine={ioengine}".format(**self.test_options),
 124             "--rw={rw}".format(**self.test_options),
 125             "--runtime={runtime}".format(**self.test_options),
 126             "--output-format={output-format}".format(**self.test_options),
 127         ]
 128         for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
 129                     'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs',
 130                     'cmdprio_percentage', 'bssplit', 'cmdprio_bssplit']:
 131             if opt in self.test_options:
 132                 option = '--{0}={{{0}}}'.format(opt)
 133                 fio_args.append(option.format(**self.test_options))
 134
 135         command = [fio_path] + fio_args
 136         with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
 137                 command_file:
 138             command_file.write("%s\n" % command)
 139
 140         passed = True
 141         stdout_file = open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)), "w+")
 142         stderr_file = open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)), "w+")
 143         exitcode_file = open(os.path.join(self.test_dir,
 144                                           "{0}.exitcode".format(self.filename)), "w+")
 145         try:
 146             proc = None
 147             # Avoid using subprocess.run() here because when a timeout occurs,
 148             # fio will be stopped with SIGKILL. This does not give fio a
 149             # chance to clean up and means that child processes may continue
 150             # running and submitting IO.
 151             proc = subprocess.Popen(command,
 152                                     stdout=stdout_file,
 153                                     stderr=stderr_file,
 154                                     cwd=self.test_dir,
 155                                     universal_newlines=True)
 156             proc.communicate(timeout=300)
 157             exitcode_file.write('{0}\n'.format(proc.returncode))
 158             passed &= (proc.returncode == 0)
 159         except subprocess.TimeoutExpired:
 160             proc.terminate()
 161             proc.communicate()
 162             assert proc.poll()
 163             print("Timeout expired")
 164             passed = False
 165         except Exception:
 166             if proc:
 167                 if not proc.poll():
 168                     proc.terminate()
 169                     proc.communicate()
 170             print("Exception: %s" % sys.exc_info())
 171             passed = False
 172         finally:
 173             stdout_file.close()
 174             stderr_file.close()
 175             exitcode_file.close()
 176
 177         if passed:
 178             if 'json' in self.test_options['output-format']:
 179                 if not self.get_json():
 180                     print('Unable to decode JSON data')
 181                     passed = False
 182             if 'terse' in self.test_options['output-format']:
 183                 if not self.get_terse():
 184                     print('Unable to decode terse data')
 185                     passed = False
 186
 187         return passed
 188
 189     def get_json(self):
 190         """Convert fio JSON output into a python JSON object"""
 191
 192         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 193         with open(filename, 'r') as file:
 194             file_data = file.read()
 195
 196         #
 197         # Sometimes fio informational messages are included at the top of the
 198         # JSON output, especially under Windows. Try to decode output as JSON
 199         # data, lopping off up to the first four lines
 200         #
 201         lines = file_data.splitlines()
 202         for i in range(5):
 203             file_data = '\n'.join(lines[i:])
 204             try:
 205                 self.json_data = json.loads(file_data)
 206             except json.JSONDecodeError:
 207                 continue
 208             else:
 209                 return True
 210
 211         return False
 212
 213     def get_terse(self):
 214         """Read fio output and return terse format data."""
 215
 216         filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
 217         with open(filename, 'r') as file:
 218             file_data = file.read()
 219
 220         #
 221         # Read the first few lines and see if any of them begin with '3;'
 222         # If so, the line is probably terse output. Obviously, this only
 223         # works for fio terse version 3 and it does not work for
 224         # multi-line terse output
 225         #
 226         lines = file_data.splitlines()
 227         for i in range(8):
 228             file_data = lines[i]
 229             if file_data.startswith('3;'):
 230                 self.terse_data = file_data.split(';')
 231                 return True
 232
 233         return False
 234
 235     def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
 236                         unified=False):
 237         """Check fio latency data.
 238
 239         ddir                data direction to check (0=read, 1=write, 2=trim)
 240         slat                True if submission latency data available to check
 241         clat                True if completion latency data available to check
 242         tlat                True of total latency data available to check
 243         plus                True if we actually have json+ format data where additional checks can
 244                             be carried out
 245         unified             True if fio is reporting unified r/w data
 246         """
 247
 248         types = {
 249             'slat': slat,
 250             'clat': clat,
 251             'lat': tlat
 252         }
 253
 254         retval = True
 255
 256         for lat in ['slat', 'clat', 'lat']:
 257             this_iter = True
 258             if not types[lat]:
 259                 if 'percentile' in jsondata[lat+'_ns']:
 260                     this_iter = False
 261                     print('unexpected %s percentiles found' % lat)
 262                 else:
 263                     print("%s percentiles skipped" % lat)
 264                 continue
 265             else:
 266                 if 'percentile' not in jsondata[lat+'_ns']:
 267                     this_iter = False
 268                     print('%s percentiles not found in fio output' % lat)
 269
 270             #
 271             # Check only for the presence/absence of json+
 272             # latency bins. Future work can check the
 273             # accuracy of the bin values and counts.
 274             #
 275             # Because the latency percentiles are based on
 276             # the bins, we can be confident that the bin
 277             # values and counts are correct if fio's
 278             # latency percentiles match what we compute
 279             # from the raw data.
 280             #
 281             if plus:
 282                 if 'bins' not in jsondata[lat+'_ns']:
 283                     print('bins not found with json+ output format')
 284                     this_iter = False
 285                 else:
 286                     if not self.check_jsonplus(jsondata[lat+'_ns']):
 287                         this_iter = False
 288             else:
 289                 if 'bins' in jsondata[lat+'_ns']:
 290                     print('json+ bins found with json output format')
 291                     this_iter = False
 292
 293             latencies = []
 294             for i in range(10):
 295                 lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
 296                 if not os.path.exists(lat_file):
 297                     break
 298                 with open(lat_file, 'r', newline='') as file:
 299                     reader = csv.reader(file)
 300                     for line in reader:
 301                         if unified or int(line[2]) == ddir:
 302                             latencies.append(int(line[1]))
 303
 304             if int(jsondata['total_ios']) != len(latencies):
 305                 this_iter = False
 306                 print('%s: total_ios = %s, latencies logged = %d' % \
 307                         (lat, jsondata['total_ios'], len(latencies)))
 308             elif self.debug:
 309                 print("total_ios %s match latencies logged" % jsondata['total_ios'])
 310
 311             latencies.sort()
 312             ptiles = jsondata[lat+'_ns']['percentile']
 313
 314             for percentile in ptiles.keys():
 315                 #
 316                 # numpy.percentile(latencies, float(percentile),
 317                 #       interpolation='higher')
 318                 # produces values that mostly match what fio reports
 319                 # however, in the tails of the distribution, the values produced
 320                 # by fio's and numpy.percentile's algorithms are occasionally off
 321                 # by one latency measurement. So instead of relying on the canned
 322                 # numpy.percentile routine, implement here fio's algorithm
 323                 #
 324                 rank = math.ceil(float(percentile)/100 * len(latencies))
 325                 if rank > 0:
 326                     index = rank - 1
 327                 else:
 328                     index = 0
 329                 value = latencies[int(index)]
 330                 fio_val = int(ptiles[percentile])
 331                 # The theory in stat.h says that the proportional error will be
 332                 # less than 1/128
 333                 if not self.similar(fio_val, value):
 334                     delta = abs(fio_val - value) / value
 335                     print("Error with %s %sth percentile: "
 336                           "fio: %d, expected: %d, proportional delta: %f" %
 337                           (lat, percentile, fio_val, value, delta))
 338                     print("Rank: %d, index: %d" % (rank, index))
 339                     this_iter = False
 340                 elif self.debug:
 341                     print('%s %sth percentile values match: %d, %d' %
 342                           (lat, percentile, fio_val, value))
 343
 344             if this_iter:
 345                 print("%s percentiles match" % lat)
 346             else:
 347                 retval = False
 348
 349         return retval
 350
 351     @staticmethod
 352     def check_empty(job):
 353         """
 354         Make sure JSON data is empty.
 355
 356         Some data structures should be empty. This function makes sure that they are.
 357
 358         job         JSON object that we need to check for emptiness
 359         """
 360
 361         return job['total_ios'] == 0 and \
 362                 job['slat_ns']['N'] == 0 and \
 363                 job['clat_ns']['N'] == 0 and \
 364                 job['lat_ns']['N'] == 0
 365
 366     def check_nocmdprio_lat(self, job):
 367         """
 368         Make sure no per priority latencies appear.
 369
 370         job         JSON object to check
 371         """
 372
 373         for ddir in ['read', 'write', 'trim']:
 374             if ddir in job:
 375                 if 'prios' in job[ddir]:
 376                     print("Unexpected per priority latencies found in %s output" % ddir)
 377                     return False
 378
 379         if self.debug:
 380             print("No per priority latencies found")
 381
 382         return True
 383
 384     @staticmethod
 385     def similar(approximation, actual):
 386         """
 387         Check whether the approximate values recorded by fio are within the theoretical bound.
 388
 389         Since it is impractical to store exact latency measurements for each and every IO, fio
 390         groups similar latency measurements into variable-sized bins. The theory in stat.h says
 391         that the proportional error will be less than 1/128. This function checks whether this
 392         is true.
 393
 394         TODO This test will fail when comparing a value from the largest latency bin against its
 395         actual measurement. Find some way to detect this and avoid failing.
 396
 397         approximation   value of the bin used by fio to store a given latency
 398         actual          actual latency value
 399         """
 400
 401         # Avoid a division by zero. The smallest latency values have no error.
 402         if actual == 0:
 403             return approximation == 0
 404
 405         delta = abs(approximation - actual) / actual
 406         return delta <= 1/128
 407
 408     def check_jsonplus(self, jsondata):
 409         """Check consistency of json+ data
 410
 411         When we have json+ data we can check the min value, max value, and
 412         sample size reported by fio
 413
 414         jsondata            json+ data that we need to check
 415         """
 416
 417         retval = True
 418
 419         keys = [int(k) for k in jsondata['bins'].keys()]
 420         values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
 421         smallest = min(keys)
 422         biggest = max(keys)
 423         sampsize = sum(values)
 424
 425         if not self.similar(jsondata['min'], smallest):
 426             retval = False
 427             print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
 428         elif self.debug:
 429             print('json+ min values match: %d' % jsondata['min'])
 430
 431         if not self.similar(jsondata['max'], biggest):
 432             retval = False
 433             print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
 434         elif self.debug:
 435             print('json+ max values match: %d' % jsondata['max'])
 436
 437         if sampsize != jsondata['N']:
 438             retval = False
 439             print('reported sample size %d does not match json+ total count %d' % \
 440                     (jsondata['N'], sampsize))
 441         elif self.debug:
 442             print('json+ sample sizes match: %d' % sampsize)
 443
 444         return retval
 445
 446     def check_sync_lat(self, jsondata, plus=False):
 447         """Check fsync latency percentile data.
 448
 449         All we can check is that some percentiles are reported, unless we have json+ data.
 450         If we actually have json+ data then we can do more checking.
 451
 452         jsondata        JSON data for fsync operations
 453         plus            True if we actually have json+ data
 454         """
 455         retval = True
 456
 457         if 'percentile' not in jsondata['lat_ns']:
 458             print("Sync percentile data not found")
 459             return False
 460
 461         if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
 462             retval = False
 463             print('Mismatch between total_ios and lat_ns sample size')
 464         elif self.debug:
 465             print('sync sample sizes match: %d' % jsondata['total_ios'])
 466
 467         if not plus:
 468             if 'bins' in jsondata['lat_ns']:
 469                 print('Unexpected json+ bin data found')
 470                 return False
 471
 472         if not self.check_jsonplus(jsondata['lat_ns']):
 473             retval = False
 474
 475         return retval
 476
 477     def check_terse(self, terse, jsondata):
 478         """Compare terse latencies with JSON latencies.
 479
 480         terse           terse format data for checking
 481         jsondata        JSON format data for checking
 482         """
 483
 484         retval = True
 485
 486         for lat in terse:
 487             split = lat.split('%')
 488             pct = split[0]
 489             terse_val = int(split[1][1:])
 490             json_val = math.floor(jsondata[pct]/1000)
 491             if terse_val != json_val:
 492                 retval = False
 493                 print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
 494                         (pct, jsondata[pct], json_val, terse_val))
 495             elif self.debug:
 496                 print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))
 497
 498         return retval
 499
 500     def check_prio_latencies(self, jsondata, clat=True, plus=False):
 501         """Check consistency of per priority latencies.
 502
 503         clat                True if we should check clat data; other check lat data
 504         plus                True if we have json+ format data where additional checks can
 505                             be carried out
 506         unified             True if fio is reporting unified r/w data
 507         """
 508
 509         if clat:
 510             obj = combined = 'clat_ns'
 511         else:
 512             obj = combined = 'lat_ns'
 513
 514         if not 'prios' in jsondata or not combined in jsondata:
 515             print("Error identifying per priority latencies")
 516             return False
 517
 518         sum_sample_size = sum([x[obj]['N'] for x in jsondata['prios']])
 519         if sum_sample_size != jsondata[combined]['N']:
 520             print("Per prio sample size sum %d != combined sample size %d" %
 521                   (sum_sample_size, jsondata[combined]['N']))
 522             return False
 523         elif self.debug:
 524             print("Per prio sample size sum %d == combined sample size %d" %
 525                   (sum_sample_size, jsondata[combined]['N']))
 526
 527         min_val = min([x[obj]['min'] for x in jsondata['prios']])
 528         if min_val != jsondata[combined]['min']:
 529             print("Min per prio min latency %d does not match min %d from combined data" %
 530                   (min_val, jsondata[combined]['min']))
 531             return False
 532         elif self.debug:
 533             print("Min per prio min latency %d matches min %d from combined data" %
 534                   (min_val, jsondata[combined]['min']))
 535
 536         max_val = max([x[obj]['max'] for x in jsondata['prios']])
 537         if max_val != jsondata[combined]['max']:
 538             print("Max per prio max latency %d does not match max %d from combined data" %
 539                   (max_val, jsondata[combined]['max']))
 540             return False
 541         elif self.debug:
 542             print("Max per prio max latency %d matches max %d from combined data" %
 543                   (max_val, jsondata[combined]['max']))
 544
 545         weighted_vals = [x[obj]['mean'] * x[obj]['N'] for x in jsondata['prios']]
 546         weighted_avg = sum(weighted_vals) / jsondata[combined]['N']
 547         delta = abs(weighted_avg - jsondata[combined]['mean'])
 548         if (delta / jsondata[combined]['mean']) > 0.0001:
 549             print("Difference between merged per prio weighted average %f mean "
 550                   "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
 551             return False
 552         elif self.debug:
 553             print("Merged per prio weighted average %f mean matches actual mean %f" %
 554                   (weighted_avg, jsondata[combined]['mean']))
 555
 556         if plus:
 557             for prio in jsondata['prios']:
 558                 if not self.check_jsonplus(prio[obj]):
 559                     return False
 560
 561             counter = Counter()
 562             for prio in jsondata['prios']:
 563                 counter.update(prio[obj]['bins'])
 564
 565             bins = dict(counter)
 566
 567             if len(bins) != len(jsondata[combined]['bins']):
 568                 print("Number of merged bins %d does not match number of overall bins %d" %
 569                       (len(bins), len(jsondata[combined]['bins'])))
 570                 return False
 571             elif self.debug:
 572                 print("Number of merged bins %d matches number of overall bins %d" %
 573                       (len(bins), len(jsondata[combined]['bins'])))
 574
 575             for duration in bins.keys():
 576                 if bins[duration] != jsondata[combined]['bins'][duration]:
 577                     print("Merged per prio count does not match overall count for duration %d" %
 578                           duration)
 579                     return False
 580
 581         print("Merged per priority latency data match combined latency data")
 582         return True
 583
 584     def check(self):
 585         """Check test output."""
 586
 587         raise NotImplementedError()
 588
 589
 590 class Test001(FioLatTest):
 591     """Test object for Test 1."""
 592
 593     def check(self):
 594         """Check Test 1 output."""
 595
 596         job = self.json_data['jobs'][0]
 597
 598         retval = True
 599         if not self.check_empty(job['write']):
 600             print("Unexpected write data found in output")
 601             retval = False
 602         if not self.check_empty(job['trim']):
 603             print("Unexpected trim data found in output")
 604             retval = False
 605         if not self.check_nocmdprio_lat(job):
 606             print("Unexpected per priority latencies found")
 607             retval = False
 608
 609         retval &= self.check_latencies(job['read'], 0, slat=False)
 610
 611         return retval
 612
 613
 614 class Test002(FioLatTest):
 615     """Test object for Test 2."""
 616
 617     def check(self):
 618         """Check Test 2 output."""
 619
 620         job = self.json_data['jobs'][0]
 621
 622         retval = True
 623         if not self.check_empty(job['read']):
 624             print("Unexpected read data found in output")
 625             retval = False
 626         if not self.check_empty(job['trim']):
 627             print("Unexpected trim data found in output")
 628             retval = False
 629         if not self.check_nocmdprio_lat(job):
 630             print("Unexpected per priority latencies found")
 631             retval = False
 632
 633         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False)
 634
 635         return retval
 636
 637
 638 class Test003(FioLatTest):
 639     """Test object for Test 3."""
 640
 641     def check(self):
 642         """Check Test 3 output."""
 643
 644         job = self.json_data['jobs'][0]
 645
 646         retval = True
 647         if not self.check_empty(job['read']):
 648             print("Unexpected read data found in output")
 649             retval = False
 650         if not self.check_empty(job['write']):
 651             print("Unexpected write data found in output")
 652             retval = False
 653         if not self.check_nocmdprio_lat(job):
 654             print("Unexpected per priority latencies found")
 655             retval = False
 656
 657         retval &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)
 658
 659         return retval
 660
 661
 662 class Test004(FioLatTest):
 663     """Test object for Tests 4, 13."""
 664
 665     def check(self):
 666         """Check Test 4, 13 output."""
 667
 668         job = self.json_data['jobs'][0]
 669
 670         retval = True
 671         if not self.check_empty(job['write']):
 672             print("Unexpected write data found in output")
 673             retval = False
 674         if not self.check_empty(job['trim']):
 675             print("Unexpected trim data found in output")
 676             retval = False
 677         if not self.check_nocmdprio_lat(job):
 678             print("Unexpected per priority latencies found")
 679             retval = False
 680
 681         retval &= self.check_latencies(job['read'], 0, plus=True)
 682
 683         return retval
 684
 685
 686 class Test005(FioLatTest):
 687     """Test object for Test 5."""
 688
 689     def check(self):
 690         """Check Test 5 output."""
 691
 692         job = self.json_data['jobs'][0]
 693
 694         retval = True
 695         if not self.check_empty(job['read']):
 696             print("Unexpected read data found in output")
 697             retval = False
 698         if not self.check_empty(job['trim']):
 699             print("Unexpected trim data found in output")
 700             retval = False
 701         if not self.check_nocmdprio_lat(job):
 702             print("Unexpected per priority latencies found")
 703             retval = False
 704
 705         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 706
 707         return retval
 708
 709
 710 class Test006(FioLatTest):
 711     """Test object for Test 6."""
 712
 713     def check(self):
 714         """Check Test 6 output."""
 715
 716         job = self.json_data['jobs'][0]
 717
 718         retval = True
 719         if not self.check_empty(job['write']):
 720             print("Unexpected write data found in output")
 721             retval = False
 722         if not self.check_empty(job['trim']):
 723             print("Unexpected trim data found in output")
 724             retval = False
 725         if not self.check_nocmdprio_lat(job):
 726             print("Unexpected per priority latencies found")
 727             retval = False
 728
 729         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 730
 731         return retval
 732
 733
 734 class Test007(FioLatTest):
 735     """Test object for Test 7."""
 736
 737     def check(self):
 738         """Check Test 7 output."""
 739
 740         job = self.json_data['jobs'][0]
 741
 742         retval = True
 743         if not self.check_empty(job['trim']):
 744             print("Unexpected trim data found in output")
 745             retval = False
 746         if not self.check_nocmdprio_lat(job):
 747             print("Unexpected per priority latencies found")
 748             retval = False
 749
 750         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 751         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 752
 753         return retval
 754
 755
 756 class Test008(FioLatTest):
 757     """Test object for Tests 8, 14."""
 758
 759     def check(self):
 760         """Check Test 8, 14 output."""
 761
 762         job = self.json_data['jobs'][0]
 763
 764         retval = True
 765         if 'read' in job or 'write' in job or 'trim' in job:
 766             print("Unexpected data direction found in fio output")
 767             retval = False
 768         if not self.check_nocmdprio_lat(job):
 769             print("Unexpected per priority latencies found")
 770             retval = False
 771
 772         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 773
 774         return retval
 775
 776
 777 class Test009(FioLatTest):
 778     """Test object for Test 9."""
 779
 780     def check(self):
 781         """Check Test 9 output."""
 782
 783         job = self.json_data['jobs'][0]
 784
 785         retval = True
 786         if not self.check_empty(job['read']):
 787             print("Unexpected read data found in output")
 788             retval = False
 789         if not self.check_empty(job['trim']):
 790             print("Unexpected trim data found in output")
 791             retval = False
 792         if not self.check_sync_lat(job['sync'], plus=True):
 793             print("Error checking fsync latency data")
 794             retval = False
 795         if not self.check_nocmdprio_lat(job):
 796             print("Unexpected per priority latencies found")
 797             retval = False
 798
 799         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 800
 801         return retval
 802
 803
 804 class Test010(FioLatTest):
 805     """Test object for Test 10."""
 806
 807     def check(self):
 808         """Check Test 10 output."""
 809
 810         job = self.json_data['jobs'][0]
 811
 812         retval = True
 813         if not self.check_empty(job['trim']):
 814             print("Unexpected trim data found in output")
 815             retval = False
 816         if not self.check_nocmdprio_lat(job):
 817             print("Unexpected per priority latencies found")
 818             retval = False
 819
 820         retval &= self.check_latencies(job['read'], 0, plus=True)
 821         retval &= self.check_latencies(job['write'], 1, plus=True)
 822         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 823         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 824         # Terse data checking only works for default percentiles.
 825         # This needs to be changed if something other than the default is ever used.
 826
 827         return retval
 828
 829
 830 class Test011(FioLatTest):
 831     """Test object for Test 11."""
 832
 833     def check(self):
 834         """Check Test 11 output."""
 835
 836         job = self.json_data['jobs'][0]
 837
 838         retval = True
 839         if not self.check_empty(job['trim']):
 840             print("Unexpected trim data found in output")
 841             retval = False
 842         if not self.check_nocmdprio_lat(job):
 843             print("Unexpected per priority latencies found")
 844             retval = False
 845
 846         retval &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
 847         retval &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
 848         retval &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
 849         retval &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
 850         # Terse data checking only works for default percentiles.
 851         # This needs to be changed if something other than the default is ever used.
 852
 853         return retval
 854
 855
 856 class Test015(FioLatTest):
 857     """Test object for Test 15."""
 858
 859     def check(self):
 860         """Check Test 15 output."""
 861
 862         job = self.json_data['jobs'][0]
 863
 864         retval = True
 865         if not self.check_empty(job['write']):
 866             print("Unexpected write data found in output")
 867             retval = False
 868         if not self.check_empty(job['trim']):
 869             print("Unexpected trim data found in output")
 870             retval = False
 871
 872         retval &= self.check_latencies(job['read'], 0, plus=True)
 873         retval &= self.check_prio_latencies(job['read'], clat=False, plus=True)
 874
 875         return retval
 876
 877
 878 class Test016(FioLatTest):
 879     """Test object for Test 16."""
 880
 881     def check(self):
 882         """Check Test 16 output."""
 883
 884         job = self.json_data['jobs'][0]
 885
 886         retval = True
 887         if not self.check_empty(job['read']):
 888             print("Unexpected read data found in output")
 889             retval = False
 890         if not self.check_empty(job['trim']):
 891             print("Unexpected trim data found in output")
 892             retval = False
 893
 894         retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
 895         retval &= self.check_prio_latencies(job['write'], clat=False, plus=True)
 896
 897         return retval
 898
 899
 900 class Test017(FioLatTest):
 901     """Test object for Test 17."""
 902
 903     def check(self):
 904         """Check Test 17 output."""
 905
 906         job = self.json_data['jobs'][0]
 907
 908         retval = True
 909         if not self.check_empty(job['write']):
 910             print("Unexpected write data found in output")
 911             retval = False
 912         if not self.check_empty(job['trim']):
 913             print("Unexpected trim data found in output")
 914             retval = False
 915
 916         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 917         retval &= self.check_prio_latencies(job['read'], plus=True)
 918
 919         return retval
 920
 921
 922 class Test018(FioLatTest):
 923     """Test object for Test 18."""
 924
 925     def check(self):
 926         """Check Test 18 output."""
 927
 928         job = self.json_data['jobs'][0]
 929
 930         retval = True
 931         if not self.check_empty(job['trim']):
 932             print("Unexpected trim data found in output")
 933             retval = False
 934
 935         retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
 936         retval &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)
 937
 938         # We actually have json+ data but setting plus=False below avoids checking the
 939         # json+ bins which did not exist for clat and lat because this job is run with
 940         # clat_percentiles=0, lat_percentiles=0, However, we can still check the summary
 941         # statistics
 942         retval &= self.check_prio_latencies(job['write'], plus=False)
 943         retval &= self.check_prio_latencies(job['read'], plus=False)
 944
 945         return retval
 946
 947
 948 class Test019(FioLatTest):
 949     """Test object for Tests 19, 20."""
 950
 951     def check(self):
 952         """Check Test 19, 20 output."""
 953
 954         job = self.json_data['jobs'][0]
 955
 956         retval = True
 957         if 'read' in job or 'write' in job or 'trim' in job:
 958             print("Unexpected data direction found in fio output")
 959             retval = False
 960
 961         retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
 962         retval &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)
 963
 964         return retval
 965
 966
 967 class Test021(FioLatTest):
 968     """Test object for Test 21."""
 969
 970     def check(self):
 971         """Check Test 21 output."""
 972
 973         job = self.json_data['jobs'][0]
 974
 975         retval = True
 976         if not self.check_empty(job['trim']):
 977             print("Unexpected trim data found in output")
 978             retval = False
 979
 980         retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
 981         retval &= self.check_latencies(job['write'], 1, slat=False, tlat=False, plus=True)
 982         retval &= self.check_prio_latencies(job['read'], clat=True, plus=True)
 983         retval &= self.check_prio_latencies(job['write'], clat=True, plus=True)
 984
 985         return retval
 986
 987
 988 def parse_args():
 989     """Parse command-line arguments."""
 990
 991     parser = argparse.ArgumentParser()
 992     parser.add_argument('-f', '--fio', help='path to file executable (e.g., ./fio)')
 993     parser.add_argument('-a', '--artifact-root', help='artifact root directory')
 994     parser.add_argument('-d', '--debug', help='enable debug output', action='store_true')
 995     parser.add_argument('-s', '--skip', nargs='+', type=int,
 996                         help='list of test(s) to skip')
 997     parser.add_argument('-o', '--run-only', nargs='+', type=int,
 998                         help='list of test(s) to run, skipping all others')
 999     args = parser.parse_args()
1000
1001     return args
1002
1003
def main():
    """Run tests of fio latency percentile reporting.

    Parses the command line, creates the artifact directory, builds the
    table of test cases, and then walks the table: each test is either
    skipped or run with fio and checked by its test object.  An outcome line
    is printed per test, followed by a summary line.

    The process exits with the number of failed tests as its exit status,
    so the status is 0 when no test that ran has failed.
    """

    args = parse_args()

    # Default to a timestamped directory name when no artifact root is given.
    # NOTE: os.mkdir() raises if the directory already exists and does not
    # create missing parent directories.
    artifact_root = args.artifact_root if args.artifact_root else \
        "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
    os.mkdir(artifact_root)
    print("Artifact directory is %s" % artifact_root)

    # Use an absolute path for a user-supplied fio; otherwise use the bare
    # name 'fio'.
    if args.fio:
        fio = str(Path(args.fio).absolute())
    else:
        fio = 'fio'
    print("fio path is %s" % fio)

    # Choose the ioengine used by the "aio" tests according to the host OS.
    if platform.system() == 'Linux':
        aio = 'libaio'
    elif platform.system() == 'Windows':
        aio = 'windowsaio'
    else:
        aio = 'posixaio'

    # Table of test cases.  Each dict is handed unchanged to the constructor
    # of its "test_obj" class; "test_id" is also used for skip/run-only
    # selection and for the outcome line printed for the test.
    test_list = [
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            "test_id": 1,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            "test_obj": Test001,
        },
        {
            # randwrite, null
            # enable lat only
            "test_id": 2,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            "test_obj": Test002,
        },
        {
            # randtrim, null
            # enable clat only
            "test_id": 3,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": 'null',
            'rw': 'randtrim',
            "test_obj": Test003,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            "test_id": 4,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test004,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            "test_id": 5,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            "test_obj": Test005,
        },
        {
            # randread, aio
            # by default only clat should appear
            "test_id": 6,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test006,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            "test_id": 7,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test007,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            "test_id": 8,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            "test_obj": Test008,
        },
        {
            # randwrite, null
            # enable slat, clat, lat
            # fsync
            "test_id": 9,
            "runtime": 2,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            'fsync': 32,
            "test_obj": Test009,
        },
        {
            # 50/50 r/w, aio
            # enable slat, clat, lat
            "test_id": 10,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test010,
        },
        {
            # 50/50 r/w, aio
            # enable only lat
            "test_id": 11,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test011,
        },
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            # same as Test 1 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 12,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test001,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 13,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test004,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 14,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'numjobs': 4,
            "test_obj": Test008,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add cmdprio_percentage
            "test_id": 15,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test015,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            # same as Test 5 except add cmdprio_percentage
            "test_id": 16,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            'cmdprio_percentage': 50,
            "test_obj": Test016,
        },
        {
            # randread, aio
            # by default only clat should appear
            # same as Test 6 except add cmdprio_percentage
            "test_id": 17,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test017,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            # same as Test 7 except add cmdprio_percentage
            "test_id": 18,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'cmdprio_percentage': 50,
            "test_obj": Test018,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add cmdprio_percentage
            "test_id": 19,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            "test_obj": Test019,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 19 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 20,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            'numjobs': 4,
            "test_obj": Test019,
        },
        {
            # r/w, aio
            # enable only clat
            # test bssplit and cmdprio_bssplit
            "test_id": 21,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'bssplit': '64k/40:1024k/60',
            'cmdprio_bssplit': '64k/25/1/1:64k/75/3/2:1024k/0',
            "test_obj": Test021,
        },
        {
            # r/w, aio
            # enable only clat
            # same as Test 21 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 22,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'bssplit': '64k/40:1024k/60',
            'cmdprio_bssplit': '64k/25/1/1:64k/75/3/2:1024k/0',
            'numjobs': 4,
            "test_obj": Test021,
        },
    ]

    # Outcome counters reported in the summary line.
    passed = 0
    failed = 0
    skipped = 0

    for test in test_list:
        # A test is skipped when the user excluded it (--skip), when it is
        # not in a supplied --run-only list, or when it uses a cmdprio option
        # and we are not root on Linux.  The platform test comes first in the
        # 'or', so os.geteuid() is only evaluated on Linux.
        if (args.skip and test['test_id'] in args.skip) or \
           (args.run_only and test['test_id'] not in args.run_only):
            skipped = skipped + 1
            outcome = 'SKIPPED (User request)'
        elif (platform.system() != 'Linux' or os.geteuid() != 0) and \
             ('cmdprio_percentage' in test or 'cmdprio_bssplit' in test):
            skipped = skipped + 1
            outcome = 'SKIPPED (Linux root required for cmdprio tests)'
        else:
            test_obj = test['test_obj'](artifact_root, test, args.debug)
            status = test_obj.run_fio(fio)
            # The output is only checked when run_fio() reported success.
            if status:
                status = test_obj.check()
            if status:
                passed = passed + 1
                outcome = 'PASSED'
            else:
                failed = failed + 1
                outcome = 'FAILED'

        print("**********Test {0} {1}**********".format(test['test_id'], outcome))

    print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))

    # The exit status is the number of failed tests.
    sys.exit(failed)
1384
1385
# Run the tests only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()