fio: ioengine flag cleanup
[fio.git] / t / latency_percentiles.py
CommitLineData
20e70a59
VF
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# Copyright (c) 2020 Western Digital Corporation or its affiliates.
5#
6"""
7# latency_percentiles.py
8#
9# Test the code that produces latency percentiles
10# This is mostly to test the code changes to allow reporting
11# of slat, clat, and lat percentiles
12#
13# USAGE
14# python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
15#
16#
17# Test scenarios:
18#
19# - DONE json
20# unified rw reporting
21# compare with latency log
22# try various combinations of the ?lat_percentile options
23# null, aio
24# r, w, t
25# - DONE json+
26# check presence of latency bins
27# if the json percentiles match those from the raw data
28# then the latency bin values and counts are probably ok
29# - DONE terse
30# produce both terse, JSON output and confirm that they match
31# lat only; both lat and clat
32# - DONE sync_lat
33# confirm that sync_lat data appears
34# - MANUAL TESTING normal output:
35# null ioengine
36# enable all, but only clat and lat appear
37# enable subset of latency types
38# read, write, trim, unified
39# libaio ioengine
40# enable all latency types
41# enable subset of latency types
42# read, write, trim, unified
43# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
44# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
45# echo confirm that clat and lat percentiles appear
46# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
47# --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
48# echo confirm that only lat percentiles appear
49# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
50# --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
51# echo confirm that only clat percentiles appear
52# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
53# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
54# echo confirm that slat, clat, lat percentiles appear
55# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
56# --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
57# echo confirm that clat and lat percentiles appear
58# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
59# --ioengine=libaio --rw=randrw
60# echo confirm that clat percentiles appear for reads and writes
61# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
62# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
63# echo confirm that slat percentiles appear for both reads and writes
64# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
65# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
66# --rw=randrw --unified_rw_reporting=1
67# echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
68#./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
69# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
70# --rw=randrw --fsync=32
71# echo confirm that fsync latencies appear
72"""
73
74import os
75import csv
76import sys
77import json
78import math
79import time
80import argparse
81import platform
82import subprocess
1cbbba65 83from collections import Counter
20e70a59
VF
84from pathlib import Path
85
86
class FioLatTest():
    """fio latency percentile test."""

    def __init__(self, artifact_root, test_options, debug):
        """
        artifact_root root directory for artifacts (subdirectory will be created under here)
        test test specification
        """
        self.artifact_root = artifact_root
        self.test_options = test_options
        self.debug = debug
        self.json_data = None
        self.terse_data = None

        # One artifact subdirectory per test, named after the zero-padded test id.
        subdir = "{:03d}".format(self.test_options['test_id'])
        self.test_dir = os.path.join(self.artifact_root, subdir)
        if not os.path.exists(self.test_dir):
            os.mkdir(self.test_dir)

        # Base name for all output files produced by this test.
        self.filename = "latency{:03d}".format(self.test_options['test_id'])
109 def run_fio(self, fio_path):
110 """Run a test."""
111
112 fio_args = [
771dbb52 113 "--max-jobs=16",
20e70a59
VF
114 "--name=latency",
115 "--randrepeat=0",
116 "--norandommap",
117 "--time_based",
118 "--size=16M",
119 "--rwmixread=50",
120 "--group_reporting=1",
121 "--write_lat_log={0}".format(self.filename),
122 "--output={0}.out".format(self.filename),
123 "--ioengine={ioengine}".format(**self.test_options),
124 "--rw={rw}".format(**self.test_options),
125 "--runtime={runtime}".format(**self.test_options),
126 "--output-format={output-format}".format(**self.test_options),
127 ]
128 for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
f79e4dea
NC
129 'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs',
130 'cmdprio_percentage', 'bssplit', 'cmdprio_bssplit']:
20e70a59
VF
131 if opt in self.test_options:
132 option = '--{0}={{{0}}}'.format(opt)
133 fio_args.append(option.format(**self.test_options))
134
135 command = [fio_path] + fio_args
136 with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
137 command_file:
138 command_file.write("%s\n" % command)
139
140 passed = True
141 stdout_file = open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)), "w+")
142 stderr_file = open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)), "w+")
143 exitcode_file = open(os.path.join(self.test_dir,
144 "{0}.exitcode".format(self.filename)), "w+")
145 try:
146 proc = None
147 # Avoid using subprocess.run() here because when a timeout occurs,
148 # fio will be stopped with SIGKILL. This does not give fio a
149 # chance to clean up and means that child processes may continue
150 # running and submitting IO.
151 proc = subprocess.Popen(command,
152 stdout=stdout_file,
153 stderr=stderr_file,
154 cwd=self.test_dir,
155 universal_newlines=True)
156 proc.communicate(timeout=300)
157 exitcode_file.write('{0}\n'.format(proc.returncode))
158 passed &= (proc.returncode == 0)
159 except subprocess.TimeoutExpired:
160 proc.terminate()
161 proc.communicate()
162 assert proc.poll()
163 print("Timeout expired")
164 passed = False
165 except Exception:
166 if proc:
167 if not proc.poll():
168 proc.terminate()
169 proc.communicate()
170 print("Exception: %s" % sys.exc_info())
171 passed = False
172 finally:
173 stdout_file.close()
174 stderr_file.close()
175 exitcode_file.close()
176
177 if passed:
178 if 'json' in self.test_options['output-format']:
179 if not self.get_json():
180 print('Unable to decode JSON data')
181 passed = False
182 if 'terse' in self.test_options['output-format']:
183 if not self.get_terse():
184 print('Unable to decode terse data')
185 passed = False
186
187 return passed
188
189 def get_json(self):
190 """Convert fio JSON output into a python JSON object"""
191
192 filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
193 with open(filename, 'r') as file:
194 file_data = file.read()
195
196 #
197 # Sometimes fio informational messages are included at the top of the
198 # JSON output, especially under Windows. Try to decode output as JSON
199 # data, lopping off up to the first four lines
200 #
201 lines = file_data.splitlines()
202 for i in range(5):
203 file_data = '\n'.join(lines[i:])
204 try:
205 self.json_data = json.loads(file_data)
206 except json.JSONDecodeError:
207 continue
208 else:
209 return True
210
211 return False
212
213 def get_terse(self):
214 """Read fio output and return terse format data."""
215
216 filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
217 with open(filename, 'r') as file:
218 file_data = file.read()
219
220 #
84f9318f 221 # Read the first few lines and see if any of them begin with '3;'
20e70a59
VF
222 # If so, the line is probably terse output. Obviously, this only
223 # works for fio terse version 3 and it does not work for
224 # multi-line terse output
225 #
226 lines = file_data.splitlines()
227 for i in range(8):
228 file_data = lines[i]
84f9318f 229 if file_data.startswith('3;'):
20e70a59
VF
230 self.terse_data = file_data.split(';')
231 return True
232
233 return False
234
235 def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
236 unified=False):
237 """Check fio latency data.
238
239 ddir data direction to check (0=read, 1=write, 2=trim)
240 slat True if submission latency data available to check
241 clat True if completion latency data available to check
242 tlat True of total latency data available to check
243 plus True if we actually have json+ format data where additional checks can
244 be carried out
245 unified True if fio is reporting unified r/w data
246 """
247
248 types = {
249 'slat': slat,
250 'clat': clat,
251 'lat': tlat
252 }
253
254 retval = True
255
256 for lat in ['slat', 'clat', 'lat']:
257 this_iter = True
258 if not types[lat]:
259 if 'percentile' in jsondata[lat+'_ns']:
260 this_iter = False
261 print('unexpected %s percentiles found' % lat)
262 else:
263 print("%s percentiles skipped" % lat)
264 continue
265 else:
266 if 'percentile' not in jsondata[lat+'_ns']:
267 this_iter = False
268 print('%s percentiles not found in fio output' % lat)
269
270 #
271 # Check only for the presence/absence of json+
272 # latency bins. Future work can check the
fc002f14 273 # accuracy of the bin values and counts.
20e70a59
VF
274 #
275 # Because the latency percentiles are based on
276 # the bins, we can be confident that the bin
277 # values and counts are correct if fio's
278 # latency percentiles match what we compute
279 # from the raw data.
280 #
281 if plus:
282 if 'bins' not in jsondata[lat+'_ns']:
283 print('bins not found with json+ output format')
284 this_iter = False
285 else:
286 if not self.check_jsonplus(jsondata[lat+'_ns']):
287 this_iter = False
288 else:
289 if 'bins' in jsondata[lat+'_ns']:
290 print('json+ bins found with json output format')
291 this_iter = False
292
293 latencies = []
294 for i in range(10):
295 lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
296 if not os.path.exists(lat_file):
297 break
298 with open(lat_file, 'r', newline='') as file:
299 reader = csv.reader(file)
300 for line in reader:
301 if unified or int(line[2]) == ddir:
302 latencies.append(int(line[1]))
303
304 if int(jsondata['total_ios']) != len(latencies):
305 this_iter = False
306 print('%s: total_ios = %s, latencies logged = %d' % \
307 (lat, jsondata['total_ios'], len(latencies)))
308 elif self.debug:
309 print("total_ios %s match latencies logged" % jsondata['total_ios'])
310
311 latencies.sort()
312 ptiles = jsondata[lat+'_ns']['percentile']
313
314 for percentile in ptiles.keys():
315 #
316 # numpy.percentile(latencies, float(percentile),
317 # interpolation='higher')
318 # produces values that mostly match what fio reports
319 # however, in the tails of the distribution, the values produced
320 # by fio's and numpy.percentile's algorithms are occasionally off
321 # by one latency measurement. So instead of relying on the canned
322 # numpy.percentile routine, implement here fio's algorithm
323 #
324 rank = math.ceil(float(percentile)/100 * len(latencies))
325 if rank > 0:
326 index = rank - 1
327 else:
328 index = 0
329 value = latencies[int(index)]
330 fio_val = int(ptiles[percentile])
331 # The theory in stat.h says that the proportional error will be
332 # less than 1/128
333 if not self.similar(fio_val, value):
334 delta = abs(fio_val - value) / value
335 print("Error with %s %sth percentile: "
336 "fio: %d, expected: %d, proportional delta: %f" %
337 (lat, percentile, fio_val, value, delta))
338 print("Rank: %d, index: %d" % (rank, index))
339 this_iter = False
340 elif self.debug:
341 print('%s %sth percentile values match: %d, %d' %
342 (lat, percentile, fio_val, value))
343
344 if this_iter:
345 print("%s percentiles match" % lat)
346 else:
347 retval = False
348
349 return retval
350
351 @staticmethod
352 def check_empty(job):
353 """
354 Make sure JSON data is empty.
355
356 Some data structures should be empty. This function makes sure that they are.
357
358 job JSON object that we need to check for emptiness
359 """
360
361 return job['total_ios'] == 0 and \
362 job['slat_ns']['N'] == 0 and \
363 job['clat_ns']['N'] == 0 and \
364 job['lat_ns']['N'] == 0
365
366 def check_nocmdprio_lat(self, job):
367 """
1cbbba65 368 Make sure no per priority latencies appear.
20e70a59
VF
369
370 job JSON object to check
371 """
372
373 for ddir in ['read', 'write', 'trim']:
374 if ddir in job:
1cbbba65
NC
375 if 'prios' in job[ddir]:
376 print("Unexpected per priority latencies found in %s output" % ddir)
20e70a59
VF
377 return False
378
379 if self.debug:
1cbbba65 380 print("No per priority latencies found")
20e70a59
VF
381
382 return True
383
384 @staticmethod
385 def similar(approximation, actual):
386 """
387 Check whether the approximate values recorded by fio are within the theoretical bound.
388
389 Since it is impractical to store exact latency measurements for each and every IO, fio
390 groups similar latency measurements into variable-sized bins. The theory in stat.h says
391 that the proportional error will be less than 1/128. This function checks whether this
392 is true.
393
394 TODO This test will fail when comparing a value from the largest latency bin against its
395 actual measurement. Find some way to detect this and avoid failing.
396
397 approximation value of the bin used by fio to store a given latency
398 actual actual latency value
399 """
9b37832d
VF
400
401 # Avoid a division by zero. The smallest latency values have no error.
402 if actual == 0:
403 return approximation == 0
404
20e70a59
VF
405 delta = abs(approximation - actual) / actual
406 return delta <= 1/128
407
408 def check_jsonplus(self, jsondata):
409 """Check consistency of json+ data
410
411 When we have json+ data we can check the min value, max value, and
412 sample size reported by fio
413
414 jsondata json+ data that we need to check
415 """
416
417 retval = True
418
419 keys = [int(k) for k in jsondata['bins'].keys()]
420 values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
421 smallest = min(keys)
422 biggest = max(keys)
423 sampsize = sum(values)
424
425 if not self.similar(jsondata['min'], smallest):
426 retval = False
427 print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
428 elif self.debug:
429 print('json+ min values match: %d' % jsondata['min'])
430
431 if not self.similar(jsondata['max'], biggest):
432 retval = False
433 print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
434 elif self.debug:
435 print('json+ max values match: %d' % jsondata['max'])
436
437 if sampsize != jsondata['N']:
438 retval = False
439 print('reported sample size %d does not match json+ total count %d' % \
440 (jsondata['N'], sampsize))
441 elif self.debug:
442 print('json+ sample sizes match: %d' % sampsize)
443
444 return retval
445
446 def check_sync_lat(self, jsondata, plus=False):
447 """Check fsync latency percentile data.
448
449 All we can check is that some percentiles are reported, unless we have json+ data.
450 If we actually have json+ data then we can do more checking.
451
452 jsondata JSON data for fsync operations
453 plus True if we actually have json+ data
454 """
455 retval = True
456
457 if 'percentile' not in jsondata['lat_ns']:
458 print("Sync percentile data not found")
459 return False
460
461 if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
462 retval = False
463 print('Mismatch between total_ios and lat_ns sample size')
464 elif self.debug:
465 print('sync sample sizes match: %d' % jsondata['total_ios'])
466
467 if not plus:
468 if 'bins' in jsondata['lat_ns']:
469 print('Unexpected json+ bin data found')
470 return False
471
472 if not self.check_jsonplus(jsondata['lat_ns']):
473 retval = False
474
475 return retval
476
477 def check_terse(self, terse, jsondata):
478 """Compare terse latencies with JSON latencies.
479
480 terse terse format data for checking
481 jsondata JSON format data for checking
482 """
483
484 retval = True
485
486 for lat in terse:
487 split = lat.split('%')
488 pct = split[0]
489 terse_val = int(split[1][1:])
490 json_val = math.floor(jsondata[pct]/1000)
491 if terse_val != json_val:
492 retval = False
493 print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
494 (pct, jsondata[pct], json_val, terse_val))
495 elif self.debug:
496 print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))
497
498 return retval
499
500 def check_prio_latencies(self, jsondata, clat=True, plus=False):
1cbbba65 501 """Check consistency of per priority latencies.
20e70a59
VF
502
503 clat True if we should check clat data; other check lat data
504 plus True if we have json+ format data where additional checks can
505 be carried out
506 unified True if fio is reporting unified r/w data
507 """
508
509 if clat:
1cbbba65 510 obj = combined = 'clat_ns'
20e70a59 511 else:
1cbbba65 512 obj = combined = 'lat_ns'
20e70a59 513
1cbbba65
NC
514 if not 'prios' in jsondata or not combined in jsondata:
515 print("Error identifying per priority latencies")
20e70a59
VF
516 return False
517
1cbbba65
NC
518 sum_sample_size = sum([x[obj]['N'] for x in jsondata['prios']])
519 if sum_sample_size != jsondata[combined]['N']:
520 print("Per prio sample size sum %d != combined sample size %d" %
521 (sum_sample_size, jsondata[combined]['N']))
20e70a59
VF
522 return False
523 elif self.debug:
1cbbba65
NC
524 print("Per prio sample size sum %d == combined sample size %d" %
525 (sum_sample_size, jsondata[combined]['N']))
20e70a59 526
1cbbba65
NC
527 min_val = min([x[obj]['min'] for x in jsondata['prios']])
528 if min_val != jsondata[combined]['min']:
529 print("Min per prio min latency %d does not match min %d from combined data" %
530 (min_val, jsondata[combined]['min']))
20e70a59
VF
531 return False
532 elif self.debug:
1cbbba65
NC
533 print("Min per prio min latency %d matches min %d from combined data" %
534 (min_val, jsondata[combined]['min']))
20e70a59 535
1cbbba65
NC
536 max_val = max([x[obj]['max'] for x in jsondata['prios']])
537 if max_val != jsondata[combined]['max']:
538 print("Max per prio max latency %d does not match max %d from combined data" %
539 (max_val, jsondata[combined]['max']))
20e70a59
VF
540 return False
541 elif self.debug:
1cbbba65
NC
542 print("Max per prio max latency %d matches max %d from combined data" %
543 (max_val, jsondata[combined]['max']))
20e70a59 544
1cbbba65
NC
545 weighted_vals = [x[obj]['mean'] * x[obj]['N'] for x in jsondata['prios']]
546 weighted_avg = sum(weighted_vals) / jsondata[combined]['N']
20e70a59
VF
547 delta = abs(weighted_avg - jsondata[combined]['mean'])
548 if (delta / jsondata[combined]['mean']) > 0.0001:
1cbbba65 549 print("Difference between merged per prio weighted average %f mean "
20e70a59
VF
550 "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
551 return False
552 elif self.debug:
1cbbba65
NC
553 print("Merged per prio weighted average %f mean matches actual mean %f" %
554 (weighted_avg, jsondata[combined]['mean']))
20e70a59
VF
555
556 if plus:
1cbbba65
NC
557 for prio in jsondata['prios']:
558 if not self.check_jsonplus(prio[obj]):
559 return False
20e70a59 560
1cbbba65
NC
561 counter = Counter()
562 for prio in jsondata['prios']:
563 counter.update(prio[obj]['bins'])
564
565 bins = dict(counter)
20e70a59
VF
566
567 if len(bins) != len(jsondata[combined]['bins']):
1cbbba65
NC
568 print("Number of merged bins %d does not match number of overall bins %d" %
569 (len(bins), len(jsondata[combined]['bins'])))
20e70a59
VF
570 return False
571 elif self.debug:
1cbbba65
NC
572 print("Number of merged bins %d matches number of overall bins %d" %
573 (len(bins), len(jsondata[combined]['bins'])))
20e70a59
VF
574
575 for duration in bins.keys():
576 if bins[duration] != jsondata[combined]['bins'][duration]:
1cbbba65
NC
577 print("Merged per prio count does not match overall count for duration %d" %
578 duration)
20e70a59
VF
579 return False
580
1cbbba65 581 print("Merged per priority latency data match combined latency data")
20e70a59
VF
582 return True
583
584 def check(self):
585 """Check test output."""
586
587 raise NotImplementedError()
588
589
class Test001(FioLatTest):
    """Test object for Test 1."""

    def check(self):
        """Check Test 1 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['write']):
            print("Unexpected write data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['read'], 0, slat=False) and passed

        return passed
612
613
class Test002(FioLatTest):
    """Test object for Test 2."""

    def check(self):
        """Check Test 2 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['read']):
            print("Unexpected read data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['write'], 1, slat=False, clat=False) and passed

        return passed
636
637
class Test003(FioLatTest):
    """Test object for Test 3."""

    def check(self):
        """Check Test 3 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['read']):
            print("Unexpected read data found in output")
            passed = False
        if not self.check_empty(job['write']):
            print("Unexpected write data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['trim'], 2, slat=False, tlat=False) and passed

        return passed
660
661
class Test004(FioLatTest):
    """Test object for Tests 4, 13."""

    def check(self):
        """Check Test 4, 13 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['write']):
            print("Unexpected write data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['read'], 0, plus=True) and passed

        return passed
684
685
class Test005(FioLatTest):
    """Test object for Test 5."""

    def check(self):
        """Check Test 5 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['read']):
            print("Unexpected read data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['write'], 1, slat=False, plus=True) and passed

        return passed
708
709
class Test006(FioLatTest):
    """Test object for Test 6."""

    def check(self):
        """Check Test 6 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['write']):
            print("Unexpected write data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True) and passed

        return passed
732
733
class Test007(FioLatTest):
    """Test object for Test 7."""

    def check(self):
        """Check Test 7 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True) and passed
        passed = self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True) and passed

        return passed
754
755
class Test008(FioLatTest):
    """Test object for Tests 8, 14."""

    def check(self):
        """Check Test 8, 14 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # With unified reporting no per-direction sections should exist.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['mixed'], 0, plus=True, unified=True) and passed

        return passed
775
776
class Test009(FioLatTest):
    """Test object for Test 9."""

    def check(self):
        """Check Test 9 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['read']):
            print("Unexpected read data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_sync_lat(job['sync'], plus=True):
            print("Error checking fsync latency data")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['write'], 1, slat=False, plus=True) and passed

        return passed
802
803
class Test010(FioLatTest):
    """Test object for Test 10."""

    def check(self):
        """Check Test 10 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['read'], 0, plus=True) and passed
        passed = self.check_latencies(job['write'], 1, plus=True) and passed
        passed = self.check_terse(self.terse_data[17:34],
                                  job['read']['lat_ns']['percentile']) and passed
        passed = self.check_terse(self.terse_data[58:75],
                                  job['write']['lat_ns']['percentile']) and passed
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.

        return passed
828
829
class Test011(FioLatTest):
    """Test object for Test 11."""

    def check(self):
        """Check Test 11 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed = self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True) and passed
        passed = self.check_latencies(job['write'], 1, slat=False, clat=False,
                                      plus=True) and passed
        passed = self.check_terse(self.terse_data[17:34],
                                  job['read']['lat_ns']['percentile']) and passed
        passed = self.check_terse(self.terse_data[58:75],
                                  job['write']['lat_ns']['percentile']) and passed
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.

        return passed
854
855
class Test015(FioLatTest):
    """Test object for Test 15."""

    def check(self):
        """Check Test 15 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['write']):
            print("Unexpected write data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        passed = self.check_latencies(job['read'], 0, plus=True) and passed
        passed = self.check_prio_latencies(job['read'], clat=False, plus=True) and passed

        return passed
876
877
class Test016(FioLatTest):
    """Test object for Test 16."""

    def check(self):
        """Check Test 16 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['read']):
            print("Unexpected read data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        passed = self.check_latencies(job['write'], 1, slat=False, plus=True) and passed
        passed = self.check_prio_latencies(job['write'], clat=False, plus=True) and passed

        return passed
898
899
class Test017(FioLatTest):
    """Test object for Test 17."""

    def check(self):
        """Check Test 17 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['write']):
            print("Unexpected write data found in output")
            passed = False
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        passed = self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True) and passed
        passed = self.check_prio_latencies(job['read'], plus=True) and passed

        return passed
920
921
class Test018(FioLatTest):
    """Test object for Test 18."""

    def check(self):
        """Check Test 18 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        passed = self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True) and passed
        passed = self.check_latencies(job['write'], 1, clat=False, tlat=False,
                                      plus=True) and passed

        # We actually have json+ data but setting plus=False below avoids checking the
        # json+ bins which did not exist for clat and lat because this job is run with
        # clat_percentiles=0, lat_percentiles=0, However, we can still check the summary
        # statistics
        passed = self.check_prio_latencies(job['write'], plus=False) and passed
        passed = self.check_prio_latencies(job['read'], plus=False) and passed

        return passed
946
947
class Test019(FioLatTest):
    """Test object for Tests 19, 20."""

    def check(self):
        """Check Test 19, 20 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # With unified reporting no per-direction sections should exist.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False

        passed = self.check_latencies(job['mixed'], 0, plus=True, unified=True) and passed
        passed = self.check_prio_latencies(job['mixed'], clat=False, plus=True) and passed

        return passed
965
966
f79e4dea
NC
class Test021(FioLatTest):
    """Test object for Test 21."""

    def check(self):
        """Check Test 21 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        passed = self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True) and passed
        passed = self.check_latencies(job['write'], 1, slat=False, tlat=False,
                                      plus=True) and passed
        passed = self.check_prio_latencies(job['read'], clat=True, plus=True) and passed
        passed = self.check_prio_latencies(job['write'], clat=True, plus=True) and passed

        return passed
986
987
20e70a59
VF
def parse_args():
    """Parse command-line arguments.

    Returns an argparse.Namespace with attributes fio (path to the fio
    executable or None), artifact_root (output directory or None), debug
    (bool), skip (list of test IDs or None), and run_only (list of test
    IDs or None).
    """

    parser = argparse.ArgumentParser()
    # Fixed typo: this option takes the path to the fio executable, not "file".
    parser.add_argument('-f', '--fio', help='path to fio executable (e.g., ./fio)')
    parser.add_argument('-a', '--artifact-root', help='artifact root directory')
    parser.add_argument('-d', '--debug', help='enable debug output', action='store_true')
    parser.add_argument('-s', '--skip', nargs='+', type=int,
                        help='list of test(s) to skip')
    parser.add_argument('-o', '--run-only', nargs='+', type=int,
                        help='list of test(s) to run, skipping all others')
    args = parser.parse_args()

    return args
1002
1003
def main():
    """Run tests of fio latency percentile reporting.

    Builds the list of test-case descriptions, runs fio for each one
    (unless skipped), checks the output with the matching Test* object,
    and exits with the number of failed tests as the process status.
    """

    args = parse_args()

    # Default artifact directory is timestamped so repeated runs don't collide.
    artifact_root = args.artifact_root if args.artifact_root else \
        "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
    os.mkdir(artifact_root)
    print("Artifact directory is %s" % artifact_root)

    if args.fio:
        fio = str(Path(args.fio).absolute())
    else:
        fio = 'fio'
    print("fio path is %s" % fio)

    # Select the platform's asynchronous ioengine for the aio-based tests.
    if platform.system() == 'Linux':
        aio = 'libaio'
    elif platform.system() == 'Windows':
        aio = 'windowsaio'
    else:
        aio = 'posixaio'

    test_list = [
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            "test_id": 1,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            "test_obj": Test001,
        },
        {
            # randwrite, null
            # enable lat only
            "test_id": 2,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            "test_obj": Test002,
        },
        {
            # randtrim, null
            # enable clat only
            "test_id": 3,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": 'null',
            'rw': 'randtrim',
            "test_obj": Test003,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            "test_id": 4,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test004,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            "test_id": 5,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            "test_obj": Test005,
        },
        {
            # randread, aio
            # by default only clat should appear
            "test_id": 6,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test006,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            "test_id": 7,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test007,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            "test_id": 8,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            "test_obj": Test008,
        },
        {
            # randwrite, null
            # enable slat, clat, lat
            # fsync
            "test_id": 9,
            "runtime": 2,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            'fsync': 32,
            "test_obj": Test009,
        },
        {
            # 50/50 r/w, aio
            # enable slat, clat, lat
            "test_id": 10,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test010,
        },
        {
            # 50/50 r/w, aio
            # enable only lat
            "test_id": 11,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test011,
        },
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            # same as Test 1 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 12,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test001,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 13,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test004,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 14,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'numjobs': 4,
            "test_obj": Test008,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add cmdprio_percentage
            "test_id": 15,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test015,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            # same as Test 5 except add cmdprio_percentage
            "test_id": 16,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            'cmdprio_percentage': 50,
            "test_obj": Test016,
        },
        {
            # randread, aio
            # by default only clat should appear
            # same as Test 6 except add cmdprio_percentage
            "test_id": 17,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test017,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            # same as Test 7 except add cmdprio_percentage
            "test_id": 18,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'cmdprio_percentage': 50,
            "test_obj": Test018,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add cmdprio_percentage
            "test_id": 19,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            "test_obj": Test019,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 19 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 20,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            'numjobs': 4,
            "test_obj": Test019,
        },
        {
            # r/w, aio
            # enable only clat
            # test bssplit and cmdprio_bssplit
            "test_id": 21,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'bssplit': '64k/40:1024k/60',
            'cmdprio_bssplit': '64k/25/1/1:64k/75/3/2:1024k/0',
            "test_obj": Test021,
        },
        {
            # r/w, aio
            # enable only clat
            # same as Test 21 except add numjobs = 4 to test
            # sum_thread_stats() changes
            "test_id": 22,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'bssplit': '64k/40:1024k/60',
            'cmdprio_bssplit': '64k/25/1/1:64k/75/3/2:1024k/0',
            'numjobs': 4,
            "test_obj": Test021,
        },
    ]

    passed = 0
    failed = 0
    skipped = 0

    for test in test_list:
        # Honor -s/--skip and -o/--run-only test selections.
        if (args.skip and test['test_id'] in args.skip) or \
                (args.run_only and test['test_id'] not in args.run_only):
            skipped = skipped + 1
            outcome = 'SKIPPED (User request)'
        # cmdprio tests require Linux and root privileges; skip otherwise.
        elif (platform.system() != 'Linux' or os.geteuid() != 0) and \
                ('cmdprio_percentage' in test or 'cmdprio_bssplit' in test):
            skipped = skipped + 1
            outcome = 'SKIPPED (Linux root required for cmdprio tests)'
        else:
            # A test passes only if fio ran successfully AND its output checks out.
            test_obj = test['test_obj'](artifact_root, test, args.debug)
            status = test_obj.run_fio(fio)
            if status:
                status = test_obj.check()
            if status:
                passed = passed + 1
                outcome = 'PASSED'
            else:
                failed = failed + 1
                outcome = 'FAILED'

        print("**********Test {0} {1}**********".format(test['test_id'], outcome))

    print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))

    # Exit status is the failure count, so any failure yields a non-zero status.
    sys.exit(failed)
1384
1385
# Script entry point.
if __name__ == '__main__':
    main()