stat: report clat stats on a per priority granularity
[fio.git] / t / latency_percentiles.py
CommitLineData
20e70a59
VF
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# Copyright (c) 2020 Western Digital Corporation or its affiliates.
5#
6"""
7# latency_percentiles.py
8#
9# Test the code that produces latency percentiles
10# This is mostly to test the code changes to allow reporting
11# of slat, clat, and lat percentiles
12#
13# USAGE
# python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
15#
16#
17# Test scenarios:
18#
19# - DONE json
20# unified rw reporting
21# compare with latency log
22# try various combinations of the ?lat_percentile options
23# null, aio
24# r, w, t
25# - DONE json+
26# check presence of latency bins
27# if the json percentiles match those from the raw data
28# then the latency bin values and counts are probably ok
29# - DONE terse
30# produce both terse, JSON output and confirm that they match
31# lat only; both lat and clat
32# - DONE sync_lat
33# confirm that sync_lat data appears
34# - MANUAL TESTING normal output:
35# null ioengine
36# enable all, but only clat and lat appear
37# enable subset of latency types
38# read, write, trim, unified
39# libaio ioengine
40# enable all latency types
41# enable subset of latency types
42# read, write, trim, unified
43# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
44# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
45# echo confirm that clat and lat percentiles appear
46# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
47# --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
48# echo confirm that only lat percentiles appear
49# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
50# --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
51# echo confirm that only clat percentiles appear
52# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
53# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
54# echo confirm that slat, clat, lat percentiles appear
55# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
56# --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
57# echo confirm that clat and lat percentiles appear
58# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#              --ioengine=libaio --rw=randrw
60# echo confirm that clat percentiles appear for reads and writes
61# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
62# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
63# echo confirm that slat percentiles appear for both reads and writes
64# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
65# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
66# --rw=randrw --unified_rw_reporting=1
67# echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
69# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
70# --rw=randrw --fsync=32
71# echo confirm that fsync latencies appear
72"""
73
74import os
75import csv
76import sys
77import json
78import math
79import time
80import argparse
81import platform
82import subprocess
83from pathlib import Path
84
85
class FioLatTest():
    """fio latency percentile test."""

    def __init__(self, artifact_root, test_options, debug):
        """Set up one test case and create its artifact directory.

        artifact_root   root directory for artifacts (subdirectory will be created under here)
        test_options    test specification (dict; must contain 'test_id')
        debug           True to enable verbose diagnostic output
        """
        self.artifact_root = artifact_root
        self.test_options = test_options
        self.debug = debug
        self.filename = None
        self.json_data = None
        self.terse_data = None

        # Each test gets its own zero-padded subdirectory, e.g. "007"
        test_id = self.test_options['test_id']
        self.test_dir = os.path.join(self.artifact_root, "{:03d}".format(test_id))
        if not os.path.exists(self.test_dir):
            os.mkdir(self.test_dir)

        # Base name shared by all per-test output files
        self.filename = "latency{:03d}".format(test_id)
107
108 def run_fio(self, fio_path):
109 """Run a test."""
110
111 fio_args = [
771dbb52 112 "--max-jobs=16",
20e70a59
VF
113 "--name=latency",
114 "--randrepeat=0",
115 "--norandommap",
116 "--time_based",
117 "--size=16M",
118 "--rwmixread=50",
119 "--group_reporting=1",
120 "--write_lat_log={0}".format(self.filename),
121 "--output={0}.out".format(self.filename),
122 "--ioengine={ioengine}".format(**self.test_options),
123 "--rw={rw}".format(**self.test_options),
124 "--runtime={runtime}".format(**self.test_options),
125 "--output-format={output-format}".format(**self.test_options),
126 ]
127 for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
128 'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs', 'cmdprio_percentage']:
129 if opt in self.test_options:
130 option = '--{0}={{{0}}}'.format(opt)
131 fio_args.append(option.format(**self.test_options))
132
133 command = [fio_path] + fio_args
134 with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
135 command_file:
136 command_file.write("%s\n" % command)
137
138 passed = True
139 stdout_file = open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)), "w+")
140 stderr_file = open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)), "w+")
141 exitcode_file = open(os.path.join(self.test_dir,
142 "{0}.exitcode".format(self.filename)), "w+")
143 try:
144 proc = None
145 # Avoid using subprocess.run() here because when a timeout occurs,
146 # fio will be stopped with SIGKILL. This does not give fio a
147 # chance to clean up and means that child processes may continue
148 # running and submitting IO.
149 proc = subprocess.Popen(command,
150 stdout=stdout_file,
151 stderr=stderr_file,
152 cwd=self.test_dir,
153 universal_newlines=True)
154 proc.communicate(timeout=300)
155 exitcode_file.write('{0}\n'.format(proc.returncode))
156 passed &= (proc.returncode == 0)
157 except subprocess.TimeoutExpired:
158 proc.terminate()
159 proc.communicate()
160 assert proc.poll()
161 print("Timeout expired")
162 passed = False
163 except Exception:
164 if proc:
165 if not proc.poll():
166 proc.terminate()
167 proc.communicate()
168 print("Exception: %s" % sys.exc_info())
169 passed = False
170 finally:
171 stdout_file.close()
172 stderr_file.close()
173 exitcode_file.close()
174
175 if passed:
176 if 'json' in self.test_options['output-format']:
177 if not self.get_json():
178 print('Unable to decode JSON data')
179 passed = False
180 if 'terse' in self.test_options['output-format']:
181 if not self.get_terse():
182 print('Unable to decode terse data')
183 passed = False
184
185 return passed
186
187 def get_json(self):
188 """Convert fio JSON output into a python JSON object"""
189
190 filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
191 with open(filename, 'r') as file:
192 file_data = file.read()
193
194 #
195 # Sometimes fio informational messages are included at the top of the
196 # JSON output, especially under Windows. Try to decode output as JSON
197 # data, lopping off up to the first four lines
198 #
199 lines = file_data.splitlines()
200 for i in range(5):
201 file_data = '\n'.join(lines[i:])
202 try:
203 self.json_data = json.loads(file_data)
204 except json.JSONDecodeError:
205 continue
206 else:
207 return True
208
209 return False
210
211 def get_terse(self):
212 """Read fio output and return terse format data."""
213
214 filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
215 with open(filename, 'r') as file:
216 file_data = file.read()
217
218 #
84f9318f 219 # Read the first few lines and see if any of them begin with '3;'
20e70a59
VF
220 # If so, the line is probably terse output. Obviously, this only
221 # works for fio terse version 3 and it does not work for
222 # multi-line terse output
223 #
224 lines = file_data.splitlines()
225 for i in range(8):
226 file_data = lines[i]
84f9318f 227 if file_data.startswith('3;'):
20e70a59
VF
228 self.terse_data = file_data.split(';')
229 return True
230
231 return False
232
233 def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
234 unified=False):
235 """Check fio latency data.
236
237 ddir data direction to check (0=read, 1=write, 2=trim)
238 slat True if submission latency data available to check
239 clat True if completion latency data available to check
240 tlat True of total latency data available to check
241 plus True if we actually have json+ format data where additional checks can
242 be carried out
243 unified True if fio is reporting unified r/w data
244 """
245
246 types = {
247 'slat': slat,
248 'clat': clat,
249 'lat': tlat
250 }
251
252 retval = True
253
254 for lat in ['slat', 'clat', 'lat']:
255 this_iter = True
256 if not types[lat]:
257 if 'percentile' in jsondata[lat+'_ns']:
258 this_iter = False
259 print('unexpected %s percentiles found' % lat)
260 else:
261 print("%s percentiles skipped" % lat)
262 continue
263 else:
264 if 'percentile' not in jsondata[lat+'_ns']:
265 this_iter = False
266 print('%s percentiles not found in fio output' % lat)
267
268 #
269 # Check only for the presence/absence of json+
270 # latency bins. Future work can check the
271 # accurracy of the bin values and counts.
272 #
273 # Because the latency percentiles are based on
274 # the bins, we can be confident that the bin
275 # values and counts are correct if fio's
276 # latency percentiles match what we compute
277 # from the raw data.
278 #
279 if plus:
280 if 'bins' not in jsondata[lat+'_ns']:
281 print('bins not found with json+ output format')
282 this_iter = False
283 else:
284 if not self.check_jsonplus(jsondata[lat+'_ns']):
285 this_iter = False
286 else:
287 if 'bins' in jsondata[lat+'_ns']:
288 print('json+ bins found with json output format')
289 this_iter = False
290
291 latencies = []
292 for i in range(10):
293 lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
294 if not os.path.exists(lat_file):
295 break
296 with open(lat_file, 'r', newline='') as file:
297 reader = csv.reader(file)
298 for line in reader:
299 if unified or int(line[2]) == ddir:
300 latencies.append(int(line[1]))
301
302 if int(jsondata['total_ios']) != len(latencies):
303 this_iter = False
304 print('%s: total_ios = %s, latencies logged = %d' % \
305 (lat, jsondata['total_ios'], len(latencies)))
306 elif self.debug:
307 print("total_ios %s match latencies logged" % jsondata['total_ios'])
308
309 latencies.sort()
310 ptiles = jsondata[lat+'_ns']['percentile']
311
312 for percentile in ptiles.keys():
313 #
314 # numpy.percentile(latencies, float(percentile),
315 # interpolation='higher')
316 # produces values that mostly match what fio reports
317 # however, in the tails of the distribution, the values produced
318 # by fio's and numpy.percentile's algorithms are occasionally off
319 # by one latency measurement. So instead of relying on the canned
320 # numpy.percentile routine, implement here fio's algorithm
321 #
322 rank = math.ceil(float(percentile)/100 * len(latencies))
323 if rank > 0:
324 index = rank - 1
325 else:
326 index = 0
327 value = latencies[int(index)]
328 fio_val = int(ptiles[percentile])
329 # The theory in stat.h says that the proportional error will be
330 # less than 1/128
331 if not self.similar(fio_val, value):
332 delta = abs(fio_val - value) / value
333 print("Error with %s %sth percentile: "
334 "fio: %d, expected: %d, proportional delta: %f" %
335 (lat, percentile, fio_val, value, delta))
336 print("Rank: %d, index: %d" % (rank, index))
337 this_iter = False
338 elif self.debug:
339 print('%s %sth percentile values match: %d, %d' %
340 (lat, percentile, fio_val, value))
341
342 if this_iter:
343 print("%s percentiles match" % lat)
344 else:
345 retval = False
346
347 return retval
348
349 @staticmethod
350 def check_empty(job):
351 """
352 Make sure JSON data is empty.
353
354 Some data structures should be empty. This function makes sure that they are.
355
356 job JSON object that we need to check for emptiness
357 """
358
359 return job['total_ios'] == 0 and \
360 job['slat_ns']['N'] == 0 and \
361 job['clat_ns']['N'] == 0 and \
362 job['lat_ns']['N'] == 0
363
364 def check_nocmdprio_lat(self, job):
365 """
366 Make sure no high/low priority latencies appear.
367
368 job JSON object to check
369 """
370
371 for ddir in ['read', 'write', 'trim']:
372 if ddir in job:
373 if 'lat_high_prio' in job[ddir] or 'lat_low_prio' in job[ddir] or \
374 'clat_high_prio' in job[ddir] or 'clat_low_prio' in job[ddir]:
375 print("Unexpected high/low priority latencies found in %s output" % ddir)
376 return False
377
378 if self.debug:
379 print("No high/low priority latencies found")
380
381 return True
382
383 @staticmethod
384 def similar(approximation, actual):
385 """
386 Check whether the approximate values recorded by fio are within the theoretical bound.
387
388 Since it is impractical to store exact latency measurements for each and every IO, fio
389 groups similar latency measurements into variable-sized bins. The theory in stat.h says
390 that the proportional error will be less than 1/128. This function checks whether this
391 is true.
392
393 TODO This test will fail when comparing a value from the largest latency bin against its
394 actual measurement. Find some way to detect this and avoid failing.
395
396 approximation value of the bin used by fio to store a given latency
397 actual actual latency value
398 """
9b37832d
VF
399
400 # Avoid a division by zero. The smallest latency values have no error.
401 if actual == 0:
402 return approximation == 0
403
20e70a59
VF
404 delta = abs(approximation - actual) / actual
405 return delta <= 1/128
406
407 def check_jsonplus(self, jsondata):
408 """Check consistency of json+ data
409
410 When we have json+ data we can check the min value, max value, and
411 sample size reported by fio
412
413 jsondata json+ data that we need to check
414 """
415
416 retval = True
417
418 keys = [int(k) for k in jsondata['bins'].keys()]
419 values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
420 smallest = min(keys)
421 biggest = max(keys)
422 sampsize = sum(values)
423
424 if not self.similar(jsondata['min'], smallest):
425 retval = False
426 print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
427 elif self.debug:
428 print('json+ min values match: %d' % jsondata['min'])
429
430 if not self.similar(jsondata['max'], biggest):
431 retval = False
432 print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
433 elif self.debug:
434 print('json+ max values match: %d' % jsondata['max'])
435
436 if sampsize != jsondata['N']:
437 retval = False
438 print('reported sample size %d does not match json+ total count %d' % \
439 (jsondata['N'], sampsize))
440 elif self.debug:
441 print('json+ sample sizes match: %d' % sampsize)
442
443 return retval
444
445 def check_sync_lat(self, jsondata, plus=False):
446 """Check fsync latency percentile data.
447
448 All we can check is that some percentiles are reported, unless we have json+ data.
449 If we actually have json+ data then we can do more checking.
450
451 jsondata JSON data for fsync operations
452 plus True if we actually have json+ data
453 """
454 retval = True
455
456 if 'percentile' not in jsondata['lat_ns']:
457 print("Sync percentile data not found")
458 return False
459
460 if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
461 retval = False
462 print('Mismatch between total_ios and lat_ns sample size')
463 elif self.debug:
464 print('sync sample sizes match: %d' % jsondata['total_ios'])
465
466 if not plus:
467 if 'bins' in jsondata['lat_ns']:
468 print('Unexpected json+ bin data found')
469 return False
470
471 if not self.check_jsonplus(jsondata['lat_ns']):
472 retval = False
473
474 return retval
475
476 def check_terse(self, terse, jsondata):
477 """Compare terse latencies with JSON latencies.
478
479 terse terse format data for checking
480 jsondata JSON format data for checking
481 """
482
483 retval = True
484
485 for lat in terse:
486 split = lat.split('%')
487 pct = split[0]
488 terse_val = int(split[1][1:])
489 json_val = math.floor(jsondata[pct]/1000)
490 if terse_val != json_val:
491 retval = False
492 print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
493 (pct, jsondata[pct], json_val, terse_val))
494 elif self.debug:
495 print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))
496
497 return retval
498
    def check_prio_latencies(self, jsondata, clat=True, plus=False):
        """Check consistency of high/low priority latencies.

        jsondata    JSON data (for one data direction) containing the high/low
                    priority latency statistics to cross-check
        clat        True if we should check clat data; otherwise check lat data
        plus        True if we have json+ format data where additional checks can
                    be carried out
        """

        # Select the key names for the latency type being checked
        if clat:
            high = 'clat_high_prio'
            low = 'clat_low_prio'
            combined = 'clat_ns'
        else:
            high = 'lat_high_prio'
            low = 'lat_low_prio'
            combined = 'lat_ns'

        if not high in jsondata or not low in jsondata or not combined in jsondata:
            print("Error identifying high/low priority latencies")
            return False

        # Sample counts of the two priority classes must sum to the combined count
        if jsondata[high]['N'] + jsondata[low]['N'] != jsondata[combined]['N']:
            print("High %d + low %d != combined sample size %d" % \
                    (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
            return False
        elif self.debug:
            print("High %d + low %d == combined sample size %d" % \
                    (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))

        # The combined min must be the smaller of the high/low minimums
        if min(jsondata[high]['min'], jsondata[low]['min']) != jsondata[combined]['min']:
            print("Min of high %d, low %d min latencies does not match min %d from combined data" % \
                    (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
            return False
        elif self.debug:
            print("Min of high %d, low %d min latencies matches min %d from combined data" % \
                    (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))

        # The combined max must be the larger of the high/low maximums
        if max(jsondata[high]['max'], jsondata[low]['max']) != jsondata[combined]['max']:
            print("Max of high %d, low %d max latencies does not match max %d from combined data" % \
                    (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
            return False
        elif self.debug:
            print("Max of high %d, low %d max latencies matches max %d from combined data" % \
                    (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))

        # The combined mean must equal the sample-size-weighted average of the
        # high and low priority means, to within 0.01%
        weighted_avg = (jsondata[high]['mean'] * jsondata[high]['N'] + \
                        jsondata[low]['mean'] * jsondata[low]['N']) / jsondata[combined]['N']
        delta = abs(weighted_avg - jsondata[combined]['mean'])
        if (delta / jsondata[combined]['mean']) > 0.0001:
            print("Difference between weighted average %f of high, low means "
                  "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
            return False
        elif self.debug:
            print("Weighted average %f of high, low means matches actual mean %f" % \
                    (weighted_avg, jsondata[combined]['mean']))

        if plus:
            if not self.check_jsonplus(jsondata[high]):
                return False
            if not self.check_jsonplus(jsondata[low]):
                return False

            # Merge the high and low bins: where the same duration appears in
            # both, the counts add. The result must match the combined bins
            # both in bin count and in the per-duration totals.
            bins = {**jsondata[high]['bins'], **jsondata[low]['bins']}
            for duration in bins.keys():
                if duration in jsondata[high]['bins'] and duration in jsondata[low]['bins']:
                    bins[duration] = jsondata[high]['bins'][duration] + \
                            jsondata[low]['bins'][duration]

            if len(bins) != len(jsondata[combined]['bins']):
                print("Number of combined high/low bins does not match number of overall bins")
                return False
            elif self.debug:
                print("Number of bins from merged high/low data matches number of overall bins")

            for duration in bins.keys():
                if bins[duration] != jsondata[combined]['bins'][duration]:
                    print("Merged high/low count does not match overall count for duration %d" \
                            % duration)
                    return False

        print("Merged high/low priority latency data match combined latency data")
        return True
582
583 def check(self):
584 """Check test output."""
585
586 raise NotImplementedError()
587
588
class Test001(FioLatTest):
    """Test object for Test 1."""

    def check(self):
        """Check Test 1 output: read-only null-engine job, no slat."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False)

        return passed
611
612
class Test002(FioLatTest):
    """Test object for Test 2."""

    def check(self):
        """Check Test 2 output: write-only job reporting lat percentiles only."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, clat=False)

        return passed
635
636
class Test003(FioLatTest):
    """Test object for Test 3."""

    def check(self):
        """Check Test 3 output: trim-only job reporting clat percentiles only."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('read', 'write'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)

        return passed
659
660
class Test004(FioLatTest):
    """Test object for Tests 4, 13."""

    def check(self):
        """Check Test 4, 13 output: read-only job with json+ data."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)

        return passed
683
684
class Test005(FioLatTest):
    """Test object for Test 5."""

    def check(self):
        """Check Test 5 output: write-only job with json+ data, no slat."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return passed
707
708
class Test006(FioLatTest):
    """Test object for Test 6."""

    def check(self):
        """Check Test 6 output: read-only job with json+ data, clat only."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)

        return passed
731
732
class Test007(FioLatTest):
    """Test object for Test 7."""

    def check(self):
        """Check Test 7 output: mixed r/w job with json+ data, slat only."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
        passed &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)

        return passed
753
754
class Test008(FioLatTest):
    """Test object for Tests 8, 14."""

    def check(self):
        """Check Test 8, 14 output: unified reporting, only 'mixed' data allowed."""

        job = self.json_data['jobs'][0]

        passed = True
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)

        return passed
774
775
class Test009(FioLatTest):
    """Test object for Test 9."""

    def check(self):
        """Check Test 9 output: write job with fsync latency data."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_sync_lat(job['sync'], plus=True):
            print("Error checking fsync latency data")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return passed
801
802
class Test010(FioLatTest):
    """Test object for Test 10."""

    def check(self):
        """Check Test 10 output: json+ and terse output must agree."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)
        passed &= self.check_latencies(job['write'], 1, plus=True)
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.
        passed &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        passed &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])

        return passed
827
828
class Test011(FioLatTest):
    """Test object for Test 11."""

    def check(self):
        """Check Test 11 output: lat-only percentiles, json+ and terse must agree."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
        passed &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.
        passed &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        passed &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])

        return passed
853
854
class Test015(FioLatTest):
    """Test object for Test 15."""

    def check(self):
        """Check Test 15 output: read job with high/low priority lat data."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)
        passed &= self.check_prio_latencies(job['read'], clat=False, plus=True)

        return passed
875
876
class Test016(FioLatTest):
    """Test object for Test 16."""

    def check(self):
        """Check Test 16 output: write job with high/low priority lat data."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)
        passed &= self.check_prio_latencies(job['write'], clat=False, plus=True)

        return passed
897
898
class Test017(FioLatTest):
    """Test object for Test 17."""

    def check(self):
        """Check Test 17 output: read job with high/low priority clat data."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
        passed &= self.check_prio_latencies(job['read'], plus=True)

        return passed
919
920
class Test018(FioLatTest):
    """Test object for Test 18."""

    def check(self):
        """Check Test 18 output: mixed r/w job, slat only, priority summary stats."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        passed &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
        passed &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)

        # We actually have json+ data but setting plus=False below avoids checking the
        # json+ bins which did not exist for clat and lat because this job is run with
        # clat_percentiles=0, lat_percentiles=0. However, we can still check the summary
        # statistics.
        passed &= self.check_prio_latencies(job['write'], plus=False)
        passed &= self.check_prio_latencies(job['read'], plus=False)

        return passed
945
946
class Test019(FioLatTest):
    """Test object for Tests 19, 20."""

    def check(self):
        """Check Test 19, 20 output: unified reporting with priority lat data."""

        job = self.json_data['jobs'][0]

        passed = True
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False

        passed &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
        passed &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)

        return passed
964
965
def parse_args():
    """Parse and return the command-line arguments."""

    parser = argparse.ArgumentParser()
    arg_specs = [
        (('-f', '--fio'), {'help': 'path to file executable (e.g., ./fio)'}),
        (('-a', '--artifact-root'), {'help': 'artifact root directory'}),
        (('-d', '--debug'), {'help': 'enable debug output', 'action': 'store_true'}),
        (('-s', '--skip'), {'nargs': '+', 'type': int,
                            'help': 'list of test(s) to skip'}),
        (('-o', '--run-only'), {'nargs': '+', 'type': int,
                                'help': 'list of test(s) to run, skipping all others'}),
    ]
    for flags, options in arg_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args()
980
981
def main():
    """Run tests of fio latency percentile reporting.

    Builds the table of test cases, runs each one (unless skipped by the
    user or lacking required privileges), prints a per-test outcome plus
    a summary, and exits with the number of failed tests as the status.
    """

    args = parse_args()

    artifact_root = args.artifact_root if args.artifact_root else \
        "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
    os.mkdir(artifact_root)
    print("Artifact directory is %s" % artifact_root)

    if args.fio:
        fio = str(Path(args.fio).absolute())
    else:
        fio = 'fio'
    print("fio path is %s" % fio)

    # Pick the platform's native asynchronous ioengine for the aio tests
    if platform.system() == 'Linux':
        aio = 'libaio'
    elif platform.system() == 'Windows':
        aio = 'windowsaio'
    else:
        aio = 'posixaio'

    test_list = [
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            "test_id": 1,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            "test_obj": Test001,
        },
        {
            # randwrite, null
            # enable lat only
            "test_id": 2,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            "test_obj": Test002,
        },
        {
            # randtrim, null
            # enable clat only
            "test_id": 3,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": 'null',
            'rw': 'randtrim',
            "test_obj": Test003,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            "test_id": 4,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test004,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            "test_id": 5,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            "test_obj": Test005,
        },
        {
            # randread, aio
            # by default only clat should appear
            "test_id": 6,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test006,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            "test_id": 7,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test007,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            "test_id": 8,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            "test_obj": Test008,
        },
        {
            # randwrite, null
            # enable slat, clat, lat
            # fsync
            "test_id": 9,
            "runtime": 2,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            'fsync': 32,
            "test_obj": Test009,
        },
        {
            # 50/50 r/w, aio
            # enable slat, clat, lat
            "test_id": 10,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test010,
        },
        {
            # 50/50 r/w, aio
            # enable only lat
            "test_id": 11,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test011,
        },
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            # same as Test 1 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 12,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test001,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 13,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test004,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 14,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'numjobs': 4,
            "test_obj": Test008,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add cmdprio_percentage
            "test_id": 15,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test015,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            # same as Test 5 except add cmdprio_percentage
            "test_id": 16,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            'cmdprio_percentage': 50,
            "test_obj": Test016,
        },
        {
            # randread, aio
            # by default only clat should appear
            # same as Test 6 except add cmdprio_percentage
            "test_id": 17,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test017,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            # same as Test 7 except add cmdprio_percentage
            "test_id": 18,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'cmdprio_percentage': 50,
            "test_obj": Test018,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add cmdprio_percentage
            "test_id": 19,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            "test_obj": Test019,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 19 except
            # add numjobs = 4 to test sum_thread_stats() changes
            "test_id": 20,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            'numjobs': 4,
            "test_obj": Test019,
        },
    ]

    passed = 0
    failed = 0
    skipped = 0

    for test in test_list:
        if (args.skip and test['test_id'] in args.skip) or \
           (args.run_only and test['test_id'] not in args.run_only):
            skipped += 1
            outcome = 'SKIPPED (User request)'
        # cmdprio_percentage needs CAP_SYS_ADMIN (root) and only works on Linux;
        # the platform check short-circuits so os.geteuid() is never called
        # on systems that lack it
        elif (platform.system() != 'Linux' or os.geteuid() != 0) and 'cmdprio_percentage' in test:
            skipped += 1
            outcome = 'SKIPPED (Linux root required for cmdprio_percentage tests)'
        else:
            test_obj = test['test_obj'](artifact_root, test, args.debug)
            status = test_obj.run_fio(fio)
            if status:
                status = test_obj.check()
            if status:
                passed += 1
                outcome = 'PASSED'
            else:
                failed += 1
                outcome = 'FAILED'

        print("**********Test {0} {1}**********".format(test['test_id'], outcome))

    print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))

    # exit status is the failure count so CI can detect any failed test
    sys.exit(failed)
1327
1328
# Run the latency percentile test suite when executed as a script.
if __name__ == '__main__':
    main()