stat: convert json output to a new per priority granularity format
[fio.git] / t / latency_percentiles.py
CommitLineData
20e70a59
VF
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# Copyright (c) 2020 Western Digital Corporation or its affiliates.
5#
6"""
7# latency_percentiles.py
8#
9# Test the code that produces latency percentiles
10# This is mostly to test the code changes to allow reporting
11# of slat, clat, and lat percentiles
12#
13# USAGE
14 # python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
15#
16#
17# Test scenarios:
18#
19# - DONE json
20# unified rw reporting
21# compare with latency log
22# try various combinations of the ?lat_percentile options
23# null, aio
24# r, w, t
25# - DONE json+
26# check presence of latency bins
27# if the json percentiles match those from the raw data
28# then the latency bin values and counts are probably ok
29# - DONE terse
30# produce both terse, JSON output and confirm that they match
31# lat only; both lat and clat
32# - DONE sync_lat
33# confirm that sync_lat data appears
34# - MANUAL TESTING normal output:
35# null ioengine
36# enable all, but only clat and lat appear
37# enable subset of latency types
38# read, write, trim, unified
39# libaio ioengine
40# enable all latency types
41# enable subset of latency types
42# read, write, trim, unified
43# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
44# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
45# echo confirm that clat and lat percentiles appear
46# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
47# --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
48# echo confirm that only lat percentiles appear
49# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
50# --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
51# echo confirm that only clat percentiles appear
52# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
53# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
54# echo confirm that slat, clat, lat percentiles appear
55# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
56# --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
57# echo confirm that clat and lat percentiles appear
58# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
59# --ioengine=libaio -rw=randrw
60# echo confirm that clat percentiles appear for reads and writes
61# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
62# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
63# echo confirm that slat percentiles appear for both reads and writes
64# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
65# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
66# --rw=randrw --unified_rw_reporting=1
67# echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
68#./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
69# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
70# --rw=randrw --fsync=32
71# echo confirm that fsync latencies appear
72"""
73
74import os
75import csv
76import sys
77import json
78import math
79import time
80import argparse
81import platform
82import subprocess
1cbbba65 83from collections import Counter
20e70a59
VF
84from pathlib import Path
85
86
class FioLatTest():
    """fio latency percentile test."""

    def __init__(self, artifact_root, test_options, debug):
        """Set up one latency percentile test case.

        artifact_root   root directory for artifacts (a per-test subdirectory
                        is created under here)
        test_options    dict describing the test; must contain 'test_id'
        debug           True to enable verbose debug output
        """
        self.artifact_root = artifact_root
        self.test_options = test_options
        self.debug = debug
        self.filename = None
        self.json_data = None
        self.terse_data = None

        # Artifacts for test NNN are collected in <artifact_root>/NNN
        self.test_dir = os.path.join(self.artifact_root,
                                     "{:03d}".format(self.test_options['test_id']))
        if not os.path.exists(self.test_dir):
            os.mkdir(self.test_dir)

        # Base name shared by every output file this test produces
        self.filename = "latency{:03d}".format(self.test_options['test_id'])
108
109 def run_fio(self, fio_path):
110 """Run a test."""
111
112 fio_args = [
771dbb52 113 "--max-jobs=16",
20e70a59
VF
114 "--name=latency",
115 "--randrepeat=0",
116 "--norandommap",
117 "--time_based",
118 "--size=16M",
119 "--rwmixread=50",
120 "--group_reporting=1",
121 "--write_lat_log={0}".format(self.filename),
122 "--output={0}.out".format(self.filename),
123 "--ioengine={ioengine}".format(**self.test_options),
124 "--rw={rw}".format(**self.test_options),
125 "--runtime={runtime}".format(**self.test_options),
126 "--output-format={output-format}".format(**self.test_options),
127 ]
128 for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
129 'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs', 'cmdprio_percentage']:
130 if opt in self.test_options:
131 option = '--{0}={{{0}}}'.format(opt)
132 fio_args.append(option.format(**self.test_options))
133
134 command = [fio_path] + fio_args
135 with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
136 command_file:
137 command_file.write("%s\n" % command)
138
139 passed = True
140 stdout_file = open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)), "w+")
141 stderr_file = open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)), "w+")
142 exitcode_file = open(os.path.join(self.test_dir,
143 "{0}.exitcode".format(self.filename)), "w+")
144 try:
145 proc = None
146 # Avoid using subprocess.run() here because when a timeout occurs,
147 # fio will be stopped with SIGKILL. This does not give fio a
148 # chance to clean up and means that child processes may continue
149 # running and submitting IO.
150 proc = subprocess.Popen(command,
151 stdout=stdout_file,
152 stderr=stderr_file,
153 cwd=self.test_dir,
154 universal_newlines=True)
155 proc.communicate(timeout=300)
156 exitcode_file.write('{0}\n'.format(proc.returncode))
157 passed &= (proc.returncode == 0)
158 except subprocess.TimeoutExpired:
159 proc.terminate()
160 proc.communicate()
161 assert proc.poll()
162 print("Timeout expired")
163 passed = False
164 except Exception:
165 if proc:
166 if not proc.poll():
167 proc.terminate()
168 proc.communicate()
169 print("Exception: %s" % sys.exc_info())
170 passed = False
171 finally:
172 stdout_file.close()
173 stderr_file.close()
174 exitcode_file.close()
175
176 if passed:
177 if 'json' in self.test_options['output-format']:
178 if not self.get_json():
179 print('Unable to decode JSON data')
180 passed = False
181 if 'terse' in self.test_options['output-format']:
182 if not self.get_terse():
183 print('Unable to decode terse data')
184 passed = False
185
186 return passed
187
188 def get_json(self):
189 """Convert fio JSON output into a python JSON object"""
190
191 filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
192 with open(filename, 'r') as file:
193 file_data = file.read()
194
195 #
196 # Sometimes fio informational messages are included at the top of the
197 # JSON output, especially under Windows. Try to decode output as JSON
198 # data, lopping off up to the first four lines
199 #
200 lines = file_data.splitlines()
201 for i in range(5):
202 file_data = '\n'.join(lines[i:])
203 try:
204 self.json_data = json.loads(file_data)
205 except json.JSONDecodeError:
206 continue
207 else:
208 return True
209
210 return False
211
212 def get_terse(self):
213 """Read fio output and return terse format data."""
214
215 filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
216 with open(filename, 'r') as file:
217 file_data = file.read()
218
219 #
84f9318f 220 # Read the first few lines and see if any of them begin with '3;'
20e70a59
VF
221 # If so, the line is probably terse output. Obviously, this only
222 # works for fio terse version 3 and it does not work for
223 # multi-line terse output
224 #
225 lines = file_data.splitlines()
226 for i in range(8):
227 file_data = lines[i]
84f9318f 228 if file_data.startswith('3;'):
20e70a59
VF
229 self.terse_data = file_data.split(';')
230 return True
231
232 return False
233
234 def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
235 unified=False):
236 """Check fio latency data.
237
238 ddir data direction to check (0=read, 1=write, 2=trim)
239 slat True if submission latency data available to check
240 clat True if completion latency data available to check
241 tlat True of total latency data available to check
242 plus True if we actually have json+ format data where additional checks can
243 be carried out
244 unified True if fio is reporting unified r/w data
245 """
246
247 types = {
248 'slat': slat,
249 'clat': clat,
250 'lat': tlat
251 }
252
253 retval = True
254
255 for lat in ['slat', 'clat', 'lat']:
256 this_iter = True
257 if not types[lat]:
258 if 'percentile' in jsondata[lat+'_ns']:
259 this_iter = False
260 print('unexpected %s percentiles found' % lat)
261 else:
262 print("%s percentiles skipped" % lat)
263 continue
264 else:
265 if 'percentile' not in jsondata[lat+'_ns']:
266 this_iter = False
267 print('%s percentiles not found in fio output' % lat)
268
269 #
270 # Check only for the presence/absence of json+
271 # latency bins. Future work can check the
272 # accurracy of the bin values and counts.
273 #
274 # Because the latency percentiles are based on
275 # the bins, we can be confident that the bin
276 # values and counts are correct if fio's
277 # latency percentiles match what we compute
278 # from the raw data.
279 #
280 if plus:
281 if 'bins' not in jsondata[lat+'_ns']:
282 print('bins not found with json+ output format')
283 this_iter = False
284 else:
285 if not self.check_jsonplus(jsondata[lat+'_ns']):
286 this_iter = False
287 else:
288 if 'bins' in jsondata[lat+'_ns']:
289 print('json+ bins found with json output format')
290 this_iter = False
291
292 latencies = []
293 for i in range(10):
294 lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
295 if not os.path.exists(lat_file):
296 break
297 with open(lat_file, 'r', newline='') as file:
298 reader = csv.reader(file)
299 for line in reader:
300 if unified or int(line[2]) == ddir:
301 latencies.append(int(line[1]))
302
303 if int(jsondata['total_ios']) != len(latencies):
304 this_iter = False
305 print('%s: total_ios = %s, latencies logged = %d' % \
306 (lat, jsondata['total_ios'], len(latencies)))
307 elif self.debug:
308 print("total_ios %s match latencies logged" % jsondata['total_ios'])
309
310 latencies.sort()
311 ptiles = jsondata[lat+'_ns']['percentile']
312
313 for percentile in ptiles.keys():
314 #
315 # numpy.percentile(latencies, float(percentile),
316 # interpolation='higher')
317 # produces values that mostly match what fio reports
318 # however, in the tails of the distribution, the values produced
319 # by fio's and numpy.percentile's algorithms are occasionally off
320 # by one latency measurement. So instead of relying on the canned
321 # numpy.percentile routine, implement here fio's algorithm
322 #
323 rank = math.ceil(float(percentile)/100 * len(latencies))
324 if rank > 0:
325 index = rank - 1
326 else:
327 index = 0
328 value = latencies[int(index)]
329 fio_val = int(ptiles[percentile])
330 # The theory in stat.h says that the proportional error will be
331 # less than 1/128
332 if not self.similar(fio_val, value):
333 delta = abs(fio_val - value) / value
334 print("Error with %s %sth percentile: "
335 "fio: %d, expected: %d, proportional delta: %f" %
336 (lat, percentile, fio_val, value, delta))
337 print("Rank: %d, index: %d" % (rank, index))
338 this_iter = False
339 elif self.debug:
340 print('%s %sth percentile values match: %d, %d' %
341 (lat, percentile, fio_val, value))
342
343 if this_iter:
344 print("%s percentiles match" % lat)
345 else:
346 retval = False
347
348 return retval
349
350 @staticmethod
351 def check_empty(job):
352 """
353 Make sure JSON data is empty.
354
355 Some data structures should be empty. This function makes sure that they are.
356
357 job JSON object that we need to check for emptiness
358 """
359
360 return job['total_ios'] == 0 and \
361 job['slat_ns']['N'] == 0 and \
362 job['clat_ns']['N'] == 0 and \
363 job['lat_ns']['N'] == 0
364
365 def check_nocmdprio_lat(self, job):
366 """
1cbbba65 367 Make sure no per priority latencies appear.
20e70a59
VF
368
369 job JSON object to check
370 """
371
372 for ddir in ['read', 'write', 'trim']:
373 if ddir in job:
1cbbba65
NC
374 if 'prios' in job[ddir]:
375 print("Unexpected per priority latencies found in %s output" % ddir)
20e70a59
VF
376 return False
377
378 if self.debug:
1cbbba65 379 print("No per priority latencies found")
20e70a59
VF
380
381 return True
382
383 @staticmethod
384 def similar(approximation, actual):
385 """
386 Check whether the approximate values recorded by fio are within the theoretical bound.
387
388 Since it is impractical to store exact latency measurements for each and every IO, fio
389 groups similar latency measurements into variable-sized bins. The theory in stat.h says
390 that the proportional error will be less than 1/128. This function checks whether this
391 is true.
392
393 TODO This test will fail when comparing a value from the largest latency bin against its
394 actual measurement. Find some way to detect this and avoid failing.
395
396 approximation value of the bin used by fio to store a given latency
397 actual actual latency value
398 """
9b37832d
VF
399
400 # Avoid a division by zero. The smallest latency values have no error.
401 if actual == 0:
402 return approximation == 0
403
20e70a59
VF
404 delta = abs(approximation - actual) / actual
405 return delta <= 1/128
406
407 def check_jsonplus(self, jsondata):
408 """Check consistency of json+ data
409
410 When we have json+ data we can check the min value, max value, and
411 sample size reported by fio
412
413 jsondata json+ data that we need to check
414 """
415
416 retval = True
417
418 keys = [int(k) for k in jsondata['bins'].keys()]
419 values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
420 smallest = min(keys)
421 biggest = max(keys)
422 sampsize = sum(values)
423
424 if not self.similar(jsondata['min'], smallest):
425 retval = False
426 print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
427 elif self.debug:
428 print('json+ min values match: %d' % jsondata['min'])
429
430 if not self.similar(jsondata['max'], biggest):
431 retval = False
432 print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
433 elif self.debug:
434 print('json+ max values match: %d' % jsondata['max'])
435
436 if sampsize != jsondata['N']:
437 retval = False
438 print('reported sample size %d does not match json+ total count %d' % \
439 (jsondata['N'], sampsize))
440 elif self.debug:
441 print('json+ sample sizes match: %d' % sampsize)
442
443 return retval
444
445 def check_sync_lat(self, jsondata, plus=False):
446 """Check fsync latency percentile data.
447
448 All we can check is that some percentiles are reported, unless we have json+ data.
449 If we actually have json+ data then we can do more checking.
450
451 jsondata JSON data for fsync operations
452 plus True if we actually have json+ data
453 """
454 retval = True
455
456 if 'percentile' not in jsondata['lat_ns']:
457 print("Sync percentile data not found")
458 return False
459
460 if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
461 retval = False
462 print('Mismatch between total_ios and lat_ns sample size')
463 elif self.debug:
464 print('sync sample sizes match: %d' % jsondata['total_ios'])
465
466 if not plus:
467 if 'bins' in jsondata['lat_ns']:
468 print('Unexpected json+ bin data found')
469 return False
470
471 if not self.check_jsonplus(jsondata['lat_ns']):
472 retval = False
473
474 return retval
475
476 def check_terse(self, terse, jsondata):
477 """Compare terse latencies with JSON latencies.
478
479 terse terse format data for checking
480 jsondata JSON format data for checking
481 """
482
483 retval = True
484
485 for lat in terse:
486 split = lat.split('%')
487 pct = split[0]
488 terse_val = int(split[1][1:])
489 json_val = math.floor(jsondata[pct]/1000)
490 if terse_val != json_val:
491 retval = False
492 print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
493 (pct, jsondata[pct], json_val, terse_val))
494 elif self.debug:
495 print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))
496
497 return retval
498
    def check_prio_latencies(self, jsondata, clat=True, plus=False):
        """Check consistency of per priority latencies.

        jsondata        JSON data containing both the combined latency stats
                        and the per priority 'prios' list
        clat            True if we should check clat data; other check lat data
        plus            True if we have json+ format data where additional checks can
                        be carried out

        Returns True if the merged per priority statistics are consistent
        with the combined statistics; False otherwise.
        """

        # Both the per priority entries and the combined stats are checked
        # under the same key (clat_ns or lat_ns).
        if clat:
            obj = combined = 'clat_ns'
        else:
            obj = combined = 'lat_ns'

        if not 'prios' in jsondata or not combined in jsondata:
            print("Error identifying per priority latencies")
            return False

        # Sample counts must sum exactly across priorities.
        sum_sample_size = sum([x[obj]['N'] for x in jsondata['prios']])
        if sum_sample_size != jsondata[combined]['N']:
            print("Per prio sample size sum %d != combined sample size %d" %
                  (sum_sample_size, jsondata[combined]['N']))
            return False
        elif self.debug:
            print("Per prio sample size sum %d == combined sample size %d" %
                  (sum_sample_size, jsondata[combined]['N']))

        # The overall min/max must equal the extremes over all priorities.
        min_val = min([x[obj]['min'] for x in jsondata['prios']])
        if min_val != jsondata[combined]['min']:
            print("Min per prio min latency %d does not match min %d from combined data" %
                  (min_val, jsondata[combined]['min']))
            return False
        elif self.debug:
            print("Min per prio min latency %d matches min %d from combined data" %
                  (min_val, jsondata[combined]['min']))

        max_val = max([x[obj]['max'] for x in jsondata['prios']])
        if max_val != jsondata[combined]['max']:
            print("Max per prio max latency %d does not match max %d from combined data" %
                  (max_val, jsondata[combined]['max']))
            return False
        elif self.debug:
            print("Max per prio max latency %d matches max %d from combined data" %
                  (max_val, jsondata[combined]['max']))

        # The sample-count-weighted average of the per priority means must
        # reproduce the combined mean (within a small floating point
        # tolerance of 0.01%).
        weighted_vals = [x[obj]['mean'] * x[obj]['N'] for x in jsondata['prios']]
        weighted_avg = sum(weighted_vals) / jsondata[combined]['N']
        delta = abs(weighted_avg - jsondata[combined]['mean'])
        if (delta / jsondata[combined]['mean']) > 0.0001:
            print("Difference between merged per prio weighted average %f mean "
                  "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
            return False
        elif self.debug:
            print("Merged per prio weighted average %f mean matches actual mean %f" %
                  (weighted_avg, jsondata[combined]['mean']))

        if plus:
            # With json+ data, each per priority entry also has min/max/N
            # consistent with its own latency bins.
            for prio in jsondata['prios']:
                if not self.check_jsonplus(prio[obj]):
                    return False

            # Merge the per priority bins by summing counts per duration and
            # compare against the combined bins.
            counter = Counter()
            for prio in jsondata['prios']:
                counter.update(prio[obj]['bins'])

            bins = dict(counter)

            if len(bins) != len(jsondata[combined]['bins']):
                print("Number of merged bins %d does not match number of overall bins %d" %
                      (len(bins), len(jsondata[combined]['bins'])))
                return False
            elif self.debug:
                print("Number of merged bins %d matches number of overall bins %d" %
                      (len(bins), len(jsondata[combined]['bins'])))

            for duration in bins.keys():
                if bins[duration] != jsondata[combined]['bins'][duration]:
                    # NOTE(review): bin keys decoded from JSON are strings, so
                    # the %d conversion below would raise TypeError if this
                    # mismatch path is ever taken -- confirm against json+ output.
                    print("Merged per prio count does not match overall count for duration %d" %
                          duration)
                    return False

        print("Merged per priority latency data match combined latency data")
        return True
582
    def check(self):
        """Check test output.

        Abstract hook: each TestNNN subclass overrides this with the checks
        appropriate for its test scenario.
        """

        raise NotImplementedError()
587
588
class Test001(FioLatTest):
    """Test object for Test 1."""

    def check(self):
        """Check Test 1 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # Only the read direction should contain data.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False)

        return passed
611
612
class Test002(FioLatTest):
    """Test object for Test 2."""

    def check(self):
        """Check Test 2 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # Only the write direction should contain data.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, clat=False)

        return passed
635
636
class Test003(FioLatTest):
    """Test object for Test 3."""

    def check(self):
        """Check Test 3 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # Only the trim direction should contain data.
        for ddir in ('read', 'write'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)

        return passed
659
660
class Test004(FioLatTest):
    """Test object for Tests 4, 13."""

    def check(self):
        """Check Test 4, 13 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # Only the read direction should contain data.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)

        return passed
683
684
class Test005(FioLatTest):
    """Test object for Test 5."""

    def check(self):
        """Check Test 5 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # Only the write direction should contain data.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return passed
707
708
class Test006(FioLatTest):
    """Test object for Test 6."""

    def check(self):
        """Check Test 6 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # Only the read direction should contain data.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)

        return passed
731
732
class Test007(FioLatTest):
    """Test object for Test 7."""

    def check(self):
        """Check Test 7 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        # Both read (ddir 0) and write (ddir 1) carry slat percentiles only.
        for ddir, direction in enumerate(('read', 'write')):
            passed &= self.check_latencies(job[direction], ddir, clat=False, tlat=False,
                                           plus=True)

        return passed
753
754
class Test008(FioLatTest):
    """Test object for Tests 8, 14."""

    def check(self):
        """Check Test 8, 14 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # With unified reporting only 'mixed' should be present.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)

        return passed
774
775
class Test009(FioLatTest):
    """Test object for Test 9."""

    def check(self):
        """Check Test 9 output."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_sync_lat(job['sync'], plus=True):
            print("Error checking fsync latency data")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return passed
801
802
class Test010(FioLatTest):
    """Test object for Test 10."""

    def check(self):
        """Check Test 10 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)
        passed &= self.check_latencies(job['write'], 1, plus=True)
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.
        passed &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        passed &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])

        return passed
827
828
class Test011(FioLatTest):
    """Test object for Test 11."""

    def check(self):
        """Check Test 11 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected per priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
        passed &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.
        passed &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        passed &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])

        return passed
853
854
class Test015(FioLatTest):
    """Test object for Test 15."""

    def check(self):
        """Check Test 15 output."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)
        passed &= self.check_prio_latencies(job['read'], clat=False, plus=True)

        return passed
875
876
class Test016(FioLatTest):
    """Test object for Test 16."""

    def check(self):
        """Check Test 16 output."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)
        passed &= self.check_prio_latencies(job['write'], clat=False, plus=True)

        return passed
897
898
class Test017(FioLatTest):
    """Test object for Test 17."""

    def check(self):
        """Check Test 17 output."""

        job = self.json_data['jobs'][0]

        passed = True
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
        passed &= self.check_prio_latencies(job['read'], plus=True)

        return passed
919
920
class Test018(FioLatTest):
    """Test object for Test 18."""

    def check(self):
        """Check Test 18 output."""

        job = self.json_data['jobs'][0]

        passed = True
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        for ddir, direction in enumerate(('read', 'write')):
            passed &= self.check_latencies(job[direction], ddir, clat=False, tlat=False,
                                           plus=True)

        # We actually have json+ data but setting plus=False below avoids checking the
        # json+ bins which did not exist for clat and lat because this job is run with
        # clat_percentiles=0, lat_percentiles=0, However, we can still check the summary
        # statistics
        passed &= self.check_prio_latencies(job['write'], plus=False)
        passed &= self.check_prio_latencies(job['read'], plus=False)

        return passed
945
946
class Test019(FioLatTest):
    """Test object for Tests 19, 20."""

    def check(self):
        """Check Test 19, 20 output."""

        job = self.json_data['jobs'][0]

        passed = True
        # With unified reporting only 'mixed' should be present.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False

        passed &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
        passed &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)

        return passed
964
965
def parse_args():
    """Parse command-line arguments and return the resulting namespace."""

    parser = argparse.ArgumentParser()
    # (flags, add_argument keyword options) for each supported option
    for flags, options in (
            (('-f', '--fio'), {'help': 'path to file executable (e.g., ./fio)'}),
            (('-a', '--artifact-root'), {'help': 'artifact root directory'}),
            (('-d', '--debug'), {'help': 'enable debug output', 'action': 'store_true'}),
            (('-s', '--skip'), {'nargs': '+', 'type': int,
                                'help': 'list of test(s) to skip'}),
            (('-o', '--run-only'), {'nargs': '+', 'type': int,
                                    'help': 'list of test(s) to run, skipping all others'}),
    ):
        parser.add_argument(*flags, **options)

    return parser.parse_args()
980
981
def main():
    """Run tests of fio latency percentile reporting.

    Builds an artifact directory, selects the platform-appropriate async
    ioengine, runs each configured test job, and exits with the number of
    failed tests as the process exit code.
    """

    args = parse_args()

    # Default artifact root is timestamped so repeated runs do not collide
    artifact_root = args.artifact_root if args.artifact_root else \
        "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
    os.mkdir(artifact_root)
    print("Artifact directory is %s" % artifact_root)

    if args.fio:
        fio = str(Path(args.fio).absolute())
    else:
        fio = 'fio'
    print("fio path is %s" % fio)

    # Pick the native asynchronous ioengine for the host platform
    if platform.system() == 'Linux':
        aio = 'libaio'
    elif platform.system() == 'Windows':
        aio = 'windowsaio'
    else:
        aio = 'posixaio'

    test_list = [
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear
            # because the null ioengine is synchronous
            "test_id": 1,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            "test_obj": Test001,
        },
        {
            # randwrite, null
            # enable lat only
            "test_id": 2,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            "test_obj": Test002,
        },
        {
            # randtrim, null
            # enable clat only
            "test_id": 3,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": 'null',
            'rw': 'randtrim',
            "test_obj": Test003,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            "test_id": 4,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test004,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            "test_id": 5,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            "test_obj": Test005,
        },
        {
            # randread, aio
            # by default only clat should appear
            "test_id": 6,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test006,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            "test_id": 7,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test007,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            "test_id": 8,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            "test_obj": Test008,
        },
        {
            # randwrite, null
            # enable slat, clat, lat
            # fsync
            "test_id": 9,
            "runtime": 2,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            'fsync': 32,
            "test_obj": Test009,
        },
        {
            # 50/50 r/w, aio
            # enable slat, clat, lat
            "test_id": 10,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test010,
        },
        {
            # 50/50 r/w, aio
            # enable only lat
            "test_id": 11,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test011,
        },
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear
            # because the null ioengine is synchronous
            # same as Test 1 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 12,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test001,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 13,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test004,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 14,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'numjobs': 4,
            "test_obj": Test008,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add cmdprio_percentage
            "test_id": 15,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test015,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            # same as Test 5 except add cmdprio_percentage
            "test_id": 16,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            'cmdprio_percentage': 50,
            "test_obj": Test016,
        },
        {
            # randread, aio
            # by default only clat should appear
            # same as Test 6 except add cmdprio_percentage
            "test_id": 17,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test017,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            # same as Test 7 except add cmdprio_percentage
            "test_id": 18,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'cmdprio_percentage': 50,
            "test_obj": Test018,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add cmdprio_percentage
            "test_id": 19,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            "test_obj": Test019,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 19 except
            # add numjobs = 4 to test sum_thread_stats() changes
            "test_id": 20,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            'numjobs': 4,
            "test_obj": Test019,
        },
    ]

    passed = 0
    failed = 0
    skipped = 0

    for test in test_list:
        if (args.skip and test['test_id'] in args.skip) or \
           (args.run_only and test['test_id'] not in args.run_only):
            skipped = skipped + 1
            outcome = 'SKIPPED (User request)'
        # cmdprio_percentage requires CAP_SYS_NICE; only root on Linux is assumed here
        elif (platform.system() != 'Linux' or os.geteuid() != 0) and 'cmdprio_percentage' in test:
            skipped = skipped + 1
            outcome = 'SKIPPED (Linux root required for cmdprio_percentage tests)'
        else:
            test_obj = test['test_obj'](artifact_root, test, args.debug)
            status = test_obj.run_fio(fio)
            if status:
                status = test_obj.check()
            if status:
                passed = passed + 1
                outcome = 'PASSED'
            else:
                failed = failed + 1
                outcome = 'FAILED'

        print("**********Test {0} {1}**********".format(test['test_id'], outcome))

    print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))

    # Exit code is the failure count so CI can detect any failed test
    sys.exit(failed)
1327
1328
# Script entry point: run the latency percentile test suite.
if __name__ == '__main__':
    main()