# Source: fio.git / t / latency_percentiles.py
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright (c) 2020 Western Digital Corporation or its affiliates.
#
"""
# latency_percentiles.py
#
# Test the code that produces latency percentiles
# This is mostly to test the code changes to allow reporting
# of slat, clat, and lat percentiles
#
# USAGE
# python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
#
#
# Test scenarios:
#
# - DONE json
#   unified rw reporting
#   compare with latency log
#   try various combinations of the ?lat_percentile options
#   null, aio
#   r, w, t
# - DONE json+
#   check presence of latency bins
#   if the json percentiles match those from the raw data
#   then the latency bin values and counts are probably ok
# - DONE terse
#   produce both terse, JSON output and confirm that they match
#   lat only; both lat and clat
# - DONE sync_lat
#   confirm that sync_lat data appears
# - MANUAL TESTING normal output:
#   null ioengine
#       enable all, but only clat and lat appear
#       enable subset of latency types
#       read, write, trim, unified
#   libaio ioengine
#       enable all latency types
#       enable subset of latency types
#       read, write, trim, unified
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
#   echo confirm that clat and lat percentiles appear
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
#   echo confirm that only lat percentiles appear
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
#   echo confirm that only clat percentiles appear
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
#   echo confirm that slat, clat, lat percentiles appear
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
#   echo confirm that clat and lat percentiles appear
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=libaio --rw=randrw
#   echo confirm that clat percentiles appear for reads and writes
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
#   echo confirm that slat percentiles appear for both reads and writes
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
#     --rw=randrw --unified_rw_reporting=1
#   echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
#     --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
#     --rw=randrw --fsync=32
#   echo confirm that fsync latencies appear
"""
73
import argparse
import csv
import json
import math
import os
import platform
import subprocess
import sys
import time
from pathlib import Path
84
85
class FioLatTest():
    """fio latency percentile test.

    Runs one fio job, captures its JSON/terse output plus the latency log
    files, and cross-checks the latency percentiles that fio reports against
    percentiles recomputed from the raw log data.
    """

    def __init__(self, artifact_root, test_options, debug):
        """
        artifact_root   root directory for artifacts (subdirectory will be created under here)
        test_options    test specification (dict; must contain at least 'test_id')
        debug           True to enable verbose progress output
        """
        self.artifact_root = artifact_root
        self.test_options = test_options
        self.debug = debug
        self.filename = None
        self.json_data = None
        self.terse_data = None

        self.test_dir = os.path.join(self.artifact_root,
                                     "{:03d}".format(self.test_options['test_id']))
        if not os.path.exists(self.test_dir):
            os.mkdir(self.test_dir)

        self.filename = "latency{:03d}".format(self.test_options['test_id'])

    def run_fio(self, fio_path):
        """Run a test.

        fio_path    path to the fio executable

        Returns True if fio ran successfully and its output could be decoded.
        """

        fio_args = [
            "--name=latency",
            "--randrepeat=0",
            "--norandommap",
            "--time_based",
            "--size=16M",
            "--rwmixread=50",
            "--group_reporting=1",
            "--write_lat_log={0}".format(self.filename),
            "--output={0}.out".format(self.filename),
            "--ioengine={ioengine}".format(**self.test_options),
            "--rw={rw}".format(**self.test_options),
            "--runtime={runtime}".format(**self.test_options),
            "--output-format={output-format}".format(**self.test_options),
        ]
        # Append optional parameters only when the test specification sets them.
        for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
                    'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs',
                    'cmdprio_percentage']:
            if opt in self.test_options:
                option = '--{0}={{{0}}}'.format(opt)
                fio_args.append(option.format(**self.test_options))

        command = [fio_path] + fio_args
        with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
                command_file:
            command_file.write("%s\n" % command)

        passed = True
        # Context managers guarantee the log files are closed on every exit
        # path (the original code relied on a try/finally with manual close()).
        with open(os.path.join(self.test_dir, "{0}.stdout".format(self.filename)),
                  "w+") as stdout_file, \
                open(os.path.join(self.test_dir, "{0}.stderr".format(self.filename)),
                     "w+") as stderr_file, \
                open(os.path.join(self.test_dir, "{0}.exitcode".format(self.filename)),
                     "w+") as exitcode_file:
            try:
                proc = None
                # Avoid using subprocess.run() here because when a timeout occurs,
                # fio will be stopped with SIGKILL. This does not give fio a
                # chance to clean up and means that child processes may continue
                # running and submitting IO.
                proc = subprocess.Popen(command,
                                        stdout=stdout_file,
                                        stderr=stderr_file,
                                        cwd=self.test_dir,
                                        universal_newlines=True)
                proc.communicate(timeout=300)
                exitcode_file.write('{0}\n'.format(proc.returncode))
                passed &= (proc.returncode == 0)
            except subprocess.TimeoutExpired:
                proc.terminate()
                proc.communicate()
                assert proc.poll()
                print("Timeout expired")
                passed = False
            except Exception:
                if proc:
                    # Bug fix: poll() returns None while the process is still
                    # running; the original "if not proc.poll()" also matched
                    # a clean exit status of 0 and terminated a dead process.
                    if proc.poll() is None:
                        proc.terminate()
                        proc.communicate()
                print("Exception: %s" % sys.exc_info())
                passed = False

        if passed:
            if 'json' in self.test_options['output-format']:
                if not self.get_json():
                    print('Unable to decode JSON data')
                    passed = False
            if 'terse' in self.test_options['output-format']:
                if not self.get_terse():
                    print('Unable to decode terse data')
                    passed = False

        return passed

    def get_json(self):
        """Convert fio JSON output into a python JSON object.

        Returns True on success, False if the output could not be decoded.
        """

        filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
        with open(filename, 'r') as file:
            file_data = file.read()

        #
        # Sometimes fio informational messages are included at the top of the
        # JSON output, especially under Windows. Try to decode output as JSON
        # data, lopping off up to the first four lines
        #
        lines = file_data.splitlines()
        for i in range(5):
            file_data = '\n'.join(lines[i:])
            try:
                self.json_data = json.loads(file_data)
            except json.JSONDecodeError:
                continue
            else:
                return True

        return False

    def get_terse(self):
        """Read fio output and return terse format data.

        Returns True on success, False if no terse line was found.
        """

        filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
        with open(filename, 'r') as file:
            file_data = file.read()

        #
        # Read the first few lines and see if any of them begin with '3;fio-'
        # If so, the line is probably terse output. Obviously, this only
        # works for fio terse version 3 and it does not work for
        # multi-line terse output
        #
        lines = file_data.splitlines()
        for i in range(8):
            file_data = lines[i]
            if file_data.startswith('3;fio-'):
                self.terse_data = file_data.split(';')
                return True

        return False

    def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
                        unified=False):
        """Check fio latency data.

        jsondata    fio JSON data for one data direction (e.g. job['read'])
        ddir        data direction to check (0=read, 1=write, 2=trim)
        slat        True if submission latency data available to check
        clat        True if completion latency data available to check
        tlat        True if total latency data available to check
        plus        True if we actually have json+ format data where additional checks can
                    be carried out
        unified     True if fio is reporting unified r/w data
        """

        types = {
            'slat': slat,
            'clat': clat,
            'lat': tlat
        }

        retval = True

        for lat in ['slat', 'clat', 'lat']:
            this_iter = True
            if not types[lat]:
                if 'percentile' in jsondata[lat+'_ns']:
                    # Bug fix: the original set this_iter = False and then
                    # continued, so the failure was discarded and the test
                    # still passed; flag the overall result directly.
                    retval = False
                    print('unexpected %s percentiles found' % lat)
                else:
                    print("%s percentiles skipped" % lat)
                continue
            else:
                if 'percentile' not in jsondata[lat+'_ns']:
                    this_iter = False
                    print('%s percentiles not found in fio output' % lat)

            #
            # Check only for the presence/absence of json+
            # latency bins. Future work can check the
            # accuracy of the bin values and counts.
            #
            # Because the latency percentiles are based on
            # the bins, we can be confident that the bin
            # values and counts are correct if fio's
            # latency percentiles match what we compute
            # from the raw data.
            #
            if plus:
                if 'bins' not in jsondata[lat+'_ns']:
                    print('bins not found with json+ output format')
                    this_iter = False
                else:
                    if not self.check_jsonplus(jsondata[lat+'_ns']):
                        this_iter = False
            else:
                if 'bins' in jsondata[lat+'_ns']:
                    print('json+ bins found with json output format')
                    this_iter = False

            # Collect raw latency measurements from the per-job log files
            # (one file per job, up to 10 jobs).
            latencies = []
            for i in range(10):
                lat_file = os.path.join(self.test_dir,
                                        "%s_%s.%s.log" % (self.filename, lat, i+1))
                if not os.path.exists(lat_file):
                    break
                with open(lat_file, 'r', newline='') as file:
                    reader = csv.reader(file)
                    for line in reader:
                        # Log line format: time, latency, data direction, ...
                        if unified or int(line[2]) == ddir:
                            latencies.append(int(line[1]))

            if int(jsondata['total_ios']) != len(latencies):
                this_iter = False
                print('%s: total_ios = %s, latencies logged = %d' % \
                        (lat, jsondata['total_ios'], len(latencies)))
            elif self.debug:
                print("total_ios %s match latencies logged" % jsondata['total_ios'])

            latencies.sort()
            ptiles = jsondata[lat+'_ns']['percentile']

            for percentile in ptiles.keys():
                #
                # numpy.percentile(latencies, float(percentile),
                #       interpolation='higher')
                # produces values that mostly match what fio reports
                # however, in the tails of the distribution, the values produced
                # by fio's and numpy.percentile's algorithms are occasionally off
                # by one latency measurement. So instead of relying on the canned
                # numpy.percentile routine, implement here fio's algorithm
                #
                rank = math.ceil(float(percentile)/100 * len(latencies))
                if rank > 0:
                    index = rank - 1
                else:
                    index = 0
                value = latencies[int(index)]
                fio_val = int(ptiles[percentile])
                # The theory in stat.h says that the proportional error will be
                # less than 1/128
                if not self.similar(fio_val, value):
                    delta = abs(fio_val - value) / value
                    print("Error with %s %sth percentile: "
                          "fio: %d, expected: %d, proportional delta: %f" %
                          (lat, percentile, fio_val, value, delta))
                    print("Rank: %d, index: %d" % (rank, index))
                    this_iter = False
                elif self.debug:
                    print('%s %sth percentile values match: %d, %d' %
                          (lat, percentile, fio_val, value))

            if this_iter:
                print("%s percentiles match" % lat)
            else:
                retval = False

        return retval

    @staticmethod
    def check_empty(job):
        """
        Make sure JSON data is empty.

        Some data structures should be empty. This function makes sure that they are.

        job         JSON object that we need to check for emptiness
        """

        return job['total_ios'] == 0 and \
                job['slat_ns']['N'] == 0 and \
                job['clat_ns']['N'] == 0 and \
                job['lat_ns']['N'] == 0

    def check_nocmdprio_lat(self, job):
        """
        Make sure no high/low priority latencies appear.

        job         JSON object to check
        """

        for ddir in ['read', 'write', 'trim']:
            if ddir in job:
                if 'lat_high_prio' in job[ddir] or 'lat_low_prio' in job[ddir] or \
                        'clat_high_prio' in job[ddir] or 'clat_low_prio' in job[ddir]:
                    print("Unexpected high/low priority latencies found in %s output" % ddir)
                    return False

        if self.debug:
            print("No high/low priority latencies found")

        return True

    @staticmethod
    def similar(approximation, actual):
        """
        Check whether the approximate values recorded by fio are within the theoretical bound.

        Since it is impractical to store exact latency measurements for each and every IO, fio
        groups similar latency measurements into variable-sized bins. The theory in stat.h says
        that the proportional error will be less than 1/128. This function checks whether this
        is true.

        TODO This test will fail when comparing a value from the largest latency bin against its
        actual measurement. Find some way to detect this and avoid failing.

        approximation   value of the bin used by fio to store a given latency
        actual          actual latency value
        """
        delta = abs(approximation - actual) / actual
        return delta <= 1/128

    def check_jsonplus(self, jsondata):
        """Check consistency of json+ data

        When we have json+ data we can check the min value, max value, and
        sample size reported by fio

        jsondata    json+ data that we need to check
        """

        retval = True

        keys = [int(k) for k in jsondata['bins'].keys()]
        values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
        smallest = min(keys)
        biggest = max(keys)
        sampsize = sum(values)

        if not self.similar(jsondata['min'], smallest):
            retval = False
            print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
        elif self.debug:
            print('json+ min values match: %d' % jsondata['min'])

        if not self.similar(jsondata['max'], biggest):
            retval = False
            print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
        elif self.debug:
            print('json+ max values match: %d' % jsondata['max'])

        if sampsize != jsondata['N']:
            retval = False
            print('reported sample size %d does not match json+ total count %d' % \
                    (jsondata['N'], sampsize))
        elif self.debug:
            print('json+ sample sizes match: %d' % sampsize)

        return retval

    def check_sync_lat(self, jsondata, plus=False):
        """Check fsync latency percentile data.

        All we can check is that some percentiles are reported, unless we have json+ data.
        If we actually have json+ data then we can do more checking.

        jsondata    JSON data for fsync operations
        plus        True if we actually have json+ data
        """
        retval = True

        if 'percentile' not in jsondata['lat_ns']:
            print("Sync percentile data not found")
            return False

        if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
            retval = False
            print('Mismatch between total_ios and lat_ns sample size')
        elif self.debug:
            print('sync sample sizes match: %d' % jsondata['total_ios'])

        if not plus:
            if 'bins' in jsondata['lat_ns']:
                print('Unexpected json+ bin data found')
                return False
        # Bug fix: only run the json+ checks when we actually have json+
        # data; the original called check_jsonplus() unconditionally and
        # raised KeyError('bins') for plain json output without bins.
        elif not self.check_jsonplus(jsondata['lat_ns']):
            retval = False

        return retval

    def check_terse(self, terse, jsondata):
        """Compare terse latencies with JSON latencies.

        terse       terse format data for checking (list of 'pct%=value' strings)
        jsondata    JSON format percentile dict for checking
        """

        retval = True

        for lat in terse:
            split = lat.split('%')
            pct = split[0]
            terse_val = int(split[1][1:])
            # Terse values are in usec; JSON values are in nsec.
            json_val = math.floor(jsondata[pct]/1000)
            if terse_val != json_val:
                retval = False
                print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
                        (pct, jsondata[pct], json_val, terse_val))
            elif self.debug:
                print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))

        return retval

    def check_prio_latencies(self, jsondata, clat=True, plus=False):
        """Check consistency of high/low priority latencies.

        jsondata    JSON data (for one data direction) to check
        clat        True if we should check clat data; otherwise check lat data
        plus        True if we have json+ format data where additional checks can
                    be carried out
        """

        if clat:
            high = 'clat_high_prio'
            low = 'clat_low_prio'
            combined = 'clat_ns'
        else:
            high = 'lat_high_prio'
            low = 'lat_low_prio'
            combined = 'lat_ns'

        if high not in jsondata or low not in jsondata or combined not in jsondata:
            print("Error identifying high/low priority latencies")
            return False

        if jsondata[high]['N'] + jsondata[low]['N'] != jsondata[combined]['N']:
            print("High %d + low %d != combined sample size %d" % \
                    (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
            return False
        elif self.debug:
            print("High %d + low %d == combined sample size %d" % \
                    (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))

        if min(jsondata[high]['min'], jsondata[low]['min']) != jsondata[combined]['min']:
            print("Min of high %d, low %d min latencies does not match min %d from combined data" % \
                    (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
            return False
        elif self.debug:
            print("Min of high %d, low %d min latencies matches min %d from combined data" % \
                    (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))

        if max(jsondata[high]['max'], jsondata[low]['max']) != jsondata[combined]['max']:
            print("Max of high %d, low %d max latencies does not match max %d from combined data" % \
                    (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
            return False
        elif self.debug:
            print("Max of high %d, low %d max latencies matches max %d from combined data" % \
                    (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))

        # The combined mean must be the sample-weighted average of the high
        # and low priority means (to within floating-point tolerance).
        weighted_avg = (jsondata[high]['mean'] * jsondata[high]['N'] + \
                        jsondata[low]['mean'] * jsondata[low]['N']) / jsondata[combined]['N']
        delta = abs(weighted_avg - jsondata[combined]['mean'])
        if (delta / jsondata[combined]['mean']) > 0.0001:
            print("Difference between weighted average %f of high, low means "
                  "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
            return False
        elif self.debug:
            print("Weighted average %f of high, low means matches actual mean %f" % \
                    (weighted_avg, jsondata[combined]['mean']))

        if plus:
            if not self.check_jsonplus(jsondata[high]):
                return False
            if not self.check_jsonplus(jsondata[low]):
                return False

            # Merge the high and low priority histograms and make sure the
            # result matches the combined histogram bin-for-bin.
            bins = {**jsondata[high]['bins'], **jsondata[low]['bins']}
            for duration in bins.keys():
                if duration in jsondata[high]['bins'] and duration in jsondata[low]['bins']:
                    bins[duration] = jsondata[high]['bins'][duration] + \
                            jsondata[low]['bins'][duration]

            if len(bins) != len(jsondata[combined]['bins']):
                print("Number of combined high/low bins does not match number of overall bins")
                return False
            elif self.debug:
                print("Number of bins from merged high/low data matches number of overall bins")

            for duration in bins.keys():
                if bins[duration] != jsondata[combined]['bins'][duration]:
                    print("Merged high/low count does not match overall count for duration %d" \
                            % duration)
                    return False

        print("Merged high/low priority latency data match combined latency data")
        return True

    def check(self):
        """Check test output. Subclasses must override this."""

        raise NotImplementedError()
581
582
class Test001(FioLatTest):
    """Test object for Test 1."""

    def check(self):
        """Check Test 1 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the read direction should have data; write and trim stay empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False)

        return passed
605
606
class Test002(FioLatTest):
    """Test object for Test 2."""

    def check(self):
        """Check Test 2 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the write direction should have data; read and trim stay empty.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, clat=False)

        return passed
629
630
class Test003(FioLatTest):
    """Test object for Test 3."""

    def check(self):
        """Check Test 3 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the trim direction should have data; read and write stay empty.
        for ddir in ('read', 'write'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)

        return passed
653
654
class Test004(FioLatTest):
    """Test object for Tests 4, 13."""

    def check(self):
        """Check Test 4, 13 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the read direction should have data; write and trim stay empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)

        return passed
677
678
class Test005(FioLatTest):
    """Test object for Test 5."""

    def check(self):
        """Check Test 5 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the write direction should have data; read and trim stay empty.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return passed
701
702
class Test006(FioLatTest):
    """Test object for Test 6."""

    def check(self):
        """Check Test 6 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the read direction should have data; write and trim stay empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)

        return passed
725
726
class Test007(FioLatTest):
    """Test object for Test 7."""

    def check(self):
        """Check Test 7 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Read and write data are both expected; only trim stays empty.
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
        passed &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)

        return passed
747
748
class Test008(FioLatTest):
    """Test object for Tests 8, 14."""

    def check(self):
        """Check Test 8, 14 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # With unified reporting only the 'mixed' direction should exist.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)

        return passed
768
769
class Test009(FioLatTest):
    """Test object for Test 9."""

    def check(self):
        """Check Test 9 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only write (plus fsync) data is expected; read and trim stay empty.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False
        if not self.check_sync_lat(job['sync'], plus=True):
            print("Error checking fsync latency data")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return passed
795
796
class Test010(FioLatTest):
    """Test object for Test 10."""

    def check(self):
        """Check Test 10 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Read and write data are both expected; only trim stays empty.
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)
        passed &= self.check_latencies(job['write'], 1, plus=True)
        passed &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        passed &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.

        return passed
821
822
class Test011(FioLatTest):
    """Test object for Test 11."""

    def check(self):
        """Check Test 11 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Read and write data are both expected; only trim stays empty.
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
        passed &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
        passed &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        passed &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.

        return passed
847
848
class Test015(FioLatTest):
    """Test object for Test 15."""

    def check(self):
        """Check Test 15 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the read direction should have data; write and trim stay empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['read'], 0, plus=True)
        passed &= self.check_prio_latencies(job['read'], clat=False, plus=True)

        return passed
869
870
class Test016(FioLatTest):
    """Test object for Test 16."""

    def check(self):
        """Check Test 16 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the write direction should have data; read and trim stay empty.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['write'], 1, slat=False, plus=True)
        passed &= self.check_prio_latencies(job['write'], clat=False, plus=True)

        return passed
891
892
class Test017(FioLatTest):
    """Test object for Test 17."""

    def check(self):
        """Check Test 17 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Only the read direction should have data; write and trim stay empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                passed = False

        passed &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
        passed &= self.check_prio_latencies(job['read'], plus=True)

        return passed
913
914
class Test018(FioLatTest):
    """Test object for Test 18."""

    def check(self):
        """Check Test 18 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # Read and write data are both expected; only trim stays empty.
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            passed = False

        passed &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
        passed &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)

        # We actually have json+ data but setting plus=False below avoids checking the
        # json+ bins which did not exist for clat and lat because this job is run with
        # clat_percentiles=0, lat_percentiles=0. However, we can still check the summary
        # statistics.
        passed &= self.check_prio_latencies(job['write'], plus=False)
        passed &= self.check_prio_latencies(job['read'], plus=False)

        return passed
939
940
class Test019(FioLatTest):
    """Test object for Tests 19, 20."""

    def check(self):
        """Check Test 19, 20 output."""

        job = self.json_data['jobs'][0]
        passed = True

        # With unified reporting only the 'mixed' direction should exist.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            passed = False

        passed &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
        passed &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)

        return passed
958
959
def parse_args():
    """Parse and return command-line arguments for the latency test driver."""

    parser = argparse.ArgumentParser()
    # Fixed help-string typo: this is the path to the *fio* executable.
    parser.add_argument('-f', '--fio', help='path to fio executable (e.g., ./fio)')
    parser.add_argument('-a', '--artifact-root', help='artifact root directory')
    parser.add_argument('-d', '--debug', help='enable debug output', action='store_true')
    parser.add_argument('-s', '--skip', nargs='+', type=int,
                        help='list of test(s) to skip')
    parser.add_argument('-o', '--run-only', nargs='+', type=int,
                        help='list of test(s) to run, skipping all others')
    args = parser.parse_args()

    return args
974
975
def main():
    """Run tests of fio latency percentile reporting.

    Creates an artifact directory, resolves the fio binary path and the
    platform-appropriate asynchronous ioengine, runs every scenario in
    test_list (honoring --skip/--run-only), prints a per-test outcome
    plus a summary, and exits with the number of failed tests as the
    process status.
    """

    args = parse_args()

    # Default artifact directory is timestamped so repeated runs do not
    # collide; os.mkdir raises if the directory already exists.
    artifact_root = args.artifact_root if args.artifact_root else \
        "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
    os.mkdir(artifact_root)
    print("Artifact directory is %s" % artifact_root)

    if args.fio:
        fio = str(Path(args.fio).absolute())
    else:
        fio = 'fio'
    print("fio path is %s" % fio)

    # Pick an asynchronous ioengine available on this platform; the
    # async tests below need one so that slat/clat/lat are all distinct.
    if platform.system() == 'Linux':
        aio = 'libaio'
    elif platform.system() == 'Windows':
        aio = 'windowsaio'
    else:
        aio = 'posixaio'

    # Each entry is both the fio job parameters and the checker class
    # ("test_obj") that validates the resulting output.
    test_list = [
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear
            # because the null ioengine is synchronous
            "test_id": 1,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            "test_obj": Test001,
        },
        {
            # randwrite, null
            # enable lat only
            "test_id": 2,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            "test_obj": Test002,
        },
        {
            # randtrim, null
            # enable clat only
            "test_id": 3,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": 'null',
            'rw': 'randtrim',
            "test_obj": Test003,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            "test_id": 4,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test004,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            "test_id": 5,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            "test_obj": Test005,
        },
        {
            # randread, aio
            # by default only clat should appear
            "test_id": 6,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test006,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            "test_id": 7,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test007,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            "test_id": 8,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            "test_obj": Test008,
        },
        {
            # randwrite, null
            # enable slat, clat, lat
            # fsync
            "test_id": 9,
            "runtime": 2,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            'fsync': 32,
            "test_obj": Test009,
        },
        {
            # 50/50 r/w, aio
            # enable slat, clat, lat
            "test_id": 10,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test010,
        },
        {
            # 50/50 r/w, aio
            # enable only lat
            "test_id": 11,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test011,
        },
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear
            # because the null ioengine is synchronous
            # same as Test 1 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 12,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test001,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 13,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test004,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 14,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'numjobs': 4,
            "test_obj": Test008,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add cmdprio_percentage
            "test_id": 15,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test015,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            # same as Test 5 except add cmdprio_percentage
            "test_id": 16,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            'cmdprio_percentage': 50,
            "test_obj": Test016,
        },
        {
            # randread, aio
            # by default only clat should appear
            # same as Test 6 except add cmdprio_percentage
            "test_id": 17,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test017,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            # same as Test 7 except add cmdprio_percentage
            "test_id": 18,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'cmdprio_percentage': 50,
            "test_obj": Test018,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add cmdprio_percentage
            "test_id": 19,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            "test_obj": Test019,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 19 except
            # add numjobs = 4 to test sum_thread_stats() changes
            "test_id": 20,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            'numjobs': 4,
            "test_obj": Test019,
        },
    ]

    passed = 0
    failed = 0
    skipped = 0

    for test in test_list:
        # Honor user-requested --skip / --run-only filtering first.
        if (args.skip and test['test_id'] in args.skip) or \
           (args.run_only and test['test_id'] not in args.run_only):
            skipped = skipped + 1
            outcome = 'SKIPPED (User request)'
        # cmdprio_percentage is only supported on Linux ioengines.
        elif platform.system() != 'Linux' and 'cmdprio_percentage' in test:
            skipped = skipped + 1
            outcome = 'SKIPPED (Linux required for cmdprio_percentage tests)'
        else:
            test_obj = test['test_obj'](artifact_root, test, args.debug)
            status = test_obj.run_fio(fio)
            # Only validate the output if the fio run itself succeeded.
            if status:
                status = test_obj.check()
            if status:
                passed = passed + 1
                outcome = 'PASSED'
            else:
                failed = failed + 1
                outcome = 'FAILED'

        print("**********Test {0} {1}**********".format(test['test_id'], outcome))

    print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))

    # Exit status is the failure count: 0 means everything passed.
    sys.exit(failed)
1321
1322
# Script entry point: run the latency percentile test suite.
if __name__ == '__main__':
    main()