t/run-fio-tests: better catch file errors
[fio.git] / t / latency_percentiles.py
CommitLineData
20e70a59
VF
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# Copyright (c) 2020 Western Digital Corporation or its affiliates.
5#
6"""
7# latency_percentiles.py
8#
9# Test the code that produces latency percentiles
10# This is mostly to test the code changes to allow reporting
11# of slat, clat, and lat percentiles
12#
13# USAGE
14# python3 latency_percentiles.py [-f fio-path] [-a artifact-root] [--debug]
15#
16#
17# Test scenarios:
18#
19# - DONE json
20# unified rw reporting
21# compare with latency log
22# try various combinations of the ?lat_percentile options
23# null, aio
24# r, w, t
25# - DONE json+
26# check presence of latency bins
27# if the json percentiles match those from the raw data
28# then the latency bin values and counts are probably ok
29# - DONE terse
30# produce both terse, JSON output and confirm that they match
31# lat only; both lat and clat
32# - DONE sync_lat
33# confirm that sync_lat data appears
34# - MANUAL TESTING normal output:
35# null ioengine
36# enable all, but only clat and lat appear
37# enable subset of latency types
38# read, write, trim, unified
39# libaio ioengine
40# enable all latency types
41# enable subset of latency types
42# read, write, trim, unified
43# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
44# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
45# echo confirm that clat and lat percentiles appear
46# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
47# --ioengine=null --slat_percentiles=0 --clat_percentiles=0 --lat_percentiles=1
48# echo confirm that only lat percentiles appear
49# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
50# --ioengine=null --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=0
51# echo confirm that only clat percentiles appear
52# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
53# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1
54# echo confirm that slat, clat, lat percentiles appear
55# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
56# --ioengine=libaio --slat_percentiles=0 --clat_percentiles=1 --lat_percentiles=1
57# echo confirm that clat and lat percentiles appear
58# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
59# --ioengine=libaio -rw=randrw
60# echo confirm that clat percentiles appear for reads and writes
61# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
62# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=0 --lat_percentiles=0 --rw=randrw
63# echo confirm that slat percentiles appear for both reads and writes
64# ./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
65# --ioengine=libaio --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
66# --rw=randrw --unified_rw_reporting=1
67# echo confirm that slat, clat, and lat percentiles appear for 'mixed' IOs
68#./fio/fio --name=test --randrepeat=0 --norandommap --time_based --runtime=2s --size=512M \
69# --ioengine=null --slat_percentiles=1 --clat_percentiles=1 --lat_percentiles=1 \
70# --rw=randrw --fsync=32
71# echo confirm that fsync latencies appear
72"""
73
74import os
75import csv
76import sys
77import json
78import math
79import time
80import argparse
81import platform
82import subprocess
83from pathlib import Path
84
85
class FioLatTest():
    """fio latency percentile test.

    Runs one fio job described by a dictionary of test options, captures its
    output, and provides helpers that cross-check the latency percentiles fio
    reports against the raw latency logs that fio writes.
    """

    def __init__(self, artifact_root, test_options, debug):
        """
        artifact_root root directory for artifacts (subdirectory will be created under here)
        test_options dictionary of options describing the test to run
        debug True to enable debug output
        """
        self.artifact_root = artifact_root
        self.test_options = test_options
        self.debug = debug
        self.filename = None
        self.json_data = None
        self.terse_data = None

        self.test_dir = os.path.join(self.artifact_root,
                                     "{:03d}".format(self.test_options['test_id']))
        if not os.path.exists(self.test_dir):
            os.mkdir(self.test_dir)

        self.filename = "latency{:03d}".format(self.test_options['test_id'])

    def run_fio(self, fio_path):
        """Run a test.

        fio_path path to the fio executable

        Returns True if fio exits successfully and its output can be decoded;
        False otherwise.
        """

        fio_args = [
            "--name=latency",
            "--randrepeat=0",
            "--norandommap",
            "--time_based",
            "--size=16M",
            "--rwmixread=50",
            "--group_reporting=1",
            "--write_lat_log={0}".format(self.filename),
            "--output={0}.out".format(self.filename),
            "--ioengine={ioengine}".format(**self.test_options),
            "--rw={rw}".format(**self.test_options),
            "--runtime={runtime}".format(**self.test_options),
            "--output-format={output-format}".format(**self.test_options),
        ]
        # Append only the optional settings that this test specifies.
        for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
                    'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs', 'cmdprio_percentage']:
            if opt in self.test_options:
                option = '--{0}={{{0}}}'.format(opt)
                fio_args.append(option.format(**self.test_options))

        command = [fio_path] + fio_args
        with open(os.path.join(self.test_dir, "{0}.command".format(self.filename)), "w+") as \
                command_file:
            command_file.write("%s\n" % command)

        passed = True
        stdout_path = os.path.join(self.test_dir, "{0}.stdout".format(self.filename))
        stderr_path = os.path.join(self.test_dir, "{0}.stderr".format(self.filename))
        exitcode_path = os.path.join(self.test_dir, "{0}.exitcode".format(self.filename))
        # Context managers guarantee that the three output files are closed
        # even if Popen() or communicate() raises; previously they were
        # opened outside the try block and could leak on an early exception.
        with open(stdout_path, "w+") as stdout_file, \
                open(stderr_path, "w+") as stderr_file, \
                open(exitcode_path, "w+") as exitcode_file:
            try:
                proc = None
                # Avoid using subprocess.run() here because when a timeout occurs,
                # fio will be stopped with SIGKILL. This does not give fio a
                # chance to clean up and means that child processes may continue
                # running and submitting IO.
                proc = subprocess.Popen(command,
                                        stdout=stdout_file,
                                        stderr=stderr_file,
                                        cwd=self.test_dir,
                                        universal_newlines=True)
                proc.communicate(timeout=300)
                exitcode_file.write('{0}\n'.format(proc.returncode))
                passed &= (proc.returncode == 0)
            except subprocess.TimeoutExpired:
                proc.terminate()
                proc.communicate()
                assert proc.poll()
                print("Timeout expired")
                passed = False
            except Exception:
                if proc:
                    # poll() is None only while the child is still running.
                    # The original tested "not proc.poll()", which is also
                    # true for a child that already exited with status 0.
                    if proc.poll() is None:
                        proc.terminate()
                        proc.communicate()
                print("Exception: %s" % sys.exc_info())
                passed = False

        if passed:
            if 'json' in self.test_options['output-format']:
                if not self.get_json():
                    print('Unable to decode JSON data')
                    passed = False
            if 'terse' in self.test_options['output-format']:
                if not self.get_terse():
                    print('Unable to decode terse data')
                    passed = False

        return passed

    def get_json(self):
        """Convert fio JSON output into a python JSON object.

        Returns True and sets self.json_data on success; returns False if the
        output cannot be parsed as JSON.
        """

        filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
        with open(filename, 'r') as file:
            file_data = file.read()

        #
        # Sometimes fio informational messages are included at the top of the
        # JSON output, especially under Windows. Try to decode output as JSON
        # data, lopping off up to the first four lines
        #
        lines = file_data.splitlines()
        for i in range(5):
            file_data = '\n'.join(lines[i:])
            try:
                self.json_data = json.loads(file_data)
            except json.JSONDecodeError:
                continue
            else:
                return True

        return False

    def get_terse(self):
        """Read fio output and return terse format data.

        Returns True and sets self.terse_data if a terse line is found;
        False otherwise.
        """

        filename = os.path.join(self.test_dir, "{0}.out".format(self.filename))
        with open(filename, 'r') as file:
            file_data = file.read()

        #
        # Read the first few lines and see if any of them begin with '3;fio-'
        # If so, the line is probably terse output. Obviously, this only
        # works for fio terse version 3 and it does not work for
        # multi-line terse output
        #
        # Iterate over a slice rather than indexing lines[i] directly; the
        # original raised IndexError when fio produced fewer than eight
        # lines of output instead of returning False.
        #
        lines = file_data.splitlines()
        for line in lines[0:8]:
            if line.startswith('3;fio-'):
                self.terse_data = line.split(';')
                return True

        return False

    def check_latencies(self, jsondata, ddir, slat=True, clat=True, tlat=True, plus=False,
                        unified=False):
        """Check fio latency data.

        ddir data direction to check (0=read, 1=write, 2=trim)
        slat True if submission latency data available to check
        clat True if completion latency data available to check
        tlat True of total latency data available to check
        plus True if we actually have json+ format data where additional checks can
            be carried out
        unified True if fio is reporting unified r/w data
        """

        types = {
            'slat': slat,
            'clat': clat,
            'lat': tlat
        }

        retval = True

        for lat in ['slat', 'clat', 'lat']:
            this_iter = True
            if not types[lat]:
                if 'percentile' in jsondata[lat+'_ns']:
                    this_iter = False
                    print('unexpected %s percentiles found' % lat)
                    # The 'continue' below skips the retval update at the
                    # bottom of the loop, so the original never recorded
                    # this failure; record it here explicitly.
                    retval = False
                else:
                    print("%s percentiles skipped" % lat)
                continue
            else:
                if 'percentile' not in jsondata[lat+'_ns']:
                    this_iter = False
                    print('%s percentiles not found in fio output' % lat)

            #
            # Check only for the presence/absence of json+
            # latency bins. Future work can check the
            # accurracy of the bin values and counts.
            #
            # Because the latency percentiles are based on
            # the bins, we can be confident that the bin
            # values and counts are correct if fio's
            # latency percentiles match what we compute
            # from the raw data.
            #
            if plus:
                if 'bins' not in jsondata[lat+'_ns']:
                    print('bins not found with json+ output format')
                    this_iter = False
                else:
                    if not self.check_jsonplus(jsondata[lat+'_ns']):
                        this_iter = False
            else:
                if 'bins' in jsondata[lat+'_ns']:
                    print('json+ bins found with json output format')
                    this_iter = False

            # Collect every latency logged for this data direction from the
            # per-job latency log files produced by --write_lat_log.
            latencies = []
            for i in range(10):
                lat_file = os.path.join(self.test_dir, "%s_%s.%s.log" % (self.filename, lat, i+1))
                if not os.path.exists(lat_file):
                    break
                with open(lat_file, 'r', newline='') as file:
                    reader = csv.reader(file)
                    for line in reader:
                        if unified or int(line[2]) == ddir:
                            latencies.append(int(line[1]))

            if int(jsondata['total_ios']) != len(latencies):
                this_iter = False
                print('%s: total_ios = %s, latencies logged = %d' % \
                        (lat, jsondata['total_ios'], len(latencies)))
            elif self.debug:
                print("total_ios %s match latencies logged" % jsondata['total_ios'])

            latencies.sort()
            ptiles = jsondata[lat+'_ns']['percentile']

            for percentile in ptiles.keys():
                #
                # numpy.percentile(latencies, float(percentile),
                # interpolation='higher')
                # produces values that mostly match what fio reports
                # however, in the tails of the distribution, the values produced
                # by fio's and numpy.percentile's algorithms are occasionally off
                # by one latency measurement. So instead of relying on the canned
                # numpy.percentile routine, implement here fio's algorithm
                #
                rank = math.ceil(float(percentile)/100 * len(latencies))
                if rank > 0:
                    index = rank - 1
                else:
                    index = 0
                value = latencies[int(index)]
                fio_val = int(ptiles[percentile])
                # The theory in stat.h says that the proportional error will be
                # less than 1/128
                if not self.similar(fio_val, value):
                    delta = abs(fio_val - value) / value
                    print("Error with %s %sth percentile: "
                          "fio: %d, expected: %d, proportional delta: %f" %
                          (lat, percentile, fio_val, value, delta))
                    print("Rank: %d, index: %d" % (rank, index))
                    this_iter = False
                elif self.debug:
                    print('%s %sth percentile values match: %d, %d' %
                          (lat, percentile, fio_val, value))

            if this_iter:
                print("%s percentiles match" % lat)
            else:
                retval = False

        return retval

    @staticmethod
    def check_empty(job):
        """
        Make sure JSON data is empty.

        Some data structures should be empty. This function makes sure that they are.

        job JSON object that we need to check for emptiness
        """

        return job['total_ios'] == 0 and \
                job['slat_ns']['N'] == 0 and \
                job['clat_ns']['N'] == 0 and \
                job['lat_ns']['N'] == 0

    def check_nocmdprio_lat(self, job):
        """
        Make sure no high/low priority latencies appear.

        job JSON object to check
        """

        for ddir in ['read', 'write', 'trim']:
            if ddir in job:
                if 'lat_high_prio' in job[ddir] or 'lat_low_prio' in job[ddir] or \
                        'clat_high_prio' in job[ddir] or 'clat_low_prio' in job[ddir]:
                    print("Unexpected high/low priority latencies found in %s output" % ddir)
                    return False

        if self.debug:
            print("No high/low priority latencies found")

        return True

    @staticmethod
    def similar(approximation, actual):
        """
        Check whether the approximate values recorded by fio are within the theoretical bound.

        Since it is impractical to store exact latency measurements for each and every IO, fio
        groups similar latency measurements into variable-sized bins. The theory in stat.h says
        that the proportional error will be less than 1/128. This function checks whether this
        is true.

        TODO This test will fail when comparing a value from the largest latency bin against its
        actual measurement. Find some way to detect this and avoid failing.

        approximation value of the bin used by fio to store a given latency
        actual actual latency value
        """

        # Avoid a division by zero. The smallest latency values have no error.
        if actual == 0:
            return approximation == 0

        delta = abs(approximation - actual) / actual
        return delta <= 1/128

    def check_jsonplus(self, jsondata):
        """Check consistency of json+ data

        When we have json+ data we can check the min value, max value, and
        sample size reported by fio

        jsondata json+ data that we need to check
        """

        retval = True

        keys = [int(k) for k in jsondata['bins'].keys()]
        values = [int(jsondata['bins'][k]) for k in jsondata['bins'].keys()]
        smallest = min(keys)
        biggest = max(keys)
        sampsize = sum(values)

        if not self.similar(jsondata['min'], smallest):
            retval = False
            print('reported min %d does not match json+ min %d' % (jsondata['min'], smallest))
        elif self.debug:
            print('json+ min values match: %d' % jsondata['min'])

        if not self.similar(jsondata['max'], biggest):
            retval = False
            print('reported max %d does not match json+ max %d' % (jsondata['max'], biggest))
        elif self.debug:
            print('json+ max values match: %d' % jsondata['max'])

        if sampsize != jsondata['N']:
            retval = False
            print('reported sample size %d does not match json+ total count %d' % \
                    (jsondata['N'], sampsize))
        elif self.debug:
            print('json+ sample sizes match: %d' % sampsize)

        return retval

    def check_sync_lat(self, jsondata, plus=False):
        """Check fsync latency percentile data.

        All we can check is that some percentiles are reported, unless we have json+ data.
        If we actually have json+ data then we can do more checking.

        jsondata JSON data for fsync operations
        plus True if we actually have json+ data
        """
        retval = True

        if 'percentile' not in jsondata['lat_ns']:
            print("Sync percentile data not found")
            return False

        if int(jsondata['total_ios']) != int(jsondata['lat_ns']['N']):
            retval = False
            print('Mismatch between total_ios and lat_ns sample size')
        elif self.debug:
            print('sync sample sizes match: %d' % jsondata['total_ios'])

        if not plus:
            if 'bins' in jsondata['lat_ns']:
                print('Unexpected json+ bin data found')
                return False
        # check_jsonplus() dereferences jsondata['bins'], which only exists
        # for json+ output; the original called it unconditionally and so
        # raised KeyError when handed plain json data.
        elif not self.check_jsonplus(jsondata['lat_ns']):
            retval = False

        return retval

    def check_terse(self, terse, jsondata):
        """Compare terse latencies with JSON latencies.

        Terse percentile values are in usec; JSON values are in nsec.

        terse terse format data for checking
        jsondata JSON format data for checking
        """

        retval = True

        for lat in terse:
            # each terse entry looks like "99.000000%=12345"
            split = lat.split('%')
            pct = split[0]
            terse_val = int(split[1][1:])
            json_val = math.floor(jsondata[pct]/1000)
            if terse_val != json_val:
                retval = False
                print('Mismatch with %sth percentile: json value=%d,%d terse value=%d' % \
                        (pct, jsondata[pct], json_val, terse_val))
            elif self.debug:
                print('Terse %sth percentile matches JSON value: %d' % (pct, terse_val))

        return retval

    def check_prio_latencies(self, jsondata, clat=True, plus=False):
        """Check consistency of high/low priority latencies.

        jsondata JSON data for the data direction to check
        clat True if we should check clat data; otherwise check lat data
        plus True if we have json+ format data where additional checks can
            be carried out
        """

        if clat:
            high = 'clat_high_prio'
            low = 'clat_low_prio'
            combined = 'clat_ns'
        else:
            high = 'lat_high_prio'
            low = 'lat_low_prio'
            combined = 'lat_ns'

        if not high in jsondata or not low in jsondata or not combined in jsondata:
            print("Error identifying high/low priority latencies")
            return False

        if jsondata[high]['N'] + jsondata[low]['N'] != jsondata[combined]['N']:
            print("High %d + low %d != combined sample size %d" % \
                    (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
            return False
        elif self.debug:
            print("High %d + low %d == combined sample size %d" % \
                    (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))

        if min(jsondata[high]['min'], jsondata[low]['min']) != jsondata[combined]['min']:
            print("Min of high %d, low %d min latencies does not match min %d from combined data" % \
                    (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
            return False
        elif self.debug:
            print("Min of high %d, low %d min latencies matches min %d from combined data" % \
                    (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))

        if max(jsondata[high]['max'], jsondata[low]['max']) != jsondata[combined]['max']:
            print("Max of high %d, low %d max latencies does not match max %d from combined data" % \
                    (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
            return False
        elif self.debug:
            print("Max of high %d, low %d max latencies matches max %d from combined data" % \
                    (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))

        # The combined mean must equal the sample-count-weighted average of
        # the high and low priority means (to within rounding error).
        weighted_avg = (jsondata[high]['mean'] * jsondata[high]['N'] + \
                        jsondata[low]['mean'] * jsondata[low]['N']) / jsondata[combined]['N']
        delta = abs(weighted_avg - jsondata[combined]['mean'])
        if (delta / jsondata[combined]['mean']) > 0.0001:
            print("Difference between weighted average %f of high, low means "
                  "and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
            return False
        elif self.debug:
            print("Weighted average %f of high, low means matches actual mean %f" % \
                    (weighted_avg, jsondata[combined]['mean']))

        if plus:
            if not self.check_jsonplus(jsondata[high]):
                return False
            if not self.check_jsonplus(jsondata[low]):
                return False

            # Merge the high and low priority bins and confirm that the
            # result matches the combined latency bins.
            bins = {**jsondata[high]['bins'], **jsondata[low]['bins']}
            for duration in bins.keys():
                if duration in jsondata[high]['bins'] and duration in jsondata[low]['bins']:
                    bins[duration] = jsondata[high]['bins'][duration] + \
                            jsondata[low]['bins'][duration]

            if len(bins) != len(jsondata[combined]['bins']):
                print("Number of combined high/low bins does not match number of overall bins")
                return False
            elif self.debug:
                print("Number of bins from merged high/low data matches number of overall bins")

            for duration in bins.keys():
                if bins[duration] != jsondata[combined]['bins'][duration]:
                    print("Merged high/low count does not match overall count for duration %d" \
                            % duration)
                    return False

        print("Merged high/low priority latency data match combined latency data")
        return True

    def check(self):
        """Check test output (implemented by subclasses)."""

        raise NotImplementedError()
586
587
class Test001(FioLatTest):
    """Test object for Test 1."""

    def check(self):
        """Check Test 1 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only read data should be present; write and trim must be empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['read'], 0, slat=False)

        return ok
610
611
class Test002(FioLatTest):
    """Test object for Test 2."""

    def check(self):
        """Check Test 2 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only write data should be present; read and trim must be empty.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['write'], 1, slat=False, clat=False)

        return ok
634
635
class Test003(FioLatTest):
    """Test object for Test 3."""

    def check(self):
        """Check Test 3 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only trim data should be present; read and write must be empty.
        for ddir in ('read', 'write'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)

        return ok
658
659
class Test004(FioLatTest):
    """Test object for Tests 4, 13."""

    def check(self):
        """Check Test 4, 13 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only read data should be present; write and trim must be empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['read'], 0, plus=True)

        return ok
682
683
class Test005(FioLatTest):
    """Test object for Test 5."""

    def check(self):
        """Check Test 5 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only write data should be present; read and trim must be empty.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return ok
706
707
class Test006(FioLatTest):
    """Test object for Test 6."""

    def check(self):
        """Check Test 6 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only read data should be present; write and trim must be empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)

        return ok
730
731
class Test007(FioLatTest):
    """Test object for Test 7."""

    def check(self):
        """Check Test 7 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Reads and writes are expected; only trim must be empty.
        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
        ok &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)

        return ok
752
753
class Test008(FioLatTest):
    """Test object for Tests 8, 14."""

    def check(self):
        """Check Test 8, 14 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # With unified reporting only 'mixed' data should appear.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)

        return ok
773
774
class Test009(FioLatTest):
    """Test object for Test 9."""

    def check(self):
        """Check Test 9 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only write (and fsync) data should be present.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False
        if not self.check_sync_lat(job['sync'], plus=True):
            print("Error checking fsync latency data")
            ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['write'], 1, slat=False, plus=True)

        return ok
800
801
class Test010(FioLatTest):
    """Test object for Test 10."""

    def check(self):
        """Check Test 10 output."""

        job = self.json_data['jobs'][0]
        ok = True

        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['read'], 0, plus=True)
        ok &= self.check_latencies(job['write'], 1, plus=True)
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.
        ok &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        ok &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])

        return ok
826
827
class Test011(FioLatTest):
    """Test object for Test 11."""

    def check(self):
        """Check Test 11 output."""

        job = self.json_data['jobs'][0]
        ok = True

        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            ok = False
        if not self.check_nocmdprio_lat(job):
            print("Unexpected high/low priority latencies found")
            ok = False

        ok &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
        ok &= self.check_latencies(job['write'], 1, slat=False, clat=False, plus=True)
        # Terse data checking only works for default percentiles.
        # This needs to be changed if something other than the default is ever used.
        ok &= self.check_terse(self.terse_data[17:34], job['read']['lat_ns']['percentile'])
        ok &= self.check_terse(self.terse_data[58:75], job['write']['lat_ns']['percentile'])

        return ok
852
853
class Test015(FioLatTest):
    """Test object for Test 15."""

    def check(self):
        """Check Test 15 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only read data should be present; write and trim must be empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False

        ok &= self.check_latencies(job['read'], 0, plus=True)
        ok &= self.check_prio_latencies(job['read'], clat=False, plus=True)

        return ok
874
875
class Test016(FioLatTest):
    """Test object for Test 16."""

    def check(self):
        """Check Test 16 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only write data should be present; read and trim must be empty.
        for ddir in ('read', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False

        ok &= self.check_latencies(job['write'], 1, slat=False, plus=True)
        ok &= self.check_prio_latencies(job['write'], clat=False, plus=True)

        return ok
896
897
class Test017(FioLatTest):
    """Test object for Test 17."""

    def check(self):
        """Check Test 17 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # Only read data should be present; write and trim must be empty.
        for ddir in ('write', 'trim'):
            if not self.check_empty(job[ddir]):
                print("Unexpected %s data found in output" % ddir)
                ok = False

        ok &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
        ok &= self.check_prio_latencies(job['read'], plus=True)

        return ok
918
919
class Test018(FioLatTest):
    """Test object for Test 18."""

    def check(self):
        """Check Test 18 output."""

        job = self.json_data['jobs'][0]
        ok = True

        if not self.check_empty(job['trim']):
            print("Unexpected trim data found in output")
            ok = False

        ok &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
        ok &= self.check_latencies(job['write'], 1, clat=False, tlat=False, plus=True)

        # We actually have json+ data but setting plus=False below avoids checking the
        # json+ bins which did not exist for clat and lat because this job is run with
        # clat_percentiles=0, lat_percentiles=0, However, we can still check the summary
        # statistics
        ok &= self.check_prio_latencies(job['write'], plus=False)
        ok &= self.check_prio_latencies(job['read'], plus=False)

        return ok
944
945
class Test019(FioLatTest):
    """Test object for Tests 19, 20."""

    def check(self):
        """Check Test 19, 20 output."""

        job = self.json_data['jobs'][0]
        ok = True

        # With unified reporting only 'mixed' data should appear.
        if any(ddir in job for ddir in ('read', 'write', 'trim')):
            print("Unexpected data direction found in fio output")
            ok = False

        ok &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
        ok &= self.check_prio_latencies(job['mixed'], clat=False, plus=True)

        return ok
963
964
def parse_args(argv=None):
    """Parse command-line arguments.

    argv optional argument list to parse instead of sys.argv[1:]
        (added, with a backward-compatible default, so the parser can be
        exercised in tests)

    Returns the populated argparse.Namespace.
    """

    parser = argparse.ArgumentParser()
    # original help text said 'file executable' -- a typo for 'fio'
    parser.add_argument('-f', '--fio', help='path to fio executable (e.g., ./fio)')
    parser.add_argument('-a', '--artifact-root', help='artifact root directory')
    parser.add_argument('-d', '--debug', help='enable debug output', action='store_true')
    parser.add_argument('-s', '--skip', nargs='+', type=int,
                        help='list of test(s) to skip')
    parser.add_argument('-o', '--run-only', nargs='+', type=int,
                        help='list of test(s) to run, skipping all others')
    args = parser.parse_args(argv)

    return args
979
980
def main():
    """Run tests of fio latency percentile reporting.

    Builds the artifact directory, locates the fio binary, selects the
    platform-appropriate asynchronous ioengine, then runs each scenario in
    test_list (honoring --skip/--run-only) and exits with the number of
    failed tests as the process status.
    """

    args = parse_args()

    artifact_root = args.artifact_root if args.artifact_root else \
        "latency-test-{0}".format(time.strftime("%Y%m%d-%H%M%S"))
    # Abort cleanly instead of tracebacking if the directory cannot be created
    # (e.g., it already exists or the parent is not writable)
    try:
        os.mkdir(artifact_root)
    except OSError as err:
        print("Unable to create artifact directory %s: %s" % (artifact_root, err))
        sys.exit(1)
    print("Artifact directory is %s" % artifact_root)

    if args.fio:
        fio = str(Path(args.fio).absolute())
    else:
        fio = 'fio'
    print("fio path is %s" % fio)

    # pick the native asynchronous ioengine for this platform
    if platform.system() == 'Linux':
        aio = 'libaio'
    elif platform.system() == 'Windows':
        aio = 'windowsaio'
    else:
        aio = 'posixaio'

    test_list = [
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            "test_id": 1,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            "test_obj": Test001,
        },
        {
            # randwrite, null
            # enable lat only
            "test_id": 2,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            "test_obj": Test002,
        },
        {
            # randtrim, null
            # enable clat only
            "test_id": 3,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 0,
            "ioengine": 'null',
            'rw': 'randtrim',
            "test_obj": Test003,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            "test_id": 4,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test004,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            "test_id": 5,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            "test_obj": Test005,
        },
        {
            # randread, aio
            # by default only clat should appear
            "test_id": 6,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            "test_obj": Test006,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            "test_id": 7,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test007,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            "test_id": 8,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            "test_obj": Test008,
        },
        {
            # randwrite, null
            # enable slat, clat, lat
            # fsync
            "test_id": 9,
            "runtime": 2,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randwrite',
            'fsync': 32,
            "test_obj": Test009,
        },
        {
            # 50/50 r/w, aio
            # enable slat, clat, lat
            "test_id": 10,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test010,
        },
        {
            # 50/50 r/w, aio
            # enable only lat
            "test_id": 11,
            "runtime": 5,
            "output-format": "terse,json+",
            "slat_percentiles": 0,
            "clat_percentiles": 0,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            "test_obj": Test011,
        },
        {
            # randread, null
            # enable slat, clat, lat
            # only clat and lat will appear because
            # the null ioengine is synchronous
            # same as Test 1 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 12,
            "runtime": 2,
            "output-format": "json",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": 'null',
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test001,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 13,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'numjobs': 4,
            "test_obj": Test004,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except
            # numjobs = 4 to test sum_thread_stats() changes
            "test_id": 14,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'numjobs': 4,
            "test_obj": Test008,
        },
        {
            # randread, aio
            # enable slat, clat, lat
            # all will appear because libaio is asynchronous
            # same as Test 4 except add cmdprio_percentage
            "test_id": 15,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test015,
        },
        {
            # randwrite, aio
            # enable only clat, lat
            # same as Test 5 except add cmdprio_percentage
            "test_id": 16,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 0,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randwrite',
            'cmdprio_percentage': 50,
            "test_obj": Test016,
        },
        {
            # randread, aio
            # by default only clat should appear
            # same as Test 6 except add cmdprio_percentage
            "test_id": 17,
            "runtime": 5,
            "output-format": "json+",
            "ioengine": aio,
            'rw': 'randread',
            'cmdprio_percentage': 50,
            "test_obj": Test017,
        },
        {
            # 50/50 r/w, aio
            # enable only slat
            # same as Test 7 except add cmdprio_percentage
            "test_id": 18,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 0,
            "lat_percentiles": 0,
            "ioengine": aio,
            'rw': 'randrw',
            'cmdprio_percentage': 50,
            "test_obj": Test018,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 8 except add cmdprio_percentage
            "test_id": 19,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            "test_obj": Test019,
        },
        {
            # 50/50 r/w, aio, unified_rw_reporting
            # enable slat, clat, lat
            # same as Test 19 except
            # add numjobs = 4 to test sum_thread_stats() changes
            "test_id": 20,
            "runtime": 5,
            "output-format": "json+",
            "slat_percentiles": 1,
            "clat_percentiles": 1,
            "lat_percentiles": 1,
            "ioengine": aio,
            'rw': 'randrw',
            'unified_rw_reporting': 1,
            'cmdprio_percentage': 50,
            'numjobs': 4,
            "test_obj": Test019,
        },
    ]

    passed = 0
    failed = 0
    skipped = 0

    for test in test_list:
        if (args.skip and test['test_id'] in args.skip) or \
           (args.run_only and test['test_id'] not in args.run_only):
            skipped = skipped + 1
            outcome = 'SKIPPED (User request)'
        # cmdprio_percentage requires I/O priority support (Linux only)
        elif platform.system() != 'Linux' and 'cmdprio_percentage' in test:
            skipped = skipped + 1
            outcome = 'SKIPPED (Linux required for cmdprio_percentage tests)'
        else:
            test_obj = test['test_obj'](artifact_root, test, args.debug)
            status = test_obj.run_fio(fio)
            if status:
                status = test_obj.check()
            if status:
                passed = passed + 1
                outcome = 'PASSED'
            else:
                failed = failed + 1
                outcome = 'FAILED'

        print("**********Test {0} {1}**********".format(test['test_id'], outcome))

    print("{0} tests passed, {1} failed, {2} skipped".format(passed, failed, skipped))

    # exit status is the failure count so CI can detect any failed test
    sys.exit(failed)
1326
1327
# Standard script entry point: run the test suite when executed directly.
if __name__ == '__main__':
    main()