#!/usr/bin/env python3
# Note: this script is python2 and python3 compatible.

4 | """ | |
5 | fio_jsonplus_clat2csv | |
6 | ||
7 | This script converts fio's json+ latency data to CSV format. | |
8 | ||
9 | For example: | |
10 | ||
11 | Run the following fio jobs: | |
12 | $ fio --output=fio-jsonplus.output --output-format=json+ --ioengine=null \ | |
13 | --time_based --runtime=3s --size=1G --slat_percentiles=1 \ | |
14 | --clat_percentiles=1 --lat_percentiles=1 \ | |
15 | --name=test1 --rw=randrw \ | |
16 | --name=test2 --rw=read \ | |
17 | --name=test3 --rw=write | |
18 | ||
19 | Then run: | |
20 | $ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv | |
21 | ||
22 | You will end up with the following 3 files: | |
23 | ||
24 | -rw-r--r-- 1 root root 77547 Mar 24 15:17 fio-jsonplus_job0.csv | |
25 | -rw-r--r-- 1 root root 65413 Mar 24 15:17 fio-jsonplus_job1.csv | |
26 | -rw-r--r-- 1 root root 63291 Mar 24 15:17 fio-jsonplus_job2.csv | |
27 | ||
28 | fio-jsonplus_job0.csv will look something like: | |
29 | ||
30 | nsec, read_slat_ns_count, read_slat_ns_cumulative, read_slat_ns_percentile, read_clat_ns_count, read_clat_ns_cumulative, read_clat_ns_percentile, read_lat_ns_count, read_lat_ns_cumulative, read_lat_ns_percentile, write_slat_ns_count, write_slat_ns_cumulative, write_slat_ns_percentile, write_clat_ns_count, write_clat_ns_cumulative, write_clat_ns_percentile, write_lat_ns_count, write_lat_ns_cumulative, write_lat_ns_percentile, trim_slat_ns_count, trim_slat_ns_cumulative, trim_slat_ns_percentile, trim_clat_ns_count, trim_clat_ns_cumulative, trim_clat_ns_percentile, trim_lat_ns_count, trim_lat_ns_cumulative, trim_lat_ns_percentile, | |
31 | 12, , , , 3, 3, 6.11006798673e-07, , , , , , , 2, 2, 4.07580840603e-07, , , , , , , , , , , , , | |
32 | 13, , , , 1364, 1367, 0.000278415431262, , , , , , , 1776, 1778, 0.000362339367296, , , , , , , , , , , , , | |
33 | 14, , , , 181872, 183239, 0.037320091594, , , , , , , 207436, 209214, 0.0426358089929, , , , , , , , , , , , , | |
34 | 15, , , , 1574811, 1758050, 0.358060167469, , , , , , , 1661435, 1870649, 0.381220345946, , , , , , , , , , , , , | |
35 | 16, , , , 2198478, 3956528, 0.805821835713, , , , , , , 2154571, 4025220, 0.820301275606, , , , , , , , , , , , , | |
36 | 17, , , , 724335, 4680863, 0.953346372218, , , , , , , 645351, 4670571, 0.951817627138, , , , , , , , , , , , , | |
37 | 18, , , , 71837, 4752700, 0.96797733735, , , , , , , 61084, 4731655, 0.964265961171, , , , , , , , , , , , , | |
38 | 19, , , , 15915, 4768615, 0.971218728417, , , , , , , 18419, 4750074, 0.968019576923, , , , , , , , , , , , , | |
39 | 20, , , , 12651, 4781266, 0.973795344087, , , , , , , 14176, 4764250, 0.970908509921, , , , , , , , , , , , , | |
40 | ... | |
41 | 168960, , , , , , , , , , , , , 1, 4906999, 0.999999388629, 1, 4906997, 0.999998981048, , , , , , , , , , | |
42 | 177152, , , , , , , , , , , , , 1, 4907000, 0.999999592419, 1, 4906998, 0.999999184838, , , , , , , , , , | |
43 | 183296, , , , , , , , , , , , , 1, 4907001, 0.99999979621, 1, 4906999, 0.999999388629, , , , , , , , , , | |
44 | 189440, , , , , , , 1, 4909925, 0.999999185324, , , , , , , , , , , , , , , , , , , | |
45 | 214016, , , , 1, 4909928, 0.999999796331, 2, 4909927, 0.999999592662, , , , , , , , , , , , , , , , , , , | |
46 | 246784, , , , , , , , , , , , , , , , 1, 4907000, 0.999999592419, , , , , , , , , , | |
47 | 272384, , , , 1, 4909929, 1.0, 1, 4909928, 0.999999796331, , , , , , , , , , , , , , , , , , , | |
48 | 329728, , , , , , , , , , , , , 1, 4907002, 1.0, 1, 4907001, 0.99999979621, , , , , , , , , , | |
49 | 1003520, , , , , , , , , , , , , , , , 1, 4907002, 1.0, , , , , , , , , , | |
50 | 1089536, , , , , , , 1, 4909929, 1.0, , , , , , , , , , , , , , , , , , , | |
51 | ||
The first data line says that there were three read IOs with 12ns clat,
the cumulative number of read IOs at or below 12ns was three, and
12ns was the 0.0000611th percentile for read latency. There were
two write IOs with 12ns clat, the cumulative number of write IOs
at or below 12ns was two, and 12ns was the 0.0000408th percentile
for write latency.

The job had one write IO complete with a clat of 168960ns; 4906999
write IOs completed at or below this duration, and this duration was
the 99.99994th percentile for write completion latency. There was
also one write IO with a total latency of 168960ns; this duration had
a cumulative frequency of 4906997 write IOs and was the
99.9998981048th percentile for write total latency.

The last line says that one read IO had a total latency of 1089536ns;
this duration had a cumulative frequency of 4909929 and represented
the 100th percentile for read total latency.

Running the following:

$ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv --validate
fio-jsonplus_job0.csv validated
fio-jsonplus_job1.csv validated
fio-jsonplus_job2.csv validated

will check the CSV data against the json+ output to confirm that the
CSV data matches.
"""

from __future__ import absolute_import
from __future__ import print_function
import os
import json
import argparse
import itertools
import six

DDIR_LIST = ['read', 'write', 'trim']
LAT_LIST = ['slat_ns', 'clat_ns', 'lat_ns']


def parse_args():
    """Parse command-line arguments."""

    parser = argparse.ArgumentParser()
    parser.add_argument('source',
                        help='fio json+ output file containing completion '
                             'latency data')
    parser.add_argument('dest',
                        help='destination file stub for latency data in CSV '
                             'format. job number will be appended to filename')
    parser.add_argument('--debug', '-d', action='store_true',
                        help='enable debug prints')
    parser.add_argument('--validate', action='store_true',
                        help='validate CSV against JSON output')
    args = parser.parse_args()

    return args


def percentile(idx, run_total):
    """Return a percentile for a specified index based on a running total.

    Parameters:
        idx        index for which to generate percentile.
        run_total  list of cumulative sums.

    Returns:
        Percentile represented by the specified index.
    """

    total = run_total[-1]
    if total == 0:
        return 0

    return float(run_total[idx]) / total
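
# Example with made-up numbers: percentile(1, [3, 5, 10]) returns
# 5.0 / 10 == 0.5, i.e. the bin at index 1 sits at the 50th percentile.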


def more_bins(indices, bins):
    """Determine whether we have more bins to process.

    Parameters:
        indices  a dict containing the last index processed in each bin.
        bins     a dict containing a set of bins to process.

    Returns:
        True if the indices do not yet point to the end of each bin in bins.
        False if the indices point beyond their respective bins.
    """

    for key, value in six.iteritems(indices):
        if value < len(bins[key]):
            return True

    return False
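
# Example with made-up data: more_bins({'read_lat_ns': 0},
# {'read_lat_ns': [[12, 3]]}) is True because index 0 is still inside the
# one-entry bin list; once the index advances to 1 it returns False.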


def debug_print(debug, *args):
    """Print debug messages.

    Parameters:
        debug  emit messages if True.
        *args  arguments for print().
    """

    if debug:
        print(*args)


def get_csvfile(dest, jobnum):
    """Generate CSV filename from command-line arguments and job numbers.

    Parameters:
        dest    file specification for CSV filename.
        jobnum  job number.

    Returns:
        A string that is a new filename that incorporates the job number.
    """

    stub, ext = os.path.splitext(dest)
    return stub + '_job' + str(jobnum) + ext
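
# For example, get_csvfile('fio-jsonplus.csv', 0) returns
# 'fio-jsonplus_job0.csv', matching the filenames in the module docstring.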


def validate(args, jsondata, col_labels):
    """Validate CSV data against json+ output.

    This function checks the CSV data to make sure that it was correctly
    generated from the original json+ output. json+ 'bins' objects are
    constructed from the CSV data and then compared to the corresponding
    objects in the json+ data. An AssertionError will be raised if a
    mismatch is found.

    Percentiles and cumulative counts are not checked.

    Parameters:
        args        command-line arguments for this script.
        jsondata    json+ output to compare against.
        col_labels  column labels for CSV data.

    Returns:
        0 if no mismatches found.
    """

    colnames = [c.strip() for c in col_labels.split(',')]

    for jobnum in range(len(jsondata['jobs'])):
        job_data = jsondata['jobs'][jobnum]
        csvfile = get_csvfile(args.dest, jobnum)

        with open(csvfile, 'r') as csvsource:
            csvlines = csvsource.read().split('\n')

        assert csvlines[0] == col_labels
        debug_print(args.debug, 'col_labels match for', csvfile)

        # create 'bins' objects from the CSV data
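        # e.g. counts['read']['clat_ns'] maps nsec keys to counts,
        # mirroring the json+ 'bins' object: {'12': 3, '13': 1364, ...}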
        counts = {}
        for ddir in DDIR_LIST:
            counts[ddir] = {}
            for lat in LAT_LIST:
                counts[ddir][lat] = {}

        csvlines.pop(0)
        for line in csvlines:
            if line.strip() == "":
                continue
            values = line.split(',')
            nsec = values[0]
            for col in colnames:
                if 'count' in col:
                    val = values[colnames.index(col)]
                    if val.strip() != "":
                        count = int(val)
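                        # column labels look like 'read_clat_ns_count';
                        # recover the data direction and latency type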
                        ddir, lat, _, _ = col.split('_')
                        lat = lat + '_ns'
                        counts[ddir][lat][nsec] = count
                        try:
                            assert count == job_data[ddir][lat]['bins'][nsec]
                        except Exception:
                            print("mismatch:", csvfile, ddir, lat, nsec, "ns")
                            return 1

        # compare 'bins' objects created from the CSV data
        # with corresponding 'bins' objects in the json+ output
        for ddir in DDIR_LIST:
            for lat in LAT_LIST:
                if lat in job_data[ddir] and 'bins' in job_data[ddir][lat]:
                    assert job_data[ddir][lat]['bins'] == counts[ddir][lat]
                    debug_print(args.debug, csvfile, ddir, lat, "bins match")
                else:
                    assert counts[ddir][lat] == {}
                    debug_print(args.debug, csvfile, ddir, lat, "bins empty")

        print(csvfile, "validated")

    return 0


def main():
    """Starting point for this script.

    In standard mode, this script will generate CSV data from fio json+
    output. In validation mode it will check to make sure that the counts
    in the CSV files match the counts in the json+ data.
    """

    args = parse_args()

    with open(args.source, 'r') as source:
        jsondata = json.loads(source.read())

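    # all nine direction/latency-type pairings, e.g. 'read_slat_ns',
    # 'read_clat_ns', ..., 'trim_lat_ns'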
    ddir_lat_list = [ddir + '_' + lat for ddir, lat in
                     itertools.product(DDIR_LIST, LAT_LIST)]
    debug_print(args.debug, 'ddir_lat_list: ', ddir_lat_list)
    col_labels = 'nsec, '
    for ddir_lat in ddir_lat_list:
        col_labels += "{0}_count, {0}_cumulative, {0}_percentile, ".format(ddir_lat)
    debug_print(args.debug, 'col_labels: ', col_labels)

    if args.validate:
        return validate(args, jsondata, col_labels)

    for jobnum in range(len(jsondata['jobs'])):
        bins = {}
        run_total = {}

        for ddir in DDIR_LIST:
            ddir_data = jsondata['jobs'][jobnum][ddir]
            for lat in LAT_LIST:
                ddir_lat = ddir + '_' + lat
                if lat not in ddir_data or 'bins' not in ddir_data[lat]:
                    bins[ddir_lat] = []
                    debug_print(args.debug, 'job', jobnum, ddir_lat,
                                'not found')
                    continue

                debug_print(args.debug, 'job', jobnum, ddir_lat, 'processing')
                bins[ddir_lat] = [[int(key), value] for key, value in
                                  six.iteritems(ddir_data[lat]['bins'])]
                bins[ddir_lat] = sorted(bins[ddir_lat], key=lambda b: b[0])

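                # build a running total so that run_total[ddir_lat][i] is
                # the number of IOs with latency at or below
                # bins[ddir_lat][i][0]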
                run_total[ddir_lat] = [0 for x in range(len(bins[ddir_lat]))]
                run_total[ddir_lat][0] = bins[ddir_lat][0][1]
                for index in range(1, len(bins[ddir_lat])):
                    run_total[ddir_lat][index] = \
                        run_total[ddir_lat][index-1] + \
                        bins[ddir_lat][index][1]

        csvfile = get_csvfile(args.dest, jobnum)
        with open(csvfile, 'w') as output:
            output.write(col_labels + "\n")

            #
            # Have a counter for each ddir_lat pairing.
            # In each round, pick the shortest remaining duration
            # and output a line with any values for that duration.
            #
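            # e.g. if the read_clat_ns and write_clat_ns bin lists both
            # start at 12ns, the first row emitted is the 12ns row with
            # values from both, while the other pairings contribute empty
            # cells and keep their indices for a later row (see the sample
            # output in the module docstring)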
            indices = {x: 0 for x in ddir_lat_list}
            while more_bins(indices, bins):
                debug_print(args.debug, 'indices: ', indices)
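                # start from a sentinel that appears to be fio's largest
                # possible latency bin value (~17.1s) so that any remaining
                # bin value will replace it via min()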
                min_lat = 17112760320
                for ddir_lat in ddir_lat_list:
                    if indices[ddir_lat] < len(bins[ddir_lat]):
                        min_lat = min(bins[ddir_lat][indices[ddir_lat]][0],
                                      min_lat)

                output.write("{0}, ".format(min_lat))

                for ddir_lat in ddir_lat_list:
                    if indices[ddir_lat] < len(bins[ddir_lat]) and \
                            min_lat == bins[ddir_lat][indices[ddir_lat]][0]:
                        count = bins[ddir_lat][indices[ddir_lat]][1]
                        cumulative = run_total[ddir_lat][indices[ddir_lat]]
                        ptile = percentile(indices[ddir_lat],
                                           run_total[ddir_lat])
                        output.write("{0}, {1}, {2}, ".format(count,
                                                              cumulative,
                                                              ptile))
                        indices[ddir_lat] += 1
                    else:
                        output.write(", , , ")
                output.write("\n")

        print("{0} generated".format(csvfile))


if __name__ == '__main__':
    main()