[fio.git] / tools / fio_jsonplus_clat2csv

#!/usr/bin/env python3
# Note: this script is python2 and python3 compatible.

"""
fio_jsonplus_clat2csv

This script converts fio's json+ latency data to CSV format.

For example:

Run the following fio jobs:
$ fio --output=fio-jsonplus.output --output-format=json+ --ioengine=null \
    --time_based --runtime=3s --size=1G --slat_percentiles=1 \
    --clat_percentiles=1 --lat_percentiles=1 \
    --name=test1 --rw=randrw \
    --name=test2 --rw=read \
    --name=test3 --rw=write

Then run:
$ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv

You will end up with the following 3 files:

-rw-r--r-- 1 root root 77547 Mar 24 15:17 fio-jsonplus_job0.csv
-rw-r--r-- 1 root root 65413 Mar 24 15:17 fio-jsonplus_job1.csv
-rw-r--r-- 1 root root 63291 Mar 24 15:17 fio-jsonplus_job2.csv

fio-jsonplus_job0.csv will look something like:

nsec, read_slat_ns_count, read_slat_ns_cumulative, read_slat_ns_percentile, read_clat_ns_count, read_clat_ns_cumulative, read_clat_ns_percentile, read_lat_ns_count, read_lat_ns_cumulative, read_lat_ns_percentile, write_slat_ns_count, write_slat_ns_cumulative, write_slat_ns_percentile, write_clat_ns_count, write_clat_ns_cumulative, write_clat_ns_percentile, write_lat_ns_count, write_lat_ns_cumulative, write_lat_ns_percentile, trim_slat_ns_count, trim_slat_ns_cumulative, trim_slat_ns_percentile, trim_clat_ns_count, trim_clat_ns_cumulative, trim_clat_ns_percentile, trim_lat_ns_count, trim_lat_ns_cumulative, trim_lat_ns_percentile,
12, , , , 3, 3, 6.11006798673e-07, , , , , , , 2, 2, 4.07580840603e-07, , , , , , , , , , , , ,
13, , , , 1364, 1367, 0.000278415431262, , , , , , , 1776, 1778, 0.000362339367296, , , , , , , , , , , , ,
14, , , , 181872, 183239, 0.037320091594, , , , , , , 207436, 209214, 0.0426358089929, , , , , , , , , , , , ,
15, , , , 1574811, 1758050, 0.358060167469, , , , , , , 1661435, 1870649, 0.381220345946, , , , , , , , , , , , ,
16, , , , 2198478, 3956528, 0.805821835713, , , , , , , 2154571, 4025220, 0.820301275606, , , , , , , , , , , , ,
17, , , , 724335, 4680863, 0.953346372218, , , , , , , 645351, 4670571, 0.951817627138, , , , , , , , , , , , ,
18, , , , 71837, 4752700, 0.96797733735, , , , , , , 61084, 4731655, 0.964265961171, , , , , , , , , , , , ,
19, , , , 15915, 4768615, 0.971218728417, , , , , , , 18419, 4750074, 0.968019576923, , , , , , , , , , , , ,
20, , , , 12651, 4781266, 0.973795344087, , , , , , , 14176, 4764250, 0.970908509921, , , , , , , , , , , , ,
...
168960, , , , , , , , , , , , , 1, 4906999, 0.999999388629, 1, 4906997, 0.999998981048, , , , , , , , , ,
177152, , , , , , , , , , , , , 1, 4907000, 0.999999592419, 1, 4906998, 0.999999184838, , , , , , , , , ,
183296, , , , , , , , , , , , , 1, 4907001, 0.99999979621, 1, 4906999, 0.999999388629, , , , , , , , , ,
189440, , , , , , , 1, 4909925, 0.999999185324, , , , , , , , , , , , , , , , , , ,
214016, , , , 1, 4909928, 0.999999796331, 2, 4909927, 0.999999592662, , , , , , , , , , , , , , , , , , ,
246784, , , , , , , , , , , , , , , , 1, 4907000, 0.999999592419, , , , , , , , , ,
272384, , , , 1, 4909929, 1.0, 1, 4909928, 0.999999796331, , , , , , , , , , , , , , , , , , ,
329728, , , , , , , , , , , , , 1, 4907002, 1.0, 1, 4907001, 0.99999979621, , , , , , , , , ,
1003520, , , , , , , , , , , , , , , , 1, 4907002, 1.0, , , , , , , , , ,
1089536, , , , , , , 1, 4909929, 1.0, , , , , , , , , , , , , , , , , , ,

The first line says that there were three read IOs with 12ns clat,
the cumulative number of read IOs at or below 12ns was two, and
12ns was the 0.0000611th percentile for read latency. There were
two write IOs with 12ns clat, the cumulative number of write IOs
at or below 12ns was two, and 12ns was the 0.0000408th percentile
for write latency.

The job had one write IO complete at 168960ns and 4906999 write IOs
completed at or below this duration. Also this duration was the
99.99994th percentile for write latency. There was one write IO
with a total latency of 168960ns, this duration had a cumulative
frequency of 4906997 write IOs and was the 99.9998981048th percentile
for write total latency.

The last line says that one read IO had 1089536ns total latency, this
duration had a cumulative frequency of 4909929 and represented the 100th
percentile for read total latency.

Running the following:

$ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv --validate
fio-jsonplus_job0.csv validated
fio-jsonplus_job1.csv validated
fio-jsonplus_job2.csv validated

will check the CSV data against the json+ output to confirm that the CSV
data matches.
"""

from __future__ import absolute_import
from __future__ import print_function
import os
import json
import argparse
import itertools
import six

DDIR_LIST = ['read', 'write', 'trim']
LAT_LIST = ['slat_ns', 'clat_ns', 'lat_ns']

def parse_args():
    """Parse command-line arguments."""

    parser = argparse.ArgumentParser()
    parser.add_argument('source',
                        help='fio json+ output file containing completion '
                             'latency data')
    parser.add_argument('dest',
                        help='destination file stub for latency data in CSV '
                             'format. job number will be appended to filename')
    parser.add_argument('--debug', '-d', action='store_true',
                        help='enable debug prints')
    parser.add_argument('--validate', action='store_true',
                        help='validate CSV against JSON output')
    args = parser.parse_args()

    return args


def percentile(idx, run_total):
    """Return a percentile for a specified index based on a running total.

    Parameters:
        idx         index for which to generate percentile.
        run_total   list of cumulative sums.

    Returns:
        Percentile represented by the specified index.
    """

    total = run_total[len(run_total)-1]
    if total == 0:
        return 0

    return float(run_total[idx]) / total


def more_bins(indices, bins):
    """Determine whether we have more bins to process.

    Parameters:
        indices     a dict containing the last index processed in each bin.
        bins        a dict contaiing a set of bins to process.

    Returns:
        True if the indices do not yet point to the end of each bin in bins.
        False if the indices point beyond their respective bins.
    """

    for key, value in six.iteritems(indices):
        if value < len(bins[key]):
            return True

    return False


def debug_print(debug, *args):
    """Print debug messages.

    Parameters:
        debug       emit messages if True.
        *args       arguments for print().
    """

    if debug:
        print(*args)


def get_csvfile(dest, jobnum):
    """Generate CSV filename from command-line arguments and job numbers.

    Parameters:
        dest        file specification for CSV filename.
        jobnum      job number.

    Returns:
        A string that is a new filename that incorporates the job number.
    """

    stub, ext = os.path.splitext(dest)
    return stub + '_job' + str(jobnum) + ext


def validate(args, jsondata, col_labels):
    """Validate CSV data against json+ output.

    This function checks the CSV data to make sure that it was correctly
    generated from the original json+ output. json+ 'bins' objects are
    constructed from the CSV data and then compared to the corresponding
    objects in the json+ data. An AssertionError will appear if a mismatch
    is found.

    Percentiles and cumulative counts are not checked.

    Parameters:
        args        command-line arguments for this script.
        jsondata    json+ output to compare against.
        col_labels  column labels for CSV data.

    Returns
        0 if no mismatches found.
    """

    colnames = [c.strip() for c in col_labels.split(',')]

    for jobnum in range(len(jsondata['jobs'])):
        job_data = jsondata['jobs'][jobnum]
        csvfile = get_csvfile(args.dest, jobnum)

        with open(csvfile, 'r') as csvsource:
            csvlines = csvsource.read().split('\n')

        assert csvlines[0] == col_labels
        debug_print(args.debug, 'col_labels match for', csvfile)

        # create 'bins' objects from the CSV data
        counts = {}
        for ddir in DDIR_LIST:
            counts[ddir] = {}
            for lat in LAT_LIST:
                counts[ddir][lat] = {}

        csvlines.pop(0)
        for line in csvlines:
            if line.strip() == "":
                continue
            values = line.split(',')
            nsec = values[0]
            for col in colnames:
                if 'count' in col:
                    val = values[colnames.index(col)]
                    if val.strip() != "":
                        count = int(val)
                        ddir, lat, _, _ = col.split('_')
                        lat = lat + '_ns'
                        counts[ddir][lat][nsec] = count
                        try:
                            assert count == job_data[ddir][lat]['bins'][nsec]
                        except Exception:
                            print("mismatch:", csvfile, ddir, lat, nsec, "ns")
                            return 1

        # compare 'bins' objects created from the CSV data
        # with corresponding 'bins' objects in the json+ output
        for ddir in DDIR_LIST:
            for lat in LAT_LIST:
                if lat in job_data[ddir] and 'bins' in job_data[ddir][lat]:
                    assert job_data[ddir][lat]['bins'] == counts[ddir][lat]
                    debug_print(args.debug, csvfile, ddir, lat, "bins match")
                else:
                    assert counts[ddir][lat] == {}
                    debug_print(args.debug, csvfile, ddir, lat, "bins empty")

        print(csvfile, "validated")

    return 0


def main():
    """Starting point for this script.

    In standard mode, this script will generate CSV data from fio json+ output.
    In validation mode it will check to make sure that counts in CSV files
    match the counts in the json+ data.
    """

    args = parse_args()

    with open(args.source, 'r') as source:
        jsondata = json.loads(source.read())

    ddir_lat_list = list(ddir + '_' + lat for ddir, lat in itertools.product(DDIR_LIST, LAT_LIST))
    debug_print(args.debug, 'ddir_lat_list: ', ddir_lat_list)
    col_labels = 'nsec, '
    for ddir_lat in ddir_lat_list:
        col_labels += "{0}_count, {0}_cumulative, {0}_percentile, ".format(ddir_lat)
    debug_print(args.debug, 'col_labels: ', col_labels)

    if args.validate:
        return validate(args, jsondata, col_labels)

    for jobnum in range(0, len(jsondata['jobs'])):
        bins = {}
        run_total = {}

        for ddir in DDIR_LIST:
            ddir_data = jsondata['jobs'][jobnum][ddir]
            for lat in LAT_LIST:
                ddir_lat = ddir + '_' + lat
                if lat not in ddir_data or 'bins' not in ddir_data[lat]:
                    bins[ddir_lat] = []
                    debug_print(args.debug, 'job', jobnum, ddir_lat, 'not found')
                    continue

                debug_print(args.debug, 'job', jobnum, ddir_lat, 'processing')
                bins[ddir_lat] = [[int(key), value] for key, value in
                                  six.iteritems(ddir_data[lat]['bins'])]
                bins[ddir_lat] = sorted(bins[ddir_lat], key=lambda bin: bin[0])

                run_total[ddir_lat] = [0 for x in range(0, len(bins[ddir_lat]))]
                run_total[ddir_lat][0] = bins[ddir_lat][0][1]
                for index in range(1, len(bins[ddir_lat])):
                    run_total[ddir_lat][index] = run_total[ddir_lat][index-1] + \
                        bins[ddir_lat][index][1]

        csvfile = get_csvfile(args.dest, jobnum)
        with open(csvfile, 'w') as output:
            output.write(col_labels + "\n")

#
# Have a counter for each ddir_lat pairing
# In each round, pick the shortest remaining duration
# and output a line with any values for that duration
#
            indices = {x: 0 for x in ddir_lat_list}
            while more_bins(indices, bins):
                debug_print(args.debug, 'indices: ', indices)
                min_lat = 17112760320
                for ddir_lat in ddir_lat_list:
                    if indices[ddir_lat] < len(bins[ddir_lat]):
                        min_lat = min(bins[ddir_lat][indices[ddir_lat]][0], min_lat)

                output.write("{0}, ".format(min_lat))

                for ddir_lat in ddir_lat_list:
                    if indices[ddir_lat] < len(bins[ddir_lat]) and \
                       min_lat == bins[ddir_lat][indices[ddir_lat]][0]:
                        count = bins[ddir_lat][indices[ddir_lat]][1]
                        cumulative = run_total[ddir_lat][indices[ddir_lat]]
                        ptile = percentile(indices[ddir_lat], run_total[ddir_lat])
                        output.write("{0}, {1}, {2}, ".format(count, cumulative, ptile))
                        indices[ddir_lat] += 1
                    else:
                        output.write(", , , ")
                output.write("\n")

            print("{0} generated".format(csvfile))


if __name__ == '__main__':
    main()
Commit	Line	Data
8629f5f5	1	#!/usr/bin/env python3
50d2a375	2	# Note: this script is python2 and python3 compatible.
e26cf2f7 VF	3
	4	"""
	5	fio_jsonplus_clat2csv
	6
	7	This script converts fio's json+ latency data to CSV format.
	8
	9	For example:
	10
	11	Run the following fio jobs:
	12	$ fio --output=fio-jsonplus.output --output-format=json+ --ioengine=null \
	13	--time_based --runtime=3s --size=1G --slat_percentiles=1 \
	14	--clat_percentiles=1 --lat_percentiles=1 \
	15	--name=test1 --rw=randrw \
	16	--name=test2 --rw=read \
	17	--name=test3 --rw=write
	18
	19	Then run:
	20	$ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv
	21
	22	You will end up with the following 3 files:
	23
	24	-rw-r--r-- 1 root root 77547 Mar 24 15:17 fio-jsonplus_job0.csv
	25	-rw-r--r-- 1 root root 65413 Mar 24 15:17 fio-jsonplus_job1.csv
	26	-rw-r--r-- 1 root root 63291 Mar 24 15:17 fio-jsonplus_job2.csv
	27
	28	fio-jsonplus_job0.csv will look something like:
	29
	30	nsec, read_slat_ns_count, read_slat_ns_cumulative, read_slat_ns_percentile, read_clat_ns_count, read_clat_ns_cumulative, read_clat_ns_percentile, read_lat_ns_count, read_lat_ns_cumulative, read_lat_ns_percentile, write_slat_ns_count, write_slat_ns_cumulative, write_slat_ns_percentile, write_clat_ns_count, write_clat_ns_cumulative, write_clat_ns_percentile, write_lat_ns_count, write_lat_ns_cumulative, write_lat_ns_percentile, trim_slat_ns_count, trim_slat_ns_cumulative, trim_slat_ns_percentile, trim_clat_ns_count, trim_clat_ns_cumulative, trim_clat_ns_percentile, trim_lat_ns_count, trim_lat_ns_cumulative, trim_lat_ns_percentile,
	31	12, , , , 3, 3, 6.11006798673e-07, , , , , , , 2, 2, 4.07580840603e-07, , , , , , , , , , , , ,
	32	13, , , , 1364, 1367, 0.000278415431262, , , , , , , 1776, 1778, 0.000362339367296, , , , , , , , , , , , ,
	33	14, , , , 181872, 183239, 0.037320091594, , , , , , , 207436, 209214, 0.0426358089929, , , , , , , , , , , , ,
	34	15, , , , 1574811, 1758050, 0.358060167469, , , , , , , 1661435, 1870649, 0.381220345946, , , , , , , , , , , , ,
	35	16, , , , 2198478, 3956528, 0.805821835713, , , , , , , 2154571, 4025220, 0.820301275606, , , , , , , , , , , , ,
	36	17, , , , 724335, 4680863, 0.953346372218, , , , , , , 645351, 4670571, 0.951817627138, , , , , , , , , , , , ,
	37	18, , , , 71837, 4752700, 0.96797733735, , , , , , , 61084, 4731655, 0.964265961171, , , , , , , , , , , , ,
	38	19, , , , 15915, 4768615, 0.971218728417, , , , , , , 18419, 4750074, 0.968019576923, , , , , , , , , , , , ,
	39	20, , , , 12651, 4781266, 0.973795344087, , , , , , , 14176, 4764250, 0.970908509921, , , , , , , , , , , , ,
	40	...
	41	168960, , , , , , , , , , , , , 1, 4906999, 0.999999388629, 1, 4906997, 0.999998981048, , , , , , , , , ,
	42	177152, , , , , , , , , , , , , 1, 4907000, 0.999999592419, 1, 4906998, 0.999999184838, , , , , , , , , ,
	43	183296, , , , , , , , , , , , , 1, 4907001, 0.99999979621, 1, 4906999, 0.999999388629, , , , , , , , , ,
	44	189440, , , , , , , 1, 4909925, 0.999999185324, , , , , , , , , , , , , , , , , , ,
	45	214016, , , , 1, 4909928, 0.999999796331, 2, 4909927, 0.999999592662, , , , , , , , , , , , , , , , , , ,
	46	246784, , , , , , , , , , , , , , , , 1, 4907000, 0.999999592419, , , , , , , , , ,
	47	272384, , , , 1, 4909929, 1.0, 1, 4909928, 0.999999796331, , , , , , , , , , , , , , , , , , ,
	48	329728, , , , , , , , , , , , , 1, 4907002, 1.0, 1, 4907001, 0.99999979621, , , , , , , , , ,
	49	1003520, , , , , , , , , , , , , , , , 1, 4907002, 1.0, , , , , , , , , ,
	50	1089536, , , , , , , 1, 4909929, 1.0, , , , , , , , , , , , , , , , , , ,
	51
	52	The first line says that there were three read IOs with 12ns clat,
	53	the cumulative number of read IOs at or below 12ns was two, and
	54	12ns was the 0.0000611th percentile for read latency. There were
	55	two write IOs with 12ns clat, the cumulative number of write IOs
	56	at or below 12ns was two, and 12ns was the 0.0000408th percentile
	57	for write latency.
	58
	59	The job had one write IO complete at 168960ns and 4906999 write IOs
	60	completed at or below this duration. Also this duration was the
	61	99.99994th percentile for write latency. There was one write IO
	62	with a total latency of 168960ns, this duration had a cumulative
	63	frequency of 4906997 write IOs and was the 99.9998981048th percentile
	64	for write total latency.
	65
	66	The last line says that one read IO had 1089536ns total latency, this
67	duration had a cumulative frequency of 4909929 and represented the 100th
68	percentile for read total latency.
69
70	Running the following:
71
72	$ fio_jsonplus_clat2csv fio-jsonplus.output fio-jsonplus.csv --validate
73	fio-jsonplus_job0.csv validated
74	fio-jsonplus_job1.csv validated
75	fio-jsonplus_job2.csv validated
76
77	will check the CSV data against the json+ output to confirm that the CSV
78	data matches.
79	"""
afe8d310	80
50d2a375 BD	81	from __future__ import absolute_import
50d2a375 BD	82	from __future__ import print_function
afe8d310 VF	83	import os
	84	import json
	85	import argparse
e26cf2f7	86	import itertools
50d2a375	87	import six
afe8d310	88
e26cf2f7 VF	89	DDIR_LIST = ['read', 'write', 'trim']
e26cf2f7 VF	90	LAT_LIST = ['slat_ns', 'clat_ns', 'lat_ns']
afe8d310 VF	91
afe8d310 VF	92	def parse_args():
e26cf2f7 VF	93	"""Parse command-line arguments."""
e26cf2f7 VF	94
afe8d310 VF	95	parser = argparse.ArgumentParser()
	96	parser.add_argument('source',
	97	help='fio json+ output file containing completion '
	98	'latency data')
	99	parser.add_argument('dest',
	100	help='destination file stub for latency data in CSV '
	101	'format. job number will be appended to filename')
e26cf2f7 VF	102	parser.add_argument('--debug', '-d', action='store_true',
	103	help='enable debug prints')
	104	parser.add_argument('--validate', action='store_true',
	105	help='validate CSV against JSON output')
afe8d310 VF	106	args = parser.parse_args()
	107
	108	return args
	109
	110
	111	def percentile(idx, run_total):
e26cf2f7 VF	112	"""Return a percentile for a specified index based on a running total.
	113
	114	Parameters:
	115	idx index for which to generate percentile.
	116	run_total list of cumulative sums.
	117
	118	Returns:
	119	Percentile represented by the specified index.
	120	"""
	121
afe8d310 VF	122	total = run_total[len(run_total)-1]
	123	if total == 0:
	124	return 0
	125
	126	return float(run_total[idx]) / total
	127
	128
e26cf2f7 VF	129	def more_bins(indices, bins):
	130	"""Determine whether we have more bins to process.
	131
	132	Parameters:
	133	indices a dict containing the last index processed in each bin.
	134	bins a dict contaiing a set of bins to process.
	135
	136	Returns:
	137	True if the indices do not yet point to the end of each bin in bins.
fc002f14	138	False if the indices point beyond their respective bins.
e26cf2f7 VF	139	"""
e26cf2f7 VF	140
50d2a375	141	for key, value in six.iteritems(indices):
afe8d310 VF	142	if value < len(bins[key]):
	143	return True
	144
	145	return False
	146
	147
e26cf2f7 VF	148	def debug_print(debug, *args):
	149	"""Print debug messages.
	150
	151	Parameters:
	152	debug emit messages if True.
	153	*args arguments for print().
	154	"""
	155
	156	if debug:
	157	print(*args)
	158
	159
	160	def get_csvfile(dest, jobnum):
	161	"""Generate CSV filename from command-line arguments and job numbers.
	162
fc002f14	163	Parameters:
e26cf2f7 VF	164	dest file specification for CSV filename.
	165	jobnum job number.
	166
	167	Returns:
	168	A string that is a new filename that incorporates the job number.
	169	"""
	170
	171	stub, ext = os.path.splitext(dest)
	172	return stub + '_job' + str(jobnum) + ext
	173
	174
	175	def validate(args, jsondata, col_labels):
	176	"""Validate CSV data against json+ output.
	177
	178	This function checks the CSV data to make sure that it was correctly
	179	generated from the original json+ output. json+ 'bins' objects are
	180	constructed from the CSV data and then compared to the corresponding
	181	objects in the json+ data. An AssertionError will appear if a mismatch
	182	is found.
	183
	184	Percentiles and cumulative counts are not checked.
	185
	186	Parameters:
	187	args command-line arguments for this script.
	188	jsondata json+ output to compare against.
	189	col_labels column labels for CSV data.
	190
	191	Returns
	192	0 if no mismatches found.
	193	"""
	194
	195	colnames = [c.strip() for c in col_labels.split(',')]
	196
	197	for jobnum in range(len(jsondata['jobs'])):
	198	job_data = jsondata['jobs'][jobnum]
	199	csvfile = get_csvfile(args.dest, jobnum)
	200
	201	with open(csvfile, 'r') as csvsource:
	202	csvlines = csvsource.read().split('\n')
	203
	204	assert csvlines[0] == col_labels
	205	debug_print(args.debug, 'col_labels match for', csvfile)
	206
	207	# create 'bins' objects from the CSV data
	208	counts = {}
	209	for ddir in DDIR_LIST:
	210	counts[ddir] = {}
	211	for lat in LAT_LIST:
	212	counts[ddir][lat] = {}
	213
	214	csvlines.pop(0)
	215	for line in csvlines:
	216	if line.strip() == "":
	217	continue
	218	values = line.split(',')
	219	nsec = values[0]
	220	for col in colnames:
	221	if 'count' in col:
	222	val = values[colnames.index(col)]
	223	if val.strip() != "":
	224	count = int(val)
	225	ddir, lat, _, _ = col.split('_')
	226	lat = lat + '_ns'
	227	counts[ddir][lat][nsec] = count
228	try:
229	assert count == job_data[ddir][lat]['bins'][nsec]
230	except Exception:
231	print("mismatch:", csvfile, ddir, lat, nsec, "ns")
232	return 1
233
234	# compare 'bins' objects created from the CSV data
235	# with corresponding 'bins' objects in the json+ output
236	for ddir in DDIR_LIST:
237	for lat in LAT_LIST:
238	if lat in job_data[ddir] and 'bins' in job_data[ddir][lat]:
239	assert job_data[ddir][lat]['bins'] == counts[ddir][lat]
240	debug_print(args.debug, csvfile, ddir, lat, "bins match")
241	else:
242	assert counts[ddir][lat] == {}
243	debug_print(args.debug, csvfile, ddir, lat, "bins empty")
244
245	print(csvfile, "validated")
246
247	return 0
248
249
afe8d310	250	def main():
e26cf2f7 VF	251	"""Starting point for this script.
	252
	253	In standard mode, this script will generate CSV data from fio json+ output.
	254	In validation mode it will check to make sure that counts in CSV files
	255	match the counts in the json+ data.
	256	"""
	257
afe8d310 VF	258	args = parse_args()
	259
	260	with open(args.source, 'r') as source:
	261	jsondata = json.loads(source.read())
	262
e26cf2f7 VF	263	ddir_lat_list = list(ddir + '_' + lat for ddir, lat in itertools.product(DDIR_LIST, LAT_LIST))
	264	debug_print(args.debug, 'ddir_lat_list: ', ddir_lat_list)
	265	col_labels = 'nsec, '
	266	for ddir_lat in ddir_lat_list:
	267	col_labels += "{0}_count, {0}_cumulative, {0}_percentile, ".format(ddir_lat)
	268	debug_print(args.debug, 'col_labels: ', col_labels)
	269
	270	if args.validate:
	271	return validate(args, jsondata, col_labels)
	272
afe8d310 VF	273	for jobnum in range(0, len(jsondata['jobs'])):
	274	bins = {}
	275	run_total = {}
e26cf2f7 VF	276
	277	for ddir in DDIR_LIST:
	278	ddir_data = jsondata['jobs'][jobnum][ddir]
	279	for lat in LAT_LIST:
	280	ddir_lat = ddir + '_' + lat
	281	if lat not in ddir_data or 'bins' not in ddir_data[lat]:
	282	bins[ddir_lat] = []
	283	debug_print(args.debug, 'job', jobnum, ddir_lat, 'not found')
	284	continue
	285
	286	debug_print(args.debug, 'job', jobnum, ddir_lat, 'processing')
	287	bins[ddir_lat] = [[int(key), value] for key, value in
	288	six.iteritems(ddir_data[lat]['bins'])]
	289	bins[ddir_lat] = sorted(bins[ddir_lat], key=lambda bin: bin[0])
	290
	291	run_total[ddir_lat] = [0 for x in range(0, len(bins[ddir_lat]))]
	292	run_total[ddir_lat][0] = bins[ddir_lat][0][1]
	293	for index in range(1, len(bins[ddir_lat])):
	294	run_total[ddir_lat][index] = run_total[ddir_lat][index-1] + \
	295	bins[ddir_lat][index][1]
	296
	297	csvfile = get_csvfile(args.dest, jobnum)
	298	with open(csvfile, 'w') as output:
	299	output.write(col_labels + "\n")
afe8d310 VF	300
afe8d310 VF	301	#
e26cf2f7	302	# Have a counter for each ddir_lat pairing
afe8d310 VF	303	# In each round, pick the shortest remaining duration
	304	# and output a line with any values for that duration
	305	#
e26cf2f7 VF	306	indices = {x: 0 for x in ddir_lat_list}
	307	while more_bins(indices, bins):
	308	debug_print(args.debug, 'indices: ', indices)
afe8d310	309	min_lat = 17112760320
e26cf2f7 VF	310	for ddir_lat in ddir_lat_list:
	311	if indices[ddir_lat] < len(bins[ddir_lat]):
	312	min_lat = min(bins[ddir_lat][indices[ddir_lat]][0], min_lat)
afe8d310 VF	313
	314	output.write("{0}, ".format(min_lat))
	315
e26cf2f7 VF	316	for ddir_lat in ddir_lat_list:
	317	if indices[ddir_lat] < len(bins[ddir_lat]) and \
	318	min_lat == bins[ddir_lat][indices[ddir_lat]][0]:
	319	count = bins[ddir_lat][indices[ddir_lat]][1]
	320	cumulative = run_total[ddir_lat][indices[ddir_lat]]
	321	ptile = percentile(indices[ddir_lat], run_total[ddir_lat])
	322	output.write("{0}, {1}, {2}, ".format(count, cumulative, ptile))
	323	indices[ddir_lat] += 1
afe8d310 VF	324	else:
	325	output.write(", , , ")
	326	output.write("\n")
	327
e26cf2f7	328	print("{0} generated".format(csvfile))
afe8d310 VF	329
	330
	331	if __name__ == '__main__':
	332	main()