[fio.git] / tools / fiologparser.py

#!/usr/bin/env python3
# Note: this script is python2 and python 3 compatible.
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interval
# statistics even when samples are non-uniform.  For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.

from __future__ import absolute_import
from __future__ import print_function
import argparse
import math
from functools import reduce

def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    The namespace carries: ``interval`` (int, default 1000), ``divisor``
    (int, default 1), the boolean report selectors ``full``, ``allstats``,
    ``average`` and ``sum``, and ``FILE`` (a non-empty list of log paths).

    The interval is compared directly against the timestamps read from the
    log files, so it is expressed in the same unit as those timestamps
    (milliseconds for fio logs), not in seconds.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000,
                        help='interval of time in milliseconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1,
                        help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False,
                        help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False,
                        help='print the sum for each interval.')
    parser.add_argument("FILE", help="fio log files to parse", nargs="+")
    args = parser.parse_args()

    return args

def get_ftime(series):
    """Return the smallest ``last.end`` found across all series.

    A running value of 0 is treated as "not set yet", so it is always
    overwritten by the next series examined.  Returns 0 for an empty list.
    """
    earliest = 0
    for ts in series:
        last_end = ts.last.end
        # keep the current candidate only when it is set and not later
        if earliest != 0 and last_end >= earliest:
            continue
        earliest = last_end
    return earliest

def print_full(ctx, series):
    """Print one line per interval: the interval end time followed by the
    value of every series for that interval, each with three decimals.

    Intervals are ctx.interval wide, start at 0, and the final one is
    clipped to the time returned by get_ftime().
    """
    ftime = get_ftime(series)
    start = 0

    while start < ftime:
        # the last interval may be shorter than ctx.interval
        end = min(start + ctx.interval, ftime)
        values = [ts.get_value(start, end) for ts in series]
        columns = ', '.join("%0.3f" % v for v in values)
        print("%s, %s" % (end, columns))
        start += ctx.interval

def print_sums(ctx, series):
    """Print one line per interval: the interval end time and the sum of
    all series' values for that interval (three decimals).

    Intervals are ctx.interval wide, start at 0, and the final one is
    clipped to the time returned by get_ftime().
    """
    ftime = get_ftime(series)
    start = 0

    while start < ftime:
        # the last interval may be shorter than ctx.interval
        end = min(start + ctx.interval, ftime)
        interval_total = sum([ts.get_value(start, end) for ts in series])
        print("%s, %0.3f" % (end, interval_total))
        start += ctx.interval

def print_averages(ctx, series):
    """Print one line per interval: the interval end time and the mean of
    the per-series values for that interval (three decimals).

    Intervals are ctx.interval wide, start at 0, and the final one is
    clipped to the time returned by get_ftime().
    """
    ftime = get_ftime(series)
    start = 0

    while start < ftime:
        # the last interval may be shorter than ctx.interval
        end = min(start + ctx.interval, ftime)
        values = [ts.get_value(start, end) for ts in series]
        mean = float(sum(values)) / len(values)
        print("%s, %0.3f" % (end, mean))
        start += ctx.interval

# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
#   # sort -n -t ',' -k 2 small.log
# on your input.

def my_extend(vlist, val):
    """Extend vlist in place with the items of val and return vlist.

    list.extend() returns None, so this wrapper exists to make the
    operation usable as a reduce() step.
    """
    vlist.extend(val)
    return vlist


def array_collapser(vlist, val):
    """reduce() callback: fold the list val into the accumulator vlist."""
    return my_extend(vlist, val)

def print_all_stats(ctx, series):
    """Print min/avg/median/percentile/max statistics for every interval.

    A header line is printed first, then one line per interval holding the
    interval start time, the number of samples, and the min, average,
    median, 90th, 95th and 99th percentiles and max of the sample values
    collected from all series via get_samples().

    get_samples() only returns samples lying entirely inside the interval,
    so an interval can legitimately contain no samples at all.  Such an
    interval is reported with a sample count of 0 and NaN for every
    statistic instead of aborting on min() of an empty list.
    """
    ftime = get_ftime(series)
    start = 0
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        # the last interval may be shorter than ctx.interval
        end = min(start + ctx.interval, ftime)

        # flatten the per-series sample lists into one list of values,
        # keeping series order and, within a series, sample order
        samplevalues = [sample.value
                        for ts in series
                        for sample in ts.get_samples(start, end)]

        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # nothing to summarise: keep the row (and column count) intact
            stats = (float('nan'),) * 7

        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval

def median(values):
    """Return the median of values as a float.

    For an odd number of values this is the middle element of the sorted
    data; for an even number it is the mean of the two middle elements.
    Floor division is used for the indices so that they stay integers on
    Python 3 as well as Python 2.

    Raises IndexError if values is empty.
    """
    s = sorted(values)
    n = len(s)
    # (n-1)//2 and n//2 coincide for odd n and straddle the middle for even n
    return float(s[(n - 1) // 2] + s[n // 2]) / 2

def percentile(values, p):
    """Return the p-th percentile (p as a fraction, e.g. 0.95) of values.

    The fractional rank (len - 1) * p is located in the sorted data; when
    it falls between two elements the result is their linear interpolation,
    otherwise the element at that rank is returned unchanged.
    """
    ordered = sorted(values)
    rank = (len(ordered) - 1) * p
    below = math.floor(rank)
    above = math.ceil(rank)
    if below != above:
        # weight each neighbour by its closeness to the fractional rank
        return (ordered[int(below)] * (above - rank)) + (ordered[int(above)] * (rank - below))
    return ordered[int(rank)]

def print_default(ctx, series):
    """Print a single number: the average, weighted by interval width, of
    the per-interval sums of all series (three decimals).

    Intervals are ctx.interval wide, start at 0, and the final one is
    clipped to the time returned by get_ftime().
    """
    ftime = get_ftime(series)
    weighted_total = 0
    total_weight = 0
    start = 0

    while start < ftime:
        # the last interval may be shorter than ctx.interval
        end = min(start + ctx.interval, ftime)
        interval_sum = sum([ts.get_value(start, end) for ts in series])
        width = end - start
        weighted_total += interval_sum * width
        total_weight += width
        start += ctx.interval

    print('%0.3f' % (weighted_total / total_weight))
 
class TimeSeries(object):
    """All samples read from one log file.

    Attributes:
        ctx: the parsed command-line options handed to every Sample.
        last: the Sample with the greatest end time seen so far, or None
            when no sample has been added.
        samples: list of Sample objects in file order.
    """

    def __init__(self, ctx, fn):
        """Create the series and immediately load the log file fn."""
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Read the log file fn and add one sample per line.

        Each line is split on ', ' and its first two fields are taken as
        the integer end time and integer value of a sample; the sample's
        start time is the previous line's time (0 for the first line).
        Blank lines are skipped.
        """
        p_time = 0
        # the context manager guarantees the file is closed, even on a
        # parse error
        with open(fn, 'r') as f:
            for line in f:
                if not line.strip():
                    continue
                (time, value) = line.rstrip('\r\n').rsplit(', ')[:2]
                end = int(time)
                self.add_sample(p_time, end, int(value))
                p_time = end

    def add_sample(self, start, end, value):
        """Append a Sample covering [start, end] and update self.last."""
        # use the context stored on the instance rather than a module
        # global, so the class also works when imported
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely inside [start, end]."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the summed contribution of every sample to [start, end]."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value

class Sample(object):
    """One logged value covering the time span [start, end].

    Attributes:
        ctx: parsed command-line options; only ctx.divisor is read here.
        start: start time of the span.
        end: end time of the span.
        value: the logged value.
    """

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the interval [start, end].

        The share is the sample value scaled by the fraction of the
        interval that the sample's span overlaps, divided by the
        configured divisor.  Returns 0 when the two do not overlap.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # read the divisor from the context stored on the instance, not
        # from a module-level global that only exists when run as a script
        return self.value * ratio / self.ctx.divisor


if __name__ == '__main__':
    # Script entry point: parse the options, load every log file named on
    # the command line, then run the first report whose flag is set
    # (sum, average, full, all-stats), falling back to the default report.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    if ctx.sum:
        report = print_sums
    elif ctx.average:
        report = print_averages
    elif ctx.full:
        report = print_full
    elif ctx.allstats:
        report = print_all_stats
    else:
        report = print_default
    report(ctx, series)
Commit	Line	Data
8629f5f5	1	#!/usr/bin/env python3
5eac3b00	2	# Note: this script is python2 and python 3 compatible.
34831ee3 MN	3	#
	4	# fiologparser.py
	5	#
	6	# This tool lets you parse multiple fio log files and look at interaval
	7	# statistics even when samples are non-uniform. For instance:
	8	#
	9	# fiologparser.py -s bw
	10	#
	11	# to see per-interval sums for all bandwidth logs or:
	12	#
	13	# fiologparser.py -a clat
	14	#
	15	# to see per-interval average completion latency.
	16
5eac3b00 BD	17	from __future__ import absolute_import
5eac3b00 BD	18	from __future__ import print_function
34831ee3	19	import argparse
f4042e41	20	import math
8629f5f5	21	from functools import reduce
34831ee3 MN	22
	23	def parse_args():
	24	parser = argparse.ArgumentParser()
	25	parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in seconds.')
	26	parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
	27	parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
8c4693e2 BE	28	parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
8c4693e2 BE	29	help='print all stats for each interval.')
34831ee3 MN	30	parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
	31	parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
	32	parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
	33	args = parser.parse_args()
	34
	35	return args
	36
	37	def get_ftime(series):
	38	ftime = 0
	39	for ts in series:
	40	if ftime == 0 or ts.last.end < ftime:
	41	ftime = ts.last.end
	42	return ftime
	43
	44	def print_full(ctx, series):
	45	ftime = get_ftime(series)
	46	start = 0
	47	end = ctx.interval
	48
	49	while (start < ftime):
	50	end = ftime if ftime < end else end
	51	results = [ts.get_value(start, end) for ts in series]
9bae2522	52	print("%s, %s" % (end, ', '.join(["%0.3f" % i for i in results])))
34831ee3 MN	53	start += ctx.interval
	54	end += ctx.interval
	55
	56	def print_sums(ctx, series):
	57	ftime = get_ftime(series)
	58	start = 0
	59	end = ctx.interval
	60
	61	while (start < ftime):
	62	end = ftime if ftime < end else end
	63	results = [ts.get_value(start, end) for ts in series]
9bae2522	64	print("%s, %0.3f" % (end, sum(results)))
34831ee3 MN	65	start += ctx.interval
	66	end += ctx.interval
	67
	68	def print_averages(ctx, series):
	69	ftime = get_ftime(series)
	70	start = 0
	71	end = ctx.interval
	72
	73	while (start < ftime):
	74	end = ftime if ftime < end else end
	75	results = [ts.get_value(start, end) for ts in series]
9bae2522	76	print("%s, %0.3f" % (end, float(sum(results))/len(results)))
34831ee3 MN	77	start += ctx.interval
	78	end += ctx.interval
	79
8c4693e2 BE	80	# FIXME: this routine is computationally inefficient
	81	# and has O(N^2) behavior
	82	# it would be better to make one pass through samples
	83	# to segment them into a series of time intervals, and
	84	# then compute stats on each time interval instead.
	85	# to debug this routine, use
	86	# # sort -n -t ',' -k 2 small.log
	87	# on your input.
8c4693e2 BE	88
	89	def my_extend( vlist, val ):
	90	vlist.extend(val)
	91	return vlist
	92
	93	array_collapser = lambda vlist, val: my_extend(vlist, val)
	94
	95	def print_all_stats(ctx, series):
	96	ftime = get_ftime(series)
	97	start = 0
	98	end = ctx.interval
	99	print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
	100	while (start < ftime): # for each time interval
	101	end = ftime if ftime < end else end
	102	sample_arrays = [ s.get_samples(start, end) for s in series ]
	103	samplevalue_arrays = []
	104	for sample_array in sample_arrays:
	105	samplevalue_arrays.append(
	106	[ sample.value for sample in sample_array ] )
8c4693e2 BE	107	# collapse list of lists of sample values into list of sample values
8c4693e2 BE	108	samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
8c4693e2	109	# compute all stats and print them
f4042e41 MN	110	mymin = min(samplevalues)
	111	myavg = sum(samplevalues) / float(len(samplevalues))
	112	mymedian = median(samplevalues)
	113	my90th = percentile(samplevalues, 0.90)
	114	my95th = percentile(samplevalues, 0.95)
	115	my99th = percentile(samplevalues, 0.99)
	116	mymax = max(samplevalues)
8c4693e2 BE	117	print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
	118	start, len(samplevalues),
	119	mymin, myavg, mymedian, my90th, my95th, my99th, mymax))
	120
	121	# advance to next interval
	122	start += ctx.interval
	123	end += ctx.interval
	124
f4042e41 MN	125	def median(values):
	126	s=sorted(values)
	127	return float(s[(len(s)-1)/2]+s[(len(s)/2)])/2
	128
	129	def percentile(values, p):
	130	s = sorted(values)
	131	k = (len(s)-1) * p
	132	f = math.floor(k)
	133	c = math.ceil(k)
	134	if f == c:
	135	return s[int(k)]
	136	return (s[int(f)] * (c-k)) + (s[int(c)] * (k-f))
34831ee3 MN	137
	138	def print_default(ctx, series):
	139	ftime = get_ftime(series)
	140	start = 0
	141	end = ctx.interval
	142	averages = []
	143	weights = []
	144
	145	while (start < ftime):
	146	end = ftime if ftime < end else end
	147	results = [ts.get_value(start, end) for ts in series]
	148	averages.append(sum(results))
	149	weights.append(end-start)
	150	start += ctx.interval
	151	end += ctx.interval
	152
	153	total = 0
9bae2522	154	for i in range(0, len(averages)):
34831ee3	155	total += averages[i]*weights[i]
9bae2522	156	print('%0.3f' % (total/sum(weights)))
34831ee3	157
9bae2522	158	class TimeSeries(object):
34831ee3 MN	159	def __init__(self, ctx, fn):
	160	self.ctx = ctx
	161	self.last = None
	162	self.samples = []
	163	self.read_data(fn)
	164
	165	def read_data(self, fn):
	166	f = open(fn, 'r')
	167	p_time = 0
	168	for line in f:
5f2a90b8	169	(time, value) = line.rstrip('\r\n').rsplit(', ')[:2]
34831ee3 MN	170	self.add_sample(p_time, int(time), int(value))
	171	p_time = int(time)
	172
	173	def add_sample(self, start, end, value):
	174	sample = Sample(ctx, start, end, value)
	175	if not self.last or self.last.end < end:
	176	self.last = sample
	177	self.samples.append(sample)
	178
8c4693e2 BE	179	def get_samples(self, start, end):
	180	sample_list = []
	181	for s in self.samples:
	182	if s.start >= start and s.end <= end:
	183	sample_list.append(s)
	184	return sample_list
	185
34831ee3 MN	186	def get_value(self, start, end):
	187	value = 0
	188	for sample in self.samples:
	189	value += sample.get_contribution(start, end)
	190	return value
	191
9bae2522	192	class Sample(object):
34831ee3 MN	193	def __init__(self, ctx, start, end, value):
	194	self.ctx = ctx
	195	self.start = start
	196	self.end = end
	197	self.value = value
	198
	199	def get_contribution(self, start, end):
	200	# short circuit if not within the bound
	201	if (end < self.start or start > self.end):
	202	return 0
	203
	204	sbound = self.start if start < self.start else start
	205	ebound = self.end if end > self.end else end
	206	ratio = float(ebound-sbound) / (end-start)
	207	return self.value*ratio/ctx.divisor
	208
	209
	210	if __name__ == '__main__':
	211	ctx = parse_args()
	212	series = []
	213	for fn in ctx.FILE:
	214	series.append(TimeSeries(ctx, fn))
	215	if ctx.sum:
	216	print_sums(ctx, series)
	217	elif ctx.average:
	218	print_averages(ctx, series)
	219	elif ctx.full:
	220	print_full(ctx, series)
8c4693e2 BE	221	elif ctx.allstats:
8c4693e2 BE	222	print_all_stats(ctx, series)
34831ee3 MN	223	else:
34831ee3 MN	224	print_default(ctx, series)