Spelling fix. Reported by Debian's lintian.
[fio.git] / tools / fiologparser.py
#!/usr/bin/python
#
# fiologparser.py
#
# This tool lets you parse multiple fio log files and look at interval
# statistics even when samples are non-uniform. For instance:
#
# fiologparser.py -s *bw*
#
# to see per-interval sums for all bandwidth logs or:
#
# fiologparser.py -a *clat*
#
# to see per-interval average completion latency.
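#
# Similarly,
#
# fiologparser.py -A *clat*
#
# prints per-interval min, average, median, 90th/95th/99th percentile and
# max values (see print_all_stats below).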

import argparse
import numpy
import scipy

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000, help='interval of time in milliseconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1, help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False, help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False, help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False, help='print the sum for each interval.')
29 parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
    args = parser.parse_args()

    return args

def get_ftime(series):
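    # "Final" time: the earliest last-sample end time across all series, so
    # that every interval printed below is covered by data from every log file.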
    ftime = 0
    for ts in series:
        if ftime == 0 or ts.last.end < ftime:
            ftime = ts.last.end
    return ftime

def print_full(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print "%s, %s" % (end, ', '.join(["%0.3f" % i for i in results]))
        start += ctx.interval
        end += ctx.interval

def print_sums(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print "%s, %0.3f" % (end, sum(results))
        start += ctx.interval
        end += ctx.interval

def print_averages(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        print "%s, %0.3f" % (end, float(sum(results))/len(results))
        start += ctx.interval
        end += ctx.interval

# FIXME: this routine is computationally inefficient
# and has O(N^2) behavior
# it would be better to make one pass through samples
# to segment them into a series of time intervals, and
# then compute stats on each time interval instead.
# to debug this routine, use
# # sort -n -t ',' -k 2 small.log
# on your input.
# Sometimes scipy interpolates between two values to get a percentile
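# An illustrative one-pass alternative (a sketch only, not used by this
# script) would be to bucket each sample value by the interval containing
# its end time and then compute the stats per bucket, e.g.:
def bucket_samples_by_interval(series, interval):
    # maps interval index -> list of sample values whose end time falls in
    # that interval; note this assigns each sample wholly to one interval,
    # which differs slightly from get_samples() used below
    buckets = {}
    for ts in series:
        for s in ts.samples:
            buckets.setdefault(s.end // interval, []).append(s.value)
    return buckets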

def my_extend( vlist, val ):
    vlist.extend(val)
    return vlist

array_collapser = lambda vlist, val: my_extend(vlist, val)

def print_all_stats(ctx, series):
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while (start < ftime): # for each time interval
        end = ftime if ftime < end else end
        sample_arrays = [ s.get_samples(start, end) for s in series ]
        samplevalue_arrays = []
        for sample_array in sample_arrays:
            samplevalue_arrays.append(
                [ sample.value for sample in sample_array ] )
        #print('samplevalue_arrays len: %d' % len(samplevalue_arrays))
        #print('samplevalue_arrays elements len: ' + \
        #str(map( lambda l: len(l), samplevalue_arrays)))
        # collapse list of lists of sample values into list of sample values
        samplevalues = reduce( array_collapser, samplevalue_arrays, [] )
        #print('samplevalues: ' + str(sorted(samplevalues)))
        # compute all stats and print them
        myarray = scipy.fromiter(samplevalues, float)
        mymin = scipy.amin(myarray)
        myavg = scipy.average(myarray)
        mymedian = scipy.median(myarray)
        my90th = scipy.percentile(myarray, 90)
        my95th = scipy.percentile(myarray, 95)
        my99th = scipy.percentile(myarray, 99)
        mymax = scipy.amax(myarray)
        print( '%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            start, len(samplevalues),
            mymin, myavg, mymedian, my90th, my95th, my99th, mymax))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval


def print_default(ctx, series):
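    # Default output: print a single number, the time-weighted average of the
    # per-interval sums over the whole run (weights are the interval lengths).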
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    averages = []
    weights = []

    while (start < ftime):
        end = ftime if ftime < end else end
        results = [ts.get_value(start, end) for ts in series]
        averages.append(sum(results))
        weights.append(end-start)
        start += ctx.interval
        end += ctx.interval

    total = 0
    for i in xrange(0, len(averages)):
        total += averages[i]*weights[i]
    print '%0.3f' % (total/sum(weights))

class TimeSeries():
    def __init__(self, ctx, fn):
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        f = open(fn, 'r')
        p_time = 0
        for line in f:
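            # each log line holds four comma-separated fields; only the first
            # two (end time in msec and the sampled value) are used here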
            (time, value, foo, bar) = line.rstrip('\r\n').rsplit(', ')
            self.add_sample(p_time, int(time), int(value))
            p_time = int(time)

    def add_sample(self, start, end, value):
        sample = Sample(self.ctx, start, end, value)
        if not self.last or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        sample_list = []
        for s in self.samples:
            if s.start >= start and s.end <= end:
                sample_list.append(s)
        return sample_list

    def get_value(self, start, end):
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value

class Sample():
    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        # short circuit if not within the bound
        if (end < self.start or start > self.end):
            return 0

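        # scale the sample's value by the fraction of the queried window
        # [start, end] that overlaps this sample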
        sbound = self.start if start < self.start else start
        ebound = self.end if end > self.end else end
        ratio = float(ebound-sbound) / (end-start)
        return self.value*ratio/self.ctx.divisor


if __name__ == '__main__':
    ctx = parse_args()
    series = []
    for fn in ctx.FILE:
        series.append(TimeSeries(ctx, fn))
    if ctx.sum:
        print_sums(ctx, series)
    elif ctx.average:
        print_averages(ctx, series)
    elif ctx.full:
        print_full(ctx, series)
    elif ctx.allstats:
        print_all_stats(ctx, series)
    else:
        print_default(ctx, series)