| 1 | #!/usr/bin/python |
| 2 | # |
| 3 | # fiologparser.py |
| 4 | # |
| 5 | # This tool lets you parse multiple fio log files and look at interval |
| 6 | # statistics even when samples are non-uniform. For instance: |
| 7 | # |
| 8 | # fiologparser.py -s *bw* |
| 9 | # |
| 10 | # to see per-interval sums for all bandwidth logs or: |
| 11 | # |
| 12 | # fiologparser.py -a *clat* |
| 13 | # |
| 14 | # to see per-interval average completion latency. |
| 15 | |
| 16 | import argparse |
| 17 | import math |
| 18 | |
def parse_args():
    """Build the command-line parser and return the parsed arguments.

    Options are registered in the order -i, -d, -f, -A, -a, -s, followed by
    the positional FILE list (one or more paths), so the generated help text
    lists them in that order.

    Returns:
        The argparse namespace with the attributes ``interval``, ``divisor``,
        ``full``, ``allstats``, ``average``, ``sum`` and ``FILE``.
    """
    parser = argparse.ArgumentParser()

    # Integer-valued options: (short flag, long flag, default, help text).
    # NOTE(review): the -i help text says "seconds", but the value is compared
    # directly against the timestamps read from the logs -- confirm the units.
    int_options = (
        ('-i', '--interval', 1000, 'interval of time in seconds.'),
        ('-d', '--divisor', 1, 'divide the results by this value.'),
    )
    for short_flag, long_flag, default, help_text in int_options:
        parser.add_argument(short_flag, long_flag, required=False, type=int,
                            default=default, help=help_text)

    # Boolean switches: (short flag, long flag, destination, help text).
    switches = (
        ('-f', '--full', 'full', 'print full output.'),
        ('-A', '--all', 'allstats', 'print all stats for each interval.'),
        ('-a', '--average', 'average', 'print the average for each interval.'),
        ('-s', '--sum', 'sum', 'print the sum for each interval.'),
    )
    for short_flag, long_flag, dest, help_text in switches:
        parser.add_argument(short_flag, long_flag, dest=dest,
                            action='store_true', default=False, help=help_text)

    parser.add_argument("FILE", help="collectl log output files to parse", nargs="+")
    return parser.parse_args()
| 32 | |
def get_ftime(series):
    """Return the end time of the series whose last sample ends earliest.

    The print routines use this as the upper time bound of their interval
    loops.

    Args:
        series: iterable of objects exposing ``last``, which is either None
            (no samples were added) or an object with an ``end`` attribute.

    Returns:
        The smallest ``last.end`` seen, or 0 when ``series`` is empty or no
        series holds any sample.
    """
    ftime = 0
    for ts in series:
        # A series built from an empty log never had a sample added, so its
        # ``last`` is still None; skip it instead of failing on ``.end``.
        if ts.last is None:
            continue
        # 0 doubles as the "nothing recorded yet" marker.
        if ftime == 0 or ts.last.end < ftime:
            ftime = ts.last.end
    return ftime
| 39 | |
def print_full(ctx, series):
    """Print one line per interval listing every series' value separately.

    Each line is ``<interval end>, <v1>, <v2>, ...`` with values formatted to
    three decimals. Intervals are ``ctx.interval`` wide, start at 0, and the
    last one is clipped to the time returned by ``get_ftime(series)``.
    """
    last_time = get_ftime(series)
    step = ctx.interval
    window_start = 0

    while window_start < last_time:
        # Clip the final window so it never runs past the common end time.
        window_end = min(window_start + step, last_time)
        columns = ', '.join(
            "%0.3f" % ts.get_value(window_start, window_end) for ts in series)
        print("%s, %s" % (window_end, columns))
        window_start += step
| 51 | |
def print_sums(ctx, series):
    """Print one line per interval with the sum of all series' values.

    Each line is ``<interval end>, <sum>`` with the sum formatted to three
    decimals. Intervals are ``ctx.interval`` wide, start at 0, and the last
    one is clipped to the time returned by ``get_ftime(series)``.
    """
    last_time = get_ftime(series)
    step = ctx.interval
    window_start = 0

    while window_start < last_time:
        # Clip the final window so it never runs past the common end time.
        window_end = min(window_start + step, last_time)
        total = sum(ts.get_value(window_start, window_end) for ts in series)
        print("%s, %0.3f" % (window_end, total))
        window_start += step
| 63 | |
def print_averages(ctx, series):
    """Print one line per interval with the mean of the series' values.

    Each line is ``<interval end>, <mean>`` where the mean is the sum of the
    per-series values divided by the number of series, formatted to three
    decimals. Intervals are ``ctx.interval`` wide, start at 0, and the last
    one is clipped to the time returned by ``get_ftime(series)``.
    """
    last_time = get_ftime(series)
    step = ctx.interval
    window_start = 0

    while window_start < last_time:
        # Clip the final window so it never runs past the common end time.
        window_end = min(window_start + step, last_time)
        values = [ts.get_value(window_start, window_end) for ts in series]
        mean = float(sum(values)) / len(values)
        print("%s, %0.3f" % (window_end, mean))
        window_start += step
| 75 | |
| 76 | # FIXME: this routine is computationally inefficient |
| 77 | # and has O(N^2) behavior |
| 78 | # it would be better to make one pass through samples |
| 79 | # to segment them into a series of time intervals, and |
| 80 | # then compute stats on each time interval instead. |
| 81 | # to debug this routine, use |
| 82 | # # sort -n -t ',' -k 2 small.log |
| 83 | # on your input. |
| 84 | |
def my_extend(vlist, val):
    """Append every item of ``val`` to ``vlist`` in place and return ``vlist``.

    Returning the list (which ``extend`` itself does not do) lets the call be
    used as the folding step of a reduce.
    """
    vlist.extend(val)
    return vlist
| 88 | |
def array_collapser(vlist, val):
    """Fold step: extend ``vlist`` with the items of ``val`` and return it."""
    return my_extend(vlist, val)
| 90 | |
def print_all_stats(ctx, series):
    """Print per-interval statistics over the raw samples of all series.

    A header line is printed first, then one line per interval:
    ``start-time, samples, min, avg, median, 90%, 95%, 99%, max``.
    Only samples that lie entirely inside an interval are counted (that is
    what ``get_samples`` returns). Intervals are ``ctx.interval`` wide, start
    at 0, and the last one is clipped to ``get_ftime(series)``.

    An interval containing no complete sample is reported with a sample count
    of 0 and ``nan`` for every statistic, instead of aborting the whole run.
    """
    ftime = get_ftime(series)
    start = 0
    end = ctx.interval
    print('start-time, samples, min, avg, median, 90%, 95%, 99%, max')
    while start < ftime:  # for each time interval
        end = ftime if ftime < end else end

        # Flatten the per-series sample lists into one list of values, kept
        # in series order. A comprehension is used rather than reduce(),
        # which is not a builtin on Python 3.
        samplevalues = [sample.value
                        for s in series
                        for sample in s.get_samples(start, end)]

        if samplevalues:
            stats = (
                min(samplevalues),
                sum(samplevalues) / float(len(samplevalues)),
                median(samplevalues),
                percentile(samplevalues, 0.90),
                percentile(samplevalues, 0.95),
                percentile(samplevalues, 0.99),
                max(samplevalues),
            )
        else:
            # min()/max() and the average are undefined on an empty list.
            stats = (float('nan'),) * 7

        print('%f, %d, %f, %f, %f, %f, %f, %f, %f' % (
            (start, len(samplevalues)) + stats))

        # advance to next interval
        start += ctx.interval
        end += ctx.interval
| 120 | |
def median(values):
    """Return the median of ``values`` as a float.

    The values are sorted; for an odd count the middle element is returned,
    for an even count the mean of the two middle elements.

    Raises:
        IndexError: if ``values`` is empty.
    """
    ordered = sorted(values)
    count = len(ordered)
    # Floor division keeps the indices integral on Python 3 as well; for an
    # odd count both indices point at the same (middle) element.
    lower = ordered[(count - 1) // 2]
    upper = ordered[count // 2]
    return float(lower + upper) / 2
| 124 | |
def percentile(values, p):
    """Return the ``p`` percentile of ``values`` using linear interpolation.

    Args:
        values: the data points; they are sorted internally.
        p: the fraction to look up, e.g. 0.95 for the 95th percentile.

    Returns:
        The element at rank ``(len(values) - 1) * p`` of the sorted data when
        that rank is a whole number; otherwise the two neighbouring elements
        blended according to the fractional part of the rank.
    """
    ordered = sorted(values)
    rank = (len(ordered) - 1) * p
    below = math.floor(rank)
    above = math.ceil(rank)
    if below != above:
        # Rank falls between two elements: weight each by its closeness.
        low_part = ordered[int(below)] * (above - rank)
        high_part = ordered[int(above)] * (rank - below)
        return low_part + high_part
    return ordered[int(rank)]
| 133 | |
def print_default(ctx, series):
    """Print a single time-weighted average of the summed series values.

    For every interval the values of all series are summed; each sum is then
    weighted by the width of its interval (the last interval may be narrower
    because it is clipped to ``get_ftime(series)``). The weighted total
    divided by the total width is printed with three decimals.
    """
    last_time = get_ftime(series)
    step = ctx.interval
    window_start = 0
    weighted_total = 0
    total_width = 0

    while window_start < last_time:
        # Clip the final window so it never runs past the common end time.
        window_end = min(window_start + step, last_time)
        width = window_end - window_start
        interval_sum = sum(
            [ts.get_value(window_start, window_end) for ts in series])
        weighted_total += interval_sum * width
        total_width += width
        window_start += step

    print('%0.3f' % (weighted_total / total_width))
| 153 | |
class TimeSeries(object):
    """The samples read from one log file.

    Attributes (set in ``__init__``):
        ctx: the parsed command-line arguments, handed on to every Sample.
        last: the sample with the greatest end time added so far, or None
            while no sample has been added.
        samples: all samples in file order.
    """

    def __init__(self, ctx, fn):
        """Create the series and immediately load the samples from ``fn``."""
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Read ``fn`` and add one sample per non-blank line.

        Lines are split on ``', '``; the first field is the time and the
        second the value, both parsed as integers. Any further columns are
        ignored, so logs with a different number of trailing fields still
        load. Each sample spans from the previous line's time (0 for the
        first line) to its own time.

        Raises:
            IndexError: if a non-blank line has fewer than two fields.
            ValueError: if the time or value is not an integer.
        """
        p_time = 0
        # The context manager closes the file even if a line fails to parse.
        with open(fn, 'r') as f:
            for line in f:
                line = line.rstrip('\r\n')
                if not line.strip():
                    continue  # tolerate blank lines, e.g. at end of file
                fields = line.split(', ')
                time = int(fields[0])
                value = int(fields[1])
                self.add_sample(p_time, time, value)
                p_time = time

    def add_sample(self, start, end, value):
        """Append a sample covering ``start``..``end`` and track the latest."""
        # Use the context stored on this instance rather than a module-level
        # global, so the class also works when imported from another module.
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples lying entirely within ``start``..``end``."""
        return [s for s in self.samples
                if s.start >= start and s.end <= end]

    def get_value(self, start, end):
        """Return the summed contribution of all samples to the interval."""
        value = 0
        for sample in self.samples:
            value += sample.get_contribution(start, end)
        return value
| 187 | |
class Sample(object):
    """One logged value together with the time span it covers.

    Attributes (set in ``__init__``):
        ctx: the parsed command-line arguments; ``ctx.divisor`` scales
            every contribution.
        start: beginning of the span.
        end: end of the span.
        value: the logged value.
    """

    def __init__(self, ctx, start, end, value):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.value = value

    def get_contribution(self, start, end):
        """Return this sample's share of the interval ``start``..``end``.

        The sample's value is scaled by the fraction of the interval that
        the sample overlaps, then divided by ``ctx.divisor``. A sample that
        does not touch the interval contributes 0.
        """
        # short circuit if not within the bound
        if end < self.start or start > self.end:
            return 0

        # Overlap of the sample's span with the requested interval.
        sbound = max(start, self.start)
        ebound = min(end, self.end)
        ratio = float(ebound - sbound) / (end - start)
        # Read the divisor from the context stored on the instance instead
        # of a module-level global, which only exists when run as a script.
        return self.value * ratio / self.ctx.divisor
| 204 | |
| 205 | |
if __name__ == '__main__':
    # Parse the command line, load every log file, then run the first report
    # whose flag was given (checked in the order sum, average, full, all);
    # with no flag the single weighted average is printed.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]

    reports = (
        (ctx.sum, print_sums),
        (ctx.average, print_averages),
        (ctx.full, print_full),
        (ctx.allstats, print_all_stats),
    )
    for requested, report in reports:
        if requested:
            report(ctx, series)
            break
    else:
        print_default(ctx, series)
| 221 | |