fio2gnuplot: Don't truncate fio log files
[fio.git] / tools / plot / fio2gnuplot.py
CommitLineData
9402b895
EV
#!/usr/bin/python
#
#  Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
#  Author: Erwan Velu  <erwan@enovance.com>
#
#  The license below covers all files distributed with fio unless otherwise
#  noted in the file itself.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2 as
#  published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21
import fnmatch
import getopt
import math
import os
import re
import sys
28
def find_file(path, pattern):
    """Return the names of the entries of *path* that match *pattern*.

    Args:
        path: Directory to scan; only its direct entries are considered.
        pattern: Shell-style wildcard pattern as understood by ``fnmatch``
            (for example ``*_bw.log``). This is a glob, not a regular
            expression.

    Returns:
        A list of bare entry names (no directory prefix), in the order
        reported by ``os.listdir``.
    """
    return fnmatch.filter(os.listdir(path), pattern)
39
def generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,mode,disk_perf):
    """Write the ``mygraph`` gnuplot script into the current directory.

    The script holds one ``graph3D.gpm`` call when more than one log file is
    selected, followed by one ``graph2D.gpm`` call per log file.

    Args:
        fio_data_file: List of selected fio log file names.
        title: Title passed to every gnuplot call.
        gnuplot_output_filename: Base name used for the 3D call.
        mode: Label passed to every gnuplot call.
        disk_perf: Per-file lists of numeric samples; ``disk_perf[i]`` is
            averaged for the i-th entry of ``fio_data_file``.
    """
    # The context manager closes the script even if average() or a write fails.
    with open("mygraph", 'w') as script:
        if len(fio_data_file) > 1:
            script.write("call 'graph3D.gpm' '%s' '%s' '' '%s' '%s'\n" % (title, gnuplot_output_filename, gnuplot_output_filename, mode))

        # One 2D call per selected fio file, fed by its numbered temporary file
        for pos, log_name in enumerate(fio_data_file):
            tmp_filename = "gnuplot_temp_file.%d" % pos
            png_file = log_name.replace('.log', '')
            raw_filename = "%s-2Draw" % (png_file)
            smooth_filename = "%s-2Dsmooth" % (png_file)
            trend_filename = "%s-2Dtrend" % (png_file)
            avg = average(disk_perf[pos])
            script.write("call 'graph2D.gpm' '%s' '%s' '' '%s' '%s' '%s' '%s' '%f'\n" % (title, tmp_filename, raw_filename, mode, smooth_filename, trend_filename, avg))
58
def generate_gnuplot_math_script(title,gnuplot_output_filename,mode,average):
    """Append one ``math.gpm`` call to the ``mymath`` gnuplot script.

    The script is opened in append mode, so successive calls accumulate lines
    in the same file.

    Args:
        title: Title passed to the gnuplot call.
        gnuplot_output_filename: Name passed both as input and output argument.
        mode: Label passed to the gnuplot call.
        average: Value appended, unquoted, as the last argument of the call.
    """
    # The context manager closes the script even if the write fails.
    with open("mymath", 'a') as script:
        script.write("call 'math.gpm' '%s' '%s' '' '%s' '%s' %s\n" % (title, gnuplot_output_filename, gnuplot_output_filename, mode, average))
63
def compute_aggregated_file(fio_data_file, gnuplot_output_filename):
    """Concatenate the per-file temporary traces into one aggregated file.

    For the i-th entry of ``fio_data_file`` the content of
    ``gnuplot_temp_file.<i>`` (read from the current directory) is copied into
    ``gnuplot_output_filename``, preceded by a comment naming the originating
    log and followed by an empty line.

    Args:
        fio_data_file: List of selected fio log file names.
        gnuplot_output_filename: Path of the aggregated file to (over)write.
    """
    with open(gnuplot_output_filename, "w") as aggregated:
        for index, log_name in enumerate(fio_data_file):
            # Let's add some information
            aggregated.write("# Disk%d was coming from %s\n" % (index, log_name))
            # Temporary files are opened one at a time so the number of
            # descriptors in use does not grow with the number of logs.
            with open("gnuplot_temp_file.%d" % index, 'r') as temp_file:
                aggregated.write(temp_file.read())
            aggregated.write("\n")
83
def average(s):
    """Return the arithmetic mean of the numbers in *s* as a float.

    Args:
        s: Any iterable of numbers (list, generator, ``map`` object, ...).

    Returns:
        The mean of the values, or ``0.0`` when *s* yields no value.
    """
    # Materialise once so one-shot iterables can be both summed and counted.
    values = list(s)
    if not values:
        return 0.0
    return sum(values) * 1.0 / len(values)
85
def compute_temp_file(fio_data_file,disk_perf):
    """Convert fio logs into per-file gnuplot traces and collect the samples.

    For the i-th log a ``gnuplot_temp_file.<i>`` file is written in the current
    directory. It starts with a comment naming the log, then holds one
    ``<i> <time/1000> <value>`` line per retained sample. Logs are read in
    lockstep, one line each per round, until all of them reach end of file;
    a log shorter than the others simply stops contributing.

    Args:
        fio_data_file: List of fio log file names. Each line is expected to
            carry at least four comma/space separated fields: time, value,
            a third field that is ignored, and block size.
        disk_perf: List extended in place with one list of integer samples per
            log file, in the same order as ``fio_data_file``.

    Returns:
        The block size read from the first parsed line, or 0 when no line
        could be read.

    Raises:
        ValueError: If a non-blank line has fewer than four fields or a
            non-numeric time, value or block size.
    """
    files = []
    temp_outfile = []
    samples = []
    blk_size = 0
    try:
        for pos, log_name in enumerate(fio_data_file):
            files.append(open(log_name))
            gnuplot_file = open("gnuplot_temp_file.%d" % pos, 'w')
            temp_outfile.append(gnuplot_file)
            gnuplot_file.write("#Temporary file based on file %s\n" % log_name)
            # The same list object is shared with the caller's disk_perf, so
            # indexing stays correct even if disk_perf was not empty on entry.
            per_disk = []
            samples.append(per_disk)
            disk_perf.append(per_disk)

        while True:
            raw_lines = [log.readline() for log in files]
            # readline() returns '' only at end of file, so a blank line in a
            # log does not stop the processing; only global exhaustion does.
            if not any(raw_lines):
                break

            # The position in raw_lines identifies the disk. It must not
            # depend on how many samples were retained so far in this round.
            for index, raw in enumerate(raw_lines):
                fields = raw.replace(',', ' ').split()
                if not fields:
                    # This log is exhausted (or the line is blank)
                    continue
                time, perf, _unused, block_size = fields[:4]
                if blk_size == 0:
                    blk_size = int(block_size)

                # We ignore the first 500msec as it doesn't seems to be part of the real benchmark
                # Time < 500 usually reports BW=0 breaking the min computing
                if int(time) > 500:
                    samples[index].append(int(perf))
                    temp_outfile[index].write("%s %.2f %s\n" % (index, float(time) / 1000, perf))
    finally:
        # Release every handle, including when a log is malformed
        for handle in files + temp_outfile:
            handle.close()
    return blk_size
143
def compute_math(fio_data_file, title,gnuplot_output_filename,mode,disk_perf):
    """Write per-disk and global statistics and regenerate the math script.

    Creates ``<output>.average``, ``.min``, ``.max`` and ``.stddev`` (one
    ``<disk index> <value>`` line per log file) and ``<output>.global``
    (``key=value`` lines over all samples). Any existing ``mymath`` script is
    then removed and rebuilt with four ``math.gpm`` calls.

    Args:
        fio_data_file: List of selected fio log file names.
        title: Base title; a statistic-specific prefix is prepended per call.
        gnuplot_output_filename: Base name of every generated file.
        mode: Label written in the file headers and passed to gnuplot.
        disk_perf: Per-file lists of numeric samples, indexed like
            ``fio_data_file``.
    """
    global_min = []
    global_max = []
    # The context managers close all four files even if a computation fails
    with open(gnuplot_output_filename + '.average', 'w') as average_file, \
            open(gnuplot_output_filename + '.min', 'w') as min_file, \
            open(gnuplot_output_filename + '.max', 'w') as max_file, \
            open(gnuplot_output_filename + '.stddev', 'w') as stddev_file:
        stat_files = (min_file, max_file, average_file, stddev_file)
        for stat_file in stat_files:
            stat_file.write('DiskName %s\n' % mode)

        for disk, log_name in enumerate(fio_data_file):
            origin = "# Disk%d was coming from %s\n" % (disk, log_name)
            for stat_file in stat_files:
                stat_file.write(origin)

            disk_samples = disk_perf[disk]
            avg = average(disk_samples)
            # A real list is required: average() needs the number of values
            variance = [(x - avg) ** 2 for x in disk_samples]
            standard_deviation = math.sqrt(average(variance))
            average_file.write('%d %d\n' % (disk, avg))
            stddev_file.write('%d %d\n' % (disk, standard_deviation))
            local_min = min(disk_samples)
            local_max = max(disk_samples)
            min_file.write('%d %d\n' % (disk, local_min))
            max_file.write('%d %d\n' % (disk, local_max))
            global_min.append(int(local_min))
            global_max.append(int(local_max))

    # Flatten all the samples in a single pass
    global_disk_perf = [value for disk_samples in disk_perf for value in disk_samples]
    avg = average(global_disk_perf)
    variance = [(x - avg) ** 2 for x in global_disk_perf]
    standard_deviation = math.sqrt(average(variance))

    with open(gnuplot_output_filename + '.global', 'w') as global_file:
        global_file.write('min=%.2f\n' % min(global_disk_perf))
        global_file.write('max=%.2f\n' % max(global_disk_perf))
        global_file.write('avg=%.2f\n' % avg)
        global_file.write('stddev=%.2f\n' % standard_deviation)
        global_file.write('values_count=%d\n' % len(global_disk_perf))
        global_file.write('disks_count=%d\n' % len(fio_data_file))

    # The math script is built by appending, so a previous one must go first.
    # It is fine if there was none.
    try:
        os.remove('mymath')
    except OSError:
        pass

    generate_gnuplot_math_script("Average values of "+title,gnuplot_output_filename+'.average',mode,int(avg))
    generate_gnuplot_math_script("Min values of "+title,gnuplot_output_filename+'.min',mode,average(global_min))
    generate_gnuplot_math_script("Max values of "+title,gnuplot_output_filename+'.max',mode,average(global_max))
    generate_gnuplot_math_script("Standard Deviation of "+title,gnuplot_output_filename+'.stddev',mode,int(standard_deviation))
203
def parse_global_files(fio_data_file, global_search):
    """Scan ``.global`` files and report the biggest aggregated value.

    Each file is read as ``name=value`` lines. For the ``avg`` search the
    file's ``disks_count`` is multiplied by its matching value, and the file
    with the biggest product is printed. Any other search type only prints
    that it is not implemented.

    Args:
        fio_data_file: List of ``.global`` file names to read.
        global_search: Statistic to look for; it is matched as a substring of
            each line's name.

    Raises:
        ValueError: If a matching line carries a non-numeric value.
    """
    max_result = 0
    max_file = ''
    for log_name in fio_data_file:
        # Both values are reset for every file so that a file lacking one of
        # them is skipped instead of reusing what the previous file provided.
        disks_count = 0
        search_value = -1

        with open(log_name) as global_file:
            for line in global_file:
                # We do split the name from the value
                fields = line.split("=")
                if len(fields) != 2:
                    # Not a name=value line, nothing to extract from it
                    continue
                name, value = fields

                # disks_count is not global_search item
                # As we need it for some computation, let's save it
                if name == "disks_count":
                    disks_count = int(value)

                # Let's catch the searched item
                if global_search in name:
                    search_value = float(value)

        # Let's process the avg value by estimated the global bandwidth per file
        # We keep the biggest in memory for reporting
        if global_search == "avg" and disks_count > 0 and search_value != -1:
            result = disks_count * search_value
            if result > max_result:
                max_result = result
                max_file = log_name

    # Let's print the avg output
    if global_search == "avg":
        print("Biggest aggregated value of %s was %2.f in file %s\n" % (global_search, max_result, max_file))
    else:
        print("Global search %s is not yet implemented\n" % global_search)
248
def render_gnuplot():
    """Run gnuplot on the ``mymath`` and ``mygraph`` scripts.

    Both scripts are always attempted. If either command reports a non-zero
    status an error is printed and the program exits with status 1.
    """
    print("Running gnuplot Rendering\n")
    failed = False
    for script in ("mymath", "mygraph"):
        # os.system() reports failures through its return value, it does not
        # raise, so the status has to be checked explicitly.
        if os.system("gnuplot %s" % script) != 0:
            failed = True

    if failed:
        print("Could not run gnuplot on mymath or mygraph !\n")
        sys.exit(1)
257
def print_help():
    """Print the command line usage on standard output."""
    help_lines = (
        'fio2gnuplot.py -ghbio -t <title> -o <outputfile> -p <pattern>',
        '',
        '-h --help : Print this help',
        '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files',
        '-b or --bandwidth : A predefined pattern for selecting *_bw.log files',
        '-i or --iops : A predefined pattern for selecting *_iops.log files',
        '-g or --gnuplot : Render gnuplot traces before exiting',
        '-o or --outputfile <file> : The basename for gnuplot traces',
        ' - Basename is set with the pattern if defined',
        '-t or --title <title> : The title of the gnuplot traces',
        ' - Title is set with the block size detected in fio traces',
        '-G or --Global <type> : Search for <type> in .global files match by a pattern',
        ' - Available types are : min, max, avg, stddev',
        ' - The .global extension is added automatically to the pattern',
    )
    # Single-argument print() calls behave the same on Python 2 and Python 3
    for help_line in help_lines:
        print(help_line)
273
def main(argv):
    """Entry point: parse the command line and generate the gnuplot material.

    Depending on the options, either the ``.global`` files matching the
    pattern are analysed (``-G``), or the selected fio logs are converted into
    gnuplot traces, statistics and scripts, optionally rendered with ``-g``.

    Args:
        argv: Full argument vector, program name included (``sys.argv``).

    Returns:
        None. The process exits with status 2 on a bad option and 1 after
        printing the help or when no file matches the pattern.
    """
    mode = 'unknown'
    pattern = ''
    pattern_set_by_user = False
    title = 'No title'
    gnuplot_output_filename = 'result'
    disk_perf = []
    run_gnuplot = False
    parse_global = False
    global_search = ''

    # The long options advertised by print_help() must be registered here,
    # otherwise getopt rejects them.
    long_options = ["gnuplot", "help", "bandwidth", "iops",
                    "outputfile=", "title=", "pattern=", "Global="]
    try:
        opts, args = getopt.getopt(argv[1:], "ghbio:t:p:G:", long_options)
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-b", "--bandwidth"):
            pattern = '*_bw.log'
        elif opt in ("-i", "--iops"):
            pattern = '*_iops.log'
        elif opt in ("-p", "--pattern"):
            pattern_set_by_user = True
            pattern = arg
            pattern = pattern.replace('\\', '')
        elif opt in ("-o", "--outputfile"):
            gnuplot_output_filename = arg
        elif opt in ("-t", "--title"):
            title = arg
        elif opt in ("-g", "--gnuplot"):
            run_gnuplot = True
        elif opt in ("-G", "--Global"):
            parse_global = True
            global_search = arg
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit(1)

    # Adding .global extension to the pattern, unless it already carries it
    if parse_global and not pattern.endswith('.global'):
        pattern = pattern + '.global'

    fio_data_file = find_file('.', pattern)
    if len(fio_data_file) == 0:
        print("No log file found with pattern %s!" % pattern)
        sys.exit(1)

    fio_data_file = sorted(fio_data_file, key=str.lower)
    for log_name in fio_data_file:
        print('Selected %s' % log_name)
        if "_bw.log" in log_name:
            mode = "Bandwidth (KB/sec)"
        if "_iops.log" in log_name:
            mode = "IO per Seconds (IO/sec)"

    if (title == 'No title') and (mode != 'unknown'):
        if "Bandwidth" in mode:
            title = 'Bandwidth benchmark with %d fio results' % len(fio_data_file)
        if "IO" in mode:
            title = 'IO benchmark with %d fio results' % len(fio_data_file)

    # We need to adjust the output filename regarding the pattern required by the user
    if pattern_set_by_user:
        gnuplot_output_filename = pattern
        # As we do have some wildcards in the pattern, let's make this simpler
        # We do remove the simplest parts of the expression to get a clear file name
        gnuplot_output_filename = gnuplot_output_filename.replace('-*-', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('*', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('--', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('.log', '')
        # Ensure that we don't have any starting or trailing dash to the filename
        if gnuplot_output_filename.endswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[:-1]
        if gnuplot_output_filename.startswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[1:]

    if parse_global:
        parse_global_files(fio_data_file, global_search)
    else:
        blk_size = compute_temp_file(fio_data_file, disk_perf)
        title = "%s @ Blocksize = %dK" % (title, blk_size // 1024)
        compute_aggregated_file(fio_data_file, gnuplot_output_filename)
        compute_math(fio_data_file, title, gnuplot_output_filename, mode, disk_perf)
        generate_gnuplot_script(fio_data_file, title, gnuplot_output_filename, mode, disk_perf)

        if run_gnuplot:
            render_gnuplot()

            # Cleaning temporary files. os.remove() does not expand
            # wildcards, so the matching files are listed explicitly. They
            # are only removed once rendered: without -g the generated
            # mygraph script still refers to them.
            for temp_name in fnmatch.filter(os.listdir('.'), 'gnuplot_temp_file.*'):
                try:
                    os.remove(temp_name)
                except OSError:
                    # Best effort: a leftover temporary file is harmless
                    pass
366
# Script entry point: main()'s return value is handed to sys.exit()
if __name__ == "__main__":
    sys.exit(main(sys.argv))