fio2gnuplot: Fixing plotting issues on shorten files
[fio.git] / tools / plot / fio2gnuplot.py
CommitLineData
9402b895
EV
1#!/usr/bin/python
2#
3# Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
4# Author: Erwan Velu <erwan@enovance.com>
5#
6# The license below covers all files distributed with fio unless otherwise
7# noted in the file itself.
8#
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License version 2 as
11# published by the Free Software Foundation.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22import os
23import fnmatch
24import sys
25import getopt
26import re
27import math
795ae706 28import shutil
9402b895
EV
29
def find_file(path, pattern):
    """Return the names of the entries of ``path`` that match ``pattern``.

    ``pattern`` is matched with ``fnmatch.fnmatch``, i.e. it is a shell-style
    wildcard.  Only the bare entry names are returned (not joined with
    ``path``), in the order ``os.listdir`` yields them.
    """
    return [entry for entry in os.listdir(path)
            if fnmatch.fnmatch(entry, pattern)]
40
def generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Write the gnuplot driver scripts into ``gnuplot_output_dir``.

    Files produced:
      * ``mygraph``: one ``graph2D.gpm`` call per trace, preceded by a
        ``graph3D.gpm`` call when there are at least two traces.
      * ``compare-<name>-2Draw|2Dsmooth|2Dtrend.gnuplot``: only when there
        are at least two traces; each plots every trace on a single graph.

    Parameters:
      fio_data_file           -- list of fio log file names, one per trace
      title                   -- graph title
      gnuplot_output_filename -- base name used for the 3D/compare outputs
      gnuplot_output_dir      -- output directory, used as a string prefix
                                 (callers pass it with a trailing '/')
      mode                    -- label of the plotted value (y axis)
      disk_perf               -- per-trace list of samples; disk_perf[i]
                                 belongs to fio_data_file[i]
      gpm_dir                 -- directory holding the .gpm helper scripts

    Raises ZeroDivisionError (from ``average``) if a trace has no sample.
    """
    print("Generating rendering scripts")

    # Plotting 3D or comparing graphs doesn't have a meaning unless there
    # are at least 2 traces
    compare_mode = len(fio_data_file) > 1

    # (file name suffix, gnuplot plotting style) for each compare graph
    compare_kinds = (
        ("2Draw", "with linespoints"),
        ("2Dsmooth", "smooth csplines"),
        ("2Dtrend", "smooth bezier"),
    )
    compare_header = '''
set title '%s'
set terminal png size 1280,1024
set ytics axis out auto
set key top left reverse
set xlabel "Time (Seconds)"
set ylabel '%s'
set yrange [0:]
''' % (title, mode)

    compare_scripts = []
    graph = open(gnuplot_output_dir + 'mygraph', 'w')
    try:
        if compare_mode:
            graph.write("call '%s/graph3D.gpm' '%s' '%s' '' '%s' '%s'\n" % (gpm_dir, title, gnuplot_output_filename, gnuplot_output_filename, mode))

            # The common header is written straight into each compare script
            # instead of going through an intermediate compare.gnuplot file:
            # a leftover copy of that file would be picked up by the
            # "*.gnuplot" rendering loop.
            for suffix, style in compare_kinds:
                compare_name = "compare-%s-%s" % (gnuplot_output_filename, suffix)
                script = open(gnuplot_output_dir + compare_name + ".gnuplot", 'w')
                compare_scripts.append((script, style))
                script.write(compare_header)
                # Each kind of graph gets its own output file
                script.write("set output '%s.png'\n" % compare_name)

        for pos, data_file in enumerate(fio_data_file):
            # Name of the temporary data file associated with this trace
            tmp_filename = "gnuplot_temp_file.%d" % pos

            if compare_mode:
                # The first trace opens the plot command, the next ones
                # extend it on a continuation line
                if pos == 0:
                    prefix = "plot "
                else:
                    prefix = ",\\\n"
                for script, style in compare_scripts:
                    script.write("%s'%s' using 2:3 %s title '%s'" % (prefix, tmp_filename, style, data_file))

            png_file = data_file.replace('.log', '')
            raw_filename = "%s-2Draw" % (png_file)
            smooth_filename = "%s-2Dsmooth" % (png_file)
            trend_filename = "%s-2Dtrend" % (png_file)
            avg = average(disk_perf[pos])
            graph.write("call '%s/graph2D.gpm' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%f'\n" % (gpm_dir, title, tmp_filename, data_file, raw_filename, mode, smooth_filename, trend_filename, avg))
    finally:
        # Never leave a script open, even if a trace could not be processed
        for script, _ in compare_scripts:
            script.close()
        graph.close()
110
def generate_gnuplot_math_script(title,gnuplot_output_filename,mode,average,gnuplot_output_dir,gpm_dir):
    """Append one ``math.gpm`` call line to the ``mymath`` gnuplot script.

    The script lives in ``gnuplot_output_dir`` (used as a string prefix) and
    is opened in append mode, so successive calls accumulate lines.
    ``average`` is written verbatim as the last, unquoted argument.
    """
    with open(gnuplot_output_dir + 'mymath', 'a') as script:
        call_args = (gpm_dir, title, gnuplot_output_filename,
                     gnuplot_output_filename, mode, average)
        script.write("call \'%s/math.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\' %s\n" % call_args)
116
def compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir):
    """Concatenate the per-trace temporary files into one aggregated file.

    For every entry of ``fio_data_file`` (index ``i``) the content of
    ``<gnuplot_output_dir>gnuplot_temp_file.<i>`` is copied into
    ``<gnuplot_output_dir><gnuplot_output_filename>``, preceded by a comment
    naming the originating fio file and followed by an empty line.

    The temporary files are read one at a time so that the number of
    simultaneously open files does not grow with the number of traces, and
    every handle is closed even when a read fails.
    """
    print("Processing data file 2/2")
    with open(gnuplot_output_dir + gnuplot_output_filename, "w") as aggregated:
        for index, data_file in enumerate(fio_data_file):
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir, index)
            # Let's add some information about where the data came from
            aggregated.write("# Disk%d was coming from %s\n" % (index, data_file))
            with open(tmp_filename, 'r') as temp_file:
                aggregated.write(temp_file.read())
            aggregated.write("\n")
138
def average(s):
    """Return the arithmetic mean of the numbers in ``s`` as a float.

    ``s`` must support ``len()``; an empty ``s`` raises ZeroDivisionError.
    """
    total = sum(s)
    count = len(s)
    return total * 1.0 / count
140
932f303b
EV
def compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir, min_time, max_time):
    """Filter the fio logs by time and write one gnuplot data file per log.

    All the logs are read in lockstep, one line from each per iteration.
    Each line is expected to hold four comma/space separated fields:
    time (msec), value, a third field that is ignored, and block size.
    Samples whose time lies strictly inside the requested window are

      * appended (as int) to ``disk_perf[i]`` for log number ``i``;
      * written to ``<gnuplot_output_dir>gnuplot_temp_file.<i>`` as
        ``"<i> <time in seconds, 2 decimals> <value>"``.

    Parameters:
      fio_data_file      -- list of fio log file names to read
      disk_perf          -- list extended in place with one sample list
                            per log
      gnuplot_output_dir -- output directory, used as a string prefix
      min_time           -- lower bound in seconds (number or numeric
                            string); 0 is replaced by 0.5 because the first
                            500 msec usually report a value of 0
      max_time           -- upper bound in seconds (number or numeric
                            string); any negative value means "no limit"

    Returns the first non-zero block size read from the logs (0 if none).
    Prints an error and exits with status 1 if a block size is not an int.
    """
    # The bounds may come straight from the command line as strings:
    # convert them once instead of comparing strings with numbers.
    min_seconds = float(min_time)
    max_seconds = float(max_time)
    no_upper_limit = max_seconds < 0

    end_time = max_time
    if no_upper_limit:
        end_time = "infinite"
    print("Processing data file 1/2 with %s<time<%s" % (min_time, end_time))

    # We ignore the first 500msec as it doesn't seem to be part of the real
    # benchmark: time < 500 usually reports BW=0, breaking the min computing
    if min_seconds == 0:
        min_seconds = 0.5
    min_msec = min_seconds * 1000
    max_msec = max_seconds * 1000

    inputs = []
    outputs = []
    blk_size = 0
    try:
        for pos, data_file in enumerate(fio_data_file):
            inputs.append(open(data_file))
            temp_file = open("%sgnuplot_temp_file.%d" % (gnuplot_output_dir, pos), 'w')
            outputs.append(temp_file)
            temp_file.write("#Temporary file based on file %s\n" % data_file)
            disk_perf.append([])

        while True:
            # Read one line from every log; exhausted (or blank) logs get a
            # placeholder whose time of -1 is always filtered out below
            rows = []
            exhausted = 0
            for handle in inputs:
                fields = handle.readline().replace(',', ' ').split()
                if not fields:
                    exhausted += 1
                    fields = ["-1", "0", "0", "0"]
                rows.append(fields)

            # Stop once no log has anything left (also true with no log)
            if exhausted == len(inputs):
                break

            samples = []
            for index, fields in enumerate(rows):
                # index remembers which file was featuring which value
                stamp, perf, _unused, block_size = fields
                if blk_size == 0:
                    try:
                        blk_size = int(block_size)
                    except ValueError:
                        print("Error while reading the following line :")
                        print((index, fields))
                        sys.exit(1)

                # Keep the sample only if it is part of the time range
                stamp_msec = float(stamp)
                if stamp_msec > min_msec and (no_upper_limit or stamp_msec < max_msec):
                    disk_perf[index].append(int(perf))
                    samples.append((index, stamp_msec, perf))

            # If we reach this point, the whole row was parsed successfully
            for index, stamp_msec, perf in samples:
                outputs[index].write("%d %.2f %s\n" % (index, stamp_msec / 1000, perf))
    finally:
        for handle in inputs + outputs:
            handle.close()
    return blk_size
215
def compute_math(fio_data_file, title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Compute per-trace and global statistics and write the report files.

    Files written, all named ``<gnuplot_output_dir><gnuplot_output_filename>``
    plus a suffix:
      * ``.average``, ``.min``, ``.max``, ``.stddev``: a ``DiskName <mode>``
        header, then for each trace a comment naming its fio file and a
        ``<index> <value>`` line (values are truncated to integers);
      * ``.global``: ``min``, ``max``, ``avg``, ``stddev``, ``values_count``
        and ``disks_count`` computed over all the samples of ``disk_perf``.

    Any existing ``mymath`` script is then removed and regenerated with one
    ``generate_gnuplot_math_script`` call per statistic.

    ``disk_perf[i]`` must hold the samples of ``fio_data_file[i]``; an empty
    sample list raises ZeroDivisionError from ``average``.  The statistics
    are fully computed before any file is opened, so no handle is left open
    and no half-written report is produced when that happens.
    """
    print("Computing Maths")
    base_name = gnuplot_output_dir + gnuplot_output_filename
    kinds = ('average', 'min', 'max', 'stddev')

    # Lines of each per-trace report, keyed by file suffix
    reports = dict((kind, ['DiskName %s\n' % mode]) for kind in kinds)
    global_min = []
    global_max = []

    for disk, data_file in enumerate(fio_data_file):
        perfs = disk_perf[disk]
        avg = average(perfs)
        variance = [(x - avg) ** 2 for x in perfs]
        standard_deviation = math.sqrt(average(variance))
        local_min = min(perfs)
        local_max = max(perfs)

        origin = "# Disk%d was coming from %s\n" % (disk, data_file)
        values = {'average': avg, 'min': local_min, 'max': local_max, 'stddev': standard_deviation}
        for kind in kinds:
            reports[kind].append(origin)
            reports[kind].append('%d %d\n' % (disk, values[kind]))
        global_min.append(int(local_min))
        global_max.append(int(local_max))

    # Flatten every sample of every trace (linear, unlike sum(lists, []))
    global_disk_perf = [value for perfs in disk_perf for value in perfs]
    avg = average(global_disk_perf)
    variance = [(x - avg) ** 2 for x in global_disk_perf]
    standard_deviation = math.sqrt(average(variance))

    for kind in kinds:
        with open(base_name + '.' + kind, 'w') as report:
            report.writelines(reports[kind])

    with open(base_name + '.global', 'w') as global_file:
        global_file.write('min=%.2f\n' % min(global_disk_perf))
        global_file.write('max=%.2f\n' % max(global_disk_perf))
        global_file.write('avg=%.2f\n' % avg)
        global_file.write('stddev=%.2f\n' % standard_deviation)
        global_file.write('values_count=%d\n' % len(global_disk_perf))
        global_file.write('disks_count=%d\n' % len(fio_data_file))

    # The math script is opened in append mode: drop any previous version.
    # A missing file is the normal case on a first run.
    try:
        os.remove(gnuplot_output_dir + 'mymath')
    except OSError:
        pass

    generate_gnuplot_math_script("Average values of "+title,gnuplot_output_filename+'.average',mode,int(avg),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Min values of "+title,gnuplot_output_filename+'.min',mode,average(global_min),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Max values of "+title,gnuplot_output_filename+'.max',mode,average(global_max),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Standard Deviation of "+title,gnuplot_output_filename+'.stddev',mode,int(standard_deviation),gnuplot_output_dir,gpm_dir)
9402b895
EV
276
def parse_global_files(fio_data_file, global_search):
    """Search ``name=value`` files for ``global_search`` and print the result.

    Each file of ``fio_data_file`` is read line by line until the first line
    that is not of the form ``name=value`` (or the end of the file).  The
    value of ``disks_count`` and of the last entry whose name contains
    ``global_search`` are remembered per file.

    Only ``global_search == "avg"`` is implemented: for every file providing
    both a positive ``disks_count`` and a searched value, the product of the
    two is computed, and the biggest product is printed with the name of the
    file it came from.  Any other search prints a "not yet implemented"
    message.

    Raises ValueError if a matching value is not numeric.
    """
    max_result = 0
    max_file = ''
    for data_file in fio_data_file:
        # Reset for every file, so that a file without a disks_count entry
        # is skipped instead of reusing the count of the previous file
        disks_count = 0
        search_value = -1

        with open(data_file) as handle:
            for line in handle:
                # We do split the name from the value
                fields = line.split("=")
                if len(fields) != 2 or not fields[0]:
                    # Not a name=value line: stop reading this file
                    break
                name, value = fields

                # disks_count is not a global_search item
                # As we need it for some computation, let's save it
                if name == "disks_count":
                    disks_count = int(value)

                # Let's catch the searched item
                if global_search in name:
                    search_value = float(value)

        # Let's process the avg value by estimating the global bandwidth per
        # file. We keep the biggest in memory for reporting
        if global_search == "avg":
            if (disks_count > 0) and (search_value != -1):
                result = disks_count * search_value
                if result > max_result:
                    max_result = result
                    max_file = data_file

    # Let's print the avg output
    # NOTE(review): "%2.f" prints no decimals; "%.2f" may have been intended
    if global_search == "avg":
        print("Biggest aggregated value of %s was %2.f in file %s\n" % (global_search, max_result, max_file))
    else:
        print("Global search %s is not yet implemented\n" % global_search)
321
795ae706
EV
def render_gnuplot(fio_data_file, gnuplot_output_dir):
    """Run gnuplot on the scripts generated in ``gnuplot_output_dir``.

    When there are at least two traces, every ``*.gnuplot`` compare script
    is rendered first; ``mymath`` and ``mygraph`` are then rendered in all
    cases.  gnuplot is started with ``gnuplot_output_dir`` as its working
    directory and without going through a shell, so directory names holding
    spaces or shell metacharacters are handled.

    A non-zero gnuplot exit status is ignored so that one failing graph does
    not prevent the others from being rendered.  If gnuplot cannot be
    started at all (not installed, missing directory), an error is printed
    and the program exits with status 1.
    """
    import glob
    import subprocess

    def run_script(script_name):
        # Raises OSError when the gnuplot executable cannot be started
        return subprocess.call(["gnuplot", script_name], cwd=gnuplot_output_dir)

    print("Running gnuplot Rendering")
    try:
        # Let's render all the compared files if some
        if len(fio_data_file) > 1:
            print(" |-> Rendering comparing traces")
            compare_scripts = glob.glob(os.path.join(gnuplot_output_dir, "*.gnuplot"))
            for script_path in sorted(compare_scripts):
                run_script(os.path.basename(script_path))
        print(" |-> Rendering math traces")
        run_script("mymath")
        print(" |-> Rendering 2D & 3D traces")
        run_script("mygraph")

        name_of_directory = "the current"
        if gnuplot_output_dir != "./":
            name_of_directory = gnuplot_output_dir
        print("\nRendering traces are available in %s directory" % name_of_directory)
    except OSError:
        print("Could not run gnuplot on mymath or mygraph !\n")
        sys.exit(1)
341
def print_help():
    """Print the command line usage of the tool on standard output."""
    help_lines = (
        'fio2gnuplot.py -ghbiod -t <title> -o <outputfile> -p <pattern> -G <type> -m <time> -M <time>',
        '',
        '-h --help : Print this help',
        '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files',
        '-b or --bandwidth : A predefined pattern for selecting *_bw.log files',
        '-i or --iops : A predefined pattern for selecting *_iops.log files',
        '-g or --gnuplot : Render gnuplot traces before exiting',
        '-o or --outputfile <file> : The basename for gnuplot traces',
        ' - Basename is set with the pattern if defined',
        '-d or --outputdir <dir> : The directory where gnuplot shall render files',
        '-t or --title <title> : The title of the gnuplot traces',
        ' - Title is set with the block size detected in fio traces',
        '-G or --Global <type> : Search for <type> in .global files match by a pattern',
        ' - Available types are : min, max, avg, stddev',
        ' - The .global extension is added automatically to the pattern',
        '-m or --min_time <time> : Only consider data starting from <time> seconds (default is 0)',
        '-M or --max_time <time> : Only consider data ending before <time> seconds (default is -1 aka nolimit)',
    )
    for help_line in help_lines:
        print(help_line)
9402b895
EV
360
def main(argv):
    """Command line entry point: parse ``argv`` and run the requested mode.

    Two modes exist:
      * ``-G <type>``: search ``<type>`` in the ``.global`` files selected
        by the pattern (see ``parse_global_files``);
      * default: turn the selected fio logs into gnuplot data files and
        scripts and, with ``-g``, render them with gnuplot.

    Both the short options and the long options listed by ``print_help``
    are accepted.  Exit statuses used here: 1 for ``-h`` or when no file
    matches the pattern, 2 for an invalid option, 3 when the .gpm helper
    scripts cannot be found (only checked in the default mode, which is the
    only one needing them).
    """
    mode = 'unknown'
    pattern = ''
    pattern_set_by_user = False
    title = 'No title'
    gnuplot_output_filename = 'result'
    gnuplot_output_dir = './'
    gpm_dir = None
    disk_perf = []
    run_gnuplot = False
    parse_global = False
    global_search = ''
    min_time = 0
    max_time = -1

    # The long options must be declared to getopt, otherwise every
    # "--option" documented in the help is rejected as invalid
    long_options = ["help", "bandwidth", "iops", "gnuplot", "outputfile=",
                    "outputdir=", "title=", "pattern=", "Global=",
                    "min_time=", "max_time="]
    try:
        opts, args = getopt.getopt(argv[1:], "ghbio:d:t:p:G:m:M:", long_options)
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-b", "--bandwidth"):
            pattern = '*_bw.log'
        elif opt in ("-i", "--iops"):
            pattern = '*_iops.log'
        elif opt in ("-p", "--pattern"):
            pattern_set_by_user = True
            pattern = arg
            pattern = pattern.replace('\\', '')
        elif opt in ("-o", "--outputfile"):
            gnuplot_output_filename = arg
        elif opt in ("-d", "--outputdir"):
            gnuplot_output_dir = arg
            if not gnuplot_output_dir.endswith('/'):
                gnuplot_output_dir = gnuplot_output_dir + '/'
            if not os.path.exists(gnuplot_output_dir):
                os.makedirs(gnuplot_output_dir)
        elif opt in ("-t", "--title"):
            title = arg
        elif opt in ("-m", "--min_time"):
            min_time = arg
        elif opt in ("-M", "--max_time"):
            max_time = arg
        elif opt in ("-g", "--gnuplot"):
            run_gnuplot = True
        elif opt in ("-G", "--Global"):
            parse_global = True
            global_search = arg
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit(1)

    if parse_global:
        # Adding .global extension to the pattern, unless already there
        if not pattern.endswith('.global'):
            pattern = pattern + '.global'
    else:
        # The .gpm helpers are only needed to generate the gnuplot scripts
        for candidate in ("/usr/share/fio/", "/usr/local/share/fio/"):
            if os.path.isfile(candidate + 'math.gpm'):
                gpm_dir = candidate
                break
        if gpm_dir is None:
            print("Looks like fio didn't got installed properly as no gpm files found in '/usr/share/fio' or '/usr/local/share/fio'\n")
            sys.exit(3)

    fio_data_file = find_file('.', pattern)
    if len(fio_data_file) == 0:
        print("No log file found with pattern %s!" % pattern)
        sys.exit(1)
    else:
        print("%d files Selected with pattern '%s'" % (len(fio_data_file), pattern))

    fio_data_file = sorted(fio_data_file, key=str.lower)
    for data_file in fio_data_file:
        print(' |-> %s' % data_file)
        if "_bw.log" in data_file:
            mode = "Bandwidth (KB/sec)"
        if "_iops.log" in data_file:
            mode = "IO per Seconds (IO/sec)"
        if (title == 'No title') and (mode != 'unknown'):
            if "Bandwidth" in mode:
                title = 'Bandwidth benchmark with %d fio results' % len(fio_data_file)
            if "IO" in mode:
                title = 'IO benchmark with %d fio results' % len(fio_data_file)

    print("")
    # We need to adjust the output filename regarding the pattern required by the user
    if pattern_set_by_user:
        gnuplot_output_filename = pattern
        # As we do have some wildcards in the pattern, let's make this simpler
        # We do remove the simplest parts of the expression to get a clear file name
        gnuplot_output_filename = gnuplot_output_filename.replace('-*-', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('*', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('--', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('.log', '')
        # Ensure that we don't have any starting or trailing dash in the filename
        gnuplot_output_filename = gnuplot_output_filename[:-1] if gnuplot_output_filename.endswith('-') else gnuplot_output_filename
        gnuplot_output_filename = gnuplot_output_filename[1:] if gnuplot_output_filename.startswith('-') else gnuplot_output_filename

    if parse_global:
        parse_global_files(fio_data_file, global_search)
    else:
        blk_size = compute_temp_file(fio_data_file, disk_perf, gnuplot_output_dir, min_time, max_time)
        # Floor division keeps an integer number of KB on every Python version
        title = "%s @ Blocksize = %dK" % (title, blk_size // 1024)
        compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir)
        compute_math(fio_data_file, title, gnuplot_output_filename, gnuplot_output_dir, mode, disk_perf, gpm_dir)
        generate_gnuplot_script(fio_data_file, title, gnuplot_output_filename, gnuplot_output_dir, mode, disk_perf, gpm_dir)

        if run_gnuplot:
            render_gnuplot(fio_data_file, gnuplot_output_dir)

            # Cleaning temporary files. os.remove() does not expand
            # wildcards, so each file of the output directory is removed by
            # name. This is only done once the graphs are rendered: without
            # -g the generated scripts still need these files.
            for temp_file in find_file(gnuplot_output_dir, 'gnuplot_temp_file.*'):
                try:
                    os.remove(gnuplot_output_dir + temp_file)
                except OSError:
                    # Best effort: a leftover temporary file is harmless
                    pass
476
# Script entry point: main()'s return value becomes the exit status
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))