fio2gnuplot: Plotting average value for compare graphs
[fio.git] / tools / plot / fio2gnuplot.py
1 #!/usr/bin/python
2 #
3 #  Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
4 #  Author: Erwan Velu  <erwan@enovance.com>
5 #
6 #  The license below covers all files distributed with fio unless otherwise
7 #  noted in the file itself.
8 #
9 #  This program is free software; you can redistribute it and/or modify
10 #  it under the terms of the GNU General Public License version 2 as
11 #  published by the Free Software Foundation.
12 #
13 #  This program is distributed in the hope that it will be useful,
14 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 #  GNU General Public License for more details.
17 #
18 #  You should have received a copy of the GNU General Public License
19 #  along with this program; if not, write to the Free Software
20 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21
22 import os
23 import fnmatch
24 import sys
25 import getopt
26 import re
27 import math
28 import shutil
29
def find_file(path, pattern):
    """Return the names of the entries of `path` that match `pattern`.

    `pattern` is a shell-style wildcard as understood by fnmatch. The names
    are returned as listed by os.listdir, i.e. not joined with `path`.
    """
    return [entry for entry in os.listdir(path)
            if fnmatch.fnmatch(entry, pattern)]
40
def generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Write the gnuplot scripts that render the selected traces.

    Always (re)writes 'mygraph' in gnuplot_output_dir with one graph2D.gpm
    call per trace. When at least two traces are given it also adds a
    graph3D.gpm call and writes three
    'compare-<gnuplot_output_filename>-2D{raw,smooth,trend}.gnuplot' scripts
    plotting every trace against the global average value.

    fio_data_file           -- list of fio log names, in plotting order
    title                   -- title of the graphs
    gnuplot_output_filename -- basename used for the 3D and compare outputs
    gnuplot_output_dir      -- output directory, used as a plain string prefix
    mode                    -- label of the y axis
    disk_perf               -- per-trace lists of values, indexed like fio_data_file
    gpm_dir                 -- directory holding the .gpm macros

    Raises ZeroDivisionError (from average()) when a trace holds no value.
    """
    print("Generating rendering scripts")

    # Plotting 3D or comparing graphs has no meaning with fewer than 2 traces
    compare_traces = len(fio_data_file) > 1

    graph_lines = []
    # One entry per kind of comparison graph: (script name, plot style, fragments)
    compare_scripts = []

    if compare_traces:
        graph_lines.append("call '%s/graph3D.gpm' '%s' '%s' '' '%s' '%s'\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode))

        # Settings shared by every kind of comparison graph (raw/smooth/trend)
        header = (
            "\n"
            "set title '%s'\n"
            "set terminal png size 1280,1024\n"
            "set ytics axis out auto\n"
            "set key top left reverse\n"
            "set xlabel \"Time (Seconds)\"\n"
            "set ylabel '%s'\n"
            "set yrange [0:]\n"
            "set style line 1 lt 1 lw 3 pt 3 linecolor rgb \"green\"\n"
        ) % (title, mode)

        # Let's plot the average value for all the traces
        global_avg = average([value for values in disk_perf for value in values])

        for kind, plot_style in (("2Draw", "with linespoints"),
                                 ("2Dsmooth", "smooth csplines"),
                                 ("2Dtrend", "smooth bezier")):
            script_name = "compare-%s-%s" % (gnuplot_output_filename, kind)
            fragments = [
                header,
                # Each kind of graph is rendered to its own png file
                "set output '%s.png'\n" % script_name,
                "plot %s w l ls 1 ti 'Global average value (%.2f)'" % (global_avg, global_avg),
            ]
            compare_scripts.append((script_name, plot_style, fragments))

    for pos, trace in enumerate(fio_data_file):
        # Name of the temporary data file holding this trace, relative to the
        # output directory (no directory prefix is put in the scripts)
        tmp_filename = "gnuplot_temp_file.%d" % pos

        # Adding the plot instruction for each kind of comparing graphs
        # (compare_scripts is empty when there is a single trace)
        for _, plot_style, fragments in compare_scripts:
            fragments.append(",\\\n'%s' using 2:3 %s title '%s'" % (tmp_filename, plot_style, trace))

        png_file = trace.replace('.log', '')
        graph_lines.append(
            "call '%s/graph2D.gpm' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%f'\n" % (
                gpm_dir, title, tmp_filename, trace,
                "%s-2Draw" % png_file, mode,
                "%s-2Dsmooth" % png_file, "%s-2Dtrend" % png_file,
                average(disk_perf[pos])))

    # Everything is computed before touching the disk, and each file is written
    # inside a 'with' block so no handle is left open if something fails.
    with open(gnuplot_output_dir + 'mygraph', 'w') as graph_script:
        graph_script.writelines(graph_lines)

    # The compare scripts are written directly instead of copying an
    # intermediate 'compare.gnuplot' file that had to be removed afterwards.
    for script_name, _, fragments in compare_scripts:
        with open(gnuplot_output_dir + script_name + ".gnuplot", 'w') as compare_script:
            compare_script.writelines(fragments)
113
def generate_gnuplot_math_script(title,gnuplot_output_filename,mode,average,gnuplot_output_dir,gpm_dir):
    """Append one math.gpm call to the 'mymath' script of gnuplot_output_dir.

    The call passes the title, the data file name (given twice), the mode and
    the `average` value to the math.gpm macro found in gpm_dir.
    """
    call_line = "call '%s/math.gpm' '%s' '%s' '' '%s' '%s' %s\n" % (
        gpm_dir, title, gnuplot_output_filename, gnuplot_output_filename, mode, average)
    # Append mode: the script accumulates one call per invocation
    with open(gnuplot_output_dir + 'mymath', 'a') as math_script:
        math_script.write(call_line)
119
def compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir):
    """Concatenate the per-trace temporary files into one aggregated file.

    For each entry of fio_data_file, the matching
    '<gnuplot_output_dir>gnuplot_temp_file.<index>' is copied into
    '<gnuplot_output_dir><gnuplot_output_filename>', preceded by a comment
    naming the originating trace and followed by an empty line.

    Raises IOError/OSError if a temporary file is missing.
    """
    print("Processing data file 2/2")

    with open(gnuplot_output_dir + gnuplot_output_filename, "w") as aggregated:
        for index, trace in enumerate(fio_data_file):
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir, index)
            # Let's add some information
            aggregated.write("# Disk%d was coming from %s\n" % (index, trace))
            # Only one temporary file is open at a time, and it is closed even
            # when the copy fails
            with open(tmp_filename, 'r') as temp_file:
                aggregated.write(temp_file.read())
            aggregated.write("\n")
141
def average(s):
    """Return the arithmetic mean of the sequence `s` as a float.

    Raises ZeroDivisionError when `s` is empty.
    """
    total = sum(s)
    count = len(s)
    # Multiplying by 1.0 forces a floating point division
    return total * 1.0 / count
143
def compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir, min_time, max_time):
    """Filter the fio logs by time and write one temporary data file per log.

    Each log line is expected to hold at least four comma or space separated
    columns: time in msec, value, a third column that is ignored, block size.
    For every log, the values whose time lies strictly between min_time and
    max_time are appended to a new list in disk_perf and written to
    '<gnuplot_output_dir>gnuplot_temp_file.<index>' as '<index> <seconds> <value>'.

    fio_data_file -- list of fio log paths
    disk_perf     -- list extended in place with one list of int per log
    min_time      -- lower bound in seconds (number or numeric string);
                     0 is replaced by 0.5
    max_time      -- upper bound in seconds (number or numeric string);
                     -1 means no limit

    Returns the block size read from the first data line (0 if none was read).
    Prints an error and exits with status 1 on a malformed line.
    """
    # The bounds may come straight from the command line as strings
    min_seconds = float(min_time)
    max_seconds = float(max_time)
    no_limit = (max_seconds == -1)

    end_time = max_time
    if no_limit:
        end_time = "infinite"
    print("Processing data file 1/2 with %s<time<%s" % (min_time, end_time))

    # We ignore the first 500msec as it doesn't seem to be part of the real benchmark
    # Time < 500 usually reports BW=0 breaking the min computing
    if min_seconds == 0:
        min_seconds = 0.5

    # Log times are expressed in msec
    min_msec = min_seconds * 1000
    max_msec = max_seconds * 1000

    sources = []
    temp_outfile = []
    blk_size = 0
    try:
        for pos, name in enumerate(fio_data_file):
            sources.append(open(name))
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir, pos)
            gnuplot_file = open(tmp_filename, 'w')
            temp_outfile.append(gnuplot_file)
            gnuplot_file.write("#Temporary file based on file %s\n" % name)
            disk_perf.append([])

        while True:
            # Read the logs in lockstep, one line of each per iteration
            rows = [source.readline().replace(',', ' ').split() for source in sources]

            # Stop when no log provided data (this also covers an empty list of logs)
            if not any(rows):
                break

            # Index is used to remember what file was featuring what value
            for index, fields in enumerate(rows):
                if not fields:
                    # This log is exhausted (or has a blank line): nothing to record
                    continue
                try:
                    if len(fields) < 4:
                        raise ValueError("expected at least 4 columns")
                    time = float(fields[0])
                    perf = fields[1]
                    if blk_size == 0:
                        blk_size = int(fields[3])

                    # Then we estimate if the data we got is part of the time range we want to plot
                    if time > min_msec and (no_limit or time < max_msec):
                        disk_perf[index].append(int(perf))
                        temp_outfile[index].write("%d %.2f %s\n" % (index, time / 1000, perf))
                except ValueError:
                    print("Error while reading the following line :")
                    print(fields)
                    sys.exit(1)
    finally:
        # Runs on normal return, on error and on sys.exit()
        for handle in sources + temp_outfile:
            handle.close()
    return blk_size
218
def compute_math(fio_data_file, title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Compute min/max/average/stddev per trace and globally, and save them.

    Writes '<gnuplot_output_dir><gnuplot_output_filename>' + '.average',
    '.min', '.max' and '.stddev' (one 'index value' line per trace) and
    '.global' (name=value lines over all the values), then regenerates the
    'mymath' gnuplot script with one call per statistic.

    disk_perf holds one list of values per trace, indexed like fio_data_file.

    Raises ZeroDivisionError or ValueError when a trace holds no value.
    """
    print("Computing Maths")
    base = gnuplot_output_dir + gnuplot_output_filename
    global_min = []
    global_max = []

    # Every output starts with the same header line
    header = 'DiskName %s\n' % mode
    min_lines = [header]
    max_lines = [header]
    average_lines = [header]
    stddev_lines = [header]

    for disk, trace in enumerate(fio_data_file):
        values = disk_perf[disk]
        origin = "# Disk%d was coming from %s\n" % (disk, trace)
        avg = average(values)
        variance = [(x - avg)**2 for x in values]
        standard_deviation = math.sqrt(average(variance))
        local_min = min(values)
        local_max = max(values)

        # '%d' truncates the average and the deviation to integers
        average_lines += [origin, '%d %d\n' % (disk, avg)]
        stddev_lines += [origin, '%d %d\n' % (disk, standard_deviation)]
        min_lines += [origin, '%d %d\n' % (disk, local_min)]
        max_lines += [origin, '%d %d\n' % (disk, local_max)]
        global_min.append(int(local_min))
        global_max.append(int(local_max))

    # Flatten in linear time (sum(disk_perf, []) copies the list at each step)
    global_disk_perf = [value for values in disk_perf for value in values]
    avg = average(global_disk_perf)
    variance = [(x - avg)**2 for x in global_disk_perf]
    standard_deviation = math.sqrt(average(variance))

    global_lines = [
        'min=%.2f\n' % min(global_disk_perf),
        'max=%.2f\n' % max(global_disk_perf),
        'avg=%.2f\n' % avg,
        'stddev=%.2f\n' % standard_deviation,
        'values_count=%d\n' % len(global_disk_perf),
        'disks_count=%d\n' % len(fio_data_file),
    ]

    # Files are written once everything is computed, each in its own 'with'
    # block so that no handle stays open if something fails
    for suffix, lines in (('.average', average_lines),
                          ('.min', min_lines),
                          ('.max', max_lines),
                          ('.stddev', stddev_lines),
                          ('.global', global_lines)):
        with open(base + suffix, 'w') as output:
            output.writelines(lines)

    # generate_gnuplot_math_script() appends, so start from a fresh script
    try:
        os.remove(gnuplot_output_dir+'mymath')
    except OSError:
        # No script left from a previous run
        pass

    generate_gnuplot_math_script("Average values of "+title,gnuplot_output_filename+'.average',mode,int(avg),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Min values of "+title,gnuplot_output_filename+'.min',mode,average(global_min),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Max values of "+title,gnuplot_output_filename+'.max',mode,average(global_max),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Standard Deviation of "+title,gnuplot_output_filename+'.stddev',mode,int(standard_deviation),gnuplot_output_dir,gpm_dir)
279
def parse_global_files(fio_data_file, global_search):
    """Search the given .global files for the biggest aggregated value.

    Each file is read as 'name=value' lines; reading stops at the first line
    that does not split into exactly a name and a value. Only the "avg"
    search is implemented: for each file the aggregated value is
    disks_count * avg, and the biggest one is printed with its file name.

    fio_data_file -- list of .global file paths
    global_search -- name of the item to search for
    """
    max_result = 0
    max_file = ''
    for filename in fio_data_file:
        # Reset for every file so that a file lacking 'disks_count' neither
        # reuses the previous file's value nor triggers a NameError
        disks_count = 0
        search_value = -1

        with open(filename) as global_file:
            for line in global_file:
                # We do split the name from the value
                fields = line.split("=")
                if len(fields) != 2:
                    break
                name, value = fields
                if not name:
                    break

                # disks_count is not a global_search item
                # As we need it for some computation, let's save it
                if name == "disks_count":
                    disks_count = int(value)

                # Let's catch the searched item
                if global_search in name:
                    search_value = float(value)

        # Let's process the avg value by estimating the global bandwidth per file
        # We keep the biggest in memory for reporting
        if global_search == "avg":
            if (disks_count > 0) and (search_value != -1):
                result = disks_count * search_value
                if result > max_result:
                    max_result = result
                    max_file = filename

    # Let's print the avg output
    if global_search == "avg":
        # NOTE(review): '%2.f' prints no decimals; '%.2f' may have been intended - confirm
        print("Biggest aggregated value of %s was %2.f in file %s\n" % (global_search, max_result, max_file))
    else:
        print("Global search %s is not yet implemented\n" % global_search)
324
def render_gnuplot(fio_data_file, gnuplot_output_dir):
    """Run gnuplot on the scripts found in gnuplot_output_dir.

    Renders every '*.gnuplot' compare script when more than one trace was
    selected, then 'mymath' and 'mygraph'. A script on which gnuplot reports
    a failure is signalled and the rendering goes on; if gnuplot cannot be
    started at all, an error is printed and the program exits with status 1.
    """
    # Imported here because the module header does not import subprocess
    import subprocess

    def run_gnuplot(script):
        # gnuplot runs from the output directory, as the former
        # 'cd <dir>; gnuplot <script>' shell command did. Passing an argument
        # list keeps directory and script names away from any shell.
        status = subprocess.call(["gnuplot", script], cwd=gnuplot_output_dir)
        if status != 0:
            print(" |-> gnuplot failed on %s (exit status %d)" % (script, status))

    print("Running gnuplot Rendering")
    try:
        # Let's render all the compared files if some
        if len(fio_data_file) > 1:
            print(" |-> Rendering comparing traces")
            # Same selection as the shell glob '*.gnuplot': sorted, no hidden files
            compare_scripts = sorted(
                name for name in fnmatch.filter(os.listdir(gnuplot_output_dir), '*.gnuplot')
                if not name.startswith('.'))
            for script in compare_scripts:
                run_gnuplot(script)
        print(" |-> Rendering math traces")
        run_gnuplot("mymath")
        print(" |-> Rendering 2D & 3D traces")
        run_gnuplot("mygraph")
    except OSError:
        # Raised when gnuplot (or the output directory) cannot be found
        print("Could not run gnuplot on mymath or mygraph !\n")
        sys.exit(1)

    name_of_directory = "the current"
    if gnuplot_output_dir != "./":
        name_of_directory = gnuplot_output_dir
    print("\nRendering traces are available in %s directory" % name_of_directory)
344
def print_help():
    """Print the command-line usage of fio2gnuplot.py on standard output."""
    help_lines = (
        'fio2gnuplot.py -ghbiod -t <title> -o <outputfile> -p <pattern> -G <type> -m <time> -M <time>',
        '',
        '-h --help                           : Print this help',
        '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files',
        '-b           or --bandwidth         : A predefined pattern for selecting *_bw.log files',
        '-i           or --iops              : A predefined pattern for selecting *_iops.log files',
        '-g           or --gnuplot           : Render gnuplot traces before exiting',
        '-o           or --outputfile <file> : The basename for gnuplot traces',
        '                                       - Basename is set with the pattern if defined',
        '-d           or --outputdir <dir>   : The directory where gnuplot shall render files',
        '-t           or --title <title>     : The title of the gnuplot traces',
        '                                       - Title is set with the block size detected in fio traces',
        '-G           or --Global <type>     : Search for <type> in .global files match by a pattern',
        '                                       - Available types are : min, max, avg, stddev',
        '                                       - The .global extension is added automatically to the pattern',
        '-m           or --min_time <time>   : Only consider data starting from <time> seconds (default is 0)',
        '-M           or --max_time <time>   : Only consider data ending before <time> seconds (default is -1 aka nolimit)',
    )
    # One print per line; the empty entry yields the blank separator line
    for help_line in help_lines:
        print(help_line)
363
def _parse_time_option(value):
    """Convert a -m/-M argument (seconds) to a number, or print help and exit(2)."""
    try:
        seconds = float(value)
        # Whole numbers stay integers so that they are reported without '.0'
        if seconds == int(seconds):
            seconds = int(seconds)
    except (ValueError, OverflowError):
        print_help()
        sys.exit(2)
    return seconds


def main(argv):
    """Entry point: parse argv, select the fio logs and generate the traces.

    argv is the full argument vector (argv[0] is skipped). Depending on the
    options, either searches the .global files (-G) or computes the temporary
    files, the statistics and the gnuplot scripts, optionally rendering them
    with gnuplot (-g).

    Exits with status 1 when no file matches or after printing the help, 2 on
    invalid options and 3 when the gpm files are not installed.
    """
    mode='unknown'
    pattern=''
    pattern_set_by_user=False
    title='No title'
    gnuplot_output_filename='result'
    gnuplot_output_dir='./'
    gpm_dir="/usr/share/fio/"
    disk_perf=[]
    run_gnuplot=False
    parse_global=False
    global_search=''
    min_time=0
    max_time=-1

    if not os.path.isfile(gpm_dir+'math.gpm'):
        gpm_dir="/usr/local/share/fio/"
        if not os.path.isfile(gpm_dir+'math.gpm'):
            print("Looks like fio didn't got installed properly as no gpm files found in '/usr/share/fio' or '/usr/local/share/fio'\n")
            sys.exit(3)

    # The long options are the ones advertised by print_help(); without this
    # list getopt rejects every '--option'
    long_options = ["gnuplot", "help", "bandwidth", "iops", "outputfile=",
                    "outputdir=", "title=", "pattern=", "Global=",
                    "min_time=", "max_time="]
    try:
        opts, args = getopt.getopt(argv[1:], "ghbio:d:t:p:G:m:M:", long_options)
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-b", "--bandwidth"):
            pattern='*_bw.log'
        elif opt in ("-i", "--iops"):
            pattern='*_iops.log'
        elif opt in ("-p", "--pattern"):
            pattern_set_by_user=True
            pattern=arg
            pattern=pattern.replace('\\','')
        elif opt in ("-o", "--outputfile"):
            gnuplot_output_filename=arg
        elif opt in ("-d", "--outputdir"):
            gnuplot_output_dir=arg
            if not gnuplot_output_dir.endswith('/'):
                gnuplot_output_dir=gnuplot_output_dir+'/'
            if not os.path.exists(gnuplot_output_dir):
                os.makedirs(gnuplot_output_dir)
        elif opt in ("-t", "--title"):
            title=arg
        elif opt in ("-m", "--min_time"):
            # getopt returns strings: convert so that the comparisons with
            # the 0 and -1 defaults keep working
            min_time=_parse_time_option(arg)
        elif opt in ("-M", "--max_time"):
            max_time=_parse_time_option(arg)
        elif opt in ("-g", "--gnuplot"):
            run_gnuplot=True
        elif opt in ("-G", "--Global"):
            parse_global=True
            global_search=arg
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit(1)

    # Adding .global extension to the pattern, unless it already ends with it
    if parse_global:
        if not pattern.endswith('.global'):
            pattern = pattern+'.global'

    fio_data_file=find_file('.',pattern)
    if len(fio_data_file) == 0:
        print("No log file found with pattern %s!" % pattern)
        sys.exit(1)
    else:
        print("%d files Selected with pattern '%s'" % (len(fio_data_file), pattern))

    fio_data_file=sorted(fio_data_file, key=str.lower)
    for trace in fio_data_file:
        print(' |-> %s' % trace)
        if "_bw.log" in trace:
            mode="Bandwidth (KB/sec)"
        if "_iops.log" in trace:
            mode="IO per Seconds (IO/sec)"
    if (title == 'No title') and (mode != 'unknown'):
        if "Bandwidth" in mode:
            title='Bandwidth benchmark with %d fio results' % len(fio_data_file)
        if "IO" in mode:
            title='IO benchmark with %d fio results' % len(fio_data_file)

    print("")
    # We need to adjust the output filename regarding the pattern required by the user
    if pattern_set_by_user:
        gnuplot_output_filename=pattern
        # As we do have some wildcards in the pattern, let's make this simpler
        # We do remove the simplest parts of the expression to get a clear file name
        gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('--','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('.log','')
        # Ensure that we don't have any leading or trailing dash in the filename
        if gnuplot_output_filename.endswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[:-1]
        if gnuplot_output_filename.startswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[1:]

    if parse_global:
        parse_global_files(fio_data_file, global_search)
    else:
        blk_size=compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir,min_time,max_time)
        title="%s @ Blocksize = %dK" % (title,blk_size//1024)
        compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir)
        compute_math(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)
        generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)

        if run_gnuplot:
            render_gnuplot(fio_data_file, gnuplot_output_dir)

            # Cleaning temporary files. os.remove() does not expand wildcards,
            # so the matching names are listed explicitly. This is only done
            # once the traces are rendered: the generated scripts read these
            # files, which must remain available for a later manual rendering.
            for name in fnmatch.filter(os.listdir(gnuplot_output_dir), 'gnuplot_temp_file.*'):
                try:
                    os.remove(gnuplot_output_dir + name)
                except OSError:
                    # Best effort: a leftover temporary file is harmless
                    pass
479
# Script entry point: the whole argument vector is handed to main() and its
# return value becomes the exit status (main() has no return statement, so a
# run that reaches its end exits with status 0).
if __name__ == "__main__":
    sys.exit(main(sys.argv))