fio2gnuplot: Printing number of selected files
[fio.git] / tools / plot / fio2gnuplot.py
1 #!/usr/bin/python
2 #
3 #  Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
4 #  Author: Erwan Velu  <erwan@enovance.com>
5 #
6 #  The license below covers all files distributed with fio unless otherwise
7 #  noted in the file itself.
8 #
9 #  This program is free software; you can redistribute it and/or modify
10 #  it under the terms of the GNU General Public License version 2 as
11 #  published by the Free Software Foundation.
12 #
13 #  This program is distributed in the hope that it will be useful,
14 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 #  GNU General Public License for more details.
17 #
18 #  You should have received a copy of the GNU General Public License
19 #  along with this program; if not, write to the Free Software
20 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21
22 import os
23 import fnmatch
24 import sys
25 import getopt
26 import re
27 import math
28
def find_file(path, pattern):
    """Return the names of the entries of *path* that match *pattern*.

    *pattern* is a shell-style wildcard as understood by ``fnmatch.fnmatch``.
    The returned names are the bare directory entries (they are not joined
    with *path*), in the order ``os.listdir`` yields them.
    """
    return [entry for entry in os.listdir(path)
            if fnmatch.fnmatch(entry, pattern)]
39
def generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Write the 'mygraph' gnuplot script into *gnuplot_output_dir*.

    When more than one fio file is selected, a single graph3D.gpm call is
    emitted for the aggregated output file.  Then one graph2D.gpm call is
    emitted per fio file; it refers to that file's temporary data file
    ('gnuplot_temp_file.<position>'), to output names derived from the log
    name with '.log' removed, and to the average of disk_perf[<position>].
    """
    with open(gnuplot_output_dir + 'mygraph', 'w') as script:
        if len(fio_data_file) > 1:
            script.write("call \'%s/graph3D.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\'\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode))

        for rank, log_name in enumerate(fio_data_file):
            data_name = "gnuplot_temp_file.%d" % rank
            stem = log_name.replace('.log','')
            raw_name = "%s-2Draw" % (stem)
            smooth_name = "%s-2Dsmooth" % (stem)
            trend_name = "%s-2Dtrend" % (stem)
            mean = average(disk_perf[rank])
            script.write("call \'%s/graph2D.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\' \'%s\' \'%s\' \'%f\'\n" % (gpm_dir,title,data_name,raw_name,mode,smooth_name,trend_name,mean))
59
def generate_gnuplot_math_script(title,gnuplot_output_filename,mode,average,gnuplot_output_dir,gpm_dir):
    """Append one math.gpm call to the 'mymath' script in *gnuplot_output_dir*.

    The file is opened in append mode, so successive calls accumulate one
    line each.  *average* is written unquoted as the last argument of the
    call.
    """
    call_line = "call \'%s/math.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\' %s\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode,average)
    with open(gnuplot_output_dir + 'mymath', 'a') as script:
        script.write(call_line)
65
def compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir):
    """Concatenate the per-file temporary data files into one output file.

    For every position in *fio_data_file* the matching
    '<gnuplot_output_dir>gnuplot_temp_file.<position>' is read.  All of them
    are opened before the output file is created.  Each one is copied to
    '<gnuplot_output_dir><gnuplot_output_filename>' after a comment line
    naming the fio log it came from, and is followed by an empty line.
    """
    readers = [open("%sgnuplot_temp_file.%d" % (gnuplot_output_dir, rank), 'r')
               for rank in range(len(fio_data_file))]

    merged = open(gnuplot_output_dir + gnuplot_output_filename, "w")
    for rank, reader in enumerate(readers):
        # Remember which fio log each block of samples belongs to
        merged.write("# Disk%d was coming from %s\n" % (rank, fio_data_file[rank]))
        merged.write(reader.read())
        merged.write("\n")
        reader.close()
    merged.close()
86
def average(s):
    """Return the arithmetic mean of the sized collection *s* as a float.

    An empty collection raises ZeroDivisionError.
    """
    total = sum(s)
    count = len(s)
    # Multiplying by 1.0 forces a float division even for integer samples
    return total * 1.0 / count
88
def compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir):
    """Convert fio logs into per-file gnuplot data files and collect samples.

    For the fio log at position N in *fio_data_file*, a file named
    '<gnuplot_output_dir>gnuplot_temp_file.N' is written.  It starts with a
    comment naming the source log and then holds one line per kept sample:
    '<N> <time in seconds, 2 decimals> <value>'.

    One list is appended to *disk_perf* per log and receives the kept values
    as integers.  Lists are addressed from index 0, as in the original code,
    so *disk_perf* is expected to be empty on entry.

    The logs are read in lock-step, one line from each per round, with
    commas treated as whitespace.  A line must provide at least four fields:
    time, value, a field this function ignores, and block size.  Any extra
    trailing fields are ignored.  Reading stops at the first round in which
    every log yields an empty line (end of file or a blank line).

    Samples whose time is not above 500 msec are dropped.  A log that runs
    out before the others simply stops contributing: nothing is recorded for
    it, so shorter logs are not padded with zeros.

    Returns the block size taken from the first record whose block-size
    field is non-zero, or 0 when there is none.
    """
    # Stand-in record for a log with nothing left to read.  Its time of -1 is
    # never above the warm-up threshold, so it is never recorded.
    exhausted = ['-1', '0', '0', '0']
    # We ignore the first 500msec as it doesn't seem to be part of the real
    # benchmark: such samples usually report BW=0, breaking the min computing
    warmup_msec = 500

    files = []
    temp_outfile = []
    blk_size = 0
    try:
        for pos, log_name in enumerate(fio_data_file):
            files.append(open(log_name))
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir, pos)
            gnuplot_file = open(tmp_filename, 'w')
            temp_outfile.append(gnuplot_file)
            gnuplot_file.write("#Temporary file based on file %s\n" % log_name)
            disk_perf.append([])

        while True:
            current_line = []
            nb_empty_files = 0
            for log in files:
                fields = log.readline().replace(',', ' ').split()
                if not fields:
                    nb_empty_files += 1
                    fields = exhausted
                current_line.append(fields)

            # Every log is exhausted (this also covers an empty file list)
            if nb_empty_files == len(files):
                break

            # The position in current_line is the position of the log itself,
            # so a sample can never be attributed to another log even when
            # some logs skip this round.
            for index, fields in enumerate(current_line):
                time, perf, _unused, block_size = fields[:4]
                if blk_size == 0:
                    blk_size = int(block_size)

                if int(time) > warmup_msec:
                    disk_perf[index].append(int(perf))
                    temp_outfile[index].write("%s %.2f %s\n" % (index, float(time) / 1000, perf))
    finally:
        # Release every handle, even if a log is missing or malformed
        for handle in files + temp_outfile:
            handle.close()

    return blk_size
149
def compute_math(fio_data_file, title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Write the statistics files and regenerate the 'mymath' gnuplot script.

    Five files named '<gnuplot_output_dir><gnuplot_output_filename>.<kind>'
    are written:

    - '.average', '.min', '.max' and '.stddev' start with a
      'DiskName <mode>' header, then hold for each disk a comment naming the
      source fio file followed by '<disk index> <value>'.  Values are
      formatted with %d, so averages and deviations are truncated.
    - '.global' holds min/max/avg/stddev over all samples of *disk_perf*,
      plus the number of samples and the number of disks.

    The existing 'mymath' script, if any, is removed and four math.gpm calls
    are written to it (average, min, max, standard deviation).

    *disk_perf* must hold one non-empty list of samples per entry of
    *fio_data_file*: an empty list makes average() or min() raise.
    """
    base_name = gnuplot_output_dir + gnuplot_output_filename
    per_disk_kinds = ('average', 'min', 'max', 'stddev')
    global_min = []
    global_max = []

    outputs = {}
    try:
        for kind in per_disk_kinds + ('global',):
            outputs[kind] = open(base_name + '.' + kind, 'w')

        for kind in per_disk_kinds:
            outputs[kind].write('DiskName %s\n' % mode)

        for disk, source in enumerate(fio_data_file):
            origin = "# Disk%d was coming from %s\n" % (disk, source)
            for kind in per_disk_kinds:
                outputs[kind].write(origin)

            samples = disk_perf[disk]
            avg = average(samples)
            # A list (not a lazy map object) so that average() can take len()
            variance = [(x - avg) ** 2 for x in samples]
            standard_deviation = math.sqrt(average(variance))
            local_min = min(samples)
            local_max = max(samples)

            outputs['average'].write('%d %d\n' % (disk, avg))
            outputs['stddev'].write('%d %d\n' % (disk, standard_deviation))
            outputs['min'].write('%d %d\n' % (disk, local_min))
            outputs['max'].write('%d %d\n' % (disk, local_max))
            global_min.append(int(local_min))
            global_max.append(int(local_max))

        # Flatten all the series in linear time
        global_disk_perf = [value for series in disk_perf for value in series]
        avg = average(global_disk_perf)
        variance = [(x - avg) ** 2 for x in global_disk_perf]
        standard_deviation = math.sqrt(average(variance))

        global_file = outputs['global']
        global_file.write('min=%.2f\n' % min(global_disk_perf))
        global_file.write('max=%.2f\n' % max(global_disk_perf))
        global_file.write('avg=%.2f\n' % avg)
        global_file.write('stddev=%.2f\n' % standard_deviation)
        global_file.write('values_count=%d\n' % len(global_disk_perf))
        global_file.write('disks_count=%d\n' % len(fio_data_file))
    finally:
        # Close whatever got opened, even when the computation fails
        for handle in outputs.values():
            handle.close()

    # generate_gnuplot_math_script() appends, so start from a fresh script.
    # A missing file is fine; anything else is not silenced.
    try:
        os.remove(gnuplot_output_dir+'mymath')
    except OSError:
        pass

    generate_gnuplot_math_script("Average values of "+title,gnuplot_output_filename+'.average',mode,int(avg),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Min values of "+title,gnuplot_output_filename+'.min',mode,average(global_min),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Max values of "+title,gnuplot_output_filename+'.max',mode,average(global_max),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Standard Deviation of "+title,gnuplot_output_filename+'.stddev',mode,int(standard_deviation),gnuplot_output_dir,gpm_dir)
209
def parse_global_files(fio_data_file, global_search):
    """Scan '.global' statistics files and report the biggest aggregate.

    Each file of *fio_data_file* is read as 'name=value' lines.  Reading of
    a file stops at the first line that is not exactly one non-empty name,
    one '=' and a value.  'disks_count' is remembered, as is the value of
    any line whose name contains *global_search*.

    Only the 'avg' search is implemented: for each file the aggregate is
    disks_count * avg, and the biggest one is printed with its file name.
    Files lacking either value are ignored.  Any other search type prints a
    'not yet implemented' message.
    """
    max_result = 0
    max_file = ''
    for file_name in fio_data_file:
        # Reset for every file so that a file without these entries neither
        # fails nor inherits the values of the previous one.
        disks_count = 0
        search_value = -1

        with open(file_name) as stats:
            for line in stats:
                fields = line.split("=")
                if len(fields) != 2 or not fields[0]:
                    break
                name, value = fields

                # disks_count is not a global_search item.
                # As we need it for some computation, let's save it
                if name == "disks_count":
                    disks_count = int(value)

                # Let's catch the searched item
                if global_search in name:
                    search_value = float(value)

        # Estimate the global bandwidth of the file and keep the biggest
        if global_search == "avg" and disks_count > 0 and search_value != -1:
            result = disks_count * search_value
            if result > max_result:
                max_result = result
                max_file = file_name

    if global_search == "avg":
        # NOTE(review): '%2.f' prints no decimals; '%.2f' may have been meant
        print("Biggest aggregated value of %s was %2.f in file %s\n" % (global_search, max_result, max_file))
    else:
        print("Global search %s is not yet implemented\n" % global_search)
254
def render_gnuplot(gnuplot_output_dir):
    """Run gnuplot on the 'mymath' and 'mygraph' scripts of the output dir.

    Both scripts are always attempted, with *gnuplot_output_dir* as working
    directory.  If gnuplot cannot be started or returns a non-zero status
    for either script, an error is printed and the program exits with
    status 1.
    """
    # Imported here because the module-level imports do not provide it
    import subprocess

    print("Running gnuplot Rendering\n")
    failed = False
    for script in ('mymath', 'mygraph'):
        try:
            # No shell involved: the directory name is never interpreted
            status = subprocess.call(['gnuplot', script], cwd=gnuplot_output_dir)
        except OSError:
            # gnuplot is not installed or the directory cannot be entered
            status = -1
        if status != 0:
            failed = True

    if failed:
        print("Could not run gnuplot on mymath or mygraph !\n")
        sys.exit(1)
263
def print_help():
    """Print the command-line usage summary on standard output."""
    usage = (
        'fio2gnuplot.py -ghbio -t <title> -o <outputfile> -p <pattern>',
        '',
        '-h --help                           : Print this help',
        '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files',
        '-b           or --bandwidth         : A predefined pattern for selecting *_bw.log files',
        '-i           or --iops              : A predefined pattern for selecting *_iops.log files',
        '-g           or --gnuplot           : Render gnuplot traces before exiting',
        '-o           or --outputfile <file> : The basename for gnuplot traces',
        '                                       - Basename is set with the pattern if defined',
        '-d           or --outputdir <dir>   : The directory where gnuplot shall render files',
        '-t           or --title <title>     : The title of the gnuplot traces',
        '                                       - Title is set with the block size detected in fio traces',
        '-G           or --Global <type>     : Search for <type> in .global files match by a pattern',
        '                                       - Available types are : min, max, avg, stddev',
        '                                       - The .global extension is added automatically to the pattern',
    )
    # One output line per entry; the empty entry yields the blank line
    for text in usage:
        print(text)
280
def main(argv):
    """Command-line entry point of fio2gnuplot.

    *argv* is the full argument vector (program name first).  The function
    locates the gnuplot macro directory, parses the options, selects the fio
    files of the current directory matching the pattern, and then either:

    - searches the matching '.global' files (-G), or
    - builds the temporary data files, the aggregated file, the statistics
      files and the gnuplot scripts, and optionally renders them (-g).

    Exit statuses raised through sys.exit: 3 when no gpm files are
    installed, 2 on a bad option, 1 for the help or when no file matches.
    """
    mode='unknown'
    pattern=''
    pattern_set_by_user=False
    title='No title'
    gnuplot_output_filename='result'
    gnuplot_output_dir='./'
    gpm_dir="/usr/share/fio/"
    disk_perf=[]
    run_gnuplot=False
    parse_global=False
    global_search=''

    if not os.path.isfile(gpm_dir+'math.gpm'):
        gpm_dir="/usr/local/share/fio/"
        if not os.path.isfile(gpm_dir+'math.gpm'):
            print("Looks like fio didn't got installed properly as no gpm files found in '/usr/share/fio' or '/usr/local/share/fio'\n")
            sys.exit(3)

    # The long options are the ones advertised by print_help() and tested
    # below; they have to be registered to be accepted by getopt.
    long_options = ["gnuplot", "help", "bandwidth", "iops", "outputfile=",
                    "outputdir=", "title=", "pattern=", "Global="]
    try:
        opts, _ = getopt.getopt(argv[1:], "ghbio:d:t:p:G:", long_options)
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-b", "--bandwidth"):
            pattern='*_bw.log'
        elif opt in ("-i", "--iops"):
            pattern='*_iops.log'
        elif opt in ("-p", "--pattern"):
            pattern_set_by_user=True
            pattern=arg
            pattern=pattern.replace('\\','')
        elif opt in ("-o", "--outputfile"):
            gnuplot_output_filename=arg
        elif opt in ("-d", "--outputdir"):
            gnuplot_output_dir=arg
            if not gnuplot_output_dir.endswith('/'):
                gnuplot_output_dir=gnuplot_output_dir+'/'
            if not os.path.exists(gnuplot_output_dir):
                os.makedirs(gnuplot_output_dir)
        elif opt in ("-t", "--title"):
            title=arg
        elif opt in ("-g", "--gnuplot"):
            run_gnuplot=True
        elif opt in ("-G", "--Global"):
            parse_global=True
            global_search=arg
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit(1)

    # Adding the .global extension to the pattern, unless it is already there
    if parse_global and not pattern.endswith('.global'):
        pattern = pattern+'.global'

    fio_data_file=find_file('.',pattern)
    if len(fio_data_file) == 0:
        print("No log file found with pattern %s!" % pattern)
        sys.exit(1)
    else:
        print("%d files Selected with pattern '%s'" % (len(fio_data_file), pattern))

    fio_data_file=sorted(fio_data_file, key=str.lower)
    for file_name in fio_data_file:
        print(' |-> %s' % file_name)
        if "_bw.log" in file_name:
            mode="Bandwidth (KB/sec)"
        if "_iops.log" in file_name:
            mode="IO per Seconds (IO/sec)"
    if (title == 'No title') and (mode != 'unknown'):
        if "Bandwidth" in mode:
            title='Bandwidth benchmark with %d fio results' % len(fio_data_file)
        if "IO" in mode:
            title='IO benchmark with %d fio results' % len(fio_data_file)

    print('')
    # We need to adjust the output filename regarding the pattern required by the user
    if pattern_set_by_user:
        gnuplot_output_filename=pattern
        # The pattern holds wildcards: strip the simplest parts of the
        # expression to get a clear file name
        gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('--','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('.log','')
        # Ensure that we don't have any starting or trailing dash in the filename
        if gnuplot_output_filename.endswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[:-1]
        if gnuplot_output_filename.startswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[1:]

    if parse_global:
        parse_global_files(fio_data_file, global_search)
    else:
        blk_size=compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir)
        # Integer division, whatever the Python version
        title="%s @ Blocksize = %dK" % (title,blk_size//1024)
        compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir)
        compute_math(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)
        generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)

        if run_gnuplot:
            render_gnuplot(gnuplot_output_dir)

            # Cleaning temporary files.  This is only done once the traces
            # are rendered: the generated 'mygraph' script refers to these
            # files, so they must survive for a later manual gnuplot run.
            # os.remove() does not expand wildcards, hence the explicit
            # listing of the output directory.
            for leftover in find_file(gnuplot_output_dir, 'gnuplot_temp_file.*'):
                try:
                    os.remove(gnuplot_output_dir + leftover)
                except OSError:
                    # Best effort: a leftover temporary file is harmless
                    pass
390
# Script entry point: run main() on the command-line arguments and use its
# return value as the process exit status (main() has no return statement,
# so this is None, i.e. success, unless main() itself calls sys.exit()).
if __name__ == "__main__":
    sys.exit(main(sys.argv))