Adding fio2gnuplot tool
[fio.git] / tools / plot / fio2gnuplot.py
... / ...
CommitLineData
1#!/usr/bin/python
2#
3# Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
4# Author: Erwan Velu <erwan@enovance.com>
5#
6# The license below covers all files distributed with fio unless otherwise
7# noted in the file itself.
8#
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License version 2 as
11# published by the Free Software Foundation.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22import os
23import fnmatch
24import sys
25import getopt
26import re
27import math
28
def find_file(path, pattern):
    """Return the names of the entries of *path* that match *pattern*.

    *pattern* is a shell-style wildcard expression as understood by
    ``fnmatch.fnmatch`` (for instance ``*_bw.log``).  Only bare entry names
    are returned, in the order ``os.listdir`` yields them; *path* is not
    prepended.
    """
    return [entry for entry in os.listdir(path)
            if fnmatch.fnmatch(entry, pattern)]
39
def generate_gnuplot_script(title, gnuplot_output_filename, mode):
    """Write the gnuplot driver script ``mygraph`` in the current directory.

    The file is overwritten and receives a single ``call 'graph3D.gpm'``
    line whose arguments are *title*, *gnuplot_output_filename*, an empty
    string, *gnuplot_output_filename* again and *mode*.
    """
    command = "call 'graph3D.gpm' '%s' '%s' '' '%s' '%s'\n" % (
        title, gnuplot_output_filename, gnuplot_output_filename, mode)
    # The context manager closes the file even if the write fails.
    with open("mygraph", 'w') as script:
        script.write(command)
44
def generate_gnuplot_math_script(title, gnuplot_output_filename, mode, average):
    """Append one ``call 'math.gpm'`` line to the ``mymath`` gnuplot script.

    The file lives in the current directory and is opened in append mode, so
    successive calls accumulate lines; the caller is responsible for removing
    a stale ``mymath`` beforehand.  The line carries *title*,
    *gnuplot_output_filename*, an empty string, *gnuplot_output_filename*
    again, *mode* and finally *average* (unquoted).
    """
    command = "call 'math.gpm' '%s' '%s' '' '%s' '%s' %s\n" % (
        title, gnuplot_output_filename, gnuplot_output_filename, mode, average)
    # The context manager closes the file even if the write fails.
    with open("mymath", 'a') as script:
        script.write(command)
49
def compute_aggregated_file(fio_data_file, gnuplot_output_filename):
    """Concatenate the per-disk temporary files into one gnuplot data file.

    For the n-th entry of *fio_data_file* the file ``gnuplot_temp_file.<n>``
    is read from the current directory.  Its content is copied into
    *gnuplot_output_filename*, preceded by a ``# Disk<n> was coming from
    <name>`` comment and followed by an empty line.

    Temporary files are opened one at a time so the number of simultaneously
    open descriptors does not grow with the number of disks, and every handle
    is closed even when an error occurs.

    Raises IOError/OSError if a temporary file is missing or the output
    cannot be written.
    """
    with open(gnuplot_output_filename, "w") as aggregated:
        for index, source in enumerate(fio_data_file):
            aggregated.write("# Disk%d was coming from %s\n" % (index, source))
            with open("gnuplot_temp_file.%d" % index, 'r') as temp:
                aggregated.write(temp.read())
            aggregated.write("\n")
69
def average(s):
    """Return the arithmetic mean of the sized collection *s* as a float.

    Raises ZeroDivisionError when *s* is empty.
    """
    total = sum(s)
    count = len(s)
    # Multiplying by 1.0 forces a floating point division.
    return total * 1.0 / count
71
def compute_temp_file(fio_data_file, disk_perf):
    """Split the fio traces into per-disk temporary gnuplot files.

    Every file named in *fio_data_file* is read in lockstep, one line per
    file per iteration.  Each line must hold four comma or whitespace
    separated fields: time in msec, performance value, a third field that is
    ignored, and block size.  Reading stops as soon as any trace is
    exhausted; the incomplete row is discarded.

    For the n-th trace, samples whose time is above 500 msec are
      * appended (as int) to ``disk_perf[n]``, and
      * written to ``gnuplot_temp_file.<n>`` in the current directory as
        ``<n> <time in seconds, 2 decimals> <value>``.

    One empty list per trace is appended to *disk_perf* first.  Samples are
    then stored by position starting at 0, so *disk_perf* is expected to be
    empty on entry.

    Returns the block size read from the first sample, or 0 if no sample
    was read.  Raises ValueError on a malformed line.
    """
    files = []
    temp_outfile = []
    blk_size = 0
    try:
        for pos, name in enumerate(fio_data_file):
            files.append(open(name))
            temp_outfile.append(open("gnuplot_temp_file.%d" % pos, 'w'))
            disk_perf.append([])

        # "while files" rather than "while True": with no trace to read
        # there is nothing that could ever signal the end of the input.
        while files:
            current_line = []
            for trace in files:
                fields = trace.readline().replace(',', ' ').split()
                if not fields:
                    break
                current_line.append(fields)

            # One of the traces is exhausted: drop the partial row and stop
            if len(current_line) != len(files):
                break

            for index, fields in enumerate(current_line):
                time, perf, _ignored, block_size = fields
                if blk_size == 0:
                    blk_size = int(block_size)

                # We ignore the first 500msec as it doesn't seem to be part
                # of the real benchmark: time < 500 usually reports BW=0,
                # which breaks the min computation.
                if int(time) > 500:
                    disk_perf[index].append(int(perf))
                    # Use the disk's own index and timestamp so a sample can
                    # never land in another disk's file when only some of
                    # the disks pass the filter on this row.
                    temp_outfile[index].write(
                        "%s %.2f %s\n" % (index, float(time) / 1000, perf))
    finally:
        for handle in files + temp_outfile:
            handle.close()
    return blk_size
120
def compute_math(fio_data_file, title, gnuplot_output_filename, mode, disk_perf):
    """Compute per-disk and global statistics and emit the math gnuplot script.

    ``disk_perf[n]`` holds the samples of the n-th entry of *fio_data_file*.
    Five files are written, all prefixed by *gnuplot_output_filename*:

      * ``.average``, ``.min``, ``.max``, ``.stddev``: a ``DiskName <mode>``
        header, then for each disk a ``# Disk<n> was coming from <name>``
        comment and a ``<n> <value>`` line (value truncated to an integer);
      * ``.global``: ``min``, ``max``, ``avg``, ``stddev``, ``values_count``
        and ``disks_count`` computed over all samples of *disk_perf*, one
        ``key=value`` per line.

    Any existing ``mymath`` script is then removed and regenerated with one
    line per statistic through ``generate_gnuplot_math_script``.

    All statistics are computed before any file is opened, and each file is
    written under its own context manager so no handle can leak.

    Raises ZeroDivisionError / ValueError when a disk has no sample.
    """
    header = 'DiskName %s\n' % mode
    average_lines = [header]
    min_lines = [header]
    max_lines = [header]
    stddev_lines = [header]
    global_min = []
    global_max = []

    for disk, source in enumerate(fio_data_file):
        samples = disk_perf[disk]
        origin = "# Disk%d was coming from %s\n" % (disk, source)
        avg = average(samples)
        # A real list (not map()) is needed: average() calls len() on it
        variance = [(x - avg) ** 2 for x in samples]
        standard_deviation = math.sqrt(average(variance))
        local_min = min(samples)
        local_max = max(samples)

        average_lines += [origin, '%d %d\n' % (disk, avg)]
        stddev_lines += [origin, '%d %d\n' % (disk, standard_deviation)]
        min_lines += [origin, '%d %d\n' % (disk, local_min)]
        max_lines += [origin, '%d %d\n' % (disk, local_max)]
        global_min.append(int(local_min))
        global_max.append(int(local_max))

    # Flatten every disk's samples into a single list (linear time)
    global_disk_perf = [value for samples in disk_perf for value in samples]
    avg = average(global_disk_perf)
    variance = [(x - avg) ** 2 for x in global_disk_perf]
    standard_deviation = math.sqrt(average(variance))

    global_lines = [
        'min=%.2f\n' % min(global_disk_perf),
        'max=%.2f\n' % max(global_disk_perf),
        'avg=%.2f\n' % avg,
        'stddev=%.2f\n' % standard_deviation,
        'values_count=%d\n' % len(global_disk_perf),
        'disks_count=%d\n' % len(fio_data_file),
    ]

    outputs = (
        ('.average', average_lines),
        ('.min', min_lines),
        ('.max', max_lines),
        ('.stddev', stddev_lines),
        ('.global', global_lines),
    )
    for suffix, lines in outputs:
        with open(gnuplot_output_filename + suffix, 'w') as out:
            out.writelines(lines)

    # generate_gnuplot_math_script() appends, so start from a clean script.
    # A missing file is the normal case and is not an error.
    try:
        os.remove('mymath')
    except OSError:
        pass

    generate_gnuplot_math_script("Average values of " + title, gnuplot_output_filename + '.average', mode, int(avg))
    generate_gnuplot_math_script("Min values of " + title, gnuplot_output_filename + '.min', mode, average(global_min))
    generate_gnuplot_math_script("Max values of " + title, gnuplot_output_filename + '.max', mode, average(global_max))
    generate_gnuplot_math_script("Standard Deviation of " + title, gnuplot_output_filename + '.stddev', mode, int(standard_deviation))
180
def parse_global_files(fio_data_file, global_search):
    """Search the ``.global`` statistic files for *global_search* and report it.

    Each file of *fio_data_file* is read as ``name=value`` lines; parsing of
    a file stops at the first line that is not of that form.  The
    ``disks_count`` entry is remembered, as is the value of the last entry
    whose name contains *global_search*.

    Only ``avg`` is implemented: for every file the aggregated value is
    estimated as ``disks_count * avg`` and the biggest one is printed
    together with the file it came from.  Any other *global_search* prints a
    "not yet implemented" message.

    Raises ValueError if a matching value is not numeric.
    """
    max_result = 0
    max_file = ''
    for filename in fio_data_file:
        # Reset for every file so a file lacking an entry can neither crash
        # the lookup nor inherit the value read from the previous file.
        disks_count = 0
        search_value = -1

        with open(filename) as stats:
            for line in stats:
                # We do split the name from the value
                fields = line.split("=")
                if len(fields) != 2 or not fields[0]:
                    break
                name, value = fields

                # disks_count is not a global_search item but is needed
                # for the aggregated computation, so let's save it
                if name == "disks_count":
                    disks_count = int(value)

                # Let's catch the searched item
                if global_search in name:
                    search_value = float(value)

        # Estimate the global bandwidth of this file and keep the biggest
        if global_search == "avg":
            if (disks_count > 0) and (search_value != -1):
                result = disks_count * search_value
                if result > max_result:
                    max_result = result
                    max_file = filename

    if global_search == "avg":
        # NOTE(review): "%2.f" prints no decimals; "%.2f" may have been
        # intended - confirm before changing the output format.
        print("Biggest aggregated value of %s was %2.f in file %s\n" % (global_search, max_result, max_file))
    else:
        print("Global search %s is not yet implemented\n" % global_search)
225
def render_gnuplot():
    """Run gnuplot on the ``mymath`` and ``mygraph`` scripts.

    Both scripts are looked up in the current directory and both are always
    attempted.  ``os.system`` does not raise when the command fails, so its
    return status is checked instead: if either run reports a non-zero
    status an error is printed and the program exits with status 1.
    """
    print("Running gnuplot Rendering\n")
    math_status = os.system("gnuplot mymath")
    graph_status = os.system("gnuplot mygraph")
    if math_status != 0 or graph_status != 0:
        print("Could not run gnuplot on mymath or mygraph !\n")
        sys.exit(1)
234
def print_help():
    """Print the command-line usage summary on standard output."""
    usage = (
        'fio2gnuplot.py -ghbio -t <title> -o <outputfile> -p <pattern>',
        '',
        '-h --help : Print this help',
        '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files',
        '-b or --bandwidth : A predefined pattern for selecting *_bw.log files',
        '-i or --iops : A predefined pattern for selecting *_iops.log files',
        '-g or --gnuplot : Render gnuplot traces before exiting',
        '-o or --outputfile <file> : The basename for gnuplot traces',
        ' - Basename is set with the pattern if defined',
        '-t or --title <title> : The title of the gnuplot traces',
        ' - Title is set with the block size detected in fio traces',
        '-G or --Global <type> : Search for <type> in .global files match by a pattern',
        ' - Available types are : min, max, avg, stddev',
        ' - The .global extension is added automatically to the pattern',
    )
    for text in usage:
        print(text)
250
def main(argv):
    """Command-line entry point: turn fio logs into gnuplot inputs.

    *argv* is the full argument vector, program name included.  Short and
    long options are both accepted (see ``print_help``).

    Without ``-G`` the selected fio logs are split into temporary files,
    aggregated, summarised (min/max/avg/stddev) and the ``mygraph`` /
    ``mymath`` gnuplot scripts are generated; ``-g`` additionally runs
    gnuplot.  The ``gnuplot_temp_file.*`` temporary files are removed
    afterwards.  With ``-G <type>`` the ``.global`` files matching the
    pattern are searched instead.

    Exits with status 2 on an invalid option, and with status 1 after
    printing the help or when no file matches the pattern.
    """
    mode = 'unknown'
    pattern = ''
    pattern_set_by_user = False
    title = 'No title'
    gnuplot_output_filename = 'result'
    disk_perf = []
    run_gnuplot = False
    parse_global = False
    global_search = ''

    try:
        # The long names must be declared explicitly, otherwise getopt
        # rejects every "--option" advertised by the help.
        opts, _ = getopt.getopt(
            argv[1:], "ghbio:t:p:G:",
            ["gnuplot", "help", "bandwidth", "iops",
             "outputfile=", "title=", "pattern=", "Global="])
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-b", "--bandwidth"):
            pattern = '*_bw.log'
        elif opt in ("-i", "--iops"):
            pattern = '*_iops.log'
        elif opt in ("-p", "--pattern"):
            pattern_set_by_user = True
            # Drop the backslashes used to protect the pattern from the shell
            pattern = arg.replace('\\', '')
        elif opt in ("-o", "--outputfile"):
            gnuplot_output_filename = arg
        elif opt in ("-t", "--title"):
            title = arg
        elif opt in ("-g", "--gnuplot"):
            run_gnuplot = True
        elif opt in ("-G", "--Global"):
            parse_global = True
            global_search = arg
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit(1)

    # Adding the .global extension to the pattern, unless already present
    if parse_global and not pattern.endswith('.global'):
        pattern = pattern + '.global'

    fio_data_file = find_file('.', pattern)
    if len(fio_data_file) == 0:
        print("No log file found with pattern %s!" % pattern)
        sys.exit(1)

    fio_data_file = sorted(fio_data_file, key=str.lower)
    for selected in fio_data_file:
        print('Selected %s' % selected)
        if "_bw.log" in selected:
            mode = "Bandwidth (KB/sec)"
        if "_iops.log" in selected:
            mode = "IO per Seconds (IO/sec)"

    # Build a default title from the detected mode if the user gave none
    if (title == 'No title') and (mode != 'unknown'):
        if "Bandwidth" in mode:
            title = 'Bandwidth benchmark with %d fio results' % len(fio_data_file)
        if "IO" in mode:
            title = 'IO benchmark with %d fio results' % len(fio_data_file)

    # We need to adjust the output filename regarding the pattern required by the user
    if pattern_set_by_user:
        gnuplot_output_filename = pattern
        # Strip the wildcard parts of the pattern to get a clean file name
        gnuplot_output_filename = gnuplot_output_filename.replace('-*-', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('*', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('--', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('.log', '')
        # Ensure that the filename has no leading or trailing dash
        if gnuplot_output_filename.endswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[:-1]
        if gnuplot_output_filename.startswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[1:]

    if parse_global:
        parse_global_files(fio_data_file, global_search)
    else:
        try:
            blk_size = compute_temp_file(fio_data_file, disk_perf)
            title = "%s @ Blocksize = %dK" % (title, blk_size // 1024)
            compute_aggregated_file(fio_data_file, gnuplot_output_filename)
            compute_math(fio_data_file, title, gnuplot_output_filename, mode, disk_perf)
            generate_gnuplot_script(title, gnuplot_output_filename, mode)

            if run_gnuplot:
                render_gnuplot()
        finally:
            # Cleaning temporary files.  os.remove() does not expand
            # wildcards, so the matching names are resolved explicitly.
            for leftover in fnmatch.filter(os.listdir('.'), 'gnuplot_temp_file.*'):
                try:
                    os.remove(leftover)
                except OSError:
                    # Best effort: a leftover temporary file is harmless
                    pass
343
# Script entry point: hand the process arguments to main() and use its
# return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))