[PATCH] Change timeout to runtime and rearrange option entries
[fio.git] / init.c
1 /*
2  * This file contains job initialization and setup functions.
3  */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
#include <assert.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "fio.h"
#include "parse.h"
20
/*
 * Fixed seed used for the random IO state when a repeatable random
 * pattern is requested (see init_random_state()).
 */
#define FIO_RANDSEED            (0xb1899bedUL)

/*
 * Byte offset of member 'var' inside struct thread_data. Built on the
 * standard offsetof() instead of dereferencing a fabricated null pointer,
 * which is undefined behaviour.
 */
#define td_var_offset(var)      offsetof(struct thread_data, var)
24
/*
 * Forward declarations of the handlers referenced through the .cb members
 * of the option table below.
 */
static int str_rw_cb(void *, const char *);
static int str_ioengine_cb(void *, const char *);
static int str_mem_cb(void *, const char *);
static int str_verify_cb(void *, const char *);
static int str_lockmem_cb(void *, unsigned long *);
#ifdef FIO_HAVE_IOPRIO
static int str_prio_cb(void *, unsigned int *);
static int str_prioclass_cb(void *, unsigned int *);
#endif
static int str_exitall_cb(void);
static int str_cpumask_cb(void *, unsigned int *);
36
/*
 * Two-level stringification: the extra level lets the argument be
 * macro-expanded before '#' turns it into a string literal.
 */
#define __stringify_1(x)        #x
#define __stringify(x)          __stringify_1(x)
39
40 /*
41  * Map of job/command line options
42  */
43 static struct fio_option options[] = {
44         {
45                 .name   = "name",
46                 .type   = FIO_OPT_STR_STORE,
47                 .off1   = td_var_offset(name),
48                 .help   = "Name of this job",
49         },
50         {
51                 .name   = "directory",
52                 .type   = FIO_OPT_STR_STORE,
53                 .off1   = td_var_offset(directory),
54                 .help   = "Directory to store files in",
55         },
56         {
57                 .name   = "filename",
58                 .type   = FIO_OPT_STR_STORE,
59                 .off1   = td_var_offset(filename),
60                 .help   = "Force the use of a specific file",
61         },
62         {
63                 .name   = "rw",
64                 .type   = FIO_OPT_STR,
65                 .cb     = str_rw_cb,
66                 .help   = "IO direction",
67                 .def    = "read",
68                 .posval = { "read", "write", "randwrite", "randread", "rw",
69                                 "randrw", },
70         },
71         {
72                 .name   = "ioengine",
73                 .type   = FIO_OPT_STR,
74                 .cb     = str_ioengine_cb,
75                 .help   = "IO engine to use",
76                 .def    = "sync",
77                 .posval = { "sync", "libaio", "posixaio", "mmap", "splice",
78                                 "sg", "null", },
79         },
80         {
81                 .name   = "iodepth",
82                 .type   = FIO_OPT_INT,
83                 .off1   = td_var_offset(iodepth),
84                 .help   = "Amount of IO buffers to keep in flight",
85                 .def    = "1",
86         },
87         {
88                 .name   = "size",
89                 .type   = FIO_OPT_STR_VAL,
90                 .off1   = td_var_offset(total_file_size),
91                 .help   = "Size of device or file",
92         },
93         {
94                 .name   = "bs",
95                 .type   = FIO_OPT_STR_VAL_INT,
96                 .off1   = td_var_offset(bs[DDIR_READ]),
97                 .off2   = td_var_offset(bs[DDIR_WRITE]),
98                 .help   = "Block size unit",
99                 .def    = "4k",
100         },
101         {
102                 .name   = "bsrange",
103                 .type   = FIO_OPT_RANGE,
104                 .off1   = td_var_offset(min_bs[DDIR_READ]),
105                 .off2   = td_var_offset(max_bs[DDIR_READ]),
106                 .off3   = td_var_offset(min_bs[DDIR_WRITE]),
107                 .off4   = td_var_offset(max_bs[DDIR_WRITE]),
108                 .help   = "Set block size range (in more detail than bs)",
109         },
110         {
111                 .name   = "bs_unaligned",
112                 .type   = FIO_OPT_STR_SET,
113                 .off1   = td_var_offset(bs_unaligned),
114                 .help   = "Don't sector align IO buffer sizes",
115         },
116         {
117                 .name   = "offset",
118                 .type   = FIO_OPT_STR_VAL,
119                 .off1   = td_var_offset(start_offset),
120                 .help   = "Start IO from this offset",
121                 .def    = "0",
122         },
123         {
124                 .name   = "randrepeat",
125                 .type   = FIO_OPT_BOOL,
126                 .off1   = td_var_offset(rand_repeatable),
127                 .help   = "Use repeatable random IO pattern",
128                 .def    = "1",
129         },
130         {
131                 .name   = "norandommap",
132                 .type   = FIO_OPT_STR_SET,
133                 .off1   = td_var_offset(norandommap),
134                 .help   = "Accept potential duplicate random blocks",
135         },
136         {
137                 .name   = "nrfiles",
138                 .type   = FIO_OPT_INT,
139                 .off1   = td_var_offset(nr_files),
140                 .help   = "Split job workload between this number of files",
141                 .def    = "1",
142         },
143         {
144                 .name   = "fsync",
145                 .type   = FIO_OPT_INT,
146                 .off1   = td_var_offset(fsync_blocks),
147                 .help   = "Issue fsync for writes every given number of blocks",
148                 .def    = "0",
149         },
150         {
151                 .name   = "direct",
152                 .type   = FIO_OPT_BOOL,
153                 .off1   = td_var_offset(odirect),
154                 .help   = "Use O_DIRECT IO",
155                 .def    = "1",
156         },
157         {
158                 .name   = "overwrite",
159                 .type   = FIO_OPT_BOOL,
160                 .off1   = td_var_offset(overwrite),
161                 .help   = "When writing, set whether to overwrite current data",
162                 .def    = "0",
163         },
164         {
165                 .name   = "loops",
166                 .type   = FIO_OPT_INT,
167                 .off1   = td_var_offset(loops),
168                 .help   = "Number of times to run the job",
169                 .def    = "1",
170         },
171         {
172                 .name   = "numjobs",
173                 .type   = FIO_OPT_INT,
174                 .off1   = td_var_offset(numjobs),
175                 .help   = "Duplicate this job this many times",
176                 .def    = "1",
177         },
178         {
179                 .name   = "startdelay",
180                 .type   = FIO_OPT_INT,
181                 .off1   = td_var_offset(start_delay),
182                 .help   = "Only start job when this period has passed",
183                 .def    = "0",
184         },
185         {
186                 .name   = "runtime",
187                 .alias  = "timeout",
188                 .type   = FIO_OPT_STR_VAL_TIME,
189                 .off1   = td_var_offset(timeout),
190                 .help   = "Stop workload when this amount of time has passed",
191                 .def    = "0",
192         },
193         {
194                 .name   = "mem",
195                 .type   = FIO_OPT_STR,
196                 .cb     = str_mem_cb,
197                 .help   = "Backing type for IO buffers",
198                 .def    = "malloc",
199                 .posval =  { "malloc", "shm", "shmhuge", "mmap", "mmaphuge", },
200         },
201         {
202                 .name   = "verify",
203                 .type   = FIO_OPT_STR,
204                 .cb     = str_verify_cb,
205                 .help   = "Verify sum function",
206                 .def    = "0",
207                 .posval = { "crc32", "md5", },
208         },
209         {
210                 .name   = "write_iolog",
211                 .type   = FIO_OPT_STR_STORE,
212                 .off1   = td_var_offset(write_iolog_file),
213                 .help   = "Store IO pattern to file",
214         },
215         {
216                 .name   = "read_iolog",
217                 .type   = FIO_OPT_STR_STORE,
218                 .off1   = td_var_offset(read_iolog_file),
219                 .help   = "Playback IO pattern from file",
220         },
221         {
222                 .name   = "exec_prerun",
223                 .type   = FIO_OPT_STR_STORE,
224                 .off1   = td_var_offset(exec_prerun),
225                 .help   = "Execute this file prior to running job",
226         },
227         {
228                 .name   = "exec_postrun",
229                 .type   = FIO_OPT_STR_STORE,
230                 .off1   = td_var_offset(exec_postrun),
231                 .help   = "Execute this file after running job",
232         },
233 #ifdef FIO_HAVE_IOSCHED_SWITCH
234         {
235                 .name   = "ioscheduler",
236                 .type   = FIO_OPT_STR_STORE,
237                 .off1   = td_var_offset(ioscheduler),
238                 .help   = "Use this IO scheduler on the backing device",
239         },
240 #endif
241         {
242                 .name   = "zonesize",
243                 .type   = FIO_OPT_STR_VAL,
244                 .off1   = td_var_offset(zone_size),
245                 .help   = "Give size of an IO zone",
246                 .def    = "0",
247         },
248         {
249                 .name   = "zoneskip",
250                 .type   = FIO_OPT_STR_VAL,
251                 .off1   = td_var_offset(zone_skip),
252                 .help   = "Space between IO zones",
253                 .def    = "0",
254         },
255         {
256                 .name   = "lockmem",
257                 .type   = FIO_OPT_STR_VAL,
258                 .cb     = str_lockmem_cb,
259                 .help   = "Lock down this amount of memory",
260                 .def    = "0",
261         },
262         {
263                 .name   = "rwmixcycle",
264                 .type   = FIO_OPT_INT,
265                 .off1   = td_var_offset(rwmixcycle),
266                 .help   = "Cycle period for mixed read/write workloads (msec)",
267                 .def    = "500",
268         },
269         {
270                 .name   = "rwmixread",
271                 .type   = FIO_OPT_INT,
272                 .off1   = td_var_offset(rwmixread),
273                 .maxval = 100,
274                 .help   = "Percentage of mixed workload that is reads",
275                 .def    = "50",
276         },
277         {
278                 .name   = "rwmixwrite",
279                 .type   = FIO_OPT_INT,
280                 .off1   = td_var_offset(rwmixwrite),
281                 .maxval = 100,
282                 .help   = "Percentage of mixed workload that is writes",
283                 .def    = "50",
284         },
285         {
286                 .name   = "nice",
287                 .type   = FIO_OPT_INT,
288                 .off1   = td_var_offset(nice),
289                 .help   = "Set job CPU nice value",
290                 .minval = -19,
291                 .maxval = 20,
292                 .def    = "0",
293         },
294 #ifdef FIO_HAVE_IOPRIO
295         {
296                 .name   = "prio",
297                 .type   = FIO_OPT_INT,
298                 .cb     = str_prio_cb,
299                 .help   = "Set job IO priority value",
300                 .minval = 0,
301                 .maxval = 7,
302         },
303         {
304                 .name   = "prioclass",
305                 .type   = FIO_OPT_INT,
306                 .cb     = str_prioclass_cb,
307                 .help   = "Set job IO priority class",
308                 .minval = 0,
309                 .maxval = 3,
310         },
311 #endif
312         {
313                 .name   = "thinktime",
314                 .type   = FIO_OPT_INT,
315                 .off1   = td_var_offset(thinktime),
316                 .help   = "Idle time between IO buffers",
317                 .def    = "0",
318         },
319         {
320                 .name   = "thinktime_blocks",
321                 .type   = FIO_OPT_INT,
322                 .off1   = td_var_offset(thinktime_blocks),
323                 .help   = "IO buffer period between 'thinktime'",
324                 .def    = "1",
325         },
326         {
327                 .name   = "rate",
328                 .type   = FIO_OPT_INT,
329                 .off1   = td_var_offset(rate),
330                 .help   = "Set bandwidth rate",
331         },
332         {
333                 .name   = "ratemin",
334                 .type   = FIO_OPT_INT,
335                 .off1   = td_var_offset(ratemin),
336                 .help   = "The bottom limit accepted",
337         },
338         {
339                 .name   = "ratecycle",
340                 .type   = FIO_OPT_INT,
341                 .off1   = td_var_offset(ratecycle),
342                 .help   = "Window average for rate limits (msec)",
343                 .def    = "1000",
344         },
345         {
346                 .name   = "invalidate",
347                 .type   = FIO_OPT_BOOL,
348                 .off1   = td_var_offset(invalidate_cache),
349                 .help   = "Invalidate buffer/page cache prior to running job",
350                 .def    = "1",
351         },
352         {
353                 .name   = "sync",
354                 .type   = FIO_OPT_BOOL,
355                 .off1   = td_var_offset(sync_io),
356                 .help   = "Use O_SYNC for buffered writes",
357                 .def    = "0",
358         },
359         {
360                 .name   = "bwavgtime",
361                 .type   = FIO_OPT_INT,
362                 .off1   = td_var_offset(bw_avg_time),
363                 .help   = "Time window over which to calculate bandwidth (msec)",
364                 .def    = "500",
365         },
366         {
367                 .name   = "create_serialize",
368                 .type   = FIO_OPT_BOOL,
369                 .off1   = td_var_offset(create_serialize),
370                 .help   = "Serialize creating of job files",
371                 .def    = "1",
372         },
373         {
374                 .name   = "create_fsync",
375                 .type   = FIO_OPT_BOOL,
376                 .off1   = td_var_offset(create_fsync),
377                 .help   = "Fsync file after creation",
378                 .def    = "1",
379         },
380         {
381                 .name   = "cpuload",
382                 .type   = FIO_OPT_INT,
383                 .off1   = td_var_offset(cpuload),
384                 .help   = "Use this percentage of CPU",
385         },
386         {
387                 .name   = "cpuchunks",
388                 .type   = FIO_OPT_INT,
389                 .off1   = td_var_offset(cpucycle),
390                 .help   = "Length of the CPU burn cycles",
391         },
392 #ifdef FIO_HAVE_CPU_AFFINITY
393         {
394                 .name   = "cpumask",
395                 .type   = FIO_OPT_INT,
396                 .cb     = str_cpumask_cb,
397                 .help   = "CPU affinity mask",
398         },
399 #endif
400         {
401                 .name   = "end_fsync",
402                 .type   = FIO_OPT_BOOL,
403                 .off1   = td_var_offset(end_fsync),
404                 .help   = "Include fsync at the end of job",
405                 .def    = "0",
406         },
407         {
408                 .name   = "unlink",
409                 .type   = FIO_OPT_BOOL,
410                 .off1   = td_var_offset(unlink),
411                 .help   = "Unlink created files after job has completed",
412                 .def    = "1",
413         },
414         {
415                 .name   = "exitall",
416                 .type   = FIO_OPT_STR_SET,
417                 .cb     = str_exitall_cb,
418                 .help   = "Terminate all jobs when one exits",
419         },
420         {
421                 .name   = "stonewall",
422                 .type   = FIO_OPT_STR_SET,
423                 .off1   = td_var_offset(stonewall),
424                 .help   = "Insert a hard barrier between this job and previous",
425         },
426         {
427                 .name   = "thread",
428                 .type   = FIO_OPT_STR_SET,
429                 .off1   = td_var_offset(use_thread),
430                 .help   = "Use threads instead of forks",
431         },
432         {
433                 .name   = "write_bw_log",
434                 .type   = FIO_OPT_STR_SET,
435                 .off1   = td_var_offset(write_bw_log),
436                 .help   = "Write log of bandwidth during run",
437         },
438         {
439                 .name   = "write_lat_log",
440                 .type   = FIO_OPT_STR_SET,
441                 .off1   = td_var_offset(write_lat_log),
442                 .help   = "Write log of latency during run",
443         },
444         {
445                 .name   = "hugepage-size",
446                 .type   = FIO_OPT_STR_VAL,
447                 .off1   = td_var_offset(hugepage_size),
448                 .help   = "When using hugepages, specify size of each page",
449                 .def    = __stringify(FIO_HUGE_PAGE),
450         },
451         {
452                 .name = NULL,
453         },
454 };
455
/* Number of entries in the job option table, terminator included. */
#define FIO_JOB_OPTS    (sizeof(options) / sizeof(struct fio_option))
/* Extra slots reserved in long_options[] on top of the job options. */
#define FIO_CMD_OPTS    (16)
/* NOTE(review): presumably the getopt 'val' marking a job option; its use is not visible here. */
#define FIO_GETOPT_JOB  (0x89988998)
459
460 /*
461  * Command line options. These will contain the above, plus a few
462  * extra that only pertain to fio itself and not jobs.
463  */
464 static struct option long_options[FIO_JOB_OPTS + FIO_CMD_OPTS] = {
465         {
466                 .name           = "output",
467                 .has_arg        = required_argument,
468                 .val            = 'o',
469         },
470         {
471                 .name           = "timeout",
472                 .has_arg        = required_argument,
473                 .val            = 't',
474         },
475         {
476                 .name           = "latency-log",
477                 .has_arg        = required_argument,
478                 .val            = 'l',
479         },
480         {
481                 .name           = "bandwidth-log",
482                 .has_arg        = required_argument,
483                 .val            = 'b',
484         },
485         {
486                 .name           = "minimal",
487                 .has_arg        = optional_argument,
488                 .val            = 'm',
489         },
490         {
491                 .name           = "version",
492                 .has_arg        = no_argument,
493                 .val            = 'v',
494         },
495         {
496                 .name           = "help",
497                 .has_arg        = no_argument,
498                 .val            = 'h',
499         },
500         {
501                 .name           = "cmdhelp",
502                 .has_arg        = required_argument,
503                 .val            = 'c',
504         },
505         {
506                 .name           = NULL,
507         },
508 };
509
/* NOTE(review): presumably the timeout given on the command line; its use is not visible here. */
static int def_timeout = 0;

static char fio_version_string[] = "fio 1.11";

static char **ini_file;
/* Upper bound on the number of job slots get_new_job() will hand out. */
static int max_jobs = MAX_JOBS;

/* Holds the option values of the global section; not a real job (see add_job()). */
struct thread_data def_thread;
/* Job slot array; get_new_job()/put_job() index it via thread_number. */
struct thread_data *threads = NULL;

int exitall_on_terminate = 0;
/* When non-zero, add_job() does not print its per-job summary line. */
int terse_output = 0;
unsigned long long mlock_size = 0;
/* Stream add_job() prints job summaries to. */
FILE *f_out = NULL;
FILE *f_err = NULL;

static int write_lat_log = 0;
static int write_bw_log = 0;
528
529 /*
530  * Return a free job structure.
531  */
532 static struct thread_data *get_new_job(int global, struct thread_data *parent)
533 {
534         struct thread_data *td;
535
536         if (global)
537                 return &def_thread;
538         if (thread_number >= max_jobs)
539                 return NULL;
540
541         td = &threads[thread_number++];
542         *td = *parent;
543
544         td->thread_number = thread_number;
545         return td;
546 }
547
548 static void put_job(struct thread_data *td)
549 {
550         if (td == &def_thread)
551                 return;
552
553         memset(&threads[td->thread_number - 1], 0, sizeof(*td));
554         thread_number--;
555 }
556
557 /*
558  * Lazy way of fixing up options that depend on each other. We could also
559  * define option callback handlers, but this is easier.
560  */
561 static void fixup_options(struct thread_data *td)
562 {
563         if (!td->rwmixread && td->rwmixwrite)
564                 td->rwmixread = 100 - td->rwmixwrite;
565
566         if (td->write_iolog_file && td->read_iolog_file) {
567                 log_err("fio: read iolog overrides write_iolog\n");
568                 free(td->write_iolog_file);
569                 td->write_iolog_file = NULL;
570         }
571
572         if (td->io_ops->flags & FIO_SYNCIO)
573                 td->iodepth = 1;
574         else {
575                 if (!td->iodepth)
576                         td->iodepth = td->nr_files;
577         }
578
579         /*
580          * only really works for sequential io for now, and with 1 file
581          */
582         if (td->zone_size && !td->sequential && td->nr_files == 1)
583                 td->zone_size = 0;
584
585         /*
586          * Reads can do overwrites, we always need to pre-create the file
587          */
588         if (td_read(td) || td_rw(td))
589                 td->overwrite = 1;
590
591         if (!td->min_bs[DDIR_READ])
592                 td->min_bs[DDIR_READ]= td->bs[DDIR_READ];
593         if (!td->max_bs[DDIR_READ])
594                 td->max_bs[DDIR_READ] = td->bs[DDIR_READ];
595         if (!td->min_bs[DDIR_WRITE])
596                 td->min_bs[DDIR_WRITE]= td->bs[DDIR_WRITE];
597         if (!td->max_bs[DDIR_WRITE])
598                 td->max_bs[DDIR_WRITE] = td->bs[DDIR_WRITE];
599
600         td->rw_min_bs = min(td->min_bs[DDIR_READ], td->min_bs[DDIR_WRITE]);
601
602         if (td_read(td) && !td_rw(td))
603                 td->verify = 0;
604
605         if (td->norandommap && td->verify != VERIFY_NONE) {
606                 log_err("fio: norandommap given, verify disabled\n");
607                 td->verify = VERIFY_NONE;
608         }
609         if (td->bs_unaligned && (td->odirect || td->io_ops->flags & FIO_RAWIO))
610                 log_err("fio: bs_unaligned may not work with raw io\n");
611
612         /*
613          * O_DIRECT and char doesn't mix, clear that flag if necessary.
614          */
615         if (td->filetype == FIO_TYPE_CHAR && td->odirect)
616                 td->odirect = 0;
617 }
618
/*
 * Format val in the largest binary unit that divides it evenly, e.g.
 * 4096 -> "4K", 1048576 -> "1M", 1000 -> "1000", 0 -> "0".
 *
 * Returns a malloc()ed string that the caller must free(), or NULL if the
 * allocation failed.
 */
static char *to_kmg(unsigned int val)
{
        static const char suffix[] = { 'K', 'M', 'G', 'T', 'P' };
        const size_t bufsz = 32;
        size_t shifts = 0;
        char *buf;

        buf = malloc(bufsz);
        if (!buf)
                return NULL;

        /* strip whole factors of 1024; zero is printed as-is */
        while (val && !(val & 1023) && shifts < sizeof(suffix)) {
                val >>= 10;
                shifts++;
        }

        if (shifts)
                snprintf(buf, bufsz, "%u%c", val, suffix[shifts - 1]);
        else
                snprintf(buf, bufsz, "%u", val);

        return buf;
}
639
640 /*
641  * Adds a job to the list of things todo. Sanitizes the various options
642  * to make sure we don't have conflicts, and initializes various
643  * members of td.
644  */
645 static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
646 {
647         const char *ddir_str[] = { "read", "write", "randread", "randwrite",
648                                    "rw", NULL, "randrw" };
649         struct stat sb;
650         int numjobs, ddir, i;
651         struct fio_file *f;
652
653         /*
654          * the def_thread is just for options, it's not a real job
655          */
656         if (td == &def_thread)
657                 return 0;
658
659         assert(td->io_ops);
660
661         if (td->odirect)
662                 td->io_ops->flags |= FIO_RAWIO;
663
664         td->filetype = FIO_TYPE_FILE;
665         if (!stat(jobname, &sb)) {
666                 if (S_ISBLK(sb.st_mode))
667                         td->filetype = FIO_TYPE_BD;
668                 else if (S_ISCHR(sb.st_mode))
669                         td->filetype = FIO_TYPE_CHAR;
670         }
671
672         fixup_options(td);
673
674         if (td->filename)
675                 td->nr_uniq_files = 1;
676         else
677                 td->nr_uniq_files = td->nr_files;
678
679         if (td->filetype == FIO_TYPE_FILE || td->filename) {
680                 char tmp[PATH_MAX];
681                 int len = 0;
682
683                 if (td->directory && td->directory[0] != '\0')
684                         len = sprintf(tmp, "%s/", td->directory);
685
686                 td->files = malloc(sizeof(struct fio_file) * td->nr_files);
687
688                 for_each_file(td, f, i) {
689                         memset(f, 0, sizeof(*f));
690                         f->fd = -1;
691
692                         if (td->filename)
693                                 sprintf(tmp + len, "%s", td->filename);
694                         else
695                                 sprintf(tmp + len, "%s.%d.%d", jobname, td->thread_number, i);
696                         f->file_name = strdup(tmp);
697                 }
698         } else {
699                 td->nr_files = 1;
700                 td->files = malloc(sizeof(struct fio_file));
701                 f = &td->files[0];
702
703                 memset(f, 0, sizeof(*f));
704                 f->fd = -1;
705                 f->file_name = strdup(jobname);
706         }
707
708         for_each_file(td, f, i) {
709                 f->file_size = td->total_file_size / td->nr_files;
710                 f->file_offset = td->start_offset;
711         }
712                 
713         fio_sem_init(&td->mutex, 0);
714
715         td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
716         td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX;
717         td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX;
718
719         if (td->stonewall && td->thread_number > 1)
720                 groupid++;
721
722         td->groupid = groupid;
723
724         if (setup_rate(td))
725                 goto err;
726
727         if (td->write_lat_log) {
728                 setup_log(&td->slat_log);
729                 setup_log(&td->clat_log);
730         }
731         if (td->write_bw_log)
732                 setup_log(&td->bw_log);
733
734         if (!td->name)
735                 td->name = strdup(jobname);
736
737         ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2);
738
739         if (!terse_output) {
740                 if (!job_add_num) {
741                         if (td->io_ops->flags & FIO_CPUIO)
742                                 fprintf(f_out, "%s: ioengine=cpu, cpuload=%u, cpucycle=%u\n", td->name, td->cpuload, td->cpucycle);
743                         else {
744                                 char *c1, *c2, *c3, *c4;
745
746                                 c1 = to_kmg(td->min_bs[DDIR_READ]);
747                                 c2 = to_kmg(td->max_bs[DDIR_READ]);
748                                 c3 = to_kmg(td->min_bs[DDIR_WRITE]);
749                                 c4 = to_kmg(td->max_bs[DDIR_WRITE]);
750
751                                 fprintf(f_out, "%s: (g=%d): rw=%s, odir=%u, bs=%s-%s/%s-%s, rate=%u, ioengine=%s, iodepth=%u\n", td->name, td->groupid, ddir_str[ddir], td->odirect, c1, c2, c3, c4, td->rate, td->io_ops->name, td->iodepth);
752
753                                 free(c1);
754                                 free(c2);
755                                 free(c3);
756                                 free(c4);
757                         }
758                 } else if (job_add_num == 1)
759                         fprintf(f_out, "...\n");
760         }
761
762         /*
763          * recurse add identical jobs, clear numjobs and stonewall options
764          * as they don't apply to sub-jobs
765          */
766         numjobs = td->numjobs;
767         while (--numjobs) {
768                 struct thread_data *td_new = get_new_job(0, td);
769
770                 if (!td_new)
771                         goto err;
772
773                 td_new->numjobs = 1;
774                 td_new->stonewall = 0;
775                 job_add_num = numjobs - 1;
776
777                 if (add_job(td_new, jobname, job_add_num))
778                         goto err;
779         }
780         return 0;
781 err:
782         put_job(td);
783         return -1;
784 }
785
786 /*
787  * Initialize the various random states we need (random io, block size ranges,
788  * read/write mix, etc).
789  */
790 int init_random_state(struct thread_data *td)
791 {
792         unsigned long seeds[4];
793         int fd, num_maps, blocks, i;
794         struct fio_file *f;
795
796         if (td->io_ops->flags & FIO_CPUIO)
797                 return 0;
798
799         fd = open("/dev/urandom", O_RDONLY);
800         if (fd == -1) {
801                 td_verror(td, errno);
802                 return 1;
803         }
804
805         if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
806                 td_verror(td, EIO);
807                 close(fd);
808                 return 1;
809         }
810
811         close(fd);
812
813         os_random_seed(seeds[0], &td->bsrange_state);
814         os_random_seed(seeds[1], &td->verify_state);
815         os_random_seed(seeds[2], &td->rwmix_state);
816
817         if (td->sequential)
818                 return 0;
819
820         if (td->rand_repeatable)
821                 seeds[3] = FIO_RANDSEED;
822
823         if (!td->norandommap) {
824                 for_each_file(td, f, i) {
825                         blocks = (f->file_size + td->rw_min_bs - 1) / td->rw_min_bs;
826                         num_maps = (blocks + BLOCKS_PER_MAP-1)/ BLOCKS_PER_MAP;
827                         f->file_map = malloc(num_maps * sizeof(long));
828                         f->num_maps = num_maps;
829                         memset(f->file_map, 0, num_maps * sizeof(long));
830                 }
831         }
832
833         os_random_seed(seeds[3], &td->random_state);
834         return 0;
835 }
836
837 static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
838 {
839 #ifdef FIO_HAVE_CPU_AFFINITY
840         unsigned int i;
841
842         CPU_ZERO(&cpumask);
843
844         for (i = 0; i < sizeof(int) * 8; i++) {
845                 if ((1 << i) & cpu)
846                         CPU_SET(i, &cpumask);
847         }
848 #endif
849 }
850
/*
 * Return 1 if line holds nothing but whitespace/control characters, or if
 * a ';' (comment marker) is reached before any other character;
 * return 0 otherwise.
 */
static int is_empty_or_comment(char *line)
{
        const unsigned char *p;

        /*
         * Walk the string once (no repeated strlen()) and feed the ctype
         * functions unsigned char values, as they require.
         */
        for (p = (const unsigned char *) line; *p; p++) {
                if (*p == ';')
                        return 1;
                if (!isspace(*p) && !iscntrl(*p))
                        return 0;
        }

        return 1;
}
864
865 static int str_rw_cb(void *data, const char *mem)
866 {
867         struct thread_data *td = data;
868
869         if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) {
870                 td->ddir = DDIR_READ;
871                 td->sequential = 1;
872                 return 0;
873         } else if (!strncmp(mem, "randread", 8)) {
874                 td->ddir = DDIR_READ;
875                 td->sequential = 0;
876                 return 0;
877         } else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) {
878                 td->ddir = DDIR_WRITE;
879                 td->sequential = 1;
880                 return 0;
881         } else if (!strncmp(mem, "randwrite", 9)) {
882                 td->ddir = DDIR_WRITE;
883                 td->sequential = 0;
884                 return 0;
885         } else if (!strncmp(mem, "rw", 2)) {
886                 td->ddir = DDIR_READ;
887                 td->iomix = 1;
888                 td->sequential = 1;
889                 return 0;
890         } else if (!strncmp(mem, "randrw", 6)) {
891                 td->ddir = DDIR_READ;
892                 td->iomix = 1;
893                 td->sequential = 0;
894                 return 0;
895         }
896
897         log_err("fio: data direction: read, write, randread, randwrite, rw, randrw\n");
898         return 1;
899 }
900
901 static int str_verify_cb(void *data, const char *mem)
902 {
903         struct thread_data *td = data;
904
905         if (!strncmp(mem, "0", 1)) {
906                 td->verify = VERIFY_NONE;
907                 return 0;
908         } else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1)) {
909                 td->verify = VERIFY_MD5;
910                 return 0;
911         } else if (!strncmp(mem, "crc32", 5)) {
912                 td->verify = VERIFY_CRC32;
913                 return 0;
914         }
915
916         log_err("fio: verify types: md5, crc32\n");
917         return 1;
918 }
919
/*
 * Check if mmap/mmaphuge has a :/foo/bar/file at the end. If so, return
 * that part as a newly allocated string which the caller owns.
 *
 * Returns NULL when 'str' contains no ':' or when the copy cannot be
 * allocated.
 */
static char *get_mmap_file(const char *str)
{
        char *p = strstr(str, ":");
        char *file;

        if (!p)
                return NULL;

        p++;
        strip_blank_front(&p);

        /*
         * Trim the tail on our private copy rather than on the input:
         * 'str' is const and must not be written through.
         */
        file = strdup(p);
        if (file)
                strip_blank_end(file);

        return file;
}
935
936 static int str_mem_cb(void *data, const char *mem)
937 {
938         struct thread_data *td = data;
939
940         if (!strncmp(mem, "malloc", 6)) {
941                 td->mem_type = MEM_MALLOC;
942                 return 0;
943         } else if (!strncmp(mem, "mmaphuge", 8)) {
944 #ifdef FIO_HAVE_HUGETLB
945                 /*
946                  * mmaphuge must be appended with the actual file
947                  */
948                 td->mmapfile = get_mmap_file(mem);
949                 if (!td->mmapfile) {
950                         log_err("fio: mmaphuge:/path/to/file\n");
951                         return 1;
952                 }
953
954                 td->mem_type = MEM_MMAPHUGE;
955                 return 0;
956 #else
957                 log_err("fio: mmaphuge not available\n");
958                 return 1;
959 #endif
960         } else if (!strncmp(mem, "mmap", 4)) {
961                 /*
962                  * Check if the user wants file backed memory. It's ok
963                  * if there's no file given, we'll just use anon mamp then.
964                  */
965                 td->mmapfile = get_mmap_file(mem);
966                 td->mem_type = MEM_MMAP;
967                 return 0;
968         } else if (!strncmp(mem, "shmhuge", 7)) {
969 #ifdef FIO_HAVE_HUGETLB
970                 td->mem_type = MEM_SHMHUGE;
971                 return 0;
972 #else
973                 log_err("fio: shmhuge not available\n");
974                 return 1;
975 #endif
976         } else if (!strncmp(mem, "shm", 3)) {
977                 td->mem_type = MEM_SHM;
978                 return 0;
979         }
980
981         log_err("fio: mem type: malloc, shm, shmhuge, mmap, mmaphuge\n");
982         return 1;
983 }
984
985 static int str_ioengine_cb(void *data, const char *str)
986 {
987         struct thread_data *td = data;
988
989         td->io_ops = load_ioengine(td, str);
990         if (td->io_ops)
991                 return 0;
992
993         log_err("fio: ioengine= libaio, posixaio, sync, mmap, sgio, splice, cpu, null\n");
994         log_err("fio: or specify path to dynamic ioengine module\n");
995         return 1;
996 }
997
998 static int str_lockmem_cb(void fio_unused *data, unsigned long *val)
999 {
1000         mlock_size = *val;
1001         return 0;
1002 }
1003
#ifdef FIO_HAVE_IOPRIO
/*
 * Option callback: OR the priority class, shifted up by
 * IOPRIO_CLASS_SHIFT, into the job's ioprio value. Always returns 0.
 */
static int str_prioclass_cb(void *data, unsigned int *val)
{
        struct thread_data *td = data;
        const unsigned int class_bits = *val << IOPRIO_CLASS_SHIFT;

        td->ioprio |= class_bits;
        return 0;
}

/*
 * Option callback: OR the priority value into the job's ioprio value.
 * Always returns 0.
 */
static int str_prio_cb(void *data, unsigned int *val)
{
        struct thread_data *td = data;
        const unsigned int prio_bits = *val;

        td->ioprio |= prio_bits;
        return 0;
}
#endif
1021
1022 static int str_exitall_cb(void)
1023 {
1024         exitall_on_terminate = 1;
1025         return 0;
1026 }
1027
1028 static int str_cpumask_cb(void *data, unsigned int *val)
1029 {
1030         struct thread_data *td = data;
1031
1032         fill_cpu_mask(td->cpumask, *val);
1033         return 0;
1034 }
1035
1036 /*
1037  * This is our [ini] type file parser.
1038  */
1039 static int parse_jobs_ini(char *file, int stonewall_flag)
1040 {
1041         unsigned int global;
1042         struct thread_data *td;
1043         char *string, *name;
1044         fpos_t off;
1045         FILE *f;
1046         char *p;
1047         int ret = 0, stonewall;
1048
1049         f = fopen(file, "r");
1050         if (!f) {
1051                 perror("fopen job file");
1052                 return 1;
1053         }
1054
1055         string = malloc(4096);
1056         name = malloc(256);
1057         memset(name, 0, 256);
1058
1059         stonewall = stonewall_flag;
1060         do {
1061                 p = fgets(string, 4095, f);
1062                 if (!p)
1063                         break;
1064                 if (is_empty_or_comment(p))
1065                         continue;
1066                 if (sscanf(p, "[%255s]", name) != 1)
1067                         continue;
1068
1069                 global = !strncmp(name, "global", 6);
1070
1071                 name[strlen(name) - 1] = '\0';
1072
1073                 td = get_new_job(global, &def_thread);
1074                 if (!td) {
1075                         ret = 1;
1076                         break;
1077                 }
1078
1079                 /*
1080                  * Seperate multiple job files by a stonewall
1081                  */
1082                 if (!global && stonewall) {
1083                         td->stonewall = stonewall;
1084                         stonewall = 0;
1085                 }
1086
1087                 fgetpos(f, &off);
1088                 while ((p = fgets(string, 4096, f)) != NULL) {
1089                         if (is_empty_or_comment(p))
1090                                 continue;
1091
1092                         strip_blank_front(&p);
1093
1094                         if (p[0] == '[')
1095                                 break;
1096
1097                         strip_blank_end(p);
1098
1099                         fgetpos(f, &off);
1100
1101                         /*
1102                          * Don't break here, continue parsing options so we
1103                          * dump all the bad ones. Makes trial/error fixups
1104                          * easier on the user.
1105                          */
1106                         ret |= parse_option(p, options, td);
1107                 }
1108
1109                 if (!ret) {
1110                         fsetpos(f, &off);
1111                         ret = add_job(td, name, 0);
1112                 } else {
1113                         log_err("fio: job %s dropped\n", name);
1114                         put_job(td);
1115                 }
1116         } while (!ret);
1117
1118         free(string);
1119         free(name);
1120         fclose(f);
1121         return ret;
1122 }
1123
1124 static int fill_def_thread(void)
1125 {
1126         memset(&def_thread, 0, sizeof(def_thread));
1127
1128         if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) {
1129                 perror("sched_getaffinity");
1130                 return 1;
1131         }
1132
1133         /*
1134          * fill default options
1135          */
1136         fill_default_options(&def_thread, options);
1137
1138         def_thread.timeout = def_timeout;
1139         def_thread.write_bw_log = write_bw_log;
1140         def_thread.write_lat_log = write_lat_log;
1141
1142 #ifdef FIO_HAVE_DISK_UTIL
1143         def_thread.do_disk_util = 1;
1144 #endif
1145
1146         return 0;
1147 }
1148
1149 static void usage(void)
1150 {
1151         printf("%s\n", fio_version_string);
1152         printf("\t--output\tWrite output to file\n");
1153         printf("\t--timeout\tRuntime in seconds\n");
1154         printf("\t--latency-log\tGenerate per-job latency logs\n");
1155         printf("\t--bandwidth-log\tGenerate per-job bandwidth logs\n");
1156         printf("\t--minimal\tMinimal (terse) output\n");
1157         printf("\t--version\tPrint version info and exit\n");
1158         printf("\t--help\t\tPrint this page\n");
1159         printf("\t--cmdhelp=cmd\tPrint command help, \"all\" for all of them\n");
1160 }
1161
1162 static int parse_cmd_line(int argc, char *argv[])
1163 {
1164         struct thread_data *td = NULL;
1165         int c, ini_idx = 0, lidx, ret;
1166
1167         while ((c = getopt_long(argc, argv, "", long_options, &lidx)) != -1) {
1168                 switch (c) {
1169                 case 't':
1170                         def_timeout = atoi(optarg);
1171                         break;
1172                 case 'l':
1173                         write_lat_log = 1;
1174                         break;
1175                 case 'w':
1176                         write_bw_log = 1;
1177                         break;
1178                 case 'o':
1179                         f_out = fopen(optarg, "w+");
1180                         if (!f_out) {
1181                                 perror("fopen output");
1182                                 exit(1);
1183                         }
1184                         f_err = f_out;
1185                         break;
1186                 case 'm':
1187                         terse_output = 1;
1188                         break;
1189                 case 'h':
1190                         usage();
1191                         exit(0);
1192                 case 'c':
1193                         ret = show_cmd_help(options, optarg);
1194                         exit(ret);
1195                 case 'v':
1196                         printf("%s\n", fio_version_string);
1197                         exit(0);
1198                 case FIO_GETOPT_JOB: {
1199                         const char *opt = long_options[lidx].name;
1200                         char *val = optarg;
1201
1202                         if (!strncmp(opt, "name", 4) && td) {
1203                                 ret = add_job(td, td->name ?: "fio", 0);
1204                                 if (ret) {
1205                                         put_job(td);
1206                                         return 0;
1207                                 }
1208                                 td = NULL;
1209                         }
1210                         if (!td) {
1211                                 int global = !strncmp(val, "global", 6);
1212
1213                                 td = get_new_job(global, &def_thread);
1214                                 if (!td)
1215                                         return 0;
1216                         }
1217
1218                         ret = parse_cmd_option(opt, val, options, td);
1219                         if (ret) {
1220                                 log_err("fio: job dropped\n");
1221                                 put_job(td);
1222                                 td = NULL;
1223                         }
1224                         break;
1225                 }
1226                 default:
1227                         break;
1228                 }
1229         }
1230
1231         if (td) {
1232                 ret = add_job(td, td->name ?: "fio", 0);
1233                 if (ret)
1234                         put_job(td);
1235         }
1236
1237         while (optind < argc) {
1238                 ini_idx++;
1239                 ini_file = realloc(ini_file, ini_idx * sizeof(char *));
1240                 ini_file[ini_idx - 1] = strdup(argv[optind]);
1241                 optind++;
1242         }
1243
1244         return ini_idx;
1245 }
1246
1247 static void free_shm(void)
1248 {
1249         struct shmid_ds sbuf;
1250
1251         if (threads) {
1252                 shmdt((void *) threads);
1253                 threads = NULL;
1254                 shmctl(shm_id, IPC_RMID, &sbuf);
1255         }
1256 }
1257
1258 /*
1259  * The thread area is shared between the main process and the job
1260  * threads/processes. So setup a shared memory segment that will hold
1261  * all the job info.
1262  */
1263 static int setup_thread_area(void)
1264 {
1265         /*
1266          * 1024 is too much on some machines, scale max_jobs if
1267          * we get a failure that looks like too large a shm segment
1268          */
1269         do {
1270                 size_t size = max_jobs * sizeof(struct thread_data);
1271
1272                 shm_id = shmget(0, size, IPC_CREAT | 0600);
1273                 if (shm_id != -1)
1274                         break;
1275                 if (errno != EINVAL) {
1276                         perror("shmget");
1277                         break;
1278                 }
1279
1280                 max_jobs >>= 1;
1281         } while (max_jobs);
1282
1283         if (shm_id == -1)
1284                 return 1;
1285
1286         threads = shmat(shm_id, NULL, 0);
1287         if (threads == (void *) -1) {
1288                 perror("shmat");
1289                 return 1;
1290         }
1291
1292         atexit(free_shm);
1293         return 0;
1294 }
1295
1296 /*
1297  * Copy the fio options into the long options map, so we mirror
1298  * job and cmd line options.
1299  */
1300 static void dupe_job_options(void)
1301 {
1302         struct fio_option *o;
1303         unsigned int i;
1304
1305         i = 0;
1306         while (long_options[i].name)
1307                 i++;
1308
1309         o = &options[0];
1310         while (o->name) {
1311                 long_options[i].name = o->name;
1312                 long_options[i].val = FIO_GETOPT_JOB;
1313                 if (o->type == FIO_OPT_STR_SET)
1314                         long_options[i].has_arg = no_argument;
1315                 else
1316                         long_options[i].has_arg = required_argument;
1317
1318                 i++;
1319                 o++;
1320                 assert(i < FIO_JOB_OPTS + FIO_CMD_OPTS);
1321         }
1322 }
1323
1324 int parse_options(int argc, char *argv[])
1325 {
1326         int job_files, i;
1327
1328         f_out = stdout;
1329         f_err = stderr;
1330
1331         options_init(options);
1332
1333         dupe_job_options();
1334
1335         if (setup_thread_area())
1336                 return 1;
1337         if (fill_def_thread())
1338                 return 1;
1339
1340         job_files = parse_cmd_line(argc, argv);
1341
1342         for (i = 0; i < job_files; i++) {
1343                 if (fill_def_thread())
1344                         return 1;
1345                 if (parse_jobs_ini(ini_file[i], i))
1346                         return 1;
1347                 free(ini_file[i]);
1348         }
1349
1350         free(ini_file);
1351
1352         if (!thread_number) {
1353                 log_err("No jobs defined(s)\n");
1354                 return 1;
1355         }
1356
1357         return 0;
1358 }