01e615f92e90e1f37864e591279f037c8ddba55c
[fio.git] / init.c
1 /*
2  * This file contains job initialization and setup functions.
3  */
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <unistd.h>
7 #include <fcntl.h>
8 #include <ctype.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <getopt.h>
12 #include <assert.h>
13 #include <sys/ipc.h>
14 #include <sys/shm.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17
18 #include "fio.h"
19 #include "parse.h"
20
/* Seed multiplier used when a repeatable random IO pattern is requested */
#define FIO_RANDSEED		(0xb1899bedUL)

/* Byte offset of member 'var' inside struct thread_data */
#define td_var_offset(var)	((size_t) &((struct thread_data *)0)->var)

/*
 * Option callbacks referenced from the option table below.
 */
static int str_rw_cb(void *data, const char *mem);
static int str_ioengine_cb(void *data, const char *str);
static int str_mem_cb(void *data, const char *mem);
static int str_verify_cb(void *data, const char *mem);
static int str_lockmem_cb(void *data, unsigned long *val);
#ifdef FIO_HAVE_IOPRIO
static int str_prio_cb(void *data, unsigned int *val);
static int str_prioclass_cb(void *data, unsigned int *val);
#endif
static int str_exitall_cb(void);
static int str_cpumask_cb(void *data, unsigned int *val);
static int str_file_service_cb(void *data, const char *str);

/* Two-level expansion so that macro arguments are expanded before quoting */
#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)
40
41 /*
42  * Map of job/command line options
43  */
44 static struct fio_option options[] = {
45         {
46                 .name   = "description",
47                 .type   = FIO_OPT_STR_STORE,
48                 .off1   = td_var_offset(description),
49                 .help   = "Text job description",
50         },
51         {
52                 .name   = "name",
53                 .type   = FIO_OPT_STR_STORE,
54                 .off1   = td_var_offset(name),
55                 .help   = "Name of this job",
56         },
57         {
58                 .name   = "directory",
59                 .type   = FIO_OPT_STR_STORE,
60                 .off1   = td_var_offset(directory),
61                 .help   = "Directory to store files in",
62         },
63         {
64                 .name   = "filename",
65                 .type   = FIO_OPT_STR_STORE,
66                 .off1   = td_var_offset(filename),
67                 .help   = "Force the use of a specific file",
68         },
69         {
70                 .name   = "rw",
71                 .type   = FIO_OPT_STR,
72                 .cb     = str_rw_cb,
73                 .help   = "IO direction",
74                 .def    = "read",
75                 .posval = { "read", "write", "randwrite", "randread", "rw",
76                                 "randrw", },
77         },
78         {
79                 .name   = "ioengine",
80                 .type   = FIO_OPT_STR,
81                 .cb     = str_ioengine_cb,
82                 .help   = "IO engine to use",
83                 .def    = "sync",
84                 .posval = { "sync", "libaio", "posixaio", "mmap", "splice",
85                                 "sg", "null", "net", "syslet-rw" },
86         },
87         {
88                 .name   = "iodepth",
89                 .type   = FIO_OPT_INT,
90                 .off1   = td_var_offset(iodepth),
91                 .help   = "Amount of IO buffers to keep in flight",
92                 .def    = "1",
93         },
94         {
95                 .name   = "iodepth_low",
96                 .type   = FIO_OPT_INT,
97                 .off1   = td_var_offset(iodepth_low),
98                 .help   = "Low water mark for queuing depth",
99         },
100         {
101                 .name   = "size",
102                 .type   = FIO_OPT_STR_VAL,
103                 .off1   = td_var_offset(total_file_size),
104                 .help   = "Size of device or file",
105         },
106         {
107                 .name   = "bs",
108                 .type   = FIO_OPT_STR_VAL_INT,
109                 .off1   = td_var_offset(bs[DDIR_READ]),
110                 .off2   = td_var_offset(bs[DDIR_WRITE]),
111                 .help   = "Block size unit",
112                 .def    = "4k",
113         },
114         {
115                 .name   = "bsrange",
116                 .type   = FIO_OPT_RANGE,
117                 .off1   = td_var_offset(min_bs[DDIR_READ]),
118                 .off2   = td_var_offset(max_bs[DDIR_READ]),
119                 .off3   = td_var_offset(min_bs[DDIR_WRITE]),
120                 .off4   = td_var_offset(max_bs[DDIR_WRITE]),
121                 .help   = "Set block size range (in more detail than bs)",
122         },
123         {
124                 .name   = "bs_unaligned",
125                 .type   = FIO_OPT_STR_SET,
126                 .off1   = td_var_offset(bs_unaligned),
127                 .help   = "Don't sector align IO buffer sizes",
128         },
129         {
130                 .name   = "offset",
131                 .type   = FIO_OPT_STR_VAL,
132                 .off1   = td_var_offset(start_offset),
133                 .help   = "Start IO from this offset",
134                 .def    = "0",
135         },
136         {
137                 .name   = "randrepeat",
138                 .type   = FIO_OPT_BOOL,
139                 .off1   = td_var_offset(rand_repeatable),
140                 .help   = "Use repeatable random IO pattern",
141                 .def    = "1",
142         },
143         {
144                 .name   = "norandommap",
145                 .type   = FIO_OPT_STR_SET,
146                 .off1   = td_var_offset(norandommap),
147                 .help   = "Accept potential duplicate random blocks",
148         },
149         {
150                 .name   = "nrfiles",
151                 .type   = FIO_OPT_INT,
152                 .off1   = td_var_offset(nr_files),
153                 .help   = "Split job workload between this number of files",
154                 .def    = "1",
155         },
156         {
157                 .name   = "file_service_type",
158                 .type   = FIO_OPT_STR,
159                 .cb     = str_file_service_cb,
160                 .help   = "How to select which file to service next",
161                 .def    = "roundrobin",
162                 .posval = { "random", "roundrobin" },
163         },
164         {
165                 .name   = "fsync",
166                 .type   = FIO_OPT_INT,
167                 .off1   = td_var_offset(fsync_blocks),
168                 .help   = "Issue fsync for writes every given number of blocks",
169                 .def    = "0",
170         },
171         {
172                 .name   = "direct",
173                 .type   = FIO_OPT_BOOL,
174                 .off1   = td_var_offset(odirect),
175                 .help   = "Use O_DIRECT IO (negates buffered)",
176                 .def    = "0",
177         },
178         {
179                 .name   = "buffered",
180                 .type   = FIO_OPT_BOOL,
181                 .off1   = td_var_offset(odirect),
182                 .neg    = 1,
183                 .help   = "Use buffered IO (negates direct)",
184                 .def    = "1",
185         },
186         {
187                 .name   = "overwrite",
188                 .type   = FIO_OPT_BOOL,
189                 .off1   = td_var_offset(overwrite),
190                 .help   = "When writing, set whether to overwrite current data",
191                 .def    = "0",
192         },
193         {
194                 .name   = "loops",
195                 .type   = FIO_OPT_INT,
196                 .off1   = td_var_offset(loops),
197                 .help   = "Number of times to run the job",
198                 .def    = "1",
199         },
200         {
201                 .name   = "numjobs",
202                 .type   = FIO_OPT_INT,
203                 .off1   = td_var_offset(numjobs),
204                 .help   = "Duplicate this job this many times",
205                 .def    = "1",
206         },
207         {
208                 .name   = "startdelay",
209                 .type   = FIO_OPT_INT,
210                 .off1   = td_var_offset(start_delay),
211                 .help   = "Only start job when this period has passed",
212                 .def    = "0",
213         },
214         {
215                 .name   = "runtime",
216                 .alias  = "timeout",
217                 .type   = FIO_OPT_STR_VAL_TIME,
218                 .off1   = td_var_offset(timeout),
219                 .help   = "Stop workload when this amount of time has passed",
220                 .def    = "0",
221         },
222         {
223                 .name   = "mem",
224                 .type   = FIO_OPT_STR,
225                 .cb     = str_mem_cb,
226                 .help   = "Backing type for IO buffers",
227                 .def    = "malloc",
228                 .posval =  { "malloc", "shm", "shmhuge", "mmap", "mmaphuge", },
229         },
230         {
231                 .name   = "verify",
232                 .type   = FIO_OPT_STR,
233                 .cb     = str_verify_cb,
234                 .help   = "Verify sum function",
235                 .def    = "0",
236                 .posval = { "crc32", "md5", },
237         },
238         {
239                 .name   = "write_iolog",
240                 .type   = FIO_OPT_STR_STORE,
241                 .off1   = td_var_offset(write_iolog_file),
242                 .help   = "Store IO pattern to file",
243         },
244         {
245                 .name   = "read_iolog",
246                 .type   = FIO_OPT_STR_STORE,
247                 .off1   = td_var_offset(read_iolog_file),
248                 .help   = "Playback IO pattern from file",
249         },
250         {
251                 .name   = "exec_prerun",
252                 .type   = FIO_OPT_STR_STORE,
253                 .off1   = td_var_offset(exec_prerun),
254                 .help   = "Execute this file prior to running job",
255         },
256         {
257                 .name   = "exec_postrun",
258                 .type   = FIO_OPT_STR_STORE,
259                 .off1   = td_var_offset(exec_postrun),
260                 .help   = "Execute this file after running job",
261         },
262 #ifdef FIO_HAVE_IOSCHED_SWITCH
263         {
264                 .name   = "ioscheduler",
265                 .type   = FIO_OPT_STR_STORE,
266                 .off1   = td_var_offset(ioscheduler),
267                 .help   = "Use this IO scheduler on the backing device",
268         },
269 #endif
270         {
271                 .name   = "zonesize",
272                 .type   = FIO_OPT_STR_VAL,
273                 .off1   = td_var_offset(zone_size),
274                 .help   = "Give size of an IO zone",
275                 .def    = "0",
276         },
277         {
278                 .name   = "zoneskip",
279                 .type   = FIO_OPT_STR_VAL,
280                 .off1   = td_var_offset(zone_skip),
281                 .help   = "Space between IO zones",
282                 .def    = "0",
283         },
284         {
285                 .name   = "lockmem",
286                 .type   = FIO_OPT_STR_VAL,
287                 .cb     = str_lockmem_cb,
288                 .help   = "Lock down this amount of memory",
289                 .def    = "0",
290         },
291         {
292                 .name   = "rwmixcycle",
293                 .type   = FIO_OPT_INT,
294                 .off1   = td_var_offset(rwmixcycle),
295                 .help   = "Cycle period for mixed read/write workloads (msec)",
296                 .def    = "500",
297         },
298         {
299                 .name   = "rwmixread",
300                 .type   = FIO_OPT_INT,
301                 .off1   = td_var_offset(rwmixread),
302                 .maxval = 100,
303                 .help   = "Percentage of mixed workload that is reads",
304                 .def    = "50",
305         },
306         {
307                 .name   = "rwmixwrite",
308                 .type   = FIO_OPT_INT,
309                 .off1   = td_var_offset(rwmixwrite),
310                 .maxval = 100,
311                 .help   = "Percentage of mixed workload that is writes",
312                 .def    = "50",
313         },
314         {
315                 .name   = "nice",
316                 .type   = FIO_OPT_INT,
317                 .off1   = td_var_offset(nice),
318                 .help   = "Set job CPU nice value",
319                 .minval = -19,
320                 .maxval = 20,
321                 .def    = "0",
322         },
323 #ifdef FIO_HAVE_IOPRIO
324         {
325                 .name   = "prio",
326                 .type   = FIO_OPT_INT,
327                 .cb     = str_prio_cb,
328                 .help   = "Set job IO priority value",
329                 .minval = 0,
330                 .maxval = 7,
331         },
332         {
333                 .name   = "prioclass",
334                 .type   = FIO_OPT_INT,
335                 .cb     = str_prioclass_cb,
336                 .help   = "Set job IO priority class",
337                 .minval = 0,
338                 .maxval = 3,
339         },
340 #endif
341         {
342                 .name   = "thinktime",
343                 .type   = FIO_OPT_INT,
344                 .off1   = td_var_offset(thinktime),
345                 .help   = "Idle time between IO buffers (usec)",
346                 .def    = "0",
347         },
348         {
349                 .name   = "thinktime_spin",
350                 .type   = FIO_OPT_INT,
351                 .off1   = td_var_offset(thinktime_spin),
352                 .help   = "Start thinktime by spinning this amount (usec)",
353                 .def    = "0",
354         },
355         {
356                 .name   = "thinktime_blocks",
357                 .type   = FIO_OPT_INT,
358                 .off1   = td_var_offset(thinktime_blocks),
359                 .help   = "IO buffer period between 'thinktime'",
360                 .def    = "1",
361         },
362         {
363                 .name   = "rate",
364                 .type   = FIO_OPT_INT,
365                 .off1   = td_var_offset(rate),
366                 .help   = "Set bandwidth rate",
367         },
368         {
369                 .name   = "ratemin",
370                 .type   = FIO_OPT_INT,
371                 .off1   = td_var_offset(ratemin),
372                 .help   = "The bottom limit accepted",
373         },
374         {
375                 .name   = "ratecycle",
376                 .type   = FIO_OPT_INT,
377                 .off1   = td_var_offset(ratecycle),
378                 .help   = "Window average for rate limits (msec)",
379                 .def    = "1000",
380         },
381         {
382                 .name   = "invalidate",
383                 .type   = FIO_OPT_BOOL,
384                 .off1   = td_var_offset(invalidate_cache),
385                 .help   = "Invalidate buffer/page cache prior to running job",
386                 .def    = "1",
387         },
388         {
389                 .name   = "sync",
390                 .type   = FIO_OPT_BOOL,
391                 .off1   = td_var_offset(sync_io),
392                 .help   = "Use O_SYNC for buffered writes",
393                 .def    = "0",
394         },
395         {
396                 .name   = "bwavgtime",
397                 .type   = FIO_OPT_INT,
398                 .off1   = td_var_offset(bw_avg_time),
399                 .help   = "Time window over which to calculate bandwidth (msec)",
400                 .def    = "500",
401         },
402         {
403                 .name   = "create_serialize",
404                 .type   = FIO_OPT_BOOL,
405                 .off1   = td_var_offset(create_serialize),
406                 .help   = "Serialize creating of job files",
407                 .def    = "1",
408         },
409         {
410                 .name   = "create_fsync",
411                 .type   = FIO_OPT_BOOL,
412                 .off1   = td_var_offset(create_fsync),
413                 .help   = "Fsync file after creation",
414                 .def    = "1",
415         },
416         {
417                 .name   = "cpuload",
418                 .type   = FIO_OPT_INT,
419                 .off1   = td_var_offset(cpuload),
420                 .help   = "Use this percentage of CPU",
421         },
422         {
423                 .name   = "cpuchunks",
424                 .type   = FIO_OPT_INT,
425                 .off1   = td_var_offset(cpucycle),
426                 .help   = "Length of the CPU burn cycles",
427         },
428 #ifdef FIO_HAVE_CPU_AFFINITY
429         {
430                 .name   = "cpumask",
431                 .type   = FIO_OPT_INT,
432                 .cb     = str_cpumask_cb,
433                 .help   = "CPU affinity mask",
434         },
435 #endif
436         {
437                 .name   = "end_fsync",
438                 .type   = FIO_OPT_BOOL,
439                 .off1   = td_var_offset(end_fsync),
440                 .help   = "Include fsync at the end of job",
441                 .def    = "0",
442         },
443         {
444                 .name   = "unlink",
445                 .type   = FIO_OPT_BOOL,
446                 .off1   = td_var_offset(unlink),
447                 .help   = "Unlink created files after job has completed",
448                 .def    = "0",
449         },
450         {
451                 .name   = "exitall",
452                 .type   = FIO_OPT_STR_SET,
453                 .cb     = str_exitall_cb,
454                 .help   = "Terminate all jobs when one exits",
455         },
456         {
457                 .name   = "stonewall",
458                 .type   = FIO_OPT_STR_SET,
459                 .off1   = td_var_offset(stonewall),
460                 .help   = "Insert a hard barrier between this job and previous",
461         },
462         {
463                 .name   = "thread",
464                 .type   = FIO_OPT_STR_SET,
465                 .off1   = td_var_offset(use_thread),
466                 .help   = "Use threads instead of forks",
467         },
468         {
469                 .name   = "write_bw_log",
470                 .type   = FIO_OPT_STR_SET,
471                 .off1   = td_var_offset(write_bw_log),
472                 .help   = "Write log of bandwidth during run",
473         },
474         {
475                 .name   = "write_lat_log",
476                 .type   = FIO_OPT_STR_SET,
477                 .off1   = td_var_offset(write_lat_log),
478                 .help   = "Write log of latency during run",
479         },
480         {
481                 .name   = "hugepage-size",
482                 .type   = FIO_OPT_STR_VAL,
483                 .off1   = td_var_offset(hugepage_size),
484                 .help   = "When using hugepages, specify size of each page",
485                 .def    = __stringify(FIO_HUGE_PAGE),
486         },
487         {
488                 .name = NULL,
489         },
490 };
491
492 #define FIO_JOB_OPTS    (sizeof(options) / sizeof(struct fio_option))
493 #define FIO_CMD_OPTS    (16)
494 #define FIO_GETOPT_JOB  (0x89988998)
495
496 /*
497  * Command line options. These will contain the above, plus a few
498  * extra that only pertain to fio itself and not jobs.
499  */
500 static struct option long_options[FIO_JOB_OPTS + FIO_CMD_OPTS] = {
501         {
502                 .name           = "output",
503                 .has_arg        = required_argument,
504                 .val            = 'o',
505         },
506         {
507                 .name           = "timeout",
508                 .has_arg        = required_argument,
509                 .val            = 't',
510         },
511         {
512                 .name           = "latency-log",
513                 .has_arg        = required_argument,
514                 .val            = 'l',
515         },
516         {
517                 .name           = "bandwidth-log",
518                 .has_arg        = required_argument,
519                 .val            = 'b',
520         },
521         {
522                 .name           = "minimal",
523                 .has_arg        = optional_argument,
524                 .val            = 'm',
525         },
526         {
527                 .name           = "version",
528                 .has_arg        = no_argument,
529                 .val            = 'v',
530         },
531         {
532                 .name           = "help",
533                 .has_arg        = no_argument,
534                 .val            = 'h',
535         },
536         {
537                 .name           = "cmdhelp",
538                 .has_arg        = required_argument,
539                 .val            = 'c',
540         },
541         {
542                 .name           = NULL,
543         },
544 };
545
546 static int def_timeout = 0;
547
548 static char fio_version_string[] = "fio 1.11";
549
550 static char **ini_file;
551 static int max_jobs = MAX_JOBS;
552
553 struct thread_data def_thread;
554 struct thread_data *threads = NULL;
555
556 int exitall_on_terminate = 0;
557 int terse_output = 0;
558 unsigned long long mlock_size = 0;
559 FILE *f_out = NULL;
560 FILE *f_err = NULL;
561
562 static int write_lat_log = 0;
563 int write_bw_log = 0;
564
565 /*
566  * Return a free job structure.
567  */
568 static struct thread_data *get_new_job(int global, struct thread_data *parent)
569 {
570         struct thread_data *td;
571
572         if (global)
573                 return &def_thread;
574         if (thread_number >= max_jobs)
575                 return NULL;
576
577         td = &threads[thread_number++];
578         *td = *parent;
579
580         td->thread_number = thread_number;
581         return td;
582 }
583
584 static void put_job(struct thread_data *td)
585 {
586         if (td == &def_thread)
587                 return;
588
589         if (td->error)
590                 fprintf(f_out, "fio: %s\n", td->verror);
591
592         memset(&threads[td->thread_number - 1], 0, sizeof(*td));
593         thread_number--;
594 }
595
596 /*
597  * Lazy way of fixing up options that depend on each other. We could also
598  * define option callback handlers, but this is easier.
599  */
600 static void fixup_options(struct thread_data *td)
601 {
602         if (!td->rwmixread && td->rwmixwrite)
603                 td->rwmixread = 100 - td->rwmixwrite;
604
605         if (td->write_iolog_file && td->read_iolog_file) {
606                 log_err("fio: read iolog overrides write_iolog\n");
607                 free(td->write_iolog_file);
608                 td->write_iolog_file = NULL;
609         }
610
611         if (td->io_ops->flags & FIO_SYNCIO)
612                 td->iodepth = 1;
613         else {
614                 if (!td->iodepth)
615                         td->iodepth = td->nr_files;
616         }
617
618         /*
619          * only really works for sequential io for now, and with 1 file
620          */
621         if (td->zone_size && !td->sequential && td->nr_files == 1)
622                 td->zone_size = 0;
623
624         /*
625          * Reads can do overwrites, we always need to pre-create the file
626          */
627         if (td_read(td) || td_rw(td))
628                 td->overwrite = 1;
629
630         if (!td->min_bs[DDIR_READ])
631                 td->min_bs[DDIR_READ]= td->bs[DDIR_READ];
632         if (!td->max_bs[DDIR_READ])
633                 td->max_bs[DDIR_READ] = td->bs[DDIR_READ];
634         if (!td->min_bs[DDIR_WRITE])
635                 td->min_bs[DDIR_WRITE]= td->bs[DDIR_WRITE];
636         if (!td->max_bs[DDIR_WRITE])
637                 td->max_bs[DDIR_WRITE] = td->bs[DDIR_WRITE];
638
639         td->rw_min_bs = min(td->min_bs[DDIR_READ], td->min_bs[DDIR_WRITE]);
640
641         if (td_read(td) && !td_rw(td))
642                 td->verify = 0;
643
644         if (td->norandommap && td->verify != VERIFY_NONE) {
645                 log_err("fio: norandommap given, verify disabled\n");
646                 td->verify = VERIFY_NONE;
647         }
648         if (td->bs_unaligned && (td->odirect || td->io_ops->flags & FIO_RAWIO))
649                 log_err("fio: bs_unaligned may not work with raw io\n");
650
651         /*
652          * O_DIRECT and char doesn't mix, clear that flag if necessary.
653          */
654         if (td->filetype == FIO_TYPE_CHAR && td->odirect)
655                 td->odirect = 0;
656
657         /*
658          * thinktime_spin must be less than thinktime
659          */
660         if (td->thinktime_spin > td->thinktime)
661                 td->thinktime_spin = td->thinktime;
662
663         /*
664          * The low water mark cannot be bigger than the iodepth
665          */
666         if (td->iodepth_low > td->iodepth || !td->iodepth_low)
667                 td->iodepth_low = td->iodepth;
668 }
669
/*
 * Format 'val' with a binary unit suffix, e.g. 4096 -> "4K".
 *
 * The value is divided by 1024 for as long as it is a non-zero exact
 * multiple of 1024, and the suffix matching the number of divisions is
 * appended. Values that are not a multiple of 1024 (and zero) are
 * printed as plain numbers.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL if
 * the allocation failed.
 */
static char *to_kmg(unsigned int val)
{
	static const char suffix[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	const size_t bufsize = 32;
	size_t steps = 0;
	char *buf;

	buf = malloc(bufsize);
	if (!buf)
		return NULL;

	while (val && !(val & 1023) && steps < sizeof(suffix)) {
		val >>= 10;
		steps++;
	}

	if (steps)
		snprintf(buf, bufsize, "%u%c", val, suffix[steps - 1]);
	else
		snprintf(buf, bufsize, "%u", val);

	return buf;
}
690
691 /*
692  * Adds a job to the list of things todo. Sanitizes the various options
693  * to make sure we don't have conflicts, and initializes various
694  * members of td.
695  */
696 static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
697 {
698         const char *ddir_str[] = { "read", "write", "randread", "randwrite",
699                                    "rw", NULL, "randrw" };
700         struct stat sb;
701         int numjobs, ddir, i;
702         struct fio_file *f;
703
704         /*
705          * the def_thread is just for options, it's not a real job
706          */
707         if (td == &def_thread)
708                 return 0;
709
710         assert(td->io_ops);
711
712         if (td->odirect)
713                 td->io_ops->flags |= FIO_RAWIO;
714
715         td->filetype = FIO_TYPE_FILE;
716         if (td->filename && !lstat(td->filename, &sb)) {
717                 if (S_ISBLK(sb.st_mode))
718                         td->filetype = FIO_TYPE_BD;
719                 else if (S_ISCHR(sb.st_mode))
720                         td->filetype = FIO_TYPE_CHAR;
721         }
722
723         fixup_options(td);
724
725         if (td->filename)
726                 td->nr_uniq_files = 1;
727         else
728                 td->nr_uniq_files = td->nr_files;
729
730         if (td->filetype == FIO_TYPE_FILE || td->filename) {
731                 char tmp[PATH_MAX];
732                 int len = 0;
733
734                 if (td->directory && td->directory[0] != '\0') {
735                         if (lstat(td->directory, &sb) < 0) {
736                                 log_err("fio: %s is not a directory\n", td->directory);
737                                 td_verror(td, errno, "lstat");
738                                 return 1;
739                         }
740                         if (!S_ISDIR(sb.st_mode)) {
741                                 log_err("fio: %s is not a directory\n", td->directory);
742                                 return 1;
743                         }
744                         len = sprintf(tmp, "%s/", td->directory);
745                 }
746
747                 td->files = malloc(sizeof(struct fio_file) * td->nr_files);
748
749                 for_each_file(td, f, i) {
750                         memset(f, 0, sizeof(*f));
751                         f->fd = -1;
752
753                         if (td->filename)
754                                 sprintf(tmp + len, "%s", td->filename);
755                         else
756                                 sprintf(tmp + len, "%s.%d.%d", jobname, td->thread_number, i);
757                         f->file_name = strdup(tmp);
758                 }
759         } else {
760                 td->nr_files = 1;
761                 td->files = malloc(sizeof(struct fio_file));
762                 f = &td->files[0];
763
764                 memset(f, 0, sizeof(*f));
765                 f->fd = -1;
766                 f->file_name = strdup(jobname);
767         }
768
769         for_each_file(td, f, i) {
770                 f->file_size = td->total_file_size / td->nr_files;
771                 f->file_offset = td->start_offset;
772         }
773                 
774         fio_sem_init(&td->mutex, 0);
775
776         td->ts.clat_stat[0].min_val = td->ts.clat_stat[1].min_val = ULONG_MAX;
777         td->ts.slat_stat[0].min_val = td->ts.slat_stat[1].min_val = ULONG_MAX;
778         td->ts.bw_stat[0].min_val = td->ts.bw_stat[1].min_val = ULONG_MAX;
779
780         if (td->stonewall && td->thread_number > 1)
781                 groupid++;
782
783         td->groupid = groupid;
784
785         if (setup_rate(td))
786                 goto err;
787
788         if (td->write_lat_log) {
789                 setup_log(&td->ts.slat_log);
790                 setup_log(&td->ts.clat_log);
791         }
792         if (td->write_bw_log)
793                 setup_log(&td->ts.bw_log);
794
795         if (!td->name)
796                 td->name = strdup(jobname);
797
798         ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2);
799
800         if (!terse_output) {
801                 if (!job_add_num) {
802                         if (td->io_ops->flags & FIO_CPUIO)
803                                 fprintf(f_out, "%s: ioengine=cpu, cpuload=%u, cpucycle=%u\n", td->name, td->cpuload, td->cpucycle);
804                         else {
805                                 char *c1, *c2, *c3, *c4;
806
807                                 c1 = to_kmg(td->min_bs[DDIR_READ]);
808                                 c2 = to_kmg(td->max_bs[DDIR_READ]);
809                                 c3 = to_kmg(td->min_bs[DDIR_WRITE]);
810                                 c4 = to_kmg(td->max_bs[DDIR_WRITE]);
811
812                                 fprintf(f_out, "%s: (g=%d): rw=%s, bs=%s-%s/%s-%s, ioengine=%s, iodepth=%u\n", td->name, td->groupid, ddir_str[ddir], c1, c2, c3, c4, td->io_ops->name, td->iodepth);
813
814                                 free(c1);
815                                 free(c2);
816                                 free(c3);
817                                 free(c4);
818                         }
819                 } else if (job_add_num == 1)
820                         fprintf(f_out, "...\n");
821         }
822
823         /*
824          * recurse add identical jobs, clear numjobs and stonewall options
825          * as they don't apply to sub-jobs
826          */
827         numjobs = td->numjobs;
828         while (--numjobs) {
829                 struct thread_data *td_new = get_new_job(0, td);
830
831                 if (!td_new)
832                         goto err;
833
834                 td_new->numjobs = 1;
835                 td_new->stonewall = 0;
836                 job_add_num = numjobs - 1;
837
838                 if (add_job(td_new, jobname, job_add_num))
839                         goto err;
840         }
841         return 0;
842 err:
843         put_job(td);
844         return -1;
845 }
846
847 /*
848  * Initialize the various random states we need (random io, block size ranges,
849  * read/write mix, etc).
850  */
851 int init_random_state(struct thread_data *td)
852 {
853         unsigned long seeds[5];
854         int fd, num_maps, blocks, i;
855         struct fio_file *f;
856
857         if (td->io_ops->flags & FIO_CPUIO)
858                 return 0;
859
860         fd = open("/dev/urandom", O_RDONLY);
861         if (fd == -1) {
862                 td_verror(td, errno, "open");
863                 return 1;
864         }
865
866         if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
867                 td_verror(td, EIO, "read");
868                 close(fd);
869                 return 1;
870         }
871
872         close(fd);
873
874         os_random_seed(seeds[0], &td->bsrange_state);
875         os_random_seed(seeds[1], &td->verify_state);
876         os_random_seed(seeds[2], &td->rwmix_state);
877         os_random_seed(seeds[3], &td->next_file_state);
878
879         if (td->sequential)
880                 return 0;
881
882         if (td->rand_repeatable)
883                 seeds[4] = FIO_RANDSEED * td->thread_number;
884
885         if (!td->norandommap) {
886                 for_each_file(td, f, i) {
887                         blocks = (f->real_file_size + td->rw_min_bs - 1) / td->rw_min_bs;
888                         num_maps = (blocks + BLOCKS_PER_MAP-1)/ BLOCKS_PER_MAP;
889                         f->file_map = malloc(num_maps * sizeof(long));
890                         f->num_maps = num_maps;
891                         memset(f->file_map, 0, num_maps * sizeof(long));
892                 }
893         }
894
895         os_random_seed(seeds[4], &td->random_state);
896         return 0;
897 }
898
899 static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
900 {
901 #ifdef FIO_HAVE_CPU_AFFINITY
902         unsigned int i;
903
904         CPU_ZERO(&cpumask);
905
906         for (i = 0; i < sizeof(int) * 8; i++) {
907                 if ((1 << i) & cpu)
908                         CPU_SET(i, &cpumask);
909         }
910 #endif
911 }
912
/*
 * Return 1 if 'line' holds nothing but whitespace/control characters, or if
 * its first other character starts a comment (';' or '#'). Return 0 as soon
 * as any other character is found first.
 */
static int is_empty_or_comment(char *line)
{
	const char *p;

	/* walk to the terminator instead of calling strlen() every round */
	for (p = line; *p != '\0'; p++) {
		/* <ctype.h> functions need an unsigned char value */
		const unsigned char c = (unsigned char) *p;

		if (c == ';' || c == '#')
			return 1;
		if (!isspace(c) && !iscntrl(c))
			return 0;
	}

	return 1;
}
928
929 static int str_rw_cb(void *data, const char *mem)
930 {
931         struct thread_data *td = data;
932
933         if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) {
934                 td->ddir = DDIR_READ;
935                 td->sequential = 1;
936                 return 0;
937         } else if (!strncmp(mem, "randread", 8)) {
938                 td->ddir = DDIR_READ;
939                 td->sequential = 0;
940                 return 0;
941         } else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) {
942                 td->ddir = DDIR_WRITE;
943                 td->sequential = 1;
944                 return 0;
945         } else if (!strncmp(mem, "randwrite", 9)) {
946                 td->ddir = DDIR_WRITE;
947                 td->sequential = 0;
948                 return 0;
949         } else if (!strncmp(mem, "rw", 2)) {
950                 td->ddir = DDIR_READ;
951                 td->iomix = 1;
952                 td->sequential = 1;
953                 return 0;
954         } else if (!strncmp(mem, "randrw", 6)) {
955                 td->ddir = DDIR_READ;
956                 td->iomix = 1;
957                 td->sequential = 0;
958                 return 0;
959         }
960
961         log_err("fio: data direction: read, write, randread, randwrite, rw, randrw\n");
962         return 1;
963 }
964
965 static int str_verify_cb(void *data, const char *mem)
966 {
967         struct thread_data *td = data;
968
969         if (!strncmp(mem, "0", 1)) {
970                 td->verify = VERIFY_NONE;
971                 return 0;
972         } else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1)) {
973                 td->verify = VERIFY_MD5;
974                 return 0;
975         } else if (!strncmp(mem, "crc32", 5)) {
976                 td->verify = VERIFY_CRC32;
977                 return 0;
978         }
979
980         log_err("fio: verify types: md5, crc32\n");
981         return 1;
982 }
983
/*
 * Look for a ":/foo/bar/file" suffix on an mmap/mmaphuge option value.
 * Returns NULL when 'str' has no ':'; otherwise the text after the first
 * ':' is run through strip_blank_front()/strip_blank_end() and a strdup()
 * of it is returned.
 */
static char *get_mmap_file(const char *str)
{
	char *file = strchr(str, ':');

	if (file == NULL)
		return NULL;

	/* step over the ':' itself */
	file++;
	strip_blank_front(&file);
	strip_blank_end(file);
	return strdup(file);
}
999
1000 static int str_mem_cb(void *data, const char *mem)
1001 {
1002         struct thread_data *td = data;
1003
1004         if (!strncmp(mem, "malloc", 6)) {
1005                 td->mem_type = MEM_MALLOC;
1006                 return 0;
1007         } else if (!strncmp(mem, "mmaphuge", 8)) {
1008 #ifdef FIO_HAVE_HUGETLB
1009                 /*
1010                  * mmaphuge must be appended with the actual file
1011                  */
1012                 td->mmapfile = get_mmap_file(mem);
1013                 if (!td->mmapfile) {
1014                         log_err("fio: mmaphuge:/path/to/file\n");
1015                         return 1;
1016                 }
1017
1018                 td->mem_type = MEM_MMAPHUGE;
1019                 return 0;
1020 #else
1021                 log_err("fio: mmaphuge not available\n");
1022                 return 1;
1023 #endif
1024         } else if (!strncmp(mem, "mmap", 4)) {
1025                 /*
1026                  * Check if the user wants file backed memory. It's ok
1027                  * if there's no file given, we'll just use anon mamp then.
1028                  */
1029                 td->mmapfile = get_mmap_file(mem);
1030                 td->mem_type = MEM_MMAP;
1031                 return 0;
1032         } else if (!strncmp(mem, "shmhuge", 7)) {
1033 #ifdef FIO_HAVE_HUGETLB
1034                 td->mem_type = MEM_SHMHUGE;
1035                 return 0;
1036 #else
1037                 log_err("fio: shmhuge not available\n");
1038                 return 1;
1039 #endif
1040         } else if (!strncmp(mem, "shm", 3)) {
1041                 td->mem_type = MEM_SHM;
1042                 return 0;
1043         }
1044
1045         log_err("fio: mem type: malloc, shm, shmhuge, mmap, mmaphuge\n");
1046         return 1;
1047 }
1048
1049 static int str_ioengine_cb(void *data, const char *str)
1050 {
1051         struct thread_data *td = data;
1052
1053         td->io_ops = load_ioengine(td, str);
1054         if (td->io_ops)
1055                 return 0;
1056
1057         log_err("fio: ioengine= libaio, posixaio, sync, syslet-rw, mmap, sgio, splice, cpu, null\n");
1058         log_err("fio: or specify path to dynamic ioengine module\n");
1059         return 1;
1060 }
1061
1062 static int str_lockmem_cb(void fio_unused *data, unsigned long *val)
1063 {
1064         mlock_size = *val;
1065         return 0;
1066 }
1067
1068 #ifdef FIO_HAVE_IOPRIO
1069 static int str_prioclass_cb(void *data, unsigned int *val)
1070 {
1071         struct thread_data *td = data;
1072
1073         td->ioprio |= *val << IOPRIO_CLASS_SHIFT;
1074         return 0;
1075 }
1076
1077 static int str_prio_cb(void *data, unsigned int *val)
1078 {
1079         struct thread_data *td = data;
1080
1081         td->ioprio |= *val;
1082         return 0;
1083 }
1084 #endif
1085
1086 static int str_exitall_cb(void)
1087 {
1088         exitall_on_terminate = 1;
1089         return 0;
1090 }
1091
1092 static int str_cpumask_cb(void *data, unsigned int *val)
1093 {
1094         struct thread_data *td = data;
1095
1096         fill_cpu_mask(td->cpumask, *val);
1097         return 0;
1098 }
1099
1100 static int str_file_service_cb(void *data, const char *str)
1101 {
1102         struct thread_data *td = data;
1103
1104         if (!strncmp(str, "random", 6)) {
1105                 td->file_service_type = FIO_FSERVICE_RANDOM;
1106                 return 0;
1107         } else if (!strncmp(str, "roundrobin", 10)) {
1108                 td->file_service_type = FIO_FSERVICE_RR;
1109                 return 0;
1110         }
1111
1112         log_err("fio: file_service= random, roundrobin\n");
1113         return 1;
1114 }
1115
1116 /*
1117  * This is our [ini] type file parser.
1118  */
1119 static int parse_jobs_ini(char *file, int stonewall_flag)
1120 {
1121         unsigned int global;
1122         struct thread_data *td;
1123         char *string, *name;
1124         fpos_t off;
1125         FILE *f;
1126         char *p;
1127         int ret = 0, stonewall;
1128
1129         f = fopen(file, "r");
1130         if (!f) {
1131                 perror("fopen job file");
1132                 return 1;
1133         }
1134
1135         string = malloc(4096);
1136         name = malloc(256);
1137         memset(name, 0, 256);
1138
1139         stonewall = stonewall_flag;
1140         do {
1141                 p = fgets(string, 4095, f);
1142                 if (!p)
1143                         break;
1144                 if (is_empty_or_comment(p))
1145                         continue;
1146                 if (sscanf(p, "[%255s]", name) != 1)
1147                         continue;
1148
1149                 global = !strncmp(name, "global", 6);
1150
1151                 name[strlen(name) - 1] = '\0';
1152
1153                 td = get_new_job(global, &def_thread);
1154                 if (!td) {
1155                         ret = 1;
1156                         break;
1157                 }
1158
1159                 /*
1160                  * Seperate multiple job files by a stonewall
1161                  */
1162                 if (!global && stonewall) {
1163                         td->stonewall = stonewall;
1164                         stonewall = 0;
1165                 }
1166
1167                 fgetpos(f, &off);
1168                 while ((p = fgets(string, 4096, f)) != NULL) {
1169                         if (is_empty_or_comment(p))
1170                                 continue;
1171
1172                         strip_blank_front(&p);
1173
1174                         if (p[0] == '[')
1175                                 break;
1176
1177                         strip_blank_end(p);
1178
1179                         fgetpos(f, &off);
1180
1181                         /*
1182                          * Don't break here, continue parsing options so we
1183                          * dump all the bad ones. Makes trial/error fixups
1184                          * easier on the user.
1185                          */
1186                         ret |= parse_option(p, options, td);
1187                 }
1188
1189                 if (!ret) {
1190                         fsetpos(f, &off);
1191                         ret = add_job(td, name, 0);
1192                 } else {
1193                         log_err("fio: job %s dropped\n", name);
1194                         put_job(td);
1195                 }
1196         } while (!ret);
1197
1198         free(string);
1199         free(name);
1200         fclose(f);
1201         return ret;
1202 }
1203
1204 static int fill_def_thread(void)
1205 {
1206         memset(&def_thread, 0, sizeof(def_thread));
1207
1208         if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) {
1209                 perror("sched_getaffinity");
1210                 return 1;
1211         }
1212
1213         /*
1214          * fill default options
1215          */
1216         fill_default_options(&def_thread, options);
1217
1218         def_thread.timeout = def_timeout;
1219         def_thread.write_bw_log = write_bw_log;
1220         def_thread.write_lat_log = write_lat_log;
1221
1222 #ifdef FIO_HAVE_DISK_UTIL
1223         def_thread.do_disk_util = 1;
1224 #endif
1225
1226         return 0;
1227 }
1228
1229 static void usage(void)
1230 {
1231         printf("%s\n", fio_version_string);
1232         printf("\t--output\tWrite output to file\n");
1233         printf("\t--timeout\tRuntime in seconds\n");
1234         printf("\t--latency-log\tGenerate per-job latency logs\n");
1235         printf("\t--bandwidth-log\tGenerate per-job bandwidth logs\n");
1236         printf("\t--minimal\tMinimal (terse) output\n");
1237         printf("\t--version\tPrint version info and exit\n");
1238         printf("\t--help\t\tPrint this page\n");
1239         printf("\t--cmdhelp=cmd\tPrint command help, \"all\" for all of them\n");
1240 }
1241
1242 static int parse_cmd_line(int argc, char *argv[])
1243 {
1244         struct thread_data *td = NULL;
1245         int c, ini_idx = 0, lidx, ret;
1246
1247         while ((c = getopt_long(argc, argv, "", long_options, &lidx)) != -1) {
1248                 switch (c) {
1249                 case 't':
1250                         def_timeout = atoi(optarg);
1251                         break;
1252                 case 'l':
1253                         write_lat_log = 1;
1254                         break;
1255                 case 'w':
1256                         write_bw_log = 1;
1257                         break;
1258                 case 'o':
1259                         f_out = fopen(optarg, "w+");
1260                         if (!f_out) {
1261                                 perror("fopen output");
1262                                 exit(1);
1263                         }
1264                         f_err = f_out;
1265                         break;
1266                 case 'm':
1267                         terse_output = 1;
1268                         break;
1269                 case 'h':
1270                         usage();
1271                         exit(0);
1272                 case 'c':
1273                         ret = show_cmd_help(options, optarg);
1274                         exit(ret);
1275                 case 'v':
1276                         printf("%s\n", fio_version_string);
1277                         exit(0);
1278                 case FIO_GETOPT_JOB: {
1279                         const char *opt = long_options[lidx].name;
1280                         char *val = optarg;
1281
1282                         if (!strncmp(opt, "name", 4) && td) {
1283                                 ret = add_job(td, td->name ?: "fio", 0);
1284                                 if (ret) {
1285                                         put_job(td);
1286                                         return 0;
1287                                 }
1288                                 td = NULL;
1289                         }
1290                         if (!td) {
1291                                 int global = !strncmp(val, "global", 6);
1292
1293                                 td = get_new_job(global, &def_thread);
1294                                 if (!td)
1295                                         return 0;
1296                         }
1297
1298                         ret = parse_cmd_option(opt, val, options, td);
1299                         if (ret) {
1300                                 log_err("fio: job dropped\n");
1301                                 put_job(td);
1302                                 td = NULL;
1303                         }
1304                         break;
1305                 }
1306                 default:
1307                         break;
1308                 }
1309         }
1310
1311         if (td) {
1312                 ret = add_job(td, td->name ?: "fio", 0);
1313                 if (ret)
1314                         put_job(td);
1315         }
1316
1317         while (optind < argc) {
1318                 ini_idx++;
1319                 ini_file = realloc(ini_file, ini_idx * sizeof(char *));
1320                 ini_file[ini_idx - 1] = strdup(argv[optind]);
1321                 optind++;
1322         }
1323
1324         return ini_idx;
1325 }
1326
1327 static void free_shm(void)
1328 {
1329         struct shmid_ds sbuf;
1330
1331         if (threads) {
1332                 shmdt((void *) threads);
1333                 threads = NULL;
1334                 shmctl(shm_id, IPC_RMID, &sbuf);
1335         }
1336 }
1337
1338 /*
1339  * The thread area is shared between the main process and the job
1340  * threads/processes. So setup a shared memory segment that will hold
1341  * all the job info.
1342  */
1343 static int setup_thread_area(void)
1344 {
1345         /*
1346          * 1024 is too much on some machines, scale max_jobs if
1347          * we get a failure that looks like too large a shm segment
1348          */
1349         do {
1350                 size_t size = max_jobs * sizeof(struct thread_data);
1351
1352                 shm_id = shmget(0, size, IPC_CREAT | 0600);
1353                 if (shm_id != -1)
1354                         break;
1355                 if (errno != EINVAL) {
1356                         perror("shmget");
1357                         break;
1358                 }
1359
1360                 max_jobs >>= 1;
1361         } while (max_jobs);
1362
1363         if (shm_id == -1)
1364                 return 1;
1365
1366         threads = shmat(shm_id, NULL, 0);
1367         if (threads == (void *) -1) {
1368                 perror("shmat");
1369                 return 1;
1370         }
1371
1372         atexit(free_shm);
1373         return 0;
1374 }
1375
1376 /*
1377  * Copy the fio options into the long options map, so we mirror
1378  * job and cmd line options.
1379  */
1380 static void dupe_job_options(void)
1381 {
1382         struct fio_option *o;
1383         unsigned int i;
1384
1385         i = 0;
1386         while (long_options[i].name)
1387                 i++;
1388
1389         o = &options[0];
1390         while (o->name) {
1391                 long_options[i].name = o->name;
1392                 long_options[i].val = FIO_GETOPT_JOB;
1393                 if (o->type == FIO_OPT_STR_SET)
1394                         long_options[i].has_arg = no_argument;
1395                 else
1396                         long_options[i].has_arg = required_argument;
1397
1398                 i++;
1399                 o++;
1400                 assert(i < FIO_JOB_OPTS + FIO_CMD_OPTS);
1401         }
1402 }
1403
1404 int parse_options(int argc, char *argv[])
1405 {
1406         int job_files, i;
1407
1408         f_out = stdout;
1409         f_err = stderr;
1410
1411         options_init(options);
1412
1413         dupe_job_options();
1414
1415         if (setup_thread_area())
1416                 return 1;
1417         if (fill_def_thread())
1418                 return 1;
1419
1420         job_files = parse_cmd_line(argc, argv);
1421
1422         for (i = 0; i < job_files; i++) {
1423                 if (fill_def_thread())
1424                         return 1;
1425                 if (parse_jobs_ini(ini_file[i], i))
1426                         return 1;
1427                 free(ini_file[i]);
1428         }
1429
1430         free(ini_file);
1431
1432         if (!thread_number) {
1433                 log_err("No jobs defined(s)\n");
1434                 return 1;
1435         }
1436
1437         return 0;
1438 }