[PATCH] fio: fixes for quit-on-rate-error and status dumps
[disktools.git] / fio.c
1 /*
2  * fio - the flexible io tester
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 #include <fcntl.h>
25 #include <string.h>
26 #include <errno.h>
27 #include <signal.h>
28 #include <time.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <sys/time.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/wait.h>
35 #include <semaphore.h>
36 #include <sys/ipc.h>
37 #include <sys/shm.h>
38 #include <asm/unistd.h>
39
40 #define MAX_JOBS        (1024)	/* get_new_job() refuses to hand out more job slots than this */
41
42 /*
43  * assume we don't have _get either, if _set isn't defined
44  */
45 #ifndef __NR_ioprio_set
46
47 #if defined(__i386__)
48 #define __NR_ioprio_set         289
49 #define __NR_ioprio_get         290
50 #elif defined(__powerpc__) || defined(__powerpc64__)
51 #define __NR_ioprio_set         273
52 #define __NR_ioprio_get         274
53 #elif defined(__x86_64__)
54 #define __NR_ioprio_set         251
55 #define __NR_ioprio_get         252
56 #elif defined(__ia64__)
57 #define __NR_ioprio_set         1274
58 #define __NR_ioprio_get         1275
59 #elif defined(__alpha__)
60 #define __NR_ioprio_set         442
61 #define __NR_ioprio_get         443
62 #elif defined(__s390x__) || defined(__s390__)
63 #define __NR_ioprio_set         282
64 #define __NR_ioprio_get         283
65 #else
66 #error "Unsupported arch"
67 #endif /* per-architecture fallback syscall numbers */
68
69 #endif /* !__NR_ioprio_set */
70
/*
 * Thin wrapper around the raw ioprio_set system call: the three arguments
 * are handed to syscall() unchanged and its result is returned to the
 * caller.
 */
static int ioprio_set(int which, int who, int ioprio)
{
	long rc = syscall(__NR_ioprio_set, which, who, ioprio);

	return rc;
}
75
76 enum {	/* values for the "which" argument of ioprio_set() */
77         IOPRIO_WHO_PROCESS = 1,
78         IOPRIO_WHO_PGRP,
79         IOPRIO_WHO_USER,
80 };
81
82 #define IOPRIO_CLASS_SHIFT      13	/* add_job(): ioprio = (prioclass << shift) | prio */
83
84 #define MASK    (4095)	/* alignment mask used by ALIGN(): 4096-byte boundaries */
85
86 #define DEF_BS          (4096)	/* default block size in bytes (global_bs) */
87 #define DEF_TIMEOUT     (30)	/* default value handed to alarm() by run_threads() */
88 #define DEF_RATE_CYCLE  (1000)	/* default ratecycle; check_min_rate() compares it against msec */
89 #define DEF_ODIRECT     (1)	/* default for the -o option */
90 #define DEF_SEQUENTIAL  (1)	/* default for the -s option */
91 #define DEF_WRITESTAT   (0)	/* default for the -w option */
92 #define DEF_RAND_REPEAT (1)	/* default for the -r option */
93
94 #define ALIGN(buf)      (char *) (((unsigned long) (buf) + MASK) & ~(MASK))	/* round buf up to the next (MASK + 1)-byte boundary */
95
96 static int sequential = DEF_SEQUENTIAL;	/* -s: access pattern copied into each new job */
97 static int write_stat = DEF_WRITESTAT;	/* -w: when set, init_stat_file() opens <file>.stat */
98 static int repeatable = DEF_RAND_REPEAT;	/* -r: when set, random jobs use the fixed seed */
99 static int timeout = DEF_TIMEOUT;	/* -t: passed to alarm(); 0 disables the alarm */
100 static int odirect = DEF_ODIRECT;	/* -o */
101 static int global_bs = DEF_BS;	/* -b (given in KiB, stored in bytes): default job block size */
102 static int rate_quit = 1;	/* -R: terminate all jobs when one misses its minimum rate */
103
104 static int thread_number;	/* number of job slots currently in use */
105 static char *ini_file;	/* -f: strdup()ed job file name */
106
107 static int shm_id;	/* NOTE(review): presumably the id of the segment holding `threads`; set outside this view */
108
109 static cpu_set_t def_cpumask;	/* copied into every new job by get_new_job() */
110
111 enum {	/* I/O direction of a job (thread_data.ddir) */
112         DDIR_READ = 0,
113         DDIR_WRITE,
114 };
115
116 /*
117  * thread life cycle
118  */
119 enum {
120         TD_NOT_CREATED = 0,	/* state of a freshly zeroed job slot */
121         TD_CREATED,
122         TD_STARTED,
123         TD_EXITED,	/* set by thread_main() on its way out */
124         TD_REAPED,	/* set by reap_threads() before it waits for the pid */
125 };
126
127 struct thread_data {	/* per-job state; one entry of the `threads` table */
128         char file_name[256];	/* file to read/write; also the base name of the .stat log */
129         int thread_number;	/* 1-based job number (slot index + 1) */
130         int error;	/* errno-style code recorded on failure */
131         int fd;	/* data file descriptor, -1 while not open */
132         int stat_fd;	/* stat log descriptor, -1 when there is none */
133         pid_t pid;	/* getpid() of the process running the job */
134         volatile int terminate;	/* set to make the I/O loop stop */
135         volatile int runstate;	/* one of the TD_* states */
136         unsigned int ddir;	/* DDIR_READ or DDIR_WRITE */
137         unsigned int ioprio;	/* (class << IOPRIO_CLASS_SHIFT) | prio; 0 skips ioprio_set() */
138         unsigned int sequential;	/* non-zero: consecutive blocks, zero: random blocks */
139         unsigned int bs;	/* block size in bytes */
140         unsigned int odirect;	/* non-zero: open the file with O_DIRECT */
141         unsigned int delay_sleep;	/* usec slept before each I/O */
142         unsigned int fsync_blocks;	/* fsync after every N blocks (buffered writes only) */
143         unsigned int start_delay;	/* start delay; unit not visible in this part of the file - TODO confirm */
144         cpu_set_t cpumask;	/* affinity applied with sched_setaffinity() */
145
146         unsigned int rate;	/* target rate, KiB/sec; 0 disables throttling */
147         unsigned int ratemin;	/* minimum acceptable rate checked by check_min_rate() */
148         unsigned int ratecycle;	/* length of a min-rate sample window, msec */
149         unsigned long rate_usec_cycle;	/* usec budget per block at the target rate */
150         long rate_pending_usleep;	/* accumulated sleep debt in usec, may go negative */
151         unsigned long rate_blocks;	/* io_blocks when the current min-rate sample began */
152         struct timeval lastrate;	/* time the current min-rate sample began */
153
154         unsigned long max_latency;      /* msec */
155         unsigned long min_latency;      /* msec */
156         unsigned long runtime;          /* msec (do_thread_io() stores mtime_since() here) */
157         unsigned long blocks;	/* number of blocks the job will process */
158         unsigned long io_blocks;	/* blocks completed so far */
159         unsigned long last_block;	/* next block number for sequential I/O */
160         sem_t mutex;	/* the job waits on this before starting its I/O loop */
161         struct drand48_data random_state;	/* generator state for random offsets */
162
163         /*
164          * bandwidth stat
165          */
166         unsigned long stat_time;	/* total msec accounted so far */
167         unsigned long stat_time_last;	/* msec since the last sample was written */
168         unsigned long stat_blocks_last;	/* blocks since the last sample was written */
169
170         struct timeval start;	/* time the I/O loop began */
171 };
172
173 static struct thread_data *threads;	/* job table; detached with shmdt() in free_shm() */
174
175 static sem_t startup_sem;	/* posted by a job once its setup has succeeded or failed */
176
177 static void sig_handler(int sig)
178 {
179         int i;
180
181         printf("got signal %d\n", sig);
182
183         for (i = 0; i < thread_number; i++) {
184                 struct thread_data *td = &threads[i];
185
186                 td->terminate = 1;
187         }
188 }
189
190 static void terminate_threads(void)
191 {
192         int i;
193
194         for (i = 0; i < thread_number; i++) {
195                 struct thread_data *td = &threads[i];
196
197                 td->terminate = 1;
198                 td->start_delay = 0;
199         }
200 }
201
202 static int init_random_state(struct thread_data *td)
203 {
204         unsigned long seed = 123;
205
206         if (td->sequential)
207                 return 0;
208
209         if (!repeatable) {
210                 int fd = open("/dev/random", O_RDONLY);
211
212                 if (fd == -1) {
213                         td->error = errno;
214                         return 1;
215                 }
216
217                 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
218                         td->error = EIO;
219                         close(fd);
220                         return 1;
221                 }
222
223                 close(fd);
224         }
225
226         srand48_r(seed, &td->random_state);
227         return 0;
228 }
229
230 static void shutdown_stat_file(struct thread_data *td)
231 {
232         if (td->stat_fd != -1) {
233                 fsync(td->stat_fd);
234                 close(td->stat_fd);
235         }
236 }
237
238 static int init_stat_file(struct thread_data *td)
239 {
240         char n[256];
241
242         if (!write_stat)
243                 return 0;
244
245         sprintf(n, "%s.stat", td->file_name);
246         td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
247         if (td->stat_fd == -1) {
248                 td->error = errno;
249                 return 1;
250         }
251
252         return 0;
253 }
254
/*
 * Microseconds elapsed from *s to *e.
 *
 * The difference is computed in integer arithmetic.  If *e lies before *s
 * (for example because the wall clock was stepped backwards) 0 is returned,
 * rather than converting a negative value to unsigned long.
 */
static unsigned long utime_since(const struct timeval *s, const struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - (long long) s->tv_usec;

	if (usec < 0)
		return 0;

	return (unsigned long) usec;
}
270
/*
 * Milliseconds elapsed from *s to *e, truncated towards zero.
 *
 * The difference is computed in integer arithmetic.  If *e lies before *s
 * (for example because the wall clock was stepped backwards) 0 is returned,
 * rather than converting a negative value to unsigned long.
 */
static unsigned long mtime_since(const struct timeval *s, const struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - (long long) s->tv_usec;

	if (usec < 0)
		return 0;

	return (unsigned long) (usec / 1000);
}
287
288 static unsigned long get_next_offset(struct thread_data *td)
289 {
290         unsigned long b;
291         long r;
292
293         if (!td->sequential) {
294                 lrand48_r(&td->random_state, &r);
295                 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
296         } else {
297                 b = td->last_block;
298                 td->last_block++;
299         }
300
301         return b * td->bs;
302 }
303
304 static void add_stat_sample(struct thread_data *td, unsigned long msec)
305 {
306         char sample[256];
307
308         if (!td->stat_fd)
309                 return;
310
311 #if 0
312         sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
313         write(td->stat_fd, sample, strlen(sample));
314 #else
315         td->stat_time += msec;
316         td->stat_time_last += msec;
317         td->stat_blocks_last++;
318
319         if (td->stat_time_last >= 500) {
320                 unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
321
322                 td->stat_time_last = 0;
323                 td->stat_blocks_last = 0;
324                 sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
325                 //sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
326                 write(td->stat_fd, sample, strlen(sample));
327         }
328 #endif
329 }
330
/*
 * Sleep for `usec` microseconds, resuming after signal interruptions.
 * Non-positive values return immediately.
 *
 * The interval is split into seconds and nanoseconds, because nanosleep()
 * rejects tv_nsec values of one second or more, and the remaining time
 * (seconds included) is carried over when the sleep is interrupted.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	if (usec <= 0)
		return;

	req.tv_sec = usec / 1000000;
	req.tv_nsec = (long) (usec % 1000000) * 1000;

	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
		req = rem;
}
345
346 static void rate_throttle(struct thread_data *td, unsigned long time_spent)
347 {
348         if (!td->rate)
349                 return;
350
351         if (time_spent < td->rate_usec_cycle) {
352                 unsigned long s = td->rate_usec_cycle - time_spent;
353
354                 td->rate_pending_usleep += s;
355                 if (td->rate_pending_usleep >= 100000) {
356                         usec_sleep(td->rate_pending_usleep);
357                         td->rate_pending_usleep = 0;
358                 }
359         } else {
360                 long overtime = time_spent - td->rate_usec_cycle;
361
362                 td->rate_pending_usleep -= overtime;
363         }
364 }
365
366 static int check_min_rate(struct thread_data *td, struct timeval *now)
367 {
368         unsigned long spent = mtime_since(&td->start, now);
369         unsigned long rate;
370
371         /*
372          * allow a 2 second settle period in the beginning
373          */
374         if (spent < 2000)
375                 return 0;
376
377         /*
378          * if rate blocks is set, sample is running
379          */
380         if (td->rate_blocks) {
381                 spent = mtime_since(&td->lastrate, now);
382                 if (spent < td->ratecycle)
383                         return 0;
384
385                 rate = ((td->io_blocks - td->rate_blocks) * td->bs) / spent;
386                 if (rate < td->ratemin) {
387                         printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
388                         if (rate_quit)
389                                 terminate_threads();
390                         return 1;
391                 }
392         }
393
394         td->rate_blocks = td->io_blocks;
395         memcpy(&td->lastrate, now, sizeof(*now));
396         return 0;
397 }
398
399 #define should_fsync(td)        ((td)->ddir == DDIR_WRITE && !(td)->odirect)
400
401 static void do_thread_io(struct thread_data *td)
402 {
403         struct timeval s, e;
404         char *buffer, *ptr;
405         unsigned long blocks, msec, usec;
406
407         ptr = malloc(td->bs + MASK);
408         buffer = ALIGN(ptr);
409
410         gettimeofday(&td->start, NULL);
411
412         if (td->ratemin)
413                 memcpy(&td->lastrate, &td->start, sizeof(td->start));
414
415         for (blocks = 0; blocks < td->blocks; blocks++) {
416                 off_t offset = get_next_offset(td);
417                 int ret;
418
419                 if (td->terminate)
420                         break;
421
422                 if (lseek(td->fd, offset, SEEK_SET) == -1) {
423                         td->error = errno;
424                         break;
425                 }
426
427                 if (td->delay_sleep)
428                         usec_sleep(td->delay_sleep);
429
430                 gettimeofday(&s, NULL);
431
432                 if (td->ddir == DDIR_READ)
433                         ret = read(td->fd, buffer, td->bs);
434                 else
435                         ret = write(td->fd, buffer, td->bs);
436
437                 if (ret < (int) td->bs) {
438                         if (ret == -1)
439                                 td->error = errno;
440                         break;
441                 }
442
443                 td->io_blocks++;
444
445                 if (should_fsync(td) && td->fsync_blocks &&
446                     (td->io_blocks % td->fsync_blocks) == 0)
447                         fsync(td->fd);
448
449                 gettimeofday(&e, NULL);
450
451                 usec = utime_since(&s, &e);
452
453                 rate_throttle(td, usec);
454
455                 if (check_min_rate(td, &e)) {
456                         td->error = ENODATA;
457                         break;
458                 }
459
460                 msec = usec / 1000;
461                 add_stat_sample(td, msec);
462
463                 if (msec < td->min_latency)
464                         td->min_latency = msec;
465                 if (msec > td->max_latency)
466                         td->max_latency = msec;
467         }
468
469         if (should_fsync(td))
470                 fsync(td->fd);
471
472         gettimeofday(&e, NULL);
473         td->runtime = mtime_since(&td->start, &e);
474
475         free(ptr);
476 }
477         
478 static void *thread_main(int shm_id, int offset, char *argv[])
479 {
480         struct thread_data *td;
481         void *data;
482         struct stat st;
483         int ret = 1, flags;
484
485         data = shmat(shm_id, NULL, 0);
486         td = data + offset * sizeof(struct thread_data);
487         td->pid = getpid();
488
489         td->fd = -1;
490
491         if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
492                 td->error = errno;
493                 goto err;
494         }
495
496         printf("Thread (%s) (pid=%u) (f=%s) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name);
497         fflush(stdout);
498
499         sprintf(argv[0], "fio%d", offset);
500
501         flags = 0;
502         if (td->odirect)
503                 flags |= O_DIRECT;
504
505         if (td->ddir == DDIR_READ)
506                 td->fd = open(td->file_name, flags | O_RDONLY);
507         else
508                 td->fd = open(td->file_name, flags | O_WRONLY | O_CREAT | O_TRUNC, 0644);
509
510         if (td->fd == -1) {
511                 td->error = errno;
512                 goto err;
513         }
514
515         if (init_random_state(td))
516                 goto out;
517         if (init_stat_file(td))
518                 goto out;
519
520         if (td->ddir == DDIR_READ) {
521                 if (fstat(td->fd, &st) == -1) {
522                         td->error = errno;
523                         goto out;
524                 }
525
526                 td->blocks = st.st_size / td->bs;
527                 if (!td->blocks) {
528                         td->error = EINVAL;
529                         goto out;
530                 }
531         } else
532                 td->blocks = 1024 * 1024 * 1024 / td->bs;
533
534         if (td->ioprio) {
535                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
536                         td->error = errno;
537                         goto out;
538                 }
539         }
540
541         sem_post(&startup_sem);
542         sem_wait(&td->mutex);
543         do_thread_io(td);
544         ret = 0;
545
546 out:
547         shutdown_stat_file(td);
548 err:
549         if (td->fd != -1)
550                 close(td->fd);
551         if (ret)
552                 sem_post(&startup_sem);
553
554         td->runstate = TD_EXITED;
555         shmdt(data);
556         return NULL;
557 }
558
559 static void free_shm(void)
560 {
561         shmdt(threads);
562 }
563
564 static void show_thread_status(struct thread_data *td)
565 {
566         int prio, prio_class;
567         unsigned long bw = 0;
568
569         if (!td->io_blocks && !td->error)
570                 return;
571
572         if (td->runtime)
573                 bw = (td->io_blocks * td->bs) / td->runtime;
574
575         prio = td->ioprio & 0xff;
576         prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
577
578         printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->io_blocks * td->bs >> 20, bw);
579 }
580
581 static int setup_rate(struct thread_data *td)
582 {
583         int nr_reads_per_sec;
584
585         if (!td->rate)
586                 return 0;
587
588         if (td->rate < td->ratemin) {
589                 fprintf(stderr, "min rate larger than nominal rate\n");
590                 return -1;
591         }
592
593         nr_reads_per_sec = td->rate * 1024 / td->bs;
594         td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
595         td->rate_pending_usleep = 0;
596         return 0;
597 }
598
599 static struct thread_data *get_new_job(void)
600 {
601         struct thread_data *td;
602
603         if (thread_number >= MAX_JOBS)
604                 return NULL;
605
606         td = &threads[thread_number++];
607         memset(td, 0, sizeof(*td));
608
609         td->thread_number = thread_number;
610         td->ddir = DDIR_READ;
611         td->bs = global_bs;
612         td->odirect = 1;
613         td->delay_sleep = 0;
614         td->rate = 0;
615         td->ratecycle = DEF_RATE_CYCLE;
616         td->sequential = sequential;
617         td->ioprio = 0;
618         memcpy(&td->cpumask, &def_cpumask, sizeof(td->cpumask));
619
620         return td;
621 }
622
623 static void put_job(struct thread_data *td)
624 {
625         memset(&threads[td->thread_number - 1], 0, sizeof(*td));
626         thread_number--;
627 }
628
629 static int add_job(struct thread_data *td, const char *filename, int prioclass,
630                    int prio)
631 {
632         strcpy(td->file_name, filename);
633         td->stat_fd = -1;
634         sem_init(&td->mutex, 1, 0);
635         td->min_latency = 10000000;
636         td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
637
638         if (setup_rate(td))
639                 return -1;
640
641         printf("Client%d: file=%s, rw=%d, prio=%d, seq=%d, odir=%d, bs=%d, rate=%d\n", td->thread_number, filename, td->ddir, td->ioprio, td->sequential, td->odirect, td->bs, td->rate);
642         return 0;
643 }
644
/*
 * Clear *cpumask and then set CPU i in it for every bit i that is set in
 * `cpu`.
 */
static void fill_cpu_mask_bits(cpu_set_t *cpumask, unsigned int cpu)
{
	unsigned int i;

	CPU_ZERO(cpumask);

	for (i = 0; i < sizeof(cpu) * 8; i++) {
		if (cpu & (1U << i))
			CPU_SET(i, cpumask);
	}
}

/*
 * fill_cpu_mask(mask, cpu): existing call sites pass the cpu_set_t itself.
 * As a function taking the set by value this only ever changed a copy, so
 * the wrapper passes the address of the caller's object instead.
 */
#define fill_cpu_mask(cpumask, cpu)	fill_cpu_mask_bits(&(cpumask), (cpu))
656
/*
 * Copy one option value from `input` to `output`: everything up to, but
 * not including, the first ',' or '}' or the end of the string.  The
 * result is NUL terminated; `output` must be large enough for it.
 */
static void fill_option(const char *input, char *output)
{
	size_t len = strcspn(input, ",}");

	memcpy(output, input, len);
	output[len] = '\0';
}
669
670 /*
671  * job key words:
672  *
673  * file=
674  * bs=
675  * rw=
676  * direct=
677  */
678 static void parse_jobs_cmd(int argc, char *argv[], int index)
679 {
680         struct thread_data *td;
681         unsigned int prio, prioclass, cpu;
682         char *string, *filename, *p, *c;
683         int i;
684
685         string = malloc(256);
686         filename = malloc(256);
687
688         for (i = index; i < argc; i++) {
689                 p = argv[i];
690
691                 c = strpbrk(p, "{");
692                 if (!c)
693                         break;
694
695                 filename[0] = 0;
696
697                 td = get_new_job();
698                 if (!td)
699                         break;
700
701                 prioclass = 2;
702                 prio = 4;
703
704                 c = strstr(p, "rw=");
705                 if (c) {
706                         c += 3;
707                         if (*c == '0')
708                                 td->ddir = DDIR_READ;
709                         else
710                                 td->ddir = DDIR_WRITE;
711                 }
712
713                 c = strstr(p, "prio=");
714                 if (c) {
715                         c += 5;
716                         prio = *c - '0';
717                 }
718
719                 c = strstr(p, "prioclass=");
720                 if (c) {
721                         c += 10;
722                         prioclass = *c - '0';
723                 }
724
725                 c = strstr(p, "file=");
726                 if (c) {
727                         c += 5;
728                         fill_option(c, filename);
729                 }
730
731                 c = strstr(p, "bs=");
732                 if (c) {
733                         c += 3;
734                         fill_option(c, string);
735                         td->bs = strtoul(string, NULL, 10);
736                         td->bs <<= 10;
737                 }
738
739                 c = strstr(p, "direct=");
740                 if (c) {
741                         c += 7;
742                         if (*c != '0')
743                                 td->odirect = 1;
744                         else
745                                 td->odirect = 0;
746                 }
747
748                 c = strstr(p, "delay=");
749                 if (c) {
750                         c += 6;
751                         fill_option(c, string);
752                         td->delay_sleep = strtoul(string, NULL, 10);
753                 }
754
755                 c = strstr(p, "rate=");
756                 if (c) {
757                         c += 5;
758                         fill_option(c, string);
759                         td->rate = strtoul(string, NULL, 10);
760                 }
761
762                 c = strstr(p, "ratemin=");
763                 if (c) {
764                         c += 8;
765                         fill_option(c, string);
766                         td->ratemin = strtoul(string, NULL, 10);
767                 }
768
769                 c = strstr(p, "ratecycle=");
770                 if (c) {
771                         c += 10;
772                         fill_option(c, string);
773                         td->ratecycle = strtoul(string, NULL, 10);
774                 }
775
776                 c = strstr(p, "cpumask=");
777                 if (c) {
778                         c += 8;
779                         fill_option(c, string);
780                         cpu = strtoul(string, NULL, 10);
781                         fill_cpu_mask(td->cpumask, cpu);
782                 }
783
784                 c = strstr(p, "fsync=");
785                 if (c) {
786                         c += 6;
787                         fill_option(c, string);
788                         td->fsync_blocks = strtoul(string, NULL, 10);
789                 }
790
791                 c = strstr(p, "startdelay=");
792                 if (c) {
793                         c += 11;
794                         fill_option(c, string);
795                         td->start_delay = strtoul(string, NULL, 10);
796                 }
797
798                 c = strstr(p, "random");
799                 if (c)
800                         td->sequential = 0;
801                 c = strstr(p, "sequential");
802                 if (c)
803                         td->sequential = 1;
804
805                 if (add_job(td, filename, prioclass, prio))
806                         put_job(td);
807         }
808
809         free(string);
810         free(filename);
811 }
812
/*
 * Parse a "<name>=<number>" setting from the start of line `p`; white
 * space around the '=' is accepted.  The number is stored in *val.
 *
 * The value is scanned with %u to match the unsigned destination, and a
 * single pattern is enough because white space in a scanf format also
 * matches no white space at all.
 *
 * Returns 0 when the setting was found and stored, 1 otherwise (*val is
 * then left untouched).
 */
static int check_int(const char *p, const char *name, unsigned int *val)
{
	char fmt[128];
	int len;

	len = snprintf(fmt, sizeof(fmt), "%s = %%u", name);
	if (len < 0 || (size_t) len >= sizeof(fmt))
		return 1;

	if (sscanf(p, fmt, val) == 1)
		return 0;

	return 1;
}
827
/*
 * Returns 1 when `line` holds nothing but white space and control
 * characters (which includes the empty string), 0 otherwise.
 *
 * Characters are examined as unsigned char, as the <ctype.h> functions
 * require, and the string is walked once instead of calling strlen() on
 * every iteration.
 */
static int is_empty(const char *line)
{
	const unsigned char *c;

	for (c = (const unsigned char *) line; *c; c++) {
		if (!isspace(*c) && !iscntrl(*c))
			return 0;
	}

	return 1;
}
838
839 static int parse_jobs_ini(char *file)
840 {
841         unsigned int prioclass, prio, cpu;
842         struct thread_data *td;
843         char *string, *name;
844         fpos_t off;
845         FILE *f;
846         char *p;
847
848         f = fopen(file, "r");
849         if (!f) {
850                 perror("fopen");
851                 return 1;
852         }
853
854         string = malloc(4096);
855         name = malloc(256);
856
857         while ((p = fgets(string, 4096, f)) != NULL) {
858                 if (sscanf(p, "[%s]", name) != 1)
859                         continue;
860
861                 name[strlen(name) - 1] = '\0';
862
863                 td = get_new_job();
864                 if (!td)
865                         break;
866
867                 prioclass = 2;
868                 prio = 4;
869
870                 fgetpos(f, &off);
871                 while ((p = fgets(string, 4096, f)) != NULL) {
872                         if (is_empty(p))
873                                 break;
874                         if (!check_int(p, "bs", &td->bs)) {
875                                 td->bs <<= 10;
876                                 fgetpos(f, &off);
877                                 continue;
878                         }
879                         if (!check_int(p, "rw", &td->ddir)) {
880                                 fgetpos(f, &off);
881                                 continue;
882                         }
883                         if (!check_int(p, "prio", &prio)) {
884                                 fgetpos(f, &off);
885                                 continue;
886                         }
887                         if (!check_int(p, "prioclass", &prioclass)) {
888                                 fgetpos(f, &off);
889                                 continue;
890                         }
891                         if (!check_int(p, "direct", &td->odirect)) {
892                                 fgetpos(f, &off);
893                                 continue;
894                         }
895                         if (!check_int(p, "rate", &td->rate)) {
896                                 fgetpos(f, &off);
897                                 continue;
898                         }
899                         if (!check_int(p, "ratemin", &td->ratemin)) {
900                                 fgetpos(f, &off);
901                                 continue;
902                         }
903                         if (!check_int(p, "ratecycle", &td->ratecycle)) {
904                                 fgetpos(f, &off);
905                                 continue;
906                         }
907                         if (!check_int(p, "delay", &td->delay_sleep)) {
908                                 fgetpos(f, &off);
909                                 continue;
910                         }
911                         if (!check_int(p, "cpumask", &cpu)) {
912                                 fill_cpu_mask(td->cpumask, cpu);
913                                 fgetpos(f, &off);
914                                 continue;
915                         }
916                         if (!check_int(p, "fsync", &td->fsync_blocks)) {
917                                 fgetpos(f, &off);
918                                 continue;
919                         }
920                         if (!check_int(p, "startdelay", &td->start_delay)) {
921                                 fgetpos(f, &off);
922                                 continue;
923                         }
924                         if (!strcmp(p, "sequential")) {
925                                 td->sequential = 1;
926                                 fgetpos(f, &off);
927                                 continue;
928                         }
929                         if (!strcmp(p, "random")) {
930                                 td->sequential = 0;
931                                 fgetpos(f, &off);
932                                 continue;
933                         }
934                 }
935                 fsetpos(f, &off);
936
937                 if (add_job(td, name, prioclass, prio))
938                         put_job(td);
939         }
940
941         free(string);
942         free(name);
943         return 0;
944 }
945
946 static int parse_options(int argc, char *argv[])
947 {
948         int i;
949
950         for (i = 1; i < argc; i++) {
951                 char *parm = argv[i];
952
953                 if (parm[0] != '-')
954                         break;
955
956                 parm++;
957                 switch (*parm) {
958                         case 's':
959                                 parm++;
960                                 sequential = !!atoi(parm);
961                                 break;
962                         case 'b':
963                                 parm++;
964                                 global_bs = atoi(parm);
965                                 global_bs <<= 10;
966                                 if (!global_bs) {
967                                         printf("bad block size\n");
968                                         global_bs = DEF_BS;
969                                 }
970                                 break;
971                         case 't':
972                                 parm++;
973                                 timeout = atoi(parm);
974                                 break;
975                         case 'w':
976                                 parm++;
977                                 write_stat = !!atoi(parm);
978                                 break;
979                         case 'r':
980                                 parm++;
981                                 repeatable = !!atoi(parm);
982                                 break;
983                         case 'R':
984                                 parm++;
985                                 rate_quit = !!atoi(parm);
986                                 break;
987                         case 'o':
988                                 parm++;
989                                 odirect = !!atoi(parm);
990                                 break;
991                         case 'f':
992                                 if (i + 1 >= argc) {
993                                         printf("-f needs file as arg\n");
994                                         break;
995                                 }
996                                 ini_file = strdup(argv[i+1]);
997                                 break;
998                         default:
999                                 printf("bad option %s\n", argv[i]);
1000                                 break;
1001                 }
1002         }
1003
1004         return i;
1005 }
1006
1007 static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
1008 {
1009         int i;
1010
1011         for (i = 0; i < thread_number; i++) {
1012                 struct thread_data *td = &threads[i];
1013
1014                 if (td->runstate != TD_EXITED)
1015                         continue;
1016
1017                 td->runstate = TD_REAPED;
1018                 waitpid(td->pid, NULL, 0);
1019                 (*nr_running)--;
1020                 (*m_rate) -= td->ratemin;
1021                 (*t_rate) -= td->rate;
1022                 printf("Threads now running: %d", *nr_running);
1023                 if (*m_rate || *t_rate)
1024                         printf(", rate %d/%dKiB/sec", *t_rate, *m_rate);
1025                 printf("\n");
1026         }
1027 }
1028
1029 static void run_threads(char *argv[])
1030 {
1031         struct timeval genesis, now;
1032         struct thread_data *td;
1033         unsigned long spent;
1034         int i, todo, nr_running, m_rate, t_rate;
1035
1036         gettimeofday(&genesis, NULL);
1037
1038         printf("Starting %d threads\n", thread_number);
1039         fflush(stdout);
1040
1041         if (timeout) {
1042                 signal(SIGALRM, sig_handler);
1043                 alarm(timeout);
1044         }
1045
1046         todo = thread_number;
1047         nr_running = 0;
1048         m_rate = t_rate = 0;
1049
1050         while (todo) {
1051                 for (i = 0; i < thread_number; i++) {
1052                         td = &threads[i];
1053
1054                         if (td->runstate != TD_NOT_CREATED)
1055                                 continue;
1056
1057                         /*
1058                          * never got a chance to start, killed by other
1059                          * thread for some reason
1060                          */
1061                         if (td->terminate) {
1062                                 todo--;
1063                                 continue;
1064                         }
1065
1066                         if (td->start_delay) {
1067                                 gettimeofday(&now, NULL);
1068                                 spent = mtime_since(&genesis, &now);
1069
1070                                 if (td->start_delay * 1000 > spent)
1071                                         continue;
1072                         }
1073
1074                         td->runstate = TD_CREATED;
1075                         sem_init(&startup_sem, 1, 1);
1076                         todo--;
1077
1078                         if (fork())
1079                                 sem_wait(&startup_sem);
1080                         else {
1081                                 thread_main(shm_id, i, argv);
1082                                 exit(0);
1083                         }
1084                 }
1085
1086                 for (i = 0; i < thread_number; i++) {
1087                         struct thread_data *td = &threads[i];
1088
1089                         if (td->runstate == TD_CREATED) {
1090                                 td->runstate = TD_STARTED;
1091                                 nr_running++;
1092                                 m_rate += td->ratemin;
1093                                 t_rate += td->rate;
1094                                 sem_post(&td->mutex);
1095
1096                                 printf("Threads now running: %d", nr_running);
1097                                 if (m_rate || t_rate)
1098                                         printf(", rate %d/%dKiB/sec", t_rate, m_rate);
1099                                 printf("\n");
1100                         }
1101                 }
1102
1103                 reap_threads(&nr_running, &t_rate, &m_rate);
1104
1105                 if (todo)
1106                         usleep(100000);
1107         }
1108
1109         while (nr_running) {
1110                 reap_threads(&nr_running, &t_rate, &m_rate);
1111                 usleep(10000);
1112         }
1113 }
1114
1115 int main(int argc, char *argv[])
1116 {
1117         static unsigned long max_run[2], min_run[2], total_blocks[2];
1118         static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
1119         static unsigned long read_mb, write_mb, read_agg, write_agg;
1120         int i;
1121
1122         shm_id = shmget(0, MAX_JOBS * sizeof(struct thread_data), IPC_CREAT | 0600);
1123         if (shm_id == -1) {
1124                 perror("shmget");
1125                 return 1;
1126         }
1127
1128         threads = shmat(shm_id, NULL, 0);
1129         if (threads == (void *) -1 ) {
1130                 perror("shmat");
1131                 return 1;
1132         }
1133
1134         atexit(free_shm);
1135
1136         if (sched_getaffinity(getpid(), sizeof(def_cpumask), &def_cpumask) == -1) {
1137                 perror("sched_getaffinity");
1138                 return 1;
1139         }
1140
1141         i = parse_options(argc, argv);
1142
1143         if (ini_file) {
1144                 if (parse_jobs_ini(ini_file))
1145                         return 1;
1146         } else
1147                 parse_jobs_cmd(argc, argv, i);
1148
1149         if (!thread_number) {
1150                 printf("Nothing to do\n");
1151                 return 1;
1152         }
1153
1154         printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
1155
1156         run_threads(argv);
1157
1158         min_bw[0] = min_run[0] = ~0UL;
1159         min_bw[1] = min_run[1] = ~0UL;
1160         minl[0] = minl[1] = ~0UL;
1161         for (i = 0; i < thread_number; i++) {
1162                 struct thread_data *td = &threads[i];
1163                 unsigned long bw = 0;
1164
1165                 if (td->error)
1166                         goto show_stat;
1167
1168                 if (td->runtime < min_run[td->ddir])
1169                         min_run[td->ddir] = td->runtime;
1170                 if (td->runtime > max_run[td->ddir])
1171                         max_run[td->ddir] = td->runtime;
1172
1173                 if (td->runtime)
1174                         bw = (td->io_blocks * td->bs) / td->runtime;
1175                 if (bw < min_bw[td->ddir])
1176                         min_bw[td->ddir] = bw;
1177                 if (bw > max_bw[td->ddir])
1178                         max_bw[td->ddir] = bw;
1179                 if (td->max_latency < minl[td->ddir])
1180                         minl[td->ddir] = td->max_latency;
1181                 if (td->max_latency > maxl[td->ddir])
1182                         maxl[td->ddir] = td->max_latency;
1183
1184                 total_blocks[td->ddir] += td->io_blocks;
1185
1186                 if (td->ddir == DDIR_READ) {
1187                         read_mb += (td->bs * td->io_blocks) >> 20;
1188                         if (td->runtime)
1189                                 read_agg += (td->io_blocks * td->bs) / td->runtime;
1190                 }
1191                 if (td->ddir == DDIR_WRITE) {
1192                         write_mb += (td->bs * td->io_blocks) >> 20;
1193                         if (td->runtime)
1194                                 write_agg += (td->io_blocks * td->bs) / td->runtime;
1195                 }
1196
1197 show_stat:
1198                 show_thread_status(td);
1199         }
1200
1201         printf("Run status:\n");
1202         if (max_run[DDIR_READ])
1203                 printf("   READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
1204         if (max_run[DDIR_WRITE])
1205                 printf("  WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
1206
1207         return 0;
1208 }