Added fio, the flexible io tester.
[disktools.git] / fio.c
CommitLineData
892199bd
JA
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <asm/unistd.h>
17
/*
 * Fallback syscall numbers for ioprio_set/ioprio_get, used when the system
 * headers do not provide them. Only __NR_ioprio_set is tested: assume we
 * don't have _get either, if _set isn't defined, and define both together.
 * An architecture without an entry below fails the build.
 */
#ifndef __NR_ioprio_set

#if defined(__i386__)
#define __NR_ioprio_set 289
#define __NR_ioprio_get 290
#elif defined(__powerpc__) || defined(__powerpc64__)
#define __NR_ioprio_set 273
#define __NR_ioprio_get 274
#elif defined(__x86_64__)
#define __NR_ioprio_set 251
#define __NR_ioprio_get 252
#elif defined(__ia64__)
#define __NR_ioprio_set 1274
#define __NR_ioprio_get 1275
#elif defined(__alpha__)
#define __NR_ioprio_set 442
#define __NR_ioprio_get 443
#elif defined(__s390x__) || defined(__s390__)
#define __NR_ioprio_set 282
#define __NR_ioprio_get 283
#else
#error "Unsupported arch"
#endif

#endif
46
/*
 * Issue the raw ioprio_set system call.
 *
 * which/who/ioprio are handed to the kernel unchanged. Returns whatever
 * syscall() returns, narrowed to int (-1 with errno set on failure).
 */
static int ioprio_set(int which, int who, int ioprio)
{
	long rc = syscall(__NR_ioprio_set, which, who, ioprio);

	return rc;
}
51
/*
 * Selector values for the first ("which") argument of ioprio_set().
 * Numbering starts at 1.
 */
enum {
	IOPRIO_WHO_PROCESS = 1,
	IOPRIO_WHO_PGRP = 2,
	IOPRIO_WHO_USER = 3,
};
57
/* Left shift applied to the priority class when building an ioprio value. */
#define IOPRIO_CLASS_SHIFT	13

/* Default block size in bytes, and the mask used to align I/O buffers. */
#define BS		(4096)
#define MASK		(BS - 1)

/* Default run time limit in seconds, and the maximum number of jobs. */
#define TIMEOUT		(30)
#define MAX_THREADS	(32)

/* Round an address up to the next (MASK + 1)-byte boundary. */
#define ALIGN(buf)	(((unsigned long) (buf) + MASK) & ~(MASK))
67
68static int sequential = 1;
69static int write_stat = 0;
70static int repeatable = 1;
71static int thread_number;
72static int timeout = TIMEOUT;
73static int odirect = 1;
74static int global_bs = BS;
75
76static int shm_id;
77
78#define DDIR_READ (0)
79#define DDIR_WRITE (1)
80
/*
 * Per-job state. One instance per job lives in the shared memory segment
 * so that the parent and the forked worker both see it.
 */
struct thread_data {
	char file_name[256];		/* file the job reads or writes */
	int thread_number;		/* 1-based job index, assigned in add_job() */
	int error;			/* errno-style code recorded on failure */
	int fd;				/* descriptor of file_name */
	int stat_fd;			/* descriptor of the .stat file, -1 if none */
	pid_t pid;			/* pid of the worker process */
	int terminate;			/* set by sig_handler() to stop the I/O loop */
	int ddir;			/* DDIR_READ or DDIR_WRITE */
	int ioprio;			/* value passed to ioprio_set(), -1 skips the call */
	int sequential;			/* non-zero: walk blocks in order; zero: random */
	int bs;				/* block size in bytes */
	int odirect;			/* non-zero: open the file with O_DIRECT */
	int delay_sleep;		/* usec to busy-wait before each I/O, 0 for none */
	unsigned long max_latency;	/* msec */
	unsigned long min_latency;	/* msec */
	unsigned long runtime;		/* msec (filled from mtime_since()) */
	unsigned long blocks;		/* number of blocks the job will process */
	unsigned long blocks_read;	/* blocks completed so far, reads or writes */
	unsigned long last_block;	/* next block index for sequential jobs */
	sem_t mutex;			/* worker waits on this before starting I/O */
	sem_t done_mutex;		/* posted by the worker when it is finished */
	struct drand48_data random_state;	/* state for lrand48_r() */

	/*
	 * bandwidth stat
	 */
	unsigned long stat_time;	/* total msec of I/O accounted so far */
	unsigned long stat_time_last;	/* msec accumulated since the last sample */
	unsigned long stat_blocks_last;	/* blocks completed since the last sample */
};
112
/* Job table; points into the shared memory segment attached in main(). */
static struct thread_data *threads;
/* Per-index data direction filled in by the -c option. */
static int thread_ddir[MAX_THREADS];

/*
 * Posted by a worker once its setup is done (or has failed); main() waits
 * on it after each fork().
 * NOTE(review): this semaphore is an ordinary static object, not part of
 * the shared segment, so after fork() the parent and child presumably each
 * operate on their own copy - confirm whether the handshake is effective.
 */
static sem_t startup_sem;
117
118void sig_handler(int sig)
119{
120 int i;
121
122 for (i = 0; i < thread_number; i++) {
123 struct thread_data *td = &threads[i];
124
125 td->terminate = 1;
126 }
127}
128
129int init_random_state(struct thread_data *td)
130{
131 unsigned long seed = 123;
132
133 if (td->sequential)
134 return 0;
135
136 if (!repeatable) {
137 int fd = open("/dev/random", O_RDONLY);
138
139 if (fd == -1) {
140 td->error = errno;
141 return 1;
142 }
143
144 if (read(fd, &seed, sizeof(seed)) < sizeof(seed)) {
145 td->error = EIO;
146 close(fd);
147 return 1;
148 }
149
150 close(fd);
151 }
152
153 srand48_r(seed, &td->random_state);
154 return 0;
155}
156
157void shutdown_stat_file(struct thread_data *td)
158{
159 if (td->stat_fd != -1) {
160 fsync(td->stat_fd);
161 close(td->stat_fd);
162 }
163}
164
165int init_stat_file(struct thread_data *td)
166{
167 char *n;
168
169 if (!write_stat)
170 return 0;
171
172 n = malloc(256);
173 sprintf(n, "%s.stat", td->file_name);
174 td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
175 if (td->stat_fd == -1) {
176 free(n);
177 td->error = errno;
178 return 1;
179 }
180
181 free(n);
182 return 0;
183}
184
/*
 * Microseconds elapsed from *s to *e.
 *
 * Returns 0 when *e is not later than *s, so a clock that steps backwards
 * never produces a huge bogus interval.
 */
unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - s->tv_usec;
	if (usec < 0)
		return 0;

	return (unsigned long) usec;
}
200
/*
 * Milliseconds elapsed from *s to *e, truncated towards zero.
 *
 * Returns 0 when *e is not later than *s, so a clock that steps backwards
 * never produces a huge bogus interval.
 */
unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - s->tv_usec;
	if (usec < 0)
		return 0;

	return (unsigned long) (usec / 1000);
}
217
/*
 * Whole seconds elapsed from *s to *e, truncated towards zero.
 * A negative interval is reported as 0.
 */
unsigned long time_since(struct timeval *s, struct timeval *e)
{
	double whole = e->tv_sec - s->tv_sec;
	double frac = e->tv_usec - s->tv_usec;
	double elapsed;

	/* borrow one second when the microsecond part went negative */
	if (whole > 0 && frac < 0) {
		whole -= 1;
		frac += 1000000;
	}

	elapsed = whole + frac / (double) 1000000;

	return elapsed < 0 ? 0 : (unsigned long) elapsed;
}
235
236unsigned long get_next_offset(struct thread_data *td)
237{
238 unsigned long b;
239 long r;
240
241 if (!td->sequential) {
242 lrand48_r(&td->random_state, &r);
243 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
244 } else {
245 b = td->last_block;
246 td->last_block++;
247 }
248
249 return b * td->bs;
250}
251
252void add_stat_sample(struct thread_data *td, unsigned long block, unsigned long msec)
253{
254 char sample[256];
255
256 if (!td->stat_fd)
257 return;
258
259#if 0
260 sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
261 write(td->stat_fd, sample, strlen(sample));
262#else
263 td->stat_time += msec;
264 td->stat_time_last += msec;
265 td->stat_blocks_last++;
266
267 if (td->stat_time_last >= 500) {
268 unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
269
270 td->stat_time_last = 0;
271 td->stat_blocks_last = 0;
272 sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
273 //sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
274 write(td->stat_fd, sample, strlen(sample));
275 }
276#endif
277}
278
/*
 * Busy-wait for at least usec microseconds by polling gettimeofday().
 *
 * A zero or negative delay returns immediately; a negative value must
 * never reach the unsigned comparison below, where it would look like an
 * enormous delay and spin forever.
 */
void delay_sleep(int usec)
{
	struct timeval start, now;

	if (usec <= 0)
		return;

	gettimeofday(&start, NULL);

	do {
		gettimeofday(&now, NULL);
	} while (utime_since(&start, &now) < (unsigned long) usec);
}
294
295void do_thread_io(struct thread_data *td)
296{
297 struct timeval s, e, start;
298 char *buffer, *ptr;
299 unsigned long blocks, msec;
300
301 ptr = malloc(td->bs+MASK);
302 buffer = (char *) ALIGN(ptr);
303
304 gettimeofday(&start, NULL);
305
306 for (blocks = 0; blocks < td->blocks; blocks++) {
307 off_t offset = get_next_offset(td);
308 int ret;
309
310 if (td->terminate)
311 break;
312
313 if (lseek(td->fd, offset, SEEK_SET) == -1) {
314 td->error = errno;
315 break;
316 }
317
318 if (td->delay_sleep)
319 delay_sleep(td->delay_sleep);
320
321 gettimeofday(&s, NULL);
322
323 if (td->ddir == DDIR_READ)
324 ret = read(td->fd, buffer, td->bs);
325 else
326 ret = write(td->fd, buffer, td->bs);
327
328 gettimeofday(&e, NULL);
329
330 if (ret < td->bs) {
331 if (ret == -1)
332 td->error = errno;
333 break;
334 }
335
336 msec = mtime_since(&s, &e);
337
338 add_stat_sample(td, offset / td->bs, msec);
339
340 td->blocks_read++;
341
342 //if (td->ddir == DDIR_WRITE && !(td->blocks_read % 512))
343 // fsync(td->fd);
344
345 if (msec < td->min_latency)
346 td->min_latency = msec;
347 if (msec > td->max_latency)
348 td->max_latency = msec;
349 }
350
351 if (td->ddir == DDIR_WRITE && !td->odirect)
352 fsync(td->fd);
353
354 gettimeofday(&e, NULL);
355 td->runtime = mtime_since(&start, &e);
356
357 free(ptr);
358}
359
360void *thread_main(int shm_id, int offset, char *argv[])
361{
362 struct thread_data *td;
363 void *data;
364 struct stat *statbuf = NULL;
365 int ret = 1, flags;
366
367 data = shmat(shm_id, NULL, 0);
368 td = data + offset * sizeof(struct thread_data);
369 td->pid = getpid();
370
371 printf("Thread (%s) (pid=%u) (f=%s) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name);
372 fflush(stdout);
373
374 sprintf(argv[0], "%s%d\n", argv[0], offset);
375
376 if (td->ddir == DDIR_READ)
377 flags = O_RDONLY;
378 else
379 flags = O_WRONLY | O_CREAT | O_TRUNC;
380
381 if (td->odirect)
382 flags |= O_DIRECT;
383
384 td->fd = open(td->file_name, flags);
385 if (td->fd == -1) {
386 td->error = errno;
387 goto err;
388 }
389
390 if (init_random_state(td))
391 goto out;
392 if (init_stat_file(td))
393 goto out;
394
395 if (td->ddir == DDIR_READ) {
396 statbuf = malloc(sizeof(*statbuf));
397 if (fstat(td->fd, statbuf) == -1) {
398 td->error = errno;
399 goto out;
400 }
401
402 td->blocks = statbuf->st_size / td->bs;
403 if (!td->blocks) {
404 td->error = EINVAL;
405 goto out;
406 }
407 } else
408 td->blocks = 1024 * 1024 * 1024 / td->bs;
409
410 if (td->ioprio != -1) {
411 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
412 td->error = errno;
413 goto out;
414 }
415 }
416
417 sem_post(&startup_sem);
418 sem_wait(&td->mutex);
419 do_thread_io(td);
420 ret = 0;
421
422out:
423 close(td->fd);
424 if (statbuf)
425 free(statbuf);
426 shutdown_stat_file(td);
427err:
428 sem_post(&td->done_mutex);
429 if (ret)
430 sem_post(&startup_sem);
431 shmdt(td);
432 return NULL;
433}
434
435void free_shm(void)
436{
437 shmdt(threads);
438}
439
440void show_thread_status(struct thread_data *td)
441{
442 int prio, prio_class;
443 unsigned long bw = 0;
444
445 if (td->runtime)
446 bw = (td->blocks_read * td->bs) / td->runtime;
447
448 prio = td->ioprio & 0xff;
449 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
450
451 printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->blocks_read * td->bs >> 20, bw);
452}
453
/*
 * Print a synopsis of the command line to stdout.
 *
 * Option values are attached directly to the option letter (for example
 * -b64), and every job is a single argument containing a '{'.
 */
void usage(char *progname)
{
	printf("%s: [-s0|1] [-b<kb>] [-t<sec>] [-w0|1] [-r0|1] [-o0|1] [-c<0|1,...>] job...\n", progname);
	printf("  job: {file=<name>[,rw=0|1][,bs=<kb>][,direct=0|1][,prio=<n>][,prioclass=<n>][,delay=<usec>][,random|sequential]}\n");
}
458
459void add_job(const char *filename, int rw, int bs, int direct, int prio, int random, int delay)
460{
461 struct thread_data *td = &threads[thread_number];
462
463 strcpy(td->file_name, filename);
464 td->thread_number = thread_number + 1;
465 td->stat_fd = -1;
466 sem_init(&td->mutex, 1, 1);
467 sem_init(&td->done_mutex, 1, 0);
468 td->min_latency = 10000000;
469 td->ddir = rw;
470 td->ioprio = prio;
471 td->sequential = !random;
472 td->odirect = direct;
473 td->bs = bs;
474 td->delay_sleep = delay;
475
476 thread_number++;
477}
478
/*
 * Copy one option value from input to output.
 *
 * Copying stops at the first ',', '}' or end of string; output is always
 * NUL-terminated. At most 255 characters are copied, so output must have
 * room for at least 256 bytes; longer values are truncated.
 */
void fill_option(const char *input, char *output)
{
	enum { OPTION_MAX = 256 };
	int i = 0;

	while (i < OPTION_MAX - 1 && input[i] != ',' && input[i] != '}' && input[i] != '\0') {
		output[i] = input[i];
		i++;
	}

	output[i] = '\0';
}
491
492/*
493 * job key words:
494 *
495 * file=
496 * bs=
497 * rw=
498 * direct=
499 */
500int parse_jobs(int argc, char *argv[], int index)
501{
502 int rw, bs, direct, prio, random, prioclass, delay;
503 char *string, *filename, *p, *c;
504 int i;
505
506 string = malloc(256);
507 filename = malloc(256);
508
509 for (i = index; i < argc; i++) {
510 p = argv[i];
511
512 c = strpbrk(p, "{");
513 if (!c)
514 break;
515
516 filename[0] = 0;
517 rw = DDIR_READ;
518 bs = global_bs;
519 direct = 1;
520 prio = 4;
521 random = !sequential;
522 prioclass = 2;
523 delay = 0;
524
525 c = strstr(p, "rw=");
526 if (c) {
527 c += 3;
528 if (*c == '0')
529 rw = DDIR_READ;
530 else
531 rw = DDIR_WRITE;
532 }
533
534 c = strstr(p, "prio=");
535 if (c) {
536 c += 5;
537 prio = *c - '0';
538 }
539
540 c = strstr(p, "prioclass=");
541 if (c) {
542 c += 10;
543 prioclass = *c - '0';
544 }
545
546 c = strstr(p, "file=");
547 if (c) {
548 c += 5;
549 fill_option(c, filename);
550 }
551
552 c = strstr(p, "bs=");
553 if (c) {
554 c += 3;
555 fill_option(c, string);
556 bs = strtoul(string, NULL, 10);
557 bs <<= 10;
558 }
559
560 c = strstr(p, "direct=");
561 if (c) {
562 c += 7;
563 if (*c != '0')
564 direct = 1;
565 else
566 direct = 0;
567 }
568
569 c = strstr(p, "delay=");
570 if (c) {
571 c += 6;
572 fill_option(c, string);
573 delay = strtoul(string, NULL, 10);
574 }
575
576 c = strstr(p, "random");
577 if (c)
578 random = 1;
579 c = strstr(p, "sequential");
580 if (c)
581 random = 0;
582
583 add_job(filename, rw, bs, direct, (prioclass << IOPRIO_CLASS_SHIFT) | prio, random, delay);
584 }
585
586 return thread_number;
587}
588
589int parse_options(int argc, char *argv[])
590{
591 int i, j;
592
593 for (i = 1; i < argc; i++) {
594 char *parm = argv[i];
595
596 if (parm[0] != '-')
597 break;
598
599 parm++;
600 switch (*parm) {
601 case 's':
602 parm++;
603 sequential = !!atoi(parm);
604 break;
605 case 'b':
606 parm++;
607 global_bs = atoi(parm);
608 global_bs <<= 10;
609 break;
610 case 't':
611 parm++;
612 timeout = atoi(parm);
613 break;
614 case 'w':
615 parm++;
616 write_stat = !!atoi(parm);
617 break;
618 case 'r':
619 parm++;
620 repeatable = !!atoi(parm);
621 break;
622 case 'c': {
623 char *c;
624 j = 0;
625 parm++;
626 while ((c = strsep(&parm, ",")) != NULL) {
627 int rw = DDIR_READ;
628
629 if (*c == '1')
630 rw = DDIR_WRITE;
631
632 thread_ddir[j] = rw;
633 j++;
634 }
635 break;
636 }
637 case 'o':
638 parm++;
639 odirect = !!atoi(parm);
640 break;
641 default:
642 printf("bad option %s\n", argv[1]);
643 break;
644 }
645 }
646
647 if (global_bs <= 0)
648 global_bs = BS;
649 if (timeout <= 0)
650 timeout = TIMEOUT;
651
652 printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
653 return i;
654}
655
656int main(int argc, char *argv[])
657{
658 static unsigned long max_run[2], min_run[2], total_blocks[2];
659 static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
660 static unsigned long read_mb, write_mb, read_agg, write_agg;
661 int i, jobs;
662
663 if (argc - 1 > MAX_THREADS) {
664 printf("max %d threads\n", MAX_THREADS);
665 return 1;
666 }
667
668 shm_id = shmget(0, (argc - 1) * sizeof(struct thread_data), IPC_CREAT);
669 if (shm_id == -1) {
670 perror("shmget");
671 return 1;
672 }
673
674 threads = shmat(shm_id, NULL, 0);
675
676 atexit(free_shm);
677
678 for (i = 0; i < MAX_THREADS; i++)
679 thread_ddir[i] = DDIR_READ;
680
681 i = parse_options(argc, argv);
682 jobs = parse_jobs(argc, argv, i);
683
684 if (!jobs) {
685 printf("Nothing to do\n");
686 return 1;
687 }
688
689 for (i = 0; i < jobs; i++) {
690 sem_init(&startup_sem, 1, 1);
691
692 if (fork())
693 sem_wait(&startup_sem);
694 else {
695 thread_main(shm_id, i, argv);
696 exit(0);
697 }
698 }
699
700 if (!thread_number) {
701 usage(argv[0]);
702 return 1;
703 }
704
705 signal(SIGALRM, sig_handler);
706 alarm(timeout);
707
708 printf("Starting %d threads\n", thread_number);
709 for (i = 0; i < thread_number; i++) {
710 struct thread_data *td = &threads[i];
711
712 sem_post(&td->mutex);
713 }
714
715 for (i = 0; i < thread_number; i++) {
716 struct thread_data *td = &threads[i];
717
718 waitpid(td->pid, NULL, 0);
719 }
720
721 min_bw[0] = min_run[0] = ~0UL;
722 min_bw[1] = min_run[1] = ~0UL;
723 minl[0] = minl[1] = ~0UL;
724 for (i = 0; i < thread_number; i++) {
725 struct thread_data *td = &threads[i];
726 unsigned long bw = 0;
727
728 if (td->error)
729 continue;
730
731 if (td->runtime < min_run[td->ddir])
732 min_run[td->ddir] = td->runtime;
733 if (td->runtime > max_run[td->ddir])
734 max_run[td->ddir] = td->runtime;
735
736 if (td->runtime)
737 bw = (td->blocks_read * td->bs) / td->runtime;
738 if (bw < min_bw[td->ddir])
739 min_bw[td->ddir] = bw;
740 if (bw > max_bw[td->ddir])
741 max_bw[td->ddir] = bw;
742 if (td->max_latency < minl[td->ddir])
743 minl[td->ddir] = td->max_latency;
744 if (td->max_latency > maxl[td->ddir])
745 maxl[td->ddir] = td->max_latency;
746
747 total_blocks[td->ddir] += td->blocks_read;
748
749 if (td->ddir == DDIR_READ) {
750 read_mb += (td->bs * td->blocks_read) >> 20;
751 if (td->runtime)
752 read_agg += (td->blocks_read * td->bs) / td->runtime;
753 }
754 if (td->ddir == DDIR_WRITE) {
755 write_mb += (td->bs * td->blocks_read) >> 20;
756 if (td->runtime)
757 write_agg += (td->blocks_read * td->bs) / td->runtime;
758 }
759
760 show_thread_status(td);
761 }
762
763 printf("Run status:\n");
764 if (max_run[DDIR_READ])
765 printf(" READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
766 if (max_run[DDIR_WRITE])
767 printf(" WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
768 return 0;
769}