2 * fio - the flexible io tester
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 #include <sys/types.h>
35 #include <semaphore.h>
38 #include <asm/unistd.h>
/* Capacity of the job table: sizes the shmget() request in main() and caps get_new_job(). */
40 #define MAX_JOBS (1024)
43 * assume we don't have _get either, if _set isn't defined
/*
 * Fallback syscall numbers for ioprio_set/ioprio_get, used only when the
 * system headers do not already provide __NR_ioprio_set. There is one pair
 * per architecture; an architecture that is not listed stops the build via
 * #error.
 * NOTE(review): the first pair (289/290) appears before the first #elif, so
 * an opening #if for one more architecture presumably precedes it. That
 * line and the closing #else/#endif lines are not visible in this listing.
 */
45 #ifndef __NR_ioprio_set
48 #define __NR_ioprio_set 289
49 #define __NR_ioprio_get 290
50 #elif defined(__powerpc__) || defined(__powerpc64__)
51 #define __NR_ioprio_set 273
52 #define __NR_ioprio_get 274
53 #elif defined(__x86_64__)
54 #define __NR_ioprio_set 251
55 #define __NR_ioprio_get 252
56 #elif defined(__ia64__)
57 #define __NR_ioprio_set 1274
58 #define __NR_ioprio_get 1275
59 #elif defined(__alpha__)
60 #define __NR_ioprio_set 442
61 #define __NR_ioprio_get 443
62 #elif defined(__s390x__) || defined(__s390__)
63 #define __NR_ioprio_set 282
64 #define __NR_ioprio_get 283
66 #error "Unsupported arch"
/*
 * Invoke the raw ioprio_set system call (no libc wrapper is used).
 *
 * which/who select the target and ioprio is the packed priority value; all
 * three are handed to the kernel unchanged. Returns whatever syscall()
 * returns, narrowed to int (thread_main() treats -1 as failure).
 */
static int ioprio_set(int which, int who, int ioprio)
{
	long rc;

	rc = syscall(__NR_ioprio_set, which, who, ioprio);
	return rc;
}
/*
 * Value passed as the first argument of ioprio_set() by thread_main().
 * NOTE(review): the enclosing enum declaration is not visible in this listing.
 */
77 IOPRIO_WHO_PROCESS = 1,
/*
 * Bit offset of the priority class inside a packed ioprio value: add_job()
 * shifts the class up by this amount, show_thread_status() shifts it back.
 */
82 #define IOPRIO_CLASS_SHIFT 13
/*
 * Built-in defaults. Most seed the file-scope option variables of the same
 * name; DEF_RATE_CYCLE seeds td->ratecycle in get_new_job(), which
 * check_min_rate() compares against a millisecond interval.
 */
87 #define DEF_TIMEOUT (30)
88 #define DEF_RATE_CYCLE (1000)
89 #define DEF_ODIRECT (1)
90 #define DEF_SEQUENTIAL (1)
91 #define DEF_WRITESTAT (0)
92 #define DEF_RAND_REPEAT (1)
/*
 * Round buf up to the next multiple of (MASK + 1) and yield it as char *.
 * Only correct when MASK is a power of two minus one.
 * NOTE(review): MASK is defined on a line not visible in this listing.
 */
94 #define ALIGN(buf) (char *) (((unsigned long) (buf) + MASK) & ~(MASK))
/*
 * Program-wide option values. They start at the DEF_* defaults and are
 * overwritten by parse_options(); get_new_job() copies `sequential` into
 * each new job.
 */
96 static int sequential = DEF_SEQUENTIAL;
97 static int write_stat = DEF_WRITESTAT;
98 static int repeatable = DEF_RAND_REPEAT;
99 static int timeout = DEF_TIMEOUT;
100 static int odirect = DEF_ODIRECT;
101 static int global_bs = DEF_BS;
/* Number of job slots handed out so far by get_new_job(). */
103 static int thread_number;
/* Job file path given with -f; strdup()ed in parse_options(), read by parse_jobs_ini(). */
104 static char *ini_file;
/* CPU affinity of the launching process, captured in main() and copied into every new job. */
108 static cpu_set_t def_cpumask;
/*
 * Per-job state (fields of struct thread_data).
 * NOTE(review): the struct opener/closer and several short field
 * declarations (for example the ones behind td->bs, td->rate, td->fd) are
 * not visible in this listing.
 */
122 volatile int terminate;
/* run_threads() posts td->mutex for jobs whose started flag is 1. */
123 volatile int started;
/* Non-zero: walk the file in order; zero: pick block numbers at random. */
126 unsigned int sequential;
128 unsigned int odirect;
/* Passed to usec_sleep() before each I/O, i.e. microseconds. */
129 unsigned int delay_sleep;
/* do_thread_io() checks io_blocks % fsync_blocks == 0 (when should_fsync() holds) to trigger a sync. */
130 unsigned int fsync_blocks;
/* run_threads() compares start_delay * 1000 against elapsed milliseconds, i.e. seconds. */
131 unsigned int start_delay;
/* Minimum acceptable throughput, compared against bytes-per-msec in check_min_rate(). */
135 unsigned int ratemin;
/* Length of one minimum-rate sampling window, in milliseconds. */
136 unsigned int ratecycle;
/* Target duration of one I/O in microseconds, derived in setup_rate(). */
137 unsigned long rate_usec_cycle;
/* Running balance of sleep owed (positive) or overtime (negative), in microseconds. */
138 long rate_pending_usleep;
/* io_blocks value at the start of the current rate sample; zero means no sample is running. */
139 unsigned long rate_blocks;
/* Time at which the current rate sample started. */
140 struct timeval lastrate;
142 unsigned long max_latency; /* msec */
143 unsigned long min_latency; /* msec */
144 unsigned long runtime; /* msec: assigned from mtime_since() and printed as msec in main() */
/* Number of blocks the job will process (loop bound in do_thread_io()). */
145 unsigned long blocks;
146 unsigned long io_blocks;
147 unsigned long last_block;
/* State for the reentrant drand48 generator, seeded in init_random_state(). */
149 struct drand48_data random_state;
/* Accumulators used by add_stat_sample(): total msec, msec and blocks since the last rate line. */
154 unsigned long stat_time;
155 unsigned long stat_time_last;
156 unsigned long stat_blocks_last;
/* Time at which do_thread_io() began. */
158 struct timeval start;
/* Job table; main() points it at the shared memory segment obtained with shmget()/shmat(). */
161 static struct thread_data *threads;
/*
 * Startup handshake semaphore: run_threads() waits on it and thread_main()
 * posts it once a job has finished its setup.
 */
163 static sem_t startup_sem;
/*
 * Signal handler; run_threads() installs it for SIGALRM. It visits every
 * job slot currently in use.
 * NOTE(review): the statement applied to each job is on a line not visible
 * in this listing (presumably it sets td->terminate - confirm).
 */
165 static void sig_handler(int sig)
169 for (i = 0; i < thread_number; i++) {
170 struct thread_data *td = &threads[i];
/*
 * Seed td->random_state for the job's random block selection.
 *
 * The seed starts out as the fixed value 123 and may be replaced by
 * sizeof(seed) bytes read from /dev/random; reading fewer bytes than that
 * takes the failure path. thread_main() treats a non-zero return as failure.
 * NOTE(review): the condition choosing between the fixed and the random seed
 * is not visible here (presumably the `repeatable` option - confirm).
 * NOTE(review): /dev/random can block while the kernel gathers entropy.
 */
176 static int init_random_state(struct thread_data *td)
178 unsigned long seed = 123;
184 int fd = open("/dev/random", O_RDONLY);
191 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
200 srand48_r(seed, &td->random_state);
/*
 * Tear down the job's stat file; does nothing when none was opened
 * (td->stat_fd == -1).
 * NOTE(review): the teardown calls themselves are not visible in this listing.
 */
204 static void shutdown_stat_file(struct thread_data *td)
206 if (td->stat_fd != -1) {
/*
 * Open "<file_name>.stat" for writing (created if missing, truncated if
 * present, mode 0644) and store the descriptor in td->stat_fd.
 * thread_main() treats a non-zero return as failure.
 * NOTE(review): sprintf() into n is unbounded; the size of n is not visible
 * here, so a long file name may overflow it.
 * NOTE(review): any early-out for jobs without stat output is not visible.
 */
212 static int init_stat_file(struct thread_data *td)
219 sprintf(n, "%s.stat", td->file_name);
220 td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
221 if (td->stat_fd == -1) {
/*
 * Return the time elapsed from *s to *e in microseconds.
 *
 * The difference is computed in 64-bit integer arithmetic, so no explicit
 * borrow between the seconds and microseconds fields is needed. If *e lies
 * before *s the result is 0; converting a negative interval to unsigned
 * long, as a floating point version would, is undefined behaviour.
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long sec = (long long) e->tv_sec - (long long) s->tv_sec;
	long long usec = (long long) e->tv_usec - (long long) s->tv_usec;
	long long total = sec * 1000000LL + usec;

	if (total < 0)
		return 0;

	return (unsigned long) total;
}
/*
 * Return the time elapsed from *s to *e in whole milliseconds (fractions
 * are truncated).
 *
 * The difference is taken in microseconds using 64-bit integer arithmetic
 * and then scaled down. If *e lies before *s the result is 0; converting a
 * negative interval to unsigned long is undefined behaviour.
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long sec = (long long) e->tv_sec - (long long) s->tv_sec;
	long long usec = (long long) e->tv_usec - (long long) s->tv_usec;
	long long total_usec = sec * 1000000LL + usec;

	if (total_usec < 0)
		return 0;

	return (unsigned long) (total_usec / 1000);
}
/*
 * Pick the position of the job's next I/O; do_thread_io() seeks to the
 * returned value.
 *
 * For a non-sequential job a pseudo-random number r is drawn from
 * td->random_state and scaled to a block number b.
 * NOTE(review): if r never exceeds RAND_MAX the formula yields b in
 * [1, td->blocks - 1], so block 0 is never chosen, and td->blocks == 0
 * makes (td->blocks - 1) wrap - confirm this is intended.
 * NOTE(review): the sequential branch and the return statement are not
 * visible in this listing.
 */
262 static unsigned long get_next_offset(struct thread_data *td)
267 if (!td->sequential) {
268 lrand48_r(&td->random_state, &r);
269 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
/*
 * Record one completed I/O of `msec` milliseconds in the job's stat file.
 *
 * The visible code formats "<io_blocks>, <msec>" and writes it to
 * td->stat_fd, then adds msec to the running totals. Once at least 500 msec
 * have accumulated since the last rate line it writes "<stat_time>, <rate>",
 * where rate is bytes transferred per msec over that window, and resets the
 * window counters.
 * NOTE(review): the declaration of `sample` and any guards around these
 * writes (for example a check that stat output is enabled) are not visible
 * in this listing; the return values of write() are ignored.
 */
278 static void add_stat_sample(struct thread_data *td, unsigned long msec)
286 sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
287 write(td->stat_fd, sample, strlen(sample));
289 td->stat_time += msec;
290 td->stat_time_last += msec;
291 td->stat_blocks_last++;
293 if (td->stat_time_last >= 500) {
294 unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
296 td->stat_time_last = 0;
297 td->stat_blocks_last = 0;
298 sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
299 //sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
300 write(td->stat_fd, sample, strlen(sample));
/*
 * Sleep for `usec` microseconds, resuming after signal interruptions until
 * the full interval has elapsed. Zero or negative values return at once.
 *
 * The interval is split into seconds and nanoseconds: nanosleep() rejects a
 * tv_nsec of one second or more, so putting usec * 1000 straight into
 * tv_nsec turns every sleep of >= 1s into no sleep at all (and the
 * multiplication overflows int beyond roughly 2.1 seconds).
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	if (usec <= 0)
		return;

	req.tv_sec = usec / 1000000;
	req.tv_nsec = (long) (usec % 1000000) * 1000L;

	for (;;) {
		rem.tv_sec = 0;
		rem.tv_nsec = 0;

		if (nanosleep(&req, &rem) == 0)
			break;

		/*
		 * rem is only filled in when a signal cut the sleep short;
		 * an untouched rem means nothing is left to wait for.
		 */
		if (!rem.tv_sec && !rem.tv_nsec)
			break;

		req = rem;
	}
}
/*
 * Pace the job towards its configured rate after one I/O that took
 * `time_spent` microseconds.
 *
 * An I/O that finished faster than td->rate_usec_cycle adds the unused time
 * to td->rate_pending_usleep; once that balance reaches 100000 usec it is
 * slept off in one go and reset. An I/O that took longer subtracts the
 * overtime from the balance instead.
 * NOTE(review): any early return for jobs without a rate limit is not
 * visible in this listing.
 */
320 static void rate_throttle(struct thread_data *td, unsigned long time_spent)
325 if (time_spent < td->rate_usec_cycle) {
326 unsigned long s = td->rate_usec_cycle - time_spent;
328 td->rate_pending_usleep += s;
329 if (td->rate_pending_usleep >= 100000) {
330 usec_sleep(td->rate_pending_usleep);
331 td->rate_pending_usleep = 0;
334 long overtime = time_spent - td->rate_usec_cycle;
336 td->rate_pending_usleep -= overtime;
/*
 * Check whether the job is keeping up with its minimum rate (td->ratemin).
 *
 * While a sample is running (td->rate_blocks != 0) the throughput since
 * td->lastrate is computed as bytes per millisecond once at least
 * td->ratecycle msec have passed; falling below td->ratemin prints a
 * message. A new sample is then started from the current io_blocks/now.
 * do_thread_io() treats a non-zero return as failure.
 * NOTE(review): the division by `spent` is only protected by the
 * `spent < td->ratecycle` test; with ratecycle == 0 and spent == 0 it would
 * divide by zero - confirm against the lines not visible in this listing.
 * NOTE(review): the early returns and the declaration of `rate` are not
 * visible here.
 */
340 static int check_min_rate(struct thread_data *td, struct timeval *now)
342 unsigned long spent = mtime_since(&td->start, now);
346 * allow a 2 second settle period in the beginning
352 * if rate blocks is set, sample is running
354 if (td->rate_blocks) {
355 spent = mtime_since(&td->lastrate, now);
356 if (spent < td->ratecycle)
359 rate = ((td->io_blocks - td->rate_blocks) * td->bs) / spent;
360 if (rate < td->ratemin) {
361 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
366 td->rate_blocks = td->io_blocks;
367 memcpy(&td->lastrate, now, sizeof(*now));
/*
 * True for a job that writes (ddir == DDIR_WRITE) with its odirect flag
 * off; do_thread_io() uses it to gate its periodic and final sync handling.
 */
371 #define should_fsync(td) ((td)->ddir == DDIR_WRITE && !(td)->odirect)
/*
 * Main I/O loop of one job.
 *
 * Allocates a buffer of td->bs + MASK bytes, records the start time, then
 * for each of td->blocks iterations: picks an offset, seeks there, sleeps
 * td->delay_sleep, times one read() or write() of td->bs bytes (a shorter
 * transfer takes the error path), applies the periodic fsync condition,
 * throttles to the configured rate, checks the minimum rate, logs a stat
 * sample and updates min/max latency. After the loop the total runtime in
 * milliseconds is stored in td->runtime.
 * NOTE(review): several statements are not visible in this listing,
 * including the assignments of `buffer` and `msec`, the io_blocks update,
 * the sync calls, the error handling and the release of `ptr`.
 */
373 static void do_thread_io(struct thread_data *td)
377 unsigned long blocks, msec, usec;
379 ptr = malloc(td->bs + MASK);
382 gettimeofday(&td->start, NULL);
385 memcpy(&td->lastrate, &td->start, sizeof(td->start));
387 for (blocks = 0; blocks < td->blocks; blocks++) {
388 off_t offset = get_next_offset(td);
394 if (lseek(td->fd, offset, SEEK_SET) == -1) {
400 usec_sleep(td->delay_sleep);
402 gettimeofday(&s, NULL);
404 if (td->ddir == DDIR_READ)
405 ret = read(td->fd, buffer, td->bs);
407 ret = write(td->fd, buffer, td->bs);
409 if (ret < (int) td->bs) {
417 if (should_fsync(td) && td->fsync_blocks &&
418 (td->io_blocks % td->fsync_blocks) == 0)
421 gettimeofday(&e, NULL);
423 usec = utime_since(&s, &e);
425 rate_throttle(td, usec);
427 if (check_min_rate(td, &e)) {
433 add_stat_sample(td, msec);
435 if (msec < td->min_latency)
436 td->min_latency = msec;
437 if (msec > td->max_latency)
438 td->max_latency = msec;
441 if (should_fsync(td))
444 gettimeofday(&e, NULL);
445 td->runtime = mtime_since(&td->start, &e);
/*
 * Entry point of one job process.
 *
 * Attaches the shared segment shm_id and locates this job's slot at index
 * `offset`, applies the job's CPU affinity, announces itself, rewrites
 * argv[0] to "fio<offset>", opens the target file (read-only for readers;
 * created and truncated for writers), seeds the random state, opens the
 * stat file and determines how many blocks to process: file size / bs for
 * readers, 1 GiB / bs for writers. It then sets the I/O priority, posts
 * startup_sem and blocks on td->mutex until run_threads() releases it.
 * NOTE(review): sprintf() into argv[0] is unbounded and can overrun the
 * original program name if that is shorter than "fio<offset>".
 * NOTE(review): the call into do_thread_io(), the error paths (the second
 * sem_post() is presumably one of them) and the return value are not
 * visible in this listing.
 */
450 static void *thread_main(int shm_id, int offset, char *argv[])
452 struct thread_data *td;
457 data = shmat(shm_id, NULL, 0);
458 td = data + offset * sizeof(struct thread_data);
463 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
468 printf("Thread (%s) (pid=%u) (f=%s) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name);
471 sprintf(argv[0], "fio%d", offset);
477 if (td->ddir == DDIR_READ)
478 td->fd = open(td->file_name, flags | O_RDONLY);
480 td->fd = open(td->file_name, flags | O_WRONLY | O_CREAT | O_TRUNC, 0644);
487 if (init_random_state(td))
489 if (init_stat_file(td))
492 if (td->ddir == DDIR_READ) {
493 if (fstat(td->fd, &st) == -1) {
498 td->blocks = st.st_size / td->bs;
504 td->blocks = 1024 * 1024 * 1024 / td->bs;
507 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
513 sem_post(&startup_sem);
514 sem_wait(&td->mutex);
519 shutdown_stat_file(td);
524 sem_post(&startup_sem);
/*
 * NOTE(review): only the signature is visible in this listing; presumably
 * this releases the shared memory segment set up in main() - confirm.
 */
529 static void free_shm(void)
/*
 * Print the one-line result summary of a finished job: error code, priority
 * class/level, maximum latency, data moved in MiB and bandwidth.
 *
 * Bandwidth is bytes transferred divided by td->runtime (milliseconds). The
 * priority level is the low 8 bits of td->ioprio and the class is
 * everything above IOPRIO_CLASS_SHIFT.
 * NOTE(review): bw starts at 0, which suggests the division is guarded
 * against td->runtime == 0 on a line not visible in this listing.
 */
534 static void show_thread_status(struct thread_data *td)
536 int prio, prio_class;
537 unsigned long bw = 0;
540 bw = (td->io_blocks * td->bs) / td->runtime;
542 prio = td->ioprio & 0xff;
543 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
545 printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->io_blocks * td->bs >> 20, bw);
/*
 * Derive the per-I/O time budget from the job's target rate.
 *
 * Rejects a configuration whose nominal rate is below the minimum rate.
 * Otherwise td->rate (treated as KiB/sec: it is multiplied by 1024) is
 * converted to I/Os per second for the job's block size, and
 * td->rate_usec_cycle becomes the number of microseconds each I/O may take.
 * NOTE(review): nr_reads_per_sec is 0 whenever td->rate * 1024 < td->bs, and
 * td->bs itself may be 0; both divide by zero unless guarded on lines not
 * visible in this listing.
 */
548 static int setup_rate(struct thread_data *td)
550 int nr_reads_per_sec;
555 if (td->rate < td->ratemin) {
556 fprintf(stderr, "min rate larger than nominal rate\n");
560 nr_reads_per_sec = td->rate * 1024 / td->bs;
561 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
562 td->rate_pending_usleep = 0;
/*
 * Claim the next free slot in the job table and reset it to defaults.
 *
 * The slot is zeroed, numbered (thread_number is 1-based because it is
 * stored after the counter was incremented), set to read direction and
 * given the default rate cycle, the global sequential setting and the
 * launcher's CPU mask.
 * NOTE(review): the result of the MAX_JOBS check, further default
 * assignments and the return statement are not visible in this listing.
 */
566 static struct thread_data *get_new_job(void)
568 struct thread_data *td;
570 if (thread_number >= MAX_JOBS)
573 td = &threads[thread_number++];
574 memset(td, 0, sizeof(*td));
576 td->thread_number = thread_number;
577 td->ddir = DDIR_READ;
582 td->ratecycle = DEF_RATE_CYCLE;
583 td->sequential = sequential;
585 memcpy(&td->cpumask, &def_cpumask, sizeof(td->cpumask));
/*
 * Give a job slot back: zeroes the table entry that td's 1-based
 * thread_number refers to.
 * NOTE(review): whether the global thread_number is decremented as well is
 * not visible in this listing.
 */
590 static void put_job(struct thread_data *td)
592 memset(&threads[td->thread_number - 1], 0, sizeof(*td));
/*
 * Finish configuring a job slot obtained from get_new_job().
 *
 * Copies the file name into the slot, creates the job's semaphore
 * (process-shared, initial count 0), primes min_latency with a large
 * sentinel, packs priority class and level into td->ioprio and prints the
 * resulting configuration. Callers treat a non-zero return as failure.
 * NOTE(review): strcpy() is unbounded; the size of td->file_name is not
 * visible here, so a long file name may overflow it.
 * NOTE(review): the rest of the parameter list and further statements
 * (for example rate setup) are not visible in this listing.
 */
596 static int add_job(struct thread_data *td, const char *filename, int prioclass,
599 strcpy(td->file_name, filename);
601 sem_init(&td->mutex, 1, 0);
602 td->min_latency = 10000000;
603 td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
608 printf("Client%d: file=%s, rw=%d, prio=%d, seq=%d, odir=%d, bs=%d, rate=%d\n", td->thread_number, filename, td->ddir, td->ioprio, td->sequential, td->odirect, td->bs, td->rate);
/*
 * Translate the bits of `cpu` into entries of a cpu_set_t, one candidate
 * CPU per bit position of an int.
 * NOTE(review): cpumask is received BY VALUE, so CPU_SET() modifies a local
 * copy and the caller's mask (td->cpumask in both parsers) is never
 * changed. Fixing this needs a pointer parameter and matching call-site
 * changes.
 * NOTE(review): the test selecting which bits are set is on a line not
 * visible in this listing.
 */
612 static void fill_cpu_mask(cpu_set_t cpumask, int cpu)
618 for (i = 0; i < sizeof(int) * 8; i++) {
620 CPU_SET(i, &cpumask);
/*
 * Copy one option value out of a job description.
 *
 * Characters are taken from `input` up to, but not including, the first
 * ',' or '}' or the end of the string, and `output` is NUL-terminated.
 * No length is passed in, so `output` must be large enough for the value.
 */
static void fill_option(const char *input, char *output)
{
	const char *src = input;
	char *dst = output;

	for (; *src != '\0'; src++) {
		if (*src == ',' || *src == '}')
			break;
		*dst++ = *src;
	}

	*dst = '\0';
}
/*
 * Build jobs from the command-line arguments argv[index..argc-1].
 *
 * Each argument is searched with strstr() for the keys rw=, prio=,
 * prioclass=, file=, bs=, direct=, delay=, rate=, ratemin=, ratecycle=,
 * cpumask=, fsync= and startdelay=, plus the bare words "random" and
 * "sequential". Values are extracted with fill_option() and converted with
 * strtoul(); the finished job is registered through add_job().
 * NOTE(review): keys are matched as substrings anywhere in the argument, so
 * "delay=" also matches inside "startdelay=", and "random"/"sequential"
 * match inside a file name.
 * NOTE(review): fill_cpu_mask() takes the mask by value, so the cpumask=
 * option cannot take effect through this call.
 * NOTE(review): the job allocation, the pointer adjustments past each key,
 * the NULL checks, the initial values of prio/prioclass and the release of
 * the two 256-byte buffers are not visible in this listing.
 */
645 static void parse_jobs_cmd(int argc, char *argv[], int index)
647 struct thread_data *td;
648 unsigned int prio, prioclass, cpu;
649 char *string, *filename, *p, *c;
652 string = malloc(256);
653 filename = malloc(256);
655 for (i = index; i < argc; i++) {
671 c = strstr(p, "rw=");
675 td->ddir = DDIR_READ;
677 td->ddir = DDIR_WRITE;
680 c = strstr(p, "prio=");
686 c = strstr(p, "prioclass=");
689 prioclass = *c - '0';
692 c = strstr(p, "file=");
695 fill_option(c, filename);
698 c = strstr(p, "bs=");
701 fill_option(c, string);
702 td->bs = strtoul(string, NULL, 10);
706 c = strstr(p, "direct=");
715 c = strstr(p, "delay=");
718 fill_option(c, string);
719 td->delay_sleep = strtoul(string, NULL, 10);
722 c = strstr(p, "rate=");
725 fill_option(c, string);
726 td->rate = strtoul(string, NULL, 10);
729 c = strstr(p, "ratemin=");
732 fill_option(c, string);
733 td->ratemin = strtoul(string, NULL, 10);
736 c = strstr(p, "ratecycle=");
739 fill_option(c, string);
740 td->ratecycle = strtoul(string, NULL, 10);
743 c = strstr(p, "cpumask=");
746 fill_option(c, string);
747 cpu = strtoul(string, NULL, 10);
748 fill_cpu_mask(td->cpumask, cpu);
751 c = strstr(p, "fsync=");
754 fill_option(c, string);
755 td->fsync_blocks = strtoul(string, NULL, 10);
758 c = strstr(p, "startdelay=");
761 fill_option(c, string);
762 td->start_delay = strtoul(string, NULL, 10);
765 c = strstr(p, "random");
768 c = strstr(p, "sequential");
772 if (add_job(td, filename, prioclass, prio))
/*
 * Try to parse "<name>=<number>" (optional white space around '=') from the
 * start of line `p`.
 *
 * On a match the number is stored in *val and 0 is returned; otherwise *val
 * is left untouched and 1 is returned. Callers test for success with
 * !check_int(...).
 *
 * A single format suffices: white space in a scanf format matches any
 * amount of input white space, including none, so "name = %u" accepts both
 * "name=5" and "name = 5". %u is used because val is unsigned, and the
 * format is built with snprintf so an over-long name cannot overflow it.
 */
static int check_int(char *p, char *name, unsigned int *val)
{
	char fmt[128];
	int len;

	len = snprintf(fmt, sizeof(fmt), "%s = %%u", name);
	if (len < 0 || (size_t) len >= sizeof(fmt))
		return 1;

	if (sscanf(p, fmt, val) == 1)
		return 0;

	return 1;
}
/*
 * Return 1 when `line` contains nothing but white space and control
 * characters (an empty string included), 0 as soon as any other character
 * is found.
 *
 * Characters are read as unsigned char because the <ctype.h> classifiers
 * are undefined for negative values, which plain char can hold for bytes
 * >= 0x80. The string is walked once instead of calling strlen() on every
 * iteration.
 */
static int is_empty(char *line)
{
	const unsigned char *c;

	for (c = (const unsigned char *) line; *c != '\0'; c++) {
		if (!isspace(*c) && !iscntrl(*c))
			return 0;
	}

	return 1;
}
/*
 * Build jobs from a job file.
 *
 * The file is read line by line into a 4096-byte buffer. A line matching
 * "[%s]" starts a job section; because %s also swallows the closing ']',
 * the last character of the scanned name is chopped off afterwards. The
 * following lines are tried against the integer keys bs, rw, prio,
 * prioclass, direct, rate, ratemin, ratecycle, delay, cpumask, fsync and
 * startdelay via check_int() (0 means matched) and against the bare words
 * "sequential" and "random". The finished job is registered with add_job(),
 * using the section name as its file name. main() treats a non-zero return
 * as failure.
 * NOTE(review): sscanf("[%s]") is unbounded with respect to `name`, whose
 * size is not visible here.
 * NOTE(review): fgets() keeps the trailing newline, so the exact strcmp()
 * against "sequential"/"random" only matches if the newline is stripped on
 * a line not visible in this listing.
 * NOTE(review): fill_cpu_mask() takes the mask by value, so the cpumask key
 * cannot take effect through this call.
 * NOTE(review): the job allocation, the end-of-section detection, the
 * initial values of prio/prioclass and the cleanup (free/fclose) are not
 * visible in this listing.
 */
806 static int parse_jobs_ini(char *file)
808 unsigned int prioclass, prio, cpu;
809 struct thread_data *td;
815 f = fopen(file, "r");
821 string = malloc(4096);
824 while ((p = fgets(string, 4096, f)) != NULL) {
825 if (sscanf(p, "[%s]", name) != 1)
828 name[strlen(name) - 1] = '\0';
838 while ((p = fgets(string, 4096, f)) != NULL) {
841 if (!check_int(p, "bs", &td->bs)) {
846 if (!check_int(p, "rw", &td->ddir)) {
850 if (!check_int(p, "prio", &prio)) {
854 if (!check_int(p, "prioclass", &prioclass)) {
858 if (!check_int(p, "direct", &td->odirect)) {
862 if (!check_int(p, "rate", &td->rate)) {
866 if (!check_int(p, "ratemin", &td->ratemin)) {
870 if (!check_int(p, "ratecycle", &td->ratecycle)) {
874 if (!check_int(p, "delay", &td->delay_sleep)) {
878 if (!check_int(p, "cpumask", &cpu)) {
879 fill_cpu_mask(td->cpumask, cpu);
883 if (!check_int(p, "fsync", &td->fsync_blocks)) {
887 if (!check_int(p, "startdelay", &td->start_delay)) {
891 if (!strcmp(p, "sequential")) {
896 if (!strcmp(p, "random")) {
904 if (add_job(td, name, prioclass, prio))
/*
 * Parse the leading global options in argv[1..] and store them in the
 * file-scope option variables: sequential, global_bs, timeout, write_stat,
 * repeatable, odirect (boolean ones are normalised with !!) and the job
 * file name for -f. main() passes the return value on to parse_jobs_cmd()
 * as the index at which job descriptions start.
 * NOTE(review): atoi() gives no error indication, so malformed numbers are
 * silently read as 0.
 * NOTE(review): the option letters, the block size validity test and the
 * loop exit/return are on lines not visible in this listing.
 */
913 static int parse_options(int argc, char *argv[])
917 for (i = 1; i < argc; i++) {
918 char *parm = argv[i];
927 sequential = !!atoi(parm);
931 global_bs = atoi(parm);
934 printf("bad block size\n");
940 timeout = atoi(parm);
944 write_stat = !!atoi(parm);
948 repeatable = !!atoi(parm);
952 odirect = !!atoi(parm);
956 printf("-f needs file as arg\n");
959 ini_file = strdup(argv[i+1]);
962 printf("bad option %s\n", argv[i]);
/*
 * Launch all configured jobs and release them to start their I/O.
 *
 * Records a start timestamp, installs sig_handler() for SIGALRM, then walks
 * the job table: a job with a start delay is compared against the
 * milliseconds elapsed since launch (start_delay is in seconds), and jobs
 * are started by calling thread_main() with startup_sem serialising their
 * setup. Jobs whose `started` flag is 1 are then released by posting their
 * td->mutex.
 * NOTE(review): process creation, the alarm setup, the outer loop that
 * repeats until `todo` reaches zero and the updates of td->started are not
 * visible in this listing.
 */
970 static void run_threads(char *argv[])
972 struct timeval genesis, now;
973 struct thread_data *td;
977 gettimeofday(&genesis, NULL);
979 printf("Starting %d threads\n", thread_number);
983 signal(SIGALRM, sig_handler);
987 todo = thread_number;
990 for (i = 0; i < thread_number; i++) {
996 if (td->start_delay) {
997 gettimeofday(&now, NULL);
998 spent = mtime_since(&genesis, &now);
1000 if (td->start_delay * 1000 > spent)
1005 sem_init(&startup_sem, 1, 1);
1009 sem_wait(&startup_sem);
1011 thread_main(shm_id, i, argv);
1016 for (i = 0; i < thread_number; i++) {
1017 struct thread_data *td = &threads[i];
1019 if (td->started == 1) {
1021 sem_post(&td->mutex);
/*
 * Program entry point.
 *
 * Creates and attaches a shared memory segment big enough for MAX_JOBS job
 * slots, captures the current CPU affinity as the default mask, parses the
 * global options and then the jobs (from the job file and/or the remaining
 * arguments), and bails out with "Nothing to do" when no job was defined.
 * After the jobs have been waited for it aggregates, per direction (index 0
 * and 1 of the arrays, selected by td->ddir), the shortest/longest runtime,
 * lowest/highest bandwidth, latency extremes, total data in MiB and summed
 * bandwidth, prints each job's status line and finally the READ/WRITE
 * summary lines.
 * NOTE(review): minl[] is fed from td->max_latency, i.e. it is the smallest
 * per-job maximum latency, not the smallest latency observed - confirm this
 * is intended.
 * NOTE(review): the divisions by td->runtime need a guard against 0; such
 * guards, the call to run_threads(), error exits and cleanup are on lines
 * not visible in this listing.
 */
1030 int main(int argc, char *argv[])
1032 static unsigned long max_run[2], min_run[2], total_blocks[2];
1033 static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
1034 static unsigned long read_mb, write_mb, read_agg, write_agg;
1037 shm_id = shmget(0, MAX_JOBS * sizeof(struct thread_data), IPC_CREAT | 0600);
1043 threads = shmat(shm_id, NULL, 0);
1044 if (threads == (void *) -1 ) {
1051 if (sched_getaffinity(getpid(), sizeof(def_cpumask), &def_cpumask) == -1) {
1052 perror("sched_getaffinity");
1056 i = parse_options(argc, argv);
1059 if (parse_jobs_ini(ini_file))
1062 parse_jobs_cmd(argc, argv, i);
1064 if (!thread_number) {
1065 printf("Nothing to do\n");
1069 printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
1073 for (i = 0; i < thread_number; i++) {
1074 struct thread_data *td = &threads[i];
1076 waitpid(td->pid, NULL, 0);
1079 min_bw[0] = min_run[0] = ~0UL;
1080 min_bw[1] = min_run[1] = ~0UL;
1081 minl[0] = minl[1] = ~0UL;
1082 for (i = 0; i < thread_number; i++) {
1083 struct thread_data *td = &threads[i];
1084 unsigned long bw = 0;
1089 if (td->runtime < min_run[td->ddir])
1090 min_run[td->ddir] = td->runtime;
1091 if (td->runtime > max_run[td->ddir])
1092 max_run[td->ddir] = td->runtime;
1095 bw = (td->io_blocks * td->bs) / td->runtime;
1096 if (bw < min_bw[td->ddir])
1097 min_bw[td->ddir] = bw;
1098 if (bw > max_bw[td->ddir])
1099 max_bw[td->ddir] = bw;
1100 if (td->max_latency < minl[td->ddir])
1101 minl[td->ddir] = td->max_latency;
1102 if (td->max_latency > maxl[td->ddir])
1103 maxl[td->ddir] = td->max_latency;
1105 total_blocks[td->ddir] += td->io_blocks;
1107 if (td->ddir == DDIR_READ) {
1108 read_mb += (td->bs * td->io_blocks) >> 20;
1110 read_agg += (td->io_blocks * td->bs) / td->runtime;
1112 if (td->ddir == DDIR_WRITE) {
1113 write_mb += (td->bs * td->io_blocks) >> 20;
1115 write_agg += (td->io_blocks * td->bs) / td->runtime;
1119 show_thread_status(td);
1122 printf("Run status:\n");
1123 if (max_run[DDIR_READ])
1124 printf(" READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
1125 if (max_run[DDIR_WRITE])
1126 printf(" WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);