2 * fio - the flexible io tester
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 #include <sys/types.h>
35 #include <semaphore.h>
38 #include <asm/unistd.h>
41 * assume we don't have _get either, if _set isn't defined
/*
 * Fallback numbers for the ioprio_set / ioprio_get system calls, used only
 * when the system headers did not define __NR_ioprio_set. Each pair below
 * belongs to one architecture test, and the chain ends in an #error.
 * NOTE(review): no architecture test is visible in front of the first pair
 * (289/290); presumably it is the i386 case - confirm against the full file.
 */
43 #ifndef __NR_ioprio_set
46 #define __NR_ioprio_set 289
47 #define __NR_ioprio_get 290
48 #elif defined(__powerpc__) || defined(__powerpc64__)
49 #define __NR_ioprio_set 273
50 #define __NR_ioprio_get 274
51 #elif defined(__x86_64__)
52 #define __NR_ioprio_set 251
53 #define __NR_ioprio_get 252
54 #elif defined(__ia64__)
55 #define __NR_ioprio_set 1274
56 #define __NR_ioprio_get 1275
57 #elif defined(__alpha__)
58 #define __NR_ioprio_set 442
59 #define __NR_ioprio_get 443
60 #elif defined(__s390x__) || defined(__s390__)
61 #define __NR_ioprio_set 282
62 #define __NR_ioprio_get 283
64 #error "Unsupported arch"
/*
 * Thin wrapper around the ioprio_set system call: forwards which, who and
 * ioprio unchanged to syscall() and returns its result.
 */
69 static int ioprio_set(int which, int who, int ioprio)
71 return syscall(__NR_ioprio_set, which, who, ioprio);
/* "which" selector handed to ioprio_set() by thread_main(); the enclosing enum is not visible here */
75 IOPRIO_WHO_PROCESS = 1,
/* bit offset of the priority class inside a combined ioprio value (class << shift | prio) */
80 #define IOPRIO_CLASS_SHIFT 13
/*
 * Round the address buf up to the next multiple of MASK + 1.
 * MASK is defined outside this listing - presumably 2^n - 1; confirm.
 */
87 #define ALIGN(buf) (((unsigned long) (buf) + MASK) & ~(MASK))
/*
 * Global defaults. parse_options() overwrites sequential, global_bs,
 * timeout, write_stat, repeatable, odirect and ini_file from the command
 * line. TIMEOUT and BS are defined outside this listing.
 */
89 static int sequential = 1;
90 static int write_stat = 0;
91 static int repeatable = 1;
/* number of jobs registered so far; incremented by add_job() */
92 static int thread_number;
93 static int timeout = TIMEOUT;
94 static int odirect = 1;
95 static int global_bs = BS;
96 static char *ini_file;
/* affinity mask of the starting process (filled in main()); copied as each job's default mask */
100 static cpu_set_t def_cpumask;
/* data direction of a job; main() also uses these values as indexes into its two-element summary arrays */
102 #define DDIR_READ (0)
103 #define DDIR_WRITE (1)
/*
 * Members of struct thread_data (the per-job state accessed through td).
 * The struct's opening line and a number of other members are missing from
 * this listing.
 */
/* rate limiting: target microseconds per I/O and sleep time banked so far (see rate_throttle) */
122 unsigned int rate_usec_cycle;
123 unsigned int rate_pending_usleep;
125 unsigned long max_latency; /* msec */
126 unsigned long min_latency; /* msec */
/*
 * NOTE(review): do_thread_io() stores the result of mtime_since() here and
 * main() prints it with a "msec" suffix, so the "sec" remark below looks
 * stale - confirm.
 */
127 unsigned long runtime; /* sec */
128 unsigned long blocks;
129 unsigned long blocks_read;
130 unsigned long last_block;
/* per-job state for the reentrant drand48 family (srand48_r / lrand48_r) */
133 struct drand48_data random_state;
/* stat-file bookkeeping maintained by add_stat_sample(): total time, and time/blocks of the current window */
138 unsigned long stat_time;
139 unsigned long stat_time_last;
140 unsigned long stat_blocks_last;
/* job table; main() points it at the segment returned by shmat() */
143 static struct thread_data *threads;
/* posted by thread_main() when a job has finished (or abandoned) its setup; waited on in main() */
145 static sem_t startup_sem;
/*
 * Signal handler; main() installs it for SIGALRM. It visits every
 * registered job in threads[]. What it does to each job is in lines that are
 * missing from this listing.
 */
147 void sig_handler(int sig)
151 for (i = 0; i < thread_number; i++) {
152 struct thread_data *td = &threads[i];
/*
 * Seed td->random_state with srand48_r(). The seed starts as the constant
 * 123 and may be replaced by sizeof(seed) bytes read from /dev/random; a
 * short read takes the branch opened at line 173.
 * NOTE(review): the condition that selects the /dev/random path, the error
 * handling and the return values are not visible - presumably the path is
 * controlled by the global "repeatable"; confirm.
 */
158 int init_random_state(struct thread_data *td)
160 unsigned long seed = 123;
166 int fd = open("/dev/random", O_RDONLY);
173 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
182 srand48_r(seed, &td->random_state);
/*
 * Tear down the job's stat file. Only acts when td->stat_fd is not -1;
 * the action itself (presumably a close) is not in this listing.
 */
186 void shutdown_stat_file(struct thread_data *td)
188 if (td->stat_fd != -1) {
/*
 * Create (or truncate) the file "<file_name>.stat" write-only with mode
 * 0644 and keep its descriptor in td->stat_fd. A descriptor of -1 enters the
 * failure branch, whose body and the return values are not visible.
 * NOTE(review): the name is built with an unbounded sprintf(); the size of
 * n is declared outside this listing - verify it can hold file_name + 5.
 */
194 int init_stat_file(struct thread_data *td)
202 sprintf(n, "%s.stat", td->file_name);
203 td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
204 if (td->stat_fd == -1) {
/*
 * Elapsed time from s to e. The seconds part is scaled by 1000000, i.e. the
 * result is expressed in microseconds. A positive second count paired with
 * a negative microsecond difference enters a borrow branch.
 * NOTE(review): the declarations, the borrow body and the return statement
 * are missing from this listing.
 */
214 unsigned long utime_since(struct timeval *s, struct timeval *e)
218 sec = e->tv_sec - s->tv_sec;
219 usec = e->tv_usec - s->tv_usec;
220 if (sec > 0 && usec < 0) {
225 sec *= (double) 1000000;
/*
 * Elapsed time from s to e in milliseconds: seconds are multiplied by 1000
 * and microseconds divided by 1000. Same borrow branch as utime_since().
 * NOTE(review): the declarations, the borrow body and the return statement
 * are missing from this listing.
 */
230 unsigned long mtime_since(struct timeval *s, struct timeval *e)
234 sec = e->tv_sec - s->tv_sec;
235 usec = e->tv_usec - s->tv_usec;
236 if (sec > 0 && usec < 0) {
241 sec *= (double) 1000;
242 usec /= (double) 1000;
/*
 * Elapsed time from s to e in seconds. The value is computed in double
 * precision (sec + usec / 1000000) and truncated to unsigned long on return.
 * NOTE(review): the borrow body and the lines between the computation of
 * ret and the return are missing from this listing.
 */
247 unsigned long time_since(struct timeval *s, struct timeval *e)
249 double sec, usec, ret;
251 sec = e->tv_sec - s->tv_sec;
252 usec = e->tv_usec - s->tv_usec;
253 if (sec > 0 && usec < 0) {
258 ret = sec + usec / (double) 1000000;
262 return (unsigned long) ret;
/*
 * Pick where the next I/O of this job goes. For a non-sequential job a
 * value r is drawn with lrand48_r() from the job's own random state and
 * scaled to a block number b = 1 + (blocks - 1) * r / (RAND_MAX + 1).
 * NOTE(review): the sequential path and the conversion of b into the
 * returned offset are not visible here.
 */
265 unsigned long get_next_offset(struct thread_data *td)
270 if (!td->sequential) {
271 lrand48_r(&td->random_state, &r);
272 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
/*
 * Account one completed I/O that took msec milliseconds and log to the
 * job's stat file (td->stat_fd).
 * NOTE(review): results of write() are ignored, the size of "sample" is
 * declared outside this listing, and lines between the two logging paths
 * are missing, so it is unclear whether both are active at once.
 */
281 void add_stat_sample(struct thread_data *td, unsigned long msec)
/* per-I/O record: "<blocks_read>, <msec>" */
289 sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
290 write(td->stat_fd, sample, strlen(sample));
292 td->stat_time += msec;
293 td->stat_time_last += msec;
294 td->stat_blocks_last++;
/* once the current window covers at least 500 msec, emit "<stat_time>, <rate>" and start a new window */
296 if (td->stat_time_last >= 500) {
/* bytes transferred in the window divided by its length in msec */
297 unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
299 td->stat_time_last = 0;
300 td->stat_blocks_last = 0;
301 sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
302 //sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
303 write(td->stat_fd, sample, strlen(sample));
/*
 * Sleep for usec microseconds with nanosleep(). The remaining nanoseconds
 * reported in rem are copied back into req - presumably inside a retry loop
 * whose control lines are missing from this listing.
 * NOTE(review): tv_sec is fixed at 0, so usec >= 1000000 produces a
 * tv_nsec outside the range nanosleep() accepts; usec * 1000 is also plain
 * int arithmetic and can overflow for large usec. rate_throttle() passes an
 * accumulated value that is only bounded from below - confirm the range.
 */
308 void usec_sleep(int usec)
310 struct timespec req = { .tv_sec = 0, .tv_nsec = usec * 1000 };
314 rem.tv_sec = rem.tv_nsec = 0;
315 nanosleep(&req, &rem);
319 req.tv_nsec = rem.tv_nsec;
/*
 * Keep a job at its configured rate. time_spent is the duration of the
 * last I/O in microseconds (the caller passes the utime_since() result) and
 * td->rate_usec_cycle is the target duration of one I/O.
 */
323 void rate_throttle(struct thread_data *td, unsigned long time_spent)
/* faster than the target: bank the spare time and sleep once 100000 usec have piled up */
325 if (time_spent < td->rate_usec_cycle) {
326 unsigned long s = td->rate_usec_cycle - time_spent;
328 td->rate_pending_usleep += s;
329 if (td->rate_pending_usleep >= 100000) {
330 usec_sleep(td->rate_pending_usleep);
331 td->rate_pending_usleep = 0;
/* slower than (or equal to) the target: pay the overrun out of the banked sleep time */
333 } else if (td->rate_pending_usleep) {
334 long overtime = time_spent - td->rate_usec_cycle;
336 if (overtime > td->rate_pending_usleep)
337 td->rate_pending_usleep = 0;
/* NOTE(review): the line between 337 and 339 is missing - presumably "else"; otherwise this would underflow */
339 td->rate_pending_usleep -= overtime;
/*
 * Main I/O loop of one job: for each of td->blocks blocks, seek to the
 * offset chosen by get_next_offset(), read or write td->bs bytes depending
 * on td->ddir, time the transfer, and feed the timing into rate throttling,
 * the stat file and the min/max latency fields. The total wall time ends up
 * in td->runtime via mtime_since().
 * NOTE(review): many lines are missing from this listing - the conditions
 * guarding the sleep, throttle and stat calls, the error handling, the
 * assignment of msec, the update of blocks_read and the release of ptr.
 */
343 void do_thread_io(struct thread_data *td)
345 struct timeval s, e, start;
347 unsigned long blocks, msec, usec;
/* over-allocate by MASK so that an aligned buffer of td->bs bytes fits; no NULL check is visible */
349 ptr = malloc(td->bs+MASK);
350 buffer = (char *) ALIGN(ptr);
352 gettimeofday(&start, NULL);
354 for (blocks = 0; blocks < td->blocks; blocks++) {
355 off_t offset = get_next_offset(td);
361 if (lseek(td->fd, offset, SEEK_SET) == -1) {
367 usec_sleep(td->delay_sleep);
/* s..e brackets only the read/write call itself */
369 gettimeofday(&s, NULL);
371 if (td->ddir == DDIR_READ)
372 ret = read(td->fd, buffer, td->bs);
374 ret = write(td->fd, buffer, td->bs);
382 gettimeofday(&e, NULL);
384 usec = utime_since(&s, &e);
388 rate_throttle(td, usec);
390 add_stat_sample(td, msec);
394 //if (td->ddir == DDIR_WRITE && !(td->blocks_read % 512))
397 if (msec < td->min_latency)
398 td->min_latency = msec;
399 if (msec > td->max_latency)
400 td->max_latency = msec;
/* buffered writes get extra handling here; the statement itself is not visible (presumably a sync) */
403 if (td->ddir == DDIR_WRITE && !td->odirect)
406 gettimeofday(&e, NULL);
407 td->runtime = mtime_since(&start, &e);
/*
 * Entry point of one job. Attaches the shared segment shm_id, locates the
 * job's thread_data at index "offset", applies the CPU affinity mask, opens
 * the target file, initialises the random state and the stat file, sizes
 * the job, applies the I/O priority, then reports readiness on startup_sem
 * and waits on td->mutex. At the end it shuts down the stat file and posts
 * td->done_mutex.
 * NOTE(review): error paths, the call that performs the I/O and the return
 * statements are missing from this listing.
 */
412 void *thread_main(int shm_id, int offset, char *argv[])
414 struct thread_data *td;
416 struct stat *statbuf = NULL;
419 data = shmat(shm_id, NULL, 0);
/* NOTE(review): correct only if data is a byte-sized pointer type (void or char pointer); its declaration is not visible */
420 td = data + offset * sizeof(struct thread_data);
425 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
430 printf("Thread (%s) (pid=%u) (f=%s) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name);
/*
 * NOTE(review): argv[0] is both the destination and a source argument of
 * sprintf(), which is undefined behaviour for overlapping objects, and the
 * result is longer than the original string.
 */
433 sprintf(argv[0], "%s%d\n", argv[0], offset);
/* readers open the existing file; writers create or truncate it with mode 0644 */
439 if (td->ddir == DDIR_READ)
440 td->fd = open(td->file_name, flags | O_RDONLY);
442 td->fd = open(td->file_name, flags | O_WRONLY | O_CREAT | O_TRUNC, 0644);
449 if (init_random_state(td))
451 if (init_stat_file(td))
/* a read job covers the whole file: block count is file size divided by block size */
454 if (td->ddir == DDIR_READ) {
/* no NULL check and no free of statbuf are visible in this listing */
455 statbuf = malloc(sizeof(*statbuf));
456 if (fstat(td->fd, statbuf) == -1) {
461 td->blocks = statbuf->st_size / td->bs;
/* otherwise the job is sized to 1 GiB worth of blocks */
467 td->blocks = 1024 * 1024 * 1024 / td->bs;
/* -1 means no I/O priority was requested */
469 if (td->ioprio != -1) {
470 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
/* tell main() that setup is complete, then block until main() releases this job */
476 sem_post(&startup_sem);
477 sem_wait(&td->mutex);
484 shutdown_stat_file(td);
488 sem_post(&td->done_mutex);
/* second post of startup_sem - presumably on the error exit path, so main() is not left waiting; confirm */
490 sem_post(&startup_sem);
/*
 * Print the one-line result summary of a job: error code, priority class
 * and level, maximum latency, amount of data moved in MiB and bandwidth.
 * NOTE(review): the line in front of the division by td->runtime is missing
 * from this listing - presumably a guard against runtime == 0; confirm.
 */
500 void show_thread_status(struct thread_data *td)
502 int prio, prio_class;
503 unsigned long bw = 0;
/* bytes per millisecond, printed under the KiB/sec label */
506 bw = (td->blocks_read * td->bs) / td->runtime;
/* split the combined ioprio value: low byte is the level, bits from IOPRIO_CLASS_SHIFT upward are the class */
508 prio = td->ioprio & 0xff;
509 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
511 printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->blocks_read * td->bs >> 20, bw);
/* Print the command line synopsis, prefixed with progname, to standard output. */
514 void usage(char *progname)
516 printf("%s: <-s 0/1> <-b kb> <-t sec> <-w 0/1> <-c r,w,r...> file0... fileN\n", progname);
/*
 * Translate a rate limit into the per-I/O time budget used by
 * rate_throttle(): rate * 1024 / bs gives I/Os per second (so rate is in
 * KiB/sec), and 1000000 divided by that gives microseconds per I/O.
 * NOTE(review): both divisions can be by zero (td->bs == 0, or a rate below
 * bs / 1024) unless the missing lines 522-523 guard against it; confirm.
 */
519 void setup_rate(struct thread_data *td, int rate)
521 int nr_reads_per_sec = rate * 1024 / td->bs;
524 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
525 td->rate_pending_usleep = 0;
/*
 * Register one job in the next free slot of threads[] and print its
 * configuration. Because thread_number is incremented before it is stored,
 * job numbers start at 1.
 * NOTE(review): no check against the capacity of threads[] is visible, the
 * file name is copied with an unbounded strcpy() (the size of file_name is
 * declared outside this listing), and the assignments of ddir, bs and
 * ioprio as well as the condition around setup_rate() are missing here.
 */
528 void add_job(const char *filename, int rw, int bs, int direct, int prio, int random, int delay, int rate, cpu_set_t cpumask)
530 struct thread_data *td = &threads[thread_number++];
532 strcpy(td->file_name, filename);
533 td->thread_number = thread_number;
/* both semaphores are created process-shared; mutex starts at 1, done_mutex at 0 */
535 sem_init(&td->mutex, 1, 1);
536 sem_init(&td->done_mutex, 1, 0);
/* large sentinel so that the first measured latency becomes the minimum */
537 td->min_latency = 10000000;
540 td->sequential = !random;
541 td->odirect = direct;
543 td->delay_sleep = delay;
544 td->cpumask = cpumask;
547 setup_rate(td, rate);
549 printf("Client%d: file=%s, rw=%d, prio=%d, seq=%d, odir=%d, bs=%d, rate=%d\n", thread_number, filename, rw, prio, !random, direct, bs, rate);
/*
 * Set bits in a CPU set from the integer cpu, examining sizeof(int) * 8 bit
 * positions. The per-bit condition is on a line missing from this listing -
 * presumably "bit i of cpu is set".
 * NOTE(review): cpumask is received by value and the function returns void,
 * so CPU_SET() only changes this local copy; the masks held by the callers
 * (parse_jobs_cmd, parse_jobs_ini) are left untouched.
 */
552 static void fill_cpu_mask(cpu_set_t cpumask, int cpu)
558 for (i = 0; i < sizeof(int) * 8; i++) {
560 CPU_SET(i, &cpumask);
/*
 * Copy one option value from input to output, stopping at the first ',',
 * '}' or end of string.
 * NOTE(review): no length limit on output is visible, and the lines that
 * initialise i and terminate output are missing from this listing.
 */
564 void fill_option(const char *input, char *output)
569 while (input[i] != ',' && input[i] != '}' && input[i] != '\0') {
570 output[i] = input[i];
/*
 * Build jobs from the command line arguments argv[index..argc-1]. Each
 * argument is searched with strstr() for "key=" fragments (rw, prio,
 * prioclass, file, bs, direct, delay, rate, cpumask) and the bare words
 * "random" / "sequential"; the collected settings are passed to add_job().
 * Returns the total number of registered jobs (thread_number).
 * NOTE(review): the lines that set p, test each strstr() result, advance c
 * past the key and apply the per-job defaults are missing from this
 * listing, and no release of string or filename is visible.
 */
585 int parse_jobs_cmd(int argc, char *argv[], int index)
587 int rw, bs, direct, prio, random, prioclass, delay, rate, cpu;
588 char *string, *filename, *p, *c;
/* scratch buffers filled by fill_option(), which applies no length limit in the visible code */
592 string = malloc(256);
593 filename = malloc(256);
595 for (i = index; i < argc; i++) {
607 random = !sequential;
611 memcpy(&cpumask, &def_cpumask, sizeof(cpumask));
613 c = strstr(p, "rw=");
622 c = strstr(p, "prio=");
628 c = strstr(p, "prioclass=");
/* the class is taken from a single digit character */
631 prioclass = *c - '0';
634 c = strstr(p, "file=");
637 fill_option(c, filename);
640 c = strstr(p, "bs=");
643 fill_option(c, string);
644 bs = strtoul(string, NULL, 10);
648 c = strstr(p, "direct=");
657 c = strstr(p, "delay=");
660 fill_option(c, string);
661 delay = strtoul(string, NULL, 10);
664 c = strstr(p, "rate=");
667 fill_option(c, string);
668 rate = strtoul(string, NULL, 10);
671 c = strstr(p, "cpumask=");
674 fill_option(c, string);
675 cpu = strtoul(string, NULL, 10);
/* NOTE(review): fill_cpu_mask() takes the set by value, so cpumask is unchanged after this call */
676 fill_cpu_mask(cpumask, cpu);
680 c = strstr(p, "random");
683 c = strstr(p, "sequential");
/* class and level are packed into one ioprio value */
687 add_job(filename, rw, bs, direct, (prioclass << IOPRIO_CLASS_SHIFT) | prio, random, delay, rate, cpumask);
692 return thread_number;
/*
 * Try to read an integer setting called name from the line p. Two scanf
 * patterns are tried in turn: "<name>=%d" and "<name> = %d"; on a match the
 * number is stored in *val.
 * NOTE(review): the return statements are missing from this listing;
 * callers test !check_int(...), so 0 presumably means "found". The format
 * is built with an unbounded sprintf() into str, whose size is not visible.
 */
695 int check_int(char *p, char *name, int *val)
699 sprintf(str, "%s=%%d", name);
700 if (sscanf(p, str, val) == 1)
703 sprintf(str, "%s = %%d", name);
704 if (sscanf(p, str, val) == 1)
/*
 * Scan line for a character that is neither whitespace nor a control
 * character. The values returned for the two outcomes are on lines missing
 * from this listing.
 * NOTE(review): strlen() is re-evaluated on every iteration, and plain char
 * is passed to isspace() / iscntrl(), which is undefined for negative
 * values - a cast to unsigned char would be needed for non-ASCII input.
 */
710 int is_empty(char *line)
714 for (i = 0; i < strlen(line); i++)
715 if (!isspace(line[i]) && !iscntrl(line[i]))
/*
 * Build jobs from the ini-style file "file". A "[name]" line opens a job
 * section; the following lines are matched against the integer settings
 * bs, rw, prio, prioclass, direct, rate, delay, cpumask and the words
 * "sequential" / "random". Each section is handed to add_job() with the
 * section name as the file name.
 * NOTE(review): the handling of a failed fopen(), the bodies of the
 * check_int branches, the end-of-section test, the cleanup and the return
 * value are missing from this listing.
 */
721 int parse_jobs_ini(char *file)
723 int rw, bs, direct, prio, random, prioclass, delay, rate, jobs, cpu;
730 f = fopen(file, "r");
736 string = malloc(4096);
741 while ((p = fgets(string, 4096, f)) != NULL) {
/* %s reads up to the next whitespace, so the closing bracket ends up in name ... */
742 if (sscanf(p, "[%s]", name) != 1)
/* ... and is removed here by dropping the last character */
745 name[strlen(name) - 1] = '\0';
751 random = !sequential;
755 memcpy(&cpumask, &def_cpumask, sizeof(cpumask));
759 while ((p = fgets(string, 4096, f)) != NULL) {
762 if (!check_int(p, "bs", &bs)) {
767 if (!check_int(p, "rw", &rw)) {
771 if (!check_int(p, "prio", &prio)) {
775 if (!check_int(p, "prioclass", &prioclass)) {
779 if (!check_int(p, "direct", &direct)) {
783 if (!check_int(p, "rate", &rate)) {
787 if (!check_int(p, "delay", &delay)) {
791 if (!check_int(p, "cpumask", &cpu)) {
/* NOTE(review): fill_cpu_mask() takes the set by value, so cpumask is unchanged after this call */
792 fill_cpu_mask(cpumask, cpu);
/*
 * NOTE(review): fgets() keeps the trailing newline, so these exact
 * comparisons match only if the newline is stripped on a line missing from
 * this listing; confirm.
 */
796 if (!strcmp(p, "sequential")) {
801 if (!strcmp(p, "random")) {
809 add_job(name, rw, bs, direct, (prioclass << IOPRIO_CLASS_SHIFT) | prio, random, delay, rate, cpumask);
/*
 * Parse the leading command line options into the global settings
 * (sequential, global_bs, timeout, write_stat, repeatable, odirect,
 * ini_file). main() uses the return value as the index of the first job
 * argument.
 * NOTE(review): the option letters, the way parm is advanced to the option
 * value and the return statement are missing from this listing. Values are
 * converted with atoi(), which cannot report malformed numbers.
 */
818 int parse_options(int argc, char *argv[])
822 for (i = 1; i < argc; i++) {
823 char *parm = argv[i];
/* "!!" normalises any non-zero number to 1 for the on/off settings */
832 sequential = !!atoi(parm);
836 global_bs = atoi(parm);
841 timeout = atoi(parm);
845 write_stat = !!atoi(parm);
849 repeatable = !!atoi(parm);
853 odirect = !!atoi(parm);
857 printf("-f needs file as arg\n");
/* the ini file name is taken from the following argument */
860 ini_file = strdup(argv[i+1]);
863 printf("bad option %s\n", argv[i]);
/*
 * Program entry: records the default CPU affinity, creates and attaches
 * the shared job table, parses options and job descriptions (ini file or
 * command line), starts one thread_main() per job, releases all jobs,
 * waits for them with waitpid() and finally prints per-job and aggregate
 * read/write statistics.
 * NOTE(review): error handling, the process creation around thread_main(),
 * the timeout setup and the return statements are missing from this
 * listing.
 */
871 int main(int argc, char *argv[])
/* index 0 = DDIR_READ, index 1 = DDIR_WRITE; static storage starts out zeroed */
873 static unsigned long max_run[2], min_run[2], total_blocks[2];
874 static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
875 static unsigned long read_mb, write_mb, read_agg, write_agg;
878 if (sched_getaffinity(getpid(), sizeof(def_cpumask), &def_cpumask) == -1) {
879 perror("sched_getaffinity");
/*
 * NOTE(review): the table is sized for argc - 1 jobs, but jobs read from an
 * ini file are not limited by argc and add_job() shows no capacity check -
 * verify this cannot overrun the segment.
 */
883 shm_id = shmget(0, (argc - 1) * sizeof(struct thread_data), IPC_CREAT | 0600);
889 threads = shmat(shm_id, NULL, 0);
890 if (threads == (void *) -1 ) {
897 i = parse_options(argc, argv);
900 jobs = parse_jobs_ini(ini_file);
902 jobs = parse_jobs_cmd(argc, argv, i);
909 printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
912 printf("Nothing to do\n");
915 printf("%d Clients configured\n", jobs);
/* start the jobs one at a time, using startup_sem to wait for each job's setup */
917 for (i = 0; i < jobs; i++) {
/*
 * NOTE(review): startup_sem is an ordinary static object, not part of the
 * shared segment. If jobs run as forked children (the waitpid() below
 * suggests so), a post in the child would not reach this process even
 * though pshared is 1 - confirm.
 */
918 sem_init(&startup_sem, 1, 1);
921 sem_wait(&startup_sem);
923 thread_main(shm_id, i, argv);
928 if (!thread_number) {
933 signal(SIGALRM, sig_handler);
/* release every job that is blocked on its mutex in thread_main() */
936 printf("Starting %d threads\n", thread_number);
937 for (i = 0; i < thread_number; i++) {
938 struct thread_data *td = &threads[i];
940 sem_post(&td->mutex);
943 for (i = 0; i < thread_number; i++) {
944 struct thread_data *td = &threads[i];
946 waitpid(td->pid, NULL, 0);
/* minima start at the largest value so that the first job always lowers them */
949 min_bw[0] = min_run[0] = ~0UL;
950 min_bw[1] = min_run[1] = ~0UL;
951 minl[0] = minl[1] = ~0UL;
952 for (i = 0; i < thread_number; i++) {
953 struct thread_data *td = &threads[i];
954 unsigned long bw = 0;
959 if (td->runtime < min_run[td->ddir])
960 min_run[td->ddir] = td->runtime;
961 if (td->runtime > max_run[td->ddir])
962 max_run[td->ddir] = td->runtime;
/* NOTE(review): the guard lines in front of the divisions by td->runtime are missing here - presumably a runtime != 0 test */
965 bw = (td->blocks_read * td->bs) / td->runtime;
966 if (bw < min_bw[td->ddir])
967 min_bw[td->ddir] = bw;
968 if (bw > max_bw[td->ddir])
969 max_bw[td->ddir] = bw;
/* minl tracks the smallest per-job maximum latency, not td->min_latency */
970 if (td->max_latency < minl[td->ddir])
971 minl[td->ddir] = td->max_latency;
972 if (td->max_latency > maxl[td->ddir])
973 maxl[td->ddir] = td->max_latency;
975 total_blocks[td->ddir] += td->blocks_read;
977 if (td->ddir == DDIR_READ) {
978 read_mb += (td->bs * td->blocks_read) >> 20;
980 read_agg += (td->blocks_read * td->bs) / td->runtime;
982 if (td->ddir == DDIR_WRITE) {
983 write_mb += (td->bs * td->blocks_read) >> 20;
985 write_agg += (td->blocks_read * td->bs) / td->runtime;
989 show_thread_status(td);
/* a direction is reported only if at least one of its jobs recorded a non-zero runtime */
992 printf("Run status:\n");
993 if (max_run[DDIR_READ])
994 printf(" READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
995 if (max_run[DDIR_WRITE])
996 printf(" WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);