Added fio, the flexible io tester.
authorJens Axboe <axboe@suse.de>
Tue, 4 Oct 2005 07:30:46 +0000 (09:30 +0200)
committerJens Axboe <axboe@suse.de>
Tue, 4 Oct 2005 07:30:46 +0000 (09:30 +0200)
Makefile
fio.c [new file with mode: 0644]

index bf144290dff3020a3ce22be6814d48b83f866b4e..df705c8993eef5ecc4a65b499383852cbc00bf58 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,15 @@
 CC     = gcc
 CFLAGS = -Wall -O2 -g -D_GNU_SOURCE
-PROGS  = dops
+PROGS  = dops fio
 
 all: $(PROGS)
 
 dops: dops.o
        $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -laio
 
+fio: fio.o
+       $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread
+
 clean:
        -rm -f *.o $(PROGS)
 
diff --git a/fio.c b/fio.c
new file mode 100644 (file)
index 0000000..8fd3af3
--- /dev/null
+++ b/fio.c
@@ -0,0 +1,769 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <semaphore.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <asm/unistd.h>
+
+/*
+ * assume we don't have _get either, if _set isn't defined
+ */
+#ifndef __NR_ioprio_set
+
+#if defined(__i386__)
+#define __NR_ioprio_set                289
+#define __NR_ioprio_get                290
+#elif defined(__powerpc__) || defined(__powerpc64__)
+#define __NR_ioprio_set                273
+#define __NR_ioprio_get                274
+#elif defined(__x86_64__)
+#define __NR_ioprio_set                251
+#define __NR_ioprio_get                252
+#elif defined(__ia64__)
+#define __NR_ioprio_set                1274
+#define __NR_ioprio_get                1275
+#elif defined(__alpha__)
+#define __NR_ioprio_set                442
+#define __NR_ioprio_get                443
+#elif defined(__s390x__) || defined(__s390__)
+#define __NR_ioprio_set                282
+#define __NR_ioprio_get                283
+#else
+#error "Unsupported arch"
+#endif
+
+#endif
+
+static int ioprio_set(int which, int who, int ioprio)
+{
+       return syscall(__NR_ioprio_set, which, who, ioprio);
+}
+
+enum {
+       IOPRIO_WHO_PROCESS = 1,
+       IOPRIO_WHO_PGRP,
+       IOPRIO_WHO_USER,
+};
+
+#define IOPRIO_CLASS_SHIFT     13
+
+#define BS     (4096)
+#define MASK   (4095)
+
+#define TIMEOUT        (30)
+#define MAX_THREADS (32)
+
+#define ALIGN(buf)      (((unsigned long) (buf) + MASK) & ~(MASK))
+
+static int sequential = 1;
+static int write_stat = 0;
+static int repeatable = 1;
+static int thread_number;
+static int timeout = TIMEOUT;
+static int odirect = 1;
+static int global_bs = BS;
+
+static int shm_id;
+
+#define DDIR_READ      (0)
+#define DDIR_WRITE     (1)
+
+struct thread_data {
+       char file_name[256];
+       int thread_number;
+       int error;
+       int fd;
+       int stat_fd;
+       pid_t pid;
+       int terminate;
+       int ddir;
+       int ioprio;
+       int sequential;
+       int bs;
+       int odirect;
+       int delay_sleep;
+       unsigned long max_latency;      /* msec */
+       unsigned long min_latency;      /* msec */
+       unsigned long runtime;          /* sec */
+       unsigned long blocks;
+       unsigned long blocks_read;
+       unsigned long last_block;
+       sem_t mutex;
+       sem_t done_mutex;
+       struct drand48_data random_state;
+
+       /*
+        * bandwidth stat
+        */
+       unsigned long stat_time;
+       unsigned long stat_time_last;
+       unsigned long stat_blocks_last;
+};
+
+static struct thread_data *threads;
+static int thread_ddir[MAX_THREADS];
+
+static sem_t startup_sem;
+
+void sig_handler(int sig)
+{
+       int i;
+
+       for (i = 0; i < thread_number; i++) {
+               struct thread_data *td = &threads[i];
+
+               td->terminate = 1;
+       }
+}
+
+int init_random_state(struct thread_data *td)
+{
+       unsigned long seed = 123;
+
+       if (td->sequential)
+               return 0;
+
+       if (!repeatable) {
+               int fd = open("/dev/random", O_RDONLY);
+
+               if (fd == -1) {
+                       td->error = errno;
+                       return 1;
+               }
+
+               if (read(fd, &seed, sizeof(seed)) < sizeof(seed)) {
+                       td->error = EIO;
+                       close(fd);
+                       return 1;
+               }
+
+               close(fd);
+       }
+
+       srand48_r(seed, &td->random_state);
+       return 0;
+}
+
+void shutdown_stat_file(struct thread_data *td)
+{
+       if (td->stat_fd != -1) {
+               fsync(td->stat_fd);
+               close(td->stat_fd);
+       }
+}
+
+int init_stat_file(struct thread_data *td)
+{
+       char *n;
+
+       if (!write_stat)
+               return 0;
+
+       n = malloc(256);
+       sprintf(n, "%s.stat", td->file_name);
+       td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+       if (td->stat_fd == -1) {
+               free(n);
+               td->error = errno;
+               return 1;
+       }
+
+       free(n);
+       return 0;
+}
+
+unsigned long utime_since(struct timeval *s, struct timeval *e)
+{
+       double sec, usec;
+
+       sec = e->tv_sec - s->tv_sec;
+       usec = e->tv_usec - s->tv_usec;
+       if (sec > 0 && usec < 0) {
+               sec--;
+               usec += 1000000;
+       }
+
+       sec *= (double) 1000000;
+
+       return sec + usec;
+}
+
+unsigned long mtime_since(struct timeval *s, struct timeval *e)
+{
+       double sec, usec;
+
+       sec = e->tv_sec - s->tv_sec;
+       usec = e->tv_usec - s->tv_usec;
+       if (sec > 0 && usec < 0) {
+               sec--;
+               usec += 1000000;
+       }
+
+       sec *= (double) 1000;
+       usec /= (double) 1000;
+
+       return sec + usec;
+}
+
+unsigned long time_since(struct timeval *s, struct timeval *e)
+{
+       double sec, usec, ret;
+
+       sec = e->tv_sec - s->tv_sec;
+       usec = e->tv_usec - s->tv_usec;
+       if (sec > 0 && usec < 0) {
+               sec--;
+               usec += 1000000;
+       }
+
+       ret = sec + usec / (double) 1000000;
+       if (ret < 0)
+               ret = 0;
+
+       return (unsigned long) ret;
+}
+
+unsigned long get_next_offset(struct thread_data *td)
+{
+       unsigned long b;
+       long r;
+
+       if (!td->sequential) {
+               lrand48_r(&td->random_state, &r);
+               b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
+       } else {
+               b = td->last_block;
+               td->last_block++;
+       }
+
+       return b * td->bs;
+}
+
+void add_stat_sample(struct thread_data *td, unsigned long block, unsigned long msec)
+{
+       char sample[256];
+
+       if (!td->stat_fd)
+               return;
+
+#if 0
+       sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
+       write(td->stat_fd, sample, strlen(sample));
+#else
+       td->stat_time += msec;
+       td->stat_time_last += msec;
+       td->stat_blocks_last++;
+
+       if (td->stat_time_last >= 500) {
+               unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
+
+               td->stat_time_last = 0;
+               td->stat_blocks_last = 0;
+               sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
+               //sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
+               write(td->stat_fd, sample, strlen(sample));
+       }
+#endif
+}
+
+void delay_sleep(int usec)
+{
+       unsigned long since;
+       struct timeval start, end;
+
+       gettimeofday(&start, NULL);
+
+       do {
+               gettimeofday(&end, NULL);
+
+               since = utime_since(&start, &end);
+               if (since >= usec)
+                       break;
+       } while (1);
+}
+
+void do_thread_io(struct thread_data *td)
+{
+       struct timeval s, e, start;
+       char *buffer, *ptr;
+       unsigned long blocks, msec;
+
+       ptr = malloc(td->bs+MASK);
+       buffer = (char *) ALIGN(ptr);
+
+       gettimeofday(&start, NULL);
+
+       for (blocks = 0; blocks < td->blocks; blocks++) {
+               off_t offset = get_next_offset(td);
+               int ret;
+
+               if (td->terminate)
+                       break;
+
+               if (lseek(td->fd, offset, SEEK_SET) == -1) {
+                       td->error = errno;
+                       break;
+               }
+
+               if (td->delay_sleep)
+                       delay_sleep(td->delay_sleep);
+
+               gettimeofday(&s, NULL);
+
+               if (td->ddir == DDIR_READ)
+                       ret = read(td->fd, buffer, td->bs);
+               else
+                       ret = write(td->fd, buffer, td->bs);
+
+               gettimeofday(&e, NULL);
+
+               if (ret < td->bs) {
+                       if (ret == -1)
+                               td->error = errno;
+                       break;
+               }
+
+               msec = mtime_since(&s, &e);
+
+               add_stat_sample(td, offset / td->bs, msec);
+
+               td->blocks_read++;
+
+               //if (td->ddir == DDIR_WRITE && !(td->blocks_read % 512))
+               //      fsync(td->fd);
+
+               if (msec < td->min_latency)
+                       td->min_latency = msec;
+               if (msec > td->max_latency)
+                       td->max_latency = msec;
+       }
+
+       if (td->ddir == DDIR_WRITE && !td->odirect)
+               fsync(td->fd);
+
+       gettimeofday(&e, NULL);
+       td->runtime = mtime_since(&start, &e);
+
+       free(ptr);
+}
+       
+void *thread_main(int shm_id, int offset, char *argv[])
+{
+       struct thread_data *td;
+       void *data;
+       struct stat *statbuf = NULL;
+       int ret = 1, flags;
+
+       data = shmat(shm_id, NULL, 0);
+       td = data + offset * sizeof(struct thread_data);
+       td->pid = getpid();
+
+       printf("Thread (%s) (pid=%u) (f=%s) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name);
+       fflush(stdout);
+
+       sprintf(argv[0], "%s%d\n", argv[0], offset);
+
+       if (td->ddir == DDIR_READ)
+               flags = O_RDONLY;
+       else
+               flags = O_WRONLY | O_CREAT | O_TRUNC;
+
+       if (td->odirect)
+               flags |= O_DIRECT;
+
+       td->fd = open(td->file_name, flags);
+       if (td->fd == -1) {
+               td->error = errno;
+               goto err;
+       }
+
+       if (init_random_state(td))
+               goto out;
+       if (init_stat_file(td))
+               goto out;
+
+       if (td->ddir == DDIR_READ) {
+               statbuf = malloc(sizeof(*statbuf));
+               if (fstat(td->fd, statbuf) == -1) {
+                       td->error = errno;
+                       goto out;
+               }
+
+               td->blocks = statbuf->st_size / td->bs;
+               if (!td->blocks) {
+                       td->error = EINVAL;
+                       goto out;
+               }
+       } else
+               td->blocks = 1024 * 1024 * 1024 / td->bs;
+
+       if (td->ioprio != -1) {
+               if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
+                       td->error = errno;
+                       goto out;
+               }
+       }
+
+       sem_post(&startup_sem);
+       sem_wait(&td->mutex);
+       do_thread_io(td);
+       ret = 0;
+
+out:
+       close(td->fd);
+       if (statbuf)
+               free(statbuf);
+       shutdown_stat_file(td);
+err:
+       sem_post(&td->done_mutex);
+       if (ret)
+               sem_post(&startup_sem);
+       shmdt(td);
+       return NULL;
+}
+
+void free_shm(void)
+{
+       shmdt(threads);
+}
+
+void show_thread_status(struct thread_data *td)
+{
+       int prio, prio_class;
+       unsigned long bw = 0;
+
+       if (td->runtime)
+               bw = (td->blocks_read * td->bs) / td->runtime;
+
+       prio = td->ioprio & 0xff;
+       prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
+
+       printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->blocks_read * td->bs >> 20, bw);
+}
+
+void usage(char *progname)
+{
+       printf("%s: <-s 0/1> <-b kb> <-t sec> <-w 0/1> <-c r,w,r...> file0... fileN\n", progname);
+}
+
+void add_job(const char *filename, int rw, int bs, int direct, int prio, int random, int delay)
+{
+       struct thread_data *td = &threads[thread_number];
+
+       strcpy(td->file_name, filename);
+       td->thread_number = thread_number + 1;
+       td->stat_fd = -1;
+       sem_init(&td->mutex, 1, 1);
+       sem_init(&td->done_mutex, 1, 0);
+       td->min_latency = 10000000;
+       td->ddir = rw;
+       td->ioprio = prio;
+       td->sequential = !random;
+       td->odirect = direct;
+       td->bs = bs;
+       td->delay_sleep = delay;
+
+       thread_number++;
+}
+
+void fill_option(const char *input, char *output)
+{
+       int i;
+
+       i = 0;
+       while (input[i] != ',' && input[i] != '}' && input[i] != '\0') {
+               output[i] = input[i];
+               i++;
+       }
+
+       output[i] = '\0';
+}
+
+/*
+ * job key words:
+ *
+ * file=
+ * bs=
+ * rw=
+ * direct=
+ */
+int parse_jobs(int argc, char *argv[], int index)
+{
+       int rw, bs, direct, prio, random, prioclass, delay;
+       char *string, *filename, *p, *c;
+       int i;
+
+       string = malloc(256);
+       filename = malloc(256);
+
+       for (i = index; i < argc; i++) {
+               p = argv[i];
+
+               c = strpbrk(p, "{");
+               if (!c)
+                       break;
+
+               filename[0] = 0;
+               rw = DDIR_READ;
+               bs = global_bs;
+               direct = 1;
+               prio = 4;
+               random = !sequential;
+               prioclass = 2;
+               delay = 0;
+
+               c = strstr(p, "rw=");
+               if (c) {
+                       c += 3;
+                       if (*c == '0')
+                               rw = DDIR_READ;
+                       else
+                               rw = DDIR_WRITE;
+               }
+
+               c = strstr(p, "prio=");
+               if (c) {
+                       c += 5;
+                       prio = *c - '0';
+               }
+
+               c = strstr(p, "prioclass=");
+               if (c) {
+                       c += 10;
+                       prioclass = *c - '0';
+               }
+
+               c = strstr(p, "file=");
+               if (c) {
+                       c += 5;
+                       fill_option(c, filename);
+               }
+
+               c = strstr(p, "bs=");
+               if (c) {
+                       c += 3;
+                       fill_option(c, string);
+                       bs = strtoul(string, NULL, 10);
+                       bs <<= 10;
+               }
+
+               c = strstr(p, "direct=");
+               if (c) {
+                       c += 7;
+                       if (*c != '0')
+                               direct = 1;
+                       else
+                               direct = 0;
+               }
+
+               c = strstr(p, "delay=");
+               if (c) {
+                       c += 6;
+                       fill_option(c, string);
+                       delay = strtoul(string, NULL, 10);
+               }
+
+               c = strstr(p, "random");
+               if (c)
+                       random = 1;
+               c = strstr(p, "sequential");
+               if (c)
+                       random = 0;
+
+               add_job(filename, rw, bs, direct, (prioclass << IOPRIO_CLASS_SHIFT) | prio, random, delay);
+       }
+
+       return thread_number;
+}
+
+int parse_options(int argc, char *argv[])
+{
+       int i, j;
+
+       for (i = 1; i < argc; i++) {
+               char *parm = argv[i];
+
+               if (parm[0] != '-')
+                       break;
+
+               parm++;
+               switch (*parm) {
+                       case 's':
+                               parm++;
+                               sequential = !!atoi(parm);
+                               break;
+                       case 'b':
+                               parm++;
+                               global_bs = atoi(parm);
+                               global_bs <<= 10;
+                               break;
+                       case 't':
+                               parm++;
+                               timeout = atoi(parm);
+                               break;
+                       case 'w':
+                               parm++;
+                               write_stat = !!atoi(parm);
+                               break;
+                       case 'r':
+                               parm++;
+                               repeatable = !!atoi(parm);
+                               break;
+                       case 'c': {
+                               char *c;
+                               j = 0;
+                               parm++;
+                               while ((c = strsep(&parm, ",")) != NULL) {
+                                       int rw = DDIR_READ;
+
+                                       if (*c == '1')
+                                               rw = DDIR_WRITE;
+
+                                       thread_ddir[j] = rw;
+                                       j++;
+                               }
+                               break;
+                       }
+                       case 'o':
+                               parm++;
+                               odirect = !!atoi(parm);
+                               break;
+                       default:
+                               printf("bad option %s\n", argv[1]);
+                               break;
+               }
+       }
+
+       if (global_bs <= 0)
+               global_bs = BS;
+       if (timeout <= 0)
+               timeout = TIMEOUT;
+
+       printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
+       return i;
+}
+
+int main(int argc, char *argv[])
+{
+       static unsigned long max_run[2], min_run[2], total_blocks[2];
+       static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
+       static unsigned long read_mb, write_mb, read_agg, write_agg;
+       int i, jobs;
+
+       if (argc - 1 > MAX_THREADS) {
+               printf("max %d threads\n", MAX_THREADS);
+               return 1;
+       }
+
+       shm_id = shmget(0, (argc - 1) * sizeof(struct thread_data), IPC_CREAT);
+       if (shm_id == -1) {
+               perror("shmget");
+               return 1;
+       }
+
+       threads = shmat(shm_id, NULL, 0);
+
+       atexit(free_shm);
+
+       for (i = 0; i < MAX_THREADS; i++)
+               thread_ddir[i] = DDIR_READ;
+
+       i = parse_options(argc, argv);
+       jobs = parse_jobs(argc, argv, i);
+       
+       if (!jobs) {
+               printf("Nothing to do\n");
+               return 1;
+       }
+
+       for (i = 0; i < jobs; i++) {
+               sem_init(&startup_sem, 1, 1);
+
+               if (fork())
+                       sem_wait(&startup_sem);
+               else {
+                       thread_main(shm_id, i, argv);
+                       exit(0);
+               }
+       }
+
+       if (!thread_number) {
+               usage(argv[0]);
+               return 1;
+       }
+
+       signal(SIGALRM, sig_handler);
+       alarm(timeout);
+
+       printf("Starting %d threads\n", thread_number);
+       for (i = 0; i < thread_number; i++) {
+               struct thread_data *td = &threads[i];
+
+               sem_post(&td->mutex);
+       }
+
+       for (i = 0; i < thread_number; i++) {
+               struct thread_data *td = &threads[i];
+
+               waitpid(td->pid, NULL, 0);
+       }
+
+       min_bw[0] = min_run[0] = ~0UL;
+       min_bw[1] = min_run[1] = ~0UL;
+       minl[0] = minl[1] = ~0UL;
+       for (i = 0; i < thread_number; i++) {
+               struct thread_data *td = &threads[i];
+               unsigned long bw = 0;
+
+               if (td->error)
+                       continue;
+
+               if (td->runtime < min_run[td->ddir])
+                       min_run[td->ddir] = td->runtime;
+               if (td->runtime > max_run[td->ddir])
+                       max_run[td->ddir] = td->runtime;
+
+               if (td->runtime)
+                       bw = (td->blocks_read * td->bs) / td->runtime;
+               if (bw < min_bw[td->ddir])
+                       min_bw[td->ddir] = bw;
+               if (bw > max_bw[td->ddir])
+                       max_bw[td->ddir] = bw;
+               if (td->max_latency < minl[td->ddir])
+                       minl[td->ddir] = td->max_latency;
+               if (td->max_latency > maxl[td->ddir])
+                       maxl[td->ddir] = td->max_latency;
+
+               total_blocks[td->ddir] += td->blocks_read;
+
+               if (td->ddir == DDIR_READ) {
+                       read_mb += (td->bs * td->blocks_read) >> 20;
+                       if (td->runtime)
+                               read_agg += (td->blocks_read * td->bs) / td->runtime;
+               }
+               if (td->ddir == DDIR_WRITE) {
+                       write_mb += (td->bs * td->blocks_read) >> 20;
+                       if (td->runtime)
+                               write_agg += (td->blocks_read * td->bs) / td->runtime;
+               }
+
+               show_thread_status(td);
+       }
+
+       printf("Run status:\n");
+       if (max_run[DDIR_READ])
+               printf("   READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
+       if (max_run[DDIR_WRITE])
+               printf("  WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
+       return 0;
+}