--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <semaphore.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <asm/unistd.h>
+
+/*
+ * assume we don't have _get either, if _set isn't defined
+ */
+#ifndef __NR_ioprio_set
+
+#if defined(__i386__)
+#define __NR_ioprio_set 289
+#define __NR_ioprio_get 290
+#elif defined(__powerpc__) || defined(__powerpc64__)
+#define __NR_ioprio_set 273
+#define __NR_ioprio_get 274
+#elif defined(__x86_64__)
+#define __NR_ioprio_set 251
+#define __NR_ioprio_get 252
+#elif defined(__ia64__)
+#define __NR_ioprio_set 1274
+#define __NR_ioprio_get 1275
+#elif defined(__alpha__)
+#define __NR_ioprio_set 442
+#define __NR_ioprio_get 443
+#elif defined(__s390x__) || defined(__s390__)
+#define __NR_ioprio_set 282
+#define __NR_ioprio_get 283
+#else
+#error "Unsupported arch"
+#endif
+
+#endif
+
+static int ioprio_set(int which, int who, int ioprio)
+{
+ return syscall(__NR_ioprio_set, which, who, ioprio);
+}
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+#define IOPRIO_CLASS_SHIFT 13
+
+#define BS (4096)
+#define MASK (4095)
+
+#define TIMEOUT (30)
+#define MAX_THREADS (32)
+
+#define ALIGN(buf) (((unsigned long) (buf) + MASK) & ~(MASK))
+
+static int sequential = 1;
+static int write_stat = 0;
+static int repeatable = 1;
+static int thread_number;
+static int timeout = TIMEOUT;
+static int odirect = 1;
+static int global_bs = BS;
+
+static int shm_id;
+
+#define DDIR_READ (0)
+#define DDIR_WRITE (1)
+
+struct thread_data {
+ char file_name[256];
+ int thread_number;
+ int error;
+ int fd;
+ int stat_fd;
+ pid_t pid;
+ int terminate;
+ int ddir;
+ int ioprio;
+ int sequential;
+ int bs;
+ int odirect;
+ int delay_sleep;
+ unsigned long max_latency; /* msec */
+ unsigned long min_latency; /* msec */
+ unsigned long runtime; /* sec */
+ unsigned long blocks;
+ unsigned long blocks_read;
+ unsigned long last_block;
+ sem_t mutex;
+ sem_t done_mutex;
+ struct drand48_data random_state;
+
+ /*
+ * bandwidth stat
+ */
+ unsigned long stat_time;
+ unsigned long stat_time_last;
+ unsigned long stat_blocks_last;
+};
+
+static struct thread_data *threads;
+static int thread_ddir[MAX_THREADS];
+
+static sem_t startup_sem;
+
+void sig_handler(int sig)
+{
+ int i;
+
+ for (i = 0; i < thread_number; i++) {
+ struct thread_data *td = &threads[i];
+
+ td->terminate = 1;
+ }
+}
+
+int init_random_state(struct thread_data *td)
+{
+ unsigned long seed = 123;
+
+ if (td->sequential)
+ return 0;
+
+ if (!repeatable) {
+ int fd = open("/dev/random", O_RDONLY);
+
+ if (fd == -1) {
+ td->error = errno;
+ return 1;
+ }
+
+ if (read(fd, &seed, sizeof(seed)) < sizeof(seed)) {
+ td->error = EIO;
+ close(fd);
+ return 1;
+ }
+
+ close(fd);
+ }
+
+ srand48_r(seed, &td->random_state);
+ return 0;
+}
+
+void shutdown_stat_file(struct thread_data *td)
+{
+ if (td->stat_fd != -1) {
+ fsync(td->stat_fd);
+ close(td->stat_fd);
+ }
+}
+
+int init_stat_file(struct thread_data *td)
+{
+ char *n;
+
+ if (!write_stat)
+ return 0;
+
+ n = malloc(256);
+ sprintf(n, "%s.stat", td->file_name);
+ td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (td->stat_fd == -1) {
+ free(n);
+ td->error = errno;
+ return 1;
+ }
+
+ free(n);
+ return 0;
+}
+
+unsigned long utime_since(struct timeval *s, struct timeval *e)
+{
+ double sec, usec;
+
+ sec = e->tv_sec - s->tv_sec;
+ usec = e->tv_usec - s->tv_usec;
+ if (sec > 0 && usec < 0) {
+ sec--;
+ usec += 1000000;
+ }
+
+ sec *= (double) 1000000;
+
+ return sec + usec;
+}
+
+unsigned long mtime_since(struct timeval *s, struct timeval *e)
+{
+ double sec, usec;
+
+ sec = e->tv_sec - s->tv_sec;
+ usec = e->tv_usec - s->tv_usec;
+ if (sec > 0 && usec < 0) {
+ sec--;
+ usec += 1000000;
+ }
+
+ sec *= (double) 1000;
+ usec /= (double) 1000;
+
+ return sec + usec;
+}
+
+unsigned long time_since(struct timeval *s, struct timeval *e)
+{
+ double sec, usec, ret;
+
+ sec = e->tv_sec - s->tv_sec;
+ usec = e->tv_usec - s->tv_usec;
+ if (sec > 0 && usec < 0) {
+ sec--;
+ usec += 1000000;
+ }
+
+ ret = sec + usec / (double) 1000000;
+ if (ret < 0)
+ ret = 0;
+
+ return (unsigned long) ret;
+}
+
+unsigned long get_next_offset(struct thread_data *td)
+{
+ unsigned long b;
+ long r;
+
+ if (!td->sequential) {
+ lrand48_r(&td->random_state, &r);
+ b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
+ } else {
+ b = td->last_block;
+ td->last_block++;
+ }
+
+ return b * td->bs;
+}
+
+void add_stat_sample(struct thread_data *td, unsigned long block, unsigned long msec)
+{
+ char sample[256];
+
+ if (!td->stat_fd)
+ return;
+
+#if 0
+ sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
+ write(td->stat_fd, sample, strlen(sample));
+#else
+ td->stat_time += msec;
+ td->stat_time_last += msec;
+ td->stat_blocks_last++;
+
+ if (td->stat_time_last >= 500) {
+ unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
+
+ td->stat_time_last = 0;
+ td->stat_blocks_last = 0;
+ sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
+ //sprintf(sample, "%lu, %lu\n", td->blocks_read, msec);
+ write(td->stat_fd, sample, strlen(sample));
+ }
+#endif
+}
+
+void delay_sleep(int usec)
+{
+ unsigned long since;
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+
+ do {
+ gettimeofday(&end, NULL);
+
+ since = utime_since(&start, &end);
+ if (since >= usec)
+ break;
+ } while (1);
+}
+
+void do_thread_io(struct thread_data *td)
+{
+ struct timeval s, e, start;
+ char *buffer, *ptr;
+ unsigned long blocks, msec;
+
+ ptr = malloc(td->bs+MASK);
+ buffer = (char *) ALIGN(ptr);
+
+ gettimeofday(&start, NULL);
+
+ for (blocks = 0; blocks < td->blocks; blocks++) {
+ off_t offset = get_next_offset(td);
+ int ret;
+
+ if (td->terminate)
+ break;
+
+ if (lseek(td->fd, offset, SEEK_SET) == -1) {
+ td->error = errno;
+ break;
+ }
+
+ if (td->delay_sleep)
+ delay_sleep(td->delay_sleep);
+
+ gettimeofday(&s, NULL);
+
+ if (td->ddir == DDIR_READ)
+ ret = read(td->fd, buffer, td->bs);
+ else
+ ret = write(td->fd, buffer, td->bs);
+
+ gettimeofday(&e, NULL);
+
+ if (ret < td->bs) {
+ if (ret == -1)
+ td->error = errno;
+ break;
+ }
+
+ msec = mtime_since(&s, &e);
+
+ add_stat_sample(td, offset / td->bs, msec);
+
+ td->blocks_read++;
+
+ //if (td->ddir == DDIR_WRITE && !(td->blocks_read % 512))
+ // fsync(td->fd);
+
+ if (msec < td->min_latency)
+ td->min_latency = msec;
+ if (msec > td->max_latency)
+ td->max_latency = msec;
+ }
+
+ if (td->ddir == DDIR_WRITE && !td->odirect)
+ fsync(td->fd);
+
+ gettimeofday(&e, NULL);
+ td->runtime = mtime_since(&start, &e);
+
+ free(ptr);
+}
+
+void *thread_main(int shm_id, int offset, char *argv[])
+{
+ struct thread_data *td;
+ void *data;
+ struct stat *statbuf = NULL;
+ int ret = 1, flags;
+
+ data = shmat(shm_id, NULL, 0);
+ td = data + offset * sizeof(struct thread_data);
+ td->pid = getpid();
+
+ printf("Thread (%s) (pid=%u) (f=%s) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name);
+ fflush(stdout);
+
+ sprintf(argv[0], "%s%d\n", argv[0], offset);
+
+ if (td->ddir == DDIR_READ)
+ flags = O_RDONLY;
+ else
+ flags = O_WRONLY | O_CREAT | O_TRUNC;
+
+ if (td->odirect)
+ flags |= O_DIRECT;
+
+ td->fd = open(td->file_name, flags);
+ if (td->fd == -1) {
+ td->error = errno;
+ goto err;
+ }
+
+ if (init_random_state(td))
+ goto out;
+ if (init_stat_file(td))
+ goto out;
+
+ if (td->ddir == DDIR_READ) {
+ statbuf = malloc(sizeof(*statbuf));
+ if (fstat(td->fd, statbuf) == -1) {
+ td->error = errno;
+ goto out;
+ }
+
+ td->blocks = statbuf->st_size / td->bs;
+ if (!td->blocks) {
+ td->error = EINVAL;
+ goto out;
+ }
+ } else
+ td->blocks = 1024 * 1024 * 1024 / td->bs;
+
+ if (td->ioprio != -1) {
+ if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
+ td->error = errno;
+ goto out;
+ }
+ }
+
+ sem_post(&startup_sem);
+ sem_wait(&td->mutex);
+ do_thread_io(td);
+ ret = 0;
+
+out:
+ close(td->fd);
+ if (statbuf)
+ free(statbuf);
+ shutdown_stat_file(td);
+err:
+ sem_post(&td->done_mutex);
+ if (ret)
+ sem_post(&startup_sem);
+ shmdt(td);
+ return NULL;
+}
+
+void free_shm(void)
+{
+ shmdt(threads);
+}
+
+void show_thread_status(struct thread_data *td)
+{
+ int prio, prio_class;
+ unsigned long bw = 0;
+
+ if (td->runtime)
+ bw = (td->blocks_read * td->bs) / td->runtime;
+
+ prio = td->ioprio & 0xff;
+ prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
+
+ printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->blocks_read * td->bs >> 20, bw);
+}
+
+void usage(char *progname)
+{
+ printf("%s: <-s 0/1> <-b kb> <-t sec> <-w 0/1> <-c r,w,r...> file0... fileN\n", progname);
+}
+
+void add_job(const char *filename, int rw, int bs, int direct, int prio, int random, int delay)
+{
+ struct thread_data *td = &threads[thread_number];
+
+ strcpy(td->file_name, filename);
+ td->thread_number = thread_number + 1;
+ td->stat_fd = -1;
+ sem_init(&td->mutex, 1, 1);
+ sem_init(&td->done_mutex, 1, 0);
+ td->min_latency = 10000000;
+ td->ddir = rw;
+ td->ioprio = prio;
+ td->sequential = !random;
+ td->odirect = direct;
+ td->bs = bs;
+ td->delay_sleep = delay;
+
+ thread_number++;
+}
+
+void fill_option(const char *input, char *output)
+{
+ int i;
+
+ i = 0;
+ while (input[i] != ',' && input[i] != '}' && input[i] != '\0') {
+ output[i] = input[i];
+ i++;
+ }
+
+ output[i] = '\0';
+}
+
+/*
+ * job key words:
+ *
+ * file=
+ * bs=
+ * rw=
+ * direct=
+ */
+int parse_jobs(int argc, char *argv[], int index)
+{
+ int rw, bs, direct, prio, random, prioclass, delay;
+ char *string, *filename, *p, *c;
+ int i;
+
+ string = malloc(256);
+ filename = malloc(256);
+
+ for (i = index; i < argc; i++) {
+ p = argv[i];
+
+ c = strpbrk(p, "{");
+ if (!c)
+ break;
+
+ filename[0] = 0;
+ rw = DDIR_READ;
+ bs = global_bs;
+ direct = 1;
+ prio = 4;
+ random = !sequential;
+ prioclass = 2;
+ delay = 0;
+
+ c = strstr(p, "rw=");
+ if (c) {
+ c += 3;
+ if (*c == '0')
+ rw = DDIR_READ;
+ else
+ rw = DDIR_WRITE;
+ }
+
+ c = strstr(p, "prio=");
+ if (c) {
+ c += 5;
+ prio = *c - '0';
+ }
+
+ c = strstr(p, "prioclass=");
+ if (c) {
+ c += 10;
+ prioclass = *c - '0';
+ }
+
+ c = strstr(p, "file=");
+ if (c) {
+ c += 5;
+ fill_option(c, filename);
+ }
+
+ c = strstr(p, "bs=");
+ if (c) {
+ c += 3;
+ fill_option(c, string);
+ bs = strtoul(string, NULL, 10);
+ bs <<= 10;
+ }
+
+ c = strstr(p, "direct=");
+ if (c) {
+ c += 7;
+ if (*c != '0')
+ direct = 1;
+ else
+ direct = 0;
+ }
+
+ c = strstr(p, "delay=");
+ if (c) {
+ c += 6;
+ fill_option(c, string);
+ delay = strtoul(string, NULL, 10);
+ }
+
+ c = strstr(p, "random");
+ if (c)
+ random = 1;
+ c = strstr(p, "sequential");
+ if (c)
+ random = 0;
+
+ add_job(filename, rw, bs, direct, (prioclass << IOPRIO_CLASS_SHIFT) | prio, random, delay);
+ }
+
+ return thread_number;
+}
+
+int parse_options(int argc, char *argv[])
+{
+ int i, j;
+
+ for (i = 1; i < argc; i++) {
+ char *parm = argv[i];
+
+ if (parm[0] != '-')
+ break;
+
+ parm++;
+ switch (*parm) {
+ case 's':
+ parm++;
+ sequential = !!atoi(parm);
+ break;
+ case 'b':
+ parm++;
+ global_bs = atoi(parm);
+ global_bs <<= 10;
+ break;
+ case 't':
+ parm++;
+ timeout = atoi(parm);
+ break;
+ case 'w':
+ parm++;
+ write_stat = !!atoi(parm);
+ break;
+ case 'r':
+ parm++;
+ repeatable = !!atoi(parm);
+ break;
+ case 'c': {
+ char *c;
+ j = 0;
+ parm++;
+ while ((c = strsep(&parm, ",")) != NULL) {
+ int rw = DDIR_READ;
+
+ if (*c == '1')
+ rw = DDIR_WRITE;
+
+ thread_ddir[j] = rw;
+ j++;
+ }
+ break;
+ }
+ case 'o':
+ parm++;
+ odirect = !!atoi(parm);
+ break;
+ default:
+ printf("bad option %s\n", argv[1]);
+ break;
+ }
+ }
+
+ if (global_bs <= 0)
+ global_bs = BS;
+ if (timeout <= 0)
+ timeout = TIMEOUT;
+
+ printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
+ return i;
+}
+
+int main(int argc, char *argv[])
+{
+ static unsigned long max_run[2], min_run[2], total_blocks[2];
+ static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
+ static unsigned long read_mb, write_mb, read_agg, write_agg;
+ int i, jobs;
+
+ if (argc - 1 > MAX_THREADS) {
+ printf("max %d threads\n", MAX_THREADS);
+ return 1;
+ }
+
+ shm_id = shmget(0, (argc - 1) * sizeof(struct thread_data), IPC_CREAT);
+ if (shm_id == -1) {
+ perror("shmget");
+ return 1;
+ }
+
+ threads = shmat(shm_id, NULL, 0);
+
+ atexit(free_shm);
+
+ for (i = 0; i < MAX_THREADS; i++)
+ thread_ddir[i] = DDIR_READ;
+
+ i = parse_options(argc, argv);
+ jobs = parse_jobs(argc, argv, i);
+
+ if (!jobs) {
+ printf("Nothing to do\n");
+ return 1;
+ }
+
+ for (i = 0; i < jobs; i++) {
+ sem_init(&startup_sem, 1, 1);
+
+ if (fork())
+ sem_wait(&startup_sem);
+ else {
+ thread_main(shm_id, i, argv);
+ exit(0);
+ }
+ }
+
+ if (!thread_number) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ signal(SIGALRM, sig_handler);
+ alarm(timeout);
+
+ printf("Starting %d threads\n", thread_number);
+ for (i = 0; i < thread_number; i++) {
+ struct thread_data *td = &threads[i];
+
+ sem_post(&td->mutex);
+ }
+
+ for (i = 0; i < thread_number; i++) {
+ struct thread_data *td = &threads[i];
+
+ waitpid(td->pid, NULL, 0);
+ }
+
+ min_bw[0] = min_run[0] = ~0UL;
+ min_bw[1] = min_run[1] = ~0UL;
+ minl[0] = minl[1] = ~0UL;
+ for (i = 0; i < thread_number; i++) {
+ struct thread_data *td = &threads[i];
+ unsigned long bw = 0;
+
+ if (td->error)
+ continue;
+
+ if (td->runtime < min_run[td->ddir])
+ min_run[td->ddir] = td->runtime;
+ if (td->runtime > max_run[td->ddir])
+ max_run[td->ddir] = td->runtime;
+
+ if (td->runtime)
+ bw = (td->blocks_read * td->bs) / td->runtime;
+ if (bw < min_bw[td->ddir])
+ min_bw[td->ddir] = bw;
+ if (bw > max_bw[td->ddir])
+ max_bw[td->ddir] = bw;
+ if (td->max_latency < minl[td->ddir])
+ minl[td->ddir] = td->max_latency;
+ if (td->max_latency > maxl[td->ddir])
+ maxl[td->ddir] = td->max_latency;
+
+ total_blocks[td->ddir] += td->blocks_read;
+
+ if (td->ddir == DDIR_READ) {
+ read_mb += (td->bs * td->blocks_read) >> 20;
+ if (td->runtime)
+ read_agg += (td->blocks_read * td->bs) / td->runtime;
+ }
+ if (td->ddir == DDIR_WRITE) {
+ write_mb += (td->bs * td->blocks_read) >> 20;
+ if (td->runtime)
+ write_agg += (td->blocks_read * td->bs) / td->runtime;
+ }
+
+ show_thread_status(td);
+ }
+
+ printf("Run status:\n");
+ if (max_run[DDIR_READ])
+ printf(" READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
+ if (max_run[DDIR_WRITE])
+ printf(" WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
+ return 0;
+}