[PATCH] Shorten the file names, stupid to prefix everything with fio-
author Jens Axboe <axboe@suse.de>
Wed, 7 Jun 2006 06:45:01 +0000 (08:45 +0200)
committer Jens Axboe <axboe@suse.de>
Wed, 7 Jun 2006 06:45:01 +0000 (08:45 +0200)
16 files changed:
Makefile
fio-ini.c [deleted file]
fio-io.c [deleted file]
fio-log.c [deleted file]
fio-log.h [deleted file]
fio-stat.c [deleted file]
fio-stat.h [deleted file]
fio-time.c [deleted file]
fio-time.h [deleted file]
fio.c
fio.h
init.c [new file with mode: 0644]
ioengines.c [new file with mode: 0644]
log.c [new file with mode: 0644]
stat.c [new file with mode: 0644]
time.c [new file with mode: 0644]

diff --git a/Makefile b/Makefile
index 16c8413643a69300228eb518189a3809be397bf6..684e8e04277b9fc23a9884ab70cdbc802e22c6d2 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ SCRIPTS = fio_generate_plots
 
 all: depend $(PROGS) $(SCRIPTS)
 
-fio: fio.o fio-io.o fio-ini.o fio-stat.o fio-log.o fio-time.o md5.o crc32.o
+fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o
        $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt
 
 clean:
diff --git a/fio-ini.c b/fio-ini.c
deleted file mode 100644
index d2122e3..0000000
--- a/fio-ini.c
+++ /dev/null
@@ -1,1133 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "fio.h"
-
-#define DEF_BS                 (4096)
-#define DEF_TIMEOUT            (0)
-#define DEF_RATE_CYCLE         (1000)
-#define DEF_ODIRECT            (1)
-#define DEF_IO_ENGINE          (FIO_SYNCIO)
-#define DEF_IO_ENGINE_NAME     "sync"
-#define DEF_SEQUENTIAL         (1)
-#define DEF_RAND_REPEAT                (1)
-#define DEF_OVERWRITE          (1)
-#define DEF_CREATE             (1)
-#define DEF_INVALIDATE         (1)
-#define DEF_SYNCIO             (0)
-#define DEF_RANDSEED           (0xb1899bedUL)
-#define DEF_BWAVGTIME          (500)
-#define DEF_CREATE_SER         (1)
-#define DEF_CREATE_FSYNC       (1)
-#define DEF_LOOPS              (1)
-#define DEF_VERIFY             (0)
-#define DEF_STONEWALL          (0)
-#define DEF_NUMJOBS            (1)
-#define DEF_USE_THREAD         (0)
-#define DEF_FILE_SIZE          (1024 * 1024 * 1024UL)
-#define DEF_ZONE_SIZE          (0)
-#define DEF_ZONE_SKIP          (0)
-#define DEF_RWMIX_CYCLE                (500)
-#define DEF_RWMIX_READ         (50)
-#define DEF_NICE               (0)
-
-static char fio_version_string[] = "fio 1.4";
-
-static int repeatable = DEF_RAND_REPEAT;
-static char *ini_file;
-static int max_jobs = MAX_JOBS;
-
-struct thread_data def_thread;
-struct thread_data *threads = NULL;
-
-int rate_quit = 0;
-int write_lat_log = 0;
-int write_bw_log = 0;
-int exitall_on_terminate = 0;
-unsigned long long mlock_size = 0;
-
-static int setup_rate(struct thread_data *td)
-{
-       int nr_reads_per_sec;
-
-       if (!td->rate)
-               return 0;
-
-       if (td->rate < td->ratemin) {
-               fprintf(stderr, "min rate larger than nominal rate\n");
-               return -1;
-       }
-
-       nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
-       td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
-       td->rate_pending_usleep = 0;
-       return 0;
-}
-
-static void setup_log(struct io_log **log)
-{
-       struct io_log *l = malloc(sizeof(*l));
-
-       l->nr_samples = 0;
-       l->max_samples = 1024;
-       l->log = malloc(l->max_samples * sizeof(struct io_sample));
-       *log = l;
-}
-
-void finish_log(struct thread_data *td, struct io_log *log, const char *name)
-{
-       char file_name[256];
-       FILE *f;
-       unsigned int i;
-
-       snprintf(file_name, 200, "client%d_%s.log", td->thread_number, name);
-       f = fopen(file_name, "w");
-       if (!f) {
-               perror("fopen log");
-               return;
-       }
-
-       for (i = 0; i < log->nr_samples; i++)
-               fprintf(f, "%lu, %lu, %u\n", log->log[i].time, log->log[i].val, log->log[i].ddir);
-
-       fclose(f);
-       free(log->log);
-       free(log);
-}
-
-static struct thread_data *get_new_job(int global, struct thread_data *parent)
-{
-       struct thread_data *td;
-
-       if (global)
-               return &def_thread;
-       if (thread_number >= max_jobs)
-               return NULL;
-
-       td = &threads[thread_number++];
-       if (parent)
-               *td = *parent;
-       else
-               memset(td, 0, sizeof(*td));
-
-       td->fd = -1;
-       td->thread_number = thread_number;
-
-       td->ddir = parent->ddir;
-       td->ioprio = parent->ioprio;
-       td->sequential = parent->sequential;
-       td->bs = parent->bs;
-       td->min_bs = parent->min_bs;
-       td->max_bs = parent->max_bs;
-       td->odirect = parent->odirect;
-       td->thinktime = parent->thinktime;
-       td->fsync_blocks = parent->fsync_blocks;
-       td->start_delay = parent->start_delay;
-       td->timeout = parent->timeout;
-       td->io_engine = parent->io_engine;
-       td->create_file = parent->create_file;
-       td->overwrite = parent->overwrite;
-       td->invalidate_cache = parent->invalidate_cache;
-       td->file_size = parent->file_size;
-       td->file_offset = parent->file_offset;
-       td->zone_size = parent->zone_size;
-       td->zone_skip = parent->zone_skip;
-       td->rate = parent->rate;
-       td->ratemin = parent->ratemin;
-       td->ratecycle = parent->ratecycle;
-       td->iodepth = parent->iodepth;
-       td->sync_io = parent->sync_io;
-       td->mem_type = parent->mem_type;
-       td->bw_avg_time = parent->bw_avg_time;
-       td->create_serialize = parent->create_serialize;
-       td->create_fsync = parent->create_fsync;
-       td->loops = parent->loops;
-       td->verify = parent->verify;
-       td->stonewall = parent->stonewall;
-       td->numjobs = parent->numjobs;
-       td->use_thread = parent->use_thread;
-       td->do_disk_util = parent->do_disk_util;
-       memcpy(&td->cpumask, &parent->cpumask, sizeof(td->cpumask));
-       strcpy(td->io_engine_name, parent->io_engine_name);
-
-       return td;
-}
-
-static void put_job(struct thread_data *td)
-{
-       memset(&threads[td->thread_number - 1], 0, sizeof(*td));
-       thread_number--;
-}
-
-static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
-{
-       char *ddir_str[] = { "read", "write", "randread", "randwrite",
-                            "rw", NULL, "randrw" };
-       struct stat sb;
-       int numjobs, ddir;
-
-#ifndef FIO_HAVE_LIBAIO
-       if (td->io_engine == FIO_LIBAIO) {
-               fprintf(stderr, "Linux libaio not available\n");
-               return 1;
-       }
-#endif
-#ifndef FIO_HAVE_POSIXAIO
-       if (td->io_engine == FIO_POSIXAIO) {
-               fprintf(stderr, "posix aio not available\n");
-               return 1;
-       }
-#endif
-
-       /*
-        * the def_thread is just for options, it's not a real job
-        */
-       if (td == &def_thread)
-               return 0;
-
-       if (td->io_engine & FIO_SYNCIO)
-               td->iodepth = 1;
-       else {
-               if (!td->iodepth)
-                       td->iodepth = 1;
-       }
-
-       /*
-        * only really works for sequential io for now
-        */
-       if (td->zone_size && !td->sequential)
-               td->zone_size = 0;
-
-       td->filetype = FIO_TYPE_FILE;
-       if (!stat(jobname, &sb)) {
-               if (S_ISBLK(sb.st_mode))
-                       td->filetype = FIO_TYPE_BD;
-               else if (S_ISCHR(sb.st_mode))
-                       td->filetype = FIO_TYPE_CHAR;
-       }
-
-       if (td->filetype == FIO_TYPE_FILE) {
-               if (td->directory && td->directory[0] != '\0')
-                       sprintf(td->file_name, "%s/%s.%d", td->directory, jobname, td->jobnum);
-               else
-                       sprintf(td->file_name, "%s.%d", jobname, td->jobnum);
-       } else
-               strncpy(td->file_name, jobname, sizeof(td->file_name) - 1);
-
-       sem_init(&td->mutex, 0, 0);
-
-       td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
-       td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX;
-       td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX;
-
-       if (td->min_bs == -1U)
-               td->min_bs = td->bs;
-       if (td->max_bs == -1U)
-               td->max_bs = td->bs;
-       if (td_read(td) && !td_rw(td))
-               td->verify = 0;
-
-       if (td->stonewall && td->thread_number > 1)
-               groupid++;
-
-       td->groupid = groupid;
-
-       if (setup_rate(td))
-               goto err;
-
-       if (write_lat_log) {
-               setup_log(&td->slat_log);
-               setup_log(&td->clat_log);
-       }
-       if (write_bw_log)
-               setup_log(&td->bw_log);
-
-       ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2);
-
-       if (!job_add_num)
-               printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth);
-       else if (job_add_num == 1)
-               printf("...\n");
-
-       /*
-        * recurse add identical jobs, clear numjobs and stonewall options
-        * as they don't apply to sub-jobs
-        */
-       numjobs = td->numjobs;
-       while (--numjobs) {
-               struct thread_data *td_new = get_new_job(0, td);
-
-               if (!td_new)
-                       goto err;
-
-               td_new->numjobs = 1;
-               td_new->stonewall = 0;
-               td_new->jobnum = numjobs;
-               job_add_num = numjobs - 1;
-
-               if (add_job(td_new, jobname, job_add_num))
-                       goto err;
-       }
-       return 0;
-err:
-       put_job(td);
-       return -1;
-}
-
-int init_random_state(struct thread_data *td)
-{
-       unsigned long seeds[4];
-       int fd, num_maps, blocks;
-
-       fd = open("/dev/urandom", O_RDONLY);
-       if (fd == -1) {
-               td_verror(td, errno);
-               return 1;
-       }
-
-       if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
-               td_verror(td, EIO);
-               close(fd);
-               return 1;
-       }
-
-       close(fd);
-
-       srand48_r(seeds[0], &td->bsrange_state);
-       srand48_r(seeds[1], &td->verify_state);
-       srand48_r(seeds[2], &td->rwmix_state);
-
-       if (td->sequential)
-               return 0;
-
-       if (repeatable)
-               seeds[3] = DEF_RANDSEED;
-
-       blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
-       num_maps = blocks / BLOCKS_PER_MAP;
-       td->file_map = malloc(num_maps * sizeof(long));
-       td->num_maps = num_maps;
-       memset(td->file_map, 0, num_maps * sizeof(long));
-
-       srand48_r(seeds[3], &td->random_state);
-       return 0;
-}
-
-static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
-{
-#ifdef FIO_HAVE_CPU_AFFINITY
-       unsigned int i;
-
-       CPU_ZERO(&cpumask);
-
-       for (i = 0; i < sizeof(int) * 8; i++) {
-               if ((1 << i) & cpu)
-                       CPU_SET(i, &cpumask);
-       }
-#endif
-}
-
-static unsigned long get_mult(char c)
-{
-       switch (c) {
-               case 'k':
-               case 'K':
-                       return 1024;
-               case 'm':
-               case 'M':
-                       return 1024 * 1024;
-               case 'g':
-               case 'G':
-                       return 1024 * 1024 * 1024;
-               default:
-                       return 1;
-       }
-}
-
-/*
- * convert string after '=' into decimal value, noting any size suffix
- */
-static int str_cnv(char *p, unsigned long long *val)
-{
-       char *str;
-       int len;
-
-       str = strchr(p, '=');
-       if (!str)
-               return 1;
-
-       str++;
-       len = strlen(str);
-
-       *val = strtoul(str, NULL, 10);
-       if (*val == ULONG_MAX && errno == ERANGE)
-               return 1;
-
-       *val *= get_mult(str[len - 1]);
-       return 0;
-}
-
-static int check_strcnv(char *p, char *name, unsigned long long *val)
-{
-       if (strncmp(p, name, strlen(name) - 1))
-               return 1;
-
-       return str_cnv(p, val);
-}
-
-static void strip_blank_front(char **p)
-{
-       char *s = *p;
-
-       while (isspace(*s))
-               s++;
-}
-
-static void strip_blank_end(char *p)
-{
-       char *s = p + strlen(p) - 1;
-
-       while (isspace(*s) || iscntrl(*s))
-               s--;
-
-       *(s + 1) = '\0';
-}
-
-typedef int (str_cb_fn)(struct thread_data *, char *);
-
-static int check_str(char *p, char *name, str_cb_fn *cb, struct thread_data *td)
-{
-       char *s;
-
-       if (strncmp(p, name, strlen(name)))
-               return 1;
-
-       s = strstr(p, name);
-       if (!s)
-               return 1;
-
-       s = strchr(s, '=');
-       if (!s)
-               return 1;
-
-       s++;
-       strip_blank_front(&s);
-       return cb(td, s);
-}
-
-static int check_strstore(char *p, char *name, char *dest)
-{
-       char *s;
-
-       if (strncmp(p, name, strlen(name)))
-               return 1;
-
-       s = strstr(p, name);
-       if (!s)
-               return 1;
-
-       s = strchr(p, '=');
-       if (!s)
-               return 1;
-
-       s++;
-       strip_blank_front(&s);
-
-       strcpy(dest, s);
-       return 0;
-}
-
-static int __check_range(char *str, unsigned long *val)
-{
-       char suffix;
-
-       if (sscanf(str, "%lu%c", val, &suffix) == 2) {
-               *val *= get_mult(suffix);
-               return 0;
-       }
-
-       if (sscanf(str, "%lu", val) == 1)
-               return 0;
-
-       return 1;
-}
-
-static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
-{
-       char option[128];
-       char *str, *p1, *p2;
-
-       if (strncmp(p, name, strlen(name)))
-               return 1;
-
-       strcpy(option, p);
-       p = option;
-
-       str = strstr(p, name);
-       if (!str)
-               return 1;
-
-       p += strlen(name);
-
-       str = strchr(p, '=');
-       if (!str)
-               return 1;
-
-       /*
-        * 'p' now holds whatever is after the '=' sign
-        */
-       p1 = str + 1;
-
-       /*
-        * terminate p1 at the '-' sign
-        */
-       p = strchr(p1, '-');
-       if (!p)
-               return 1;
-
-       p2 = p + 1;
-       *p = '\0';
-
-       if (!__check_range(p1, s) && !__check_range(p2, e))
-               return 0;
-
-       return 1;
-}
-
-static int check_int(char *p, char *name, unsigned int *val)
-{
-       char *str;
-
-       if (strncmp(p, name, strlen(name)))
-               return 1;
-
-       str = strstr(p, name);
-       if (!str)
-               return 1;
-
-       str = strchr(p, '=');
-       if (!str)
-               return 1;
-
-       str++;
-
-       if (sscanf(str, "%u", val) == 1)
-               return 0;
-
-       return 1;
-}
-
-static int check_strset(char *p, char *name)
-{
-       return strncmp(p, name, strlen(name));
-}
-
-static int is_empty_or_comment(char *line)
-{
-       unsigned int i;
-
-       for (i = 0; i < strlen(line); i++) {
-               if (line[i] == ';')
-                       return 1;
-               if (!isspace(line[i]) && !iscntrl(line[i]))
-                       return 0;
-       }
-
-       return 1;
-}
-
-static int str_rw_cb(struct thread_data *td, char *mem)
-{
-       if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) {
-               td->ddir = DDIR_READ;
-               td->sequential = 1;
-               return 0;
-       } else if (!strncmp(mem, "randread", 8)) {
-               td->ddir = DDIR_READ;
-               td->sequential = 0;
-               return 0;
-       } else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) {
-               td->ddir = DDIR_WRITE;
-               td->sequential = 1;
-               return 0;
-       } else if (!strncmp(mem, "randwrite", 9)) {
-               td->ddir = DDIR_WRITE;
-               td->sequential = 0;
-               return 0;
-       } else if (!strncmp(mem, "rw", 2)) {
-               td->ddir = 0;
-               td->iomix = 1;
-               td->sequential = 1;
-               return 0;
-       } else if (!strncmp(mem, "randrw", 6)) {
-               td->ddir = 0;
-               td->iomix = 1;
-               td->sequential = 0;
-               return 0;
-       }
-
-       fprintf(stderr, "bad data direction: %s\n", mem);
-       return 1;
-}
-
-static int str_verify_cb(struct thread_data *td, char *mem)
-{
-       if (!strncmp(mem, "0", 1)) {
-               td->verify = VERIFY_NONE;
-               return 0;
-       } else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1)) {
-               td->verify = VERIFY_MD5;
-               return 0;
-       } else if (!strncmp(mem, "crc32", 5)) {
-               td->verify = VERIFY_CRC32;
-               return 0;
-       }
-
-       fprintf(stderr, "bad verify type: %s\n", mem);
-       return 1;
-}
-
-static int str_mem_cb(struct thread_data *td, char *mem)
-{
-       if (!strncmp(mem, "malloc", 6)) {
-               td->mem_type = MEM_MALLOC;
-               return 0;
-       } else if (!strncmp(mem, "shm", 3)) {
-               td->mem_type = MEM_SHM;
-               return 0;
-       } else if (!strncmp(mem, "mmap", 4)) {
-               td->mem_type = MEM_MMAP;
-               return 0;
-       }
-
-       fprintf(stderr, "bad mem type: %s\n", mem);
-       return 1;
-}
-
-static int str_ioengine_cb(struct thread_data *td, char *str)
-{
-       if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) ||
-           !strncmp(str, "libaio", 6)) {
-               strcpy(td->io_engine_name, "libaio");
-               td->io_engine = FIO_LIBAIO;
-               return 0;
-       } else if (!strncmp(str, "posixaio", 8)) {
-               strcpy(td->io_engine_name, "posixaio");
-               td->io_engine = FIO_POSIXAIO;
-               return 0;
-       } else if (!strncmp(str, "sync", 4)) {
-               strcpy(td->io_engine_name, "sync");
-               td->io_engine = FIO_SYNCIO;
-               return 0;
-       } else if (!strncmp(str, "mmap", 4)) {
-               strcpy(td->io_engine_name, "mmap");
-               td->io_engine = FIO_MMAPIO;
-               return 0;
-       } else if (!strncmp(str, "sgio", 4)) {
-               strcpy(td->io_engine_name, "sgio");
-               td->io_engine = FIO_SGIO;
-               return 0;
-       } else if (!strncmp(str, "splice", 6)) {
-               strcpy(td->io_engine_name, "splice");
-               td->io_engine = FIO_SPLICEIO;
-               return 0;
-       }
-
-       fprintf(stderr, "bad ioengine type: %s\n", str);
-       return 1;
-}
-
-static int str_iolog_cb(struct thread_data *td, char *file)
-{
-       td->iolog_file = strdup(file);
-       return 0;
-}
-
-static int str_prerun_cb(struct thread_data *td, char *file)
-{
-       td->exec_prerun = strdup(file);
-       return 0;
-}
-
-static int str_postrun_cb(struct thread_data *td, char *file)
-{
-       td->exec_postrun = strdup(file);
-       return 0;
-}
-
-static int str_iosched_cb(struct thread_data *td, char *file)
-{
-       td->ioscheduler = strdup(file);
-       return 0;
-}
-
-int parse_jobs_ini(char *file)
-{
-       unsigned int prioclass, prio, cpu, global, il;
-       unsigned long long ull;
-       unsigned long ul1, ul2;
-       struct thread_data *td;
-       char *string, *name, *tmpbuf;
-       fpos_t off;
-       FILE *f;
-       char *p;
-
-       f = fopen(file, "r");
-       if (!f) {
-               perror("fopen job file");
-               return 1;
-       }
-
-       string = malloc(4096);
-       name = malloc(256);
-       tmpbuf = malloc(4096);
-
-       while ((p = fgets(string, 4096, f)) != NULL) {
-               if (is_empty_or_comment(p))
-                       continue;
-               if (sscanf(p, "[%s]", name) != 1)
-                       continue;
-
-               global = !strncmp(name, "global", 6);
-
-               name[strlen(name) - 1] = '\0';
-
-               td = get_new_job(global, &def_thread);
-               if (!td)
-                       return 1;
-
-               fgetpos(f, &off);
-               while ((p = fgets(string, 4096, f)) != NULL) {
-                       if (is_empty_or_comment(p))
-                               continue;
-                       if (strstr(p, "["))
-                               break;
-                       strip_blank_front(&p);
-                       strip_blank_end(p);
-
-                       if (!check_int(p, "prio", &prio)) {
-#ifndef FIO_HAVE_IOPRIO
-                               fprintf(stderr, "io priorities not available\n");
-                               return 1;
-#endif
-                               td->ioprio |= prio;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "prioclass", &prioclass)) {
-#ifndef FIO_HAVE_IOPRIO
-                               fprintf(stderr, "io priorities not available\n");
-                               return 1;
-#endif
-                               td->ioprio |= prioclass << IOPRIO_CLASS_SHIFT;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "direct", &td->odirect)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "rate", &td->rate)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "ratemin", &td->ratemin)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "ratecycle", &td->ratecycle)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "thinktime", &td->thinktime)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "cpumask", &cpu)) {
-#ifndef FIO_HAVE_CPU_AFFINITY
-                               fprintf(stderr, "cpu affinity not available\n");
-                               return 1;
-#endif
-                               fill_cpu_mask(td->cpumask, cpu);
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "fsync", &td->fsync_blocks)) {
-                               fgetpos(f, &off);
-                               td->end_fsync = 1;
-                               continue;
-                       }
-                       if (!check_int(p, "startdelay", &td->start_delay)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "timeout", &td->timeout)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "invalidate",&td->invalidate_cache)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "iodepth", &td->iodepth)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "sync", &td->sync_io)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "create_serialize", &td->create_serialize)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "create_fsync", &td->create_fsync)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "end_fsync", &td->end_fsync)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "loops", &td->loops)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "numjobs", &td->numjobs)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "overwrite", &td->overwrite)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "rwmixcycle", &td->rwmixcycle)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "rwmixread", &il)) {
-                               if (il > 100)
-                                       il = 100;
-                               td->rwmixread = il;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "rwmixwrite", &il)) {
-                               if (il > 100)
-                                       il = 100;
-                               td->rwmixread = 100 - il;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_int(p, "nice", &td->nice)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_range(p, "bsrange", &ul1, &ul2)) {
-                               if (ul1 > ul2) {
-                                       td->max_bs = ul1;
-                                       td->min_bs = ul2;
-                               } else {
-                                       td->max_bs = ul2;
-                                       td->min_bs = ul1;
-                               }
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strcnv(p, "bs", &ull)) {
-                               td->bs = ull;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strcnv(p, "size", &td->file_size)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strcnv(p, "offset", &td->file_offset)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strcnv(p, "zonesize", &td->zone_size)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strcnv(p, "zoneskip", &td->zone_skip)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strcnv(p, "lockmem", &mlock_size)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strstore(p, "directory", tmpbuf)) {
-                               td->directory = strdup(tmpbuf);
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "mem", str_mem_cb, td)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "verify", str_verify_cb, td)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "rw", str_rw_cb, td)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "ioengine", str_ioengine_cb, td)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strset(p, "create")) {
-                               td->create_file = 1;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strset(p, "exitall")) {
-                               exitall_on_terminate = 1;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strset(p, "stonewall")) {
-                               td->stonewall = 1;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_strset(p, "thread")) {
-                               td->use_thread = 1;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "iolog", str_iolog_cb, td)) {
-                               td->read_iolog = 1;
-                               td->write_iolog = 0;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!td->read_iolog &&
-                           !check_str(p, "write_iolog", str_iolog_cb, td)) {
-                               td->write_iolog = 1;
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "exec_prerun", str_prerun_cb, td)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "exec_postrun", str_postrun_cb, td)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-                       if (!check_str(p, "ioscheduler", str_iosched_cb, td)) {
-                               fgetpos(f, &off);
-                               continue;
-                       }
-
-                       printf("Client%d: bad option %s\n",td->thread_number,p);
-                       return 1;
-               }
-               fsetpos(f, &off);
-
-               if (add_job(td, name, 0))
-                       return 1;
-       }
-
-       free(string);
-       free(name);
-       free(tmpbuf);
-       fclose(f);
-       return 0;
-}
-
-/*
- * Zero the file-scope def_thread and load it with the compiled-in DEF_*
- * defaults plus the CPU affinity mask of the calling process.
- *
- * Returns 0 on success, 1 if the affinity mask could not be read.
- */
-static int fill_def_thread(void)
-{
-       memset(&def_thread, 0, sizeof(def_thread));
-
-       if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) {
-               perror("sched_getaffinity");
-               return 1;
-       }
-
-       /*
-        * fill globals
-        */
-       def_thread.ddir = DDIR_READ;
-       def_thread.iomix = 0;
-       def_thread.bs = DEF_BS;
-       def_thread.min_bs = -1;
-       def_thread.max_bs = -1;
-       def_thread.io_engine = DEF_IO_ENGINE;
-       strcpy(def_thread.io_engine_name, DEF_IO_ENGINE_NAME);
-       def_thread.odirect = DEF_ODIRECT;
-       def_thread.ratecycle = DEF_RATE_CYCLE;
-       def_thread.sequential = DEF_SEQUENTIAL;
-       def_thread.timeout = DEF_TIMEOUT;
-       def_thread.create_file = DEF_CREATE;
-       def_thread.overwrite = DEF_OVERWRITE;
-       def_thread.invalidate_cache = DEF_INVALIDATE;
-       def_thread.sync_io = DEF_SYNCIO;
-       def_thread.mem_type = MEM_MALLOC;
-       def_thread.bw_avg_time = DEF_BWAVGTIME;
-       def_thread.create_serialize = DEF_CREATE_SER;
-       def_thread.create_fsync = DEF_CREATE_FSYNC;
-       def_thread.loops = DEF_LOOPS;
-       def_thread.verify = DEF_VERIFY;
-       def_thread.stonewall = DEF_STONEWALL;
-       def_thread.numjobs = DEF_NUMJOBS;
-       def_thread.use_thread = DEF_USE_THREAD;
-       def_thread.rwmixcycle = DEF_RWMIX_CYCLE;
-       def_thread.rwmixread = DEF_RWMIX_READ;
-       def_thread.nice = DEF_NICE;
-#ifdef FIO_HAVE_DISK_UTIL
-       /* disk utilisation accounting is on by default where supported */
-       def_thread.do_disk_util = 1;
-#endif
-
-       return 0;
-}
-
-/*
- * Print the version banner followed by a one-line description of each
- * command line switch accepted by parse_cmd_line(), including -r and -h
- * which were previously parsed but undocumented.
- *
- * 'name' is not used; it is kept so existing callers keep compiling.
- */
-static void usage(char *name)
-{
-       (void) name;
-
-       printf("%s\n", fio_version_string);
-       printf("\t-s IO is sequential\n");
-       printf("\t-b Block size in KiB for each IO\n");
-       printf("\t-t Runtime in seconds\n");
-       printf("\t-r For random IO, sequence must be repeatable\n");
-       printf("\t-R Exit all threads on failure to meet rate goal\n");
-       printf("\t-o Use O_DIRECT\n");
-       printf("\t-l Generate per-job latency logs\n");
-       printf("\t-w Generate per-job bandwidth logs\n");
-       printf("\t-f Job file (Required)\n");
-       printf("\t-v Print version info and exit\n");
-       printf("\t-h Print this help and exit\n");
-}
-
-/*
- * Parse the command line switches into def_thread and the file-scope
- * option variables (repeatable, rate_quit, ini_file, write_lat_log,
- * write_bw_log). -h and -v print and exit(0).
- *
- * If no -f was given, the last argument is taken as the job file, but
- * only when getopt() left at least one operand unconsumed; this stops
- * the argument of a trailing option (e.g. "-t 10") from being mistaken
- * for the job file.
- */
-static void parse_cmd_line(int argc, char *argv[])
-{
-       int c;
-
-       while ((c = getopt(argc, argv, "s:b:t:r:R:o:f:lwvh")) != -1) {
-               switch (c) {
-                       case 's':
-                               def_thread.sequential = !!atoi(optarg);
-                               break;
-                       case 'b': {
-                               int kb = atoi(optarg);
-
-                               /*
-                                * reject zero/negative sizes and anything
-                                * that would overflow once scaled to bytes
-                                */
-                               if (kb <= 0 || (unsigned int) kb > (~0U >> 11)) {
-                                       printf("bad block size\n");
-                                       def_thread.bs = DEF_BS;
-                               } else
-                                       def_thread.bs = kb << 10;
-                               break;
-                       }
-                       case 't':
-                               def_thread.timeout = atoi(optarg);
-                               break;
-                       case 'r':
-                               repeatable = !!atoi(optarg);
-                               break;
-                       case 'R':
-                               rate_quit = !!atoi(optarg);
-                               break;
-                       case 'o':
-                               def_thread.odirect = !!atoi(optarg);
-                               break;
-                       case 'f':
-                               ini_file = strdup(optarg);
-                               break;
-                       case 'l':
-                               write_lat_log = 1;
-                               break;
-                       case 'w':
-                               write_bw_log = 1;
-                               break;
-                       case 'h':
-                               usage(argv[0]);
-                               exit(0);
-                       case 'v':
-                               printf("%s\n", fio_version_string);
-                               exit(0);
-                       default:
-                               /* getopt() has already printed a diagnostic */
-                               break;
-               }
-       }
-
-       if (!ini_file && optind < argc && argv[argc - 1][0] != '-')
-               ini_file = strdup(argv[argc - 1]);
-}
-
-/*
- * Detach from the shared 'threads' area and mark the segment identified
- * by shm_id for removal. Does nothing when no area is attached.
- */
-static void free_shm(void)
-{
-       struct shmid_ds ds;
-
-       if (!threads)
-               return;
-
-       shmdt(threads);
-       threads = NULL;
-       shmctl(shm_id, IPC_RMID, &ds);
-}
-
-/*
- * Allocate and attach the SysV shared memory segment that holds the
- * thread_data array, then register free_shm() to run at exit.
- *
- * Returns 0 on success, 1 on failure. If attaching fails the freshly
- * created segment is removed again, since SysV segments outlive the
- * process that created them.
- */
-static int setup_thread_area(void)
-{
-       /*
-        * 1024 is too much on some machines, scale max_jobs if
-        * we get a failure that looks like too large a shm segment
-        */
-       do {
-               /* size_t: the byte count may not fit in an int */
-               size_t size = (size_t) max_jobs * sizeof(struct thread_data);
-
-               shm_id = shmget(IPC_PRIVATE, size, IPC_CREAT | 0600);
-               if (shm_id != -1)
-                       break;
-               if (errno != EINVAL) {
-                       perror("shmget");
-                       break;
-               }
-
-               max_jobs >>= 1;
-       } while (max_jobs);
-
-       if (shm_id == -1)
-               return 1;
-
-       threads = shmat(shm_id, NULL, 0);
-       if (threads == (void *) -1) {
-               struct shmid_ds ds;
-
-               perror("shmat");
-               shmctl(shm_id, IPC_RMID, &ds);
-               threads = NULL;
-               return 1;
-       }
-
-       atexit(free_shm);
-       return 0;
-}
-
-/*
- * Top-level option handling: set up the shared thread area, load the
- * defaults, parse the command line and then the job file.
- *
- * Returns 0 when a job file was given and parsed, 1 otherwise. Usage is
- * printed when the job file is missing or fails to parse.
- */
-int parse_options(int argc, char *argv[])
-{
-       if (setup_thread_area() || fill_def_thread())
-               return 1;
-
-       parse_cmd_line(argc, argv);
-
-       if (ini_file) {
-               if (!parse_jobs_ini(ini_file))
-                       return 0;
-       } else
-               printf("Need job file\n");
-
-       usage(argv[0]);
-       return 1;
-}
diff --git a/fio-io.c b/fio-io.c
deleted file mode 100644 (file)
index 7b1c1bd..0000000
--- a/fio-io.c
+++ /dev/null
@@ -1,919 +0,0 @@
-/*
- * The io parts of the fio tool, includes workers for sync and mmap'ed
- * io, as well as both posix and linux libaio support.
- *
- * sync io is implemented on top of aio.
- *
- * This is not really specific to fio; if get_io_u/put_io_u and the
- * related structures were pulled into this file as well, it would be a
- * perfectly generic io engine that could be used for other projects.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <time.h>
-#include <sys/mman.h>
-#include <sys/poll.h>
-#include "fio.h"
-#include "os.h"
-
-#ifdef FIO_HAVE_LIBAIO
-
-#define ev_to_iou(ev)  (struct io_u *) ((unsigned long) (ev)->obj)
-
-/*
- * io_sync hook shared by the fd based engines: fsync() the job's file
- * descriptor and hand back fsync()'s return value.
- */
-static int fio_io_sync(struct thread_data *td)
-{
-       int fd = td->fd;
-
-       return fsync(fd);
-}
-
-/*
- * Store the current CLOCK_MONOTONIC time in *ts.
- *
- * Returns 0 on success. Returns 1 if clock_gettime() fails (after
- * printing the reason) or if _POSIX_TIMERS is not defined.
- */
-static int fill_timespec(struct timespec *ts)
-{
-       int failed = 1;
-
-#ifdef _POSIX_TIMERS
-       failed = clock_gettime(CLOCK_MONOTONIC, ts) != 0;
-       if (failed)
-               perror("clock_gettime");
-#endif
-       return failed;
-}
-
-/*
- * Microseconds elapsed between *t and now, using fill_timespec() for the
- * current time. Returns 0 when the current time cannot be obtained.
- */
-static unsigned long long ts_utime_since_now(struct timespec *t)
-{
-       struct timespec cur;
-       long long d_sec, d_nsec;
-
-       if (fill_timespec(&cur) != 0)
-               return 0;
-
-       d_sec = cur.tv_sec - t->tv_sec;
-       d_nsec = cur.tv_nsec - t->tv_nsec;
-
-       /* borrow a second when the nanosecond difference went negative */
-       if (d_sec > 0 && d_nsec < 0) {
-               d_sec -= 1;
-               d_nsec += 1000000000;
-       }
-
-       return d_sec * 1000000 + d_nsec / 1000;
-}
-
-/*
- * Per-job private state of the libaio engine, stored in td->io_data.
- */
-struct libaio_data {
-       io_context_t aio_ctx;           /* context set up by io_queue_init() */
-       struct io_event *aio_events;    /* completion array, td->iodepth entries */
-};
-
-/*
- * io_prep hook: fill in the io_u's iocb as a pread or pwrite on td->fd,
- * depending on the io_u's data direction. Always returns 0.
- */
-static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
-{
-       struct iocb *iocb = &io_u->iocb;
-
-       if (io_u->ddir != DDIR_READ)
-               io_prep_pwrite(iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
-       else
-               io_prep_pread(iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
-
-       return 0;
-}
-
-/*
- * io_event hook: map slot 'event' of the completion array back to the
- * io_u that was stored in the event's obj field.
- */
-static struct io_u *fio_libaio_event(struct thread_data *td, int event)
-{
-       struct libaio_data *ld = td->io_data;
-       struct io_event *ev = &ld->aio_events[event];
-
-       return ev_to_iou(ev);
-}
-
-/*
- * io_getevents hook: call io_getevents() until it returns something
- * other than -EAGAIN or -EINTR, sleeping 100 usec before retrying on
- * -EAGAIN. Returns the final io_getevents() result.
- */
-static int fio_libaio_getevents(struct thread_data *td, int min, int max,
-                               struct timespec *t)
-{
-       struct libaio_data *ld = td->io_data;
-       int nr;
-
-       for (;;) {
-               nr = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
-               if (nr == -EINTR)
-                       continue;
-               if (nr != -EAGAIN)
-                       break;
-
-               usleep(100);
-       }
-
-       return nr;
-}
-
-/*
- * io_queue hook: submit the io_u's iocb. Retries on -EINTR and, after a
- * 100 usec sleep, on -EAGAIN. Returns 0 once the single iocb has been
- * accepted, otherwise the failing io_submit() result.
- */
-static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
-{
-       struct libaio_data *ld = td->io_data;
-       struct iocb *iocb = &io_u->iocb;
-
-       for (;;) {
-               int ret = io_submit(ld->aio_ctx, 1, &iocb);
-
-               if (ret == 1)
-                       return 0;
-               if (ret == -EAGAIN) {
-                       usleep(100);
-                       continue;
-               }
-               if (ret != -EINTR)
-                       return ret;
-       }
-}
-
-/*
- * io_cancel hook: ask io_cancel() to cancel the io_u's iocb, using the
- * start of the completion array as the result slot. Returns io_cancel()'s
- * result.
- */
-static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
-       struct libaio_data *ld = td->io_data;
-       struct iocb *iocb = &io_u->iocb;
-
-       return io_cancel(ld->aio_ctx, iocb, ld->aio_events);
-}
-
-/*
- * io_cleanup hook: destroy the aio context, release the completion array
- * and the private data, and clear td->io_data. No-op when td->io_data is
- * already NULL.
- */
-static void fio_libaio_cleanup(struct thread_data *td)
-{
-       struct libaio_data *ld = td->io_data;
-
-       if (!ld)
-               return;
-
-       io_destroy(ld->aio_ctx);
-       free(ld->aio_events);   /* free(NULL) is a no-op */
-       free(ld);
-       td->io_data = NULL;
-}
-
-/*
- * Set up the libaio engine for a job: allocate the private data and the
- * completion array, create an aio context sized to td->iodepth and
- * install the engine hooks.
- *
- * Returns 0 on success. On failure the error is recorded with
- * td_verror(), everything allocated here is released, td is left
- * untouched and 1 is returned.
- */
-int fio_libaio_init(struct thread_data *td)
-{
-       struct libaio_data *ld;
-       int err;
-
-       ld = calloc(1, sizeof(*ld));
-       if (!ld) {
-               td_verror(td, ENOMEM);
-               return 1;
-       }
-
-       ld->aio_events = calloc(td->iodepth, sizeof(struct io_event));
-       if (!ld->aio_events) {
-               td_verror(td, ENOMEM);
-               free(ld);
-               return 1;
-       }
-
-       /* io_queue_init() reports failure as a negative errno value */
-       err = io_queue_init(td->iodepth, &ld->aio_ctx);
-       if (err) {
-               td_verror(td, -err);
-               free(ld->aio_events);
-               free(ld);
-               return 1;
-       }
-
-       td->io_prep = fio_libaio_io_prep;
-       td->io_queue = fio_libaio_queue;
-       td->io_getevents = fio_libaio_getevents;
-       td->io_event = fio_libaio_event;
-       td->io_cancel = fio_libaio_cancel;
-       td->io_cleanup = fio_libaio_cleanup;
-       td->io_sync = fio_io_sync;
-
-       td->io_data = ld;
-       return 0;
-}
-
-#else /* FIO_HAVE_LIBAIO */
-
-/*
- * Built without FIO_HAVE_LIBAIO: the engine is unavailable, so setup
- * always fails with EINVAL.
- */
-int fio_libaio_init(struct thread_data *td)
-{
-       return EINVAL;
-}
-
-#endif /* FIO_HAVE_LIBAIO */
-
-#ifdef FIO_HAVE_POSIXAIO
-
-/*
- * Per-job private state of the POSIX aio engine, stored in td->io_data.
- */
-struct posixaio_data {
-       struct io_u **aio_events;       /* completed io_us, td->iodepth slots */
-};
-
-/*
- * io_cancel hook for POSIX aio.
- *
- * Returns 0 when the request is no longer in flight, i.e. aio_cancel()
- * reports AIO_CANCELED or AIO_ALLDONE, and 1 otherwise (still running,
- * or aio_cancel() failed). The symbolic constants are used instead of a
- * literal because their numeric values are implementation defined.
- */
-static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
-       int r = aio_cancel(td->fd, &io_u->aiocb);
-
-       if (r == AIO_ALLDONE || r == AIO_CANCELED)
-               return 0;
-
-       return 1;
-}
-
-/*
- * io_prep hook: copy fd, buffer, length and offset of the io_u into its
- * aiocb and clear the 'seen' marker used by the event reaping code.
- * Always returns 0.
- */
-static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
-{
-       struct aiocb *cb = &io_u->aiocb;
-
-       io_u->seen = 0;
-
-       cb->aio_offset = io_u->offset;
-       cb->aio_nbytes = io_u->buflen;
-       cb->aio_buf = io_u->buf;
-       cb->aio_fildes = td->fd;
-
-       return 0;
-}
-
-/*
- * io_getevents hook for POSIX aio: poll every busy, not-yet-seen io_u
- * with aio_error() and collect the finished ones in pd->aio_events.
- *
- * Returns the number of collected events once at least 'min' are
- * available. When 't' is given and a start time could be taken, it
- * returns early (possibly with fewer than 'min') after that much time
- * has passed. Otherwise it sleeps 1 msec and rescans.
- */
-static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
-                                 struct timespec *t)
-{
-       struct posixaio_data *pd = td->io_data;
-       struct list_head *entry;
-       struct timespec start;
-       int r, have_timeout = 0;
-
-       /* the timeout is only honoured if a start time could be taken */
-       if (t && !fill_timespec(&start))
-               have_timeout = 1;
-
-       r = 0;
-restart:
-       /* r is kept across rescans; reaped io_us are skipped via ->seen */
-       list_for_each(entry, &td->io_u_busylist) {
-               struct io_u *io_u = list_entry(entry, struct io_u, list);
-               int err;
-
-               if (io_u->seen)
-                       continue;
-
-               err = aio_error(&io_u->aiocb);
-               switch (err) {
-                       default:
-                               /* real failure: record it, then reap below */
-                               io_u->error = err;
-                               /* fall through */
-                       case ECANCELED:
-                       case 0:
-                               pd->aio_events[r++] = io_u;
-                               io_u->seen = 1;
-                               break;
-                       case EINPROGRESS:
-                               break;
-               }
-
-               if (r >= max)
-                       break;
-       }
-
-       if (r >= min)
-               return r;
-
-       if (have_timeout) {
-               unsigned long long usec;
-
-               /* convert the caller's timeout to microseconds */
-               usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
-               if (ts_utime_since_now(&start) > usec)
-                       return r;
-       }
-
-       /*
-        * hrmpf, we need to wait for more. we should use aio_suspend, for
-        * now just sleep a little and recheck status of busy-and-not-seen
-        */
-       usleep(1000);
-       goto restart;
-}
-
-/*
- * io_event hook: return the io_u stored in slot 'event' by the most
- * recent event collection.
- */
-static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
-{
-       struct posixaio_data *pd = td->io_data;
-       struct io_u **slot = pd->aio_events + event;
-
-       return *slot;
-}
-
-/*
- * io_queue hook: start the io_u with aio_read() or aio_write() according
- * to its direction. A failed submission stores errno in io_u->error.
- * Returns io_u->error.
- */
-static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
-{
-       struct aiocb *cb = &io_u->aiocb;
-       int failed;
-
-       failed = (io_u->ddir == DDIR_READ) ? aio_read(cb) : aio_write(cb);
-       if (failed)
-               io_u->error = errno;
-
-       return io_u->error;
-}
-
-/*
- * io_cleanup hook: free the event array and the private data, then
- * clear td->io_data. No-op when nothing is attached.
- */
-static void fio_posixaio_cleanup(struct thread_data *td)
-{
-       struct posixaio_data *pd = td->io_data;
-
-       if (!pd)
-               return;
-
-       free(pd->aio_events);
-       free(pd);
-       td->io_data = NULL;
-}
-
-/*
- * Set up the POSIX aio engine for a job: allocate the private data with
- * room for td->iodepth completed io_us and install the engine hooks.
- *
- * Returns 0 on success. If memory cannot be allocated, ENOMEM is
- * recorded with td_verror(), td is left untouched and 1 is returned.
- */
-int fio_posixaio_init(struct thread_data *td)
-{
-       struct posixaio_data *pd = malloc(sizeof(*pd));
-
-       if (!pd) {
-               td_verror(td, ENOMEM);
-               return 1;
-       }
-
-       pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
-       if (!pd->aio_events) {
-               td_verror(td, ENOMEM);
-               free(pd);
-               return 1;
-       }
-
-       td->io_prep = fio_posixaio_prep;
-       td->io_queue = fio_posixaio_queue;
-       td->io_getevents = fio_posixaio_getevents;
-       td->io_event = fio_posixaio_event;
-       td->io_cancel = fio_posixaio_cancel;
-       td->io_cleanup = fio_posixaio_cleanup;
-       td->io_sync = fio_io_sync;
-
-       td->io_data = pd;
-       return 0;
-}
-
-#else /* FIO_HAVE_POSIXAIO */
-
-/*
- * Built without FIO_HAVE_POSIXAIO: the engine is unavailable, so setup
- * always fails with EINVAL.
- */
-int fio_posixaio_init(struct thread_data *td)
-{
-       return EINVAL;
-}
-
-#endif /* FIO_HAVE_POSIXAIO */
-
-/*
- * Private state of the sync and mmap engines, stored in td->io_data.
- */
-struct syncio_data {
-       struct io_u *last_io_u;         /* io_u of the last successful queue call */
-};
-
-/*
- * io_getevents hook for the synchronous engines. The queue depth is
- * always one, so there is at most a single finished io_u: report 1 when
- * the busy list is non-empty and 0 otherwise. 'max' must not exceed 1.
- */
-static int fio_syncio_getevents(struct thread_data *td, int min, int max,
-                               struct timespec *t)
-{
-       assert(max <= 1);
-
-       return list_empty(&td->io_u_busylist) ? 0 : 1;
-}
-
-/*
- * io_event hook for the synchronous engines: only event 0 exists and it
- * is the io_u remembered by the last successful queue call.
- */
-static struct io_u *fio_syncio_event(struct thread_data *td, int event)
-{
-       struct syncio_data *sd;
-
-       assert(event == 0);
-
-       sd = td->io_data;
-       return sd->last_io_u;
-}
-
-/*
- * io_prep hook for sync io: position td->fd at the io_u's offset.
- * Returns 0 on success; on failure records errno via td_verror() and
- * returns 1.
- */
-static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
-{
-       if (lseek(td->fd, io_u->offset, SEEK_SET) != -1)
-               return 0;
-
-       td_verror(td, errno);
-       return 1;
-}
-
-/*
- * io_queue hook for sync io: perform the read() or write() right away.
- *
- * A short transfer sets io_u->resid and ENODATA; a failed or empty
- * transfer stores errno. On success the io_u is remembered as the last
- * completed one. Returns io_u->error.
- */
-static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
-{
-       struct syncio_data *sd = td->io_data;
-       int done;
-
-       if (io_u->ddir != DDIR_READ)
-               done = write(td->fd, io_u->buf, io_u->buflen);
-       else
-               done = read(td->fd, io_u->buf, io_u->buflen);
-
-       if ((unsigned int) done != io_u->buflen) {
-               if (done <= 0)
-                       io_u->error = errno;
-               else {
-                       io_u->resid = io_u->buflen - done;
-                       io_u->error = ENODATA;
-               }
-       }
-
-       if (io_u->error)
-               return io_u->error;
-
-       sd->last_io_u = io_u;
-       return io_u->error;
-}
-
-/*
- * io_cleanup hook: release the private data block, if any, and clear
- * td->io_data.
- */
-static void fio_syncio_cleanup(struct thread_data *td)
-{
-       void *data = td->io_data;
-
-       if (!data)
-               return;
-
-       free(data);
-       td->io_data = NULL;
-}
-
-/*
- * Set up the plain read()/write() engine for a job: allocate the private
- * data and install the engine hooks (no cancel support).
- *
- * Returns 0 on success. If memory cannot be allocated, ENOMEM is
- * recorded with td_verror(), td is left untouched and 1 is returned.
- */
-int fio_syncio_init(struct thread_data *td)
-{
-       struct syncio_data *sd = malloc(sizeof(*sd));
-
-       if (!sd) {
-               td_verror(td, ENOMEM);
-               return 1;
-       }
-
-       td->io_prep = fio_syncio_prep;
-       td->io_queue = fio_syncio_queue;
-       td->io_getevents = fio_syncio_getevents;
-       td->io_event = fio_syncio_event;
-       td->io_cancel = NULL;
-       td->io_cleanup = fio_syncio_cleanup;
-       td->io_sync = fio_io_sync;
-
-       sd->last_io_u = NULL;
-       td->io_data = sd;
-       return 0;
-}
-
-/*
- * io_queue hook for mmap io: copy between the io_u buffer and the
- * mapping at (offset - td->file_offset). With odirect set, the touched
- * range is additionally msync()ed and madvise()d away; a failure of
- * either stores errno in io_u->error. Returns io_u->error.
- */
-static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
-{
-       struct syncio_data *sd = td->io_data;
-       unsigned long long real_off = io_u->offset - td->file_offset;
-       void *map = td->mmap + real_off;
-
-       if (io_u->ddir != DDIR_READ)
-               memcpy(map, io_u->buf, io_u->buflen);
-       else
-               memcpy(io_u->buf, map, io_u->buflen);
-
-       /*
-        * not really direct, but should drop the pages from the cache
-        */
-       if (td->odirect) {
-               if (msync(map, io_u->buflen, MS_SYNC) < 0)
-                       io_u->error = errno;
-               if (madvise(map, io_u->buflen,  MADV_DONTNEED) < 0)
-                       io_u->error = errno;
-       }
-
-       if (io_u->error)
-               return io_u->error;
-
-       sd->last_io_u = io_u;
-       return io_u->error;
-}
-
-/*
- * io_sync hook for mmap io: synchronously write back the whole mapping
- * (td->file_size bytes from td->mmap). Returns msync()'s result.
- */
-static int fio_mmapio_sync(struct thread_data *td)
-{
-       int ret = msync(td->mmap, td->file_size, MS_SYNC);
-
-       return ret;
-}
-
-/*
- * Set up the mmap engine for a job. It reuses the sync engine's event
- * and cleanup hooks and needs neither prep nor cancel.
- *
- * Returns 0 on success. If memory cannot be allocated, ENOMEM is
- * recorded with td_verror(), td is left untouched and 1 is returned.
- */
-int fio_mmapio_init(struct thread_data *td)
-{
-       struct syncio_data *sd = malloc(sizeof(*sd));
-
-       if (!sd) {
-               td_verror(td, ENOMEM);
-               return 1;
-       }
-
-       td->io_prep = NULL;
-       td->io_queue = fio_mmapio_queue;
-       td->io_getevents = fio_syncio_getevents;
-       td->io_event = fio_syncio_event;
-       td->io_cancel = NULL;
-       td->io_cleanup = fio_syncio_cleanup;
-       td->io_sync = fio_mmapio_sync;
-
-       sd->last_io_u = NULL;
-       td->io_data = sd;
-       return 0;
-}
-
-#ifdef FIO_HAVE_SGIO
-
-/*
- * Command storage for one io_u of the sg engine; indexed by io_u->index.
- */
-struct sgio_cmd {
-       unsigned char cdb[10];          /* 10-byte SCSI command block */
-       int nr;
-};
-
-/*
- * Per-job private state of the sg engine, stored in td->io_data.
- */
-struct sgio_data {
-       struct sgio_cmd *cmds;          /* one command slot per io_u */
-       struct io_u **events;           /* completed io_us */
-       unsigned int bs;                /* device block size in bytes */
-};
-
-/*
- * Reset 'hdr' and the io_u's command slot, then fill in the fields that
- * are common to every request: interface id, command pointer/length,
- * pack id and the back pointer to the io_u. When 'fs' is non-zero the
- * io_u's buffer is attached as the data transfer area.
- */
-static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
-                         struct io_u *io_u, int fs)
-{
-       struct sgio_cmd *sc = sd->cmds + io_u->index;
-
-       memset(sc->cdb, 0, sizeof(sc->cdb));
-       memset(hdr, 0, sizeof(*hdr));
-
-       hdr->usr_ptr = io_u;
-       hdr->pack_id = io_u->index;
-       hdr->cmdp = sc->cdb;
-       hdr->cmd_len = sizeof(sc->cdb);
-       hdr->interface_id = 'S';
-
-       if (!fs)
-               return;
-
-       hdr->dxferp = io_u->buf;
-       hdr->dxfer_len = io_u->buflen;
-}
-
-/*
- * io_getevents hook for the sg character device path: read completed
- * sg_io_hdr structures from td->fd and record the io_u behind each one
- * in sd->events.
- *
- * With min == 0 the descriptor is temporarily switched to non-blocking
- * mode so an empty queue returns immediately; otherwise each read is
- * preceded by a poll() for POLLIN.
- *
- * Returns the number of events collected, or -1 on a read or allocation
- * error (recorded with td_verror()).
- */
-static int fio_sgio_getevents(struct thread_data *td, int min, int max,
-                             struct timespec *t)
-{
-       struct sgio_data *sd = td->io_data;
-       struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
-       struct sg_io_hdr *hdrs;
-       int left = max, ret, events, i, r = 0, fl = 0;
-
-       /* nothing requested: avoid a zero sized allocation */
-       if (max <= 0)
-               return 0;
-
-       hdrs = malloc(max * sizeof(*hdrs));
-       if (!hdrs) {
-               td_verror(td, ENOMEM);
-               return -1;
-       }
-
-       /*
-        * don't block for !events
-        */
-       if (!min) {
-               fl = fcntl(td->fd, F_GETFL);
-               fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
-       }
-
-       while (left) {
-               do {
-                       if (!min)
-                               break;
-                       poll(&pfd, 1, -1);
-                       if (pfd.revents & POLLIN)
-                               break;
-               } while (1);
-
-               ret = read(td->fd, hdrs, left * sizeof(*hdrs));
-               if (ret < 0) {
-                       if (errno == EAGAIN)
-                               break;
-                       td_verror(td, errno);
-                       r = -1;
-                       break;
-               } else if (!ret)
-                       break;
-
-               events = ret / sizeof(*hdrs);
-
-               /*
-                * append after what earlier reads collected, so a second
-                * batch does not overwrite the first
-                */
-               for (i = 0; i < events; i++)
-                       sd->events[r + i] = hdrs[i].usr_ptr;
-
-               left -= events;
-               r += events;
-       }
-
-       if (!min)
-               fcntl(td->fd, F_SETFL, fl);
-
-       free(hdrs);
-       return r;
-}
-
-/*
- * Issue the io_u's sg header with the SG_IO ioctl. The io_u is recorded
- * as event 0 before the call. Returns the ioctl() result.
- */
-static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
-{
-       struct sgio_data *sd = td->io_data;
-
-       sd->events[0] = io_u;
-
-       return ioctl(td->fd, SG_IO, &io_u->hdr);
-}
-
-/*
- * Issue the io_u's sg header by write()ing it to td->fd. When 'sync' is
- * set, the completed header is read back right away. Returns 0 on
- * success or the errno of the failing call.
- */
-static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
-       struct sg_io_hdr *hdr = &io_u->hdr;
-
-       if (write(td->fd, hdr, sizeof(*hdr)) < 0)
-               return errno;
-
-       if (sync && read(td->fd, hdr, sizeof(*hdr)) < 0)
-               return errno;
-
-       return 0;
-}
-
-/*
- * Dispatch an sg request: block devices go through the SG_IO ioctl,
- * everything else through the write()/read() interface.
- */
-static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
-       if (td->filetype != FIO_TYPE_BD)
-               return fio_sgio_rw_doio(td, io_u, sync);
-
-       return fio_sgio_ioctl_doio(td, io_u);
-}
-
-/*
- * io_sync hook for the sg engine: send opcode 0x35 (SCSI SYNCHRONIZE
- * CACHE) with no data phase and wait for it to finish.
- *
- * Returns ENOMEM when no io_u is available, otherwise the result of
- * issuing the command.
- */
-static int fio_sgio_sync(struct thread_data *td)
-{
-       struct sgio_data *sd = td->io_data;
-       struct io_u *io_u = __get_io_u(td);
-       struct sg_io_hdr *hdr;
-       int ret;
-
-       if (!io_u)
-               return ENOMEM;
-
-       hdr = &io_u->hdr;
-       sgio_hdr_init(sd, hdr, io_u, 0);
-       hdr->cmdp[0] = 0x35;
-       hdr->dxfer_direction = SG_DXFER_NONE;
-
-       ret = fio_sgio_doio(td, io_u, 1);
-
-       put_io_u(td, io_u);
-       return ret;
-}
-
-/*
- * io_prep hook for the sg engine: build a 10-byte READ (0x28) or WRITE
- * (0x2a) command for the io_u.
- *
- * Returns 0 on success, EINVAL when the length is not a multiple of the
- * device block size, or when the request cannot be expressed in a
- * 10-byte command (32-bit block address, 16-bit block count). Rejecting
- * such requests avoids silently addressing the wrong blocks.
- */
-static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
-{
-       struct sg_io_hdr *hdr = &io_u->hdr;
-       struct sgio_data *sd = td->io_data;
-       unsigned long long lba;
-       unsigned int nr_blocks;
-
-       if (io_u->buflen & (sd->bs - 1)) {
-               fprintf(stderr, "read/write not sector aligned\n");
-               return EINVAL;
-       }
-
-       /* unsigned so large offsets neither truncate nor go negative */
-       nr_blocks = io_u->buflen / sd->bs;
-       lba = io_u->offset / sd->bs;
-
-       if (lba > 0xffffffffULL || nr_blocks > 0xffff) {
-               fprintf(stderr, "sgio: request exceeds 10-byte cdb limits\n");
-               return EINVAL;
-       }
-
-       sgio_hdr_init(sd, hdr, io_u, 1);
-
-       if (io_u->ddir == DDIR_READ) {
-               hdr->dxfer_direction = SG_DXFER_FROM_DEV;
-               hdr->cmdp[0] = 0x28;
-       } else {
-               hdr->dxfer_direction = SG_DXFER_TO_DEV;
-               hdr->cmdp[0] = 0x2a;
-       }
-
-       /* big-endian block address in bytes 2-5, block count in 7-8 */
-       hdr->cmdp[2] = (lba >> 24) & 0xff;
-       hdr->cmdp[3] = (lba >> 16) & 0xff;
-       hdr->cmdp[4] = (lba >>  8) & 0xff;
-       hdr->cmdp[5] = lba & 0xff;
-       hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
-       hdr->cmdp[8] = nr_blocks & 0xff;
-       return 0;
-}
-
-/*
- * io_queue hook for the sg engine: issue the prepared command without
- * waiting for completion on the read/write path.
- *
- * The ioctl path reports failure as -1 with errno set, while the
- * read/write path returns a positive errno value; both are stored in
- * io_u->error. A non-zero SCSI status is reported as EIO with the
- * residual count saved in io_u->resid. Returns io_u->error.
- */
-static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
-{
-       struct sg_io_hdr *hdr = &io_u->hdr;
-       int ret;
-
-       ret = fio_sgio_doio(td, io_u, 0);
-
-       if (ret < 0)
-               io_u->error = errno;
-       else if (ret > 0)
-               io_u->error = ret;
-       else if (hdr->status) {
-               io_u->resid = hdr->resid;
-               io_u->error = EIO;
-       }
-
-       return io_u->error;
-}
-
-/*
- * io_event hook for the sg engine: return the io_u recorded in slot
- * 'event' of the completion array.
- */
-static struct io_u *fio_sgio_event(struct thread_data *td, int event)
-{
-       struct sgio_data *sd = td->io_data;
-       struct io_u **slot = sd->events + event;
-
-       return *slot;
-}
-
-/*
- * Ask the device for its block size with opcode 0x25 (SCSI READ
- * CAPACITY) and store it in *bs. The block length is the big-endian
- * 32-bit value in bytes 4-7 of the 8-byte reply.
- *
- * Returns 0 on success or the result of the failed command.
- */
-static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
-{
-       struct sgio_data *sd = td->io_data;
-       struct io_u *io_u;
-       struct sg_io_hdr *hdr;
-       unsigned char buf[8];
-       int ret;
-
-       io_u = __get_io_u(td);
-       assert(io_u);
-
-       hdr = &io_u->hdr;
-       sgio_hdr_init(sd, hdr, io_u, 0);
-       memset(buf, 0, sizeof(buf));
-
-       hdr->cmdp[0] = 0x25;
-       hdr->dxfer_direction = SG_DXFER_FROM_DEV;
-       hdr->dxferp = buf;
-       hdr->dxfer_len = sizeof(buf);
-
-       ret = fio_sgio_doio(td, io_u, 1);
-       if (ret) {
-               put_io_u(td, io_u);
-               return ret;
-       }
-
-       /* widen before shifting so bit 31 is never shifted into an int */
-       *bs = ((unsigned int) buf[4] << 24) | ((unsigned int) buf[5] << 16) |
-             ((unsigned int) buf[6] << 8) | (unsigned int) buf[7];
-       put_io_u(td, io_u);
-       return 0;
-}
-
-/*
- * io_cleanup hook for the sg engine: free the event and command arrays
- * together with the private data and clear td->io_data.
- */
-static void fio_sgio_cleanup(struct thread_data *td)
-{
-       struct sgio_data *sd = td->io_data;
-
-       if (!sd)
-               return;
-
-       free(sd->events);
-       free(sd->cmds);
-       free(sd);
-       td->io_data = NULL;
-}
-
-/*
- * Set up the sg engine for a job: allocate per-io_u command and event
- * storage, determine the device block size (BLKSSZGET for block
- * devices, READ CAPACITY for sg character devices) and install the
- * engine hooks.
- *
- * Returns 0 on success and non-zero on failure. On every failure path
- * the memory allocated here is released and td->io_data is reset.
- */
-int fio_sgio_init(struct thread_data *td)
-{
-       struct sgio_data *sd;
-       unsigned int bs;
-       int ret;
-
-       sd = malloc(sizeof(*sd));
-       if (!sd) {
-               td_verror(td, ENOMEM);
-               return 1;
-       }
-
-       sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
-       sd->events = malloc(td->iodepth * sizeof(struct io_u *));
-       /* attach early: the block size probe below reads td->io_data */
-       td->io_data = sd;
-
-       if (!sd->cmds || !sd->events) {
-               td_verror(td, ENOMEM);
-               fio_sgio_cleanup(td);
-               return 1;
-       }
-
-       if (td->filetype == FIO_TYPE_BD) {
-               if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
-                       td_verror(td, errno);
-                       fio_sgio_cleanup(td);
-                       return 1;
-               }
-       } else if (td->filetype == FIO_TYPE_CHAR) {
-               int version;
-
-               if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
-                       td_verror(td, errno);
-                       fio_sgio_cleanup(td);
-                       return 1;
-               }
-
-               ret = fio_sgio_get_bs(td, &bs);
-               if (ret) {
-                       fio_sgio_cleanup(td);
-                       return ret;
-               }
-       } else {
-               fprintf(stderr, "ioengine sgio only works on block devices\n");
-               fio_sgio_cleanup(td);
-               return 1;
-       }
-
-       sd->bs = bs;
-
-       td->io_prep = fio_sgio_prep;
-       td->io_queue = fio_sgio_queue;
-
-       if (td->filetype == FIO_TYPE_BD)
-               td->io_getevents = fio_syncio_getevents;
-       else
-               td->io_getevents = fio_sgio_getevents;
-
-       td->io_event = fio_sgio_event;
-       td->io_cancel = NULL;
-       /* engine specific cleanup so cmds/events are freed as well */
-       td->io_cleanup = fio_sgio_cleanup;
-       td->io_sync = fio_sgio_sync;
-
-       /*
-        * we want to do it, regardless of whether odirect is set or not
-        */
-       td->override_sync = 1;
-       return 0;
-}
-
-#else /* FIO_HAVE_SGIO */
-
-/*
- * Built without FIO_HAVE_SGIO: the engine is unavailable, so setup
- * always fails with EINVAL.
- */
-int fio_sgio_init(struct thread_data *td)
-{
-       return EINVAL;
-}
-
-#endif /* FIO_HAVE_SGIO */
-
-#ifdef FIO_HAVE_SPLICE
-/*
- * Per-job private state of the splice engine, stored in td->io_data.
- */
-struct spliceio_data {
-       struct io_u *last_io_u;         /* io_u of the last successful queue call */
-       int pipe[2];                    /* [0] read end, [1] write end */
-};
-
-/*
- * io_event hook for the splice engine: only event 0 exists and it is the
- * io_u remembered by the last successful queue call.
- */
-static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
-{
-       struct spliceio_data *sd;
-
-       assert(event == 0);
-
-       sd = td->io_data;
-       return sd->last_io_u;
-}
-
-/*
- * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
- * So just splice the data from the file into the pipe, and use regular
- * read to fill the buffer. Doesn't make a lot of sense, but...
- *
- * Returns the number of bytes placed in the io_u buffer, which is less
- * than io_u->buflen if end of file is reached first (errno is set to
- * ENODATA in that case). Returns -1 with errno set on failure, so the
- * caller never mistakes an error code for a byte count.
- */
-static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
-{
-       struct spliceio_data *sd = td->io_data;
-       int ret, ret2, buflen;
-       off_t offset;
-       char *p;
-
-       offset = io_u->offset;
-       buflen = io_u->buflen;
-       p = io_u->buf;
-       while (buflen) {
-               int this_len = buflen;
-
-               if (this_len > SPLICE_DEF_SIZE)
-                       this_len = SPLICE_DEF_SIZE;
-
-               ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
-               if (ret < 0) {
-                       if (errno == ENODATA || errno == EAGAIN)
-                               continue;
-
-                       return -1;
-               }
-
-               /* end of file: stop instead of spinning forever */
-               if (!ret) {
-                       errno = ENODATA;
-                       break;
-               }
-
-               buflen -= ret;
-
-               /* drain what was just spliced into the pipe */
-               while (ret) {
-                       ret2 = read(sd->pipe[0], p, ret);
-                       if (ret2 < 0)
-                               return -1;
-                       if (!ret2) {
-                               errno = EIO;
-                               return -1;
-                       }
-
-                       ret -= ret2;
-                       p += ret2;
-               }
-       }
-
-       return io_u->buflen - buflen;
-}
-
-/*
- * For splice writing, we can vmsplice our data buffer directly into a
- * pipe and then splice that to a file.
- *
- * Returns io_u->buflen once everything has been written, or -1 with
- * errno set on failure, so the caller never mistakes an error code for
- * a byte count. A splice() that makes no progress is reported as EIO
- * rather than retried forever.
- */
-static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
-{
-       struct spliceio_data *sd = td->io_data;
-       struct iovec iov[1] = {
-               {
-                       .iov_base = io_u->buf,
-                       .iov_len = io_u->buflen,
-               }
-       };
-       struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
-       off_t off = io_u->offset;
-       int ret, ret2;
-
-       while (iov[0].iov_len) {
-               if (poll(&pfd, 1, -1) < 0)
-                       return -1;
-
-               ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
-               if (ret < 0)
-                       return -1;
-
-               iov[0].iov_len -= ret;
-               iov[0].iov_base = (char *) iov[0].iov_base + ret;
-
-               /* move what is now in the pipe on to the file */
-               while (ret) {
-                       ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
-                       if (ret2 < 0)
-                               return -1;
-                       if (!ret2) {
-                               errno = EIO;
-                               return -1;
-                       }
-
-                       ret -= ret2;
-               }
-       }
-
-       return io_u->buflen;
-}
-
-/*
- * io_queue hook for the splice engine: run the transfer synchronously
- * through fio_splice_read() or fio_splice_write().
- *
- * A short positive result sets io_u->resid and ENODATA; a zero or
- * negative result stores errno. On success the io_u is remembered as
- * the last completed one. Returns io_u->error.
- */
-static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
-{
-       struct spliceio_data *sd = td->io_data;
-       int done;
-
-       if (io_u->ddir != DDIR_READ)
-               done = fio_splice_write(td, io_u);
-       else
-               done = fio_splice_read(td, io_u);
-
-       if ((unsigned int) done != io_u->buflen) {
-               if (done <= 0)
-                       io_u->error = errno;
-               else {
-                       io_u->resid = io_u->buflen - done;
-                       io_u->error = ENODATA;
-               }
-       }
-
-       if (io_u->error)
-               return io_u->error;
-
-       sd->last_io_u = io_u;
-       return io_u->error;
-}
-
-/*
- * io_cleanup hook for the splice engine: close both pipe ends, free the
- * private data and clear td->io_data. No-op when nothing is attached.
- */
-static void fio_spliceio_cleanup(struct thread_data *td)
-{
-       struct spliceio_data *sd = td->io_data;
-       int end;
-
-       if (!sd)
-               return;
-
-       for (end = 0; end < 2; end++)
-               close(sd->pipe[end]);
-
-       free(sd);
-       td->io_data = NULL;
-}
-
-/*
- * Set up the splice engine for a job: allocate the private data, install
- * the engine hooks and create the pipe the data is spliced through.
- *
- * Returns 0 on success. On allocation or pipe() failure the error is
- * recorded with td_verror(), the private data is released and 1 is
- * returned.
- */
-int fio_spliceio_init(struct thread_data *td)
-{
-       struct spliceio_data *sd = malloc(sizeof(*sd));
-
-       if (!sd) {
-               td_verror(td, ENOMEM);
-               return 1;
-       }
-
-       td->io_queue = fio_spliceio_queue;
-       td->io_getevents = fio_syncio_getevents;
-       td->io_event = fio_spliceio_event;
-       td->io_cancel = NULL;
-       td->io_cleanup = fio_spliceio_cleanup;
-       td->io_sync = fio_io_sync;
-
-       sd->last_io_u = NULL;
-       if (pipe(sd->pipe) < 0) {
-               td_verror(td, errno);
-               free(sd);
-               return 1;
-       }
-
-       td->io_data = sd;
-       return 0;
-}
-
-#else /* FIO_HAVE_SPLICE */
-
-/*
- * Built without FIO_HAVE_SPLICE: the engine is unavailable, so setup
- * always fails with EINVAL.
- */
-int fio_spliceio_init(struct thread_data *td)
-{
-       return EINVAL;
-}
-
-#endif /* FIO_HAVE_SPLICE */
diff --git a/fio-log.c b/fio-log.c
deleted file mode 100644 (file)
index 42aedf2..0000000
--- a/fio-log.c
+++ /dev/null
@@ -1,162 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include "list.h"
-#include "fio.h"
-
-void write_iolog_put(struct thread_data *td, struct io_u *io_u)
-{
-       fprintf(td->iolog_f, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen);
-}
-
-int read_iolog_get(struct thread_data *td, struct io_u *io_u)
-{
-       struct io_piece *ipo;
-
-       if (!list_empty(&td->io_log_list)) {
-               ipo = list_entry(td->io_log_list.next, struct io_piece, list);
-               list_del(&ipo->list);
-               io_u->offset = ipo->offset;
-               io_u->buflen = ipo->len;
-               io_u->ddir = ipo->ddir;
-               free(ipo);
-               return 0;
-       }
-
-       return 1;
-}
-
-void prune_io_piece_log(struct thread_data *td)
-{
-       struct io_piece *ipo;
-
-       while (!list_empty(&td->io_hist_list)) {
-               ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
-
-               list_del(&ipo->list);
-               free(ipo);
-       }
-}
-
-/*
- * log a succesful write, so we can unwind the log for verify
- */
-void log_io_piece(struct thread_data *td, struct io_u *io_u)
-{
-       struct io_piece *ipo = malloc(sizeof(struct io_piece));
-       struct list_head *entry;
-
-       INIT_LIST_HEAD(&ipo->list);
-       ipo->offset = io_u->offset;
-       ipo->len = io_u->buflen;
-
-       /*
-        * for random io where the writes extend the file, it will typically
-        * be laid out with the block scattered as written. it's faster to
-        * read them in in that order again, so don't sort
-        */
-       if (td->sequential || !td->overwrite) {
-               list_add_tail(&ipo->list, &td->io_hist_list);
-               return;
-       }
-
-       /*
-        * for random io, sort the list so verify will run faster
-        */
-       entry = &td->io_hist_list;
-       while ((entry = entry->prev) != &td->io_hist_list) {
-               struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-
-               if (__ipo->offset < ipo->offset)
-                       break;
-       }
-
-       list_add(&ipo->list, entry);
-}
-
-void write_iolog_close(struct thread_data *td)
-{
-       fflush(td->iolog_f);
-       fclose(td->iolog_f);
-       free(td->iolog_buf);
-}
-
-int init_iolog(struct thread_data *td)
-{
-       unsigned long long offset;
-       unsigned int bytes;
-       char *str, *p;
-       FILE *f;
-       int rw, i, reads, writes;
-
-       if (!td->read_iolog && !td->write_iolog)
-               return 0;
-
-       if (td->read_iolog)
-               f = fopen(td->iolog_file, "r");
-       else
-               f = fopen(td->iolog_file, "w");
-
-       if (!f) {
-               perror("fopen iolog");
-               printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog);
-               return 1;
-       }
-
-       /*
-        * That's it for writing, setup a log buffer and we're done.
-         */
-       if (td->write_iolog) {
-               td->iolog_f = f;
-               td->iolog_buf = malloc(8192);
-               setvbuf(f, td->iolog_buf, _IOFBF, 8192);
-               return 0;
-       }
-
-       /*
-        * Read in the read iolog and store it, reuse the infrastructure
-        * for doing verifications.
-        */
-       str = malloc(4096);
-       reads = writes = i = 0;
-       while ((p = fgets(str, 4096, f)) != NULL) {
-               struct io_piece *ipo;
-
-               if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) {
-                       fprintf(stderr, "bad iolog: %s\n", p);
-                       continue;
-               }
-               if (rw == DDIR_READ)
-                       reads++;
-               else if (rw == DDIR_WRITE)
-                       writes++;
-               else {
-                       fprintf(stderr, "bad ddir: %d\n", rw);
-                       continue;
-               }
-
-               ipo = malloc(sizeof(*ipo));
-               INIT_LIST_HEAD(&ipo->list);
-               ipo->offset = offset;
-               ipo->len = bytes;
-               if (bytes > td->max_bs)
-                       td->max_bs = bytes;
-               ipo->ddir = rw;
-               list_add_tail(&ipo->list, &td->io_log_list);
-               i++;
-       }
-
-       free(str);
-       fclose(f);
-
-       if (!i)
-               return 1;
-
-       if (reads && !writes)
-               td->ddir = DDIR_READ;
-       else if (!reads && writes)
-               td->ddir = DDIR_READ;
-       else
-               td->iomix = 1;
-
-       return 0;
-}
diff --git a/fio-log.h b/fio-log.h
deleted file mode 100644 (file)
index 99bb9f7..0000000
--- a/fio-log.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef FIO_LOG_H
-#define FIO_LOG_H
-
-extern int read_iolog_get(struct thread_data *, struct io_u *);
-extern void write_iolog_put(struct thread_data *, struct io_u *);
-extern int init_iolog(struct thread_data *td);
-extern void log_io_piece(struct thread_data *, struct io_u *);
-extern void prune_io_piece_log(struct thread_data *);
-extern void write_iolog_close(struct thread_data *);
-
-#endif
diff --git a/fio-stat.c b/fio-stat.c
deleted file mode 100644 (file)
index 70c653d..0000000
--- a/fio-stat.c
+++ /dev/null
@@ -1,519 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <dirent.h>
-#include <libgen.h>
-#include <math.h>
-
-#include "fio.h"
-#include "fio-time.h"
-
-static struct itimerval itimer;
-static LIST_HEAD(disk_list);
-
-static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus)
-{
-       unsigned in_flight;
-       char line[256];
-       FILE *f;
-       char *p;
-
-       f = fopen(du->path, "r");
-       if (!f)
-               return 1;
-
-       p = fgets(line, sizeof(line), f);
-       if (!p) {
-               fclose(f);
-               return 1;
-       }
-
-       if (sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], &dus->merges[0], &dus->sectors[0], &dus->ticks[0], &dus->ios[1], &dus->merges[1], &dus->sectors[1], &dus->ticks[1], &in_flight, &dus->io_ticks, &dus->time_in_queue) != 11) {
-               fclose(f);
-               return 1;
-       }
-
-       fclose(f);
-       return 0;
-}
-
-static void update_io_tick_disk(struct disk_util *du)
-{
-       struct disk_util_stat __dus, *dus, *ldus;
-       struct timeval t;
-
-       if (get_io_ticks(du, &__dus))
-               return;
-
-       dus = &du->dus;
-       ldus = &du->last_dus;
-
-       dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]);
-       dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]);
-       dus->ios[0] += (__dus.ios[0] - ldus->ios[0]);
-       dus->ios[1] += (__dus.ios[1] - ldus->ios[1]);
-       dus->merges[0] += (__dus.merges[0] - ldus->merges[0]);
-       dus->merges[1] += (__dus.merges[1] - ldus->merges[1]);
-       dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]);
-       dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]);
-       dus->io_ticks += (__dus.io_ticks - ldus->io_ticks);
-       dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue);
-
-       gettimeofday(&t, NULL);
-       du->msec += mtime_since(&du->time, &t);
-       memcpy(&du->time, &t, sizeof(t));
-       memcpy(ldus, &__dus, sizeof(__dus));
-}
-
-void update_io_ticks(void)
-{
-       struct list_head *entry;
-       struct disk_util *du;
-
-       list_for_each(entry, &disk_list) {
-               du = list_entry(entry, struct disk_util, list);
-               update_io_tick_disk(du);
-       }
-}
-
-static int disk_util_exists(dev_t dev)
-{
-       struct list_head *entry;
-       struct disk_util *du;
-
-       list_for_each(entry, &disk_list) {
-               du = list_entry(entry, struct disk_util, list);
-
-               if (du->dev == dev)
-                       return 1;
-       }
-
-       return 0;
-}
-
-static void disk_util_add(dev_t dev, char *path)
-{
-       struct disk_util *du = malloc(sizeof(*du));
-
-       memset(du, 0, sizeof(*du));
-       INIT_LIST_HEAD(&du->list);
-       sprintf(du->path, "%s/stat", path);
-       du->name = strdup(basename(path));
-       du->dev = dev;
-
-       gettimeofday(&du->time, NULL);
-       get_io_ticks(du, &du->last_dus);
-
-       list_add_tail(&du->list, &disk_list);
-}
-
-static int check_dev_match(dev_t dev, char *path)
-{
-       unsigned int major, minor;
-       char line[256], *p;
-       FILE *f;
-
-       f = fopen(path, "r");
-       if (!f) {
-               perror("open path");
-               return 1;
-       }
-
-       p = fgets(line, sizeof(line), f);
-       if (!p) {
-               fclose(f);
-               return 1;
-       }
-
-       if (sscanf(p, "%u:%u", &major, &minor) != 2) {
-               fclose(f);
-               return 1;
-       }
-
-       if (((major << 8) | minor) == dev) {
-               fclose(f);
-               return 0;
-       }
-
-       fclose(f);
-       return 1;
-}
-
-static int find_block_dir(dev_t dev, char *path)
-{
-       struct dirent *dir;
-       struct stat st;
-       int found = 0;
-       DIR *D;
-
-       D = opendir(path);
-       if (!D)
-               return 0;
-
-       while ((dir = readdir(D)) != NULL) {
-               char full_path[256];
-
-               if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
-                       continue;
-               if (!strcmp(dir->d_name, "device"))
-                       continue;
-
-               sprintf(full_path, "%s/%s", path, dir->d_name);
-
-               if (!strcmp(dir->d_name, "dev")) {
-                       if (!check_dev_match(dev, full_path)) {
-                               found = 1;
-                               break;
-                       }
-               }
-
-               if (stat(full_path, &st) == -1) {
-                       perror("stat");
-                       break;
-               }
-
-               if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
-                       continue;
-
-               found = find_block_dir(dev, full_path);
-               if (found) {
-                       strcpy(path, full_path);
-                       break;
-               }
-       }
-
-       closedir(D);
-       return found;
-}
-
-void init_disk_util(struct thread_data *td)
-{
-       struct stat st;
-       char foo[256], tmp[256];
-       dev_t dev;
-       char *p;
-
-       if (!td->do_disk_util)
-               return;
-
-       if (!stat(td->file_name, &st)) {
-               if (S_ISBLK(st.st_mode))
-                       dev = st.st_rdev;
-               else
-                       dev = st.st_dev;
-       } else {
-               /*
-                * must be a file, open "." in that path
-                */
-               strcpy(foo, td->file_name);
-               p = dirname(foo);
-               if (stat(p, &st)) {
-                       perror("disk util stat");
-                       return;
-               }
-
-               dev = st.st_dev;
-       }
-
-       if (disk_util_exists(dev))
-               return;
-               
-       sprintf(foo, "/sys/block");
-       if (!find_block_dir(dev, foo))
-               return;
-
-       /*
-        * If there's a ../queue/ directory there, we are inside a partition.
-        * Check if that is the case and jump back. For loop/md/dm etc we
-        * are already in the right spot.
-        */
-       sprintf(tmp, "%s/../queue", foo);
-       if (!stat(tmp, &st)) {
-               p = dirname(foo);
-               sprintf(tmp, "%s/queue", p);
-               if (stat(tmp, &st)) {
-                       fprintf(stderr, "unknown sysfs layout\n");
-                       return;
-               }
-               sprintf(foo, "%s", p);
-       }
-
-       td->sysfs_root = strdup(foo);
-       disk_util_add(dev, foo);
-}
-
-void disk_util_timer_arm(void)
-{
-       itimer.it_value.tv_sec = 0;
-       itimer.it_value.tv_usec = DISK_UTIL_MSEC * 1000;
-       setitimer(ITIMER_REAL, &itimer, NULL);
-}
-
-void update_rusage_stat(struct thread_data *td)
-{
-       if (!(td->runtime[0] + td->runtime[1]))
-               return;
-
-       getrusage(RUSAGE_SELF, &td->ru_end);
-
-       td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
-       td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
-       td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
-
-       
-       memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end));
-}
-
-static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
-                   double *mean, double *dev)
-{
-       double n;
-
-       if (is->samples == 0)
-               return 0;
-
-       *min = is->min_val;
-       *max = is->max_val;
-
-       n = (double) is->samples;
-       *mean = (double) is->val / n;
-       *dev = sqrt(((double) is->val_sq - (*mean * *mean) / n) / (n - 1));
-       if (!(*min + *max) && !(*mean + *dev))
-               return 0;
-
-       return 1;
-}
-
-static void show_group_stats(struct group_run_stats *rs, int id)
-{
-       printf("\nRun status group %d (all jobs):\n", id);
-
-       if (rs->max_run[DDIR_READ])
-               printf("   READ: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[0] >> 10, rs->agg[0], rs->min_bw[0], rs->max_bw[0], rs->min_run[0], rs->max_run[0]);
-       if (rs->max_run[DDIR_WRITE])
-               printf("  WRITE: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[1] >> 10, rs->agg[1], rs->min_bw[1], rs->max_bw[1], rs->min_run[1], rs->max_run[1]);
-}
-
-static void show_disk_util(void)
-{
-       struct disk_util_stat *dus;
-       struct list_head *entry;
-       struct disk_util *du;
-       double util;
-
-       printf("\nDisk stats (read/write):\n");
-
-       list_for_each(entry, &disk_list) {
-               du = list_entry(entry, struct disk_util, list);
-               dus = &du->dus;
-
-               util = (double) 100 * du->dus.io_ticks / (double) du->msec;
-               if (util > 100.0)
-                       util = 100.0;
-
-               printf("  %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, in_queue=%u, util=%3.2f%%\n", du->name, dus->ios[0], dus->ios[1], dus->merges[0], dus->merges[1], dus->ticks[0], dus->ticks[1], dus->time_in_queue, util);
-       }
-}
-
-static void show_ddir_status(struct thread_data *td, struct group_run_stats *rs,
-                            int ddir)
-{
-       char *ddir_str[] = { "read ", "write" };
-       unsigned long min, max;
-       unsigned long long bw;
-       double mean, dev;
-
-       if (!td->runtime[ddir])
-               return;
-
-       bw = td->io_bytes[ddir] / td->runtime[ddir];
-       printf("  %s: io=%6lluMiB, bw=%6lluKiB/s, runt=%6lumsec\n", ddir_str[ddir], td->io_bytes[ddir] >> 20, bw, td->runtime[ddir]);
-
-       if (calc_lat(&td->slat_stat[ddir], &min, &max, &mean, &dev))
-               printf("    slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
-
-       if (calc_lat(&td->clat_stat[ddir], &min, &max, &mean, &dev))
-               printf("    clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
-
-       if (calc_lat(&td->bw_stat[ddir], &min, &max, &mean, &dev)) {
-               double p_of_agg;
-
-               p_of_agg = mean * 100 / (double) rs->agg[ddir];
-               printf("    bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", min, max, p_of_agg, mean, dev);
-       }
-}
-
-static void show_thread_status(struct thread_data *td,
-                              struct group_run_stats *rs)
-{
-       double usr_cpu, sys_cpu;
-
-       if (!(td->io_bytes[0] + td->io_bytes[1]) && !td->error)
-               return;
-
-       printf("Client%d (groupid=%d): err=%2d:\n", td->thread_number, td->groupid, td->error);
-
-       show_ddir_status(td, rs, td->ddir);
-       if (td->io_bytes[td->ddir ^ 1])
-               show_ddir_status(td, rs, td->ddir ^ 1);
-
-       if (td->runtime[0] + td->runtime[1]) {
-               double runt = td->runtime[0] + td->runtime[1];
-
-               usr_cpu = (double) td->usr_time * 100 / runt;
-               sys_cpu = (double) td->sys_time * 100 / runt;
-       } else {
-               usr_cpu = 0;
-               sys_cpu = 0;
-       }
-
-       printf("  cpu          : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
-}
-
-void show_run_stats(void)
-{
-       struct group_run_stats *runstats, *rs;
-       struct thread_data *td;
-       int i;
-
-       runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1));
-
-       for (i = 0; i < groupid + 1; i++) {
-               rs = &runstats[i];
-
-               memset(rs, 0, sizeof(*rs));
-               rs->min_bw[0] = rs->min_run[0] = ~0UL;
-               rs->min_bw[1] = rs->min_run[1] = ~0UL;
-       }
-
-       for (i = 0; i < thread_number; i++) {
-               unsigned long long rbw, wbw;
-
-               td = &threads[i];
-
-               if (td->error) {
-                       printf("Client%d: %s\n", td->thread_number, td->verror);
-                       continue;
-               }
-
-               rs = &runstats[td->groupid];
-
-               if (td->runtime[0] < rs->min_run[0] || !rs->min_run[0])
-                       rs->min_run[0] = td->runtime[0];
-               if (td->runtime[0] > rs->max_run[0])
-                       rs->max_run[0] = td->runtime[0];
-               if (td->runtime[1] < rs->min_run[1] || !rs->min_run[1])
-                       rs->min_run[1] = td->runtime[1];
-               if (td->runtime[1] > rs->max_run[1])
-                       rs->max_run[1] = td->runtime[1];
-
-               rbw = wbw = 0;
-               if (td->runtime[0])
-                       rbw = td->io_bytes[0] / (unsigned long long) td->runtime[0];
-               if (td->runtime[1])
-                       wbw = td->io_bytes[1] / (unsigned long long) td->runtime[1];
-
-               if (rbw < rs->min_bw[0])
-                       rs->min_bw[0] = rbw;
-               if (wbw < rs->min_bw[1])
-                       rs->min_bw[1] = wbw;
-               if (rbw > rs->max_bw[0])
-                       rs->max_bw[0] = rbw;
-               if (wbw > rs->max_bw[1])
-                       rs->max_bw[1] = wbw;
-
-               rs->io_kb[0] += td->io_bytes[0] >> 10;
-               rs->io_kb[1] += td->io_bytes[1] >> 10;
-       }
-
-       for (i = 0; i < groupid + 1; i++) {
-               rs = &runstats[i];
-
-               if (rs->max_run[0])
-                       rs->agg[0] = (rs->io_kb[0]*1024) / rs->max_run[0];
-               if (rs->max_run[1])
-                       rs->agg[1] = (rs->io_kb[1]*1024) / rs->max_run[1];
-       }
-
-       /*
-        * don't overwrite last signal output
-        */
-       printf("\n");
-
-       for (i = 0; i < thread_number; i++) {
-               td = &threads[i];
-               rs = &runstats[td->groupid];
-
-               show_thread_status(td, rs);
-       }
-
-       for (i = 0; i < groupid + 1; i++)
-               show_group_stats(&runstats[i], i);
-
-       show_disk_util();
-}
-
-static inline void add_stat_sample(struct io_stat *is, unsigned long val)
-{
-       if (val > is->max_val)
-               is->max_val = val;
-       if (val < is->min_val)
-               is->min_val = val;
-
-       is->val += val;
-       is->val_sq += val * val;
-       is->samples++;
-}
-
-static void add_log_sample(struct thread_data *td, struct io_log *iolog,
-                          unsigned long val, int ddir)
-{
-       if (iolog->nr_samples == iolog->max_samples) {
-               int new_size = sizeof(struct io_sample) * iolog->max_samples*2;
-
-               iolog->log = realloc(iolog->log, new_size);
-               iolog->max_samples <<= 1;
-       }
-
-       iolog->log[iolog->nr_samples].val = val;
-       iolog->log[iolog->nr_samples].time = mtime_since_now(&td->epoch);
-       iolog->log[iolog->nr_samples].ddir = ddir;
-       iolog->nr_samples++;
-}
-
-void add_clat_sample(struct thread_data *td, int ddir, unsigned long msec)
-{
-       add_stat_sample(&td->clat_stat[ddir], msec);
-
-       if (td->clat_log)
-               add_log_sample(td, td->clat_log, msec, ddir);
-}
-
-void add_slat_sample(struct thread_data *td, int ddir, unsigned long msec)
-{
-       add_stat_sample(&td->slat_stat[ddir], msec);
-
-       if (td->slat_log)
-               add_log_sample(td, td->slat_log, msec, ddir);
-}
-
-void add_bw_sample(struct thread_data *td, int ddir)
-{
-       unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]);
-       unsigned long rate;
-
-       if (spent < td->bw_avg_time)
-               return;
-
-       rate = (td->this_io_bytes[ddir] - td->stat_io_bytes[ddir]) / spent;
-       add_stat_sample(&td->bw_stat[ddir], rate);
-
-       if (td->bw_log)
-               add_log_sample(td, td->bw_log, rate, ddir);
-
-       gettimeofday(&td->stat_sample_time[ddir], NULL);
-       td->stat_io_bytes[ddir] = td->this_io_bytes[ddir];
-}
-
-
diff --git a/fio-stat.h b/fio-stat.h
deleted file mode 100644 (file)
index b1ce677..0000000
--- a/fio-stat.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef FIO_STAT_H
-#define FIO_STAT_H
-
-extern void add_clat_sample(struct thread_data *, int, unsigned long);
-extern void add_slat_sample(struct thread_data *, int, unsigned long);
-extern void add_bw_sample(struct thread_data *, int);
-extern void show_run_stats(void);
-extern void init_disk_util(struct thread_data *);
-extern void update_rusage_stat(struct thread_data *);
-extern void update_io_ticks(void);
-extern void disk_util_timer_arm(void);
-#endif
diff --git a/fio-time.c b/fio-time.c
deleted file mode 100644 (file)
index 5246263..0000000
--- a/fio-time.c
+++ /dev/null
@@ -1,122 +0,0 @@
-#include <time.h>
-#include <sys/time.h>
-
-#include "fio.h"
-
-unsigned long utime_since(struct timeval *s, struct timeval *e)
-{
-       double sec, usec;
-
-       sec = e->tv_sec - s->tv_sec;
-       usec = e->tv_usec - s->tv_usec;
-       if (sec > 0 && usec < 0) {
-               sec--;
-               usec += 1000000;
-       }
-
-       sec *= (double) 1000000;
-
-       return sec + usec;
-}
-
-static unsigned long utime_since_now(struct timeval *s)
-{
-       struct timeval t;
-
-       gettimeofday(&t, NULL);
-       return utime_since(s, &t);
-}
-
-unsigned long mtime_since(struct timeval *s, struct timeval *e)
-{
-       double sec, usec;
-
-       sec = e->tv_sec - s->tv_sec;
-       usec = e->tv_usec - s->tv_usec;
-       if (sec > 0 && usec < 0) {
-               sec--;
-               usec += 1000000;
-       }
-
-       sec *= (double) 1000;
-       usec /= (double) 1000;
-
-       return sec + usec;
-}
-
-unsigned long mtime_since_now(struct timeval *s)
-{
-       struct timeval t;
-
-       gettimeofday(&t, NULL);
-       return mtime_since(s, &t);
-}
-
-unsigned long time_since_now(struct timeval *s)
-{
-       return mtime_since_now(s) / 1000;
-}
-
-/*
- * busy looping version for the last few usec
- */
-static void __usec_sleep(unsigned int usec)
-{
-       struct timeval start;
-
-       gettimeofday(&start, NULL);
-       while (utime_since_now(&start) < usec)
-               nop;
-}
-
-void usec_sleep(struct thread_data *td, unsigned long usec)
-{
-       struct timespec req, rem;
-
-       req.tv_sec = usec / 1000000;
-       req.tv_nsec = usec * 1000 - req.tv_sec * 1000000;
-
-       do {
-               if (usec < 5000) {
-                       __usec_sleep(usec);
-                       break;
-               }
-
-               rem.tv_sec = rem.tv_nsec = 0;
-               if (nanosleep(&req, &rem) < 0)
-                       break;
-
-               if ((rem.tv_sec + rem.tv_nsec) == 0)
-                       break;
-
-               req.tv_nsec = rem.tv_nsec;
-               req.tv_sec = rem.tv_sec;
-
-               usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
-       } while (!td->terminate);
-}
-
-void rate_throttle(struct thread_data *td, unsigned long time_spent,
-                  unsigned int bytes)
-{
-       unsigned long usec_cycle;
-
-       if (!td->rate)
-               return;
-
-       usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
-
-       if (time_spent < usec_cycle) {
-               unsigned long s = usec_cycle - time_spent;
-
-               td->rate_pending_usleep += s;
-               if (td->rate_pending_usleep >= 100000) {
-                       usec_sleep(td, td->rate_pending_usleep);
-                       td->rate_pending_usleep = 0;
-               }
-       } else {
-               long overtime = time_spent - usec_cycle;
-
-               td->rate_pending_usleep -= overtime;
-       }
-}
diff --git a/fio-time.h b/fio-time.h
deleted file mode 100644 (file)
index 4be3c4f..0000000
--- a/fio-time.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef FIO_TIME_H
-#define FIO_TIME_H
-
-extern unsigned long utime_since(struct timeval *, struct timeval *);
-extern unsigned long mtime_since(struct timeval *, struct timeval *);
-extern unsigned long mtime_since_now(struct timeval *);
-extern unsigned long time_since_now(struct timeval *);
-extern void usec_sleep(struct thread_data *, unsigned long);
-
-extern void rate_throttle(struct thread_data *, unsigned long, unsigned int);
-
-#endif
diff --git a/fio.c b/fio.c
index 1eeb1cccc46b6c9b3371afd3f7404e0b17bc93ad..8ac44522990f46819dcfc90720fb43d167eb376c 100644 (file)
--- a/fio.c
+++ b/fio.c
 #include "fio.h"
 #include "os.h"
 
-#include "fio-time.h"
-#include "fio-stat.h"
-#include "fio-log.h"
-
 #define MASK   (4095)
 
 #define ALIGN(buf)     (char *) (((unsigned long) (buf) + MASK) & ~(MASK))
diff --git a/fio.h b/fio.h
index 58fabbdb1fbcd8221bef66abcc33efb33fce1721..045fd012127e6b6ebd50f6520095f5cb2b462e00 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -353,4 +353,36 @@ struct io_completion_data {
 #define min(a, b)      ((a) < (b) ? (a) : (b))
 #endif
 
+/*
+ * Log exports
+ */
+extern int read_iolog_get(struct thread_data *, struct io_u *);
+extern void write_iolog_put(struct thread_data *, struct io_u *);
+extern int init_iolog(struct thread_data *td);
+extern void log_io_piece(struct thread_data *, struct io_u *);
+extern void prune_io_piece_log(struct thread_data *);
+extern void write_iolog_close(struct thread_data *);
+
+/*
+ * Logging
+ */
+extern void add_clat_sample(struct thread_data *, int, unsigned long);
+extern void add_slat_sample(struct thread_data *, int, unsigned long);
+extern void add_bw_sample(struct thread_data *, int);
+extern void show_run_stats(void);
+extern void init_disk_util(struct thread_data *);
+extern void update_rusage_stat(struct thread_data *);
+extern void update_io_ticks(void);
+extern void disk_util_timer_arm(void);
+
+/*
+ * Time functions
+ */
+extern unsigned long utime_since(struct timeval *, struct timeval *);
+extern unsigned long mtime_since(struct timeval *, struct timeval *);
+extern unsigned long mtime_since_now(struct timeval *);
+extern unsigned long time_since_now(struct timeval *);
+extern void usec_sleep(struct thread_data *, unsigned long);
+extern void rate_throttle(struct thread_data *, unsigned long, unsigned int);
+
 #endif
diff --git a/init.c b/init.c
new file mode 100644 (file)
index 0000000..d2122e3
--- /dev/null
+++ b/init.c
@@ -0,0 +1,1133 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "fio.h"
+
+/*
+ * Built-in defaults. fill_def_thread() copies most of these into def_thread
+ * before any job file is read; DEF_RAND_REPEAT initialises 'repeatable' and
+ * DEF_RANDSEED is the fixed seed init_random_state() uses when 'repeatable'
+ * is set.
+ */
+#define DEF_BS                 (4096)
+#define DEF_TIMEOUT            (0)
+#define DEF_RATE_CYCLE         (1000)
+#define DEF_ODIRECT            (1)
+#define DEF_IO_ENGINE          (FIO_SYNCIO)
+#define DEF_IO_ENGINE_NAME     "sync"
+#define DEF_SEQUENTIAL         (1)
+#define DEF_RAND_REPEAT                (1)
+#define DEF_OVERWRITE          (1)
+#define DEF_CREATE             (1)
+#define DEF_INVALIDATE         (1)
+#define DEF_SYNCIO             (0)
+#define DEF_RANDSEED           (0xb1899bedUL)
+#define DEF_BWAVGTIME          (500)
+#define DEF_CREATE_SER         (1)
+#define DEF_CREATE_FSYNC       (1)
+#define DEF_LOOPS              (1)
+#define DEF_VERIFY             (0)
+#define DEF_STONEWALL          (0)
+#define DEF_NUMJOBS            (1)
+#define DEF_USE_THREAD         (0)
+#define DEF_FILE_SIZE          (1024 * 1024 * 1024UL)
+#define DEF_ZONE_SIZE          (0)
+#define DEF_ZONE_SKIP          (0)
+#define DEF_RWMIX_CYCLE                (500)
+#define DEF_RWMIX_READ         (50)
+#define DEF_NICE               (0)
+
+/* printed by usage() */
+static char fio_version_string[] = "fio 1.4";
+
+/* non-zero: init_random_state() seeds random I/O with DEF_RANDSEED (-r) */
+static int repeatable = DEF_RAND_REPEAT;
+/* job file name given with -f */
+static char *ini_file;
+/* get_new_job() refuses to hand out more than this many job slots */
+static int max_jobs = MAX_JOBS;
+
+/* option template: filled by fill_def_thread(), the command line and [global] */
+struct thread_data def_thread;
+/* job slots handed out by get_new_job(); this file only initialises it to NULL */
+struct thread_data *threads = NULL;
+
+/* set from the -R command line switch */
+int rate_quit = 0;
+/* non-zero: add_job() allocates submission/completion latency logs per job */
+int write_lat_log = 0;
+/* non-zero: add_job() allocates a bandwidth log per job */
+int write_bw_log = 0;
+/* set by the "exitall" job file option */
+int exitall_on_terminate = 0;
+/* set by the "lockmem" job file option */
+unsigned long long mlock_size = 0;
+
+/*
+ * Derive the pacing interval for a rate limited job.
+ *
+ * Nothing is set up when no rate is configured (td->rate == 0). Otherwise
+ * the rate is scaled by 1024 and divided by the minimum block size to get
+ * the number of I/Os per second, and rate_usec_cycle becomes the number of
+ * microseconds available to each of them.
+ *
+ * Returns 0 on success, -1 when the rate settings cannot be honoured.
+ */
+static int setup_rate(struct thread_data *td)
+{
+       int nr_reads_per_sec;
+
+       if (!td->rate)
+               return 0;
+
+       if (td->rate < td->ratemin) {
+               fprintf(stderr, "min rate larger than nominal rate\n");
+               return -1;
+       }
+
+       /*
+        * both divisions below need a non-zero divisor
+        */
+       if (!td->min_bs) {
+               fprintf(stderr, "rate given with zero block size\n");
+               return -1;
+       }
+
+       nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
+       if (nr_reads_per_sec < 1) {
+               fprintf(stderr, "rate lower than supported\n");
+               return -1;
+       }
+
+       td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
+       td->rate_pending_usleep = 0;
+       return 0;
+}
+
+/*
+ * Allocate an empty sample log with room for 1024 entries and hand it back
+ * through 'log'.
+ */
+static void setup_log(struct io_log **log)
+{
+       struct io_log *new_log;
+
+       new_log = malloc(sizeof(struct io_log));
+
+       new_log->max_samples = 1024;
+       new_log->nr_samples = 0;
+       new_log->log = malloc(sizeof(struct io_sample) * new_log->max_samples);
+
+       *log = new_log;
+}
+
+/*
+ * Write all samples of 'log' to "client<thread_number>_<name>.log", one
+ * "time, value, direction" line per sample, then release the log.
+ *
+ * The log is freed whether or not the file could be opened, so the caller
+ * must not touch it afterwards.
+ */
+void finish_log(struct thread_data *td, struct io_log *log, const char *name)
+{
+       char file_name[256];
+       FILE *f;
+       unsigned int i;
+
+       snprintf(file_name, sizeof(file_name), "client%d_%s.log", td->thread_number, name);
+       f = fopen(file_name, "w");
+       if (!f) {
+               perror("fopen log");
+               goto out_free;
+       }
+
+       for (i = 0; i < log->nr_samples; i++)
+               fprintf(f, "%lu, %lu, %u\n", log->log[i].time, log->log[i].val, log->log[i].ddir);
+
+       fclose(f);
+out_free:
+       free(log->log);
+       free(log);
+}
+
+/*
+ * Hand out the job structure that the next section should fill in.
+ *
+ * For the global section this is def_thread itself. Otherwise the next
+ * unused entry of threads[] is claimed (NULL once max_jobs entries are in
+ * use) and initialised as a copy of 'parent', or zeroed when no parent is
+ * given. The new job starts without an open file (fd == -1) and with a
+ * 1-based thread_number.
+ */
+static struct thread_data *get_new_job(int global, struct thread_data *parent)
+{
+       struct thread_data *td;
+
+       if (global)
+               return &def_thread;
+       if (thread_number >= max_jobs)
+               return NULL;
+
+       td = &threads[thread_number++];
+
+       /*
+        * The structure assignment already carries over every option,
+        * cpumask and io_engine_name included, so no field-by-field copy
+        * is needed and a NULL parent is never dereferenced.
+        */
+       if (parent)
+               *td = *parent;
+       else
+               memset(td, 0, sizeof(*td));
+
+       td->fd = -1;
+       td->thread_number = thread_number;
+
+       return td;
+}
+
+/*
+ * Give back the slot of 'td': wipe its threads[] entry and lower the job
+ * count by one.
+ */
+static void put_job(struct thread_data *td)
+{
+       struct thread_data *slot = &threads[td->thread_number - 1];
+
+       thread_number--;
+       memset(slot, 0, sizeof(*slot));
+}
+
+/*
+ * Finalise a parsed job and register it.
+ *
+ * Validates the io engine against what was compiled in, fixes up dependent
+ * options (iodepth, zone size, block size range, verify), derives the file
+ * name from 'jobname', sets up rate limiting and optional logs, prints a
+ * one-line summary and finally clones the job numjobs - 1 times.
+ *
+ * job_add_num is 0 for the job named in the job file (full summary line),
+ * 1 for the clone that prints "..." and anything else for silent clones.
+ *
+ * Returns 0 on success, 1 for an unavailable io engine and -1 after the
+ * job slot has been released because of a later failure.
+ */
+static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
+{
+       char *ddir_str[] = { "read", "write", "randread", "randwrite",
+                            "rw", NULL, "randrw" };
+       struct stat sb;
+       int numjobs, ddir;
+
+#ifndef FIO_HAVE_LIBAIO
+       if (td->io_engine == FIO_LIBAIO) {
+               fprintf(stderr, "Linux libaio not available\n");
+               return 1;
+       }
+#endif
+#ifndef FIO_HAVE_POSIXAIO
+       if (td->io_engine == FIO_POSIXAIO) {
+               fprintf(stderr, "posix aio not available\n");
+               return 1;
+       }
+#endif
+
+       /*
+        * the def_thread is just for options, it's not a real job
+        */
+       if (td == &def_thread)
+               return 0;
+
+       if (td->io_engine & FIO_SYNCIO)
+               td->iodepth = 1;
+       else {
+               if (!td->iodepth)
+                       td->iodepth = 1;
+       }
+
+       /*
+        * only really works for sequential io for now
+        */
+       if (td->zone_size && !td->sequential)
+               td->zone_size = 0;
+
+       td->filetype = FIO_TYPE_FILE;
+       if (!stat(jobname, &sb)) {
+               if (S_ISBLK(sb.st_mode))
+                       td->filetype = FIO_TYPE_BD;
+               else if (S_ISCHR(sb.st_mode))
+                       td->filetype = FIO_TYPE_CHAR;
+       }
+
+       /*
+        * regular files get a per-job name, devices are used as given.
+        * All three forms are bounded by the size of file_name.
+        */
+       if (td->filetype == FIO_TYPE_FILE) {
+               if (td->directory && td->directory[0] != '\0')
+                       snprintf(td->file_name, sizeof(td->file_name), "%s/%s.%d", td->directory, jobname, td->jobnum);
+               else
+                       snprintf(td->file_name, sizeof(td->file_name), "%s.%d", jobname, td->jobnum);
+       } else
+               snprintf(td->file_name, sizeof(td->file_name), "%s", jobname);
+
+       sem_init(&td->mutex, 0, 0);
+
+       td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
+       td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX;
+       td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX;
+
+       /*
+        * -1U means "no bsrange given", fall back to the fixed block size
+        */
+       if (td->min_bs == -1U)
+               td->min_bs = td->bs;
+       if (td->max_bs == -1U)
+               td->max_bs = td->bs;
+       if (td_read(td) && !td_rw(td))
+               td->verify = 0;
+
+       if (td->stonewall && td->thread_number > 1)
+               groupid++;
+
+       td->groupid = groupid;
+
+       if (setup_rate(td))
+               goto err;
+
+       if (write_lat_log) {
+               setup_log(&td->slat_log);
+               setup_log(&td->clat_log);
+       }
+       if (write_bw_log)
+               setup_log(&td->bw_log);
+
+       /*
+        * index into ddir_str: bit 0 direction, bit 1 random, bit 2 mixed
+        */
+       ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2);
+
+       if (!job_add_num)
+               printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth);
+       else if (job_add_num == 1)
+               printf("...\n");
+
+       /*
+        * recurse add identical jobs, clear numjobs and stonewall options
+        * as they don't apply to sub-jobs. The "> 0" keeps numjobs=0 from
+        * counting down through the negative numbers.
+        */
+       numjobs = td->numjobs;
+       while (--numjobs > 0) {
+               struct thread_data *td_new = get_new_job(0, td);
+
+               if (!td_new)
+                       goto err;
+
+               td_new->numjobs = 1;
+               td_new->stonewall = 0;
+               td_new->jobnum = numjobs;
+               job_add_num = numjobs - 1;
+
+               if (add_job(td_new, jobname, job_add_num))
+                       goto err;
+       }
+       return 0;
+err:
+       put_job(td);
+       return -1;
+}
+
+/*
+ * Seed the per-job random number generators from /dev/urandom and, for
+ * random I/O, allocate the zeroed block map.
+ *
+ * The block size, verify and rwmix generators always get fresh seeds. The
+ * generator for random offsets is seeded with DEF_RANDSEED instead when
+ * 'repeatable' is set.
+ *
+ * Returns 0 on success, 1 on failure (the error is recorded with
+ * td_verror()).
+ */
+int init_random_state(struct thread_data *td)
+{
+       unsigned long seeds[4];
+       int fd, num_maps, blocks;
+
+       fd = open("/dev/urandom", O_RDONLY);
+       if (fd == -1) {
+               td_verror(td, errno);
+               return 1;
+       }
+
+       if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
+               td_verror(td, EIO);
+               close(fd);
+               return 1;
+       }
+
+       close(fd);
+
+       srand48_r(seeds[0], &td->bsrange_state);
+       srand48_r(seeds[1], &td->verify_state);
+       srand48_r(seeds[2], &td->rwmix_state);
+
+       if (td->sequential)
+               return 0;
+
+       if (repeatable)
+               seeds[3] = DEF_RANDSEED;
+
+       /*
+        * round up so that a trailing partial group of blocks still gets
+        * a map word of its own
+        */
+       blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
+       num_maps = (blocks + BLOCKS_PER_MAP - 1) / BLOCKS_PER_MAP;
+       td->file_map = calloc(num_maps, sizeof(long));
+       if (!td->file_map && num_maps) {
+               td_verror(td, ENOMEM);
+               return 1;
+       }
+       td->num_maps = num_maps;
+
+       srand48_r(seeds[3], &td->random_state);
+       return 0;
+}
+
+/*
+ * Translate the bits of 'cpu' into a CPU set: bit N set means CPU N is
+ * added to the mask. Does nothing without FIO_HAVE_CPU_AFFINITY.
+ *
+ * NOTE(review): 'cpumask' is received by value. If os_cpu_mask_t is a
+ * structure type (it is passed by address elsewhere in this file), the
+ * caller's mask is left untouched -- confirm, and consider taking a
+ * pointer here together with the call in parse_jobs_ini().
+ */
+static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
+{
+#ifdef FIO_HAVE_CPU_AFFINITY
+       unsigned int bits = cpu;
+       unsigned int i;
+
+       CPU_ZERO(&cpumask);
+
+       /*
+        * unsigned arithmetic: shifting a signed 1 into the top bit is
+        * undefined
+        */
+       for (i = 0; i < sizeof(bits) * 8; i++) {
+               if (bits & (1U << i))
+                       CPU_SET(i, &cpumask);
+       }
+#endif
+}
+
+/*
+ * Map a size suffix to its multiplier: k/K -> 2^10, m/M -> 2^20,
+ * g/G -> 2^30, anything else -> 1.
+ */
+static unsigned long get_mult(char c)
+{
+       unsigned int shift = 0;
+
+       if (c == 'k' || c == 'K')
+               shift = 10;
+       else if (c == 'm' || c == 'M')
+               shift = 20;
+       else if (c == 'g' || c == 'G')
+               shift = 30;
+
+       return 1UL << shift;
+}
+
+/*
+ * convert string after '=' into decimal value, noting any size suffix
+ */
+static int str_cnv(char *p, unsigned long long *val)
+{
+       unsigned long long num;
+       char *str, *end;
+
+       str = strchr(p, '=');
+       if (!str)
+               return 1;
+
+       str++;
+
+       /*
+        * errno must be cleared first, a stale ERANGE would otherwise be
+        * mistaken for an overflow. strtoull matches the width of *val.
+        */
+       errno = 0;
+       num = strtoull(str, &end, 10);
+       if (end == str || errno == ERANGE)
+               return 1;
+
+       /*
+        * the suffix is the last character of the line; at least one
+        * digit was consumed, so the string cannot be empty here. *val
+        * is only written once the conversion is known to be good.
+        */
+       *val = num * get_mult(str[strlen(str) - 1]);
+       return 0;
+}
+
+/*
+ * If line 'p' starts with option 'name', convert its value (with optional
+ * size suffix) into *val. Returns 0 on success, 1 if the line is for a
+ * different option or the value is unusable.
+ */
+static int check_strcnv(char *p, char *name, unsigned long long *val)
+{
+       /*
+        * compare the whole name; comparing one character less would let
+        * "bs" claim every line that merely starts with 'b'
+        */
+       if (strncmp(p, name, strlen(name)))
+               return 1;
+
+       return str_cnv(p, val);
+}
+
+/*
+ * Advance *p past any leading whitespace.
+ */
+static void strip_blank_front(char **p)
+{
+       char *s = *p;
+
+       while (isspace((unsigned char) *s))
+               s++;
+
+       /*
+        * hand the new start back, otherwise the skip has no effect
+        */
+       *p = s;
+}
+
+/*
+ * Cut trailing whitespace and control characters (such as the newline
+ * left by fgets) off 'p' in place.
+ */
+static void strip_blank_end(char *p)
+{
+       size_t len = strlen(p);
+
+       /*
+        * stop at the start of the buffer: an empty or all-blank line
+        * must not make us read in front of it
+        */
+       while (len > 0) {
+               unsigned char c = p[len - 1];
+
+               if (!isspace(c) && !iscntrl(c))
+                       break;
+               len--;
+       }
+
+       p[len] = '\0';
+}
+
+/*
+ * Callback applying a string option value to a job; returns 0 when the
+ * value was accepted.
+ */
+typedef int (str_cb_fn)(struct thread_data *, char *);
+
+/*
+ * If line 'p' starts with option 'name', pass whatever follows the '=' to
+ * 'cb' and return its result. Returns 1 when the line is for a different
+ * option or has no '='.
+ */
+static int check_str(char *p, char *name, str_cb_fn *cb, struct thread_data *td)
+{
+       char *value;
+
+       if (strncmp(p, name, strlen(name)) != 0)
+               return 1;
+
+       /*
+        * the name sits at the very start of the line, so the first '='
+        * of the line is the one separating name and value
+        */
+       value = strchr(p, '=');
+       if (value == NULL)
+               return 1;
+
+       value++;
+       strip_blank_front(&value);
+       return cb(td, value);
+}
+
+/*
+ * If line 'p' starts with option 'name', copy whatever follows the '='
+ * into 'dest'. Returns 0 when a value was stored, 1 otherwise. 'dest'
+ * must be large enough for the rest of the line.
+ */
+static int check_strstore(char *p, char *name, char *dest)
+{
+       char *value;
+
+       if (strncmp(p, name, strlen(name)) != 0)
+               return 1;
+
+       value = strchr(p, '=');
+       if (value == NULL)
+               return 1;
+
+       value++;
+       strip_blank_front(&value);
+
+       strcpy(dest, value);
+       return 0;
+}
+
+/*
+ * Parse one end of a range: a decimal number, optionally followed
+ * directly by a size suffix character. Returns 0 with the scaled value in
+ * *val, or 1 if 'str' does not start with a number.
+ */
+static int __check_range(char *str, unsigned long *val)
+{
+       char suffix;
+       int fields;
+
+       fields = sscanf(str, "%lu%c", val, &suffix);
+       if (fields == 2) {
+               *val *= get_mult(suffix);
+               return 0;
+       }
+
+       /*
+        * a number with nothing behind it is fine as well
+        */
+       return fields == 1 ? 0 : 1;
+}
+
+/*
+ * If line 'p' starts with option 'name', parse its "name=<low>-<high>"
+ * value into *s and *e; both ends may carry a size suffix. Returns 0 on
+ * success, 1 if the line is for a different option or is malformed.
+ */
+static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
+{
+       char option[128];
+       char *str, *p1, *p2;
+
+       if (strncmp(p, name, strlen(name)))
+               return 1;
+
+       /*
+        * work on a private copy, as the range gets split in place. A
+        * line may be far longer than the copy; such a line cannot be a
+        * valid range, so reject it instead of overrunning the buffer.
+        */
+       if (strlen(p) >= sizeof(option))
+               return 1;
+
+       strcpy(option, p);
+
+       str = strchr(option + strlen(name), '=');
+       if (!str)
+               return 1;
+
+       /*
+        * 'p1' now holds whatever is after the '=' sign
+        */
+       p1 = str + 1;
+
+       /*
+        * terminate p1 at the '-' sign, p2 starts right behind it
+        */
+       p = strchr(p1, '-');
+       if (!p)
+               return 1;
+
+       p2 = p + 1;
+       *p = '\0';
+
+       if (!__check_range(p1, s) && !__check_range(p2, e))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * If line 'p' is "name=<unsigned number>" (blanks are allowed around the
+ * '='), store the number in *val. Returns 0 on success; returns 1 and
+ * leaves *val alone if the line is for a different option or holds no
+ * number.
+ */
+static int check_int(char *p, char *name, unsigned int *val)
+{
+       char *str;
+
+       if (strncmp(p, name, strlen(name)))
+               return 1;
+
+       /*
+        * The name must end here. A bare prefix match would let "rate"
+        * swallow "ratemin=..." and "prio" swallow "prioclass=...".
+        */
+       str = p + strlen(name);
+       while (*str == ' ' || *str == '\t')
+               str++;
+
+       if (*str != '=')
+               return 1;
+
+       str++;
+
+       if (sscanf(str, "%u", val) == 1)
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Test whether line 'p' starts with the flag 'name'. Returns 0 on a
+ * match, non-zero (the strncmp result) otherwise.
+ */
+static int check_strset(char *p, char *name)
+{
+       size_t name_len = strlen(name);
+
+       return strncmp(p, name, name_len);
+}
+
+/*
+ * Returns 1 if 'line' holds nothing but whitespace/control characters or
+ * if its first significant character is ';' (a comment), 0 otherwise.
+ */
+static int is_empty_or_comment(char *line)
+{
+       const char *s;
+
+       /*
+        * walk up to the terminator rather than calling strlen() on
+        * every round, and hand the ctype functions an unsigned char
+        * value as they require
+        */
+       for (s = line; *s != '\0'; s++) {
+               unsigned char c = *s;
+
+               if (c == ';')
+                       return 1;
+               if (!isspace(c) && !iscntrl(c))
+                       return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Apply the value of the "rw" option: read/0, randread, write/1,
+ * randwrite, rw or randrw. Sets the data direction, the sequential flag
+ * and the mixed read/write flag. Returns 0 on success, 1 for an unknown
+ * value.
+ *
+ * iomix is written in every branch. A job may override an inherited
+ * "rw=rw", and add_job() has no name for the combination of a stale iomix
+ * with the write direction.
+ */
+static int str_rw_cb(struct thread_data *td, char *mem)
+{
+       if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) {
+               td->ddir = DDIR_READ;
+               td->iomix = 0;
+               td->sequential = 1;
+               return 0;
+       } else if (!strncmp(mem, "randread", 8)) {
+               td->ddir = DDIR_READ;
+               td->iomix = 0;
+               td->sequential = 0;
+               return 0;
+       } else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) {
+               td->ddir = DDIR_WRITE;
+               td->iomix = 0;
+               td->sequential = 1;
+               return 0;
+       } else if (!strncmp(mem, "randwrite", 9)) {
+               td->ddir = DDIR_WRITE;
+               td->iomix = 0;
+               td->sequential = 0;
+               return 0;
+       } else if (!strncmp(mem, "rw", 2)) {
+               td->ddir = 0;
+               td->iomix = 1;
+               td->sequential = 1;
+               return 0;
+       } else if (!strncmp(mem, "randrw", 6)) {
+               td->ddir = 0;
+               td->iomix = 1;
+               td->sequential = 0;
+               return 0;
+       }
+
+       fprintf(stderr, "bad data direction: %s\n", mem);
+       return 1;
+}
+
+/*
+ * Apply the value of the "verify" option: 0 (off), md5 or 1, crc32.
+ * Returns 0 on success, 1 for an unknown value.
+ */
+static int str_verify_cb(struct thread_data *td, char *mem)
+{
+       if (strncmp(mem, "0", 1) == 0)
+               td->verify = VERIFY_NONE;
+       else if (strncmp(mem, "md5", 3) == 0 || strncmp(mem, "1", 1) == 0)
+               td->verify = VERIFY_MD5;
+       else if (strncmp(mem, "crc32", 5) == 0)
+               td->verify = VERIFY_CRC32;
+       else {
+               fprintf(stderr, "bad verify type: %s\n", mem);
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Apply the value of the "mem" option: malloc, shm or mmap. Returns 0 on
+ * success, 1 for an unknown value.
+ */
+static int str_mem_cb(struct thread_data *td, char *mem)
+{
+       if (strncmp(mem, "malloc", 6) == 0)
+               td->mem_type = MEM_MALLOC;
+       else if (strncmp(mem, "shm", 3) == 0)
+               td->mem_type = MEM_SHM;
+       else if (strncmp(mem, "mmap", 4) == 0)
+               td->mem_type = MEM_MMAP;
+       else {
+               fprintf(stderr, "bad mem type: %s\n", mem);
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Apply the value of the "ioengine" option. The value only has to start
+ * with one of the known spellings; the job gets the engine id and its
+ * canonical name. Returns 0 on success, 1 for an unknown engine.
+ */
+static int str_ioengine_cb(struct thread_data *td, char *str)
+{
+       /*
+        * tried top to bottom, the first matching prefix wins
+        */
+       const struct {
+               const char *match;
+               const char *engine_name;
+               unsigned int engine;
+       } engines[] = {
+               { "linuxaio",   "libaio",       FIO_LIBAIO      },
+               { "aio",        "libaio",       FIO_LIBAIO      },
+               { "libaio",     "libaio",       FIO_LIBAIO      },
+               { "posixaio",   "posixaio",     FIO_POSIXAIO    },
+               { "sync",       "sync",         FIO_SYNCIO      },
+               { "mmap",       "mmap",         FIO_MMAPIO      },
+               { "sgio",       "sgio",         FIO_SGIO        },
+               { "splice",     "splice",       FIO_SPLICEIO    },
+       };
+       unsigned int i;
+
+       for (i = 0; i < sizeof(engines) / sizeof(engines[0]); i++) {
+               if (strncmp(str, engines[i].match, strlen(engines[i].match)))
+                       continue;
+
+               strcpy(td->io_engine_name, engines[i].engine_name);
+               td->io_engine = engines[i].engine;
+               return 0;
+       }
+
+       fprintf(stderr, "bad ioengine type: %s\n", str);
+       return 1;
+}
+
+/*
+ * Remember the io log file name given in the job file. Returns 0 on
+ * success, 1 if the name could not be duplicated; the job is left
+ * untouched in that case.
+ */
+static int str_iolog_cb(struct thread_data *td, char *file)
+{
+       char *copy = strdup(file);
+
+       if (!copy) {
+               perror("strdup");
+               return 1;
+       }
+
+       td->iolog_file = copy;
+       return 0;
+}
+
+/*
+ * Remember the "exec_prerun" value given in the job file. Returns 0 on
+ * success, 1 if the string could not be duplicated; the job is left
+ * untouched in that case.
+ */
+static int str_prerun_cb(struct thread_data *td, char *file)
+{
+       char *copy = strdup(file);
+
+       if (!copy) {
+               perror("strdup");
+               return 1;
+       }
+
+       td->exec_prerun = copy;
+       return 0;
+}
+
+/*
+ * Remember the "exec_postrun" value given in the job file. Returns 0 on
+ * success, 1 if the string could not be duplicated; the job is left
+ * untouched in that case.
+ */
+static int str_postrun_cb(struct thread_data *td, char *file)
+{
+       char *copy = strdup(file);
+
+       if (!copy) {
+               perror("strdup");
+               return 1;
+       }
+
+       td->exec_postrun = copy;
+       return 0;
+}
+
+/*
+ * Remember the "ioscheduler" value given in the job file. Returns 0 on
+ * success, 1 if the string could not be duplicated; the job is left
+ * untouched in that case.
+ */
+static int str_iosched_cb(struct thread_data *td, char *file)
+{
+       char *copy = strdup(file);
+
+       if (!copy) {
+               perror("strdup");
+               return 1;
+       }
+
+       td->ioscheduler = copy;
+       return 0;
+}
+
+/*
+ * Read an ini style job file and register every job it describes.
+ *
+ * Each "[name]" line starts a section; "[global]" edits the defaults in
+ * def_thread, every other section creates a job that starts as a copy of
+ * def_thread. The following lines, up to the next line containing '[',
+ * are options for that section. Empty lines and lines starting with ';'
+ * are skipped. Once a section has been read the job is handed to
+ * add_job().
+ *
+ * The option checks match on a name prefix, so a longer name sharing a
+ * prefix with a shorter one (prioclass/prio, ratemin and ratecycle/rate,
+ * directory/direct) has to be tested first.
+ *
+ * Returns 0 on success and 1 on any error. The line buffers and the file
+ * are released on every path.
+ */
+int parse_jobs_ini(char *file)
+{
+       unsigned int prioclass, prio, cpu, global, il;
+       unsigned long long ull;
+       unsigned long ul1, ul2;
+       struct thread_data *td;
+       char *string, *name, *tmpbuf;
+       fpos_t off;
+       FILE *f;
+       char *p;
+       int ret = 1;
+
+       f = fopen(file, "r");
+       if (!f) {
+               perror("fopen job file");
+               return 1;
+       }
+
+       string = malloc(4096);
+       name = malloc(256);
+       tmpbuf = malloc(4096);
+       if (!string || !name || !tmpbuf) {
+               perror("malloc");
+               goto out;
+       }
+
+       while ((p = fgets(string, 4096, f)) != NULL) {
+               if (is_empty_or_comment(p))
+                       continue;
+               /*
+                * 'name' holds 256 bytes, bound the conversion to match
+                */
+               if (sscanf(p, "[%255s]", name) != 1)
+                       continue;
+
+               global = !strncmp(name, "global", 6);
+
+               /*
+                * %s stops at whitespace only, cut off the trailing ']'
+                */
+               name[strlen(name) - 1] = '\0';
+
+               td = get_new_job(global, &def_thread);
+               if (!td)
+                       goto out;
+
+               /*
+                * 'off' tracks the position behind the last line that was
+                * consumed, so the line starting the next section can be
+                * read again by the outer loop
+                */
+               fgetpos(f, &off);
+               while ((p = fgets(string, 4096, f)) != NULL) {
+                       if (is_empty_or_comment(p))
+                               continue;
+                       if (strstr(p, "["))
+                               break;
+                       strip_blank_front(&p);
+                       strip_blank_end(p);
+
+                       if (!check_int(p, "prioclass", &prioclass)) {
+#ifndef FIO_HAVE_IOPRIO
+                               fprintf(stderr, "io priorities not available\n");
+                               goto out;
+#endif
+                               td->ioprio |= prioclass << IOPRIO_CLASS_SHIFT;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "prio", &prio)) {
+#ifndef FIO_HAVE_IOPRIO
+                               fprintf(stderr, "io priorities not available\n");
+                               goto out;
+#endif
+                               td->ioprio |= prio;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strstore(p, "directory", tmpbuf)) {
+                               td->directory = strdup(tmpbuf);
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "direct", &td->odirect)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "ratemin", &td->ratemin)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "ratecycle", &td->ratecycle)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "rate", &td->rate)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "thinktime", &td->thinktime)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "cpumask", &cpu)) {
+#ifndef FIO_HAVE_CPU_AFFINITY
+                               fprintf(stderr, "cpu affinity not available\n");
+                               goto out;
+#endif
+                               fill_cpu_mask(td->cpumask, cpu);
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "fsync", &td->fsync_blocks)) {
+                               fgetpos(f, &off);
+                               td->end_fsync = 1;
+                               continue;
+                       }
+                       if (!check_int(p, "startdelay", &td->start_delay)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "timeout", &td->timeout)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "invalidate",&td->invalidate_cache)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "iodepth", &td->iodepth)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "sync", &td->sync_io)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "create_serialize", &td->create_serialize)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "create_fsync", &td->create_fsync)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "end_fsync", &td->end_fsync)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "loops", &td->loops)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "numjobs", &td->numjobs)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "overwrite", &td->overwrite)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "rwmixcycle", &td->rwmixcycle)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "rwmixread", &il)) {
+                               if (il > 100)
+                                       il = 100;
+                               td->rwmixread = il;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "rwmixwrite", &il)) {
+                               if (il > 100)
+                                       il = 100;
+                               td->rwmixread = 100 - il;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_int(p, "nice", &td->nice)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_range(p, "bsrange", &ul1, &ul2)) {
+                               if (ul1 > ul2) {
+                                       td->max_bs = ul1;
+                                       td->min_bs = ul2;
+                               } else {
+                                       td->max_bs = ul2;
+                                       td->min_bs = ul1;
+                               }
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strcnv(p, "bs", &ull)) {
+                               td->bs = ull;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strcnv(p, "size", &td->file_size)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strcnv(p, "offset", &td->file_offset)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strcnv(p, "zonesize", &td->zone_size)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strcnv(p, "zoneskip", &td->zone_skip)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strcnv(p, "lockmem", &mlock_size)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "mem", str_mem_cb, td)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "verify", str_verify_cb, td)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "rw", str_rw_cb, td)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "ioengine", str_ioengine_cb, td)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strset(p, "create")) {
+                               td->create_file = 1;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strset(p, "exitall")) {
+                               exitall_on_terminate = 1;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strset(p, "stonewall")) {
+                               td->stonewall = 1;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_strset(p, "thread")) {
+                               td->use_thread = 1;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "iolog", str_iolog_cb, td)) {
+                               td->read_iolog = 1;
+                               td->write_iolog = 0;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!td->read_iolog &&
+                           !check_str(p, "write_iolog", str_iolog_cb, td)) {
+                               td->write_iolog = 1;
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "exec_prerun", str_prerun_cb, td)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "exec_postrun", str_postrun_cb, td)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+                       if (!check_str(p, "ioscheduler", str_iosched_cb, td)) {
+                               fgetpos(f, &off);
+                               continue;
+                       }
+
+                       printf("Client%d: bad option %s\n",td->thread_number,p);
+                       goto out;
+               }
+               fsetpos(f, &off);
+
+               if (add_job(td, name, 0))
+                       goto out;
+       }
+
+       ret = 0;
+out:
+       free(string);
+       free(name);
+       free(tmpbuf);
+       fclose(f);
+       return ret;
+}
+
+/*
+ * Reset def_thread and load it with the built-in defaults. The CPU mask
+ * is taken from the affinity of the current process. Returns 0 on
+ * success, 1 if the affinity could not be read.
+ */
+static int fill_def_thread(void)
+{
+       struct thread_data *def = &def_thread;
+
+       memset(def, 0, sizeof(*def));
+
+       if (fio_getaffinity(getpid(), &def->cpumask) == -1) {
+               perror("sched_getaffinity");
+               return 1;
+       }
+
+       /*
+        * io pattern and block size; -1 leaves the range to be derived
+        * from bs later on
+        */
+       def->ddir = DDIR_READ;
+       def->iomix = 0;
+       def->sequential = DEF_SEQUENTIAL;
+       def->rwmixcycle = DEF_RWMIX_CYCLE;
+       def->rwmixread = DEF_RWMIX_READ;
+       def->bs = DEF_BS;
+       def->min_bs = -1;
+       def->max_bs = -1;
+
+       /*
+        * io engine and how the file is opened
+        */
+       def->io_engine = DEF_IO_ENGINE;
+       strcpy(def->io_engine_name, DEF_IO_ENGINE_NAME);
+       def->odirect = DEF_ODIRECT;
+       def->sync_io = DEF_SYNCIO;
+       def->mem_type = MEM_MALLOC;
+
+       /*
+        * file handling
+        */
+       def->create_file = DEF_CREATE;
+       def->create_serialize = DEF_CREATE_SER;
+       def->create_fsync = DEF_CREATE_FSYNC;
+       def->overwrite = DEF_OVERWRITE;
+       def->invalidate_cache = DEF_INVALIDATE;
+       def->verify = DEF_VERIFY;
+
+       /*
+        * run control and accounting
+        */
+       def->timeout = DEF_TIMEOUT;
+       def->loops = DEF_LOOPS;
+       def->ratecycle = DEF_RATE_CYCLE;
+       def->bw_avg_time = DEF_BWAVGTIME;
+       def->stonewall = DEF_STONEWALL;
+       def->numjobs = DEF_NUMJOBS;
+       def->use_thread = DEF_USE_THREAD;
+       def->nice = DEF_NICE;
+#ifdef FIO_HAVE_DISK_UTIL
+       def->do_disk_util = 1;
+#endif
+
+       return 0;
+}
+
+/*
+ * Print the version string followed by one help line per documented
+ * command line switch. The name argument is not used.
+ */
+static void usage(char *name)
+{
+       static const char *const help[] = {
+               "\t-s IO is sequential\n",
+               "\t-b Block size in KiB for each IO\n",
+               "\t-t Runtime in seconds\n",
+               "\t-R Exit all threads on failure to meet rate goal\n",
+               "\t-o Use O_DIRECT\n",
+               "\t-l Generate per-job latency logs\n",
+               "\t-w Generate per-job bandwidth logs\n",
+               "\t-f Job file (Required)\n",
+               "\t-v Print version info and exit\n",
+       };
+       unsigned int i;
+
+       printf("%s\n", fio_version_string);
+
+       for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
+               fputs(help[i], stdout);
+}
+
+/*
+ * Parse the command line switches. Values that have a per-job equivalent
+ * are stored in def_thread, the rest in the global flags (repeatable,
+ * rate_quit, write_lat_log, write_bw_log, ini_file).
+ *
+ * -h and -v print their output and exit(0) right away. If no -f was given,
+ * a trailing non-option argument is taken as the job file.
+ */
+static void parse_cmd_line(int argc, char *argv[])
+{
+       int c;
+
+       while ((c = getopt(argc, argv, "s:b:t:r:R:o:f:lwvh")) != EOF) {
+               switch (c) {
+                       case 's':
+                               def_thread.sequential = !!atoi(optarg);
+                               break;
+                       case 'b':
+                               /* given in KiB, stored in bytes */
+                               def_thread.bs = atoi(optarg);
+                               def_thread.bs <<= 10;
+                               if (!def_thread.bs) {
+                                       printf("bad block size\n");
+                                       def_thread.bs = DEF_BS;
+                               }
+                               break;
+                       case 't':
+                               def_thread.timeout = atoi(optarg);
+                               break;
+                       case 'r':
+                               repeatable = !!atoi(optarg);
+                               break;
+                       case 'R':
+                               rate_quit = !!atoi(optarg);
+                               break;
+                       case 'o':
+                               def_thread.odirect = !!atoi(optarg);
+                               break;
+                       case 'f':
+                               ini_file = strdup(optarg);
+                               break;
+                       case 'l':
+                               write_lat_log = 1;
+                               break;
+                       case 'w':
+                               write_bw_log = 1;
+                               break;
+                       case 'h':
+                               usage(argv[0]);
+                               exit(0);
+                       case 'v':
+                               printf("%s\n", fio_version_string);
+                               exit(0);
+               }
+       }
+
+       /*
+        * No -f: accept the last argument as the job file, but only if
+        * getopt actually left a non-option argument behind. Without the
+        * optind check, the value of the last switch (the "10" in
+        * "fio -t 10") would be mistaken for the job file.
+        */
+       if (!ini_file && optind < argc && argv[argc - 1][0] != '-')
+               ini_file = strdup(argv[argc - 1]);
+}
+
+/*
+ * Detach from the shared thread area, if attached, and mark the segment
+ * for removal. Registered with atexit() by setup_thread_area().
+ */
+static void free_shm(void)
+{
+       struct shmid_ds ds;
+
+       if (!threads)
+               return;
+
+       shmdt(threads);
+       threads = NULL;
+       shmctl(shm_id, IPC_RMID, &ds);
+}
+
+/*
+ * Allocate and attach the shared memory segment holding max_jobs
+ * struct thread_data entries, and register free_shm() to tear it down at
+ * exit.
+ *
+ * Returns 0 on success, 1 if no segment could be created or attached.
+ */
+static int setup_thread_area(void)
+{
+       /*
+        * 1024 is too much on some machines, scale max_jobs if
+        * we get a failure that looks like too large a shm segment
+        */
+       do {
+               /* do the multiplication in size_t, not int */
+               size_t size = (size_t) max_jobs * sizeof(struct thread_data);
+
+               shm_id = shmget(0, size, IPC_CREAT | 0600);
+               if (shm_id != -1)
+                       break;
+               if (errno != EINVAL) {
+                       perror("shmget");
+                       break;
+               }
+
+               max_jobs >>= 1;
+       } while (max_jobs);
+
+       if (shm_id == -1)
+               return 1;
+
+       threads = shmat(shm_id, NULL, 0);
+       if (threads == (void *) -1) {
+               struct shmid_ds ds;
+
+               perror("shmat");
+
+               /*
+                * free_shm() is not registered yet, so remove the segment
+                * here or it outlives the process. Also don't leave the
+                * (void *) -1 error value in the global pointer.
+                */
+               threads = NULL;
+               shmctl(shm_id, IPC_RMID, &ds);
+               return 1;
+       }
+
+       atexit(free_shm);
+       return 0;
+}
+
+/*
+ * Entry point for option handling: set up the shared thread area, load
+ * the defaults, parse the command line and then the job file.
+ *
+ * Returns 0 when the job file was parsed, 1 on any failure. A missing or
+ * unparsable job file also prints the usage text.
+ */
+int parse_options(int argc, char *argv[])
+{
+       if (setup_thread_area() || fill_def_thread())
+               return 1;
+
+       parse_cmd_line(argc, argv);
+
+       if (!ini_file)
+               printf("Need job file\n");
+       else if (!parse_jobs_ini(ini_file))
+               return 0;
+
+       usage(argv[0]);
+       return 1;
+}
diff --git a/ioengines.c b/ioengines.c
new file mode 100644 (file)
index 0000000..7b1c1bd
--- /dev/null
+++ b/ioengines.c
@@ -0,0 +1,919 @@
+/*
+ * The io parts of the fio tool, includes workers for sync and mmap'ed
+ * io, as well as both posix and linux libaio support.
+ *
+ * sync io is implemented on top of aio.
+ *
+ * This is not really specific to fio. If get_io_u/put_io_u and the
+ * structures were pulled into this as well, it would be a perfectly
+ * generic io engine that could be used for other projects.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/poll.h>
+#include "fio.h"
+#include "os.h"
+
+#ifdef FIO_HAVE_LIBAIO
+
+/*
+ * Map a completed io_event back to its io_u by casting ev->obj.
+ * NOTE(review): this assumes the iocb sits at the start of struct io_u -
+ * confirm against fio.h. The whole expansion is parenthesized so the
+ * macro is safe inside larger expressions.
+ */
+#define ev_to_iou(ev)  ((struct io_u *) ((unsigned long) (ev)->obj))
+
+/*
+ * io_sync hook shared by the fd based engines: fsync() the job's file
+ * descriptor and hand back the fsync() result.
+ */
+static int fio_io_sync(struct thread_data *td)
+{
+       int ret = fsync(td->fd);
+
+       return ret;
+}
+
+/*
+ * Store the current CLOCK_MONOTONIC time in *ts.
+ *
+ * Returns 0 on success. Returns 1 if clock_gettime() fails (after
+ * reporting it with perror) or if _POSIX_TIMERS is not defined.
+ */
+static int fill_timespec(struct timespec *ts)
+{
+#ifdef _POSIX_TIMERS
+       if (clock_gettime(CLOCK_MONOTONIC, ts)) {
+               perror("clock_gettime");
+               return 1;
+       }
+
+       return 0;
+#else
+       return 1;
+#endif
+}
+
+/*
+ * Return the number of microseconds that passed between *t and now.
+ * Returns 0 if the current time cannot be read.
+ */
+static unsigned long long ts_utime_since_now(struct timespec *t)
+{
+       struct timespec cur;
+       long long secs, nsecs;
+
+       if (fill_timespec(&cur))
+               return 0;
+
+       secs = cur.tv_sec - t->tv_sec;
+       nsecs = cur.tv_nsec - t->tv_nsec;
+
+       /* borrow one second if the nanosecond difference went negative */
+       if (nsecs < 0 && secs > 0) {
+               secs -= 1;
+               nsecs += 1000000000;
+       }
+
+       return (secs * 1000000) + (nsecs / 1000);
+}
+
+struct libaio_data {                   /* libaio engine state, stored in td->io_data */
+       io_context_t aio_ctx;           /* context set up by io_queue_init(), used for submit/getevents/cancel */
+       struct io_event *aio_events;    /* td->iodepth completion slots filled by io_getevents() */
+};
+
+/*
+ * io_prep hook: fill in the io_u's iocb as a pread or pwrite on td->fd,
+ * depending on the io direction. Always returns 0.
+ */
+static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
+{
+       struct iocb *iocb = &io_u->iocb;
+
+       if (io_u->ddir != DDIR_READ)
+               io_prep_pwrite(iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+       else
+               io_prep_pread(iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+
+       return 0;
+}
+
+/*
+ * io_event hook: return the io_u behind completion slot 'event' of the
+ * last fio_libaio_getevents() call.
+ */
+static struct io_u *fio_libaio_event(struct thread_data *td, int event)
+{
+       struct libaio_data *ld = td->io_data;
+       struct io_event *ev = &ld->aio_events[event];
+
+       return ev_to_iou(ev);
+}
+
+/*
+ * io_getevents hook: reap between min and max completions into
+ * ld->aio_events. -EINTR is retried immediately, -EAGAIN after a 100 usec
+ * nap. Any other io_getevents() result is returned to the caller.
+ */
+static int fio_libaio_getevents(struct thread_data *td, int min, int max,
+                               struct timespec *t)
+{
+       struct libaio_data *ld = td->io_data;
+       int r;
+
+       for (;;) {
+               r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
+               if (r == -EINTR)
+                       continue;
+               if (r != -EAGAIN)
+                       return r;
+
+               usleep(100);
+       }
+}
+
+/*
+ * io_queue hook: submit a single iocb. Returns 0 once io_submit() has
+ * accepted it. -EINTR is retried immediately, -EAGAIN after a 100 usec
+ * nap. Any other io_submit() result is returned as is.
+ */
+static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+       struct libaio_data *ld = td->io_data;
+       struct iocb *iocb = &io_u->iocb;
+       int ret;
+
+       for (;;) {
+               ret = io_submit(ld->aio_ctx, 1, &iocb);
+               if (ret == 1)
+                       return 0;
+
+               if (ret == -EAGAIN) {
+                       usleep(100);
+                       continue;
+               }
+               if (ret != -EINTR)
+                       return ret;
+       }
+}
+
+/*
+ * io_cancel hook: ask libaio to cancel the io_u's iocb. The first
+ * aio_events slot is passed as the result event. Returns the io_cancel()
+ * result.
+ */
+static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+       struct libaio_data *ld = td->io_data;
+       struct iocb *iocb = &io_u->iocb;
+
+       return io_cancel(ld->aio_ctx, iocb, ld->aio_events);
+}
+
+/*
+ * io_cleanup hook: destroy the aio context and free the engine data.
+ * Safe to call when no engine data is attached.
+ */
+static void fio_libaio_cleanup(struct thread_data *td)
+{
+       struct libaio_data *ld = td->io_data;
+
+       if (!ld)
+               return;
+
+       io_destroy(ld->aio_ctx);
+       free(ld->aio_events);   /* free(NULL) is a no-op */
+       free(ld);
+       td->io_data = NULL;
+}
+
+/*
+ * Set up the libaio engine for this thread: allocate the engine data and
+ * td->iodepth event slots, create the aio context and install the
+ * engine hooks.
+ *
+ * Returns 0 on success. On failure the error is recorded with
+ * td_verror(), everything allocated here is freed again and 1 is
+ * returned; td's hooks and io_data are left untouched in that case.
+ */
+int fio_libaio_init(struct thread_data *td)
+{
+       struct libaio_data *ld;
+       int err;
+
+       /* calloc so the context starts out zeroed, as io_queue_init wants */
+       ld = calloc(1, sizeof(*ld));
+       if (!ld) {
+               td_verror(td, ENOMEM);
+               return 1;
+       }
+
+       ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
+       if (!ld->aio_events && td->iodepth) {
+               td_verror(td, ENOMEM);
+               free(ld);
+               return 1;
+       }
+
+       /*
+        * libaio reports failure as a negative errno value in the return
+        * code, errno itself is not reliable here
+        */
+       err = io_queue_init(td->iodepth, &ld->aio_ctx);
+       if (err) {
+               td_verror(td, -err);
+               free(ld->aio_events);
+               free(ld);
+               return 1;
+       }
+
+       td->io_prep = fio_libaio_io_prep;
+       td->io_queue = fio_libaio_queue;
+       td->io_getevents = fio_libaio_getevents;
+       td->io_event = fio_libaio_event;
+       td->io_cancel = fio_libaio_cancel;
+       td->io_cleanup = fio_libaio_cleanup;
+       td->io_sync = fio_io_sync;
+
+       td->io_data = ld;
+       return 0;
+}
+
+#else /* FIO_HAVE_LIBAIO */
+
+/*
+ * Built without FIO_HAVE_LIBAIO: the engine cannot be set up, always
+ * returns EINVAL.
+ */
+int fio_libaio_init(struct thread_data *td)
+{
+       (void) td;
+
+       return EINVAL;
+}
+
+#endif /* FIO_HAVE_LIBAIO */
+
+#ifdef FIO_HAVE_POSIXAIO
+
+struct posixaio_data {                 /* posix aio engine state, stored in td->io_data */
+       struct io_u **aio_events;       /* td->iodepth slots holding the io_us found complete by getevents */
+};
+
+/*
+ * io_cancel hook: try to cancel the io_u's aiocb on td->fd. Returns 0 if
+ * aio_cancel() returned 1 or AIO_CANCELED, 1 for anything else.
+ */
+static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+       int status = aio_cancel(td->fd, &io_u->aiocb);
+
+       return !(status == 1 || status == AIO_CANCELED);
+}
+
+/*
+ * io_prep hook: point the io_u's aiocb at td->fd and at the io_u's
+ * buffer, length and offset, and mark the io_u as not yet reaped.
+ * Always returns 0.
+ */
+static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
+{
+       struct aiocb *cb = &io_u->aiocb;
+
+       /* getevents skips io_us that already have 'seen' set */
+       io_u->seen = 0;
+
+       cb->aio_fildes = td->fd;
+       cb->aio_offset = io_u->offset;
+       cb->aio_buf = io_u->buf;
+       cb->aio_nbytes = io_u->buflen;
+
+       return 0;
+}
+
+/*
+ * io_getevents hook: poll the busy list with aio_error() and collect
+ * finished io_us into pd->aio_events until at least min (and at most max)
+ * have been found. An io_u counts as finished once aio_error() returns
+ * anything but EINPROGRESS; values other than 0 and ECANCELED are stored
+ * in io_u->error.
+ *
+ * If a timeout is given and the start time could be read, the events
+ * found so far are returned once the timeout has passed.
+ */
+static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
+                                 struct timespec *t)
+{
+       struct posixaio_data *pd = td->io_data;
+       struct list_head *pos;
+       struct timespec start;
+       int have_timeout;
+       int nr = 0;
+
+       have_timeout = t && !fill_timespec(&start);
+
+       for (;;) {
+               list_for_each(pos, &td->io_u_busylist) {
+                       struct io_u *io_u = list_entry(pos, struct io_u, list);
+                       int err;
+
+                       /* already handed out on an earlier pass */
+                       if (io_u->seen)
+                               continue;
+
+                       err = aio_error(&io_u->aiocb);
+                       if (err != EINPROGRESS) {
+                               if (err && err != ECANCELED)
+                                       io_u->error = err;
+
+                               pd->aio_events[nr++] = io_u;
+                               io_u->seen = 1;
+                       }
+
+                       if (nr >= max)
+                               break;
+               }
+
+               if (nr >= min)
+                       return nr;
+
+               if (have_timeout) {
+                       unsigned long long usec;
+
+                       usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
+                       if (ts_utime_since_now(&start) > usec)
+                               return nr;
+               }
+
+               /*
+                * hrmpf, we need to wait for more. we should use
+                * aio_suspend, for now just sleep a little and recheck
+                * status of busy-and-not-seen
+                */
+               usleep(1000);
+       }
+}
+
+/*
+ * io_event hook: return the io_u stored in slot 'event' by the last
+ * fio_posixaio_getevents() call.
+ */
+static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
+{
+       struct posixaio_data *pd = td->io_data;
+       struct io_u **slot = pd->aio_events + event;
+
+       return *slot;
+}
+
+/*
+ * io_queue hook: start the prepared aiocb with aio_read() or aio_write().
+ * If the submission fails, errno is stored in io_u->error. Returns
+ * io_u->error.
+ */
+static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+       struct aiocb *cb = &io_u->aiocb;
+       int failed;
+
+       failed = (io_u->ddir == DDIR_READ) ? aio_read(cb) : aio_write(cb);
+       if (failed)
+               io_u->error = errno;
+
+       return io_u->error;
+}
+
+/*
+ * io_cleanup hook: free the event array and the engine data. Safe to
+ * call when no engine data is attached.
+ */
+static void fio_posixaio_cleanup(struct thread_data *td)
+{
+       struct posixaio_data *pd = td->io_data;
+
+       if (!pd)
+               return;
+
+       free(pd->aio_events);
+       free(pd);
+       td->io_data = NULL;
+}
+
+/*
+ * Set up the posix aio engine for this thread: allocate the engine data
+ * with td->iodepth event slots and install the engine hooks.
+ *
+ * Returns 0 on success. If memory runs out, ENOMEM is recorded with
+ * td_verror() and 1 is returned without touching td's hooks.
+ */
+int fio_posixaio_init(struct thread_data *td)
+{
+       struct posixaio_data *pd;
+
+       pd = malloc(sizeof(*pd));
+       if (!pd) {
+               td_verror(td, ENOMEM);
+               return 1;
+       }
+
+       pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
+       if (!pd->aio_events && td->iodepth) {
+               td_verror(td, ENOMEM);
+               free(pd);
+               return 1;
+       }
+
+       td->io_prep = fio_posixaio_prep;
+       td->io_queue = fio_posixaio_queue;
+       td->io_getevents = fio_posixaio_getevents;
+       td->io_event = fio_posixaio_event;
+       td->io_cancel = fio_posixaio_cancel;
+       td->io_cleanup = fio_posixaio_cleanup;
+       td->io_sync = fio_io_sync;
+
+       td->io_data = pd;
+       return 0;
+}
+
+#else /* FIO_HAVE_POSIXAIO */
+
+/*
+ * Built without FIO_HAVE_POSIXAIO: the engine cannot be set up, always
+ * returns EINVAL.
+ */
+int fio_posixaio_init(struct thread_data *td)
+{
+       (void) td;
+
+       return EINVAL;
+}
+
+#endif /* FIO_HAVE_POSIXAIO */
+
+struct syncio_data {                   /* state of the sync and mmap engines, stored in td->io_data */
+       struct io_u *last_io_u;         /* last io_u queued without error; handed out as event 0 */
+};
+
+/*
+ * io_getevents hook for the synchronous engines. The io has already
+ * finished by the time it was queued, so this only reports whether there
+ * is an io_u on the busy list: 1 if so, 0 otherwise.
+ */
+static int fio_syncio_getevents(struct thread_data *td, int min, int max,
+                               struct timespec *t)
+{
+       /*
+        * we can only have one finished io_u for sync io, since the depth
+        * is always 1
+        */
+       assert(max <= 1);
+
+       return list_empty(&td->io_u_busylist) ? 0 : 1;
+}
+
+/*
+ * io_event hook for the synchronous engines: only event 0 exists, and
+ * it is the io_u remembered by the last successful queue call.
+ */
+static struct io_u *fio_syncio_event(struct thread_data *td, int event)
+{
+       struct syncio_data *sd;
+
+       assert(event == 0);
+
+       sd = td->io_data;
+       return sd->last_io_u;
+}
+
+/*
+ * io_prep hook for sync io: position td->fd at the io_u's offset.
+ * Returns 0 on success, 1 (with the error recorded via td_verror) if the
+ * seek fails.
+ */
+static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
+{
+       if (lseek(td->fd, io_u->offset, SEEK_SET) != -1)
+               return 0;
+
+       td_verror(td, errno);
+       return 1;
+}
+
+/*
+ * io_queue hook for sync io: read() or write() the io_u's buffer at the
+ * current file position. A short transfer sets io_u->resid and ENODATA,
+ * a non-positive short result stores errno in io_u->error. If no error is
+ * set the io_u is remembered as the completed event. Returns io_u->error.
+ */
+static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
+{
+       struct syncio_data *sd = td->io_data;
+       int done;
+
+       if (io_u->ddir != DDIR_READ)
+               done = write(td->fd, io_u->buf, io_u->buflen);
+       else
+               done = read(td->fd, io_u->buf, io_u->buflen);
+
+       if ((unsigned int) done != io_u->buflen) {
+               if (done <= 0)
+                       io_u->error = errno;
+               else {
+                       io_u->resid = io_u->buflen - done;
+                       io_u->error = ENODATA;
+               }
+       }
+
+       if (io_u->error)
+               return io_u->error;
+
+       sd->last_io_u = io_u;
+       return io_u->error;
+}
+
+/*
+ * io_cleanup hook: release the engine data hanging off td->io_data and
+ * clear the pointer.
+ */
+static void fio_syncio_cleanup(struct thread_data *td)
+{
+       if (!td->io_data)
+               return;
+
+       free(td->io_data);
+       td->io_data = NULL;
+}
+
+/*
+ * Set up the sync (read/write) engine for this thread: allocate the
+ * engine data and install the engine hooks. There is no cancel hook.
+ *
+ * Returns 0 on success. If memory runs out, ENOMEM is recorded with
+ * td_verror() and 1 is returned without touching td's hooks.
+ */
+int fio_syncio_init(struct thread_data *td)
+{
+       struct syncio_data *sd = malloc(sizeof(*sd));
+
+       if (!sd) {
+               td_verror(td, ENOMEM);
+               return 1;
+       }
+
+       td->io_prep = fio_syncio_prep;
+       td->io_queue = fio_syncio_queue;
+       td->io_getevents = fio_syncio_getevents;
+       td->io_event = fio_syncio_event;
+       td->io_cancel = NULL;
+       td->io_cleanup = fio_syncio_cleanup;
+       td->io_sync = fio_io_sync;
+
+       sd->last_io_u = NULL;
+       td->io_data = sd;
+       return 0;
+}
+
+/*
+ * io_queue hook for mmap io: copy between the io_u's buffer and the
+ * mapping, at the io_u's offset relative to td->file_offset. With
+ * odirect set, the range is additionally synced and dropped. If no error
+ * is set the io_u is remembered as the completed event. Returns
+ * io_u->error.
+ */
+static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+{
+       struct syncio_data *sd = td->io_data;
+       unsigned long long real_off = io_u->offset - td->file_offset;
+
+       if (io_u->ddir != DDIR_READ)
+               memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
+       else
+               memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
+
+       /*
+        * not really direct, but should drop the pages from the cache
+        */
+       if (td->odirect) {
+               if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
+                       io_u->error = errno;
+               if (madvise(td->mmap + real_off, io_u->buflen,  MADV_DONTNEED) < 0)
+                       io_u->error = errno;
+       }
+
+       if (io_u->error)
+               return io_u->error;
+
+       sd->last_io_u = io_u;
+       return io_u->error;
+}
+
+/*
+ * io_sync hook for mmap io: synchronously write back the whole mapping
+ * (td->file_size bytes). Returns the msync() result.
+ */
+static int fio_mmapio_sync(struct thread_data *td)
+{
+       int ret = msync(td->mmap, td->file_size, MS_SYNC);
+
+       return ret;
+}
+
+/*
+ * Set up the mmap engine for this thread. It shares the event handling
+ * and cleanup of the sync engine, but needs no prep step and syncs with
+ * msync.
+ *
+ * Returns 0 on success. If memory runs out, ENOMEM is recorded with
+ * td_verror() and 1 is returned without touching td's hooks.
+ */
+int fio_mmapio_init(struct thread_data *td)
+{
+       struct syncio_data *sd = malloc(sizeof(*sd));
+
+       if (!sd) {
+               td_verror(td, ENOMEM);
+               return 1;
+       }
+
+       td->io_prep = NULL;
+       td->io_queue = fio_mmapio_queue;
+       td->io_getevents = fio_syncio_getevents;
+       td->io_event = fio_syncio_event;
+       td->io_cancel = NULL;
+       td->io_cleanup = fio_syncio_cleanup;
+       td->io_sync = fio_mmapio_sync;
+
+       sd->last_io_u = NULL;
+       td->io_data = sd;
+       return 0;
+}
+
+#ifdef FIO_HAVE_SGIO
+
+struct sgio_cmd {                      /* per io_u command storage, indexed by io_u->index */
+       unsigned char cdb[10];          /* 10 byte command block that hdr->cmdp points at */
+       int nr;                         /* NOTE(review): no user visible in this part of the file */
+};
+
+struct sgio_data {                     /* sg engine state, stored in td->io_data */
+       struct sgio_cmd *cmds;          /* td->iodepth command slots, one per io_u index */
+       struct io_u **events;           /* td->iodepth slots holding completed io_us */
+       unsigned int bs;                /* device block size, used to turn bytes into blocks */
+};
+
+/*
+ * Reset an sg_io_hdr and the command block belonging to the io_u, then
+ * wire them together. The io_u itself travels along in usr_ptr so it can
+ * be recovered from a completed header. If fs is set, the io_u's data
+ * buffer and length are filled in as well.
+ */
+static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
+                         struct io_u *io_u, int fs)
+{
+       struct sgio_cmd *cmd = sd->cmds + io_u->index;
+
+       memset(cmd->cdb, 0, sizeof(cmd->cdb));
+       memset(hdr, 0, sizeof(*hdr));
+
+       hdr->interface_id = 'S';
+       hdr->pack_id = io_u->index;
+       hdr->usr_ptr = io_u;
+       hdr->cmdp = cmd->cdb;
+       hdr->cmd_len = sizeof(cmd->cdb);
+
+       if (!fs)
+               return;
+
+       hdr->dxferp = io_u->buf;
+       hdr->dxfer_len = io_u->buflen;
+}
+
+/*
+ * io_getevents hook for the sg read/write interface: read completed
+ * sg_io_hdr structures from td->fd and store the io_u of each one (taken
+ * from usr_ptr) in sd->events.
+ *
+ * With min == 0 the fd is switched to O_NONBLOCK for the duration of the
+ * call so that nothing blocks when no completions are pending. Otherwise
+ * each read is preceded by a poll() for POLLIN.
+ *
+ * Returns the number of events stored, or -1 if a read failed or the
+ * temporary header buffer could not be allocated (error recorded with
+ * td_verror). The timeout argument is not used.
+ */
+static int fio_sgio_getevents(struct thread_data *td, int min, int max,
+                             struct timespec *t)
+{
+       struct sgio_data *sd = td->io_data;
+       struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
+       void *buf = malloc(max * sizeof(struct sg_io_hdr));
+       int left = max, ret, events, i, r = 0, fl = 0;
+
+       if (!buf && max > 0) {
+               td_verror(td, ENOMEM);
+               return -1;
+       }
+
+       /*
+        * don't block for !events
+        */
+       if (!min) {
+               fl = fcntl(td->fd, F_GETFL);
+               fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
+       }
+
+       while (left) {
+               do {
+                       if (!min)
+                               break;
+                       poll(&pfd, 1, -1);
+                       if (pfd.revents & POLLIN)
+                               break;
+               } while (1);
+
+               ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
+               if (ret < 0) {
+                       if (errno == EAGAIN)
+                               break;
+                       td_verror(td, errno);
+                       r = -1;
+                       break;
+               } else if (!ret)
+                       break;
+
+               events = ret / sizeof(struct sg_io_hdr);
+
+               /*
+                * append behind what earlier reads delivered, r is the
+                * number of slots already in use
+                */
+               for (i = 0; i < events; i++) {
+                       struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
+
+                       sd->events[r + i] = hdr->usr_ptr;
+               }
+
+               left -= events;
+               r += events;
+       }
+
+       if (!min)
+               fcntl(td->fd, F_SETFL, fl);
+
+       free(buf);
+       return r;
+}
+
+/*
+ * Issue the io_u's sg header with the SG_IO ioctl. The io_u is put in
+ * event slot 0 before the call. Returns the ioctl() result.
+ */
+static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
+{
+       struct sgio_data *sd = td->io_data;
+
+       sd->events[0] = io_u;
+
+       return ioctl(td->fd, SG_IO, &io_u->hdr);
+}
+
+/*
+ * Issue the io_u's sg header by writing it to td->fd. If sync is set,
+ * the completed header is read back right away. Returns 0 on success or
+ * the errno of the failing write()/read().
+ */
+static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
+{
+       struct sg_io_hdr *hdr = &io_u->hdr;
+       int ret;
+
+       ret = write(td->fd, hdr, sizeof(*hdr));
+       if (ret < 0)
+               return errno;
+
+       if (!sync)
+               return 0;
+
+       ret = read(td->fd, hdr, sizeof(*hdr));
+       return ret < 0 ? errno : 0;
+}
+
+/*
+ * Issue an sg command: FIO_TYPE_BD files go through the SG_IO ioctl
+ * (sync is not passed on in that case), everything else through
+ * write/read on the fd.
+ */
+static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
+{
+       if (td->filetype != FIO_TYPE_BD)
+               return fio_sgio_rw_doio(td, io_u, sync);
+
+       return fio_sgio_ioctl_doio(td, io_u);
+}
+
+/*
+ * io_sync hook for sg: send a data-less command with opcode 0x35 (SCSI
+ * SYNCHRONIZE CACHE(10)) and wait for it. Returns ENOMEM if no io_u is
+ * available, otherwise the fio_sgio_doio() result.
+ */
+static int fio_sgio_sync(struct thread_data *td)
+{
+       struct sgio_data *sd = td->io_data;
+       struct io_u *io_u = __get_io_u(td);
+       struct sg_io_hdr *hdr;
+       int ret;
+
+       if (!io_u)
+               return ENOMEM;
+
+       hdr = &io_u->hdr;
+       sgio_hdr_init(sd, hdr, io_u, 0);
+       hdr->cmdp[0] = 0x35;
+       hdr->dxfer_direction = SG_DXFER_NONE;
+
+       ret = fio_sgio_doio(td, io_u, 1);
+
+       put_io_u(td, io_u);
+       return ret;
+}
+
+/*
+ * io_prep hook for sg: build a 10 byte read (opcode 0x28) or write
+ * (opcode 0x2a) command for the io_u. The start block goes big-endian
+ * into bytes 2-5 and the block count into bytes 7-8 of the command.
+ *
+ * Returns 0 on success, EINVAL if the io length is not a multiple of the
+ * device block size.
+ */
+static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
+{
+       struct sgio_data *sd = td->io_data;
+       struct sg_io_hdr *hdr = &io_u->hdr;
+       int is_read = (io_u->ddir == DDIR_READ);
+       int nr_blocks, lba;
+
+       if (io_u->buflen & (sd->bs - 1)) {
+               fprintf(stderr, "read/write not sector aligned\n");
+               return EINVAL;
+       }
+
+       sgio_hdr_init(sd, hdr, io_u, 1);
+
+       hdr->dxfer_direction = is_read ? SG_DXFER_FROM_DEV : SG_DXFER_TO_DEV;
+       hdr->cmdp[0] = is_read ? 0x28 : 0x2a;
+
+       lba = io_u->offset / sd->bs;
+       nr_blocks = io_u->buflen / sd->bs;
+
+       hdr->cmdp[2] = (lba >> 24) & 0xff;
+       hdr->cmdp[3] = (lba >> 16) & 0xff;
+       hdr->cmdp[4] = (lba >>  8) & 0xff;
+       hdr->cmdp[5] = lba & 0xff;
+
+       hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
+       hdr->cmdp[8] = nr_blocks & 0xff;
+       return 0;
+}
+
+/*
+ * io_queue hook for sg: issue the prepared command without waiting for
+ * completion on the read/write interface.
+ *
+ * fio_sgio_doio() has two failure conventions: the ioctl path returns a
+ * negative value with errno set, the read/write path returns the errno
+ * value itself. Both are turned into io_u->error here. If the command
+ * was issued but the header carries a non-zero status, resid is copied
+ * and the error is EIO. Returns io_u->error.
+ */
+static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
+{
+       struct sg_io_hdr *hdr = &io_u->hdr;
+       int ret;
+
+       ret = fio_sgio_doio(td, io_u, 0);
+
+       if (ret < 0)
+               io_u->error = errno;
+       else if (ret > 0)
+               io_u->error = ret;
+       else if (hdr->status) {
+               io_u->resid = hdr->resid;
+               io_u->error = EIO;
+       }
+
+       return io_u->error;
+}
+
+/*
+ * io_event hook for sg: return the io_u stored in event slot 'event'.
+ */
+static struct io_u *fio_sgio_event(struct thread_data *td, int event)
+{
+       struct sgio_data *sd = td->io_data;
+       struct io_u **slot = sd->events + event;
+
+       return *slot;
+}
+
+/*
+ * Ask the device for its block size with opcode 0x25 (SCSI READ
+ * CAPACITY(10)). The block size is decoded big-endian from bytes 4-7 of
+ * the 8 byte reply and stored in *bs.
+ *
+ * Returns 0 on success, otherwise the fio_sgio_doio() result, in which
+ * case *bs is left alone.
+ */
+static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
+{
+       struct sgio_data *sd = td->io_data;
+       struct sg_io_hdr *hdr;
+       struct io_u *io_u;
+       unsigned char buf[8];
+       int ret;
+
+       io_u = __get_io_u(td);
+       assert(io_u);
+
+       hdr = &io_u->hdr;
+       sgio_hdr_init(sd, hdr, io_u, 0);
+       memset(buf, 0, sizeof(buf));
+
+       hdr->cmdp[0] = 0x25;
+       hdr->dxfer_direction = SG_DXFER_FROM_DEV;
+       hdr->dxferp = buf;
+       hdr->dxfer_len = sizeof(buf);
+
+       ret = fio_sgio_doio(td, io_u, 1);
+       if (!ret)
+               *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
+
+       /* the io_u was only borrowed for the command */
+       put_io_u(td, io_u);
+       return ret;
+}
+
+/*
+ * io_cleanup hook for sg: free the command and event arrays along with
+ * the engine data. Safe to call when no engine data is attached.
+ */
+static void fio_sgio_cleanup(struct thread_data *td)
+{
+       struct sgio_data *sd = td->io_data;
+
+       if (sd) {
+               free(sd->events);
+               free(sd->cmds);
+               free(sd);
+               td->io_data = NULL;
+       }
+}
+
+/*
+ * Set up the sg engine for this thread: allocate td->iodepth command and
+ * event slots, find out the device block size and install the engine
+ * hooks.
+ *
+ * FIO_TYPE_BD files get the block size from BLKSSZGET and use the SG_IO
+ * ioctl, so completions are reported the synchronous way. FIO_TYPE_CHAR
+ * files must answer SG_GET_VERSION_NUM and are asked for the block size
+ * with fio_sgio_get_bs(). Any other file type is rejected.
+ *
+ * Returns 0 on success and non-zero on failure. On failure everything
+ * allocated here is freed again and td->io_data is reset to NULL.
+ */
+int fio_sgio_init(struct thread_data *td)
+{
+       struct sgio_data *sd;
+       unsigned int bs;
+       int ret = 1;
+
+       sd = calloc(1, sizeof(*sd));
+       if (!sd) {
+               td_verror(td, ENOMEM);
+               return 1;
+       }
+
+       sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
+       sd->events = malloc(td->iodepth * sizeof(struct io_u *));
+
+       /* fio_sgio_get_bs() below finds the engine data through td */
+       td->io_data = sd;
+
+       if ((!sd->cmds || !sd->events) && td->iodepth) {
+               td_verror(td, ENOMEM);
+               goto err;
+       }
+
+       if (td->filetype == FIO_TYPE_BD) {
+               if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
+                       td_verror(td, errno);
+                       goto err;
+               }
+       } else if (td->filetype == FIO_TYPE_CHAR) {
+               int version;
+
+               if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
+                       td_verror(td, errno);
+                       goto err;
+               }
+
+               ret = fio_sgio_get_bs(td, &bs);
+               if (ret)
+                       goto err;
+       } else {
+               fprintf(stderr, "ioengine sgio only works on block devices\n");
+               goto err;
+       }
+
+       sd->bs = bs;
+
+       td->io_prep = fio_sgio_prep;
+       td->io_queue = fio_sgio_queue;
+
+       if (td->filetype == FIO_TYPE_BD)
+               td->io_getevents = fio_syncio_getevents;
+       else
+               td->io_getevents = fio_sgio_getevents;
+
+       td->io_event = fio_sgio_event;
+       td->io_cancel = NULL;
+       /* the generic sync cleanup would leak the cmds and events arrays */
+       td->io_cleanup = fio_sgio_cleanup;
+       td->io_sync = fio_sgio_sync;
+
+       /*
+        * we want to do it, regardless of whether odirect is set or not
+        */
+       td->override_sync = 1;
+       return 0;
+err:
+       fio_sgio_cleanup(td);
+       return ret;
+}
+
+#else /* FIO_HAVE_SGIO */
+
+/*
+ * Built without FIO_HAVE_SGIO: the engine cannot be set up, always
+ * returns EINVAL.
+ */
+int fio_sgio_init(struct thread_data *td)
+{
+       (void) td;
+
+       return EINVAL;
+}
+
+#endif /* FIO_HAVE_SGIO */
+
+#ifdef FIO_HAVE_SPLICE
+struct spliceio_data {                 /* splice engine state, stored in td->io_data */
+       struct io_u *last_io_u;         /* last io_u queued without error; handed out as event 0 */
+       int pipe[2];                    /* staging pipe for splice: [0] is read from, [1] is written to */
+};
+
+/*
+ * io_event hook for splice: only event 0 exists, and it is the io_u
+ * remembered by the last successful queue call.
+ */
+static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
+{
+       struct spliceio_data *sd;
+
+       assert(event == 0);
+
+       sd = td->io_data;
+       return sd->last_io_u;
+}
+
+/*
+ * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
+ * So just splice the data from the file into the pipe, and use regular
+ * read to fill the buffer. Doesn't make a lot of sense, but...
+ */
+/*
+ * Read the io_u's range from the file: splice at most SPLICE_DEF_SIZE
+ * bytes at a time from td->fd into the pipe, then read() them out of the
+ * pipe into the io_u's buffer.
+ *
+ * Returns the number of bytes transferred. That is less than
+ * io_u->buflen if the file ended early, which the caller treats as a
+ * short read. Returns -1 with errno set on failure; the caller tells
+ * errors from byte counts by the sign, so errno itself must not be
+ * returned here.
+ */
+static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
+{
+       struct spliceio_data *sd = td->io_data;
+       int ret, ret2, buflen;
+       off_t offset;
+       void *p;
+
+       offset = io_u->offset;
+       buflen = io_u->buflen;
+       p = io_u->buf;
+       while (buflen) {
+               int this_len = buflen;
+
+               if (this_len > SPLICE_DEF_SIZE)
+                       this_len = SPLICE_DEF_SIZE;
+
+               ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
+               if (ret < 0) {
+                       if (errno == ENODATA || errno == EAGAIN)
+                               continue;
+
+                       return -1;
+               }
+
+               /*
+                * 0 means end of input, stop instead of spinning on a
+                * length that can never shrink
+                */
+               if (!ret)
+                       break;
+
+               buflen -= ret;
+
+               while (ret) {
+                       ret2 = read(sd->pipe[0], p, ret);
+                       if (ret2 < 0)
+                               return -1;
+
+                       ret -= ret2;
+                       p += ret2;
+               }
+       }
+
+       return io_u->buflen - buflen;
+}
+
+/*
+ * For splice writing, we can vmsplice our data buffer directly into a
+ * pipe and then splice that to a file.
+ */
+/*
+ * For splice writing, we can vmsplice our data buffer directly into a
+ * pipe and then splice that to a file.
+ *
+ * Returns io_u->buflen once everything has been written. Returns -1 with
+ * errno set on failure; the caller tells errors from byte counts by the
+ * sign, so errno itself must not be returned here.
+ */
+static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
+{
+       struct spliceio_data *sd = td->io_data;
+       struct iovec iov[1] = {
+               {
+                       .iov_base = io_u->buf,
+                       .iov_len = io_u->buflen,
+               }
+       };
+       struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
+       off_t off = io_u->offset;
+       int ret, ret2;
+
+       while (iov[0].iov_len) {
+               /* wait until the pipe can take data */
+               if (poll(&pfd, 1, -1) < 0)
+                       return -1;
+
+               ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
+               if (ret < 0)
+                       return -1;
+
+               iov[0].iov_len -= ret;
+               iov[0].iov_base += ret;
+
+               /* drain what was just queued in the pipe into the file */
+               while (ret) {
+                       ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
+                       if (ret2 < 0)
+                               return -1;
+
+                       ret -= ret2;
+               }
+       }
+
+       return io_u->buflen;
+}
+
+/*
+ * io_queue hook for splice: run the read or write helper for the io_u.
+ * A positive result that differs from the io length sets io_u->resid and
+ * ENODATA, a non-positive one stores errno in io_u->error. If no error is
+ * set the io_u is remembered as the completed event. Returns io_u->error.
+ */
+static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
+{
+       struct spliceio_data *sd = td->io_data;
+       int done;
+
+       if (io_u->ddir != DDIR_READ)
+               done = fio_splice_write(td, io_u);
+       else
+               done = fio_splice_read(td, io_u);
+
+       if ((unsigned int) done != io_u->buflen) {
+               if (done <= 0)
+                       io_u->error = errno;
+               else {
+                       io_u->resid = io_u->buflen - done;
+                       io_u->error = ENODATA;
+               }
+       }
+
+       if (io_u->error)
+               return io_u->error;
+
+       sd->last_io_u = io_u;
+       return io_u->error;
+}
+
+/*
+ * io_cleanup hook for splice: close both pipe ends and free the engine
+ * data. Safe to call when no engine data is attached.
+ */
+static void fio_spliceio_cleanup(struct thread_data *td)
+{
+       struct spliceio_data *sd = td->io_data;
+
+       if (!sd)
+               return;
+
+       close(sd->pipe[0]);
+       close(sd->pipe[1]);
+       free(sd);
+       td->io_data = NULL;
+}
+
+/*
+ * Set up the splice engine for this thread: allocate the engine data,
+ * install the engine hooks and create the staging pipe.
+ *
+ * Returns 0 on success. If the allocation or pipe() fails, the error is
+ * recorded with td_verror(), the engine data is freed and 1 is returned.
+ */
+int fio_spliceio_init(struct thread_data *td)
+{
+       struct spliceio_data *sd = malloc(sizeof(*sd));
+
+       if (!sd) {
+               td_verror(td, ENOMEM);
+               return 1;
+       }
+
+       /*
+        * The splice helpers pass the file offset explicitly, so there is
+        * nothing to prepare. Say so explicitly, as fio_mmapio_init() does.
+        */
+       td->io_prep = NULL;
+       td->io_queue = fio_spliceio_queue;
+       td->io_getevents = fio_syncio_getevents;
+       td->io_event = fio_spliceio_event;
+       td->io_cancel = NULL;
+       td->io_cleanup = fio_spliceio_cleanup;
+       td->io_sync = fio_io_sync;
+
+       sd->last_io_u = NULL;
+       if (pipe(sd->pipe) < 0) {
+               td_verror(td, errno);
+               free(sd);
+               return 1;
+       }
+
+       td->io_data = sd;
+       return 0;
+}
+
+#else /* FIO_HAVE_SPLICE */
+
+/*
+ * Built without FIO_HAVE_SPLICE: the engine cannot be set up, always
+ * returns EINVAL.
+ */
+int fio_spliceio_init(struct thread_data *td)
+{
+       (void) td;
+
+       return EINVAL;
+}
+
+#endif /* FIO_HAVE_SPLICE */
diff --git a/log.c b/log.c
new file mode 100644 (file)
index 0000000..42aedf2
--- /dev/null
+++ b/log.c
@@ -0,0 +1,162 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "list.h"
+#include "fio.h"
+
+/*
+ * Append one io_u to the io log being written, as a
+ * "direction,offset,length" line.
+ */
+void write_iolog_put(struct thread_data *td, struct io_u *io_u)
+{
+       FILE *log = td->iolog_f;
+
+       fprintf(log, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen);
+}
+
+/*
+ * Take the next entry off the list read from the io log and copy its
+ * offset, length and direction into io_u. The entry is freed.
+ *
+ * Returns 0 if an entry was consumed, 1 if the list is empty.
+ */
+int read_iolog_get(struct thread_data *td, struct io_u *io_u)
+{
+       struct io_piece *ipo;
+
+       if (list_empty(&td->io_log_list))
+               return 1;
+
+       ipo = list_entry(td->io_log_list.next, struct io_piece, list);
+       list_del(&ipo->list);
+
+       io_u->ddir = ipo->ddir;
+       io_u->offset = ipo->offset;
+       io_u->buflen = ipo->len;
+
+       free(ipo);
+       return 0;
+}
+
+/*
+ * Drop and free every entry on the io history list.
+ */
+void prune_io_piece_log(struct thread_data *td)
+{
+       struct list_head *head = &td->io_hist_list;
+
+       while (!list_empty(head)) {
+               struct io_piece *ipo;
+
+               ipo = list_entry(head->next, struct io_piece, list);
+               list_del(&ipo->list);
+               free(ipo);
+       }
+}
+
+/*
+ * Log a successful write, so we can unwind the log for verify. The
+ * offset and length of the io_u are recorded on the io history list.
+ */
+void log_io_piece(struct thread_data *td, struct io_u *io_u)
+{
+       struct list_head *head = &td->io_hist_list;
+       struct list_head *pos;
+       struct io_piece *ipo;
+
+       ipo = malloc(sizeof(*ipo));
+       INIT_LIST_HEAD(&ipo->list);
+       ipo->offset = io_u->offset;
+       ipo->len = io_u->buflen;
+
+       /*
+        * for random io where the writes extend the file, it will typically
+        * be laid out with the block scattered as written. it's faster to
+        * read them in in that order again, so don't sort
+        */
+       if (td->sequential || !td->overwrite) {
+               list_add_tail(&ipo->list, head);
+               return;
+       }
+
+       /*
+        * for random io, keep the list sorted by offset so verify will run
+        * faster: walk backwards to the last entry with a smaller offset
+        * and insert right behind it (or at the front if there is none)
+        */
+       for (pos = head->prev; pos != head; pos = pos->prev) {
+               struct io_piece *cur = list_entry(pos, struct io_piece, list);
+
+               if (cur->offset < ipo->offset)
+                       break;
+       }
+
+       list_add(&ipo->list, pos);
+}
+
+/*
+ * Flush and close the io log being written. The stream buffer is freed
+ * only after the stream itself is gone.
+ */
+void write_iolog_close(struct thread_data *td)
+{
+       FILE *log = td->iolog_f;
+
+       fflush(log);
+       fclose(log);
+
+       free(td->iolog_buf);
+}
+
+/*
+ * Set up io logging for this thread, if requested.
+ *
+ * For write_iolog the log file is opened for writing with an 8k stream
+ * buffer, and that is all. For read_iolog the file is parsed, one
+ * "direction,offset,length" entry per line, into td->io_log_list.
+ * Malformed lines and unknown directions are reported on stderr and
+ * skipped. td->max_bs is raised to the largest length seen, and the job
+ * direction is derived from the log: read only, write only, or mixed.
+ *
+ * Returns 0 on success or if no io log is configured. Returns 1 if the
+ * file cannot be opened, memory runs out, or a read log holds no usable
+ * entries.
+ */
+int init_iolog(struct thread_data *td)
+{
+       unsigned long long offset;
+       unsigned int bytes;
+       char *str, *p;
+       FILE *f;
+       int rw, i, reads, writes;
+
+       if (!td->read_iolog && !td->write_iolog)
+               return 0;
+
+       if (td->read_iolog)
+               f = fopen(td->iolog_file, "r");
+       else
+               f = fopen(td->iolog_file, "w");
+
+       if (!f) {
+               perror("fopen iolog");
+               printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog);
+               return 1;
+       }
+
+       /*
+        * That's it for writing, setup a log buffer and we're done.
+        * If the malloc fails, setvbuf gets a NULL buffer and allocates
+        * one itself.
+        */
+       if (td->write_iolog) {
+               td->iolog_f = f;
+               td->iolog_buf = malloc(8192);
+               setvbuf(f, td->iolog_buf, _IOFBF, 8192);
+               return 0;
+       }
+
+       /*
+        * Read in the read iolog and store it, reuse the infrastructure
+        * for doing verifications.
+        */
+       str = malloc(4096);
+       if (!str) {
+               fclose(f);
+               return 1;
+       }
+
+       reads = writes = i = 0;
+       while ((p = fgets(str, 4096, f)) != NULL) {
+               struct io_piece *ipo;
+
+               if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) {
+                       fprintf(stderr, "bad iolog: %s\n", p);
+                       continue;
+               }
+               if (rw == DDIR_READ)
+                       reads++;
+               else if (rw == DDIR_WRITE)
+                       writes++;
+               else {
+                       fprintf(stderr, "bad ddir: %d\n", rw);
+                       continue;
+               }
+
+               ipo = malloc(sizeof(*ipo));
+               INIT_LIST_HEAD(&ipo->list);
+               ipo->offset = offset;
+               ipo->len = bytes;
+               if (bytes > td->max_bs)
+                       td->max_bs = bytes;
+               ipo->ddir = rw;
+               list_add_tail(&ipo->list, &td->io_log_list);
+               i++;
+       }
+
+       free(str);
+       fclose(f);
+
+       if (!i)
+               return 1;
+
+       /* a log holding nothing but writes makes this a write job */
+       if (reads && !writes)
+               td->ddir = DDIR_READ;
+       else if (!reads && writes)
+               td->ddir = DDIR_WRITE;
+       else
+               td->iomix = 1;
+
+       return 0;
+}
diff --git a/stat.c b/stat.c
new file mode 100644 (file)
index 0000000..5452716
--- /dev/null
+++ b/stat.c
@@ -0,0 +1,518 @@
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <dirent.h>
+#include <libgen.h>
+#include <math.h>
+
+#include "fio.h"
+
+/*
+ * Timer value handed to setitimer() by disk_util_timer_arm().
+ */
+static struct itimerval itimer;
+/*
+ * All tracked disks (struct disk_util), appended by disk_util_add().
+ */
+static LIST_HEAD(disk_list);
+
+/*
+ * Read the first line of the file at du->path and parse its eleven
+ * counters into dus. The ninth field (in-flight count) is parsed but
+ * thrown away. Returns 0 when all eleven fields were read, 1 if the file
+ * could not be opened, was empty, or did not parse.
+ */
+static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus)
+{
+       unsigned in_flight;
+       char buf[256];
+       int ret = 1;
+       FILE *fp;
+
+       fp = fopen(du->path, "r");
+       if (!fp)
+               return 1;
+
+       if (fgets(buf, sizeof(buf), fp) != NULL) {
+               int nr;
+
+               nr = sscanf(buf, "%u %u %llu %u %u %u %llu %u %u %u %u\n",
+                           &dus->ios[0], &dus->merges[0], &dus->sectors[0],
+                           &dus->ticks[0], &dus->ios[1], &dus->merges[1],
+                           &dus->sectors[1], &dus->ticks[1], &in_flight,
+                           &dus->io_ticks, &dus->time_in_queue);
+               if (nr == 11)
+                       ret = 0;
+       }
+
+       fclose(fp);
+       return ret;
+}
+
+/*
+ * Take a fresh counter snapshot for one disk and add the difference
+ * against the previous snapshot to the running totals in du->dus. Also
+ * accumulates the wall-clock milliseconds since the previous update in
+ * du->msec. Nothing is touched if the snapshot cannot be read.
+ */
+static void update_io_tick_disk(struct disk_util *du)
+{
+       struct disk_util_stat now;
+       struct disk_util_stat *total = &du->dus;
+       struct disk_util_stat *prev = &du->last_dus;
+       struct timeval tv;
+       int i;
+
+       if (get_io_ticks(du, &now))
+               return;
+
+       /*
+        * index 0 and 1 are the two data directions
+        */
+       for (i = 0; i < 2; i++) {
+               total->sectors[i] += (now.sectors[i] - prev->sectors[i]);
+               total->ios[i] += (now.ios[i] - prev->ios[i]);
+               total->merges[i] += (now.merges[i] - prev->merges[i]);
+               total->ticks[i] += (now.ticks[i] - prev->ticks[i]);
+       }
+
+       total->io_ticks += (now.io_ticks - prev->io_ticks);
+       total->time_in_queue += (now.time_in_queue - prev->time_in_queue);
+
+       gettimeofday(&tv, NULL);
+       du->msec += mtime_since(&du->time, &tv);
+
+       /*
+        * this snapshot becomes the baseline for the next update
+        */
+       memcpy(&du->time, &tv, sizeof(tv));
+       memcpy(prev, &now, sizeof(now));
+}
+
+/*
+ * Refresh the accumulated statistics of every disk on disk_list.
+ */
+void update_io_ticks(void)
+{
+       struct list_head *pos;
+
+       list_for_each(pos, &disk_list) {
+               struct disk_util *cur = list_entry(pos, struct disk_util, list);
+
+               update_io_tick_disk(cur);
+       }
+}
+
+/*
+ * Return 1 if a disk with device number 'dev' is already on disk_list,
+ * 0 otherwise.
+ */
+static int disk_util_exists(dev_t dev)
+{
+       struct list_head *pos;
+
+       list_for_each(pos, &disk_list) {
+               struct disk_util *cur = list_entry(pos, struct disk_util, list);
+
+               if (cur->dev == dev)
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate a tracking entry for device 'dev' whose directory is 'path',
+ * record "<path>/stat" as the file to poll, take the initial counter
+ * snapshot and append the entry to disk_list.
+ *
+ * If the allocation fails the disk is simply not tracked. The return
+ * value of the initial get_io_ticks() is ignored; on failure the baseline
+ * stays at the zeroes written by memset().
+ */
+static void disk_util_add(dev_t dev, char *path)
+{
+       struct disk_util *du = malloc(sizeof(*du));
+
+       if (!du) {
+               perror("malloc disk_util");
+               return;
+       }
+
+       memset(du, 0, sizeof(*du));
+       INIT_LIST_HEAD(&du->list);
+
+       /*
+        * bounded write, du->path is a fixed size buffer inside *du
+        */
+       snprintf(du->path, sizeof(du->path), "%s/stat", path);
+
+       /*
+        * basename() is called after path has been consumed above, since
+        * it is allowed to modify its argument
+        */
+       du->name = strdup(basename(path));
+       du->dev = dev;
+
+       gettimeofday(&du->time, NULL);
+       get_io_ticks(du, &du->last_dus);
+
+       list_add_tail(&du->list, &disk_list);
+}
+
+/*
+ * Compare 'dev' with the "major:minor" pair stored on the first line of
+ * the file at 'path'. Returns 0 if they name the same device, 1 on
+ * mismatch or if the file cannot be opened, read or parsed.
+ *
+ * makedev() is used to build the number to compare against, since a
+ * plain (major << 8) | minor only equals dev_t for minors below 256.
+ */
+static int check_dev_match(dev_t dev, char *path)
+{
+       unsigned int maj, min;
+       char line[256];
+       int ret = 1;
+       FILE *f;
+
+       f = fopen(path, "r");
+       if (!f) {
+               perror("open path");
+               return 1;
+       }
+
+       if (fgets(line, sizeof(line), f) &&
+           sscanf(line, "%u:%u", &maj, &min) == 2 &&
+           makedev(maj, min) == dev)
+               ret = 0;
+
+       fclose(f);
+       return ret;
+}
+
+/*
+ * Recursively search the directory tree rooted at 'path' for a file named
+ * "dev" whose contents match 'dev' (see check_dev_match()).
+ *
+ * Returns 1 if found, 0 otherwise. On success 'path' is rewritten in
+ * place to the directory that holds the matching "dev" file, so the
+ * caller's buffer must be large enough for the deeper path.
+ *
+ * NOTE(review): full_path is a fixed 256 byte buffer filled with an
+ * unbounded sprintf(), and strcpy() writes back into the caller's buffer
+ * without a length check; this relies on the paths being short.
+ */
+static int find_block_dir(dev_t dev, char *path)
+{
+       struct dirent *dir;
+       struct stat st;
+       int found = 0;
+       DIR *D;
+
+       D = opendir(path);
+       if (!D)
+               return 0;
+
+       while ((dir = readdir(D)) != NULL) {
+               char full_path[256];
+
+               if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
+                       continue;
+               /*
+                * entries named "device" are never descended into
+                */
+               if (!strcmp(dir->d_name, "device"))
+                       continue;
+
+               sprintf(full_path, "%s/%s", path, dir->d_name);
+
+               /*
+                * A matching "dev" file means 'path' itself is the answer,
+                * so it is left untouched. A non-matching one falls through
+                * and is skipped below as a non-directory.
+                */
+               if (!strcmp(dir->d_name, "dev")) {
+                       if (!check_dev_match(dev, full_path)) {
+                               found = 1;
+                               break;
+                       }
+               }
+
+               /*
+                * a stat failure aborts the scan of this directory
+                */
+               if (stat(full_path, &st) == -1) {
+                       perror("stat");
+                       break;
+               }
+
+               if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
+                       continue;
+
+               /*
+                * The recursive call updates full_path to the matching
+                * directory, pass that back up through 'path'.
+                */
+               found = find_block_dir(dev, full_path);
+               if (found) {
+                       strcpy(path, full_path);
+                       break;
+               }
+       }
+
+       closedir(D);
+       return found;
+}
+
+/*
+ * Set up disk utilization tracking for the device backing td->file_name.
+ * Does nothing unless td->do_disk_util is set, or if the device is
+ * already tracked or cannot be located below /sys/block. On success the
+ * located directory is stored in td->sysfs_root and registered with
+ * disk_util_add().
+ *
+ * All writes into the local path buffers are bounded, and the result of
+ * dirname() is moved into place with memmove() since it may point into
+ * the very buffer being written.
+ */
+void init_disk_util(struct thread_data *td)
+{
+       struct stat st;
+       char foo[256], tmp[256];
+       dev_t dev;
+       char *p;
+
+       if (!td->do_disk_util)
+               return;
+
+       if (!stat(td->file_name, &st)) {
+               if (S_ISBLK(st.st_mode))
+                       dev = st.st_rdev;
+               else
+                       dev = st.st_dev;
+       } else {
+               /*
+                * The file itself cannot be stat'ed, so use the device of
+                * the directory it lives in.
+                */
+               if (strlen(td->file_name) >= sizeof(foo)) {
+                       fprintf(stderr, "disk util: file name too long\n");
+                       return;
+               }
+
+               strcpy(foo, td->file_name);
+               p = dirname(foo);
+               if (stat(p, &st)) {
+                       perror("disk util stat");
+                       return;
+               }
+
+               dev = st.st_dev;
+       }
+
+       if (disk_util_exists(dev))
+               return;
+
+       snprintf(foo, sizeof(foo), "/sys/block");
+       if (!find_block_dir(dev, foo))
+               return;
+
+       /*
+        * If there's a ../queue/ directory there, we are inside a partition.
+        * Check if that is the case and jump back. For loop/md/dm etc we
+        * are already in the right spot.
+        */
+       snprintf(tmp, sizeof(tmp), "%s/../queue", foo);
+       if (!stat(tmp, &st)) {
+               p = dirname(foo);
+               snprintf(tmp, sizeof(tmp), "%s/queue", p);
+               if (stat(tmp, &st)) {
+                       fprintf(stderr, "unknown sysfs layout\n");
+                       return;
+               }
+
+               /*
+                * p may alias foo, so an overlapping-safe copy is needed
+                */
+               memmove(foo, p, strlen(p) + 1);
+       }
+
+       td->sysfs_root = strdup(foo);
+       disk_util_add(dev, foo);
+}
+
+/*
+ * Arm ITIMER_REAL to expire DISK_UTIL_MSEC milliseconds from now. Only
+ * it_value is written here, it_interval is left as it was.
+ */
+void disk_util_timer_arm(void)
+{
+       struct timeval *next = &itimer.it_value;
+
+       next->tv_sec = 0;
+       next->tv_usec = DISK_UTIL_MSEC * 1000;
+
+       setitimer(ITIMER_REAL, &itimer, NULL);
+}
+
+/*
+ * Sample getrusage() for the calling process and add the user time,
+ * system time and context switches consumed since the previous sample
+ * (td->ru_start) to the thread's totals. The new sample then becomes the
+ * baseline. Skipped entirely while the thread has no recorded runtime.
+ */
+void update_rusage_stat(struct thread_data *td)
+{
+       if ((td->runtime[0] + td->runtime[1]) == 0)
+               return;
+
+       getrusage(RUSAGE_SELF, &td->ru_end);
+
+       td->usr_time += mtime_since(&td->ru_start.ru_utime,
+                                   &td->ru_end.ru_utime);
+       td->sys_time += mtime_since(&td->ru_start.ru_stime,
+                                   &td->ru_end.ru_stime);
+
+       /*
+        * voluntary plus involuntary switches, now minus baseline
+        */
+       td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw -
+                       (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
+
+       memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end));
+}
+
+/*
+ * Derive min, max, mean and sample standard deviation from the running
+ * totals in 'is' (is->val is the sum of the samples and is->val_sq the
+ * sum of their squares, as maintained by add_stat_sample()).
+ *
+ * Returns 0 if there are no samples or every result is zero, in which
+ * case the caller should not print anything, and 1 otherwise. The output
+ * arguments are only written when there is at least one sample.
+ */
+static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
+                   double *mean, double *dev)
+{
+       double n, var;
+
+       if (is->samples == 0)
+               return 0;
+
+       *min = is->min_val;
+       *max = is->max_val;
+
+       n = (double) is->samples;
+       *mean = (double) is->val / n;
+
+       /*
+        * Sample variance is (sum(x^2) - n * mean^2) / (n - 1). It is
+        * undefined for a single sample, report no deviation then. Clamp
+        * at zero so rounding can never feed sqrt() a negative value.
+        */
+       if (n > 1.0) {
+               var = ((double) is->val_sq - n * *mean * *mean) / (n - 1.0);
+               if (var < 0.0)
+                       var = 0.0;
+               *dev = sqrt(var);
+       } else
+               *dev = 0.0;
+
+       if (!(*min + *max) && !(*mean + *dev))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * Print the aggregate summary for group 'id'. A direction gets a line
+ * only if its maximum runtime in the group is non-zero.
+ */
+static void show_group_stats(struct group_run_stats *rs, int id)
+{
+       printf("\nRun status group %d (all jobs):\n", id);
+
+       if (rs->max_run[DDIR_READ]) {
+               printf("   READ: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n",
+                      rs->io_kb[0] >> 10, rs->agg[0],
+                      rs->min_bw[0], rs->max_bw[0],
+                      rs->min_run[0], rs->max_run[0]);
+       }
+
+       if (rs->max_run[DDIR_WRITE]) {
+               printf("  WRITE: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n",
+                      rs->io_kb[1] >> 10, rs->agg[1],
+                      rs->min_bw[1], rs->max_bw[1],
+                      rs->min_run[1], rs->max_run[1]);
+       }
+}
+
+/*
+ * Print one line of accumulated statistics for every disk on disk_list.
+ * Utilization is io_ticks as a percentage of the elapsed milliseconds,
+ * capped at 100.
+ */
+static void show_disk_util(void)
+{
+       struct list_head *pos;
+
+       printf("\nDisk stats (read/write):\n");
+
+       list_for_each(pos, &disk_list) {
+               struct disk_util *du = list_entry(pos, struct disk_util, list);
+               struct disk_util_stat *dus = &du->dus;
+               double util;
+
+               util = (double) 100 * dus->io_ticks / (double) du->msec;
+               if (util > 100.0)
+                       util = 100.0;
+
+               printf("  %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, in_queue=%u, util=%3.2f%%\n",
+                      du->name, dus->ios[0], dus->ios[1],
+                      dus->merges[0], dus->merges[1],
+                      dus->ticks[0], dus->ticks[1],
+                      dus->time_in_queue, util);
+       }
+}
+
+/*
+ * Print the per-direction report for one thread: the io/bandwidth/runtime
+ * summary, then submission latency, completion latency and bandwidth
+ * statistics for whichever of those calc_lat() reports data. Nothing is
+ * printed if the thread has no runtime in this direction.
+ */
+static void show_ddir_status(struct thread_data *td, struct group_run_stats *rs,
+                            int ddir)
+{
+       char *ddir_str[] = { "read ", "write" };
+       unsigned long lo, hi;
+       unsigned long long bw;
+       double avg, stddev;
+
+       if (td->runtime[ddir] == 0)
+               return;
+
+       /*
+        * bytes per msec of runtime, shown as KiB/s
+        */
+       bw = td->io_bytes[ddir] / td->runtime[ddir];
+       printf("  %s: io=%6lluMiB, bw=%6lluKiB/s, runt=%6lumsec\n",
+              ddir_str[ddir], td->io_bytes[ddir] >> 20, bw,
+              td->runtime[ddir]);
+
+       if (calc_lat(&td->slat_stat[ddir], &lo, &hi, &avg, &stddev))
+               printf("    slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n",
+                      lo, hi, avg, stddev);
+
+       if (calc_lat(&td->clat_stat[ddir], &lo, &hi, &avg, &stddev))
+               printf("    clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n",
+                      lo, hi, avg, stddev);
+
+       if (calc_lat(&td->bw_stat[ddir], &lo, &hi, &avg, &stddev)) {
+               /*
+                * this thread's mean bandwidth as a share of the group's
+                */
+               double share = avg * 100 / (double) rs->agg[ddir];
+
+               printf("    bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n",
+                      lo, hi, share, avg, stddev);
+       }
+}
+
+/*
+ * Print the full report for one thread: header, the thread's primary
+ * direction, the other direction if it moved any bytes, and CPU usage as
+ * a percentage of total runtime. Threads that did no io and have no
+ * error are skipped.
+ */
+static void show_thread_status(struct thread_data *td,
+                              struct group_run_stats *rs)
+{
+       double usr_cpu = 0, sys_cpu = 0;
+       int other = td->ddir ^ 1;
+
+       if ((td->io_bytes[0] + td->io_bytes[1]) == 0 && !td->error)
+               return;
+
+       printf("Client%d (groupid=%d): err=%2d:\n", td->thread_number, td->groupid, td->error);
+
+       show_ddir_status(td, rs, td->ddir);
+       if (td->io_bytes[other])
+               show_ddir_status(td, rs, other);
+
+       if (td->runtime[0] + td->runtime[1]) {
+               double runt = td->runtime[0] + td->runtime[1];
+
+               usr_cpu = (double) td->usr_time * 100 / runt;
+               sys_cpu = (double) td->sys_time * 100 / runt;
+       }
+
+       printf("  cpu          : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
+}
+
+/*
+ * Print the end-of-run report. Per-group aggregates (min/max runtime,
+ * min/max bandwidth, total io) are collected over all threads that did
+ * not fail, then each thread's report is printed, followed by the group
+ * summaries and the disk utilization table.
+ *
+ * The temporary per-group array is released before returning. If it
+ * cannot be allocated, an error is reported and no statistics are shown.
+ */
+void show_run_stats(void)
+{
+       struct group_run_stats *runstats, *rs;
+       struct thread_data *td;
+       int i;
+
+       runstats = malloc(sizeof(*runstats) * (groupid + 1));
+       if (!runstats) {
+               perror("malloc runstats");
+               return;
+       }
+
+       /*
+        * minimums start out at the largest value so any sample lowers them
+        */
+       for (i = 0; i < groupid + 1; i++) {
+               rs = &runstats[i];
+
+               memset(rs, 0, sizeof(*rs));
+               rs->min_bw[0] = rs->min_run[0] = ~0UL;
+               rs->min_bw[1] = rs->min_run[1] = ~0UL;
+       }
+
+       for (i = 0; i < thread_number; i++) {
+               unsigned long long rbw, wbw;
+
+               td = &threads[i];
+
+               /*
+                * failed threads are reported, but kept out of the
+                * group aggregates
+                */
+               if (td->error) {
+                       printf("Client%d: %s\n", td->thread_number, td->verror);
+                       continue;
+               }
+
+               rs = &runstats[td->groupid];
+
+               if (td->runtime[0] < rs->min_run[0] || !rs->min_run[0])
+                       rs->min_run[0] = td->runtime[0];
+               if (td->runtime[0] > rs->max_run[0])
+                       rs->max_run[0] = td->runtime[0];
+               if (td->runtime[1] < rs->min_run[1] || !rs->min_run[1])
+                       rs->min_run[1] = td->runtime[1];
+               if (td->runtime[1] > rs->max_run[1])
+                       rs->max_run[1] = td->runtime[1];
+
+               rbw = wbw = 0;
+               if (td->runtime[0])
+                       rbw = td->io_bytes[0] / (unsigned long long) td->runtime[0];
+               if (td->runtime[1])
+                       wbw = td->io_bytes[1] / (unsigned long long) td->runtime[1];
+
+               if (rbw < rs->min_bw[0])
+                       rs->min_bw[0] = rbw;
+               if (wbw < rs->min_bw[1])
+                       rs->min_bw[1] = wbw;
+               if (rbw > rs->max_bw[0])
+                       rs->max_bw[0] = rbw;
+               if (wbw > rs->max_bw[1])
+                       rs->max_bw[1] = wbw;
+
+               rs->io_kb[0] += td->io_bytes[0] >> 10;
+               rs->io_kb[1] += td->io_bytes[1] >> 10;
+       }
+
+       /*
+        * aggregate rate is the group's total io over its longest runtime
+        */
+       for (i = 0; i < groupid + 1; i++) {
+               rs = &runstats[i];
+
+               if (rs->max_run[0])
+                       rs->agg[0] = (rs->io_kb[0]*1024) / rs->max_run[0];
+               if (rs->max_run[1])
+                       rs->agg[1] = (rs->io_kb[1]*1024) / rs->max_run[1];
+       }
+
+       /*
+        * don't overwrite last signal output
+        */
+       printf("\n");
+
+       for (i = 0; i < thread_number; i++) {
+               td = &threads[i];
+               rs = &runstats[td->groupid];
+
+               show_thread_status(td, rs);
+       }
+
+       for (i = 0; i < groupid + 1; i++)
+               show_group_stats(&runstats[i], i);
+
+       show_disk_util();
+
+       free(runstats);
+}
+
+/*
+ * Fold one sample into the running statistics: widen min/max if needed,
+ * and add to the sum, the sum of squares and the sample count.
+ */
+static inline void add_stat_sample(struct io_stat *is, unsigned long val)
+{
+       if (is->max_val < val)
+               is->max_val = val;
+       if (is->min_val > val)
+               is->min_val = val;
+
+       is->samples++;
+       is->val_sq += val * val;
+       is->val += val;
+}
+
+/*
+ * Append one (value, time, direction) entry to 'iolog', stamped with the
+ * milliseconds elapsed since td->epoch. The sample array doubles in size
+ * whenever it is full.
+ *
+ * If the array cannot be grown, the existing log is left intact and this
+ * sample is dropped.
+ */
+static void add_log_sample(struct thread_data *td, struct io_log *iolog,
+                          unsigned long val, int ddir)
+{
+       if (iolog->nr_samples == iolog->max_samples) {
+               size_t new_size = sizeof(struct io_sample) * iolog->max_samples * 2;
+               void *new_log;
+
+               /*
+                * keep the old pointer until the new one is known good
+                */
+               new_log = realloc(iolog->log, new_size);
+               if (!new_log)
+                       return;
+
+               iolog->log = new_log;
+               iolog->max_samples <<= 1;
+       }
+
+       iolog->log[iolog->nr_samples].val = val;
+       iolog->log[iolog->nr_samples].time = mtime_since_now(&td->epoch);
+       iolog->log[iolog->nr_samples].ddir = ddir;
+       iolog->nr_samples++;
+}
+
+/*
+ * Account one completion latency sample for direction 'ddir', and log it
+ * as well if the thread has a completion latency log.
+ */
+void add_clat_sample(struct thread_data *td, int ddir, unsigned long msec)
+{
+       struct io_stat *stat = &td->clat_stat[ddir];
+
+       add_stat_sample(stat, msec);
+
+       if (!td->clat_log)
+               return;
+
+       add_log_sample(td, td->clat_log, msec, ddir);
+}
+
+/*
+ * Account one submission latency sample for direction 'ddir', and log it
+ * as well if the thread has a submission latency log.
+ */
+void add_slat_sample(struct thread_data *td, int ddir, unsigned long msec)
+{
+       struct io_stat *stat = &td->slat_stat[ddir];
+
+       add_stat_sample(stat, msec);
+
+       if (!td->slat_log)
+               return;
+
+       add_log_sample(td, td->slat_log, msec, ddir);
+}
+
+/*
+ * Record a bandwidth sample for direction 'ddir' once at least
+ * td->bw_avg_time milliseconds have passed since the previous one. The
+ * rate is the bytes moved since the last sample divided by the elapsed
+ * milliseconds; afterwards the sample window is restarted.
+ */
+void add_bw_sample(struct thread_data *td, int ddir)
+{
+       unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]);
+       unsigned long rate;
+
+       /*
+        * Also bail if no time has passed at all. With a zero averaging
+        * time the window check alone would let a division by zero
+        * through.
+        */
+       if (!spent || spent < td->bw_avg_time)
+               return;
+
+       rate = (td->this_io_bytes[ddir] - td->stat_io_bytes[ddir]) / spent;
+       add_stat_sample(&td->bw_stat[ddir], rate);
+
+       if (td->bw_log)
+               add_log_sample(td, td->bw_log, rate, ddir);
+
+       gettimeofday(&td->stat_sample_time[ddir], NULL);
+       td->stat_io_bytes[ddir] = td->this_io_bytes[ddir];
+}
+
+
diff --git a/time.c b/time.c
new file mode 100644 (file)
index 0000000..5246263
--- /dev/null
+++ b/time.c
@@ -0,0 +1,122 @@
+#include <time.h>
+#include <sys/time.h>
+
+#include "fio.h"
+
+/*
+ * Return the number of microseconds from *s to *e. If *e is not later
+ * than *s (for instance because the wall clock was stepped back), 0 is
+ * returned instead of converting a negative interval to unsigned.
+ */
+unsigned long utime_since(struct timeval *s, struct timeval *e)
+{
+       long long usec;
+
+       usec = ((long long) e->tv_sec - (long long) s->tv_sec) * 1000000LL;
+       usec += (long long) e->tv_usec - (long long) s->tv_usec;
+       if (usec <= 0)
+               return 0;
+
+       return usec;
+}
+
+/*
+ * Microseconds elapsed between *s and the current time of day.
+ */
+static unsigned long utime_since_now(struct timeval *s)
+{
+       struct timeval now;
+
+       gettimeofday(&now, NULL);
+
+       return utime_since(s, &now);
+}
+
+/*
+ * Return the number of whole milliseconds from *s to *e. If *e is not
+ * later than *s (for instance because the wall clock was stepped back),
+ * 0 is returned instead of converting a negative interval to unsigned.
+ */
+unsigned long mtime_since(struct timeval *s, struct timeval *e)
+{
+       long long usec;
+
+       usec = ((long long) e->tv_sec - (long long) s->tv_sec) * 1000000LL;
+       usec += (long long) e->tv_usec - (long long) s->tv_usec;
+       if (usec <= 0)
+               return 0;
+
+       return usec / 1000;
+}
+
+/*
+ * Milliseconds elapsed between *s and the current time of day.
+ */
+unsigned long mtime_since_now(struct timeval *s)
+{
+       struct timeval now;
+
+       gettimeofday(&now, NULL);
+
+       return mtime_since(s, &now);
+}
+
+/*
+ * Whole seconds elapsed between *s and the current time of day.
+ */
+unsigned long time_since_now(struct timeval *s)
+{
+       unsigned long msec = mtime_since_now(s);
+
+       return msec / 1000;
+}
+
+/*
+ * Busy looping sleep, used for the last few usec. Spins on the time of
+ * day until at least 'usec' microseconds have passed.
+ */
+static void __usec_sleep(unsigned int usec)
+{
+       struct timeval begin;
+
+       gettimeofday(&begin, NULL);
+
+       for (;;) {
+               if (utime_since_now(&begin) >= usec)
+                       break;
+
+               nop;
+       }
+}
+
+/*
+ * Sleep for 'usec' microseconds. Anything shorter than 5000 usec is
+ * spent busy looping in __usec_sleep(), longer periods go through
+ * nanosleep(). The loop carries on with whatever time nanosleep() reports
+ * as remaining, until nothing is left or td->terminate is set. A failing
+ * nanosleep() ends the sleep early.
+ */
+void usec_sleep(struct thread_data *td, unsigned long usec)
+{
+       struct timespec req, rem;
+
+       /*
+        * tv_nsec must stay below one second, so only the sub-second
+        * part of usec goes there
+        */
+       req.tv_sec = usec / 1000000;
+       req.tv_nsec = (usec % 1000000) * 1000;
+
+       do {
+               if (usec < 5000) {
+                       __usec_sleep(usec);
+                       break;
+               }
+
+               rem.tv_sec = rem.tv_nsec = 0;
+               if (nanosleep(&req, &rem) < 0)
+                       break;
+
+               if ((rem.tv_sec + rem.tv_nsec) == 0)
+                       break;
+
+               req.tv_nsec = rem.tv_nsec;
+               req.tv_sec = rem.tv_sec;
+
+               usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
+       } while (!td->terminate);
+}
+
+/*
+ * Rate limiting hook, does nothing unless td->rate is set. The time
+ * budget for this io is td->rate_usec_cycle for every td->min_bs bytes.
+ * Finishing under budget adds the difference to the pending sleep, which
+ * is slept off once it has reached 100000 usec. Going over budget
+ * deducts the overrun from the pending sleep instead.
+ */
+void rate_throttle(struct thread_data *td, unsigned long time_spent,
+                  unsigned int bytes)
+{
+       unsigned long budget;
+
+       if (!td->rate)
+               return;
+
+       budget = td->rate_usec_cycle * (bytes / td->min_bs);
+
+       if (time_spent >= budget) {
+               long overtime = time_spent - budget;
+
+               td->rate_pending_usleep -= overtime;
+               return;
+       }
+
+       td->rate_pending_usleep += budget - time_spent;
+       if (td->rate_pending_usleep < 100000)
+               return;
+
+       usec_sleep(td, td->rate_pending_usleep);
+       td->rate_pending_usleep = 0;
+}