all: depend $(PROGS) $(SCRIPTS)
-fio: fio.o fio-io.o fio-ini.o fio-stat.o fio-log.o fio-time.o md5.o crc32.o
+fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o
$(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt
clean:
+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "fio.h"
-
-#define DEF_BS (4096)
-#define DEF_TIMEOUT (0)
-#define DEF_RATE_CYCLE (1000)
-#define DEF_ODIRECT (1)
-#define DEF_IO_ENGINE (FIO_SYNCIO)
-#define DEF_IO_ENGINE_NAME "sync"
-#define DEF_SEQUENTIAL (1)
-#define DEF_RAND_REPEAT (1)
-#define DEF_OVERWRITE (1)
-#define DEF_CREATE (1)
-#define DEF_INVALIDATE (1)
-#define DEF_SYNCIO (0)
-#define DEF_RANDSEED (0xb1899bedUL)
-#define DEF_BWAVGTIME (500)
-#define DEF_CREATE_SER (1)
-#define DEF_CREATE_FSYNC (1)
-#define DEF_LOOPS (1)
-#define DEF_VERIFY (0)
-#define DEF_STONEWALL (0)
-#define DEF_NUMJOBS (1)
-#define DEF_USE_THREAD (0)
-#define DEF_FILE_SIZE (1024 * 1024 * 1024UL)
-#define DEF_ZONE_SIZE (0)
-#define DEF_ZONE_SKIP (0)
-#define DEF_RWMIX_CYCLE (500)
-#define DEF_RWMIX_READ (50)
-#define DEF_NICE (0)
-
-static char fio_version_string[] = "fio 1.4";
-
-static int repeatable = DEF_RAND_REPEAT;
-static char *ini_file;
-static int max_jobs = MAX_JOBS;
-
-struct thread_data def_thread;
-struct thread_data *threads = NULL;
-
-int rate_quit = 0;
-int write_lat_log = 0;
-int write_bw_log = 0;
-int exitall_on_terminate = 0;
-unsigned long long mlock_size = 0;
-
-static int setup_rate(struct thread_data *td)
-{
- int nr_reads_per_sec;
-
- if (!td->rate)
- return 0;
-
- if (td->rate < td->ratemin) {
- fprintf(stderr, "min rate larger than nominal rate\n");
- return -1;
- }
-
- nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
- td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
- td->rate_pending_usleep = 0;
- return 0;
-}
-
-static void setup_log(struct io_log **log)
-{
- struct io_log *l = malloc(sizeof(*l));
-
- l->nr_samples = 0;
- l->max_samples = 1024;
- l->log = malloc(l->max_samples * sizeof(struct io_sample));
- *log = l;
-}
-
-void finish_log(struct thread_data *td, struct io_log *log, const char *name)
-{
- char file_name[256];
- FILE *f;
- unsigned int i;
-
- snprintf(file_name, 200, "client%d_%s.log", td->thread_number, name);
- f = fopen(file_name, "w");
- if (!f) {
- perror("fopen log");
- return;
- }
-
- for (i = 0; i < log->nr_samples; i++)
- fprintf(f, "%lu, %lu, %u\n", log->log[i].time, log->log[i].val, log->log[i].ddir);
-
- fclose(f);
- free(log->log);
- free(log);
-}
-
-static struct thread_data *get_new_job(int global, struct thread_data *parent)
-{
- struct thread_data *td;
-
- if (global)
- return &def_thread;
- if (thread_number >= max_jobs)
- return NULL;
-
- td = &threads[thread_number++];
- if (parent)
- *td = *parent;
- else
- memset(td, 0, sizeof(*td));
-
- td->fd = -1;
- td->thread_number = thread_number;
-
- td->ddir = parent->ddir;
- td->ioprio = parent->ioprio;
- td->sequential = parent->sequential;
- td->bs = parent->bs;
- td->min_bs = parent->min_bs;
- td->max_bs = parent->max_bs;
- td->odirect = parent->odirect;
- td->thinktime = parent->thinktime;
- td->fsync_blocks = parent->fsync_blocks;
- td->start_delay = parent->start_delay;
- td->timeout = parent->timeout;
- td->io_engine = parent->io_engine;
- td->create_file = parent->create_file;
- td->overwrite = parent->overwrite;
- td->invalidate_cache = parent->invalidate_cache;
- td->file_size = parent->file_size;
- td->file_offset = parent->file_offset;
- td->zone_size = parent->zone_size;
- td->zone_skip = parent->zone_skip;
- td->rate = parent->rate;
- td->ratemin = parent->ratemin;
- td->ratecycle = parent->ratecycle;
- td->iodepth = parent->iodepth;
- td->sync_io = parent->sync_io;
- td->mem_type = parent->mem_type;
- td->bw_avg_time = parent->bw_avg_time;
- td->create_serialize = parent->create_serialize;
- td->create_fsync = parent->create_fsync;
- td->loops = parent->loops;
- td->verify = parent->verify;
- td->stonewall = parent->stonewall;
- td->numjobs = parent->numjobs;
- td->use_thread = parent->use_thread;
- td->do_disk_util = parent->do_disk_util;
- memcpy(&td->cpumask, &parent->cpumask, sizeof(td->cpumask));
- strcpy(td->io_engine_name, parent->io_engine_name);
-
- return td;
-}
-
-static void put_job(struct thread_data *td)
-{
- memset(&threads[td->thread_number - 1], 0, sizeof(*td));
- thread_number--;
-}
-
-static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
-{
- char *ddir_str[] = { "read", "write", "randread", "randwrite",
- "rw", NULL, "randrw" };
- struct stat sb;
- int numjobs, ddir;
-
-#ifndef FIO_HAVE_LIBAIO
- if (td->io_engine == FIO_LIBAIO) {
- fprintf(stderr, "Linux libaio not available\n");
- return 1;
- }
-#endif
-#ifndef FIO_HAVE_POSIXAIO
- if (td->io_engine == FIO_POSIXAIO) {
- fprintf(stderr, "posix aio not available\n");
- return 1;
- }
-#endif
-
- /*
- * the def_thread is just for options, it's not a real job
- */
- if (td == &def_thread)
- return 0;
-
- if (td->io_engine & FIO_SYNCIO)
- td->iodepth = 1;
- else {
- if (!td->iodepth)
- td->iodepth = 1;
- }
-
- /*
- * only really works for sequential io for now
- */
- if (td->zone_size && !td->sequential)
- td->zone_size = 0;
-
- td->filetype = FIO_TYPE_FILE;
- if (!stat(jobname, &sb)) {
- if (S_ISBLK(sb.st_mode))
- td->filetype = FIO_TYPE_BD;
- else if (S_ISCHR(sb.st_mode))
- td->filetype = FIO_TYPE_CHAR;
- }
-
- if (td->filetype == FIO_TYPE_FILE) {
- if (td->directory && td->directory[0] != '\0')
- sprintf(td->file_name, "%s/%s.%d", td->directory, jobname, td->jobnum);
- else
- sprintf(td->file_name, "%s.%d", jobname, td->jobnum);
- } else
- strncpy(td->file_name, jobname, sizeof(td->file_name) - 1);
-
- sem_init(&td->mutex, 0, 0);
-
- td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
- td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX;
- td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX;
-
- if (td->min_bs == -1U)
- td->min_bs = td->bs;
- if (td->max_bs == -1U)
- td->max_bs = td->bs;
- if (td_read(td) && !td_rw(td))
- td->verify = 0;
-
- if (td->stonewall && td->thread_number > 1)
- groupid++;
-
- td->groupid = groupid;
-
- if (setup_rate(td))
- goto err;
-
- if (write_lat_log) {
- setup_log(&td->slat_log);
- setup_log(&td->clat_log);
- }
- if (write_bw_log)
- setup_log(&td->bw_log);
-
- ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2);
-
- if (!job_add_num)
- printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth);
- else if (job_add_num == 1)
- printf("...\n");
-
- /*
- * recurse add identical jobs, clear numjobs and stonewall options
- * as they don't apply to sub-jobs
- */
- numjobs = td->numjobs;
- while (--numjobs) {
- struct thread_data *td_new = get_new_job(0, td);
-
- if (!td_new)
- goto err;
-
- td_new->numjobs = 1;
- td_new->stonewall = 0;
- td_new->jobnum = numjobs;
- job_add_num = numjobs - 1;
-
- if (add_job(td_new, jobname, job_add_num))
- goto err;
- }
- return 0;
-err:
- put_job(td);
- return -1;
-}
-
-int init_random_state(struct thread_data *td)
-{
- unsigned long seeds[4];
- int fd, num_maps, blocks;
-
- fd = open("/dev/urandom", O_RDONLY);
- if (fd == -1) {
- td_verror(td, errno);
- return 1;
- }
-
- if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
- td_verror(td, EIO);
- close(fd);
- return 1;
- }
-
- close(fd);
-
- srand48_r(seeds[0], &td->bsrange_state);
- srand48_r(seeds[1], &td->verify_state);
- srand48_r(seeds[2], &td->rwmix_state);
-
- if (td->sequential)
- return 0;
-
- if (repeatable)
- seeds[3] = DEF_RANDSEED;
-
- blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
- num_maps = blocks / BLOCKS_PER_MAP;
- td->file_map = malloc(num_maps * sizeof(long));
- td->num_maps = num_maps;
- memset(td->file_map, 0, num_maps * sizeof(long));
-
- srand48_r(seeds[3], &td->random_state);
- return 0;
-}
-
-static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
-{
-#ifdef FIO_HAVE_CPU_AFFINITY
- unsigned int i;
-
- CPU_ZERO(&cpumask);
-
- for (i = 0; i < sizeof(int) * 8; i++) {
- if ((1 << i) & cpu)
- CPU_SET(i, &cpumask);
- }
-#endif
-}
-
-static unsigned long get_mult(char c)
-{
- switch (c) {
- case 'k':
- case 'K':
- return 1024;
- case 'm':
- case 'M':
- return 1024 * 1024;
- case 'g':
- case 'G':
- return 1024 * 1024 * 1024;
- default:
- return 1;
- }
-}
-
-/*
- * convert string after '=' into decimal value, noting any size suffix
- */
-static int str_cnv(char *p, unsigned long long *val)
-{
- char *str;
- int len;
-
- str = strchr(p, '=');
- if (!str)
- return 1;
-
- str++;
- len = strlen(str);
-
- *val = strtoul(str, NULL, 10);
- if (*val == ULONG_MAX && errno == ERANGE)
- return 1;
-
- *val *= get_mult(str[len - 1]);
- return 0;
-}
-
-static int check_strcnv(char *p, char *name, unsigned long long *val)
-{
- if (strncmp(p, name, strlen(name) - 1))
- return 1;
-
- return str_cnv(p, val);
-}
-
-static void strip_blank_front(char **p)
-{
- char *s = *p;
-
- while (isspace(*s))
- s++;
-}
-
-static void strip_blank_end(char *p)
-{
- char *s = p + strlen(p) - 1;
-
- while (isspace(*s) || iscntrl(*s))
- s--;
-
- *(s + 1) = '\0';
-}
-
-typedef int (str_cb_fn)(struct thread_data *, char *);
-
-static int check_str(char *p, char *name, str_cb_fn *cb, struct thread_data *td)
-{
- char *s;
-
- if (strncmp(p, name, strlen(name)))
- return 1;
-
- s = strstr(p, name);
- if (!s)
- return 1;
-
- s = strchr(s, '=');
- if (!s)
- return 1;
-
- s++;
- strip_blank_front(&s);
- return cb(td, s);
-}
-
-static int check_strstore(char *p, char *name, char *dest)
-{
- char *s;
-
- if (strncmp(p, name, strlen(name)))
- return 1;
-
- s = strstr(p, name);
- if (!s)
- return 1;
-
- s = strchr(p, '=');
- if (!s)
- return 1;
-
- s++;
- strip_blank_front(&s);
-
- strcpy(dest, s);
- return 0;
-}
-
-static int __check_range(char *str, unsigned long *val)
-{
- char suffix;
-
- if (sscanf(str, "%lu%c", val, &suffix) == 2) {
- *val *= get_mult(suffix);
- return 0;
- }
-
- if (sscanf(str, "%lu", val) == 1)
- return 0;
-
- return 1;
-}
-
-static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
-{
- char option[128];
- char *str, *p1, *p2;
-
- if (strncmp(p, name, strlen(name)))
- return 1;
-
- strcpy(option, p);
- p = option;
-
- str = strstr(p, name);
- if (!str)
- return 1;
-
- p += strlen(name);
-
- str = strchr(p, '=');
- if (!str)
- return 1;
-
- /*
- * 'p' now holds whatever is after the '=' sign
- */
- p1 = str + 1;
-
- /*
- * terminate p1 at the '-' sign
- */
- p = strchr(p1, '-');
- if (!p)
- return 1;
-
- p2 = p + 1;
- *p = '\0';
-
- if (!__check_range(p1, s) && !__check_range(p2, e))
- return 0;
-
- return 1;
-}
-
-static int check_int(char *p, char *name, unsigned int *val)
-{
- char *str;
-
- if (strncmp(p, name, strlen(name)))
- return 1;
-
- str = strstr(p, name);
- if (!str)
- return 1;
-
- str = strchr(p, '=');
- if (!str)
- return 1;
-
- str++;
-
- if (sscanf(str, "%u", val) == 1)
- return 0;
-
- return 1;
-}
-
-static int check_strset(char *p, char *name)
-{
- return strncmp(p, name, strlen(name));
-}
-
-static int is_empty_or_comment(char *line)
-{
- unsigned int i;
-
- for (i = 0; i < strlen(line); i++) {
- if (line[i] == ';')
- return 1;
- if (!isspace(line[i]) && !iscntrl(line[i]))
- return 0;
- }
-
- return 1;
-}
-
-static int str_rw_cb(struct thread_data *td, char *mem)
-{
- if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) {
- td->ddir = DDIR_READ;
- td->sequential = 1;
- return 0;
- } else if (!strncmp(mem, "randread", 8)) {
- td->ddir = DDIR_READ;
- td->sequential = 0;
- return 0;
- } else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) {
- td->ddir = DDIR_WRITE;
- td->sequential = 1;
- return 0;
- } else if (!strncmp(mem, "randwrite", 9)) {
- td->ddir = DDIR_WRITE;
- td->sequential = 0;
- return 0;
- } else if (!strncmp(mem, "rw", 2)) {
- td->ddir = 0;
- td->iomix = 1;
- td->sequential = 1;
- return 0;
- } else if (!strncmp(mem, "randrw", 6)) {
- td->ddir = 0;
- td->iomix = 1;
- td->sequential = 0;
- return 0;
- }
-
- fprintf(stderr, "bad data direction: %s\n", mem);
- return 1;
-}
-
-static int str_verify_cb(struct thread_data *td, char *mem)
-{
- if (!strncmp(mem, "0", 1)) {
- td->verify = VERIFY_NONE;
- return 0;
- } else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1)) {
- td->verify = VERIFY_MD5;
- return 0;
- } else if (!strncmp(mem, "crc32", 5)) {
- td->verify = VERIFY_CRC32;
- return 0;
- }
-
- fprintf(stderr, "bad verify type: %s\n", mem);
- return 1;
-}
-
-static int str_mem_cb(struct thread_data *td, char *mem)
-{
- if (!strncmp(mem, "malloc", 6)) {
- td->mem_type = MEM_MALLOC;
- return 0;
- } else if (!strncmp(mem, "shm", 3)) {
- td->mem_type = MEM_SHM;
- return 0;
- } else if (!strncmp(mem, "mmap", 4)) {
- td->mem_type = MEM_MMAP;
- return 0;
- }
-
- fprintf(stderr, "bad mem type: %s\n", mem);
- return 1;
-}
-
-static int str_ioengine_cb(struct thread_data *td, char *str)
-{
- if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) ||
- !strncmp(str, "libaio", 6)) {
- strcpy(td->io_engine_name, "libaio");
- td->io_engine = FIO_LIBAIO;
- return 0;
- } else if (!strncmp(str, "posixaio", 8)) {
- strcpy(td->io_engine_name, "posixaio");
- td->io_engine = FIO_POSIXAIO;
- return 0;
- } else if (!strncmp(str, "sync", 4)) {
- strcpy(td->io_engine_name, "sync");
- td->io_engine = FIO_SYNCIO;
- return 0;
- } else if (!strncmp(str, "mmap", 4)) {
- strcpy(td->io_engine_name, "mmap");
- td->io_engine = FIO_MMAPIO;
- return 0;
- } else if (!strncmp(str, "sgio", 4)) {
- strcpy(td->io_engine_name, "sgio");
- td->io_engine = FIO_SGIO;
- return 0;
- } else if (!strncmp(str, "splice", 6)) {
- strcpy(td->io_engine_name, "splice");
- td->io_engine = FIO_SPLICEIO;
- return 0;
- }
-
- fprintf(stderr, "bad ioengine type: %s\n", str);
- return 1;
-}
-
-static int str_iolog_cb(struct thread_data *td, char *file)
-{
- td->iolog_file = strdup(file);
- return 0;
-}
-
-static int str_prerun_cb(struct thread_data *td, char *file)
-{
- td->exec_prerun = strdup(file);
- return 0;
-}
-
-static int str_postrun_cb(struct thread_data *td, char *file)
-{
- td->exec_postrun = strdup(file);
- return 0;
-}
-
-static int str_iosched_cb(struct thread_data *td, char *file)
-{
- td->ioscheduler = strdup(file);
- return 0;
-}
-
-int parse_jobs_ini(char *file)
-{
- unsigned int prioclass, prio, cpu, global, il;
- unsigned long long ull;
- unsigned long ul1, ul2;
- struct thread_data *td;
- char *string, *name, *tmpbuf;
- fpos_t off;
- FILE *f;
- char *p;
-
- f = fopen(file, "r");
- if (!f) {
- perror("fopen job file");
- return 1;
- }
-
- string = malloc(4096);
- name = malloc(256);
- tmpbuf = malloc(4096);
-
- while ((p = fgets(string, 4096, f)) != NULL) {
- if (is_empty_or_comment(p))
- continue;
- if (sscanf(p, "[%s]", name) != 1)
- continue;
-
- global = !strncmp(name, "global", 6);
-
- name[strlen(name) - 1] = '\0';
-
- td = get_new_job(global, &def_thread);
- if (!td)
- return 1;
-
- fgetpos(f, &off);
- while ((p = fgets(string, 4096, f)) != NULL) {
- if (is_empty_or_comment(p))
- continue;
- if (strstr(p, "["))
- break;
- strip_blank_front(&p);
- strip_blank_end(p);
-
- if (!check_int(p, "prio", &prio)) {
-#ifndef FIO_HAVE_IOPRIO
- fprintf(stderr, "io priorities not available\n");
- return 1;
-#endif
- td->ioprio |= prio;
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "prioclass", &prioclass)) {
-#ifndef FIO_HAVE_IOPRIO
- fprintf(stderr, "io priorities not available\n");
- return 1;
-#endif
- td->ioprio |= prioclass << IOPRIO_CLASS_SHIFT;
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "direct", &td->odirect)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "rate", &td->rate)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "ratemin", &td->ratemin)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "ratecycle", &td->ratecycle)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "thinktime", &td->thinktime)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "cpumask", &cpu)) {
-#ifndef FIO_HAVE_CPU_AFFINITY
- fprintf(stderr, "cpu affinity not available\n");
- return 1;
-#endif
- fill_cpu_mask(td->cpumask, cpu);
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "fsync", &td->fsync_blocks)) {
- fgetpos(f, &off);
- td->end_fsync = 1;
- continue;
- }
- if (!check_int(p, "startdelay", &td->start_delay)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "timeout", &td->timeout)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "invalidate",&td->invalidate_cache)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "iodepth", &td->iodepth)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "sync", &td->sync_io)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "create_serialize", &td->create_serialize)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "create_fsync", &td->create_fsync)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "end_fsync", &td->end_fsync)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "loops", &td->loops)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "numjobs", &td->numjobs)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "overwrite", &td->overwrite)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "rwmixcycle", &td->rwmixcycle)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "rwmixread", &il)) {
- if (il > 100)
- il = 100;
- td->rwmixread = il;
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "rwmixwrite", &il)) {
- if (il > 100)
- il = 100;
- td->rwmixread = 100 - il;
- fgetpos(f, &off);
- continue;
- }
- if (!check_int(p, "nice", &td->nice)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_range(p, "bsrange", &ul1, &ul2)) {
- if (ul1 > ul2) {
- td->max_bs = ul1;
- td->min_bs = ul2;
- } else {
- td->max_bs = ul2;
- td->min_bs = ul1;
- }
- fgetpos(f, &off);
- continue;
- }
- if (!check_strcnv(p, "bs", &ull)) {
- td->bs = ull;
- fgetpos(f, &off);
- continue;
- }
- if (!check_strcnv(p, "size", &td->file_size)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_strcnv(p, "offset", &td->file_offset)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_strcnv(p, "zonesize", &td->zone_size)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_strcnv(p, "zoneskip", &td->zone_skip)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_strcnv(p, "lockmem", &mlock_size)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_strstore(p, "directory", tmpbuf)) {
- td->directory = strdup(tmpbuf);
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "mem", str_mem_cb, td)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "verify", str_verify_cb, td)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "rw", str_rw_cb, td)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "ioengine", str_ioengine_cb, td)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_strset(p, "create")) {
- td->create_file = 1;
- fgetpos(f, &off);
- continue;
- }
- if (!check_strset(p, "exitall")) {
- exitall_on_terminate = 1;
- fgetpos(f, &off);
- continue;
- }
- if (!check_strset(p, "stonewall")) {
- td->stonewall = 1;
- fgetpos(f, &off);
- continue;
- }
- if (!check_strset(p, "thread")) {
- td->use_thread = 1;
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "iolog", str_iolog_cb, td)) {
- td->read_iolog = 1;
- td->write_iolog = 0;
- fgetpos(f, &off);
- continue;
- }
- if (!td->read_iolog &&
- !check_str(p, "write_iolog", str_iolog_cb, td)) {
- td->write_iolog = 1;
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "exec_prerun", str_prerun_cb, td)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "exec_postrun", str_postrun_cb, td)) {
- fgetpos(f, &off);
- continue;
- }
- if (!check_str(p, "ioscheduler", str_iosched_cb, td)) {
- fgetpos(f, &off);
- continue;
- }
-
- printf("Client%d: bad option %s\n",td->thread_number,p);
- return 1;
- }
- fsetpos(f, &off);
-
- if (add_job(td, name, 0))
- return 1;
- }
-
- free(string);
- free(name);
- free(tmpbuf);
- fclose(f);
- return 0;
-}
-
-static int fill_def_thread(void)
-{
- memset(&def_thread, 0, sizeof(def_thread));
-
- if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) {
- perror("sched_getaffinity");
- return 1;
- }
-
- /*
- * fill globals
- */
- def_thread.ddir = DDIR_READ;
- def_thread.iomix = 0;
- def_thread.bs = DEF_BS;
- def_thread.min_bs = -1;
- def_thread.max_bs = -1;
- def_thread.io_engine = DEF_IO_ENGINE;
- strcpy(def_thread.io_engine_name, DEF_IO_ENGINE_NAME);
- def_thread.odirect = DEF_ODIRECT;
- def_thread.ratecycle = DEF_RATE_CYCLE;
- def_thread.sequential = DEF_SEQUENTIAL;
- def_thread.timeout = DEF_TIMEOUT;
- def_thread.create_file = DEF_CREATE;
- def_thread.overwrite = DEF_OVERWRITE;
- def_thread.invalidate_cache = DEF_INVALIDATE;
- def_thread.sync_io = DEF_SYNCIO;
- def_thread.mem_type = MEM_MALLOC;
- def_thread.bw_avg_time = DEF_BWAVGTIME;
- def_thread.create_serialize = DEF_CREATE_SER;
- def_thread.create_fsync = DEF_CREATE_FSYNC;
- def_thread.loops = DEF_LOOPS;
- def_thread.verify = DEF_VERIFY;
- def_thread.stonewall = DEF_STONEWALL;
- def_thread.numjobs = DEF_NUMJOBS;
- def_thread.use_thread = DEF_USE_THREAD;
- def_thread.rwmixcycle = DEF_RWMIX_CYCLE;
- def_thread.rwmixread = DEF_RWMIX_READ;
- def_thread.nice = DEF_NICE;
-#ifdef FIO_HAVE_DISK_UTIL
- def_thread.do_disk_util = 1;
-#endif
-
- return 0;
-}
-
-static void usage(char *name)
-{
- printf("%s\n", fio_version_string);
- printf("\t-s IO is sequential\n");
- printf("\t-b Block size in KiB for each IO\n");
- printf("\t-t Runtime in seconds\n");
- printf("\t-R Exit all threads on failure to meet rate goal\n");
- printf("\t-o Use O_DIRECT\n");
- printf("\t-l Generate per-job latency logs\n");
- printf("\t-w Generate per-job bandwidth logs\n");
- printf("\t-f Job file (Required)\n");
- printf("\t-v Print version info and exit\n");
-}
-
-static void parse_cmd_line(int argc, char *argv[])
-{
- int c;
-
- while ((c = getopt(argc, argv, "s:b:t:r:R:o:f:lwvh")) != EOF) {
- switch (c) {
- case 's':
- def_thread.sequential = !!atoi(optarg);
- break;
- case 'b':
- def_thread.bs = atoi(optarg);
- def_thread.bs <<= 10;
- if (!def_thread.bs) {
- printf("bad block size\n");
- def_thread.bs = DEF_BS;
- }
- break;
- case 't':
- def_thread.timeout = atoi(optarg);
- break;
- case 'r':
- repeatable = !!atoi(optarg);
- break;
- case 'R':
- rate_quit = !!atoi(optarg);
- break;
- case 'o':
- def_thread.odirect = !!atoi(optarg);
- break;
- case 'f':
- ini_file = strdup(optarg);
- break;
- case 'l':
- write_lat_log = 1;
- break;
- case 'w':
- write_bw_log = 1;
- break;
- case 'h':
- usage(argv[0]);
- exit(0);
- case 'v':
- printf("%s\n", fio_version_string);
- exit(0);
- }
- }
-
- if (!ini_file && argc > 1 && argv[argc - 1][0] != '-')
- ini_file = strdup(argv[argc - 1]);
-}
-
-static void free_shm(void)
-{
- struct shmid_ds sbuf;
-
- if (threads) {
- shmdt(threads);
- threads = NULL;
- shmctl(shm_id, IPC_RMID, &sbuf);
- }
-}
-
-static int setup_thread_area(void)
-{
- /*
- * 1024 is too much on some machines, scale max_jobs if
- * we get a failure that looks like too large a shm segment
- */
- do {
- int s = max_jobs * sizeof(struct thread_data);
-
- shm_id = shmget(0, s, IPC_CREAT | 0600);
- if (shm_id != -1)
- break;
- if (errno != EINVAL) {
- perror("shmget");
- break;
- }
-
- max_jobs >>= 1;
- } while (max_jobs);
-
- if (shm_id == -1)
- return 1;
-
- threads = shmat(shm_id, NULL, 0);
- if (threads == (void *) -1) {
- perror("shmat");
- return 1;
- }
-
- atexit(free_shm);
- return 0;
-}
-
-int parse_options(int argc, char *argv[])
-{
- if (setup_thread_area())
- return 1;
- if (fill_def_thread())
- return 1;
-
- parse_cmd_line(argc, argv);
-
- if (!ini_file) {
- printf("Need job file\n");
- usage(argv[0]);
- return 1;
- }
-
- if (parse_jobs_ini(ini_file)) {
- usage(argv[0]);
- return 1;
- }
-
- return 0;
-}
+++ /dev/null
-/*
- * The io parts of the fio tool, includes workers for sync and mmap'ed
- * io, as well as both posix and linux libaio support.
- *
- * sync io is implemented on top of aio.
- *
- * This is not really specific to fio, if the get_io_u/put_io_u and
- * structures was pulled into this as well it would be a perfectly
- * generic io engine that could be used for other projects.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <time.h>
-#include <sys/mman.h>
-#include <sys/poll.h>
-#include "fio.h"
-#include "os.h"
-
-#ifdef FIO_HAVE_LIBAIO
-
-#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
-
-static int fio_io_sync(struct thread_data *td)
-{
- return fsync(td->fd);
-}
-
-static int fill_timespec(struct timespec *ts)
-{
-#ifdef _POSIX_TIMERS
- if (!clock_gettime(CLOCK_MONOTONIC, ts))
- return 0;
-
- perror("clock_gettime");
-#endif
- return 1;
-}
-
-static unsigned long long ts_utime_since_now(struct timespec *t)
-{
- long long sec, nsec;
- struct timespec now;
-
- if (fill_timespec(&now))
- return 0;
-
- sec = now.tv_sec - t->tv_sec;
- nsec = now.tv_nsec - t->tv_nsec;
- if (sec > 0 && nsec < 0) {
- sec--;
- nsec += 1000000000;
- }
-
- sec *= 1000000;
- nsec /= 1000;
- return sec + nsec;
-}
-
-struct libaio_data {
- io_context_t aio_ctx;
- struct io_event *aio_events;
-};
-
-static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
-{
- if (io_u->ddir == DDIR_READ)
- io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
- else
- io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
-
- return 0;
-}
-
-static struct io_u *fio_libaio_event(struct thread_data *td, int event)
-{
- struct libaio_data *ld = td->io_data;
-
- return ev_to_iou(ld->aio_events + event);
-}
-
-static int fio_libaio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct libaio_data *ld = td->io_data;
- int r;
-
- do {
- r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
- if (r == -EAGAIN) {
- usleep(100);
- continue;
- } else if (r == -EINTR)
- continue;
- else
- break;
- } while (1);
-
- return r;
-}
-
-static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_data;
- struct iocb *iocb = &io_u->iocb;
- int ret;
-
- do {
- ret = io_submit(ld->aio_ctx, 1, &iocb);
- if (ret == 1)
- return 0;
- else if (ret == -EAGAIN)
- usleep(100);
- else if (ret == -EINTR)
- continue;
- else
- break;
- } while (1);
-
- return ret;
-
-}
-
-static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_data;
-
- return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
-}
-
-static void fio_libaio_cleanup(struct thread_data *td)
-{
- struct libaio_data *ld = td->io_data;
-
- if (ld) {
- io_destroy(ld->aio_ctx);
- if (ld->aio_events)
- free(ld->aio_events);
-
- free(ld);
- td->io_data = NULL;
- }
-}
-
-int fio_libaio_init(struct thread_data *td)
-{
- struct libaio_data *ld = malloc(sizeof(*ld));
-
- memset(ld, 0, sizeof(*ld));
- if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
- td_verror(td, errno);
- return 1;
- }
-
- td->io_prep = fio_libaio_io_prep;
- td->io_queue = fio_libaio_queue;
- td->io_getevents = fio_libaio_getevents;
- td->io_event = fio_libaio_event;
- td->io_cancel = fio_libaio_cancel;
- td->io_cleanup = fio_libaio_cleanup;
- td->io_sync = fio_io_sync;
-
- ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
- td->io_data = ld;
- return 0;
-}
-
-#else /* FIO_HAVE_LIBAIO */
-
-int fio_libaio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_LIBAIO */
-
-#ifdef FIO_HAVE_POSIXAIO
-
-struct posixaio_data {
- struct io_u **aio_events;
-};
-
-static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
- int r = aio_cancel(td->fd, &io_u->aiocb);
-
- if (r == 1 || r == AIO_CANCELED)
- return 0;
-
- return 1;
-}
-
-static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct aiocb *aiocb = &io_u->aiocb;
-
- aiocb->aio_fildes = td->fd;
- aiocb->aio_buf = io_u->buf;
- aiocb->aio_nbytes = io_u->buflen;
- aiocb->aio_offset = io_u->offset;
-
- io_u->seen = 0;
- return 0;
-}
-
-static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct posixaio_data *pd = td->io_data;
- struct list_head *entry;
- struct timespec start;
- int r, have_timeout = 0;
-
- if (t && !fill_timespec(&start))
- have_timeout = 1;
-
- r = 0;
-restart:
- list_for_each(entry, &td->io_u_busylist) {
- struct io_u *io_u = list_entry(entry, struct io_u, list);
- int err;
-
- if (io_u->seen)
- continue;
-
- err = aio_error(&io_u->aiocb);
- switch (err) {
- default:
- io_u->error = err;
- case ECANCELED:
- case 0:
- pd->aio_events[r++] = io_u;
- io_u->seen = 1;
- break;
- case EINPROGRESS:
- break;
- }
-
- if (r >= max)
- break;
- }
-
- if (r >= min)
- return r;
-
- if (have_timeout) {
- unsigned long long usec;
-
- usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
- if (ts_utime_since_now(&start) > usec)
- return r;
- }
-
- /*
- * hrmpf, we need to wait for more. we should use aio_suspend, for
- * now just sleep a little and recheck status of busy-and-not-seen
- */
- usleep(1000);
- goto restart;
-}
-
-static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
-{
- struct posixaio_data *pd = td->io_data;
-
- return pd->aio_events[event];
-}
-
-static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct aiocb *aiocb = &io_u->aiocb;
- int ret;
-
- if (io_u->ddir == DDIR_READ)
- ret = aio_read(aiocb);
- else
- ret = aio_write(aiocb);
-
- if (ret)
- io_u->error = errno;
-
- return io_u->error;
-}
-
-static void fio_posixaio_cleanup(struct thread_data *td)
-{
- struct posixaio_data *pd = td->io_data;
-
- if (pd) {
- free(pd->aio_events);
- free(pd);
- td->io_data = NULL;
- }
-}
-
-int fio_posixaio_init(struct thread_data *td)
-{
- struct posixaio_data *pd = malloc(sizeof(*pd));
-
- pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
-
- td->io_prep = fio_posixaio_prep;
- td->io_queue = fio_posixaio_queue;
- td->io_getevents = fio_posixaio_getevents;
- td->io_event = fio_posixaio_event;
- td->io_cancel = fio_posixaio_cancel;
- td->io_cleanup = fio_posixaio_cleanup;
- td->io_sync = fio_io_sync;
-
- td->io_data = pd;
- return 0;
-}
-
-#else /* FIO_HAVE_POSIXAIO */
-
-int fio_posixaio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_POSIXAIO */
-
-struct syncio_data {
- struct io_u *last_io_u;
-};
-
-static int fio_syncio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- assert(max <= 1);
-
- /*
- * we can only have one finished io_u for sync io, since the depth
- * is always 1
- */
- if (list_empty(&td->io_u_busylist))
- return 0;
-
- return 1;
-}
-
-static struct io_u *fio_syncio_event(struct thread_data *td, int event)
-{
- struct syncio_data *sd = td->io_data;
-
- assert(event == 0);
-
- return sd->last_io_u;
-}
-
-static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
-{
- if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
- td_verror(td, errno);
- return 1;
- }
-
- return 0;
-}
-
-static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct syncio_data *sd = td->io_data;
- int ret;
-
- if (io_u->ddir == DDIR_READ)
- ret = read(td->fd, io_u->buf, io_u->buflen);
- else
- ret = write(td->fd, io_u->buf, io_u->buflen);
-
- if ((unsigned int) ret != io_u->buflen) {
- if (ret > 0) {
- io_u->resid = io_u->buflen - ret;
- io_u->error = ENODATA;
- } else
- io_u->error = errno;
- }
-
- if (!io_u->error)
- sd->last_io_u = io_u;
-
- return io_u->error;
-}
-
-static void fio_syncio_cleanup(struct thread_data *td)
-{
- if (td->io_data) {
- free(td->io_data);
- td->io_data = NULL;
- }
-}
-
-int fio_syncio_init(struct thread_data *td)
-{
- struct syncio_data *sd = malloc(sizeof(*sd));
-
- td->io_prep = fio_syncio_prep;
- td->io_queue = fio_syncio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_syncio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_io_sync;
-
- sd->last_io_u = NULL;
- td->io_data = sd;
- return 0;
-}
-
-static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
-{
- unsigned long long real_off = io_u->offset - td->file_offset;
- struct syncio_data *sd = td->io_data;
-
- if (io_u->ddir == DDIR_READ)
- memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
- else
- memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
-
- /*
- * not really direct, but should drop the pages from the cache
- */
- if (td->odirect) {
- if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
- io_u->error = errno;
- if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
- io_u->error = errno;
- }
-
- if (!io_u->error)
- sd->last_io_u = io_u;
-
- return io_u->error;
-}
-
-static int fio_mmapio_sync(struct thread_data *td)
-{
- return msync(td->mmap, td->file_size, MS_SYNC);
-}
-
-int fio_mmapio_init(struct thread_data *td)
-{
- struct syncio_data *sd = malloc(sizeof(*sd));
-
- td->io_prep = NULL;
- td->io_queue = fio_mmapio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_syncio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_mmapio_sync;
-
- sd->last_io_u = NULL;
- td->io_data = sd;
- return 0;
-}
-
-#ifdef FIO_HAVE_SGIO
-
-struct sgio_cmd {
- unsigned char cdb[10];
- int nr;
-};
-
-struct sgio_data {
- struct sgio_cmd *cmds;
- struct io_u **events;
- unsigned int bs;
-};
-
-static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
- struct io_u *io_u, int fs)
-{
- struct sgio_cmd *sc = &sd->cmds[io_u->index];
-
- memset(hdr, 0, sizeof(*hdr));
- memset(sc->cdb, 0, sizeof(sc->cdb));
-
- hdr->interface_id = 'S';
- hdr->cmdp = sc->cdb;
- hdr->cmd_len = sizeof(sc->cdb);
- hdr->pack_id = io_u->index;
- hdr->usr_ptr = io_u;
-
- if (fs) {
- hdr->dxferp = io_u->buf;
- hdr->dxfer_len = io_u->buflen;
- }
-}
-
-static int fio_sgio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct sgio_data *sd = td->io_data;
- struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
- void *buf = malloc(max * sizeof(struct sg_io_hdr));
- int left = max, ret, events, i, r = 0, fl = 0;
-
- /*
- * don't block for !events
- */
- if (!min) {
- fl = fcntl(td->fd, F_GETFL);
- fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
- }
-
- while (left) {
- do {
- if (!min)
- break;
- poll(&pfd, 1, -1);
- if (pfd.revents & POLLIN)
- break;
- } while (1);
-
- ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
- if (ret < 0) {
- if (errno == EAGAIN)
- break;
- td_verror(td, errno);
- r = -1;
- break;
- } else if (!ret)
- break;
-
- events = ret / sizeof(struct sg_io_hdr);
- left -= events;
- r += events;
-
- for (i = 0; i < events; i++) {
- struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
-
- sd->events[i] = hdr->usr_ptr;
- }
- }
-
- if (!min)
- fcntl(td->fd, F_SETFL, fl);
-
- free(buf);
- return r;
-}
-
-static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
-{
- struct sgio_data *sd = td->io_data;
- struct sg_io_hdr *hdr = &io_u->hdr;
-
- sd->events[0] = io_u;
-
- return ioctl(td->fd, SG_IO, hdr);
-}
-
-static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
-
- ret = write(td->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return errno;
-
- if (sync) {
- ret = read(td->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return errno;
- }
-
- return 0;
-}
-
-static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
- if (td->filetype == FIO_TYPE_BD)
- return fio_sgio_ioctl_doio(td, io_u);
-
- return fio_sgio_rw_doio(td, io_u, sync);
-}
-
-static int fio_sgio_sync(struct thread_data *td)
-{
- struct sgio_data *sd = td->io_data;
- struct sg_io_hdr *hdr;
- struct io_u *io_u;
- int ret;
-
- io_u = __get_io_u(td);
- if (!io_u)
- return ENOMEM;
-
- hdr = &io_u->hdr;
- sgio_hdr_init(sd, hdr, io_u, 0);
- hdr->dxfer_direction = SG_DXFER_NONE;
-
- hdr->cmdp[0] = 0x35;
-
- ret = fio_sgio_doio(td, io_u, 1);
- put_io_u(td, io_u);
- return ret;
-}
-
-static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- struct sgio_data *sd = td->io_data;
- int nr_blocks, lba;
-
- if (io_u->buflen & (sd->bs - 1)) {
- fprintf(stderr, "read/write not sector aligned\n");
- return EINVAL;
- }
-
- sgio_hdr_init(sd, hdr, io_u, 1);
-
- if (io_u->ddir == DDIR_READ) {
- hdr->dxfer_direction = SG_DXFER_FROM_DEV;
- hdr->cmdp[0] = 0x28;
- } else {
- hdr->dxfer_direction = SG_DXFER_TO_DEV;
- hdr->cmdp[0] = 0x2a;
- }
-
- nr_blocks = io_u->buflen / sd->bs;
- lba = io_u->offset / sd->bs;
- hdr->cmdp[2] = (lba >> 24) & 0xff;
- hdr->cmdp[3] = (lba >> 16) & 0xff;
- hdr->cmdp[4] = (lba >> 8) & 0xff;
- hdr->cmdp[5] = lba & 0xff;
- hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
- hdr->cmdp[8] = nr_blocks & 0xff;
- return 0;
-}
-
-static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
-
- ret = fio_sgio_doio(td, io_u, 0);
-
- if (ret < 0)
- io_u->error = errno;
- else if (hdr->status) {
- io_u->resid = hdr->resid;
- io_u->error = EIO;
- }
-
- return io_u->error;
-}
-
-static struct io_u *fio_sgio_event(struct thread_data *td, int event)
-{
- struct sgio_data *sd = td->io_data;
-
- return sd->events[event];
-}
-
-static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
-{
- struct sgio_data *sd = td->io_data;
- struct io_u *io_u;
- struct sg_io_hdr *hdr;
- unsigned char buf[8];
- int ret;
-
- io_u = __get_io_u(td);
- assert(io_u);
-
- hdr = &io_u->hdr;
- sgio_hdr_init(sd, hdr, io_u, 0);
- memset(buf, 0, sizeof(buf));
-
- hdr->cmdp[0] = 0x25;
- hdr->dxfer_direction = SG_DXFER_FROM_DEV;
- hdr->dxferp = buf;
- hdr->dxfer_len = sizeof(buf);
-
- ret = fio_sgio_doio(td, io_u, 1);
- if (ret) {
- put_io_u(td, io_u);
- return ret;
- }
-
- *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
- put_io_u(td, io_u);
- return 0;
-}
-
-int fio_sgio_init(struct thread_data *td)
-{
- struct sgio_data *sd;
- unsigned int bs;
- int ret;
-
- sd = malloc(sizeof(*sd));
- sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
- sd->events = malloc(td->iodepth * sizeof(struct io_u *));
- td->io_data = sd;
-
- if (td->filetype == FIO_TYPE_BD) {
- if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
- td_verror(td, errno);
- return 1;
- }
- } else if (td->filetype == FIO_TYPE_CHAR) {
- int version;
-
- if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
- td_verror(td, errno);
- return 1;
- }
-
- ret = fio_sgio_get_bs(td, &bs);
- if (ret)
- return ret;
- } else {
- fprintf(stderr, "ioengine sgio only works on block devices\n");
- return 1;
- }
-
- sd->bs = bs;
-
- td->io_prep = fio_sgio_prep;
- td->io_queue = fio_sgio_queue;
-
- if (td->filetype == FIO_TYPE_BD)
- td->io_getevents = fio_syncio_getevents;
- else
- td->io_getevents = fio_sgio_getevents;
-
- td->io_event = fio_sgio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_sgio_sync;
-
- /*
- * we want to do it, regardless of whether odirect is set or not
- */
- td->override_sync = 1;
- return 0;
-}
-
-#else /* FIO_HAVE_SGIO */
-
-int fio_sgio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_SGIO */
-
-#ifdef FIO_HAVE_SPLICE
-struct spliceio_data {
- struct io_u *last_io_u;
- int pipe[2];
-};
-
-static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
-{
- struct spliceio_data *sd = td->io_data;
-
- assert(event == 0);
-
- return sd->last_io_u;
-}
-
-/*
- * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
- * So just splice the data from the file into the pipe, and use regular
- * read to fill the buffer. Doesn't make a lot of sense, but...
- */
-static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_data;
- int ret, ret2, buflen;
- off_t offset;
- void *p;
-
- offset = io_u->offset;
- buflen = io_u->buflen;
- p = io_u->buf;
- while (buflen) {
- int this_len = buflen;
-
- if (this_len > SPLICE_DEF_SIZE)
- this_len = SPLICE_DEF_SIZE;
-
- ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
- if (ret < 0) {
- if (errno == ENODATA || errno == EAGAIN)
- continue;
-
- return errno;
- }
-
- buflen -= ret;
-
- while (ret) {
- ret2 = read(sd->pipe[0], p, ret);
- if (ret2 < 0)
- return errno;
-
- ret -= ret2;
- p += ret2;
- }
- }
-
- return io_u->buflen;
-}
-
-/*
- * For splice writing, we can vmsplice our data buffer directly into a
- * pipe and then splice that to a file.
- */
-static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_data;
- struct iovec iov[1] = {
- {
- .iov_base = io_u->buf,
- .iov_len = io_u->buflen,
- }
- };
- struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
- off_t off = io_u->offset;
- int ret, ret2;
-
- while (iov[0].iov_len) {
- if (poll(&pfd, 1, -1) < 0)
- return errno;
-
- ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
- if (ret < 0)
- return errno;
-
- iov[0].iov_len -= ret;
- iov[0].iov_base += ret;
-
- while (ret) {
- ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
- if (ret2 < 0)
- return errno;
-
- ret -= ret2;
- }
- }
-
- return io_u->buflen;
-}
-
-static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_data;
- int ret;
-
- if (io_u->ddir == DDIR_READ)
- ret = fio_splice_read(td, io_u);
- else
- ret = fio_splice_write(td, io_u);
-
- if ((unsigned int) ret != io_u->buflen) {
- if (ret > 0) {
- io_u->resid = io_u->buflen - ret;
- io_u->error = ENODATA;
- } else
- io_u->error = errno;
- }
-
- if (!io_u->error)
- sd->last_io_u = io_u;
-
- return io_u->error;
-}
-
-static void fio_spliceio_cleanup(struct thread_data *td)
-{
- struct spliceio_data *sd = td->io_data;
-
- if (sd) {
- close(sd->pipe[0]);
- close(sd->pipe[1]);
- free(sd);
- td->io_data = NULL;
- }
-}
-
-int fio_spliceio_init(struct thread_data *td)
-{
- struct spliceio_data *sd = malloc(sizeof(*sd));
-
- td->io_queue = fio_spliceio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_spliceio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_spliceio_cleanup;
- td->io_sync = fio_io_sync;
-
- sd->last_io_u = NULL;
- if (pipe(sd->pipe) < 0) {
- td_verror(td, errno);
- free(sd);
- return 1;
- }
-
- td->io_data = sd;
- return 0;
-}
-
-#else /* FIO_HAVE_SPLICE */
-
-int fio_spliceio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_SPLICE */
+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include "list.h"
-#include "fio.h"
-
-void write_iolog_put(struct thread_data *td, struct io_u *io_u)
-{
- fprintf(td->iolog_f, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen);
-}
-
-int read_iolog_get(struct thread_data *td, struct io_u *io_u)
-{
- struct io_piece *ipo;
-
- if (!list_empty(&td->io_log_list)) {
- ipo = list_entry(td->io_log_list.next, struct io_piece, list);
- list_del(&ipo->list);
- io_u->offset = ipo->offset;
- io_u->buflen = ipo->len;
- io_u->ddir = ipo->ddir;
- free(ipo);
- return 0;
- }
-
- return 1;
-}
-
-void prune_io_piece_log(struct thread_data *td)
-{
- struct io_piece *ipo;
-
- while (!list_empty(&td->io_hist_list)) {
- ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
-
- list_del(&ipo->list);
- free(ipo);
- }
-}
-
-/*
- * log a succesful write, so we can unwind the log for verify
- */
-void log_io_piece(struct thread_data *td, struct io_u *io_u)
-{
- struct io_piece *ipo = malloc(sizeof(struct io_piece));
- struct list_head *entry;
-
- INIT_LIST_HEAD(&ipo->list);
- ipo->offset = io_u->offset;
- ipo->len = io_u->buflen;
-
- /*
- * for random io where the writes extend the file, it will typically
- * be laid out with the block scattered as written. it's faster to
- * read them in in that order again, so don't sort
- */
- if (td->sequential || !td->overwrite) {
- list_add_tail(&ipo->list, &td->io_hist_list);
- return;
- }
-
- /*
- * for random io, sort the list so verify will run faster
- */
- entry = &td->io_hist_list;
- while ((entry = entry->prev) != &td->io_hist_list) {
- struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
-
- if (__ipo->offset < ipo->offset)
- break;
- }
-
- list_add(&ipo->list, entry);
-}
-
-void write_iolog_close(struct thread_data *td)
-{
- fflush(td->iolog_f);
- fclose(td->iolog_f);
- free(td->iolog_buf);
-}
-
-int init_iolog(struct thread_data *td)
-{
- unsigned long long offset;
- unsigned int bytes;
- char *str, *p;
- FILE *f;
- int rw, i, reads, writes;
-
- if (!td->read_iolog && !td->write_iolog)
- return 0;
-
- if (td->read_iolog)
- f = fopen(td->iolog_file, "r");
- else
- f = fopen(td->iolog_file, "w");
-
- if (!f) {
- perror("fopen iolog");
- printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog);
- return 1;
- }
-
- /*
- * That's it for writing, setup a log buffer and we're done.
- */
- if (td->write_iolog) {
- td->iolog_f = f;
- td->iolog_buf = malloc(8192);
- setvbuf(f, td->iolog_buf, _IOFBF, 8192);
- return 0;
- }
-
- /*
- * Read in the read iolog and store it, reuse the infrastructure
- * for doing verifications.
- */
- str = malloc(4096);
- reads = writes = i = 0;
- while ((p = fgets(str, 4096, f)) != NULL) {
- struct io_piece *ipo;
-
- if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) {
- fprintf(stderr, "bad iolog: %s\n", p);
- continue;
- }
- if (rw == DDIR_READ)
- reads++;
- else if (rw == DDIR_WRITE)
- writes++;
- else {
- fprintf(stderr, "bad ddir: %d\n", rw);
- continue;
- }
-
- ipo = malloc(sizeof(*ipo));
- INIT_LIST_HEAD(&ipo->list);
- ipo->offset = offset;
- ipo->len = bytes;
- if (bytes > td->max_bs)
- td->max_bs = bytes;
- ipo->ddir = rw;
- list_add_tail(&ipo->list, &td->io_log_list);
- i++;
- }
-
- free(str);
- fclose(f);
-
- if (!i)
- return 1;
-
- if (reads && !writes)
- td->ddir = DDIR_READ;
- else if (!reads && writes)
- td->ddir = DDIR_READ;
- else
- td->iomix = 1;
-
- return 0;
-}
+++ /dev/null
-#ifndef FIO_LOG_H
-#define FIO_LOG_H
-
-extern int read_iolog_get(struct thread_data *, struct io_u *);
-extern void write_iolog_put(struct thread_data *, struct io_u *);
-extern int init_iolog(struct thread_data *td);
-extern void log_io_piece(struct thread_data *, struct io_u *);
-extern void prune_io_piece_log(struct thread_data *);
-extern void write_iolog_close(struct thread_data *);
-
-#endif
+++ /dev/null
-#include <stdio.h>
-#include <string.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <dirent.h>
-#include <libgen.h>
-#include <math.h>
-
-#include "fio.h"
-#include "fio-time.h"
-
-static struct itimerval itimer;
-static LIST_HEAD(disk_list);
-
-static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus)
-{
- unsigned in_flight;
- char line[256];
- FILE *f;
- char *p;
-
- f = fopen(du->path, "r");
- if (!f)
- return 1;
-
- p = fgets(line, sizeof(line), f);
- if (!p) {
- fclose(f);
- return 1;
- }
-
- if (sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], &dus->merges[0], &dus->sectors[0], &dus->ticks[0], &dus->ios[1], &dus->merges[1], &dus->sectors[1], &dus->ticks[1], &in_flight, &dus->io_ticks, &dus->time_in_queue) != 11) {
- fclose(f);
- return 1;
- }
-
- fclose(f);
- return 0;
-}
-
-static void update_io_tick_disk(struct disk_util *du)
-{
- struct disk_util_stat __dus, *dus, *ldus;
- struct timeval t;
-
- if (get_io_ticks(du, &__dus))
- return;
-
- dus = &du->dus;
- ldus = &du->last_dus;
-
- dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]);
- dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]);
- dus->ios[0] += (__dus.ios[0] - ldus->ios[0]);
- dus->ios[1] += (__dus.ios[1] - ldus->ios[1]);
- dus->merges[0] += (__dus.merges[0] - ldus->merges[0]);
- dus->merges[1] += (__dus.merges[1] - ldus->merges[1]);
- dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]);
- dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]);
- dus->io_ticks += (__dus.io_ticks - ldus->io_ticks);
- dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue);
-
- gettimeofday(&t, NULL);
- du->msec += mtime_since(&du->time, &t);
- memcpy(&du->time, &t, sizeof(t));
- memcpy(ldus, &__dus, sizeof(__dus));
-}
-
-void update_io_ticks(void)
-{
- struct list_head *entry;
- struct disk_util *du;
-
- list_for_each(entry, &disk_list) {
- du = list_entry(entry, struct disk_util, list);
- update_io_tick_disk(du);
- }
-}
-
-static int disk_util_exists(dev_t dev)
-{
- struct list_head *entry;
- struct disk_util *du;
-
- list_for_each(entry, &disk_list) {
- du = list_entry(entry, struct disk_util, list);
-
- if (du->dev == dev)
- return 1;
- }
-
- return 0;
-}
-
-static void disk_util_add(dev_t dev, char *path)
-{
- struct disk_util *du = malloc(sizeof(*du));
-
- memset(du, 0, sizeof(*du));
- INIT_LIST_HEAD(&du->list);
- sprintf(du->path, "%s/stat", path);
- du->name = strdup(basename(path));
- du->dev = dev;
-
- gettimeofday(&du->time, NULL);
- get_io_ticks(du, &du->last_dus);
-
- list_add_tail(&du->list, &disk_list);
-}
-
-static int check_dev_match(dev_t dev, char *path)
-{
- unsigned int major, minor;
- char line[256], *p;
- FILE *f;
-
- f = fopen(path, "r");
- if (!f) {
- perror("open path");
- return 1;
- }
-
- p = fgets(line, sizeof(line), f);
- if (!p) {
- fclose(f);
- return 1;
- }
-
- if (sscanf(p, "%u:%u", &major, &minor) != 2) {
- fclose(f);
- return 1;
- }
-
- if (((major << 8) | minor) == dev) {
- fclose(f);
- return 0;
- }
-
- fclose(f);
- return 1;
-}
-
-static int find_block_dir(dev_t dev, char *path)
-{
- struct dirent *dir;
- struct stat st;
- int found = 0;
- DIR *D;
-
- D = opendir(path);
- if (!D)
- return 0;
-
- while ((dir = readdir(D)) != NULL) {
- char full_path[256];
-
- if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
- continue;
- if (!strcmp(dir->d_name, "device"))
- continue;
-
- sprintf(full_path, "%s/%s", path, dir->d_name);
-
- if (!strcmp(dir->d_name, "dev")) {
- if (!check_dev_match(dev, full_path)) {
- found = 1;
- break;
- }
- }
-
- if (stat(full_path, &st) == -1) {
- perror("stat");
- break;
- }
-
- if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
- continue;
-
- found = find_block_dir(dev, full_path);
- if (found) {
- strcpy(path, full_path);
- break;
- }
- }
-
- closedir(D);
- return found;
-}
-
-void init_disk_util(struct thread_data *td)
-{
- struct stat st;
- char foo[256], tmp[256];
- dev_t dev;
- char *p;
-
- if (!td->do_disk_util)
- return;
-
- if (!stat(td->file_name, &st)) {
- if (S_ISBLK(st.st_mode))
- dev = st.st_rdev;
- else
- dev = st.st_dev;
- } else {
- /*
- * must be a file, open "." in that path
- */
- strcpy(foo, td->file_name);
- p = dirname(foo);
- if (stat(p, &st)) {
- perror("disk util stat");
- return;
- }
-
- dev = st.st_dev;
- }
-
- if (disk_util_exists(dev))
- return;
-
- sprintf(foo, "/sys/block");
- if (!find_block_dir(dev, foo))
- return;
-
- /*
- * If there's a ../queue/ directory there, we are inside a partition.
- * Check if that is the case and jump back. For loop/md/dm etc we
- * are already in the right spot.
- */
- sprintf(tmp, "%s/../queue", foo);
- if (!stat(tmp, &st)) {
- p = dirname(foo);
- sprintf(tmp, "%s/queue", p);
- if (stat(tmp, &st)) {
- fprintf(stderr, "unknown sysfs layout\n");
- return;
- }
- sprintf(foo, "%s", p);
- }
-
- td->sysfs_root = strdup(foo);
- disk_util_add(dev, foo);
-}
-
-void disk_util_timer_arm(void)
-{
- itimer.it_value.tv_sec = 0;
- itimer.it_value.tv_usec = DISK_UTIL_MSEC * 1000;
- setitimer(ITIMER_REAL, &itimer, NULL);
-}
-
-void update_rusage_stat(struct thread_data *td)
-{
- if (!(td->runtime[0] + td->runtime[1]))
- return;
-
- getrusage(RUSAGE_SELF, &td->ru_end);
-
- td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
- td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
- td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
-
-
- memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end));
-}
-
-static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
- double *mean, double *dev)
-{
- double n;
-
- if (is->samples == 0)
- return 0;
-
- *min = is->min_val;
- *max = is->max_val;
-
- n = (double) is->samples;
- *mean = (double) is->val / n;
- *dev = sqrt(((double) is->val_sq - (*mean * *mean) / n) / (n - 1));
- if (!(*min + *max) && !(*mean + *dev))
- return 0;
-
- return 1;
-}
-
-static void show_group_stats(struct group_run_stats *rs, int id)
-{
- printf("\nRun status group %d (all jobs):\n", id);
-
- if (rs->max_run[DDIR_READ])
- printf(" READ: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[0] >> 10, rs->agg[0], rs->min_bw[0], rs->max_bw[0], rs->min_run[0], rs->max_run[0]);
- if (rs->max_run[DDIR_WRITE])
- printf(" WRITE: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[1] >> 10, rs->agg[1], rs->min_bw[1], rs->max_bw[1], rs->min_run[1], rs->max_run[1]);
-}
-
-static void show_disk_util(void)
-{
- struct disk_util_stat *dus;
- struct list_head *entry;
- struct disk_util *du;
- double util;
-
- printf("\nDisk stats (read/write):\n");
-
- list_for_each(entry, &disk_list) {
- du = list_entry(entry, struct disk_util, list);
- dus = &du->dus;
-
- util = (double) 100 * du->dus.io_ticks / (double) du->msec;
- if (util > 100.0)
- util = 100.0;
-
- printf(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, in_queue=%u, util=%3.2f%%\n", du->name, dus->ios[0], dus->ios[1], dus->merges[0], dus->merges[1], dus->ticks[0], dus->ticks[1], dus->time_in_queue, util);
- }
-}
-
-static void show_ddir_status(struct thread_data *td, struct group_run_stats *rs,
- int ddir)
-{
- char *ddir_str[] = { "read ", "write" };
- unsigned long min, max;
- unsigned long long bw;
- double mean, dev;
-
- if (!td->runtime[ddir])
- return;
-
- bw = td->io_bytes[ddir] / td->runtime[ddir];
- printf(" %s: io=%6lluMiB, bw=%6lluKiB/s, runt=%6lumsec\n", ddir_str[ddir], td->io_bytes[ddir] >> 20, bw, td->runtime[ddir]);
-
- if (calc_lat(&td->slat_stat[ddir], &min, &max, &mean, &dev))
- printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
-
- if (calc_lat(&td->clat_stat[ddir], &min, &max, &mean, &dev))
- printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
-
- if (calc_lat(&td->bw_stat[ddir], &min, &max, &mean, &dev)) {
- double p_of_agg;
-
- p_of_agg = mean * 100 / (double) rs->agg[ddir];
- printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", min, max, p_of_agg, mean, dev);
- }
-}
-
-static void show_thread_status(struct thread_data *td,
- struct group_run_stats *rs)
-{
- double usr_cpu, sys_cpu;
-
- if (!(td->io_bytes[0] + td->io_bytes[1]) && !td->error)
- return;
-
- printf("Client%d (groupid=%d): err=%2d:\n", td->thread_number, td->groupid, td->error);
-
- show_ddir_status(td, rs, td->ddir);
- if (td->io_bytes[td->ddir ^ 1])
- show_ddir_status(td, rs, td->ddir ^ 1);
-
- if (td->runtime[0] + td->runtime[1]) {
- double runt = td->runtime[0] + td->runtime[1];
-
- usr_cpu = (double) td->usr_time * 100 / runt;
- sys_cpu = (double) td->sys_time * 100 / runt;
- } else {
- usr_cpu = 0;
- sys_cpu = 0;
- }
-
- printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
-}
-
-void show_run_stats(void)
-{
- struct group_run_stats *runstats, *rs;
- struct thread_data *td;
- int i;
-
- runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1));
-
- for (i = 0; i < groupid + 1; i++) {
- rs = &runstats[i];
-
- memset(rs, 0, sizeof(*rs));
- rs->min_bw[0] = rs->min_run[0] = ~0UL;
- rs->min_bw[1] = rs->min_run[1] = ~0UL;
- }
-
- for (i = 0; i < thread_number; i++) {
- unsigned long long rbw, wbw;
-
- td = &threads[i];
-
- if (td->error) {
- printf("Client%d: %s\n", td->thread_number, td->verror);
- continue;
- }
-
- rs = &runstats[td->groupid];
-
- if (td->runtime[0] < rs->min_run[0] || !rs->min_run[0])
- rs->min_run[0] = td->runtime[0];
- if (td->runtime[0] > rs->max_run[0])
- rs->max_run[0] = td->runtime[0];
- if (td->runtime[1] < rs->min_run[1] || !rs->min_run[1])
- rs->min_run[1] = td->runtime[1];
- if (td->runtime[1] > rs->max_run[1])
- rs->max_run[1] = td->runtime[1];
-
- rbw = wbw = 0;
- if (td->runtime[0])
- rbw = td->io_bytes[0] / (unsigned long long) td->runtime[0];
- if (td->runtime[1])
- wbw = td->io_bytes[1] / (unsigned long long) td->runtime[1];
-
- if (rbw < rs->min_bw[0])
- rs->min_bw[0] = rbw;
- if (wbw < rs->min_bw[1])
- rs->min_bw[1] = wbw;
- if (rbw > rs->max_bw[0])
- rs->max_bw[0] = rbw;
- if (wbw > rs->max_bw[1])
- rs->max_bw[1] = wbw;
-
- rs->io_kb[0] += td->io_bytes[0] >> 10;
- rs->io_kb[1] += td->io_bytes[1] >> 10;
- }
-
- for (i = 0; i < groupid + 1; i++) {
- rs = &runstats[i];
-
- if (rs->max_run[0])
- rs->agg[0] = (rs->io_kb[0]*1024) / rs->max_run[0];
- if (rs->max_run[1])
- rs->agg[1] = (rs->io_kb[1]*1024) / rs->max_run[1];
- }
-
- /*
- * don't overwrite last signal output
- */
- printf("\n");
-
- for (i = 0; i < thread_number; i++) {
- td = &threads[i];
- rs = &runstats[td->groupid];
-
- show_thread_status(td, rs);
- }
-
- for (i = 0; i < groupid + 1; i++)
- show_group_stats(&runstats[i], i);
-
- show_disk_util();
-}
-
-static inline void add_stat_sample(struct io_stat *is, unsigned long val)
-{
- if (val > is->max_val)
- is->max_val = val;
- if (val < is->min_val)
- is->min_val = val;
-
- is->val += val;
- is->val_sq += val * val;
- is->samples++;
-}
-
-static void add_log_sample(struct thread_data *td, struct io_log *iolog,
- unsigned long val, int ddir)
-{
- if (iolog->nr_samples == iolog->max_samples) {
- int new_size = sizeof(struct io_sample) * iolog->max_samples*2;
-
- iolog->log = realloc(iolog->log, new_size);
- iolog->max_samples <<= 1;
- }
-
- iolog->log[iolog->nr_samples].val = val;
- iolog->log[iolog->nr_samples].time = mtime_since_now(&td->epoch);
- iolog->log[iolog->nr_samples].ddir = ddir;
- iolog->nr_samples++;
-}
-
-void add_clat_sample(struct thread_data *td, int ddir, unsigned long msec)
-{
- add_stat_sample(&td->clat_stat[ddir], msec);
-
- if (td->clat_log)
- add_log_sample(td, td->clat_log, msec, ddir);
-}
-
-void add_slat_sample(struct thread_data *td, int ddir, unsigned long msec)
-{
- add_stat_sample(&td->slat_stat[ddir], msec);
-
- if (td->slat_log)
- add_log_sample(td, td->slat_log, msec, ddir);
-}
-
-void add_bw_sample(struct thread_data *td, int ddir)
-{
- unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]);
- unsigned long rate;
-
- if (spent < td->bw_avg_time)
- return;
-
- rate = (td->this_io_bytes[ddir] - td->stat_io_bytes[ddir]) / spent;
- add_stat_sample(&td->bw_stat[ddir], rate);
-
- if (td->bw_log)
- add_log_sample(td, td->bw_log, rate, ddir);
-
- gettimeofday(&td->stat_sample_time[ddir], NULL);
- td->stat_io_bytes[ddir] = td->this_io_bytes[ddir];
-}
-
-
+++ /dev/null
-#ifndef FIO_STAT_H
-#define FIO_STAT_H
-
-extern void add_clat_sample(struct thread_data *, int, unsigned long);
-extern void add_slat_sample(struct thread_data *, int, unsigned long);
-extern void add_bw_sample(struct thread_data *, int);
-extern void show_run_stats(void);
-extern void init_disk_util(struct thread_data *);
-extern void update_rusage_stat(struct thread_data *);
-extern void update_io_ticks(void);
-extern void disk_util_timer_arm(void);
-#endif
+++ /dev/null
-#include <time.h>
-#include <sys/time.h>
-
-#include "fio.h"
-
-unsigned long utime_since(struct timeval *s, struct timeval *e)
-{
- double sec, usec;
-
- sec = e->tv_sec - s->tv_sec;
- usec = e->tv_usec - s->tv_usec;
- if (sec > 0 && usec < 0) {
- sec--;
- usec += 1000000;
- }
-
- sec *= (double) 1000000;
-
- return sec + usec;
-}
-
-static unsigned long utime_since_now(struct timeval *s)
-{
- struct timeval t;
-
- gettimeofday(&t, NULL);
- return utime_since(s, &t);
-}
-
-unsigned long mtime_since(struct timeval *s, struct timeval *e)
-{
- double sec, usec;
-
- sec = e->tv_sec - s->tv_sec;
- usec = e->tv_usec - s->tv_usec;
- if (sec > 0 && usec < 0) {
- sec--;
- usec += 1000000;
- }
-
- sec *= (double) 1000;
- usec /= (double) 1000;
-
- return sec + usec;
-}
-
-unsigned long mtime_since_now(struct timeval *s)
-{
- struct timeval t;
-
- gettimeofday(&t, NULL);
- return mtime_since(s, &t);
-}
-
-unsigned long time_since_now(struct timeval *s)
-{
- return mtime_since_now(s) / 1000;
-}
-
-/*
- * busy looping version for the last few usec
- */
-static void __usec_sleep(unsigned int usec)
-{
- struct timeval start;
-
- gettimeofday(&start, NULL);
- while (utime_since_now(&start) < usec)
- nop;
-}
-
-void usec_sleep(struct thread_data *td, unsigned long usec)
-{
- struct timespec req, rem;
-
- req.tv_sec = usec / 1000000;
- req.tv_nsec = usec * 1000 - req.tv_sec * 1000000;
-
- do {
- if (usec < 5000) {
- __usec_sleep(usec);
- break;
- }
-
- rem.tv_sec = rem.tv_nsec = 0;
- if (nanosleep(&req, &rem) < 0)
- break;
-
- if ((rem.tv_sec + rem.tv_nsec) == 0)
- break;
-
- req.tv_nsec = rem.tv_nsec;
- req.tv_sec = rem.tv_sec;
-
- usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
- } while (!td->terminate);
-}
-
-void rate_throttle(struct thread_data *td, unsigned long time_spent,
- unsigned int bytes)
-{
- unsigned long usec_cycle;
-
- if (!td->rate)
- return;
-
- usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
-
- if (time_spent < usec_cycle) {
- unsigned long s = usec_cycle - time_spent;
-
- td->rate_pending_usleep += s;
- if (td->rate_pending_usleep >= 100000) {
- usec_sleep(td, td->rate_pending_usleep);
- td->rate_pending_usleep = 0;
- }
- } else {
- long overtime = time_spent - usec_cycle;
-
- td->rate_pending_usleep -= overtime;
- }
-}
+++ /dev/null
-#ifndef FIO_TIME_H
-#define FIO_TIME_H
-
-extern unsigned long utime_since(struct timeval *, struct timeval *);
-extern unsigned long mtime_since(struct timeval *, struct timeval *);
-extern unsigned long mtime_since_now(struct timeval *);
-extern unsigned long time_since_now(struct timeval *);
-extern void usec_sleep(struct thread_data *, unsigned long);
-
-extern void rate_throttle(struct thread_data *, unsigned long, unsigned int);
-
-#endif
#include "fio.h"
#include "os.h"
-#include "fio-time.h"
-#include "fio-stat.h"
-#include "fio-log.h"
-
#define MASK (4095)
#define ALIGN(buf) (char *) (((unsigned long) (buf) + MASK) & ~(MASK))
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
+/*
+ * Log exports
+ */
+extern int read_iolog_get(struct thread_data *, struct io_u *);
+extern void write_iolog_put(struct thread_data *, struct io_u *);
+extern int init_iolog(struct thread_data *td);
+extern void log_io_piece(struct thread_data *, struct io_u *);
+extern void prune_io_piece_log(struct thread_data *);
+extern void write_iolog_close(struct thread_data *);
+
+/*
+ * Stat exports (latency/bandwidth samples, run stats, disk utilization)
+ */
+extern void add_clat_sample(struct thread_data *, int, unsigned long);
+extern void add_slat_sample(struct thread_data *, int, unsigned long);
+extern void add_bw_sample(struct thread_data *, int);
+extern void show_run_stats(void);
+extern void init_disk_util(struct thread_data *);
+extern void update_rusage_stat(struct thread_data *);
+extern void update_io_ticks(void);
+extern void disk_util_timer_arm(void);
+
+/*
+ * Time functions
+ */
+extern unsigned long utime_since(struct timeval *, struct timeval *);
+extern unsigned long mtime_since(struct timeval *, struct timeval *);
+extern unsigned long mtime_since_now(struct timeval *);
+extern unsigned long time_since_now(struct timeval *);
+extern void usec_sleep(struct thread_data *, unsigned long);
+extern void rate_throttle(struct thread_data *, unsigned long, unsigned int);
+
#endif
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "fio.h"
+
+#define DEF_BS (4096)
+#define DEF_TIMEOUT (0)
+#define DEF_RATE_CYCLE (1000)
+#define DEF_ODIRECT (1)
+#define DEF_IO_ENGINE (FIO_SYNCIO)
+#define DEF_IO_ENGINE_NAME "sync"
+#define DEF_SEQUENTIAL (1)
+#define DEF_RAND_REPEAT (1)
+#define DEF_OVERWRITE (1)
+#define DEF_CREATE (1)
+#define DEF_INVALIDATE (1)
+#define DEF_SYNCIO (0)
+#define DEF_RANDSEED (0xb1899bedUL)
+#define DEF_BWAVGTIME (500)
+#define DEF_CREATE_SER (1)
+#define DEF_CREATE_FSYNC (1)
+#define DEF_LOOPS (1)
+#define DEF_VERIFY (0)
+#define DEF_STONEWALL (0)
+#define DEF_NUMJOBS (1)
+#define DEF_USE_THREAD (0)
+#define DEF_FILE_SIZE (1024 * 1024 * 1024UL)
+#define DEF_ZONE_SIZE (0)
+#define DEF_ZONE_SKIP (0)
+#define DEF_RWMIX_CYCLE (500)
+#define DEF_RWMIX_READ (50)
+#define DEF_NICE (0)
+
+/* version banner printed by usage() and by the -v switch */
+static char fio_version_string[] = "fio 1.4";
+
+/* -r switch; when set, init_random_state() seeds with DEF_RANDSEED */
+static int repeatable = DEF_RAND_REPEAT;
+/* job file path, taken from -f or from the last command line argument */
+static char *ini_file;
+/* capacity of threads[]; halved by setup_thread_area() when shmget says EINVAL */
+static int max_jobs = MAX_JOBS;
+
+/* option template: filled by fill_def_thread(), handed out for [global] */
+struct thread_data def_thread;
+/* job table, attached from a shm segment in setup_thread_area() */
+struct thread_data *threads = NULL;
+
+/* set by the -R, -l and -w switches respectively */
+int rate_quit = 0;
+int write_lat_log = 0;
+int write_bw_log = 0;
+/* set by the "exitall" job file keyword */
+int exitall_on_terminate = 0;
+/* set by the "lockmem" job file option */
+unsigned long long mlock_size = 0;
+
+/*
+ * Derive the per-io pacing interval from the requested rate.
+ *
+ * td->rate is multiplied by 1024 and divided by td->min_bs to get the
+ * number of ios per second; td->rate_usec_cycle is the microsecond
+ * budget of each io. Returns 0 on success (also when no rate is set)
+ * and -1 when the settings cannot be honoured.
+ */
+static int setup_rate(struct thread_data *td)
+{
+	int nr_reads_per_sec;
+
+	if (!td->rate)
+		return 0;
+
+	if (td->rate < td->ratemin) {
+		fprintf(stderr, "min rate larger than nominal rate\n");
+		return -1;
+	}
+
+	/* a zero block size would divide by zero below */
+	if (!td->min_bs) {
+		fprintf(stderr, "rate given with a zero block size\n");
+		return -1;
+	}
+
+	/*
+	 * a rate below one block per second rounds down to 0 ios/sec,
+	 * which would divide by zero when computing the cycle time
+	 */
+	nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
+	if (!nr_reads_per_sec) {
+		fprintf(stderr, "rate lower than supported\n");
+		return -1;
+	}
+
+	td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
+	td->rate_pending_usleep = 0;
+	return 0;
+}
+
+/*
+ * Allocate an empty sample log with room for 1024 entries and hand it
+ * back through 'log'.
+ */
+static void setup_log(struct io_log **log)
+{
+	struct io_log *new_log;
+
+	new_log = malloc(sizeof(*new_log));
+	new_log->max_samples = 1024;
+	new_log->nr_samples = 0;
+	new_log->log = malloc(new_log->max_samples * sizeof(struct io_sample));
+
+	*log = new_log;
+}
+
+/*
+ * Dump a sample log to "client<thread_number>_<name>.log", one
+ * "time, val, ddir" line per sample, then free the log. If the file
+ * cannot be opened the log is left untouched (not freed).
+ */
+void finish_log(struct thread_data *td, struct io_log *log, const char *name)
+{
+	char file_name[256];
+	FILE *f;
+	unsigned int i;
+
+	/* bound the name by the real buffer size, not a separate magic number */
+	snprintf(file_name, sizeof(file_name), "client%d_%s.log", td->thread_number, name);
+	f = fopen(file_name, "w");
+	if (!f) {
+		perror("fopen log");
+		return;
+	}
+
+	for (i = 0; i < log->nr_samples; i++)
+		fprintf(f, "%lu, %lu, %u\n", log->log[i].time, log->log[i].val, log->log[i].ddir);
+
+	fclose(f);
+	free(log->log);
+	free(log);
+}
+
+/*
+ * Hand out the thread_data to fill for a new job file section.
+ *
+ * A global section gets the shared option template (def_thread). Any
+ * other section gets the next free slot of threads[], or NULL when
+ * max_jobs slots are already in use. The slot starts out as a copy of
+ * 'parent' when one is given, zeroed otherwise.
+ */
+static struct thread_data *get_new_job(int global, struct thread_data *parent)
+{
+	struct thread_data *td;
+
+	if (global)
+		return &def_thread;
+	if (thread_number >= max_jobs)
+		return NULL;
+
+	td = &threads[thread_number++];
+
+	/*
+	 * The struct assignment carries over every option, including the
+	 * embedded cpumask and io_engine_name arrays, so no field by field
+	 * copy is needed afterwards. The old per-field copy dereferenced
+	 * 'parent' even when it was NULL.
+	 */
+	if (parent)
+		*td = *parent;
+	else
+		memset(td, 0, sizeof(*td));
+
+	/* per-job state that must not be inherited */
+	td->fd = -1;
+	td->thread_number = thread_number;
+
+	return td;
+}
+
+/*
+ * Give back the job slot owned by 'td': wipe it and drop the job count.
+ */
+static void put_job(struct thread_data *td)
+{
+	struct thread_data *slot = &threads[td->thread_number - 1];
+
+	memset(slot, 0, sizeof(*td));
+	thread_number--;
+}
+
+/*
+ * Finish setting up a job whose options have been parsed.
+ *
+ * Validates the io engine, derives the file name and file type from
+ * 'jobname', resets the statistics minima, assigns the group id, sets
+ * up rate limiting and logs, prints the job banner and finally clones
+ * the job numjobs - 1 times. 'job_add_num' is 0 for the first job of a
+ * section (full banner) and 1 for the clone that prints "...".
+ *
+ * Returns 0 on success, 1 when the io engine is not compiled in and -1
+ * on any later failure, in which case the job slot is released.
+ */
+static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
+{
+	char *ddir_str[] = { "read", "write", "randread", "randwrite",
+				"rw", NULL, "randrw" };
+	struct stat sb;
+	int numjobs, ddir;
+
+#ifndef FIO_HAVE_LIBAIO
+	if (td->io_engine == FIO_LIBAIO) {
+		fprintf(stderr, "Linux libaio not available\n");
+		return 1;
+	}
+#endif
+#ifndef FIO_HAVE_POSIXAIO
+	if (td->io_engine == FIO_POSIXAIO) {
+		fprintf(stderr, "posix aio not available\n");
+		return 1;
+	}
+#endif
+
+	/*
+	 * the def_thread is just for options, it's not a real job
+	 */
+	if (td == &def_thread)
+		return 0;
+
+	/* sync engines always run at depth 1, others default to 1 */
+	if (td->io_engine & FIO_SYNCIO)
+		td->iodepth = 1;
+	else {
+		if (!td->iodepth)
+			td->iodepth = 1;
+	}
+
+	/*
+	 * only really works for sequential io for now
+	 */
+	if (td->zone_size && !td->sequential)
+		td->zone_size = 0;
+
+	td->filetype = FIO_TYPE_FILE;
+	if (!stat(jobname, &sb)) {
+		if (S_ISBLK(sb.st_mode))
+			td->filetype = FIO_TYPE_BD;
+		else if (S_ISCHR(sb.st_mode))
+			td->filetype = FIO_TYPE_CHAR;
+	}
+
+	/*
+	 * regular files get a per-job suffix, devices are used as named.
+	 * All three forms are bounded by the destination size and always
+	 * NUL terminated.
+	 */
+	if (td->filetype == FIO_TYPE_FILE) {
+		if (td->directory && td->directory[0] != '\0')
+			snprintf(td->file_name, sizeof(td->file_name), "%s/%s.%d", td->directory, jobname, td->jobnum);
+		else
+			snprintf(td->file_name, sizeof(td->file_name), "%s.%d", jobname, td->jobnum);
+	} else
+		snprintf(td->file_name, sizeof(td->file_name), "%s", jobname);
+
+	sem_init(&td->mutex, 0, 0);
+
+	td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
+	td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX;
+	td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX;
+
+	/* -1U marks "no bsrange given", fall back to the plain block size */
+	if (td->min_bs == -1U)
+		td->min_bs = td->bs;
+	if (td->max_bs == -1U)
+		td->max_bs = td->bs;
+	if (td_read(td) && !td_rw(td))
+		td->verify = 0;
+
+	if (td->stonewall && td->thread_number > 1)
+		groupid++;
+
+	td->groupid = groupid;
+
+	if (setup_rate(td))
+		goto err;
+
+	if (write_lat_log) {
+		setup_log(&td->slat_log);
+		setup_log(&td->clat_log);
+	}
+	if (write_bw_log)
+		setup_log(&td->bw_log);
+
+	ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2);
+
+	if (!job_add_num)
+		printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n",
+			td->thread_number, td->groupid, ddir_str[ddir],
+			td->odirect, td->min_bs, td->max_bs, td->rate,
+			td->io_engine_name, td->iodepth);
+	else if (job_add_num == 1)
+		printf("...\n");
+
+	/*
+	 * recurse add identical jobs, clear numjobs and stonewall options
+	 * as they don't apply to sub-jobs. The "> 0" test keeps numjobs=0
+	 * from counting down through the negative numbers.
+	 */
+	numjobs = td->numjobs;
+	while (--numjobs > 0) {
+		struct thread_data *td_new = get_new_job(0, td);
+
+		if (!td_new)
+			goto err;
+
+		td_new->numjobs = 1;
+		td_new->stonewall = 0;
+		td_new->jobnum = numjobs;
+		job_add_num = numjobs - 1;
+
+		if (add_job(td_new, jobname, job_add_num))
+			goto err;
+	}
+	return 0;
+err:
+	put_job(td);
+	return -1;
+}
+
+/*
+ * Seed the per-job random generators from /dev/urandom and, for random
+ * io, allocate the zeroed block map (td->file_map / td->num_maps).
+ *
+ * When 'repeatable' is set the offset generator is seeded with
+ * DEF_RANDSEED instead of a random value. Returns 0 on success, 1 on
+ * failure with the error recorded through td_verror().
+ */
+int init_random_state(struct thread_data *td)
+{
+	unsigned long seeds[4];
+	int fd, num_maps, blocks;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd == -1) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
+		td_verror(td, EIO);
+		close(fd);
+		return 1;
+	}
+
+	close(fd);
+
+	srand48_r(seeds[0], &td->bsrange_state);
+	srand48_r(seeds[1], &td->verify_state);
+	srand48_r(seeds[2], &td->rwmix_state);
+
+	/* sequential io needs neither the block map nor the offset seed */
+	if (td->sequential)
+		return 0;
+
+	if (repeatable)
+		seeds[3] = DEF_RANDSEED;
+
+	/*
+	 * Round the map count up as well: with a plain division the last,
+	 * partially used map word was not allocated.
+	 * NOTE(review): assumes users of file_map bound themselves by
+	 * io_size rather than by num_maps * BLOCKS_PER_MAP - confirm.
+	 */
+	blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
+	num_maps = (blocks + BLOCKS_PER_MAP - 1) / BLOCKS_PER_MAP;
+	td->file_map = calloc(num_maps, sizeof(long));
+	if (!td->file_map && num_maps) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+	td->num_maps = num_maps;
+
+	srand48_r(seeds[3], &td->random_state);
+	return 0;
+}
+
+/*
+ * Turn the bits of 'cpu' into a cpu set: bit N set means cpu N is added
+ * to 'cpumask'. Does nothing without FIO_HAVE_CPU_AFFINITY.
+ *
+ * NOTE(review): 'cpumask' is received by value, so unless
+ * os_cpu_mask_t is an array or pointer type the caller's mask is not
+ * modified by this function - confirm against os.h and the call site.
+ */
+static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
+{
+#ifdef FIO_HAVE_CPU_AFFINITY
+	unsigned int i;
+
+	CPU_ZERO(&cpumask);
+
+	for (i = 0; i < sizeof(int) * 8; i++) {
+		/* unsigned constant: shifting a signed 1 into the sign bit is undefined */
+		if ((1U << i) & cpu)
+			CPU_SET(i, &cpumask);
+	}
+#endif
+}
+
+/*
+ * Map a size suffix to its multiplier: k/K = 2^10, m/M = 2^20,
+ * g/G = 2^30. Any other character means "no suffix" and yields 1.
+ */
+static unsigned long get_mult(char c)
+{
+	unsigned long mult = 1;
+
+	if (c == 'k' || c == 'K')
+		mult = 1024;
+	else if (c == 'm' || c == 'M')
+		mult = 1024 * 1024;
+	else if (c == 'g' || c == 'G')
+		mult = 1024 * 1024 * 1024;
+
+	return mult;
+}
+
+/*
+ * convert string after '=' into decimal value, noting any size suffix
+ *
+ * The last character of the value selects the multiplier (see
+ * get_mult()). Returns 0 with the result in *val, 1 when there is no
+ * '=' or the number is out of range.
+ */
+static int str_cnv(char *p, unsigned long long *val)
+{
+	char *str;
+	int len;
+
+	str = strchr(p, '=');
+	if (!str)
+		return 1;
+
+	str++;
+	len = strlen(str);
+
+	/*
+	 * parse at the full width of the destination, and clear errno
+	 * first so that a stale ERANGE cannot reject a valid maximum
+	 */
+	errno = 0;
+	*val = strtoull(str, NULL, 10);
+	if (*val == ULLONG_MAX && errno == ERANGE)
+		return 1;
+
+	/* an empty value has no last character to inspect */
+	if (len)
+		*val *= get_mult(str[len - 1]);
+	return 0;
+}
+
+/*
+ * If line 'p' starts with option 'name', convert its value (with size
+ * suffix) into *val. Returns 0 on a successful match and conversion,
+ * 1 otherwise.
+ */
+static int check_strcnv(char *p, char *name, unsigned long long *val)
+{
+	/*
+	 * compare the whole name; leaving out its last character let
+	 * e.g. every line starting with 'b' pass as "bs"
+	 */
+	if (strncmp(p, name, strlen(name)))
+		return 1;
+
+	return str_cnv(p, val);
+}
+
+/*
+ * Advance *p past any leading white space.
+ */
+static void strip_blank_front(char **p)
+{
+	char *s = *p;
+
+	/* ctype functions need an unsigned char value */
+	while (isspace((unsigned char) *s))
+		s++;
+
+	/* hand the new start back, otherwise the skipping has no effect */
+	*p = s;
+}
+
+/*
+ * Cut trailing white space and control characters off 'p' in place.
+ */
+static void strip_blank_end(char *p)
+{
+	char *s = p + strlen(p);
+
+	/*
+	 * 's' points one past the last kept character; stopping at 'p'
+	 * keeps an empty or all-blank string from being scanned (and
+	 * written) in front of its own start
+	 */
+	while (s > p && (isspace((unsigned char) s[-1]) ||
+			 iscntrl((unsigned char) s[-1])))
+		s--;
+
+	*s = '\0';
+}
+
+/* handler invoked with the job and the text following the '=' sign */
+typedef int (str_cb_fn)(struct thread_data *, char *);
+
+/*
+ * If line 'p' starts with option 'name', pass its value to 'cb' and
+ * return what the callback returns. Returns 1 when the line is not
+ * this option or carries no '='.
+ */
+static int check_str(char *p, char *name, str_cb_fn *cb, struct thread_data *td)
+{
+	char *value;
+
+	if (strncmp(p, name, strlen(name)) != 0)
+		return 1;
+
+	/* the name is a prefix of p, so the search can start right at p */
+	value = strchr(p, '=');
+	if (value == NULL)
+		return 1;
+
+	value++;
+	strip_blank_front(&value);
+	return cb(td, value);
+}
+
+/*
+ * If line 'p' starts with option 'name', copy its value into 'dest'.
+ * Returns 0 when stored, 1 when the line is not this option or has no
+ * '='. 'dest' must be large enough for the rest of the line.
+ */
+static int check_strstore(char *p, char *name, char *dest)
+{
+	char *value;
+
+	if (strncmp(p, name, strlen(name)) != 0)
+		return 1;
+
+	/* the name is a prefix of p, so the '=' is looked up from p itself */
+	value = strchr(p, '=');
+	if (value == NULL)
+		return 1;
+
+	value++;
+	strip_blank_front(&value);
+	strcpy(dest, value);
+
+	return 0;
+}
+
+/*
+ * Parse one end of a range: a decimal number with an optional one
+ * character size suffix. Returns 0 with the value in *val, 1 when no
+ * number could be read.
+ */
+static int __check_range(char *str, unsigned long *val)
+{
+	char suffix;
+	int fields = sscanf(str, "%lu%c", val, &suffix);
+
+	switch (fields) {
+	case 2:
+		/* number plus suffix character */
+		*val *= get_mult(suffix);
+		return 0;
+	case 1:
+		/* bare number */
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+/*
+ * If line 'p' is of the form "name=START-END", parse both ends (each
+ * may carry a size suffix) into *s and *e. Returns 0 on success, 1
+ * when the line is not this option or is malformed.
+ */
+static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
+{
+	char option[128];
+	char *str, *p1, *p2;
+
+	if (strncmp(p, name, strlen(name)))
+		return 1;
+
+	/*
+	 * the line is split in a private copy; lines are read into a far
+	 * bigger buffer, so refuse anything that does not fit here
+	 */
+	if (strlen(p) >= sizeof(option))
+		return 1;
+
+	strcpy(option, p);
+	p = option;
+
+	p += strlen(name);
+
+	str = strchr(p, '=');
+	if (!str)
+		return 1;
+
+	/*
+	 * 'p1' now holds whatever is after the '=' sign
+	 */
+	p1 = str + 1;
+
+	/*
+	 * terminate p1 at the '-' sign
+	 */
+	p = strchr(p1, '-');
+	if (!p)
+		return 1;
+
+	p2 = p + 1;
+	*p = '\0';
+
+	if (!__check_range(p1, s) && !__check_range(p2, e))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * If line 'p' is of the form "name=NUMBER", store the unsigned number
+ * in *val. Blanks are allowed around the '='. Returns 0 on success, 1
+ * when the line is a different option or has no numeric value.
+ */
+static int check_int(char *p, char *name, unsigned int *val)
+{
+	char *str;
+
+	if (strncmp(p, name, strlen(name)))
+		return 1;
+
+	/*
+	 * The name must be the complete key, i.e. be followed by '='.
+	 * A plain prefix match made "prio" swallow "prioclass=..." and
+	 * "rate" swallow "ratemin=..." and "ratecycle=...".
+	 */
+	str = p + strlen(name);
+	while (isspace((unsigned char) *str))
+		str++;
+	if (*str != '=')
+		return 1;
+
+	str++;
+
+	if (sscanf(str, "%u", val) == 1)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Test whether line 'p' starts with keyword 'name'. Returns 0 on a
+ * match and the non-zero strncmp() result otherwise.
+ */
+static int check_strset(char *p, char *name)
+{
+	size_t name_len = strlen(name);
+
+	return strncmp(p, name, name_len);
+}
+
+/*
+ * Returns 1 when 'line' holds nothing but white space and control
+ * characters, or when its first other character is ';' (a comment);
+ * returns 0 for any other line.
+ */
+static int is_empty_or_comment(char *line)
+{
+	const unsigned char *c;
+
+	/*
+	 * walk to the terminator instead of calling strlen() on every
+	 * round; unsigned char keeps the ctype calls well defined
+	 */
+	for (c = (const unsigned char *) line; *c; c++) {
+		if (*c == ';')
+			return 1;
+		if (!isspace(*c) && !iscntrl(*c))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Handler for the "rw" option: sets data direction, sequential/random
+ * and mixed mode from read/randread/write/randwrite/rw/randrw (0 and 1
+ * are accepted for read and write). Returns 0 when the value was
+ * recognized, 1 otherwise.
+ *
+ * The non-mixed settings clear td->iomix explicitly: a job may
+ * inherit iomix from the global section, and e.g. a randwrite job
+ * with iomix still set would index past the end of the direction name
+ * table used by add_job().
+ */
+static int str_rw_cb(struct thread_data *td, char *mem)
+{
+	if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) {
+		td->ddir = DDIR_READ;
+		td->iomix = 0;
+		td->sequential = 1;
+		return 0;
+	} else if (!strncmp(mem, "randread", 8)) {
+		td->ddir = DDIR_READ;
+		td->iomix = 0;
+		td->sequential = 0;
+		return 0;
+	} else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) {
+		td->ddir = DDIR_WRITE;
+		td->iomix = 0;
+		td->sequential = 1;
+		return 0;
+	} else if (!strncmp(mem, "randwrite", 9)) {
+		td->ddir = DDIR_WRITE;
+		td->iomix = 0;
+		td->sequential = 0;
+		return 0;
+	} else if (!strncmp(mem, "rw", 2)) {
+		td->ddir = 0;
+		td->iomix = 1;
+		td->sequential = 1;
+		return 0;
+	} else if (!strncmp(mem, "randrw", 6)) {
+		td->ddir = 0;
+		td->iomix = 1;
+		td->sequential = 0;
+		return 0;
+	}
+
+	fprintf(stderr, "bad data direction: %s\n", mem);
+	return 1;
+}
+
+/*
+ * Handler for the "verify" option: 0 disables verification, md5 (or 1)
+ * and crc32 select the checksum. Returns 0 when recognized, else 1.
+ */
+static int str_verify_cb(struct thread_data *td, char *mem)
+{
+	if (!strncmp(mem, "0", 1))
+		td->verify = VERIFY_NONE;
+	else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1))
+		td->verify = VERIFY_MD5;
+	else if (!strncmp(mem, "crc32", 5))
+		td->verify = VERIFY_CRC32;
+	else {
+		fprintf(stderr, "bad verify type: %s\n", mem);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for the "mem" option: picks malloc, shm or mmap as the
+ * memory type. Returns 0 when recognized, else 1.
+ */
+static int str_mem_cb(struct thread_data *td, char *mem)
+{
+	if (!strncmp(mem, "malloc", 6))
+		td->mem_type = MEM_MALLOC;
+	else if (!strncmp(mem, "shm", 3))
+		td->mem_type = MEM_SHM;
+	else if (!strncmp(mem, "mmap", 4))
+		td->mem_type = MEM_MMAP;
+	else {
+		fprintf(stderr, "bad mem type: %s\n", mem);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Handler for the "ioengine" option: stores the engine id and its
+ * canonical name. linuxaio, aio and libaio all select libaio.
+ * Returns 0 when recognized, else 1.
+ */
+static int str_ioengine_cb(struct thread_data *td, char *str)
+{
+	const char *engine_name;
+
+	if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) ||
+	    !strncmp(str, "libaio", 6)) {
+		engine_name = "libaio";
+		td->io_engine = FIO_LIBAIO;
+	} else if (!strncmp(str, "posixaio", 8)) {
+		engine_name = "posixaio";
+		td->io_engine = FIO_POSIXAIO;
+	} else if (!strncmp(str, "sync", 4)) {
+		engine_name = "sync";
+		td->io_engine = FIO_SYNCIO;
+	} else if (!strncmp(str, "mmap", 4)) {
+		engine_name = "mmap";
+		td->io_engine = FIO_MMAPIO;
+	} else if (!strncmp(str, "sgio", 4)) {
+		engine_name = "sgio";
+		td->io_engine = FIO_SGIO;
+	} else if (!strncmp(str, "splice", 6)) {
+		engine_name = "splice";
+		td->io_engine = FIO_SPLICEIO;
+	} else {
+		fprintf(stderr, "bad ioengine type: %s\n", str);
+		return 1;
+	}
+
+	strcpy(td->io_engine_name, engine_name);
+	return 0;
+}
+
+/*
+ * Handler shared by the "iolog" and "write_iolog" options: keeps a
+ * heap copy of the given file name in td->iolog_file. Always returns 0;
+ * the strdup() result is not checked.
+ */
+static int str_iolog_cb(struct thread_data *td, char *file)
+{
+ td->iolog_file = strdup(file);
+ return 0;
+}
+
+/*
+ * Handler for the "exec_prerun" option: keeps a heap copy of the value
+ * in td->exec_prerun. Always returns 0; the strdup() result is not
+ * checked.
+ */
+static int str_prerun_cb(struct thread_data *td, char *file)
+{
+ td->exec_prerun = strdup(file);
+ return 0;
+}
+
+/*
+ * Handler for the "exec_postrun" option: keeps a heap copy of the
+ * value in td->exec_postrun. Always returns 0; the strdup() result is
+ * not checked.
+ */
+static int str_postrun_cb(struct thread_data *td, char *file)
+{
+ td->exec_postrun = strdup(file);
+ return 0;
+}
+
+/*
+ * Handler for the "ioscheduler" option: keeps a heap copy of the value
+ * in td->ioscheduler. Always returns 0; the strdup() result is not
+ * checked.
+ */
+static int str_iosched_cb(struct thread_data *td, char *file)
+{
+ td->ioscheduler = strdup(file);
+ return 0;
+}
+
+/*
+ * Parse job file 'file' and add every job it describes.
+ *
+ * Each "[name]" line opens a section; "[global]" edits the option
+ * template, any other name creates a job that starts as a copy of the
+ * template. The lines up to the next section are matched against the
+ * known options in a fixed order; the first unknown one aborts.
+ *
+ * 'off' remembers the file position after the last consumed option so
+ * that the line which ended a section (the next "[...]" header) can be
+ * read again by the outer loop.
+ *
+ * Returns 0 on success and 1 on any error. The line buffers and the
+ * file are released on every path.
+ */
+int parse_jobs_ini(char *file)
+{
+	unsigned int prioclass, prio, cpu, global, il;
+	unsigned long long ull;
+	unsigned long ul1, ul2;
+	struct thread_data *td;
+	char *string, *name, *tmpbuf;
+	fpos_t off;
+	FILE *f;
+	char *p;
+	int ret = 1;
+
+	f = fopen(file, "r");
+	if (!f) {
+		perror("fopen job file");
+		return 1;
+	}
+
+	string = malloc(4096);
+	name = malloc(256);
+	tmpbuf = malloc(4096);
+	if (!string || !name || !tmpbuf) {
+		perror("malloc");
+		goto out;
+	}
+
+	while ((p = fgets(string, 4096, f)) != NULL) {
+		if (is_empty_or_comment(p))
+			continue;
+		/* field width keeps a long section name inside name[256] */
+		if (sscanf(p, "[%255s]", name) != 1)
+			continue;
+
+		global = !strncmp(name, "global", 6);
+
+		/* %s also picked up the closing ']', cut it off */
+		name[strlen(name) - 1] = '\0';
+
+		td = get_new_job(global, &def_thread);
+		if (!td)
+			goto out;
+
+		fgetpos(f, &off);
+		while ((p = fgets(string, 4096, f)) != NULL) {
+			if (is_empty_or_comment(p))
+				continue;
+			if (strstr(p, "["))
+				break;
+			strip_blank_front(&p);
+			strip_blank_end(p);
+
+			if (!check_int(p, "prio", &prio)) {
+#ifndef FIO_HAVE_IOPRIO
+				fprintf(stderr, "io priorities not available\n");
+				goto out;
+#endif
+				td->ioprio |= prio;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "prioclass", &prioclass)) {
+#ifndef FIO_HAVE_IOPRIO
+				fprintf(stderr, "io priorities not available\n");
+				goto out;
+#endif
+				td->ioprio |= prioclass << IOPRIO_CLASS_SHIFT;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "direct", &td->odirect)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "rate", &td->rate)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "ratemin", &td->ratemin)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "ratecycle", &td->ratecycle)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "thinktime", &td->thinktime)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "cpumask", &cpu)) {
+#ifndef FIO_HAVE_CPU_AFFINITY
+				fprintf(stderr, "cpu affinity not available\n");
+				goto out;
+#endif
+				fill_cpu_mask(td->cpumask, cpu);
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "fsync", &td->fsync_blocks)) {
+				fgetpos(f, &off);
+				td->end_fsync = 1;
+				continue;
+			}
+			if (!check_int(p, "startdelay", &td->start_delay)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "timeout", &td->timeout)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "invalidate",&td->invalidate_cache)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "iodepth", &td->iodepth)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "sync", &td->sync_io)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "create_serialize", &td->create_serialize)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "create_fsync", &td->create_fsync)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "end_fsync", &td->end_fsync)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "loops", &td->loops)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "numjobs", &td->numjobs)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "overwrite", &td->overwrite)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "rwmixcycle", &td->rwmixcycle)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "rwmixread", &il)) {
+				if (il > 100)
+					il = 100;
+				td->rwmixread = il;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "rwmixwrite", &il)) {
+				if (il > 100)
+					il = 100;
+				td->rwmixread = 100 - il;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_int(p, "nice", &td->nice)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_range(p, "bsrange", &ul1, &ul2)) {
+				/* accept the two ends in either order */
+				if (ul1 > ul2) {
+					td->max_bs = ul1;
+					td->min_bs = ul2;
+				} else {
+					td->max_bs = ul2;
+					td->min_bs = ul1;
+				}
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "bs", &ull)) {
+				td->bs = ull;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "size", &td->file_size)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "offset", &td->file_offset)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "zonesize", &td->zone_size)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "zoneskip", &td->zone_skip)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strcnv(p, "lockmem", &mlock_size)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strstore(p, "directory", tmpbuf)) {
+				td->directory = strdup(tmpbuf);
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "mem", str_mem_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "verify", str_verify_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "rw", str_rw_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "ioengine", str_ioengine_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "create")) {
+				td->create_file = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "exitall")) {
+				exitall_on_terminate = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "stonewall")) {
+				td->stonewall = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_strset(p, "thread")) {
+				td->use_thread = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "iolog", str_iolog_cb, td)) {
+				td->read_iolog = 1;
+				td->write_iolog = 0;
+				fgetpos(f, &off);
+				continue;
+			}
+			/* replaying a log takes precedence over writing one */
+			if (!td->read_iolog &&
+			    !check_str(p, "write_iolog", str_iolog_cb, td)) {
+				td->write_iolog = 1;
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "exec_prerun", str_prerun_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "exec_postrun", str_postrun_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+			if (!check_str(p, "ioscheduler", str_iosched_cb, td)) {
+				fgetpos(f, &off);
+				continue;
+			}
+
+			printf("Client%d: bad option %s\n",td->thread_number,p);
+			goto out;
+		}
+		/* step back so the outer loop sees the next section header */
+		fsetpos(f, &off);
+
+		if (add_job(td, name, 0))
+			goto out;
+	}
+
+	ret = 0;
+out:
+	free(string);
+	free(name);
+	free(tmpbuf);
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Reset the option template (def_thread) to the built-in defaults.
+ * Returns 0 on success, 1 when the cpu affinity of the process cannot
+ * be read.
+ */
+static int fill_def_thread(void)
+{
+	struct thread_data *def = &def_thread;
+
+	memset(def, 0, sizeof(def_thread));
+
+	if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) {
+		perror("sched_getaffinity");
+		return 1;
+	}
+
+	/*
+	 * io pattern and block size; -1 for min/max means "no range given"
+	 */
+	def->ddir = DDIR_READ;
+	def->iomix = 0;
+	def->sequential = DEF_SEQUENTIAL;
+	def->bs = DEF_BS;
+	def->min_bs = -1;
+	def->max_bs = -1;
+	def->rwmixcycle = DEF_RWMIX_CYCLE;
+	def->rwmixread = DEF_RWMIX_READ;
+
+	/*
+	 * io engine and the way the file is opened and created
+	 */
+	def->io_engine = DEF_IO_ENGINE;
+	strcpy(def->io_engine_name, DEF_IO_ENGINE_NAME);
+	def->odirect = DEF_ODIRECT;
+	def->sync_io = DEF_SYNCIO;
+	def->mem_type = MEM_MALLOC;
+	def->create_file = DEF_CREATE;
+	def->create_serialize = DEF_CREATE_SER;
+	def->create_fsync = DEF_CREATE_FSYNC;
+	def->overwrite = DEF_OVERWRITE;
+	def->invalidate_cache = DEF_INVALIDATE;
+
+	/*
+	 * run control
+	 */
+	def->ratecycle = DEF_RATE_CYCLE;
+	def->timeout = DEF_TIMEOUT;
+	def->bw_avg_time = DEF_BWAVGTIME;
+	def->loops = DEF_LOOPS;
+	def->verify = DEF_VERIFY;
+	def->stonewall = DEF_STONEWALL;
+	def->numjobs = DEF_NUMJOBS;
+	def->use_thread = DEF_USE_THREAD;
+	def->nice = DEF_NICE;
+#ifdef FIO_HAVE_DISK_UTIL
+	def->do_disk_util = 1;
+#endif
+
+	return 0;
+}
+
+/*
+ * Print the version banner followed by one help line per command line
+ * switch. 'name' is accepted for the callers' convenience and unused.
+ */
+static void usage(char *name)
+{
+	static const char *const help[] = {
+		"\t-s IO is sequential\n",
+		"\t-b Block size in KiB for each IO\n",
+		"\t-t Runtime in seconds\n",
+		"\t-R Exit all threads on failure to meet rate goal\n",
+		"\t-o Use O_DIRECT\n",
+		"\t-l Generate per-job latency logs\n",
+		"\t-w Generate per-job bandwidth logs\n",
+		"\t-f Job file (Required)\n",
+		"\t-v Print version info and exit\n",
+	};
+	unsigned int i;
+
+	printf("%s\n", fio_version_string);
+	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
+		printf("%s", help[i]);
+}
+
+/*
+ * Apply the command line switches to the option template and the
+ * global flags. -h and -v print and exit. Without -f, a last argument
+ * that does not start with '-' is taken as the job file.
+ */
+static void parse_cmd_line(int argc, char *argv[])
+{
+	int opt;
+
+	for (;;) {
+		opt = getopt(argc, argv, "s:b:t:r:R:o:f:lwvh");
+		if (opt == EOF)
+			break;
+
+		switch (opt) {
+		case 's':
+			def_thread.sequential = !!atoi(optarg);
+			break;
+		case 'b':
+			/* given in KiB, stored in bytes */
+			def_thread.bs = atoi(optarg);
+			def_thread.bs <<= 10;
+			if (!def_thread.bs) {
+				printf("bad block size\n");
+				def_thread.bs = DEF_BS;
+			}
+			break;
+		case 't':
+			def_thread.timeout = atoi(optarg);
+			break;
+		case 'r':
+			repeatable = !!atoi(optarg);
+			break;
+		case 'R':
+			rate_quit = !!atoi(optarg);
+			break;
+		case 'o':
+			def_thread.odirect = !!atoi(optarg);
+			break;
+		case 'f':
+			ini_file = strdup(optarg);
+			break;
+		case 'l':
+			write_lat_log = 1;
+			break;
+		case 'w':
+			write_bw_log = 1;
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		case 'v':
+			printf("%s\n", fio_version_string);
+			exit(0);
+		}
+	}
+
+	if (ini_file)
+		return;
+
+	if (argc > 1 && argv[argc - 1][0] != '-')
+		ini_file = strdup(argv[argc - 1]);
+}
+
+/*
+ * atexit handler: detach the job table and mark its shm segment for
+ * removal. Does nothing when the table is not attached.
+ */
+static void free_shm(void)
+{
+	struct shmid_ds sbuf;
+
+	if (!threads)
+		return;
+
+	shmdt(threads);
+	threads = NULL;
+	shmctl(shm_id, IPC_RMID, &sbuf);
+}
+
+/*
+ * Create and attach the shm segment that holds the job table and
+ * register free_shm() to tear it down at exit. Returns 0 on success,
+ * 1 on failure.
+ */
+static int setup_thread_area(void)
+{
+	/*
+	 * 1024 is too much on some machines, scale max_jobs if
+	 * we get a failure that looks like too large a shm segment
+	 */
+	for (;;) {
+		int s = max_jobs * sizeof(struct thread_data);
+
+		shm_id = shmget(0, s, IPC_CREAT | 0600);
+		if (shm_id != -1)
+			break;
+		if (errno != EINVAL) {
+			perror("shmget");
+			break;
+		}
+
+		/* EINVAL: retry with half the table, give up at zero */
+		max_jobs >>= 1;
+		if (!max_jobs)
+			break;
+	}
+
+	if (shm_id == -1)
+		return 1;
+
+	threads = shmat(shm_id, NULL, 0);
+	if (threads == (void *) -1) {
+		perror("shmat");
+		return 1;
+	}
+
+	atexit(free_shm);
+	return 0;
+}
+
+/*
+ * Entry point of option handling: set up the job table, load the
+ * defaults, apply the command line and parse the job file. Returns 0
+ * on success and 1 on any failure; usage is printed when the job file
+ * is missing or cannot be parsed.
+ */
+int parse_options(int argc, char *argv[])
+{
+	if (setup_thread_area() || fill_def_thread())
+		return 1;
+
+	parse_cmd_line(argc, argv);
+
+	if (!ini_file)
+		printf("Need job file\n");
+	else if (!parse_jobs_ini(ini_file))
+		return 0;
+
+	usage(argv[0]);
+	return 1;
+}
--- /dev/null
+/*
+ * The io parts of the fio tool, includes workers for sync and mmap'ed
+ * io, as well as both posix and linux libaio support.
+ *
+ * sync io is implemented on top of aio.
+ *
+ * This is not really specific to fio; if get_io_u/put_io_u and the
+ * related structures were pulled into this file as well, it would be a
+ * perfectly generic io engine that could be used for other projects.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/poll.h>
+#include "fio.h"
+#include "os.h"
+
+/*
+ * Helpers shared by several engines. They are kept outside of the
+ * FIO_HAVE_LIBAIO section because the posixaio, sync and mmap engines
+ * below use them as well and must build without libaio.
+ */
+
+/*
+ * Default io_sync hook: flush the job's file descriptor.
+ */
+static int fio_io_sync(struct thread_data *td)
+{
+	return fsync(td->fd);
+}
+
+/*
+ * Read the monotonic clock into 'ts'. Returns 0 on success, 1 when the
+ * clock cannot be read or _POSIX_TIMERS is not defined.
+ */
+static int fill_timespec(struct timespec *ts)
+{
+#ifdef _POSIX_TIMERS
+	if (!clock_gettime(CLOCK_MONOTONIC, ts))
+		return 0;
+
+	perror("clock_gettime");
+#endif
+	return 1;
+}
+
+/*
+ * Microseconds elapsed between 't' and now, or 0 when the current time
+ * cannot be read.
+ */
+static unsigned long long ts_utime_since_now(struct timespec *t)
+{
+	long long sec, nsec;
+	struct timespec now;
+
+	if (fill_timespec(&now))
+		return 0;
+
+	sec = now.tv_sec - t->tv_sec;
+	nsec = now.tv_nsec - t->tv_nsec;
+	/* borrow a second when the nanosecond part went negative */
+	if (sec > 0 && nsec < 0) {
+		sec--;
+		nsec += 1000000000;
+	}
+
+	sec *= 1000000;
+	nsec /= 1000;
+	return sec + nsec;
+}
+
+#ifdef FIO_HAVE_LIBAIO
+
+/* recover the io_u that was submitted as the iocb of event 'ev' */
+#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
+
+/*
+ * Per-job state of the libaio engine, stored in td->io_data.
+ */
+struct libaio_data {
+ io_context_t aio_ctx; /* context created by io_queue_init() */
+ struct io_event *aio_events; /* completion buffer, td->iodepth entries */
+};
+
+/*
+ * io_prep hook: fill the iocb of 'io_u' for a read or a write of its
+ * buffer at its offset on the job's file. Always returns 0.
+ */
+static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
+{
+	struct iocb *iocb = &io_u->iocb;
+
+	if (io_u->ddir != DDIR_READ)
+		io_prep_pwrite(iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+	else
+		io_prep_pread(iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+
+	return 0;
+}
+
+/*
+ * io_event hook: return the io_u behind completion number 'event' of
+ * the last getevents call.
+ */
+static struct io_u *fio_libaio_event(struct thread_data *td, int event)
+{
+	struct libaio_data *ld = td->io_data;
+	struct io_event *ev = ld->aio_events + event;
+
+	return ev_to_iou(ev);
+}
+
+/*
+ * io_getevents hook: collect between 'min' and 'max' completions into
+ * the engine's event buffer, retrying on -EINTR and, after a short
+ * sleep, on -EAGAIN. Returns what io_getevents() finally returned.
+ */
+static int fio_libaio_getevents(struct thread_data *td, int min, int max,
+				struct timespec *t)
+{
+	struct libaio_data *ld = td->io_data;
+	int r;
+
+	for (;;) {
+		r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
+		if (r == -EINTR)
+			continue;
+		if (r != -EAGAIN)
+			break;
+
+		usleep(100);
+	}
+
+	return r;
+}
+
+/*
+ * io_queue hook: submit the iocb of 'io_u'. Retries on -EINTR and,
+ * after a short sleep, on -EAGAIN. Returns 0 once the request was
+ * accepted, otherwise the io_submit() return value.
+ */
+static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct libaio_data *ld = td->io_data;
+	struct iocb *iocb = &io_u->iocb;
+	int ret;
+
+	for (;;) {
+		ret = io_submit(ld->aio_ctx, 1, &iocb);
+		if (ret == 1)
+			return 0;
+		if (ret == -EINTR)
+			continue;
+		if (ret != -EAGAIN)
+			return ret;
+
+		usleep(100);
+	}
+}
+
+/*
+ * io_cancel hook: ask the kernel to cancel 'io_u'; the event buffer of
+ * the engine receives the result. Returns what io_cancel() returns.
+ */
+static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+	struct libaio_data *ld = td->io_data;
+	struct iocb *iocb = &io_u->iocb;
+
+	return io_cancel(ld->aio_ctx, iocb, ld->aio_events);
+}
+
+/*
+ * io_cleanup hook: destroy the aio context and free the engine state.
+ * Safe to call when the engine was never set up.
+ */
+static void fio_libaio_cleanup(struct thread_data *td)
+{
+	struct libaio_data *ld = td->io_data;
+
+	if (!ld)
+		return;
+
+	io_destroy(ld->aio_ctx);
+	/* free() accepts NULL, no need to test the buffer first */
+	free(ld->aio_events);
+	free(ld);
+	td->io_data = NULL;
+}
+
+/*
+ * Set up the libaio engine for 'td': create an aio context sized for
+ * td->iodepth requests, allocate the completion buffer and install
+ * the engine hooks. Returns 0 on success; on failure the error is
+ * recorded through td_verror(), nothing stays allocated and 1 is
+ * returned.
+ */
+int fio_libaio_init(struct thread_data *td)
+{
+	struct libaio_data *ld;
+	int err;
+
+	ld = calloc(1, sizeof(*ld));
+	if (!ld) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
+	if (!ld->aio_events) {
+		td_verror(td, ENOMEM);
+		free(ld);
+		return 1;
+	}
+
+	/*
+	 * libaio reports failure as a negative error code in the return
+	 * value (the rest of this file tests -EAGAIN/-EINTR the same
+	 * way), so that is what gets recorded, not errno
+	 */
+	err = io_queue_init(td->iodepth, &ld->aio_ctx);
+	if (err) {
+		td_verror(td, -err);
+		free(ld->aio_events);
+		free(ld);
+		return 1;
+	}
+
+	td->io_prep = fio_libaio_io_prep;
+	td->io_queue = fio_libaio_queue;
+	td->io_getevents = fio_libaio_getevents;
+	td->io_event = fio_libaio_event;
+	td->io_cancel = fio_libaio_cancel;
+	td->io_cleanup = fio_libaio_cleanup;
+	td->io_sync = fio_io_sync;
+
+	td->io_data = ld;
+	return 0;
+}
+
+#else /* FIO_HAVE_LIBAIO */
+
+/*
+ * Stand-in used when libaio support is not compiled in: the engine
+ * cannot be set up, so EINVAL is returned.
+ */
+int fio_libaio_init(struct thread_data *td)
+{
+ return EINVAL;
+}
+
+#endif /* FIO_HAVE_LIBAIO */
+
+#ifdef FIO_HAVE_POSIXAIO
+
+/*
+ * Per-job state of the posix aio engine, stored in td->io_data.
+ */
+struct posixaio_data {
+ struct io_u **aio_events; /* completed io_us found by getevents, td->iodepth slots */
+};
+
+/*
+ * io_cancel hook: try to cancel 'io_u'. Returns 0 when aio_cancel()
+ * returned 1 or AIO_CANCELED, 1 otherwise.
+ *
+ * NOTE(review): the literal 1 is compared against a function that
+ * documents symbolic AIO_* results; check which of them it is meant
+ * to stand for.
+ */
+static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+	int r;
+
+	r = aio_cancel(td->fd, &io_u->aiocb);
+	if (r != 1 && r != AIO_CANCELED)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * io_prep hook: describe the transfer of 'io_u' in its aiocb and mark
+ * the io_u as not yet reaped. Always returns 0.
+ */
+static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
+{
+	struct aiocb *cb = &io_u->aiocb;
+
+	/* getevents only reports io_us that have not been seen before */
+	io_u->seen = 0;
+
+	cb->aio_fildes = td->fd;
+	cb->aio_offset = io_u->offset;
+	cb->aio_buf = io_u->buf;
+	cb->aio_nbytes = io_u->buflen;
+
+	return 0;
+}
+
+/*
+ * io_getevents hook: poll the busy list for finished requests.
+ *
+ * Every io_u whose aio_error() is no longer EINPROGRESS is stored in
+ * pd->aio_events and flagged 'seen' so a later pass skips it. The list
+ * is rescanned every millisecond until at least 'min' events were
+ * collected or, when a timeout 't' was given and the clock could be
+ * read, until that timeout expired. Returns the number of events
+ * collected.
+ */
+static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
+ struct timespec *t)
+{
+ struct posixaio_data *pd = td->io_data;
+ struct list_head *entry;
+ struct timespec start;
+ int r, have_timeout = 0;
+
+ /* the timeout is only honoured if the start time could be taken */
+ if (t && !fill_timespec(&start))
+ have_timeout = 1;
+
+ r = 0;
+restart:
+ list_for_each(entry, &td->io_u_busylist) {
+ struct io_u *io_u = list_entry(entry, struct io_u, list);
+ int err;
+
+ if (io_u->seen)
+ continue;
+
+ err = aio_error(&io_u->aiocb);
+ switch (err) {
+ default:
+ io_u->error = err;
+ /* fall through: a failed request is reported as an event too */
+ case ECANCELED:
+ case 0:
+ pd->aio_events[r++] = io_u;
+ io_u->seen = 1;
+ break;
+ case EINPROGRESS:
+ break;
+ }
+
+ if (r >= max)
+ break;
+ }
+
+ if (r >= min)
+ return r;
+
+ if (have_timeout) {
+ unsigned long long usec;
+
+ /* timeout in microseconds, compared against the time spent so far */
+ usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
+ if (ts_utime_since_now(&start) > usec)
+ return r;
+ }
+
+ /*
+ * hrmpf, we need to wait for more. we should use aio_suspend, for
+ * now just sleep a little and recheck status of busy-and-not-seen
+ */
+ usleep(1000);
+ goto restart;
+}
+
+/*
+ * io_event hook: return the io_u stored as completion number 'event'
+ * by the last getevents call.
+ */
+static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
+{
+	struct posixaio_data *pd = td->io_data;
+	struct io_u **slot = pd->aio_events + event;
+
+	return *slot;
+}
+
+/*
+ * io_queue hook: start the prepared read or write of 'io_u'. When
+ * the request is rejected, errno is stored in io_u->error. Returns
+ * io_u->error.
+ */
+static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct aiocb *cb = &io_u->aiocb;
+	int failed;
+
+	failed = (io_u->ddir == DDIR_READ) ? aio_read(cb) : aio_write(cb);
+	if (failed)
+		io_u->error = errno;
+
+	return io_u->error;
+}
+
+/*
+ * io_cleanup hook: free the engine state. Safe to call when the
+ * engine was never set up.
+ */
+static void fio_posixaio_cleanup(struct thread_data *td)
+{
+	struct posixaio_data *pd = td->io_data;
+
+	if (!pd)
+		return;
+
+	free(pd->aio_events);
+	free(pd);
+	td->io_data = NULL;
+}
+
+/*
+ * Set up the posix aio engine for 'td': allocate the event array
+ * (td->iodepth slots) and install the engine hooks. Returns 0 on
+ * success; on allocation failure the error is recorded through
+ * td_verror() and 1 is returned.
+ */
+int fio_posixaio_init(struct thread_data *td)
+{
+	struct posixaio_data *pd = malloc(sizeof(*pd));
+
+	if (!pd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
+	if (!pd->aio_events) {
+		td_verror(td, ENOMEM);
+		free(pd);
+		return 1;
+	}
+
+	td->io_prep = fio_posixaio_prep;
+	td->io_queue = fio_posixaio_queue;
+	td->io_getevents = fio_posixaio_getevents;
+	td->io_event = fio_posixaio_event;
+	td->io_cancel = fio_posixaio_cancel;
+	td->io_cleanup = fio_posixaio_cleanup;
+	td->io_sync = fio_io_sync;
+
+	td->io_data = pd;
+	return 0;
+}
+
+#else /* FIO_HAVE_POSIXAIO */
+
+/*
+ * Stand-in used when posix aio support is not compiled in: the engine
+ * cannot be set up, so EINVAL is returned.
+ */
+int fio_posixaio_init(struct thread_data *td)
+{
+ return EINVAL;
+}
+
+#endif /* FIO_HAVE_POSIXAIO */
+
+/*
+ * Per-job state shared by the sync and mmap engines, stored in
+ * td->io_data.
+ */
+struct syncio_data {
+ struct io_u *last_io_u; /* the io_u that completed last without error */
+};
+
+/*
+ * io_getevents hook of the sync and mmap engines. Requests complete
+ * inside the queue hook, so this only reports whether one is pending
+ * on the busy list: returns 1 if so, 0 otherwise.
+ */
+static int fio_syncio_getevents(struct thread_data *td, int min, int max,
+				struct timespec *t)
+{
+	assert(max <= 1);
+
+	/*
+	 * we can only have one finished io_u for sync io, since the depth
+	 * is always 1
+	 */
+	return list_empty(&td->io_u_busylist) ? 0 : 1;
+}
+
+/*
+ * io_event hook of the sync and mmap engines: there is only ever
+ * event 0, the io_u recorded by the queue hook.
+ */
+static struct io_u *fio_syncio_event(struct thread_data *td, int event)
+{
+	struct syncio_data *sd;
+
+	sd = td->io_data;
+	assert(event == 0);
+
+	return sd->last_io_u;
+}
+
+/*
+ * io_prep hook: position the file at the offset of 'io_u'. Returns 0
+ * on success; on failure errno is recorded through td_verror() and 1
+ * is returned.
+ */
+static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
+{
+	if (lseek(td->fd, io_u->offset, SEEK_SET) != -1)
+		return 0;
+
+	td_verror(td, errno);
+	return 1;
+}
+
+/*
+ * io_queue hook: perform the read or write of 'io_u' right away.
+ *
+ * A failed call stores errno in io_u->error. A transfer that moved
+ * fewer bytes than requested, including none at all, stores the
+ * missing byte count in io_u->resid and sets the error to ENODATA.
+ * An io_u that ends up without error becomes the engine's completed
+ * io_u. Returns io_u->error.
+ */
+static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct syncio_data *sd = td->io_data;
+	int ret;
+
+	if (io_u->ddir == DDIR_READ)
+		ret = read(td->fd, io_u->buf, io_u->buflen);
+	else
+		ret = write(td->fd, io_u->buf, io_u->buflen);
+
+	if (ret < 0)
+		io_u->error = errno;
+	else if ((unsigned int) ret != io_u->buflen) {
+		/*
+		 * errno is only meaningful after a failed call; a zero
+		 * byte transfer (e.g. end of file) is a short transfer,
+		 * not whatever errno happened to hold
+		 */
+		io_u->resid = io_u->buflen - ret;
+		io_u->error = ENODATA;
+	}
+
+	if (!io_u->error)
+		sd->last_io_u = io_u;
+
+	return io_u->error;
+}
+
+/*
+ * io_cleanup hook of the sync and mmap engines: free the engine state.
+ */
+static void fio_syncio_cleanup(struct thread_data *td)
+{
+	if (!td->io_data)
+		return;
+
+	free(td->io_data);
+	td->io_data = NULL;
+}
+
+/*
+ * Set up the sync engine for 'td': allocate its state and install the
+ * engine hooks (no cancel support). Returns 0 on success; on
+ * allocation failure the error is recorded through td_verror() and 1
+ * is returned.
+ */
+int fio_syncio_init(struct thread_data *td)
+{
+	struct syncio_data *sd = malloc(sizeof(*sd));
+
+	if (!sd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	td->io_prep = fio_syncio_prep;
+	td->io_queue = fio_syncio_queue;
+	td->io_getevents = fio_syncio_getevents;
+	td->io_event = fio_syncio_event;
+	td->io_cancel = NULL;
+	td->io_cleanup = fio_syncio_cleanup;
+	td->io_sync = fio_io_sync;
+
+	sd->last_io_u = NULL;
+	td->io_data = sd;
+	return 0;
+}
+
+/*
+ * Perform the io_u by copying between io_u->buf and the file mapping at
+ * td->mmap. The position inside the mapping is io_u->offset relative to
+ * td->file_offset.
+ *
+ * With td->odirect set, the touched range is additionally msync()ed and
+ * then madvise(MADV_DONTNEED)ed; a failure of either call stores errno in
+ * io_u->error. When no error is recorded the io_u is remembered as the
+ * last completed one. Returns io_u->error.
+ */
+static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct syncio_data *data = td->io_data;
+	unsigned long long rel_off = io_u->offset - td->file_offset;
+	void *map_addr = td->mmap + rel_off;
+
+	if (io_u->ddir == DDIR_READ)
+		memcpy(io_u->buf, map_addr, io_u->buflen);
+	else
+		memcpy(map_addr, io_u->buf, io_u->buflen);
+
+	/*
+	 * not really direct, but should drop the pages from the cache
+	 */
+	if (td->odirect) {
+		if (msync(map_addr, io_u->buflen, MS_SYNC) < 0)
+			io_u->error = errno;
+		if (madvise(map_addr, io_u->buflen, MADV_DONTNEED) < 0)
+			io_u->error = errno;
+	}
+
+	if (io_u->error)
+		return io_u->error;
+
+	data->last_io_u = io_u;
+	return 0;
+}
+
+/*
+ * Sync hook of the mmap engine: msync() the whole mapping (td->file_size
+ * bytes starting at td->mmap) with MS_SYNC and hand back msync()'s result.
+ */
+static int fio_mmapio_sync(struct thread_data *td)
+{
+	int ret = msync(td->mmap, td->file_size, MS_SYNC);
+
+	return ret;
+}
+
+/*
+ * Set up the mmap io engine for this thread. It shares the event and
+ * cleanup hooks (and the private data layout) with the sync engine, but
+ * needs no prep step and syncs through msync().
+ *
+ * Returns 0 on success. If the private state cannot be allocated, ENOMEM
+ * is recorded through td_verror() and 1 is returned; td is left
+ * untouched in that case.
+ */
+int fio_mmapio_init(struct thread_data *td)
+{
+	struct syncio_data *sd = malloc(sizeof(*sd));
+
+	if (!sd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	sd->last_io_u = NULL;
+
+	td->io_prep = NULL;
+	td->io_queue = fio_mmapio_queue;
+	td->io_getevents = fio_syncio_getevents;
+	td->io_event = fio_syncio_event;
+	td->io_cancel = NULL;
+	td->io_cleanup = fio_syncio_cleanup;
+	td->io_sync = fio_mmapio_sync;
+
+	td->io_data = sd;
+	return 0;
+}
+
+#ifdef FIO_HAVE_SGIO
+
+/*
+ * Per-io_u command storage for the sg engine; sgio_hdr_init() selects the
+ * entry with io_u->index.
+ */
+struct sgio_cmd {
+ /* command bytes; zeroed and installed as hdr->cmdp by sgio_hdr_init() */
+ unsigned char cdb[10];
+ /* not referenced by the sg engine code visible here */
+ int nr;
+};
+
+/*
+ * Private state of the sg engine, stored in td->io_data by fio_sgio_init().
+ */
+struct sgio_data {
+ /* array of td->iodepth command slots, indexed by io_u->index */
+ struct sgio_cmd *cmds;
+ /* array of td->iodepth completed io_u pointers, read by fio_sgio_event() */
+ struct io_u **events;
+ /* device block size in bytes, used to turn byte offsets/lengths into blocks */
+ unsigned int bs;
+};
+
+/*
+ * Reset 'hdr' and the command slot belonging to 'io_u', then fill in the
+ * fields every sg request needs: interface id 'S', the command buffer and
+ * its length, and io_u's index / pointer as pack_id / usr_ptr.
+ *
+ * When 'fs' is non-zero the data transfer buffer and length are taken
+ * from io_u as well; otherwise they stay zeroed for the caller to set.
+ */
+static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
+			  struct io_u *io_u, int fs)
+{
+	struct sgio_cmd *cmd = sd->cmds + io_u->index;
+
+	memset(cmd->cdb, 0, sizeof(cmd->cdb));
+	memset(hdr, 0, sizeof(*hdr));
+
+	hdr->interface_id = 'S';
+	hdr->pack_id = io_u->index;
+	hdr->usr_ptr = io_u;
+	hdr->cmdp = cmd->cdb;
+	hdr->cmd_len = sizeof(cmd->cdb);
+
+	if (!fs)
+		return;
+
+	hdr->dxferp = io_u->buf;
+	hdr->dxfer_len = io_u->buflen;
+}
+
+/*
+ * Reap up to 'max' completed requests by reading sg_io_hdr structures
+ * back from td->fd, storing the io_u of each (hdr->usr_ptr) in
+ * sd->events[].
+ *
+ * With 'min' == 0 the fd is switched to O_NONBLOCK for the duration of
+ * the call so that an empty queue returns at once; otherwise each read is
+ * preceded by a poll() for POLLIN. 't' is not used.
+ *
+ * Returns the number of events stored, or -1 after recording the error
+ * through td_verror().
+ *
+ * Events from every read are appended after the ones already collected;
+ * previously each batch was written starting at slot 0, overwriting
+ * earlier completions whenever more than one read was needed.
+ */
+static int fio_sgio_getevents(struct thread_data *td, int min, int max,
+			      struct timespec *t)
+{
+	struct sgio_data *sd = td->io_data;
+	struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
+	struct sg_io_hdr *hdrs;
+	int left = max, ret, events, i, r = 0, fl = 0;
+
+	hdrs = malloc(max * sizeof(*hdrs));
+	if (!hdrs && max > 0) {
+		td_verror(td, ENOMEM);
+		return -1;
+	}
+
+	/*
+	 * don't block for !events
+	 */
+	if (!min) {
+		fl = fcntl(td->fd, F_GETFL);
+		fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
+	}
+
+	while (left) {
+		do {
+			if (!min)
+				break;
+			poll(&pfd, 1, -1);
+			if (pfd.revents & POLLIN)
+				break;
+		} while (1);
+
+		ret = read(td->fd, hdrs, left * sizeof(*hdrs));
+		if (ret < 0) {
+			if (errno == EAGAIN)
+				break;
+			td_verror(td, errno);
+			r = -1;
+			break;
+		} else if (!ret)
+			break;
+
+		events = ret / sizeof(*hdrs);
+
+		/* append behind the r events gathered by earlier reads */
+		for (i = 0; i < events; i++)
+			sd->events[r + i] = hdrs[i].usr_ptr;
+
+		left -= events;
+		r += events;
+	}
+
+	/* restore the original file status flags */
+	if (!min)
+		fcntl(td->fd, F_SETFL, fl);
+
+	free(hdrs);
+	return r;
+}
+
+/*
+ * Issue io_u's prepared sg header through the SG_IO ioctl on td->fd.
+ * The io_u is recorded in event slot 0 before the ioctl is issued.
+ * Returns the ioctl() result.
+ */
+static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
+{
+	struct sgio_data *data = td->io_data;
+
+	data->events[0] = io_u;
+
+	return ioctl(td->fd, SG_IO, &io_u->hdr);
+}
+
+/*
+ * Submit io_u's sg header by write()ing it to td->fd. When 'sync' is
+ * non-zero the header is read() back straight away.
+ * Returns 0 on success or errno of the failing write()/read().
+ */
+static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
+{
+	struct sg_io_hdr *hdr = &io_u->hdr;
+
+	if (write(td->fd, hdr, sizeof(*hdr)) < 0)
+		return errno;
+
+	if (!sync)
+		return 0;
+
+	if (read(td->fd, hdr, sizeof(*hdr)) < 0)
+		return errno;
+
+	return 0;
+}
+
+/*
+ * Dispatch an sg request: files of type FIO_TYPE_BD go through the SG_IO
+ * ioctl, everything else through the write()/read() interface. 'sync' is
+ * only meaningful for the latter.
+ */
+static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
+{
+	int ret;
+
+	if (td->filetype != FIO_TYPE_BD)
+		ret = fio_sgio_rw_doio(td, io_u, sync);
+	else
+		ret = fio_sgio_ioctl_doio(td, io_u);
+
+	return ret;
+}
+
+/*
+ * Sync hook of the sg engine: grab a free io_u, build a command without a
+ * data phase whose opcode is 0x35 (presumably SCSI SYNCHRONIZE CACHE(10)
+ * - confirm against the SCSI spec), run it synchronously and give the
+ * io_u back.
+ *
+ * Returns ENOMEM when no io_u is available, otherwise the result of
+ * fio_sgio_doio().
+ */
+static int fio_sgio_sync(struct thread_data *td)
+{
+	struct sgio_data *data = td->io_data;
+	struct io_u *io_u = __get_io_u(td);
+	struct sg_io_hdr *hdr;
+	int err;
+
+	if (!io_u)
+		return ENOMEM;
+
+	hdr = &io_u->hdr;
+	sgio_hdr_init(data, hdr, io_u, 0);
+
+	hdr->cmdp[0] = 0x35;
+	hdr->dxfer_direction = SG_DXFER_NONE;
+
+	err = fio_sgio_doio(td, io_u, 1);
+	put_io_u(td, io_u);
+	return err;
+}
+
+/*
+ * Build the sg header and 10 byte command for a data transfer io_u.
+ *
+ * io_u->buflen must be a multiple of the device block size (checked with
+ * a mask, so the block size is assumed to be a power of two); otherwise
+ * EINVAL is returned. The opcode is 0x28 for reads and 0x2a for writes
+ * (presumably SCSI READ(10) / WRITE(10) - confirm against the SCSI spec).
+ * The start block goes into command bytes 2..5 and the block count into
+ * bytes 7..8, most significant byte first.
+ *
+ * Returns 0 on success.
+ */
+static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
+{
+	struct sgio_data *data = td->io_data;
+	struct sg_io_hdr *hdr = &io_u->hdr;
+	int is_read = (io_u->ddir == DDIR_READ);
+	int blocks, start;
+
+	if ((io_u->buflen & (data->bs - 1)) != 0) {
+		fprintf(stderr, "read/write not sector aligned\n");
+		return EINVAL;
+	}
+
+	sgio_hdr_init(data, hdr, io_u, 1);
+
+	hdr->dxfer_direction = is_read ? SG_DXFER_FROM_DEV : SG_DXFER_TO_DEV;
+	hdr->cmdp[0] = is_read ? 0x28 : 0x2a;
+
+	start = io_u->offset / data->bs;
+	blocks = io_u->buflen / data->bs;
+
+	hdr->cmdp[2] = (start >> 24) & 0xff;
+	hdr->cmdp[3] = (start >> 16) & 0xff;
+	hdr->cmdp[4] = (start >> 8) & 0xff;
+	hdr->cmdp[5] = start & 0xff;
+	hdr->cmdp[7] = (blocks >> 8) & 0xff;
+	hdr->cmdp[8] = blocks & 0xff;
+	return 0;
+}
+
+/*
+ * Queue hook of the sg engine: submit the prepared io_u without waiting
+ * for it.
+ *
+ * fio_sgio_doio() reports failure in two ways: the ioctl path returns a
+ * negative value with errno set, the write()/read() path returns the
+ * errno value itself. Both are turned into io_u->error here (the latter
+ * used to be silently ignored). If submission worked but the header
+ * carries a non-zero status, the residual count is copied to io_u->resid
+ * and the error is EIO.
+ *
+ * Returns io_u->error.
+ */
+static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct sg_io_hdr *hdr = &io_u->hdr;
+	int ret;
+
+	ret = fio_sgio_doio(td, io_u, 0);
+
+	if (ret < 0)
+		io_u->error = errno;
+	else if (ret > 0)
+		io_u->error = ret;
+	else if (hdr->status) {
+		io_u->resid = hdr->resid;
+		io_u->error = EIO;
+	}
+
+	return io_u->error;
+}
+
+/*
+ * Return the io_u stored in slot 'event' of the sg engine's event array.
+ */
+static struct io_u *fio_sgio_event(struct thread_data *td, int event)
+{
+	struct sgio_data *data = td->io_data;
+	struct io_u *done = data->events[event];
+
+	return done;
+}
+
+/*
+ * Ask the device for its block size: send opcode 0x25 (presumably SCSI
+ * READ CAPACITY(10) - confirm against the SCSI spec) with an 8 byte reply
+ * buffer and assemble bytes 4..7 of the reply, most significant first,
+ * into *bs.
+ *
+ * Asserts that a free io_u is available. Returns 0 on success, otherwise
+ * the result of fio_sgio_doio() with *bs left untouched.
+ */
+static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
+{
+	struct sgio_data *sd = td->io_data;
+	struct io_u *io_u;
+	struct sg_io_hdr *hdr;
+	unsigned char buf[8];
+	int ret;
+
+	io_u = __get_io_u(td);
+	assert(io_u);
+
+	hdr = &io_u->hdr;
+	sgio_hdr_init(sd, hdr, io_u, 0);
+	memset(buf, 0, sizeof(buf));
+
+	hdr->cmdp[0] = 0x25;
+	hdr->dxfer_direction = SG_DXFER_FROM_DEV;
+	hdr->dxferp = buf;
+	hdr->dxfer_len = sizeof(buf);
+
+	ret = fio_sgio_doio(td, io_u, 1);
+	if (!ret) {
+		/*
+		 * widen before shifting: buf[4] would otherwise be promoted
+		 * to int and a set top bit shifted into the sign bit
+		 */
+		*bs = ((unsigned int) buf[4] << 24) |
+		      ((unsigned int) buf[5] << 16) |
+		      ((unsigned int) buf[6] << 8) |
+		       (unsigned int) buf[7];
+	}
+
+	put_io_u(td, io_u);
+	return ret;
+}
+
+/*
+ * Release everything fio_sgio_init() allocated: the command and event
+ * arrays as well as the private data itself, then clear td->io_data.
+ */
+static void fio_sgio_cleanup(struct thread_data *td)
+{
+	struct sgio_data *sd = td->io_data;
+
+	if (!sd)
+		return;
+
+	free(sd->events);
+	free(sd->cmds);
+	free(sd);
+	td->io_data = NULL;
+}
+
+/*
+ * Set up the sg io engine for this thread.
+ *
+ * Allocates the private state (one command slot and one event slot per
+ * unit of td->iodepth), determines the device block size - BLKSSZGET for
+ * FIO_TYPE_BD files, a query through the sg interface for FIO_TYPE_CHAR
+ * files - and installs the engine hooks. Any other file type is rejected.
+ *
+ * Returns 0 on success and non-zero on failure. On failure all memory
+ * allocated here is released again and td->io_data is NULL (it used to be
+ * leaked, and the arrays were never freed by the cleanup hook either).
+ */
+int fio_sgio_init(struct thread_data *td)
+{
+	struct sgio_data *sd;
+	unsigned int bs;
+	int ret;
+
+	sd = malloc(sizeof(*sd));
+	if (!sd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
+	sd->events = malloc(td->iodepth * sizeof(struct io_u *));
+	/* fio_sgio_get_bs() and fio_sgio_cleanup() find sd through td */
+	td->io_data = sd;
+
+	if (!sd->cmds || !sd->events) {
+		td_verror(td, ENOMEM);
+		ret = 1;
+		goto err;
+	}
+
+	if (td->filetype == FIO_TYPE_BD) {
+		if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
+			td_verror(td, errno);
+			ret = 1;
+			goto err;
+		}
+	} else if (td->filetype == FIO_TYPE_CHAR) {
+		int version;
+
+		if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
+			td_verror(td, errno);
+			ret = 1;
+			goto err;
+		}
+
+		ret = fio_sgio_get_bs(td, &bs);
+		if (ret)
+			goto err;
+	} else {
+		fprintf(stderr, "ioengine sgio only works on block devices\n");
+		ret = 1;
+		goto err;
+	}
+
+	sd->bs = bs;
+
+	td->io_prep = fio_sgio_prep;
+	td->io_queue = fio_sgio_queue;
+
+	if (td->filetype == FIO_TYPE_BD)
+		td->io_getevents = fio_syncio_getevents;
+	else
+		td->io_getevents = fio_sgio_getevents;
+
+	td->io_event = fio_sgio_event;
+	td->io_cancel = NULL;
+	td->io_cleanup = fio_sgio_cleanup;
+	td->io_sync = fio_sgio_sync;
+
+	/*
+	 * we want to do it, regardless of whether odirect is set or not
+	 */
+	td->override_sync = 1;
+	return 0;
+
+err:
+	fio_sgio_cleanup(td);
+	return ret;
+}
+
+#else /* FIO_HAVE_SGIO */
+
+/*
+ * Fallback used when FIO_HAVE_SGIO is not defined: the engine cannot be
+ * set up, so initialisation always fails with EINVAL.
+ */
+int fio_sgio_init(struct thread_data *td)
+{
+	(void) td;	/* nothing to set up */
+
+	return EINVAL;
+}
+
+#endif /* FIO_HAVE_SGIO */
+
+#ifdef FIO_HAVE_SPLICE
+/*
+ * Private state of the splice engine, stored in td->io_data by
+ * fio_spliceio_init().
+ */
+struct spliceio_data {
+ /* io_u most recently queued without error; handed back by fio_spliceio_event() */
+ struct io_u *last_io_u;
+ /* pipe created with pipe(): [0] is the read end, [1] the write end */
+ int pipe[2];
+};
+
+/*
+ * Return the io_u belonging to event slot 'event'. Only slot 0 exists;
+ * it is the io_u remembered by the last successful queue call.
+ */
+static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
+{
+	struct spliceio_data *data;
+
+	assert(event == 0);
+
+	data = td->io_data;
+	return data->last_io_u;
+}
+
+/*
+ * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
+ * So just splice the data from the file into the pipe, and use regular
+ * read to fill the buffer. Doesn't make a lot of sense, but...
+ *
+ * The transfer is done in chunks of at most SPLICE_DEF_SIZE bytes,
+ * starting at io_u->offset. splice() failures with ENODATA or EAGAIN are
+ * retried.
+ *
+ * Returns the number of bytes placed in io_u->buf (io_u->buflen for a
+ * complete read, less if the file ended early), or -1 with errno set on
+ * failure. Hitting end of file before any byte was read is reported as
+ * -1 with errno set to ENODATA; it used to loop forever. Errors used to
+ * be returned as a positive errno value, which the caller could not tell
+ * apart from a byte count.
+ */
+static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
+{
+	struct spliceio_data *sd = td->io_data;
+	int ret, ret2, buflen, done;
+	off_t offset;
+	char *p;
+
+	offset = io_u->offset;
+	buflen = io_u->buflen;
+	p = io_u->buf;
+	while (buflen) {
+		int this_len = buflen;
+
+		if (this_len > SPLICE_DEF_SIZE)
+			this_len = SPLICE_DEF_SIZE;
+
+		ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
+		if (ret < 0) {
+			if (errno == ENODATA || errno == EAGAIN)
+				continue;
+
+			return -1;
+		}
+
+		/* nothing left in the file, stop instead of spinning */
+		if (!ret)
+			break;
+
+		buflen -= ret;
+
+		/* drain what was just spliced into the pipe */
+		while (ret) {
+			ret2 = read(sd->pipe[0], p, ret);
+			if (ret2 < 0)
+				return -1;
+
+			ret -= ret2;
+			p += ret2;
+		}
+	}
+
+	done = io_u->buflen - buflen;
+	if (!done && io_u->buflen) {
+		errno = ENODATA;
+		return -1;
+	}
+
+	return done;
+}
+
+/*
+ * For splice writing, we can vmsplice our data buffer directly into a
+ * pipe and then splice that to a file.
+ *
+ * Each round waits for the pipe to become writable, vmsplice()s as much
+ * of the remaining buffer as the pipe accepts and then splice()s exactly
+ * that amount on to td->fd, starting at io_u->offset.
+ *
+ * Returns io_u->buflen on success, or -1 with errno set on failure.
+ * Errors used to be returned as a positive errno value, which the caller
+ * could not tell apart from a byte count. A splice() that moves nothing
+ * out of the pipe is reported as EIO rather than retried forever.
+ */
+static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
+{
+	struct spliceio_data *sd = td->io_data;
+	struct iovec iov[1] = {
+		{
+			.iov_base = io_u->buf,
+			.iov_len = io_u->buflen,
+		}
+	};
+	struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
+	off_t off = io_u->offset;
+	int ret, ret2;
+
+	while (iov[0].iov_len) {
+		if (poll(&pfd, 1, -1) < 0)
+			return -1;
+
+		ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
+		if (ret < 0)
+			return -1;
+
+		iov[0].iov_len -= ret;
+		iov[0].iov_base = (char *) iov[0].iov_base + ret;
+
+		/* push everything that went into the pipe out to the file */
+		while (ret) {
+			ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
+			if (ret2 < 0)
+				return -1;
+			if (!ret2) {
+				errno = EIO;
+				return -1;
+			}
+
+			ret -= ret2;
+		}
+	}
+
+	return io_u->buflen;
+}
+
+/*
+ * Queue hook of the splice engine: run the io_u through
+ * fio_splice_read() or fio_splice_write() depending on its direction.
+ *
+ * A positive result different from io_u->buflen sets io_u->resid to the
+ * missing amount and io_u->error to ENODATA; any other mismatch sets
+ * io_u->error to errno. When no error is recorded the io_u is remembered
+ * as the last completed one. Returns io_u->error.
+ */
+static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct spliceio_data *data = td->io_data;
+	int done;
+
+	done = (io_u->ddir == DDIR_READ) ?
+		fio_splice_read(td, io_u) :
+		fio_splice_write(td, io_u);
+
+	if ((unsigned int) done != io_u->buflen) {
+		if (done <= 0)
+			io_u->error = errno;
+		else {
+			io_u->resid = io_u->buflen - done;
+			io_u->error = ENODATA;
+		}
+	}
+
+	if (io_u->error)
+		return io_u->error;
+
+	data->last_io_u = io_u;
+	return 0;
+}
+
+/*
+ * Tear down the splice engine state: close both pipe ends, free the
+ * private data and clear td->io_data. Does nothing without private data.
+ */
+static void fio_spliceio_cleanup(struct thread_data *td)
+{
+	struct spliceio_data *data = td->io_data;
+
+	if (!data)
+		return;
+
+	close(data->pipe[0]);
+	close(data->pipe[1]);
+	free(data);
+	td->io_data = NULL;
+}
+
+/*
+ * Set up the splice io engine for this thread: allocate the private
+ * state, create the pipe the data is routed through and install the
+ * engine hooks. No prep step is needed since splice() is handed the file
+ * offset directly, so io_prep is cleared explicitly like the mmap engine
+ * does.
+ *
+ * Returns 0 on success. On allocation or pipe() failure the error is
+ * recorded through td_verror(), nothing stays allocated and 1 is
+ * returned.
+ */
+int fio_spliceio_init(struct thread_data *td)
+{
+	struct spliceio_data *sd = malloc(sizeof(*sd));
+
+	if (!sd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	td->io_prep = NULL;
+	td->io_queue = fio_spliceio_queue;
+	td->io_getevents = fio_syncio_getevents;
+	td->io_event = fio_spliceio_event;
+	td->io_cancel = NULL;
+	td->io_cleanup = fio_spliceio_cleanup;
+	td->io_sync = fio_io_sync;
+
+	sd->last_io_u = NULL;
+	if (pipe(sd->pipe) < 0) {
+		td_verror(td, errno);
+		free(sd);
+		return 1;
+	}
+
+	td->io_data = sd;
+	return 0;
+}
+
+#else /* FIO_HAVE_SPLICE */
+
+/*
+ * Fallback used when FIO_HAVE_SPLICE is not defined: the engine cannot
+ * be set up, so initialisation always fails with EINVAL.
+ */
+int fio_spliceio_init(struct thread_data *td)
+{
+	(void) td;	/* nothing to set up */
+
+	return EINVAL;
+}
+
+#endif /* FIO_HAVE_SPLICE */
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include "list.h"
+#include "fio.h"
+
+/*
+ * Append one record for io_u to the io log being written, as a
+ * "direction,offset,length" line.
+ */
+void write_iolog_put(struct thread_data *td, struct io_u *io_u)
+{
+	FILE *log = td->iolog_f;
+
+	fprintf(log, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen);
+}
+
+/*
+ * Take the next entry off the list of io log records read at init time
+ * and copy its offset, length and direction into io_u. The entry is
+ * unlinked and freed.
+ *
+ * Returns 0 when io_u was filled in, 1 when the list is empty.
+ */
+int read_iolog_get(struct thread_data *td, struct io_u *io_u)
+{
+	struct io_piece *piece;
+
+	if (list_empty(&td->io_log_list))
+		return 1;
+
+	piece = list_entry(td->io_log_list.next, struct io_piece, list);
+	list_del(&piece->list);
+
+	io_u->ddir = piece->ddir;
+	io_u->buflen = piece->len;
+	io_u->offset = piece->offset;
+
+	free(piece);
+	return 0;
+}
+
+/*
+ * Throw away the whole io history list: unlink and free every entry
+ * until the list is empty.
+ */
+void prune_io_piece_log(struct thread_data *td)
+{
+	struct list_head *head = &td->io_hist_list;
+
+	while (!list_empty(head)) {
+		struct io_piece *piece;
+
+		piece = list_entry(head->next, struct io_piece, list);
+		list_del(&piece->list);
+		free(piece);
+	}
+}
+
+/*
+ * log a successful write, so we can unwind the log for verify
+ *
+ * A new history entry holding io_u's offset and length is allocated and
+ * linked into td->io_hist_list. Sequential jobs, and jobs that do not
+ * overwrite, simply append. Random overwrite jobs keep the list ordered
+ * by offset: the list is walked backwards from the tail and the entry is
+ * inserted behind the first element with a smaller offset (or at the
+ * front if there is none).
+ */
+void log_io_piece(struct thread_data *td, struct io_u *io_u)
+{
+	struct list_head *head = &td->io_hist_list;
+	struct list_head *pos;
+	struct io_piece *piece = malloc(sizeof(*piece));
+
+	piece->offset = io_u->offset;
+	piece->len = io_u->buflen;
+	INIT_LIST_HEAD(&piece->list);
+
+	/*
+	 * for random io where the writes extend the file, it will typically
+	 * be laid out with the block scattered as written. it's faster to
+	 * read them in in that order again, so don't sort
+	 */
+	if (td->sequential || !td->overwrite) {
+		list_add_tail(&piece->list, head);
+		return;
+	}
+
+	/*
+	 * for random io, sort the list so verify will run faster
+	 */
+	for (pos = head->prev; pos != head; pos = pos->prev) {
+		struct io_piece *cur = list_entry(pos, struct io_piece, list);
+
+		if (cur->offset < piece->offset)
+			break;
+	}
+
+	list_add(&piece->list, pos);
+}
+
+/*
+ * Finish writing the io log: flush and close the stream, then release
+ * the buffer that was attached to it.
+ */
+void write_iolog_close(struct thread_data *td)
+{
+	FILE *log = td->iolog_f;
+
+	fflush(log);
+	fclose(log);
+
+	free(td->iolog_buf);
+}
+
+/*
+ * Prepare io logging for a job.
+ *
+ * Nothing is done (return 0) unless td->read_iolog or td->write_iolog is
+ * set. For writing, td->iolog_file is opened and given an 8192 byte
+ * buffer. For reading, the file is parsed line by line
+ * ("direction,offset,length"); malformed lines and unknown directions are
+ * reported on stderr and skipped, every good line becomes an io_piece on
+ * td->io_log_list and td->max_bs is raised to the largest length seen.
+ *
+ * After reading, the job direction follows the log: read only, write
+ * only, or mixed (td->iomix). A write-only log used to be flagged as
+ * DDIR_READ.
+ *
+ * Returns 0 on success, 1 if the file cannot be opened or holds no
+ * usable entry.
+ */
+int init_iolog(struct thread_data *td)
+{
+	unsigned long long offset;
+	unsigned int bytes;
+	char *str, *p;
+	FILE *f;
+	int rw, i, reads, writes;
+
+	if (!td->read_iolog && !td->write_iolog)
+		return 0;
+
+	if (td->read_iolog)
+		f = fopen(td->iolog_file, "r");
+	else
+		f = fopen(td->iolog_file, "w");
+
+	if (!f) {
+		perror("fopen iolog");
+		printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog);
+		return 1;
+	}
+
+	/*
+	 * That's it for writing, setup a log buffer and we're done.
+	 */
+	if (td->write_iolog) {
+		td->iolog_f = f;
+		td->iolog_buf = malloc(8192);
+		setvbuf(f, td->iolog_buf, _IOFBF, 8192);
+		return 0;
+	}
+
+	/*
+	 * Read in the read iolog and store it, reuse the infrastructure
+	 * for doing verifications.
+	 */
+	str = malloc(4096);
+	reads = writes = i = 0;
+	while ((p = fgets(str, 4096, f)) != NULL) {
+		struct io_piece *ipo;
+
+		if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) {
+			fprintf(stderr, "bad iolog: %s\n", p);
+			continue;
+		}
+		if (rw == DDIR_READ)
+			reads++;
+		else if (rw == DDIR_WRITE)
+			writes++;
+		else {
+			fprintf(stderr, "bad ddir: %d\n", rw);
+			continue;
+		}
+
+		ipo = malloc(sizeof(*ipo));
+		INIT_LIST_HEAD(&ipo->list);
+		ipo->offset = offset;
+		ipo->len = bytes;
+		if (bytes > td->max_bs)
+			td->max_bs = bytes;
+		ipo->ddir = rw;
+		list_add_tail(&ipo->list, &td->io_log_list);
+		i++;
+	}
+
+	free(str);
+	fclose(f);
+
+	if (!i)
+		return 1;
+
+	if (reads && !writes)
+		td->ddir = DDIR_READ;
+	else if (!reads && writes)
+		td->ddir = DDIR_WRITE;
+	else
+		td->iomix = 1;
+
+	return 0;
+}
--- /dev/null
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <libgen.h>
+#include <math.h>
+
+#include "fio.h"
+
+/* timer value (re)armed through setitimer() by disk_util_timer_arm() */
+static struct itimerval itimer;
+/* all struct disk_util entries created by disk_util_add(), linked via ->list */
+static LIST_HEAD(disk_list);
+
+/*
+ * Read the first line of the stat file at du->path and parse its eleven
+ * counters into *dus (the in-flight count is parsed but dropped).
+ *
+ * Returns 0 on success, 1 if the file cannot be opened or read, or does
+ * not hold all eleven fields.
+ */
+static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus)
+{
+	unsigned in_flight;
+	char line[256];
+	int ret = 1;
+	FILE *f = fopen(du->path, "r");
+
+	if (!f)
+		return 1;
+
+	if (fgets(line, sizeof(line), f) &&
+	    sscanf(line, "%u %u %llu %u %u %u %llu %u %u %u %u\n",
+		   &dus->ios[0], &dus->merges[0], &dus->sectors[0],
+		   &dus->ticks[0], &dus->ios[1], &dus->merges[1],
+		   &dus->sectors[1], &dus->ticks[1], &in_flight,
+		   &dus->io_ticks, &dus->time_in_queue) == 11)
+		ret = 0;
+
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Take a fresh snapshot of one disk's counters and add the change since
+ * the previous snapshot to the running totals in du->dus. The elapsed
+ * time since the previous update is added to du->msec, and the snapshot
+ * and timestamp become the new reference. Nothing changes if the
+ * counters cannot be read.
+ */
+static void update_io_tick_disk(struct disk_util *du)
+{
+	struct disk_util_stat cur;
+	struct disk_util_stat *total = &du->dus;
+	struct disk_util_stat *prev = &du->last_dus;
+	struct timeval now;
+	int i;
+
+	if (get_io_ticks(du, &cur))
+		return;
+
+	/* index 0 and 1 are the two halves reported as "read/write" */
+	for (i = 0; i < 2; i++) {
+		total->sectors[i] += (cur.sectors[i] - prev->sectors[i]);
+		total->ios[i] += (cur.ios[i] - prev->ios[i]);
+		total->merges[i] += (cur.merges[i] - prev->merges[i]);
+		total->ticks[i] += (cur.ticks[i] - prev->ticks[i]);
+	}
+
+	total->io_ticks += (cur.io_ticks - prev->io_ticks);
+	total->time_in_queue += (cur.time_in_queue - prev->time_in_queue);
+
+	gettimeofday(&now, NULL);
+	du->msec += mtime_since(&du->time, &now);
+	du->time = now;
+	*prev = cur;
+}
+
+/*
+ * Refresh the utilisation counters of every disk on disk_list.
+ */
+void update_io_ticks(void)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &disk_list)
+		update_io_tick_disk(list_entry(pos, struct disk_util, list));
+}
+
+/*
+ * Tell whether a disk with device number 'dev' is already on disk_list.
+ * Returns 1 if so, 0 otherwise.
+ */
+static int disk_util_exists(dev_t dev)
+{
+	struct list_head *pos;
+	int found = 0;
+
+	list_for_each(pos, &disk_list) {
+		struct disk_util *cur = list_entry(pos, struct disk_util, list);
+
+		if (cur->dev == dev) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Start tracking a disk: create a zeroed disk_util entry for device
+ * 'dev' whose stat file is "<path>/stat" and whose name is the last
+ * component of 'path', record the current time and counter values as the
+ * baseline, and append the entry to disk_list.
+ */
+static void disk_util_add(dev_t dev, char *path)
+{
+	struct disk_util *du = calloc(1, sizeof(*du));
+
+	du->dev = dev;
+	INIT_LIST_HEAD(&du->list);
+
+	/* build the stat path first, basename() may alter 'path' */
+	sprintf(du->path, "%s/stat", path);
+	du->name = strdup(basename(path));
+
+	gettimeofday(&du->time, NULL);
+	get_io_ticks(du, &du->last_dus);
+
+	list_add_tail(&du->list, &disk_list);
+}
+
+/*
+ * Compare the "major:minor" pair stored in the file at 'path' with
+ * 'dev', using (major << 8) | minor as the device number.
+ *
+ * Returns 0 on a match; 1 if the numbers differ or the file cannot be
+ * opened, read or parsed.
+ */
+static int check_dev_match(dev_t dev, char *path)
+{
+	unsigned int dev_major, dev_minor;
+	char line[256];
+	int ret = 1;
+	FILE *f = fopen(path, "r");
+
+	if (!f) {
+		perror("open path");
+		return 1;
+	}
+
+	if (fgets(line, sizeof(line), f) &&
+	    sscanf(line, "%u:%u", &dev_major, &dev_minor) == 2 &&
+	    ((dev_major << 8) | dev_minor) == dev)
+		ret = 0;
+
+	fclose(f);
+	return ret;
+}
+
+/*
+ * Search the directory tree below 'path' for the directory whose "dev"
+ * file names device 'dev'.
+ *
+ * 'path' is both input and output: it must point to a buffer of at least
+ * 256 bytes (as all callers in this file provide) and, when the match is
+ * found in a subdirectory, is overwritten with that subdirectory's path.
+ * Entries named ".", ".." and "device" are never descended into.
+ *
+ * Returns 1 if a matching directory was found, 0 otherwise.
+ *
+ * Entry paths are built with a bounded snprintf(); entries whose full
+ * path does not fit are skipped instead of overflowing the stack buffer.
+ */
+static int find_block_dir(dev_t dev, char *path)
+{
+	struct dirent *dir;
+	struct stat st;
+	int found = 0;
+	DIR *D;
+
+	D = opendir(path);
+	if (!D)
+		return 0;
+
+	while ((dir = readdir(D)) != NULL) {
+		char full_path[256];
+		int len;
+
+		if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
+			continue;
+		if (!strcmp(dir->d_name, "device"))
+			continue;
+
+		len = snprintf(full_path, sizeof(full_path), "%s/%s", path, dir->d_name);
+		if (len < 0 || (size_t) len >= sizeof(full_path))
+			continue;
+
+		if (!strcmp(dir->d_name, "dev")) {
+			if (!check_dev_match(dev, full_path)) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (stat(full_path, &st) == -1) {
+			perror("stat");
+			break;
+		}
+
+		/* NOTE(review): stat() follows links, so S_ISLNK never fires here */
+		if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
+			continue;
+
+		found = find_block_dir(dev, full_path);
+		if (found) {
+			/* full_path fits: both buffers are 256 bytes */
+			strcpy(path, full_path);
+			break;
+		}
+	}
+
+	closedir(D);
+	return found;
+}
+
+/*
+ * Set up disk utilisation tracking for the device backing td's file.
+ *
+ * Does nothing unless td->do_disk_util is set. The device number comes
+ * from td->file_name itself (st_rdev for block devices, st_dev
+ * otherwise) or, if the file cannot be stat()ed, from its directory.
+ * Devices already tracked are skipped. The matching directory is then
+ * looked up below /sys/block, stepping up one level when it turns out to
+ * be a partition, stored in td->sysfs_root and registered with
+ * disk_util_add().
+ *
+ * All path handling is bounded to the 256 byte work buffers: over-long
+ * names are reported on stderr and tracking is skipped, where the
+ * unbounded strcpy()/sprintf() calls used to overflow. The parent
+ * directory is moved into place with memmove() because dirname() may
+ * return a pointer into the buffer being written.
+ */
+void init_disk_util(struct thread_data *td)
+{
+	struct stat st;
+	char foo[256], tmp[256];
+	dev_t dev;
+	char *p;
+	int len;
+
+	if (!td->do_disk_util)
+		return;
+
+	if (!stat(td->file_name, &st)) {
+		if (S_ISBLK(st.st_mode))
+			dev = st.st_rdev;
+		else
+			dev = st.st_dev;
+	} else {
+		/*
+		 * must be a file, open "." in that path
+		 */
+		len = snprintf(foo, sizeof(foo), "%s", td->file_name);
+		if (len < 0 || (size_t) len >= sizeof(foo)) {
+			fprintf(stderr, "disk util: file name too long\n");
+			return;
+		}
+
+		p = dirname(foo);
+		if (stat(p, &st)) {
+			perror("disk util stat");
+			return;
+		}
+
+		dev = st.st_dev;
+	}
+
+	if (disk_util_exists(dev))
+		return;
+
+	sprintf(foo, "/sys/block");
+	if (!find_block_dir(dev, foo))
+		return;
+
+	/*
+	 * If there's a ../queue/ directory there, we are inside a partition.
+	 * Check if that is the case and jump back. For loop/md/dm etc we
+	 * are already in the right spot.
+	 */
+	len = snprintf(tmp, sizeof(tmp), "%s/../queue", foo);
+	if (len < 0 || (size_t) len >= sizeof(tmp)) {
+		fprintf(stderr, "disk util: sysfs path too long\n");
+		return;
+	}
+
+	if (!stat(tmp, &st)) {
+		p = dirname(foo);
+		len = snprintf(tmp, sizeof(tmp), "%s/queue", p);
+		if (len < 0 || (size_t) len >= sizeof(tmp) || stat(tmp, &st)) {
+			fprintf(stderr, "unknown sysfs layout\n");
+			return;
+		}
+		/* p may alias foo, so no sprintf(foo, "%s", p) */
+		memmove(foo, p, strlen(p) + 1);
+	}
+
+	td->sysfs_root = strdup(foo);
+	disk_util_add(dev, foo);
+}
+
+/*
+ * Arm the real-time interval timer to expire once, DISK_UTIL_MSEC
+ * milliseconds from now.
+ */
+void disk_util_timer_arm(void)
+{
+	struct timeval *expiry = &itimer.it_value;
+
+	expiry->tv_sec = 0;
+	expiry->tv_usec = DISK_UTIL_MSEC * 1000;
+
+	setitimer(ITIMER_REAL, &itimer, NULL);
+}
+
+/*
+ * Account the resources used since the last call: user time, system
+ * time and context switches (voluntary plus involuntary) consumed
+ * between td->ru_start and a fresh getrusage() sample are added to td's
+ * totals, and the sample becomes the new starting point. Skipped
+ * entirely while the job has no runtime in either direction.
+ */
+void update_rusage_stat(struct thread_data *td)
+{
+	if ((td->runtime[0] + td->runtime[1]) == 0)
+		return;
+
+	getrusage(RUSAGE_SELF, &td->ru_end);
+
+	td->usr_time += mtime_since(&td->ru_start.ru_utime,
+				    &td->ru_end.ru_utime);
+	td->sys_time += mtime_since(&td->ru_start.ru_stime,
+				    &td->ru_end.ru_stime);
+	td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw
+		   - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
+
+	/* the sample just taken is the baseline for the next round */
+	memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end));
+}
+
+/*
+ * Summarise the samples gathered in 'is': minimum, maximum, mean and
+ * sample standard deviation.
+ *
+ * The deviation is sqrt((sum(x^2) - n * mean^2) / (n - 1)); the old
+ * code subtracted mean^2 / n instead. With a single sample the
+ * deviation is 0 rather than a division by zero, and a variance that
+ * comes out slightly negative through rounding is clamped to 0.
+ *
+ * Returns 0 (outputs possibly untouched) when there are no samples or
+ * when all four results are zero, 1 otherwise.
+ */
+static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
+		    double *mean, double *dev)
+{
+	double n, var;
+
+	if (is->samples == 0)
+		return 0;
+
+	*min = is->min_val;
+	*max = is->max_val;
+
+	n = (double) is->samples;
+	*mean = (double) is->val / n;
+
+	if (is->samples > 1) {
+		var = ((double) is->val_sq - n * *mean * *mean) / (n - 1);
+		if (var < 0)
+			var = 0;
+		*dev = sqrt(var);
+	} else
+		*dev = 0;
+
+	if (!(*min + *max) && !(*mean + *dev))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Print the aggregate figures of group 'id': one line per direction that
+ * has a non-zero maximum runtime, showing total io, aggregate / minimum /
+ * maximum bandwidth and shortest / longest runtime.
+ */
+static void show_group_stats(struct group_run_stats *rs, int id)
+{
+	printf("\nRun status group %d (all jobs):\n", id);
+
+	if (rs->max_run[DDIR_READ]) {
+		printf(" READ: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n",
+		       rs->io_kb[0] >> 10, rs->agg[0],
+		       rs->min_bw[0], rs->max_bw[0],
+		       rs->min_run[0], rs->max_run[0]);
+	}
+
+	if (rs->max_run[DDIR_WRITE]) {
+		printf(" WRITE: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n",
+		       rs->io_kb[1] >> 10, rs->agg[1],
+		       rs->min_bw[1], rs->max_bw[1],
+		       rs->min_run[1], rs->max_run[1]);
+	}
+}
+
+/*
+ * Print one line of accumulated statistics for every tracked disk.
+ * Utilisation is the io tick total as a percentage of the tracked time
+ * (du->msec), capped at 100.
+ */
+static void show_disk_util(void)
+{
+	struct list_head *pos;
+
+	printf("\nDisk stats (read/write):\n");
+
+	list_for_each(pos, &disk_list) {
+		struct disk_util *du = list_entry(pos, struct disk_util, list);
+		struct disk_util_stat *dus = &du->dus;
+		double util;
+
+		util = (double) 100 * dus->io_ticks / (double) du->msec;
+		if (util > 100.0)
+			util = 100.0;
+
+		printf(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, in_queue=%u, util=%3.2f%%\n",
+		       du->name, dus->ios[0], dus->ios[1],
+		       dus->merges[0], dus->merges[1],
+		       dus->ticks[0], dus->ticks[1],
+		       dus->time_in_queue, util);
+	}
+}
+
+/*
+ * Print the results of one direction of a job: total io, bandwidth and
+ * runtime, followed by the submission latency, completion latency and
+ * bandwidth sample summaries for which calc_lat() has something to
+ * report. The bandwidth line also shows the mean as a percentage of the
+ * group aggregate. Prints nothing for a direction without runtime.
+ */
+static void show_ddir_status(struct thread_data *td, struct group_run_stats *rs,
+			     int ddir)
+{
+	char *ddir_str[] = { "read ", "write" };
+	unsigned long lo, hi;
+	unsigned long long bw;
+	double avg, sdev;
+
+	if (!td->runtime[ddir])
+		return;
+
+	bw = td->io_bytes[ddir] / td->runtime[ddir];
+	printf(" %s: io=%6lluMiB, bw=%6lluKiB/s, runt=%6lumsec\n",
+	       ddir_str[ddir], td->io_bytes[ddir] >> 20, bw,
+	       td->runtime[ddir]);
+
+	if (calc_lat(&td->slat_stat[ddir], &lo, &hi, &avg, &sdev)) {
+		printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n",
+		       lo, hi, avg, sdev);
+	}
+
+	if (calc_lat(&td->clat_stat[ddir], &lo, &hi, &avg, &sdev)) {
+		printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n",
+		       lo, hi, avg, sdev);
+	}
+
+	if (calc_lat(&td->bw_stat[ddir], &lo, &hi, &avg, &sdev)) {
+		double share = avg * 100 / (double) rs->agg[ddir];
+
+		printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n",
+		       lo, hi, share, avg, sdev);
+	}
+}
+
+/*
+ * Print the report of one job: a header with thread number, group and
+ * error code, the figures of its main direction, those of the opposite
+ * direction if any bytes were moved that way, and the user / system cpu
+ * share of the total runtime plus the context switch count.
+ *
+ * Jobs that moved no data and hit no error are skipped.
+ */
+static void show_thread_status(struct thread_data *td,
+			       struct group_run_stats *rs)
+{
+	double usr_cpu = 0, sys_cpu = 0;
+
+	if (!(td->io_bytes[0] + td->io_bytes[1]) && !td->error)
+		return;
+
+	printf("Client%d (groupid=%d): err=%2d:\n", td->thread_number, td->groupid, td->error);
+
+	show_ddir_status(td, rs, td->ddir);
+	if (td->io_bytes[td->ddir ^ 1])
+		show_ddir_status(td, rs, td->ddir ^ 1);
+
+	if (td->runtime[0] + td->runtime[1]) {
+		double total = td->runtime[0] + td->runtime[1];
+
+		usr_cpu = (double) td->usr_time * 100 / total;
+		sys_cpu = (double) td->sys_time * 100 / total;
+	}
+
+	printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
+}
+
+/*
+ * Print the final report: per job results, per group aggregates and the
+ * disk utilisation figures.
+ *
+ * The group aggregates (runtime and bandwidth extremes, total io,
+ * aggregate bandwidth) are first collected from all jobs that finished
+ * without error; failed jobs only get their error text printed.
+ *
+ * The per-group scratch array is now checked after allocation and freed
+ * before returning; it used to be leaked.
+ */
+void show_run_stats(void)
+{
+	struct group_run_stats *runstats, *rs;
+	struct thread_data *td;
+	int i;
+
+	runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1));
+	if (!runstats) {
+		perror("malloc");
+		return;
+	}
+
+	for (i = 0; i < groupid + 1; i++) {
+		rs = &runstats[i];
+
+		memset(rs, 0, sizeof(*rs));
+		rs->min_bw[0] = rs->min_run[0] = ~0UL;
+		rs->min_bw[1] = rs->min_run[1] = ~0UL;
+	}
+
+	for (i = 0; i < thread_number; i++) {
+		unsigned long long rbw, wbw;
+
+		td = &threads[i];
+
+		if (td->error) {
+			printf("Client%d: %s\n", td->thread_number, td->verror);
+			continue;
+		}
+
+		rs = &runstats[td->groupid];
+
+		if (td->runtime[0] < rs->min_run[0] || !rs->min_run[0])
+			rs->min_run[0] = td->runtime[0];
+		if (td->runtime[0] > rs->max_run[0])
+			rs->max_run[0] = td->runtime[0];
+		if (td->runtime[1] < rs->min_run[1] || !rs->min_run[1])
+			rs->min_run[1] = td->runtime[1];
+		if (td->runtime[1] > rs->max_run[1])
+			rs->max_run[1] = td->runtime[1];
+
+		rbw = wbw = 0;
+		if (td->runtime[0])
+			rbw = td->io_bytes[0] / (unsigned long long) td->runtime[0];
+		if (td->runtime[1])
+			wbw = td->io_bytes[1] / (unsigned long long) td->runtime[1];
+
+		if (rbw < rs->min_bw[0])
+			rs->min_bw[0] = rbw;
+		if (wbw < rs->min_bw[1])
+			rs->min_bw[1] = wbw;
+		if (rbw > rs->max_bw[0])
+			rs->max_bw[0] = rbw;
+		if (wbw > rs->max_bw[1])
+			rs->max_bw[1] = wbw;
+
+		rs->io_kb[0] += td->io_bytes[0] >> 10;
+		rs->io_kb[1] += td->io_bytes[1] >> 10;
+	}
+
+	for (i = 0; i < groupid + 1; i++) {
+		rs = &runstats[i];
+
+		if (rs->max_run[0])
+			rs->agg[0] = (rs->io_kb[0]*1024) / rs->max_run[0];
+		if (rs->max_run[1])
+			rs->agg[1] = (rs->io_kb[1]*1024) / rs->max_run[1];
+	}
+
+	/*
+	 * don't overwrite last signal output
+	 */
+	printf("\n");
+
+	for (i = 0; i < thread_number; i++) {
+		td = &threads[i];
+		rs = &runstats[td->groupid];
+
+		show_thread_status(td, rs);
+	}
+
+	for (i = 0; i < groupid + 1; i++)
+		show_group_stats(&runstats[i], i);
+
+	show_disk_util();
+
+	free(runstats);
+}
+
+/*
+ * Fold one sample into the running statistics: track the extremes and
+ * accumulate the sum, the sum of squares and the sample count.
+ */
+static inline void add_stat_sample(struct io_stat *is, unsigned long val)
+{
+	if (is->max_val < val)
+		is->max_val = val;
+	if (is->min_val > val)
+		is->min_val = val;
+
+	is->samples++;
+	is->val_sq += val * val;
+	is->val += val;
+}
+
+/*
+ * Append a sample (value, milliseconds since td->epoch, direction) to
+ * 'iolog', doubling the sample array when it is full.
+ *
+ * The new size is computed as a size_t and the array is only replaced
+ * once realloc() has succeeded. If growing fails the existing log is
+ * kept and this sample is dropped; previously the pointer was lost and
+ * then dereferenced.
+ */
+static void add_log_sample(struct thread_data *td, struct io_log *iolog,
+			   unsigned long val, int ddir)
+{
+	if (iolog->nr_samples == iolog->max_samples) {
+		size_t new_size = sizeof(struct io_sample) * iolog->max_samples * 2;
+		void *grown = realloc(iolog->log, new_size);
+
+		if (!grown)
+			return;
+
+		iolog->log = grown;
+		iolog->max_samples <<= 1;
+	}
+
+	iolog->log[iolog->nr_samples].val = val;
+	iolog->log[iolog->nr_samples].time = mtime_since_now(&td->epoch);
+	iolog->log[iolog->nr_samples].ddir = ddir;
+	iolog->nr_samples++;
+}
+
+/*
+ * Record a completion latency sample for direction 'ddir': always in the
+ * running statistics, and in the completion latency log when the job
+ * keeps one.
+ */
+void add_clat_sample(struct thread_data *td, int ddir, unsigned long msec)
+{
+	add_stat_sample(&td->clat_stat[ddir], msec);
+
+	if (!td->clat_log)
+		return;
+
+	add_log_sample(td, td->clat_log, msec, ddir);
+}
+
+/*
+ * Record a submission latency sample for direction 'ddir': always in the
+ * running statistics, and in the submission latency log when the job
+ * keeps one.
+ */
+void add_slat_sample(struct thread_data *td, int ddir, unsigned long msec)
+{
+	add_stat_sample(&td->slat_stat[ddir], msec);
+
+	if (!td->slat_log)
+		return;
+
+	add_log_sample(td, td->slat_log, msec, ddir);
+}
+
+/*
+ * Take a bandwidth sample for direction 'ddir' once at least
+ * td->bw_avg_time milliseconds have passed since the previous one.
+ *
+ * The rate is the number of bytes moved since the last sample divided by
+ * the elapsed milliseconds. It goes into the running statistics and,
+ * when the job keeps one, the bandwidth log; afterwards the sample time
+ * and byte count are reset.
+ *
+ * A zero elapsed time never produces a sample, so an averaging time of 0
+ * can no longer cause a division by zero.
+ */
+void add_bw_sample(struct thread_data *td, int ddir)
+{
+	unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]);
+	unsigned long rate;
+
+	if (!spent || spent < td->bw_avg_time)
+		return;
+
+	rate = (td->this_io_bytes[ddir] - td->stat_io_bytes[ddir]) / spent;
+	add_stat_sample(&td->bw_stat[ddir], rate);
+
+	if (td->bw_log)
+		add_log_sample(td, td->bw_log, rate, ddir);
+
+	gettimeofday(&td->stat_sample_time[ddir], NULL);
+	td->stat_io_bytes[ddir] = td->this_io_bytes[ddir];
+}
+
+
--- /dev/null
+#include <time.h>
+#include <sys/time.h>
+
+#include "fio.h"
+
+/*
+ * Microseconds elapsed between timestamps 's' (start) and 'e' (end).
+ */
+unsigned long utime_since(struct timeval *s, struct timeval *e)
+{
+	double sec = e->tv_sec - s->tv_sec;
+	double usec = e->tv_usec - s->tv_usec;
+
+	/* borrow one second when the microsecond part went negative */
+	if (sec > 0 && usec < 0) {
+		sec -= 1;
+		usec += 1000000;
+	}
+
+	return sec * (double) 1000000 + usec;
+}
+
+/*
+ * Microseconds elapsed between timestamp 's' and the current time.
+ */
+static unsigned long utime_since_now(struct timeval *s)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	return utime_since(s, &now);
+}
+
+/*
+ * Milliseconds elapsed between timestamps 's' (start) and 'e' (end);
+ * fractions of a millisecond are cut off.
+ */
+unsigned long mtime_since(struct timeval *s, struct timeval *e)
+{
+	double sec = e->tv_sec - s->tv_sec;
+	double usec = e->tv_usec - s->tv_usec;
+
+	/* borrow one second when the microsecond part went negative */
+	if (sec > 0 && usec < 0) {
+		sec -= 1;
+		usec += 1000000;
+	}
+
+	return sec * (double) 1000 + usec / (double) 1000;
+}
+
+/*
+ * Milliseconds elapsed between timestamp 's' and the current time.
+ */
+unsigned long mtime_since_now(struct timeval *s)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	return mtime_since(s, &now);
+}
+
+/*
+ * Whole seconds elapsed between timestamp 's' and the current time.
+ */
+unsigned long time_since_now(struct timeval *s)
+{
+	unsigned long msec = mtime_since_now(s);
+
+	return msec / 1000;
+}
+
+/*
+ * busy looping version for the last few usec
+ *
+ * Spins, executing 'nop' each round, until at least 'usec' microseconds
+ * have passed since entry.
+ */
+static void __usec_sleep(unsigned int usec)
+{
+	struct timeval begin;
+
+	gettimeofday(&begin, NULL);
+
+	for (;;) {
+		if (utime_since_now(&begin) >= usec)
+			break;
+
+		nop;
+	}
+}
+
+/*
+ * Sleep for 'usec' microseconds.
+ *
+ * Waits shorter than 5000 usec are busy-looped; longer ones use
+ * nanosleep(). If nanosleep() reports time remaining, the remainder is
+ * slept in the same way unless td->terminate has been set. A failing
+ * nanosleep() ends the wait.
+ *
+ * The nanosecond part of the request is now (usec % 1000000) * 1000. The
+ * old expression subtracted seconds scaled by 10^6 instead of 10^9, so
+ * any sleep of one second or more produced an out-of-range tv_nsec and
+ * nanosleep() rejected it without sleeping.
+ */
+void usec_sleep(struct thread_data *td, unsigned long usec)
+{
+	struct timespec req, rem;
+
+	req.tv_sec = usec / 1000000;
+	req.tv_nsec = (usec % 1000000) * 1000;
+
+	do {
+		if (usec < 5000) {
+			__usec_sleep(usec);
+			break;
+		}
+
+		rem.tv_sec = rem.tv_nsec = 0;
+		if (nanosleep(&req, &rem) < 0)
+			break;
+
+		if ((rem.tv_sec + rem.tv_nsec) == 0)
+			break;
+
+		req.tv_nsec = rem.tv_nsec;
+		req.tv_sec = rem.tv_sec;
+
+		usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
+	} while (!td->terminate);
+}
+
+/*
+ * Hold a rate limited job back to its configured rate.
+ *
+ * 'bytes' were transferred in 'time_spent' microseconds. The time that
+ * transfer should have taken is td->rate_usec_cycle for every td->min_bs
+ * sized unit. Finishing early adds the difference to the pending sleep,
+ * which is slept off once it reaches 100000 usec; finishing late reduces
+ * the pending sleep by the overrun. Jobs without a rate are not touched.
+ */
+void rate_throttle(struct thread_data *td, unsigned long time_spent,
+		   unsigned int bytes)
+{
+	unsigned long budget;
+
+	if (!td->rate)
+		return;
+
+	budget = td->rate_usec_cycle * (bytes / td->min_bs);
+
+	if (time_spent >= budget) {
+		long overrun = time_spent - budget;
+
+		td->rate_pending_usleep -= overrun;
+		return;
+	}
+
+	{
+		unsigned long ahead = budget - time_spent;
+
+		td->rate_pending_usleep += ahead;
+	}
+
+	if (td->rate_pending_usleep >= 100000) {
+		usec_sleep(td, td->rate_pending_usleep);
+		td->rate_pending_usleep = 0;
+	}
+}