From: Jens Axboe Date: Wed, 7 Jun 2006 06:45:01 +0000 (+0200) Subject: [PATCH] Shorten the file names, stupid to prefix everything with fio- X-Git-Tag: fio-1.5~40 X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=6796209a7e3d39522b0f5599aba277809786335e;hp=592ef98a623407437c1807bc29deaa87726de5b4;ds=inline [PATCH] Shorten the file names, stupid to prefix everything with fio- --- diff --git a/Makefile b/Makefile index 16c84136..684e8e04 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ SCRIPTS = fio_generate_plots all: depend $(PROGS) $(SCRIPTS) -fio: fio.o fio-io.o fio-ini.o fio-stat.o fio-log.o fio-time.o md5.o crc32.o +fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt clean: diff --git a/fio-ini.c b/fio-ini.c deleted file mode 100644 index d2122e3f..00000000 --- a/fio-ini.c +++ /dev/null @@ -1,1133 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "fio.h" - -#define DEF_BS (4096) -#define DEF_TIMEOUT (0) -#define DEF_RATE_CYCLE (1000) -#define DEF_ODIRECT (1) -#define DEF_IO_ENGINE (FIO_SYNCIO) -#define DEF_IO_ENGINE_NAME "sync" -#define DEF_SEQUENTIAL (1) -#define DEF_RAND_REPEAT (1) -#define DEF_OVERWRITE (1) -#define DEF_CREATE (1) -#define DEF_INVALIDATE (1) -#define DEF_SYNCIO (0) -#define DEF_RANDSEED (0xb1899bedUL) -#define DEF_BWAVGTIME (500) -#define DEF_CREATE_SER (1) -#define DEF_CREATE_FSYNC (1) -#define DEF_LOOPS (1) -#define DEF_VERIFY (0) -#define DEF_STONEWALL (0) -#define DEF_NUMJOBS (1) -#define DEF_USE_THREAD (0) -#define DEF_FILE_SIZE (1024 * 1024 * 1024UL) -#define DEF_ZONE_SIZE (0) -#define DEF_ZONE_SKIP (0) -#define DEF_RWMIX_CYCLE (500) -#define DEF_RWMIX_READ (50) -#define DEF_NICE (0) - -static char fio_version_string[] = "fio 1.4"; - -static int repeatable = DEF_RAND_REPEAT; -static char *ini_file; -static int max_jobs = MAX_JOBS; - -struct thread_data def_thread; 
-struct thread_data *threads = NULL; - -int rate_quit = 0; -int write_lat_log = 0; -int write_bw_log = 0; -int exitall_on_terminate = 0; -unsigned long long mlock_size = 0; - -static int setup_rate(struct thread_data *td) -{ - int nr_reads_per_sec; - - if (!td->rate) - return 0; - - if (td->rate < td->ratemin) { - fprintf(stderr, "min rate larger than nominal rate\n"); - return -1; - } - - nr_reads_per_sec = (td->rate * 1024) / td->min_bs; - td->rate_usec_cycle = 1000000 / nr_reads_per_sec; - td->rate_pending_usleep = 0; - return 0; -} - -static void setup_log(struct io_log **log) -{ - struct io_log *l = malloc(sizeof(*l)); - - l->nr_samples = 0; - l->max_samples = 1024; - l->log = malloc(l->max_samples * sizeof(struct io_sample)); - *log = l; -} - -void finish_log(struct thread_data *td, struct io_log *log, const char *name) -{ - char file_name[256]; - FILE *f; - unsigned int i; - - snprintf(file_name, 200, "client%d_%s.log", td->thread_number, name); - f = fopen(file_name, "w"); - if (!f) { - perror("fopen log"); - return; - } - - for (i = 0; i < log->nr_samples; i++) - fprintf(f, "%lu, %lu, %u\n", log->log[i].time, log->log[i].val, log->log[i].ddir); - - fclose(f); - free(log->log); - free(log); -} - -static struct thread_data *get_new_job(int global, struct thread_data *parent) -{ - struct thread_data *td; - - if (global) - return &def_thread; - if (thread_number >= max_jobs) - return NULL; - - td = &threads[thread_number++]; - if (parent) - *td = *parent; - else - memset(td, 0, sizeof(*td)); - - td->fd = -1; - td->thread_number = thread_number; - - td->ddir = parent->ddir; - td->ioprio = parent->ioprio; - td->sequential = parent->sequential; - td->bs = parent->bs; - td->min_bs = parent->min_bs; - td->max_bs = parent->max_bs; - td->odirect = parent->odirect; - td->thinktime = parent->thinktime; - td->fsync_blocks = parent->fsync_blocks; - td->start_delay = parent->start_delay; - td->timeout = parent->timeout; - td->io_engine = parent->io_engine; - 
td->create_file = parent->create_file; - td->overwrite = parent->overwrite; - td->invalidate_cache = parent->invalidate_cache; - td->file_size = parent->file_size; - td->file_offset = parent->file_offset; - td->zone_size = parent->zone_size; - td->zone_skip = parent->zone_skip; - td->rate = parent->rate; - td->ratemin = parent->ratemin; - td->ratecycle = parent->ratecycle; - td->iodepth = parent->iodepth; - td->sync_io = parent->sync_io; - td->mem_type = parent->mem_type; - td->bw_avg_time = parent->bw_avg_time; - td->create_serialize = parent->create_serialize; - td->create_fsync = parent->create_fsync; - td->loops = parent->loops; - td->verify = parent->verify; - td->stonewall = parent->stonewall; - td->numjobs = parent->numjobs; - td->use_thread = parent->use_thread; - td->do_disk_util = parent->do_disk_util; - memcpy(&td->cpumask, &parent->cpumask, sizeof(td->cpumask)); - strcpy(td->io_engine_name, parent->io_engine_name); - - return td; -} - -static void put_job(struct thread_data *td) -{ - memset(&threads[td->thread_number - 1], 0, sizeof(*td)); - thread_number--; -} - -static int add_job(struct thread_data *td, const char *jobname, int job_add_num) -{ - char *ddir_str[] = { "read", "write", "randread", "randwrite", - "rw", NULL, "randrw" }; - struct stat sb; - int numjobs, ddir; - -#ifndef FIO_HAVE_LIBAIO - if (td->io_engine == FIO_LIBAIO) { - fprintf(stderr, "Linux libaio not available\n"); - return 1; - } -#endif -#ifndef FIO_HAVE_POSIXAIO - if (td->io_engine == FIO_POSIXAIO) { - fprintf(stderr, "posix aio not available\n"); - return 1; - } -#endif - - /* - * the def_thread is just for options, it's not a real job - */ - if (td == &def_thread) - return 0; - - if (td->io_engine & FIO_SYNCIO) - td->iodepth = 1; - else { - if (!td->iodepth) - td->iodepth = 1; - } - - /* - * only really works for sequential io for now - */ - if (td->zone_size && !td->sequential) - td->zone_size = 0; - - td->filetype = FIO_TYPE_FILE; - if (!stat(jobname, &sb)) { - if 
(S_ISBLK(sb.st_mode)) - td->filetype = FIO_TYPE_BD; - else if (S_ISCHR(sb.st_mode)) - td->filetype = FIO_TYPE_CHAR; - } - - if (td->filetype == FIO_TYPE_FILE) { - if (td->directory && td->directory[0] != '\0') - sprintf(td->file_name, "%s/%s.%d", td->directory, jobname, td->jobnum); - else - sprintf(td->file_name, "%s.%d", jobname, td->jobnum); - } else - strncpy(td->file_name, jobname, sizeof(td->file_name) - 1); - - sem_init(&td->mutex, 0, 0); - - td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX; - td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX; - td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX; - - if (td->min_bs == -1U) - td->min_bs = td->bs; - if (td->max_bs == -1U) - td->max_bs = td->bs; - if (td_read(td) && !td_rw(td)) - td->verify = 0; - - if (td->stonewall && td->thread_number > 1) - groupid++; - - td->groupid = groupid; - - if (setup_rate(td)) - goto err; - - if (write_lat_log) { - setup_log(&td->slat_log); - setup_log(&td->clat_log); - } - if (write_bw_log) - setup_log(&td->bw_log); - - ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2); - - if (!job_add_num) - printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth); - else if (job_add_num == 1) - printf("...\n"); - - /* - * recurse add identical jobs, clear numjobs and stonewall options - * as they don't apply to sub-jobs - */ - numjobs = td->numjobs; - while (--numjobs) { - struct thread_data *td_new = get_new_job(0, td); - - if (!td_new) - goto err; - - td_new->numjobs = 1; - td_new->stonewall = 0; - td_new->jobnum = numjobs; - job_add_num = numjobs - 1; - - if (add_job(td_new, jobname, job_add_num)) - goto err; - } - return 0; -err: - put_job(td); - return -1; -} - -int init_random_state(struct thread_data *td) -{ - unsigned long seeds[4]; - int fd, num_maps, blocks; - - fd = 
open("/dev/urandom", O_RDONLY); - if (fd == -1) { - td_verror(td, errno); - return 1; - } - - if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) { - td_verror(td, EIO); - close(fd); - return 1; - } - - close(fd); - - srand48_r(seeds[0], &td->bsrange_state); - srand48_r(seeds[1], &td->verify_state); - srand48_r(seeds[2], &td->rwmix_state); - - if (td->sequential) - return 0; - - if (repeatable) - seeds[3] = DEF_RANDSEED; - - blocks = (td->io_size + td->min_bs - 1) / td->min_bs; - num_maps = blocks / BLOCKS_PER_MAP; - td->file_map = malloc(num_maps * sizeof(long)); - td->num_maps = num_maps; - memset(td->file_map, 0, num_maps * sizeof(long)); - - srand48_r(seeds[3], &td->random_state); - return 0; -} - -static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu) -{ -#ifdef FIO_HAVE_CPU_AFFINITY - unsigned int i; - - CPU_ZERO(&cpumask); - - for (i = 0; i < sizeof(int) * 8; i++) { - if ((1 << i) & cpu) - CPU_SET(i, &cpumask); - } -#endif -} - -static unsigned long get_mult(char c) -{ - switch (c) { - case 'k': - case 'K': - return 1024; - case 'm': - case 'M': - return 1024 * 1024; - case 'g': - case 'G': - return 1024 * 1024 * 1024; - default: - return 1; - } -} - -/* - * convert string after '=' into decimal value, noting any size suffix - */ -static int str_cnv(char *p, unsigned long long *val) -{ - char *str; - int len; - - str = strchr(p, '='); - if (!str) - return 1; - - str++; - len = strlen(str); - - *val = strtoul(str, NULL, 10); - if (*val == ULONG_MAX && errno == ERANGE) - return 1; - - *val *= get_mult(str[len - 1]); - return 0; -} - -static int check_strcnv(char *p, char *name, unsigned long long *val) -{ - if (strncmp(p, name, strlen(name) - 1)) - return 1; - - return str_cnv(p, val); -} - -static void strip_blank_front(char **p) -{ - char *s = *p; - - while (isspace(*s)) - s++; -} - -static void strip_blank_end(char *p) -{ - char *s = p + strlen(p) - 1; - - while (isspace(*s) || iscntrl(*s)) - s--; - - *(s + 1) = '\0'; -} - -typedef int 
(str_cb_fn)(struct thread_data *, char *); - -static int check_str(char *p, char *name, str_cb_fn *cb, struct thread_data *td) -{ - char *s; - - if (strncmp(p, name, strlen(name))) - return 1; - - s = strstr(p, name); - if (!s) - return 1; - - s = strchr(s, '='); - if (!s) - return 1; - - s++; - strip_blank_front(&s); - return cb(td, s); -} - -static int check_strstore(char *p, char *name, char *dest) -{ - char *s; - - if (strncmp(p, name, strlen(name))) - return 1; - - s = strstr(p, name); - if (!s) - return 1; - - s = strchr(p, '='); - if (!s) - return 1; - - s++; - strip_blank_front(&s); - - strcpy(dest, s); - return 0; -} - -static int __check_range(char *str, unsigned long *val) -{ - char suffix; - - if (sscanf(str, "%lu%c", val, &suffix) == 2) { - *val *= get_mult(suffix); - return 0; - } - - if (sscanf(str, "%lu", val) == 1) - return 0; - - return 1; -} - -static int check_range(char *p, char *name, unsigned long *s, unsigned long *e) -{ - char option[128]; - char *str, *p1, *p2; - - if (strncmp(p, name, strlen(name))) - return 1; - - strcpy(option, p); - p = option; - - str = strstr(p, name); - if (!str) - return 1; - - p += strlen(name); - - str = strchr(p, '='); - if (!str) - return 1; - - /* - * 'p' now holds whatever is after the '=' sign - */ - p1 = str + 1; - - /* - * terminate p1 at the '-' sign - */ - p = strchr(p1, '-'); - if (!p) - return 1; - - p2 = p + 1; - *p = '\0'; - - if (!__check_range(p1, s) && !__check_range(p2, e)) - return 0; - - return 1; -} - -static int check_int(char *p, char *name, unsigned int *val) -{ - char *str; - - if (strncmp(p, name, strlen(name))) - return 1; - - str = strstr(p, name); - if (!str) - return 1; - - str = strchr(p, '='); - if (!str) - return 1; - - str++; - - if (sscanf(str, "%u", val) == 1) - return 0; - - return 1; -} - -static int check_strset(char *p, char *name) -{ - return strncmp(p, name, strlen(name)); -} - -static int is_empty_or_comment(char *line) -{ - unsigned int i; - - for (i = 0; i < 
strlen(line); i++) { - if (line[i] == ';') - return 1; - if (!isspace(line[i]) && !iscntrl(line[i])) - return 0; - } - - return 1; -} - -static int str_rw_cb(struct thread_data *td, char *mem) -{ - if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) { - td->ddir = DDIR_READ; - td->sequential = 1; - return 0; - } else if (!strncmp(mem, "randread", 8)) { - td->ddir = DDIR_READ; - td->sequential = 0; - return 0; - } else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) { - td->ddir = DDIR_WRITE; - td->sequential = 1; - return 0; - } else if (!strncmp(mem, "randwrite", 9)) { - td->ddir = DDIR_WRITE; - td->sequential = 0; - return 0; - } else if (!strncmp(mem, "rw", 2)) { - td->ddir = 0; - td->iomix = 1; - td->sequential = 1; - return 0; - } else if (!strncmp(mem, "randrw", 6)) { - td->ddir = 0; - td->iomix = 1; - td->sequential = 0; - return 0; - } - - fprintf(stderr, "bad data direction: %s\n", mem); - return 1; -} - -static int str_verify_cb(struct thread_data *td, char *mem) -{ - if (!strncmp(mem, "0", 1)) { - td->verify = VERIFY_NONE; - return 0; - } else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1)) { - td->verify = VERIFY_MD5; - return 0; - } else if (!strncmp(mem, "crc32", 5)) { - td->verify = VERIFY_CRC32; - return 0; - } - - fprintf(stderr, "bad verify type: %s\n", mem); - return 1; -} - -static int str_mem_cb(struct thread_data *td, char *mem) -{ - if (!strncmp(mem, "malloc", 6)) { - td->mem_type = MEM_MALLOC; - return 0; - } else if (!strncmp(mem, "shm", 3)) { - td->mem_type = MEM_SHM; - return 0; - } else if (!strncmp(mem, "mmap", 4)) { - td->mem_type = MEM_MMAP; - return 0; - } - - fprintf(stderr, "bad mem type: %s\n", mem); - return 1; -} - -static int str_ioengine_cb(struct thread_data *td, char *str) -{ - if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) || - !strncmp(str, "libaio", 6)) { - strcpy(td->io_engine_name, "libaio"); - td->io_engine = FIO_LIBAIO; - return 0; - } else if (!strncmp(str, "posixaio", 8)) { - 
strcpy(td->io_engine_name, "posixaio"); - td->io_engine = FIO_POSIXAIO; - return 0; - } else if (!strncmp(str, "sync", 4)) { - strcpy(td->io_engine_name, "sync"); - td->io_engine = FIO_SYNCIO; - return 0; - } else if (!strncmp(str, "mmap", 4)) { - strcpy(td->io_engine_name, "mmap"); - td->io_engine = FIO_MMAPIO; - return 0; - } else if (!strncmp(str, "sgio", 4)) { - strcpy(td->io_engine_name, "sgio"); - td->io_engine = FIO_SGIO; - return 0; - } else if (!strncmp(str, "splice", 6)) { - strcpy(td->io_engine_name, "splice"); - td->io_engine = FIO_SPLICEIO; - return 0; - } - - fprintf(stderr, "bad ioengine type: %s\n", str); - return 1; -} - -static int str_iolog_cb(struct thread_data *td, char *file) -{ - td->iolog_file = strdup(file); - return 0; -} - -static int str_prerun_cb(struct thread_data *td, char *file) -{ - td->exec_prerun = strdup(file); - return 0; -} - -static int str_postrun_cb(struct thread_data *td, char *file) -{ - td->exec_postrun = strdup(file); - return 0; -} - -static int str_iosched_cb(struct thread_data *td, char *file) -{ - td->ioscheduler = strdup(file); - return 0; -} - -int parse_jobs_ini(char *file) -{ - unsigned int prioclass, prio, cpu, global, il; - unsigned long long ull; - unsigned long ul1, ul2; - struct thread_data *td; - char *string, *name, *tmpbuf; - fpos_t off; - FILE *f; - char *p; - - f = fopen(file, "r"); - if (!f) { - perror("fopen job file"); - return 1; - } - - string = malloc(4096); - name = malloc(256); - tmpbuf = malloc(4096); - - while ((p = fgets(string, 4096, f)) != NULL) { - if (is_empty_or_comment(p)) - continue; - if (sscanf(p, "[%s]", name) != 1) - continue; - - global = !strncmp(name, "global", 6); - - name[strlen(name) - 1] = '\0'; - - td = get_new_job(global, &def_thread); - if (!td) - return 1; - - fgetpos(f, &off); - while ((p = fgets(string, 4096, f)) != NULL) { - if (is_empty_or_comment(p)) - continue; - if (strstr(p, "[")) - break; - strip_blank_front(&p); - strip_blank_end(p); - - if (!check_int(p, 
"prio", &prio)) { -#ifndef FIO_HAVE_IOPRIO - fprintf(stderr, "io priorities not available\n"); - return 1; -#endif - td->ioprio |= prio; - fgetpos(f, &off); - continue; - } - if (!check_int(p, "prioclass", &prioclass)) { -#ifndef FIO_HAVE_IOPRIO - fprintf(stderr, "io priorities not available\n"); - return 1; -#endif - td->ioprio |= prioclass << IOPRIO_CLASS_SHIFT; - fgetpos(f, &off); - continue; - } - if (!check_int(p, "direct", &td->odirect)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "rate", &td->rate)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "ratemin", &td->ratemin)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "ratecycle", &td->ratecycle)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "thinktime", &td->thinktime)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "cpumask", &cpu)) { -#ifndef FIO_HAVE_CPU_AFFINITY - fprintf(stderr, "cpu affinity not available\n"); - return 1; -#endif - fill_cpu_mask(td->cpumask, cpu); - fgetpos(f, &off); - continue; - } - if (!check_int(p, "fsync", &td->fsync_blocks)) { - fgetpos(f, &off); - td->end_fsync = 1; - continue; - } - if (!check_int(p, "startdelay", &td->start_delay)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "timeout", &td->timeout)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "invalidate",&td->invalidate_cache)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "iodepth", &td->iodepth)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "sync", &td->sync_io)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "bwavgtime", &td->bw_avg_time)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "create_serialize", &td->create_serialize)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "create_fsync", &td->create_fsync)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "end_fsync", &td->end_fsync)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "loops", &td->loops)) { - 
fgetpos(f, &off); - continue; - } - if (!check_int(p, "numjobs", &td->numjobs)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "overwrite", &td->overwrite)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "rwmixcycle", &td->rwmixcycle)) { - fgetpos(f, &off); - continue; - } - if (!check_int(p, "rwmixread", &il)) { - if (il > 100) - il = 100; - td->rwmixread = il; - fgetpos(f, &off); - continue; - } - if (!check_int(p, "rwmixwrite", &il)) { - if (il > 100) - il = 100; - td->rwmixread = 100 - il; - fgetpos(f, &off); - continue; - } - if (!check_int(p, "nice", &td->nice)) { - fgetpos(f, &off); - continue; - } - if (!check_range(p, "bsrange", &ul1, &ul2)) { - if (ul1 > ul2) { - td->max_bs = ul1; - td->min_bs = ul2; - } else { - td->max_bs = ul2; - td->min_bs = ul1; - } - fgetpos(f, &off); - continue; - } - if (!check_strcnv(p, "bs", &ull)) { - td->bs = ull; - fgetpos(f, &off); - continue; - } - if (!check_strcnv(p, "size", &td->file_size)) { - fgetpos(f, &off); - continue; - } - if (!check_strcnv(p, "offset", &td->file_offset)) { - fgetpos(f, &off); - continue; - } - if (!check_strcnv(p, "zonesize", &td->zone_size)) { - fgetpos(f, &off); - continue; - } - if (!check_strcnv(p, "zoneskip", &td->zone_skip)) { - fgetpos(f, &off); - continue; - } - if (!check_strcnv(p, "lockmem", &mlock_size)) { - fgetpos(f, &off); - continue; - } - if (!check_strstore(p, "directory", tmpbuf)) { - td->directory = strdup(tmpbuf); - fgetpos(f, &off); - continue; - } - if (!check_str(p, "mem", str_mem_cb, td)) { - fgetpos(f, &off); - continue; - } - if (!check_str(p, "verify", str_verify_cb, td)) { - fgetpos(f, &off); - continue; - } - if (!check_str(p, "rw", str_rw_cb, td)) { - fgetpos(f, &off); - continue; - } - if (!check_str(p, "ioengine", str_ioengine_cb, td)) { - fgetpos(f, &off); - continue; - } - if (!check_strset(p, "create")) { - td->create_file = 1; - fgetpos(f, &off); - continue; - } - if (!check_strset(p, "exitall")) { - exitall_on_terminate = 1; - fgetpos(f, 
&off); - continue; - } - if (!check_strset(p, "stonewall")) { - td->stonewall = 1; - fgetpos(f, &off); - continue; - } - if (!check_strset(p, "thread")) { - td->use_thread = 1; - fgetpos(f, &off); - continue; - } - if (!check_str(p, "iolog", str_iolog_cb, td)) { - td->read_iolog = 1; - td->write_iolog = 0; - fgetpos(f, &off); - continue; - } - if (!td->read_iolog && - !check_str(p, "write_iolog", str_iolog_cb, td)) { - td->write_iolog = 1; - fgetpos(f, &off); - continue; - } - if (!check_str(p, "exec_prerun", str_prerun_cb, td)) { - fgetpos(f, &off); - continue; - } - if (!check_str(p, "exec_postrun", str_postrun_cb, td)) { - fgetpos(f, &off); - continue; - } - if (!check_str(p, "ioscheduler", str_iosched_cb, td)) { - fgetpos(f, &off); - continue; - } - - printf("Client%d: bad option %s\n",td->thread_number,p); - return 1; - } - fsetpos(f, &off); - - if (add_job(td, name, 0)) - return 1; - } - - free(string); - free(name); - free(tmpbuf); - fclose(f); - return 0; -} - -static int fill_def_thread(void) -{ - memset(&def_thread, 0, sizeof(def_thread)); - - if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) { - perror("sched_getaffinity"); - return 1; - } - - /* - * fill globals - */ - def_thread.ddir = DDIR_READ; - def_thread.iomix = 0; - def_thread.bs = DEF_BS; - def_thread.min_bs = -1; - def_thread.max_bs = -1; - def_thread.io_engine = DEF_IO_ENGINE; - strcpy(def_thread.io_engine_name, DEF_IO_ENGINE_NAME); - def_thread.odirect = DEF_ODIRECT; - def_thread.ratecycle = DEF_RATE_CYCLE; - def_thread.sequential = DEF_SEQUENTIAL; - def_thread.timeout = DEF_TIMEOUT; - def_thread.create_file = DEF_CREATE; - def_thread.overwrite = DEF_OVERWRITE; - def_thread.invalidate_cache = DEF_INVALIDATE; - def_thread.sync_io = DEF_SYNCIO; - def_thread.mem_type = MEM_MALLOC; - def_thread.bw_avg_time = DEF_BWAVGTIME; - def_thread.create_serialize = DEF_CREATE_SER; - def_thread.create_fsync = DEF_CREATE_FSYNC; - def_thread.loops = DEF_LOOPS; - def_thread.verify = DEF_VERIFY; - 
def_thread.stonewall = DEF_STONEWALL; - def_thread.numjobs = DEF_NUMJOBS; - def_thread.use_thread = DEF_USE_THREAD; - def_thread.rwmixcycle = DEF_RWMIX_CYCLE; - def_thread.rwmixread = DEF_RWMIX_READ; - def_thread.nice = DEF_NICE; -#ifdef FIO_HAVE_DISK_UTIL - def_thread.do_disk_util = 1; -#endif - - return 0; -} - -static void usage(char *name) -{ - printf("%s\n", fio_version_string); - printf("\t-s IO is sequential\n"); - printf("\t-b Block size in KiB for each IO\n"); - printf("\t-t Runtime in seconds\n"); - printf("\t-R Exit all threads on failure to meet rate goal\n"); - printf("\t-o Use O_DIRECT\n"); - printf("\t-l Generate per-job latency logs\n"); - printf("\t-w Generate per-job bandwidth logs\n"); - printf("\t-f Job file (Required)\n"); - printf("\t-v Print version info and exit\n"); -} - -static void parse_cmd_line(int argc, char *argv[]) -{ - int c; - - while ((c = getopt(argc, argv, "s:b:t:r:R:o:f:lwvh")) != EOF) { - switch (c) { - case 's': - def_thread.sequential = !!atoi(optarg); - break; - case 'b': - def_thread.bs = atoi(optarg); - def_thread.bs <<= 10; - if (!def_thread.bs) { - printf("bad block size\n"); - def_thread.bs = DEF_BS; - } - break; - case 't': - def_thread.timeout = atoi(optarg); - break; - case 'r': - repeatable = !!atoi(optarg); - break; - case 'R': - rate_quit = !!atoi(optarg); - break; - case 'o': - def_thread.odirect = !!atoi(optarg); - break; - case 'f': - ini_file = strdup(optarg); - break; - case 'l': - write_lat_log = 1; - break; - case 'w': - write_bw_log = 1; - break; - case 'h': - usage(argv[0]); - exit(0); - case 'v': - printf("%s\n", fio_version_string); - exit(0); - } - } - - if (!ini_file && argc > 1 && argv[argc - 1][0] != '-') - ini_file = strdup(argv[argc - 1]); -} - -static void free_shm(void) -{ - struct shmid_ds sbuf; - - if (threads) { - shmdt(threads); - threads = NULL; - shmctl(shm_id, IPC_RMID, &sbuf); - } -} - -static int setup_thread_area(void) -{ - /* - * 1024 is too much on some machines, scale max_jobs if - 
* we get a failure that looks like too large a shm segment - */ - do { - int s = max_jobs * sizeof(struct thread_data); - - shm_id = shmget(0, s, IPC_CREAT | 0600); - if (shm_id != -1) - break; - if (errno != EINVAL) { - perror("shmget"); - break; - } - - max_jobs >>= 1; - } while (max_jobs); - - if (shm_id == -1) - return 1; - - threads = shmat(shm_id, NULL, 0); - if (threads == (void *) -1) { - perror("shmat"); - return 1; - } - - atexit(free_shm); - return 0; -} - -int parse_options(int argc, char *argv[]) -{ - if (setup_thread_area()) - return 1; - if (fill_def_thread()) - return 1; - - parse_cmd_line(argc, argv); - - if (!ini_file) { - printf("Need job file\n"); - usage(argv[0]); - return 1; - } - - if (parse_jobs_ini(ini_file)) { - usage(argv[0]); - return 1; - } - - return 0; -} diff --git a/fio-io.c b/fio-io.c deleted file mode 100644 index 7b1c1bd3..00000000 --- a/fio-io.c +++ /dev/null @@ -1,919 +0,0 @@ -/* - * The io parts of the fio tool, includes workers for sync and mmap'ed - * io, as well as both posix and linux libaio support. - * - * sync io is implemented on top of aio. - * - * This is not really specific to fio, if the get_io_u/put_io_u and - * structures was pulled into this as well it would be a perfectly - * generic io engine that could be used for other projects. 
- * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "fio.h" -#include "os.h" - -#ifdef FIO_HAVE_LIBAIO - -#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj) - -static int fio_io_sync(struct thread_data *td) -{ - return fsync(td->fd); -} - -static int fill_timespec(struct timespec *ts) -{ -#ifdef _POSIX_TIMERS - if (!clock_gettime(CLOCK_MONOTONIC, ts)) - return 0; - - perror("clock_gettime"); -#endif - return 1; -} - -static unsigned long long ts_utime_since_now(struct timespec *t) -{ - long long sec, nsec; - struct timespec now; - - if (fill_timespec(&now)) - return 0; - - sec = now.tv_sec - t->tv_sec; - nsec = now.tv_nsec - t->tv_nsec; - if (sec > 0 && nsec < 0) { - sec--; - nsec += 1000000000; - } - - sec *= 1000000; - nsec /= 1000; - return sec + nsec; -} - -struct libaio_data { - io_context_t aio_ctx; - struct io_event *aio_events; -}; - -static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u) -{ - if (io_u->ddir == DDIR_READ) - io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); - else - io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); - - return 0; -} - -static struct io_u *fio_libaio_event(struct thread_data *td, int event) -{ - struct libaio_data *ld = td->io_data; - - return ev_to_iou(ld->aio_events + event); -} - -static int fio_libaio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - struct libaio_data *ld = td->io_data; - int r; - - do { - r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t); - if (r == -EAGAIN) { - usleep(100); - continue; - } else if (r == -EINTR) - continue; - else - break; - } while (1); - - return r; -} - -static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct libaio_data *ld = td->io_data; - struct iocb *iocb = &io_u->iocb; - int ret; - - do { - ret = io_submit(ld->aio_ctx, 1, &iocb); - if (ret == 1) - return 0; - else if (ret == -EAGAIN) - 
usleep(100); - else if (ret == -EINTR) - continue; - else - break; - } while (1); - - return ret; - -} - -static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u) -{ - struct libaio_data *ld = td->io_data; - - return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events); -} - -static void fio_libaio_cleanup(struct thread_data *td) -{ - struct libaio_data *ld = td->io_data; - - if (ld) { - io_destroy(ld->aio_ctx); - if (ld->aio_events) - free(ld->aio_events); - - free(ld); - td->io_data = NULL; - } -} - -int fio_libaio_init(struct thread_data *td) -{ - struct libaio_data *ld = malloc(sizeof(*ld)); - - memset(ld, 0, sizeof(*ld)); - if (io_queue_init(td->iodepth, &ld->aio_ctx)) { - td_verror(td, errno); - return 1; - } - - td->io_prep = fio_libaio_io_prep; - td->io_queue = fio_libaio_queue; - td->io_getevents = fio_libaio_getevents; - td->io_event = fio_libaio_event; - td->io_cancel = fio_libaio_cancel; - td->io_cleanup = fio_libaio_cleanup; - td->io_sync = fio_io_sync; - - ld->aio_events = malloc(td->iodepth * sizeof(struct io_event)); - td->io_data = ld; - return 0; -} - -#else /* FIO_HAVE_LIBAIO */ - -int fio_libaio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_LIBAIO */ - -#ifdef FIO_HAVE_POSIXAIO - -struct posixaio_data { - struct io_u **aio_events; -}; - -static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u) -{ - int r = aio_cancel(td->fd, &io_u->aiocb); - - if (r == 1 || r == AIO_CANCELED) - return 0; - - return 1; -} - -static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct aiocb *aiocb = &io_u->aiocb; - - aiocb->aio_fildes = td->fd; - aiocb->aio_buf = io_u->buf; - aiocb->aio_nbytes = io_u->buflen; - aiocb->aio_offset = io_u->offset; - - io_u->seen = 0; - return 0; -} - -static int fio_posixaio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - struct posixaio_data *pd = td->io_data; - struct list_head *entry; - struct timespec start; - int r, 
have_timeout = 0; - - if (t && !fill_timespec(&start)) - have_timeout = 1; - - r = 0; -restart: - list_for_each(entry, &td->io_u_busylist) { - struct io_u *io_u = list_entry(entry, struct io_u, list); - int err; - - if (io_u->seen) - continue; - - err = aio_error(&io_u->aiocb); - switch (err) { - default: - io_u->error = err; - case ECANCELED: - case 0: - pd->aio_events[r++] = io_u; - io_u->seen = 1; - break; - case EINPROGRESS: - break; - } - - if (r >= max) - break; - } - - if (r >= min) - return r; - - if (have_timeout) { - unsigned long long usec; - - usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000); - if (ts_utime_since_now(&start) > usec) - return r; - } - - /* - * hrmpf, we need to wait for more. we should use aio_suspend, for - * now just sleep a little and recheck status of busy-and-not-seen - */ - usleep(1000); - goto restart; -} - -static struct io_u *fio_posixaio_event(struct thread_data *td, int event) -{ - struct posixaio_data *pd = td->io_data; - - return pd->aio_events[event]; -} - -static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct aiocb *aiocb = &io_u->aiocb; - int ret; - - if (io_u->ddir == DDIR_READ) - ret = aio_read(aiocb); - else - ret = aio_write(aiocb); - - if (ret) - io_u->error = errno; - - return io_u->error; -} - -static void fio_posixaio_cleanup(struct thread_data *td) -{ - struct posixaio_data *pd = td->io_data; - - if (pd) { - free(pd->aio_events); - free(pd); - td->io_data = NULL; - } -} - -int fio_posixaio_init(struct thread_data *td) -{ - struct posixaio_data *pd = malloc(sizeof(*pd)); - - pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *)); - - td->io_prep = fio_posixaio_prep; - td->io_queue = fio_posixaio_queue; - td->io_getevents = fio_posixaio_getevents; - td->io_event = fio_posixaio_event; - td->io_cancel = fio_posixaio_cancel; - td->io_cleanup = fio_posixaio_cleanup; - td->io_sync = fio_io_sync; - - td->io_data = pd; - return 0; -} - -#else /* FIO_HAVE_POSIXAIO */ - -int 
fio_posixaio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_POSIXAIO */ - -struct syncio_data { - struct io_u *last_io_u; -}; - -static int fio_syncio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - assert(max <= 1); - - /* - * we can only have one finished io_u for sync io, since the depth - * is always 1 - */ - if (list_empty(&td->io_u_busylist)) - return 0; - - return 1; -} - -static struct io_u *fio_syncio_event(struct thread_data *td, int event) -{ - struct syncio_data *sd = td->io_data; - - assert(event == 0); - - return sd->last_io_u; -} - -static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u) -{ - if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) { - td_verror(td, errno); - return 1; - } - - return 0; -} - -static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct syncio_data *sd = td->io_data; - int ret; - - if (io_u->ddir == DDIR_READ) - ret = read(td->fd, io_u->buf, io_u->buflen); - else - ret = write(td->fd, io_u->buf, io_u->buflen); - - if ((unsigned int) ret != io_u->buflen) { - if (ret > 0) { - io_u->resid = io_u->buflen - ret; - io_u->error = ENODATA; - } else - io_u->error = errno; - } - - if (!io_u->error) - sd->last_io_u = io_u; - - return io_u->error; -} - -static void fio_syncio_cleanup(struct thread_data *td) -{ - if (td->io_data) { - free(td->io_data); - td->io_data = NULL; - } -} - -int fio_syncio_init(struct thread_data *td) -{ - struct syncio_data *sd = malloc(sizeof(*sd)); - - td->io_prep = fio_syncio_prep; - td->io_queue = fio_syncio_queue; - td->io_getevents = fio_syncio_getevents; - td->io_event = fio_syncio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_syncio_cleanup; - td->io_sync = fio_io_sync; - - sd->last_io_u = NULL; - td->io_data = sd; - return 0; -} - -static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u) -{ - unsigned long long real_off = io_u->offset - td->file_offset; - struct syncio_data *sd = 
td->io_data; - - if (io_u->ddir == DDIR_READ) - memcpy(io_u->buf, td->mmap + real_off, io_u->buflen); - else - memcpy(td->mmap + real_off, io_u->buf, io_u->buflen); - - /* - * not really direct, but should drop the pages from the cache - */ - if (td->odirect) { - if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0) - io_u->error = errno; - if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0) - io_u->error = errno; - } - - if (!io_u->error) - sd->last_io_u = io_u; - - return io_u->error; -} - -static int fio_mmapio_sync(struct thread_data *td) -{ - return msync(td->mmap, td->file_size, MS_SYNC); -} - -int fio_mmapio_init(struct thread_data *td) -{ - struct syncio_data *sd = malloc(sizeof(*sd)); - - td->io_prep = NULL; - td->io_queue = fio_mmapio_queue; - td->io_getevents = fio_syncio_getevents; - td->io_event = fio_syncio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_syncio_cleanup; - td->io_sync = fio_mmapio_sync; - - sd->last_io_u = NULL; - td->io_data = sd; - return 0; -} - -#ifdef FIO_HAVE_SGIO - -struct sgio_cmd { - unsigned char cdb[10]; - int nr; -}; - -struct sgio_data { - struct sgio_cmd *cmds; - struct io_u **events; - unsigned int bs; -}; - -static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, - struct io_u *io_u, int fs) -{ - struct sgio_cmd *sc = &sd->cmds[io_u->index]; - - memset(hdr, 0, sizeof(*hdr)); - memset(sc->cdb, 0, sizeof(sc->cdb)); - - hdr->interface_id = 'S'; - hdr->cmdp = sc->cdb; - hdr->cmd_len = sizeof(sc->cdb); - hdr->pack_id = io_u->index; - hdr->usr_ptr = io_u; - - if (fs) { - hdr->dxferp = io_u->buf; - hdr->dxfer_len = io_u->buflen; - } -} - -static int fio_sgio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - struct sgio_data *sd = td->io_data; - struct pollfd pfd = { .fd = td->fd, .events = POLLIN }; - void *buf = malloc(max * sizeof(struct sg_io_hdr)); - int left = max, ret, events, i, r = 0, fl = 0; - - /* - * don't block for !events - */ - if (!min) { - fl 
= fcntl(td->fd, F_GETFL); - fcntl(td->fd, F_SETFL, fl | O_NONBLOCK); - } - - while (left) { - do { - if (!min) - break; - poll(&pfd, 1, -1); - if (pfd.revents & POLLIN) - break; - } while (1); - - ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr)); - if (ret < 0) { - if (errno == EAGAIN) - break; - td_verror(td, errno); - r = -1; - break; - } else if (!ret) - break; - - events = ret / sizeof(struct sg_io_hdr); - left -= events; - r += events; - - for (i = 0; i < events; i++) { - struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i; - - sd->events[i] = hdr->usr_ptr; - } - } - - if (!min) - fcntl(td->fd, F_SETFL, fl); - - free(buf); - return r; -} - -static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u) -{ - struct sgio_data *sd = td->io_data; - struct sg_io_hdr *hdr = &io_u->hdr; - - sd->events[0] = io_u; - - return ioctl(td->fd, SG_IO, hdr); -} - -static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - int ret; - - ret = write(td->fd, hdr, sizeof(*hdr)); - if (ret < 0) - return errno; - - if (sync) { - ret = read(td->fd, hdr, sizeof(*hdr)); - if (ret < 0) - return errno; - } - - return 0; -} - -static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync) -{ - if (td->filetype == FIO_TYPE_BD) - return fio_sgio_ioctl_doio(td, io_u); - - return fio_sgio_rw_doio(td, io_u, sync); -} - -static int fio_sgio_sync(struct thread_data *td) -{ - struct sgio_data *sd = td->io_data; - struct sg_io_hdr *hdr; - struct io_u *io_u; - int ret; - - io_u = __get_io_u(td); - if (!io_u) - return ENOMEM; - - hdr = &io_u->hdr; - sgio_hdr_init(sd, hdr, io_u, 0); - hdr->dxfer_direction = SG_DXFER_NONE; - - hdr->cmdp[0] = 0x35; - - ret = fio_sgio_doio(td, io_u, 1); - put_io_u(td, io_u); - return ret; -} - -static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - struct sgio_data *sd = td->io_data; - int nr_blocks, lba; - - if 
(io_u->buflen & (sd->bs - 1)) { - fprintf(stderr, "read/write not sector aligned\n"); - return EINVAL; - } - - sgio_hdr_init(sd, hdr, io_u, 1); - - if (io_u->ddir == DDIR_READ) { - hdr->dxfer_direction = SG_DXFER_FROM_DEV; - hdr->cmdp[0] = 0x28; - } else { - hdr->dxfer_direction = SG_DXFER_TO_DEV; - hdr->cmdp[0] = 0x2a; - } - - nr_blocks = io_u->buflen / sd->bs; - lba = io_u->offset / sd->bs; - hdr->cmdp[2] = (lba >> 24) & 0xff; - hdr->cmdp[3] = (lba >> 16) & 0xff; - hdr->cmdp[4] = (lba >> 8) & 0xff; - hdr->cmdp[5] = lba & 0xff; - hdr->cmdp[7] = (nr_blocks >> 8) & 0xff; - hdr->cmdp[8] = nr_blocks & 0xff; - return 0; -} - -static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - int ret; - - ret = fio_sgio_doio(td, io_u, 0); - - if (ret < 0) - io_u->error = errno; - else if (hdr->status) { - io_u->resid = hdr->resid; - io_u->error = EIO; - } - - return io_u->error; -} - -static struct io_u *fio_sgio_event(struct thread_data *td, int event) -{ - struct sgio_data *sd = td->io_data; - - return sd->events[event]; -} - -static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs) -{ - struct sgio_data *sd = td->io_data; - struct io_u *io_u; - struct sg_io_hdr *hdr; - unsigned char buf[8]; - int ret; - - io_u = __get_io_u(td); - assert(io_u); - - hdr = &io_u->hdr; - sgio_hdr_init(sd, hdr, io_u, 0); - memset(buf, 0, sizeof(buf)); - - hdr->cmdp[0] = 0x25; - hdr->dxfer_direction = SG_DXFER_FROM_DEV; - hdr->dxferp = buf; - hdr->dxfer_len = sizeof(buf); - - ret = fio_sgio_doio(td, io_u, 1); - if (ret) { - put_io_u(td, io_u); - return ret; - } - - *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; - put_io_u(td, io_u); - return 0; -} - -int fio_sgio_init(struct thread_data *td) -{ - struct sgio_data *sd; - unsigned int bs; - int ret; - - sd = malloc(sizeof(*sd)); - sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd)); - sd->events = malloc(td->iodepth * sizeof(struct io_u *)); - td->io_data = sd; - 
- if (td->filetype == FIO_TYPE_BD) { - if (ioctl(td->fd, BLKSSZGET, &bs) < 0) { - td_verror(td, errno); - return 1; - } - } else if (td->filetype == FIO_TYPE_CHAR) { - int version; - - if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) { - td_verror(td, errno); - return 1; - } - - ret = fio_sgio_get_bs(td, &bs); - if (ret) - return ret; - } else { - fprintf(stderr, "ioengine sgio only works on block devices\n"); - return 1; - } - - sd->bs = bs; - - td->io_prep = fio_sgio_prep; - td->io_queue = fio_sgio_queue; - - if (td->filetype == FIO_TYPE_BD) - td->io_getevents = fio_syncio_getevents; - else - td->io_getevents = fio_sgio_getevents; - - td->io_event = fio_sgio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_syncio_cleanup; - td->io_sync = fio_sgio_sync; - - /* - * we want to do it, regardless of whether odirect is set or not - */ - td->override_sync = 1; - return 0; -} - -#else /* FIO_HAVE_SGIO */ - -int fio_sgio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_SGIO */ - -#ifdef FIO_HAVE_SPLICE -struct spliceio_data { - struct io_u *last_io_u; - int pipe[2]; -}; - -static struct io_u *fio_spliceio_event(struct thread_data *td, int event) -{ - struct spliceio_data *sd = td->io_data; - - assert(event == 0); - - return sd->last_io_u; -} - -/* - * For splice reading, we unfortunately cannot (yet) vmsplice the other way. - * So just splice the data from the file into the pipe, and use regular - * read to fill the buffer. Doesn't make a lot of sense, but... 
- */ -static int fio_splice_read(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_data; - int ret, ret2, buflen; - off_t offset; - void *p; - - offset = io_u->offset; - buflen = io_u->buflen; - p = io_u->buf; - while (buflen) { - int this_len = buflen; - - if (this_len > SPLICE_DEF_SIZE) - this_len = SPLICE_DEF_SIZE; - - ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE); - if (ret < 0) { - if (errno == ENODATA || errno == EAGAIN) - continue; - - return errno; - } - - buflen -= ret; - - while (ret) { - ret2 = read(sd->pipe[0], p, ret); - if (ret2 < 0) - return errno; - - ret -= ret2; - p += ret2; - } - } - - return io_u->buflen; -} - -/* - * For splice writing, we can vmsplice our data buffer directly into a - * pipe and then splice that to a file. - */ -static int fio_splice_write(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_data; - struct iovec iov[1] = { - { - .iov_base = io_u->buf, - .iov_len = io_u->buflen, - } - }; - struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; - off_t off = io_u->offset; - int ret, ret2; - - while (iov[0].iov_len) { - if (poll(&pfd, 1, -1) < 0) - return errno; - - ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK); - if (ret < 0) - return errno; - - iov[0].iov_len -= ret; - iov[0].iov_base += ret; - - while (ret) { - ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0); - if (ret2 < 0) - return errno; - - ret -= ret2; - } - } - - return io_u->buflen; -} - -static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_data; - int ret; - - if (io_u->ddir == DDIR_READ) - ret = fio_splice_read(td, io_u); - else - ret = fio_splice_write(td, io_u); - - if ((unsigned int) ret != io_u->buflen) { - if (ret > 0) { - io_u->resid = io_u->buflen - ret; - io_u->error = ENODATA; - } else - io_u->error = errno; - } - - if (!io_u->error) - sd->last_io_u = io_u; - - return io_u->error; -} - 
-static void fio_spliceio_cleanup(struct thread_data *td) -{ - struct spliceio_data *sd = td->io_data; - - if (sd) { - close(sd->pipe[0]); - close(sd->pipe[1]); - free(sd); - td->io_data = NULL; - } -} - -int fio_spliceio_init(struct thread_data *td) -{ - struct spliceio_data *sd = malloc(sizeof(*sd)); - - td->io_queue = fio_spliceio_queue; - td->io_getevents = fio_syncio_getevents; - td->io_event = fio_spliceio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_spliceio_cleanup; - td->io_sync = fio_io_sync; - - sd->last_io_u = NULL; - if (pipe(sd->pipe) < 0) { - td_verror(td, errno); - free(sd); - return 1; - } - - td->io_data = sd; - return 0; -} - -#else /* FIO_HAVE_SPLICE */ - -int fio_spliceio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_SPLICE */ diff --git a/fio-log.c b/fio-log.c deleted file mode 100644 index 42aedf27..00000000 --- a/fio-log.c +++ /dev/null @@ -1,162 +0,0 @@ -#include -#include -#include "list.h" -#include "fio.h" - -void write_iolog_put(struct thread_data *td, struct io_u *io_u) -{ - fprintf(td->iolog_f, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen); -} - -int read_iolog_get(struct thread_data *td, struct io_u *io_u) -{ - struct io_piece *ipo; - - if (!list_empty(&td->io_log_list)) { - ipo = list_entry(td->io_log_list.next, struct io_piece, list); - list_del(&ipo->list); - io_u->offset = ipo->offset; - io_u->buflen = ipo->len; - io_u->ddir = ipo->ddir; - free(ipo); - return 0; - } - - return 1; -} - -void prune_io_piece_log(struct thread_data *td) -{ - struct io_piece *ipo; - - while (!list_empty(&td->io_hist_list)) { - ipo = list_entry(td->io_hist_list.next, struct io_piece, list); - - list_del(&ipo->list); - free(ipo); - } -} - -/* - * log a succesful write, so we can unwind the log for verify - */ -void log_io_piece(struct thread_data *td, struct io_u *io_u) -{ - struct io_piece *ipo = malloc(sizeof(struct io_piece)); - struct list_head *entry; - - INIT_LIST_HEAD(&ipo->list); - ipo->offset = 
io_u->offset; - ipo->len = io_u->buflen; - - /* - * for random io where the writes extend the file, it will typically - * be laid out with the block scattered as written. it's faster to - * read them in in that order again, so don't sort - */ - if (td->sequential || !td->overwrite) { - list_add_tail(&ipo->list, &td->io_hist_list); - return; - } - - /* - * for random io, sort the list so verify will run faster - */ - entry = &td->io_hist_list; - while ((entry = entry->prev) != &td->io_hist_list) { - struct io_piece *__ipo = list_entry(entry, struct io_piece, list); - - if (__ipo->offset < ipo->offset) - break; - } - - list_add(&ipo->list, entry); -} - -void write_iolog_close(struct thread_data *td) -{ - fflush(td->iolog_f); - fclose(td->iolog_f); - free(td->iolog_buf); -} - -int init_iolog(struct thread_data *td) -{ - unsigned long long offset; - unsigned int bytes; - char *str, *p; - FILE *f; - int rw, i, reads, writes; - - if (!td->read_iolog && !td->write_iolog) - return 0; - - if (td->read_iolog) - f = fopen(td->iolog_file, "r"); - else - f = fopen(td->iolog_file, "w"); - - if (!f) { - perror("fopen iolog"); - printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog); - return 1; - } - - /* - * That's it for writing, setup a log buffer and we're done. - */ - if (td->write_iolog) { - td->iolog_f = f; - td->iolog_buf = malloc(8192); - setvbuf(f, td->iolog_buf, _IOFBF, 8192); - return 0; - } - - /* - * Read in the read iolog and store it, reuse the infrastructure - * for doing verifications. 
- */ - str = malloc(4096); - reads = writes = i = 0; - while ((p = fgets(str, 4096, f)) != NULL) { - struct io_piece *ipo; - - if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) { - fprintf(stderr, "bad iolog: %s\n", p); - continue; - } - if (rw == DDIR_READ) - reads++; - else if (rw == DDIR_WRITE) - writes++; - else { - fprintf(stderr, "bad ddir: %d\n", rw); - continue; - } - - ipo = malloc(sizeof(*ipo)); - INIT_LIST_HEAD(&ipo->list); - ipo->offset = offset; - ipo->len = bytes; - if (bytes > td->max_bs) - td->max_bs = bytes; - ipo->ddir = rw; - list_add_tail(&ipo->list, &td->io_log_list); - i++; - } - - free(str); - fclose(f); - - if (!i) - return 1; - - if (reads && !writes) - td->ddir = DDIR_READ; - else if (!reads && writes) - td->ddir = DDIR_READ; - else - td->iomix = 1; - - return 0; -} diff --git a/fio-log.h b/fio-log.h deleted file mode 100644 index 99bb9f7f..00000000 --- a/fio-log.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef FIO_LOG_H -#define FIO_LOG_H - -extern int read_iolog_get(struct thread_data *, struct io_u *); -extern void write_iolog_put(struct thread_data *, struct io_u *); -extern int init_iolog(struct thread_data *td); -extern void log_io_piece(struct thread_data *, struct io_u *); -extern void prune_io_piece_log(struct thread_data *); -extern void write_iolog_close(struct thread_data *); - -#endif diff --git a/fio-stat.c b/fio-stat.c deleted file mode 100644 index 70c653db..00000000 --- a/fio-stat.c +++ /dev/null @@ -1,519 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "fio.h" -#include "fio-time.h" - -static struct itimerval itimer; -static LIST_HEAD(disk_list); - -static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) -{ - unsigned in_flight; - char line[256]; - FILE *f; - char *p; - - f = fopen(du->path, "r"); - if (!f) - return 1; - - p = fgets(line, sizeof(line), f); - if (!p) { - fclose(f); - return 1; - } - - if (sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", 
&dus->ios[0], &dus->merges[0], &dus->sectors[0], &dus->ticks[0], &dus->ios[1], &dus->merges[1], &dus->sectors[1], &dus->ticks[1], &in_flight, &dus->io_ticks, &dus->time_in_queue) != 11) { - fclose(f); - return 1; - } - - fclose(f); - return 0; -} - -static void update_io_tick_disk(struct disk_util *du) -{ - struct disk_util_stat __dus, *dus, *ldus; - struct timeval t; - - if (get_io_ticks(du, &__dus)) - return; - - dus = &du->dus; - ldus = &du->last_dus; - - dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]); - dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]); - dus->ios[0] += (__dus.ios[0] - ldus->ios[0]); - dus->ios[1] += (__dus.ios[1] - ldus->ios[1]); - dus->merges[0] += (__dus.merges[0] - ldus->merges[0]); - dus->merges[1] += (__dus.merges[1] - ldus->merges[1]); - dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]); - dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]); - dus->io_ticks += (__dus.io_ticks - ldus->io_ticks); - dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue); - - gettimeofday(&t, NULL); - du->msec += mtime_since(&du->time, &t); - memcpy(&du->time, &t, sizeof(t)); - memcpy(ldus, &__dus, sizeof(__dus)); -} - -void update_io_ticks(void) -{ - struct list_head *entry; - struct disk_util *du; - - list_for_each(entry, &disk_list) { - du = list_entry(entry, struct disk_util, list); - update_io_tick_disk(du); - } -} - -static int disk_util_exists(dev_t dev) -{ - struct list_head *entry; - struct disk_util *du; - - list_for_each(entry, &disk_list) { - du = list_entry(entry, struct disk_util, list); - - if (du->dev == dev) - return 1; - } - - return 0; -} - -static void disk_util_add(dev_t dev, char *path) -{ - struct disk_util *du = malloc(sizeof(*du)); - - memset(du, 0, sizeof(*du)); - INIT_LIST_HEAD(&du->list); - sprintf(du->path, "%s/stat", path); - du->name = strdup(basename(path)); - du->dev = dev; - - gettimeofday(&du->time, NULL); - get_io_ticks(du, &du->last_dus); - - list_add_tail(&du->list, &disk_list); -} - -static 
int check_dev_match(dev_t dev, char *path) -{ - unsigned int major, minor; - char line[256], *p; - FILE *f; - - f = fopen(path, "r"); - if (!f) { - perror("open path"); - return 1; - } - - p = fgets(line, sizeof(line), f); - if (!p) { - fclose(f); - return 1; - } - - if (sscanf(p, "%u:%u", &major, &minor) != 2) { - fclose(f); - return 1; - } - - if (((major << 8) | minor) == dev) { - fclose(f); - return 0; - } - - fclose(f); - return 1; -} - -static int find_block_dir(dev_t dev, char *path) -{ - struct dirent *dir; - struct stat st; - int found = 0; - DIR *D; - - D = opendir(path); - if (!D) - return 0; - - while ((dir = readdir(D)) != NULL) { - char full_path[256]; - - if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) - continue; - if (!strcmp(dir->d_name, "device")) - continue; - - sprintf(full_path, "%s/%s", path, dir->d_name); - - if (!strcmp(dir->d_name, "dev")) { - if (!check_dev_match(dev, full_path)) { - found = 1; - break; - } - } - - if (stat(full_path, &st) == -1) { - perror("stat"); - break; - } - - if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) - continue; - - found = find_block_dir(dev, full_path); - if (found) { - strcpy(path, full_path); - break; - } - } - - closedir(D); - return found; -} - -void init_disk_util(struct thread_data *td) -{ - struct stat st; - char foo[256], tmp[256]; - dev_t dev; - char *p; - - if (!td->do_disk_util) - return; - - if (!stat(td->file_name, &st)) { - if (S_ISBLK(st.st_mode)) - dev = st.st_rdev; - else - dev = st.st_dev; - } else { - /* - * must be a file, open "." in that path - */ - strcpy(foo, td->file_name); - p = dirname(foo); - if (stat(p, &st)) { - perror("disk util stat"); - return; - } - - dev = st.st_dev; - } - - if (disk_util_exists(dev)) - return; - - sprintf(foo, "/sys/block"); - if (!find_block_dir(dev, foo)) - return; - - /* - * If there's a ../queue/ directory there, we are inside a partition. - * Check if that is the case and jump back. 
For loop/md/dm etc we - * are already in the right spot. - */ - sprintf(tmp, "%s/../queue", foo); - if (!stat(tmp, &st)) { - p = dirname(foo); - sprintf(tmp, "%s/queue", p); - if (stat(tmp, &st)) { - fprintf(stderr, "unknown sysfs layout\n"); - return; - } - sprintf(foo, "%s", p); - } - - td->sysfs_root = strdup(foo); - disk_util_add(dev, foo); -} - -void disk_util_timer_arm(void) -{ - itimer.it_value.tv_sec = 0; - itimer.it_value.tv_usec = DISK_UTIL_MSEC * 1000; - setitimer(ITIMER_REAL, &itimer, NULL); -} - -void update_rusage_stat(struct thread_data *td) -{ - if (!(td->runtime[0] + td->runtime[1])) - return; - - getrusage(RUSAGE_SELF, &td->ru_end); - - td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime); - td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime); - td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw); - - - memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end)); -} - -static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max, - double *mean, double *dev) -{ - double n; - - if (is->samples == 0) - return 0; - - *min = is->min_val; - *max = is->max_val; - - n = (double) is->samples; - *mean = (double) is->val / n; - *dev = sqrt(((double) is->val_sq - (*mean * *mean) / n) / (n - 1)); - if (!(*min + *max) && !(*mean + *dev)) - return 0; - - return 1; -} - -static void show_group_stats(struct group_run_stats *rs, int id) -{ - printf("\nRun status group %d (all jobs):\n", id); - - if (rs->max_run[DDIR_READ]) - printf(" READ: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[0] >> 10, rs->agg[0], rs->min_bw[0], rs->max_bw[0], rs->min_run[0], rs->max_run[0]); - if (rs->max_run[DDIR_WRITE]) - printf(" WRITE: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[1] >> 10, rs->agg[1], rs->min_bw[1], rs->max_bw[1], rs->min_run[1], rs->max_run[1]); -} - -static void 
show_disk_util(void) -{ - struct disk_util_stat *dus; - struct list_head *entry; - struct disk_util *du; - double util; - - printf("\nDisk stats (read/write):\n"); - - list_for_each(entry, &disk_list) { - du = list_entry(entry, struct disk_util, list); - dus = &du->dus; - - util = (double) 100 * du->dus.io_ticks / (double) du->msec; - if (util > 100.0) - util = 100.0; - - printf(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, in_queue=%u, util=%3.2f%%\n", du->name, dus->ios[0], dus->ios[1], dus->merges[0], dus->merges[1], dus->ticks[0], dus->ticks[1], dus->time_in_queue, util); - } -} - -static void show_ddir_status(struct thread_data *td, struct group_run_stats *rs, - int ddir) -{ - char *ddir_str[] = { "read ", "write" }; - unsigned long min, max; - unsigned long long bw; - double mean, dev; - - if (!td->runtime[ddir]) - return; - - bw = td->io_bytes[ddir] / td->runtime[ddir]; - printf(" %s: io=%6lluMiB, bw=%6lluKiB/s, runt=%6lumsec\n", ddir_str[ddir], td->io_bytes[ddir] >> 20, bw, td->runtime[ddir]); - - if (calc_lat(&td->slat_stat[ddir], &min, &max, &mean, &dev)) - printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev); - - if (calc_lat(&td->clat_stat[ddir], &min, &max, &mean, &dev)) - printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev); - - if (calc_lat(&td->bw_stat[ddir], &min, &max, &mean, &dev)) { - double p_of_agg; - - p_of_agg = mean * 100 / (double) rs->agg[ddir]; - printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", min, max, p_of_agg, mean, dev); - } -} - -static void show_thread_status(struct thread_data *td, - struct group_run_stats *rs) -{ - double usr_cpu, sys_cpu; - - if (!(td->io_bytes[0] + td->io_bytes[1]) && !td->error) - return; - - printf("Client%d (groupid=%d): err=%2d:\n", td->thread_number, td->groupid, td->error); - - show_ddir_status(td, rs, td->ddir); - if (td->io_bytes[td->ddir ^ 1]) - show_ddir_status(td, rs, td->ddir ^ 1); - - if 
(td->runtime[0] + td->runtime[1]) { - double runt = td->runtime[0] + td->runtime[1]; - - usr_cpu = (double) td->usr_time * 100 / runt; - sys_cpu = (double) td->sys_time * 100 / runt; - } else { - usr_cpu = 0; - sys_cpu = 0; - } - - printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx); -} - -void show_run_stats(void) -{ - struct group_run_stats *runstats, *rs; - struct thread_data *td; - int i; - - runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1)); - - for (i = 0; i < groupid + 1; i++) { - rs = &runstats[i]; - - memset(rs, 0, sizeof(*rs)); - rs->min_bw[0] = rs->min_run[0] = ~0UL; - rs->min_bw[1] = rs->min_run[1] = ~0UL; - } - - for (i = 0; i < thread_number; i++) { - unsigned long long rbw, wbw; - - td = &threads[i]; - - if (td->error) { - printf("Client%d: %s\n", td->thread_number, td->verror); - continue; - } - - rs = &runstats[td->groupid]; - - if (td->runtime[0] < rs->min_run[0] || !rs->min_run[0]) - rs->min_run[0] = td->runtime[0]; - if (td->runtime[0] > rs->max_run[0]) - rs->max_run[0] = td->runtime[0]; - if (td->runtime[1] < rs->min_run[1] || !rs->min_run[1]) - rs->min_run[1] = td->runtime[1]; - if (td->runtime[1] > rs->max_run[1]) - rs->max_run[1] = td->runtime[1]; - - rbw = wbw = 0; - if (td->runtime[0]) - rbw = td->io_bytes[0] / (unsigned long long) td->runtime[0]; - if (td->runtime[1]) - wbw = td->io_bytes[1] / (unsigned long long) td->runtime[1]; - - if (rbw < rs->min_bw[0]) - rs->min_bw[0] = rbw; - if (wbw < rs->min_bw[1]) - rs->min_bw[1] = wbw; - if (rbw > rs->max_bw[0]) - rs->max_bw[0] = rbw; - if (wbw > rs->max_bw[1]) - rs->max_bw[1] = wbw; - - rs->io_kb[0] += td->io_bytes[0] >> 10; - rs->io_kb[1] += td->io_bytes[1] >> 10; - } - - for (i = 0; i < groupid + 1; i++) { - rs = &runstats[i]; - - if (rs->max_run[0]) - rs->agg[0] = (rs->io_kb[0]*1024) / rs->max_run[0]; - if (rs->max_run[1]) - rs->agg[1] = (rs->io_kb[1]*1024) / rs->max_run[1]; - } - - /* - * don't overwrite last signal output - */ - printf("\n"); - 
- for (i = 0; i < thread_number; i++) { - td = &threads[i]; - rs = &runstats[td->groupid]; - - show_thread_status(td, rs); - } - - for (i = 0; i < groupid + 1; i++) - show_group_stats(&runstats[i], i); - - show_disk_util(); -} - -static inline void add_stat_sample(struct io_stat *is, unsigned long val) -{ - if (val > is->max_val) - is->max_val = val; - if (val < is->min_val) - is->min_val = val; - - is->val += val; - is->val_sq += val * val; - is->samples++; -} - -static void add_log_sample(struct thread_data *td, struct io_log *iolog, - unsigned long val, int ddir) -{ - if (iolog->nr_samples == iolog->max_samples) { - int new_size = sizeof(struct io_sample) * iolog->max_samples*2; - - iolog->log = realloc(iolog->log, new_size); - iolog->max_samples <<= 1; - } - - iolog->log[iolog->nr_samples].val = val; - iolog->log[iolog->nr_samples].time = mtime_since_now(&td->epoch); - iolog->log[iolog->nr_samples].ddir = ddir; - iolog->nr_samples++; -} - -void add_clat_sample(struct thread_data *td, int ddir, unsigned long msec) -{ - add_stat_sample(&td->clat_stat[ddir], msec); - - if (td->clat_log) - add_log_sample(td, td->clat_log, msec, ddir); -} - -void add_slat_sample(struct thread_data *td, int ddir, unsigned long msec) -{ - add_stat_sample(&td->slat_stat[ddir], msec); - - if (td->slat_log) - add_log_sample(td, td->slat_log, msec, ddir); -} - -void add_bw_sample(struct thread_data *td, int ddir) -{ - unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]); - unsigned long rate; - - if (spent < td->bw_avg_time) - return; - - rate = (td->this_io_bytes[ddir] - td->stat_io_bytes[ddir]) / spent; - add_stat_sample(&td->bw_stat[ddir], rate); - - if (td->bw_log) - add_log_sample(td, td->bw_log, rate, ddir); - - gettimeofday(&td->stat_sample_time[ddir], NULL); - td->stat_io_bytes[ddir] = td->this_io_bytes[ddir]; -} - - diff --git a/fio-stat.h b/fio-stat.h deleted file mode 100644 index b1ce6777..00000000 --- a/fio-stat.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef 
FIO_STAT_H -#define FIO_STAT_H - -extern void add_clat_sample(struct thread_data *, int, unsigned long); -extern void add_slat_sample(struct thread_data *, int, unsigned long); -extern void add_bw_sample(struct thread_data *, int); -extern void show_run_stats(void); -extern void init_disk_util(struct thread_data *); -extern void update_rusage_stat(struct thread_data *); -extern void update_io_ticks(void); -extern void disk_util_timer_arm(void); -#endif diff --git a/fio-time.c b/fio-time.c deleted file mode 100644 index 52462633..00000000 --- a/fio-time.c +++ /dev/null @@ -1,122 +0,0 @@ -#include -#include - -#include "fio.h" - -unsigned long utime_since(struct timeval *s, struct timeval *e) -{ - double sec, usec; - - sec = e->tv_sec - s->tv_sec; - usec = e->tv_usec - s->tv_usec; - if (sec > 0 && usec < 0) { - sec--; - usec += 1000000; - } - - sec *= (double) 1000000; - - return sec + usec; -} - -static unsigned long utime_since_now(struct timeval *s) -{ - struct timeval t; - - gettimeofday(&t, NULL); - return utime_since(s, &t); -} - -unsigned long mtime_since(struct timeval *s, struct timeval *e) -{ - double sec, usec; - - sec = e->tv_sec - s->tv_sec; - usec = e->tv_usec - s->tv_usec; - if (sec > 0 && usec < 0) { - sec--; - usec += 1000000; - } - - sec *= (double) 1000; - usec /= (double) 1000; - - return sec + usec; -} - -unsigned long mtime_since_now(struct timeval *s) -{ - struct timeval t; - - gettimeofday(&t, NULL); - return mtime_since(s, &t); -} - -unsigned long time_since_now(struct timeval *s) -{ - return mtime_since_now(s) / 1000; -} - -/* - * busy looping version for the last few usec - */ -static void __usec_sleep(unsigned int usec) -{ - struct timeval start; - - gettimeofday(&start, NULL); - while (utime_since_now(&start) < usec) - nop; -} - -void usec_sleep(struct thread_data *td, unsigned long usec) -{ - struct timespec req, rem; - - req.tv_sec = usec / 1000000; - req.tv_nsec = usec * 1000 - req.tv_sec * 1000000; - - do { - if (usec < 5000) { - 
__usec_sleep(usec); - break; - } - - rem.tv_sec = rem.tv_nsec = 0; - if (nanosleep(&req, &rem) < 0) - break; - - if ((rem.tv_sec + rem.tv_nsec) == 0) - break; - - req.tv_nsec = rem.tv_nsec; - req.tv_sec = rem.tv_sec; - - usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000; - } while (!td->terminate); -} - -void rate_throttle(struct thread_data *td, unsigned long time_spent, - unsigned int bytes) -{ - unsigned long usec_cycle; - - if (!td->rate) - return; - - usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs); - - if (time_spent < usec_cycle) { - unsigned long s = usec_cycle - time_spent; - - td->rate_pending_usleep += s; - if (td->rate_pending_usleep >= 100000) { - usec_sleep(td, td->rate_pending_usleep); - td->rate_pending_usleep = 0; - } - } else { - long overtime = time_spent - usec_cycle; - - td->rate_pending_usleep -= overtime; - } -} diff --git a/fio-time.h b/fio-time.h deleted file mode 100644 index 4be3c4fe..00000000 --- a/fio-time.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef FIO_TIME_H -#define FIO_TIME_H - -extern unsigned long utime_since(struct timeval *, struct timeval *); -extern unsigned long mtime_since(struct timeval *, struct timeval *); -extern unsigned long mtime_since_now(struct timeval *); -extern unsigned long time_since_now(struct timeval *); -extern void usec_sleep(struct thread_data *, unsigned long); - -extern void rate_throttle(struct thread_data *, unsigned long, unsigned int); - -#endif diff --git a/fio.c b/fio.c index 1eeb1ccc..8ac44522 100644 --- a/fio.c +++ b/fio.c @@ -34,10 +34,6 @@ #include "fio.h" #include "os.h" -#include "fio-time.h" -#include "fio-stat.h" -#include "fio-log.h" - #define MASK (4095) #define ALIGN(buf) (char *) (((unsigned long) (buf) + MASK) & ~(MASK)) diff --git a/fio.h b/fio.h index 58fabbdb..045fd012 100644 --- a/fio.h +++ b/fio.h @@ -353,4 +353,36 @@ struct io_completion_data { #define min(a, b) ((a) < (b) ? 
(a) : (b)) #endif +/* + * Log exports + */ +extern int read_iolog_get(struct thread_data *, struct io_u *); +extern void write_iolog_put(struct thread_data *, struct io_u *); +extern int init_iolog(struct thread_data *td); +extern void log_io_piece(struct thread_data *, struct io_u *); +extern void prune_io_piece_log(struct thread_data *); +extern void write_iolog_close(struct thread_data *); + +/* + * Logging + */ +extern void add_clat_sample(struct thread_data *, int, unsigned long); +extern void add_slat_sample(struct thread_data *, int, unsigned long); +extern void add_bw_sample(struct thread_data *, int); +extern void show_run_stats(void); +extern void init_disk_util(struct thread_data *); +extern void update_rusage_stat(struct thread_data *); +extern void update_io_ticks(void); +extern void disk_util_timer_arm(void); + +/* + * Time functions + */ +extern unsigned long utime_since(struct timeval *, struct timeval *); +extern unsigned long mtime_since(struct timeval *, struct timeval *); +extern unsigned long mtime_since_now(struct timeval *); +extern unsigned long time_since_now(struct timeval *); +extern void usec_sleep(struct thread_data *, unsigned long); +extern void rate_throttle(struct thread_data *, unsigned long, unsigned int); + #endif diff --git a/init.c b/init.c new file mode 100644 index 00000000..d2122e3f --- /dev/null +++ b/init.c @@ -0,0 +1,1133 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fio.h" + +#define DEF_BS (4096) +#define DEF_TIMEOUT (0) +#define DEF_RATE_CYCLE (1000) +#define DEF_ODIRECT (1) +#define DEF_IO_ENGINE (FIO_SYNCIO) +#define DEF_IO_ENGINE_NAME "sync" +#define DEF_SEQUENTIAL (1) +#define DEF_RAND_REPEAT (1) +#define DEF_OVERWRITE (1) +#define DEF_CREATE (1) +#define DEF_INVALIDATE (1) +#define DEF_SYNCIO (0) +#define DEF_RANDSEED (0xb1899bedUL) +#define DEF_BWAVGTIME (500) +#define DEF_CREATE_SER (1) +#define DEF_CREATE_FSYNC (1) +#define 
DEF_LOOPS (1) +#define DEF_VERIFY (0) +#define DEF_STONEWALL (0) +#define DEF_NUMJOBS (1) +#define DEF_USE_THREAD (0) +#define DEF_FILE_SIZE (1024 * 1024 * 1024UL) +#define DEF_ZONE_SIZE (0) +#define DEF_ZONE_SKIP (0) +#define DEF_RWMIX_CYCLE (500) +#define DEF_RWMIX_READ (50) +#define DEF_NICE (0) + +static char fio_version_string[] = "fio 1.4"; + +static int repeatable = DEF_RAND_REPEAT; +static char *ini_file; +static int max_jobs = MAX_JOBS; + +struct thread_data def_thread; +struct thread_data *threads = NULL; + +int rate_quit = 0; +int write_lat_log = 0; +int write_bw_log = 0; +int exitall_on_terminate = 0; +unsigned long long mlock_size = 0; + +static int setup_rate(struct thread_data *td) +{ + int nr_reads_per_sec; + + if (!td->rate) + return 0; + + if (td->rate < td->ratemin) { + fprintf(stderr, "min rate larger than nominal rate\n"); + return -1; + } + + nr_reads_per_sec = (td->rate * 1024) / td->min_bs; + td->rate_usec_cycle = 1000000 / nr_reads_per_sec; + td->rate_pending_usleep = 0; + return 0; +} + +static void setup_log(struct io_log **log) +{ + struct io_log *l = malloc(sizeof(*l)); + + l->nr_samples = 0; + l->max_samples = 1024; + l->log = malloc(l->max_samples * sizeof(struct io_sample)); + *log = l; +} + +void finish_log(struct thread_data *td, struct io_log *log, const char *name) +{ + char file_name[256]; + FILE *f; + unsigned int i; + + snprintf(file_name, 200, "client%d_%s.log", td->thread_number, name); + f = fopen(file_name, "w"); + if (!f) { + perror("fopen log"); + return; + } + + for (i = 0; i < log->nr_samples; i++) + fprintf(f, "%lu, %lu, %u\n", log->log[i].time, log->log[i].val, log->log[i].ddir); + + fclose(f); + free(log->log); + free(log); +} + +static struct thread_data *get_new_job(int global, struct thread_data *parent) +{ + struct thread_data *td; + + if (global) + return &def_thread; + if (thread_number >= max_jobs) + return NULL; + + td = &threads[thread_number++]; + if (parent) + *td = *parent; + else + memset(td, 0, 
sizeof(*td)); + + td->fd = -1; + td->thread_number = thread_number; + + td->ddir = parent->ddir; + td->ioprio = parent->ioprio; + td->sequential = parent->sequential; + td->bs = parent->bs; + td->min_bs = parent->min_bs; + td->max_bs = parent->max_bs; + td->odirect = parent->odirect; + td->thinktime = parent->thinktime; + td->fsync_blocks = parent->fsync_blocks; + td->start_delay = parent->start_delay; + td->timeout = parent->timeout; + td->io_engine = parent->io_engine; + td->create_file = parent->create_file; + td->overwrite = parent->overwrite; + td->invalidate_cache = parent->invalidate_cache; + td->file_size = parent->file_size; + td->file_offset = parent->file_offset; + td->zone_size = parent->zone_size; + td->zone_skip = parent->zone_skip; + td->rate = parent->rate; + td->ratemin = parent->ratemin; + td->ratecycle = parent->ratecycle; + td->iodepth = parent->iodepth; + td->sync_io = parent->sync_io; + td->mem_type = parent->mem_type; + td->bw_avg_time = parent->bw_avg_time; + td->create_serialize = parent->create_serialize; + td->create_fsync = parent->create_fsync; + td->loops = parent->loops; + td->verify = parent->verify; + td->stonewall = parent->stonewall; + td->numjobs = parent->numjobs; + td->use_thread = parent->use_thread; + td->do_disk_util = parent->do_disk_util; + memcpy(&td->cpumask, &parent->cpumask, sizeof(td->cpumask)); + strcpy(td->io_engine_name, parent->io_engine_name); + + return td; +} + +static void put_job(struct thread_data *td) +{ + memset(&threads[td->thread_number - 1], 0, sizeof(*td)); + thread_number--; +} + +static int add_job(struct thread_data *td, const char *jobname, int job_add_num) +{ + char *ddir_str[] = { "read", "write", "randread", "randwrite", + "rw", NULL, "randrw" }; + struct stat sb; + int numjobs, ddir; + +#ifndef FIO_HAVE_LIBAIO + if (td->io_engine == FIO_LIBAIO) { + fprintf(stderr, "Linux libaio not available\n"); + return 1; + } +#endif +#ifndef FIO_HAVE_POSIXAIO + if (td->io_engine == FIO_POSIXAIO) { + 
fprintf(stderr, "posix aio not available\n"); + return 1; + } +#endif + + /* + * the def_thread is just for options, it's not a real job + */ + if (td == &def_thread) + return 0; + + if (td->io_engine & FIO_SYNCIO) + td->iodepth = 1; + else { + if (!td->iodepth) + td->iodepth = 1; + } + + /* + * only really works for sequential io for now + */ + if (td->zone_size && !td->sequential) + td->zone_size = 0; + + td->filetype = FIO_TYPE_FILE; + if (!stat(jobname, &sb)) { + if (S_ISBLK(sb.st_mode)) + td->filetype = FIO_TYPE_BD; + else if (S_ISCHR(sb.st_mode)) + td->filetype = FIO_TYPE_CHAR; + } + + if (td->filetype == FIO_TYPE_FILE) { + if (td->directory && td->directory[0] != '\0') + sprintf(td->file_name, "%s/%s.%d", td->directory, jobname, td->jobnum); + else + sprintf(td->file_name, "%s.%d", jobname, td->jobnum); + } else + strncpy(td->file_name, jobname, sizeof(td->file_name) - 1); + + sem_init(&td->mutex, 0, 0); + + td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX; + td->slat_stat[0].min_val = td->slat_stat[1].min_val = ULONG_MAX; + td->bw_stat[0].min_val = td->bw_stat[1].min_val = ULONG_MAX; + + if (td->min_bs == -1U) + td->min_bs = td->bs; + if (td->max_bs == -1U) + td->max_bs = td->bs; + if (td_read(td) && !td_rw(td)) + td->verify = 0; + + if (td->stonewall && td->thread_number > 1) + groupid++; + + td->groupid = groupid; + + if (setup_rate(td)) + goto err; + + if (write_lat_log) { + setup_log(&td->slat_log); + setup_log(&td->clat_log); + } + if (write_bw_log) + setup_log(&td->bw_log); + + ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2); + + if (!job_add_num) + printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth); + else if (job_add_num == 1) + printf("...\n"); + + /* + * recurse add identical jobs, clear numjobs and stonewall options + * as they don't apply to sub-jobs + */ + 
numjobs = td->numjobs; + while (--numjobs) { + struct thread_data *td_new = get_new_job(0, td); + + if (!td_new) + goto err; + + td_new->numjobs = 1; + td_new->stonewall = 0; + td_new->jobnum = numjobs; + job_add_num = numjobs - 1; + + if (add_job(td_new, jobname, job_add_num)) + goto err; + } + return 0; +err: + put_job(td); + return -1; +} + +int init_random_state(struct thread_data *td) +{ + unsigned long seeds[4]; + int fd, num_maps, blocks; + + fd = open("/dev/urandom", O_RDONLY); + if (fd == -1) { + td_verror(td, errno); + return 1; + } + + if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) { + td_verror(td, EIO); + close(fd); + return 1; + } + + close(fd); + + srand48_r(seeds[0], &td->bsrange_state); + srand48_r(seeds[1], &td->verify_state); + srand48_r(seeds[2], &td->rwmix_state); + + if (td->sequential) + return 0; + + if (repeatable) + seeds[3] = DEF_RANDSEED; + + blocks = (td->io_size + td->min_bs - 1) / td->min_bs; + num_maps = blocks / BLOCKS_PER_MAP; + td->file_map = malloc(num_maps * sizeof(long)); + td->num_maps = num_maps; + memset(td->file_map, 0, num_maps * sizeof(long)); + + srand48_r(seeds[3], &td->random_state); + return 0; +} + +static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu) +{ +#ifdef FIO_HAVE_CPU_AFFINITY + unsigned int i; + + CPU_ZERO(&cpumask); + + for (i = 0; i < sizeof(int) * 8; i++) { + if ((1 << i) & cpu) + CPU_SET(i, &cpumask); + } +#endif +} + +static unsigned long get_mult(char c) +{ + switch (c) { + case 'k': + case 'K': + return 1024; + case 'm': + case 'M': + return 1024 * 1024; + case 'g': + case 'G': + return 1024 * 1024 * 1024; + default: + return 1; + } +} + +/* + * convert string after '=' into decimal value, noting any size suffix + */ +static int str_cnv(char *p, unsigned long long *val) +{ + char *str; + int len; + + str = strchr(p, '='); + if (!str) + return 1; + + str++; + len = strlen(str); + + *val = strtoul(str, NULL, 10); + if (*val == ULONG_MAX && errno == ERANGE) + return 1; + + *val *= 
get_mult(str[len - 1]); + return 0; +} + +static int check_strcnv(char *p, char *name, unsigned long long *val) +{ + if (strncmp(p, name, strlen(name) - 1)) + return 1; + + return str_cnv(p, val); +} + +static void strip_blank_front(char **p) +{ + char *s = *p; + + while (isspace(*s)) + s++; +} + +static void strip_blank_end(char *p) +{ + char *s = p + strlen(p) - 1; + + while (isspace(*s) || iscntrl(*s)) + s--; + + *(s + 1) = '\0'; +} + +typedef int (str_cb_fn)(struct thread_data *, char *); + +static int check_str(char *p, char *name, str_cb_fn *cb, struct thread_data *td) +{ + char *s; + + if (strncmp(p, name, strlen(name))) + return 1; + + s = strstr(p, name); + if (!s) + return 1; + + s = strchr(s, '='); + if (!s) + return 1; + + s++; + strip_blank_front(&s); + return cb(td, s); +} + +static int check_strstore(char *p, char *name, char *dest) +{ + char *s; + + if (strncmp(p, name, strlen(name))) + return 1; + + s = strstr(p, name); + if (!s) + return 1; + + s = strchr(p, '='); + if (!s) + return 1; + + s++; + strip_blank_front(&s); + + strcpy(dest, s); + return 0; +} + +static int __check_range(char *str, unsigned long *val) +{ + char suffix; + + if (sscanf(str, "%lu%c", val, &suffix) == 2) { + *val *= get_mult(suffix); + return 0; + } + + if (sscanf(str, "%lu", val) == 1) + return 0; + + return 1; +} + +static int check_range(char *p, char *name, unsigned long *s, unsigned long *e) +{ + char option[128]; + char *str, *p1, *p2; + + if (strncmp(p, name, strlen(name))) + return 1; + + strcpy(option, p); + p = option; + + str = strstr(p, name); + if (!str) + return 1; + + p += strlen(name); + + str = strchr(p, '='); + if (!str) + return 1; + + /* + * 'p' now holds whatever is after the '=' sign + */ + p1 = str + 1; + + /* + * terminate p1 at the '-' sign + */ + p = strchr(p1, '-'); + if (!p) + return 1; + + p2 = p + 1; + *p = '\0'; + + if (!__check_range(p1, s) && !__check_range(p2, e)) + return 0; + + return 1; +} + +static int check_int(char *p, char *name, 
unsigned int *val) +{ + char *str; + + if (strncmp(p, name, strlen(name))) + return 1; + + str = strstr(p, name); + if (!str) + return 1; + + str = strchr(p, '='); + if (!str) + return 1; + + str++; + + if (sscanf(str, "%u", val) == 1) + return 0; + + return 1; +} + +static int check_strset(char *p, char *name) +{ + return strncmp(p, name, strlen(name)); +} + +static int is_empty_or_comment(char *line) +{ + unsigned int i; + + for (i = 0; i < strlen(line); i++) { + if (line[i] == ';') + return 1; + if (!isspace(line[i]) && !iscntrl(line[i])) + return 0; + } + + return 1; +} + +static int str_rw_cb(struct thread_data *td, char *mem) +{ + if (!strncmp(mem, "read", 4) || !strncmp(mem, "0", 1)) { + td->ddir = DDIR_READ; + td->sequential = 1; + return 0; + } else if (!strncmp(mem, "randread", 8)) { + td->ddir = DDIR_READ; + td->sequential = 0; + return 0; + } else if (!strncmp(mem, "write", 5) || !strncmp(mem, "1", 1)) { + td->ddir = DDIR_WRITE; + td->sequential = 1; + return 0; + } else if (!strncmp(mem, "randwrite", 9)) { + td->ddir = DDIR_WRITE; + td->sequential = 0; + return 0; + } else if (!strncmp(mem, "rw", 2)) { + td->ddir = 0; + td->iomix = 1; + td->sequential = 1; + return 0; + } else if (!strncmp(mem, "randrw", 6)) { + td->ddir = 0; + td->iomix = 1; + td->sequential = 0; + return 0; + } + + fprintf(stderr, "bad data direction: %s\n", mem); + return 1; +} + +static int str_verify_cb(struct thread_data *td, char *mem) +{ + if (!strncmp(mem, "0", 1)) { + td->verify = VERIFY_NONE; + return 0; + } else if (!strncmp(mem, "md5", 3) || !strncmp(mem, "1", 1)) { + td->verify = VERIFY_MD5; + return 0; + } else if (!strncmp(mem, "crc32", 5)) { + td->verify = VERIFY_CRC32; + return 0; + } + + fprintf(stderr, "bad verify type: %s\n", mem); + return 1; +} + +static int str_mem_cb(struct thread_data *td, char *mem) +{ + if (!strncmp(mem, "malloc", 6)) { + td->mem_type = MEM_MALLOC; + return 0; + } else if (!strncmp(mem, "shm", 3)) { + td->mem_type = MEM_SHM; + return 0; + } 
else if (!strncmp(mem, "mmap", 4)) { + td->mem_type = MEM_MMAP; + return 0; + } + + fprintf(stderr, "bad mem type: %s\n", mem); + return 1; +} + +static int str_ioengine_cb(struct thread_data *td, char *str) +{ + if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) || + !strncmp(str, "libaio", 6)) { + strcpy(td->io_engine_name, "libaio"); + td->io_engine = FIO_LIBAIO; + return 0; + } else if (!strncmp(str, "posixaio", 8)) { + strcpy(td->io_engine_name, "posixaio"); + td->io_engine = FIO_POSIXAIO; + return 0; + } else if (!strncmp(str, "sync", 4)) { + strcpy(td->io_engine_name, "sync"); + td->io_engine = FIO_SYNCIO; + return 0; + } else if (!strncmp(str, "mmap", 4)) { + strcpy(td->io_engine_name, "mmap"); + td->io_engine = FIO_MMAPIO; + return 0; + } else if (!strncmp(str, "sgio", 4)) { + strcpy(td->io_engine_name, "sgio"); + td->io_engine = FIO_SGIO; + return 0; + } else if (!strncmp(str, "splice", 6)) { + strcpy(td->io_engine_name, "splice"); + td->io_engine = FIO_SPLICEIO; + return 0; + } + + fprintf(stderr, "bad ioengine type: %s\n", str); + return 1; +} + +static int str_iolog_cb(struct thread_data *td, char *file) +{ + td->iolog_file = strdup(file); + return 0; +} + +static int str_prerun_cb(struct thread_data *td, char *file) +{ + td->exec_prerun = strdup(file); + return 0; +} + +static int str_postrun_cb(struct thread_data *td, char *file) +{ + td->exec_postrun = strdup(file); + return 0; +} + +static int str_iosched_cb(struct thread_data *td, char *file) +{ + td->ioscheduler = strdup(file); + return 0; +} + +int parse_jobs_ini(char *file) +{ + unsigned int prioclass, prio, cpu, global, il; + unsigned long long ull; + unsigned long ul1, ul2; + struct thread_data *td; + char *string, *name, *tmpbuf; + fpos_t off; + FILE *f; + char *p; + + f = fopen(file, "r"); + if (!f) { + perror("fopen job file"); + return 1; + } + + string = malloc(4096); + name = malloc(256); + tmpbuf = malloc(4096); + + while ((p = fgets(string, 4096, f)) != NULL) { + if 
(is_empty_or_comment(p)) + continue; + if (sscanf(p, "[%s]", name) != 1) + continue; + + global = !strncmp(name, "global", 6); + + name[strlen(name) - 1] = '\0'; + + td = get_new_job(global, &def_thread); + if (!td) + return 1; + + fgetpos(f, &off); + while ((p = fgets(string, 4096, f)) != NULL) { + if (is_empty_or_comment(p)) + continue; + if (strstr(p, "[")) + break; + strip_blank_front(&p); + strip_blank_end(p); + + if (!check_int(p, "prio", &prio)) { +#ifndef FIO_HAVE_IOPRIO + fprintf(stderr, "io priorities not available\n"); + return 1; +#endif + td->ioprio |= prio; + fgetpos(f, &off); + continue; + } + if (!check_int(p, "prioclass", &prioclass)) { +#ifndef FIO_HAVE_IOPRIO + fprintf(stderr, "io priorities not available\n"); + return 1; +#endif + td->ioprio |= prioclass << IOPRIO_CLASS_SHIFT; + fgetpos(f, &off); + continue; + } + if (!check_int(p, "direct", &td->odirect)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "rate", &td->rate)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "ratemin", &td->ratemin)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "ratecycle", &td->ratecycle)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "thinktime", &td->thinktime)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "cpumask", &cpu)) { +#ifndef FIO_HAVE_CPU_AFFINITY + fprintf(stderr, "cpu affinity not available\n"); + return 1; +#endif + fill_cpu_mask(td->cpumask, cpu); + fgetpos(f, &off); + continue; + } + if (!check_int(p, "fsync", &td->fsync_blocks)) { + fgetpos(f, &off); + td->end_fsync = 1; + continue; + } + if (!check_int(p, "startdelay", &td->start_delay)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "timeout", &td->timeout)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "invalidate",&td->invalidate_cache)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "iodepth", &td->iodepth)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "sync", &td->sync_io)) { + fgetpos(f, 
&off); + continue; + } + if (!check_int(p, "bwavgtime", &td->bw_avg_time)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "create_serialize", &td->create_serialize)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "create_fsync", &td->create_fsync)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "end_fsync", &td->end_fsync)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "loops", &td->loops)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "numjobs", &td->numjobs)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "overwrite", &td->overwrite)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "rwmixcycle", &td->rwmixcycle)) { + fgetpos(f, &off); + continue; + } + if (!check_int(p, "rwmixread", &il)) { + if (il > 100) + il = 100; + td->rwmixread = il; + fgetpos(f, &off); + continue; + } + if (!check_int(p, "rwmixwrite", &il)) { + if (il > 100) + il = 100; + td->rwmixread = 100 - il; + fgetpos(f, &off); + continue; + } + if (!check_int(p, "nice", &td->nice)) { + fgetpos(f, &off); + continue; + } + if (!check_range(p, "bsrange", &ul1, &ul2)) { + if (ul1 > ul2) { + td->max_bs = ul1; + td->min_bs = ul2; + } else { + td->max_bs = ul2; + td->min_bs = ul1; + } + fgetpos(f, &off); + continue; + } + if (!check_strcnv(p, "bs", &ull)) { + td->bs = ull; + fgetpos(f, &off); + continue; + } + if (!check_strcnv(p, "size", &td->file_size)) { + fgetpos(f, &off); + continue; + } + if (!check_strcnv(p, "offset", &td->file_offset)) { + fgetpos(f, &off); + continue; + } + if (!check_strcnv(p, "zonesize", &td->zone_size)) { + fgetpos(f, &off); + continue; + } + if (!check_strcnv(p, "zoneskip", &td->zone_skip)) { + fgetpos(f, &off); + continue; + } + if (!check_strcnv(p, "lockmem", &mlock_size)) { + fgetpos(f, &off); + continue; + } + if (!check_strstore(p, "directory", tmpbuf)) { + td->directory = strdup(tmpbuf); + fgetpos(f, &off); + continue; + } + if (!check_str(p, "mem", str_mem_cb, td)) { + fgetpos(f, &off); + 
continue; + } + if (!check_str(p, "verify", str_verify_cb, td)) { + fgetpos(f, &off); + continue; + } + if (!check_str(p, "rw", str_rw_cb, td)) { + fgetpos(f, &off); + continue; + } + if (!check_str(p, "ioengine", str_ioengine_cb, td)) { + fgetpos(f, &off); + continue; + } + if (!check_strset(p, "create")) { + td->create_file = 1; + fgetpos(f, &off); + continue; + } + if (!check_strset(p, "exitall")) { + exitall_on_terminate = 1; + fgetpos(f, &off); + continue; + } + if (!check_strset(p, "stonewall")) { + td->stonewall = 1; + fgetpos(f, &off); + continue; + } + if (!check_strset(p, "thread")) { + td->use_thread = 1; + fgetpos(f, &off); + continue; + } + if (!check_str(p, "iolog", str_iolog_cb, td)) { + td->read_iolog = 1; + td->write_iolog = 0; + fgetpos(f, &off); + continue; + } + if (!td->read_iolog && + !check_str(p, "write_iolog", str_iolog_cb, td)) { + td->write_iolog = 1; + fgetpos(f, &off); + continue; + } + if (!check_str(p, "exec_prerun", str_prerun_cb, td)) { + fgetpos(f, &off); + continue; + } + if (!check_str(p, "exec_postrun", str_postrun_cb, td)) { + fgetpos(f, &off); + continue; + } + if (!check_str(p, "ioscheduler", str_iosched_cb, td)) { + fgetpos(f, &off); + continue; + } + + printf("Client%d: bad option %s\n",td->thread_number,p); + return 1; + } + fsetpos(f, &off); + + if (add_job(td, name, 0)) + return 1; + } + + free(string); + free(name); + free(tmpbuf); + fclose(f); + return 0; +} + +static int fill_def_thread(void) +{ + memset(&def_thread, 0, sizeof(def_thread)); + + if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) { + perror("sched_getaffinity"); + return 1; + } + + /* + * fill globals + */ + def_thread.ddir = DDIR_READ; + def_thread.iomix = 0; + def_thread.bs = DEF_BS; + def_thread.min_bs = -1; + def_thread.max_bs = -1; + def_thread.io_engine = DEF_IO_ENGINE; + strcpy(def_thread.io_engine_name, DEF_IO_ENGINE_NAME); + def_thread.odirect = DEF_ODIRECT; + def_thread.ratecycle = DEF_RATE_CYCLE; + def_thread.sequential = 
DEF_SEQUENTIAL; + def_thread.timeout = DEF_TIMEOUT; + def_thread.create_file = DEF_CREATE; + def_thread.overwrite = DEF_OVERWRITE; + def_thread.invalidate_cache = DEF_INVALIDATE; + def_thread.sync_io = DEF_SYNCIO; + def_thread.mem_type = MEM_MALLOC; + def_thread.bw_avg_time = DEF_BWAVGTIME; + def_thread.create_serialize = DEF_CREATE_SER; + def_thread.create_fsync = DEF_CREATE_FSYNC; + def_thread.loops = DEF_LOOPS; + def_thread.verify = DEF_VERIFY; + def_thread.stonewall = DEF_STONEWALL; + def_thread.numjobs = DEF_NUMJOBS; + def_thread.use_thread = DEF_USE_THREAD; + def_thread.rwmixcycle = DEF_RWMIX_CYCLE; + def_thread.rwmixread = DEF_RWMIX_READ; + def_thread.nice = DEF_NICE; +#ifdef FIO_HAVE_DISK_UTIL + def_thread.do_disk_util = 1; +#endif + + return 0; +} + +static void usage(char *name) +{ + printf("%s\n", fio_version_string); + printf("\t-s IO is sequential\n"); + printf("\t-b Block size in KiB for each IO\n"); + printf("\t-t Runtime in seconds\n"); + printf("\t-R Exit all threads on failure to meet rate goal\n"); + printf("\t-o Use O_DIRECT\n"); + printf("\t-l Generate per-job latency logs\n"); + printf("\t-w Generate per-job bandwidth logs\n"); + printf("\t-f Job file (Required)\n"); + printf("\t-v Print version info and exit\n"); +} + +static void parse_cmd_line(int argc, char *argv[]) +{ + int c; + + while ((c = getopt(argc, argv, "s:b:t:r:R:o:f:lwvh")) != EOF) { + switch (c) { + case 's': + def_thread.sequential = !!atoi(optarg); + break; + case 'b': + def_thread.bs = atoi(optarg); + def_thread.bs <<= 10; + if (!def_thread.bs) { + printf("bad block size\n"); + def_thread.bs = DEF_BS; + } + break; + case 't': + def_thread.timeout = atoi(optarg); + break; + case 'r': + repeatable = !!atoi(optarg); + break; + case 'R': + rate_quit = !!atoi(optarg); + break; + case 'o': + def_thread.odirect = !!atoi(optarg); + break; + case 'f': + ini_file = strdup(optarg); + break; + case 'l': + write_lat_log = 1; + break; + case 'w': + write_bw_log = 1; + break; + case 'h': + 
usage(argv[0]); + exit(0); + case 'v': + printf("%s\n", fio_version_string); + exit(0); + } + } + + if (!ini_file && argc > 1 && argv[argc - 1][0] != '-') + ini_file = strdup(argv[argc - 1]); +} + +static void free_shm(void) +{ + struct shmid_ds sbuf; + + if (threads) { + shmdt(threads); + threads = NULL; + shmctl(shm_id, IPC_RMID, &sbuf); + } +} + +static int setup_thread_area(void) +{ + /* + * 1024 is too much on some machines, scale max_jobs if + * we get a failure that looks like too large a shm segment + */ + do { + int s = max_jobs * sizeof(struct thread_data); + + shm_id = shmget(0, s, IPC_CREAT | 0600); + if (shm_id != -1) + break; + if (errno != EINVAL) { + perror("shmget"); + break; + } + + max_jobs >>= 1; + } while (max_jobs); + + if (shm_id == -1) + return 1; + + threads = shmat(shm_id, NULL, 0); + if (threads == (void *) -1) { + perror("shmat"); + return 1; + } + + atexit(free_shm); + return 0; +} + +int parse_options(int argc, char *argv[]) +{ + if (setup_thread_area()) + return 1; + if (fill_def_thread()) + return 1; + + parse_cmd_line(argc, argv); + + if (!ini_file) { + printf("Need job file\n"); + usage(argv[0]); + return 1; + } + + if (parse_jobs_ini(ini_file)) { + usage(argv[0]); + return 1; + } + + return 0; +} diff --git a/ioengines.c b/ioengines.c new file mode 100644 index 00000000..7b1c1bd3 --- /dev/null +++ b/ioengines.c @@ -0,0 +1,919 @@ +/* + * The io parts of the fio tool, includes workers for sync and mmap'ed + * io, as well as both posix and linux libaio support. + * + * sync io is implemented on top of aio. + * + * This is not really specific to fio, if the get_io_u/put_io_u and + * structures was pulled into this as well it would be a perfectly + * generic io engine that could be used for other projects. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "fio.h" +#include "os.h" + +#ifdef FIO_HAVE_LIBAIO + +#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj) + +static int fio_io_sync(struct thread_data *td) +{ + return fsync(td->fd); +} + +static int fill_timespec(struct timespec *ts) +{ +#ifdef _POSIX_TIMERS + if (!clock_gettime(CLOCK_MONOTONIC, ts)) + return 0; + + perror("clock_gettime"); +#endif + return 1; +} + +static unsigned long long ts_utime_since_now(struct timespec *t) +{ + long long sec, nsec; + struct timespec now; + + if (fill_timespec(&now)) + return 0; + + sec = now.tv_sec - t->tv_sec; + nsec = now.tv_nsec - t->tv_nsec; + if (sec > 0 && nsec < 0) { + sec--; + nsec += 1000000000; + } + + sec *= 1000000; + nsec /= 1000; + return sec + nsec; +} + +struct libaio_data { + io_context_t aio_ctx; + struct io_event *aio_events; +}; + +static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u) +{ + if (io_u->ddir == DDIR_READ) + io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); + else + io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); + + return 0; +} + +static struct io_u *fio_libaio_event(struct thread_data *td, int event) +{ + struct libaio_data *ld = td->io_data; + + return ev_to_iou(ld->aio_events + event); +} + +static int fio_libaio_getevents(struct thread_data *td, int min, int max, + struct timespec *t) +{ + struct libaio_data *ld = td->io_data; + int r; + + do { + r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t); + if (r == -EAGAIN) { + usleep(100); + continue; + } else if (r == -EINTR) + continue; + else + break; + } while (1); + + return r; +} + +static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct libaio_data *ld = td->io_data; + struct iocb *iocb = &io_u->iocb; + int ret; + + do { + ret = io_submit(ld->aio_ctx, 1, &iocb); + if (ret == 1) + return 0; + else if (ret == -EAGAIN) + 
usleep(100); + else if (ret == -EINTR) + continue; + else + break; + } while (1); + + return ret; + +} + +static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u) +{ + struct libaio_data *ld = td->io_data; + + return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events); +} + +static void fio_libaio_cleanup(struct thread_data *td) +{ + struct libaio_data *ld = td->io_data; + + if (ld) { + io_destroy(ld->aio_ctx); + if (ld->aio_events) + free(ld->aio_events); + + free(ld); + td->io_data = NULL; + } +} + +int fio_libaio_init(struct thread_data *td) +{ + struct libaio_data *ld = malloc(sizeof(*ld)); + + memset(ld, 0, sizeof(*ld)); + if (io_queue_init(td->iodepth, &ld->aio_ctx)) { + td_verror(td, errno); + return 1; + } + + td->io_prep = fio_libaio_io_prep; + td->io_queue = fio_libaio_queue; + td->io_getevents = fio_libaio_getevents; + td->io_event = fio_libaio_event; + td->io_cancel = fio_libaio_cancel; + td->io_cleanup = fio_libaio_cleanup; + td->io_sync = fio_io_sync; + + ld->aio_events = malloc(td->iodepth * sizeof(struct io_event)); + td->io_data = ld; + return 0; +} + +#else /* FIO_HAVE_LIBAIO */ + +int fio_libaio_init(struct thread_data *td) +{ + return EINVAL; +} + +#endif /* FIO_HAVE_LIBAIO */ + +#ifdef FIO_HAVE_POSIXAIO + +struct posixaio_data { + struct io_u **aio_events; +}; + +static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u) +{ + int r = aio_cancel(td->fd, &io_u->aiocb); + + if (r == 1 || r == AIO_CANCELED) + return 0; + + return 1; +} + +static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u) +{ + struct aiocb *aiocb = &io_u->aiocb; + + aiocb->aio_fildes = td->fd; + aiocb->aio_buf = io_u->buf; + aiocb->aio_nbytes = io_u->buflen; + aiocb->aio_offset = io_u->offset; + + io_u->seen = 0; + return 0; +} + +static int fio_posixaio_getevents(struct thread_data *td, int min, int max, + struct timespec *t) +{ + struct posixaio_data *pd = td->io_data; + struct list_head *entry; + struct timespec start; + int r, 
have_timeout = 0; + + if (t && !fill_timespec(&start)) + have_timeout = 1; + + r = 0; +restart: + list_for_each(entry, &td->io_u_busylist) { + struct io_u *io_u = list_entry(entry, struct io_u, list); + int err; + + if (io_u->seen) + continue; + + err = aio_error(&io_u->aiocb); + switch (err) { + default: + io_u->error = err; + case ECANCELED: + case 0: + pd->aio_events[r++] = io_u; + io_u->seen = 1; + break; + case EINPROGRESS: + break; + } + + if (r >= max) + break; + } + + if (r >= min) + return r; + + if (have_timeout) { + unsigned long long usec; + + usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000); + if (ts_utime_since_now(&start) > usec) + return r; + } + + /* + * hrmpf, we need to wait for more. we should use aio_suspend, for + * now just sleep a little and recheck status of busy-and-not-seen + */ + usleep(1000); + goto restart; +} + +static struct io_u *fio_posixaio_event(struct thread_data *td, int event) +{ + struct posixaio_data *pd = td->io_data; + + return pd->aio_events[event]; +} + +static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct aiocb *aiocb = &io_u->aiocb; + int ret; + + if (io_u->ddir == DDIR_READ) + ret = aio_read(aiocb); + else + ret = aio_write(aiocb); + + if (ret) + io_u->error = errno; + + return io_u->error; +} + +static void fio_posixaio_cleanup(struct thread_data *td) +{ + struct posixaio_data *pd = td->io_data; + + if (pd) { + free(pd->aio_events); + free(pd); + td->io_data = NULL; + } +} + +int fio_posixaio_init(struct thread_data *td) +{ + struct posixaio_data *pd = malloc(sizeof(*pd)); + + pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *)); + + td->io_prep = fio_posixaio_prep; + td->io_queue = fio_posixaio_queue; + td->io_getevents = fio_posixaio_getevents; + td->io_event = fio_posixaio_event; + td->io_cancel = fio_posixaio_cancel; + td->io_cleanup = fio_posixaio_cleanup; + td->io_sync = fio_io_sync; + + td->io_data = pd; + return 0; +} + +#else /* FIO_HAVE_POSIXAIO */ + +int 
fio_posixaio_init(struct thread_data *td) +{ + return EINVAL; +} + +#endif /* FIO_HAVE_POSIXAIO */ + +struct syncio_data { + struct io_u *last_io_u; +}; + +static int fio_syncio_getevents(struct thread_data *td, int min, int max, + struct timespec *t) +{ + assert(max <= 1); + + /* + * we can only have one finished io_u for sync io, since the depth + * is always 1 + */ + if (list_empty(&td->io_u_busylist)) + return 0; + + return 1; +} + +static struct io_u *fio_syncio_event(struct thread_data *td, int event) +{ + struct syncio_data *sd = td->io_data; + + assert(event == 0); + + return sd->last_io_u; +} + +static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u) +{ + if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) { + td_verror(td, errno); + return 1; + } + + return 0; +} + +static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct syncio_data *sd = td->io_data; + int ret; + + if (io_u->ddir == DDIR_READ) + ret = read(td->fd, io_u->buf, io_u->buflen); + else + ret = write(td->fd, io_u->buf, io_u->buflen); + + if ((unsigned int) ret != io_u->buflen) { + if (ret > 0) { + io_u->resid = io_u->buflen - ret; + io_u->error = ENODATA; + } else + io_u->error = errno; + } + + if (!io_u->error) + sd->last_io_u = io_u; + + return io_u->error; +} + +static void fio_syncio_cleanup(struct thread_data *td) +{ + if (td->io_data) { + free(td->io_data); + td->io_data = NULL; + } +} + +int fio_syncio_init(struct thread_data *td) +{ + struct syncio_data *sd = malloc(sizeof(*sd)); + + td->io_prep = fio_syncio_prep; + td->io_queue = fio_syncio_queue; + td->io_getevents = fio_syncio_getevents; + td->io_event = fio_syncio_event; + td->io_cancel = NULL; + td->io_cleanup = fio_syncio_cleanup; + td->io_sync = fio_io_sync; + + sd->last_io_u = NULL; + td->io_data = sd; + return 0; +} + +static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u) +{ + unsigned long long real_off = io_u->offset - td->file_offset; + struct syncio_data *sd = 
td->io_data; + + if (io_u->ddir == DDIR_READ) + memcpy(io_u->buf, td->mmap + real_off, io_u->buflen); + else + memcpy(td->mmap + real_off, io_u->buf, io_u->buflen); + + /* + * not really direct, but should drop the pages from the cache + */ + if (td->odirect) { + if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0) + io_u->error = errno; + if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0) + io_u->error = errno; + } + + if (!io_u->error) + sd->last_io_u = io_u; + + return io_u->error; +} + +static int fio_mmapio_sync(struct thread_data *td) +{ + return msync(td->mmap, td->file_size, MS_SYNC); +} + +int fio_mmapio_init(struct thread_data *td) +{ + struct syncio_data *sd = malloc(sizeof(*sd)); + + td->io_prep = NULL; + td->io_queue = fio_mmapio_queue; + td->io_getevents = fio_syncio_getevents; + td->io_event = fio_syncio_event; + td->io_cancel = NULL; + td->io_cleanup = fio_syncio_cleanup; + td->io_sync = fio_mmapio_sync; + + sd->last_io_u = NULL; + td->io_data = sd; + return 0; +} + +#ifdef FIO_HAVE_SGIO + +struct sgio_cmd { + unsigned char cdb[10]; + int nr; +}; + +struct sgio_data { + struct sgio_cmd *cmds; + struct io_u **events; + unsigned int bs; +}; + +static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, + struct io_u *io_u, int fs) +{ + struct sgio_cmd *sc = &sd->cmds[io_u->index]; + + memset(hdr, 0, sizeof(*hdr)); + memset(sc->cdb, 0, sizeof(sc->cdb)); + + hdr->interface_id = 'S'; + hdr->cmdp = sc->cdb; + hdr->cmd_len = sizeof(sc->cdb); + hdr->pack_id = io_u->index; + hdr->usr_ptr = io_u; + + if (fs) { + hdr->dxferp = io_u->buf; + hdr->dxfer_len = io_u->buflen; + } +} + +static int fio_sgio_getevents(struct thread_data *td, int min, int max, + struct timespec *t) +{ + struct sgio_data *sd = td->io_data; + struct pollfd pfd = { .fd = td->fd, .events = POLLIN }; + void *buf = malloc(max * sizeof(struct sg_io_hdr)); + int left = max, ret, events, i, r = 0, fl = 0; + + /* + * don't block for !events + */ + if (!min) { + fl 
= fcntl(td->fd, F_GETFL); + fcntl(td->fd, F_SETFL, fl | O_NONBLOCK); + } + + while (left) { + do { + if (!min) + break; + poll(&pfd, 1, -1); + if (pfd.revents & POLLIN) + break; + } while (1); + + ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr)); + if (ret < 0) { + if (errno == EAGAIN) + break; + td_verror(td, errno); + r = -1; + break; + } else if (!ret) + break; + + events = ret / sizeof(struct sg_io_hdr); + left -= events; + + /* + * append behind the completions gathered by earlier reads in + * this call (r is the number stored so far), so a second read + * does not overwrite events[0..] + */ + for (i = 0; i < events; i++) { + struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i; + + sd->events[r + i] = hdr->usr_ptr; + } + + r += events; + } + + if (!min) + fcntl(td->fd, F_SETFL, fl); + + free(buf); + return r; +} + +static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u) +{ + struct sgio_data *sd = td->io_data; + struct sg_io_hdr *hdr = &io_u->hdr; + + sd->events[0] = io_u; + + return ioctl(td->fd, SG_IO, hdr); +} + +static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync) +{ + struct sg_io_hdr *hdr = &io_u->hdr; + int ret; + + ret = write(td->fd, hdr, sizeof(*hdr)); + if (ret < 0) + return errno; + + if (sync) { + ret = read(td->fd, hdr, sizeof(*hdr)); + if (ret < 0) + return errno; + } + + return 0; +} + +static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync) +{ + if (td->filetype == FIO_TYPE_BD) + return fio_sgio_ioctl_doio(td, io_u); + + return fio_sgio_rw_doio(td, io_u, sync); +} + +static int fio_sgio_sync(struct thread_data *td) +{ + struct sgio_data *sd = td->io_data; + struct sg_io_hdr *hdr; + struct io_u *io_u; + int ret; + + io_u = __get_io_u(td); + if (!io_u) + return ENOMEM; + + hdr = &io_u->hdr; + sgio_hdr_init(sd, hdr, io_u, 0); + hdr->dxfer_direction = SG_DXFER_NONE; + + hdr->cmdp[0] = 0x35; + + ret = fio_sgio_doio(td, io_u, 1); + put_io_u(td, io_u); + return ret; +} + +static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) +{ + struct sg_io_hdr *hdr = &io_u->hdr; + struct sgio_data *sd = td->io_data; + int nr_blocks, lba; + + if
(io_u->buflen & (sd->bs - 1)) { + fprintf(stderr, "read/write not sector aligned\n"); + return EINVAL; + } + + sgio_hdr_init(sd, hdr, io_u, 1); + + if (io_u->ddir == DDIR_READ) { + hdr->dxfer_direction = SG_DXFER_FROM_DEV; + hdr->cmdp[0] = 0x28; + } else { + hdr->dxfer_direction = SG_DXFER_TO_DEV; + hdr->cmdp[0] = 0x2a; + } + + nr_blocks = io_u->buflen / sd->bs; + lba = io_u->offset / sd->bs; + hdr->cmdp[2] = (lba >> 24) & 0xff; + hdr->cmdp[3] = (lba >> 16) & 0xff; + hdr->cmdp[4] = (lba >> 8) & 0xff; + hdr->cmdp[5] = lba & 0xff; + hdr->cmdp[7] = (nr_blocks >> 8) & 0xff; + hdr->cmdp[8] = nr_blocks & 0xff; + return 0; +} + +static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct sg_io_hdr *hdr = &io_u->hdr; + int ret; + + ret = fio_sgio_doio(td, io_u, 0); + + if (ret < 0) + io_u->error = errno; + else if (hdr->status) { + io_u->resid = hdr->resid; + io_u->error = EIO; + } + + return io_u->error; +} + +static struct io_u *fio_sgio_event(struct thread_data *td, int event) +{ + struct sgio_data *sd = td->io_data; + + return sd->events[event]; +} + +static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs) +{ + struct sgio_data *sd = td->io_data; + struct io_u *io_u; + struct sg_io_hdr *hdr; + unsigned char buf[8]; + int ret; + + io_u = __get_io_u(td); + assert(io_u); + + hdr = &io_u->hdr; + sgio_hdr_init(sd, hdr, io_u, 0); + memset(buf, 0, sizeof(buf)); + + hdr->cmdp[0] = 0x25; + hdr->dxfer_direction = SG_DXFER_FROM_DEV; + hdr->dxferp = buf; + hdr->dxfer_len = sizeof(buf); + + ret = fio_sgio_doio(td, io_u, 1); + if (ret) { + put_io_u(td, io_u); + return ret; + } + + *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; + put_io_u(td, io_u); + return 0; +} + +int fio_sgio_init(struct thread_data *td) +{ + struct sgio_data *sd; + unsigned int bs; + int ret; + + sd = malloc(sizeof(*sd)); + sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd)); + sd->events = malloc(td->iodepth * sizeof(struct io_u *)); + td->io_data = sd; + 
+ if (td->filetype == FIO_TYPE_BD) { + if (ioctl(td->fd, BLKSSZGET, &bs) < 0) { + td_verror(td, errno); + return 1; + } + } else if (td->filetype == FIO_TYPE_CHAR) { + int version; + + if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) { + td_verror(td, errno); + return 1; + } + + ret = fio_sgio_get_bs(td, &bs); + if (ret) + return ret; + } else { + fprintf(stderr, "ioengine sgio only works on block devices\n"); + return 1; + } + + sd->bs = bs; + + td->io_prep = fio_sgio_prep; + td->io_queue = fio_sgio_queue; + + if (td->filetype == FIO_TYPE_BD) + td->io_getevents = fio_syncio_getevents; + else + td->io_getevents = fio_sgio_getevents; + + td->io_event = fio_sgio_event; + td->io_cancel = NULL; + td->io_cleanup = fio_syncio_cleanup; + td->io_sync = fio_sgio_sync; + + /* + * we want to do it, regardless of whether odirect is set or not + */ + td->override_sync = 1; + return 0; +} + +#else /* FIO_HAVE_SGIO */ + +int fio_sgio_init(struct thread_data *td) +{ + return EINVAL; +} + +#endif /* FIO_HAVE_SGIO */ + +#ifdef FIO_HAVE_SPLICE +struct spliceio_data { + struct io_u *last_io_u; + int pipe[2]; +}; + +static struct io_u *fio_spliceio_event(struct thread_data *td, int event) +{ + struct spliceio_data *sd = td->io_data; + + assert(event == 0); + + return sd->last_io_u; +} + +/* + * For splice reading, we unfortunately cannot (yet) vmsplice the other way. + * So just splice the data from the file into the pipe, and use regular + * read to fill the buffer. Doesn't make a lot of sense, but... 
+ */ +static int fio_splice_read(struct thread_data *td, struct io_u *io_u) +{ + struct spliceio_data *sd = td->io_data; + int ret, ret2, buflen; + off_t offset; + void *p; + + offset = io_u->offset; + buflen = io_u->buflen; + p = io_u->buf; + while (buflen) { + int this_len = buflen; + + if (this_len > SPLICE_DEF_SIZE) + this_len = SPLICE_DEF_SIZE; + + ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE); + if (ret < 0) { + if (errno == ENODATA || errno == EAGAIN) + continue; + + return errno; + } + + buflen -= ret; + + while (ret) { + ret2 = read(sd->pipe[0], p, ret); + if (ret2 < 0) + return errno; + + ret -= ret2; + p += ret2; + } + } + + return io_u->buflen; +} + +/* + * For splice writing, we can vmsplice our data buffer directly into a + * pipe and then splice that to a file. + */ +static int fio_splice_write(struct thread_data *td, struct io_u *io_u) +{ + struct spliceio_data *sd = td->io_data; + struct iovec iov[1] = { + { + .iov_base = io_u->buf, + .iov_len = io_u->buflen, + } + }; + struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; + off_t off = io_u->offset; + int ret, ret2; + + while (iov[0].iov_len) { + if (poll(&pfd, 1, -1) < 0) + return errno; + + ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK); + if (ret < 0) + return errno; + + iov[0].iov_len -= ret; + iov[0].iov_base += ret; + + while (ret) { + ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0); + if (ret2 < 0) + return errno; + + ret -= ret2; + } + } + + return io_u->buflen; +} + +static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct spliceio_data *sd = td->io_data; + int ret; + + if (io_u->ddir == DDIR_READ) + ret = fio_splice_read(td, io_u); + else + ret = fio_splice_write(td, io_u); + + if ((unsigned int) ret != io_u->buflen) { + if (ret > 0) { + io_u->resid = io_u->buflen - ret; + io_u->error = ENODATA; + } else + io_u->error = errno; + } + + if (!io_u->error) + sd->last_io_u = io_u; + + return io_u->error; +} + 
+static void fio_spliceio_cleanup(struct thread_data *td) +{ + struct spliceio_data *sd = td->io_data; + + if (sd) { + close(sd->pipe[0]); + close(sd->pipe[1]); + free(sd); + td->io_data = NULL; + } +} + +int fio_spliceio_init(struct thread_data *td) +{ + struct spliceio_data *sd = malloc(sizeof(*sd)); + + td->io_queue = fio_spliceio_queue; + td->io_getevents = fio_syncio_getevents; + td->io_event = fio_spliceio_event; + td->io_cancel = NULL; + td->io_cleanup = fio_spliceio_cleanup; + td->io_sync = fio_io_sync; + + sd->last_io_u = NULL; + if (pipe(sd->pipe) < 0) { + td_verror(td, errno); + free(sd); + return 1; + } + + td->io_data = sd; + return 0; +} + +#else /* FIO_HAVE_SPLICE */ + +int fio_spliceio_init(struct thread_data *td) +{ + return EINVAL; +} + +#endif /* FIO_HAVE_SPLICE */ diff --git a/log.c b/log.c new file mode 100644 index 00000000..42aedf27 --- /dev/null +++ b/log.c @@ -0,0 +1,162 @@ +#include +#include +#include "list.h" +#include "fio.h" + +void write_iolog_put(struct thread_data *td, struct io_u *io_u) +{ + fprintf(td->iolog_f, "%d,%llu,%u\n", io_u->ddir, io_u->offset, io_u->buflen); +} + +int read_iolog_get(struct thread_data *td, struct io_u *io_u) +{ + struct io_piece *ipo; + + if (!list_empty(&td->io_log_list)) { + ipo = list_entry(td->io_log_list.next, struct io_piece, list); + list_del(&ipo->list); + io_u->offset = ipo->offset; + io_u->buflen = ipo->len; + io_u->ddir = ipo->ddir; + free(ipo); + return 0; + } + + return 1; +} + +void prune_io_piece_log(struct thread_data *td) +{ + struct io_piece *ipo; + + while (!list_empty(&td->io_hist_list)) { + ipo = list_entry(td->io_hist_list.next, struct io_piece, list); + + list_del(&ipo->list); + free(ipo); + } +} + +/* + * log a succesful write, so we can unwind the log for verify + */ +void log_io_piece(struct thread_data *td, struct io_u *io_u) +{ + struct io_piece *ipo = malloc(sizeof(struct io_piece)); + struct list_head *entry; + + INIT_LIST_HEAD(&ipo->list); + ipo->offset = io_u->offset; + 
ipo->len = io_u->buflen; + + /* + * for random io where the writes extend the file, it will typically + * be laid out with the block scattered as written. it's faster to + * read them in in that order again, so don't sort + */ + if (td->sequential || !td->overwrite) { + list_add_tail(&ipo->list, &td->io_hist_list); + return; + } + + /* + * for random io, sort the list so verify will run faster + */ + entry = &td->io_hist_list; + while ((entry = entry->prev) != &td->io_hist_list) { + struct io_piece *__ipo = list_entry(entry, struct io_piece, list); + + if (__ipo->offset < ipo->offset) + break; + } + + list_add(&ipo->list, entry); +} + +void write_iolog_close(struct thread_data *td) +{ + fflush(td->iolog_f); + fclose(td->iolog_f); + free(td->iolog_buf); +} + +int init_iolog(struct thread_data *td) +{ + unsigned long long offset; + unsigned int bytes; + char *str, *p; + FILE *f; + int rw, i, reads, writes; + + if (!td->read_iolog && !td->write_iolog) + return 0; + + if (td->read_iolog) + f = fopen(td->iolog_file, "r"); + else + f = fopen(td->iolog_file, "w"); + + if (!f) { + perror("fopen iolog"); + printf("file %s, %d/%d\n", td->iolog_file, td->read_iolog, td->write_iolog); + return 1; + } + + /* + * That's it for writing, setup a log buffer and we're done. + */ + if (td->write_iolog) { + td->iolog_f = f; + td->iolog_buf = malloc(8192); + setvbuf(f, td->iolog_buf, _IOFBF, 8192); + return 0; + } + + /* + * Read in the read iolog and store it, reuse the infrastructure + * for doing verifications. 
+ */ + str = malloc(4096); + reads = writes = i = 0; + while ((p = fgets(str, 4096, f)) != NULL) { + struct io_piece *ipo; + + if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) { + fprintf(stderr, "bad iolog: %s\n", p); + continue; + } + if (rw == DDIR_READ) + reads++; + else if (rw == DDIR_WRITE) + writes++; + else { + fprintf(stderr, "bad ddir: %d\n", rw); + continue; + } + + ipo = malloc(sizeof(*ipo)); + INIT_LIST_HEAD(&ipo->list); + ipo->offset = offset; + ipo->len = bytes; + if (bytes > td->max_bs) + td->max_bs = bytes; + ipo->ddir = rw; + list_add_tail(&ipo->list, &td->io_log_list); + i++; + } + + free(str); + fclose(f); + + if (!i) + return 1; + + /* + * a log holding a single direction fixes the job's ddir, anything + * else is replayed as a mixed workload + */ + if (reads && !writes) + td->ddir = DDIR_READ; + else if (!reads && writes) + td->ddir = DDIR_WRITE; + else + td->iomix = 1; + + return 0; +} diff --git a/stat.c b/stat.c new file mode 100644 index 00000000..54527169 --- /dev/null +++ b/stat.c @@ -0,0 +1,518 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "fio.h" + +static struct itimerval itimer; +static LIST_HEAD(disk_list); + +static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) +{ + unsigned in_flight; + char line[256]; + FILE *f; + char *p; + + f = fopen(du->path, "r"); + if (!f) + return 1; + + p = fgets(line, sizeof(line), f); + if (!p) { + fclose(f); + return 1; + } + + if (sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", &dus->ios[0], &dus->merges[0], &dus->sectors[0], &dus->ticks[0], &dus->ios[1], &dus->merges[1], &dus->sectors[1], &dus->ticks[1], &in_flight, &dus->io_ticks, &dus->time_in_queue) != 11) { + fclose(f); + return 1; + } + + fclose(f); + return 0; +} + +static void update_io_tick_disk(struct disk_util *du) +{ + struct disk_util_stat __dus, *dus, *ldus; + struct timeval t; + + if (get_io_ticks(du, &__dus)) + return; + + dus = &du->dus; + ldus = &du->last_dus; + + dus->sectors[0] += (__dus.sectors[0] - ldus->sectors[0]); + dus->sectors[1] += (__dus.sectors[1] - ldus->sectors[1]); +
dus->ios[0] += (__dus.ios[0] - ldus->ios[0]); + dus->ios[1] += (__dus.ios[1] - ldus->ios[1]); + dus->merges[0] += (__dus.merges[0] - ldus->merges[0]); + dus->merges[1] += (__dus.merges[1] - ldus->merges[1]); + dus->ticks[0] += (__dus.ticks[0] - ldus->ticks[0]); + dus->ticks[1] += (__dus.ticks[1] - ldus->ticks[1]); + dus->io_ticks += (__dus.io_ticks - ldus->io_ticks); + dus->time_in_queue += (__dus.time_in_queue - ldus->time_in_queue); + + gettimeofday(&t, NULL); + du->msec += mtime_since(&du->time, &t); + memcpy(&du->time, &t, sizeof(t)); + memcpy(ldus, &__dus, sizeof(__dus)); +} + +void update_io_ticks(void) +{ + struct list_head *entry; + struct disk_util *du; + + list_for_each(entry, &disk_list) { + du = list_entry(entry, struct disk_util, list); + update_io_tick_disk(du); + } +} + +static int disk_util_exists(dev_t dev) +{ + struct list_head *entry; + struct disk_util *du; + + list_for_each(entry, &disk_list) { + du = list_entry(entry, struct disk_util, list); + + if (du->dev == dev) + return 1; + } + + return 0; +} + +static void disk_util_add(dev_t dev, char *path) +{ + struct disk_util *du = malloc(sizeof(*du)); + + memset(du, 0, sizeof(*du)); + INIT_LIST_HEAD(&du->list); + sprintf(du->path, "%s/stat", path); + du->name = strdup(basename(path)); + du->dev = dev; + + gettimeofday(&du->time, NULL); + get_io_ticks(du, &du->last_dus); + + list_add_tail(&du->list, &disk_list); +} + +static int check_dev_match(dev_t dev, char *path) +{ + unsigned int major, minor; + char line[256], *p; + FILE *f; + + f = fopen(path, "r"); + if (!f) { + perror("open path"); + return 1; + } + + p = fgets(line, sizeof(line), f); + if (!p) { + fclose(f); + return 1; + } + + if (sscanf(p, "%u:%u", &major, &minor) != 2) { + fclose(f); + return 1; + } + + if (((major << 8) | minor) == dev) { + fclose(f); + return 0; + } + + fclose(f); + return 1; +} + +static int find_block_dir(dev_t dev, char *path) +{ + struct dirent *dir; + struct stat st; + int found = 0; + DIR *D; + + D = 
opendir(path); + if (!D) + return 0; + + while ((dir = readdir(D)) != NULL) { + char full_path[256]; + + if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) + continue; + if (!strcmp(dir->d_name, "device")) + continue; + + sprintf(full_path, "%s/%s", path, dir->d_name); + + if (!strcmp(dir->d_name, "dev")) { + if (!check_dev_match(dev, full_path)) { + found = 1; + break; + } + } + + if (stat(full_path, &st) == -1) { + perror("stat"); + break; + } + + if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) + continue; + + found = find_block_dir(dev, full_path); + if (found) { + strcpy(path, full_path); + break; + } + } + + closedir(D); + return found; +} + +void init_disk_util(struct thread_data *td) +{ + struct stat st; + char foo[256], tmp[256]; + dev_t dev; + char *p; + + if (!td->do_disk_util) + return; + + if (!stat(td->file_name, &st)) { + if (S_ISBLK(st.st_mode)) + dev = st.st_rdev; + else + dev = st.st_dev; + } else { + /* + * must be a file, open "." in that path + */ + strcpy(foo, td->file_name); + p = dirname(foo); + if (stat(p, &st)) { + perror("disk util stat"); + return; + } + + dev = st.st_dev; + } + + if (disk_util_exists(dev)) + return; + + sprintf(foo, "/sys/block"); + if (!find_block_dir(dev, foo)) + return; + + /* + * If there's a ../queue/ directory there, we are inside a partition. + * Check if that is the case and jump back. For loop/md/dm etc we + * are already in the right spot. 
+ */ + sprintf(tmp, "%s/../queue", foo); + if (!stat(tmp, &st)) { + p = dirname(foo); + sprintf(tmp, "%s/queue", p); + if (stat(tmp, &st)) { + fprintf(stderr, "unknown sysfs layout\n"); + return; + } + sprintf(foo, "%s", p); + } + + td->sysfs_root = strdup(foo); + disk_util_add(dev, foo); +} + +void disk_util_timer_arm(void) +{ + itimer.it_value.tv_sec = 0; + itimer.it_value.tv_usec = DISK_UTIL_MSEC * 1000; + setitimer(ITIMER_REAL, &itimer, NULL); +} + +void update_rusage_stat(struct thread_data *td) +{ + if (!(td->runtime[0] + td->runtime[1])) + return; + + getrusage(RUSAGE_SELF, &td->ru_end); + + td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime); + td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime); + td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw); + + + memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end)); +} + +static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max, + double *mean, double *dev) +{ + double n; + + if (is->samples == 0) + return 0; + + *min = is->min_val; + *max = is->max_val; + + n = (double) is->samples; + *mean = (double) is->val / n; + + /* + * sample standard deviation: sqrt((sum(x^2) - n * mean^2) / (n - 1)). + * A single sample has no spread, and rounding may push the variance + * slightly below zero, report 0 in both cases instead of NaN. + */ + *dev = 0.0; + if (n > 1.0) { + double var = ((double) is->val_sq - n * *mean * *mean) / (n - 1.0); + + if (var > 0.0) + *dev = sqrt(var); + } + + if (!(*min + *max) && !(*mean + *dev)) + return 0; + + return 1; +} + +static void show_group_stats(struct group_run_stats *rs, int id) +{ + printf("\nRun status group %d (all jobs):\n", id); + + if (rs->max_run[DDIR_READ]) + printf(" READ: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[0] >> 10, rs->agg[0], rs->min_bw[0], rs->max_bw[0], rs->min_run[0], rs->max_run[0]); + if (rs->max_run[DDIR_WRITE]) + printf(" WRITE: io=%lluMiB, aggrb=%llu, minb=%llu, maxb=%llu, mint=%llumsec, maxt=%llumsec\n", rs->io_kb[1] >> 10, rs->agg[1], rs->min_bw[1], rs->max_bw[1], rs->min_run[1], rs->max_run[1]); +} + +static void show_disk_util(void) +{ + struct disk_util_stat *dus; + struct
list_head *entry; + struct disk_util *du; + double util; + + printf("\nDisk stats (read/write):\n"); + + list_for_each(entry, &disk_list) { + du = list_entry(entry, struct disk_util, list); + dus = &du->dus; + + util = (double) 100 * du->dus.io_ticks / (double) du->msec; + if (util > 100.0) + util = 100.0; + + printf(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, in_queue=%u, util=%3.2f%%\n", du->name, dus->ios[0], dus->ios[1], dus->merges[0], dus->merges[1], dus->ticks[0], dus->ticks[1], dus->time_in_queue, util); + } +} + +static void show_ddir_status(struct thread_data *td, struct group_run_stats *rs, + int ddir) +{ + char *ddir_str[] = { "read ", "write" }; + unsigned long min, max; + unsigned long long bw; + double mean, dev; + + if (!td->runtime[ddir]) + return; + + bw = td->io_bytes[ddir] / td->runtime[ddir]; + printf(" %s: io=%6lluMiB, bw=%6lluKiB/s, runt=%6lumsec\n", ddir_str[ddir], td->io_bytes[ddir] >> 20, bw, td->runtime[ddir]); + + if (calc_lat(&td->slat_stat[ddir], &min, &max, &mean, &dev)) + printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev); + + if (calc_lat(&td->clat_stat[ddir], &min, &max, &mean, &dev)) + printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev); + + if (calc_lat(&td->bw_stat[ddir], &min, &max, &mean, &dev)) { + double p_of_agg; + + p_of_agg = mean * 100 / (double) rs->agg[ddir]; + printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", min, max, p_of_agg, mean, dev); + } +} + +static void show_thread_status(struct thread_data *td, + struct group_run_stats *rs) +{ + double usr_cpu, sys_cpu; + + if (!(td->io_bytes[0] + td->io_bytes[1]) && !td->error) + return; + + printf("Client%d (groupid=%d): err=%2d:\n", td->thread_number, td->groupid, td->error); + + show_ddir_status(td, rs, td->ddir); + if (td->io_bytes[td->ddir ^ 1]) + show_ddir_status(td, rs, td->ddir ^ 1); + + if (td->runtime[0] + td->runtime[1]) { + double runt = td->runtime[0] + 
td->runtime[1]; + + usr_cpu = (double) td->usr_time * 100 / runt; + sys_cpu = (double) td->sys_time * 100 / runt; + } else { + usr_cpu = 0; + sys_cpu = 0; + } + + printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx); +} + +void show_run_stats(void) +{ + struct group_run_stats *runstats, *rs; + struct thread_data *td; + int i; + + runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1)); + + for (i = 0; i < groupid + 1; i++) { + rs = &runstats[i]; + + memset(rs, 0, sizeof(*rs)); + rs->min_bw[0] = rs->min_run[0] = ~0UL; + rs->min_bw[1] = rs->min_run[1] = ~0UL; + } + + for (i = 0; i < thread_number; i++) { + unsigned long long rbw, wbw; + + td = &threads[i]; + + if (td->error) { + printf("Client%d: %s\n", td->thread_number, td->verror); + continue; + } + + rs = &runstats[td->groupid]; + + if (td->runtime[0] < rs->min_run[0] || !rs->min_run[0]) + rs->min_run[0] = td->runtime[0]; + if (td->runtime[0] > rs->max_run[0]) + rs->max_run[0] = td->runtime[0]; + if (td->runtime[1] < rs->min_run[1] || !rs->min_run[1]) + rs->min_run[1] = td->runtime[1]; + if (td->runtime[1] > rs->max_run[1]) + rs->max_run[1] = td->runtime[1]; + + rbw = wbw = 0; + if (td->runtime[0]) + rbw = td->io_bytes[0] / (unsigned long long) td->runtime[0]; + if (td->runtime[1]) + wbw = td->io_bytes[1] / (unsigned long long) td->runtime[1]; + + if (rbw < rs->min_bw[0]) + rs->min_bw[0] = rbw; + if (wbw < rs->min_bw[1]) + rs->min_bw[1] = wbw; + if (rbw > rs->max_bw[0]) + rs->max_bw[0] = rbw; + if (wbw > rs->max_bw[1]) + rs->max_bw[1] = wbw; + + rs->io_kb[0] += td->io_bytes[0] >> 10; + rs->io_kb[1] += td->io_bytes[1] >> 10; + } + + for (i = 0; i < groupid + 1; i++) { + rs = &runstats[i]; + + if (rs->max_run[0]) + rs->agg[0] = (rs->io_kb[0]*1024) / rs->max_run[0]; + if (rs->max_run[1]) + rs->agg[1] = (rs->io_kb[1]*1024) / rs->max_run[1]; + } + + /* + * don't overwrite last signal output + */ + printf("\n"); + + for (i = 0; i < thread_number; i++) { + td = &threads[i]; + rs = 
&runstats[td->groupid]; + + show_thread_status(td, rs); + } + + for (i = 0; i < groupid + 1; i++) + show_group_stats(&runstats[i], i); + + show_disk_util(); +} + +static inline void add_stat_sample(struct io_stat *is, unsigned long val) +{ + if (val > is->max_val) + is->max_val = val; + if (val < is->min_val) + is->min_val = val; + + is->val += val; + is->val_sq += val * val; + is->samples++; +} + +static void add_log_sample(struct thread_data *td, struct io_log *iolog, + unsigned long val, int ddir) +{ + if (iolog->nr_samples == iolog->max_samples) { + int new_size = sizeof(struct io_sample) * iolog->max_samples*2; + + iolog->log = realloc(iolog->log, new_size); + iolog->max_samples <<= 1; + } + + iolog->log[iolog->nr_samples].val = val; + iolog->log[iolog->nr_samples].time = mtime_since_now(&td->epoch); + iolog->log[iolog->nr_samples].ddir = ddir; + iolog->nr_samples++; +} + +void add_clat_sample(struct thread_data *td, int ddir, unsigned long msec) +{ + add_stat_sample(&td->clat_stat[ddir], msec); + + if (td->clat_log) + add_log_sample(td, td->clat_log, msec, ddir); +} + +void add_slat_sample(struct thread_data *td, int ddir, unsigned long msec) +{ + add_stat_sample(&td->slat_stat[ddir], msec); + + if (td->slat_log) + add_log_sample(td, td->slat_log, msec, ddir); +} + +void add_bw_sample(struct thread_data *td, int ddir) +{ + unsigned long spent = mtime_since_now(&td->stat_sample_time[ddir]); + unsigned long rate; + + if (spent < td->bw_avg_time) + return; + + rate = (td->this_io_bytes[ddir] - td->stat_io_bytes[ddir]) / spent; + add_stat_sample(&td->bw_stat[ddir], rate); + + if (td->bw_log) + add_log_sample(td, td->bw_log, rate, ddir); + + gettimeofday(&td->stat_sample_time[ddir], NULL); + td->stat_io_bytes[ddir] = td->this_io_bytes[ddir]; +} + + diff --git a/time.c b/time.c new file mode 100644 index 00000000..52462633 --- /dev/null +++ b/time.c @@ -0,0 +1,122 @@ +#include +#include + +#include "fio.h" + +unsigned long utime_since(struct timeval *s, struct 
timeval *e) +{ + double sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = e->tv_usec - s->tv_usec; + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= (double) 1000000; + + return sec + usec; +} + +static unsigned long utime_since_now(struct timeval *s) +{ + struct timeval t; + + gettimeofday(&t, NULL); + return utime_since(s, &t); +} + +unsigned long mtime_since(struct timeval *s, struct timeval *e) +{ + double sec, usec; + + sec = e->tv_sec - s->tv_sec; + usec = e->tv_usec - s->tv_usec; + if (sec > 0 && usec < 0) { + sec--; + usec += 1000000; + } + + sec *= (double) 1000; + usec /= (double) 1000; + + return sec + usec; +} + +unsigned long mtime_since_now(struct timeval *s) +{ + struct timeval t; + + gettimeofday(&t, NULL); + return mtime_since(s, &t); +} + +unsigned long time_since_now(struct timeval *s) +{ + return mtime_since_now(s) / 1000; +} + +/* + * busy looping version for the last few usec + */ +static void __usec_sleep(unsigned int usec) +{ + struct timeval start; + + gettimeofday(&start, NULL); + while (utime_since_now(&start) < usec) + nop; +} + +void usec_sleep(struct thread_data *td, unsigned long usec) +{ + struct timespec req, rem; + + /* + * split into whole seconds and the sub-second remainder, tv_nsec + * must stay below 1000000000 or nanosleep() fails with EINVAL + */ + req.tv_sec = usec / 1000000; + req.tv_nsec = (usec % 1000000) * 1000; + + do { + if (usec < 5000) { + __usec_sleep(usec); + break; + } + + rem.tv_sec = rem.tv_nsec = 0; + if (nanosleep(&req, &rem) < 0) + break; + + if ((rem.tv_sec + rem.tv_nsec) == 0) + break; + + req.tv_nsec = rem.tv_nsec; + req.tv_sec = rem.tv_sec; + + usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000; + } while (!td->terminate); +} + +void rate_throttle(struct thread_data *td, unsigned long time_spent, + unsigned int bytes) +{ + unsigned long usec_cycle; + + if (!td->rate) + return; + + usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs); + + if (time_spent < usec_cycle) { + unsigned long s = usec_cycle - time_spent; + + td->rate_pending_usleep += s; + if (td->rate_pending_usleep >= 100000) { + usec_sleep(td,
td->rate_pending_usleep); + td->rate_pending_usleep = 0; + } + } else { + long overtime = time_spent - usec_cycle; + + td->rate_pending_usleep -= overtime; + } +}