--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+#include "fio.h"
+
+#define DEF_BS (4096)
+#define DEF_TIMEOUT (0)
+#define DEF_RATE_CYCLE (1000)
+#define DEF_ODIRECT (1)
+#define DEF_SEQUENTIAL (1)
+#define DEF_RAND_REPEAT (1)
+#define DEF_OVERWRITE (1)
+#define DEF_CREATE (1)
+#define DEF_INVALIDATE (1)
+#define DEF_SYNCIO (0)
+#define DEF_RANDSEED (0xb1899bedUL)
+#define DEF_BWAVGTIME (500)
+#define DEF_CREATE_SER (1)
+#define DEF_CREATE_FSYNC (1)
+#define DEF_LOOPS (1)
+#define DEF_VERIFY (0)
+#define DEF_STONEWALL (0)
+#define DEF_NUMJOBS (1)
+#define DEF_USE_THREAD (0)
+#define DEF_USE_MMAP (0)
+
+static int repeatable = DEF_RAND_REPEAT;
+static char *ini_file;
+static int max_jobs = MAX_JOBS;
+
+struct thread_data def_thread;
+struct thread_data *threads = NULL;
+
+int rate_quit = 0;
+int write_lat_log = 0;
+int write_bw_log = 0;
+int exitall_on_terminate = 0;
+
+static int setup_rate(struct thread_data *td)
+{
+ int nr_reads_per_sec;
+
+ if (!td->rate)
+ return 0;
+
+ if (td->rate < td->ratemin) {
+ fprintf(stderr, "min rate larger than nominal rate\n");
+ return -1;
+ }
+
+ nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
+ td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
+ td->rate_pending_usleep = 0;
+ return 0;
+}
+
+static void setup_log(struct io_log **log)
+{
+ struct io_log *l = malloc(sizeof(*l));
+
+ l->nr_samples = 0;
+ l->max_samples = 1024;
+ l->log = malloc(l->max_samples * sizeof(struct io_sample));
+ *log = l;
+}
+
+void finish_log(struct thread_data *td, struct io_log *log, const char *name)
+{
+ char file_name[128];
+ FILE *f;
+ unsigned int i;
+
+ sprintf(file_name, "client%d_%s.log", td->thread_number, name);
+ f = fopen(file_name, "w");
+ if (!f) {
+ perror("fopen log");
+ return;
+ }
+
+ for (i = 0; i < log->nr_samples; i++)
+ fprintf(f, "%lu, %lu\n", log->log[i].time, log->log[i].val);
+
+ fclose(f);
+ free(log->log);
+ free(log);
+}
+
+static struct thread_data *get_new_job(int global, struct thread_data *parent)
+{
+ struct thread_data *td;
+
+ if (global)
+ return &def_thread;
+ if (thread_number >= max_jobs)
+ return NULL;
+
+ td = &threads[thread_number++];
+ memset(td, 0, sizeof(*td));
+
+ sprintf(td->directory, ".");
+
+ td->fd = -1;
+ td->thread_number = thread_number;
+
+ td->ddir = parent->ddir;
+ td->ioprio = parent->ioprio;
+ td->sequential = parent->sequential;
+ td->bs = parent->bs;
+ td->min_bs = parent->min_bs;
+ td->max_bs = parent->max_bs;
+ td->odirect = parent->odirect;
+ td->thinktime = parent->thinktime;
+ td->fsync_blocks = parent->fsync_blocks;
+ td->start_delay = parent->start_delay;
+ td->timeout = parent->timeout;
+ td->use_aio = parent->use_aio;
+ td->create_file = parent->create_file;
+ td->overwrite = parent->overwrite;
+ td->invalidate_cache = parent->invalidate_cache;
+ td->file_size = parent->file_size;
+ td->file_offset = parent->file_offset;
+ td->rate = parent->rate;
+ td->ratemin = parent->ratemin;
+ td->ratecycle = parent->ratecycle;
+ td->aio_depth = parent->aio_depth;
+ td->sync_io = parent->sync_io;
+ td->mem_type = parent->mem_type;
+ td->bw_avg_time = parent->bw_avg_time;
+ td->create_serialize = parent->create_serialize;
+ td->create_fsync = parent->create_fsync;
+ td->loops = parent->loops;
+ td->verify = parent->verify;
+ td->stonewall = parent->stonewall;
+ td->numjobs = parent->numjobs;
+ td->use_thread = parent->use_thread;
+ td->use_mmap = parent->use_mmap;
+ memcpy(&td->cpumask, &parent->cpumask, sizeof(td->cpumask));
+
+ return td;
+}
+
+static void put_job(struct thread_data *td)
+{
+ memset(&threads[td->thread_number - 1], 0, sizeof(*td));
+ thread_number--;
+}
+
+static int add_job(struct thread_data *td, const char *jobname, int prioclass,
+ int prio)
+{
+ struct stat sb;
+ int numjobs;
+
+ if (td == &def_thread)
+ return 0;
+
+ td->filetype = FIO_TYPE_FILE;
+ if (!stat(jobname, &sb) && S_ISBLK(sb.st_mode))
+ td->filetype = FIO_TYPE_BD;
+
+ if (td->filetype == FIO_TYPE_FILE)
+ sprintf(td->file_name, "%s/%s.%d", td->directory, jobname, td->thread_number);
+ else
+ strcpy(td->file_name, jobname);
+
+ sem_init(&td->mutex, 1, 0);
+ td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
+
+ td->clat_stat.min_val = ULONG_MAX;
+ td->slat_stat.min_val = ULONG_MAX;
+ td->bw_stat.min_val = ULONG_MAX;
+
+ run_str[td->thread_number - 1] = 'P';
+
+ if (td->use_aio) {
+ if (!td->aio_depth)
+ td->aio_depth = 1;
+ if (td->use_mmap)
+ td->use_mmap = 0;
+ }
+
+ if (td->min_bs == -1U)
+ td->min_bs = td->bs;
+ if (td->max_bs == -1U)
+ td->max_bs = td->bs;
+ if (td_read(td))
+ td->verify = 0;
+
+ if (td->stonewall && td->thread_number > 1)
+ groupid++;
+
+ td->groupid = groupid;
+
+ if (setup_rate(td))
+ goto err;
+
+ if (write_lat_log)
+ setup_log(&td->lat_log);
+ if (write_bw_log)
+ setup_log(&td->bw_log);
+
+ printf("Client%d (g=%d): rw=%d, prio=%d/%d, seq=%d, odir=%d, mmap=%d, bs=%d-%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, td->groupid, td->ddir, prioclass, prio, td->sequential, td->odirect, td->use_mmap, td->min_bs, td->max_bs, td->rate, td->use_aio, td->aio_depth);
+
+ /*
+ * recurse add identical jobs, clear numjobs and stonewall options
+ * as they don't apply to sub-jobs
+ */
+ numjobs = td->numjobs;
+ while (--numjobs) {
+ struct thread_data *td_new = get_new_job(0, td);
+
+ if (!td_new)
+ goto err;
+
+ td_new->numjobs = 1;
+ td_new->stonewall = 0;
+
+ if (add_job(td_new, jobname, prioclass, prio))
+ goto err;
+ }
+ return 0;
+err:
+ put_job(td);
+ return -1;
+}
+
+int init_random_state(struct thread_data *td)
+{
+ unsigned long seed;
+ int fd, num_maps, blocks;
+
+ fd = open("/dev/random", O_RDONLY);
+ if (fd == -1) {
+ td->error = errno;
+ return 1;
+ }
+
+ if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
+ td->error = EIO;
+ close(fd);
+ return 1;
+ }
+
+ close(fd);
+
+ srand48_r(seed, &td->bsrange_state);
+ srand48_r(seed, &td->verify_state);
+
+ if (td->sequential)
+ return 0;
+
+ if (repeatable)
+ seed = DEF_RANDSEED;
+
+ blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
+ num_maps = blocks / BLOCKS_PER_MAP;
+ td->file_map = malloc(num_maps * sizeof(long));
+ td->num_maps = num_maps;
+ memset(td->file_map, 0, num_maps * sizeof(long));
+
+ srand48_r(seed, &td->random_state);
+ return 0;
+}
+
+static void fill_cpu_mask(cpu_set_t cpumask, int cpu)
+{
+ unsigned int i;
+
+ CPU_ZERO(&cpumask);
+
+ for (i = 0; i < sizeof(int) * 8; i++) {
+ if ((1 << i) & cpu)
+ CPU_SET(i, &cpumask);
+ }
+}
+
+static unsigned long get_mult(char c)
+{
+ switch (c) {
+ case 'k':
+ case 'K':
+ return 1024;
+ case 'm':
+ case 'M':
+ return 1024 * 1024;
+ case 'g':
+ case 'G':
+ return 1024 * 1024 * 1024;
+ default:
+ return 1;
+ }
+}
+
+/*
+ * convert string after '=' into decimal value, noting any size suffix
+ */
+static int str_cnv(char *p, unsigned long long *val)
+{
+ char *str;
+ int len;
+
+ str = strstr(p, "=");
+ if (!str)
+ return 1;
+
+ str++;
+ len = strlen(str);
+
+ *val = strtoul(str, NULL, 10);
+ if (*val == ULONG_MAX && errno == ERANGE)
+ return 1;
+
+ *val *= get_mult(str[len - 2]);
+ return 0;
+}
+
+static int check_strcnv(char *p, char *name, unsigned long long *val)
+{
+ if (!strstr(p, name))
+ return 1;
+
+ return str_cnv(p, val);
+}
+
+static int check_str(char *p, char *name, char *option)
+{
+ char *s = strstr(p, name);
+
+ if (!s)
+ return 1;
+
+ s += strlen(name);
+ if (strstr(s, option))
+ return 0;
+
+ return 1;
+}
+
+static int check_strstore(char *p, char *name, char *dest)
+{
+ char *s = strstr(p, name);
+
+ if (!s)
+ return 1;
+
+ s = strstr(p, "=");
+ if (!s)
+ return 1;
+
+ s++;
+ while (isblank(*s))
+ s++;
+
+ strcpy(dest, s);
+
+ s = dest + strlen(dest) - 1;
+ while (isblank(*s)) {
+ *s = '\0';
+ s--;
+ }
+
+ return 0;
+}
+
+static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
+{
+ char str[128];
+ char s1, s2;
+
+ sprintf(str, "%s=%%lu%%c-%%lu%%c", name);
+ if (sscanf(p, str, s, &s1, e, &s2) == 4) {
+ *s *= get_mult(s1);
+ *e *= get_mult(s2);
+ return 0;
+ }
+
+ sprintf(str, "%s = %%lu%%c-%%lu%%c", name);
+ if (sscanf(p, str, s, &s1, e, &s2) == 4) {
+ *s *= get_mult(s1);
+ *e *= get_mult(s2);
+ return 0;
+ }
+
+ sprintf(str, "%s=%%lu-%%lu", name);
+ if (sscanf(p, str, s, e) == 2)
+ return 0;
+
+ sprintf(str, "%s = %%lu-%%lu", name);
+ if (sscanf(p, str, s, e) == 2)
+ return 0;
+
+ return 1;
+
+}
+
+static int check_int(char *p, char *name, unsigned int *val)
+{
+ char str[128];
+
+ sprintf(str, "%s=%%d", name);
+ if (sscanf(p, str, val) == 1)
+ return 0;
+
+ sprintf(str, "%s = %%d", name);
+ if (sscanf(p, str, val) == 1)
+ return 0;
+
+ return 1;
+}
+
+static int check_strset(char *p, char *name)
+{
+ return strncmp(p, name, strlen(name));
+}
+
+static int is_empty_or_comment(char *line)
+{
+ unsigned int i;
+
+ for (i = 0; i < strlen(line); i++) {
+ if (line[i] == ';')
+ return 1;
+ if (!isspace(line[i]) && !iscntrl(line[i]))
+ return 0;
+ }
+
+ return 1;
+}
+
+int parse_jobs_ini(char *file)
+{
+ unsigned int prioclass, prio, cpu, global;
+ unsigned long long ull;
+ unsigned long ul1, ul2;
+ struct thread_data *td;
+ char *string, *name;
+ fpos_t off;
+ FILE *f;
+ char *p;
+
+ f = fopen(file, "r");
+ if (!f) {
+ perror("fopen");
+ return 1;
+ }
+
+ string = malloc(4096);
+ name = malloc(256);
+
+ while ((p = fgets(string, 4096, f)) != NULL) {
+ if (is_empty_or_comment(p))
+ continue;
+ if (sscanf(p, "[%s]", name) != 1)
+ continue;
+
+ global = !strncmp(name, "global", 6);
+
+ name[strlen(name) - 1] = '\0';
+
+ td = get_new_job(global, &def_thread);
+ if (!td)
+ return 1;
+
+ prioclass = 2;
+ prio = 4;
+
+ fgetpos(f, &off);
+ while ((p = fgets(string, 4096, f)) != NULL) {
+ if (is_empty_or_comment(p))
+ continue;
+ if (strstr(p, "["))
+ break;
+ if (!check_int(p, "rw", &td->ddir)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "prio", &prio)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "prioclass", &prioclass)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "direct", &td->odirect)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "rate", &td->rate)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "ratemin", &td->ratemin)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "ratecycle", &td->ratecycle)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "thinktime", &td->thinktime)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "cpumask", &cpu)) {
+ fill_cpu_mask(td->cpumask, cpu);
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "fsync", &td->fsync_blocks)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "startdelay", &td->start_delay)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "timeout", &td->timeout)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "invalidate",&td->invalidate_cache)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "aio_depth", &td->aio_depth)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "sync", &td->sync_io)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "create_serialize", &td->create_serialize)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "create_fsync", &td->create_fsync)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "loops", &td->loops)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "numjobs", &td->numjobs)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_int(p, "mmap", &td->use_mmap)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_range(p, "bsrange", &ul1, &ul2)) {
+ if (ul1 & 511)
+ printf("bad min block size, must be a multiple of 512\n");
+ else
+ td->min_bs = ul1;
+ if (ul2 & 511)
+ printf("bad max block size, must be a multiple of 512\n");
+ else
+ td->max_bs = ul2;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strcnv(p, "bs", &ull)) {
+ if (ull & 511)
+ printf("bad block size, must be a multiple of 512\n");
+ else
+ td->bs = ull;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strcnv(p, "size", &td->file_size)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strcnv(p, "offset", &td->file_offset)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strstore(p, "directory", td->directory)) {
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_str(p, "mem", "malloc")) {
+ td->mem_type = MEM_MALLOC;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_str(p, "mem", "shm")) {
+ td->mem_type = MEM_SHM;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_str(p, "mem", "mmap")) {
+ td->mem_type = MEM_MMAP;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_str(p, "verify", "md5")) {
+ td->verify = VERIFY_MD5;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_str(p, "verify", "crc32")) {
+ td->verify = VERIFY_CRC32;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "sequential")) {
+ td->sequential = 1;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "random")) {
+ td->sequential = 0;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "aio")) {
+ td->use_aio = 1;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "create")) {
+ td->create_file = 1;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "overwrite")) {
+ td->overwrite = 1;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "exitall")) {
+ exitall_on_terminate = 1;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "stonewall")) {
+ td->stonewall = 1;
+ fgetpos(f, &off);
+ continue;
+ }
+ if (!check_strset(p, "thread")) {
+ td->use_thread = 1;
+ fgetpos(f, &off);
+ continue;
+ }
+
+ printf("Client%d: bad option %s\n",td->thread_number,p);
+ }
+ fsetpos(f, &off);
+
+ if (add_job(td, name, prioclass, prio))
+ return 1;
+ }
+
+ free(string);
+ free(name);
+ fclose(f);
+ return 0;
+}
+
+static int fill_def_thread(void)
+{
+ memset(&def_thread, 0, sizeof(def_thread));
+
+ if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &def_thread.cpumask) == -1) {
+ perror("sched_getaffinity");
+ return 1;
+ }
+
+ /*
+ * fill globals
+ */
+ def_thread.ddir = DDIR_READ;
+ def_thread.bs = DEF_BS;
+ def_thread.min_bs = -1;
+ def_thread.max_bs = -1;
+ def_thread.odirect = DEF_ODIRECT;
+ def_thread.ratecycle = DEF_RATE_CYCLE;
+ def_thread.sequential = DEF_SEQUENTIAL;
+ def_thread.timeout = DEF_TIMEOUT;
+ def_thread.create_file = DEF_CREATE;
+ def_thread.overwrite = DEF_OVERWRITE;
+ def_thread.invalidate_cache = DEF_INVALIDATE;
+ def_thread.sync_io = DEF_SYNCIO;
+ def_thread.mem_type = MEM_MALLOC;
+ def_thread.bw_avg_time = DEF_BWAVGTIME;
+ def_thread.create_serialize = DEF_CREATE_SER;
+ def_thread.create_fsync = DEF_CREATE_FSYNC;
+ def_thread.loops = DEF_LOOPS;
+ def_thread.verify = DEF_VERIFY;
+ def_thread.stonewall = DEF_STONEWALL;
+ def_thread.numjobs = DEF_NUMJOBS;
+ def_thread.use_thread = DEF_USE_THREAD;
+ def_thread.use_mmap = DEF_USE_MMAP;
+
+ return 0;
+}
+
+static void parse_cmd_line(int argc, char *argv[])
+{
+ int i;
+
+ for (i = 1; i < argc; i++) {
+ char *parm = argv[i];
+
+ if (parm[0] != '-')
+ break;
+
+ parm++;
+ switch (*parm) {
+ case 's':
+ parm++;
+ def_thread.sequential = !!atoi(parm);
+ break;
+ case 'b':
+ parm++;
+ def_thread.bs = atoi(parm);
+ def_thread.bs <<= 10;
+ if (!def_thread.bs) {
+ printf("bad block size\n");
+ def_thread.bs = DEF_BS;
+ }
+ break;
+ case 't':
+ parm++;
+ def_thread.timeout = atoi(parm);
+ break;
+ case 'r':
+ parm++;
+ repeatable = !!atoi(parm);
+ break;
+ case 'R':
+ parm++;
+ rate_quit = !!atoi(parm);
+ break;
+ case 'o':
+ parm++;
+ def_thread.odirect = !!atoi(parm);
+ break;
+ case 'f':
+ if (i + 1 >= argc) {
+ printf("-f needs file as arg\n");
+ break;
+ }
+ ini_file = strdup(argv[i+1]);
+ i++;
+ break;
+ case 'l':
+ write_lat_log = 1;
+ break;
+ case 'w':
+ write_bw_log = 1;
+ break;
+ default:
+ printf("bad option %s\n", argv[i]);
+ break;
+ }
+ }
+}
+
+static void free_shm(void)
+{
+ struct shmid_ds sbuf;
+
+ if (threads) {
+ shmdt(threads);
+ threads = NULL;
+ shmctl(shm_id, IPC_RMID, &sbuf);
+ }
+}
+
+static int setup_thread_area(void)
+{
+ /*
+ * 1024 is too much on some machines, scale max_jobs if
+ * we get a failure that looks like too large a shm segment
+ */
+ do {
+ int s = max_jobs * sizeof(struct thread_data);
+
+ shm_id = shmget(0, s, IPC_CREAT | 0600);
+ if (shm_id != -1)
+ break;
+ if (errno != EINVAL) {
+ perror("shmget");
+ break;
+ }
+
+ max_jobs >>= 1;
+ } while (max_jobs);
+
+ if (shm_id == -1)
+ return 1;
+
+ threads = shmat(shm_id, NULL, 0);
+ if (threads == (void *) -1) {
+ perror("shmat");
+ return 1;
+ }
+
+ atexit(free_shm);
+ return 0;
+}
+
+int parse_options(int argc, char *argv[])
+{
+ if (setup_thread_area())
+ return 1;
+ if (fill_def_thread())
+ return 1;
+
+ parse_cmd_line(argc, argv);
+
+ if (!ini_file) {
+ printf("Need job file\n");
+ return 1;
+ }
+
+ if (parse_jobs_ini(ini_file))
+ return 1;
+
+ return 0;
+}