#!/bin/sh
GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.8
+DEF_VER=fio-3.11
LF='
'
Parse options only, don't start any I/O.
+.. option:: --merge-blktrace-only
+
+ Merge blktraces only, don't start any I/O.
+
.. option:: --output=filename
Write output to file `filename`.
``serialize_overlap`` tells fio to avoid provoking this behavior by explicitly
serializing in-flight I/Os that have a non-zero overlap. Note that setting
this option can reduce both performance and the :option:`iodepth` achieved.
- Additionally this option does not work when :option:`io_submit_mode` is set to
- offload. Default: false.
+
+ This option only applies to I/Os issued for a single job except when it is
+ enabled along with :option:`io_submit_mode`=offload. In offload mode, fio
+ will check for overlap among all I/Os submitted by offload jobs with :option:`serialize_overlap`
+ enabled. Threads must be used for all such jobs.
+
+ Default: false.
.. option:: io_submit_mode=str
will be read at once. If selected true, input from iolog will be read
gradually. Useful when iolog is very large, or it is generated.
+.. option:: merge_blktrace_file=str
+
+ When specified, rather than replaying the logs passed to :option:`read_iolog`,
+ the logs go through a merge phase which aggregates them into a single
+ blktrace. The resulting file is then passed on as the :option:`read_iolog`
+ parameter. The intention here is to make the order of events consistent.
+ This limits the influence of the scheduler compared to replaying multiple
+ blktraces via concurrent jobs.
+
+.. option:: merge_blktrace_scalars=float_list
+
+ This is a percentage based option that is index paired with the list of
+ files passed to :option:`read_iolog`. When merging is performed, scale
+ the time of each event by the corresponding amount. For example,
+ ``--merge_blktrace_scalars="50:100"`` runs the first trace in halftime
+ and the second trace in realtime. This knob is separately tunable from
+ :option:`replay_time_scale` which scales the trace during runtime and
+ does not change the output of the merge unlike this option.
+
+.. option:: merge_blktrace_iters=float_list
+
+ This is a whole number option that is index paired with the list of files
+ passed to :option:`read_iolog`. When merging is performed, run each trace
+ for the specified number of iterations. For example,
+ ``--merge_blktrace_iters="2:1"`` runs the first trace for two iterations
+ and the second trace for one iteration.
+
.. option:: replay_no_stall=bool
When replaying I/O with :option:`read_iolog` the default behavior is to
.. option:: replay_align=int
- Force alignment of I/O offsets and lengths in a trace to this power of 2
- value.
+ Force alignment of the byte offsets in a trace to this value. The value
+ must be a power of 2.
.. option:: replay_scale=int
- Scale sector offsets down by this factor when replaying traces.
+ Scale byte offsets down by this factor when replaying traces. Should most
+ likely use :option:`replay_align` as well.
.. option:: replay_skip=str
data from the rolling collection window. Threshold limits can be expressed
as a fixed value or as a percentage of the mean in the collection window.
+ When using this feature, most jobs should include the :option:`time_based`
+ and :option:`runtime` options or the :option:`loops` option so that fio does not
+ stop running after it has covered the full size of the specified file(s) or device(s).
+
**iops**
Collect IOPS data. Stop the job if all individual IOPS measurements
are within the specified limit of the mean IOPS (e.g., ``iops:2``
**trim**
Trim the given file from the given `offset` for `length` bytes.
+
+I/O Replay - Merging Traces
+---------------------------
+
+Colocation is a common practice used to get the most out of a machine.
+Knowing which workloads play nicely with each other and which ones don't is
+a much harder task. While fio can replay workloads concurrently via multiple
+jobs, it leaves some variability up to the scheduler making results harder to
+reproduce. Merging is a way to make the order of events consistent.
+
+Merging is integrated into I/O replay and done when a
+:option:`merge_blktrace_file` is specified. The list of files passed to
+:option:`read_iolog` go through the merge process and output a single file
+stored to the specified file. The output file is passed on as if it were the
+only file passed to :option:`read_iolog`. An example would look like::
+
+ $ fio --read_iolog="<file1>:<file2>" --merge_blktrace_file="<output_file>"
+
+Creating only the merged file can be done by passing the command line argument
+:option:`--merge-blktrace-only`.
+
+Scaling traces can be done to see the relative impact of any particular trace
+being slowed down or sped up. :option:`merge_blktrace_scalars` takes in a colon
+separated list of percentage scalars. It is index paired with the files passed
+to :option:`read_iolog`.
+
+With scaling, it may be desirable to match the running time of all traces.
+This can be done with :option:`merge_blktrace_iters`. It is index paired with
+:option:`read_iolog` just like :option:`merge_blktrace_scalars`.
+
+As an example, consider two traces, A and B, each 60s long. If we want to see
+the impact of trace A issuing IOs twice as fast and repeat trace A over the
+runtime of trace B, the following can be done::
+
+	$ fio --read_iolog="<trace_a>:<trace_b>" --merge_blktrace_file="<output_file>" --merge_blktrace_scalars="50:100" --merge_blktrace_iters="2:1"
+
+This runs trace A at 2x the speed twice for approximately the same runtime as
+a single run of trace B.
+
+
CPU idleness profiling
----------------------
ifdef CONFIG_GUASI
SOURCE += engines/guasi.c
endif
-ifdef CONFIG_FUSION_AW
- SOURCE += engines/fusion-aw.c
-endif
ifdef CONFIG_SOLARISAIO
SOURCE += engines/solarisaio.c
endif
ifneq (,$(findstring CYGWIN,$(CONFIG_TARGET_OS)))
SOURCE += os/windows/posix.c
LIBS += -lpthread -lpsapi -lws2_32
- CFLAGS += -DPSAPI_VERSION=1 -Ios/windows/posix/include -Wno-format -static
+ CFLAGS += -DPSAPI_VERSION=1 -Ios/windows/posix/include -Wno-format
endif
OBJS := $(SOURCE:.c=.o)
#include <sys/stat.h>
#include <sys/wait.h>
#include <math.h>
+#include <pthread.h>
#include "fio.h"
#include "smalloc.h"
static struct flist_head *cgroup_list;
static struct cgroup_mnt *cgroup_mnt;
static int exit_value;
-static volatile int fio_abort;
+static volatile bool fio_abort;
static unsigned int nr_process = 0;
static unsigned int nr_thread = 0;
int shm_id = 0;
int temp_stall_ts;
unsigned long done_secs = 0;
+pthread_mutex_t overlap_check = PTHREAD_MUTEX_INITIALIZER;
#define JOB_START_TIMEOUT (5 * 1000)
/*
* Check if io_u will overlap an in-flight IO in the queue
*/
-static bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u)
+bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u)
{
bool overlap;
struct io_u *check_io_u;
"perhaps try --debug=io option for details?\n",
td->o.name, td->io_ops->name);
+ if (td->o.serialize_overlap && td->o.io_submit_mode == IO_MODE_OFFLOAD)
+ pthread_mutex_lock(&overlap_check);
td_set_runstate(td, TD_FINISHING);
+ if (td->o.serialize_overlap && td->o.io_submit_mode == IO_MODE_OFFLOAD)
+ pthread_mutex_unlock(&overlap_check);
update_rusage_stat(td);
td->ts.total_run_time = mtime_since_now(&td->epoch);
}
if (output_format & FIO_OUTPUT_NORMAL) {
- log_info("Starting ");
+ struct buf_output out;
+
+ buf_output_init(&out);
+ __log_buf(&out, "Starting ");
if (nr_thread)
- log_info("%d thread%s", nr_thread,
+ __log_buf(&out, "%d thread%s", nr_thread,
nr_thread > 1 ? "s" : "");
if (nr_process) {
if (nr_thread)
- log_info(" and ");
- log_info("%d process%s", nr_process,
+ __log_buf(&out, " and ");
+ __log_buf(&out, "%d process%s", nr_process,
nr_process > 1 ? "es" : "");
}
- log_info("\n");
- log_info_flush();
+ __log_buf(&out, "\n");
+ log_info_buf(out.buf, out.buflen);
+ buf_output_free(&out);
}
todo = thread_number;
if (fio_sem_down_timeout(startup_sem, 10000)) {
log_err("fio: job startup hung? exiting.\n");
fio_terminate_threads(TERMINATE_ALL);
- fio_abort = 1;
+ fio_abort = true;
nr_started--;
free(fd);
break;
*/
#include <stdio.h>
#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <linux/fs.h>
+#include <unistd.h>
#include "flist.h"
#include "fio.h"
flist_add_tail(&ipo->list, &td->io_log_list);
}
-static int get_dev_blocksize(const char *dev, unsigned int *bs)
+static int trace_add_file(struct thread_data *td, __u32 device)
{
- int fd;
-
- fd = open(dev, O_RDONLY);
- if (fd < 0)
- return 1;
-
- if (ioctl(fd, BLKSSZGET, bs) < 0) {
- close(fd);
- return 1;
- }
-
- close(fd);
- return 0;
-}
-
-static int trace_add_file(struct thread_data *td, __u32 device,
- unsigned int *bs)
-{
- static unsigned int last_maj, last_min, last_fileno, last_bs;
+ static unsigned int last_maj, last_min, last_fileno;
unsigned int maj = FMAJOR(device);
unsigned int min = FMINOR(device);
struct fio_file *f;
- unsigned int i;
char dev[256];
+ unsigned int i;
- if (last_maj == maj && last_min == min) {
- *bs = last_bs;
+ if (last_maj == maj && last_min == min)
return last_fileno;
- }
last_maj = maj;
last_min = min;
/*
* check for this file in our list
*/
- for_each_file(td, f, i) {
+ for_each_file(td, f, i)
if (f->major == maj && f->minor == min) {
last_fileno = f->fileno;
- last_bs = f->bs;
- goto out;
+ return last_fileno;
}
- }
strcpy(dev, "/dev");
if (blktrace_lookup_device(td->o.replay_redirect, dev, maj, min)) {
- unsigned int this_bs;
int fileno;
if (td->o.replay_redirect)
dprint(FD_BLKTRACE, "add devices %s\n", dev);
fileno = add_file_exclusive(td, dev);
-
- if (get_dev_blocksize(dev, &this_bs))
- this_bs = 512;
-
td->o.open_files++;
td->files[fileno]->major = maj;
td->files[fileno]->minor = min;
- td->files[fileno]->bs = this_bs;
trace_add_open_close_event(td, fileno, FIO_LOG_OPEN_FILE);
-
last_fileno = fileno;
- last_bs = this_bs;
}
-out:
- *bs = last_bs;
return last_fileno;
}
*/
static void store_ipo(struct thread_data *td, unsigned long long offset,
unsigned int bytes, int rw, unsigned long long ttime,
- int fileno, unsigned int bs)
+ int fileno)
{
struct io_piece *ipo;
ipo = calloc(1, sizeof(*ipo));
init_ipo(ipo);
- ipo->offset = offset * bs;
+ ipo->offset = offset * 512;
if (td->o.replay_scale)
ipo->offset = ipo->offset / td->o.replay_scale;
ipo_bytes_align(td->o.replay_align, ipo);
static void handle_trace_discard(struct thread_data *td,
struct blk_io_trace *t,
unsigned long long ttime,
- unsigned long *ios, unsigned int *rw_bs)
+ unsigned long *ios, unsigned int *bs)
{
struct io_piece *ipo;
- unsigned int bs;
int fileno;
if (td->o.replay_skip & (1u << DDIR_TRIM))
ipo = calloc(1, sizeof(*ipo));
init_ipo(ipo);
- fileno = trace_add_file(td, t->device, &bs);
+ fileno = trace_add_file(td, t->device);
ios[DDIR_TRIM]++;
- if (t->bytes > rw_bs[DDIR_TRIM])
- rw_bs[DDIR_TRIM] = t->bytes;
+ if (t->bytes > bs[DDIR_TRIM])
+ bs[DDIR_TRIM] = t->bytes;
td->o.size += t->bytes;
INIT_FLIST_HEAD(&ipo->list);
- ipo->offset = t->sector * bs;
+ ipo->offset = t->sector * 512;
if (td->o.replay_scale)
ipo->offset = ipo->offset / td->o.replay_scale;
ipo_bytes_align(td->o.replay_align, ipo);
static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
unsigned long long ttime, unsigned long *ios,
- unsigned int *rw_bs)
+ unsigned int *bs)
{
- unsigned int bs;
int rw;
int fileno;
- fileno = trace_add_file(td, t->device, &bs);
+ fileno = trace_add_file(td, t->device);
rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
return;
}
- if (t->bytes > rw_bs[rw])
- rw_bs[rw] = t->bytes;
+ if (t->bytes > bs[rw])
+ bs[rw] = t->bytes;
ios[rw]++;
td->o.size += t->bytes;
- store_ipo(td, t->sector, t->bytes, rw, ttime, fileno, bs);
+ store_ipo(td, t->sector, t->bytes, rw, ttime, fileno);
}
static void handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
unsigned long long ttime, unsigned long *ios)
{
struct io_piece *ipo;
- unsigned int bs;
int fileno;
if (td->o.replay_skip & (1u << DDIR_SYNC))
ipo = calloc(1, sizeof(*ipo));
init_ipo(ipo);
- fileno = trace_add_file(td, t->device, &bs);
+ fileno = trace_add_file(td, t->device);
ipo->delay = ttime / 1000;
ipo->ddir = DDIR_SYNC;
fifo_free(fifo);
return false;
}
+
+/*
+ * Copy a per-file merge parameter list (scalars or iteration counts) into
+ * the blktrace cursors, defaulting to @def when the option was not given.
+ * @off is the byte offset of the target int member inside struct
+ * blktrace_cursor (obtained via offsetof by the caller).
+ *
+ * Returns 0 on success, or the option-list length when it is non-empty but
+ * does not match @nr_logs — the caller treats that as an error.
+ */
+static int init_merge_param_list(fio_fp64_t *vals, struct blktrace_cursor *bcs,
+				 int nr_logs, int def, size_t off)
+{
+	int i = 0, len = 0;
+
+	/* count entries actually supplied; 0.0 terminates the list */
+	while (len < FIO_IO_U_LIST_MAX_LEN && vals[len].u.f != 0.0)
+		len++;
+
+	/* a supplied list must pair one-to-one with the trace files */
+	if (len && len != nr_logs)
+		return len;
+
+	for (i = 0; i < nr_logs; i++) {
+		/* locate the member selected by @off within this cursor */
+		int *val = (int *)((char *)&bcs[i] + off);
+		*val = def;
+		if (len)
+			*val = (int)vals[i].u.f;
+	}
+
+	return 0;
+
+}
+
+/*
+ * Return the index of the cursor whose pending event has the smallest
+ * (already scaled) timestamp among the first @nr_logs active cursors.
+ */
+static int find_earliest_io(struct blktrace_cursor *bcs, int nr_logs)
+{
+	__u64 time = ~(__u64)0;
+	int idx = 0, i;
+
+	for (i = 0; i < nr_logs; i++) {
+		if (bcs[i].t.time < time) {
+			time = bcs[i].t.time;
+			idx = i;
+		}
+	}
+
+	return idx;
+}
+
+/*
+ * Cursor @i has exhausted its trace.  Either rewind the file for the next
+ * iteration, or — when all iterations are done — close it and compact the
+ * cursor array so that entries [0, *nr_logs) remain the active ones.
+ */
+static void merge_finish_file(struct blktrace_cursor *bcs, int i, int *nr_logs)
+{
+	bcs[i].iter++;
+	if (bcs[i].iter < bcs[i].nr_iter) {
+		lseek(bcs[i].fd, 0, SEEK_SET);
+		return;
+	}
+
+	*nr_logs -= 1;
+
+	/* close file */
+	fifo_free(bcs[i].fifo);
+	close(bcs[i].fd);
+
+	/* keep active files contiguous */
+	memmove(&bcs[i], &bcs[*nr_logs], sizeof(bcs[i]));
+}
+
+/*
+ * Read the next queue event from cursor @bc into bc->t, skipping event
+ * types fio does not replay.  The event time is shifted by the iteration
+ * count and scaled by the per-file percentage so merged timestamps are
+ * directly comparable across traces.
+ *
+ * Returns the number of bytes read (> 0), 0 on end of trace (recording the
+ * trace length on the first EOF), or negative on error.
+ */
+static int read_trace(struct thread_data *td, struct blktrace_cursor *bc)
+{
+	int ret = 0;
+	struct blk_io_trace *t = &bc->t;
+
+read_skip:
+	/* read an io trace */
+	ret = trace_fifo_get(td, bc->fifo, bc->fd, t, sizeof(*t));
+	if (ret < 0) {
+		return ret;
+	} else if (!ret) {
+		/* end of trace: remember its duration for iteration offsets */
+		if (!bc->length)
+			bc->length = bc->t.time;
+		return ret;
+	} else if (ret < (int) sizeof(*t)) {
+		log_err("fio: short fifo get\n");
+		return -1;
+	}
+
+	if (bc->swap)
+		byteswap_trace(t);
+
+	/* skip over actions that fio does not care about */
+	if ((t->action & 0xffff) != __BLK_TA_QUEUE ||
+	    t_get_ddir(t) == DDIR_INVAL) {
+		ret = discard_pdu(td, bc->fifo, bc->fd, t);
+		if (ret < 0) {
+			td_verror(td, ret, "blktrace lseek");
+			return ret;
+		} else if (t->pdu_len != ret) {
+			log_err("fio: discarded %d of %d\n", ret,
+				t->pdu_len);
+			return -1;
+		}
+		goto read_skip;
+	}
+
+	/* offset by completed iterations, then scale by the percentage knob */
+	t->time = (t->time + bc->iter * bc->length) * bc->scalar / 100;
+
+	return ret;
+}
+
+/*
+ * Append one io trace record to the merged output.  The pdu was already
+ * discarded from the input, so pdu_len is cleared first.  Returns the
+ * fwrite() item count: 1 on success, 0 on failure.
+ */
+static int write_trace(FILE *fp, struct blk_io_trace *t)
+{
+	/* pdu is not used so just write out only the io trace */
+	t->pdu_len = 0;
+	return fwrite((void *)t, sizeof(*t), 1, fp);
+}
+
+/*
+ * Merge the blktrace logs listed in read_iolog_file into a single,
+ * time-ordered blktrace written to merge_blktrace_file.  On success the
+ * job's read_iolog_file is redirected to the merged output so replay
+ * proceeds from it.
+ *
+ * Returns 0 on success, non-zero (negative errno style) on error.
+ */
+int merge_blktrace_iologs(struct thread_data *td)
+{
+	int nr_logs = get_max_str_idx(td->o.read_iolog_file);
+	struct blktrace_cursor *bcs = malloc(sizeof(struct blktrace_cursor) *
+					     nr_logs);
+	struct blktrace_cursor *bc;
+	FILE *merge_fp;
+	char *str, *ptr, *name, *merge_buf = NULL;
+	int i, ret;
+
+	/* per-file scale percentages; default 100% == realtime */
+	ret = init_merge_param_list(td->o.merge_blktrace_scalars, bcs, nr_logs,
+				    100, offsetof(struct blktrace_cursor,
+						  scalar));
+	if (ret) {
+		log_err("fio: merge_blktrace_scalars(%d) != nr_logs(%d)\n",
+			ret, nr_logs);
+		goto err_param;
+	}
+
+	/* per-file iteration counts; default is a single pass */
+	ret = init_merge_param_list(td->o.merge_blktrace_iters, bcs, nr_logs,
+				    1, offsetof(struct blktrace_cursor,
+						nr_iter));
+	if (ret) {
+		log_err("fio: merge_blktrace_iters(%d) != nr_logs(%d)\n",
+			ret, nr_logs);
+		goto err_param;
+	}
+
+	/* setup output file */
+	merge_fp = fopen(td->o.merge_blktrace_file, "w");
+	if (!merge_fp) {
+		/* was passing a NULL stream straight into setvbuf() */
+		log_err("fio: could not open merge output file: %s\n",
+			td->o.merge_blktrace_file);
+		ret = -errno;
+		goto err_param;
+	}
+	merge_buf = malloc(128 * 1024);
+	if (!merge_buf) {
+		ret = -ENOMEM;
+		goto err_out_file;
+	}
+	ret = setvbuf(merge_fp, merge_buf, _IOFBF, 128 * 1024);
+	if (ret)
+		goto err_out_file;
+
+	/* setup input files */
+	str = ptr = strdup(td->o.read_iolog_file);
+	nr_logs = 0;
+	for (i = 0; (name = get_next_str(&ptr)) != NULL; i++) {
+		bcs[i].fd = open(name, O_RDONLY);
+		if (bcs[i].fd < 0) {
+			log_err("fio: could not open file: %s\n", name);
+			ret = bcs[i].fd;
+			free(str);
+			goto err_file;
+		}
+		bcs[i].fifo = fifo_alloc(TRACE_FIFO_SIZE);
+		nr_logs++;
+
+		if (!is_blktrace(name, &bcs[i].swap)) {
+			log_err("fio: file is not a blktrace: %s\n", name);
+			/* was falling through with a stale ret of 0 */
+			ret = -EINVAL;
+			free(str);
+			goto err_file;
+		}
+
+		/* prime the cursor with its first queue event */
+		ret = read_trace(td, &bcs[i]);
+		if (ret < 0) {
+			free(str);
+			goto err_file;
+		} else if (!ret) {
+			/* empty trace: drop it and reuse this slot */
+			merge_finish_file(bcs, i, &nr_logs);
+			i--;
+		}
+	}
+	free(str);
+
+	/* merge files */
+	while (nr_logs) {
+		i = find_earliest_io(bcs, nr_logs);
+		bc = &bcs[i];
+		/* skip over the pdu */
+		ret = discard_pdu(td, bc->fifo, bc->fd, &bc->t);
+		if (ret < 0) {
+			td_verror(td, ret, "blktrace lseek");
+			goto err_file;
+		} else if (bc->t.pdu_len != ret) {
+			log_err("fio: discarded %d of %d\n", ret,
+				bc->t.pdu_len);
+			/* short discard could otherwise return 0 (success) */
+			ret = -EINVAL;
+			goto err_file;
+		}
+
+		/* write_trace() returns the fwrite() item count */
+		ret = write_trace(merge_fp, &bc->t);
+		if (ret != 1) {
+			log_err("fio: failed to write merged blktrace\n");
+			ret = -EIO;
+			goto err_file;
+		}
+		ret = read_trace(td, bc);
+		if (ret < 0)
+			goto err_file;
+		else if (!ret)
+			merge_finish_file(bcs, i, &nr_logs);
+	}
+
+	/* set iolog file to read from the newly merged file */
+	td->o.read_iolog_file = td->o.merge_blktrace_file;
+	ret = 0;
+
+err_file:
+	/* cleanup */
+	for (i = 0; i < nr_logs; i++) {
+		fifo_free(bcs[i].fifo);
+		close(bcs[i].fd);
+	}
+err_out_file:
+	fflush(merge_fp);
+	fclose(merge_fp);
+	free(merge_buf);
+err_param:
+	free(bcs);
+
+	return ret;
+}
#ifndef FIO_BLKTRACE_H
#define FIO_BLKTRACE_H
+
#ifdef FIO_HAVE_BLKTRACE
+#include <asm/types.h>
+
+#include "blktrace_api.h"
+
+struct blktrace_cursor {
+ struct fifo *fifo; // fifo queue for reading
+ int fd; // blktrace file
+ __u64 length; // length of trace
+ struct blk_io_trace t; // current io trace
+ int swap; // bitwise reverse required
+ int scalar; // scale percentage
+ int iter; // current iteration
+ int nr_iter; // number of iterations to run
+};
+
bool is_blktrace(const char *, int *);
bool load_blktrace(struct thread_data *, const char *, int);
+int merge_blktrace_iologs(struct thread_data *td);
#else
return false;
}
+/* No blktrace support compiled in: nothing to merge, report success. */
+static inline int merge_blktrace_iologs(struct thread_data *td)
+{
+	/* was returning the bool 'false' from an int function */
+	return 0;
+}
+
#endif
#endif
free(o->mmapfile);
free(o->read_iolog_file);
free(o->write_iolog_file);
+ free(o->merge_blktrace_file);
free(o->bw_log_file);
free(o->lat_log_file);
free(o->iops_log_file);
string_to_cpu(&o->mmapfile, top->mmapfile);
string_to_cpu(&o->read_iolog_file, top->read_iolog_file);
string_to_cpu(&o->write_iolog_file, top->write_iolog_file);
+ string_to_cpu(&o->merge_blktrace_file, top->merge_blktrace_file);
string_to_cpu(&o->bw_log_file, top->bw_log_file);
string_to_cpu(&o->lat_log_file, top->lat_log_file);
string_to_cpu(&o->iops_log_file, top->iops_log_file);
for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
o->percentile_list[i].u.f = fio_uint64_to_double(le64_to_cpu(top->percentile_list[i].u.i));
+
+ for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+ o->merge_blktrace_scalars[i].u.f = fio_uint64_to_double(le64_to_cpu(top->merge_blktrace_scalars[i].u.i));
+
+ for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+ o->merge_blktrace_iters[i].u.f = fio_uint64_to_double(le64_to_cpu(top->merge_blktrace_iters[i].u.i));
#if 0
uint8_t cpumask[FIO_TOP_STR_MAX];
uint8_t verify_cpumask[FIO_TOP_STR_MAX];
string_to_net(top->mmapfile, o->mmapfile);
string_to_net(top->read_iolog_file, o->read_iolog_file);
string_to_net(top->write_iolog_file, o->write_iolog_file);
+ string_to_net(top->merge_blktrace_file, o->merge_blktrace_file);
string_to_net(top->bw_log_file, o->bw_log_file);
string_to_net(top->lat_log_file, o->lat_log_file);
string_to_net(top->iops_log_file, o->iops_log_file);
for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
top->percentile_list[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->percentile_list[i].u.f));
+
+ for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+ top->merge_blktrace_scalars[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->merge_blktrace_scalars[i].u.f));
+
+ for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++)
+ top->merge_blktrace_iters[i].u.i = __cpu_to_le64(fio_double_to_uint64(o->merge_blktrace_iters[i].u.f));
#if 0
uint8_t cpumask[FIO_TOP_STR_MAX];
uint8_t verify_cpumask[FIO_TOP_STR_MAX];
static void fio_client_json_fini(void)
{
- if (!(output_format & FIO_OUTPUT_JSON))
+ struct buf_output out;
+
+ if (!root)
return;
- log_info("\n");
- json_print_object(root, NULL);
- log_info("\n");
+ buf_output_init(&out);
+
+ __log_buf(&out, "\n");
+ json_print_object(root, &out);
+ __log_buf(&out, "\n");
+ log_info_buf(out.buf, out.buflen);
+
+ buf_output_free(&out);
+
json_free_object(root);
root = NULL;
+ job_opt_object = NULL;
clients_array = NULL;
du_array = NULL;
}
if (--client->refs)
return;
+ log_info_buf(client->buf.buf, client->buf.buflen);
+ buf_output_free(&client->buf);
+
free(client->hostname);
if (client->argv)
free(client->argv);
}
}
-struct fio_client *fio_client_add_explicit(struct client_ops *ops,
- const char *hostname, int type,
- int port)
+static struct fio_client *get_new_client(void)
{
struct fio_client *client;
INIT_FLIST_HEAD(&client->eta_list);
INIT_FLIST_HEAD(&client->cmd_list);
+ buf_output_init(&client->buf);
+
+ return client;
+}
+
+struct fio_client *fio_client_add_explicit(struct client_ops *ops,
+ const char *hostname, int type,
+ int port)
+{
+ struct fio_client *client;
+
+ client = get_new_client();
+
client->hostname = strdup(hostname);
if (type == Fio_client_socket)
}
}
- client = malloc(sizeof(*client));
- memset(client, 0, sizeof(*client));
-
- INIT_FLIST_HEAD(&client->list);
- INIT_FLIST_HEAD(&client->hash_list);
- INIT_FLIST_HEAD(&client->arg_list);
- INIT_FLIST_HEAD(&client->eta_list);
- INIT_FLIST_HEAD(&client->cmd_list);
+ client = get_new_client();
if (fio_server_parse_string(hostname, &client->hostname,
&client->is_sock, &client->port,
struct flist_head *opt_list = NULL;
struct json_object *tsobj;
- if (output_format & FIO_OUTPUT_TERSE)
- return;
-
if (client->opt_lists && p->ts.thread_number <= client->jobs)
opt_list = &client->opt_lists[p->ts.thread_number - 1];
- tsobj = show_thread_status(&p->ts, &p->rs, opt_list, NULL);
+ tsobj = show_thread_status(&p->ts, &p->rs, opt_list, &client->buf);
client->did_stat = true;
if (tsobj) {
json_object_add_client_info(tsobj, client);
if (++sum_stat_nr == sum_stat_clients) {
strcpy(client_ts.name, "All clients");
- tsobj = show_thread_status(&client_ts, &client_gs, NULL, NULL);
+ tsobj = show_thread_status(&client_ts, &client_gs, NULL, &client->buf);
if (tsobj) {
json_object_add_client_info(tsobj, client);
json_array_add_value_object(clients_array, tsobj);
{
struct group_run_stats *gs = (struct group_run_stats *) cmd->payload;
- if (output_format & FIO_OUTPUT_TERSE)
- return;
-
if (output_format & FIO_OUTPUT_NORMAL)
- show_group_stats(gs, NULL);
+ show_group_stats(gs, &client->buf);
}
static void handle_job_opt(struct fio_client *client, struct fio_net_cmd *cmd)
const char *buf = (const char *) pdu->buf;
const char *name;
int fio_unused ret;
+ struct buf_output out;
+
+ buf_output_init(&out);
name = client->name ? client->name : client->hostname;
if (!client->skip_newline && !(output_format & FIO_OUTPUT_TERSE))
- fprintf(f_out, "<%s> ", name);
- ret = fwrite(buf, pdu->buf_len, 1, f_out);
- fflush(f_out);
+ __log_buf(&out, "<%s> ", name);
+ __log_buf(&out, "%s", buf);
+ log_info_buf(out.buf, out.buflen);
+ buf_output_free(&out);
client->skip_newline = strchr(buf, '\n') == NULL;
}
{
struct cmd_du_pdu *du = (struct cmd_du_pdu *) cmd->payload;
- if (output_format & FIO_OUTPUT_TERSE)
- return;
-
- if (!client->disk_stats_shown) {
+ if (!client->disk_stats_shown)
client->disk_stats_shown = true;
- if (!(output_format & FIO_OUTPUT_JSON))
- log_info("\nDisk stats (read/write):\n");
- }
if (output_format & FIO_OUTPUT_JSON) {
struct json_object *duobj;
+
json_array_add_disk_util(&du->dus, &du->agg, du_array);
duobj = json_array_last_value_object(du_array);
json_object_add_client_info(duobj, client);
+ } else if (output_format & FIO_OUTPUT_TERSE)
+ print_disk_util(&du->dus, &du->agg, 1, &client->buf);
+ else if (output_format & FIO_OUTPUT_NORMAL) {
+ __log_buf(&client->buf, "\nDisk stats (read/write):\n");
+ print_disk_util(&du->dus, &du->agg, 0, &client->buf);
}
- if (output_format & FIO_OUTPUT_NORMAL)
- print_disk_util(&du->dus, &du->agg, 0, NULL);
}
static void convert_jobs_eta(struct jobs_eta *je)
const char *os, *arch;
char bit[16];
- if (output_format & FIO_OUTPUT_TERSE)
- return;
-
os = fio_get_os_string(probe->os);
if (!os)
os = "unknown";
sprintf(bit, "%d-bit", probe->bpp * 8);
probe->flags = le64_to_cpu(probe->flags);
- if (!(output_format & FIO_OUTPUT_JSON))
+ if (output_format & FIO_OUTPUT_NORMAL) {
log_info("hostname=%s, be=%u, %s, os=%s, arch=%s, fio=%s, flags=%lx\n",
probe->hostname, probe->bigendian, bit, os, arch,
probe->fio_version, (unsigned long) probe->flags);
+ }
if (!client->name)
client->name = strdup((char *) probe->hostname);
struct client_file *files;
unsigned int nr_files;
+
+ struct buf_output buf;
};
typedef void (client_cmd_op)(struct fio_client *, struct fio_net_cmd *);
output_sym "CONFIG_WINDOWSAIO"
# We now take the regular configuration path without having exit 0 here.
# Flags below are still necessary mostly for MinGW.
+ build_static="yes"
socklen_t="yes"
rusage_thread="yes"
fdatasync="yes"
fi
print_config "GUASI" "$guasi"
-##########################################
-# fusion-aw probe
-if test "$fusion_aw" != "yes" ; then
- fusion_aw="no"
-fi
-cat > $TMPC << EOF
-#include <nvm/nvm_primitives.h>
-int main(int argc, char **argv)
-{
- nvm_version_t ver_info;
- nvm_handle_t handle;
-
- handle = nvm_get_handle(0, &ver_info);
- return nvm_atomic_write(handle, 0, 0, 0);
-}
-EOF
-if compile_prog "" "-L/usr/lib/fio -L/usr/lib/nvm -lnvm-primitives -ldl -lpthread" "fusion-aw"; then
- LIBS="-L/usr/lib/fio -L/usr/lib/nvm -lnvm-primitives -ldl -lpthread $LIBS"
- fusion_aw="yes"
-fi
-print_config "Fusion-io atomic engine" "$fusion_aw"
-
##########################################
# libnuma probe
if test "$libnuma" != "yes" ; then
if test "$guasi" = "yes" ; then
output_sym "CONFIG_GUASI"
fi
-if test "$fusion_aw" = "yes" ; then
- output_sym "CONFIG_FUSION_AW"
-fi
if test "$libnuma_v2" = "yes" ; then
output_sym "CONFIG_LIBNUMA"
fi
*/
o->thinktime_blocks = 1;
o->thinktime_spin = 0;
- o->thinktime = (co->cpucycle * (100 - co->cpuload)) / co->cpuload;
+ o->thinktime = ((unsigned long long) co->cpucycle * (100 - co->cpuload)) / co->cpuload;
o->nr_files = o->open_files = 1;
+++ /dev/null
-/*
- * Custom fio(1) engine that submits synchronous atomic writes to file.
- *
- * Copyright (C) 2013 Fusion-io, Inc.
- * Author: Santhosh Kumar Koundinya (skoundinya@fusionio.com).
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; under version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License version
- * 2 for more details.
- *
- * You should have received a copy of the GNU General Public License Version 2
- * along with this program; if not see <http://www.gnu.org/licenses/>
- */
-
-#include <stdlib.h>
-#include <stdint.h>
-
-#include "../fio.h"
-
-#include <nvm/nvm_primitives.h>
-
-#define NUM_ATOMIC_CAPABILITIES (5)
-
-struct fas_data {
- nvm_handle_t nvm_handle;
- size_t xfer_buf_align;
- size_t xfer_buflen_align;
- size_t xfer_buflen_max;
- size_t sector_size;
-};
-
-static enum fio_q_status queue(struct thread_data *td, struct io_u *io_u)
-{
- struct fas_data *d = FILE_ENG_DATA(io_u->file);
- int rc;
-
- if (io_u->ddir != DDIR_WRITE) {
- td_vmsg(td, EINVAL, "only writes supported", "io_u->ddir");
- rc = -EINVAL;
- goto out;
- }
-
- if ((size_t) io_u->xfer_buf % d->xfer_buf_align) {
- td_vmsg(td, EINVAL, "unaligned data buffer", "io_u->xfer_buf");
- rc = -EINVAL;
- goto out;
- }
-
- if (io_u->xfer_buflen % d->xfer_buflen_align) {
- td_vmsg(td, EINVAL, "unaligned data size", "io_u->xfer_buflen");
- rc = -EINVAL;
- goto out;
- }
-
- if (io_u->xfer_buflen > d->xfer_buflen_max) {
- td_vmsg(td, EINVAL, "data too big", "io_u->xfer_buflen");
- rc = -EINVAL;
- goto out;
- }
-
- rc = nvm_atomic_write(d->nvm_handle, (uint64_t) io_u->xfer_buf,
- io_u->xfer_buflen, io_u->offset / d->sector_size);
- if (rc == -1) {
- td_verror(td, errno, "nvm_atomic_write");
- rc = -errno;
- goto out;
- }
- rc = FIO_Q_COMPLETED;
-out:
- if (rc < 0)
- io_u->error = -rc;
-
- return rc;
-}
-
-static int open_file(struct thread_data *td, struct fio_file *f)
-{
- int rc;
- int fio_unused close_file_rc;
- struct fas_data *d;
- nvm_version_t nvm_version;
- nvm_capability_t nvm_capability[NUM_ATOMIC_CAPABILITIES];
-
-
- d = malloc(sizeof(*d));
- if (!d) {
- td_verror(td, ENOMEM, "malloc");
- rc = ENOMEM;
- goto error;
- }
- d->nvm_handle = -1;
- FILE_SET_ENG_DATA(f, d);
-
- rc = generic_open_file(td, f);
-
- if (rc)
- goto free_engine_data;
-
- /* Set the version of the library as seen when engine is compiled */
- nvm_version.major = NVM_PRIMITIVES_API_MAJOR;
- nvm_version.minor = NVM_PRIMITIVES_API_MINOR;
- nvm_version.micro = NVM_PRIMITIVES_API_MICRO;
-
- d->nvm_handle = nvm_get_handle(f->fd, &nvm_version);
- if (d->nvm_handle == -1) {
- td_vmsg(td, errno, "nvm_get_handle failed", "nvm_get_handle");
- rc = errno;
- goto close_file;
- }
-
- nvm_capability[0].cap_id = NVM_CAP_ATOMIC_WRITE_START_ALIGN_ID;
- nvm_capability[1].cap_id = NVM_CAP_ATOMIC_WRITE_MULTIPLICITY_ID;
- nvm_capability[2].cap_id = NVM_CAP_ATOMIC_WRITE_MAX_VECTOR_SIZE_ID;
- nvm_capability[3].cap_id = NVM_CAP_SECTOR_SIZE_ID;
- nvm_capability[4].cap_id = NVM_CAP_ATOMIC_MAX_IOV_ID;
- rc = nvm_get_capabilities(d->nvm_handle, nvm_capability,
- NUM_ATOMIC_CAPABILITIES, false);
- if (rc == -1) {
- td_vmsg(td, errno, "error in getting atomic write capabilities", "nvm_get_capabilities");
- rc = errno;
- goto close_file;
- } else if (rc < NUM_ATOMIC_CAPABILITIES) {
- td_vmsg(td, EINVAL, "couldn't get all the atomic write capabilities" , "nvm_get_capabilities");
- rc = ECANCELED;
- goto close_file;
- }
- /* Reset rc to 0 because we got all capabilities we needed */
- rc = 0;
- d->xfer_buf_align = nvm_capability[0].cap_value;
- d->xfer_buflen_align = nvm_capability[1].cap_value;
- d->xfer_buflen_max = d->xfer_buflen_align * nvm_capability[2].cap_value * nvm_capability[4].cap_value;
- d->sector_size = nvm_capability[3].cap_value;
-
-out:
- return rc;
-close_file:
- close_file_rc = generic_close_file(td, f);
-free_engine_data:
- free(d);
-error:
- f->fd = -1;
- FILE_SET_ENG_DATA(f, NULL);
- goto out;
-}
-
-static int close_file(struct thread_data *td, struct fio_file *f)
-{
- struct fas_data *d = FILE_ENG_DATA(f);
-
- if (d) {
- if (d->nvm_handle != -1)
- nvm_release_handle(d->nvm_handle);
- free(d);
- FILE_SET_ENG_DATA(f, NULL);
- }
-
- return generic_close_file(td, f);
-}
-
-static struct ioengine_ops ioengine = {
- .name = "fusion-aw-sync",
- .version = FIO_IOOPS_VERSION,
- .queue = queue,
- .open_file = open_file,
- .close_file = close_file,
- .get_file_size = generic_get_file_size,
- .flags = FIO_SYNCIO | FIO_RAWIO | FIO_MEMALIGN
-};
-
-static void fio_init fio_fusion_aw_init(void)
-{
- register_ioengine(&ioengine);
-}
-
-static void fio_exit fio_fusion_aw_exit(void)
-{
- unregister_ioengine(&ioengine);
-}
ctx = HMAC_CTX_new();
#else
ctx = &_ctx;
+ /* work-around crash in certain versions of libssl */
+ HMAC_CTX_init(ctx);
#endif
HMAC_Init_ex(ctx, key, key_len, EVP_sha256(), NULL);
HMAC_Update(ctx, (unsigned char*)data, strlen(data));
struct rados_data {
rados_t cluster;
rados_ioctx_t io_ctx;
- char **objects;
- size_t object_count;
struct io_u **aio_events;
bool connected;
};
rados->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *));
if (!rados->aio_events)
goto failed;
-
- rados->object_count = td->o.nr_files;
- rados->objects = calloc(rados->object_count, sizeof(char*));
- if (!rados->objects)
- goto failed;
-
*rados_data_ptr = rados;
return 0;
failed:
if (rados) {
- rados->object_count = 0;
if (rados->aio_events)
free(rados->aio_events);
free(rados);
return 1;
}
-static void _fio_rados_rm_objects(struct rados_data *rados)
+static void _fio_rados_rm_objects(struct thread_data *td, struct rados_data *rados)
{
size_t i;
- for (i = 0; i < rados->object_count; ++i) {
- if (rados->objects[i]) {
- rados_remove(rados->io_ctx, rados->objects[i]);
- free(rados->objects[i]);
- rados->objects[i] = NULL;
- }
+ for (i = 0; i < td->o.nr_files; i++) {
+ struct fio_file *f = td->files[i];
+ rados_remove(rados->io_ctx, f->file_name);
}
}
td->o.size / (td->o.nr_files ? td->o.nr_files : 1u);
struct fio_file *f;
uint32_t i;
- size_t oname_len = 0;
if (o->cluster_name) {
char *client_name = NULL;
} else
r = rados_create(&rados->cluster, o->client_name);
+ if (o->pool_name == NULL) {
+ log_err("rados pool name must be provided.\n");
+ goto failed_early;
+ }
+
if (r < 0) {
log_err("rados_create failed.\n");
goto failed_early;
goto failed_shutdown;
}
- for (i = 0; i < rados->object_count; i++) {
+ for (i = 0; i < td->o.nr_files; i++) {
f = td->files[i];
f->real_file_size = file_size;
- f->engine_pos = i;
-
- oname_len = strlen(f->file_name) + 32;
- rados->objects[i] = malloc(oname_len);
- /* vary objects for different jobs */
- snprintf(rados->objects[i], oname_len - 1,
- "fio_rados_bench.%s.%x",
- f->file_name, td->thread_number);
- r = rados_write(rados->io_ctx, rados->objects[i], "", 0, 0);
+ r = rados_write(rados->io_ctx, f->file_name, "", 0, 0);
if (r < 0) {
- free(rados->objects[i]);
- rados->objects[i] = NULL;
- log_err("error creating object.\n");
goto failed_obj_create;
}
}
-
- return 0;
+ return 0;
failed_obj_create:
- _fio_rados_rm_objects(rados);
+ _fio_rados_rm_objects(td, rados);
rados_ioctx_destroy(rados->io_ctx);
rados->io_ctx = NULL;
failed_shutdown:
if (!rados)
return;
- _fio_rados_rm_objects(rados);
-
if (rados->io_ctx) {
rados_ioctx_destroy(rados->io_ctx);
rados->io_ctx = NULL;
struct rados_data *rados = td->io_ops_data;
if (rados) {
+ _fio_rados_rm_objects(td, rados);
_fio_rados_disconnect(rados);
- free(rados->objects);
free(rados->aio_events);
free(rados);
}
{
struct rados_data *rados = td->io_ops_data;
struct fio_rados_iou *fri = io_u->engine_data;
- char *object = rados->objects[io_u->file->engine_pos];
+ char *object = io_u->file->file_name;
int r = -1;
fio_ro_check(td, io_u);
bytes_total = td->fill_device_size;
}
- if (td->o.zone_size && td->o.zone_skip && bytes_total) {
+ /*
+ * If io_size is set, bytes_total is an exact value that does not need
+ * adjustment.
+ */
+ if (td->o.zone_size && td->o.zone_skip && bytes_total &&
+ !fio_option_is_set(&td->o, io_size)) {
unsigned int nr_zones;
uint64_t zone_bytes;
- zone_bytes = bytes_total + td->o.zone_size + td->o.zone_skip;
- nr_zones = (zone_bytes - 1) / (td->o.zone_size + td->o.zone_skip);
+ /*
+ * Calculate the upper bound of the number of zones that will
+ * be processed, including skipped bytes between zones. If this
+ * is larger than total_io_size (e.g. when --io_size or --size
+ * specify a small value), use the lower bound to avoid
+ * adjustments to a negative value that would result in a very
+ * large bytes_total and an incorrect eta.
+ */
+ zone_bytes = td->o.zone_size + td->o.zone_skip;
+ nr_zones = (bytes_total + zone_bytes - 1) / zone_bytes;
+ if (bytes_total < nr_zones * td->o.zone_skip)
+ nr_zones = bytes_total / zone_bytes;
bytes_total -= nr_zones * td->o.zone_skip;
}
--- /dev/null
+# Example of how to split a drive up into sections, manually, and perform
+# verify from a bunch of jobs. This example is special in that it assumes
+# the drive is at around 30 * 124G in size, so with the below settings, we'll
+# cover most of the drive. It's also special in that it doesn't write
+# everything, it just writes 16k at a specific boundary, for every 128k.
+# This is done to exercise the split path for Intel NVMe devices, most of
+# which have a 128k stripe size and require IOs to be split if the cross
+# the stripe boundary.
+#
+[global]
+bs=16k
+direct=1
+rw=write:112k
+verify=crc32c
+filename=/dev/nvme0n1
+verify_backlog=1
+offset_increment=124g
+io_size=120g
+offset=120k
+group_reporting=1
+verify_dump=1
+loops=2
+
+[write-verify]
+numjobs=30
--- /dev/null
+; fio-rand-RW.job for fiotest
+
+[global]
+name=fio-rand-RW
+filename=fio-rand-RW
+rw=randrw
+rwmixread=60
+rwmixwrite=40
+bs=4K
+direct=0
+numjobs=4
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
+++ /dev/null
-; fio-rand-RW.job for fiotest
-
-[global]
-name=fio-rand-RW
-filename=fio-rand-RW
-rw=randrw
-rwmixread=60
-rwmixwrite=40
-bs=4K
-direct=0
-numjobs=4
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
--- /dev/null
+; fio-rand-read.job for fiotest
+
+[global]
+name=fio-rand-read
+filename=fio-rand-read
+rw=randread
+bs=4K
+direct=0
+numjobs=1
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
+++ /dev/null
-; fio-rand-read.job for fiotest
-
-[global]
-name=fio-rand-read
-filename=fio-rand-read
-rw=randread
-bs=4K
-direct=0
-numjobs=1
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
--- /dev/null
+; fio-rand-write.job for fiotest
+
+[global]
+name=fio-rand-write
+filename=fio-rand-write
+rw=randwrite
+bs=4K
+direct=0
+numjobs=4
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
+++ /dev/null
-; fio-rand-write.job for fiotest
-
-[global]
-name=fio-rand-write
-filename=fio-rand-write
-rw=randwrite
-bs=4K
-direct=0
-numjobs=4
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
--- /dev/null
+; fio-seq-RW.job for fiotest
+
+[global]
+name=fio-seq-RW
+filename=fio-seq-RW
+rw=rw
+rwmixread=60
+rwmixwrite=40
+bs=256K
+direct=0
+numjobs=4
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
+++ /dev/null
-; fio-seq-RW.job for fiotest
-
-[global]
-name=fio-seq-RW
-filename=fio-seq-RW
-rw=rw
-rwmixread=60
-rwmixwrite=40
-bs=256K
-direct=0
-numjobs=4
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
--- /dev/null
+[global]
+name=fio-seq-reads
+filename=fio-seq-reads
+rw=read
+bs=256K
+direct=1
+numjobs=1
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
+++ /dev/null
-[global]
-name=fio-seq-reads
-filename=fio-seq-reads
-rw=read
-bs=256K
-direct=1
-numjobs=1
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
--- /dev/null
+; fio-seq-write.job for fiotest
+
+[global]
+name=fio-seq-write
+filename=fio-seq-write
+rw=write
+bs=256K
+direct=0
+numjobs=1
+time_based=1
+runtime=900
+
+[file1]
+size=10G
+ioengine=libaio
+iodepth=16
+++ /dev/null
-; fio-seq-write.job for fiotest
-
-[global]
-name=fio-seq-write
-filename=fio-seq-write
-rw=write
-bs=256K
-direct=0
-numjobs=1
-time_based=1
-runtime=900
-
-[file1]
-size=10G
-ioengine=libaio
-iodepth=16
+++ /dev/null
-# Example Job File that randomly writes 8k worth of data atomically for
-# 60 seconds.
-[rw_aw_file_sync]
-rw=randwrite
-ioengine=fusion-aw-sync
-blocksize=8k
-blockalign=8k
-
-# if file system supports atomic write
-filename=/mnt/fs/file
-# or test on a direct block device instead
-#filename=/dev/fioa
-randrepeat=1
-fallocate=none
-direct=1
-invalidate=0
-runtime=60
-time_based
*/
unsigned int major, minor;
int fileno;
- unsigned long long bs;
char *file_name;
/*
{
unsigned long long ret, sized;
uint64_t frand_max;
- unsigned long r;
+ uint64_t r;
frand_max = rand_max(&td->file_size_state);
r = __rand(&td->file_size_state);
static void __init_rand_distribution(struct thread_data *td, struct fio_file *f)
{
unsigned int range_size, seed;
- unsigned long nranges;
+ uint64_t nranges;
uint64_t fsize;
range_size = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
fsize = min(f->real_file_size, f->io_size);
- nranges = (fsize + range_size - 1) / range_size;
+ nranges = (fsize + range_size - 1ULL) / range_size;
seed = jhash(f->file_name, strlen(f->file_name), 0) * td->thread_number;
if (!td->o.rand_repeatable)
.BI \-\-parse\-only
Parse options only, don't start any I/O.
.TP
+.BI \-\-merge\-blktrace\-only
+Merge blktraces only, don't start any I/O.
+.TP
.BI \-\-output \fR=\fPfilename
Write output to \fIfilename\fR.
.TP
\fBserialize_overlap\fR tells fio to avoid provoking this behavior by explicitly
serializing in-flight I/Os that have a non-zero overlap. Note that setting
this option can reduce both performance and the \fBiodepth\fR achieved.
-Additionally this option does not work when \fBio_submit_mode\fR is set to
-offload. Default: false.
+.RS
+.P
+This option only applies to I/Os issued for a single job except when it is
+enabled along with \fBio_submit_mode\fR=offload. In offload mode, fio
+will check for overlap among all I/Os submitted by offload jobs with \fBserialize_overlap\fR
+enabled. Threads must be used for all such jobs.
+.P
+Default: false.
+.RE
.TP
.BI io_submit_mode \fR=\fPstr
This option controls how fio submits the I/O to the I/O engine. The default
be read at once. If selected true, input from iolog will be read gradually.
Useful when iolog is very large, or it is generated.
.TP
+.BI merge_blktrace_file \fR=\fPstr
+When specified, rather than replaying the logs passed to \fBread_iolog\fR,
+the logs go through a merge phase which aggregates them into a single blktrace.
+The resulting file is then passed on as the \fBread_iolog\fR parameter. The
+intention here is to make the order of events consistent. This limits the
+influence of the scheduler compared to replaying multiple blktraces via
+concurrent jobs.
+.TP
+.BI merge_blktrace_scalars \fR=\fPfloat_list
+This is a percentage based option that is index paired with the list of files
+passed to \fBread_iolog\fR. When merging is performed, scale the time of each
+event by the corresponding amount. For example,
+`\-\-merge_blktrace_scalars="50:100"' runs the first trace in halftime and the
+second trace in realtime. This knob is separately tunable from
+\fBreplay_time_scale\fR which scales the trace during runtime and will not
+change the output of the merge unlike this option.
+.TP
+.BI merge_blktrace_iters \fR=\fPfloat_list
+This is a whole number option that is index paired with the list of files
+passed to \fBread_iolog\fR. When merging is performed, run each trace for
+the specified number of iterations. For example,
+`\-\-merge_blktrace_iters="2:1"' runs the first trace for two iterations
+and the second trace for one iteration.
+.TP
.BI replay_no_stall \fR=\fPbool
When replaying I/O with \fBread_iolog\fR the default behavior is to
attempt to respect the timestamps within the log and replay them with the
device accesses.
.TP
.BI replay_align \fR=\fPint
-Force alignment of I/O offsets and lengths in a trace to this power of 2
-value.
+Force alignment of the byte offsets in a trace to this value. The value
+must be a power of 2.
.TP
.BI replay_scale \fR=\fPint
-Scale sector offsets down by this factor when replaying traces.
+Scale byte offsets down by this factor when replaying traces. Should most
+likely use \fBreplay_align\fR as well.
.SS "Threads, processes and job synchronization"
.TP
.BI replay_skip \fR=\fPstr
data from the rolling collection window. Threshold limits can be expressed
as a fixed value or as a percentage of the mean in the collection window.
.RS
+.P
+When using this feature, most jobs should include the \fBtime_based\fR
+and \fBruntime\fR options or the \fBloops\fR option so that fio does not
+stop running after it has covered the full size of the specified file(s)
+or device(s).
+.RS
.RS
.TP
.B iops
Trim the given file from the given `offset' for `length' bytes.
.RE
.RE
+.SH I/O REPLAY \- MERGING TRACES
+Colocation is a common practice used to get the most out of a machine.
+Knowing which workloads play nicely with each other and which ones don't is
+a much harder task. While fio can replay workloads concurrently via multiple
+jobs, it leaves some variability up to the scheduler making results harder to
+reproduce. Merging is a way to make the order of events consistent.
+.P
+Merging is integrated into I/O replay and done when a \fBmerge_blktrace_file\fR
+is specified. The list of files passed to \fBread_iolog\fR goes through the merge
+phase, producing a single file stored to the specified file. The output file is
+passed on as if it were the only file passed to \fBread_iolog\fR. An example would
+look like:
+.RS
+.P
+$ fio \-\-read_iolog="<file1>:<file2>" \-\-merge_blktrace_file="<output_file>"
+.RE
+.P
+Creating only the merged file can be done by passing the command line argument
+\fB\-\-merge\-blktrace\-only\fR.
+.P
+Scaling traces can be done to see the relative impact of any particular trace
+being slowed down or sped up. \fBmerge_blktrace_scalars\fR takes in a colon
+separated list of percentage scalars. It is index paired with the files passed
+to \fBread_iolog\fR.
+.P
+With scaling, it may be desirable to match the running time of all traces.
+This can be done with \fBmerge_blktrace_iters\fR. It is index paired with
+\fBread_iolog\fR just like \fBmerge_blktrace_scalars\fR.
+.P
+In an example, given two traces, A and B, each 60s long. If we want to see
+the impact of trace A issuing IOs twice as fast and repeat trace A over the
+runtime of trace B, the following can be done:
+.RS
+.P
+$ fio \-\-read_iolog="<trace_a>:<trace_b>" \-\-merge_blktrace_file="<output_file>" \-\-merge_blktrace_scalars="50:100" \-\-merge_blktrace_iters="2:1"
+.RE
+.P
+This runs trace A at 2x the speed twice for approximately the same runtime as
+a single run of trace B.
.SH CPU IDLENESS PROFILING
In some cases, we want to understand CPU overhead in a test. For example, we
test patches for the specific goodness of whether they reduce CPU usage.
/*
* "local" is pseudo-policy
*/
-#define MPOL_LOCAL MPOL_MAX
+#ifndef MPOL_LOCAL
+#define MPOL_LOCAL 4
+#endif
#endif
#ifdef CONFIG_CUDA
#define __fio_stringify_1(x) #x
#define __fio_stringify(x) __fio_stringify_1(x)
-extern int exitall_on_terminate;
+extern bool exitall_on_terminate;
extern unsigned int thread_number;
extern unsigned int stat_number;
extern int shm_id;
extern int append_terse_output;
extern int temp_stall_ts;
extern uintptr_t page_mask, page_size;
-extern int read_only;
+extern bool read_only;
extern int eta_print;
extern int eta_new_line;
extern unsigned int eta_interval_msec;
extern int fio_clock_source_set;
extern int warnings_fatal;
extern int terse_version;
-extern int is_backend;
-extern int is_local_backend;
+extern bool is_backend;
+extern bool is_local_backend;
extern int nr_clients;
-extern int log_syslog;
+extern bool log_syslog;
extern int status_interval;
extern const char fio_version_string[];
extern char *trigger_file;
extern void exec_trigger(const char *);
extern void check_trigger_file(void);
+extern bool in_flight_overlap(struct io_u_queue *q, struct io_u *io_u);
+extern pthread_mutex_t overlap_check;
+
#endif
c_s = get_cpu_clock();
do {
__fio_gettime(&e);
+ c_e = get_cpu_clock();
elapsed = utime_since(&s, &e);
- if (elapsed >= 1280) {
- c_e = get_cpu_clock();
+ if (elapsed >= 1280)
break;
- }
} while (1);
fio_clock_source = old_cs;
#include "gclient.h"
#include "graph.h"
-static int gfio_server_running;
+static bool gfio_server_running;
static unsigned int gfio_graph_limit = 100;
GdkColor gfio_color_white;
static void *server_thread(void *arg)
{
fio_server_create_sk_key();
- is_backend = 1;
- gfio_server_running = 1;
+ is_backend = true;
+ gfio_server_running = true;
fio_start_server(NULL);
- gfio_server_running = 0;
+ gfio_server_running = false;
fio_server_destroy_sk_key();
return NULL;
}
static void gfio_start_server(struct gui *ui)
{
if (!gfio_server_running) {
- gfio_server_running = 1;
+ gfio_server_running = true;
pthread_create(&ui->server_t, NULL, server_thread, NULL);
pthread_detach(ui->server_t);
}
#include "idletime.h"
#include "filelock.h"
#include "steadystate.h"
+#include "blktrace.h"
#include "oslib/getopt.h"
#include "oslib/strcasestr.h"
static char **ini_file;
static int max_jobs = FIO_MAX_JOBS;
-static int dump_cmdline;
-static int parse_only;
+static bool dump_cmdline;
+static bool parse_only;
+static bool merge_blktrace_only;
static struct thread_data def_thread;
struct thread_data *threads = NULL;
static char **job_sections;
static int nr_job_sections;
-int exitall_on_terminate = 0;
+bool exitall_on_terminate = false;
int output_format = FIO_OUTPUT_NORMAL;
int eta_print = FIO_ETA_AUTO;
unsigned int eta_interval_msec = 1000;
char *exec_profile = NULL;
int warnings_fatal = 0;
int terse_version = 3;
-int is_backend = 0;
-int is_local_backend = 0;
+bool is_backend = false;
+bool is_local_backend = false;
int nr_clients = 0;
-int log_syslog = 0;
+bool log_syslog = false;
-int write_bw_log = 0;
-int read_only = 0;
+bool write_bw_log = false;
+bool read_only = false;
int status_interval = 0;
char *trigger_file = NULL;
.has_arg = required_argument,
.val = 'K',
},
+ {
+ .name = (char *) "merge-blktrace-only",
+ .has_arg = no_argument,
+ .val = 'A' | FIO_CLIENT_FLAG,
+ },
{
.name = NULL,
},
/*
* There's no need to check for in-flight overlapping IOs if the job
* isn't changing data or the maximum iodepth is guaranteed to be 1
+ * when we are not in offload mode
*/
if (o->serialize_overlap && !(td->flags & TD_F_READ_IOLOG) &&
- (!(td_write(td) || td_trim(td)) || o->iodepth == 1))
+ (!(td_write(td) || td_trim(td)) || o->iodepth == 1) &&
+ o->io_submit_mode != IO_MODE_OFFLOAD)
o->serialize_overlap = 0;
- /*
- * Currently can't check for overlaps in offload mode
- */
- if (o->serialize_overlap && o->io_submit_mode == IO_MODE_OFFLOAD) {
- log_err("fio: checking for in-flight overlaps when the "
- "io_submit_mode is offload is not supported\n");
- o->serialize_overlap = 0;
- ret |= warnings_fatal;
- }
if (o->nr_files > td->files_index)
o->nr_files = td->files_index;
char *c1, *c2, *c3, *c4;
char *c5 = NULL, *c6 = NULL;
int i2p = is_power_of_2(o->kb_base);
+ struct buf_output out;
c1 = num2str(o->min_bs[DDIR_READ], o->sig_figs, 1, i2p, N2S_BYTE);
c2 = num2str(o->max_bs[DDIR_READ], o->sig_figs, 1, i2p, N2S_BYTE);
c6 = num2str(o->max_bs[DDIR_TRIM], o->sig_figs, 1, i2p, N2S_BYTE);
}
- log_info("%s: (g=%d): rw=%s, ", td->o.name,
+ buf_output_init(&out);
+ __log_buf(&out, "%s: (g=%d): rw=%s, ", td->o.name,
td->groupid,
ddir_str(o->td_ddir));
if (o->bs_is_seq_rand)
- log_info("bs=(R) %s-%s, (W) %s-%s, bs_is_seq_rand, ",
+ __log_buf(&out, "bs=(R) %s-%s, (W) %s-%s, bs_is_seq_rand, ",
c1, c2, c3, c4);
else
- log_info("bs=(R) %s-%s, (W) %s-%s, (T) %s-%s, ",
+ __log_buf(&out, "bs=(R) %s-%s, (W) %s-%s, (T) %s-%s, ",
c1, c2, c3, c4, c5, c6);
- log_info("ioengine=%s, iodepth=%u\n",
+ __log_buf(&out, "ioengine=%s, iodepth=%u\n",
td->io_ops->name, o->iodepth);
+ log_info_buf(out.buf, out.buflen);
+ buf_output_free(&out);
free(c1);
free(c2);
if (td_steadystate_init(td))
goto err;
+ if (o->merge_blktrace_file && !merge_blktrace_iologs(td))
+ goto err;
+
+ if (merge_blktrace_only) {
+ put_job(td);
+ return 0;
+ }
+
/*
* recurse add identical jobs, clear numjobs and stonewall options
* as they don't apply to sub-jobs
printf(" --debug=options\tEnable debug logging. May be one/more of:\n");
show_debug_categories();
printf(" --parse-only\t\tParse options only, don't start any IO\n");
+ printf(" --merge-blktrace-only\tMerge blktraces only, don't start any IO\n");
printf(" --output\t\tWrite output to file\n");
printf(" --bandwidth-log\tGenerate aggregate bandwidth logs\n");
printf(" --minimal\t\tMinimal (terse) output\n");
char *ostr = cmd_optstr;
char *pid_file = NULL;
void *cur_client = NULL;
- int backend = 0;
+ bool backend = false;
/*
* Reset optind handling, since we may call this multiple times
exit_val = 1;
break;
case 'b':
- write_bw_log = 1;
+ write_bw_log = true;
break;
case 'o': {
FILE *tmp;
break;
case 's':
did_arg = true;
- dump_cmdline = 1;
+ dump_cmdline = true;
break;
case 'r':
read_only = 1;
break;
case 'P':
did_arg = true;
- parse_only = 1;
+ parse_only = true;
break;
case 'x': {
size_t new_size;
}
if (optarg)
fio_server_set_arg(optarg);
- is_backend = 1;
- backend = 1;
+ is_backend = true;
+ backend = true;
#else
log_err("fio: client/server requires SHM support\n");
do_exit++;
}
trigger_timeout /= 1000000;
break;
+
+ case 'A':
+ did_arg = true;
+ merge_blktrace_only = true;
+ break;
case '?':
log_err("%s: unrecognized option '%s'\n", argv[0],
argv[optind - 1]);
assert((io_u->flags & IO_U_F_FLIGHT) == 0);
io_u_set(td, io_u, IO_U_F_FLIGHT);
+ if (td->o.serialize_overlap && td->o.io_submit_mode == IO_MODE_OFFLOAD)
+ pthread_mutex_unlock(&overlap_check);
assert(fio_file_open(io_u->file));
static bool is_socket(const char *path)
{
struct stat buf;
- int r = stat(path, &buf);
+ int r;
+
+ r = stat(path, &buf);
if (r == -1)
return false;
static int open_socket(const char *path)
{
- int fd = socket(AF_UNIX, SOCK_STREAM, 0);
struct sockaddr_un addr;
+ int ret, fd;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd < 0)
return fd;
+
addr.sun_family = AF_UNIX;
- strncpy(addr.sun_path, path, sizeof(addr.sun_path));
- if (connect(fd, (const struct sockaddr *)&addr, strlen(path) + sizeof(addr.sun_family)) == 0)
+ if (snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path) >=
+ sizeof(addr.sun_path)) {
+ log_err("%s: path name %s is too long for a Unix socket\n",
+ __func__, path);
+ }
+
+ ret = connect(fd, (const struct sockaddr *)&addr, strlen(path) + sizeof(addr.sun_family));
+ if (!ret)
return fd;
- else
- close(fd);
+
+ close(fd);
return -1;
}
*/
static bool init_iolog_read(struct thread_data *td)
{
- char buffer[256], *p;
+ char buffer[256], *p, *fname;
FILE *f = NULL;
- bool ret;
- char* fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
+
+ fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
dprint(FD_IO, "iolog: name=%s\n", fname);
if (is_socket(fname)) {
- int fd = open_socket(fname);
- if (fd >= 0) {
+ int fd;
+
+ fd = open_socket(fname);
+ if (fd >= 0)
f = fdopen(fd, "r");
- }
} else
f = fopen(fname, "r");
+
free(fname);
+
if (!f) {
perror("fopen read iolog");
return false;
fclose(f);
return false;
}
- td->io_log_rfile = f;
+
/*
* version 2 of the iolog stores a specific string as the
* first line, check for that
*/
if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2))) {
free_release_files(td);
- ret = read_iolog2(td);
- }
- else {
- log_err("fio: iolog version 1 is no longer supported\n");
- ret = false;
+ td->io_log_rfile = f;
+ return read_iolog2(td);
}
- return ret;
+ log_err("fio: iolog version 1 is no longer supported\n");
+ fclose(f);
+ return false;
}
/*
}
/* Allocate memory for a set that can store the numbers 0 .. @nr_bits - 1. */
-struct axmap *axmap_new(unsigned long nr_bits)
+struct axmap *axmap_new(uint64_t nr_bits)
{
struct axmap *axmap;
unsigned int i, levels;
for (i = 0; i < axmap->nr_levels; i++) {
struct axmap_level *al = &axmap->levels[i];
+ nr_bits = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+
al->level = i;
- al->map_size = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
+ al->map_size = nr_bits;
al->map = malloc(al->map_size * sizeof(unsigned long));
if (!al->map)
goto free_levels;
- nr_bits = (nr_bits + BLOCKS_PER_UNIT - 1) >> UNIT_SHIFT;
}
axmap_reset(axmap);
* returns true.
*/
static bool axmap_handler(struct axmap *axmap, uint64_t bit_nr,
- bool (*func)(struct axmap_level *, unsigned long, unsigned int,
+ bool (*func)(struct axmap_level *, uint64_t, unsigned int,
void *), void *data)
{
struct axmap_level *al;
* returns true.
*/
static bool axmap_handler_topdown(struct axmap *axmap, uint64_t bit_nr,
- bool (*func)(struct axmap_level *, unsigned long, unsigned int, void *))
+ bool (*func)(struct axmap_level *, uint64_t, unsigned int, void *))
{
int i;
for (i = axmap->nr_levels - 1; i >= 0; i--) {
- unsigned long index = bit_nr >> (UNIT_SHIFT * i);
+ uint64_t index = bit_nr >> (UNIT_SHIFT * i);
unsigned long offset = index >> UNIT_SHIFT;
unsigned int bit = index & BLOCKS_PER_UNIT_MASK;
* the boundary of the element at offset @offset. Return the number of bits
* that have been set in @__data->set_bits if @al->level == 0.
*/
-static bool axmap_set_fn(struct axmap_level *al, unsigned long offset,
+static bool axmap_set_fn(struct axmap_level *al, uint64_t offset,
unsigned int bit, void *__data)
{
struct axmap_set_data *data = __data;
return set_bits;
}
-static bool axmap_isset_fn(struct axmap_level *al, unsigned long offset,
+static bool axmap_isset_fn(struct axmap_level *al, uint64_t offset,
unsigned int bit, void *unused)
{
- return (al->map[offset] & (1UL << bit)) != 0;
+ return (al->map[offset] & (1ULL << bit)) != 0;
}
bool axmap_isset(struct axmap *axmap, uint64_t bit_nr)
#include "types.h"
struct axmap;
-struct axmap *axmap_new(unsigned long nr_bits);
+struct axmap *axmap_new(uint64_t nr_bits);
void axmap_free(struct axmap *bm);
void axmap_set(struct axmap *axmap, uint64_t bit_nr);
#define __LFSR_NEXT(__fl, __v) \
__v = ((__v >> 1) | __fl->cached_bit) ^ \
- (((__v & 1UL) - 1UL) & __fl->xormask);
+ (((__v & 1ULL) - 1ULL) & __fl->xormask);
static inline void __lfsr_next(struct fio_lfsr *fl, unsigned int spin)
{
uint64_t xormask = 0;
for(i = 0; i < FIO_MAX_TAPS && taps[i] != 0; i++)
- xormask |= 1UL << (taps[i] - 1);
+ xormask |= 1ULL << (taps[i] - 1);
return xormask;
}
* take that into account.
*/
for (i = 3; i < 64; i++)
- if ((1UL << i) > size)
+ if ((1ULL << i) > size)
return lfsr_taps[i];
return NULL;
fl->max_val = nums - 1;
fl->xormask = lfsr_create_xormask(taps);
- fl->cached_bit = 1UL << (taps[0] - 1);
+ fl->cached_bit = 1ULL << (taps[0] - 1);
if (prepare_spin(fl, spin))
return 1;
[N2S_BYTEPERSEC]= "B/s",
[N2S_BITPERSEC] = "bit/s"
};
- const unsigned int thousand[] = { 1000, 1024 };
+ const unsigned int thousand = pow2 ? 1024 : 1000;
unsigned int modulo;
int post_index, carry = 0;
char tmp[32], fmt[32];
unitprefix = sistr;
for (post_index = 0; base > 1; post_index++)
- base /= thousand[!!pow2];
+ base /= thousand;
switch (units) {
case N2S_NONE:
* Divide by K/Ki until string length of num <= maxlen.
*/
modulo = -1U;
- while (post_index < sizeof(sistr)) {
+ while (post_index < ARRAY_SIZE(sistr)) {
sprintf(tmp, "%llu", (unsigned long long) num);
if (strlen(tmp) <= maxlen)
break;
- modulo = num % thousand[!!pow2];
- num /= thousand[!!pow2];
- carry = modulo >= thousand[!!pow2] / 2;
+ modulo = num % thousand;
+ num /= thousand;
+ carry = modulo >= thousand / 2;
post_index++;
}
* Fill in everything and return the result.
*/
assert(maxlen - strlen(tmp) - 1 > 0);
- assert(modulo < thousand[!!pow2]);
+ assert(modulo < thousand);
sprintf(fmt, "%%.%df", (int)(maxlen - strlen(tmp) - 1));
- sprintf(tmp, fmt, (double)modulo / (double)thousand[!!pow2]);
+ sprintf(tmp, fmt, (double)modulo / (double)thousand);
sprintf(buf, "%llu.%s%s%s", (unsigned long long) num, &tmp[2],
unitprefix[post_index], unitstr[units]);
/*
* Fill random chunk
*/
- this_len = (segment * (100 - percentage)) / 100;
+ this_len = ((unsigned long long)segment * (100 - percentage)) / 100;
if (this_len > len)
this_len = len;
static void zipf_update(struct zipf_state *zs)
{
- unsigned long to_gen;
+ uint64_t to_gen;
unsigned int i;
/*
zs->zetan += pow(1.0 / (double) (i + 1), zs->theta);
}
-static void shared_rand_init(struct zipf_state *zs, unsigned long nranges,
+static void shared_rand_init(struct zipf_state *zs, uint64_t nranges,
unsigned int seed)
{
memset(zs, 0, sizeof(*zs));
zs->rand_off = __rand(&zs->rand);
}
-void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta,
+void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta,
unsigned int seed)
{
shared_rand_init(zs, nranges, seed);
zipf_update(zs);
}
-unsigned long long zipf_next(struct zipf_state *zs)
+uint64_t zipf_next(struct zipf_state *zs)
{
double alpha, eta, rand_uni, rand_z;
unsigned long long n = zs->nranges;
return (val + zs->rand_off) % zs->nranges;
}
-void pareto_init(struct zipf_state *zs, unsigned long nranges, double h,
+void pareto_init(struct zipf_state *zs, uint64_t nranges, double h,
unsigned int seed)
{
shared_rand_init(zs, nranges, seed);
zs->pareto_pow = log(h) / log(1.0 - h);
}
-unsigned long long pareto_next(struct zipf_state *zs)
+uint64_t pareto_next(struct zipf_state *zs)
{
double rand = (double) __rand(&zs->rand) / (double) FRAND32_MAX;
unsigned long long n;
bool disable_hash;
};
-void zipf_init(struct zipf_state *zs, unsigned long nranges, double theta, unsigned int seed);
-unsigned long long zipf_next(struct zipf_state *zs);
+void zipf_init(struct zipf_state *zs, uint64_t nranges, double theta, unsigned int seed);
+uint64_t zipf_next(struct zipf_state *zs);
-void pareto_init(struct zipf_state *zs, unsigned long nranges, double h, unsigned int seed);
-unsigned long long pareto_next(struct zipf_state *zs);
+void pareto_init(struct zipf_state *zs, uint64_t nranges, double h, unsigned int seed);
+uint64_t pareto_next(struct zipf_state *zs);
void zipf_disable_hash(struct zipf_state *zs);
#endif
static int str_exitall_cb(void)
{
- exitall_on_terminate = 1;
+ exitall_on_terminate = true;
return 0;
}
* is escaped with a '\', then that ':' is part of the filename and does not
* indicate a new file.
*/
-static char *get_next_name(char **ptr)
+char *get_next_str(char **ptr)
{
char *str = *ptr;
char *p, *start;
}
-static int get_max_name_idx(char *input)
+int get_max_str_idx(char *input)
{
unsigned int cur_idx;
char *str, *p;
p = str = strdup(input);
for (cur_idx = 0; ; cur_idx++)
- if (get_next_name(&str) == NULL)
+ if (get_next_str(&str) == NULL)
break;
free(p);
p = str = strdup(input);
- index %= get_max_name_idx(input);
+ index %= get_max_str_idx(input);
for (cur_idx = 0; cur_idx <= index; cur_idx++)
- fname = get_next_name(&str);
+ fname = get_next_str(&str);
if (client_sockaddr_str[0] && unique_filename) {
len = snprintf(target, tlen, "%s/%s.", fname,
p = str = strdup(input);
- index %= get_max_name_idx(input);
+ index %= get_max_str_idx(input);
for (cur_idx = 0; cur_idx <= index; cur_idx++)
- fname = get_next_name(&str);
+ fname = get_next_str(&str);
fname = strdup(fname);
free(p);
if (!td->files_index)
td->o.nr_files = 0;
- while ((fname = get_next_name(&str)) != NULL) {
+ while ((fname = get_next_str(&str)) != NULL) {
if (!strlen(fname))
break;
add_file(td, fname, 0, 1);
return 0;
p = str = strdup(td->o.directory);
- while ((dirname = get_next_name(&str)) != NULL) {
+ while ((dirname = get_next_str(&str)) != NULL) {
if (lstat(dirname, &sb) < 0) {
ret = errno;
.help = "RDMA IO engine",
},
#endif
-#ifdef CONFIG_FUSION_AW
- { .ival = "fusion-aw-sync",
- .help = "Fusion-io atomic write engine",
- },
-#endif
#ifdef CONFIG_LINUX_EXT4_MOVE_EXTENT
{ .ival = "e4defrag",
.help = "ext4 defrag engine",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_RANDOM,
},
- {
- .name = "use_os_rand",
- .lname = "Use OS random",
- .type = FIO_OPT_DEPRECATED,
- .off1 = offsetof(struct thread_options, dep_use_os_rand),
- .category = FIO_OPT_C_IO,
- .group = FIO_OPT_G_RANDOM,
- },
{
.name = "norandommap",
.lname = "No randommap",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IOLOG,
},
+ {
+ .name = "merge_blktrace_file",
+ .lname = "Merged blktrace output filename",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct thread_options, merge_blktrace_file),
+ .help = "Merged blktrace output filename",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_IOLOG,
+ },
+ {
+ .name = "merge_blktrace_scalars",
+ .lname = "Percentage to scale each trace",
+ .type = FIO_OPT_FLOAT_LIST,
+ .off1 = offsetof(struct thread_options, merge_blktrace_scalars),
+ .maxlen = FIO_IO_U_LIST_MAX_LEN,
+ .help = "Percentage to scale each trace",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_IOLOG,
+ },
+ {
+ .name = "merge_blktrace_iters",
+ .lname = "Number of iterations to run per trace",
+ .type = FIO_OPT_FLOAT_LIST,
+ .off1 = offsetof(struct thread_options, merge_blktrace_iters),
+ .maxlen = FIO_IO_U_LIST_MAX_LEN,
+ .help = "Number of iterations to run per trace",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_IOLOG,
+ },
{
.name = "exec_prerun",
.lname = "Pre-execute runnable",
void del_opt_posval(const char *, const char *);
struct thread_data;
void fio_options_free(struct thread_data *);
+char *get_next_str(char **ptr);
+int get_max_str_idx(char *input);
char* get_name_by_idx(char *input, int index);
int set_name_idx(char *, size_t, char *, int, bool);
#include "../file.h"
#define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define OS_MAP_ANON MAP_ANON
return (unsigned long long) pages * (unsigned long long) pagesize;
}
-typedef struct { unsigned short r[3]; } os_random_state_t;
-
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
- rs->r[0] = seed & 0xffff;
- seed >>= 16;
- rs->r[1] = seed & 0xffff;
- seed >>= 16;
- rs->r[2] = seed & 0xffff;
- seed48(rs->r);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
- return nrand48(rs->r);
-}
-
#ifdef O_NOATIME
#define FIO_O_NOATIME O_NOATIME
#else
#include "../lib/types.h"
#define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define FIO_HAVE_FS_STAT
#define FIO_HAVE_TRIM
#include "../file.h"
#define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define FIO_HAVE_CHARDEV_SIZE
#define FIO_HAVE_FS_STAT
#include "../file.h"
#define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define FIO_HAVE_CHARDEV_SIZE
typedef cpu_set_t os_cpu_mask_t;
-typedef struct drand48_data os_random_state_t;
-
#ifdef CONFIG_3ARG_AFFINITY
#define fio_setaffinity(pid, cpumask) \
sched_setaffinity((pid), sizeof(cpumask), &(cpumask))
return (unsigned long long) pages * (unsigned long long) pagesize;
}
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
- srand48_r(seed, rs);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
- long val;
-
- lrand48_r(rs, &val);
- return val;
-}
-
static inline int fio_lookup_raw(dev_t dev, int *majdev, int *mindev)
{
struct raw_config_request rq;
#include "../file.h"
-#define FIO_USE_GENERIC_RAND
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define FIO_HAVE_GETTID
#define FIO_HAVE_CHARDEV_SIZE
#include "../file.h"
#define FIO_HAVE_ODIRECT
-#define FIO_USE_GENERIC_RAND
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define FIO_HAVE_FS_STAT
#define FIO_HAVE_GETTID
#include "../file.h"
-#define FIO_USE_GENERIC_RAND
#define FIO_USE_GENERIC_INIT_RANDOM_STATE
#define FIO_HAVE_FS_STAT
#define FIO_HAVE_GETTID
#define FIO_OS_HAS_CTIME_R
typedef psetid_t os_cpu_mask_t;
-typedef struct solaris_rand_seed os_random_state_t;
static inline int chardev_size(struct fio_file *f, unsigned long long *bytes)
{
return ret;
}
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
- rs->r[0] = seed & 0xffff;
- seed >>= 16;
- rs->r[1] = seed & 0xffff;
- seed >>= 16;
- rs->r[2] = seed & 0xffff;
- seed48(rs->r);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
- return nrand48(rs->r);
-}
-
#define FIO_OS_DIRECTIO
extern int directio(int, int);
static inline int fio_set_odirect(struct fio_file *f)
#define FIO_HAVE_CPU_AFFINITY
#define FIO_HAVE_CHARDEV_SIZE
#define FIO_HAVE_GETTID
-#define FIO_USE_GENERIC_RAND
#define FIO_PREFERRED_ENGINE "windowsaio"
#define FIO_PREFERRED_CLOCK_SOURCE CS_CGETTIME
}
#endif
-#ifdef FIO_USE_GENERIC_RAND
-typedef unsigned int os_random_state_t;
-
-static inline void os_random_seed(unsigned long seed, os_random_state_t *rs)
-{
- srand(seed);
-}
-
-static inline long os_random_long(os_random_state_t *rs)
-{
- long val;
-
- val = rand_r(rs);
- return val;
-}
-#endif
-
#ifdef FIO_USE_GENERIC_INIT_RANDOM_STATE
static inline int init_random_seeds(unsigned long *rand_seeds, int size)
{
<File Source="..\..\examples\filecreate-ioengine.fio" />
</Component>
<Component>
- <File Source="..\..\examples\fio-rand-read.job" />
+ <File Source="..\..\examples\fio-rand-read.fio" />
</Component>
<Component>
- <File Source="..\..\examples\fio-rand-RW.job" />
+ <File Source="..\..\examples\fio-rand-RW.fio" />
</Component>
<Component>
- <File Source="..\..\examples\fio-rand-write.job" />
+ <File Source="..\..\examples\fio-rand-write.fio" />
</Component>
<Component>
- <File Source="..\..\examples\fio-seq-read.job" />
+ <File Source="..\..\examples\fio-seq-read.fio" />
</Component>
<Component>
- <File Source="..\..\examples\fio-seq-RW.job" />
+ <File Source="..\..\examples\fio-seq-RW.fio" />
</Component>
<Component>
- <File Source="..\..\examples\fio-seq-write.job" />
+ <File Source="..\..\examples\fio-seq-write.fio" />
</Component>
<Component>
<File Source="..\..\examples\fixed-rate-submission.fio" />
<Component>
<File Source="..\..\examples\ftruncate.fio" />
</Component>
- <Component>
- <File Source="..\..\examples\fusion-aw-sync.fio" />
- </Component>
<Component>
<File Source="..\..\examples\gfapi.fio" />
</Component>
<ComponentRef Id="enospc_pressure.fio" />
<ComponentRef Id="falloc.fio" />
<ComponentRef Id="filecreate_ioengine.fio"/>
- <ComponentRef Id="fio_rand_read.job"/>
- <ComponentRef Id="fio_rand_RW.job"/>
- <ComponentRef Id="fio_rand_write.job"/>
- <ComponentRef Id="fio_seq_read.job"/>
- <ComponentRef Id="fio_seq_RW.job"/>
- <ComponentRef Id="fio_seq_write.job"/>
+ <ComponentRef Id="fio_rand_read.fio"/>
+ <ComponentRef Id="fio_rand_RW.fio"/>
+ <ComponentRef Id="fio_rand_write.fio"/>
+ <ComponentRef Id="fio_seq_read.fio"/>
+ <ComponentRef Id="fio_seq_RW.fio"/>
+ <ComponentRef Id="fio_seq_write.fio"/>
<ComponentRef Id="fixed_rate_submission.fio" />
<ComponentRef Id="flow.fio" />
<ComponentRef Id="fsx.fio" />
<ComponentRef Id="ftruncate.fio"/>
- <ComponentRef Id="fusion_aw_sync.fio" />
<ComponentRef Id="gfapi.fio" />
<ComponentRef Id="gpudirect_rdmaio_client.fio"/>
<ComponentRef Id="gpudirect_rdmaio_server.fio"/>
extern void fio_gettime(struct timespec *, void *);
/* These aren't defined in the MinGW headers */
-HRESULT WINAPI StringCchCopyA(
- char *pszDest,
- size_t cchDest,
- const char *pszSrc);
-
-HRESULT WINAPI StringCchPrintfA(
- char *pszDest,
- size_t cchDest,
- const char *pszFormat,
- ...);
+HRESULT WINAPI StringCchCopyA(char *pszDest, size_t cchDest, const char *pszSrc);
+HRESULT WINAPI StringCchPrintfA(char *pszDest, size_t cchDest, const char *pszFormat, ...);
int win_to_posix_error(DWORD winerr)
{
- switch (winerr)
- {
- case ERROR_FILE_NOT_FOUND: return ENOENT;
- case ERROR_PATH_NOT_FOUND: return ENOENT;
- case ERROR_ACCESS_DENIED: return EACCES;
- case ERROR_INVALID_HANDLE: return EBADF;
- case ERROR_NOT_ENOUGH_MEMORY: return ENOMEM;
- case ERROR_INVALID_DATA: return EINVAL;
- case ERROR_OUTOFMEMORY: return ENOMEM;
- case ERROR_INVALID_DRIVE: return ENODEV;
- case ERROR_NOT_SAME_DEVICE: return EXDEV;
- case ERROR_WRITE_PROTECT: return EROFS;
- case ERROR_BAD_UNIT: return ENODEV;
- case ERROR_SHARING_VIOLATION: return EACCES;
- case ERROR_LOCK_VIOLATION: return EACCES;
- case ERROR_SHARING_BUFFER_EXCEEDED: return ENOLCK;
- case ERROR_HANDLE_DISK_FULL: return ENOSPC;
- case ERROR_NOT_SUPPORTED: return ENOSYS;
- case ERROR_FILE_EXISTS: return EEXIST;
- case ERROR_CANNOT_MAKE: return EPERM;
- case ERROR_INVALID_PARAMETER: return EINVAL;
- case ERROR_NO_PROC_SLOTS: return EAGAIN;
- case ERROR_BROKEN_PIPE: return EPIPE;
- case ERROR_OPEN_FAILED: return EIO;
- case ERROR_NO_MORE_SEARCH_HANDLES: return ENFILE;
- case ERROR_CALL_NOT_IMPLEMENTED: return ENOSYS;
- case ERROR_INVALID_NAME: return ENOENT;
- case ERROR_WAIT_NO_CHILDREN: return ECHILD;
- case ERROR_CHILD_NOT_COMPLETE: return EBUSY;
- case ERROR_DIR_NOT_EMPTY: return ENOTEMPTY;
- case ERROR_SIGNAL_REFUSED: return EIO;
- case ERROR_BAD_PATHNAME: return ENOENT;
- case ERROR_SIGNAL_PENDING: return EBUSY;
- case ERROR_MAX_THRDS_REACHED: return EAGAIN;
- case ERROR_BUSY: return EBUSY;
- case ERROR_ALREADY_EXISTS: return EEXIST;
- case ERROR_NO_SIGNAL_SENT: return EIO;
- case ERROR_FILENAME_EXCED_RANGE: return EINVAL;
- case ERROR_META_EXPANSION_TOO_LONG: return EINVAL;
- case ERROR_INVALID_SIGNAL_NUMBER: return EINVAL;
- case ERROR_THREAD_1_INACTIVE: return EINVAL;
- case ERROR_BAD_PIPE: return EINVAL;
- case ERROR_PIPE_BUSY: return EBUSY;
- case ERROR_NO_DATA: return EPIPE;
- case ERROR_MORE_DATA: return EAGAIN;
- case ERROR_DIRECTORY: return ENOTDIR;
- case ERROR_PIPE_CONNECTED: return EBUSY;
- case ERROR_NO_TOKEN: return EINVAL;
- case ERROR_PROCESS_ABORTED: return EFAULT;
- case ERROR_BAD_DEVICE: return ENODEV;
- case ERROR_BAD_USERNAME: return EINVAL;
- case ERROR_OPEN_FILES: return EAGAIN;
- case ERROR_ACTIVE_CONNECTIONS: return EAGAIN;
- case ERROR_DEVICE_IN_USE: return EAGAIN;
- case ERROR_INVALID_AT_INTERRUPT_TIME: return EINTR;
- case ERROR_IO_DEVICE: return EIO;
- case ERROR_NOT_OWNER: return EPERM;
- case ERROR_END_OF_MEDIA: return ENOSPC;
- case ERROR_EOM_OVERFLOW: return ENOSPC;
- case ERROR_BEGINNING_OF_MEDIA: return ESPIPE;
- case ERROR_SETMARK_DETECTED: return ESPIPE;
- case ERROR_NO_DATA_DETECTED: return ENOSPC;
- case ERROR_POSSIBLE_DEADLOCK: return EDEADLOCK;
- case ERROR_CRC: return EIO;
- case ERROR_NEGATIVE_SEEK: return EINVAL;
- case ERROR_DISK_FULL: return ENOSPC;
- case ERROR_NOACCESS: return EFAULT;
- case ERROR_FILE_INVALID: return ENXIO;
+ switch (winerr) {
+ case ERROR_SUCCESS:
+ return 0;
+ case ERROR_FILE_NOT_FOUND:
+ return ENOENT;
+ case ERROR_PATH_NOT_FOUND:
+ return ENOENT;
+ case ERROR_ACCESS_DENIED:
+ return EACCES;
+ case ERROR_INVALID_HANDLE:
+ return EBADF;
+ case ERROR_NOT_ENOUGH_MEMORY:
+ return ENOMEM;
+ case ERROR_INVALID_DATA:
+ return EINVAL;
+ case ERROR_OUTOFMEMORY:
+ return ENOMEM;
+ case ERROR_INVALID_DRIVE:
+ return ENODEV;
+ case ERROR_NOT_SAME_DEVICE:
+ return EXDEV;
+ case ERROR_WRITE_PROTECT:
+ return EROFS;
+ case ERROR_BAD_UNIT:
+ return ENODEV;
+ case ERROR_NOT_READY:
+ return EAGAIN;
+ case ERROR_SHARING_VIOLATION:
+ return EACCES;
+ case ERROR_LOCK_VIOLATION:
+ return EACCES;
+ case ERROR_SHARING_BUFFER_EXCEEDED:
+ return ENOLCK;
+ case ERROR_HANDLE_DISK_FULL:
+ return ENOSPC;
+ case ERROR_NOT_SUPPORTED:
+ return ENOSYS;
+ case ERROR_FILE_EXISTS:
+ return EEXIST;
+ case ERROR_CANNOT_MAKE:
+ return EPERM;
+ case ERROR_INVALID_PARAMETER:
+ return EINVAL;
+ case ERROR_NO_PROC_SLOTS:
+ return EAGAIN;
+ case ERROR_BROKEN_PIPE:
+ return EPIPE;
+ case ERROR_OPEN_FAILED:
+ return EIO;
+ case ERROR_NO_MORE_SEARCH_HANDLES:
+ return ENFILE;
+ case ERROR_CALL_NOT_IMPLEMENTED:
+ return ENOSYS;
+ case ERROR_INVALID_NAME:
+ return ENOENT;
+ case ERROR_WAIT_NO_CHILDREN:
+ return ECHILD;
+ case ERROR_CHILD_NOT_COMPLETE:
+ return EBUSY;
+ case ERROR_DIR_NOT_EMPTY:
+ return ENOTEMPTY;
+ case ERROR_SIGNAL_REFUSED:
+ return EIO;
+ case ERROR_BAD_PATHNAME:
+ return ENOENT;
+ case ERROR_SIGNAL_PENDING:
+ return EBUSY;
+ case ERROR_MAX_THRDS_REACHED:
+ return EAGAIN;
+ case ERROR_BUSY:
+ return EBUSY;
+ case ERROR_ALREADY_EXISTS:
+ return EEXIST;
+ case ERROR_NO_SIGNAL_SENT:
+ return EIO;
+ case ERROR_FILENAME_EXCED_RANGE:
+ return EINVAL;
+ case ERROR_META_EXPANSION_TOO_LONG:
+ return EINVAL;
+ case ERROR_INVALID_SIGNAL_NUMBER:
+ return EINVAL;
+ case ERROR_THREAD_1_INACTIVE:
+ return EINVAL;
+ case ERROR_BAD_PIPE:
+ return EINVAL;
+ case ERROR_PIPE_BUSY:
+ return EBUSY;
+ case ERROR_NO_DATA:
+ return EPIPE;
+ case ERROR_MORE_DATA:
+ return EAGAIN;
+ case ERROR_DIRECTORY:
+ return ENOTDIR;
+ case ERROR_PIPE_CONNECTED:
+ return EBUSY;
+ case ERROR_NO_TOKEN:
+ return EINVAL;
+ case ERROR_PROCESS_ABORTED:
+ return EFAULT;
+ case ERROR_BAD_DEVICE:
+ return ENODEV;
+ case ERROR_BAD_USERNAME:
+ return EINVAL;
+ case ERROR_OPEN_FILES:
+ return EAGAIN;
+ case ERROR_ACTIVE_CONNECTIONS:
+ return EAGAIN;
+ case ERROR_DEVICE_IN_USE:
+ return EBUSY;
+ case ERROR_INVALID_AT_INTERRUPT_TIME:
+ return EINTR;
+ case ERROR_IO_DEVICE:
+ return EIO;
+ case ERROR_NOT_OWNER:
+ return EPERM;
+ case ERROR_END_OF_MEDIA:
+ return ENOSPC;
+ case ERROR_EOM_OVERFLOW:
+ return ENOSPC;
+ case ERROR_BEGINNING_OF_MEDIA:
+ return ESPIPE;
+ case ERROR_SETMARK_DETECTED:
+ return ESPIPE;
+ case ERROR_NO_DATA_DETECTED:
+ return ENOSPC;
+ case ERROR_POSSIBLE_DEADLOCK:
+ return EDEADLOCK;
+ case ERROR_CRC:
+ return EIO;
+ case ERROR_NEGATIVE_SEEK:
+ return EINVAL;
+ case ERROR_DISK_FULL:
+ return ENOSPC;
+ case ERROR_NOACCESS:
+ return EFAULT;
+ case ERROR_FILE_INVALID:
+ return ENXIO;
+ default:
+ log_err("fio: windows error %d not handled\n", winerr);
+ return EIO;
}
return winerr;
}
}
- for (i = 0; i < len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); i++)
- {
+ for (i = 0; i < len / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); i++) {
if (processor_info[i].Relationship == RelationProcessorCore)
num_processors += hweight64(processor_info[i].ProcessorMask);
}
SYSTEM_INFO sysInfo;
MEMORYSTATUSEX status;
- switch (name)
- {
+ switch (name) {
case _SC_NPROCESSORS_ONLN:
val = GetNumLogicalProcessors();
if (val == -1)
/* Copied from http://blogs.msdn.com/b/joshpoley/archive/2007/12/19/date-time-formats-and-conversions.aspx */
void Time_tToSystemTime(time_t dosTime, SYSTEMTIME *systemTime)
{
- FILETIME utcFT;
- LONGLONG jan1970;
+ FILETIME utcFT;
+ LONGLONG jan1970;
SYSTEMTIME tempSystemTime;
- jan1970 = Int32x32To64(dosTime, 10000000) + 116444736000000000;
- utcFT.dwLowDateTime = (DWORD)jan1970;
- utcFT.dwHighDateTime = jan1970 >> 32;
+ jan1970 = Int32x32To64(dosTime, 10000000) + 116444736000000000;
+ utcFT.dwLowDateTime = (DWORD)jan1970;
+ utcFT.dwHighDateTime = jan1970 >> 32;
- FileTimeToSystemTime((FILETIME*)&utcFT, &tempSystemTime);
+ FileTimeToSystemTime((FILETIME*)&utcFT, &tempSystemTime);
SystemTimeToTzSpecificLocalTime(NULL, &tempSystemTime, systemTime);
}
-char* ctime_r(const time_t *t, char *buf)
+char *ctime_r(const time_t *t, char *buf)
{
- SYSTEMTIME systime;
- const char * const dayOfWeek[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
- const char * const monthOfYear[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
-
- Time_tToSystemTime(*t, &systime);
- /* We don't know how long `buf` is, but assume it's rounded up from the minimum of 25 to 32 */
- StringCchPrintfA(buf, 31, "%s %s %d %02d:%02d:%02d %04d\n", dayOfWeek[systime.wDayOfWeek % 7], monthOfYear[(systime.wMonth - 1) % 12],
- systime.wDay, systime.wHour, systime.wMinute, systime.wSecond, systime.wYear);
- return buf;
+ SYSTEMTIME systime;
+ const char * const dayOfWeek[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+ const char * const monthOfYear[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+ Time_tToSystemTime(*t, &systime);
+
+ /*
+ * We don't know how long `buf` is, but assume it's rounded up from
+ * the minimum of 25 to 32
+ */
+ StringCchPrintfA(buf, 31, "%s %s %d %02d:%02d:%02d %04d\n",
+ dayOfWeek[systime.wDayOfWeek % 7],
+ monthOfYear[(systime.wMonth - 1) % 12],
+ systime.wDay, systime.wHour, systime.wMinute,
+ systime.wSecond, systime.wYear);
+ return buf;
}
int gettimeofday(struct timeval *restrict tp, void *restrict tzp)
return 0;
}
-int sigaction(int sig, const struct sigaction *act,
- struct sigaction *oact)
+int sigaction(int sig, const struct sigaction *act, struct sigaction *oact)
{
int rc = 0;
void (*prev_handler)(int);
return rc;
}
-int lstat(const char * path, struct stat * buf)
+int lstat(const char *path, struct stat *buf)
{
return stat(path, buf);
}
-void *mmap(void *addr, size_t len, int prot, int flags,
- int fildes, off_t off)
+void *mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
{
DWORD vaProt = 0;
DWORD mapAccess = 0;
lenhigh = len >> 16;
/* If the low DWORD is zero and the high DWORD is non-zero, `CreateFileMapping`
will return ERROR_INVALID_PARAMETER. To avoid this, set both to zero. */
- if (lenlow == 0) {
+ if (lenlow == 0)
lenhigh = 0;
- }
- if (flags & MAP_ANON || flags & MAP_ANONYMOUS)
- {
+ if (flags & MAP_ANON || flags & MAP_ANONYMOUS) {
allocAddr = VirtualAlloc(addr, len, MEM_COMMIT, vaProt);
if (allocAddr == NULL)
errno = win_to_posix_error(GetLastError());
- }
- else
- {
- hMap = CreateFileMapping((HANDLE)_get_osfhandle(fildes), NULL, vaProt, lenhigh, lenlow, NULL);
+ } else {
+ hMap = CreateFileMapping((HANDLE)_get_osfhandle(fildes), NULL,
+ vaProt, lenhigh, lenlow, NULL);
if (hMap != NULL)
- {
- allocAddr = MapViewOfFile(hMap, mapAccess, off >> 16, off & 0xFFFF, len);
- }
-
+ allocAddr = MapViewOfFile(hMap, mapAccess, off >> 16,
+ off & 0xFFFF, len);
if (hMap == NULL || allocAddr == NULL)
errno = win_to_posix_error(GetLastError());
success = UnmapViewOfFile(addr);
if (!success)
- {
success = VirtualFree(addr, 0, MEM_RELEASE);
- }
return !success;
}
void openlog(const char *ident, int logopt, int facility)
{
- if (log_file == INVALID_HANDLE_VALUE)
- log_file = CreateFileA("syslog.txt", GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, 0, NULL);
+ if (log_file != INVALID_HANDLE_VALUE)
+ return;
+
+ log_file = CreateFileA("syslog.txt", GENERIC_WRITE,
+ FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+ OPEN_ALWAYS, 0, NULL);
}
void closelog(void)
DWORD bytes_written;
if (log_file == INVALID_HANDLE_VALUE) {
- log_file = CreateFileA("syslog.txt", GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, 0, NULL);
+ log_file = CreateFileA("syslog.txt", GENERIC_WRITE,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL, OPEN_ALWAYS, 0, NULL);
}
if (log_file == INVALID_HANDLE_VALUE) {
{
int rc = 0;
- if (clock_id == CLOCK_MONOTONIC)
- {
+ if (clock_id == CLOCK_MONOTONIC) {
static LARGE_INTEGER freq = {{0,0}};
LARGE_INTEGER counts;
uint64_t t;
* and then divide by the frequency. */
t *= 1000000000;
tp->tv_nsec = t / freq.QuadPart;
- }
- else if (clock_id == CLOCK_REALTIME)
- {
+ } else if (clock_id == CLOCK_REALTIME) {
/* clock_gettime(CLOCK_REALTIME,...) is just an alias for gettimeofday with a
* higher-precision field. */
struct timeval tv;
int munlock(const void * addr, size_t len)
{
BOOL success = VirtualUnlock((LPVOID)addr, len);
+
if (!success) {
errno = win_to_posix_error(GetLastError());
return -1;
int mapid = -1;
uint32_t size_low = size & 0xFFFFFFFF;
uint32_t size_high = ((uint64_t)size) >> 32;
- HANDLE hMapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, (PAGE_EXECUTE_READWRITE | SEC_RESERVE), size_high, size_low, NULL);
+ HANDLE hMapping;
+
+ hMapping = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
+ PAGE_EXECUTE_READWRITE | SEC_RESERVE,
+ size_high, size_low, NULL);
if (hMapping != NULL) {
fileMappings[nFileMappings] = hMapping;
mapid = nFileMappings;
nFileMappings++;
- } else {
+ } else
errno = ENOSYS;
- }
return mapid;
}
void *shmat(int shmid, const void *shmaddr, int shmflg)
{
- void* mapAddr;
+ void *mapAddr;
MEMORY_BASIC_INFORMATION memInfo;
+
mapAddr = MapViewOfFile(fileMappings[shmid], FILE_MAP_ALL_ACCESS, 0, 0, 0);
if (mapAddr == NULL) {
errno = win_to_posix_error(GetLastError());
if (cmd == IPC_RMID) {
fileMappings[shmid] = INVALID_HANDLE_VALUE;
return 0;
- } else {
- log_err("%s is not implemented\n", __func__);
}
+
+ log_err("%s is not implemented\n", __func__);
errno = ENOSYS;
return -1;
}
{
int64_t pos = _telli64(fildes);
ssize_t len = _write(fildes, buf, nbyte);
+
_lseeki64(fildes, pos, SEEK_SET);
return len;
}
{
int64_t pos = _telli64(fildes);
ssize_t len = read(fildes, buf, nbyte);
+
_lseeki64(fildes, pos, SEEK_SET);
return len;
}
{
int i;
DWORD bytes_written = 0;
- for (i = 0; i < iovcnt; i++)
- {
- int len = send((SOCKET)fildes, iov[i].iov_base, iov[i].iov_len, 0);
- if (len == SOCKET_ERROR)
- {
+
+ for (i = 0; i < iovcnt; i++) {
+ int len;
+
+ len = send((SOCKET)fildes, iov[i].iov_base, iov[i].iov_len, 0);
+ if (len == SOCKET_ERROR) {
DWORD err = GetLastError();
errno = win_to_posix_error(err);
bytes_written = -1;
return bytes_written;
}
-long long strtoll(const char *restrict str, char **restrict endptr,
- int base)
+long long strtoll(const char *restrict str, char **restrict endptr, int base)
{
return _strtoi64(str, endptr, base);
}
FD_ZERO(&writefds);
FD_ZERO(&exceptfds);
- for (i = 0; i < nfds; i++)
- {
+ for (i = 0; i < nfds; i++) {
if (fds[i].fd < 0) {
fds[i].revents = 0;
continue;
rc = select(nfds, &readfds, &writefds, &exceptfds, to);
if (rc != SOCKET_ERROR) {
- for (i = 0; i < nfds; i++)
- {
- if (fds[i].fd < 0) {
+ for (i = 0; i < nfds; i++) {
+ if (fds[i].fd < 0)
continue;
- }
if ((fds[i].events & POLLIN) && FD_ISSET(fds[i].fd, &readfds))
fds[i].revents |= POLLIN;
DIR *opendir(const char *dirname)
{
struct dirent_ctx *dc = NULL;
+ HANDLE file;
/* See if we can open it. If not, we'll return an error here */
- HANDLE file = CreateFileA(dirname, 0, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
+ file = CreateFileA(dirname, 0, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+ OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
if (file != INVALID_HANDLE_VALUE) {
CloseHandle(file);
dc = (struct dirent_ctx*)malloc(sizeof(struct dirent_ctx));
if (dirp->find_handle == INVALID_HANDLE_VALUE) {
char search_pattern[MAX_PATH];
+
StringCchPrintfA(search_pattern, MAX_PATH-1, "%s\\*", dirp->dirname);
dirp->find_handle = FindFirstFileA(search_pattern, &find_data);
if (dirp->find_handle == INVALID_HANDLE_VALUE)
}
#ifdef CONFIG_WINDOWS_XP
-const char* inet_ntop(int af, const void *restrict src,
- char *restrict dst, socklen_t size)
+const char *inet_ntop(int af, const void *restrict src, char *restrict dst,
+ socklen_t size)
{
INT status = SOCKET_ERROR;
WSADATA wsd;
if (af == AF_INET) {
struct sockaddr_in si;
DWORD len = size;
+
memset(&si, 0, sizeof(si));
si.sin_family = af;
memcpy(&si.sin_addr, src, sizeof(si.sin_addr));
} else if (af == AF_INET6) {
struct sockaddr_in6 si6;
DWORD len = size;
+
memset(&si6, 0, sizeof(si6));
si6.sin6_family = af;
memcpy(&si6.sin6_addr, src, sizeof(si6.sin6_addr));
if (af == AF_INET) {
struct sockaddr_in si;
INT len = sizeof(si);
+
memset(&si, 0, sizeof(si));
si.sin_family = af;
status = WSAStringToAddressA((char*)src, af, NULL, (struct sockaddr*)&si, &len);
} else if (af == AF_INET6) {
struct sockaddr_in6 si6;
INT len = sizeof(si6);
+
memset(&si6, 0, sizeof(si6));
si6.sin6_family = af;
status = WSAStringToAddressA((char*)src, af, NULL, (struct sockaddr*)&si6, &len);
return "OPT_UNKNOWN?";
}
+static bool val_too_large(const struct fio_option *o, unsigned long long val,
+ bool is_uint)
+{
+ if (!o->maxval)
+ return false;
+
+ if (is_uint) {
+ if ((int) val < 0)
+ return (int) val > (int) o->maxval;
+ return (unsigned int) val > o->maxval;
+ }
+
+ return val > o->maxval;
+}
+
+static bool val_too_small(const struct fio_option *o, unsigned long long val,
+ bool is_uint)
+{
+ if (!o->minval)
+ return false;
+
+ if (is_uint)
+ return (int) val < o->minval;
+
+ return val < o->minval;
+}
+
static int __handle_option(const struct fio_option *o, const char *ptr,
void *data, int first, int more, int curr)
{
return 1;
}
- if (o->maxval && ull > o->maxval) {
- log_err("max value out of range: %llu"
- " (%llu max)\n", ull, o->maxval);
+ if (val_too_large(o, ull, o->type == FIO_OPT_INT)) {
+ log_err("%s: max value out of range: %llu"
+ " (%llu max)\n", o->name, ull, o->maxval);
return 1;
}
- if (o->minval && ull < o->minval) {
- log_err("min value out of range: %lld"
- " (%d min)\n", ull, o->minval);
+ if (val_too_small(o, ull, o->type == FIO_OPT_INT)) {
+ log_err("%s: min value out of range: %lld"
+ " (%d min)\n", o->name, ull, o->minval);
return 1;
}
if (o->posval[0].ival) {
#include "lib/getrusage.h"
#include "rate-submit.h"
+static void check_overlap(struct io_u *io_u)
+{
+ int i;
+ struct thread_data *td;
+ bool overlap = false;
+
+ do {
+ /*
+ * Allow only one thread to check for overlap at a
+ * time to prevent two threads from thinking the coast
+ * is clear and then submitting IOs that overlap with
+ * each other
+ */
+ pthread_mutex_lock(&overlap_check);
+ for_each_td(td, i) {
+ if (td->runstate <= TD_SETTING_UP ||
+ td->runstate >= TD_FINISHING ||
+ !td->o.serialize_overlap ||
+ td->o.io_submit_mode != IO_MODE_OFFLOAD)
+ continue;
+
+ overlap = in_flight_overlap(&td->io_u_all, io_u);
+ if (overlap) {
+ pthread_mutex_unlock(&overlap_check);
+ break;
+ }
+ }
+ } while (overlap);
+}
+
static int io_workqueue_fn(struct submit_worker *sw,
struct workqueue_work *work)
{
struct thread_data *td = sw->priv;
int ret;
+ if (td->o.serialize_overlap)
+ check_overlap(io_u);
+
dprint(FD_RATE, "io_u %p queued by %u\n", io_u, gettid());
io_u_set(td, io_u, IO_U_F_NO_FILE_PUT);
ret = io_u_queued_complete(td, min_evts);
if (ret > 0)
td->cur_depth -= ret;
- } else if (ret == FIO_Q_BUSY) {
- ret = io_u_queued_complete(td, td->cur_depth);
- if (ret > 0)
- td->cur_depth -= ret;
}
return 0;
clear_io_state(td, 1);
td_set_runstate(td, TD_RUNNING);
- td->flags |= TD_F_CHILD;
+ td->flags |= TD_F_CHILD | TD_F_NEED_LOCK;
td->parent = parent;
return 0;
int fio_net_port = FIO_NET_PORT;
-int exit_backend = 0;
+bool exit_backend = false;
enum {
SK_F_FREE = 1,
if (crc != cmd->cmd_crc16) {
log_err("fio: server bad crc on command (got %x, wanted %x)\n",
cmd->cmd_crc16, crc);
+ fprintf(f_err, "fio: server bad crc on command (got %x, wanted %x)\n",
+ cmd->cmd_crc16, crc);
return 1;
}
break;
default:
log_err("fio: bad server cmd version %d\n", cmd->version);
+ fprintf(f_err, "fio: client/server version mismatch (%d != %d)\n",
+ cmd->version, FIO_SERVER_VER);
return 1;
}
ret = 0;
break;
case FIO_NET_CMD_EXIT:
- exit_backend = 1;
+ exit_backend = true;
return -1;
case FIO_NET_CMD_LOAD_FILE:
ret = handle_load_file_cmd(cmd);
sk_out->sk = -1;
else {
log_info("\nfio: terminating on signal %d\n", signal);
- exit_backend = 1;
+ exit_backend = true;
}
}
setsid();
openlog("fio", LOG_NDELAY|LOG_NOWAIT|LOG_PID, LOG_USER);
- log_syslog = 1;
+ log_syslog = true;
close(STDIN_FILENO);
close(STDOUT_FILENO);
close(STDERR_FILENO);
};
enum {
- FIO_SERVER_VER = 74,
+ FIO_SERVER_VER = 77,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
extern int fio_server_create_sk_key(void);
extern void fio_server_destroy_sk_key(void);
-extern int exit_backend;
+extern bool exit_backend;
extern int fio_net_port;
#endif
if (ts->clat_percentiles || ts->lat_percentiles) {
if (ddir_rw(ddir)) {
+ uint64_t samples;
+
+ if (ts->clat_percentiles)
+ samples = ts->clat_stat[ddir].samples;
+ else
+ samples = ts->lat_stat[ddir].samples;
+
len = calc_clat_percentiles(ts->io_u_plat[ddir],
- ts->clat_stat[ddir].samples,
- ts->percentile_list, &ovals, &maxv,
- &minv);
+ samples, ts->percentile_list, &ovals,
+ &maxv, &minv);
} else {
len = calc_clat_percentiles(ts->io_u_sync_plat,
ts->sync_stat.samples,
if (is_backend) {
fio_server_send_job_options(opt_lists[i], i);
fio_server_send_ts(ts, rs);
- if (output_format & FIO_OUTPUT_TERSE)
- show_thread_status_terse(ts, rs, &output[__FIO_OUTPUT_TERSE]);
} else {
if (output_format & FIO_OUTPUT_TERSE)
show_thread_status_terse(ts, rs, &output[__FIO_OUTPUT_TERSE]);
extern int calc_log_samples(void);
extern struct io_log *agg_io_log[DDIR_RWDIR_CNT];
-extern int write_bw_log;
+extern bool write_bw_log;
static inline bool nsec_to_usec(unsigned long long *min,
unsigned long long *max, double *mean,
#include "../lib/lfsr.h"
#include "../lib/axmap.h"
-static int test_regular(size_t size, int seed)
+static int test_regular(uint64_t size, int seed)
{
struct fio_lfsr lfsr;
struct axmap *map;
return 0;
}
-static int test_next_free(size_t size, int seed)
+static int test_next_free(uint64_t size, int seed)
{
struct fio_lfsr lfsr;
struct axmap *map;
- size_t osize;
+ uint64_t osize;
uint64_t ff, lastfree;
int err, i;
return 0;
}
-static int test_multi(size_t size, unsigned int bit_off)
+static int test_multi(uint64_t size, unsigned int bit_off)
{
unsigned int map_size = size;
struct axmap *map;
int main(int argc, char *argv[])
{
- size_t size = (1UL << 23) - 200;
+ uint64_t size = (1ULL << 23) - 200;
int seed = 1;
if (argc > 1) {
--- /dev/null
+# Expected result: fio runs and completes the job
+# Buggy result: fio segfaults
+#
+[test]
+ioengine=null
+size=10g
+io_submit_mode=offload
+iodepth=16
--- /dev/null
+# Expected results: no parse warnings, runs with roughly 1/8 iops between
+# the two jobs.
+# Buggy result: parse warning on flow value overflow, no 1/8 division between
+# jobs.
+#
+[global]
+bs=4k
+ioengine=null
+size=100g
+runtime=3
+flow_id=1
+
+[flow1]
+flow=-8
+rate_iops=1000
+
+[flow2]
+flow=1
}
fio_reset_count() {
- sed -n 's/^.*write:[^;]*; \([0-9]*\) zone resets$/\1/p'
+ local count
+
+ count=$(sed -n 's/^.*write:[^;]*; \([0-9]*\) zone resets$/\1/p')
+ echo "${count:-0}"
}
}
run_fio() {
- local fio
+ local fio opts
fio=$(dirname "$0")/../../fio
- { echo; echo "fio $*"; echo; } >>"${logfile}.${test_number}"
+ opts=("--aux-path=/tmp" "--allow_file_create=0" "$@")
+ { echo; echo "fio ${opts[*]}"; echo; } >>"${logfile}.${test_number}"
- "${dynamic_analyzer[@]}" "$fio" "$@"
+ "${dynamic_analyzer[@]}" "$fio" "${opts[@]}"
}
run_one_fio_job() {
# Check whether buffered writes are refused.
test1() {
run_fio --name=job1 --filename="$dev" --rw=write --direct=0 --bs=4K \
- --size="${zone_size}" \
+ --size="${zone_size}" --thread=1 \
--zonemode=zbd --zonesize="${zone_size}" 2>&1 |
tee -a "${logfile}.${test_number}" |
grep -q 'Using direct I/O is mandatory for writing to ZBD drives'
logfile=$0.log
+passed=0
+failed=0
rc=0
for test_number in "${tests[@]}"; do
rm -f "${logfile}.${test_number}"
echo -n "Running test $test_number ... "
if eval "test$test_number"; then
status="PASS"
+ ((passed++))
else
status="FAIL"
+ ((failed++))
rc=1
fi
echo "$status"
echo "$status" >> "${logfile}.${test_number}"
done
+echo "$passed tests passed"
+if [ $failed -gt 0 ]; then
+ echo " and $failed tests failed"
+fi
exit $rc
unsigned int rand_repeatable;
unsigned int allrand_repeatable;
unsigned long long rand_seed;
- unsigned int dep_use_os_rand;
unsigned int log_avg_msec;
unsigned int log_hist_msec;
unsigned int log_hist_coarseness;
unsigned int hugepage_size;
unsigned long long rw_min_bs;
- unsigned int pad2;
unsigned int thinktime;
unsigned int thinktime_spin;
unsigned int thinktime_blocks;
char *read_iolog_file;
bool read_iolog_chunked;
char *write_iolog_file;
+ char *merge_blktrace_file;
+ fio_fp64_t merge_blktrace_scalars[FIO_IO_U_LIST_MAX_LEN];
+ fio_fp64_t merge_blktrace_iters[FIO_IO_U_LIST_MAX_LEN];
unsigned int write_bw_log;
unsigned int write_lat_log;
uint32_t override_sync;
uint32_t rand_repeatable;
uint32_t allrand_repeatable;
+ uint32_t pad;
uint64_t rand_seed;
- uint32_t dep_use_os_rand;
uint32_t log_avg_msec;
uint32_t log_hist_msec;
uint32_t log_hist_coarseness;
uint8_t read_iolog_file[FIO_TOP_STR_MAX];
uint8_t write_iolog_file[FIO_TOP_STR_MAX];
+ uint8_t merge_blktrace_file[FIO_TOP_STR_MAX];
+ fio_fp64_t merge_blktrace_scalars[FIO_IO_U_LIST_MAX_LEN];
+ fio_fp64_t merge_blktrace_iters[FIO_IO_U_LIST_MAX_LEN];
uint32_t write_bw_log;
uint32_t write_lat_log;
uint32_t rate_iops_min[DDIR_RWDIR_CNT];
uint32_t rate_process;
uint32_t rate_ign_think;
- uint32_t pad;
+ uint32_t pad3;
uint8_t ioscheduler[FIO_TOP_STR_MAX];
if (utime_since_now(&td->epoch) >= td->o.ramp_time) {
td->ramp_time_over = true;
reset_all_stats(td);
+ reset_io_stats(td);
td_set_runstate(td, TD_RAMP);
/*
# if you do this, don't pass normal CLI parameters to it
# otherwise it runs the CLI
-import sys, os, math, copy
+import sys, os, math, copy, time
from copy import deepcopy
import argparse
-import unittest2
+
+unittest2_imported = True
+try:
+ import unittest2
+except ImportError:
+ unittest2_imported = False
msec_per_sec = 1000
nsec_per_usec = 1000
+direction_read = 0
+direction_write = 1
class FioHistoLogExc(Exception):
pass
# log file parser raises FioHistoLogExc exceptions
# it returns histogram buckets in whatever unit fio uses
-
-def parse_hist_file(logfn, buckets_per_interval):
- max_timestamp_ms = 0.0
-
+# inputs:
+# logfn: pathname to histogram log file
+# buckets_per_interval - how many histogram buckets to expect
+# log_hist_msec - if not None, expected time interval between histogram records
+
+def parse_hist_file(logfn, buckets_per_interval, log_hist_msec):
+ previous_ts_ms_read = -1
+ previous_ts_ms_write = -1
+
with open(logfn, 'r') as f:
records = [ l.strip() for l in f.readlines() ]
intervals = []
+ last_time_ms = -1
+ last_direction = -1
for k, r in enumerate(records):
if r == '':
continue
if len(int_tokens) < 3:
raise FioHistoLogExc('too few numbers %s' % exception_suffix(k+1, logfn))
- time_ms = int_tokens[0]
- if time_ms > max_timestamp_ms:
- max_timestamp_ms = time_ms
-
direction = int_tokens[1]
- if direction != 0 and direction != 1:
+ if direction != direction_read and direction != direction_write:
raise FioHistoLogExc('invalid I/O direction %s' % exception_suffix(k+1, logfn))
+ time_ms = int_tokens[0]
+ if direction == direction_read:
+ if time_ms < previous_ts_ms_read:
+ raise FioHistoLogExc('read timestamp in column 1 decreased %s' % exception_suffix(k+1, logfn))
+ previous_ts_ms_read = time_ms
+ elif direction == direction_write:
+ if time_ms < previous_ts_ms_write:
+ raise FioHistoLogExc('write timestamp in column 1 decreased %s' % exception_suffix(k+1, logfn))
+ previous_ts_ms_write = time_ms
+
bsz = int_tokens[2]
if bsz > (1 << 24):
raise FioHistoLogExc('block size too large %s' % exception_suffix(k+1, logfn))
if len(buckets) != buckets_per_interval:
raise FioHistoLogExc('%d buckets per interval but %d expected in %s' %
(len(buckets), buckets_per_interval, exception_suffix(k+1, logfn)))
+
+ # hack to filter out records with the same timestamp
+ # we should not have to do this if fio logs histogram records correctly
+
+ if time_ms == last_time_ms and direction == last_direction:
+ continue
+ last_time_ms = time_ms
+ last_direction = direction
+
intervals.append((time_ms, direction, bsz, buckets))
if len(intervals) == 0:
raise FioHistoLogExc('no records in %s' % logfn)
- return (intervals, max_timestamp_ms)
+ (first_timestamp, _, _, _) = intervals[0]
+ if first_timestamp < 1000000:
+ start_time = 0 # assume log_unix_epoch = 0
+ elif log_hist_msec != None:
+ start_time = first_timestamp - log_hist_msec
+ elif len(intervals) > 1:
+ (second_timestamp, _, _, _) = intervals[1]
+ start_time = first_timestamp - (second_timestamp - first_timestamp)
+ else:
+ raise FioHistoLogExc('no way to estimate test start time')
+ (end_timestamp, _, _, _) = intervals[-1]
+
+ return (intervals, start_time, end_timestamp)
# compute time range for each bucket index in histogram record
# compute number of time quantum intervals in the test
-def get_time_intervals(time_quantum, max_timestamp_ms):
+def get_time_intervals(time_quantum, min_timestamp_ms, max_timestamp_ms):
# round down to nearest second
max_timestamp = max_timestamp_ms // msec_per_sec
+ min_timestamp = min_timestamp_ms // msec_per_sec
# round up to nearest whole multiple of time_quantum
- time_interval_count = (max_timestamp + time_quantum) // time_quantum
- end_time = time_interval_count * time_quantum
+ time_interval_count = ((max_timestamp - min_timestamp) + time_quantum) // time_quantum
+ end_time = min_timestamp + (time_interval_count * time_quantum)
return (end_time, time_interval_count)
# align raw histogram log data to time quantum so
# so the contribution of this bucket to this time quantum is
# 515 x 0.99 = 509.85
-def align_histo_log(raw_histogram_log, time_quantum, bucket_count, max_timestamp_ms):
+def align_histo_log(raw_histogram_log, time_quantum, bucket_count, min_timestamp_ms, max_timestamp_ms):
# slice up test time int intervals of time_quantum seconds
- (end_time, time_interval_count) = get_time_intervals(time_quantum, max_timestamp_ms)
+ (end_time, time_interval_count) = get_time_intervals(time_quantum, min_timestamp_ms, max_timestamp_ms)
time_qtm_ms = time_quantum * msec_per_sec
end_time_ms = end_time * msec_per_sec
aligned_intervals = []
for j in range(0, time_interval_count):
aligned_intervals.append((
- j * time_qtm_ms,
+ min_timestamp_ms + (j * time_qtm_ms),
[ 0.0 for j in range(0, bucket_count) ] ))
log_record_count = len(raw_histogram_log)
# calculate first quantum that overlaps this histogram record
- qtm_start_ms = (time_msec // time_qtm_ms) * time_qtm_ms
- qtm_end_ms = ((time_msec + time_qtm_ms) // time_qtm_ms) * time_qtm_ms
- qtm_index = qtm_start_ms // time_qtm_ms
+ offset_from_min_ts = time_msec - min_timestamp_ms
+ qtm_start_ms = min_timestamp_ms + (offset_from_min_ts // time_qtm_ms) * time_qtm_ms
+ qtm_end_ms = min_timestamp_ms + ((offset_from_min_ts + time_qtm_ms) // time_qtm_ms) * time_qtm_ms
+ qtm_index = offset_from_min_ts // time_qtm_ms
# for each quantum that overlaps this histogram record's time interval
while qtm_start_ms < time_msec_end: # while quantum overlaps record
+ # some histogram logs may be longer than others
+
+ if len(aligned_intervals) <= qtm_index:
+ break
+
# calculate fraction of time that this quantum
# overlaps histogram record's time interval
parser.add_argument("--time-quantum", dest="time_quantum",
default="1", type=int,
help="time quantum in seconds (default=1)")
+ parser.add_argument("--log-hist-msec", dest="log_hist_msec",
+ type=int, default=None,
+ help="log_hist_msec value in fio job file")
parser.add_argument("--output-unit", dest="output_unit",
default="usec", type=str,
help="Latency percentile output unit: msec|usec|nsec (default usec)")
buckets_per_interval = buckets_per_group * args.bucket_groups
print('buckets per interval = %d ' % buckets_per_interval)
bucket_index_range = range(0, buckets_per_interval)
+ if args.log_hist_msec != None:
+ print('log_hist_msec = %d' % args.log_hist_msec)
if args.time_quantum == 0:
print('ERROR: time-quantum must be a positive number of seconds')
print('output unit = ' + args.output_unit)
if args.output_unit == 'msec':
- time_divisor = 1000.0
+ time_divisor = float(msec_per_sec)
elif args.output_unit == 'usec':
time_divisor = 1.0
- # calculate response time interval associated with each histogram bucket
-
- bucket_times = time_ranges(args.bucket_groups, buckets_per_group, fio_version=args.fio_version)
-
# construct template for each histogram bucket array with buckets all zeroes
# we just copy this for each new histogram
zeroed_buckets = [ 0.0 for r in bucket_index_range ]
- # print CSV header just like fiologparser_hist does
+ # calculate response time interval associated with each histogram bucket
- header = 'msec, '
- for p in args.pctiles_wanted:
- header += '%3.1f, ' % p
- print('time (millisec), percentiles in increasing order with values in ' + args.output_unit)
- print(header)
+ bucket_times = time_ranges(args.bucket_groups, buckets_per_group, fio_version=args.fio_version)
# parse the histogram logs
# assumption: each bucket has a monotonically increasing time
# (exception: if randrw workload, then there is a read and a write
# record for the same time interval)
- max_timestamp_all_logs = 0
+ test_start_time = 0
+ test_end_time = 1.0e18
hist_files = {}
for fn in args.file_list:
try:
- (hist_files[fn], max_timestamp_ms) = parse_hist_file(fn, buckets_per_interval)
+ (hist_files[fn], log_start_time, log_end_time) = parse_hist_file(fn, buckets_per_interval, args.log_hist_msec)
except FioHistoLogExc as e:
myabort(str(e))
- max_timestamp_all_logs = max(max_timestamp_all_logs, max_timestamp_ms)
-
- (end_time, time_interval_count) = get_time_intervals(args.time_quantum, max_timestamp_all_logs)
+ # we consider the test started when all threads have started logging
+ test_start_time = max(test_start_time, log_start_time)
+ # we consider the test over when one of the logs has ended
+ test_end_time = min(test_end_time, log_end_time)
+
+ if test_start_time >= test_end_time:
+ raise FioHistoLogExc('no time interval when all threads logs overlapped')
+ if test_start_time > 0:
+ print('all threads running as of unix epoch time %d = %s' % (
+ test_start_time/float(msec_per_sec),
+ time.ctime(test_start_time/1000.0)))
+
+ (end_time, time_interval_count) = get_time_intervals(args.time_quantum, test_start_time, test_end_time)
all_threads_histograms = [ ((j*args.time_quantum*msec_per_sec), deepcopy(zeroed_buckets))
- for j in range(0, time_interval_count) ]
+ for j in range(0, time_interval_count) ]
for logfn in hist_files.keys():
aligned_per_thread = align_histo_log(hist_files[logfn],
args.time_quantum,
buckets_per_interval,
- max_timestamp_all_logs)
+ test_start_time,
+ test_end_time)
for t in range(0, time_interval_count):
(_, all_threads_histo_t) = all_threads_histograms[t]
(_, log_histo_t) = aligned_per_thread[t]
add_to_histo_from( all_threads_histo_t, log_histo_t )
# calculate percentiles across aggregate histogram for all threads
+ # print CSV header just like fiologparser_hist does
+
+ header = 'msec-since-start, '
+ for p in args.pctiles_wanted:
+ header += '%3.1f, ' % p
+ print('time (millisec), percentiles in increasing order with values in ' + args.output_unit)
+ print(header)
for (t_msec, all_threads_histo_t) in all_threads_histograms:
- record = '%d, ' % t_msec
+ record = '%8d, ' % t_msec
pct = get_pctiles(all_threads_histo_t, args.pctiles_wanted, bucket_times)
if not pct:
for w in args.pctiles_wanted:
#end of MAIN PROGRAM
-
##### below are unit tests ##############
-import tempfile, shutil
-from os.path import join
-should_not_get_here = False
+if unittest2_imported:
+ import tempfile, shutil
+ from os.path import join
+ should_not_get_here = False
-class Test(unittest2.TestCase):
+ class Test(unittest2.TestCase):
tempdir = None
# a little less typing please
with open(self.fn, 'w') as f:
f.write('1234, 0, 4096, 1, 2, 3, 4\n')
f.write('5678,1,16384,5,6,7,8 \n')
- (raw_histo_log, max_timestamp) = parse_hist_file(self.fn, 4) # 4 buckets per interval
- self.A(len(raw_histo_log) == 2 and max_timestamp == 5678)
+ (raw_histo_log, min_timestamp, max_timestamp) = parse_hist_file(self.fn, 4, None) # 4 buckets per interval
+ # if log_unix_epoch=1 was not used, then min_timestamp will always be set to zero
+ self.A(len(raw_histo_log) == 2 and min_timestamp == 0 and max_timestamp == 5678)
(time_ms, direction, bsz, histo) = raw_histo_log[0]
self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ])
(time_ms, direction, bsz, histo) = raw_histo_log[1]
with open(self.fn, 'w') as f:
pass
try:
- (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
self.A(should_not_get_here)
except FioHistoLogExc as e:
self.A(str(e).startswith('no records'))
f.write('1234, 0, 4096, 1, 2, 3, 4\n')
f.write('5678,1,16384,5,6,7,8 \n')
f.write('\n')
- (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
+ (raw_histo_log, _, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
self.A(len(raw_histo_log) == 2 and max_timestamp_ms == 5678)
(time_ms, direction, bsz, histo) = raw_histo_log[0]
self.A(time_ms == 1234 and direction == 0 and bsz == 4096 and histo == [ 1, 2, 3, 4 ])
with open(self.fn, 'w') as f:
f.write('12, 0, 4096, 1a, 2, 3, 4\n')
try:
- (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
self.A(False)
except FioHistoLogExc as e:
self.A(str(e).startswith('non-integer'))
with open(self.fn, 'w') as f:
f.write('-12, 0, 4096, 1, 2, 3, 4\n')
try:
- (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
self.A(False)
except FioHistoLogExc as e:
self.A(str(e).startswith('negative integer'))
with open(self.fn, 'w') as f:
f.write('0, 0\n')
try:
- (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
self.A(False)
except FioHistoLogExc as e:
self.A(str(e).startswith('too few numbers'))
with open(self.fn, 'w') as f:
f.write('100, 2, 4096, 1, 2, 3, 4\n')
try:
- (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
self.A(False)
except FioHistoLogExc as e:
self.A(str(e).startswith('invalid I/O direction'))
def test_b8_parse_bsz_too_big(self):
with open(self.fn+'_good', 'w') as f:
f.write('100, 1, %d, 1, 2, 3, 4\n' % (1<<24))
- (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn+'_good', 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn+'_good', 4, None)
with open(self.fn+'_bad', 'w') as f:
f.write('100, 1, 20000000, 1, 2, 3, 4\n')
try:
- (raw_histo_log, _) = parse_hist_file(self.fn+'_bad', 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn+'_bad', 4, None)
self.A(False)
except FioHistoLogExc as e:
self.A(str(e).startswith('block size too large'))
with open(self.fn, 'w') as f:
f.write('100, 1, %d, 1, 2, 3, 4, 5\n' % (1<<24))
try:
- (raw_histo_log, _) = parse_hist_file(self.fn, 4)
+ (raw_histo_log, _, _) = parse_hist_file(self.fn, 4, None)
self.A(False)
except FioHistoLogExc as e:
self.A(str(e).__contains__('buckets per interval'))
def test_d1_align_histo_log_1_quantum(self):
with open(self.fn, 'w') as f:
f.write('100, 1, 4096, 1, 2, 3, 4')
- (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
- self.A(max_timestamp_ms == 100)
- aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms)
+ (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+ self.A(min_timestamp_ms == 0 and max_timestamp_ms == 100)
+ aligned_log = align_histo_log(raw_histo_log, 5, 4, min_timestamp_ms, max_timestamp_ms)
self.A(len(aligned_log) == 1)
(time_ms0, h) = aligned_log[0]
- self.A(time_ms0 == 0 and h == [1.0, 2.0, 3.0, 4.0])
+ self.A(time_ms0 == 0 and h == [1., 2., 3., 4.])
+
+ # handle case with log_unix_epoch=1 timestamps, 1-second time quantum
+ # here both records will be separated into 2 aligned intervals
+
+ def test_d1a_align_2rec_histo_log_epoch_1_quantum_1sec(self):
+ with open(self.fn, 'w') as f:
+ f.write('1536504002123, 1, 4096, 1, 2, 3, 4\n')
+ f.write('1536504003123, 1, 4096, 4, 3, 2, 1\n')
+ (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+ self.A(min_timestamp_ms == 1536504001123 and max_timestamp_ms == 1536504003123)
+ aligned_log = align_histo_log(raw_histo_log, 1, 4, min_timestamp_ms, max_timestamp_ms)
+ self.A(len(aligned_log) == 3)
+ (time_ms0, h) = aligned_log[0]
+ self.A(time_ms0 == 1536504001123 and h == [0., 0., 0., 0.])
+ (time_ms1, h) = aligned_log[1]
+ self.A(time_ms1 == 1536504002123 and h == [1., 2., 3., 4.])
+ (time_ms2, h) = aligned_log[2]
+ self.A(time_ms2 == 1536504003123 and h == [4., 3., 2., 1.])
+
+ # handle case with log_unix_epoch=1 timestamps, 5-second time quantum
+ # here both records will be merged into a single aligned time interval
+
+ def test_d1b_align_2rec_histo_log_epoch_1_quantum_5sec(self):
+ with open(self.fn, 'w') as f:
+ f.write('1536504002123, 1, 4096, 1, 2, 3, 4\n')
+ f.write('1536504003123, 1, 4096, 4, 3, 2, 1\n')
+ (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+ self.A(min_timestamp_ms == 1536504001123 and max_timestamp_ms == 1536504003123)
+ aligned_log = align_histo_log(raw_histo_log, 5, 4, min_timestamp_ms, max_timestamp_ms)
+ self.A(len(aligned_log) == 1)
+ (time_ms0, h) = aligned_log[0]
+ self.A(time_ms0 == 1536504001123 and h == [5., 5., 5., 5.])
# we need this to compare 2 lists of floating point numbers for equality
# because of floating-point imprecision
with open(self.fn, 'w') as f:
f.write('2000, 1, 4096, 1, 2, 3, 4\n')
f.write('7000, 1, 4096, 1, 2, 3, 4\n')
- (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 4)
- self.A(max_timestamp_ms == 7000)
+ (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 4, None)
+ self.A(min_timestamp_ms == 0 and max_timestamp_ms == 7000)
(_, _, _, raw_buckets1) = raw_histo_log[0]
(_, _, _, raw_buckets2) = raw_histo_log[1]
- aligned_log = align_histo_log(raw_histo_log, 5, 4, max_timestamp_ms)
+ aligned_log = align_histo_log(raw_histo_log, 5, 4, min_timestamp_ms, max_timestamp_ms)
self.A(len(aligned_log) == 2)
(time_ms1, h1) = aligned_log[0]
(time_ms2, h2) = aligned_log[1]
with open(self.fn, 'w') as f:
buckets = [ 100 for j in range(0, 128) ]
f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets]))
- (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, 128)
- self.A(max_timestamp_ms == 9000)
- aligned_log = align_histo_log(raw_histo_log, 5, 128, max_timestamp_ms)
+ (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, 128, None)
+ self.A(min_timestamp_ms == 0 and max_timestamp_ms == 9000)
+ aligned_log = align_histo_log(raw_histo_log, 5, 128, min_timestamp_ms, max_timestamp_ms)
time_intervals = time_ranges(4, 32)
# since buckets are all equal, then median is halfway through time_intervals
# and max latency interval is at end of time_intervals
# add one I/O request to last bucket
buckets[-1] = 1
f.write('9000, 1, 4096, %s\n' % ', '.join([str(b) for b in buckets]))
- (raw_histo_log, max_timestamp_ms) = parse_hist_file(self.fn, fio_v3_bucket_count)
- self.A(max_timestamp_ms == 9000)
- aligned_log = align_histo_log(raw_histo_log, 5, fio_v3_bucket_count, max_timestamp_ms)
+ (raw_histo_log, min_timestamp_ms, max_timestamp_ms) = parse_hist_file(self.fn, fio_v3_bucket_count, None)
+ self.A(min_timestamp_ms == 0 and max_timestamp_ms == 9000)
+ aligned_log = align_histo_log(raw_histo_log, 5, fio_v3_bucket_count, min_timestamp_ms, max_timestamp_ms)
(time_ms, histo) = aligned_log[1]
time_intervals = time_ranges(29, 64)
expected_pctiles = { 100.0:(64*(1<<28))/1000.0 }
if __name__ == '__main__':
if os.getenv('UNITTEST'):
- sys.exit(unittest2.main())
+ if unittest2_imported:
+ sys.exit(unittest2.main())
+ else:
+ raise Exception('you must install unittest2 module to run unit test')
else:
compute_percentiles_from_logs()
f->file_name);
return false;
}
- log_info("%s: rounded up offset from %lu to %lu\n",
- f->file_name, f->file_offset,
- new_offset);
+ log_info("%s: rounded up offset from %llu to %llu\n",
+ f->file_name, (unsigned long long) f->file_offset,
+ (unsigned long long) new_offset);
f->io_size -= (new_offset - f->file_offset);
f->file_offset = new_offset;
}
f->file_name);
return false;
}
- log_info("%s: rounded down io_size from %lu to %lu\n",
- f->file_name, f->io_size,
- new_end - f->file_offset);
+ log_info("%s: rounded down io_size from %llu to %llu\n",
+ f->file_name, (unsigned long long) f->io_size,
+ (unsigned long long) (new_end - f->file_offset));
f->io_size = new_end - f->file_offset;
}
}
if (td->o.zone_size == 0) {
td->o.zone_size = zone_size;
} else if (td->o.zone_size != zone_size) {
- log_info("fio: %s job parameter zonesize %lld does not match disk zone size %ld.\n",
- f->file_name, td->o.zone_size, zone_size);
+ log_info("fio: %s job parameter zonesize %llu does not match disk zone size %llu.\n",
+ f->file_name, (unsigned long long) td->o.zone_size,
+ (unsigned long long) zone_size);
ret = -EINVAL;
goto close;
}
- dprint(FD_ZBD, "Device %s has %d zones of size %lu KB\n", f->file_name,
- nr_zones, zone_size / 1024);
+ dprint(FD_ZBD, "Device %s has %d zones of size %llu KB\n", f->file_name,
+ nr_zones, (unsigned long long) zone_size / 1024);
zbd_info = scalloc(1, sizeof(*zbd_info) +
(nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
break;
ret = read_zone_info(fd, start_sector, buf, bufsz);
if (ret < 0) {
- log_info("fio: BLKREPORTZONE(%lu) failed for %s (%d).\n",
- start_sector, f->file_name, -ret);
+ log_info("fio: BLKREPORTZONE(%llu) failed for %s (%d).\n",
+ (unsigned long long) start_sector, f->file_name, -ret);
goto close;
}
}
return ret;
}
+static unsigned int zbd_zone_nr(struct zoned_block_device_info *zbd_info,
+ struct fio_zone_info *zone)
+{
+ return zone - zbd_info->zone_info;
+}
+
/**
* zbd_reset_zone - reset the write pointer of a single zone
* @td: FIO thread data.
static int zbd_reset_zone(struct thread_data *td, const struct fio_file *f,
struct fio_zone_info *z)
{
- int ret;
+ dprint(FD_ZBD, "%s: resetting wp of zone %u.\n", f->file_name,
+ zbd_zone_nr(f->zbd_info, z));
- dprint(FD_ZBD, "%s: resetting wp of zone %lu.\n", f->file_name,
- z - f->zbd_info->zone_info);
- ret = zbd_reset_range(td, f, z->start, (z+1)->start - z->start);
- return ret;
+ return zbd_reset_range(td, f, z->start, (z+1)->start - z->start);
}
/*
bool reset_wp;
int res = 0;
- dprint(FD_ZBD, "%s: examining zones %lu .. %lu\n", f->file_name,
- zb - f->zbd_info->zone_info, ze - f->zbd_info->zone_info);
+ dprint(FD_ZBD, "%s: examining zones %u .. %u\n", f->file_name,
+ zbd_zone_nr(f->zbd_info, zb), zbd_zone_nr(f->zbd_info, ze));
assert(f->fd != -1);
for (z = zb; z < ze; z++) {
pthread_mutex_lock(&z->mutex);
start_z = z;
} else if (start_z < ze && !reset_wp) {
dprint(FD_ZBD,
- "%s: resetting zones %lu .. %lu\n",
+ "%s: resetting zones %u .. %u\n",
f->file_name,
- start_z - f->zbd_info->zone_info,
- z - f->zbd_info->zone_info);
+ zbd_zone_nr(f->zbd_info, start_z),
+ zbd_zone_nr(f->zbd_info, z));
if (zbd_reset_range(td, f, start_z->start,
z->start - start_z->start) < 0)
res = 1;
default:
if (start_z == ze)
break;
- dprint(FD_ZBD, "%s: resetting zones %lu .. %lu\n",
- f->file_name, start_z - f->zbd_info->zone_info,
- z - f->zbd_info->zone_info);
+ dprint(FD_ZBD, "%s: resetting zones %u .. %u\n",
+ f->file_name, zbd_zone_nr(f->zbd_info, start_z),
+ zbd_zone_nr(f->zbd_info, z));
if (zbd_reset_range(td, f, start_z->start,
z->start - start_z->start) < 0)
res = 1;
}
}
if (start_z < ze) {
- dprint(FD_ZBD, "%s: resetting zones %lu .. %lu\n", f->file_name,
- start_z - f->zbd_info->zone_info,
- z - f->zbd_info->zone_info);
+ dprint(FD_ZBD, "%s: resetting zones %u .. %u\n", f->file_name,
+ zbd_zone_nr(f->zbd_info, start_z),
+ zbd_zone_nr(f->zbd_info, z));
if (zbd_reset_range(td, f, start_z->start,
z->start - start_z->start) < 0)
res = 1;
return write_cnt == 0;
}
-/* Check whether the value of zbd_info.sectors_with_data is correct. */
-static void check_swd(const struct thread_data *td, const struct fio_file *f)
+enum swd_action {
+ CHECK_SWD,
+ SET_SWD,
+};
+
+/* Calculate the number of sectors with data (swd) and perform action 'a' */
+static uint64_t zbd_process_swd(const struct fio_file *f, enum swd_action a)
{
-#if 0
struct fio_zone_info *zb, *ze, *z;
- uint64_t swd;
+ uint64_t swd = 0;
zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)];
ze = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset +
f->io_size)];
- swd = 0;
for (z = zb; z < ze; z++) {
pthread_mutex_lock(&z->mutex);
swd += z->wp - z->start;
}
pthread_mutex_lock(&f->zbd_info->mutex);
- assert(f->zbd_info->sectors_with_data == swd);
+ switch (a) {
+ case CHECK_SWD:
+ assert(f->zbd_info->sectors_with_data == swd);
+ break;
+ case SET_SWD:
+ f->zbd_info->sectors_with_data = swd;
+ break;
+ }
pthread_mutex_unlock(&f->zbd_info->mutex);
for (z = zb; z < ze; z++)
pthread_mutex_unlock(&z->mutex);
-#endif
+
+ return swd;
+}
+
+/*
+ * The swd check is useful for debugging but takes too much time to leave
+ * it enabled all the time. Hence it is disabled by default.
+ */
+static const bool enable_check_swd = false;
+
+/* Check whether the value of zbd_info.sectors_with_data is correct. */
+static void zbd_check_swd(const struct fio_file *f)
+{
+ if (!enable_check_swd)
+ return;
+
+ zbd_process_swd(f, CHECK_SWD);
+}
+
+static void zbd_init_swd(struct fio_file *f)
+{
+ uint64_t swd;
+
+ swd = zbd_process_swd(f, SET_SWD);
+ dprint(FD_ZBD, "%s(%s): swd = %" PRIu64 "\n", __func__, f->file_name,
+ swd);
}
void zbd_file_reset(struct thread_data *td, struct fio_file *f)
{
- struct fio_zone_info *zb, *ze, *z;
+ struct fio_zone_info *zb, *ze;
uint32_t zone_idx_e;
- uint64_t swd = 0;
if (!f->zbd_info)
return;
zb = &f->zbd_info->zone_info[zbd_zone_idx(f, f->file_offset)];
zone_idx_e = zbd_zone_idx(f, f->file_offset + f->io_size);
ze = &f->zbd_info->zone_info[zone_idx_e];
- for (z = zb ; z < ze; z++) {
- pthread_mutex_lock(&z->mutex);
- swd += z->wp - z->start;
- }
- pthread_mutex_lock(&f->zbd_info->mutex);
- f->zbd_info->sectors_with_data = swd;
- pthread_mutex_unlock(&f->zbd_info->mutex);
- for (z = zb ; z < ze; z++)
- pthread_mutex_unlock(&z->mutex);
- dprint(FD_ZBD, "%s(%s): swd = %ld\n", __func__, f->file_name, swd);
+ zbd_init_swd(f);
/*
* If data verification is enabled reset the affected zones before
* writing any data to avoid that a zone reset has to be issued while
}
if (z->verify_block * min_bs >= f->zbd_info->zone_size)
- log_err("%s: %d * %d >= %ld\n", f->file_name, z->verify_block,
- min_bs, f->zbd_info->zone_size);
+ log_err("%s: %d * %d >= %llu\n", f->file_name, z->verify_block,
+ min_bs, (unsigned long long) f->zbd_info->zone_size);
io_u->offset = z->start + z->verify_block++ * min_bs;
return z;
}
}
unlock:
pthread_mutex_unlock(&z->mutex);
+
+ zbd_check_swd(io_u->file);
}
bool zbd_unaligned_write(int error_code)
io_u->ddir == DDIR_READ && td->o.read_beyond_wp)
return io_u_accept;
+ zbd_check_swd(f);
+
pthread_mutex_lock(&zb->mutex);
switch (io_u->ddir) {
case DDIR_READ:
}
/* Check whether the zone reset threshold has been exceeded */
if (td->o.zrf.u.f) {
- check_swd(td, f);
if (f->zbd_info->sectors_with_data >=
f->io_size * td->o.zrt.u.f &&
zbd_dec_and_reset_write_cnt(td, f)) {
zb->reset_zone = 0;
if (zbd_reset_zone(td, f, zb) < 0)
goto eof;
- check_swd(td, f);
}
/* Make writes occur at the write pointer */
assert(!zbd_zone_full(f, zb, min_bs));
{
char *res;
- if (asprintf(&res, "; %ld zone resets", ts->nr_zone_resets) < 0)
+ if (asprintf(&res, "; %llu zone resets", (unsigned long long) ts->nr_zone_resets) < 0)
return NULL;
return res;
}
/**
* struct fio_zone_info - information about a single ZBD zone
- * @start: zone start in 512 byte units
- * @wp: zone write pointer location in 512 byte units
+ * @start: zone start location (bytes)
+ * @wp: zone write pointer location (bytes)
* @verify_block: number of blocks that have been verified for this zone
* @mutex: protects the modifiable members in this structure
* @type: zone type (BLK_ZONE_TYPE_*)