**write**
Sequential writes.
**trim**
- Sequential trims (Linux block devices only).
+ Sequential trims (Linux block devices and SCSI
+ character devices only).
**randread**
Random reads.
**randwrite**
Random writes.
**randtrim**
- Random trims (Linux block devices only).
+ Random trims (Linux block devices and SCSI
+ character devices only).
**rw,readwrite**
Sequential mixed reads and writes.
**randrw**
ioctl, or if the target is an sg character device we use
:manpage:`read(2)` and :manpage:`write(2)` for asynchronous
I/O. Requires :option:`filename` option to specify either block or
- character devices.
+ character devices. This engine supports trim operations.
The sg engine includes engine specific options.
**null**
the force unit access (fua) flag. Default is 0.
.. option:: sg_write_mode=str : [sg]
+
Specify the type of write commands to issue. This option can take three values:
**write**
replay, the file needs to be turned into a blkparse binary data file first
(``blkparse <device> -o /dev/null -d file_for_fio.bin``).
+ .. option:: read_iolog_chunked=bool
+
+ Determines how iolog is read. If false (default), the entire :option:`read_iolog`
+ will be read at once. If set to true, input from the iolog will be read
+ gradually. Useful when the iolog is very large, or when it is generated.
+
.. option:: replay_no_stall=bool
When replaying I/O with :option:`read_iolog` the default behavior is to
*ret = -io_u->error;
clear_io_u(td, io_u);
} else if (io_u->resid) {
- int bytes = io_u->xfer_buflen - io_u->resid;
+ long long bytes = io_u->xfer_buflen - io_u->resid;
struct fio_file *f = io_u->file;
if (bytes_issued)
if (x1 < y2 && y1 < x2) {
overlap = true;
- dprint(FD_IO, "in-flight overlap: %llu/%lu, %llu/%lu\n",
+ dprint(FD_IO, "in-flight overlap: %llu/%llu, %llu/%llu\n",
x1, io_u->buflen,
y1, check_io_u->buflen);
break;
* Break if we exceeded the bytes. The exception is time
* based runs, but we still need to break out of the loop
* for those to run verification, if enabled.
+ * Jobs read from iolog do not use this stop condition.
*/
if (bytes_issued >= total_bytes &&
+ !td->o.read_iolog_file &&
(!td->o.time_based ||
(td->o.time_based && td->o.verify != VERIFY_NONE)))
break;
log_io_piece(td, io_u);
if (td->o.io_submit_mode == IO_MODE_OFFLOAD) {
- const unsigned long blen = io_u->xfer_buflen;
+ const unsigned long long blen = io_u->xfer_buflen;
const enum fio_ddir __ddir = acct_ddir(io_u);
if (td->error)
static int init_io_u(struct thread_data *td)
{
struct io_u *io_u;
- unsigned int max_bs, min_write;
+ unsigned long long max_bs, min_write;
int cl_align, i, max_units;
int data_xfer = 1, err;
char *p;
td->orig_buffer_size += page_mask + td->o.mem_align;
if (td->o.mem_type == MEM_SHMHUGE || td->o.mem_type == MEM_MMAPHUGE) {
- unsigned long bs;
+ unsigned long long bs;
bs = td->orig_buffer_size + td->o.hugepage_size - 1;
td->orig_buffer_size = bs & ~(td->o.hugepage_size - 1);
*/
if (o->write_iolog_file)
write_iolog_close(td);
+ if (td->io_log_rfile)
+ fclose(td->io_log_rfile);
td_set_runstate(td, TD_EXITED);
Sequential writes.
.TP
.B trim
-Sequential trims (Linux block devices only).
+Sequential trims (Linux block devices and SCSI character devices only).
.TP
.B randread
Random reads.
Random writes.
.TP
.B randtrim
-Random trims (Linux block devices only).
+Random trims (Linux block devices and SCSI character devices only).
.TP
.B rw,readwrite
Sequential mixed reads and writes.
ioctl, or if the target is an sg character device we use
\fBread\fR\|(2) and \fBwrite\fR\|(2) for asynchronous
I/O. Requires \fBfilename\fR option to specify either block or
-character devices. The sg engine includes engine specific options.
+character devices. This engine supports trim operations. The
+sg engine includes engine specific options.
.TP
.B null
Doesn't transfer any data, just pretends to. This is mainly used to
replay, the file needs to be turned into a blkparse binary data file first
(`blkparse <device> \-o /dev/null \-d file_for_fio.bin').
.TP
+ .BI read_iolog_chunked \fR=\fPbool
+ Determines how iolog is read. If false (default), the entire \fBread_iolog\fR
+ will be read at once. If set to true, input from the iolog will be read
+ gradually. Useful when the iolog is very large, or when it is generated.
+ .TP
.BI replay_no_stall \fR=\fPbool
When replaying I/O with \fBread_iolog\fR the default behavior is to
attempt to respect the timestamps within the log and replay them with the
* For IO replaying
*/
struct flist_head io_log_list;
+ FILE *io_log_rfile;
+ unsigned int io_log_current;
+ unsigned int io_log_checkmark;
+ unsigned int io_log_highmark;
+ struct timespec io_log_highmark_time;
/*
* For tracking/handling discards
return ddir_rw_sum(td->bytes_done) != 0;
}
-static inline unsigned int td_max_bs(struct thread_data *td)
+static inline unsigned long long td_max_bs(struct thread_data *td)
{
- unsigned int max_bs;
+ unsigned long long max_bs;
max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
return max(td->o.max_bs[DDIR_TRIM], max_bs);
}
-static inline unsigned int td_min_bs(struct thread_data *td)
+static inline unsigned long long td_min_bs(struct thread_data *td)
{
- unsigned int min_bs;
+ unsigned long long min_bs;
min_bs = min(td->o.min_bs[DDIR_READ], td->o.min_bs[DDIR_WRITE]);
return min(td->o.min_bs[DDIR_TRIM], min_bs);
#include "blktrace.h"
#include "pshared.h"
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
static int iolog_flush(struct io_log *log);
static const char iolog_ver2[] = "fio version 2 iolog";
if (!td->o.write_iolog_file)
return;
- fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
+ fprintf(td->iolog_f, "%s %s %llu %llu\n", io_u->file->file_name,
io_ddir_name(io_u->ddir),
io_u->offset, io_u->buflen);
}
return 1;
}
+ static bool read_iolog2(struct thread_data *td);
+
int read_iolog_get(struct thread_data *td, struct io_u *io_u)
{
struct io_piece *ipo;
while (!flist_empty(&td->io_log_list)) {
int ret;
-
+ if (td->o.read_iolog_chunked) {
+ if (td->io_log_checkmark == td->io_log_current) {
+ if (!read_iolog2(td))
+ return 1;
+ }
+ td->io_log_current--;
+ }
ipo = flist_first_entry(&td->io_log_list, struct io_piece, list);
flist_del(&ipo->list);
remove_trim_entry(td, ipo);
io_u->buflen = ipo->len;
io_u->file = td->files[ipo->fileno];
get_file(io_u->file);
- dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
+ dprint(FD_IO, "iolog: get %llu/%llu/%s\n", io_u->offset,
io_u->buflen, io_u->file->file_name);
if (ipo->delay)
iolog_delay(td, ipo->delay);
* Read version 2 iolog data. It is enhanced to include per-file logging,
* syncs, etc.
*/
- static bool read_iolog2(struct thread_data *td, FILE *f)
+ static bool read_iolog2(struct thread_data *td)
{
unsigned long long offset;
unsigned int bytes;
char *rfname, *fname, *act;
char *str, *p;
enum fio_ddir rw;
+ int64_t items_to_fetch = 0;
- free_release_files(td);
-
+ if (td->o.read_iolog_chunked) {
+ if (td->io_log_highmark == 0) {
+ items_to_fetch = 10;
+ } else {
+ struct timespec now;
+ uint64_t elapsed;
+ uint64_t for_1s;
+ fio_gettime(&now, NULL);
+ elapsed = ntime_since(&td->io_log_highmark_time, &now);
+ for_1s = (td->io_log_highmark - td->io_log_current) * 1000000000 / elapsed;
+ items_to_fetch = for_1s - td->io_log_current;
+ if (items_to_fetch < 0)
+ items_to_fetch = 0;
+ td->io_log_highmark = td->io_log_current + items_to_fetch;
+ td->io_log_checkmark = (td->io_log_highmark + 1) / 2;
+ fio_gettime(&td->io_log_highmark_time, NULL);
+ if (items_to_fetch == 0)
+ return true;
+ }
+ }
/*
* Read in the read iolog and store it, reuse the infrastructure
* for doing verifications.
act = malloc(256+16);
reads = writes = waits = 0;
- while ((p = fgets(str, 4096, f)) != NULL) {
+ while ((p = fgets(str, 4096, td->io_log_rfile)) != NULL) {
struct io_piece *ipo;
int r;
}
queue_io_piece(td, ipo);
+
+ if (td->o.read_iolog_chunked) {
+ td->io_log_current++;
+ items_to_fetch--;
+ if (items_to_fetch == 0)
+ break;
+ }
}
free(str);
free(act);
free(rfname);
+ if (td->o.read_iolog_chunked) {
+ td->io_log_highmark = td->io_log_current;
+ td->io_log_checkmark = (td->io_log_highmark + 1) / 2;
+ fio_gettime(&td->io_log_highmark_time, NULL);
+ }
+
if (writes && read_only) {
log_err("fio: <%s> skips replay of %d writes due to"
" read-only\n", td->o.name, writes);
writes = 0;
}
+ if (td->o.read_iolog_chunked) {
+ if (td->io_log_current == 0) {
+ return false;
+ }
+ td->o.td_ddir = TD_DDIR_RW;
+ return true;
+ }
+
if (!reads && !writes && !waits)
return false;
else if (reads && !writes)
return true;
}
+/*
+ * Return true if 'path' exists and is a UNIX domain socket; false if
+ * stat() fails or the path names any other kind of file. Used to decide
+ * whether the read_iolog target should be opened as a socket.
+ */
+static bool is_socket(const char *path)
+{
+	struct stat buf;
+	int r = stat(path, &buf);
+	if (r == -1)
+		return false;
+
+	return S_ISSOCK(buf.st_mode);
+}
+
+/*
+ * Connect to the UNIX domain socket at 'path'.
+ *
+ * Returns the connected file descriptor on success, or a negative
+ * value on failure (path too long, socket() or connect() error).
+ */
+static int open_socket(const char *path)
+{
+	struct sockaddr_un addr;
+	int fd;
+
+	/* Reject paths that cannot fit in sun_path (NUL included). */
+	if (strlen(path) >= sizeof(addr.sun_path))
+		return -1;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return fd;
+
+	/* Zero the whole sockaddr so no uninitialized bytes reach connect(). */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, path);
+
+	/* Use the full struct size: portable, unlike strlen-based lengths. */
+	if (connect(fd, (const struct sockaddr *)&addr, sizeof(addr)) == 0)
+		return fd;
+
+	close(fd);
+	return -1;
+}
+
/*
* open iolog, check version, and call appropriate parser
*/
static bool init_iolog_read(struct thread_data *td)
{
char buffer[256], *p;
- FILE *f;
+ FILE *f = NULL;
bool ret;
-
- f = fopen(td->o.read_iolog_file, "r");
+ if (is_socket(td->o.read_iolog_file)) {
+ int fd = open_socket(td->o.read_iolog_file);
+ if (fd >= 0) {
+ f = fdopen(fd, "r");
+ }
+ } else
+ f = fopen(td->o.read_iolog_file, "r");
if (!f) {
perror("fopen read iolog");
return false;
fclose(f);
return false;
}
-
+ td->io_log_rfile = f;
/*
* version 2 of the iolog stores a specific string as the
* first line, check for that
*/
- if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
- ret = read_iolog2(td, f);
+ if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2))) {
+ free_release_files(td);
+ ret = read_iolog2(td);
+ }
else {
log_err("fio: iolog version 1 is no longer supported\n");
ret = false;
}
- fclose(f);
return ret;
}
entry_before = flist_first_entry(&entry->list, struct io_u_plat_entry, list);
io_u_plat_before = entry_before->io_u_plat;
- fprintf(f, "%lu, %u, %u, ", (unsigned long) s->time,
- io_sample_ddir(s), s->bs);
+ fprintf(f, "%lu, %u, %llu, ", (unsigned long) s->time,
+ io_sample_ddir(s), (unsigned long long) s->bs);
for (j = 0; j < FIO_IO_U_PLAT_NR - stride; j += stride) {
fprintf(f, "%llu, ", (unsigned long long)
hist_sum(j, stride, io_u_plat, io_u_plat_before));
s = __get_sample(samples, log_offset, i);
if (!log_offset) {
- fprintf(f, "%lu, %" PRId64 ", %u, %u\n",
+ fprintf(f, "%lu, %" PRId64 ", %u, %llu\n",
(unsigned long) s->time,
s->data.val,
- io_sample_ddir(s), s->bs);
+ io_sample_ddir(s), (unsigned long long) s->bs);
} else {
struct io_sample_offset *so = (void *) s;
- fprintf(f, "%lu, %" PRId64 ", %u, %u, %llu\n",
+ fprintf(f, "%lu, %" PRId64 ", %u, %llu, %llu\n",
(unsigned long) s->time,
s->data.val,
- io_sample_ddir(s), s->bs,
+ io_sample_ddir(s), (unsigned long long) s->bs,
(unsigned long long) so->offset);
}
}
struct split {
unsigned int nr;
- unsigned int val1[ZONESPLIT_MAX];
+ unsigned long long val1[ZONESPLIT_MAX];
unsigned long long val2[ZONESPLIT_MAX];
};
bool data)
{
unsigned int i, perc, perc_missing;
- unsigned int max_bs, min_bs;
+ unsigned long long max_bs, min_bs;
struct split split;
memset(&split, 0, sizeof(split));
.name = "bs",
.lname = "Block size",
.alias = "blocksize",
- .type = FIO_OPT_INT,
+ .type = FIO_OPT_ULL,
.off1 = offsetof(struct thread_options, bs[DDIR_READ]),
.off2 = offsetof(struct thread_options, bs[DDIR_WRITE]),
.off3 = offsetof(struct thread_options, bs[DDIR_TRIM]),
.name = "ba",
.lname = "Block size align",
.alias = "blockalign",
- .type = FIO_OPT_INT,
+ .type = FIO_OPT_ULL,
.off1 = offsetof(struct thread_options, ba[DDIR_READ]),
.off2 = offsetof(struct thread_options, ba[DDIR_WRITE]),
.off3 = offsetof(struct thread_options, ba[DDIR_TRIM]),
{
.name = "bssplit",
.lname = "Block size split",
- .type = FIO_OPT_STR,
+ .type = FIO_OPT_STR_ULL,
.cb = str_bssplit_cb,
.off1 = offsetof(struct thread_options, bssplit),
.help = "Set a specific mix of block sizes",
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_IOLOG,
},
+ {
+ .name = "read_iolog_chunked",
+ .lname = "Read I/O log in parts",
+ .type = FIO_OPT_BOOL,
+ .off1 = offsetof(struct thread_options, read_iolog_chunked),
+ .def = "0",
+ .parent = "read_iolog",
+ .help = "Parse IO pattern in chunks",
+ .category = FIO_OPT_C_IO,
+ .group = FIO_OPT_G_IOLOG,
+ },
{
.name = "replay_no_stall",
.lname = "Don't stall on replay",
#define ZONESPLIT_MAX 256
struct bssplit {
- uint32_t bs;
+ uint64_t bs;
uint32_t perc;
};
unsigned long long start_offset;
unsigned long long start_offset_align;
- unsigned int bs[DDIR_RWDIR_CNT];
- unsigned int ba[DDIR_RWDIR_CNT];
- unsigned int min_bs[DDIR_RWDIR_CNT];
- unsigned int max_bs[DDIR_RWDIR_CNT];
+ unsigned long long bs[DDIR_RWDIR_CNT];
+ unsigned long long ba[DDIR_RWDIR_CNT];
+ unsigned long long min_bs[DDIR_RWDIR_CNT];
+ unsigned long long max_bs[DDIR_RWDIR_CNT];
struct bssplit *bssplit[DDIR_RWDIR_CNT];
unsigned int bssplit_nr[DDIR_RWDIR_CNT];
unsigned int perc_rand[DDIR_RWDIR_CNT];
unsigned int hugepage_size;
- unsigned int rw_min_bs;
+ unsigned long long rw_min_bs;
+ unsigned int pad2;
unsigned int thinktime;
unsigned int thinktime_spin;
unsigned int thinktime_blocks;
fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN];
char *read_iolog_file;
+ bool read_iolog_chunked;
char *write_iolog_file;
unsigned int write_bw_log;
uint64_t start_offset;
uint64_t start_offset_align;
- uint32_t bs[DDIR_RWDIR_CNT];
- uint32_t ba[DDIR_RWDIR_CNT];
- uint32_t min_bs[DDIR_RWDIR_CNT];
- uint32_t max_bs[DDIR_RWDIR_CNT];
+ uint64_t bs[DDIR_RWDIR_CNT];
+ uint64_t ba[DDIR_RWDIR_CNT];
+ uint64_t min_bs[DDIR_RWDIR_CNT];
+ uint64_t max_bs[DDIR_RWDIR_CNT];
struct bssplit bssplit[DDIR_RWDIR_CNT][BSSPLIT_MAX];
uint32_t bssplit_nr[DDIR_RWDIR_CNT];
uint32_t perc_rand[DDIR_RWDIR_CNT];
uint32_t hugepage_size;
- uint32_t rw_min_bs;
+ uint64_t rw_min_bs;
+ uint32_t pad2;
uint32_t thinktime;
uint32_t thinktime_spin;
uint32_t thinktime_blocks;