summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xFIO-VERSION-GEN2
-rw-r--r--HOWTO83
-rw-r--r--Makefile29
-rw-r--r--README10
-rw-r--r--cconv.c9
-rw-r--r--client.c14
-rwxr-xr-xconfigure177
-rw-r--r--diskutil.c9
-rw-r--r--engines/glusterfs.c8
-rw-r--r--engines/glusterfs_async.c5
-rw-r--r--engines/glusterfs_sync.c8
-rw-r--r--engines/http.c2
-rw-r--r--engines/io_uring.c157
-rw-r--r--engines/libaio.c61
-rw-r--r--engines/libiscsi.c411
-rw-r--r--engines/mmap.c54
-rw-r--r--engines/nbd.c359
-rw-r--r--engines/net.c6
-rw-r--r--engines/posixaio.c1
-rw-r--r--engines/rbd.c2
-rw-r--r--engines/sg.c4
-rw-r--r--engines/splice.c7
-rw-r--r--eta.c4
-rw-r--r--examples/libiscsi.fio3
-rw-r--r--examples/nbd.fio35
-rw-r--r--exp/expression-parser.y6
-rw-r--r--filesetup.c132
-rw-r--r--fio.1103
-rw-r--r--fio.h12
-rw-r--r--gclient.c4
-rw-r--r--init.c28
-rw-r--r--io_u.c6
-rw-r--r--ioengines.c11
-rw-r--r--ioengines.h2
-rw-r--r--lib/lfsr.c6
-rw-r--r--lib/lfsr.h4
-rw-r--r--lib/rand.c20
-rw-r--r--lib/rand.h8
-rw-r--r--optgroup.c4
-rw-r--r--optgroup.h8
-rw-r--r--options.c23
-rw-r--r--os/linux/io_uring.h11
-rw-r--r--os/os-dragonfly.h2
-rw-r--r--os/os-linux.h2
-rw-r--r--os/os-mac.h8
-rw-r--r--os/os-netbsd.h2
-rw-r--r--os/os-openbsd.h2
-rw-r--r--os/os-solaris.h2
-rw-r--r--os/os-windows.h4
-rw-r--r--os/os.h4
-rwxr-xr-xos/windows/install.wxs34
-rw-r--r--parse.c6
-rw-r--r--parse.h4
-rw-r--r--server.c23
-rw-r--r--server.h2
-rw-r--r--smalloc.c128
-rw-r--r--smalloc.h1
-rw-r--r--stat.c22
-rw-r--r--stat.h57
-rw-r--r--t/io_uring.c30
-rw-r--r--t/stest.c21
-rwxr-xr-xt/zbd/run-tests-against-zoned-nullb2
-rwxr-xr-xt/zbd/test-zbd-support27
-rw-r--r--thread_options.h3
-rwxr-xr-xtools/plot/fio2gnuplot234
-rw-r--r--verify.c17
-rw-r--r--verify.h2
-rw-r--r--zbd.c135
-rw-r--r--zbd.h7
69 files changed, 2100 insertions, 529 deletions
diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN
index 37fb1a7a..d5cec22e 100755
--- a/FIO-VERSION-GEN
+++ b/FIO-VERSION-GEN
@@ -1,7 +1,7 @@
#!/bin/sh
GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.13
+DEF_VER=fio-3.16
LF='
'
diff --git a/HOWTO b/HOWTO
index 468772d7..96a047de 100644
--- a/HOWTO
+++ b/HOWTO
@@ -93,6 +93,12 @@ Command line options
Dump info related to I/O rate switching.
*compress*
Dump info related to log compress/decompress.
+ *steadystate*
+ Dump info related to steadystate detection.
+ *helperthread*
+ Dump info related to the helper thread.
+ *zbd*
+ Dump info related to support for zoned block devices.
*?* or *help*
Show available debug options.
@@ -216,8 +222,8 @@ Command line options
.. option:: --alloc-size=kb
- Set the internal smalloc pool size to `kb` in KiB. The
- ``--alloc-size`` switch allows one to use a larger pool size for smalloc.
+ Allocate additional internal smalloc pools of size `kb` in KiB. The
+ ``--alloc-size`` option increases shared memory set aside for use by fio.
If running large jobs with randommap enabled, fio can run out of memory.
Smalloc is an internal allocator for shared structures from a fixed size
memory pool and can grow to 16 pools. The pool size defaults to 16MiB.
@@ -1246,7 +1252,9 @@ I/O type
is incremented for each sub-job (i.e. when :option:`numjobs` option is
specified). This option is useful if there are several jobs which are
intended to operate on a file in parallel disjoint segments, with even
- spacing between the starting points.
+ spacing between the starting points. Percentages can be used for this option.
+ If a percentage is given, the generated offset will be aligned to the minimum
+ ``blocksize`` or to the value of ``offset_align`` if provided.
.. option:: number_ios=int
@@ -1271,7 +1279,7 @@ I/O type
.. option:: fdatasync=int
Like :option:`fsync` but uses :manpage:`fdatasync(2)` to only sync data and
- not metadata blocks. In Windows, FreeBSD, and DragonFlyBSD there is no
+ not metadata blocks. In Windows, FreeBSD, DragonFlyBSD or OSX there is no
:manpage:`fdatasync(2)` so this falls back to using :manpage:`fsync(2)`.
Defaults to 0, which means fio does not periodically issue and wait for a
data-only sync to complete.
@@ -1805,6 +1813,11 @@ I/O engine
**pvsync2**
Basic :manpage:`preadv2(2)` or :manpage:`pwritev2(2)` I/O.
+ **io_uring**
+ Fast Linux native asynchronous I/O. Supports async IO
+ for both direct and buffered IO.
+ This engine defines engine specific options.
+
**libaio**
Linux native asynchronous I/O. Note that Linux may only support
queued behavior with non-buffered I/O (set ``direct=1`` or
@@ -1991,6 +2004,10 @@ I/O engine
Asynchronous read and write using DDN's Infinite Memory Engine (IME).
This engine will try to stack as many IOs as possible by creating
requests for IME. FIO will then decide when to commit these requests.
+ **libiscsi**
+ Read and write an iSCSI LUN with libiscsi.
+ **nbd**
+ Read and write a Network Block Device (NBD).
I/O engine specific parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2000,6 +2017,41 @@ In addition, there are some parameters which are only valid when a specific
with the caveat that when used on the command line, they must come after the
:option:`ioengine` that defines them is selected.
+.. option:: hipri : [io_uring]
+
+ If this option is set, fio will attempt to use polled IO completions.
+ Normal IO completions generate interrupts to signal the completion of
+ IO, polled completions do not. Hence they are require active reaping
+ by the application. The benefits are more efficient IO for high IOPS
+ scenarios, and lower latencies for low queue depth IO.
+
+.. option:: fixedbufs : [io_uring]
+
+ If fio is asked to do direct IO, then Linux will map pages for each
+ IO call, and release them when IO is done. If this option is set, the
+ pages are pre-mapped before IO is started. This eliminates the need to
+ map and release for each IO. This is more efficient, and reduces the
+ IO latency as well.
+
+.. option:: registerfiles : [io_uring]
+ With this option, fio registers the set of files being used with the
+ kernel. This avoids the overhead of managing file counts in the kernel,
+ making the submission and completion part more lightweight. Required
+ for the below :option:`sqthread_poll` option.
+
+.. option:: sqthread_poll : [io_uring]
+
+ Normally fio will submit IO by issuing a system call to notify the
+ kernel of available items in the SQ ring. If this option is set, the
+ act of submitting IO will be done by a polling thread in the kernel.
+ This frees up cycles for fio, at the cost of using more CPU in the
+ system.
+
+.. option:: sqthread_poll_cpu : [io_uring]
+
+ When :option:`sqthread_poll` is set, this option provides a way to
+ define which CPU should be used for the polling thread.
+
.. option:: userspace_reap : [libaio]
Normally, with the libaio engine in use, fio will use the
@@ -2263,6 +2315,15 @@ with the caveat that when used on the command line, they must come after the
turns on verbose logging from libcurl, 2 additionally enables
HTTP IO tracing. Default is **0**
+.. option:: uri=str : [nbd]
+
+ Specify the NBD URI of the server to test. The string
+ is a standard NBD URI
+ (see https://github.com/NetworkBlockDevice/nbd/tree/master/doc).
+ Example URIs: nbd://localhost:10809
+ nbd+unix:///?socket=/tmp/socket
+ nbds://tlshost/exportname
+
I/O depth
~~~~~~~~~
@@ -2341,7 +2402,7 @@ I/O depth
this option can reduce both performance and the :option:`iodepth` achieved.
This option only applies to I/Os issued for a single job except when it is
- enabled along with :option:`io_submit_mode`=offload. In offload mode, fio
+ enabled along with :option:`io_submit_mode`\=offload. In offload mode, fio
will check for overlap among all I/Os submitted by offload jobs with :option:`serialize_overlap`
enabled.
@@ -3691,7 +3752,8 @@ is one long line of values, such as::
2;card0;0;0;7139336;121836;60004;1;10109;27.932460;116.933948;220;126861;3495.446807;1085.368601;226;126864;3523.635629;1089.012448;24063;99944;50.275485%;59818.274627;5540.657370;7155060;122104;60004;1;8338;29.086342;117.839068;388;128077;5032.488518;1234.785715;391;128085;5061.839412;1236.909129;23436;100928;50.287926%;59964.832030;5644.844189;14.595833%;19.394167%;123706;0;7313;0.1%;0.1%;0.1%;0.1%;0.1%;0.1%;100.0%;0.00%;0.00%;0.00%;0.00%;0.00%;0.00%;0.01%;0.02%;0.05%;0.16%;6.04%;40.40%;52.68%;0.64%;0.01%;0.00%;0.01%;0.00%;0.00%;0.00%;0.00%;0.00%
A description of this job goes here.
-The job description (if provided) follows on a second line.
+The job description (if provided) follows on a second line for terse v2.
+It appears on the same line for other terse versions.
To enable terse output, use the :option:`--minimal` or
:option:`--output-format`\=terse command line options. The
@@ -3776,6 +3838,11 @@ minimal output v3, separated by semicolons::
terse_version_3;fio_version;jobname;groupid;error;read_kb;read_bandwidth;read_iops;read_runtime_ms;read_slat_min;read_slat_max;read_slat_mean;read_slat_dev;read_clat_min;read_clat_max;read_clat_mean;read_clat_dev;read_clat_pct01;read_clat_pct02;read_clat_pct03;read_clat_pct04;read_clat_pct05;read_clat_pct06;read_clat_pct07;read_clat_pct08;read_clat_pct09;read_clat_pct10;read_clat_pct11;read_clat_pct12;read_clat_pct13;read_clat_pct14;read_clat_pct15;read_clat_pct16;read_clat_pct17;read_clat_pct18;read_clat_pct19;read_clat_pct20;read_tlat_min;read_lat_max;read_lat_mean;read_lat_dev;read_bw_min;read_bw_max;read_bw_agg_pct;read_bw_mean;read_bw_dev;write_kb;write_bandwidth;write_iops;write_runtime_ms;write_slat_min;write_slat_max;write_slat_mean;write_slat_dev;write_clat_min;write_clat_max;write_clat_mean;write_clat_dev;write_clat_pct01;write_clat_pct02;write_clat_pct03;write_clat_pct04;write_clat_pct05;write_clat_pct06;write_clat_pct07;write_clat_pct08;write_clat_pct09;write_clat_pct10;write_clat_pct11;write_clat_pct12;write_clat_pct13;write_clat_pct14;write_clat_pct15;write_clat_pct16;write_clat_pct17;write_clat_pct18;write_clat_pct19;write_clat_pct20;write_tlat_min;write_lat_max;write_lat_mean;write_lat_dev;write_bw_min;write_bw_max;write_bw_agg_pct;write_bw_mean;write_bw_dev;cpu_user;cpu_sys;cpu_csw;cpu_mjf;cpu_minf;iodepth_1;iodepth_2;iodepth_4;iodepth_8;iodepth_16;iodepth_32;iodepth_64;lat_2us;lat_4us;lat_10us;lat_20us;lat_50us;lat_100us;lat_250us;lat_500us;lat_750us;lat_1000us;lat_2ms;lat_4ms;lat_10ms;lat_20ms;lat_50ms;lat_100ms;lat_250ms;lat_500ms;lat_750ms;lat_1000ms;lat_2000ms;lat_over_2000ms;disk_name;disk_read_iops;disk_write_iops;disk_read_merges;disk_write_merges;disk_read_ticks;write_ticks;disk_queue_time;disk_util
+In client/server mode terse output differs from what appears when jobs are run
+locally. Disk utilization data is omitted from the standard terse output and
+for v3 and later appears on its own separate line at the end of each terse
+reporting cycle.
+
JSON output
------------
@@ -4054,6 +4121,7 @@ is recorded. Each *data direction* seen within the window period will aggregate
its values in a separate row. Further, when using windowed logging the *block
size* and *offset* entries will always contain 0.
+
Client/Server
-------------
@@ -4141,3 +4209,6 @@ containing two hostnames ``h1`` and ``h2`` with IP addresses 192.168.10.120 and
/mnt/nfs/fio/192.168.10.120.fileio.tmp
/mnt/nfs/fio/192.168.10.121.fileio.tmp
+
+Terse output in client/server mode will differ slightly from what is produced
+when fio is run in stand-alone mode. See the terse output section for details.
diff --git a/Makefile b/Makefile
index fd138dd2..7c21ef83 100644
--- a/Makefile
+++ b/Makefile
@@ -59,6 +59,18 @@ ifdef CONFIG_LIBHDFS
SOURCE += engines/libhdfs.c
endif
+ifdef CONFIG_LIBISCSI
+ CFLAGS += $(LIBISCSI_CFLAGS)
+ LIBS += $(LIBISCSI_LIBS)
+ SOURCE += engines/libiscsi.c
+endif
+
+ifdef CONFIG_LIBNBD
+ CFLAGS += $(LIBNBD_CFLAGS)
+ LIBS += $(LIBNBD_LIBS)
+ SOURCE += engines/nbd.c
+endif
+
ifdef CONFIG_64BIT
CFLAGS += -DBITS_PER_LONG=64
endif
@@ -519,6 +531,21 @@ doc: tools/plot/fio2gnuplot.1
test: fio
./fio --minimal --thread --exitall_on_error --runtime=1s --name=nulltest --ioengine=null --rw=randrw --iodepth=2 --norandommap --random_generator=tausworthe64 --size=16T --name=verifyfstest --filename=fiotestfile.tmp --unlink=1 --rw=write --verify=crc32c --verify_state_save=0 --size=16K
+fulltest:
+ sudo modprobe null_blk && \
+ if [ ! -e /usr/include/libzbc/zbc.h ]; then \
+ git clone https://github.com/hgst/libzbc && \
+ (cd libzbc && \
+ ./autogen.sh && \
+ ./configure --prefix=/usr && \
+ make -j && \
+ sudo make install) \
+ fi && \
+ sudo t/zbd/run-tests-against-regular-nullb && \
+ if [ -e /sys/module/null_blk/parameters/zoned ]; then \
+ sudo t/zbd/run-tests-against-zoned-nullb; \
+ fi
+
install: $(PROGS) $(SCRIPTS) tools/plot/fio2gnuplot.1 FORCE
$(INSTALL) -m 755 -d $(DESTDIR)$(bindir)
$(INSTALL) $(PROGS) $(SCRIPTS) $(DESTDIR)$(bindir)
@@ -529,3 +556,5 @@ install: $(PROGS) $(SCRIPTS) tools/plot/fio2gnuplot.1 FORCE
$(INSTALL) -m 644 $(SRCDIR)/tools/hist/fiologparser_hist.py.1 $(DESTDIR)$(mandir)/man1
$(INSTALL) -m 755 -d $(DESTDIR)$(sharedir)
$(INSTALL) -m 644 $(SRCDIR)/tools/plot/*gpm $(DESTDIR)$(sharedir)/
+
+.PHONY: test fulltest
diff --git a/README b/README
index 38022bbb..0f943bcc 100644
--- a/README
+++ b/README
@@ -119,8 +119,8 @@ Solaris:
``pkgutil -i fio``.
Windows:
- Rebecca Cran <rebecca+fio@bluestop.org> has fio packages for Windows at
- https://www.bluestop.org/fio/ . The latest builds for Windows can also
+ Rebecca Cran <rebecca@bsdio.com> has fio packages for Windows at
+ https://bsdio.com/fio/ . The latest builds for Windows can also
be grabbed from https://ci.appveyor.com/project/axboe/fio by clicking
the latest x86 or x64 build, then selecting the ARTIFACTS tab.
@@ -164,9 +164,9 @@ configure.
Windows
~~~~~~~
-On Windows, Cygwin (http://www.cygwin.com/) is required in order to build
-fio. To create an MSI installer package install WiX 3.8 from
-http://wixtoolset.org and run :file:`dobuild.cmd` from the :file:`os/windows`
+On Windows, Cygwin (https://www.cygwin.com/) is required in order to build
+fio. To create an MSI installer package install WiX from
+https://wixtoolset.org and run :file:`dobuild.cmd` from the :file:`os/windows`
directory.
How to compile fio on 64-bit Windows:
diff --git a/cconv.c b/cconv.c
index 50e45c63..bff5e34f 100644
--- a/cconv.c
+++ b/cconv.c
@@ -13,10 +13,9 @@ static void string_to_cpu(char **dst, const uint8_t *src)
static void __string_to_net(uint8_t *dst, const char *src, size_t dst_size)
{
- if (src) {
- dst[dst_size - 1] = '\0';
- strncpy((char *) dst, src, dst_size - 1);
- } else
+ if (src)
+ snprintf((char *) dst, dst_size, "%s", src);
+ else
dst[0] = '\0';
}
@@ -227,6 +226,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
o->zone_skip = le64_to_cpu(top->zone_skip);
o->zone_mode = le32_to_cpu(top->zone_mode);
o->lockmem = le64_to_cpu(top->lockmem);
+ o->offset_increment_percent = le32_to_cpu(top->offset_increment_percent);
o->offset_increment = le64_to_cpu(top->offset_increment);
o->number_ios = le64_to_cpu(top->number_ios);
@@ -567,6 +567,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
top->start_offset_align = __cpu_to_le64(o->start_offset_align);
top->start_offset_percent = __cpu_to_le32(o->start_offset_percent);
top->trim_backlog = __cpu_to_le64(o->trim_backlog);
+ top->offset_increment_percent = __cpu_to_le32(o->offset_increment_percent);
top->offset_increment = __cpu_to_le64(o->offset_increment);
top->number_ios = __cpu_to_le64(o->number_ios);
top->rate_process = cpu_to_le32(o->rate_process);
diff --git a/client.c b/client.c
index 4cbffb62..e0047af0 100644
--- a/client.c
+++ b/client.c
@@ -520,7 +520,7 @@ static void probe_client(struct fio_client *client)
sname = server_name(client, buf, sizeof(buf));
memset(pdu.server, 0, sizeof(pdu.server));
- strncpy((char *) pdu.server, sname, sizeof(pdu.server) - 1);
+ snprintf((char *) pdu.server, sizeof(pdu.server), "%s", sname);
fio_net_send_cmd(client->fd, FIO_NET_CMD_PROBE, &pdu, sizeof(pdu), &tag, &client->cmd_list);
}
@@ -574,7 +574,8 @@ static int fio_client_connect_sock(struct fio_client *client)
memset(addr, 0, sizeof(*addr));
addr->sun_family = AF_UNIX;
- strncpy(addr->sun_path, client->hostname, sizeof(addr->sun_path) - 1);
+ snprintf(addr->sun_path, sizeof(addr->sun_path), "%s",
+ client->hostname);
fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd < 0) {
@@ -1219,12 +1220,15 @@ static void handle_du(struct fio_client *client, struct fio_net_cmd *cmd)
json_array_add_disk_util(&du->dus, &du->agg, du_array);
duobj = json_array_last_value_object(du_array);
json_object_add_client_info(duobj, client);
- } else if (output_format & FIO_OUTPUT_TERSE)
- print_disk_util(&du->dus, &du->agg, 1, &client->buf);
- else if (output_format & FIO_OUTPUT_NORMAL) {
+ }
+ if (output_format & FIO_OUTPUT_NORMAL) {
__log_buf(&client->buf, "\nDisk stats (read/write):\n");
print_disk_util(&du->dus, &du->agg, 0, &client->buf);
}
+ if (output_format & FIO_OUTPUT_TERSE && terse_version >= 3) {
+ print_disk_util(&du->dus, &du->agg, 1, &client->buf);
+ __log_buf(&client->buf, "\n");
+ }
}
static void convert_jobs_eta(struct jobs_eta *je)
diff --git a/configure b/configure
index 6e549cdc..e32d5dcf 100755
--- a/configure
+++ b/configure
@@ -88,14 +88,14 @@ do_cc() {
}
compile_object() {
- do_cc $CFLAGS -c -o $TMPO $TMPC
+ do_cc $CFLAGS -Werror-implicit-function-declaration -c -o $TMPO $TMPC
}
compile_prog() {
local_cflags="$1"
local_ldflags="$2 $LIBS"
echo "Compiling test case $3" >> config.log
- do_cc $CFLAGS $local_cflags -o $TMPE $TMPC $LDFLAGS $local_ldflags
+ do_cc $CFLAGS -Werror-implicit-function-declaration $local_cflags -o $TMPE $TMPC $LDFLAGS $local_ldflags
}
feature_not_found() {
@@ -148,6 +148,9 @@ disable_lex=""
disable_pmem="no"
disable_native="no"
march_set="no"
+libiscsi="no"
+libnbd="no"
+libaio_uring="no"
prefix=/usr/local
# parse options
@@ -204,6 +207,14 @@ for opt do
;;
--with-ime=*) ime_path="$optarg"
;;
+ --enable-libiscsi) libiscsi="yes"
+ ;;
+ --enable-libnbd) libnbd="yes"
+ ;;
+ --disable-tcmalloc) disable_tcmalloc="yes"
+ ;;
+ --enable-libaio-uring) libaio_uring="yes"
+ ;;
--help)
show_help="yes"
;;
@@ -239,6 +250,10 @@ if test "$show_help" = "yes" ; then
echo "--enable-cuda Enable GPUDirect RDMA support"
echo "--disable-native Don't build for native host"
echo "--with-ime= Install path for DDN's Infinite Memory Engine"
+ echo "--enable-libiscsi Enable iscsi support"
+ echo "--enable-libnbd Enable libnbd (NBD engine) support"
+ echo "--disable-tcmalloc Disable tcmalloc support"
+ echo "--enable-libaio-uring Enable libaio emulated over io_uring"
exit $exit_val
fi
@@ -303,6 +318,10 @@ AIX|OpenBSD|NetBSD)
force_no_lex_o="yes"
fi
;;
+FreeBSD)
+ CFLAGS="$CFLAGS -I/usr/local/include"
+ LDFLAGS="$LDFLAGS -L/usr/local/lib"
+ ;;
Darwin)
# on Leopard most of the system is 32-bit, so we have to ask the kernel if
# we can run 64-bit userspace code.
@@ -588,17 +607,23 @@ int main(void)
return 0;
}
EOF
- if compile_prog "" "-laio" "libaio" ; then
+ if test "$libaio_uring" = "yes" && compile_prog "" "-luring" "libaio io_uring" ; then
libaio=yes
+ LIBS="-luring $LIBS"
+ elif compile_prog "" "-laio" "libaio" ; then
+ libaio=yes
+ libaio_uring=no
LIBS="-laio $LIBS"
else
if test "$libaio" = "yes" ; then
feature_not_found "linux AIO" "libaio-dev or libaio-devel"
fi
libaio=no
+ libaio_uring=no
fi
fi
print_config "Linux AIO support" "$libaio"
+print_config "Linux AIO over io_uring" "$libaio_uring"
##########################################
# posix aio probe
@@ -1336,31 +1361,30 @@ int main(void)
return GTK_CHECK_VERSION(2, 18, 0) ? 0 : 1; /* 0 on success */
}
EOF
-GTK_CFLAGS=$(pkg-config --cflags gtk+-2.0 gthread-2.0)
+GTK_CFLAGS=$(${cross_prefix}pkg-config --cflags gtk+-2.0 gthread-2.0)
ORG_LDFLAGS=$LDFLAGS
LDFLAGS=$(echo $LDFLAGS | sed s/"-static"//g)
if test "$?" != "0" ; then
echo "configure: gtk and gthread not found"
exit 1
fi
-GTK_LIBS=$(pkg-config --libs gtk+-2.0 gthread-2.0)
+GTK_LIBS=$(${cross_prefix}pkg-config --libs gtk+-2.0 gthread-2.0)
if test "$?" != "0" ; then
echo "configure: gtk and gthread not found"
exit 1
fi
-if compile_prog "$GTK_CFLAGS" "$GTK_LIBS" "gfio" ; then
- $TMPE
- if test "$?" = "0" ; then
+if ! ${cross_prefix}pkg-config --atleast-version 2.18.0 gtk+-2.0; then
+ echo "GTK found, but need version 2.18 or higher"
+ gfio="no"
+else
+ if compile_prog "$GTK_CFLAGS" "$GTK_LIBS" "gfio" ; then
gfio="yes"
GFIO_LIBS="$LIBS $GTK_LIBS"
CFLAGS="$CFLAGS $GTK_CFLAGS"
else
- echo "GTK found, but need version 2.18 or higher"
+ echo "Please install gtk and gdk libraries"
gfio="no"
fi
-else
- echo "Please install gtk and gdk libraries"
- gfio="no"
fi
LDFLAGS=$ORG_LDFLAGS
fi
@@ -1782,6 +1806,24 @@ print_config "Gluster API use fadvise" "$gf_fadvise"
fi
##########################################
+# check for newer gfapi
+if test "$gfapi" = "yes" ; then
+gf_new="no"
+cat > $TMPC << EOF
+#include <glusterfs/api/glfs.h>
+
+int main(int argc, char **argv)
+{
+ return glfs_fsync(NULL, NULL, NULL) && glfs_ftruncate(NULL, 0, NULL, NULL);
+}
+EOF
+if compile_prog "" "-lgfapi -lglusterfs" "gf new api"; then
+ gf_new="yes"
+fi
+print_config "Gluster new API" "$gf_new"
+fi
+
+##########################################
# check for gfapi trim support
if test "$gf_trim" != "yes" ; then
gf_trim="no"
@@ -1971,6 +2013,39 @@ fi
print_config "DDN's Infinite Memory Engine" "$libime"
##########################################
+# Check if we have required environment variables configured for libiscsi
+if test "$libiscsi" = "yes" ; then
+ if $(pkg-config --atleast-version=1.9.0 libiscsi); then
+ libiscsi="yes"
+ libiscsi_cflags=$(pkg-config --cflags libiscsi)
+ libiscsi_libs=$(pkg-config --libs libiscsi)
+ else
+ if test "$libiscsi" = "yes" ; then
+ echo "libiscsi" "Install libiscsi >= 1.9.0"
+ fi
+ libiscsi="no"
+ fi
+fi
+print_config "iscsi engine" "$libiscsi"
+
+##########################################
+# Check if we have libnbd (for NBD support).
+minimum_libnbd=0.9.8
+if test "$libnbd" = "yes" ; then
+ if $(pkg-config --atleast-version=$minimum_libnbd libnbd); then
+ libnbd="yes"
+ libnbd_cflags=$(pkg-config --cflags libnbd)
+ libnbd_libs=$(pkg-config --libs libnbd)
+ else
+ if test "$libnbd" = "yes" ; then
+ echo "libnbd" "Install libnbd >= $minimum_libnbd"
+ fi
+ libnbd="no"
+ fi
+fi
+print_config "NBD engine" "$libnbd"
+
+##########################################
# Check if we have lex/yacc available
yacc="no"
yacc_is_bison="no"
@@ -2326,6 +2401,45 @@ if compile_prog "-Wimplicit-fallthrough" "" "-Wimplicit-fallthrough"; then
fi
print_config "-Wimplicit-fallthrough" "$fallthrough"
+##########################################
+# check for MADV_HUGEPAGE support
+if test "$thp" != "yes" ; then
+ thp="no"
+fi
+if test "$esx" != "yes" ; then
+ cat > $TMPC <<EOF
+#include <sys/mman.h>
+int main(void)
+{
+ return madvise(0, 0x1000, MADV_HUGEPAGE);
+}
+EOF
+ if compile_prog "" "" "thp" ; then
+ thp=yes
+ else
+ if test "$thp" = "yes" ; then
+ feature_not_found "Transparent Huge Page" ""
+ fi
+ thp=no
+ fi
+fi
+print_config "MADV_HUGEPAGE" "$thp"
+
+##########################################
+# check for gettid()
+gettid="no"
+cat > $TMPC << EOF
+#include <unistd.h>
+int main(int argc, char **argv)
+{
+ return gettid();
+}
+EOF
+if compile_prog "" "" "gettid"; then
+ gettid="yes"
+fi
+print_config "gettid" "$gettid"
+
#############################################################################
if test "$wordsize" = "64" ; then
@@ -2345,6 +2459,9 @@ if test "$zlib" = "yes" ; then
fi
if test "$libaio" = "yes" ; then
output_sym "CONFIG_LIBAIO"
+ if test "$libaio_uring" = "yes" ; then
+ output_sym "CONFIG_LIBAIO_URING"
+ fi
fi
if test "$posix_aio" = "yes" ; then
output_sym "CONFIG_POSIXAIO"
@@ -2513,13 +2630,16 @@ fi
if test "$gf_trim" = "yes" ; then
output_sym "CONFIG_GF_TRIM"
fi
+if test "$gf_new" = "yes" ; then
+ output_sym "CONFIG_GF_NEW_API"
+fi
if test "$libhdfs" = "yes" ; then
output_sym "CONFIG_LIBHDFS"
echo "FIO_HDFS_CPU=$FIO_HDFS_CPU" >> $config_host_mak
echo "JAVA_HOME=$JAVA_HOME" >> $config_host_mak
echo "FIO_LIBHDFS_INCLUDE=$FIO_LIBHDFS_INCLUDE" >> $config_host_mak
echo "FIO_LIBHDFS_LIB=$FIO_LIBHDFS_LIB" >> $config_host_mak
- fi
+fi
if test "$mtd" = "yes" ; then
output_sym "CONFIG_MTD"
fi
@@ -2597,9 +2717,40 @@ fi
if test "$__kernel_rwf_t" = "yes"; then
output_sym "CONFIG_HAVE_KERNEL_RWF_T"
fi
+if test "$gettid" = "yes"; then
+ output_sym "CONFIG_HAVE_GETTID"
+fi
if test "$fallthrough" = "yes"; then
CFLAGS="$CFLAGS -Wimplicit-fallthrough"
fi
+if test "$thp" = "yes" ; then
+ output_sym "CONFIG_HAVE_THP"
+fi
+if test "$libiscsi" = "yes" ; then
+ output_sym "CONFIG_LIBISCSI"
+ echo "CONFIG_LIBISCSI=m" >> $config_host_mak
+ echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
+ echo "LIBISCSI_LIBS=$libiscsi_libs" >> $config_host_mak
+fi
+if test "$libnbd" = "yes" ; then
+ output_sym "CONFIG_LIBNBD"
+ echo "CONFIG_LIBNBD=m" >> $config_host_mak
+ echo "LIBNBD_CFLAGS=$libnbd_cflags" >> $config_host_mak
+ echo "LIBNBD_LIBS=$libnbd_libs" >> $config_host_mak
+fi
+cat > $TMPC << EOF
+int main(int argc, char **argv)
+{
+ return 0;
+}
+EOF
+if test "$disable_tcmalloc" != "yes" && compile_prog "" "-ltcmalloc" "tcmalloc"; then
+ LIBS="-ltcmalloc $LIBS"
+ tcmalloc="yes"
+else
+ tcmalloc="no"
+fi
+print_config "TCMalloc support" "$tcmalloc"
echo "LIBS+=$LIBS" >> $config_host_mak
echo "GFIO_LIBS+=$GFIO_LIBS" >> $config_host_mak
diff --git a/diskutil.c b/diskutil.c
index 7be4c022..f0744015 100644
--- a/diskutil.c
+++ b/diskutil.c
@@ -181,8 +181,7 @@ static int get_device_numbers(char *file_name, int *maj, int *min)
/*
* must be a file, open "." in that path
*/
- tempname[PATH_MAX - 1] = '\0';
- strncpy(tempname, file_name, PATH_MAX - 1);
+ snprintf(tempname, ARRAY_SIZE(tempname), "%s", file_name);
p = dirname(tempname);
if (stat(p, &st)) {
perror("disk util stat");
@@ -314,7 +313,8 @@ static struct disk_util *disk_util_add(struct thread_data *td, int majdev,
sfree(du);
return NULL;
}
- strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ - 1);
+ snprintf((char *) du->dus.name, ARRAY_SIZE(du->dus.name), "%s",
+ basename(path));
du->sysfs_root = strdup(path);
du->major = majdev;
du->minor = mindev;
@@ -435,8 +435,7 @@ static struct disk_util *__init_per_file_disk_util(struct thread_data *td,
log_err("unknown sysfs layout\n");
return NULL;
}
- tmp[PATH_MAX - 1] = '\0';
- strncpy(tmp, p, PATH_MAX - 1);
+ snprintf(tmp, ARRAY_SIZE(tmp), "%s", p);
sprintf(path, "%s", tmp);
}
diff --git a/engines/glusterfs.c b/engines/glusterfs.c
index d0250b70..f2b84a2a 100644
--- a/engines/glusterfs.c
+++ b/engines/glusterfs.c
@@ -288,7 +288,11 @@ int fio_gf_open_file(struct thread_data *td, struct fio_file *f)
|| sb.st_size < f->real_file_size) {
dprint(FD_FILE, "fio extend file %s from %jd to %" PRIu64 "\n",
f->file_name, (intmax_t) sb.st_size, f->real_file_size);
+#if defined(CONFIG_GF_NEW_API)
+ ret = glfs_ftruncate(g->fd, f->real_file_size, NULL, NULL);
+#else
ret = glfs_ftruncate(g->fd, f->real_file_size);
+#endif
if (ret) {
log_err("failed fio extend file %s to %" PRIu64 "\n",
f->file_name, f->real_file_size);
@@ -350,7 +354,11 @@ int fio_gf_open_file(struct thread_data *td, struct fio_file *f)
f->file_name);
glfs_unlink(g->fs, f->file_name);
} else if (td->o.create_fsync) {
+#if defined(CONFIG_GF_NEW_API)
+ if (glfs_fsync(g->fd, NULL, NULL) < 0) {
+#else
if (glfs_fsync(g->fd) < 0) {
+#endif
dprint(FD_FILE,
"failed to sync, close %s\n",
f->file_name);
diff --git a/engines/glusterfs_async.c b/engines/glusterfs_async.c
index 9e1c4bf0..0392ad6e 100644
--- a/engines/glusterfs_async.c
+++ b/engines/glusterfs_async.c
@@ -84,7 +84,12 @@ static int fio_gf_io_u_init(struct thread_data *td, struct io_u *io_u)
return 0;
}
+#if defined(CONFIG_GF_NEW_API)
+static void gf_async_cb(glfs_fd_t * fd, ssize_t ret, struct glfs_stat *prestat,
+ struct glfs_stat *poststat, void *data)
+#else
static void gf_async_cb(glfs_fd_t * fd, ssize_t ret, void *data)
+#endif
{
struct io_u *io_u = data;
struct fio_gf_iou *iou = io_u->engine_data;
diff --git a/engines/glusterfs_sync.c b/engines/glusterfs_sync.c
index 099a5af1..de73261f 100644
--- a/engines/glusterfs_sync.c
+++ b/engines/glusterfs_sync.c
@@ -42,9 +42,17 @@ static enum fio_q_status fio_gf_queue(struct thread_data *td, struct io_u *io_u)
else if (io_u->ddir == DDIR_WRITE)
ret = glfs_write(g->fd, io_u->xfer_buf, io_u->xfer_buflen, 0);
else if (io_u->ddir == DDIR_SYNC)
+#if defined(CONFIG_GF_NEW_API)
+ ret = glfs_fsync(g->fd, NULL, NULL);
+#else
ret = glfs_fsync(g->fd);
+#endif
else if (io_u->ddir == DDIR_DATASYNC)
+#if defined(CONFIG_GF_NEW_API)
+ ret = glfs_fdatasync(g->fd, NULL, NULL);
+#else
ret = glfs_fdatasync(g->fd);
+#endif
else {
log_err("unsupported operation.\n");
io_u->error = EINVAL;
diff --git a/engines/http.c b/engines/http.c
index a35c0332..275fcab5 100644
--- a/engines/http.c
+++ b/engines/http.c
@@ -642,7 +642,7 @@ static int fio_http_invalidate(struct thread_data *td, struct fio_file *f)
static struct ioengine_ops ioengine = {
.name = "http",
.version = FIO_IOOPS_VERSION,
- .flags = FIO_DISKLESSIO,
+ .flags = FIO_DISKLESSIO | FIO_SYNCIO,
.setup = fio_http_setup,
.queue = fio_http_queue,
.getevents = fio_http_getevents,
diff --git a/engines/io_uring.c b/engines/io_uring.c
index 014f954e..ef56345b 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -50,6 +50,8 @@ struct ioring_data {
struct io_u **io_u_index;
+ int *fds;
+
struct io_sq_ring sq_ring;
struct io_uring_sqe *sqes;
struct iovec *iovecs;
@@ -62,9 +64,6 @@ struct ioring_data {
int cq_ring_off;
unsigned iodepth;
- uint64_t cachehit;
- uint64_t cachemiss;
-
struct ioring_mmap mmap[3];
};
@@ -72,6 +71,7 @@ struct ioring_options {
void *pad;
unsigned int hipri;
unsigned int fixedbufs;
+ unsigned int registerfiles;
unsigned int sqpoll_thread;
unsigned int sqpoll_set;
unsigned int sqpoll_cpu;
@@ -94,7 +94,7 @@ static struct fio_option options[] = {
.off1 = offsetof(struct ioring_options, hipri),
.help = "Use polled IO completions",
.category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
+ .group = FIO_OPT_G_IOURING,
},
{
.name = "fixedbufs",
@@ -103,7 +103,16 @@ static struct fio_option options[] = {
.off1 = offsetof(struct ioring_options, fixedbufs),
.help = "Pre map IO buffers",
.category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
+ .group = FIO_OPT_G_IOURING,
+ },
+ {
+ .name = "registerfiles",
+ .lname = "Register file set",
+ .type = FIO_OPT_STR_SET,
+ .off1 = offsetof(struct ioring_options, registerfiles),
+ .help = "Pre-open/register files",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_IOURING,
},
{
.name = "sqthread_poll",
@@ -112,7 +121,7 @@ static struct fio_option options[] = {
.off1 = offsetof(struct ioring_options, sqpoll_thread),
.help = "Offload submission/completion to kernel thread",
.category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
+ .group = FIO_OPT_G_IOURING,
},
{
.name = "sqthread_poll_cpu",
@@ -121,7 +130,7 @@ static struct fio_option options[] = {
.cb = fio_ioring_sqpoll_cb,
.help = "What CPU to run SQ thread polling on",
.category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
+ .group = FIO_OPT_G_IOURING,
},
{
.name = NULL,
@@ -143,10 +152,16 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
struct io_uring_sqe *sqe;
sqe = &ld->sqes[io_u->index];
- sqe->fd = f->fd;
- sqe->flags = 0;
- sqe->ioprio = 0;
- sqe->buf_index = 0;
+
+ /* zero out fields not used in this submission */
+ memset(sqe, 0, sizeof(*sqe));
+
+ if (o->registerfiles) {
+ sqe->fd = f->engine_pos;
+ sqe->flags = IOSQE_FIXED_FILE;
+ } else {
+ sqe->fd = f->fd;
+ }
if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
if (o->fixedbufs) {
@@ -167,10 +182,16 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
}
sqe->off = io_u->offset;
} else if (ddir_sync(io_u->ddir)) {
- sqe->fsync_flags = 0;
- if (io_u->ddir == DDIR_DATASYNC)
- sqe->fsync_flags |= IORING_FSYNC_DATASYNC;
- sqe->opcode = IORING_OP_FSYNC;
+ if (io_u->ddir == DDIR_SYNC_FILE_RANGE) {
+ sqe->off = f->first_write;
+ sqe->len = f->last_write - f->first_write;
+ sqe->sync_range_flags = td->o.sync_file_range;
+ sqe->opcode = IORING_OP_SYNC_FILE_RANGE;
+ } else {
+ if (io_u->ddir == DDIR_DATASYNC)
+ sqe->fsync_flags |= IORING_FSYNC_DATASYNC;
+ sqe->opcode = IORING_OP_FSYNC;
+ }
}
sqe->user_data = (unsigned long) io_u;
@@ -197,13 +218,6 @@ static struct io_u *fio_ioring_event(struct thread_data *td, int event)
} else
io_u->error = 0;
- if (io_u->ddir == DDIR_READ) {
- if (cqe->flags & IOCQE_FLAG_CACHEHIT)
- ld->cachehit++;
- else
- ld->cachemiss++;
- }
-
return io_u;
}
@@ -243,6 +257,8 @@ static int fio_ioring_getevents(struct thread_data *td, unsigned int min,
r = fio_ioring_cqring_reap(td, events, max);
if (r) {
events += r;
+ if (actual_min != 0)
+ actual_min -= r;
continue;
}
@@ -250,7 +266,7 @@ static int fio_ioring_getevents(struct thread_data *td, unsigned int min,
r = io_uring_enter(ld, 0, actual_min,
IORING_ENTER_GETEVENTS);
if (r < 0) {
- if (errno == EAGAIN)
+ if (errno == EAGAIN || errno == EINTR)
continue;
td_verror(td, errno, "io_uring_enter");
break;
@@ -361,7 +377,7 @@ static int fio_ioring_commit(struct thread_data *td)
io_u_mark_submit(td, ret);
continue;
} else {
- if (errno == EAGAIN) {
+ if (errno == EAGAIN || errno == EINTR) {
ret = fio_ioring_cqring_reap(td, 0, ld->queued);
if (ret)
continue;
@@ -391,14 +407,12 @@ static void fio_ioring_cleanup(struct thread_data *td)
struct ioring_data *ld = td->io_ops_data;
if (ld) {
- td->ts.cachehit += ld->cachehit;
- td->ts.cachemiss += ld->cachemiss;
-
if (!(td->flags & TD_F_CHILD))
fio_ioring_unmap(ld);
free(ld->io_u_index);
free(ld->iovecs);
+ free(ld->fds);
free(ld);
}
}
@@ -487,9 +501,50 @@ static int fio_ioring_queue_init(struct thread_data *td)
return fio_ioring_mmap(ld, &p);
}
+/*
+ * Open every file of the job, collect the descriptors in ld->fds and
+ * register that array with the ring via IORING_REGISTER_FILES.
+ *
+ * Each file's engine_pos is set to its index in ld->fds.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * generic_open_file() or register syscall.
+ */
+static int fio_ioring_register_files(struct thread_data *td)
+{
+ struct ioring_data *ld = td->io_ops_data;
+ struct fio_file *f;
+ unsigned int i;
+ int ret;
+
+ /* NOTE(review): the calloc() result is used below without a NULL check. */
+ ld->fds = calloc(td->o.nr_files, sizeof(int));
+
+ for_each_file(td, f, i) {
+ ret = generic_open_file(td, f);
+ if (ret)
+ goto err;
+ ld->fds[i] = f->fd;
+ f->engine_pos = i;
+ }
+
+ ret = syscall(__NR_sys_io_uring_register, ld->ring_fd,
+ IORING_REGISTER_FILES, ld->fds, td->o.nr_files);
+ if (ret) {
+ /* also the landing point for a failed open above (ret is non-zero) */
+err:
+ free(ld->fds);
+ ld->fds = NULL;
+ }
+
+ /*
+ * Pretend the file is closed again, and really close it if we hit
+ * an error.
+ */
+ for_each_file(td, f, i) {
+ if (ret) {
+ /* the close result is deliberately ignored */
+ int fio_unused ret2;
+ ret2 = generic_close_file(td, f);
+ } else
+ f->fd = -1;
+ }
+
+ return ret;
+}
+
static int fio_ioring_post_init(struct thread_data *td)
{
struct ioring_data *ld = td->io_ops_data;
+ struct ioring_options *o = td->eo;
struct io_u *io_u;
int err, i;
@@ -507,6 +562,14 @@ static int fio_ioring_post_init(struct thread_data *td)
return 1;
}
+ if (o->registerfiles) {
+ err = fio_ioring_register_files(td);
+ if (err) {
+ td_verror(td, errno, "ioring_register_files");
+ return 1;
+ }
+ }
+
return 0;
}
@@ -517,8 +580,19 @@ static unsigned roundup_pow2(unsigned depth)
static int fio_ioring_init(struct thread_data *td)
{
+ struct ioring_options *o = td->eo;
struct ioring_data *ld;
+ /* sqthread submission requires registered files */
+ if (o->sqpoll_thread)
+ o->registerfiles = 1;
+
+ if (o->registerfiles && td->o.nr_files != td->o.open_files) {
+ log_err("fio: io_uring registered files require nr_files to "
+ "be identical to open_files\n");
+ return 1;
+ }
+
ld = calloc(1, sizeof(*ld));
/* ring depth must be a power-of-2 */
@@ -541,9 +615,34 @@ static int fio_ioring_io_u_init(struct thread_data *td, struct io_u *io_u)
return 0;
}
+/*
+ * Open hook. When files are registered with the ring, hand back the
+ * descriptor saved in ld->fds for this file's engine_pos instead of
+ * opening the file again; otherwise defer to generic_open_file().
+ */
+static int fio_ioring_open_file(struct thread_data *td, struct fio_file *f)
+{
+	struct ioring_options *o = td->eo;
+	struct ioring_data *ld = td->io_ops_data;
+
+	if (ld && o->registerfiles) {
+		f->fd = ld->fds[f->engine_pos];
+		return 0;
+	}
+
+	return generic_open_file(td, f);
+}
+
+/*
+ * Close hook. With registered files the descriptor is only marked as
+ * closed (f->fd = -1); otherwise defer to generic_close_file().
+ */
+static int fio_ioring_close_file(struct thread_data *td, struct fio_file *f)
+{
+	struct ioring_options *o = td->eo;
+	struct ioring_data *ld = td->io_ops_data;
+
+	if (ld && o->registerfiles) {
+		f->fd = -1;
+		return 0;
+	}
+
+	return generic_close_file(td, f);
+}
+
static struct ioengine_ops ioengine = {
.name = "io_uring",
.version = FIO_IOOPS_VERSION,
+ .flags = FIO_ASYNCIO_SYNC_TRIM,
.init = fio_ioring_init,
.post_init = fio_ioring_post_init,
.io_u_init = fio_ioring_io_u_init,
@@ -553,8 +652,8 @@ static struct ioengine_ops ioengine = {
.getevents = fio_ioring_getevents,
.event = fio_ioring_event,
.cleanup = fio_ioring_cleanup,
- .open_file = generic_open_file,
- .close_file = generic_close_file,
+ .open_file = fio_ioring_open_file,
+ .close_file = fio_ioring_close_file,
.get_file_size = generic_get_file_size,
.options = options,
.option_struct_size = sizeof(struct ioring_options),
diff --git a/engines/libaio.c b/engines/libaio.c
index 8844ac8b..cd5b89f9 100644
--- a/engines/libaio.c
+++ b/engines/libaio.c
@@ -16,14 +16,6 @@
#include "../optgroup.h"
#include "../lib/memalign.h"
-#ifndef IOCB_FLAG_HIPRI
-#define IOCB_FLAG_HIPRI (1 << 2)
-#endif
-
-#ifndef IOCTX_FLAG_IOPOLL
-#define IOCTX_FLAG_IOPOLL (1 << 0)
-#endif
-
static int fio_libaio_commit(struct thread_data *td);
struct libaio_data {
@@ -66,15 +58,6 @@ static struct fio_option options[] = {
.group = FIO_OPT_G_LIBAIO,
},
{
- .name = "hipri",
- .lname = "High Priority",
- .type = FIO_OPT_STR_SET,
- .off1 = offsetof(struct libaio_options, hipri),
- .help = "Use polled IO completions",
- .category = FIO_OPT_C_ENGINE,
- .group = FIO_OPT_G_LIBAIO,
- },
- {
.name = NULL,
},
};
@@ -91,19 +74,12 @@ static inline void ring_inc(struct libaio_data *ld, unsigned int *val,
static int fio_libaio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
{
struct fio_file *f = io_u->file;
- struct libaio_options *o = td->eo;
- struct iocb *iocb;
-
- iocb = &io_u->iocb;
+ struct iocb *iocb = &io_u->iocb;
if (io_u->ddir == DDIR_READ) {
io_prep_pread(iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
- if (o->hipri)
- iocb->u.c.flags |= IOCB_FLAG_HIPRI;
} else if (io_u->ddir == DDIR_WRITE) {
io_prep_pwrite(iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
- if (o->hipri)
- iocb->u.c.flags |= IOCB_FLAG_HIPRI;
} else if (ddir_sync(io_u->ddir))
io_prep_fsync(iocb, f->fd);
@@ -366,42 +342,12 @@ static void fio_libaio_cleanup(struct thread_data *td)
}
}
-static int fio_libaio_old_queue_init(struct libaio_data *ld, unsigned int depth,
- bool hipri)
-{
- if (hipri) {
- log_err("fio: polled aio not available on your platform\n");
- return 1;
- }
-
- return io_queue_init(depth, &ld->aio_ctx);
-}
-
-static int fio_libaio_queue_init(struct libaio_data *ld, unsigned int depth,
- bool hipri)
-{
-#ifdef __NR_sys_io_setup2
- int ret, flags = 0;
-
- if (hipri)
- flags |= IOCTX_FLAG_IOPOLL;
-
- ret = syscall(__NR_sys_io_setup2, depth, flags, NULL, NULL,
- &ld->aio_ctx);
- if (!ret)
- return 0;
- /* fall through to old syscall */
-#endif
- return fio_libaio_old_queue_init(ld, depth, hipri);
-}
-
static int fio_libaio_post_init(struct thread_data *td)
{
struct libaio_data *ld = td->io_ops_data;
- struct libaio_options *o = td->eo;
- int err = 0;
+ int err;
- err = fio_libaio_queue_init(ld, td->o.iodepth, o->hipri);
+ err = io_queue_init(td->o.iodepth, &ld->aio_ctx);
if (err) {
td_verror(td, -err, "io_queue_init");
return 1;
@@ -429,6 +375,7 @@ static int fio_libaio_init(struct thread_data *td)
static struct ioengine_ops ioengine = {
.name = "libaio",
.version = FIO_IOOPS_VERSION,
+ .flags = FIO_ASYNCIO_SYNC_TRIM,
.init = fio_libaio_init,
.post_init = fio_libaio_post_init,
.prep = fio_libaio_prep,
diff --git a/engines/libiscsi.c b/engines/libiscsi.c
new file mode 100644
index 00000000..58667fb2
--- /dev/null
+++ b/engines/libiscsi.c
@@ -0,0 +1,411 @@
+/*
+ * libiscsi engine
+ *
+ * This engine reads and writes iSCSI LUNs using libiscsi.
+ */
+
+
+#include "../fio.h"
+#include "../optgroup.h"
+
+#include <stdlib.h>
+#include <iscsi/iscsi.h>
+#include <iscsi/scsi-lowlevel.h>
+#include <poll.h>
+
+struct iscsi_lun;
+struct iscsi_info;
+
+/*
+ * Per-request state: allocated in fio_iscsi_queue(), passed to libiscsi as
+ * the callback's private data and freed in fio_iscsi_event().
+ */
+struct iscsi_task {
+ struct scsi_task *scsi_task; /* the SCSI command issued for this io_u */
+ struct iscsi_lun *iscsi_lun; /* LUN the command was sent to */
+ struct io_u *io_u; /* fio I/O unit this command serves */
+};
+
+/* One connected LUN; stored in fio_file->engine_data and iscsi_info->luns. */
+struct iscsi_lun {
+ struct iscsi_info *iscsi_info; /* back pointer to the per-thread state */
+ struct iscsi_context *iscsi; /* libiscsi connection context */
+ struct iscsi_url *url; /* parsed form of the file name URL */
+ int block_size; /* block length reported by READ CAPACITY(16) */
+ uint64_t num_blocks; /* returned_lba + 1 from READ CAPACITY(16) */
+};
+
+/* Per-thread engine state, kept in td->io_ops_data. */
+struct iscsi_info {
+ struct iscsi_lun **luns; /* nr_luns entries, one per fio file */
+ int nr_luns;
+ struct pollfd *pfds; /* nr_luns entries, parallel to luns */
+ struct iscsi_task **complete_events; /* completions gathered by iscsi_cb() */
+ int nr_events; /* number of valid entries in complete_events */
+};
+
+/* Engine-specific options, filled in through the options[] table. */
+struct iscsi_options {
+ void *pad; /* NOTE(review): presumably keeps real options off offset 0 - confirm */
+ char *initiator; /* initiator name passed to iscsi_create_context() */
+};
+
+/* Option table for the libiscsi engine; terminated by a NULL-named entry. */
+static struct fio_option options[] = {
+ {
+ /* initiator name; defaults to "iqn.2019-04.org.fio:fio" */
+ .name = "initiator",
+ .lname = "initiator",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct iscsi_options, initiator),
+ .def = "iqn.2019-04.org.fio:fio",
+ .help = "initiator name",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_ISCSI,
+ },
+
+ {
+ .name = NULL,
+ },
+};
+
+/*
+ * Connect to the LUN named by f->file_name (an iSCSI URL), read its
+ * capacity and record it as entry i of iscsi_info.
+ *
+ * On success the new iscsi_lun is stored in f->engine_data and
+ * iscsi_info->luns[i], its socket in iscsi_info->pfds[i].fd, and
+ * f->real_file_size is set to num_blocks * block_size.
+ *
+ * Returns 0 on success or a positive error code. On failure everything
+ * acquired here (context, parsed URL, lun state) is released again.
+ */
+static int fio_iscsi_setup_lun(struct iscsi_info *iscsi_info,
+			       char *initiator, struct fio_file *f, int i)
+{
+	struct iscsi_lun *iscsi_lun;
+	struct scsi_task *task = NULL;
+	struct scsi_readcapacity16 *rc16;
+	int ret = 0;
+
+	iscsi_lun = calloc(1, sizeof(*iscsi_lun));
+	if (iscsi_lun == NULL) {
+		log_err("iscsi: failed to allocate lun state.\n");
+		return ENOMEM;
+	}
+
+	iscsi_lun->iscsi_info = iscsi_info;
+
+	iscsi_lun->url = iscsi_parse_full_url(NULL, f->file_name);
+	if (iscsi_lun->url == NULL) {
+		log_err("iscsi: failed to parse url: %s\n", f->file_name);
+		ret = EINVAL;
+		goto out;
+	}
+
+	iscsi_lun->iscsi = iscsi_create_context(initiator);
+	if (iscsi_lun->iscsi == NULL) {
+		log_err("iscsi: failed to create iscsi context.\n");
+		ret = 1;
+		goto out;
+	}
+
+	if (iscsi_set_targetname(iscsi_lun->iscsi, iscsi_lun->url->target)) {
+		log_err("iscsi: failed to set target name.\n");
+		ret = EINVAL;
+		goto out;
+	}
+
+	if (iscsi_set_session_type(iscsi_lun->iscsi, ISCSI_SESSION_NORMAL) != 0) {
+		log_err("iscsi: failed to set session type.\n");
+		ret = EINVAL;
+		goto out;
+	}
+
+	if (iscsi_set_header_digest(iscsi_lun->iscsi,
+				    ISCSI_HEADER_DIGEST_NONE_CRC32C) != 0) {
+		log_err("iscsi: failed to set header digest.\n");
+		ret = EINVAL;
+		goto out;
+	}
+
+	if (iscsi_full_connect_sync(iscsi_lun->iscsi,
+				    iscsi_lun->url->portal,
+				    iscsi_lun->url->lun)) {
+		log_err("iscsi: failed to connect to LUN : %s\n",
+			iscsi_get_error(iscsi_lun->iscsi));
+		ret = EINVAL;
+		goto out;
+	}
+
+	task = iscsi_readcapacity16_sync(iscsi_lun->iscsi, iscsi_lun->url->lun);
+	if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+		log_err("iscsi: failed to send readcapacity command: %s\n",
+			iscsi_get_error(iscsi_lun->iscsi));
+		ret = EINVAL;
+		goto out;
+	}
+
+	rc16 = scsi_datain_unmarshall(task);
+	if (rc16 == NULL) {
+		log_err("iscsi: failed to unmarshal readcapacity16 data.\n");
+		ret = EINVAL;
+		goto out;
+	}
+
+	iscsi_lun->block_size = rc16->block_length;
+	iscsi_lun->num_blocks = rc16->returned_lba + 1;
+
+	f->real_file_size = iscsi_lun->num_blocks * iscsi_lun->block_size;
+	f->engine_data = iscsi_lun;
+
+	iscsi_info->luns[i] = iscsi_lun;
+	iscsi_info->pfds[i].fd = iscsi_get_fd(iscsi_lun->iscsi);
+
+out:
+	/* rc16 points into task, so the task is only released here */
+	if (task)
+		scsi_free_scsi_task(task);
+
+	if (ret) {
+		if (iscsi_lun->iscsi != NULL) {
+			if (iscsi_is_logged_in(iscsi_lun->iscsi))
+				iscsi_logout_sync(iscsi_lun->iscsi);
+			iscsi_destroy_context(iscsi_lun->iscsi);
+		}
+		/* the URL was parsed without a context, so it is ours to free */
+		if (iscsi_lun->url != NULL)
+			iscsi_destroy_url(iscsi_lun->url);
+		free(iscsi_lun);
+	}
+
+	return ret;
+}
+
+/*
+ * ->setup hook: allocate the per-thread engine state and connect one LUN
+ * per fio file.
+ *
+ * Returns 0 on success or a positive error code. Setup stops at the first
+ * LUN that fails, so a later successful LUN cannot hide the failure.
+ */
+static int fio_iscsi_setup(struct thread_data *td)
+{
+	struct iscsi_options *options = td->eo;
+	struct iscsi_info *iscsi_info;
+	struct fio_file *f;
+	int ret = 0;
+	int i;
+
+	iscsi_info = calloc(1, sizeof(*iscsi_info));
+	if (iscsi_info == NULL) {
+		log_err("iscsi: failed to allocate engine state.\n");
+		return ENOMEM;
+	}
+
+	/* Publish early so that fio_iscsi_cleanup() can release it. */
+	td->io_ops_data = iscsi_info;
+
+	iscsi_info->luns = calloc(td->o.nr_files, sizeof(struct iscsi_lun *));
+	iscsi_info->pfds = calloc(td->o.nr_files, sizeof(struct pollfd));
+	iscsi_info->complete_events = calloc(td->o.iodepth,
+					     sizeof(struct iscsi_task *));
+
+	/* calloc() may legitimately return NULL for a zero element count */
+	if ((td->o.nr_files && (!iscsi_info->luns || !iscsi_info->pfds)) ||
+	    (td->o.iodepth && !iscsi_info->complete_events)) {
+		/* nr_luns is still 0, so cleanup never indexes a missing array */
+		log_err("iscsi: failed to allocate engine state.\n");
+		return ENOMEM;
+	}
+
+	iscsi_info->nr_luns = td->o.nr_files;
+	iscsi_info->nr_events = 0;
+
+	for_each_file(td, f, i) {
+		ret = fio_iscsi_setup_lun(iscsi_info, options->initiator, f, i);
+		/* fio_iscsi_setup_lun() reports failure with positive codes */
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/* ->init hook: nothing to do here; always reports success. */
+static int fio_iscsi_init(struct thread_data *td)
+{
+	return 0;
+}
+
+/*
+ * Tear down one LUN: log out if still logged in, destroy the libiscsi
+ * context, release the parsed URL and free the lun itself.
+ */
+static void fio_iscsi_cleanup_lun(struct iscsi_lun *iscsi_lun)
+{
+	if (iscsi_lun->iscsi != NULL) {
+		if (iscsi_is_logged_in(iscsi_lun->iscsi))
+			iscsi_logout_sync(iscsi_lun->iscsi);
+		iscsi_destroy_context(iscsi_lun->iscsi);
+	}
+
+	/* allocated by iscsi_parse_full_url() during LUN setup */
+	if (iscsi_lun->url != NULL)
+		iscsi_destroy_url(iscsi_lun->url);
+
+	free(iscsi_lun);
+}
+
+/*
+ * ->cleanup hook: tear down every connected LUN and free the per-thread
+ * engine state. Safe to call when no state was ever allocated.
+ */
+static void fio_iscsi_cleanup(struct thread_data *td)
+{
+	struct iscsi_info *iscsi_info = td->io_ops_data;
+
+	/* Nothing to release if the engine state was never allocated. */
+	if (iscsi_info == NULL)
+		return;
+
+	for (int i = 0; i < iscsi_info->nr_luns; i++) {
+		if (iscsi_info->luns[i]) {
+			fio_iscsi_cleanup_lun(iscsi_info->luns[i]);
+			iscsi_info->luns[i] = NULL;
+		}
+	}
+
+	free(iscsi_info->luns);
+	free(iscsi_info->pfds);
+	free(iscsi_info->complete_events);
+	free(iscsi_info);
+}
+
+/* ->prep hook: no preparation is done here; always returns 0. */
+static int fio_iscsi_prep(struct thread_data *td, struct io_u *io_u)
+{
+ return 0;
+}
+
+/* ->open_file hook: does nothing and always returns 0. */
+static int fio_iscsi_open_file(struct thread_data *td, struct fio_file *f)
+{
+ return 0;
+}
+
+/* ->close_file hook: does nothing and always returns 0. */
+static int fio_iscsi_close_file(struct thread_data *td, struct fio_file *f)
+{
+ return 0;
+}
+
+/*
+ * Completion callback passed to iscsi_scsi_command_async().
+ *
+ * private_data is the struct iscsi_task built in fio_iscsi_queue(). The
+ * SCSI status is translated into io_u->error (and io_u->resid on failure)
+ * and the task is appended to the completion list of its iscsi_info.
+ */
+static void iscsi_cb(struct iscsi_context *iscsi, int status,
+		     void *command_data, void *private_data)
+{
+	struct iscsi_task *done = private_data;
+	struct iscsi_lun *lun = done->iscsi_lun;
+	struct iscsi_info *info = lun->iscsi_info;
+	struct io_u *io_u = done->io_u;
+
+	if (status != SCSI_STATUS_GOOD) {
+		log_err("iscsi: request failed with error %s.\n",
+			iscsi_get_error(lun->iscsi));
+
+		io_u->error = 1;
+		io_u->resid = io_u->xfer_buflen;
+	} else {
+		io_u->error = 0;
+	}
+
+	info->complete_events[info->nr_events++] = done;
+}
+
+/*
+ * ->queue hook: translate an io_u into a SCSI READ(16), WRITE(16) or
+ * SYNCHRONIZE CACHE(16) command and submit it asynchronously.
+ *
+ * Returns FIO_Q_QUEUED once the command has been handed to libiscsi;
+ * completion is then reported through iscsi_cb(). On any failure nothing
+ * stays allocated, io_u->error is set to a positive errno value and
+ * FIO_Q_COMPLETED is returned.
+ */
+static enum fio_q_status fio_iscsi_queue(struct thread_data *td,
+					 struct io_u *io_u)
+{
+	struct iscsi_lun *iscsi_lun = io_u->file->engine_data;
+	struct scsi_task *scsi_task = NULL;
+	struct iscsi_task *iscsi_task = NULL;
+	int err;
+
+	if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
+		if (io_u->offset % iscsi_lun->block_size != 0) {
+			log_err("iscsi: offset is not align to block size.\n");
+			err = EINVAL;
+			goto out;
+		}
+
+		if (io_u->xfer_buflen % iscsi_lun->block_size != 0) {
+			log_err("iscsi: buflen is not align to block size.\n");
+			err = EINVAL;
+			goto out;
+		}
+	}
+
+	iscsi_task = malloc(sizeof(*iscsi_task));
+	if (iscsi_task == NULL) {
+		log_err("iscsi: failed to allocate task.\n");
+		err = ENOMEM;
+		goto out;
+	}
+
+	if (io_u->ddir == DDIR_READ) {
+		scsi_task = scsi_cdb_read16(io_u->offset / iscsi_lun->block_size,
+					    io_u->xfer_buflen,
+					    iscsi_lun->block_size,
+					    0, 0, 0, 0, 0);
+		if (scsi_task == NULL) {
+			log_err("iscsi: failed to build scsi command.\n");
+			err = ENOMEM;
+			goto out;
+		}
+		if (scsi_task_add_data_in_buffer(scsi_task, io_u->xfer_buflen,
+						 io_u->xfer_buf) < 0) {
+			log_err("iscsi: failed to add data in buffer.\n");
+			err = EIO;
+			goto out;
+		}
+	} else if (io_u->ddir == DDIR_WRITE) {
+		scsi_task = scsi_cdb_write16(io_u->offset / iscsi_lun->block_size,
+					     io_u->xfer_buflen,
+					     iscsi_lun->block_size,
+					     0, 0, 0, 0, 0);
+		if (scsi_task == NULL) {
+			log_err("iscsi: failed to build scsi command.\n");
+			err = ENOMEM;
+			goto out;
+		}
+		if (scsi_task_add_data_out_buffer(scsi_task, io_u->xfer_buflen,
+						  io_u->xfer_buf) < 0) {
+			log_err("iscsi: failed to add data out buffer.\n");
+			err = EIO;
+			goto out;
+		}
+	} else if (ddir_sync(io_u->ddir)) {
+		/*
+		 * NOTE(review): the second argument is passed as a byte count
+		 * (num_blocks * block_size); confirm against the libiscsi
+		 * prototype whether a block count is expected here.
+		 */
+		scsi_task = scsi_cdb_synchronizecache16(
+			0, iscsi_lun->num_blocks * iscsi_lun->block_size, 0, 0);
+		if (scsi_task == NULL) {
+			log_err("iscsi: failed to build scsi command.\n");
+			err = ENOMEM;
+			goto out;
+		}
+	} else {
+		log_err("iscsi: invalid I/O operation: %d\n", io_u->ddir);
+		err = EINVAL;
+		goto out;
+	}
+
+	iscsi_task->scsi_task = scsi_task;
+	iscsi_task->iscsi_lun = iscsi_lun;
+	iscsi_task->io_u = io_u;
+
+	if (iscsi_scsi_command_async(iscsi_lun->iscsi, iscsi_lun->url->lun,
+				     scsi_task, iscsi_cb, NULL, iscsi_task) < 0) {
+		log_err("iscsi: failed to send scsi command.\n");
+		err = EIO;
+		goto out;
+	}
+
+	return FIO_Q_QUEUED;
+
+out:
+	free(iscsi_task);
+
+	if (scsi_task)
+		scsi_free_scsi_task(scsi_task);
+
+	io_u->error = err;
+	return FIO_Q_COMPLETED;
+}
+
+/*
+ * ->getevents hook: poll all LUN sockets and service them until at least
+ * `min` completions have been collected by iscsi_cb().
+ *
+ * `max` and `t` are not used; poll() is called with an infinite timeout.
+ * Returns the number of collected events, or the negative poll() result
+ * if polling failed with anything other than EINTR/EAGAIN.
+ */
+static int fio_iscsi_getevents(struct thread_data *td, unsigned int min,
+ unsigned int max, const struct timespec *t)
+{
+ struct iscsi_info *iscsi_info = td->io_ops_data;
+ int ret = 0;
+
+ /* start a fresh completion list; iscsi_cb() appends to it */
+ iscsi_info->nr_events = 0;
+
+ while (iscsi_info->nr_events < min) {
+ /* ask libiscsi which poll events each connection currently wants */
+ for (int i = 0; i < iscsi_info->nr_luns; i++) {
+ int events = iscsi_which_events(iscsi_info->luns[i]->iscsi);
+ iscsi_info->pfds[i].events = events;
+ }
+
+ ret = poll(iscsi_info->pfds, iscsi_info->nr_luns, -1);
+ if (ret < 0) {
+ if (errno == EINTR || errno == EAGAIN) {
+ continue;
+ }
+ log_err("iscsi: failed to poll events: %s.\n",
+ strerror(errno));
+ break;
+ }
+
+ /* let libiscsi process each socket; completions arrive via iscsi_cb() */
+ for (int i = 0; i < iscsi_info->nr_luns; i++) {
+ ret = iscsi_service(iscsi_info->luns[i]->iscsi,
+ iscsi_info->pfds[i].revents);
+ /* NOTE(review): a service failure aborts the process via assert */
+ assert(ret >= 0);
+ }
+ }
+
+ return ret < 0 ? ret : iscsi_info->nr_events;
+}
+
+/*
+ * ->event hook: return the io_u of completion number `event` and release
+ * the bookkeeping (SCSI task and iscsi_task) that was attached to it.
+ */
+static struct io_u *fio_iscsi_event(struct thread_data *td, int event)
+{
+	struct iscsi_info *info = td->io_ops_data;
+	struct iscsi_task *done = info->complete_events[event];
+	struct io_u *io_u = done->io_u;
+
+	info->complete_events[event] = NULL;
+
+	scsi_free_scsi_task(done->scsi_task);
+	free(done);
+
+	return io_u;
+}
+
+/* Engine descriptor registered with fio under the name "libiscsi". */
+static struct ioengine_ops ioengine_iscsi = {
+ .name = "libiscsi",
+ .version = FIO_IOOPS_VERSION,
+ .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NODISKUTIL,
+ .setup = fio_iscsi_setup,
+ .init = fio_iscsi_init,
+ .prep = fio_iscsi_prep,
+ .queue = fio_iscsi_queue,
+ .getevents = fio_iscsi_getevents,
+ .event = fio_iscsi_event,
+ .cleanup = fio_iscsi_cleanup,
+ .open_file = fio_iscsi_open_file,
+ .close_file = fio_iscsi_close_file,
+ .option_struct_size = sizeof(struct iscsi_options),
+ .options = options,
+};
+
+/* Registers the libiscsi engine with fio. */
+static void fio_init fio_iscsi_register(void)
+{
+ register_ioengine(&ioengine_iscsi);
+}
+
+/* Unregisters the libiscsi engine from fio. */
+static void fio_exit fio_iscsi_unregister(void)
+{
+ unregister_ioengine(&ioengine_iscsi);
+}
diff --git a/engines/mmap.c b/engines/mmap.c
index 308b4665..55ba1ab3 100644
--- a/engines/mmap.c
+++ b/engines/mmap.c
@@ -11,6 +11,7 @@
#include <sys/mman.h>
#include "../fio.h"
+#include "../optgroup.h"
#include "../verify.h"
/*
@@ -26,11 +27,40 @@ struct fio_mmap_data {
off_t mmap_off;
};
+#ifdef CONFIG_HAVE_THP
+/* Engine-specific options for mmap; only built with CONFIG_HAVE_THP. */
+struct mmap_options {
+ void *pad; /* NOTE(review): presumably keeps real options off offset 0 - confirm */
+ unsigned int thp; /* non-zero: advise MADV_HUGEPAGE and map privately */
+};
+
+/* Option table for the mmap engine; terminated by a NULL-named entry. */
+static struct fio_option options[] = {
+ {
+ /* integer switch stored in mmap_options.thp */
+ .name = "thp",
+ .lname = "Transparent Huge Pages",
+ .type = FIO_OPT_INT,
+ .off1 = offsetof(struct mmap_options, thp),
+ .help = "Memory Advise Huge Page",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_MMAP,
+ },
+ {
+ .name = NULL,
+ },
+};
+#endif
+
static bool fio_madvise_file(struct thread_data *td, struct fio_file *f,
size_t length)
{
struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
+#ifdef CONFIG_HAVE_THP
+ struct mmap_options *o = td->eo;
+
+ /* Ignore errors on this optional advisory */
+ if (o->thp)
+ madvise(fmd->mmap_ptr, length, MADV_HUGEPAGE);
+#endif
if (!td->o.fadvise_hint)
return true;
@@ -50,11 +80,27 @@ static bool fio_madvise_file(struct thread_data *td, struct fio_file *f,
return true;
}
+/*
+ * Pick the mmap() sharing flag: MAP_PRIVATE when the "thp" option is set
+ * (only possible when built with CONFIG_HAVE_THP), MAP_SHARED otherwise.
+ */
+static int fio_mmap_get_shared(struct thread_data *td)
+{
+#ifdef CONFIG_HAVE_THP
+	struct mmap_options *o = td->eo;
+
+	if (o->thp)
+		return MAP_PRIVATE;
+#endif
+	return MAP_SHARED;
+}
+
static int fio_mmap_file(struct thread_data *td, struct fio_file *f,
size_t length, off_t off)
{
struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
- int flags = 0;
+ int flags = 0, shared = fio_mmap_get_shared(td);
if (td_rw(td) && !td->o.verify_only)
flags = PROT_READ | PROT_WRITE;
@@ -66,7 +112,7 @@ static int fio_mmap_file(struct thread_data *td, struct fio_file *f,
} else
flags = PROT_READ;
- fmd->mmap_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
+ fmd->mmap_ptr = mmap(NULL, length, flags, shared, f->fd, off);
if (fmd->mmap_ptr == MAP_FAILED) {
fmd->mmap_ptr = NULL;
td_verror(td, errno, "mmap");
@@ -275,6 +321,10 @@ static struct ioengine_ops ioengine = {
.close_file = fio_mmapio_close_file,
.get_file_size = generic_get_file_size,
.flags = FIO_SYNCIO | FIO_NOEXTEND,
+#ifdef CONFIG_HAVE_THP
+ .options = options,
+ .option_struct_size = sizeof(struct mmap_options),
+#endif
};
static void fio_init fio_mmapio_register(void)
diff --git a/engines/nbd.c b/engines/nbd.c
new file mode 100644
index 00000000..53237929
--- /dev/null
+++ b/engines/nbd.c
@@ -0,0 +1,359 @@
+/*
+ * NBD engine
+ *
+ * IO engine that talks to an NBD server.
+ *
+ * Copyright (C) 2019 Red Hat Inc.
+ * Written by Richard W.M. Jones <rjones@redhat.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+
+#include <libnbd.h>
+
+#include "../fio.h"
+#include "../optgroup.h"
+
+/* Actually this differs across servers, but for nbdkit ... */
+#define NBD_MAX_REQUEST_SIZE (64 * 1024 * 1024)
+
+/* Storage for the NBD handle; kept in td->io_ops_data. */
+struct nbd_data {
+ struct nbd_handle *nbd; /* libnbd connection, NULL when not connected */
+ int debug; /* value of nbd_get_debug() read during setup */
+
+ /* The list of completed io_u structs. */
+ struct io_u **completed; /* grown with realloc() in cmd_completed() */
+ size_t nr_completed; /* number of valid entries in completed */
+};
+
+/* Engine-specific options, filled in through the options[] table. */
+struct nbd_options {
+ void *padding; /* NOTE(review): presumably keeps real options off offset 0 - confirm */
+ char *uri; /* NBD URI passed to nbd_connect_uri() */
+};
+
+/* Option table for the nbd engine; terminated by a NULL-named entry. */
+static struct fio_option options[] = {
+ {
+ /* no default: setup and init fail when the URI is missing */
+ .name = "uri",
+ .lname = "NBD URI",
+ .help = "Name of NBD URI",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_NBD,
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct nbd_options, uri),
+ },
+ {
+ .name = NULL,
+ },
+};
+
+/*
+ * Allocates nbd_data, registers a placeholder file if the job has none,
+ * and connects once to learn the export size, which becomes the real
+ * size of the first file. The connection is closed again before
+ * returning successfully.
+ *
+ * Returns 0 on success and 1 on any failure.
+ */
+static int nbd_setup(struct thread_data *td)
+{
+ struct nbd_data *nbd_data;
+ struct nbd_options *o = td->eo;
+ struct fio_file *f;
+ int r;
+ int64_t size;
+
+ nbd_data = calloc(1, sizeof(*nbd_data));
+ if (!nbd_data) {
+ td_verror(td, errno, "calloc");
+ return 1;
+ }
+ td->io_ops_data = nbd_data;
+
+ /* Pretend to deal with files. See engines/rbd.c */
+ if (!td->files_index) {
+ add_file(td, "nbd", 0, 0);
+ td->o.nr_files = td->o.nr_files ? : 1;
+ td->o.open_files++;
+ }
+ f = td->files[0];
+
+ /* on the error returns below the handle stays in nbd_data->nbd */
+ nbd_data->nbd = nbd_create();
+ if (!nbd_data->nbd) {
+ log_err("fio: nbd_create: %s\n", nbd_get_error());
+ return 1;
+ }
+
+ /* Get the debug flag which can be set through LIBNBD_DEBUG=1. */
+ nbd_data->debug = nbd_get_debug(nbd_data->nbd);
+
+ /* Connect synchronously here so we can check for the size and
+ * in future other properties of the server.
+ */
+ if (!o->uri) {
+ log_err("fio: nbd: uri parameter was not specified\n");
+ return 1;
+ }
+ r = nbd_connect_uri(nbd_data->nbd, o->uri);
+ if (r == -1) {
+ log_err("fio: nbd_connect_uri: %s\n", nbd_get_error());
+ return 1;
+ }
+ size = nbd_get_size(nbd_data->nbd);
+ if (size == -1) {
+ log_err("fio: nbd_get_size: %s\n", nbd_get_error());
+ return 1;
+ }
+
+ f->real_file_size = size;
+
+ /* this connection was only needed for the size; drop it again */
+ nbd_close (nbd_data->nbd);
+ nbd_data->nbd = NULL;
+
+ return 0;
+}
+
+/*
+ * Closes the socket and frees nbd_data, including the completion list --
+ * the opposite of nbd_setup. Safe to call when nbd_data was never
+ * allocated.
+ */
+static void nbd_cleanup(struct thread_data *td)
+{
+	struct nbd_data *nbd_data = td->io_ops_data;
+
+	if (!nbd_data)
+		return;
+
+	if (nbd_data->nbd)
+		nbd_close(nbd_data->nbd);
+
+	/* cmd_completed() grows this array with realloc(); release it too */
+	free(nbd_data->completed);
+	free(nbd_data);
+}
+
+/*
+ * ->init hook: create a libnbd handle and connect it to the configured
+ * URI. Returns 0 on success and 1 on any failure; a handle created before
+ * a failed connect stays in nbd_data->nbd.
+ */
+static int nbd_init(struct thread_data *td)
+{
+	struct nbd_data *nbd_data = td->io_ops_data;
+	struct nbd_options *o = td->eo;
+
+	if (o->uri == NULL) {
+		log_err("fio: nbd: uri parameter was not specified\n");
+		return 1;
+	}
+
+	nbd_data->nbd = nbd_create();
+	if (nbd_data->nbd == NULL) {
+		log_err("fio: nbd_create: %s\n", nbd_get_error());
+		return 1;
+	}
+
+	/* This is actually a synchronous connect and handshake. */
+	if (nbd_connect_uri(nbd_data->nbd, o->uri) == -1) {
+		log_err("fio: nbd_connect_uri: %s\n", nbd_get_error());
+		return 1;
+	}
+
+	log_info("fio: connected to NBD server\n");
+	return 0;
+}
+
+/*
+ * Completion callback for a command in flight. vp is the io_u that was
+ * passed as user_data in nbd_queue(); *error is the command's result.
+ *
+ * Records the result in io_u->error and appends the io_u to the
+ * completion list so that ->event can hand it back later. Always
+ * returns 0.
+ */
+static int cmd_completed(void *vp, int *error)
+{
+	struct io_u *io_u = vp;
+	struct nbd_data *nbd_data = io_u->engine_data;
+	struct io_u **grown;
+
+	if (nbd_data->debug)
+		log_info("fio: nbd: command completed\n");
+
+	io_u->error = *error;
+
+	/* Make room for one more entry before storing the completion. */
+	grown = realloc(nbd_data->completed,
+			sizeof(struct io_u *) * (nbd_data->nr_completed + 1));
+	if (grown == NULL) {
+		io_u->error = errno;
+		return 0;
+	}
+
+	nbd_data->completed = grown;
+	nbd_data->completed[nbd_data->nr_completed++] = io_u;
+
+	return 0;
+}
+
+/*
+ * Begin a read, write, trim or flush request.
+ *
+ * Returns FIO_Q_QUEUED when libnbd accepted the command; completion is
+ * then reported through cmd_completed(). Returns FIO_Q_COMPLETED with
+ * io_u->error set when the direction is unsupported or the submit call
+ * failed.
+ */
+static enum fio_q_status nbd_queue(struct thread_data *td,
+ struct io_u *io_u)
+{
+ struct nbd_data *nbd_data = td->io_ops_data;
+ nbd_completion_callback completion = { .callback = cmd_completed,
+ .user_data = io_u };
+ int r;
+
+ fio_ro_check(td, io_u);
+
+ /* lets cmd_completed() find the engine state from the io_u */
+ io_u->engine_data = nbd_data;
+
+ if (io_u->ddir == DDIR_WRITE || io_u->ddir == DDIR_READ)
+ assert(io_u->xfer_buflen <= NBD_MAX_REQUEST_SIZE);
+
+ switch (io_u->ddir) {
+ case DDIR_READ:
+ r = nbd_aio_pread(nbd_data->nbd,
+ io_u->xfer_buf, io_u->xfer_buflen,
+ io_u->offset, completion, 0);
+ break;
+ case DDIR_WRITE:
+ r = nbd_aio_pwrite(nbd_data->nbd,
+ io_u->xfer_buf, io_u->xfer_buflen,
+ io_u->offset, completion, 0);
+ break;
+ case DDIR_TRIM:
+ r = nbd_aio_trim(nbd_data->nbd, io_u->xfer_buflen,
+ io_u->offset, completion, 0);
+ break;
+ case DDIR_SYNC:
+ /* XXX We could probably also handle
+ * DDIR_SYNC_FILE_RANGE with a bit of effort.
+ */
+ r = nbd_aio_flush(nbd_data->nbd, completion, 0);
+ break;
+ default:
+ /* every other direction is rejected */
+ io_u->error = EINVAL;
+ return FIO_Q_COMPLETED;
+ }
+
+ if (r == -1) {
+ /* errno is optional information on libnbd error path;
+ * if it's 0, set it to a default value
+ */
+ io_u->error = nbd_get_errno();
+ if (io_u->error == 0)
+ io_u->error = EIO;
+ return FIO_Q_COMPLETED;
+ }
+
+ if (nbd_data->debug)
+ log_info("fio: nbd: command issued\n");
+ io_u->error = 0;
+ return FIO_Q_QUEUED;
+}
+
+/*
+ * Retire every command libnbd reports as completed and return how many
+ * were retired.
+ */
+static unsigned retire_commands(struct nbd_handle *nbd)
+{
+	unsigned retired = 0;
+	int64_t cookie;
+
+	for (;;) {
+		cookie = nbd_aio_peek_command_completed(nbd);
+		if (cookie <= 0)
+			break;
+
+		/* The return value is ignored: cmd_completed has already
+		 * recorded any error in io_u->error, so this call only
+		 * retires the command.
+		 */
+		nbd_aio_command_completed(nbd, cookie);
+		retired++;
+	}
+
+	if (nbd_get_debug(nbd))
+		log_info("fio: nbd: %u commands retired\n", retired);
+	return retired;
+}
+
+/*
+ * ->getevents hook: poll the NBD connection and retire completed commands
+ * until at least `min` have been retired. `max` is not used.
+ *
+ * Returns the number of retired commands, or -1 if nbd_poll() failed.
+ */
+static int nbd_getevents(struct thread_data *td, unsigned int min,
+ unsigned int max, const struct timespec *t)
+{
+ struct nbd_data *nbd_data = td->io_ops_data;
+ int r;
+ unsigned events = 0;
+ int timeout;
+
+ /* XXX This handling of timeout is wrong because it will wait
+ * for up to loop iterations * timeout.
+ */
+ /* timespec converted to milliseconds; no timespec means wait forever (-1) */
+ timeout = !t ? -1 : t->tv_sec * 1000 + t->tv_nsec / 1000000;
+
+ while (events < min) {
+ r = nbd_poll(nbd_data->nbd, timeout);
+ if (r == -1) {
+ /* error in poll */
+ log_err("fio: nbd_poll: %s\n", nbd_get_error());
+ return -1;
+ }
+ else {
+ /* poll made progress */
+ events += retire_commands(nbd_data->nbd);
+ }
+ }
+
+ return events;
+}
+
+/*
+ * ->event hook: pop the most recently recorded completion off the list.
+ * Returns NULL when the list is empty.
+ */
+static struct io_u *nbd_event(struct thread_data *td, int event)
+{
+	struct nbd_data *nbd_data = td->io_ops_data;
+
+	if (!nbd_data->nr_completed)
+		return NULL;
+
+	/* XXX We ignore the event number and assume fio calls us
+	 * exactly once for [0..nr_events-1].
+	 */
+	return nbd_data->completed[--nbd_data->nr_completed];
+}
+
+/* ->io_u_init hook: start each io_u with no engine data attached. */
+static int nbd_io_u_init(struct thread_data *td, struct io_u *io_u)
+{
+ io_u->engine_data = NULL;
+ return 0;
+}
+
+/* ->io_u_free hook: intentionally empty. */
+static void nbd_io_u_free(struct thread_data *td, struct io_u *io_u)
+{
+ /* Nothing needs to be done. */
+}
+
+/* ->open_file hook: does nothing and always returns 0. */
+static int nbd_open_file(struct thread_data *td, struct fio_file *f)
+{
+ return 0;
+}
+
+/* ->invalidate hook: does nothing and always returns 0. */
+static int nbd_invalidate(struct thread_data *td, struct fio_file *f)
+{
+ return 0;
+}
+
+/* Engine descriptor registered with fio under the name "nbd". */
+static struct ioengine_ops ioengine = {
+ .name = "nbd",
+ .version = FIO_IOOPS_VERSION,
+ .options = options,
+ .option_struct_size = sizeof(struct nbd_options),
+ .flags = FIO_DISKLESSIO | FIO_NOEXTEND,
+
+ /* lifecycle and I/O hooks */
+ .setup = nbd_setup,
+ .init = nbd_init,
+ .cleanup = nbd_cleanup,
+ .queue = nbd_queue,
+ .getevents = nbd_getevents,
+ .event = nbd_event,
+ .io_u_init = nbd_io_u_init,
+ .io_u_free = nbd_io_u_free,
+
+ /* file hooks are stubs */
+ .open_file = nbd_open_file,
+ .invalidate = nbd_invalidate,
+};
+
+/* Registers the nbd engine with fio. */
+static void fio_init fio_nbd_register(void)
+{
+ register_ioengine(&ioengine);
+}
+
+/* Unregisters the nbd engine from fio. */
+static void fio_exit fio_nbd_unregister(void)
+{
+ unregister_ioengine(&ioengine);
+}
diff --git a/engines/net.c b/engines/net.c
index ca6fb344..91f25774 100644
--- a/engines/net.c
+++ b/engines/net.c
@@ -1105,8 +1105,7 @@ static int fio_netio_setup_connect_unix(struct thread_data *td,
struct sockaddr_un *soun = &nd->addr_un;
soun->sun_family = AF_UNIX;
- memset(soun->sun_path, 0, sizeof(soun->sun_path));
- strncpy(soun->sun_path, path, sizeof(soun->sun_path) - 1);
+ snprintf(soun->sun_path, sizeof(soun->sun_path), "%s", path);
return 0;
}
@@ -1135,9 +1134,8 @@ static int fio_netio_setup_listen_unix(struct thread_data *td, const char *path)
mode = umask(000);
- memset(addr, 0, sizeof(*addr));
addr->sun_family = AF_UNIX;
- strncpy(addr->sun_path, path, sizeof(addr->sun_path) - 1);
+ snprintf(addr->sun_path, sizeof(addr->sun_path), "%s", path);
unlink(path);
len = sizeof(addr->sun_family) + strlen(path) + 1;
diff --git a/engines/posixaio.c b/engines/posixaio.c
index 4ac01957..82c6aa65 100644
--- a/engines/posixaio.c
+++ b/engines/posixaio.c
@@ -243,6 +243,7 @@ static int fio_posixaio_init(struct thread_data *td)
static struct ioengine_ops ioengine = {
.name = "posixaio",
.version = FIO_IOOPS_VERSION,
+ .flags = FIO_ASYNCIO_SYNC_TRIM,
.init = fio_posixaio_init,
.prep = fio_posixaio_prep,
.queue = fio_posixaio_queue,
diff --git a/engines/rbd.c b/engines/rbd.c
index 081b4a04..7d4d3faf 100644
--- a/engines/rbd.c
+++ b/engines/rbd.c
@@ -316,12 +316,14 @@ static inline int fri_check_complete(struct rbd_data *rbd, struct io_u *io_u,
return 0;
}
+#ifndef CONFIG_RBD_POLL
static inline int rbd_io_u_seen(struct io_u *io_u)
{
struct fio_rbd_iou *fri = io_u->engine_data;
return fri->io_seen;
}
+#endif
static void rbd_io_u_wait_complete(struct io_u *io_u)
{
diff --git a/engines/sg.c b/engines/sg.c
index c46b9aba..a1a6de4c 100644
--- a/engines/sg.c
+++ b/engines/sg.c
@@ -1181,8 +1181,8 @@ static char *fio_sgio_errdetails(struct io_u *io_u)
}
if (!(hdr->info & SG_INFO_CHECK) && !strlen(msg))
- strncpy(msg, "SG Driver did not report a Host, Driver or Device check",
- MAXERRDETAIL - 1);
+ snprintf(msg, MAXERRDETAIL, "%s",
+ "SG Driver did not report a Host, Driver or Device check");
return msg;
}
diff --git a/engines/splice.c b/engines/splice.c
index feb764fe..6fc36bb6 100644
--- a/engines/splice.c
+++ b/engines/splice.c
@@ -278,13 +278,6 @@ static int fio_spliceio_init(struct thread_data *td)
*/
sd->vmsplice_to_user_map = 1;
- /*
- * And if vmsplice_to_user works, we definitely need aligned
- * buffers. Just set ->odirect to force that.
- */
- if (td_read(td))
- td->o.mem_align = 1;
-
td->io_ops_data = sd;
return 0;
}
diff --git a/eta.c b/eta.c
index b69dd194..9950ef30 100644
--- a/eta.c
+++ b/eta.c
@@ -733,6 +733,10 @@ void print_thread_status(void)
void print_status_init(int thr_number)
{
+ struct jobs_eta_packed jep;
+
+ compiletime_assert(sizeof(struct jobs_eta) == sizeof(jep), "jobs_eta");
+
DRD_IGNORE_VAR(__run_str);
__run_str[thr_number] = 'P';
update_condensed_str(__run_str, run_str);
diff --git a/examples/libiscsi.fio b/examples/libiscsi.fio
new file mode 100644
index 00000000..565604dd
--- /dev/null
+++ b/examples/libiscsi.fio
@@ -0,0 +1,3 @@
+[iscsi]
+ioengine=libiscsi
+filename=iscsi\://127.0.0.1/iqn.2016-02.com.fio\:system\:fio/1
diff --git a/examples/nbd.fio b/examples/nbd.fio
new file mode 100644
index 00000000..6900ebe7
--- /dev/null
+++ b/examples/nbd.fio
@@ -0,0 +1,35 @@
+# To use fio to test nbdkit:
+#
+# nbdkit -U - memory size=256M --run 'export unixsocket; fio examples/nbd.fio'
+#
+# To use fio to test qemu-nbd:
+#
+# rm -f /tmp/disk.img /tmp/socket
+# truncate -s 256M /tmp/disk.img
+# export unixsocket=/tmp/socket
+# qemu-nbd -t -k $unixsocket -f raw /tmp/disk.img &
+# fio examples/nbd.fio
+# killall qemu-nbd
+
+[global]
+ioengine=nbd
+uri=nbd+unix:///?socket=${unixsocket}
+# Starting from nbdkit 1.14 the following will work:
+#uri=${uri}
+rw=randrw
+time_based
+runtime=60
+group_reporting
+iodepth=64
+
+[job0]
+offset=0
+
+[job1]
+offset=64m
+
+[job2]
+offset=128m
+
+[job3]
+offset=192m
diff --git a/exp/expression-parser.y b/exp/expression-parser.y
index 04a6e07a..8619025c 100644
--- a/exp/expression-parser.y
+++ b/exp/expression-parser.y
@@ -204,9 +204,9 @@ static void setup_to_parse_string(const char *string)
{
unsigned int len;
- len = strlen(string);
- if (len > sizeof(lexer_input_buffer) - 3)
- len = sizeof(lexer_input_buffer) - 3;
+ len = sizeof(lexer_input_buffer) - 3;
+ if (len > strlen(string))
+ len = strlen(string);
strncpy(lexer_input_buffer, string, len);
lexer_input_buffer[len] = '\0';
diff --git a/filesetup.c b/filesetup.c
index aa1a3945..a439b6d6 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -805,8 +805,7 @@ static unsigned long long get_fs_free_counts(struct thread_data *td)
} else if (f->filetype != FIO_TYPE_FILE)
continue;
- buf[255] = '\0';
- strncpy(buf, f->file_name, 255);
+ snprintf(buf, ARRAY_SIZE(buf), "%s", f->file_name);
if (stat(buf, &sb) < 0) {
if (errno != ENOENT)
@@ -829,7 +828,7 @@ static unsigned long long get_fs_free_counts(struct thread_data *td)
continue;
fm = calloc(1, sizeof(*fm));
- strncpy(fm->__base, buf, sizeof(fm->__base) - 1);
+ snprintf(fm->__base, ARRAY_SIZE(fm->__base), "%s", buf);
fm->base = basename(fm->__base);
fm->key = sb.st_dev;
flist_add(&fm->list, &list);
@@ -853,16 +852,37 @@ static unsigned long long get_fs_free_counts(struct thread_data *td)
uint64_t get_start_offset(struct thread_data *td, struct fio_file *f)
{
+ bool align = false;
struct thread_options *o = &td->o;
unsigned long long align_bs;
unsigned long long offset;
+ unsigned long long increment;
if (o->file_append && f->filetype == FIO_TYPE_FILE)
return f->real_file_size;
+ if (o->offset_increment_percent) {
+ assert(!o->offset_increment);
+ increment = o->offset_increment_percent * f->real_file_size / 100;
+ align = true;
+ } else
+ increment = o->offset_increment;
+
if (o->start_offset_percent > 0) {
+ /* calculate the raw offset */
+ offset = (f->real_file_size * o->start_offset_percent / 100) +
+ (td->subjob_number * increment);
+
+ align = true;
+ } else {
+ /* start_offset_percent not set */
+ offset = o->start_offset +
+ td->subjob_number * increment;
+ }
+
+ if (align) {
/*
- * if offset_align is provided, set initial offset
+ * if offset_align is provided, use it
*/
if (fio_option_is_set(o, start_offset_align)) {
align_bs = o->start_offset_align;
@@ -871,23 +891,50 @@ uint64_t get_start_offset(struct thread_data *td, struct fio_file *f)
align_bs = td_min_bs(td);
}
- /* calculate the raw offset */
- offset = (f->real_file_size * o->start_offset_percent / 100) +
- (td->subjob_number * o->offset_increment);
-
/*
* block align the offset at the next available boundary at
* ceiling(offset / align_bs) * align_bs
*/
offset = (offset / align_bs + (offset % align_bs != 0)) * align_bs;
+ }
+
+ return offset;
+}
+
+static bool create_work_dirs(struct thread_data *td, const char *fname)
+{
+ char path[PATH_MAX];
+ char *start, *end;
+ if (td->o.directory) {
+ snprintf(path, PATH_MAX, "%s%c%s", td->o.directory,
+ FIO_OS_PATH_SEPARATOR, fname);
+ start = strstr(path, fname);
} else {
- /* start_offset_percent not set */
- offset = o->start_offset +
- td->subjob_number * o->offset_increment;
+ snprintf(path, PATH_MAX, "%s", fname);
+ start = path;
}
- return offset;
+ end = start;
+ while ((end = strchr(end, FIO_OS_PATH_SEPARATOR)) != NULL) {
+ if (end == start)
+ break;
+ *end = '\0';
+ errno = 0;
+#ifdef CONFIG_HAVE_MKDIR_TWO
+ if (mkdir(path, 0600) && errno != EEXIST) {
+#else
+ if (mkdir(path) && errno != EEXIST) {
+#endif
+ log_err("fio: failed to create dir (%s): %d\n",
+ start, errno);
+ return false;
+ }
+ *end = FIO_OS_PATH_SEPARATOR;
+ end++;
+ }
+ td->flags |= TD_F_DIRS_CREATED;
+ return true;
}
/*
@@ -908,6 +955,14 @@ int setup_files(struct thread_data *td)
old_state = td_bump_runstate(td, TD_SETTING_UP);
+ for_each_file(td, f, i) {
+ if (!td_ioengine_flagged(td, FIO_DISKLESSIO) &&
+ strchr(f->file_name, FIO_OS_PATH_SEPARATOR) &&
+ !(td->flags & TD_F_DIRS_CREATED) &&
+ !create_work_dirs(td, f->file_name))
+ goto err_out;
+ }
+
/*
* Find out physical size of files or devices for this thread,
* before we determine I/O size and range of our targets.
@@ -1049,13 +1104,15 @@ int setup_files(struct thread_data *td)
}
if (f->filetype == FIO_TYPE_FILE &&
- (f->io_size + f->file_offset) > f->real_file_size &&
- !td_ioengine_flagged(td, FIO_DISKLESSIO)) {
- if (!o->create_on_open) {
+ (f->io_size + f->file_offset) > f->real_file_size) {
+ if (!td_ioengine_flagged(td, FIO_DISKLESSIO) &&
+ !o->create_on_open) {
need_extend++;
extend_size += (f->io_size + f->file_offset);
fio_file_set_extend(f);
- } else
+ } else if (!td_ioengine_flagged(td, FIO_DISKLESSIO) ||
+ (td_ioengine_flagged(td, FIO_DISKLESSIO) &&
+ td_ioengine_flagged(td, FIO_FAKEIO)))
f->real_file_size = f->io_size + f->file_offset;
}
}
@@ -1287,7 +1344,7 @@ bool init_random_map(struct thread_data *td)
return false;
if (td->o.random_generator == FIO_RAND_GEN_LFSR) {
- unsigned long seed;
+ uint64_t seed;
seed = td->rand_seeds[FIO_RAND_BLOCK_OFF];
@@ -1517,42 +1574,6 @@ bool exists_and_not_regfile(const char *filename)
return true;
}
-static bool create_work_dirs(struct thread_data *td, const char *fname)
-{
- char path[PATH_MAX];
- char *start, *end;
-
- if (td->o.directory) {
- snprintf(path, PATH_MAX, "%s%c%s", td->o.directory,
- FIO_OS_PATH_SEPARATOR, fname);
- start = strstr(path, fname);
- } else {
- snprintf(path, PATH_MAX, "%s", fname);
- start = path;
- }
-
- end = start;
- while ((end = strchr(end, FIO_OS_PATH_SEPARATOR)) != NULL) {
- if (end == start)
- break;
- *end = '\0';
- errno = 0;
-#ifdef CONFIG_HAVE_MKDIR_TWO
- if (mkdir(path, 0600) && errno != EEXIST) {
-#else
- if (mkdir(path) && errno != EEXIST) {
-#endif
- log_err("fio: failed to create dir (%s): %d\n",
- start, errno);
- return false;
- }
- *end = FIO_OS_PATH_SEPARATOR;
- end++;
- }
- td->flags |= TD_F_DIRS_CREATED;
- return true;
-}
-
int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
{
int cur_files = td->files_index;
@@ -1568,11 +1589,6 @@ int add_file(struct thread_data *td, const char *fname, int numjob, int inc)
sprintf(file_name + len, "%s", fname);
- if (strchr(fname, FIO_OS_PATH_SEPARATOR) &&
- !(td->flags & TD_F_DIRS_CREATED) &&
- !create_work_dirs(td, fname))
- return 1;
-
/* clean cloned siblings using existing files */
if (numjob && is_already_allocated(file_name) &&
!exists_and_not_regfile(fname))
diff --git a/fio.1 b/fio.1
index ed492682..6685e507 100644
--- a/fio.1
+++ b/fio.1
@@ -112,8 +112,8 @@ only applies to job sections. The reserved *global* section is always
parsed and used.
.TP
.BI \-\-alloc\-size \fR=\fPkb
-Set the internal smalloc pool size to \fIkb\fR in KiB. The
-\fB\-\-alloc\-size\fR switch allows one to use a larger pool size for smalloc.
+Allocate additional internal smalloc pools of size \fIkb\fR in KiB. The
+\fB\-\-alloc\-size\fR option increases shared memory set aside for use by fio.
If running large jobs with randommap enabled, fio can run out of memory.
Smalloc is an internal allocator for shared structures from a fixed size
memory pool and can grow to 16 pools. The pool size defaults to 16MiB.
@@ -201,6 +201,8 @@ argument, \fB\-\-cmdhelp\fR will detail the given \fIcommand\fR.
See the `examples/' directory for inspiration on how to write job files. Note
the copyright and license requirements currently apply to
`examples/' files.
+
+Note that the maximum length of a line in the job file is 8192 bytes.
.SH "JOB FILE PARAMETERS"
Some parameters take an option of a given type, such as an integer or a
string. Anywhere a numeric value is required, an arithmetic expression may be
@@ -751,7 +753,10 @@ restricted to a single zone.
.RE
.TP
.BI zonerange \fR=\fPint
-Size of a single zone. See also \fBzonesize\fR and \fBzoneskip\fR.
+For \fBzonemode\fR=strided, this is the size of a single zone. See also
+\fBzonesize\fR and \fBzoneskip\fR.
+
+For \fBzonemode\fR=zbd, this parameter is ignored.
.TP
.BI zonesize \fR=\fPint
For \fBzonemode\fR=strided, this is the number of bytes to transfer before
@@ -760,13 +765,21 @@ skipping \fBzoneskip\fR bytes. If this parameter is smaller than
will be accessed. If this parameter is larger than \fBzonerange\fR then each
zone will be accessed multiple times before skipping to the next zone.
-For \fBzonemode\fR=zbd, this is the size of a single zone. The \fBzonerange\fR
-parameter is ignored in this mode.
+For \fBzonemode\fR=zbd, this is the size of a single zone. The
+\fBzonerange\fR parameter is ignored in this mode. For a job accessing a
+zoned block device, the specified \fBzonesize\fR must be 0 or equal to the
+device zone size. For a regular block device or file, the specified
+\fBzonesize\fR must be at least 512B.
.TP
.BI zoneskip \fR=\fPint
For \fBzonemode\fR=strided, the number of bytes to skip after \fBzonesize\fR
-bytes of data have been transferred. This parameter must be zero for
-\fBzonemode\fR=zbd.
+bytes of data have been transferred.
+
+For \fBzonemode\fR=zbd, the \fBzonesize\fR aligned number of bytes to skip
+once a zone is fully written (write workloads) or all written data in the
+zone have been read (read workloads). This parameter is valid only for
+sequential workloads and ignored for random workloads. For read workloads,
+see also \fBread_beyond_wp\fR.
.TP
.BI read_beyond_wp \fR=\fPbool
@@ -776,10 +789,10 @@ Zoned block devices are block devices that consist of multiple zones. Each
zone has a type, e.g. conventional or sequential. A conventional zone can be
written at any offset that is a multiple of the block size. Sequential zones
must be written sequentially. The position at which a write must occur is
-called the write pointer. A zoned block device can be either drive
-managed, host managed or host aware. For host managed devices the host must
-ensure that writes happen sequentially. Fio recognizes host managed devices
-and serializes writes to sequential zones for these devices.
+called the write pointer. A zoned block device can be either host managed or
+host aware. For host managed devices the host must ensure that writes happen
+sequentially. Fio recognizes host managed devices and serializes writes to
+sequential zones for these devices.
If a read occurs in a sequential zone beyond the write pointer then the zoned
block device will complete the read without reading any data from the storage
@@ -1013,7 +1026,9 @@ If this is provided, then the real offset becomes `\fBoffset\fR + \fBoffset_incr
is incremented for each sub\-job (i.e. when \fBnumjobs\fR option is
specified). This option is useful if there are several jobs which are
intended to operate on a file in parallel disjoint segments, with even
-spacing between the starting points.
+spacing between the starting points. Percentages can be used for this option.
+If a percentage is given, the generated offset will be aligned to the minimum
+\fBblocksize\fR or to the value of \fBoffset_align\fR if provided.
.TP
.BI number_ios \fR=\fPint
Fio will normally perform I/Os until it has exhausted the size of the region
@@ -1035,7 +1050,7 @@ see \fBend_fsync\fR and \fBfsync_on_close\fR.
.TP
.BI fdatasync \fR=\fPint
Like \fBfsync\fR but uses \fBfdatasync\fR\|(2) to only sync data and
-not metadata blocks. In Windows, FreeBSD, and DragonFlyBSD there is no
+not metadata blocks. In Windows, FreeBSD, DragonFlyBSD or OSX there is no
\fBfdatasync\fR\|(2) so this falls back to using \fBfsync\fR\|(2).
Defaults to 0, which means fio does not periodically issue and wait for a
data\-only sync to complete.
@@ -1751,12 +1766,47 @@ are "contiguous" and the IO depth is not exceeded) before issuing a call to IME.
Asynchronous read and write using DDN's Infinite Memory Engine (IME). This
engine will try to stack as much IOs as possible by creating requests for IME.
FIO will then decide when to commit these requests.
+.TP
+.B libiscsi
+Read and write an iSCSI LUN with libiscsi.
+.TP
+.B nbd
+Synchronous read and write of a Network Block Device (NBD).
.SS "I/O engine specific parameters"
In addition, there are some parameters which are only valid when a specific
\fBioengine\fR is in use. These are used identically to normal parameters,
with the caveat that when used on the command line, they must come after the
\fBioengine\fR that defines them is selected.
.TP
+.BI (io_uring)hipri
+If this option is set, fio will attempt to use polled IO completions. Normal IO
+completions generate interrupts to signal the completion of IO, polled
+completions do not. Hence they require active reaping by the application.
+The benefits are more efficient IO for high IOPS scenarios, and lower latencies
+for low queue depth IO.
+.TP
+.BI (io_uring)fixedbufs
+If fio is asked to do direct IO, then Linux will map pages for each IO call, and
+release them when IO is done. If this option is set, the pages are pre-mapped
+before IO is started. This eliminates the need to map and release for each IO.
+This is more efficient, and reduces the IO latency as well.
+.TP
+.BI (io_uring)registerfiles
+With this option, fio registers the set of files being used with the kernel.
+This avoids the overhead of managing file counts in the kernel, making the
+submission and completion part more lightweight. Required for the below
+sqthread_poll option.
+.TP
+.BI (io_uring)sqthread_poll
+Normally fio will submit IO by issuing a system call to notify the kernel of
+available items in the SQ ring. If this option is set, the act of submitting IO
+will be done by a polling thread in the kernel. This frees up cycles for fio, at
+the cost of using more CPU in the system.
+.TP
+.BI (io_uring)sqthread_poll_cpu
+When \fBsqthread_poll\fR is set, this option provides a way to define which CPU
+should be used for the polling thread.
+.TP
.BI (libaio)userspace_reap
Normally, with the libaio engine in use, fio will use the
\fBio_getevents\fR\|(3) system call to reap newly returned events. With
@@ -1991,6 +2041,22 @@ blocksize=8k will write 16 sectors with each command. fio will still
generate 8k of data for each command butonly the first 512 bytes will
be used and transferred to the device. The writefua option is ignored
with this selection.
+.RE
+.RE
+.TP
+.BI (nbd)uri \fR=\fPstr
+Specify the NBD URI of the server to test.
+The string is a standard NBD URI (see
+\fIhttps://github.com/NetworkBlockDevice/nbd/tree/master/doc\fR).
+Example URIs:
+.RS
+.RS
+.TP
+\fInbd://localhost:10809\fR
+.TP
+\fInbd+unix:///?socket=/tmp/socket\fR
+.TP
+\fInbds://tlshost/exportname\fR
.SS "I/O depth"
.TP
@@ -3327,7 +3393,8 @@ is one long line of values, such as:
A description of this job goes here.
.fi
.P
-The job description (if provided) follows on a second line.
+The job description (if provided) follows on a second line for terse v2.
+It appears on the same line for other terse versions.
.P
To enable terse output, use the \fB\-\-minimal\fR or
`\-\-output\-format=terse' command line options. The
@@ -3462,6 +3529,11 @@ minimal output v3, separated by semicolons:
.nf
terse_version_3;fio_version;jobname;groupid;error;read_kb;read_bandwidth;read_iops;read_runtime_ms;read_slat_min;read_slat_max;read_slat_mean;read_slat_dev;read_clat_min;read_clat_max;read_clat_mean;read_clat_dev;read_clat_pct01;read_clat_pct02;read_clat_pct03;read_clat_pct04;read_clat_pct05;read_clat_pct06;read_clat_pct07;read_clat_pct08;read_clat_pct09;read_clat_pct10;read_clat_pct11;read_clat_pct12;read_clat_pct13;read_clat_pct14;read_clat_pct15;read_clat_pct16;read_clat_pct17;read_clat_pct18;read_clat_pct19;read_clat_pct20;read_tlat_min;read_lat_max;read_lat_mean;read_lat_dev;read_bw_min;read_bw_max;read_bw_agg_pct;read_bw_mean;read_bw_dev;write_kb;write_bandwidth;write_iops;write_runtime_ms;write_slat_min;write_slat_max;write_slat_mean;write_slat_dev;write_clat_min;write_clat_max;write_clat_mean;write_clat_dev;write_clat_pct01;write_clat_pct02;write_clat_pct03;write_clat_pct04;write_clat_pct05;write_clat_pct06;write_clat_pct07;write_clat_pct08;write_clat_pct09;write_clat_pct10;write_clat_pct11;write_clat_pct12;write_clat_pct13;write_clat_pct14;write_clat_pct15;write_clat_pct16;write_clat_pct17;write_clat_pct18;write_clat_pct19;write_clat_pct20;write_tlat_min;write_lat_max;write_lat_mean;write_lat_dev;write_bw_min;write_bw_max;write_bw_agg_pct;write_bw_mean;write_bw_dev;cpu_user;cpu_sys;cpu_csw;cpu_mjf;cpu_minf;iodepth_1;iodepth_2;iodepth_4;iodepth_8;iodepth_16;iodepth_32;iodepth_64;lat_2us;lat_4us;lat_10us;lat_20us;lat_50us;lat_100us;lat_250us;lat_500us;lat_750us;lat_1000us;lat_2ms;lat_4ms;lat_10ms;lat_20ms;lat_50ms;lat_100ms;lat_250ms;lat_500ms;lat_750ms;lat_1000ms;lat_2000ms;lat_over_2000ms;disk_name;disk_read_iops;disk_write_iops;disk_read_merges;disk_write_merges;disk_read_ticks;write_ticks;disk_queue_time;disk_util
.fi
+.P
+In client/server mode terse output differs from what appears when jobs are run
+locally. Disk utilization data is omitted from the standard terse output and
+for v3 and later appears on its own separate line at the end of each terse
+reporting cycle.
.SH JSON OUTPUT
The \fBjson\fR output format is intended to be both human readable and convenient
for automated parsing. For the most part its sections mirror those of the
@@ -3856,6 +3928,9 @@ containing two hostnames `h1' and `h2' with IP addresses 192.168.10.120 and
/mnt/nfs/fio/192.168.10.121.fileio.tmp
.PD
.RE
+.P
+Terse output in client/server mode will differ slightly from what is produced
+when fio is run in stand-alone mode. See the terse output section for details.
.SH AUTHORS
.B fio
was written by Jens Axboe <axboe@kernel.dk>.
diff --git a/fio.h b/fio.h
index b3ba5db2..2094d30b 100644
--- a/fio.h
+++ b/fio.h
@@ -245,7 +245,7 @@ struct thread_data {
void *iolog_buf;
FILE *iolog_f;
- unsigned long rand_seeds[FIO_RAND_NR_OFFS];
+ uint64_t rand_seeds[FIO_RAND_NR_OFFS];
struct frand_state bsrange_state[DDIR_RWDIR_CNT];
struct frand_state verify_state;
@@ -705,16 +705,6 @@ extern void lat_target_reset(struct thread_data *);
(i) < (td)->o.nr_files && ((f) = (td)->files[i]) != NULL; \
(i)++)
-#define fio_assert(td, cond) do { \
- if (!(cond)) { \
- int *__foo = NULL; \
- fprintf(stderr, "file:%s:%d, assert %s failed\n", __FILE__, __LINE__, #cond); \
- td_set_runstate((td), TD_EXITED); \
- (td)->error = EFAULT; \
- *__foo = 0; \
- } \
-} while (0)
-
static inline bool fio_fill_issue_time(struct thread_data *td)
{
if (td->o.read_iolog_file ||
diff --git a/gclient.c b/gclient.c
index 04275a13..d8dc62d2 100644
--- a/gclient.c
+++ b/gclient.c
@@ -318,7 +318,7 @@ static void gfio_update_thread_status(struct gui_entry *ge,
static char message[100];
const char *m = message;
- strncpy(message, status_message, sizeof(message) - 1);
+ snprintf(message, sizeof(message), "%s", status_message);
gtk_progress_bar_set_text(GTK_PROGRESS_BAR(ge->thread_status_pb), m);
gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(ge->thread_status_pb), perc / 100.0);
gtk_widget_queue_draw(ge->ui->window);
@@ -330,7 +330,7 @@ static void gfio_update_thread_status_all(struct gui *ui, char *status_message,
static char message[100];
const char *m = message;
- strncpy(message, status_message, sizeof(message) - 1);
+ snprintf(message, sizeof(message), "%s", status_message);
gtk_progress_bar_set_text(GTK_PROGRESS_BAR(ui->thread_status_pb), m);
gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(ui->thread_status_pb), perc / 100.0);
gtk_widget_queue_draw(ui->window);
diff --git a/init.c b/init.c
index e6378715..63f2168e 100644
--- a/init.c
+++ b/init.c
@@ -1217,7 +1217,7 @@ static void init_flags(struct thread_data *td)
static int setup_random_seeds(struct thread_data *td)
{
- unsigned long seed;
+ uint64_t seed;
unsigned int i;
if (!td->o.rand_repeatable && !fio_option_is_set(&td->o, rand_seed)) {
@@ -1273,8 +1273,7 @@ static char *make_filename(char *buf, size_t buf_size,struct thread_options *o,
for (f = &fpre_keywords[0]; f->keyword; f++)
f->strlen = strlen(f->keyword);
- buf[buf_size - 1] = '\0';
- strncpy(buf, o->filename_format, buf_size - 1);
+ snprintf(buf, buf_size, "%s", o->filename_format);
memset(copy, 0, sizeof(copy));
for (f = &fpre_keywords[0]; f->keyword; f++) {
@@ -1353,7 +1352,7 @@ static char *make_filename(char *buf, size_t buf_size,struct thread_options *o,
if (post_start)
strncpy(dst, buf + post_start, dst_left);
- strncpy(buf, copy, buf_size - 1);
+ snprintf(buf, buf_size, "%s", copy);
} while (1);
}
@@ -1438,7 +1437,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
int recursed, int client_type)
{
unsigned int i;
- char fname[PATH_MAX];
+ char fname[PATH_MAX + 1];
int numjobs, file_alloced;
struct thread_options *o = &td->o;
char logname[PATH_MAX + 32];
@@ -1887,7 +1886,7 @@ static int __parse_jobs_ini(struct thread_data *td,
}
}
- string = malloc(4096);
+ string = malloc(OPT_LEN_MAX);
/*
* it's really 256 + small bit, 280 should suffice
@@ -1920,7 +1919,7 @@ static int __parse_jobs_ini(struct thread_data *td,
if (is_buf)
p = strsep(&file, "\n");
else
- p = fgets(string, 4096, f);
+ p = fgets(string, OPT_LEN_MAX, f);
if (!p)
break;
}
@@ -1989,7 +1988,7 @@ static int __parse_jobs_ini(struct thread_data *td,
if (is_buf)
p = strsep(&file, "\n");
else
- p = fgets(string, 4096, f);
+ p = fgets(string, OPT_LEN_MAX, f);
if (!p)
break;
dprint(FD_PARSE, "%s", p);
@@ -2029,19 +2028,12 @@ static int __parse_jobs_ini(struct thread_data *td,
*/
if (access(filename, F_OK) &&
(ts = strrchr(file, '/'))) {
- int len = ts - file +
- strlen(filename) + 2;
-
- if (!(full_fn = calloc(1, len))) {
+ if (asprintf(&full_fn, "%.*s%s",
+ (int)(ts - file + 1), file,
+ filename) < 0) {
ret = ENOMEM;
break;
}
-
- strncpy(full_fn,
- file, (ts - file) + 1);
- strncpy(full_fn + (ts - file) + 1,
- filename, strlen(filename));
- full_fn[len - 1] = 0;
filename = full_fn;
}
diff --git a/io_u.c b/io_u.c
index 910b7deb..94899552 100644
--- a/io_u.c
+++ b/io_u.c
@@ -557,10 +557,10 @@ static unsigned long long get_next_buflen(struct thread_data *td, struct io_u *i
for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
struct bssplit *bsp = &td->o.bssplit[ddir][i];
+ if (!bsp->perc)
+ continue;
buflen = bsp->bs;
perc += bsp->perc;
- if (!perc)
- break;
if ((r / perc <= frand_max / 100ULL) &&
io_u_fits(td, io_u, buflen))
break;
@@ -901,6 +901,8 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
if (td->o.zone_mode == ZONE_MODE_STRIDED)
setup_strided_zone_mode(td, io_u);
+ else if (td->o.zone_mode == ZONE_MODE_ZBD)
+ setup_zbd_zone_mode(td, io_u);
/*
* No log, let the seq/rand engine retrieve the next buflen and
diff --git a/ioengines.c b/ioengines.c
index 7e5a50cc..40fa75c3 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -125,8 +125,7 @@ static struct ioengine_ops *__load_ioengine(const char *name)
{
char engine[64];
- engine[sizeof(engine) - 1] = '\0';
- strncpy(engine, name, sizeof(engine) - 1);
+ snprintf(engine, sizeof(engine), "%s", name);
/*
* linux libaio has alias names, so convert to what we want
@@ -308,7 +307,9 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
io_u->error = 0;
io_u->resid = 0;
- if (td_ioengine_flagged(td, FIO_SYNCIO)) {
+ if (td_ioengine_flagged(td, FIO_SYNCIO) ||
+ (td_ioengine_flagged(td, FIO_ASYNCIO_SYNC_TRIM) &&
+ io_u->ddir == DDIR_TRIM)) {
if (fio_fill_issue_time(td))
fio_gettime(&io_u->issue_time, NULL);
@@ -389,7 +390,9 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
td_io_commit(td);
}
- if (!td_ioengine_flagged(td, FIO_SYNCIO)) {
+ if (!td_ioengine_flagged(td, FIO_SYNCIO) &&
+ (!td_ioengine_flagged(td, FIO_ASYNCIO_SYNC_TRIM) ||
+ io_u->ddir != DDIR_TRIM)) {
if (fio_fill_issue_time(td))
fio_gettime(&io_u->issue_time, NULL);
diff --git a/ioengines.h b/ioengines.h
index b9cd33d5..01a9b586 100644
--- a/ioengines.h
+++ b/ioengines.h
@@ -63,6 +63,8 @@ enum fio_ioengine_flags {
FIO_FAKEIO = 1 << 11, /* engine pretends to do IO */
FIO_NOSTATS = 1 << 12, /* don't do IO stats */
FIO_NOFILEHASH = 1 << 13, /* doesn't hash the files for lookup later. */
+ FIO_ASYNCIO_SYNC_TRIM
+ = 1 << 14 /* io engine has async ->queue except for trim */
};
/*
diff --git a/lib/lfsr.c b/lib/lfsr.c
index 32fbec56..1ef6ebbf 100644
--- a/lib/lfsr.c
+++ b/lib/lfsr.c
@@ -232,7 +232,7 @@ static int prepare_spin(struct fio_lfsr *fl, unsigned int spin)
return 0;
}
-int lfsr_reset(struct fio_lfsr *fl, unsigned long seed)
+int lfsr_reset(struct fio_lfsr *fl, uint64_t seed)
{
uint64_t bitmask = (fl->cached_bit << 1) - 1;
@@ -246,8 +246,8 @@ int lfsr_reset(struct fio_lfsr *fl, unsigned long seed)
return 0;
}
-int lfsr_init(struct fio_lfsr *fl, uint64_t nums, unsigned long seed,
- unsigned int spin)
+int lfsr_init(struct fio_lfsr *fl, uint64_t nums, uint64_t seed,
+ unsigned int spin)
{
uint8_t *taps;
diff --git a/lib/lfsr.h b/lib/lfsr.h
index c2d55693..95bc07fd 100644
--- a/lib/lfsr.h
+++ b/lib/lfsr.h
@@ -24,7 +24,7 @@ struct fio_lfsr {
int lfsr_next(struct fio_lfsr *fl, uint64_t *off);
int lfsr_init(struct fio_lfsr *fl, uint64_t size,
- unsigned long seed, unsigned int spin);
-int lfsr_reset(struct fio_lfsr *fl, unsigned long seed);
+ uint64_t seed, unsigned int spin);
+int lfsr_reset(struct fio_lfsr *fl, uint64_t seed);
#endif
diff --git a/lib/rand.c b/lib/rand.c
index f18bd8d8..69acb06c 100644
--- a/lib/rand.c
+++ b/lib/rand.c
@@ -95,7 +95,7 @@ void init_rand_seed(struct frand_state *state, unsigned int seed, bool use64)
__init_rand64(&state->state64, seed);
}
-void __fill_random_buf(void *buf, unsigned int len, unsigned long seed)
+void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
{
void *ptr = buf;
@@ -122,10 +122,10 @@ void __fill_random_buf(void *buf, unsigned int len, unsigned long seed)
}
}
-unsigned long fill_random_buf(struct frand_state *fs, void *buf,
- unsigned int len)
+uint64_t fill_random_buf(struct frand_state *fs, void *buf,
+ unsigned int len)
{
- unsigned long r = __rand(fs);
+ uint64_t r = __rand(fs);
if (sizeof(int) != sizeof(long *))
r *= (unsigned long) __rand(fs);
@@ -134,7 +134,7 @@ unsigned long fill_random_buf(struct frand_state *fs, void *buf,
return r;
}
-void __fill_random_buf_percentage(unsigned long seed, void *buf,
+void __fill_random_buf_percentage(uint64_t seed, void *buf,
unsigned int percentage,
unsigned int segment, unsigned int len,
char *pattern, unsigned int pbytes)
@@ -183,12 +183,12 @@ void __fill_random_buf_percentage(unsigned long seed, void *buf,
}
}
-unsigned long fill_random_buf_percentage(struct frand_state *fs, void *buf,
- unsigned int percentage,
- unsigned int segment, unsigned int len,
- char *pattern, unsigned int pbytes)
+uint64_t fill_random_buf_percentage(struct frand_state *fs, void *buf,
+ unsigned int percentage,
+ unsigned int segment, unsigned int len,
+ char *pattern, unsigned int pbytes)
{
- unsigned long r = __rand(fs);
+ uint64_t r = __rand(fs);
if (sizeof(int) != sizeof(long *))
r *= (unsigned long) __rand(fs);
diff --git a/lib/rand.h b/lib/rand.h
index 1676cf98..95d4f6d4 100644
--- a/lib/rand.h
+++ b/lib/rand.h
@@ -150,9 +150,9 @@ static inline uint64_t rand_between(struct frand_state *state, uint64_t start,
extern void init_rand(struct frand_state *, bool);
extern void init_rand_seed(struct frand_state *, unsigned int seed, bool);
-extern void __fill_random_buf(void *buf, unsigned int len, unsigned long seed);
-extern unsigned long fill_random_buf(struct frand_state *, void *buf, unsigned int len);
-extern void __fill_random_buf_percentage(unsigned long, void *, unsigned int, unsigned int, unsigned int, char *, unsigned int);
-extern unsigned long fill_random_buf_percentage(struct frand_state *, void *, unsigned int, unsigned int, unsigned int, char *, unsigned int);
+extern void __fill_random_buf(void *buf, unsigned int len, uint64_t seed);
+extern uint64_t fill_random_buf(struct frand_state *, void *buf, unsigned int len);
+extern void __fill_random_buf_percentage(uint64_t, void *, unsigned int, unsigned int, unsigned int, char *, unsigned int);
+extern uint64_t fill_random_buf_percentage(struct frand_state *, void *, unsigned int, unsigned int, unsigned int, char *, unsigned int);
#endif
diff --git a/optgroup.c b/optgroup.c
index 04ceec7e..c228ff29 100644
--- a/optgroup.c
+++ b/optgroup.c
@@ -170,6 +170,10 @@ static const struct opt_group fio_opt_cat_groups[] = {
.mask = FIO_OPT_G_HDFS,
},
{
+ .name = "NBD I/O engine", /* NBD */
+ .mask = FIO_OPT_G_NBD,
+ },
+ {
.name = NULL,
},
};
diff --git a/optgroup.h b/optgroup.h
index adf4d09b..55ef5934 100644
--- a/optgroup.h
+++ b/optgroup.h
@@ -61,6 +61,10 @@ enum opt_category_group {
__FIO_OPT_G_MTD,
__FIO_OPT_G_HDFS,
__FIO_OPT_G_SG,
+ __FIO_OPT_G_MMAP,
+ __FIO_OPT_G_ISCSI,
+ __FIO_OPT_G_NBD,
+ __FIO_OPT_G_IOURING,
__FIO_OPT_G_NR,
FIO_OPT_G_RATE = (1ULL << __FIO_OPT_G_RATE),
@@ -97,7 +101,11 @@ enum opt_category_group {
FIO_OPT_G_MTD = (1ULL << __FIO_OPT_G_MTD),
FIO_OPT_G_HDFS = (1ULL << __FIO_OPT_G_HDFS),
FIO_OPT_G_SG = (1ULL << __FIO_OPT_G_SG),
+ FIO_OPT_G_MMAP = (1ULL << __FIO_OPT_G_MMAP),
FIO_OPT_G_INVALID = (1ULL << __FIO_OPT_G_NR),
+ FIO_OPT_G_ISCSI = (1ULL << __FIO_OPT_G_ISCSI),
+ FIO_OPT_G_NBD = (1ULL << __FIO_OPT_G_NBD),
+ FIO_OPT_G_IOURING = (1ULL << __FIO_OPT_G_IOURING),
};
extern const struct opt_group *opt_group_from_mask(uint64_t *mask);
diff --git a/options.c b/options.c
index 95086074..2c5bf5e0 100644
--- a/options.c
+++ b/options.c
@@ -1434,6 +1434,22 @@ static int str_offset_cb(void *data, unsigned long long *__val)
return 0;
}
+static int str_offset_increment_cb(void *data, unsigned long long *__val)
+{
+ struct thread_data *td = cb_data_to_td(data);
+ unsigned long long v = *__val;
+
+ if (parse_is_percent(v)) {
+ td->o.offset_increment = 0;
+ td->o.offset_increment_percent = -1ULL - v;
+ dprint(FD_PARSE, "SET offset_increment_percent %d\n",
+ td->o.offset_increment_percent);
+ } else
+ td->o.offset_increment = v;
+
+ return 0;
+}
+
static int str_size_cb(void *data, unsigned long long *__val)
{
struct thread_data *td = cb_data_to_td(data);
@@ -1899,6 +1915,9 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.help = "HTTP (WebDAV/S3) IO engine",
},
#endif
+ { .ival = "nbd",
+ .help = "Network Block Device (NBD) IO engine"
+ },
},
},
{
@@ -2081,6 +2100,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.name = "offset_increment",
.lname = "IO offset increment",
.type = FIO_OPT_STR_VAL,
+ .cb = str_offset_increment_cb,
.off1 = offsetof(struct thread_options, offset_increment),
.help = "What is the increment from one offset to the next",
.parent = "offset",
@@ -4899,8 +4919,7 @@ char *fio_option_dup_subs(const char *opt)
return NULL;
}
- in[OPT_LEN_MAX] = '\0';
- strncpy(in, opt, OPT_LEN_MAX);
+ snprintf(in, sizeof(in), "%s", opt);
while (*inptr && nchr > 0) {
if (inptr[0] == '$' && inptr[1] == '{') {
diff --git a/os/linux/io_uring.h b/os/linux/io_uring.h
index 24906e99..ce03151e 100644
--- a/os/linux/io_uring.h
+++ b/os/linux/io_uring.h
@@ -26,6 +26,7 @@ struct io_uring_sqe {
__kernel_rwf_t rw_flags;
__u32 fsync_flags;
__u16 poll_events;
+ __u32 sync_range_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
@@ -38,6 +39,8 @@ struct io_uring_sqe {
* sqe->flags
*/
#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
+#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
+#define IOSQE_IO_LINK (1U << 2) /* next IO depends on this one */
/*
* io_uring_setup() flags
@@ -54,6 +57,7 @@ struct io_uring_sqe {
#define IORING_OP_WRITE_FIXED 5
#define IORING_OP_POLL_ADD 6
#define IORING_OP_POLL_REMOVE 7
+#define IORING_OP_SYNC_FILE_RANGE 8
/*
* sqe->fsync_flags
@@ -70,11 +74,6 @@ struct io_uring_cqe {
};
/*
- * io_uring_event->flags
- */
-#define IOCQE_FLAG_CACHEHIT (1U << 0) /* IO did not hit media */
-
-/*
* Magic offsets for the application to mmap the data it needs
*/
#define IORING_OFF_SQ_RING 0ULL
@@ -138,5 +137,7 @@ struct io_uring_params {
#define IORING_UNREGISTER_BUFFERS 1
#define IORING_REGISTER_FILES 2
#define IORING_UNREGISTER_FILES 3
+#define IORING_REGISTER_EVENTFD 4
+#define IORING_UNREGISTER_EVENTFD 5
#endif
diff --git a/os/os-dragonfly.h b/os/os-dragonfly.h
index eb92521f..3c460ae2 100644
--- a/os/os-dragonfly.h
+++ b/os/os-dragonfly.h
@@ -202,10 +202,12 @@ static inline unsigned long long os_phys_mem(void)
return mem;
}
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return (int) lwp_gettid();
}
+#endif
static inline unsigned long long get_fs_free_size(const char *path)
{
diff --git a/os/os-linux.h b/os/os-linux.h
index ba58bf7d..36339ef3 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -124,10 +124,12 @@ static inline int ioprio_set(int which, int who, int ioprio_class, int ioprio)
return syscall(__NR_ioprio_set, which, who, ioprio);
}
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return syscall(__NR_gettid);
}
+#endif
#define SPLICE_DEF_SIZE (64*1024)
diff --git a/os/os-mac.h b/os/os-mac.h
index 0b9c8707..0d97f6b9 100644
--- a/os/os-mac.h
+++ b/os/os-mac.h
@@ -90,16 +90,12 @@ static inline unsigned long long os_phys_mem(void)
return mem;
}
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return mach_thread_self();
}
-
-/*
- * For some reason, there's no header definition for fdatasync(), even
- * if it exists.
- */
-extern int fdatasync(int fd);
+#endif
static inline bool fio_fallocate(struct fio_file *f, uint64_t offset, uint64_t len)
{
diff --git a/os/os-netbsd.h b/os/os-netbsd.h
index c06261d4..88fb3ef1 100644
--- a/os/os-netbsd.h
+++ b/os/os-netbsd.h
@@ -65,10 +65,12 @@ static inline unsigned long long os_phys_mem(void)
return mem;
}
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return (int) _lwp_self();
}
+#endif
static inline unsigned long long get_fs_free_size(const char *path)
{
diff --git a/os/os-openbsd.h b/os/os-openbsd.h
index 70f58b49..43a649d4 100644
--- a/os/os-openbsd.h
+++ b/os/os-openbsd.h
@@ -65,10 +65,12 @@ static inline unsigned long long os_phys_mem(void)
return mem;
}
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return (int)(intptr_t) pthread_self();
}
+#endif
static inline unsigned long long get_fs_free_size(const char *path)
{
diff --git a/os/os-solaris.h b/os/os-solaris.h
index 1a411af6..f1966f44 100644
--- a/os/os-solaris.h
+++ b/os/os-solaris.h
@@ -164,10 +164,12 @@ static inline int fio_cpuset_exit(os_cpu_mask_t *mask)
return 0;
}
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return pthread_self();
}
+#endif
/*
* Should be enough, not aware of what (if any) restrictions Solaris has
diff --git a/os/os-windows.h b/os/os-windows.h
index ef955dc3..3e9f7341 100644
--- a/os/os-windows.h
+++ b/os/os-windows.h
@@ -162,12 +162,14 @@ static inline unsigned long long os_phys_mem(void)
return (unsigned long long) pages * (unsigned long long) pagesize;
}
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return GetCurrentThreadId();
}
+#endif
-static inline int init_random_seeds(unsigned long *rand_seeds, int size)
+static inline int init_random_seeds(uint64_t *rand_seeds, int size)
{
HCRYPTPROV hCryptProv;
diff --git a/os/os.h b/os/os.h
index 36b6bb2e..e4729680 100644
--- a/os/os.h
+++ b/os/os.h
@@ -323,7 +323,7 @@ static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
#endif
#ifdef FIO_USE_GENERIC_INIT_RANDOM_STATE
-static inline int init_random_seeds(unsigned long *rand_seeds, int size)
+static inline int init_random_seeds(uint64_t *rand_seeds, int size)
{
int fd;
@@ -373,11 +373,13 @@ static inline int CPU_COUNT(os_cpu_mask_t *mask)
#endif
#ifndef FIO_HAVE_GETTID
+#ifndef CONFIG_HAVE_GETTID
static inline int gettid(void)
{
return getpid();
}
#endif
+#endif
#ifndef FIO_HAVE_SHM_ATTACH_REMOVED
static inline int shm_attach_to_open_removed(void)
diff --git a/os/windows/install.wxs b/os/windows/install.wxs
index 97d88e9f..dcb8c92c 100755
--- a/os/windows/install.wxs
+++ b/os/windows/install.wxs
@@ -43,6 +43,32 @@
<File Id="MORAL_LICENSE" Name="MORAL-LICENSE.txt" Source="..\..\MORAL-LICENSE"/>
</Component>
<Directory Id="examples" Name="examples"/>
+ <Directory Id="tests" Name="tests">
+ <Component>
+ <File Source="../../t/fio-genzipf.exe"/>
+ </Component>
+ <Component>
+ <File Source="../../t/fio-dedupe.exe"/>
+ </Component>
+ <Component>
+ <File Source="../../t/stest.exe"/>
+ </Component>
+ <Component>
+ <File Source="../../t/ieee754.exe"/>
+ </Component>
+ <Component>
+ <File Source="../../t/axmap.exe"/>
+ </Component>
+ <Component>
+ <File Source="../../t/lfsr-test.exe"/>
+ </Component>
+ <Component>
+ <File Source="../../t/gen-rand.exe"/>
+ </Component>
+ <Component>
+ <File Source="../../t/fio-verify-state.exe"/>
+ </Component>
+ </Directory>
</Directory>
</Directory>
</Directory>
@@ -56,6 +82,14 @@
<ComponentRef Id="COPYING"/>
<ComponentRef Id="MORAL_LICENSE"/>
<ComponentGroupRef Id="examples"/>
+ <ComponentRef Id="fio_genzipf.exe"/>
+ <ComponentRef Id="fio_dedupe.exe"/>
+ <ComponentRef Id="stest.exe"/>
+ <ComponentRef Id="ieee754.exe"/>
+ <ComponentRef Id="axmap.exe"/>
+ <ComponentRef Id="lfsr_test.exe"/>
+ <ComponentRef Id="gen_rand.exe"/>
+ <ComponentRef Id="fio_verify_state.exe"/>
</Feature>
<Property Id="ARPURLINFOABOUT" Value="http://git.kernel.dk/cgit/fio/" />
diff --git a/parse.c b/parse.c
index a7d4516e..c4fd4626 100644
--- a/parse.c
+++ b/parse.c
@@ -602,8 +602,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
if (!is_time && o->is_time)
is_time = o->is_time;
- tmp[sizeof(tmp) - 1] = '\0';
- strncpy(tmp, ptr, sizeof(tmp) - 1);
+ snprintf(tmp, sizeof(tmp), "%s", ptr);
p = strchr(tmp, ',');
if (p)
*p = '\0';
@@ -829,8 +828,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
char tmp[128];
char *p1, *p2;
- tmp[sizeof(tmp) - 1] = '\0';
- strncpy(tmp, ptr, sizeof(tmp) - 1);
+ snprintf(tmp, sizeof(tmp), "%s", ptr);
/* Handle bsrange with separate read,write values: */
p1 = strchr(tmp, ',');
diff --git a/parse.h b/parse.h
index b47a02c7..5828654f 100644
--- a/parse.h
+++ b/parse.h
@@ -37,8 +37,8 @@ struct value_pair {
void *cb; /* sub-option callback */
};
-#define OPT_LEN_MAX 4096
-#define PARSE_MAX_VP 24
+#define OPT_LEN_MAX 8192
+#define PARSE_MAX_VP 32
/*
* Option define
diff --git a/server.c b/server.c
index 2a337707..e7846227 100644
--- a/server.c
+++ b/server.c
@@ -865,7 +865,8 @@ static int handle_probe_cmd(struct fio_net_cmd *cmd)
strcpy(me, (char *) pdu->server);
gethostname((char *) probe.hostname, sizeof(probe.hostname));
- strncpy((char *) probe.fio_version, fio_version_string, sizeof(probe.fio_version) - 1);
+ snprintf((char *) probe.fio_version, sizeof(probe.fio_version), "%s",
+ fio_version_string);
/*
* If the client supports compression and we do too, then enable it
@@ -1470,9 +1471,10 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs)
memset(&p, 0, sizeof(p));
- strncpy(p.ts.name, ts->name, FIO_JOBNAME_SIZE - 1);
- strncpy(p.ts.verror, ts->verror, FIO_VERROR_SIZE - 1);
- strncpy(p.ts.description, ts->description, FIO_JOBDESC_SIZE - 1);
+ snprintf(p.ts.name, sizeof(p.ts.name), "%s", ts->name);
+ snprintf(p.ts.verror, sizeof(p.ts.verror), "%s", ts->verror);
+ snprintf(p.ts.description, sizeof(p.ts.description), "%s",
+ ts->description);
p.ts.error = cpu_to_le32(ts->error);
p.ts.thread_number = cpu_to_le32(ts->thread_number);
@@ -1663,8 +1665,7 @@ static void convert_dus(struct disk_util_stat *dst, struct disk_util_stat *src)
{
int i;
- dst->name[FIO_DU_NAME_SZ - 1] = '\0';
- strncpy((char *) dst->name, (char *) src->name, FIO_DU_NAME_SZ - 1);
+ snprintf((char *) dst->name, sizeof(dst->name), "%s", src->name);
for (i = 0; i < 2; i++) {
dst->s.ios[i] = cpu_to_le64(src->s.ios[i]);
@@ -1974,8 +1975,7 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
else
pdu.compressed = 0;
- strncpy((char *) pdu.name, name, FIO_NET_NAME_MAX);
- pdu.name[FIO_NET_NAME_MAX - 1] = '\0';
+ snprintf((char *) pdu.name, sizeof(pdu.name), "%s", name);
/*
* We can't do this for a pre-compressed log, but for that case,
@@ -2192,9 +2192,8 @@ static int fio_init_server_sock(void)
mode = umask(000);
- memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
- strncpy(addr.sun_path, bind_sock, sizeof(addr.sun_path) - 1);
+ snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", bind_sock);
len = sizeof(addr.sun_family) + strlen(bind_sock) + 1;
@@ -2244,9 +2243,9 @@ static int fio_init_server_connection(void)
if (p)
strcat(p, port);
else
- strncpy(bind_str, port, sizeof(bind_str) - 1);
+ snprintf(bind_str, sizeof(bind_str), "%s", port);
} else
- strncpy(bind_str, bind_sock, sizeof(bind_str) - 1);
+ snprintf(bind_str, sizeof(bind_str), "%s", bind_sock);
log_info("fio: server listening on %s\n", bind_str);
diff --git a/server.h b/server.h
index abb23bad..de1d7f9b 100644
--- a/server.h
+++ b/server.h
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
};
enum {
- FIO_SERVER_VER = 78,
+ FIO_SERVER_VER = 80,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
diff --git a/smalloc.c b/smalloc.c
index a2ad25a0..fa00f0ee 100644
--- a/smalloc.c
+++ b/smalloc.c
@@ -48,7 +48,13 @@ struct block_hdr {
#endif
};
-static struct pool mp[MAX_POOLS];
+/*
+ * This suppresses the voluminous potential bitmap printout when
+ * smalloc encounters an OOM error
+ */
+static const bool enable_smalloc_debug = false;
+
+static struct pool *mp;
static unsigned int nr_pools;
static unsigned int last_pool;
@@ -167,7 +173,7 @@ static bool add_pool(struct pool *pool, unsigned int alloc_size)
pool->mmap_size = alloc_size;
pool->nr_blocks = bitmap_blocks;
- pool->free_blocks = bitmap_blocks * SMALLOC_BPB;
+ pool->free_blocks = bitmap_blocks * SMALLOC_BPI;
mmap_flags = OS_MAP_ANON;
#ifdef CONFIG_ESX
@@ -202,6 +208,20 @@ void sinit(void)
bool ret;
int i;
+ /*
+ * sinit() can be called more than once if alloc-size is
+ * set. But we want to allocate space for the struct pool
+ * instances only once.
+ */
+ if (!mp) {
+ mp = (struct pool *) mmap(NULL,
+ MAX_POOLS * sizeof(struct pool),
+ PROT_READ | PROT_WRITE,
+ OS_MAP_ANON | MAP_SHARED, -1, 0);
+
+ assert(mp != MAP_FAILED);
+ }
+
for (i = 0; i < INITIAL_POOLS; i++) {
ret = add_pool(&mp[nr_pools], smalloc_pool_size);
if (!ret)
@@ -233,6 +253,8 @@ void scleanup(void)
for (i = 0; i < nr_pools; i++)
cleanup_pool(&mp[i]);
+
+ munmap(mp, MAX_POOLS * sizeof(struct pool));
}
#ifdef SMALLOC_REDZONE
@@ -332,6 +354,25 @@ void sfree(void *ptr)
log_err("smalloc: ptr %p not from smalloc pool\n", ptr);
}
+/*
+ * Pick the bitmap word at which the next allocation scan should start.
+ *
+ * Words at or after pool->next_non_full are preferred. If all of those
+ * are full (== -1U), the search wraps around to the start of the bitmap.
+ * If no word has a clear bit at all, pool->nr_blocks is returned rather
+ * than reading past the end of the bitmap.
+ */
+static unsigned int find_best_index(struct pool *pool)
+{
+	unsigned int i;
+
+	assert(pool->free_blocks);
+
+	/* nearest non-full word at or after the hint */
+	for (i = pool->next_non_full; i < pool->nr_blocks; i++)
+		if (pool->bitmap[i] != -1U)
+			return i;
+
+	/* wrapped around: first non-full word from the start */
+	for (i = 0; i < pool->nr_blocks; i++)
+		if (pool->bitmap[i] != -1U)
+			return i;
+
+	/* free_blocks claimed there was room, yet every word is full */
+	return pool->nr_blocks;
+}
+
static void *__smalloc_pool(struct pool *pool, size_t size)
{
size_t nr_blocks;
@@ -346,15 +387,16 @@ static void *__smalloc_pool(struct pool *pool, size_t size)
if (nr_blocks > pool->free_blocks)
goto fail;
- i = pool->next_non_full;
+ pool->next_non_full = find_best_index(pool);
+
last_idx = 0;
offset = -1U;
+ i = pool->next_non_full;
while (i < pool->nr_blocks) {
unsigned int idx;
if (pool->bitmap[i] == -1U) {
i++;
- pool->next_non_full = i;
last_idx = 0;
continue;
}
@@ -387,10 +429,9 @@ fail:
return ret;
}
-static void *smalloc_pool(struct pool *pool, size_t size)
+static size_t size_to_alloc_size(size_t size)
{
size_t alloc_size = size + sizeof(struct block_hdr);
- void *ptr;
/*
* Round to int alignment, so that the postred pointer will
@@ -401,6 +442,14 @@ static void *smalloc_pool(struct pool *pool, size_t size)
alloc_size = (alloc_size + int_mask) & ~int_mask;
#endif
+ return alloc_size;
+}
+
+static void *smalloc_pool(struct pool *pool, size_t size)
+{
+ size_t alloc_size = size_to_alloc_size(size);
+ void *ptr;
+
ptr = __smalloc_pool(pool, alloc_size);
if (ptr) {
struct block_hdr *hdr = ptr;
@@ -415,6 +464,72 @@ static void *smalloc_pool(struct pool *pool, size_t size)
return ptr;
}
+/*
+ * Dump every bitmap word of @pool that is not completely full through
+ * log_err(), one line per word, highest bit index first ('1' = bit set).
+ * Does nothing unless enable_smalloc_debug is true, or if the temporary
+ * line buffer cannot be allocated.
+ */
+static void smalloc_print_bitmap(struct pool *pool)
+{
+	size_t nr_blocks = pool->nr_blocks;
+	unsigned int *bitmap = pool->bitmap;
+	unsigned int i, j;
+	char *buffer;
+
+	if (!enable_smalloc_debug)
+		return;
+
+	buffer = malloc(SMALLOC_BPI + 1);
+	if (!buffer)
+		return;
+	buffer[SMALLOC_BPI] = '\0';
+
+	for (i = 0; i < nr_blocks; i++) {
+		unsigned int line = bitmap[i];
+
+		/* skip completely full lines */
+		if (line == -1U)
+			continue;
+
+		/*
+		 * Use an unsigned one for the mask: shifting a signed 1
+		 * into the top bit of an int is undefined behaviour.
+		 */
+		for (j = 0; j < SMALLOC_BPI; j++)
+			buffer[SMALLOC_BPI - 1 - j] = (line & (1U << j)) ? '1' : '0';
+
+		log_err("smalloc: bitmap %5u, %s\n", i, buffer);
+	}
+
+	free(buffer);
+}
+
+/*
+ * Report the state of every smalloc pool through log_err().
+ *
+ * With a non-zero @size, the padded size and block count for such a
+ * request are printed first. Each pool that reports enough free blocks
+ * is then probed with a trial allocation, which is freed again at once.
+ * A failed probe also prints next_non_full/nr_blocks and hands the pool
+ * to smalloc_print_bitmap(). With @size == 0 only the per-pool
+ * free/total counters are printed.
+ */
+void smalloc_debug(size_t size)
+{
+	const size_t padded = size_to_alloc_size(size);
+	const size_t wanted = size_to_blocks(padded);
+	unsigned int idx;
+
+	if (size)
+		log_err("smalloc: size = %lu, alloc_size = %lu, blocks = %lu\n",
+			(unsigned long) size, (unsigned long) padded,
+			(unsigned long) wanted);
+
+	for (idx = 0; idx < nr_pools; idx++) {
+		struct pool *pool = &mp[idx];
+		void *probe;
+
+		log_err("smalloc: pool %u, free/total blocks %u/%u\n", idx,
+			(unsigned int) (pool->free_blocks),
+			(unsigned int) (pool->nr_blocks*sizeof(unsigned int)*8));
+
+		/* nothing to probe, or the pool is too full to be a candidate */
+		if (!size || pool->free_blocks < wanted)
+			continue;
+
+		probe = smalloc_pool(pool, size);
+		if (!probe) {
+			log_err("smalloc: smalloc_pool %u failed\n", idx);
+			log_err("smalloc: next_non_full=%u, nr_blocks=%u\n",
+				(unsigned int) pool->next_non_full,
+				(unsigned int) pool->nr_blocks);
+			smalloc_print_bitmap(pool);
+			continue;
+		}
+
+		/* the probe worked: give the memory back and remember the pool */
+		sfree(probe);
+		last_pool = idx;
+		log_err("smalloc: smalloc_pool %u succeeded\n", idx);
+	}
+}
+
void *smalloc(size_t size)
{
unsigned int i, end_pool;
@@ -445,6 +560,7 @@ void *smalloc(size_t size)
log_err("smalloc: OOM. Consider using --alloc-size to increase the "
"shared memory available.\n");
+ smalloc_debug(size);
return NULL;
}
diff --git a/smalloc.h b/smalloc.h
index 8df10e6f..1f7716f4 100644
--- a/smalloc.h
+++ b/smalloc.h
@@ -9,6 +9,7 @@ extern void sfree(void *);
extern char *smalloc_strdup(const char *);
extern void sinit(void);
extern void scleanup(void);
+extern void smalloc_debug(size_t);
extern unsigned int smalloc_pool_size;
diff --git a/stat.c b/stat.c
index 2bc21dad..33637900 100644
--- a/stat.c
+++ b/stat.c
@@ -1244,12 +1244,13 @@ static void show_thread_status_terse_all(struct thread_stat *ts,
/* Additional output if continue_on_error set - default off*/
if (ts->continue_on_error)
log_buf(out, ";%llu;%d", (unsigned long long) ts->total_err_count, ts->first_error);
- if (ver == 2)
- log_buf(out, "\n");
/* Additional output if description is set */
- if (strlen(ts->description))
+ if (strlen(ts->description)) {
+ if (ver == 2)
+ log_buf(out, "\n");
log_buf(out, ";%s", ts->description);
+ }
log_buf(out, "\n");
}
@@ -1827,10 +1828,11 @@ void __show_run_stats(void)
/*
* These are per-group shared already
*/
- strncpy(ts->name, td->o.name, FIO_JOBNAME_SIZE - 1);
+ snprintf(ts->name, sizeof(ts->name), "%s", td->o.name);
if (td->o.description)
- strncpy(ts->description, td->o.description,
- FIO_JOBDESC_SIZE - 1);
+ snprintf(ts->description,
+ sizeof(ts->description), "%s",
+ td->o.description);
else
memset(ts->description, 0, FIO_JOBDESC_SIZE);
@@ -1867,12 +1869,12 @@ void __show_run_stats(void)
if (!td->error && td->o.continue_on_error &&
td->first_error) {
ts->error = td->first_error;
- ts->verror[sizeof(ts->verror) - 1] = '\0';
- strncpy(ts->verror, td->verror, sizeof(ts->verror) - 1);
+ snprintf(ts->verror, sizeof(ts->verror), "%s",
+ td->verror);
} else if (td->error) {
ts->error = td->error;
- ts->verror[sizeof(ts->verror) - 1] = '\0';
- strncpy(ts->verror, td->verror, sizeof(ts->verror) - 1);
+ snprintf(ts->verror, sizeof(ts->verror), "%s",
+ td->verror);
}
}
diff --git a/stat.h b/stat.h
index e9551381..ba7e290d 100644
--- a/stat.h
+++ b/stat.h
@@ -251,32 +251,39 @@ struct thread_stat {
uint64_t cachemiss;
} __attribute__((packed));
-struct jobs_eta {
- uint32_t nr_running;
- uint32_t nr_ramp;
-
- uint32_t nr_pending;
- uint32_t nr_setting_up;
-
- uint64_t m_rate[DDIR_RWDIR_CNT], t_rate[DDIR_RWDIR_CNT];
- uint64_t rate[DDIR_RWDIR_CNT];
- uint32_t m_iops[DDIR_RWDIR_CNT], t_iops[DDIR_RWDIR_CNT];
- uint32_t iops[DDIR_RWDIR_CNT];
- uint64_t elapsed_sec;
- uint64_t eta_sec;
- uint32_t is_pow2;
- uint32_t unit_base;
-
- uint32_t sig_figs;
-
- uint32_t files_open;
+#define JOBS_ETA { \
+ uint32_t nr_running; \
+ uint32_t nr_ramp; \
+ \
+ uint32_t nr_pending; \
+ uint32_t nr_setting_up; \
+ \
+ uint64_t m_rate[DDIR_RWDIR_CNT]; \
+ uint64_t t_rate[DDIR_RWDIR_CNT]; \
+ uint64_t rate[DDIR_RWDIR_CNT]; \
+ uint32_t m_iops[DDIR_RWDIR_CNT]; \
+ uint32_t t_iops[DDIR_RWDIR_CNT]; \
+ uint32_t iops[DDIR_RWDIR_CNT]; \
+ uint32_t pad; \
+ uint64_t elapsed_sec; \
+ uint64_t eta_sec; \
+ uint32_t is_pow2; \
+ uint32_t unit_base; \
+ \
+ uint32_t sig_figs; \
+ \
+ uint32_t files_open; \
+ \
+ /* \
+ * Network 'copy' of run_str[] \
+ */ \
+ uint32_t nr_threads; \
+ uint32_t pad2; \
+ uint8_t run_str[]; \
+}
- /*
- * Network 'copy' of run_str[]
- */
- uint32_t nr_threads;
- uint8_t run_str[];
-} __attribute__((packed));
+struct jobs_eta JOBS_ETA;
+struct jobs_eta_packed JOBS_ETA __attribute__((packed));
struct io_u_plat_entry {
struct flist_head list;
diff --git a/t/io_uring.c b/t/io_uring.c
index 363cba3e..62dee805 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -71,7 +71,6 @@ struct submitter {
unsigned long reaps;
unsigned long done;
unsigned long calls;
- unsigned long cachehit, cachemiss;
volatile int finish;
__s32 *fds;
@@ -265,14 +264,10 @@ static int reap_events(struct submitter *s)
if (cqe->res != BS) {
printf("io: unexpected ret=%d\n", cqe->res);
if (polled && cqe->res == -EOPNOTSUPP)
- printf("Your filesystem doesn't support poll\n");
+ printf("Your filesystem/driver/kernel doesn't support polled IO\n");
return -1;
}
}
- if (cqe->flags & IOCQE_FLAG_CACHEHIT)
- s->cachehit++;
- else
- s->cachemiss++;
reaped++;
head++;
} while (1);
@@ -497,7 +492,7 @@ static void usage(char *argv)
int main(int argc, char *argv[])
{
struct submitter *s;
- unsigned long done, calls, reap, cache_hit, cache_miss;
+ unsigned long done, calls, reap;
int err, i, flags, fd, opt;
char *fdepths;
void *ret;
@@ -600,44 +595,29 @@ int main(int argc, char *argv[])
pthread_create(&s->thread, NULL, submitter_fn, s);
fdepths = malloc(8 * s->nr_files);
- cache_hit = cache_miss = reap = calls = done = 0;
+ reap = calls = done = 0;
do {
unsigned long this_done = 0;
unsigned long this_reap = 0;
unsigned long this_call = 0;
- unsigned long this_cache_hit = 0;
- unsigned long this_cache_miss = 0;
unsigned long rpc = 0, ipc = 0;
- double hit = 0.0;
sleep(1);
this_done += s->done;
this_call += s->calls;
this_reap += s->reaps;
- this_cache_hit += s->cachehit;
- this_cache_miss += s->cachemiss;
- if (this_cache_hit && this_cache_miss) {
- unsigned long hits, total;
-
- hits = this_cache_hit - cache_hit;
- total = hits + this_cache_miss - cache_miss;
- hit = (double) hits / (double) total;
- hit *= 100.0;
- }
if (this_call - calls) {
rpc = (this_done - done) / (this_call - calls);
ipc = (this_reap - reap) / (this_call - calls);
} else
rpc = ipc = -1;
file_depths(fdepths);
- printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n",
+ printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n",
this_done - done, rpc, ipc, s->inflight,
- fdepths, hit);
+ fdepths);
done = this_done;
calls = this_call;
reap = this_reap;
- cache_hit = s->cachehit;
- cache_miss = s->cachemiss;
} while (!finish);
pthread_join(s->thread, &ret);
diff --git a/t/stest.c b/t/stest.c
index b95968f1..515ae5a5 100644
--- a/t/stest.c
+++ b/t/stest.c
@@ -11,11 +11,14 @@
#define MAGIC2 0xf0a1e9b3
#define LOOPS 32
+#define MAXSMALLOC 120*1024*1024UL
+#define LARGESMALLOC 128*1024U
struct elem {
unsigned int magic1;
struct flist_head list;
unsigned int magic2;
+ unsigned int size;
};
static FLIST_HEAD(list);
@@ -25,13 +28,15 @@ static int do_rand_allocs(void)
unsigned int size, nr, rounds = 0;
unsigned long total;
struct elem *e;
+ bool error;
while (rounds++ < LOOPS) {
#ifdef STEST_SEED
srand(MAGIC1);
#endif
+ error = false;
nr = total = 0;
- while (total < 120*1024*1024UL) {
+ while (total < MAXSMALLOC) {
size = 8 * sizeof(struct elem) + (int) (999.0 * (rand() / (RAND_MAX + 1.0)));
e = smalloc(size);
if (!e) {
@@ -40,6 +45,7 @@ static int do_rand_allocs(void)
}
e->magic1 = MAGIC1;
e->magic2 = MAGIC2;
+ e->size = size;
total += size;
flist_add_tail(&e->list, &list);
nr++;
@@ -51,8 +57,20 @@ static int do_rand_allocs(void)
e = flist_entry(list.next, struct elem, list);
assert(e->magic1 == MAGIC1);
assert(e->magic2 == MAGIC2);
+ total -= e->size;
flist_del(&e->list);
sfree(e);
+
+ if (!error) {
+ e = smalloc(LARGESMALLOC);
+ if (!e) {
+ error = true;
+ printf("failure allocating %u bytes at %lu allocated during sfree phase\n",
+ LARGESMALLOC, total);
+ }
+ else
+ sfree(e);
+ }
}
}
@@ -66,6 +84,7 @@ int main(int argc, char *argv[])
debug_init();
do_rand_allocs();
+ smalloc_debug(0); /* free and total blocks should match */
scleanup();
return 0;
diff --git a/t/zbd/run-tests-against-zoned-nullb b/t/zbd/run-tests-against-zoned-nullb
index 9336716d..0952011c 100755
--- a/t/zbd/run-tests-against-zoned-nullb
+++ b/t/zbd/run-tests-against-zoned-nullb
@@ -24,6 +24,6 @@ modprobe null_blk nr_devices=0 &&
echo 4096 > blocksize &&
echo 1024 > size &&
echo 1 > memory_backed &&
- echo 1 > power
+ echo 1 > power || exit $?
"${scriptdir}"/test-zbd-support "$@" /dev/nullb0
diff --git a/t/zbd/test-zbd-support b/t/zbd/test-zbd-support
index 10c78e9a..5d079a8b 100755
--- a/t/zbd/test-zbd-support
+++ b/t/zbd/test-zbd-support
@@ -85,7 +85,8 @@ run_fio() {
fio=$(dirname "$0")/../../fio
- opts=("--aux-path=/tmp" "--allow_file_create=0" "$@")
+ opts=("--aux-path=/tmp" "--allow_file_create=0" \
+ "--significant_figures=10" "$@")
{ echo; echo "fio ${opts[*]}"; echo; } >>"${logfile}.${test_number}"
"${dynamic_analyzer[@]}" "$fio" "${opts[@]}"
@@ -706,8 +707,9 @@ test42() {
grep -q 'Specifying the zone size is mandatory for regular block devices with --zonemode=zbd'
}
-# Check whether fio handles --zonesize=1 correctly.
+# Check whether fio handles --zonesize=1 correctly for regular block devices.
test43() {
+ [ -n "$is_zbd" ] && return 0
read_one_block --zonemode=zbd --zonesize=1 |
grep -q 'zone size must be at least 512 bytes for --zonemode=zbd'
}
@@ -741,6 +743,18 @@ test46() {
check_written $((size * 8)) || return $?
}
+# Check whether fio handles --zonemode=zbd --zoneskip=1 correctly.
+# Passes (returns 0) without running anything when $is_zbd is empty.
+# Otherwise fio is expected to fail, and its log must contain the
+# "not a multiple of the device zone size" message.
+test47() {
+ local bs
+
+ [ -z "$is_zbd" ] && return 0
+ bs=$((logical_block_size))
+ # A successful fio run is a test failure here, hence "&& return 1".
+ run_one_fio_job --ioengine=psync --rw=write --bs=$bs \
+ --zonemode=zbd --zoneskip=1 \
+ >> "${logfile}.${test_number}" 2>&1 && return 1
+ # The function's exit status is grep's: 0 only if the message was logged.
+ grep -q 'zoneskip 1 is not a multiple of the device zone size' "${logfile}.${test_number}"
+}
+
tests=()
dynamic_analyzer=()
reset_all_zones=
@@ -771,8 +785,8 @@ source "$(dirname "$0")/functions" || exit $?
dev=$1
realdev=$(readlink -f "$dev")
basename=$(basename "$realdev")
-major=$((0x$(stat -L -c '%t' "$realdev")))
-minor=$((0x$(stat -L -c '%T' "$realdev")))
+major=$((0x$(stat -L -c '%t' "$realdev"))) || exit $?
+minor=$((0x$(stat -L -c '%T' "$realdev"))) || exit $?
disk_size=$(($(<"/sys/dev/block/$major:$minor/size")*512))
# When the target is a partition device, get basename of its holder device to
# access sysfs path of the holder device
@@ -812,9 +826,8 @@ case "$(<"/sys/class/block/$basename/queue/zoned")" in
esac
if [ "${#tests[@]}" = 0 ]; then
- for ((i=1;i<=46;i++)); do
- tests+=("$i")
- done
+ readarray -t tests < <(declare -F | grep "test[0-9]*" | \
+ tr -c -d "[:digit:]\n" | sort -n)
fi
logfile=$0.log
diff --git a/thread_options.h b/thread_options.h
index 14c6969f..ee6e4d6d 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -313,6 +313,7 @@ struct thread_options {
int flow_watermark;
unsigned int flow_sleep;
+ unsigned int offset_increment_percent;
unsigned long long offset_increment;
unsigned long long number_ios;
@@ -599,6 +600,8 @@ struct thread_options_pack {
int32_t flow_watermark;
uint32_t flow_sleep;
+ uint32_t offset_increment_percent;
+ uint32_t pad4;
uint64_t offset_increment;
uint64_t number_ios;
diff --git a/tools/plot/fio2gnuplot b/tools/plot/fio2gnuplot
index 4d1815cf..cc4ea4c7 100755
--- a/tools/plot/fio2gnuplot
+++ b/tools/plot/fio2gnuplot
@@ -36,10 +36,10 @@ def find_file(path, pattern):
fio_data_file=[]
# For all the local files
for file in os.listdir(path):
- # If the file matches the glob
- if fnmatch.fnmatch(file, pattern):
- # Let's consider this file
- fio_data_file.append(file)
+ # If the file matches the glob
+ if fnmatch.fnmatch(file, pattern):
+ # Let's consider this file
+ fio_data_file.append(file)
return fio_data_file
@@ -51,7 +51,7 @@ def generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_
# Plotting 3D or comparing graphs doesn't have a meaning unless if there is at least 2 traces
if len(fio_data_file) > 1:
- f.write("call \'%s/graph3D.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\'\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode))
+ f.write("call \'%s/graph3D.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\'\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode))
# Setting up the compare files that will be plot later
compare=open(gnuplot_output_dir + 'compare.gnuplot','w')
@@ -93,10 +93,10 @@ set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"
compare_smooth.write("plot %s w l ls 1 ti 'Global average value (%.2f)'" % (global_avg,global_avg));
compare_trend.write("plot %s w l ls 1 ti 'Global average value (%.2f)'" % (global_avg,global_avg));
- pos=0
- # Let's create a temporary file for each selected fio file
- for file in fio_data_file:
- tmp_filename = "gnuplot_temp_file.%d" % pos
+ pos=0
+ # Let's create a temporary file for each selected fio file
+ for file in fio_data_file:
+ tmp_filename = "gnuplot_temp_file.%d" % pos
# Plotting comparing graphs doesn't have a meaning unless if there is at least 2 traces
if len(fio_data_file) > 1:
@@ -106,12 +106,12 @@ set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"
compare_trend.write(",\\\n'%s' using 2:3 smooth bezier title '%s'" % (tmp_filename,fio_data_file[pos]))
png_file=file.replace('.log','')
- raw_filename = "%s-2Draw" % (png_file)
- smooth_filename = "%s-2Dsmooth" % (png_file)
- trend_filename = "%s-2Dtrend" % (png_file)
- avg = average(disk_perf[pos])
- f.write("call \'%s/graph2D.gpm\' \'%s' \'%s\' \'%s\' \'%s\' \'%s\' \'%s\' \'%s\' \'%f\'\n" % (gpm_dir,title,tmp_filename,fio_data_file[pos],raw_filename,mode,smooth_filename,trend_filename,avg))
- pos = pos +1
+ raw_filename = "%s-2Draw" % (png_file)
+ smooth_filename = "%s-2Dsmooth" % (png_file)
+ trend_filename = "%s-2Dtrend" % (png_file)
+ avg = average(disk_perf[pos])
+ f.write("call \'%s/graph2D.gpm\' \'%s' \'%s\' \'%s\' \'%s\' \'%s\' \'%s\' \'%s\' \'%f\'\n" % (gpm_dir,title,tmp_filename,fio_data_file[pos],raw_filename,mode,smooth_filename,trend_filename,avg))
+ pos = pos +1
# Plotting comparing graphs doesn't have a meaning unless if there is at least 2 traces
if len(fio_data_file) > 1:
@@ -125,7 +125,7 @@ def generate_gnuplot_math_script(title,gnuplot_output_filename,mode,average,gnup
filename=gnuplot_output_dir+'mymath';
temporary_files.append(filename)
f=open(filename,'a')
- f.write("call \'%s/math.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\' %s\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode,average))
+ f.write("call \'%s/math.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\' %s\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode,average))
f.close()
def compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir):
@@ -250,10 +250,10 @@ def compute_math(fio_data_file, title,gnuplot_output_filename,gnuplot_output_dir
stddev_file.write('DiskName %s\n'% mode )
for disk in range(len(fio_data_file)):
# print disk_perf[disk]
- min_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
- max_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
- average_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
- stddev_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
+ min_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
+ max_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
+ average_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
+ stddev_file.write("# Disk%d was coming from %s\n" % (disk,fio_data_file[disk]))
avg = average(disk_perf[disk])
variance = [(x - avg)**2 for x in disk_perf[disk]]
standard_deviation = math.sqrt(average(variance))
@@ -406,126 +406,126 @@ def main(argv):
force_keep_temp_files=False
if not os.path.isfile(gpm_dir+'math.gpm'):
- gpm_dir="/usr/local/share/fio/"
- if not os.path.isfile(gpm_dir+'math.gpm'):
- print("Looks like fio didn't get installed properly as no gpm files found in '/usr/share/fio' or '/usr/local/share/fio'\n")
- sys.exit(3)
+ gpm_dir="/usr/local/share/fio/"
+ if not os.path.isfile(gpm_dir+'math.gpm'):
+ print("Looks like fio didn't get installed properly as no gpm files found in '/usr/share/fio' or '/usr/local/share/fio'\n")
+ sys.exit(3)
try:
- opts, args = getopt.getopt(argv[1:],"ghkbivo:d:t:p:G:m:M:",['bandwidth', 'iops', 'pattern', 'outputfile', 'outputdir', 'title', 'min_time', 'max_time', 'gnuplot', 'Global', 'help', 'verbose','keep'])
+ opts, args = getopt.getopt(argv[1:],"ghkbivo:d:t:p:G:m:M:",['bandwidth', 'iops', 'pattern', 'outputfile', 'outputdir', 'title', 'min_time', 'max_time', 'gnuplot', 'Global', 'help', 'verbose','keep'])
except getopt.GetoptError:
- print("Error: One of the options passed to the cmdline was not supported")
- print("Please fix your command line or read the help (-h option)")
- sys.exit(2)
+ print("Error: One of the options passed to the cmdline was not supported")
+ print("Please fix your command line or read the help (-h option)")
+ sys.exit(2)
for opt, arg in opts:
- if opt in ("-b", "--bandwidth"):
- pattern='*_bw.log'
- elif opt in ("-i", "--iops"):
- pattern='*_iops.log'
- elif opt in ("-v", "--verbose"):
- verbose=True
- elif opt in ("-k", "--keep"):
- #User really wants to keep the temporary files
- force_keep_temp_files=True
- elif opt in ("-p", "--pattern"):
- pattern_set_by_user=True
- pattern=arg
- pattern=pattern.replace('\\','')
- elif opt in ("-o", "--outputfile"):
- gnuplot_output_filename=arg
- elif opt in ("-d", "--outputdir"):
- gnuplot_output_dir=arg
- if not gnuplot_output_dir.endswith('/'):
- gnuplot_output_dir=gnuplot_output_dir+'/'
- if not os.path.exists(gnuplot_output_dir):
- os.makedirs(gnuplot_output_dir)
- elif opt in ("-t", "--title"):
- title=arg
- elif opt in ("-m", "--min_time"):
- min_time=arg
- elif opt in ("-M", "--max_time"):
- max_time=arg
- elif opt in ("-g", "--gnuplot"):
- run_gnuplot=True
- elif opt in ("-G", "--Global"):
- parse_global=True
- global_search=arg
- elif opt in ("-h", "--help"):
- print_help()
- sys.exit(1)
+ if opt in ("-b", "--bandwidth"):
+ pattern='*_bw.log'
+ elif opt in ("-i", "--iops"):
+ pattern='*_iops.log'
+ elif opt in ("-v", "--verbose"):
+ verbose=True
+ elif opt in ("-k", "--keep"):
+ #User really wants to keep the temporary files
+ force_keep_temp_files=True
+ elif opt in ("-p", "--pattern"):
+ pattern_set_by_user=True
+ pattern=arg
+ pattern=pattern.replace('\\','')
+ elif opt in ("-o", "--outputfile"):
+ gnuplot_output_filename=arg
+ elif opt in ("-d", "--outputdir"):
+ gnuplot_output_dir=arg
+ if not gnuplot_output_dir.endswith('/'):
+ gnuplot_output_dir=gnuplot_output_dir+'/'
+ if not os.path.exists(gnuplot_output_dir):
+ os.makedirs(gnuplot_output_dir)
+ elif opt in ("-t", "--title"):
+ title=arg
+ elif opt in ("-m", "--min_time"):
+ min_time=arg
+ elif opt in ("-M", "--max_time"):
+ max_time=arg
+ elif opt in ("-g", "--gnuplot"):
+ run_gnuplot=True
+ elif opt in ("-G", "--Global"):
+ parse_global=True
+ global_search=arg
+ elif opt in ("-h", "--help"):
+ print_help()
+ sys.exit(1)
# Adding .global extension to the file
if parse_global==True:
- if not gnuplot_output_filename.endswith('.global'):
- pattern = pattern+'.global'
+ if not gnuplot_output_filename.endswith('.global'):
+ pattern = pattern+'.global'
fio_data_file=find_file('.',pattern)
if len(fio_data_file) == 0:
- print("No log file found with pattern %s!" % pattern)
- # Try numjob log file format if per_numjob_logs=1
- if (pattern == '*_bw.log'):
- fio_data_file=find_file('.','*_bw.*.log')
- if (pattern == '*_iops.log'):
- fio_data_file=find_file('.','*_iops.*.log')
- if len(fio_data_file) == 0:
- sys.exit(1)
- else:
- print("Using log file per job format instead")
+ print("No log file found with pattern %s!" % pattern)
+ # Try numjob log file format if per_numjob_logs=1
+ if (pattern == '*_bw.log'):
+ fio_data_file=find_file('.','*_bw.*.log')
+ if (pattern == '*_iops.log'):
+ fio_data_file=find_file('.','*_iops.*.log')
+ if len(fio_data_file) == 0:
+ sys.exit(1)
+ else:
+ print("Using log file per job format instead")
else:
- print("%d files Selected with pattern '%s'" % (len(fio_data_file), pattern))
+ print("%d files Selected with pattern '%s'" % (len(fio_data_file), pattern))
fio_data_file=sorted(fio_data_file, key=str.lower)
for file in fio_data_file:
- print(' |-> %s' % file)
- if "_bw.log" in file :
- mode="Bandwidth (KB/sec)"
- if "_iops.log" in file :
- mode="IO per Seconds (IO/sec)"
+ print(' |-> %s' % file)
+ if "_bw.log" in file :
+ mode="Bandwidth (KB/sec)"
+ if "_iops.log" in file :
+ mode="IO per Seconds (IO/sec)"
if (title == 'No title') and (mode != 'unknown'):
- if "Bandwidth" in mode:
- title='Bandwidth benchmark with %d fio results' % len(fio_data_file)
- if "IO" in mode:
- title='IO benchmark with %d fio results' % len(fio_data_file)
+ if "Bandwidth" in mode:
+ title='Bandwidth benchmark with %d fio results' % len(fio_data_file)
+ if "IO" in mode:
+ title='IO benchmark with %d fio results' % len(fio_data_file)
print()
#We need to adjust the output filename regarding the pattern required by the user
if (pattern_set_by_user == True):
- gnuplot_output_filename=pattern
- # As we do have some glob in the pattern, let's make this simpliest
- # We do remove the simpliest parts of the expression to get a clear file name
- gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
- gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
- gnuplot_output_filename=gnuplot_output_filename.replace('--','-')
- gnuplot_output_filename=gnuplot_output_filename.replace('.log','')
- # Insure that we don't have any starting or trailing dash to the filename
- gnuplot_output_filename = gnuplot_output_filename[:-1] if gnuplot_output_filename.endswith('-') else gnuplot_output_filename
- gnuplot_output_filename = gnuplot_output_filename[1:] if gnuplot_output_filename.startswith('-') else gnuplot_output_filename
- if (gnuplot_output_filename == ''):
- gnuplot_output_filename='default'
+ gnuplot_output_filename=pattern
+ # As we do have some glob in the pattern, let's make this simpliest
+ # We do remove the simpliest parts of the expression to get a clear file name
+ gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
+ gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
+ gnuplot_output_filename=gnuplot_output_filename.replace('--','-')
+ gnuplot_output_filename=gnuplot_output_filename.replace('.log','')
+ # Insure that we don't have any starting or trailing dash to the filename
+ gnuplot_output_filename = gnuplot_output_filename[:-1] if gnuplot_output_filename.endswith('-') else gnuplot_output_filename
+ gnuplot_output_filename = gnuplot_output_filename[1:] if gnuplot_output_filename.startswith('-') else gnuplot_output_filename
+ if (gnuplot_output_filename == ''):
+ gnuplot_output_filename='default'
if parse_global==True:
- parse_global_files(fio_data_file, global_search)
+ parse_global_files(fio_data_file, global_search)
else:
- blk_size=compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir,min_time,max_time)
- title="%s @ Blocksize = %dK" % (title,blk_size/1024)
- compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir)
- compute_math(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)
- generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)
-
- if (run_gnuplot==True):
- render_gnuplot(fio_data_file, gnuplot_output_dir)
-
- # Shall we clean the temporary files ?
- if keep_temp_files==False and force_keep_temp_files==False:
- # Cleaning temporary files
- if verbose: print("Cleaning temporary files")
- for f in enumerate(temporary_files):
- if verbose: print(" -> %s"%f[1])
- try:
- os.remove(f[1])
- except:
- True
+ blk_size=compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir,min_time,max_time)
+ title="%s @ Blocksize = %dK" % (title,blk_size/1024)
+ compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir)
+ compute_math(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)
+ generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)
+
+ if (run_gnuplot==True):
+ render_gnuplot(fio_data_file, gnuplot_output_dir)
+
+ # Shall we clean the temporary files ?
+ if keep_temp_files==False and force_keep_temp_files==False:
+ # Cleaning temporary files
+ if verbose: print("Cleaning temporary files")
+ for f in enumerate(temporary_files):
+ if verbose: print(" -> %s"%f[1])
+ try:
+ os.remove(f[1])
+ except:
+ True
#Main
if __name__ == "__main__":
diff --git a/verify.c b/verify.c
index da429e79..37d2be8d 100644
--- a/verify.c
+++ b/verify.c
@@ -39,14 +39,14 @@ void fill_buffer_pattern(struct thread_data *td, void *p, unsigned int len)
(void)cpy_pattern(td->o.buffer_pattern, td->o.buffer_pattern_bytes, p, len);
}
-static void __fill_buffer(struct thread_options *o, unsigned long seed, void *p,
+static void __fill_buffer(struct thread_options *o, uint64_t seed, void *p,
unsigned int len)
{
__fill_random_buf_percentage(seed, p, o->compress_percentage, len, len, o->buffer_pattern, o->buffer_pattern_bytes);
}
-static unsigned long fill_buffer(struct thread_data *td, void *p,
- unsigned int len)
+static uint64_t fill_buffer(struct thread_data *td, void *p,
+ unsigned int len)
{
struct frand_state *fs = &td->verify_state;
struct thread_options *o = &td->o;
@@ -55,7 +55,7 @@ static unsigned long fill_buffer(struct thread_data *td, void *p,
}
void fill_verify_pattern(struct thread_data *td, void *p, unsigned int len,
- struct io_u *io_u, unsigned long seed, int use_seed)
+ struct io_u *io_u, uint64_t seed, int use_seed)
{
struct thread_options *o = &td->o;
@@ -100,7 +100,7 @@ static unsigned int get_hdr_inc(struct thread_data *td, struct io_u *io_u)
}
static void fill_pattern_headers(struct thread_data *td, struct io_u *io_u,
- unsigned long seed, int use_seed)
+ uint64_t seed, int use_seed)
{
unsigned int hdr_inc, header_num;
struct verify_header *hdr;
@@ -1191,6 +1191,10 @@ static void populate_hdr(struct thread_data *td, struct io_u *io_u,
fill_hdr(td, io_u, hdr, header_num, header_len, io_u->rand_seed);
+ if (header_len <= hdr_size(td, hdr)) {
+ td_verror(td, EINVAL, "Blocksize too small");
+ return;
+ }
data_len = header_len - hdr_size(td, hdr);
data = p + hdr_size(td, hdr);
@@ -1635,8 +1639,7 @@ struct all_io_list *get_all_io_list(int save_mask, size_t *sz)
s->rand.state32.s[3] = 0;
s->rand.use64 = 0;
}
- s->name[sizeof(s->name) - 1] = '\0';
- strncpy((char *) s->name, td->o.name, sizeof(s->name) - 1);
+ snprintf((char *) s->name, sizeof(s->name), "%s", td->o.name);
next = io_list_next(s);
}
diff --git a/verify.h b/verify.h
index 64121a51..539e6f6c 100644
--- a/verify.h
+++ b/verify.h
@@ -97,7 +97,7 @@ extern void populate_verify_io_u(struct thread_data *, struct io_u *);
extern int __must_check get_next_verify(struct thread_data *td, struct io_u *);
extern int __must_check verify_io_u(struct thread_data *, struct io_u **);
extern int verify_io_u_async(struct thread_data *, struct io_u **);
-extern void fill_verify_pattern(struct thread_data *td, void *p, unsigned int len, struct io_u *io_u, unsigned long seed, int use_seed);
+extern void fill_verify_pattern(struct thread_data *td, void *p, unsigned int len, struct io_u *io_u, uint64_t seed, int use_seed);
extern void fill_buffer_pattern(struct thread_data *td, void *p, unsigned int len);
extern void fio_verify_init(struct thread_data *td);
diff --git a/zbd.c b/zbd.c
index 2da742b7..99310c49 100644
--- a/zbd.c
+++ b/zbd.c
@@ -119,6 +119,30 @@ static bool zbd_verify_sizes(void)
continue;
if (!zbd_is_seq_job(f))
continue;
+
+ if (!td->o.zone_size) {
+ td->o.zone_size = f->zbd_info->zone_size;
+ if (!td->o.zone_size) {
+ log_err("%s: invalid 0 zone size\n",
+ f->file_name);
+ return false;
+ }
+ } else if (td->o.zone_size != f->zbd_info->zone_size) {
+ log_err("%s: job parameter zonesize %llu does not match disk zone size %llu.\n",
+ f->file_name, (unsigned long long) td->o.zone_size,
+ (unsigned long long) f->zbd_info->zone_size);
+ return false;
+ }
+
+ if (td->o.zone_skip &&
+ (td->o.zone_skip < td->o.zone_size ||
+ td->o.zone_skip % td->o.zone_size)) {
+ log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
+ f->file_name, (unsigned long long) td->o.zone_skip,
+ (unsigned long long) td->o.zone_size);
+ return false;
+ }
+
zone_idx = zbd_zone_idx(f, f->file_offset);
z = &f->zbd_info->zone_info[zone_idx];
if (f->file_offset != z->start) {
@@ -186,11 +210,14 @@ static bool zbd_verify_bs(void)
* size of @buf.
*
* Returns 0 upon success and a negative error code upon failure.
+ * If the zone report is empty, always assume an error (device problem) and
+ * return -EIO.
*/
static int read_zone_info(int fd, uint64_t start_sector,
void *buf, unsigned int bufsz)
{
struct blk_zone_report *hdr = buf;
+ int ret;
if (bufsz < sizeof(*hdr))
return -EINVAL;
@@ -199,7 +226,12 @@ static int read_zone_info(int fd, uint64_t start_sector,
hdr->nr_zones = (bufsz - sizeof(*hdr)) / sizeof(struct blk_zone);
hdr->sector = start_sector;
- return ioctl(fd, BLKREPORTZONE, hdr) >= 0 ? 0 : -errno;
+ ret = ioctl(fd, BLKREPORTZONE, hdr);
+ if (ret)
+ return -errno;
+ if (!hdr->nr_zones)
+ return -EIO;
+ return 0;
}
/*
@@ -304,13 +336,23 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
{
uint32_t nr_zones;
struct fio_zone_info *p;
- uint64_t zone_size;
+ uint64_t zone_size = td->o.zone_size;
struct zoned_block_device_info *zbd_info = NULL;
pthread_mutexattr_t attr;
int i;
- zone_size = td->o.zone_size;
- assert(zone_size);
+ if (zone_size == 0) {
+ log_err("%s: Specifying the zone size is mandatory for regular block devices with --zonemode=zbd\n\n",
+ f->file_name);
+ return 1;
+ }
+
+ if (zone_size < 512) {
+ log_err("%s: zone size must be at least 512 bytes for --zonemode=zbd\n\n",
+ f->file_name);
+ return 1;
+ }
+
nr_zones = (f->real_file_size + zone_size - 1) / zone_size;
zbd_info = scalloc(1, sizeof(*zbd_info) +
(nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
@@ -393,8 +435,8 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
if (td->o.zone_size == 0) {
td->o.zone_size = zone_size;
} else if (td->o.zone_size != zone_size) {
- log_info("fio: %s job parameter zonesize %llu does not match disk zone size %llu.\n",
- f->file_name, (unsigned long long) td->o.zone_size,
+ log_err("fio: %s job parameter zonesize %llu does not match disk zone size %llu.\n",
+ f->file_name, (unsigned long long) td->o.zone_size,
(unsigned long long) zone_size);
ret = -EINVAL;
goto close;
@@ -418,8 +460,6 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
p->start = z->start << 9;
switch (z->cond) {
case BLK_ZONE_COND_NOT_WP:
- p->wp = p->start;
- break;
case BLK_ZONE_COND_FULL:
p->wp = p->start + zone_size;
break;
@@ -475,7 +515,7 @@ out:
*
* Returns 0 upon success and a negative error code upon failure.
*/
-int zbd_create_zone_info(struct thread_data *td, struct fio_file *f)
+static int zbd_create_zone_info(struct thread_data *td, struct fio_file *f)
{
enum blk_zoned_model zbd_model;
int ret = 0;
@@ -543,7 +583,7 @@ static int zbd_init_zone_info(struct thread_data *td, struct fio_file *file)
ret = zbd_create_zone_info(td, file);
if (ret < 0)
- td_verror(td, -ret, "BLKREPORTZONE failed");
+ td_verror(td, -ret, "zbd_create_zone_info() failed");
return ret;
}
@@ -555,18 +595,8 @@ int zbd_init(struct thread_data *td)
for_each_file(td, f, i) {
if (f->filetype != FIO_TYPE_BLOCK)
continue;
- if (td->o.zone_size && td->o.zone_size < 512) {
- log_err("%s: zone size must be at least 512 bytes for --zonemode=zbd\n\n",
- f->file_name);
- return 1;
- }
- if (td->o.zone_size == 0 &&
- get_zbd_model(f->file_name) == ZBD_DM_NONE) {
- log_err("%s: Specifying the zone size is mandatory for regular block devices with --zonemode=zbd\n\n",
- f->file_name);
+ if (zbd_init_zone_info(td, f))
return 1;
- }
- zbd_init_zone_info(td, f);
}
if (!zbd_using_direct_io()) {
@@ -927,8 +957,8 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
* a multiple of the fio block size. The caller must neither hold z->mutex
* nor f->zbd_info->mutex. Returns with z->mutex held upon success.
*/
-struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
- struct io_u *io_u)
+static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
+ struct io_u *io_u)
{
const uint32_t min_bs = td->o.min_bs[io_u->ddir];
const struct fio_file *f = io_u->file;
@@ -1214,6 +1244,65 @@ bool zbd_unaligned_write(int error_code)
}
/**
+ * setup_zbd_zone_mode - handle zoneskip as necessary for ZBD drives
+ * @td: FIO thread data.
+ * @io_u: FIO I/O unit.
+ *
+ * For sequential workloads, change the file offset to skip zoneskip bytes when
+ * no more IO can be performed in the current zone.
+ * - For read workloads, zoneskip is applied when the io has reached the end of
+ * the zone or the zone write position (when td->o.read_beyond_wp is false).
+ * - For write workloads, zoneskip is applied when the zone is full.
+ * This applies only to read and write operations.
+ */
+void setup_zbd_zone_mode(struct thread_data *td, struct io_u *io_u)
+{
+ struct fio_file *f = io_u->file;
+ enum fio_ddir ddir = io_u->ddir;
+ struct fio_zone_info *z;
+ uint32_t zone_idx;
+
+ assert(td->o.zone_mode == ZONE_MODE_ZBD);
+ assert(td->o.zone_size);
+
+ /*
+ * zone_skip is valid only for sequential workloads.
+ */
+ if (td_random(td) || !td->o.zone_skip)
+ return;
+
+ /*
+ * It is time to switch to a new zone if:
+ * - zone_bytes == zone_size bytes have already been accessed
+ * - The last position reached the end of the current zone.
+ * - For reads with td->o.read_beyond_wp == false, the last position
+ * reached the zone write pointer.
+ */
+ zone_idx = zbd_zone_idx(f, f->last_pos[ddir]);
+ z = &f->zbd_info->zone_info[zone_idx];
+
+ if (td->zone_bytes >= td->o.zone_size ||
+ f->last_pos[ddir] >= (z+1)->start ||
+ (ddir == DDIR_READ &&
+ (!td->o.read_beyond_wp) && f->last_pos[ddir] >= z->wp)) {
+ /*
+ * Skip zones.
+ */
+ td->zone_bytes = 0;
+ f->file_offset += td->o.zone_size + td->o.zone_skip;
+
+ /*
+ * Wrap from the beginning, if we exceed the file size
+ */
+ if (f->file_offset >= f->real_file_size)
+ f->file_offset = get_start_offset(td, f);
+
+ f->last_pos[ddir] = f->file_offset;
+ td->io_skip_bytes += td->o.zone_skip;
+ }
+}
+
+/**
* zbd_adjust_block - adjust the offset and length as necessary for ZBD drives
* @td: FIO thread data.
* @io_u: FIO I/O unit.
diff --git a/zbd.h b/zbd.h
index 521283b2..e0a7e447 100644
--- a/zbd.h
+++ b/zbd.h
@@ -94,6 +94,7 @@ void zbd_free_zone_info(struct fio_file *f);
int zbd_init(struct thread_data *td);
void zbd_file_reset(struct thread_data *td, struct fio_file *f);
bool zbd_unaligned_write(int error_code);
+void setup_zbd_zone_mode(struct thread_data *td, struct io_u *io_u);
enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u);
char *zbd_write_status(const struct thread_stat *ts);
@@ -147,6 +148,12 @@ static inline char *zbd_write_status(const struct thread_stat *ts)
static inline void zbd_queue_io_u(struct io_u *io_u,
enum fio_q_status status) {}
static inline void zbd_put_io_u(struct io_u *io_u) {}
+
+static inline void setup_zbd_zone_mode(struct thread_data *td,
+ struct io_u *io_u)
+{
+}
+
#endif
#endif /* FIO_ZBD_H */