summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xFIO-VERSION-GEN2
-rw-r--r--HOWTO40
-rw-r--r--Makefile8
-rw-r--r--backend.c66
-rwxr-xr-xconfigure31
-rw-r--r--engines/filedelete.c115
-rw-r--r--engines/librpma_fio.c11
-rw-r--r--engines/librpma_fio.h2
-rw-r--r--engines/librpma_gpspm.c25
-rw-r--r--engines/libzbc.c21
-rw-r--r--engines/nfs.c314
-rw-r--r--engines/skeleton_external.c13
-rw-r--r--examples/filedelete-ioengine.fio18
-rw-r--r--examples/librpma_gpspm-server.fio2
-rw-r--r--examples/nfs.fio22
-rw-r--r--filesetup.c11
-rw-r--r--fio.151
-rw-r--r--gettime.c2
-rw-r--r--init.c5
-rw-r--r--ioengines.c2
-rw-r--r--ioengines.h4
-rw-r--r--optgroup.c4
-rw-r--r--optgroup.h2
-rw-r--r--options.c5
-rw-r--r--os/os-aix.h6
-rw-r--r--os/os-android.h25
-rw-r--r--os/os-dragonfly.h6
-rw-r--r--os/os-freebsd.h6
-rw-r--r--os/os-hpux.h7
-rw-r--r--os/os-linux.h3
-rw-r--r--os/os-mac.h6
-rw-r--r--os/os-netbsd.h6
-rw-r--r--os/os-openbsd.h6
-rw-r--r--os/os-solaris.h6
-rw-r--r--os/os.h5
-rwxr-xr-xos/windows/WixUI_Minimal_NoEULA.wxs96
-rwxr-xr-xos/windows/WixUI_fio.wxl12
-rw-r--r--os/windows/dobuild.cmd5
-rwxr-xr-xos/windows/eula.rtfbin1075 -> 0 bytes
-rwxr-xr-xos/windows/install.wxs2
-rw-r--r--oslib/blkzoned.h7
-rw-r--r--oslib/linux-blkzoned.c116
-rw-r--r--parse.h2
-rw-r--r--stat.c2
-rw-r--r--zbd.c91
45 files changed, 1116 insertions, 75 deletions
diff --git a/FIO-VERSION-GEN b/FIO-VERSION-GEN
index 29486071..47af94e9 100755
--- a/FIO-VERSION-GEN
+++ b/FIO-VERSION-GEN
@@ -1,7 +1,7 @@
#!/bin/sh
GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.26
+DEF_VER=fio-3.27
LF='
'
diff --git a/HOWTO b/HOWTO
index 2788670d..86fb2964 100644
--- a/HOWTO
+++ b/HOWTO
@@ -544,6 +544,9 @@ Parameter types
* *Ti* -- means tebi (Ti) or 1024**4
* *Pi* -- means pebi (Pi) or 1024**5
+ For Zone Block Device Mode:
+ * *z* -- means Zone
+
With :option:`kb_base`\=1024 (the default), the unit prefixes are opposite
from those specified in the SI and IEC 80000-13 standards to provide
compatibility with old scripts. For example, 4k means 4096.
@@ -1168,7 +1171,7 @@ I/O type
**1**
Backward-compatible alias for **mixed**.
-
+
**2**
Alias for **both**.
@@ -1277,13 +1280,14 @@ I/O type
.. option:: offset=int
Start I/O at the provided offset in the file, given as either a fixed size in
- bytes or a percentage. If a percentage is given, the generated offset will be
+ bytes, zones or a percentage. If a percentage is given, the generated offset will be
aligned to the minimum ``blocksize`` or to the value of ``offset_align`` if
provided. Data before the given offset will not be touched. This
effectively caps the file size at `real_size - offset`. Can be combined with
:option:`size` to constrain the start and end range of the I/O workload.
A percentage can be specified by a number between 1 and 100 followed by '%',
- for example, ``offset=20%`` to specify 20%.
+ for example, ``offset=20%`` to specify 20%. In ZBD mode, value can be set as
+ number of zones using 'z'.
.. option:: offset_align=int
@@ -1300,7 +1304,8 @@ I/O type
intended to operate on a file in parallel disjoint segments, with even
spacing between the starting points. Percentages can be used for this option.
If a percentage is given, the generated offset will be aligned to the minimum
- ``blocksize`` or to the value of ``offset_align`` if provided.
+ ``blocksize`` or to the value of ``offset_align`` if provided. In ZBD mode, value can
+ also be set as number of zones using 'z'.
.. option:: number_ios=int
@@ -1818,7 +1823,8 @@ I/O size
If this option is not specified, fio will use the full size of the given
files or devices. If the files do not exist, size must be given. It is also
possible to give size as a percentage between 1 and 100. If ``size=20%`` is
- given, fio will use 20% of the full size of the given files or devices.
+ given, fio will use 20% of the full size of the given files or devices.
+ In ZBD mode, value can also be set as number of zones using 'z'.
Can be combined with :option:`offset` to constrain the start and end range
that I/O will be done within.
@@ -1852,7 +1858,8 @@ I/O size
.. option:: fill_device=bool, fill_fs=bool
Sets size to something really large and waits for ENOSPC (no space left on
- device) as the terminating condition. Only makes sense with sequential
+ device) or EDQUOT (disk quota exceeded)
+ as the terminating condition. Only makes sense with sequential
write. For a read workload, the mount point will be filled first then I/O
started on the result. This option doesn't make sense if operating on a raw
device node, since the size of that is already known by the file system.
@@ -2055,6 +2062,11 @@ I/O engine
and 'nrfiles', so that files will be created.
This engine is to measure file lookup and meta data access.
+ **filedelete**
+ Simply delete the files by unlink() and do no I/O to them. You need to set 'filesize'
+ and 'nrfiles', so that the files will be created.
+ This engine is to measure file delete.
+
**libpmem**
Read and write using mmap I/O to a file on a filesystem
mounted with DAX on a persistent memory device through the PMDK
@@ -2091,6 +2103,12 @@ I/O engine
I/O engine supporting asynchronous read and write operations to the
DAOS File System (DFS) via libdfs.
+ **nfs**
+ I/O engine supporting asynchronous read and write operations to
+ NFS filesystems from userspace via libnfs. This is useful for
+ achieving higher concurrency and thus throughput than is possible
+ via kernel NFS.
+
I/O engine specific parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2232,6 +2250,11 @@ with the caveat that when used on the command line, they must come after the
Set to 1 only when Direct Write to PMem from the remote host is possible.
Otherwise, set to 0.
+.. option:: busy_wait_polling=bool : [librpma_*_server]
+
+ Set to 0 to wait for completion instead of busy-wait polling completion.
+ Default: 1.
+
.. option:: interface=str : [netsplice] [net]
The IP address of the network interface used to send or receive UDP
@@ -2508,6 +2531,11 @@ with the caveat that when used on the command line, they must come after the
Specificy a different object class for the dfs file.
Use DAOS container's object class by default.
+.. option:: nfs_url=str : [nfs]
+
+ URL in libnfs format, eg nfs://<server|ipv4|ipv6>/path[?arg=val[&arg=val]*]
+ Refer to the libnfs README for more details.
+
I/O depth
~~~~~~~~~
diff --git a/Makefile b/Makefile
index fce3d0d1..ef317373 100644
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ SOURCE := $(sort $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/crc/*.c)) \
pshared.c options.c \
smalloc.c filehash.c profile.c debug.c engines/cpu.c \
engines/mmap.c engines/sync.c engines/null.c engines/net.c \
- engines/ftruncate.c engines/filecreate.c engines/filestat.c \
+ engines/ftruncate.c engines/filecreate.c engines/filestat.c engines/filedelete.c \
server.c client.c iolog.c backend.c libfio.c flow.c cconv.c \
gettime-thread.c helpers.c json.c idletime.c td_error.c \
profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
@@ -79,6 +79,12 @@ ifdef CONFIG_LIBNBD
ENGINES += nbd
endif
+ifdef CONFIG_LIBNFS
+ CFLAGS += $(LIBNFS_CFLAGS)
+ LIBS += $(LIBNFS_LIBS)
+ SOURCE += engines/nfs.c
+endif
+
ifdef CONFIG_64BIT
CPPFLAGS += -DBITS_PER_LONG=64
else ifdef CONFIG_32BIT
diff --git a/backend.c b/backend.c
index 52b4ca7e..6290e0d6 100644
--- a/backend.c
+++ b/backend.c
@@ -393,7 +393,7 @@ static bool break_on_this_error(struct thread_data *td, enum fio_ddir ddir,
td_clear_error(td);
*retptr = 0;
return false;
- } else if (td->o.fill_device && err == ENOSPC) {
+ } else if (td->o.fill_device && (err == ENOSPC || err == EDQUOT)) {
/*
* We expect to hit this error if
* fill_device option is set.
@@ -1105,7 +1105,7 @@ reap:
if (td->trim_entries)
log_err("fio: %lu trim entries leaked?\n", td->trim_entries);
- if (td->o.fill_device && td->error == ENOSPC) {
+ if (td->o.fill_device && (td->error == ENOSPC || td->error == EDQUOT)) {
td->error = 0;
fio_mark_td_terminate(td);
}
@@ -1120,7 +1120,8 @@ reap:
if (i) {
ret = io_u_queued_complete(td, i);
- if (td->o.fill_device && td->error == ENOSPC)
+ if (td->o.fill_device &&
+ (td->error == ENOSPC || td->error == EDQUOT))
td->error = 0;
}
@@ -1341,22 +1342,19 @@ int init_io_u_buffers(struct thread_data *td)
return 0;
}
+#ifdef FIO_HAVE_IOSCHED_SWITCH
/*
- * This function is Linux specific.
+ * These functions are Linux specific.
* FIO_HAVE_IOSCHED_SWITCH enabled currently means it's Linux.
*/
-static int switch_ioscheduler(struct thread_data *td)
+static int set_ioscheduler(struct thread_data *td, struct fio_file *file)
{
-#ifdef FIO_HAVE_IOSCHED_SWITCH
char tmp[256], tmp2[128], *p;
FILE *f;
int ret;
- if (td_ioengine_flagged(td, FIO_DISKLESSIO))
- return 0;
-
- assert(td->files && td->files[0]);
- sprintf(tmp, "%s/queue/scheduler", td->files[0]->du->sysfs_root);
+ assert(file->du && file->du->sysfs_root);
+ sprintf(tmp, "%s/queue/scheduler", file->du->sysfs_root);
f = fopen(tmp, "r+");
if (!f) {
@@ -1417,11 +1415,55 @@ static int switch_ioscheduler(struct thread_data *td)
fclose(f);
return 0;
+}
+
+/*
+ * Apply the I/O scheduler setting to every file of the job by calling
+ * set_ioscheduler() on it. Diskless engines are skipped entirely; files
+ * that are neither regular files nor block devices, and files without
+ * disk-util information (f->du), are skipped individually.
+ *
+ * Returns 0 on success or the first non-zero set_ioscheduler() result.
+ */
+static int switch_ioscheduler(struct thread_data *td)
+{
+	struct fio_file *f;
+	unsigned int i;
+	int ret = 0;
+
+	if (td_ioengine_flagged(td, FIO_DISKLESSIO))
+		return 0;
+
+	assert(td->files && td->files[0]);
+
+	for_each_file(td, f, i) {
+		/* Only consider regular files and block device files */
+		if (f->filetype != FIO_TYPE_FILE &&
+		    f->filetype != FIO_TYPE_BLOCK)
+			continue;
+
+		/*
+		 * Make sure that the device hosting the file could
+		 * be determined.
+		 */
+		if (!f->du)
+			continue;
+
+		ret = set_ioscheduler(td, f);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
#else
+
+static int switch_ioscheduler(struct thread_data *td)
+{
return 0;
-#endif
}
+#endif /* FIO_HAVE_IOSCHED_SWITCH */
+
static bool keep_running(struct thread_data *td)
{
unsigned long long limit;
diff --git a/configure b/configure
index a7d82be0..8b763700 100755
--- a/configure
+++ b/configure
@@ -142,7 +142,7 @@ check_min_lib_version() {
fi
: "${_feature:=${1}}"
if "${cross_prefix}"pkg-config --version > /dev/null 2>&1; then
- if eval "echo \$$_feature" = "yes" ; then
+ if test "$(eval echo \"\$$_feature\")" = "yes" ; then
feature_not_found "$_feature" "$1 >= $2"
fi
else
@@ -170,6 +170,7 @@ disable_native="no"
march_set="no"
libiscsi="no"
libnbd="no"
+libnfs="no"
libzbc=""
dfs=""
dynamic_engines="no"
@@ -241,6 +242,8 @@ for opt do
;;
--disable-tcmalloc) disable_tcmalloc="yes"
;;
+ --disable-nfs) disable_nfs="yes"
+ ;;
--dynamic-libengines) dynamic_engines="yes"
;;
--disable-dfs) dfs="no"
@@ -271,8 +274,10 @@ if test "$show_help" = "yes" ; then
echo "--disable-rados Disable Rados support even if found"
echo "--disable-rbd Disable Rados Block Device even if found"
echo "--disable-http Disable HTTP support even if found"
+ echo "--disable-nfs Disable userspace NFS support even if found"
echo "--disable-gfapi Disable gfapi"
echo "--enable-libhdfs Enable hdfs support"
+ echo "--enable-libnfs Enable nfs support"
echo "--disable-lex Disable use of lex/yacc for math"
echo "--disable-pmem Disable pmem based engines even if found"
echo "--enable-lex Enable use of lex/yacc for math"
@@ -2278,6 +2283,21 @@ fi
print_config "DAOS File System (dfs) Engine" "$dfs"
##########################################
+# Check if we have libnfs (for userspace nfs support).
+# pkg-config reports presence through its exit status, so run it directly
+# (not through command substitution) and silence its diagnostics.
+if test "$disable_nfs" != "yes"; then
+  if pkg-config libnfs > /dev/null 2>&1; then
+    libnfs="yes"
+    libnfs_cflags=$(pkg-config --cflags libnfs)
+    libnfs_libs=$(pkg-config --libs libnfs)
+  else
+    # Only fatal when libnfs support was explicitly requested.
+    if test "$libnfs" = "yes" ; then
+      feature_not_found "libnfs" "libnfs"
+    fi
+    libnfs="no"
+  fi
+fi
+print_config "NFS engine" "$libnfs"
+
+##########################################
# Check if we have lex/yacc available
yacc="no"
yacc_is_bison="no"
@@ -3101,6 +3121,9 @@ fi
if test "$dfs" = "yes" ; then
output_sym "CONFIG_DFS"
fi
+if test "$libnfs" = "yes" ; then
+ output_sym "CONFIG_NFS"
+fi
if test "$march_set" = "no" && test "$build_native" = "yes" ; then
output_sym "CONFIG_BUILD_NATIVE"
fi
@@ -3140,6 +3163,12 @@ if test "$libnbd" = "yes" ; then
echo "LIBNBD_CFLAGS=$libnbd_cflags" >> $config_host_mak
echo "LIBNBD_LIBS=$libnbd_libs" >> $config_host_mak
fi
+if test "$libnfs" = "yes" ; then
+ output_sym "CONFIG_LIBNFS"
+ echo "CONFIG_LIBNFS=m" >> $config_host_mak
+ echo "LIBNFS_CFLAGS=$libnfs_cflags" >> $config_host_mak
+ echo "LIBNFS_LIBS=$libnfs_libs" >> $config_host_mak
+fi
if test "$dynamic_engines" = "yes" ; then
output_sym "CONFIG_DYNAMIC_ENGINES"
fi
diff --git a/engines/filedelete.c b/engines/filedelete.c
new file mode 100644
index 00000000..64c58639
--- /dev/null
+++ b/engines/filedelete.c
@@ -0,0 +1,115 @@
+/*
+ * file delete engine
+ *
+ * IO engine that doesn't do any IO, just delete files and track the latency
+ * of the file deletion.
+ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "../fio.h"
+
+struct fc_data {	/* per-job engine state: allocated by init(), freed by cleanup() */
+	enum fio_ddir stat_ddir;	/* direction passed to add_clat_sample() for each delete */
+};
+
+/*
+ * open_file hook of the filedelete engine: instead of opening the file it
+ * is removed with unlink(). Unless latency accounting is disabled, the time
+ * spent in unlink() is recorded as a completion latency sample.
+ *
+ * Returns 0 on success, 1 on failure (unsupported file type, "-" as file
+ * name, or unlink() error, which is also reported through td_verror()).
+ */
+static int delete_file(struct thread_data *td, struct fio_file *f)
+{
+	const int want_lat = !td->o.disable_lat;
+	struct timespec t0;
+
+	dprint(FD_FILE, "fd delete %s\n", f->file_name);
+
+	if (f->filetype != FIO_TYPE_FILE) {
+		log_err("fio: only files are supported\n");
+		return 1;
+	}
+	if (strcmp(f->file_name, "-") == 0) {
+		log_err("fio: can't read/write to stdin/out\n");
+		return 1;
+	}
+
+	if (want_lat)
+		fio_gettime(&t0, NULL);
+
+	if (unlink(f->file_name) == -1) {
+		/* grab errno before anything else can overwrite it */
+		int saved_errno = errno;
+		char msg[FIO_VERROR_SIZE];
+
+		snprintf(msg, sizeof(msg), "delete(%s)", f->file_name);
+		td_verror(td, saved_errno, msg);
+		return 1;
+	}
+
+	if (want_lat) {
+		struct fc_data *fcd = td->io_ops_data;
+		uint64_t elapsed_ns = ntime_since_now(&t0);
+
+		add_clat_sample(td, fcd->stat_ddir, elapsed_ns, 0, 0, 0);
+	}
+
+	return 0;
+}
+
+
+static enum fio_q_status queue_io(struct thread_data *td, struct io_u fio_unused *io_u)
+{
+	return FIO_Q_COMPLETED;	/* no I/O is issued; the io_u is reported complete right away */
+}
+
+/*
+ * Allocate the per-job state and choose the data direction under which
+ * delete latencies are recorded.
+ *
+ * Returns 0 on success, 1 (with the job error set to ENOMEM) when the
+ * state cannot be allocated.
+ */
+static int init(struct thread_data *td)
+{
+	struct fc_data *data;
+
+	data = calloc(1, sizeof(*data));
+	if (!data) {
+		td_verror(td, ENOMEM, "calloc");
+		return 1;
+	}
+
+	/* stat_ddir stays zero-initialised when the job is neither read nor write */
+	if (td_read(td))
+		data->stat_ddir = DDIR_READ;
+	else if (td_write(td))
+		data->stat_ddir = DDIR_WRITE;
+
+	td->io_ops_data = data;
+	return 0;
+}
+
+static int delete_invalidate(struct thread_data *td, struct fio_file *f)
+{
+	/*
+	 * Do nothing because the file is not opened: delete_file(), this
+	 * engine's open_file hook, only unlinks it.
+	 */
+	return 0;
+}
+
+static void cleanup(struct thread_data *td)
+{
+	struct fc_data *data = td->io_ops_data;	/* state installed by init() */
+
+	free(data);
+}
+
+static struct ioengine_ops ioengine = {
+	.name = "filedelete",
+	.version = FIO_IOOPS_VERSION,
+	.init = init,
+	.invalidate = delete_invalidate,
+	.cleanup = cleanup,
+	.queue = queue_io,	/* completes every io_u immediately */
+	.get_file_size = generic_get_file_size,
+	.open_file = delete_file,	/* the unlink() and its timing happen in the "open" hook */
+	.flags = FIO_SYNCIO | FIO_FAKEIO |
+		 FIO_NOSTATS | FIO_NOFILEHASH,
+};
+
+static void fio_init fio_filedelete_register(void)	/* NOTE(review): fio_init presumably runs this at start-up; confirm against the macro */
+{
+	register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_filedelete_unregister(void)	/* NOTE(review): fio_exit presumably runs this at exit; confirm against the macro */
+{
+	unregister_ioengine(&ioengine);
+}
diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c
index 810b55e2..3d605ed6 100644
--- a/engines/librpma_fio.c
+++ b/engines/librpma_fio.c
@@ -50,6 +50,17 @@ struct fio_option librpma_fio_options[] = {
.group = FIO_OPT_G_LIBRPMA,
},
{
+ .name = "busy_wait_polling",
+ .lname = "Set to 0 to wait for completion instead of busy-wait polling completion.",
+ .type = FIO_OPT_BOOL,
+ .off1 = offsetof(struct librpma_fio_options_values,
+ busy_wait_polling),
+ .help = "Set to false if you want to reduce CPU usage",
+ .def = "1",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_LIBRPMA,
+ },
+ {
.name = NULL,
},
};
diff --git a/engines/librpma_fio.h b/engines/librpma_fio.h
index 8cfb2e2d..fb89d99d 100644
--- a/engines/librpma_fio.h
+++ b/engines/librpma_fio.h
@@ -41,6 +41,8 @@ struct librpma_fio_options_values {
char *port;
/* Direct Write to PMem is possible */
unsigned int direct_write_to_pmem;
+ /* Set to 0 to wait for completion instead of busy-wait polling completion. */
+ unsigned int busy_wait_polling;
};
extern struct fio_option librpma_fio_options[];
diff --git a/engines/librpma_gpspm.c b/engines/librpma_gpspm.c
index ac614f46..74147709 100644
--- a/engines/librpma_gpspm.c
+++ b/engines/librpma_gpspm.c
@@ -683,12 +683,33 @@ static int server_cmpl_process(struct thread_data *td)
struct librpma_fio_server_data *csd = td->io_ops_data;
struct server_data *sd = csd->server_data;
struct rpma_completion *cmpl = &sd->msgs_queued[sd->msg_queued_nr];
+ struct librpma_fio_options_values *o = td->eo;
int ret;
ret = rpma_conn_completion_get(csd->conn, cmpl);
if (ret == RPMA_E_NO_COMPLETION) {
- /* lack of completion is not an error */
- return 0;
+ if (o->busy_wait_polling == 0) {
+ ret = rpma_conn_completion_wait(csd->conn);
+ if (ret == RPMA_E_NO_COMPLETION) {
+ /* lack of completion is not an error */
+ return 0;
+ } else if (ret != 0) {
+ librpma_td_verror(td, ret, "rpma_conn_completion_wait");
+ goto err_terminate;
+ }
+
+ ret = rpma_conn_completion_get(csd->conn, cmpl);
+ if (ret == RPMA_E_NO_COMPLETION) {
+ /* lack of completion is not an error */
+ return 0;
+ } else if (ret != 0) {
+ librpma_td_verror(td, ret, "rpma_conn_completion_get");
+ goto err_terminate;
+ }
+ } else {
+ /* lack of completion is not an error */
+ return 0;
+ }
} else if (ret != 0) {
librpma_td_verror(td, ret, "rpma_conn_completion_get");
goto err_terminate;
diff --git a/engines/libzbc.c b/engines/libzbc.c
index 2aacf7bb..3dde93db 100644
--- a/engines/libzbc.c
+++ b/engines/libzbc.c
@@ -19,6 +19,7 @@ struct libzbc_data {
struct zbc_device *zdev;
enum zbc_dev_model model;
uint64_t nr_sectors;
+ uint32_t max_open_seq_req;
};
static int libzbc_get_dev_info(struct libzbc_data *ld, struct fio_file *f)
@@ -32,6 +33,7 @@ static int libzbc_get_dev_info(struct libzbc_data *ld, struct fio_file *f)
zbc_get_device_info(ld->zdev, zinfo);
ld->model = zinfo->zbd_model;
ld->nr_sectors = zinfo->zbd_sectors;
+ ld->max_open_seq_req = zinfo->zbd_max_nr_open_seq_req;
dprint(FD_ZBD, "%s: vendor_id:%s, type: %s, model: %s\n",
f->file_name, zinfo->zbd_vendor_id,
@@ -335,6 +337,24 @@ err:
return -ret;
}
+/*
+ * Store the device's max_open_seq_req value in @max_open_zones, mapping
+ * ZBC_NO_LIMIT to 0.
+ *
+ * Returns 0 on success or the error returned by libzbc_open_dev().
+ */
+static int libzbc_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+				     unsigned int *max_open_zones)
+{
+	struct libzbc_data *ld;
+	int ret = libzbc_open_dev(td, f, &ld);
+
+	if (ret)
+		return ret;
+
+	*max_open_zones = (ld->max_open_seq_req == ZBC_NO_LIMIT) ?
+				0 : ld->max_open_seq_req;
+	return 0;
+}
+
ssize_t libzbc_rw(struct thread_data *td, struct io_u *io_u)
{
struct libzbc_data *ld = td->io_ops_data;
@@ -414,6 +434,7 @@ FIO_STATIC struct ioengine_ops ioengine = {
.get_zoned_model = libzbc_get_zoned_model,
.report_zones = libzbc_report_zones,
.reset_wp = libzbc_reset_wp,
+ .get_max_open_zones = libzbc_get_max_open_zones,
.queue = libzbc_queue,
.flags = FIO_SYNCIO | FIO_NOEXTEND | FIO_RAWIO,
};
diff --git a/engines/nfs.c b/engines/nfs.c
new file mode 100644
index 00000000..21be8833
--- /dev/null
+++ b/engines/nfs.c
@@ -0,0 +1,314 @@
+#include <stdlib.h>
+#include <poll.h>
+#include <nfsc/libnfs.h>
+#include <nfsc/libnfs-raw.h>
+#include <nfsc/libnfs-raw-mount.h>
+
+#include "../fio.h"
+#include "../optgroup.h"
+
+enum nfs_op_type {	/* NOTE(review): no enumerator is referenced in the visible part of this file */
+	NFS_READ_WRITE = 0,
+	NFS_STAT_MKDIR_RMDIR,
+	NFS_STAT_TOUCH_RM,
+};
+
+struct fio_libnfs_options {
+	struct nfs_context *context;	/* libnfs context, created on the first do_mount() call */
+	char *nfs_url;	/* value of the nfs_url option */
+	unsigned int queue_depth; /* nfs_callback needs this info, but doesn't have fio td structure to pull it from */
+	/* the following implement a circular queue of outstanding IOs */
+	int outstanding_events; /* IOs issued to libnfs, that have not returned yet */
+	int prev_requested_event_index; /* event last returned via fio_libnfs_event */
+	int next_buffered_event; /* round robin-pointer within events[] */
+	int buffered_event_count; /* IOs completed by libnfs, waiting for FIO */
+	int free_event_buffer_index; /* next free buffer */
+	struct io_u**events;	/* iodepth completion slots, allocated in do_mount() */
+};
+
+struct nfs_data {	/* per-file state, stored in fio_file->engine_data by fio_libnfs_open() */
+	struct nfsfh *nfsfh;	/* libnfs file handle filled in by nfs_open() */
+	struct fio_libnfs_options *options;	/* the job's engine options (td->eo) */
+};
+
+static struct fio_option options[] = {
+	{
+		.name = "nfs_url",
+		.lname = "nfs_url",
+		.type = FIO_OPT_STR_STORE,
+		.help = "URL in libnfs format, eg nfs://<server|ipv4|ipv6>/path[?arg=val[&arg=val]*]",
+		.off1 = offsetof(struct fio_libnfs_options, nfs_url),	/* parsed value lands in fio_libnfs_options.nfs_url */
+		.category = FIO_OPT_C_ENGINE,
+		.group = __FIO_OPT_G_NFS,
+	},
+	{
+		.name = NULL,	/* terminating entry */
+	},
+};
+
+/*
+ * Hand the next buffered completion to fio.
+ *
+ * Pops the io_u stored at next_buffered_event, advances that ring index
+ * modulo iodepth and decrements the buffered count. fio is expected to ask
+ * for events 0, 1, 2, ... in order; this is asserted.
+ */
+static struct io_u *fio_libnfs_event(struct thread_data *td, int event)
+{
+	struct fio_libnfs_options *o = td->eo;
+	const int slot = o->next_buffered_event;
+	struct io_u *done = o->events[slot];
+
+	assert(o->events[slot]);
+	o->events[slot] = NULL;
+	o->next_buffered_event = (slot + 1) % td->o.iodepth;
+
+	/* validate our state machine */
+	assert(o->buffered_event_count);
+	o->buffered_event_count--;
+	assert(done);
+
+	/* assert that fio_libnfs_event is being called in sequential fashion */
+	assert(event == 0 || o->prev_requested_event_index + 1 == event);
+
+	/* once the buffer is drained the next request starts again at 0 */
+	o->prev_requested_event_index = o->buffered_event_count ? event : -1;
+	return done;
+}
+
+static int nfs_event_loop(struct thread_data *td, bool flush) {
+	struct fio_libnfs_options *o = td->eo;
+	struct pollfd pfds[1]; /* nfs:0 */
+	/* we already have stuff queued for fio, no need to waste cpu on poll() */
+	if (o->buffered_event_count)
+		return o->buffered_event_count;
+	/*
+	 * fio core logic seems to stop calling this event-loop if we ever return with 0 events
+	 *
+	 * So keep waiting while the queue is full (outstanding == iodepth) or,
+	 * when flushing, while any request at all is still outstanding.
+	 */
+	#define SHOULD_WAIT() (o->outstanding_events == td->o.iodepth || (flush && o->outstanding_events))
+
+	do {
+		int timeout = SHOULD_WAIT() ? -1 : 0; /* -1 blocks in poll(), 0 returns immediately */
+		int ret = 0;
+		pfds[0].fd = nfs_get_fd(o->context);
+		pfds[0].events = nfs_which_events(o->context);
+		ret = poll(&pfds[0], 1, timeout);
+		if (ret < 0) {
+			if (errno == EINTR || errno == EAGAIN) {
+				continue; /* jumps to the SHOULD_WAIT() test of the do/while */
+			}
+			log_err("nfs: failed to poll events: %s.\n",
+				strerror(errno));
+			break;
+		}
+
+		ret = nfs_service(o->context, pfds[0].revents); /* presumably where nfs_callback() gets invoked -- confirm in libnfs */
+		if (ret < 0) {
+			log_err("nfs: socket is in an unrecoverable error state.\n");
+			break;
+		}
+	} while (SHOULD_WAIT());
+	return o->buffered_event_count; /* completions now waiting for fio_libnfs_event() */
+#undef SHOULD_WAIT
+}
+
+static int fio_libnfs_getevents(struct thread_data *td, unsigned int min,
+				  unsigned int max, const struct timespec *t)
+{
+	return nfs_event_loop(td, false);	/* min, max and t are not consulted */
+}
+
+/*
+ * Completion callback handed to nfs_pread_async()/nfs_pwrite_async().
+ *
+ * @res is the libnfs result: negative on failure, otherwise the number of
+ * bytes transferred. @private_data is the io_u that was queued. Read data
+ * is copied from @data into the io_u buffer, the residual is computed, and
+ * the io_u is appended to the ring of completions waiting for fio.
+ */
+static void nfs_callback(int res, struct nfs_context *nfs, void *data,
+			 void *private_data)
+{
+	struct io_u *io_u = private_data;
+	struct nfs_data *fdata = io_u->file->engine_data;
+	struct fio_libnfs_options *o = fdata->options;
+	int slot;
+
+	if (res < 0) {
+		log_err("Failed NFS operation(code:%d): %s\n", res, nfs_get_error(o->context));
+		io_u->error = -res;
+		/* res is used for read math below, don't wanna pass negative there */
+		res = 0;
+	} else if (io_u->ddir == DDIR_READ) {
+		memcpy(io_u->buf, data, res);
+		if (!res)
+			log_err("Got NFS EOF, this is probably not expected\n");
+	}
+
+	/* fio uses resid to track remaining data */
+	io_u->resid = io_u->xfer_buflen - res;
+
+	/* move the io_u from "outstanding" to "buffered" */
+	slot = o->free_event_buffer_index;
+	assert(!o->events[slot]);
+	o->events[slot] = io_u;
+	o->free_event_buffer_index = (slot + 1) % o->queue_depth;
+	o->outstanding_events--;
+	o->buffered_event_count++;
+}
+
+static int queue_write(struct fio_libnfs_options *o, struct io_u *io_u) {
+	struct nfs_data *nfs_data = io_u->engine_data;	/* set by fio_libnfs_queue() before calling here */
+	return nfs_pwrite_async(o->context, nfs_data->nfsfh,
+				io_u->offset, io_u->buflen, io_u->buf, nfs_callback,
+				io_u);	/* io_u comes back as private_data in nfs_callback() */
+}
+
+static int queue_read(struct fio_libnfs_options *o, struct io_u *io_u) {
+	struct nfs_data *nfs_data = io_u->engine_data;	/* set by fio_libnfs_queue() before calling here */
+	return nfs_pread_async(o->context, nfs_data->nfsfh, io_u->offset, io_u->buflen, nfs_callback, io_u);	/* data is copied into io_u->buf by nfs_callback() */
+}
+
+/*
+ * Submit one io_u to libnfs as an asynchronous read or write.
+ *
+ * Returns FIO_Q_QUEUED once libnfs has accepted the request (completion is
+ * delivered later through nfs_callback()), or FIO_Q_COMPLETED with
+ * td->error set when the request could not be submitted. Trim and unknown
+ * data directions are rejected without calling into libnfs.
+ */
+static enum fio_q_status fio_libnfs_queue(struct thread_data *td,
+					  struct io_u *io_u)
+{
+	struct nfs_data *nfs_data = io_u->file->engine_data;
+	struct fio_libnfs_options *o = nfs_data->options;
+	struct nfs_context *nfs = o->context;
+	bool submitted = true;
+	int err;
+
+	/* queue_read()/queue_write() fetch the file handle through this */
+	io_u->engine_data = nfs_data;
+	switch (io_u->ddir) {
+	case DDIR_WRITE:
+		err = queue_write(o, io_u);
+		break;
+	case DDIR_READ:
+		err = queue_read(o, io_u);
+		break;
+	case DDIR_TRIM:
+		log_err("nfs: trim is not supported\n");
+		submitted = false;
+		err = -1;
+		break;
+	default:
+		log_err("nfs: unhandled io %d\n", io_u->ddir);
+		submitted = false;
+		err = -1;
+		break;
+	}
+	if (err) {
+		/* libnfs only has an error string for calls it actually saw */
+		if (submitted)
+			log_err("nfs: Failed to queue nfs op: %s\n", nfs_get_error(nfs));
+		td->error = 1;
+		return FIO_Q_COMPLETED;
+	}
+	o->outstanding_events++;
+	return FIO_Q_QUEUED;
+}
+
+/*
+ * Do a mount if one has not been done before.
+ *
+ * Creates the libnfs context, allocates the completion ring (one slot per
+ * iodepth entry), parses @url and mounts the export it names.
+ *
+ * Returns 0 on success or when a context already exists, non-zero on
+ * failure. The context, if it was created, is left in place on failure so
+ * that the caller can still query nfs_get_error() on it.
+ * NOTE(review): because of that, a later call after a failed mount returns
+ * 0 without mounting again.
+ */
+static int do_mount(struct thread_data *td, const char *url)
+{
+	struct fio_libnfs_options *options = td->eo;
+	struct nfs_url *nfs_url;
+	size_t path_len, file_len;
+	char *mnt_dir;
+	int ret;
+
+	if (options->context)
+		return 0;
+
+	options->context = nfs_init_context();
+	if (options->context == NULL) {
+		log_err("nfs: failed to init nfs context\n");
+		return -1;
+	}
+
+	options->events = calloc(td->o.iodepth, sizeof(*options->events));
+	if (options->events == NULL) {
+		log_err("nfs: failed to allocate event buffer\n");
+		return -1;
+	}
+
+	options->prev_requested_event_index = -1;
+	options->queue_depth = td->o.iodepth;
+
+	nfs_url = nfs_parse_url_full(options->context, url);
+	if (nfs_url == NULL) {
+		log_err("nfs: failed to parse url %s\n", url);
+		return -1;
+	}
+
+	/* the mount point is the URL's directory part followed by its file part */
+	path_len = strlen(nfs_url->path);
+	file_len = strlen(nfs_url->file);
+	mnt_dir = malloc(path_len + file_len + 1);
+	if (mnt_dir == NULL) {
+		log_err("nfs: failed to allocate mount path\n");
+		nfs_destroy_url(nfs_url);
+		return -1;
+	}
+	memcpy(mnt_dir, nfs_url->path, path_len);
+	memcpy(mnt_dir + path_len, nfs_url->file, file_len + 1);
+
+	ret = nfs_mount(options->context, nfs_url->server, mnt_dir);
+	free(mnt_dir);
+	nfs_destroy_url(nfs_url);
+	return ret;
+}
+
+static int fio_libnfs_setup(struct thread_data *td)
+{
+	/* Using threads with libnfs causes fio to hang on exit, lower performance */
+	td->o.use_thread = 0;	/* overrides whatever the job requested */
+	return 0;
+}
+
+/*
+ * Release what do_mount() set up: unmount and destroy the libnfs context
+ * and free the completion ring. The context only exists once a file has
+ * been opened, so a job that never got that far has nothing to unmount.
+ */
+static void fio_libnfs_cleanup(struct thread_data *td)
+{
+	struct fio_libnfs_options *o = td->eo;
+
+	if (o->context) {
+		nfs_umount(o->context);
+		nfs_destroy_context(o->context);
+		o->context = NULL;
+	}
+	free(o->events);
+	o->events = NULL;
+}
+
+/*
+ * open_file hook: mount the export on first use, then open @f on it.
+ *
+ * Write jobs open with O_CREAT | O_RDWR, everything else with O_RDWR. On
+ * success the per-file state is stored in f->engine_data and 0 is
+ * returned. On failure a non-zero value is returned, nothing is stored in
+ * f->engine_data and the per-file state is freed.
+ */
+static int fio_libnfs_open(struct thread_data *td, struct fio_file *f)
+{
+	int ret;
+	struct fio_libnfs_options *options = td->eo;
+	struct nfs_data *nfs_data = NULL;
+	int flags = O_RDWR;
+
+	if (!options->nfs_url) {
+		log_err("nfs: nfs_url is a required parameter\n");
+		return -1;
+	}
+
+	ret = do_mount(td, options->nfs_url);
+	if (ret != 0) {
+		/* without a context there is no libnfs error string to fetch */
+		log_err("nfs: Failed to mount %s with code %d: %s\n",
+			options->nfs_url, ret,
+			options->context ? nfs_get_error(options->context) :
+					   "no nfs context");
+		return ret;
+	}
+
+	nfs_data = calloc(1, sizeof(*nfs_data));
+	if (!nfs_data) {
+		log_err("nfs: failed to allocate file data for %s\n", f->file_name);
+		return -1;
+	}
+	nfs_data->options = options;
+
+	if (td->o.td_ddir == TD_DDIR_WRITE)
+		flags |= O_CREAT;
+
+	ret = nfs_open(options->context, f->file_name, flags, &nfs_data->nfsfh);
+	if (ret != 0) {
+		log_err("Failed to open %s: %s\n", f->file_name, nfs_get_error(options->context));
+		/* the file stays unopened, so nobody else would free this */
+		free(nfs_data);
+		return ret;
+	}
+
+	f->engine_data = nfs_data;
+	return 0;
+}
+
+/*
+ * close_file hook: close the libnfs handle if one was opened, then free
+ * the per-file state and clear f->engine_data. Returns the nfs_close()
+ * result, or 0 when there was no handle to close.
+ */
+static int fio_libnfs_close(struct thread_data *td, struct fio_file *f)
+{
+	struct nfs_data *fdata = f->engine_data;
+	struct fio_libnfs_options *opts = fdata->options;
+	int rc = 0;
+
+	if (fdata->nfsfh)
+		rc = nfs_close(opts->context, fdata->nfsfh);
+
+	free(fdata);
+	f->engine_data = NULL;
+	return rc;
+}
+
+/*
+ * Hook for writing out outstanding data.
+ */
+static int fio_libnfs_commit(struct thread_data *td) {
+	nfs_event_loop(td, true);	/* flush=true: keep waiting while any request is still outstanding */
+	return 0;
+}
+
+struct ioengine_ops ioengine = {	/* NOTE(review): not static, unlike the filedelete engine's table; confirm this is intended for an in-tree engine */
+	.name = "nfs",
+	.version = FIO_IOOPS_VERSION,
+	.setup = fio_libnfs_setup,
+	.queue = fio_libnfs_queue,
+	.getevents = fio_libnfs_getevents,
+	.event = fio_libnfs_event,
+	.cleanup = fio_libnfs_cleanup,
+	.open_file = fio_libnfs_open,	/* also performs the mount on first use */
+	.close_file = fio_libnfs_close,
+	.commit = fio_libnfs_commit,
+	.flags = FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
+	.options = options,
+	.option_struct_size = sizeof(struct fio_libnfs_options),
+};
+
+static void fio_init fio_nfs_register(void)	/* NOTE(review): fio_init presumably runs this at start-up; confirm against the macro */
+{
+	register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_nfs_unregister(void)	/* NOTE(review): fio_exit presumably runs this at exit; confirm against the macro */
+{
+	unregister_ioengine(&ioengine);
+}
diff --git a/engines/skeleton_external.c b/engines/skeleton_external.c
index 7f3e4cb3..c79b6f11 100644
--- a/engines/skeleton_external.c
+++ b/engines/skeleton_external.c
@@ -194,6 +194,18 @@ static int fio_skeleton_reset_wp(struct thread_data *td, struct fio_file *f,
}
/*
+ * Hook called for getting the maximum number of open zones for a
+ * ZBD_HOST_MANAGED zoned block device.
+ * A @max_open_zones value set to zero means no limit.
+ */
+static int fio_skeleton_get_max_open_zones(struct thread_data *td,
+					   struct fio_file *f,
+					   unsigned int *max_open_zones)
+{
+	return 0;	/* skeleton stub: reports success without writing *max_open_zones */
+}
+
+/*
* Note that the structure is exported, so that fio can get it via
* dlsym(..., "ioengine"); for (and only for) external engines.
*/
@@ -212,6 +224,7 @@ struct ioengine_ops ioengine = {
.get_zoned_model = fio_skeleton_get_zoned_model,
.report_zones = fio_skeleton_report_zones,
.reset_wp = fio_skeleton_reset_wp,
+ .get_max_open_zones = fio_skeleton_get_max_open_zones,
.options = options,
.option_struct_size = sizeof(struct fio_skeleton_options),
};
diff --git a/examples/filedelete-ioengine.fio b/examples/filedelete-ioengine.fio
new file mode 100644
index 00000000..3c0028f9
--- /dev/null
+++ b/examples/filedelete-ioengine.fio
@@ -0,0 +1,18 @@
+# Example filedelete job
+
+# The 'filedelete' engine only does 'unlink(filename)'; the file is never open()ed.
+# 'filesize' must be set so that the files are created at the setup stage.
+# 'unlink' is best set to 0, since the files are deleted during the measurement.
+# Options that disable completion latency output, such as 'disable_clat' and 'gtod_reduce', must not be set.
+[global]
+ioengine=filedelete
+filesize=4k
+nrfiles=200
+unlink=0
+
+[t0]
+[t1]
+[t2]
+[t3]
+[t4]
+[t5]
diff --git a/examples/librpma_gpspm-server.fio b/examples/librpma_gpspm-server.fio
index d618f2db..67e92a28 100644
--- a/examples/librpma_gpspm-server.fio
+++ b/examples/librpma_gpspm-server.fio
@@ -20,6 +20,8 @@ thread
# set to 1 (true) ONLY when Direct Write to PMem from the remote host is possible
# (https://pmem.io/rpma/documentation/basic-direct-write-to-pmem.html)
direct_write_to_pmem=0
+# set to 0 (false) to wait for completion instead of busy-wait polling completion.
+busy_wait_polling=1
numjobs=1 # number of expected incomming connections
iodepth=2 # number of parallel GPSPM requests
size=100MiB # size of workspace for a single connection
diff --git a/examples/nfs.fio b/examples/nfs.fio
new file mode 100644
index 00000000..f856cebf
--- /dev/null
+++ b/examples/nfs.fio
@@ -0,0 +1,22 @@
+[global]
+nfs_url=nfs://127.0.0.1/nfs
+blocksize=524288
+iodepth=10
+ioengine=nfs
+size=104857600
+lat_percentiles=1
+group_reporting
+numjobs=10
+ramp_time=5s
+filename_format=myfiles.$clientuid.$jobnum.$filenum
+time_based=1
+
+[write]
+rw=write
+runtime=10s
+stonewall
+
+[read]
+wait_for=write
+rw=randread
+runtime=10s
diff --git a/filesetup.c b/filesetup.c
index e664f8b4..296de5a1 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -226,11 +226,16 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
if (r < 0) {
int __e = errno;
- if (__e == ENOSPC) {
+ if (__e == ENOSPC || __e == EDQUOT) {
+ const char *__e_name;
if (td->o.fill_device)
break;
- log_info("fio: ENOSPC on laying out "
- "file, stopping\n");
+ if (__e == ENOSPC)
+ __e_name = "ENOSPC";
+ else
+ __e_name = "EDQUOT";
+ log_info("fio: %s on laying out "
+ "file, stopping\n", __e_name);
}
td_verror(td, errno, "write");
} else
diff --git a/fio.1 b/fio.1
index f959e00d..ab08cb01 100644
--- a/fio.1
+++ b/fio.1
@@ -288,6 +288,15 @@ Pi means pebi (Pi) or 1024**5
.PD
.RE
.P
+For Zone Block Device Mode:
+.RS
+.P
+.PD 0
+z means Zone
+.P
+.PD
+.RE
+.P
With `kb_base=1024' (the default), the unit prefixes are opposite
from those specified in the SI and IEC 80000-13 standards to provide
compatibility with old scripts. For example, 4k means 4096.
@@ -690,7 +699,8 @@ of how that would work.
.TP
.BI ioscheduler \fR=\fPstr
Attempt to switch the device hosting the file to the specified I/O scheduler
-before running.
+before running. If the file is a pipe, a character device file or if device
+hosting the file could not be determined, this option is ignored.
.TP
.BI create_serialize \fR=\fPbool
If true, serialize the file creation for the jobs. This may be handy to
@@ -1060,13 +1070,14 @@ should be associated with them.
.TP
.BI offset \fR=\fPint[%|z]
Start I/O at the provided offset in the file, given as either a fixed size in
-bytes or a percentage. If a percentage is given, the generated offset will be
+bytes, zones or a percentage. If a percentage is given, the generated offset will be
aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR if
provided. Data before the given offset will not be touched. This
effectively caps the file size at `real_size \- offset'. Can be combined with
\fBsize\fR to constrain the start and end range of the I/O workload.
A percentage can be specified by a number between 1 and 100 followed by '%',
-for example, `offset=20%' to specify 20%.
+for example, `offset=20%' to specify 20%. In ZBD mode, the value can be set as a
+number of zones using 'z'.
.TP
.BI offset_align \fR=\fPint
If set to non-zero value, the byte offset generated by a percentage \fBoffset\fR
@@ -1081,7 +1092,8 @@ specified). This option is useful if there are several jobs which are
intended to operate on a file in parallel disjoint segments, with even
spacing between the starting points. Percentages can be used for this option.
If a percentage is given, the generated offset will be aligned to the minimum
-\fBblocksize\fR or to the value of \fBoffset_align\fR if provided.
+\fBblocksize\fR or to the value of \fBoffset_align\fR if provided. In ZBD mode, the
+value can be set as a number of zones using 'z'.
.TP
.BI number_ios \fR=\fPint
Fio will normally perform I/Os until it has exhausted the size of the region
@@ -1606,9 +1618,9 @@ set to the physical size of the given files or devices if they exist.
If this option is not specified, fio will use the full size of the given
files or devices. If the files do not exist, size must be given. It is also
possible to give size as a percentage between 1 and 100. If `size=20%' is
-given, fio will use 20% of the full size of the given files or devices.
-Can be combined with \fBoffset\fR to constrain the start and end range
-that I/O will be done within.
+given, fio will use 20% of the full size of the given files or devices. In ZBD mode,
+size can be given as a number of zones using 'z'. Can be combined with \fBoffset\fR to
+constrain the start and end range that I/O will be done within.
.TP
.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
Normally fio operates within the region set by \fBsize\fR, which means
@@ -1620,7 +1632,8 @@ will perform I/O within the first 20GiB but exit when 5GiB have been
done. The opposite is also possible \-\- if \fBsize\fR is set to 20GiB,
and \fBio_size\fR is set to 40GiB, then fio will do 40GiB of I/O within
the 0..20GiB region. Value can be set as percentage: \fBio_size\fR=N%.
-In this case \fBio_size\fR multiplies \fBsize\fR= value.
+In this case \fBio_size\fR multiplies \fBsize\fR= value. In ZBD mode, the value can
+also be set as a number of zones using 'z'.
.TP
.BI filesize \fR=\fPirange(int)
Individual file sizes. May be a range, in which case fio will select sizes
@@ -1637,7 +1650,8 @@ of a file. This option is ignored on non-regular files.
.TP
.BI fill_device \fR=\fPbool "\fR,\fB fill_fs" \fR=\fPbool
Sets size to something really large and waits for ENOSPC (no space left on
-device) as the terminating condition. Only makes sense with sequential
+device) or EDQUOT (disk quota exceeded)
+as the terminating condition. Only makes sense with sequential
write. For a read workload, the mount point will be filled first then I/O
started on the result. This option doesn't make sense if operating on a raw
device node, since the size of that is already known by the file system.
@@ -1847,6 +1861,11 @@ Simply do stat() and do no I/O to the file. You need to set 'filesize'
and 'nrfiles', so that files will be created.
This engine is to measure file lookup and meta data access.
.TP
+.B filedelete
+Simply delete the files with unlink() and do no I/O to them. You need to set 'filesize'
+and 'nrfiles', so that files will be created.
+This engine is for measuring file deletion.
+.TP
.B libpmem
Read and write using mmap I/O to a file on a filesystem
mounted with DAX on a persistent memory device through the PMDK
@@ -1882,6 +1901,12 @@ not be \fBcudamalloc\fR. This ioengine defines engine specific options.
.B dfs
I/O engine supporting asynchronous read and write operations to the DAOS File
System (DFS) via libdfs.
+.TP
+.B nfs
+I/O engine supporting asynchronous read and write operations to
+NFS filesystems from userspace via libnfs. This is useful for
+achieving higher concurrency and thus throughput than is possible
+via kernel NFS.
.SS "I/O engine specific parameters"
In addition, there are some parameters which are only valid when a specific
\fBioengine\fR is in use. These are used identically to normal parameters,
@@ -1993,6 +2018,10 @@ The IP address to be used for RDMA-CM based I/O.
.BI (librpma_*_server)direct_write_to_pmem \fR=\fPbool
Set to 1 only when Direct Write to PMem from the remote host is possible. Otherwise, set to 0.
.TP
+.BI (librpma_*_server)busy_wait_polling \fR=\fPbool
+Set to 0 to wait for completion instead of busy-wait polling completion.
+Default: 1.
+.TP
.BI (netsplice,net)interface \fR=\fPstr
The IP address of the network interface used to send or receive UDP
multicast.
@@ -2260,6 +2289,10 @@ Use DAOS container's chunk size by default.
.BI (dfs)object_class
Specificy a different object class for the dfs file.
Use DAOS container's object class by default.
+.TP
+.BI (nfs)nfs_url
+URL in libnfs format, e.g. nfs://<server|ipv4|ipv6>/path[?arg=val[&arg=val]*]
+Refer to the libnfs README for more details.
.SS "I/O depth"
.TP
.BI iodepth \fR=\fPint
diff --git a/gettime.c b/gettime.c
index e3f483a7..099e9d9f 100644
--- a/gettime.c
+++ b/gettime.c
@@ -679,7 +679,7 @@ int fio_monotonic_clocktest(int debug)
unsigned int i;
os_cpu_mask_t mask;
-#ifdef CONFIG_PTHREAD_GETAFFINITY
+#ifdef FIO_HAVE_GET_THREAD_AFFINITY
fio_get_thread_affinity(mask);
#else
memset(&mask, 0, sizeof(mask));
diff --git a/init.c b/init.c
index 37bff876..60c7cff4 100644
--- a/init.c
+++ b/init.c
@@ -633,6 +633,11 @@ static int fixup_options(struct thread_data *td)
ret |= 1;
}
+ if (o->zone_mode == ZONE_MODE_ZBD && !o->create_serialize) {
+ log_err("fio: --zonemode=zbd and --create_serialize=0 are not compatible.\n");
+ ret |= 1;
+ }
+
if (o->zone_mode == ZONE_MODE_STRIDED && !o->zone_size) {
log_err("fio: --zonesize must be specified when using --zonemode=strided.\n");
ret |= 1;
diff --git a/ioengines.c b/ioengines.c
index f88b0537..dd61af07 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -234,7 +234,6 @@ void free_ioengine(struct thread_data *td)
if (td->io_ops->dlhandle) {
dprint(FD_IO, "dlclose ioengine %s\n", td->io_ops->name);
dlclose(td->io_ops->dlhandle);
- td->io_ops->dlhandle = NULL;
}
td->io_ops = NULL;
@@ -414,7 +413,6 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
if (!td->io_ops->commit) {
io_u_mark_submit(td, 1);
io_u_mark_complete(td, 1);
- zbd_put_io_u(td, io_u);
}
if (ret == FIO_Q_COMPLETED) {
diff --git a/ioengines.h b/ioengines.h
index 1d01ab0a..b3f755b4 100644
--- a/ioengines.h
+++ b/ioengines.h
@@ -8,7 +8,7 @@
#include "io_u.h"
#include "zbd_types.h"
-#define FIO_IOOPS_VERSION 29
+#define FIO_IOOPS_VERSION 30
#ifndef CONFIG_DYNAMIC_ENGINES
#define FIO_STATIC static
@@ -59,6 +59,8 @@ struct ioengine_ops {
uint64_t, struct zbd_zone *, unsigned int);
int (*reset_wp)(struct thread_data *, struct fio_file *,
uint64_t, uint64_t);
+ int (*get_max_open_zones)(struct thread_data *, struct fio_file *,
+ unsigned int *);
int option_struct_size;
struct fio_option *options;
};
diff --git a/optgroup.c b/optgroup.c
index 15a16229..bebb4a51 100644
--- a/optgroup.c
+++ b/optgroup.c
@@ -186,6 +186,10 @@ static const struct opt_group fio_opt_cat_groups[] = {
.mask = FIO_OPT_G_DFS,
},
{
+ .name = "NFS I/O engine", /* nfs */
+ .mask = FIO_OPT_G_NFS,
+ },
+ {
.name = NULL,
},
};
diff --git a/optgroup.h b/optgroup.h
index ff748629..1fb84a29 100644
--- a/optgroup.h
+++ b/optgroup.h
@@ -70,6 +70,7 @@ enum opt_category_group {
__FIO_OPT_G_NR,
__FIO_OPT_G_LIBCUFILE,
__FIO_OPT_G_DFS,
+ __FIO_OPT_G_NFS,
FIO_OPT_G_RATE = (1ULL << __FIO_OPT_G_RATE),
FIO_OPT_G_ZONE = (1ULL << __FIO_OPT_G_ZONE),
@@ -110,6 +111,7 @@ enum opt_category_group {
FIO_OPT_G_INVALID = (1ULL << __FIO_OPT_G_NR),
FIO_OPT_G_ISCSI = (1ULL << __FIO_OPT_G_ISCSI),
FIO_OPT_G_NBD = (1ULL << __FIO_OPT_G_NBD),
+ FIO_OPT_G_NFS = (1ULL << __FIO_OPT_G_NFS),
FIO_OPT_G_IOURING = (1ULL << __FIO_OPT_G_IOURING),
FIO_OPT_G_FILESTAT = (1ULL << __FIO_OPT_G_FILESTAT),
FIO_OPT_G_LIBCUFILE = (1ULL << __FIO_OPT_G_LIBCUFILE),
diff --git a/options.c b/options.c
index ddabaa82..b82a10aa 100644
--- a/options.c
+++ b/options.c
@@ -2026,6 +2026,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.help = "DAOS File System (dfs) IO engine",
},
#endif
+#ifdef CONFIG_NFS
+ { .ival = "nfs",
+ .help = "NFS IO engine",
+ },
+#endif
},
},
{
diff --git a/os/os-aix.h b/os/os-aix.h
index 1aab96e0..db99eef4 100644
--- a/os/os-aix.h
+++ b/os/os-aix.h
@@ -18,6 +18,12 @@
#define FIO_USE_GENERIC_SWAP
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
static inline int blockdev_invalidate_cache(struct fio_file *f)
{
return ENOTSUP;
diff --git a/os/os-android.h b/os/os-android.h
index 3c050776..a81cd815 100644
--- a/os/os-android.h
+++ b/os/os-android.h
@@ -58,6 +58,12 @@
#define MAP_HUGETLB 0x40000 /* arch specific */
#endif
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
#ifndef CONFIG_NO_SHM
/*
* Bionic doesn't support SysV shared memeory, so implement it using ashmem
@@ -65,11 +71,15 @@
#include <stdio.h>
#include <linux/ashmem.h>
#include <linux/shm.h>
+#include <android/api-level.h>
+#if __ANDROID_API__ >= __ANDROID_API_O__
+#include <android/sharedmem.h>
+#else
+#define ASHMEM_DEVICE "/dev/ashmem"
+#endif
#define shmid_ds shmid64_ds
#define SHM_HUGETLB 04000
-#define ASHMEM_DEVICE "/dev/ashmem"
-
static inline int shmctl(int __shmid, int __cmd, struct shmid_ds *__buf)
{
int ret=0;
@@ -83,6 +93,16 @@ static inline int shmctl(int __shmid, int __cmd, struct shmid_ds *__buf)
return ret;
}
+#if __ANDROID_API__ >= __ANDROID_API_O__
+/*
+ * shmget() replacement for Android API level O and later, built on
+ * ASharedMemory_create(). The region is named after the decimal value of
+ * @__key; @__shmflg is ignored.
+ *
+ * sizeof(uint64_t) extra bytes are requested on top of @__size.
+ * NOTE(review): presumably room for a size header consumed by shmat() --
+ * confirm against the rest of this header.
+ *
+ * Returns whatever ASharedMemory_create() returns.
+ */
+static inline int shmget(key_t __key, size_t __size, int __shmflg)
+{
+	/* "-2147483648" is 11 characters; one more byte for the NUL. */
+	char keybuf[12];
+
+	/* Bounded formatting: never write past keybuf, whatever the key is. */
+	snprintf(keybuf, sizeof(keybuf), "%d", __key);
+
+	return ASharedMemory_create(keybuf, __size + sizeof(uint64_t));
+}
+#else
static inline int shmget(key_t __key, size_t __size, int __shmflg)
{
int fd,ret;
@@ -108,6 +128,7 @@ error:
close(fd);
return ret;
}
+#endif
static inline void *shmat(int __shmid, const void *__shmaddr, int __shmflg)
{
diff --git a/os/os-dragonfly.h b/os/os-dragonfly.h
index 44bfcd5d..6e465894 100644
--- a/os/os-dragonfly.h
+++ b/os/os-dragonfly.h
@@ -92,6 +92,12 @@ typedef cpumask_t os_cpu_mask_t;
/* No CPU_COUNT(), but use the default function defined in os/os.h */
#define fio_cpu_count(mask) CPU_COUNT((mask))
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
static inline int fio_cpuset_init(os_cpu_mask_t *mask)
{
CPUMASK_ASSZERO(*mask);
diff --git a/os/os-freebsd.h b/os/os-freebsd.h
index b3addf98..1b24fa02 100644
--- a/os/os-freebsd.h
+++ b/os/os-freebsd.h
@@ -37,6 +37,12 @@ typedef cpuset_t os_cpu_mask_t;
#define fio_cpu_isset(mask, cpu) (CPU_ISSET((cpu), (mask)) != 0)
#define fio_cpu_count(mask) CPU_COUNT((mask))
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
static inline int fio_cpuset_init(os_cpu_mask_t *mask)
{
CPU_ZERO(mask);
diff --git a/os/os-hpux.h b/os/os-hpux.h
index c1dafe42..a80cb2bc 100644
--- a/os/os-hpux.h
+++ b/os/os-hpux.h
@@ -38,6 +38,13 @@
#define FIO_USE_GENERIC_SWAP
#define FIO_OS_HAVE_AIOCB_TYPEDEF
+
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
typedef struct aiocb64 os_aiocb_t;
static inline int blockdev_invalidate_cache(struct fio_file *f)
diff --git a/os/os-linux.h b/os/os-linux.h
index ea8d7922..f7137abe 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -74,8 +74,11 @@ typedef cpu_set_t os_cpu_mask_t;
sched_getaffinity((pid), (ptr))
#endif
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
#define fio_get_thread_affinity(mask) \
pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
#define fio_cpu_clear(mask, cpu) (void) CPU_CLR((cpu), (mask))
#define fio_cpu_set(mask, cpu) (void) CPU_SET((cpu), (mask))
diff --git a/os/os-mac.h b/os/os-mac.h
index 683aab32..ec2cc1e5 100644
--- a/os/os-mac.h
+++ b/os/os-mac.h
@@ -27,6 +27,12 @@
#define fio_swap32(x) OSSwapInt32(x)
#define fio_swap64(x) OSSwapInt64(x)
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
#ifndef CONFIG_CLOCKID_T
typedef unsigned int clockid_t;
#endif
diff --git a/os/os-netbsd.h b/os/os-netbsd.h
index abc1d3cb..624c7fa5 100644
--- a/os/os-netbsd.h
+++ b/os/os-netbsd.h
@@ -35,6 +35,12 @@
#define fio_swap32(x) bswap32(x)
#define fio_swap64(x) bswap64(x)
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
{
struct disklabel dl;
diff --git a/os/os-openbsd.h b/os/os-openbsd.h
index 994bf078..f1bad671 100644
--- a/os/os-openbsd.h
+++ b/os/os-openbsd.h
@@ -35,6 +35,12 @@
#define fio_swap32(x) swap32(x)
#define fio_swap64(x) swap64(x)
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
{
struct disklabel dl;
diff --git a/os/os-solaris.h b/os/os-solaris.h
index f1966f44..ea1f081c 100644
--- a/os/os-solaris.h
+++ b/os/os-solaris.h
@@ -46,6 +46,12 @@ struct solaris_rand_seed {
#define os_ctime_r(x, y, z) ctime_r((x), (y), (z))
#define FIO_OS_HAS_CTIME_R
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask) \
+ pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
typedef psetid_t os_cpu_mask_t;
static inline int chardev_size(struct fio_file *f, unsigned long long *bytes)
diff --git a/os/os.h b/os/os.h
index b46f4164..e47d3d97 100644
--- a/os/os.h
+++ b/os/os.h
@@ -7,6 +7,7 @@
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
+#include <errno.h>
#include "../arch/arch.h" /* IWYU pragma: export */
#include "../lib/types.h"
@@ -58,6 +59,10 @@ typedef enum {
#error "unsupported os"
#endif
+#ifndef EDQUOT
+#define EDQUOT EIO
+#endif
+
#ifdef CONFIG_POSIXAIO
#include <aio.h>
#ifndef FIO_OS_HAVE_AIOCB_TYPEDEF
diff --git a/os/windows/WixUI_Minimal_NoEULA.wxs b/os/windows/WixUI_Minimal_NoEULA.wxs
new file mode 100755
index 00000000..48391186
--- /dev/null
+++ b/os/windows/WixUI_Minimal_NoEULA.wxs
@@ -0,0 +1,96 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright (c) .NET Foundation and contributors. All rights reserved. Licensed under the Microsoft Reciprocal License. See LICENSE.TXT file in the project root for full license information. -->
+
+
+
+<!--
+First-time install dialog sequence:
+ - WixUI_MyWelcomeDlg
+Maintenance dialog sequence:
+ WixUI_MaintenanceWelcomeDlg
+ - WixUI_MaintenanceTypeDlg
+ - WixUI_VerifyReadyDlg
+-->
+
+<Wix xmlns="http://schemas.microsoft.com/wix/2006/wi">
+ <Fragment>
+ <UI Id="WixUI_Minimal_NoEULA">
+ <TextStyle Id="WixUI_Font_Normal" FaceName="Tahoma" Size="8" />
+ <TextStyle Id="WixUI_Font_Bigger" FaceName="Tahoma" Size="12" />
+ <TextStyle Id="WixUI_Font_Title" FaceName="Tahoma" Size="9" Bold="yes" />
+
+ <Property Id="DefaultUIFont" Value="WixUI_Font_Normal" />
+ <Property Id="WixUI_Mode" Value="Minimal" />
+
+ <DialogRef Id="ErrorDlg" />
+ <DialogRef Id="FatalError" />
+ <DialogRef Id="FilesInUse" />
+ <DialogRef Id="MsiRMFilesInUse" />
+ <DialogRef Id="PrepareDlg" />
+ <DialogRef Id="ProgressDlg" />
+ <DialogRef Id="ResumeDlg" />
+ <DialogRef Id="UserExit" />
+ <DialogRef Id="MyWelcomeDlg" />
+
+ <Dialog Id="MyWelcomeDlg" Width="370" Height="270" Title="!(loc.WelcomeDlg_Title)">
+ <Control Id="Install" Type="PushButton" ElevationShield="yes" X="236" Y="243" Width="56" Height="17" Default="yes" Hidden="yes" Text="!(loc.WelcomeEulaDlgInstall)" >
+ <Publish Property="WixUI_InstallMode" Value="Update">Installed AND PATCH</Publish>
+ <Publish Event="SpawnWaitDialog" Value="WaitForCostingDlg">!(wix.WixUICostingPopupOptOut) OR CostingComplete = 1</Publish>
+ <Publish Event="EndDialog" Value="Return"><![CDATA[OutOfDiskSpace <> 1]]></Publish>
+ <Publish Event="SpawnDialog" Value="OutOfRbDiskDlg">OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 0 AND (PROMPTROLLBACKCOST="P" OR NOT PROMPTROLLBACKCOST)</Publish>
+ <Publish Event="EndDialog" Value="Return">OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 0 AND PROMPTROLLBACKCOST="D"</Publish>
+ <Publish Event="EnableRollback" Value="False">OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 0 AND PROMPTROLLBACKCOST="D"</Publish>
+ <Publish Event="SpawnDialog" Value="OutOfDiskDlg">(OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 1) OR (OutOfDiskSpace = 1 AND PROMPTROLLBACKCOST="F")</Publish>
+ <Condition Action="show">ALLUSERS</Condition>
+ </Control>
+ <Control Id="InstallNoShield" Type="PushButton" ElevationShield="no" X="212" Y="243" Width="80" Height="17" Default="yes" Text="!(loc.WelcomeEulaDlgInstall)" Hidden="yes">
+ <!-- No "disable unless LicenseAccepted" condition here: this dialog has no EULA checkbox, so LicenseAccepted is never set and the button would stay disabled for per-user installs. -->
+ <Publish Event="SpawnWaitDialog" Value="WaitForCostingDlg">!(wix.WixUICostingPopupOptOut) OR CostingComplete = 1</Publish>
+ <Publish Event="EndDialog" Value="Return"><![CDATA[OutOfDiskSpace <> 1]]></Publish>
+ <Publish Event="SpawnDialog" Value="OutOfRbDiskDlg">OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 0 AND (PROMPTROLLBACKCOST="P" OR NOT PROMPTROLLBACKCOST)</Publish>
+ <Publish Event="EndDialog" Value="Return">OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 0 AND PROMPTROLLBACKCOST="D"</Publish>
+ <Publish Event="EnableRollback" Value="False">OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 0 AND PROMPTROLLBACKCOST="D"</Publish>
+ <Publish Event="SpawnDialog" Value="OutOfDiskDlg">(OutOfDiskSpace = 1 AND OutOfNoRbDiskSpace = 1) OR (OutOfDiskSpace = 1 AND PROMPTROLLBACKCOST="F")</Publish>
+ <Condition Action="show">NOT ALLUSERS</Condition>
+ </Control>
+ <Control Id="Cancel" Type="PushButton" X="304" Y="243" Width="56" Height="17" Cancel="yes" Text="!(loc.WixUICancel)">
+ <Publish Event="SpawnDialog" Value="CancelDlg">1</Publish>
+ </Control>
+ <Control Id="Bitmap" Type="Bitmap" X="0" Y="0" Width="370" Height="234" TabSkip="no" Text="!(loc.WelcomeDlgBitmap)" />
+ <Control Id="Back" Type="PushButton" X="180" Y="243" Width="56" Height="17" Disabled="yes" Text="!(loc.WixUIBack)" />
+ <Control Id="BottomLine" Type="Line" X="0" Y="234" Width="370" Height="0" />
+ <Control Id="Description" Type="Text" X="135" Y="80" Width="220" Height="60" Transparent="yes" NoPrefix="yes" Text="!(loc.MyWelcomeDlgDescription)" >
+ <Condition Action="show">NOT Installed OR NOT PATCH</Condition>
+ <Condition Action="hide">Installed AND PATCH</Condition>
+ </Control>
+ <Control Id="PatchDescription" Type="Text" X="135" Y="80" Width="220" Height="60" Transparent="yes" NoPrefix="yes" Text="!(loc.WelcomeUpdateDlgDescriptionUpdate)" >
+ <Condition Action="show">Installed AND PATCH</Condition>
+ <Condition Action="hide">NOT Installed OR NOT PATCH</Condition>
+ </Control>
+ <Control Id="Title" Type="Text" X="135" Y="20" Width="220" Height="60" Transparent="yes" NoPrefix="yes" Text="!(loc.WelcomeDlgTitle)" />
+ </Dialog>
+
+ <Publish Dialog="ExitDialog" Control="Finish" Event="EndDialog" Value="Return" Order="999">1</Publish>
+
+ <Publish Dialog="VerifyReadyDlg" Control="Back" Event="NewDialog" Value="MaintenanceTypeDlg">1</Publish>
+
+ <Publish Dialog="MaintenanceWelcomeDlg" Control="Next" Event="NewDialog" Value="MaintenanceTypeDlg">1</Publish>
+
+ <Publish Dialog="MaintenanceTypeDlg" Control="RepairButton" Event="NewDialog" Value="VerifyReadyDlg">1</Publish>
+ <Publish Dialog="MaintenanceTypeDlg" Control="RemoveButton" Event="NewDialog" Value="VerifyReadyDlg">1</Publish>
+ <Publish Dialog="MaintenanceTypeDlg" Control="Back" Event="NewDialog" Value="MaintenanceWelcomeDlg">1</Publish>
+
+ <Publish Dialog="MyWelcomeDlg" Control="Install" Event="NewDialog" Value="PrepareDlg">1</Publish>
+ <Publish Dialog="VerifyReadyDlg" Control="Back" Event="NewDialog" Value="WelcomeDlg" Order="2">Installed AND PATCH</Publish>
+
+ <InstallUISequence>
+ <Show Dialog="WelcomeDlg" Before="ProgressDlg">0</Show>
+ <Show Dialog="MyWelcomeDlg" Before="ProgressDlg">NOT Installed</Show>
+ </InstallUISequence>
+
+ <Property Id="ARPNOMODIFY" Value="1" />
+ </UI>
+
+ <UIRef Id="WixUI_Common" />
+ </Fragment>
+</Wix> \ No newline at end of file
diff --git a/os/windows/WixUI_fio.wxl b/os/windows/WixUI_fio.wxl
new file mode 100755
index 00000000..11ec736a
--- /dev/null
+++ b/os/windows/WixUI_fio.wxl
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (c) .NET Foundation and contributors. All rights reserved. Licensed under the Microsoft Reciprocal License. See LICENSE.TXT file in the project root for full license information. -->
+
+
+<WixLocalization Culture="en-US" Codepage="1252" xmlns="http://schemas.microsoft.com/wix/2006/localization">
+ <!-- _locID@Culture="en-US" _locComment="American English" -->
+ <!-- _locID@Codepage="1252" _locComment="Windows-1252" -->
+
+<String Id="MyWelcomeDlgDescription" Overridable="yes">
+<!-- _locID_text="MyWelcomeDlgDescription" _locComment="MyWelcomeDlgDescription" -->The Setup Wizard will install [ProductName] on your computer. Click Install to continue or Cancel to exit the Setup Wizard.
+</String>
+</WixLocalization> \ No newline at end of file
diff --git a/os/windows/dobuild.cmd b/os/windows/dobuild.cmd
index 08df3e87..7b9cb1dd 100644
--- a/os/windows/dobuild.cmd
+++ b/os/windows/dobuild.cmd
@@ -44,7 +44,10 @@ if exist ..\..\fio.pdb (
@if ERRORLEVEL 1 goto end
"%WIX%bin\candle" -nologo -arch %FIO_ARCH% examples.wxs
@if ERRORLEVEL 1 goto end
-"%WIX%bin\light" -nologo -sice:ICE61 install.wixobj examples.wixobj -ext WixUIExtension -out %FIO_VERSION%-%FIO_ARCH%.msi
+"%WIX%bin\candle" -nologo -arch %FIO_ARCH% WixUI_Minimal_NoEULA.wxs
+@if ERRORLEVEL 1 goto end
+
+"%WIX%bin\light" -nologo -sice:ICE61 install.wixobj examples.wixobj WixUI_Minimal_NoEULA.wixobj -loc WixUI_fio.wxl -ext WixUIExtension -out %FIO_VERSION%-%FIO_ARCH%.msi
:end
if defined SIGN_FIO (
diff --git a/os/windows/eula.rtf b/os/windows/eula.rtf
deleted file mode 100755
index a931017c..00000000
--- a/os/windows/eula.rtf
+++ /dev/null
Binary files differ
diff --git a/os/windows/install.wxs b/os/windows/install.wxs
index f73ec5e2..7773bb3b 100755
--- a/os/windows/install.wxs
+++ b/os/windows/install.wxs
@@ -107,7 +107,7 @@
<WixVariable Id="WixUILicenseRtf" Value="eula.rtf" />
- <UIRef Id="WixUI_Minimal"/>
+ <UIRef Id="WixUI_Minimal_NoEULA"/>
<MajorUpgrade AllowDowngrades="no" DowngradeErrorMessage="A newer version of the application is already installed."
AllowSameVersionUpgrades="yes"/>
diff --git a/oslib/blkzoned.h b/oslib/blkzoned.h
index 4cc071dc..719b041d 100644
--- a/oslib/blkzoned.h
+++ b/oslib/blkzoned.h
@@ -16,6 +16,8 @@ extern int blkzoned_report_zones(struct thread_data *td,
struct zbd_zone *zones, unsigned int nr_zones);
extern int blkzoned_reset_wp(struct thread_data *td, struct fio_file *f,
uint64_t offset, uint64_t length);
+extern int blkzoned_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+ unsigned int *max_open_zones);
#else
/*
* Define stubs for systems that do not have zoned block device support.
@@ -44,6 +46,11 @@ static inline int blkzoned_reset_wp(struct thread_data *td, struct fio_file *f,
{
return -EIO;
}
+static inline int blkzoned_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+ unsigned int *max_open_zones)
+{
+ return -EIO;
+}
#endif
#endif /* FIO_BLKZONED_H */
diff --git a/oslib/linux-blkzoned.c b/oslib/linux-blkzoned.c
index f37c67fc..6f89ec6f 100644
--- a/oslib/linux-blkzoned.c
+++ b/oslib/linux-blkzoned.c
@@ -24,6 +24,37 @@
#include <linux/blkzoned.h>
/*
+ * If the uapi headers installed on the system lack zone capacity support,
+ * use our local versions. If the installed headers are recent enough to
+ * support zone capacity, do not redefine any structs.
+ */
+#ifndef CONFIG_HAVE_REP_CAPACITY
+#define BLK_ZONE_REP_CAPACITY (1 << 0)
+
+struct blk_zone_v2 {
+ __u64 start; /* Zone start sector */
+ __u64 len; /* Zone length in number of sectors */
+ __u64 wp; /* Zone write pointer position */
+ __u8 type; /* Zone type */
+ __u8 cond; /* Zone condition */
+ __u8 non_seq; /* Non-sequential write resources active */
+ __u8 reset; /* Reset write pointer recommended */
+ __u8 resv[4];
+ __u64 capacity; /* Zone capacity in number of sectors */
+ __u8 reserved[24];
+};
+#define blk_zone blk_zone_v2
+
+struct blk_zone_report_v2 {
+ __u64 sector;
+ __u32 nr_zones;
+ __u32 flags;
+struct blk_zone zones[0];
+};
+#define blk_zone_report blk_zone_report_v2
+#endif /* CONFIG_HAVE_REP_CAPACITY */
+
+/*
* Read up to 255 characters from the first line of a file. Strip the trailing
* newline.
*/
@@ -43,12 +74,16 @@ static char *read_file(const char *path)
return strdup(line);
}
-int blkzoned_get_zoned_model(struct thread_data *td, struct fio_file *f,
- enum zbd_zoned_model *model)
+/*
+ * Get the value of a sysfs attribute for a block device.
+ *
+ * Returns NULL on failure.
+ * Returns a pointer to a string on success.
+ * The caller is responsible for freeing the memory.
+ */
+static char *blkzoned_get_sysfs_attr(const char *file_name, const char *attr)
{
- const char *file_name = f->file_name;
- char *zoned_attr_path = NULL;
- char *model_str = NULL;
+ char *attr_path = NULL;
struct stat statbuf;
char *sys_devno_path = NULL;
char *part_attr_path = NULL;
@@ -56,13 +91,7 @@ int blkzoned_get_zoned_model(struct thread_data *td, struct fio_file *f,
char sys_path[PATH_MAX];
ssize_t sz;
char *delim = NULL;
-
- if (f->filetype != FIO_TYPE_BLOCK) {
- *model = ZBD_IGNORE;
- return 0;
- }
-
- *model = ZBD_NONE;
+ char *attr_str = NULL;
if (stat(file_name, &statbuf) < 0)
goto out;
@@ -92,34 +121,73 @@ int blkzoned_get_zoned_model(struct thread_data *td, struct fio_file *f,
*delim = '\0';
}
- if (asprintf(&zoned_attr_path,
- "/sys/dev/block/%s/queue/zoned", sys_path) < 0)
+ if (asprintf(&attr_path,
+ "/sys/dev/block/%s/%s", sys_path, attr) < 0)
goto out;
- model_str = read_file(zoned_attr_path);
+ attr_str = read_file(attr_path);
+out:
+ free(attr_path);
+ free(part_str);
+ free(part_attr_path);
+ free(sys_devno_path);
+
+ return attr_str;
+}
+
+int blkzoned_get_zoned_model(struct thread_data *td, struct fio_file *f,
+ enum zbd_zoned_model *model)
+{
+ char *model_str = NULL;
+
+ if (f->filetype != FIO_TYPE_BLOCK) {
+ *model = ZBD_IGNORE;
+ return 0;
+ }
+
+ *model = ZBD_NONE;
+
+ model_str = blkzoned_get_sysfs_attr(f->file_name, "queue/zoned");
if (!model_str)
- goto out;
- dprint(FD_ZBD, "%s: zbd model string: %s\n", file_name, model_str);
+ return 0;
+
+ dprint(FD_ZBD, "%s: zbd model string: %s\n", f->file_name, model_str);
if (strcmp(model_str, "host-aware") == 0)
*model = ZBD_HOST_AWARE;
else if (strcmp(model_str, "host-managed") == 0)
*model = ZBD_HOST_MANAGED;
-out:
+
free(model_str);
- free(zoned_attr_path);
- free(part_str);
- free(part_attr_path);
- free(sys_devno_path);
+
+ return 0;
+}
+
+/*
+ * Read the device's "queue/max_open_zones" sysfs attribute.
+ *
+ * @td: FIO thread data (unused here).
+ * @f: file to query; must be a block device.
+ * @max_open_zones: set to the parsed attribute value, or to 0 when the
+ *	attribute cannot be read.
+ *
+ * Returns -EIO if @f is not a block device, 0 otherwise.
+ */
+int blkzoned_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+				unsigned int *max_open_zones)
+{
+	char *max_open_str;
+
+	if (f->filetype != FIO_TYPE_BLOCK)
+		return -EIO;
+
+	max_open_str = blkzoned_get_sysfs_attr(f->file_name, "queue/max_open_zones");
+	if (!max_open_str) {
+		/*
+		 * Attribute unavailable: still report success, but store a
+		 * defined value so the caller never reads an unset variable.
+		 */
+		*max_open_zones = 0;
+		return 0;
+	}
+
+	dprint(FD_ZBD, "%s: max open zones supported by device: %s\n",
+	       f->file_name, max_open_str);
+	*max_open_zones = atoll(max_open_str);
+
+	free(max_open_str);
+
return 0;
}
static uint64_t zone_capacity(struct blk_zone_report *hdr,
struct blk_zone *blkz)
{
-#ifdef CONFIG_HAVE_REP_CAPACITY
if (hdr->flags & BLK_ZONE_REP_CAPACITY)
return blkz->capacity << 9;
-#endif
return blkz->len << 9;
}
diff --git a/parse.h b/parse.h
index 4cf08fd2..d68484ea 100644
--- a/parse.h
+++ b/parse.h
@@ -131,7 +131,7 @@ static inline void *td_var(void *to, const struct fio_option *o,
static inline int parse_is_percent(unsigned long long val)
{
- return val >= -101;
+ return val >= -101ULL;
}
#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
diff --git a/stat.c b/stat.c
index b7222f46..a8a96c85 100644
--- a/stat.c
+++ b/stat.c
@@ -462,7 +462,7 @@ static double convert_agg_kbytes_percent(struct group_run_stats *rs, int ddir, i
{
double p_of_agg = 100.0;
if (rs && rs->agg[ddir] > 1024) {
- p_of_agg = mean * 100 / (double) (rs->agg[ddir] / 1024.0);
+ p_of_agg = mean * 100.0 / (double) (rs->agg[ddir] / 1024.0);
if (p_of_agg > 100.0)
p_of_agg = 100.0;
diff --git a/zbd.c b/zbd.c
index eed796b3..68cd58e1 100644
--- a/zbd.c
+++ b/zbd.c
@@ -114,6 +114,34 @@ int zbd_reset_wp(struct thread_data *td, struct fio_file *f,
}
/**
+ * zbd_get_max_open_zones - Get the maximum number of open zones
+ * @td: FIO thread data
+ * @f: FIO file for which to get max open zones
+ * @max_open_zones: Upon success, result will be stored here.
+ *
+ * A @max_open_zones value set to zero means no limit.
+ *
+ * Returns 0 upon success and a negative error code upon failure.
+ */
+int zbd_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+			   unsigned int *max_open_zones)
+{
+	int ret;
+
+	/* Prefer the I/O engine's own hook; fall back to the blkzoned helper. */
+	if (td->io_ops && td->io_ops->get_max_open_zones)
+		ret = td->io_ops->get_max_open_zones(td, f, max_open_zones);
+	else
+		ret = blkzoned_get_max_open_zones(td, f, max_open_zones);
+	if (ret < 0) {
+		/*
+		 * Failure is reported through the negative return code;
+		 * blkzoned_get_max_open_zones() returns -EIO without setting
+		 * errno, so log -ret rather than a possibly stale errno.
+		 */
+		td_verror(td, -ret, "get max open zones failed");
+		log_err("%s: get max open zones failed (%d).\n",
+			f->file_name, -ret);
+	}
+
+	return ret;
+}
+
+/**
* zbd_zone_idx - convert an offset into a zone number
* @f: file pointer.
* @offset: offset in bytes. If this offset is in the first zone_size bytes
@@ -554,6 +582,51 @@ out:
return ret;
}
+static int zbd_set_max_open_zones(struct thread_data *td, struct fio_file *f)
+{
+ struct zoned_block_device_info *zbd = f->zbd_info;
+ unsigned int max_open_zones;
+ int ret;
+
+ if (zbd->model != ZBD_HOST_MANAGED) {
+ /* Only host-managed devices have a max open limit */
+ zbd->max_open_zones = td->o.max_open_zones;
+ goto out;
+ }
+
+ /* If host-managed, get the max open limit */
+ ret = zbd_get_max_open_zones(td, f, &max_open_zones);
+ if (ret)
+ return ret;
+
+ if (!max_open_zones) {
+ /* No device limit */
+ zbd->max_open_zones = td->o.max_open_zones;
+ } else if (!td->o.max_open_zones) {
+ /* No user limit. Set limit to device limit */
+ zbd->max_open_zones = max_open_zones;
+ } else if (td->o.max_open_zones <= max_open_zones) {
+ /* Both user limit and dev limit. User limit not too large */
+ zbd->max_open_zones = td->o.max_open_zones;
+ } else {
+ /* Both user limit and dev limit. User limit too large */
+ td_verror(td, EINVAL,
+ "Specified --max_open_zones is too large");
+ log_err("Specified --max_open_zones (%d) is larger than max (%u)\n",
+ td->o.max_open_zones, max_open_zones);
+ return -EINVAL;
+ }
+
+out:
+ /* Ensure that the limit is not larger than FIO's internal limit */
+ zbd->max_open_zones = min_not_zero(zbd->max_open_zones,
+ (uint32_t) ZBD_MAX_OPEN_ZONES);
+ dprint(FD_ZBD, "%s: using max open zones limit: %"PRIu32"\n",
+ f->file_name, zbd->max_open_zones);
+
+ return 0;
+}
+
/*
* Allocate zone information and store it into f->zbd_info if zonemode=zbd.
*
@@ -576,9 +649,13 @@ static int zbd_create_zone_info(struct thread_data *td, struct fio_file *f)
case ZBD_HOST_AWARE:
case ZBD_HOST_MANAGED:
ret = parse_zone_info(td, f);
+ if (ret)
+ return ret;
break;
case ZBD_NONE:
ret = init_zone_info(td, f);
+ if (ret)
+ return ret;
break;
default:
td_verror(td, EINVAL, "Unsupported zoned model");
@@ -586,11 +663,15 @@ static int zbd_create_zone_info(struct thread_data *td, struct fio_file *f)
return -EINVAL;
}
- if (ret == 0) {
- f->zbd_info->model = zbd_model;
- f->zbd_info->max_open_zones = td->o.max_open_zones;
+ f->zbd_info->model = zbd_model;
+
+ ret = zbd_set_max_open_zones(td, f);
+ if (ret) {
+ zbd_free_zone_info(f);
+ return ret;
}
- return ret;
+
+ return 0;
}
void zbd_free_zone_info(struct fio_file *f)
@@ -726,8 +807,6 @@ int zbd_setup_files(struct thread_data *td)
if (zbd_is_seq_job(f))
assert(f->min_zone < f->max_zone);
- zbd->max_open_zones = zbd->max_open_zones ?: ZBD_MAX_OPEN_ZONES;
-
if (td->o.max_open_zones > 0 &&
zbd->max_open_zones != td->o.max_open_zones) {
log_err("Different 'max_open_zones' values\n");