/tags
/TAGS
/t/zbd/test-zbd-support.log.*
+/t/fuzz/fuzz_parseini
#!/bin/sh
GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.25
+DEF_VER=fio-3.26
LF='
'
unless :option:`verify` is set or :option:`cuda_io` is `posix`.
:option:`iomem` must not be `cudamalloc`. This ioengine defines
engine specific options.
+ **dfs**
+ I/O engine supporting asynchronous read and write operations to the
+ DAOS File System (DFS) via libdfs.
I/O engine specific parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GPU to RAM before a write and copied from RAM to GPU after a
read. :option:`verify` does not affect use of cudaMemcpy.
+.. option:: pool=str : [dfs]
+
+ Specify the UUID of the DAOS pool to connect to.
+
+.. option:: cont=str : [dfs]
+
+ Specify the UUID of the DAOS container to open.
+
+.. option:: chunk_size=int : [dfs]
+
+	Specify a different chunk size (in bytes) for the dfs file.
+	Use the DAOS container's chunk size by default.
+
+.. option:: object_class=str : [dfs]
+
+	Specify a different object class for the dfs file.
+	Use the DAOS container's object class by default.
+
I/O depth
~~~~~~~~~
true, fio will continue running and try to meet :option:`latency_target`
by adjusting queue depth.
-.. option:: max_latency=time
+.. option:: max_latency=time[,time][,time]
If set, fio will exit the job with an ETIMEDOUT error if it exceeds this
maximum latency. When the unit is omitted, the value is interpreted in
- microseconds.
+ microseconds. Comma-separated values may be specified for reads, writes,
+ and trims as described in :option:`blocksize`.
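+
+	For example, ``max_latency=250ms,500ms`` would cap read latency at 250
+	milliseconds and write (and, following that convention, trim) latency at
+	500 milliseconds.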
.. option:: rate_cycle=int
http_LIBS = -lcurl -lssl -lcrypto
ENGINES += http
endif
+ifdef CONFIG_DFS
+ dfs_SRCS = engines/dfs.c
+ dfs_LIBS = -luuid -ldaos -ldfs
+ ENGINES += dfs
+endif
SOURCE += oslib/asprintf.c
ifndef CONFIG_STRSEP
SOURCE += oslib/strsep.c
for_each_td(td, i) {
steadystate_free(td);
fio_options_free(td);
+ fio_dump_options_free(td);
if (td->rusage_sem) {
fio_sem_remove(td->rusage_sem);
td->rusage_sem = NULL;
o->rate_iops_min[i] = le32_to_cpu(top->rate_iops_min[i]);
o->perc_rand[i] = le32_to_cpu(top->perc_rand[i]);
+
+ o->max_latency[i] = le64_to_cpu(top->max_latency[i]);
}
o->ratecycle = le32_to_cpu(top->ratecycle);
o->sync_file_range = le32_to_cpu(top->sync_file_range);
o->latency_target = le64_to_cpu(top->latency_target);
o->latency_window = le64_to_cpu(top->latency_window);
- o->max_latency = le64_to_cpu(top->max_latency);
o->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(top->latency_percentile.u.i));
o->latency_run = le32_to_cpu(top->latency_run);
o->compress_percentage = le32_to_cpu(top->compress_percentage);
top->sync_file_range = cpu_to_le32(o->sync_file_range);
top->latency_target = __cpu_to_le64(o->latency_target);
top->latency_window = __cpu_to_le64(o->latency_window);
- top->max_latency = __cpu_to_le64(o->max_latency);
top->latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(o->latency_percentile.u.f));
top->latency_run = __cpu_to_le32(o->latency_run);
top->compress_percentage = cpu_to_le32(o->compress_percentage);
top->rate_iops_min[i] = cpu_to_le32(o->rate_iops_min[i]);
top->perc_rand[i] = cpu_to_le32(o->perc_rand[i]);
+
+ top->max_latency[i] = __cpu_to_le64(o->max_latency[i]);
}
memcpy(top->verify_pattern, o->verify_pattern, MAX_PATTERN_SIZE);
libiscsi="no"
libnbd="no"
libzbc=""
+dfs=""
dynamic_engines="no"
prefix=/usr/local
;;
--dynamic-libengines) dynamic_engines="yes"
;;
+ --disable-dfs) dfs="no"
+ ;;
--help)
show_help="yes"
;;
echo "--disable-libzbc Disable libzbc even if found"
echo "--disable-tcmalloc Disable tcmalloc support"
echo "--dynamic-libengines Lib-based ioengines as dynamic libraries"
+ echo "--disable-dfs Disable DAOS File System support even if found"
exit $exit_val
fi
clock_gettime="yes" # clock_monotonic probe has dependency on this
clock_monotonic="yes"
sched_idle="yes"
+ pthread_condattr_setclock="no"
;;
esac
##########################################
# POSIX pthread_condattr_setclock() probe
-if test "$pthread_condattr_setclock" != "yes" ; then
- pthread_condattr_setclock="no"
-fi
-cat > $TMPC <<EOF
+if test "$pthread_condattr_setclock" != "no" ; then
+ cat > $TMPC <<EOF
#include <pthread.h>
int main(void)
{
return 0;
}
EOF
-if compile_prog "" "$LIBS" "pthread_condattr_setclock" ; then
- pthread_condattr_setclock=yes
-elif compile_prog "" "$LIBS -lpthread" "pthread_condattr_setclock" ; then
- pthread_condattr_setclock=yes
- LIBS="$LIBS -lpthread"
+ if compile_prog "" "$LIBS" "pthread_condattr_setclock" ; then
+ pthread_condattr_setclock=yes
+ elif compile_prog "" "$LIBS -lpthread" "pthread_condattr_setclock" ; then
+ pthread_condattr_setclock=yes
+ LIBS="$LIBS -lpthread"
+ fi
fi
print_config "pthread_condattr_setclock()" "$pthread_condattr_setclock"
fi
print_config "NBD engine" "$libnbd"
+##########################################
+# check for dfs (DAOS File System)
+if test "$dfs" != "no" ; then
+ cat > $TMPC << EOF
+#include <fcntl.h>
+#include <daos.h>
+#include <daos_fs.h>
+
+int main(int argc, char **argv)
+{
+ daos_handle_t poh;
+ daos_handle_t coh;
+ dfs_t *dfs;
+
+ (void) dfs_mount(poh, coh, O_RDWR, &dfs);
+
+ return 0;
+}
+EOF
+ if compile_prog "" "-luuid -ldfs -ldaos" "dfs"; then
+ dfs="yes"
+ else
+ dfs="no"
+ fi
+fi
+print_config "DAOS File System (dfs) Engine" "$dfs"
+
##########################################
# Check if we have lex/yacc available
yacc="no"
if test "$clock_monotonic" = "yes" ; then
output_sym "CONFIG_CLOCK_MONOTONIC"
fi
-if test "$clock_monotonic_raw" = "yes" ; then
- output_sym "CONFIG_CLOCK_MONOTONIC_RAW"
-fi
-if test "$clock_monotonic_precise" = "yes" ; then
- output_sym "CONFIG_CLOCK_MONOTONIC_PRECISE"
-fi
if test "$clockid_t" = "yes"; then
output_sym "CONFIG_CLOCKID_T"
fi
if test "$libcufile" = "yes" ; then
output_sym "CONFIG_LIBCUFILE"
fi
+if test "$dfs" = "yes" ; then
+ output_sym "CONFIG_DFS"
+fi
if test "$march_set" = "no" && test "$build_native" = "yes" ; then
output_sym "CONFIG_BUILD_NATIVE"
fi
--- /dev/null
+/**
+ * FIO engine for DAOS File System (dfs).
+ *
+ * (C) Copyright 2020-2021 Intel Corporation.
+ */
+
+#include <fio.h>
+#include <optgroup.h>
+
+#include <daos.h>
+#include <daos_fs.h>
+
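+/*
+ * The DAOS stack and the pool/container/DFS handles below are shared by
+ * all fio jobs in the process. daos_mutex serializes global setup and
+ * teardown, and num_threads tracks how many jobs still reference them.
+ */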
+static bool daos_initialized;
+static int num_threads;
+static pthread_mutex_t daos_mutex = PTHREAD_MUTEX_INITIALIZER;
+daos_handle_t poh; /* pool handle */
+daos_handle_t coh; /* container handle */
+daos_oclass_id_t cid = OC_UNKNOWN; /* object class */
+dfs_t *dfs; /* dfs mount reference */
+
+struct daos_iou {
+ struct io_u *io_u;
+ daos_event_t ev;
+ d_sg_list_t sgl;
+ d_iov_t iov;
+ daos_size_t size;
+ bool complete;
+};
+
+struct daos_data {
+ daos_handle_t eqh;
+ dfs_obj_t *obj;
+ struct io_u **io_us;
+ int queued;
+ int num_ios;
+};
+
+struct daos_fio_options {
+ void *pad;
+ char *pool; /* Pool UUID */
+ char *cont; /* Container UUID */
+ daos_size_t chsz; /* Chunk size */
+ char *oclass; /* object class */
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ char *svcl; /* service replica list, deprecated */
+#endif
+};
+
+static struct fio_option options[] = {
+ {
+ .name = "pool",
+ .lname = "pool uuid",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, pool),
+ .help = "DAOS pool uuid",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+ {
+ .name = "cont",
+ .lname = "container uuid",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, cont),
+ .help = "DAOS container uuid",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+ {
+ .name = "chunk_size",
+ .lname = "DFS chunk size",
+ .type = FIO_OPT_ULL,
+ .off1 = offsetof(struct daos_fio_options, chsz),
+ .help = "DFS chunk size in bytes",
+ .def = "0", /* use container default */
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+ {
+ .name = "object_class",
+ .lname = "object class",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, oclass),
+ .help = "DAOS object class",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ {
+ .name = "svcl",
+ .lname = "List of service ranks",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, svcl),
+ .help = "List of pool replicated service ranks",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+#endif
+ {
+ .name = NULL,
+ },
+};
+
+static int daos_fio_global_init(struct thread_data *td)
+{
+ struct daos_fio_options *eo = td->eo;
+ uuid_t pool_uuid, co_uuid;
+ daos_pool_info_t pool_info;
+ daos_cont_info_t co_info;
+ int rc = 0;
+
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ if (!eo->pool || !eo->cont || !eo->svcl) {
+#else
+ if (!eo->pool || !eo->cont) {
+#endif
+ log_err("Missing required DAOS options\n");
+ return EINVAL;
+ }
+
+ rc = daos_init();
+ if (rc != -DER_ALREADY && rc) {
+ log_err("Failed to initialize daos %d\n", rc);
+ td_verror(td, rc, "daos_init");
+ return rc;
+ }
+
+ rc = uuid_parse(eo->pool, pool_uuid);
+ if (rc) {
+ log_err("Failed to parse 'Pool uuid': %s\n", eo->pool);
+ td_verror(td, EINVAL, "uuid_parse(eo->pool)");
+ return EINVAL;
+ }
+
+ rc = uuid_parse(eo->cont, co_uuid);
+ if (rc) {
+ log_err("Failed to parse 'Cont uuid': %s\n", eo->cont);
+ td_verror(td, EINVAL, "uuid_parse(eo->cont)");
+ return EINVAL;
+ }
+
+ /* Connect to the DAOS pool */
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ d_rank_list_t *svcl = NULL;
+
+ svcl = daos_rank_list_parse(eo->svcl, ":");
+ if (svcl == NULL) {
+ log_err("Failed to parse svcl\n");
+ td_verror(td, EINVAL, "daos_rank_list_parse");
+ return EINVAL;
+ }
+
+ rc = daos_pool_connect(pool_uuid, NULL, svcl, DAOS_PC_RW,
+ &poh, &pool_info, NULL);
+ d_rank_list_free(svcl);
+#else
+ rc = daos_pool_connect(pool_uuid, NULL, DAOS_PC_RW, &poh, &pool_info,
+ NULL);
+#endif
+ if (rc) {
+ log_err("Failed to connect to pool %d\n", rc);
+ td_verror(td, rc, "daos_pool_connect");
+ return rc;
+ }
+
+ /* Open the DAOS container */
+ rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL);
+ if (rc) {
+ log_err("Failed to open container: %d\n", rc);
+ td_verror(td, rc, "daos_cont_open");
+ (void)daos_pool_disconnect(poh, NULL);
+ return rc;
+ }
+
+ /* Mount encapsulated filesystem */
+ rc = dfs_mount(poh, coh, O_RDWR, &dfs);
+ if (rc) {
+ log_err("Failed to mount DFS namespace: %d\n", rc);
+ td_verror(td, rc, "dfs_mount");
+ (void)daos_pool_disconnect(poh, NULL);
+ (void)daos_cont_close(coh, NULL);
+ return rc;
+ }
+
+ /* Retrieve object class to use, if specified */
+ if (eo->oclass)
+ cid = daos_oclass_name2id(eo->oclass);
+
+ return 0;
+}
+
+static int daos_fio_global_cleanup()
+{
+ int rc;
+ int ret = 0;
+
+ rc = dfs_umount(dfs);
+ if (rc) {
+ log_err("failed to umount dfs: %d\n", rc);
+ ret = rc;
+ }
+ rc = daos_cont_close(coh, NULL);
+ if (rc) {
+ log_err("failed to close container: %d\n", rc);
+ if (ret == 0)
+ ret = rc;
+ }
+ rc = daos_pool_disconnect(poh, NULL);
+ if (rc) {
+ log_err("failed to disconnect pool: %d\n", rc);
+ if (ret == 0)
+ ret = rc;
+ }
+ rc = daos_fini();
+ if (rc) {
+ log_err("failed to finalize daos: %d\n", rc);
+ if (ret == 0)
+ ret = rc;
+ }
+
+ return ret;
+}
+
+static int daos_fio_setup(struct thread_data *td)
+{
+ return 0;
+}
+
+static int daos_fio_init(struct thread_data *td)
+{
+ struct daos_data *dd;
+ int rc = 0;
+
+ pthread_mutex_lock(&daos_mutex);
+
+ dd = malloc(sizeof(*dd));
+ if (dd == NULL) {
+ log_err("Failed to allocate DAOS-private data\n");
+ rc = ENOMEM;
+ goto out;
+ }
+
+ dd->queued = 0;
+ dd->num_ios = td->o.iodepth;
+ dd->io_us = calloc(dd->num_ios, sizeof(struct io_u *));
+ if (dd->io_us == NULL) {
+ log_err("Failed to allocate IO queue\n");
+ rc = ENOMEM;
+ goto out;
+ }
+
+ /* initialize DAOS stack if not already up */
+ if (!daos_initialized) {
+ rc = daos_fio_global_init(td);
+ if (rc)
+ goto out;
+ daos_initialized = true;
+ }
+
+ rc = daos_eq_create(&dd->eqh);
+ if (rc) {
+ log_err("Failed to create event queue: %d\n", rc);
+ td_verror(td, rc, "daos_eq_create");
+ goto out;
+ }
+
+ td->io_ops_data = dd;
+ num_threads++;
+out:
+ if (rc) {
+ if (dd) {
+ free(dd->io_us);
+ free(dd);
+ }
+ if (num_threads == 0 && daos_initialized) {
+ /* don't clobber error return value */
+ (void)daos_fio_global_cleanup();
+ daos_initialized = false;
+ }
+ }
+ pthread_mutex_unlock(&daos_mutex);
+ return rc;
+}
+
+static void daos_fio_cleanup(struct thread_data *td)
+{
+ struct daos_data *dd = td->io_ops_data;
+ int rc;
+
+ if (dd == NULL)
+ return;
+
+ rc = daos_eq_destroy(dd->eqh, DAOS_EQ_DESTROY_FORCE);
+ if (rc < 0) {
+ log_err("failed to destroy event queue: %d\n", rc);
+ td_verror(td, rc, "daos_eq_destroy");
+ }
+
+ free(dd->io_us);
+ free(dd);
+
+ pthread_mutex_lock(&daos_mutex);
+ num_threads--;
+ if (daos_initialized && num_threads == 0) {
+ int ret;
+
+ ret = daos_fio_global_cleanup();
+ if (ret < 0 && rc == 0) {
+ log_err("failed to clean up: %d\n", ret);
+ td_verror(td, ret, "daos_fio_global_cleanup");
+ }
+ daos_initialized = false;
+ }
+ pthread_mutex_unlock(&daos_mutex);
+}
+
+static int daos_fio_get_file_size(struct thread_data *td, struct fio_file *f)
+{
+ char *file_name = f->file_name;
+ struct stat stbuf = {0};
+ int rc;
+
+ dprint(FD_FILE, "dfs stat %s\n", f->file_name);
+
+ if (!daos_initialized)
+ return 0;
+
+ rc = dfs_stat(dfs, NULL, file_name, &stbuf);
+ if (rc) {
+ log_err("Failed to stat %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_stat");
+ return rc;
+ }
+
+ f->real_file_size = stbuf.st_size;
+ return 0;
+}
+
+static int daos_fio_close(struct thread_data *td, struct fio_file *f)
+{
+ struct daos_data *dd = td->io_ops_data;
+ int rc;
+
+ dprint(FD_FILE, "dfs release %s\n", f->file_name);
+
+ rc = dfs_release(dd->obj);
+ if (rc) {
+ log_err("Failed to release %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_release");
+ return rc;
+ }
+
+ return 0;
+}
+
+static int daos_fio_open(struct thread_data *td, struct fio_file *f)
+{
+ struct daos_data *dd = td->io_ops_data;
+ struct daos_fio_options *eo = td->eo;
+ int flags = 0;
+ int rc;
+
+ dprint(FD_FILE, "dfs open %s (%s/%d/%d)\n",
+		f->file_name, td_write(td) && !read_only ? "rw" : "r",
+ td->o.create_on_open, td->o.allow_create);
+
+ if (td->o.create_on_open && td->o.allow_create)
+ flags |= O_CREAT;
+
+ if (td_write(td)) {
+ if (!read_only)
+ flags |= O_RDWR;
+ if (td->o.allow_create)
+ flags |= O_CREAT;
+ } else if (td_read(td)) {
+ flags |= O_RDONLY;
+ }
+
+ rc = dfs_open(dfs, NULL, f->file_name,
+ S_IFREG | S_IRUSR | S_IWUSR,
+ flags, cid, eo->chsz, NULL, &dd->obj);
+ if (rc) {
+ log_err("Failed to open %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_open");
+ return rc;
+ }
+
+ return 0;
+}
+
+static int daos_fio_unlink(struct thread_data *td, struct fio_file *f)
+{
+ int rc;
+
+ dprint(FD_FILE, "dfs remove %s\n", f->file_name);
+
+ rc = dfs_remove(dfs, NULL, f->file_name, false, NULL);
+ if (rc) {
+ log_err("Failed to remove %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_remove");
+ return rc;
+ }
+
+ return 0;
+}
+
+static int daos_fio_invalidate(struct thread_data *td, struct fio_file *f)
+{
+ dprint(FD_FILE, "dfs invalidate %s\n", f->file_name);
+ return 0;
+}
+
+static void daos_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
+{
+ struct daos_iou *io = io_u->engine_data;
+
+ if (io) {
+ io_u->engine_data = NULL;
+ free(io);
+ }
+}
+
+static int daos_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
+{
+ struct daos_iou *io;
+
+ io = malloc(sizeof(struct daos_iou));
+ if (!io) {
+ td_verror(td, ENOMEM, "malloc");
+ return ENOMEM;
+ }
+ io->io_u = io_u;
+ io_u->engine_data = io;
+ return 0;
+}
+
+static struct io_u * daos_fio_event(struct thread_data *td, int event)
+{
+ struct daos_data *dd = td->io_ops_data;
+
+ return dd->io_us[event];
+}
+
+static int daos_fio_getevents(struct thread_data *td, unsigned int min,
+ unsigned int max, const struct timespec *t)
+{
+ struct daos_data *dd = td->io_ops_data;
+ daos_event_t *evp[max];
+ unsigned int events = 0;
+ int i;
+ int rc;
+
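+	/* Busy-poll the event queue until at least min I/Os have completed. */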
+ while (events < min) {
+ rc = daos_eq_poll(dd->eqh, 0, DAOS_EQ_NOWAIT, max, evp);
+ if (rc < 0) {
+ log_err("Event poll failed: %d\n", rc);
+ td_verror(td, rc, "daos_eq_poll");
+ return events;
+ }
+
+ for (i = 0; i < rc; i++) {
+ struct daos_iou *io;
+ struct io_u *io_u;
+
+ io = container_of(evp[i], struct daos_iou, ev);
+ if (io->complete)
+ log_err("Completion on already completed I/O\n");
+
+ io_u = io->io_u;
+ if (io->ev.ev_error)
+ io_u->error = io->ev.ev_error;
+ else
+ io_u->resid = 0;
+
+ dd->io_us[events] = io_u;
+ dd->queued--;
+ daos_event_fini(&io->ev);
+ io->complete = true;
+ events++;
+ }
+ }
+
+	dprint(FD_IO, "dfs eq_poll returning %d (%u/%u)\n", events, min, max);
+
+ return events;
+}
+
+static enum fio_q_status daos_fio_queue(struct thread_data *td,
+ struct io_u *io_u)
+{
+ struct daos_data *dd = td->io_ops_data;
+ struct daos_iou *io = io_u->engine_data;
+ daos_off_t offset = io_u->offset;
+ int rc;
+
+ if (dd->queued == td->o.iodepth)
+ return FIO_Q_BUSY;
+
+ io->sgl.sg_nr = 1;
+ io->sgl.sg_nr_out = 0;
+ d_iov_set(&io->iov, io_u->xfer_buf, io_u->xfer_buflen);
+ io->sgl.sg_iovs = &io->iov;
+ io->size = io_u->xfer_buflen;
+
+ io->complete = false;
+ rc = daos_event_init(&io->ev, dd->eqh, NULL);
+ if (rc) {
+ log_err("Event init failed: %d\n", rc);
+ io_u->error = rc;
+ return FIO_Q_COMPLETED;
+ }
+
+ switch (io_u->ddir) {
+ case DDIR_WRITE:
+ rc = dfs_write(dfs, dd->obj, &io->sgl, offset, &io->ev);
+ if (rc) {
+ log_err("dfs_write failed: %d\n", rc);
+ io_u->error = rc;
+ return FIO_Q_COMPLETED;
+ }
+ break;
+ case DDIR_READ:
+ rc = dfs_read(dfs, dd->obj, &io->sgl, offset, &io->size,
+ &io->ev);
+ if (rc) {
+ log_err("dfs_read failed: %d\n", rc);
+ io_u->error = rc;
+ return FIO_Q_COMPLETED;
+ }
+ break;
+ case DDIR_SYNC:
+ io_u->error = 0;
+ return FIO_Q_COMPLETED;
+ default:
+ dprint(FD_IO, "Invalid IO type: %d\n", io_u->ddir);
+ io_u->error = -DER_INVAL;
+ return FIO_Q_COMPLETED;
+ }
+
+ dd->queued++;
+ return FIO_Q_QUEUED;
+}
+
+static int daos_fio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
+{
+ return 0;
+}
+
+/* ioengine_ops for get_ioengine() */
+FIO_STATIC struct ioengine_ops ioengine = {
+ .name = "dfs",
+ .version = FIO_IOOPS_VERSION,
+ .flags = FIO_DISKLESSIO | FIO_NODISKUTIL,
+
+ .setup = daos_fio_setup,
+ .init = daos_fio_init,
+ .prep = daos_fio_prep,
+ .cleanup = daos_fio_cleanup,
+
+ .open_file = daos_fio_open,
+ .invalidate = daos_fio_invalidate,
+ .get_file_size = daos_fio_get_file_size,
+ .close_file = daos_fio_close,
+ .unlink_file = daos_fio_unlink,
+
+ .queue = daos_fio_queue,
+ .getevents = daos_fio_getevents,
+ .event = daos_fio_event,
+ .io_u_init = daos_fio_io_u_init,
+ .io_u_free = daos_fio_io_u_free,
+
+ .option_struct_size = sizeof(struct daos_fio_options),
+ .options = options,
+};
+
+static void fio_init fio_dfs_register(void)
+{
+ register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_dfs_unregister(void)
+{
+ unregister_ioengine(&ioengine);
+}
dprint(FD_FILE, "fd open %s\n", f->file_name);
- if (f->filetype != FIO_TYPE_FILE) {
- log_err("fio: only files are supported fallocate \n");
+ if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK) {
+		log_err("fio: only files and block devices are supported for fallocate\n");
return 1;
}
if (!strcmp(f->file_name, "-")) {
dprint(FD_FILE, "fd open %s\n", f->file_name);
if (f->filetype != FIO_TYPE_FILE) {
- log_err("fio: only files are supported fallocate \n");
+ log_err("fio: only files are supported\n");
return 1;
}
if (!strcmp(f->file_name, "-")) {
err = fio_ioring_queue_init(td);
if (err) {
- td_verror(td, errno, "io_queue_init");
+ int init_err = errno;
+
+ if (init_err == ENOSYS)
+ log_err("fio: your kernel doesn't support io_uring\n");
+ td_verror(td, init_err, "io_queue_init");
return 1;
}
.open_file = fio_libpmem_open_file,
.close_file = fio_libpmem_close_file,
.get_file_size = generic_get_file_size,
+ .prepopulate_file = generic_prepopulate_file,
.flags = FIO_SYNCIO | FIO_RAWIO | FIO_DISKLESSIO | FIO_NOEXTEND |
FIO_NODISKUTIL | FIO_BARRIER | FIO_MEMALIGN,
};
--- /dev/null
+[global]
+ioengine=dfs
+pool=${POOL}
+cont=${CONT}
+filename_format=fio-test.$jobnum
+
+cpus_allowed_policy=split
+group_reporting=1
+time_based=0
+percentile_list=99.0:99.9:99.99:99.999:99.9999:100
+disable_slat=1
+disable_clat=1
+
+bs=1M
+size=100G
+iodepth=16
+numjobs=16
+
+[daos-seqwrite]
+rw=write
+stonewall
+
+[daos-seqread]
+rw=read
+stonewall
+
+[daos-randwrite]
+rw=randwrite
+stonewall
+
+[daos-randread]
+rw=randread
+stonewall
extern int __must_check generic_open_file(struct thread_data *, struct fio_file *);
extern int __must_check generic_close_file(struct thread_data *, struct fio_file *);
extern int __must_check generic_get_file_size(struct thread_data *, struct fio_file *);
+extern int __must_check generic_prepopulate_file(struct thread_data *, struct fio_file *);
#ifdef __cplusplus
}
#endif
return ret;
}
+/*
+ * Generic function to prepopulate regular file with data.
+ * Useful if you want to make sure I/O engine has data to read.
+ * Leaves f->fd open on success, caller must close.
+ */
+int generic_prepopulate_file(struct thread_data *td, struct fio_file *f)
+{
+ int flags;
+ unsigned long long left, bs;
+ char *b = NULL;
+
+ /* generic function for regular files only */
+ assert(f->filetype == FIO_TYPE_FILE);
+
+ if (read_only) {
+ log_err("fio: refusing to write a file due to read-only\n");
+ return 0;
+ }
+
+ flags = O_WRONLY;
+ if (td->o.allow_create)
+ flags |= O_CREAT;
+
+#ifdef WIN32
+ flags |= _O_BINARY;
+#endif
+
+ dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
+ f->fd = open(f->file_name, flags, 0644);
+ if (f->fd < 0) {
+ int err = errno;
+
+ if (err == ENOENT && !td->o.allow_create)
+ log_err("fio: file creation disallowed by "
+ "allow_file_create=0\n");
+ else
+ td_verror(td, err, "open");
+ return 1;
+ }
+
+ left = f->real_file_size;
+ bs = td->o.max_bs[DDIR_WRITE];
+ if (bs > left)
+ bs = left;
+
+ b = malloc(bs);
+ if (!b) {
+ td_verror(td, errno, "malloc");
+ goto err;
+ }
+
+ while (left && !td->terminate) {
+ ssize_t r;
+
+ if (bs > left)
+ bs = left;
+
+ fill_io_buffer(td, b, bs, bs);
+
+ r = write(f->fd, b, bs);
+
+ if (r > 0) {
+ left -= r;
+ } else {
+ td_verror(td, errno, "write");
+ goto err;
+ }
+ }
+
+ if (td->terminate) {
+ dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
+ td_io_unlink_file(td, f);
+ } else if (td->o.create_fsync) {
+ if (fsync(f->fd) < 0) {
+ td_verror(td, errno, "fsync");
+ goto err;
+ }
+ }
+
+ free(b);
+ return 0;
+err:
+ close(f->fd);
+ f->fd = -1;
+ if (b)
+ free(b);
+ return 1;
+}
+
unsigned long long get_rand_file_size(struct thread_data *td)
{
unsigned long long ret, sized;
if (o->read_iolog_file)
goto done;
+ if (td->o.zone_mode == ZONE_MODE_ZBD) {
+ err = zbd_init_files(td);
+ if (err)
+ goto err_out;
+ }
+ zbd_recalc_options_with_zone_granularity(td);
+
/*
* check sizes. if the files/devices do not exist and the size
* isn't passed to fio, abort.
temp_stall_ts = 0;
}
+ if (err)
+ goto err_out;
+
+ /*
+ * Prepopulate files with data. It might be expected to read some
+ * "real" data instead of zero'ed files (if no writes to file occurred
+ * prior to a read job). Engine has to provide a way to do that.
+ */
+ if (td->io_ops->prepopulate_file) {
+ temp_stall_ts = 1;
+
+ for_each_file(td, f, i) {
+ if (output_format & FIO_OUTPUT_NORMAL) {
+ log_info("%s: Prepopulating IO file (%s)\n",
+ o->name, f->file_name);
+ }
+
+ err = td->io_ops->prepopulate_file(td, f);
+ if (err)
+ break;
+
+ err = __file_invalidate_cache(td, f, f->file_offset,
+ f->io_size);
+
+ /*
+ * Shut up static checker
+ */
+ if (f->fd != -1)
+ close(f->fd);
+
+ f->fd = -1;
+ if (err)
+ break;
+ }
+ temp_stall_ts = 0;
+ }
+
if (err)
goto err_out;
}
done:
- if (o->create_only)
- td->done = 1;
-
- td_restore_runstate(td, old_state);
-
if (td->o.zone_mode == ZONE_MODE_ZBD) {
err = zbd_setup_files(td);
if (err)
goto err_out;
}
+
+ if (o->create_only)
+ td->done = 1;
+
+ td_restore_runstate(td, old_state);
+
return 0;
err_offset:
.PD
.RE
.P
+The `z' suffix specifies that the value is measured in zones.
+The value is recalculated once the block device's zone size becomes known.
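+For example, on a device with 256 MiB zones, `size=2z' is recalculated
+to 512 MiB.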
+.P
If the option accepts an upper and lower range, use a colon ':' or
minus '\-' to separate such values. See \fIirange\fR parameter type.
If the lower value specified happens to be larger than the upper value
block device, the zone capacity is obtained from the device information and this
option is ignored.
.TP
-.BI zoneskip \fR=\fPint
+.BI zoneskip \fR=\fPint[z]
For \fBzonemode\fR=strided, the number of bytes to skip after \fBzonesize\fR
bytes of data have been transferred.
should be associated with them.
.RE
.TP
-.BI offset \fR=\fPint
+.BI offset \fR=\fPint[%|z]
Start I/O at the provided offset in the file, given as either a fixed size in
bytes or a percentage. If a percentage is given, the generated offset will be
aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR if
is aligned upwards to this value. Defaults to 0 meaning that a percentage
offset is aligned to the minimum block size.
.TP
-.BI offset_increment \fR=\fPint
+.BI offset_increment \fR=\fPint[%|z]
If this is provided, then the real offset becomes `\fBoffset\fR + \fBoffset_increment\fR
* thread_number', where the thread number is a counter that starts at 0 and
is incremented for each sub-job (i.e. when \fBnumjobs\fR option is
simulate a smaller amount of memory. The amount specified is per worker.
.SS "I/O size"
.TP
-.BI size \fR=\fPint
+.BI size \fR=\fPint[%|z]
The total size of file I/O for each thread of this job. Fio will run until
this many bytes has been transferred, unless runtime is limited by other options
(such as \fBruntime\fR, for instance, or increased/decreased by \fBio_size\fR).
Can be combined with \fBoffset\fR to constrain the start and end range
that I/O will be done within.
.TP
-.BI io_size \fR=\fPint "\fR,\fB io_limit" \fR=\fPint
+.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
Normally fio operates within the region set by \fBsize\fR, which means
that the \fBsize\fR option sets both the region and size of I/O to be
performed. Sometimes that is not what you want. With this option, it is
I/O without transferring buffers between user-space and the kernel,
unless \fBverify\fR is set or \fBcuda_io\fR is \fBposix\fR. \fBiomem\fR must
not be \fBcudamalloc\fR. This ioengine defines engine specific options.
+.TP
+.B dfs
+I/O engine supporting asynchronous read and write operations to the DAOS File
+System (DFS) via libdfs.
.SS "I/O engine specific parameters"
In addition, there are some parameters which are only valid when a specific
\fBioengine\fR is in use. These are used identically to normal parameters,
the use of cudaMemcpy.
.RE
.RE
+.TP
+.BI (dfs)pool
+Specify the UUID of the DAOS pool to connect to.
+.TP
+.BI (dfs)cont
+Specify the UUID of the DAOS container to open.
+.TP
+.BI (dfs)chunk_size
+Specify a different chunk size (in bytes) for the dfs file.
+Use the DAOS container's chunk size by default.
+.TP
+.BI (dfs)object_class
+Specify a different object class for the dfs file.
+Use the DAOS container's object class by default.
.SS "I/O depth"
.TP
.BI iodepth \fR=\fPint
queue depth that meets \fBlatency_target\fR and exit. If true, fio will continue
running and try to meet \fBlatency_target\fR by adjusting queue depth.
.TP
-.BI max_latency \fR=\fPtime
+.BI max_latency \fR=\fPtime[,time][,time]
If set, fio will exit the job with an ETIMEDOUT error if it exceeds this
maximum latency. When the unit is omitted, the value is interpreted in
-microseconds.
+microseconds. Comma-separated values may be specified for reads, writes,
+and trims as described in \fBblocksize\fR.
.TP
.BI rate_cycle \fR=\fPint
Average bandwidth for \fBrate\fR and \fBrate_min\fR over this number
}
}
-static void fio_dump_options_free(struct thread_data *td)
-{
- while (!flist_empty(&td->opt_list)) {
- struct print_option *p;
-
- p = flist_first_entry(&td->opt_list, struct print_option, list);
- flist_del_init(&p->list);
- free(p->name);
- free(p->value);
- free(p);
- }
-}
-
static void copy_opt_list(struct thread_data *dst, struct thread_data *src)
{
struct flist_head *entry;
/*
* Fix these up to be nsec internally
*/
- o->max_latency *= 1000ULL;
+ for_each_rw_ddir(ddir)
+ o->max_latency[ddir] *= 1000ULL;
+
o->latency_target *= 1000ULL;
return ret;
return 0;
}
-static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
+static void lat_fatal(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd,
unsigned long long tnsec, unsigned long long max_nsec)
{
- if (!td->error)
- log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
+ if (!td->error) {
+ log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec): %s %s %llu %llu\n",
+ tnsec, max_nsec,
+ io_u->file->file_name,
+ io_ddir_name(io_u->ddir),
+ io_u->offset, io_u->buflen);
+ }
td_verror(td, ETIMEDOUT, "max latency exceeded");
icd->error = ETIMEDOUT;
}
icd->error = ops->io_u_lat(td, tnsec);
}
- if (td->o.max_latency && tnsec > td->o.max_latency)
- lat_fatal(td, icd, tnsec, td->o.max_latency);
- if (td->o.latency_target && tnsec > td->o.latency_target) {
- if (lat_target_failed(td))
- lat_fatal(td, icd, tnsec, td->o.latency_target);
+ if (ddir_rw(idx)) {
+ if (td->o.max_latency[idx] && tnsec > td->o.max_latency[idx])
+ lat_fatal(td, io_u, icd, tnsec, td->o.max_latency[idx]);
+ if (td->o.latency_target && tnsec > td->o.latency_target) {
+ if (lat_target_failed(td))
+ lat_fatal(td, io_u, icd, tnsec, td->o.latency_target);
+ }
}
}
#include "io_u.h"
#include "zbd_types.h"
-#define FIO_IOOPS_VERSION 28
+#define FIO_IOOPS_VERSION 29
#ifndef CONFIG_DYNAMIC_ENGINES
#define FIO_STATIC static
int (*invalidate)(struct thread_data *, struct fio_file *);
int (*unlink_file)(struct thread_data *, struct fio_file *);
int (*get_file_size)(struct thread_data *, struct fio_file *);
+ int (*prepopulate_file)(struct thread_data *, struct fio_file *);
void (*terminate)(struct thread_data *);
int (*iomem_alloc)(struct thread_data *, size_t);
void (*iomem_free)(struct thread_data *);
/*
* open iolog, check version, and call appropriate parser
*/
-static bool init_iolog_read(struct thread_data *td)
+static bool init_iolog_read(struct thread_data *td, char *fname)
{
- char buffer[256], *p, *fname;
+ char buffer[256], *p;
FILE *f = NULL;
- fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
dprint(FD_IO, "iolog: name=%s\n", fname);
if (is_socket(fname)) {
if (td->o.read_iolog_file) {
int need_swap;
+	char *fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
/*
* Check if it's a blktrace file and load that if possible.
* Otherwise assume it's a normal log file and load that.
*/
- if (is_blktrace(td->o.read_iolog_file, &need_swap))
- ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
+ if (is_blktrace(fname, &need_swap))
+ ret = load_blktrace(td, fname, need_swap);
else
- ret = init_iolog_read(td);
+ ret = init_iolog_read(td, fname);
} else if (td->o.write_iolog_file)
ret = init_iolog_write(td);
else
.name = "libcufile I/O engine", /* libcufile */
.mask = FIO_OPT_G_LIBCUFILE,
},
+ {
+ .name = "DAOS File System (dfs) I/O engine", /* dfs */
+ .mask = FIO_OPT_G_DFS,
+ },
{
.name = NULL,
},
__FIO_OPT_G_FILESTAT,
__FIO_OPT_G_NR,
__FIO_OPT_G_LIBCUFILE,
+ __FIO_OPT_G_DFS,
FIO_OPT_G_RATE = (1ULL << __FIO_OPT_G_RATE),
FIO_OPT_G_ZONE = (1ULL << __FIO_OPT_G_ZONE),
FIO_OPT_G_IOURING = (1ULL << __FIO_OPT_G_IOURING),
FIO_OPT_G_FILESTAT = (1ULL << __FIO_OPT_G_FILESTAT),
FIO_OPT_G_LIBCUFILE = (1ULL << __FIO_OPT_G_LIBCUFILE),
+ FIO_OPT_G_DFS = (1ULL << __FIO_OPT_G_DFS),
};
extern const struct opt_group *opt_group_from_mask(uint64_t *mask);
if (parse_is_percent(v)) {
td->o.start_offset = 0;
td->o.start_offset_percent = -1ULL - v;
+ td->o.start_offset_nz = 0;
dprint(FD_PARSE, "SET start_offset_percent %d\n",
td->o.start_offset_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.start_offset = 0;
+ td->o.start_offset_percent = 0;
+ td->o.start_offset_nz = v - ZONE_BASE_VAL;
} else
td->o.start_offset = v;
if (parse_is_percent(v)) {
td->o.offset_increment = 0;
td->o.offset_increment_percent = -1ULL - v;
+ td->o.offset_increment_nz = 0;
dprint(FD_PARSE, "SET offset_increment_percent %d\n",
td->o.offset_increment_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.offset_increment = 0;
+ td->o.offset_increment_percent = 0;
+ td->o.offset_increment_nz = v - ZONE_BASE_VAL;
} else
td->o.offset_increment = v;
td->o.size_percent = -1ULL - v;
dprint(FD_PARSE, "SET size_percent %d\n",
td->o.size_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.size = 0;
+ td->o.size_percent = 0;
+ td->o.size_nz = v - ZONE_BASE_VAL;
} else
td->o.size = v;
}
dprint(FD_PARSE, "SET io_size_percent %d\n",
td->o.io_size_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.io_size = 0;
+ td->o.io_size_percent = 0;
+ td->o.io_size_nz = v - ZONE_BASE_VAL;
} else
td->o.io_size = v;
return 0;
}
+static int str_zoneskip_cb(void *data, unsigned long long *__val)
+{
+ struct thread_data *td = cb_data_to_td(data);
+ unsigned long long v = *__val;
+
+ if (parse_is_zone(v)) {
+ td->o.zone_skip = 0;
+ td->o.zone_skip_nz = v - ZONE_BASE_VAL;
+ } else
+ td->o.zone_skip = v;
+
+ return 0;
+}
+
static int str_write_bw_log_cb(void *data, const char *str)
{
struct thread_data *td = cb_data_to_td(data);
{ .ival = "nbd",
.help = "Network Block Device (NBD) IO engine"
},
+#ifdef CONFIG_DFS
+ { .ival = "dfs",
+ .help = "DAOS File System (dfs) IO engine",
+ },
+#endif
},
},
{
{
.name = "size",
.lname = "Size",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_size_cb,
.off1 = offsetof(struct thread_options, size),
.help = "Total size of device or files",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
.name = "io_size",
.alias = "io_limit",
.lname = "IO Size",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_io_size_cb,
.off1 = offsetof(struct thread_options, io_size),
.help = "Total size of I/O to be performed",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
.name = "offset",
.lname = "IO offset",
.alias = "fileoffset",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_offset_cb,
.off1 = offsetof(struct thread_options, start_offset),
.help = "Start IO from this offset",
.def = "0",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "offset_increment",
.lname = "IO offset increment",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_offset_increment_cb,
.off1 = offsetof(struct thread_options, offset_increment),
.help = "What is the increment from one offset to the next",
.parent = "offset",
.hide = 1,
.def = "0",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "zoneskip",
.lname = "Zone skip",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
+ .cb = str_zoneskip_cb,
.off1 = offsetof(struct thread_options, zone_skip),
.help = "Space between IO zones",
.def = "0",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_ZONE,
},
{
.name = "max_latency",
.lname = "Max Latency (usec)",
- .type = FIO_OPT_STR_VAL_TIME,
- .off1 = offsetof(struct thread_options, max_latency),
+ .type = FIO_OPT_ULL,
+ .off1 = offsetof(struct thread_options, max_latency[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, max_latency[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, max_latency[DDIR_TRIM]),
.help = "Maximum tolerated IO latency (usec)",
.is_time = 1,
.category = FIO_OPT_C_IO,
}
}
+void fio_dump_options_free(struct thread_data *td)
+{
+ while (!flist_empty(&td->opt_list)) {
+ struct print_option *p;
+
+ p = flist_first_entry(&td->opt_list, struct print_option, list);
+ flist_del_init(&p->list);
+ free(p->name);
+ free(p->value);
+ free(p);
+ }
+}
+
struct fio_option *fio_option_find(const char *name)
{
return find_option(fio_options, name);
void del_opt_posval(const char *, const char *);
struct thread_data;
void fio_options_free(struct thread_data *);
+void fio_dump_options_free(struct thread_data *);
char *get_next_str(char **ptr);
int get_max_str_idx(char *input);
char* get_name_by_idx(char *input, int index);
"OPT_BOOL",
"OPT_FLOAT_LIST",
"OPT_STR_SET",
+ "OPT_STR_VAL_ZONE",
"OPT_DEPRECATED",
"OPT_SOFT_DEPRECATED",
"OPT_UNSUPPORTED",
fallthrough;
case FIO_OPT_ULL:
case FIO_OPT_INT:
- case FIO_OPT_STR_VAL: {
+ case FIO_OPT_STR_VAL:
+ case FIO_OPT_STR_VAL_ZONE:
+ {
fio_opt_str_val_fn *fn = o->cb;
char tmp[128], *p;
+ size_t len = strlen(ptr);
+
+ if (len > 0 && ptr[len - 1] == 'z') {
+ if (o->type == FIO_OPT_STR_VAL_ZONE) {
+ char *ep;
+ unsigned long long val;
+
+ errno = 0;
+ val = strtoul(ptr, &ep, 10);
+ if (errno == 0 && ep != ptr && *ep == 'z') {
+ ull = ZONE_BASE_VAL + (uint32_t)val;
+ ret = 0;
+ goto store_option_value;
+ } else {
+ log_err("%s: unexpected zone value '%s'\n",
+ o->name, ptr);
+ return 1;
+ }
+ } else {
+ log_err("%s: 'z' suffix isn't applicable\n",
+ o->name);
+ return 1;
+ }
+ }
if (!is_time && o->is_time)
is_time = o->is_time;
}
}
+store_option_value:
if (fn)
ret = fn(data, &ull);
else {
FIO_OPT_BOOL,
FIO_OPT_FLOAT_LIST,
FIO_OPT_STR_SET,
+ FIO_OPT_STR_VAL_ZONE,
FIO_OPT_DEPRECATED,
FIO_OPT_SOFT_DEPRECATED,
FIO_OPT_UNSUPPORTED, /* keep this last */
static inline int parse_is_percent(unsigned long long val)
{
- return val <= -1ULL && val >= (-1ULL - 100ULL);
+ return val >= -101;
}
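+/*
+ * Values given with a 'z' (zone) suffix are stored as ZONE_BASE_VAL plus a
+ * 32-bit zone count, i.e. in the top half of the u64 range, much like
+ * percentages are encoded near -1ULL. parse_is_zone() detects such values.
+ */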
+#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
static inline int parse_is_percent_uncapped(unsigned long long val)
{
- return (long long)val <= -1;
+ return ZONE_BASE_VAL + -1U < val;
+}
+
+static inline int parse_is_zone(unsigned long long val)
+{
+ return (val - ZONE_BASE_VAL) <= -1U;
}
struct print_option {
break;
}
flist_add_tail(&entry->list, &first->next);
- } while (ret != Z_STREAM_END);
+ }
ret = deflateEnd(&stream);
if (ret == Z_OK)
};
enum {
- FIO_SERVER_VER = 87,
+ FIO_SERVER_VER = 89,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
next_tail = tail = *ring->tail;
do {
next_tail++;
- read_barrier();
- if (next_tail == *ring->head)
+ if (next_tail == atomic_load_acquire(ring->head))
break;
index = tail & sq_ring_mask;
tail = next_tail;
} while (prepped < max_ios);
- if (*ring->tail != tail) {
- *ring->tail = tail;
- write_barrier();
- }
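+	/*
+	 * Publish the new tail with release semantics so the kernel side
+	 * sees the prepared SQEs before it observes the updated tail.
+	 */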
+ if (prepped)
+ atomic_store_release(ring->tail, tail);
return prepped;
}
struct file *f;
read_barrier();
- if (head == *ring->tail)
+ if (head == atomic_load_acquire(ring->tail))
break;
cqe = &ring->cqes[head & cq_ring_mask];
if (!do_nop) {
head++;
} while (1);
- s->inflight -= reaped;
- *ring->head = head;
- write_barrier();
+ if (reaped) {
+ s->inflight -= reaped;
+ atomic_store_release(ring->head, head);
+ }
return reaped;
}
prepped = 0;
do {
int to_wait, to_submit, this_reap, to_prep;
+ unsigned ring_flags = 0;
if (!prepped && s->inflight < depth) {
to_prep = min(depth - s->inflight, batch_submit);
* Only need to call io_uring_enter if we're not using SQ thread
* poll, or if IORING_SQ_NEED_WAKEUP is set.
*/
- if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) {
+ if (sq_thread_poll)
+ ring_flags = atomic_load_acquire(ring->flags);
+ if (!sq_thread_poll || ring_flags & IORING_SQ_NEED_WAKEUP) {
unsigned flags = 0;
if (to_wait)
flags = IORING_ENTER_GETEVENTS;
- if ((*ring->flags & IORING_SQ_NEED_WAKEUP))
+ if (ring_flags & IORING_SQ_NEED_WAKEUP)
flags |= IORING_ENTER_SQ_WAKEUP;
ret = io_uring_enter(s, to_submit, to_wait, flags);
s->calls++;
+ } else {
+ /* for SQPOLL, we submitted it all effectively */
+ ret = to_submit;
}
/*
>> "${logfile}.${test_number}" 2>&1 || return $?
}
+# test 'z' suffix parsing only
+test55() {
+ local bs
+ bs=$((logical_block_size))
+
+ require_zbd || return $SKIP_TESTCASE
+ # offset=1z + offset_increment=10z + size=2z
+ require_seq_zones 13 || return $SKIP_TESTCASE
+
+ run_fio --name=j \
+ --filename=${dev} \
+ --direct=1 \
+ "$(ioengine "psync")" \
+ --zonemode=zbd \
+ --zonesize=${zone_size} \
+ --rw=write \
+ --bs=${bs} \
+ --numjobs=2 \
+ --offset_increment=10z \
+ --offset=1z \
+ --size=2z \
+ --io_size=3z \
+ ${job_var_opts[@]} --debug=zbd \
+ >> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
+# test 'z' suffix parsing only
+test56() {
+ local bs
+ bs=$((logical_block_size))
+
+ require_regular_block_dev || return $SKIP_TESTCASE
+ require_seq_zones 10 || return $SKIP_TESTCASE
+
+ run_fio --name=j \
+ --filename=${dev} \
+ --direct=1 \
+ "$(ioengine "psync")" \
+ --zonemode=strided \
+ --zonesize=${zone_size} \
+ --rw=write \
+ --bs=${bs} \
+ --size=10z \
+ --zoneskip=2z \
+ ${job_var_opts[@]} --debug=zbd \
+ >> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
SECONDS=0
tests=()
dynamic_analyzer=()
unsigned long long size;
unsigned long long io_size;
unsigned int size_percent;
+ unsigned int size_nz;
unsigned int io_size_percent;
+ unsigned int io_size_nz;
unsigned int fill_device;
unsigned int file_append;
unsigned long long file_size_low;
unsigned long long file_size_high;
unsigned long long start_offset;
unsigned long long start_offset_align;
+ unsigned int start_offset_nz;
unsigned long long bs[DDIR_RWDIR_CNT];
unsigned long long ba[DDIR_RWDIR_CNT];
unsigned long long zone_size;
unsigned long long zone_capacity;
unsigned long long zone_skip;
+ uint32_t zone_skip_nz;
enum fio_zone_mode zone_mode;
unsigned long long lockmem;
enum fio_memtype mem_type;
unsigned int mem_align;
- unsigned long long max_latency;
+ unsigned long long max_latency[DDIR_RWDIR_CNT];
unsigned int exit_what;
unsigned int stonewall;
unsigned int gid;
unsigned int offset_increment_percent;
+ unsigned int offset_increment_nz;
unsigned long long offset_increment;
unsigned long long number_ios;
uint64_t size;
uint64_t io_size;
uint32_t size_percent;
+ uint32_t size_nz;
uint32_t io_size_percent;
+ uint32_t io_size_nz;
uint32_t fill_device;
uint32_t file_append;
uint32_t unique_filename;
+ uint32_t pad3;
uint64_t file_size_low;
uint64_t file_size_high;
uint64_t start_offset;
uint64_t start_offset_align;
+ uint32_t start_offset_nz;
+ uint32_t pad4;
uint64_t bs[DDIR_RWDIR_CNT];
uint64_t ba[DDIR_RWDIR_CNT];
struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
uint32_t zone_split_nr[DDIR_RWDIR_CNT];
- uint8_t pad1[4];
-
fio_fp64_t zipf_theta;
fio_fp64_t pareto_h;
fio_fp64_t gauss_dev;
uint64_t zone_capacity;
uint64_t zone_skip;
uint64_t lockmem;
+ uint32_t zone_skip_nz;
uint32_t mem_type;
uint32_t mem_align;
uint32_t new_group;
uint32_t numjobs;
- uint8_t pad3[4];
-
/*
* We currently can't convert these, so don't enable them
*/
uint32_t gid;
uint32_t offset_increment_percent;
+ uint32_t offset_increment_nz;
uint64_t offset_increment;
uint64_t number_ios;
uint64_t latency_target;
uint64_t latency_window;
- uint64_t max_latency;
+ uint64_t max_latency[DDIR_RWDIR_CNT];
+ uint32_t pad5;
fio_fp64_t latency_percentile;
uint32_t latency_run;
return false;
}
- if (td->o.zone_skip &&
- (td->o.zone_skip < td->o.zone_size ||
- td->o.zone_skip % td->o.zone_size)) {
+ if (td->o.zone_skip % td->o.zone_size) {
log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
f->file_name, (unsigned long long) td->o.zone_skip,
(unsigned long long) td->o.zone_size);
{
struct thread_data *td;
struct fio_file *f;
- uint32_t zone_size;
int i, j, k;
for_each_td(td, i) {
for_each_file(td, f, j) {
+ uint64_t zone_size;
+
if (!f->zbd_info)
continue;
zone_size = f->zbd_info->zone_size;
for (k = 0; k < FIO_ARRAY_SIZE(td->o.bs); k++) {
if (td->o.verify != VERIFY_NONE &&
zone_size % td->o.bs[k] != 0) {
- log_info("%s: block size %llu is not a divisor of the zone size %d\n",
+ log_info("%s: block size %llu is not a divisor of the zone size %llu\n",
f->file_name, td->o.bs[k],
- zone_size);
+ (unsigned long long)zone_size);
return false;
}
}
static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
struct fio_zone_info *z);
-int zbd_setup_files(struct thread_data *td)
+int zbd_init_files(struct thread_data *td)
{
struct fio_file *f;
int i;
if (zbd_init_zone_info(td, f))
return 1;
}
+ return 0;
+}
+
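+/*
+ * Convert the zone counts captured by 'z' suffix parsing into byte values,
+ * now that each file's zone size is known.
+ */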
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td)
+{
+ struct fio_file *f;
+ int i;
+
+ for_each_file(td, f, i) {
+ struct zoned_block_device_info *zbd = f->zbd_info;
+ // zonemode=strided doesn't get per-file zone size.
+ uint64_t zone_size = zbd ? zbd->zone_size : td->o.zone_size;
+
+ if (zone_size == 0)
+ continue;
+
+ if (td->o.size_nz > 0) {
+ td->o.size = td->o.size_nz * zone_size;
+ }
+ if (td->o.io_size_nz > 0) {
+ td->o.io_size = td->o.io_size_nz * zone_size;
+ }
+ if (td->o.start_offset_nz > 0) {
+ td->o.start_offset = td->o.start_offset_nz * zone_size;
+ }
+ if (td->o.offset_increment_nz > 0) {
+ td->o.offset_increment = td->o.offset_increment_nz * zone_size;
+ }
+ if (td->o.zone_skip_nz > 0) {
+ td->o.zone_skip = td->o.zone_skip_nz * zone_size;
+ }
+ }
+}
+
+int zbd_setup_files(struct thread_data *td)
+{
+ struct fio_file *f;
+ int i;
if (!zbd_using_direct_io()) {
log_err("Using direct I/O is mandatory for writing to ZBD drives\n\n");
struct fio_zone_info zone_info[0];
};
+int zbd_init_files(struct thread_data *td);
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td);
int zbd_setup_files(struct thread_data *td);
void zbd_free_zone_info(struct fio_file *f);
void zbd_file_reset(struct thread_data *td, struct fio_file *f);