Merge branch 'add-librpma-engines' of https://github.com/janekmi/fio
author Jens Axboe <axboe@kernel.dk>
Thu, 18 Mar 2021 14:47:05 +0000 (08:47 -0600)
committer Jens Axboe <axboe@kernel.dk>
Thu, 18 Mar 2021 14:47:05 +0000 (08:47 -0600)
* 'add-librpma-engines' of https://github.com/janekmi/fio:
  rpma: add librpma_apm_* and librpma_gpspm_* engines

33 files changed:
.gitignore
FIO-VERSION-GEN
HOWTO
Makefile
backend.c
cconv.c
configure
engines/dfs.c [new file with mode: 0644]
engines/falloc.c
engines/filecreate.c
engines/io_uring.c
engines/libpmem.c
examples/dfs.fio [new file with mode: 0644]
file.h
filesetup.c
fio.1
init.c
io_u.c
ioengines.h
iolog.c
optgroup.c
optgroup.h
options.c
options.h
parse.c
parse.h
server.c
server.h
t/io_uring.c
t/zbd/test-zbd-support
thread_options.h
zbd.c
zbd.h

index 0aa4a3611c031024f631418fee0fad1ba94d0cae..6651f96edc72ea3295c75cc9f9628eea9e267386 100644 (file)
@@ -30,3 +30,4 @@ doc/output
 /tags
 /TAGS
 /t/zbd/test-zbd-support.log.*
+/t/fuzz/fuzz_parseini
index 81a6355b981b1694bb79c3f073de9e693778e3b4..294860716cb75dc6e5dd099c7bc2cbb41cbae609 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.25
+DEF_VER=fio-3.26
 
 LF='
 '
diff --git a/HOWTO b/HOWTO
index 39f8c63dc714bb87bee9537c80427982158a98a7..6219da663417b6d053e1794b77406fa8f805cd33 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -2067,6 +2067,9 @@ I/O engine
                        unless :option:`verify` is set or :option:`cuda_io` is `posix`.
                        :option:`iomem` must not be `cudamalloc`. This ioengine defines
                        engine specific options.
+               **dfs**
+                       I/O engine supporting asynchronous read and write operations to the
+                       DAOS File System (DFS) via libdfs.
 
 I/O engine specific parameters
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2461,6 +2464,24 @@ with the caveat that when used on the command line, they must come after the
                GPU to RAM before a write and copied from RAM to GPU after a
                read. :option:`verify` does not affect use of cudaMemcpy.
 
+.. option:: pool=str : [dfs]
+
+       Specify the UUID of the DAOS pool to connect to.
+
+.. option:: cont=str : [dfs]
+
+       Specify the UUID of the DAOS container to open.
+
+.. option:: chunk_size=int : [dfs]
+
+       Specify a different chunk size (in bytes) for the dfs file.
+       Use DAOS container's chunk size by default.
+
+.. option:: object_class=str : [dfs]
+
+       Specify a different object class for the dfs file.
+       Use DAOS container's object class by default.
+
 I/O depth
 ~~~~~~~~~
 
@@ -2672,11 +2693,12 @@ I/O latency
        true, fio will continue running and try to meet :option:`latency_target`
        by adjusting queue depth.
 
-.. option:: max_latency=time
+.. option:: max_latency=time[,time][,time]
 
        If set, fio will exit the job with an ETIMEDOUT error if it exceeds this
        maximum latency. When the unit is omitted, the value is interpreted in
-       microseconds.
+       microseconds. Comma-separated values may be specified for reads, writes,
+       and trims as described in :option:`blocksize`.
 
 .. option:: rate_cycle=int
 
index 1aa9f37785e91c73a1644d4bebe5dce2d221ce04..fce3d0d134cffad9643615988e50c308057ad2a5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -145,6 +145,11 @@ ifdef CONFIG_HTTP
   http_LIBS = -lcurl -lssl -lcrypto
   ENGINES += http
 endif
+ifdef CONFIG_DFS
+  dfs_SRCS = engines/dfs.c
+  dfs_LIBS = -luuid -ldaos -ldfs
+  ENGINES += dfs
+endif
 SOURCE += oslib/asprintf.c
 ifndef CONFIG_STRSEP
   SOURCE += oslib/strsep.c
index f2efddd67d365dc9c4300892275caaa88796a2e1..52b4ca7e1c83e8feda885a8d3d84219f5f784190 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -2537,6 +2537,7 @@ int fio_backend(struct sk_out *sk_out)
        for_each_td(td, i) {
                steadystate_free(td);
                fio_options_free(td);
+               fio_dump_options_free(td);
                if (td->rusage_sem) {
                        fio_sem_remove(td->rusage_sem);
                        td->rusage_sem = NULL;
diff --git a/cconv.c b/cconv.c
index b10868fb3de6b2dd0844799ab36fd145ff68448e..aa06e3ea6ee7004cc4b65d99839a83f492766d9c 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -143,6 +143,8 @@ void convert_thread_options_to_cpu(struct thread_options *o,
                o->rate_iops_min[i] = le32_to_cpu(top->rate_iops_min[i]);
 
                o->perc_rand[i] = le32_to_cpu(top->perc_rand[i]);
+
+               o->max_latency[i] = le64_to_cpu(top->max_latency[i]);
        }
 
        o->ratecycle = le32_to_cpu(top->ratecycle);
@@ -289,7 +291,6 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->sync_file_range = le32_to_cpu(top->sync_file_range);
        o->latency_target = le64_to_cpu(top->latency_target);
        o->latency_window = le64_to_cpu(top->latency_window);
-       o->max_latency = le64_to_cpu(top->max_latency);
        o->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(top->latency_percentile.u.i));
        o->latency_run = le32_to_cpu(top->latency_run);
        o->compress_percentage = le32_to_cpu(top->compress_percentage);
@@ -491,7 +492,6 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->sync_file_range = cpu_to_le32(o->sync_file_range);
        top->latency_target = __cpu_to_le64(o->latency_target);
        top->latency_window = __cpu_to_le64(o->latency_window);
-       top->max_latency = __cpu_to_le64(o->max_latency);
        top->latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(o->latency_percentile.u.f));
        top->latency_run = __cpu_to_le32(o->latency_run);
        top->compress_percentage = cpu_to_le32(o->compress_percentage);
@@ -550,6 +550,8 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
                top->rate_iops_min[i] = cpu_to_le32(o->rate_iops_min[i]);
 
                top->perc_rand[i] = cpu_to_le32(o->perc_rand[i]);
+
+               top->max_latency[i] = __cpu_to_le64(o->max_latency[i]);
        }
 
        memcpy(top->verify_pattern, o->verify_pattern, MAX_PATTERN_SIZE);
index 1bbdb8c4a4fca44759eebb3c1ee562c9ce895dbb..2f5ac91fb45387b6f6139b2582c266996eca7214 100755 (executable)
--- a/configure
+++ b/configure
@@ -171,6 +171,7 @@ march_set="no"
 libiscsi="no"
 libnbd="no"
 libzbc=""
+dfs=""
 dynamic_engines="no"
 prefix=/usr/local
 
@@ -242,6 +243,8 @@ for opt do
   ;;
   --dynamic-libengines) dynamic_engines="yes"
   ;;
+  --disable-dfs) dfs="no"
+  ;;
   --help)
     show_help="yes"
     ;;
@@ -284,6 +287,7 @@ if test "$show_help" = "yes" ; then
   echo "--disable-libzbc        Disable libzbc even if found"
   echo "--disable-tcmalloc     Disable tcmalloc support"
   echo "--dynamic-libengines   Lib-based ioengines as dynamic libraries"
+  echo "--disable-dfs          Disable DAOS File System support even if found"
   exit $exit_val
 fi
 
@@ -413,6 +417,7 @@ CYGWIN*)
   clock_gettime="yes" # clock_monotonic probe has dependency on this
   clock_monotonic="yes"
   sched_idle="yes"
+  pthread_condattr_setclock="no"
   ;;
 esac
 
@@ -758,10 +763,8 @@ print_config "POSIX pshared support" "$posix_pshared"
 
 ##########################################
 # POSIX pthread_condattr_setclock() probe
-if test "$pthread_condattr_setclock" != "yes" ; then
-  pthread_condattr_setclock="no"
-fi
-cat > $TMPC <<EOF
+if test "$pthread_condattr_setclock" != "no" ; then
+  cat > $TMPC <<EOF
 #include <pthread.h>
 int main(void)
 {
@@ -770,11 +773,12 @@ int main(void)
   return 0;
 }
 EOF
-if compile_prog "" "$LIBS" "pthread_condattr_setclock" ; then
-  pthread_condattr_setclock=yes
-elif compile_prog "" "$LIBS -lpthread" "pthread_condattr_setclock" ; then
-  pthread_condattr_setclock=yes
-  LIBS="$LIBS -lpthread"
+  if compile_prog "" "$LIBS" "pthread_condattr_setclock" ; then
+    pthread_condattr_setclock=yes
+  elif compile_prog "" "$LIBS -lpthread" "pthread_condattr_setclock" ; then
+    pthread_condattr_setclock=yes
+    LIBS="$LIBS -lpthread"
+  fi
 fi
 print_config "pthread_condattr_setclock()" "$pthread_condattr_setclock"
 
@@ -2222,6 +2226,33 @@ if test "$libnbd" != "no" ; then
 fi
 print_config "NBD engine" "$libnbd"
 
+##########################################
+# check for dfs (DAOS File System)
+if test "$dfs" != "no" ; then
+  cat > $TMPC << EOF
+#include <fcntl.h>
+#include <daos.h>
+#include <daos_fs.h>
+
+int main(int argc, char **argv)
+{
+  daos_handle_t        poh;
+  daos_handle_t        coh;
+  dfs_t                *dfs;
+
+  (void) dfs_mount(poh, coh, O_RDWR, &dfs);
+
+  return 0;
+}
+EOF
+  if compile_prog "" "-luuid -ldfs -ldaos" "dfs"; then
+    dfs="yes"
+  else
+    dfs="no"
+  fi
+fi
+print_config "DAOS File System (dfs) Engine" "$dfs"
+
 ##########################################
 # Check if we have lex/yacc available
 yacc="no"
@@ -2846,12 +2877,6 @@ fi
 if test "$clock_monotonic" = "yes" ; then
   output_sym "CONFIG_CLOCK_MONOTONIC"
 fi
-if test "$clock_monotonic_raw" = "yes" ; then
-  output_sym "CONFIG_CLOCK_MONOTONIC_RAW"
-fi
-if test "$clock_monotonic_precise" = "yes" ; then
-  output_sym "CONFIG_CLOCK_MONOTONIC_PRECISE"
-fi
 if test "$clockid_t" = "yes"; then
   output_sym "CONFIG_CLOCKID_T"
 fi
@@ -3046,6 +3071,9 @@ fi
 if test "$libcufile" = "yes" ; then
   output_sym "CONFIG_LIBCUFILE"
 fi
+if test "$dfs" = "yes" ; then
+  output_sym "CONFIG_DFS"
+fi
 if test "$march_set" = "no" && test "$build_native" = "yes" ; then
   output_sym "CONFIG_BUILD_NATIVE"
 fi
diff --git a/engines/dfs.c b/engines/dfs.c
new file mode 100644 (file)
index 0000000..0343b10
--- /dev/null
@@ -0,0 +1,583 @@
+/**
+ * FIO engine for DAOS File System (dfs).
+ *
+ * (C) Copyright 2020-2021 Intel Corporation.
+ */
+
+#include <fio.h>
+#include <optgroup.h>
+
+#include <daos.h>
+#include <daos_fs.h>
+
+static bool            daos_initialized;
+static int             num_threads;
+static pthread_mutex_t daos_mutex = PTHREAD_MUTEX_INITIALIZER;
+daos_handle_t          poh;  /* pool handle */
+daos_handle_t          coh;  /* container handle */
+daos_oclass_id_t       cid = OC_UNKNOWN;  /* object class */
+dfs_t                  *dfs; /* dfs mount reference */
+
+struct daos_iou {
+       struct io_u     *io_u;
+       daos_event_t    ev;
+       d_sg_list_t     sgl;
+       d_iov_t         iov;
+       daos_size_t     size;
+       bool            complete;
+};
+
+struct daos_data {
+       daos_handle_t   eqh;
+       dfs_obj_t       *obj;
+       struct io_u     **io_us;
+       int             queued;
+       int             num_ios;
+};
+
+struct daos_fio_options {
+       void            *pad;
+       char            *pool;   /* Pool UUID */
+       char            *cont;   /* Container UUID */
+       daos_size_t     chsz;    /* Chunk size */
+       char            *oclass; /* object class */
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+       char            *svcl;   /* service replica list, deprecated */
+#endif
+};
+
+static struct fio_option options[] = {
+       {
+               .name           = "pool",
+               .lname          = "pool uuid",
+               .type           = FIO_OPT_STR_STORE,
+               .off1           = offsetof(struct daos_fio_options, pool),
+               .help           = "DAOS pool uuid",
+               .category       = FIO_OPT_C_ENGINE,
+               .group          = FIO_OPT_G_DFS,
+       },
+       {
+               .name           = "cont",
+               .lname          = "container uuid",
+               .type           = FIO_OPT_STR_STORE,
+               .off1           = offsetof(struct daos_fio_options, cont),
+               .help           = "DAOS container uuid",
+               .category       = FIO_OPT_C_ENGINE,
+               .group          = FIO_OPT_G_DFS,
+       },
+       {
+               .name           = "chunk_size",
+               .lname          = "DFS chunk size",
+               .type           = FIO_OPT_ULL,
+               .off1           = offsetof(struct daos_fio_options, chsz),
+               .help           = "DFS chunk size in bytes",
+               .def            = "0", /* use container default */
+               .category       = FIO_OPT_C_ENGINE,
+               .group          = FIO_OPT_G_DFS,
+       },
+       {
+               .name           = "object_class",
+               .lname          = "object class",
+               .type           = FIO_OPT_STR_STORE,
+               .off1           = offsetof(struct daos_fio_options, oclass),
+               .help           = "DAOS object class",
+               .category       = FIO_OPT_C_ENGINE,
+               .group          = FIO_OPT_G_DFS,
+       },
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+       {
+               .name           = "svcl",
+               .lname          = "List of service ranks",
+               .type           = FIO_OPT_STR_STORE,
+               .off1           = offsetof(struct daos_fio_options, svcl),
+               .help           = "List of pool replicated service ranks",
+               .category       = FIO_OPT_C_ENGINE,
+               .group          = FIO_OPT_G_DFS,
+       },
+#endif
+       {
+               .name           = NULL,
+       },
+};
+
+static int daos_fio_global_init(struct thread_data *td)
+{
+       struct daos_fio_options *eo = td->eo;
+       uuid_t                  pool_uuid, co_uuid;
+       daos_pool_info_t        pool_info;
+       daos_cont_info_t        co_info;
+       int                     rc = 0;
+
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+       if (!eo->pool || !eo->cont || !eo->svcl) {
+#else
+       if (!eo->pool || !eo->cont) {
+#endif
+               log_err("Missing required DAOS options\n");
+               return EINVAL;
+       }
+
+       rc = daos_init();
+       if (rc != -DER_ALREADY && rc) {
+               log_err("Failed to initialize daos %d\n", rc);
+               td_verror(td, rc, "daos_init");
+               return rc;
+       }
+
+       rc = uuid_parse(eo->pool, pool_uuid);
+       if (rc) {
+               log_err("Failed to parse 'Pool uuid': %s\n", eo->pool);
+               td_verror(td, EINVAL, "uuid_parse(eo->pool)");
+               return EINVAL;
+       }
+
+       rc = uuid_parse(eo->cont, co_uuid);
+       if (rc) {
+               log_err("Failed to parse 'Cont uuid': %s\n", eo->cont);
+               td_verror(td, EINVAL, "uuid_parse(eo->cont)");
+               return EINVAL;
+       }
+
+       /* Connect to the DAOS pool */
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+       d_rank_list_t *svcl = NULL;
+
+       svcl = daos_rank_list_parse(eo->svcl, ":");
+       if (svcl == NULL) {
+               log_err("Failed to parse svcl\n");
+               td_verror(td, EINVAL, "daos_rank_list_parse");
+               return EINVAL;
+       }
+
+       rc = daos_pool_connect(pool_uuid, NULL, svcl, DAOS_PC_RW,
+                       &poh, &pool_info, NULL);
+       d_rank_list_free(svcl);
+#else
+       rc = daos_pool_connect(pool_uuid, NULL, DAOS_PC_RW, &poh, &pool_info,
+                              NULL);
+#endif
+       if (rc) {
+               log_err("Failed to connect to pool %d\n", rc);
+               td_verror(td, rc, "daos_pool_connect");
+               return rc;
+       }
+
+       /* Open the DAOS container */
+       rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL);
+       if (rc) {
+               log_err("Failed to open container: %d\n", rc);
+               td_verror(td, rc, "daos_cont_open");
+               (void)daos_pool_disconnect(poh, NULL);
+               return rc;
+       }
+
+       /* Mount encapsulated filesystem */
+       rc = dfs_mount(poh, coh, O_RDWR, &dfs);
+       if (rc) {
+               log_err("Failed to mount DFS namespace: %d\n", rc);
+               td_verror(td, rc, "dfs_mount");
+               (void)daos_pool_disconnect(poh, NULL);
+               (void)daos_cont_close(coh, NULL);
+               return rc;
+       }
+
+       /* Retrieve object class to use, if specified */
+       if (eo->oclass)
+               cid = daos_oclass_name2id(eo->oclass);
+
+       return 0;
+}
+
+static int daos_fio_global_cleanup()
+{
+       int rc;
+       int ret = 0;
+
+       rc = dfs_umount(dfs);
+       if (rc) {
+               log_err("failed to umount dfs: %d\n", rc);
+               ret = rc;
+       }
+       rc = daos_cont_close(coh, NULL);
+       if (rc) {
+               log_err("failed to close container: %d\n", rc);
+               if (ret == 0)
+                       ret = rc;
+       }
+       rc = daos_pool_disconnect(poh, NULL);
+       if (rc) {
+               log_err("failed to disconnect pool: %d\n", rc);
+               if (ret == 0)
+                       ret = rc;
+       }
+       rc = daos_fini();
+       if (rc) {
+               log_err("failed to finalize daos: %d\n", rc);
+               if (ret == 0)
+                       ret = rc;
+       }
+
+       return ret;
+}
+
+static int daos_fio_setup(struct thread_data *td)
+{
+       return 0;
+}
+
+static int daos_fio_init(struct thread_data *td)
+{
+       struct daos_data        *dd;
+       int                     rc = 0;
+
+       pthread_mutex_lock(&daos_mutex);
+
+       dd = malloc(sizeof(*dd));
+       if (dd == NULL) {
+               log_err("Failed to allocate DAOS-private data\n");
+               rc = ENOMEM;
+               goto out;
+       }
+
+       dd->queued      = 0;
+       dd->num_ios     = td->o.iodepth;
+       dd->io_us       = calloc(dd->num_ios, sizeof(struct io_u *));
+       if (dd->io_us == NULL) {
+               log_err("Failed to allocate IO queue\n");
+               rc = ENOMEM;
+               goto out;
+       }
+
+       /* initialize DAOS stack if not already up */
+       if (!daos_initialized) {
+               rc = daos_fio_global_init(td);
+               if (rc)
+                       goto out;
+               daos_initialized = true;
+       }
+
+       rc = daos_eq_create(&dd->eqh);
+       if (rc) {
+               log_err("Failed to create event queue: %d\n", rc);
+               td_verror(td, rc, "daos_eq_create");
+               goto out;
+       }
+
+       td->io_ops_data = dd;
+       num_threads++;
+out:
+       if (rc) {
+               if (dd) {
+                       free(dd->io_us);
+                       free(dd);
+               }
+               if (num_threads == 0 && daos_initialized) {
+                       /* don't clobber error return value */
+                       (void)daos_fio_global_cleanup();
+                       daos_initialized = false;
+               }
+       }
+       pthread_mutex_unlock(&daos_mutex);
+       return rc;
+}
+
+static void daos_fio_cleanup(struct thread_data *td)
+{
+       struct daos_data        *dd = td->io_ops_data;
+       int                     rc;
+
+       if (dd == NULL)
+               return;
+
+       rc = daos_eq_destroy(dd->eqh, DAOS_EQ_DESTROY_FORCE);
+       if (rc < 0) {
+               log_err("failed to destroy event queue: %d\n", rc);
+               td_verror(td, rc, "daos_eq_destroy");
+       }
+
+       free(dd->io_us);
+       free(dd);
+
+       pthread_mutex_lock(&daos_mutex);
+       num_threads--;
+       if (daos_initialized && num_threads == 0) {
+               int ret;
+
+               ret = daos_fio_global_cleanup();
+               if (ret < 0 && rc == 0) {
+                       log_err("failed to clean up: %d\n", ret);
+                       td_verror(td, ret, "daos_fio_global_cleanup");
+               }
+               daos_initialized = false;
+       }
+       pthread_mutex_unlock(&daos_mutex);
+}
+
+static int daos_fio_get_file_size(struct thread_data *td, struct fio_file *f)
+{
+       char            *file_name = f->file_name;
+       struct stat     stbuf = {0};
+       int             rc;
+
+       dprint(FD_FILE, "dfs stat %s\n", f->file_name);
+
+       if (!daos_initialized)
+               return 0;
+
+       rc = dfs_stat(dfs, NULL, file_name, &stbuf);
+       if (rc) {
+               log_err("Failed to stat %s: %d\n", f->file_name, rc);
+               td_verror(td, rc, "dfs_stat");
+               return rc;
+       }
+
+       f->real_file_size = stbuf.st_size;
+       return 0;
+}
+
+static int daos_fio_close(struct thread_data *td, struct fio_file *f)
+{
+       struct daos_data        *dd = td->io_ops_data;
+       int                     rc;
+
+       dprint(FD_FILE, "dfs release %s\n", f->file_name);
+
+       rc = dfs_release(dd->obj);
+       if (rc) {
+               log_err("Failed to release %s: %d\n", f->file_name, rc);
+               td_verror(td, rc, "dfs_release");
+               return rc;
+       }
+
+       return 0;
+}
+
+static int daos_fio_open(struct thread_data *td, struct fio_file *f)
+{
+       struct daos_data        *dd = td->io_ops_data;
+       struct daos_fio_options *eo = td->eo;
+       int                     flags = 0;
+       int                     rc;
+
+       dprint(FD_FILE, "dfs open %s (%s/%d/%d)\n",
+              f->file_name, td_write(td) & !read_only ? "rw" : "r",
+              td->o.create_on_open, td->o.allow_create);
+
+       if (td->o.create_on_open && td->o.allow_create)
+               flags |= O_CREAT;
+
+       if (td_write(td)) {
+               if (!read_only)
+                       flags |= O_RDWR;
+               if (td->o.allow_create)
+                       flags |= O_CREAT;
+       } else if (td_read(td)) {
+               flags |= O_RDONLY;
+       }
+
+       rc = dfs_open(dfs, NULL, f->file_name,
+                     S_IFREG | S_IRUSR | S_IWUSR,
+                     flags, cid, eo->chsz, NULL, &dd->obj);
+       if (rc) {
+               log_err("Failed to open %s: %d\n", f->file_name, rc);
+               td_verror(td, rc, "dfs_open");
+               return rc;
+       }
+
+       return 0;
+}
+
+static int daos_fio_unlink(struct thread_data *td, struct fio_file *f)
+{
+       int rc;
+
+       dprint(FD_FILE, "dfs remove %s\n", f->file_name);
+
+       rc = dfs_remove(dfs, NULL, f->file_name, false, NULL);
+       if (rc) {
+               log_err("Failed to remove %s: %d\n", f->file_name, rc);
+               td_verror(td, rc, "dfs_remove");
+               return rc;
+       }
+
+       return 0;
+}
+
+static int daos_fio_invalidate(struct thread_data *td, struct fio_file *f)
+{
+       dprint(FD_FILE, "dfs invalidate %s\n", f->file_name);
+       return 0;
+}
+
+static void daos_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
+{
+       struct daos_iou *io = io_u->engine_data;
+
+       if (io) {
+               io_u->engine_data = NULL;
+               free(io);
+       }
+}
+
+static int daos_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
+{
+       struct daos_iou *io;
+
+       io = malloc(sizeof(struct daos_iou));
+       if (!io) {
+               td_verror(td, ENOMEM, "malloc");
+               return ENOMEM;
+       }
+       io->io_u = io_u;
+       io_u->engine_data = io;
+       return 0;
+}
+
+static struct io_u * daos_fio_event(struct thread_data *td, int event)
+{
+       struct daos_data *dd = td->io_ops_data;
+
+       return dd->io_us[event];
+}
+
+static int daos_fio_getevents(struct thread_data *td, unsigned int min,
+                             unsigned int max, const struct timespec *t)
+{
+       struct daos_data        *dd = td->io_ops_data;
+       daos_event_t            *evp[max];
+       unsigned int            events = 0;
+       int                     i;
+       int                     rc;
+
+       while (events < min) {
+               rc = daos_eq_poll(dd->eqh, 0, DAOS_EQ_NOWAIT, max, evp);
+               if (rc < 0) {
+                       log_err("Event poll failed: %d\n", rc);
+                       td_verror(td, rc, "daos_eq_poll");
+                       return events;
+               }
+
+               for (i = 0; i < rc; i++) {
+                       struct daos_iou *io;
+                       struct io_u     *io_u;
+
+                       io = container_of(evp[i], struct daos_iou, ev);
+                       if (io->complete)
+                               log_err("Completion on already completed I/O\n");
+
+                       io_u = io->io_u;
+                       if (io->ev.ev_error)
+                               io_u->error = io->ev.ev_error;
+                       else
+                               io_u->resid = 0;
+
+                       dd->io_us[events] = io_u;
+                       dd->queued--;
+                       daos_event_fini(&io->ev);
+                       io->complete = true;
+                       events++;
+               }
+       }
+
+       dprint(FD_IO, "dfs eq_pool returning %d (%u/%u)\n", events, min, max);
+
+       return events;
+}
+
+static enum fio_q_status daos_fio_queue(struct thread_data *td,
+                                       struct io_u *io_u)
+{
+       struct daos_data        *dd = td->io_ops_data;
+       struct daos_iou         *io = io_u->engine_data;
+       daos_off_t              offset = io_u->offset;
+       int                     rc;
+
+       if (dd->queued == td->o.iodepth)
+               return FIO_Q_BUSY;
+
+       io->sgl.sg_nr = 1;
+       io->sgl.sg_nr_out = 0;
+       d_iov_set(&io->iov, io_u->xfer_buf, io_u->xfer_buflen);
+       io->sgl.sg_iovs = &io->iov;
+       io->size = io_u->xfer_buflen;
+
+       io->complete = false;
+       rc = daos_event_init(&io->ev, dd->eqh, NULL);
+       if (rc) {
+               log_err("Event init failed: %d\n", rc);
+               io_u->error = rc;
+               return FIO_Q_COMPLETED;
+       }
+
+       switch (io_u->ddir) {
+       case DDIR_WRITE:
+               rc = dfs_write(dfs, dd->obj, &io->sgl, offset, &io->ev);
+               if (rc) {
+                       log_err("dfs_write failed: %d\n", rc);
+                       io_u->error = rc;
+                       return FIO_Q_COMPLETED;
+               }
+               break;
+       case DDIR_READ:
+               rc = dfs_read(dfs, dd->obj, &io->sgl, offset, &io->size,
+                             &io->ev);
+               if (rc) {
+                       log_err("dfs_read failed: %d\n", rc);
+                       io_u->error = rc;
+                       return FIO_Q_COMPLETED;
+               }
+               break;
+       case DDIR_SYNC:
+               io_u->error = 0;
+               return FIO_Q_COMPLETED;
+       default:
+               dprint(FD_IO, "Invalid IO type: %d\n", io_u->ddir);
+               io_u->error = -DER_INVAL;
+               return FIO_Q_COMPLETED;
+       }
+
+       dd->queued++;
+       return FIO_Q_QUEUED;
+}
+
+static int daos_fio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
+{
+       return 0;
+}
+
+/* ioengine_ops for get_ioengine() */
+FIO_STATIC struct ioengine_ops ioengine = {
+       .name                   = "dfs",
+       .version                = FIO_IOOPS_VERSION,
+       .flags                  = FIO_DISKLESSIO | FIO_NODISKUTIL,
+
+       .setup                  = daos_fio_setup,
+       .init                   = daos_fio_init,
+       .prep                   = daos_fio_prep,
+       .cleanup                = daos_fio_cleanup,
+
+       .open_file              = daos_fio_open,
+       .invalidate             = daos_fio_invalidate,
+       .get_file_size          = daos_fio_get_file_size,
+       .close_file             = daos_fio_close,
+       .unlink_file            = daos_fio_unlink,
+
+       .queue                  = daos_fio_queue,
+       .getevents              = daos_fio_getevents,
+       .event                  = daos_fio_event,
+       .io_u_init              = daos_fio_io_u_init,
+       .io_u_free              = daos_fio_io_u_free,
+
+       .option_struct_size     = sizeof(struct daos_fio_options),
+       .options                = options,
+};
+
+static void fio_init fio_dfs_register(void)
+{
+       register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_dfs_unregister(void)
+{
+       unregister_ioengine(&ioengine);
+}
index 6382569b9a92c42c1ad5d76a75d9441808de6cc8..4b05ed68fb467263ddbebabe7e79dd5231f92b63 100644 (file)
@@ -25,8 +25,8 @@ static int open_file(struct thread_data *td, struct fio_file *f)
 
        dprint(FD_FILE, "fd open %s\n", f->file_name);
 
-       if (f->filetype != FIO_TYPE_FILE) {
-               log_err("fio: only files are supported fallocate \n");
+       if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK) {
+               log_err("fio: only files and blockdev are supported fallocate \n");
                return 1;
        }
        if (!strcmp(f->file_name, "-")) {
index 5fec8544a6a79898fed4e6c4854ffd416234c66a..16c64928162654ad76c6697958b846eb5bb0d985 100644 (file)
@@ -22,7 +22,7 @@ static int open_file(struct thread_data *td, struct fio_file *f)
        dprint(FD_FILE, "fd open %s\n", f->file_name);
 
        if (f->filetype != FIO_TYPE_FILE) {
-               log_err("fio: only files are supported fallocate \n");
+               log_err("fio: only files are supported\n");
                return 1;
        }
        if (!strcmp(f->file_name, "-")) {
index 9ce2ae80df59f54dea66630b1182bc7b5fec35e3..b962e8041b6f8d113669b4b2a31224a68d19aa0f 100644 (file)
@@ -696,7 +696,11 @@ static int fio_ioring_post_init(struct thread_data *td)
 
        err = fio_ioring_queue_init(td);
        if (err) {
-               td_verror(td, errno, "io_queue_init");
+               int init_err = errno;
+
+               if (init_err == ENOSYS)
+                       log_err("fio: your kernel doesn't support io_uring\n");
+               td_verror(td, init_err, "io_queue_init");
                return 1;
        }
 
index eefb7767f3eab953f202236a395b096ef8f67636..2338f0fa24f42b4f8318ca9a4bc299283e28c8bf 100644 (file)
@@ -255,6 +255,7 @@ FIO_STATIC struct ioengine_ops ioengine = {
        .open_file      = fio_libpmem_open_file,
        .close_file     = fio_libpmem_close_file,
        .get_file_size  = generic_get_file_size,
+       .prepopulate_file = generic_prepopulate_file,
        .flags          = FIO_SYNCIO | FIO_RAWIO | FIO_DISKLESSIO | FIO_NOEXTEND |
                                FIO_NODISKUTIL | FIO_BARRIER | FIO_MEMALIGN,
 };
diff --git a/examples/dfs.fio b/examples/dfs.fio
new file mode 100644 (file)
index 0000000..5de887d
--- /dev/null
@@ -0,0 +1,33 @@
+[global]
+ioengine=dfs
+pool=${POOL}
+cont=${CONT}
+filename_format=fio-test.$jobnum
+
+cpus_allowed_policy=split
+group_reporting=1
+time_based=0
+percentile_list=99.0:99.9:99.99:99.999:99.9999:100
+disable_slat=1
+disable_clat=1
+
+bs=1M
+size=100G
+iodepth=16
+numjobs=16
+
+[daos-seqwrite]
+rw=write
+stonewall
+
+[daos-seqread]
+rw=read
+stonewall
+
+[daos-randwrite]
+rw=randwrite
+stonewall
+
+[daos-randread]
+rw=randread
+stonewall
diff --git a/file.h b/file.h
index 493ec04a6677e90f6c0d0182e0b082b31302451f..faf65a2a013846fb716f0b80403d6549b90120d2 100644 (file)
--- a/file.h
+++ b/file.h
@@ -207,6 +207,7 @@ extern "C" {
 extern int __must_check generic_open_file(struct thread_data *, struct fio_file *);
 extern int __must_check generic_close_file(struct thread_data *, struct fio_file *);
 extern int __must_check generic_get_file_size(struct thread_data *, struct fio_file *);
+extern int __must_check generic_prepopulate_file(struct thread_data *, struct fio_file *);
 #ifdef __cplusplus
 }
 #endif
index 9d0337578d2266355b8ff478c5153cff88c26d77..e664f8b42f795f4d03675e437870382a0243b13b 100644 (file)
@@ -338,6 +338,95 @@ error:
        return ret;
 }
 
+/*
+ * Generic function to prepopulate regular file with data.
+ * Useful if you want to make sure I/O engine has data to read.
+ * Leaves f->fd open on success, caller must close.
+ */
+int generic_prepopulate_file(struct thread_data *td, struct fio_file *f)
+{
+       int flags;
+       unsigned long long left, bs;
+       char *b = NULL;
+
+       /* generic function for regular files only */
+       assert(f->filetype == FIO_TYPE_FILE);
+
+       if (read_only) {
+               log_err("fio: refusing to write a file due to read-only\n");
+               return 0;
+       }
+
+       flags = O_WRONLY;
+       if (td->o.allow_create)
+               flags |= O_CREAT;
+
+#ifdef WIN32
+       flags |= _O_BINARY;
+#endif
+
+       dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
+       f->fd = open(f->file_name, flags, 0644);
+       if (f->fd < 0) {
+               int err = errno;
+
+               if (err == ENOENT && !td->o.allow_create)
+                       log_err("fio: file creation disallowed by "
+                                       "allow_file_create=0\n");
+               else
+                       td_verror(td, err, "open");
+               return 1;
+       }
+
+       left = f->real_file_size;
+       bs = td->o.max_bs[DDIR_WRITE];
+       if (bs > left)
+               bs = left;
+
+       b = malloc(bs);
+       if (!b) {
+               td_verror(td, errno, "malloc");
+               goto err;
+       }
+
+       while (left && !td->terminate) {
+               ssize_t r;
+
+               if (bs > left)
+                       bs = left;
+
+               fill_io_buffer(td, b, bs, bs);
+
+               r = write(f->fd, b, bs);
+
+               if (r > 0) {
+                       left -= r;
+               } else {
+                       td_verror(td, errno, "write");
+                       goto err;
+               }
+       }
+
+       if (td->terminate) {
+               dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
+               td_io_unlink_file(td, f);
+       } else if (td->o.create_fsync) {
+               if (fsync(f->fd) < 0) {
+                       td_verror(td, errno, "fsync");
+                       goto err;
+               }
+       }
+
+       free(b);
+       return 0;
+err:
+       close(f->fd);
+       f->fd = -1;
+       if (b)
+               free(b);
+       return 1;
+}
+
 unsigned long long get_rand_file_size(struct thread_data *td)
 {
        unsigned long long ret, sized;
@@ -1029,6 +1118,13 @@ int setup_files(struct thread_data *td)
        if (o->read_iolog_file)
                goto done;
 
+       if (td->o.zone_mode == ZONE_MODE_ZBD) {
+               err = zbd_init_files(td);
+               if (err)
+                       goto err_out;
+       }
+       zbd_recalc_options_with_zone_granularity(td);
+
        /*
         * check sizes. if the files/devices do not exist and the size
         * isn't passed to fio, abort.
@@ -1254,6 +1350,43 @@ int setup_files(struct thread_data *td)
                temp_stall_ts = 0;
        }
 
+       if (err)
+               goto err_out;
+
+       /*
+        * Prepopulate files with data. It might be expected to read some
+        * "real" data instead of zero'ed files (if no writes to file occurred
+        * prior to a read job). Engine has to provide a way to do that.
+        */
+       if (td->io_ops->prepopulate_file) {
+               temp_stall_ts = 1;
+
+               for_each_file(td, f, i) {
+                       if (output_format & FIO_OUTPUT_NORMAL) {
+                               log_info("%s: Prepopulating IO file (%s)\n",
+                                                       o->name, f->file_name);
+                       }
+
+                       err = td->io_ops->prepopulate_file(td, f);
+                       if (err)
+                               break;
+
+                       err = __file_invalidate_cache(td, f, f->file_offset,
+                                                               f->io_size);
+
+                       /*
+                        * Shut up static checker
+                        */
+                       if (f->fd != -1)
+                               close(f->fd);
+
+                       f->fd = -1;
+                       if (err)
+                               break;
+               }
+               temp_stall_ts = 0;
+       }
+
        if (err)
                goto err_out;
 
@@ -1269,16 +1402,17 @@ int setup_files(struct thread_data *td)
        }
 
 done:
-       if (o->create_only)
-               td->done = 1;
-
-       td_restore_runstate(td, old_state);
-
        if (td->o.zone_mode == ZONE_MODE_ZBD) {
                err = zbd_setup_files(td);
                if (err)
                        goto err_out;
        }
+
+       if (o->create_only)
+               td->done = 1;
+
+       td_restore_runstate(td, old_state);
+
        return 0;
 
 err_offset:
diff --git a/fio.1 b/fio.1
index e190c241836b743f4875e104d4ae659890e523c5..231e2e286a6c7c1df5426bedf73eb79559cb4f49 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -348,6 +348,9 @@ us or usec means microseconds
 .PD
 .RE
 .P
+`z' suffix specifies that the value is measured in zones.
+Value is recalculated once block device's zone size becomes known.
+.P
 If the option accepts an upper and lower range, use a colon ':' or
 minus '\-' to separate such values. See \fIirange\fR parameter type.
 If the lower value specified happens to be larger than the upper value
@@ -783,7 +786,7 @@ If not specified it defaults to the zone size. If the target device is a zoned
 block device, the zone capacity is obtained from the device information and this
 option is ignored.
 .TP
-.BI zoneskip \fR=\fPint
+.BI zoneskip \fR=\fPint[z]
 For \fBzonemode\fR=strided, the number of bytes to skip after \fBzonesize\fR
 bytes of data have been transferred.
 
@@ -1033,7 +1036,7 @@ The values are all relative to each other, and no absolute meaning
 should be associated with them.
 .RE
 .TP
-.BI offset \fR=\fPint
+.BI offset \fR=\fPint[%|z]
 Start I/O at the provided offset in the file, given as either a fixed size in
 bytes or a percentage. If a percentage is given, the generated offset will be
 aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR if
@@ -1048,7 +1051,7 @@ If set to non-zero value, the byte offset generated by a percentage \fBoffset\fR
 is aligned upwards to this value. Defaults to 0 meaning that a percentage
 offset is aligned to the minimum block size.
 .TP
-.BI offset_increment \fR=\fPint
+.BI offset_increment \fR=\fPint[%|z]
 If this is provided, then the real offset becomes `\fBoffset\fR + \fBoffset_increment\fR
 * thread_number', where the thread number is a counter that starts at 0 and
 is incremented for each sub-job (i.e. when \fBnumjobs\fR option is
@@ -1570,7 +1573,7 @@ Pin the specified amount of memory with \fBmlock\fR\|(2). Can be used to
 simulate a smaller amount of memory. The amount specified is per worker.
 .SS "I/O size"
 .TP
-.BI size \fR=\fPint
+.BI size \fR=\fPint[%|z]
 The total size of file I/O for each thread of this job. Fio will run until
 this many bytes has been transferred, unless runtime is limited by other options
 (such as \fBruntime\fR, for instance, or increased/decreased by \fBio_size\fR).
@@ -1585,7 +1588,7 @@ given, fio will use 20% of the full size of the given files or devices.
 Can be combined with \fBoffset\fR to constrain the start and end range
 that I/O will be done within.
 .TP
-.BI io_size \fR=\fPint "\fR,\fB io_limit" \fR=\fPint
+.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
 Normally fio operates within the region set by \fBsize\fR, which means
 that the \fBsize\fR option sets both the region and size of I/O to be
 performed. Sometimes that is not what you want. With this option, it is
@@ -1853,6 +1856,10 @@ GPUDirect Storage-supported filesystem. This engine performs
 I/O without transferring buffers between user-space and the kernel,
 unless \fBverify\fR is set or \fBcuda_io\fR is \fBposix\fR. \fBiomem\fR must
 not be \fBcudamalloc\fR. This ioengine defines engine specific options.
+.TP
+.B dfs
+I/O engine supporting asynchronous read and write operations to the DAOS File
+System (DFS) via libdfs.
 .SS "I/O engine specific parameters"
 In addition, there are some parameters which are only valid when a specific
 \fBioengine\fR is in use. These are used identically to normal parameters,
@@ -2212,6 +2219,20 @@ from RAM to GPU after a read. \fBverify\fR does not affect
 the use of cudaMemcpy.
 .RE
 .RE
+.TP
+.BI (dfs)pool
+Specify the UUID of the DAOS pool to connect to.
+.TP
+.BI (dfs)cont
+Specify the UUID of the DAOS container to open.
+.TP
+.BI (dfs)chunk_size
+Specify a different chunk size (in bytes) for the dfs file.
+Use DAOS container's chunk size by default.
+.TP
+.BI (dfs)object_class
+Specify a different object class for the dfs file.
+Use DAOS container's object class by default.
 .SS "I/O depth"
 .TP
 .BI iodepth \fR=\fPint
@@ -2406,10 +2427,11 @@ Used with \fBlatency_target\fR. If false (default), fio will find the highest
 queue depth that meets \fBlatency_target\fR and exit. If true, fio will continue
 running and try to meet \fBlatency_target\fR by adjusting queue depth.
 .TP
-.BI max_latency \fR=\fPtime
+.BI max_latency \fR=\fPtime[,time][,time]
 If set, fio will exit the job with an ETIMEDOUT error if it exceeds this
 maximum latency. When the unit is omitted, the value is interpreted in
-microseconds.
+microseconds. Comma-separated values may be specified for reads, writes,
+and trims as described in \fBblocksize\fR.
 .TP
 .BI rate_cycle \fR=\fPint
 Average bandwidth for \fBrate\fR and \fBrate_min\fR over this number
diff --git a/init.c b/init.c
index eea6e54692b177036dce001134f8ed1baeb62ca8..37bff8763cbb5bf924f3a3fc768f3352fbe10cf6 100644 (file)
--- a/init.c
+++ b/init.c
@@ -448,19 +448,6 @@ static void dump_opt_list(struct thread_data *td)
        }
 }
 
-static void fio_dump_options_free(struct thread_data *td)
-{
-       while (!flist_empty(&td->opt_list)) {
-               struct print_option *p;
-
-               p = flist_first_entry(&td->opt_list, struct print_option, list);
-               flist_del_init(&p->list);
-               free(p->name);
-               free(p->value);
-               free(p);
-       }
-}
-
 static void copy_opt_list(struct thread_data *dst, struct thread_data *src)
 {
        struct flist_head *entry;
@@ -961,7 +948,9 @@ static int fixup_options(struct thread_data *td)
        /*
         * Fix these up to be nsec internally
         */
-       o->max_latency *= 1000ULL;
+       for_each_rw_ddir(ddir)
+               o->max_latency[ddir] *= 1000ULL;
+
        o->latency_target *= 1000ULL;
 
        return ret;
diff --git a/io_u.c b/io_u.c
index 00a219c2e85922906dd859583ef6f1ae31ad29c0..b421a579bd0a1aaa594692a21731a2774de77cea 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -1389,11 +1389,16 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
        return 0;
 }
 
-static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
+static void lat_fatal(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd,
                      unsigned long long tnsec, unsigned long long max_nsec)
 {
-       if (!td->error)
-               log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
+       if (!td->error) {
+               log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec): %s %s %llu %llu\n",
+                                       tnsec, max_nsec,
+                                       io_u->file->file_name,
+                                       io_ddir_name(io_u->ddir),
+                                       io_u->offset, io_u->buflen);
+       }
        td_verror(td, ETIMEDOUT, "max latency exceeded");
        icd->error = ETIMEDOUT;
 }
@@ -1888,11 +1893,13 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
                                icd->error = ops->io_u_lat(td, tnsec);
                }
 
-               if (td->o.max_latency && tnsec > td->o.max_latency)
-                       lat_fatal(td, icd, tnsec, td->o.max_latency);
-               if (td->o.latency_target && tnsec > td->o.latency_target) {
-                       if (lat_target_failed(td))
-                               lat_fatal(td, icd, tnsec, td->o.latency_target);
+               if (ddir_rw(idx)) {
+                       if (td->o.max_latency[idx] && tnsec > td->o.max_latency[idx])
+                               lat_fatal(td, io_u, icd, tnsec, td->o.max_latency[idx]);
+                       if (td->o.latency_target && tnsec > td->o.latency_target) {
+                               if (lat_target_failed(td))
+                                       lat_fatal(td, io_u, icd, tnsec, td->o.latency_target);
+                       }
                }
        }
 
index 839b318da60464eb5d78d503ca930559c2548c41..1d01ab0a6d1345a71b04b0576f40fefca20062b9 100644 (file)
@@ -8,7 +8,7 @@
 #include "io_u.h"
 #include "zbd_types.h"
 
-#define FIO_IOOPS_VERSION      28
+#define FIO_IOOPS_VERSION      29
 
 #ifndef CONFIG_DYNAMIC_ENGINES
 #define FIO_STATIC     static
@@ -47,6 +47,7 @@ struct ioengine_ops {
        int (*invalidate)(struct thread_data *, struct fio_file *);
        int (*unlink_file)(struct thread_data *, struct fio_file *);
        int (*get_file_size)(struct thread_data *, struct fio_file *);
+       int (*prepopulate_file)(struct thread_data *, struct fio_file *);
        void (*terminate)(struct thread_data *);
        int (*iomem_alloc)(struct thread_data *, size_t);
        void (*iomem_free)(struct thread_data *);
diff --git a/iolog.c b/iolog.c
index fa40c8572664a4e14d1e53d39eae3547dc43b672..cf264916a9ecd83f7b3079bd186573441dba65fb 100644 (file)
--- a/iolog.c
+++ b/iolog.c
@@ -607,12 +607,11 @@ static int open_socket(const char *path)
 /*
  * open iolog, check version, and call appropriate parser
  */
-static bool init_iolog_read(struct thread_data *td)
+static bool init_iolog_read(struct thread_data *td, char *fname)
 {
-       char buffer[256], *p, *fname;
+       char buffer[256], *p;
        FILE *f = NULL;
 
-       fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
        dprint(FD_IO, "iolog: name=%s\n", fname);
 
        if (is_socket(fname)) {
@@ -701,15 +700,16 @@ bool init_iolog(struct thread_data *td)
 
        if (td->o.read_iolog_file) {
                int need_swap;
+               char * fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
 
                /*
                 * Check if it's a blktrace file and load that if possible.
                 * Otherwise assume it's a normal log file and load that.
                 */
-               if (is_blktrace(td->o.read_iolog_file, &need_swap))
-                       ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
+               if (is_blktrace(fname, &need_swap))
+                       ret = load_blktrace(td, fname, need_swap);
                else
-                       ret = init_iolog_read(td);
+                       ret = init_iolog_read(td, fname);
        } else if (td->o.write_iolog_file)
                ret = init_iolog_write(td);
        else
index 5b6d22a3145430b0949a2cab69fff23949c36a28..15a16229ef9a5d02524cb478a74ba080ecc911e0 100644 (file)
@@ -181,6 +181,10 @@ static const struct opt_group fio_opt_cat_groups[] = {
                .name   = "libcufile I/O engine", /* libcufile */
                .mask   = FIO_OPT_G_LIBCUFILE,
        },
+       {
+               .name   = "DAOS File System (dfs) I/O engine", /* dfs */
+               .mask   = FIO_OPT_G_DFS,
+       },
        {
                .name   = NULL,
        },
index 641547d271f6e632cbc45d558afaf58c8cd25b67..ff74862968e4335a1bb064d499c08441a54b16ac 100644 (file)
@@ -69,6 +69,7 @@ enum opt_category_group {
        __FIO_OPT_G_FILESTAT,
        __FIO_OPT_G_NR,
        __FIO_OPT_G_LIBCUFILE,
+       __FIO_OPT_G_DFS,
 
        FIO_OPT_G_RATE          = (1ULL << __FIO_OPT_G_RATE),
        FIO_OPT_G_ZONE          = (1ULL << __FIO_OPT_G_ZONE),
@@ -112,6 +113,7 @@ enum opt_category_group {
        FIO_OPT_G_IOURING       = (1ULL << __FIO_OPT_G_IOURING),
        FIO_OPT_G_FILESTAT      = (1ULL << __FIO_OPT_G_FILESTAT),
        FIO_OPT_G_LIBCUFILE     = (1ULL << __FIO_OPT_G_LIBCUFILE),
+       FIO_OPT_G_DFS           = (1ULL << __FIO_OPT_G_DFS),
 };
 
 extern const struct opt_group *opt_group_from_mask(uint64_t *mask);
index 625112c5345dbb219fb06cdd0aa1e59e9ee92a6c..22a3e421c6c632a9435dfc719d4eb617705861a9 100644 (file)
--- a/options.c
+++ b/options.c
@@ -1471,8 +1471,13 @@ static int str_offset_cb(void *data, unsigned long long *__val)
        if (parse_is_percent(v)) {
                td->o.start_offset = 0;
                td->o.start_offset_percent = -1ULL - v;
+               td->o.start_offset_nz = 0;
                dprint(FD_PARSE, "SET start_offset_percent %d\n",
                                        td->o.start_offset_percent);
+       } else if (parse_is_zone(v)) {
+               td->o.start_offset = 0;
+               td->o.start_offset_percent = 0;
+               td->o.start_offset_nz = v - ZONE_BASE_VAL;
        } else
                td->o.start_offset = v;
 
@@ -1487,8 +1492,13 @@ static int str_offset_increment_cb(void *data, unsigned long long *__val)
        if (parse_is_percent(v)) {
                td->o.offset_increment = 0;
                td->o.offset_increment_percent = -1ULL - v;
+               td->o.offset_increment_nz = 0;
                dprint(FD_PARSE, "SET offset_increment_percent %d\n",
                                        td->o.offset_increment_percent);
+       } else if (parse_is_zone(v)) {
+               td->o.offset_increment = 0;
+               td->o.offset_increment_percent = 0;
+               td->o.offset_increment_nz = v - ZONE_BASE_VAL;
        } else
                td->o.offset_increment = v;
 
@@ -1505,6 +1515,10 @@ static int str_size_cb(void *data, unsigned long long *__val)
                td->o.size_percent = -1ULL - v;
                dprint(FD_PARSE, "SET size_percent %d\n",
                                        td->o.size_percent);
+       } else if (parse_is_zone(v)) {
+               td->o.size = 0;
+               td->o.size_percent = 0;
+               td->o.size_nz = v - ZONE_BASE_VAL;
        } else
                td->o.size = v;
 
@@ -1525,12 +1539,30 @@ static int str_io_size_cb(void *data, unsigned long long *__val)
                }
                dprint(FD_PARSE, "SET io_size_percent %d\n",
                                        td->o.io_size_percent);
+       } else if (parse_is_zone(v)) {
+               td->o.io_size = 0;
+               td->o.io_size_percent = 0;
+               td->o.io_size_nz = v - ZONE_BASE_VAL;
        } else
                td->o.io_size = v;
 
        return 0;
 }
 
+/*
+ * Option callback for "zoneskip". A zone-encoded value (see parse_is_zone())
+ * is stored as a number of zones in zone_skip_nz with zone_skip cleared; any
+ * other value is stored in zone_skip unchanged. Always returns 0.
+ */
+static int str_zoneskip_cb(void *data, unsigned long long *__val)
+{
+       struct thread_data *td = cb_data_to_td(data);
+       unsigned long long skip = *__val;
+
+       if (!parse_is_zone(skip)) {
+               td->o.zone_skip = skip;
+               return 0;
+       }
+
+       /* drop the zone marker so that only the zone count remains */
+       td->o.zone_skip = 0;
+       td->o.zone_skip_nz = skip - ZONE_BASE_VAL;
+       return 0;
+}
+
 static int str_write_bw_log_cb(void *data, const char *str)
 {
        struct thread_data *td = cb_data_to_td(data);
@@ -1989,6 +2021,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                          { .ival = "nbd",
                            .help = "Network Block Device (NBD) IO engine"
                          },
+#ifdef CONFIG_DFS
+                         { .ival = "dfs",
+                           .help = "DAOS File System (dfs) IO engine",
+                         },
+#endif
                },
        },
        {
@@ -2091,11 +2128,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
        {
                .name   = "size",
                .lname  = "Size",
-               .type   = FIO_OPT_STR_VAL,
+               .type   = FIO_OPT_STR_VAL_ZONE,
                .cb     = str_size_cb,
                .off1   = offsetof(struct thread_options, size),
                .help   = "Total size of device or files",
-               .interval = 1024 * 1024,
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_INVALID,
        },
@@ -2103,11 +2139,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .name   = "io_size",
                .alias  = "io_limit",
                .lname  = "IO Size",
-               .type   = FIO_OPT_STR_VAL,
+               .type   = FIO_OPT_STR_VAL_ZONE,
                .cb     = str_io_size_cb,
                .off1   = offsetof(struct thread_options, io_size),
                .help   = "Total size of I/O to be performed",
-               .interval = 1024 * 1024,
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_INVALID,
        },
@@ -2148,12 +2183,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .name   = "offset",
                .lname  = "IO offset",
                .alias  = "fileoffset",
-               .type   = FIO_OPT_STR_VAL,
+               .type   = FIO_OPT_STR_VAL_ZONE,
                .cb     = str_offset_cb,
                .off1   = offsetof(struct thread_options, start_offset),
                .help   = "Start IO from this offset",
                .def    = "0",
-               .interval = 1024 * 1024,
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_INVALID,
        },
@@ -2171,14 +2205,13 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
        {
                .name   = "offset_increment",
                .lname  = "IO offset increment",
-               .type   = FIO_OPT_STR_VAL,
+               .type   = FIO_OPT_STR_VAL_ZONE,
                .cb     = str_offset_increment_cb,
                .off1   = offsetof(struct thread_options, offset_increment),
                .help   = "What is the increment from one offset to the next",
                .parent = "offset",
                .hide   = 1,
                .def    = "0",
-               .interval = 1024 * 1024,
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_INVALID,
        },
@@ -3414,11 +3447,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
        {
                .name   = "zoneskip",
                .lname  = "Zone skip",
-               .type   = FIO_OPT_STR_VAL,
+               .type   = FIO_OPT_STR_VAL_ZONE,
+               .cb     = str_zoneskip_cb,
                .off1   = offsetof(struct thread_options, zone_skip),
                .help   = "Space between IO zones",
                .def    = "0",
-               .interval = 1024 * 1024,
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_ZONE,
        },
@@ -3738,8 +3771,10 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
        {
                .name   = "max_latency",
                .lname  = "Max Latency (usec)",
-               .type   = FIO_OPT_STR_VAL_TIME,
-               .off1   = offsetof(struct thread_options, max_latency),
+               .type   = FIO_OPT_ULL,
+               .off1   = offsetof(struct thread_options, max_latency[DDIR_READ]),
+               .off2   = offsetof(struct thread_options, max_latency[DDIR_WRITE]),
+               .off3   = offsetof(struct thread_options, max_latency[DDIR_TRIM]),
                .help   = "Maximum tolerated IO latency (usec)",
                .is_time = 1,
                .category = FIO_OPT_C_IO,
@@ -5436,6 +5471,19 @@ void fio_options_free(struct thread_data *td)
        }
 }
 
+/*
+ * Release every print_option queued on td->opt_list: each entry is unlinked
+ * from the list, then its name, its value and the entry itself are freed.
+ */
+void fio_dump_options_free(struct thread_data *td)
+{
+       for (;;) {
+               struct print_option *opt;
+
+               if (flist_empty(&td->opt_list))
+                       break;
+
+               /* always take the head; it changes as entries are removed */
+               opt = flist_first_entry(&td->opt_list, struct print_option, list);
+               flist_del_init(&opt->list);
+               free(opt->name);
+               free(opt->value);
+               free(opt);
+       }
+}
+
 struct fio_option *fio_option_find(const char *name)
 {
        return find_option(fio_options, name);
index 5276f31e6818673a338fcbc3ef18997263b0abd0..df80fd9864bdd3f18d84e22c9aee937f45eab8aa 100644 (file)
--- a/options.h
+++ b/options.h
@@ -16,6 +16,7 @@ void add_opt_posval(const char *, const char *, const char *);
 void del_opt_posval(const char *, const char *);
 struct thread_data;
 void fio_options_free(struct thread_data *);
+void fio_dump_options_free(struct thread_data *);
 char *get_next_str(char **ptr);
 int get_max_str_idx(char *input);
 char* get_name_by_idx(char *input, int index);
diff --git a/parse.c b/parse.c
index 44bf950768d9b51bcc3e43ba30c2564614a6e4c6..45f4f2d3dd6db800f4f8e11a168c8ef36e788d3b 100644 (file)
--- a/parse.c
+++ b/parse.c
@@ -37,6 +37,7 @@ static const char *opt_type_names[] = {
        "OPT_BOOL",
        "OPT_FLOAT_LIST",
        "OPT_STR_SET",
+       "OPT_STR_VAL_ZONE",
        "OPT_DEPRECATED",
        "OPT_SOFT_DEPRECATED",
        "OPT_UNSUPPORTED",
@@ -599,9 +600,35 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
                fallthrough;
        case FIO_OPT_ULL:
        case FIO_OPT_INT:
-       case FIO_OPT_STR_VAL: {
+       case FIO_OPT_STR_VAL:
+       case FIO_OPT_STR_VAL_ZONE:
+       {
                fio_opt_str_val_fn *fn = o->cb;
                char tmp[128], *p;
+               size_t len = strlen(ptr);
+
+               if (len > 0 && ptr[len - 1] == 'z') {
+                       if (o->type == FIO_OPT_STR_VAL_ZONE) {
+                               char *ep;
+                               unsigned long long val;
+
+                               errno = 0;
+                               val = strtoul(ptr, &ep, 10);
+                               if (errno == 0 && ep != ptr && *ep == 'z') {
+                                       ull = ZONE_BASE_VAL + (uint32_t)val;
+                                       ret = 0;
+                                       goto store_option_value;
+                               } else {
+                                       log_err("%s: unexpected zone value '%s'\n",
+                                               o->name, ptr);
+                                       return 1;
+                               }
+                       } else {
+                               log_err("%s: 'z' suffix isn't applicable\n",
+                                       o->name);
+                               return 1;
+                       }
+               }
 
                if (!is_time && o->is_time)
                        is_time = o->is_time;
@@ -655,6 +682,7 @@ static int __handle_option(const struct fio_option *o, const char *ptr,
                        }
                }
 
+store_option_value:
                if (fn)
                        ret = fn(data, &ull);
                else {
diff --git a/parse.h b/parse.h
index e6663ed484ed343b096ebc33a28a52560f642aea..4cf08fd2cf924f84eebd4080709c1ae83246da87 100644 (file)
--- a/parse.h
+++ b/parse.h
@@ -21,6 +21,7 @@ enum fio_opt_type {
        FIO_OPT_BOOL,
        FIO_OPT_FLOAT_LIST,
        FIO_OPT_STR_SET,
+       FIO_OPT_STR_VAL_ZONE,
        FIO_OPT_DEPRECATED,
        FIO_OPT_SOFT_DEPRECATED,
        FIO_OPT_UNSUPPORTED,    /* keep this last */
@@ -130,12 +131,18 @@ static inline void *td_var(void *to, const struct fio_option *o,
 
 static inline int parse_is_percent(unsigned long long val)
 {
-       return val <= -1ULL && val >= (-1ULL - 100ULL);
+       return val >= -101;
 }
 
+#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
 static inline int parse_is_percent_uncapped(unsigned long long val)
 {
-       return (long long)val <= -1;
+       return ZONE_BASE_VAL + -1U < val;
+}
+
+static inline int parse_is_zone(unsigned long long val)
+{
+       return (val - ZONE_BASE_VAL) <= -1U;
 }
 
 struct print_option {
index 1b65297ec25feb166e3f39e6b01b7c081a96fa42..8daefbabfeae93f6c260c0b74eec6fedc7bbd973 100644 (file)
--- a/server.c
+++ b/server.c
@@ -1909,7 +1909,7 @@ static int fio_append_iolog_gz(struct sk_entry *first, struct io_log *log)
                        break;
                }
                flist_add_tail(&entry->list, &first->next);
-       } while (ret != Z_STREAM_END);
+       }
 
        ret = deflateEnd(&stream);
        if (ret == Z_OK)
index 9256d44c5001c2daa72232407650ecef36d4fdfa..b45b319ba2013e56ecc87056e8e0b6ca1aaec6e2 100644 (file)
--- a/server.h
+++ b/server.h
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-       FIO_SERVER_VER                  = 87,
+       FIO_SERVER_VER                  = 89,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
index 044f9195679566f802460672a3b9c6d96f33be07..ff4c7a7c01807ed46bd73dca4da85de0e606158d 100644 (file)
@@ -233,8 +233,7 @@ static int prep_more_ios(struct submitter *s, int max_ios)
        next_tail = tail = *ring->tail;
        do {
                next_tail++;
-               read_barrier();
-               if (next_tail == *ring->head)
+               if (next_tail == atomic_load_acquire(ring->head))
                        break;
 
                index = tail & sq_ring_mask;
@@ -244,10 +243,8 @@ static int prep_more_ios(struct submitter *s, int max_ios)
                tail = next_tail;
        } while (prepped < max_ios);
 
-       if (*ring->tail != tail) {
-               *ring->tail = tail;
-               write_barrier();
-       }
+       if (prepped)
+               atomic_store_release(ring->tail, tail);
        return prepped;
 }
 
@@ -284,7 +281,7 @@ static int reap_events(struct submitter *s)
                struct file *f;
 
                read_barrier();
-               if (head == *ring->tail)
+               if (head == atomic_load_acquire(ring->tail))
                        break;
                cqe = &ring->cqes[head & cq_ring_mask];
                if (!do_nop) {
@@ -301,9 +298,10 @@ static int reap_events(struct submitter *s)
                head++;
        } while (1);
 
-       s->inflight -= reaped;
-       *ring->head = head;
-       write_barrier();
+       if (reaped) {
+               s->inflight -= reaped;
+               atomic_store_release(ring->head, head);
+       }
        return reaped;
 }
 
@@ -320,6 +318,7 @@ static void *submitter_fn(void *data)
        prepped = 0;
        do {
                int to_wait, to_submit, this_reap, to_prep;
+               unsigned ring_flags = 0;
 
                if (!prepped && s->inflight < depth) {
                        to_prep = min(depth - s->inflight, batch_submit);
@@ -338,15 +337,20 @@ submit:
                 * Only need to call io_uring_enter if we're not using SQ thread
                 * poll, or if IORING_SQ_NEED_WAKEUP is set.
                 */
-               if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) {
+               if (sq_thread_poll)
+                       ring_flags = atomic_load_acquire(ring->flags);
+               if (!sq_thread_poll || ring_flags & IORING_SQ_NEED_WAKEUP) {
                        unsigned flags = 0;
 
                        if (to_wait)
                                flags = IORING_ENTER_GETEVENTS;
-                       if ((*ring->flags & IORING_SQ_NEED_WAKEUP))
+                       if (ring_flags & IORING_SQ_NEED_WAKEUP)
                                flags |= IORING_ENTER_SQ_WAKEUP;
                        ret = io_uring_enter(s, to_submit, to_wait, flags);
                        s->calls++;
+               } else {
+                       /* for SQPOLL, we submitted it all effectively */
+                       ret = to_submit;
                }
 
                /*
index 1658dc25013be4c0714538bdd58fc2dcabeb03f8..be1296151930f46ea9694cdb9ba580ef60ba335a 100755 (executable)
@@ -1153,6 +1153,54 @@ test54() {
                >> "${logfile}.${test_number}" 2>&1 || return $?
 }
 
+# test 'z' suffix parsing only (zonemode=zbd; 'z'-suffixed offset, offset_increment, size and io_size)
+test55() {
+       local bs
+       bs=$((logical_block_size))
+
+       require_zbd || return $SKIP_TESTCASE
+       # 13 sequential zones required: offset=1z + offset_increment=10z + size=2z (numjobs=2)
+       require_seq_zones 13 || return $SKIP_TESTCASE
+
+       run_fio --name=j                \
+               --filename=${dev}       \
+               --direct=1              \
+               "$(ioengine "psync")"   \
+               --zonemode=zbd          \
+               --zonesize=${zone_size} \
+               --rw=write              \
+               --bs=${bs}              \
+               --numjobs=2             \
+               --offset_increment=10z  \
+               --offset=1z             \
+               --size=2z               \
+               --io_size=3z            \
+               ${job_var_opts[@]} --debug=zbd \
+               >> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
+# test 'z' suffix parsing only (zonemode=strided on a regular block device; 'z'-suffixed size and zoneskip)
+test56() {
+       local bs
+       bs=$((logical_block_size))
+
+       require_regular_block_dev || return $SKIP_TESTCASE
+       require_seq_zones 10 || return $SKIP_TESTCASE
+
+       run_fio --name=j                \
+               --filename=${dev}       \
+               --direct=1              \
+               "$(ioengine "psync")"   \
+               --zonemode=strided      \
+               --zonesize=${zone_size} \
+               --rw=write              \
+               --bs=${bs}              \
+               --size=10z              \
+               --zoneskip=2z           \
+               ${job_var_opts[@]} --debug=zbd \
+               >> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
 SECONDS=0
 tests=()
 dynamic_analyzer=()
index f6b15403c4c21df1c183b47337e71a6018e1fe62..5ecc72d7b590c37615d3625bff10a0ea907a5f1d 100644 (file)
@@ -83,13 +83,16 @@ struct thread_options {
        unsigned long long size;
        unsigned long long io_size;
        unsigned int size_percent;
+       unsigned int size_nz;
        unsigned int io_size_percent;
+       unsigned int io_size_nz;
        unsigned int fill_device;
        unsigned int file_append;
        unsigned long long file_size_low;
        unsigned long long file_size_high;
        unsigned long long start_offset;
        unsigned long long start_offset_align;
+       unsigned int start_offset_nz;
 
        unsigned long long bs[DDIR_RWDIR_CNT];
        unsigned long long ba[DDIR_RWDIR_CNT];
@@ -198,12 +201,13 @@ struct thread_options {
        unsigned long long zone_size;
        unsigned long long zone_capacity;
        unsigned long long zone_skip;
+       uint32_t zone_skip_nz;
        enum fio_zone_mode zone_mode;
        unsigned long long lockmem;
        enum fio_memtype mem_type;
        unsigned int mem_align;
 
-       unsigned long long max_latency;
+       unsigned long long max_latency[DDIR_RWDIR_CNT];
 
        unsigned int exit_what;
        unsigned int stonewall;
@@ -315,6 +319,7 @@ struct thread_options {
        unsigned int gid;
 
        unsigned int offset_increment_percent;
+       unsigned int offset_increment_nz;
        unsigned long long offset_increment;
        unsigned long long number_ios;
 
@@ -384,14 +389,19 @@ struct thread_options_pack {
        uint64_t size;
        uint64_t io_size;
        uint32_t size_percent;
+       uint32_t size_nz;
        uint32_t io_size_percent;
+       uint32_t io_size_nz;
        uint32_t fill_device;
        uint32_t file_append;
        uint32_t unique_filename;
+       uint32_t pad3;
        uint64_t file_size_low;
        uint64_t file_size_high;
        uint64_t start_offset;
        uint64_t start_offset_align;
+       uint32_t start_offset_nz;
+       uint32_t pad4;
 
        uint64_t bs[DDIR_RWDIR_CNT];
        uint64_t ba[DDIR_RWDIR_CNT];
@@ -464,8 +474,6 @@ struct thread_options_pack {
        struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
        uint32_t zone_split_nr[DDIR_RWDIR_CNT];
 
-       uint8_t pad1[4];
-
        fio_fp64_t zipf_theta;
        fio_fp64_t pareto_h;
        fio_fp64_t gauss_dev;
@@ -501,6 +509,7 @@ struct thread_options_pack {
        uint64_t zone_capacity;
        uint64_t zone_skip;
        uint64_t lockmem;
+       uint32_t zone_skip_nz;
        uint32_t mem_type;
        uint32_t mem_align;
 
@@ -509,8 +518,6 @@ struct thread_options_pack {
        uint32_t new_group;
        uint32_t numjobs;
 
-       uint8_t pad3[4];
-
        /*
         * We currently can't convert these, so don't enable them
         */
@@ -616,12 +623,14 @@ struct thread_options_pack {
        uint32_t gid;
 
        uint32_t offset_increment_percent;
+       uint32_t offset_increment_nz;
        uint64_t offset_increment;
        uint64_t number_ios;
 
        uint64_t latency_target;
        uint64_t latency_window;
-       uint64_t max_latency;
+       uint64_t max_latency[DDIR_RWDIR_CNT];
+       uint32_t pad5;
        fio_fp64_t latency_percentile;
        uint32_t latency_run;
 
diff --git a/zbd.c b/zbd.c
index 6a26fe108a68acf55c7ad2b7e59e8fccb97f0dc8..d16b890fc6f1796445e007996ea5ab4ff0aee325 100644 (file)
--- a/zbd.c
+++ b/zbd.c
@@ -285,9 +285,7 @@ static bool zbd_verify_sizes(void)
                                return false;
                        }
 
-                       if (td->o.zone_skip &&
-                           (td->o.zone_skip < td->o.zone_size ||
-                            td->o.zone_skip % td->o.zone_size)) {
+                       if (td->o.zone_skip % td->o.zone_size) {
                                log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
                                        f->file_name, (unsigned long long) td->o.zone_skip,
                                        (unsigned long long) td->o.zone_size);
@@ -335,20 +333,21 @@ static bool zbd_verify_bs(void)
 {
        struct thread_data *td;
        struct fio_file *f;
-       uint32_t zone_size;
        int i, j, k;
 
        for_each_td(td, i) {
                for_each_file(td, f, j) {
+                       uint64_t zone_size;
+
                        if (!f->zbd_info)
                                continue;
                        zone_size = f->zbd_info->zone_size;
                        for (k = 0; k < FIO_ARRAY_SIZE(td->o.bs); k++) {
                                if (td->o.verify != VERIFY_NONE &&
                                    zone_size % td->o.bs[k] != 0) {
-                                       log_info("%s: block size %llu is not a divisor of the zone size %d\n",
+                                       log_info("%s: block size %llu is not a divisor of the zone size %llu\n",
                                                 f->file_name, td->o.bs[k],
-                                                zone_size);
+                                                (unsigned long long)zone_size);
                                        return false;
                                }
                        }
@@ -648,7 +647,7 @@ static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
 static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
                          struct fio_zone_info *z);
 
-int zbd_setup_files(struct thread_data *td)
+int zbd_init_files(struct thread_data *td)
 {
        struct fio_file *f;
        int i;
@@ -657,6 +656,44 @@ int zbd_setup_files(struct thread_data *td)
                if (zbd_init_zone_info(td, f))
                        return 1;
        }
+       return 0;
+}
+
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td)
+{
+       struct fio_file *f;
+       int i;
+
+       for_each_file(td, f, i) { /* td->o is shared by all files: each file with a nonzero zone size overwrites the earlier result */
+               struct zoned_block_device_info *zbd = f->zbd_info;
+               /* Use the file's zbd_info zone size when present; zonemode=strided doesn't get per-file zone size, so fall back to the zone_size option. */
+               uint64_t zone_size = zbd ? zbd->zone_size : td->o.zone_size;
+
+               if (zone_size == 0)
+                       continue; /* no zone size to scale by: leave the options untouched for this file */
+
+               if (td->o.size_nz > 0) { /* each nonzero *_nz field below replaces its option with (*_nz * zone_size) */
+                       td->o.size = td->o.size_nz * zone_size;
+               }
+               if (td->o.io_size_nz > 0) {
+                       td->o.io_size = td->o.io_size_nz * zone_size;
+               }
+               if (td->o.start_offset_nz > 0) {
+                       td->o.start_offset = td->o.start_offset_nz * zone_size;
+               }
+               if (td->o.offset_increment_nz > 0) {
+                       td->o.offset_increment = td->o.offset_increment_nz * zone_size;
+               }
+               if (td->o.zone_skip_nz > 0) {
+                       td->o.zone_skip = td->o.zone_skip_nz * zone_size;
+               }
+       }
+}
+
+int zbd_setup_files(struct thread_data *td)
+{
+       struct fio_file *f;
+       int i;
 
        if (!zbd_using_direct_io()) {
                log_err("Using direct I/O is mandatory for writing to ZBD drives\n\n");
diff --git a/zbd.h b/zbd.h
index cc3ab6241e9b35782c0580d8b397edf1f2bba9b4..6453439313f8de4d5c049c371237762f05d8b7d6 100644 (file)
--- a/zbd.h
+++ b/zbd.h
@@ -87,6 +87,8 @@ struct zoned_block_device_info {
        struct fio_zone_info    zone_info[0];
 };
 
+int zbd_init_files(struct thread_data *td);
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td);
 int zbd_setup_files(struct thread_data *td);
 void zbd_free_zone_info(struct fio_file *f);
 void zbd_file_reset(struct thread_data *td, struct fio_file *f);