/tags
/TAGS
/t/zbd/test-zbd-support.log.*
+/t/fuzz/fuzz_parseini
#!/bin/sh
GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.25
+DEF_VER=fio-3.26
LF='
'
unless :option:`verify` is set or :option:`cuda_io` is `posix`.
:option:`iomem` must not be `cudamalloc`. This ioengine defines
engine specific options.
+ **dfs**
+ I/O engine supporting asynchronous read and write operations to the
+ DAOS File System (DFS) via libdfs.
I/O engine specific parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GPU to RAM before a write and copied from RAM to GPU after a
read. :option:`verify` does not affect use of cudaMemcpy.
+.. option:: pool=str : [dfs]
+
+ Specify the UUID of the DAOS pool to connect to.
+
+.. option:: cont=str : [dfs]
+
+ Specify the UUID of the DAOS container to open.
+
+.. option:: chunk_size=int : [dfs]
+
+	Specify a different chunk size (in bytes) for the dfs file.
+	Use the DAOS container's chunk size by default.
+
+.. option:: object_class=str : [dfs]
+
+	Specify a different object class for the dfs file.
+	Use the DAOS container's object class by default.
+
I/O depth
~~~~~~~~~
true, fio will continue running and try to meet :option:`latency_target`
by adjusting queue depth.
-.. option:: max_latency=time
+.. option:: max_latency=time[,time][,time]
If set, fio will exit the job with an ETIMEDOUT error if it exceeds this
maximum latency. When the unit is omitted, the value is interpreted in
- microseconds.
+ microseconds. Comma-separated values may be specified for reads, writes,
+ and trims as described in :option:`blocksize`.
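+
+	For example, ``max_latency=250ms,500ms`` would cap read latency at 250
+	milliseconds and write (and, following that convention, trim) latency at
+	500 milliseconds.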
.. option:: rate_cycle=int
http_LIBS = -lcurl -lssl -lcrypto
ENGINES += http
endif
+ifdef CONFIG_DFS
+ dfs_SRCS = engines/dfs.c
+ dfs_LIBS = -luuid -ldaos -ldfs
+ ENGINES += dfs
+endif
SOURCE += oslib/asprintf.c
ifndef CONFIG_STRSEP
SOURCE += oslib/strsep.c
for_each_td(td, i) {
steadystate_free(td);
fio_options_free(td);
+ fio_dump_options_free(td);
if (td->rusage_sem) {
fio_sem_remove(td->rusage_sem);
td->rusage_sem = NULL;
o->rate_iops_min[i] = le32_to_cpu(top->rate_iops_min[i]);
o->perc_rand[i] = le32_to_cpu(top->perc_rand[i]);
+
+ o->max_latency[i] = le64_to_cpu(top->max_latency[i]);
}
o->ratecycle = le32_to_cpu(top->ratecycle);
o->sync_file_range = le32_to_cpu(top->sync_file_range);
o->latency_target = le64_to_cpu(top->latency_target);
o->latency_window = le64_to_cpu(top->latency_window);
- o->max_latency = le64_to_cpu(top->max_latency);
o->latency_percentile.u.f = fio_uint64_to_double(le64_to_cpu(top->latency_percentile.u.i));
o->latency_run = le32_to_cpu(top->latency_run);
o->compress_percentage = le32_to_cpu(top->compress_percentage);
top->sync_file_range = cpu_to_le32(o->sync_file_range);
top->latency_target = __cpu_to_le64(o->latency_target);
top->latency_window = __cpu_to_le64(o->latency_window);
- top->max_latency = __cpu_to_le64(o->max_latency);
top->latency_percentile.u.i = __cpu_to_le64(fio_double_to_uint64(o->latency_percentile.u.f));
top->latency_run = __cpu_to_le32(o->latency_run);
top->compress_percentage = cpu_to_le32(o->compress_percentage);
top->rate_iops_min[i] = cpu_to_le32(o->rate_iops_min[i]);
top->perc_rand[i] = cpu_to_le32(o->perc_rand[i]);
+
+ top->max_latency[i] = __cpu_to_le64(o->max_latency[i]);
}
memcpy(top->verify_pattern, o->verify_pattern, MAX_PATTERN_SIZE);
libiscsi="no"
libnbd="no"
libzbc=""
+dfs=""
dynamic_engines="no"
prefix=/usr/local
;;
--dynamic-libengines) dynamic_engines="yes"
;;
+ --disable-dfs) dfs="no"
+ ;;
--help)
show_help="yes"
;;
echo "--disable-libzbc Disable libzbc even if found"
echo "--disable-tcmalloc Disable tcmalloc support"
echo "--dynamic-libengines Lib-based ioengines as dynamic libraries"
+ echo "--disable-dfs Disable DAOS File System support even if found"
exit $exit_val
fi
clock_gettime="yes" # clock_monotonic probe has dependency on this
clock_monotonic="yes"
sched_idle="yes"
+ pthread_condattr_setclock="no"
;;
esac
##########################################
# POSIX pthread_condattr_setclock() probe
-if test "$pthread_condattr_setclock" != "yes" ; then
- pthread_condattr_setclock="no"
-fi
-cat > $TMPC <<EOF
+if test "$pthread_condattr_setclock" != "no" ; then
+ cat > $TMPC <<EOF
#include <pthread.h>
int main(void)
{
return 0;
}
EOF
-if compile_prog "" "$LIBS" "pthread_condattr_setclock" ; then
- pthread_condattr_setclock=yes
-elif compile_prog "" "$LIBS -lpthread" "pthread_condattr_setclock" ; then
- pthread_condattr_setclock=yes
- LIBS="$LIBS -lpthread"
+ if compile_prog "" "$LIBS" "pthread_condattr_setclock" ; then
+ pthread_condattr_setclock=yes
+ elif compile_prog "" "$LIBS -lpthread" "pthread_condattr_setclock" ; then
+ pthread_condattr_setclock=yes
+ LIBS="$LIBS -lpthread"
+ fi
fi
print_config "pthread_condattr_setclock()" "$pthread_condattr_setclock"
fi
print_config "NBD engine" "$libnbd"
+##########################################
+# check for dfs (DAOS File System)
+if test "$dfs" != "no" ; then
+ cat > $TMPC << EOF
+#include <fcntl.h>
+#include <daos.h>
+#include <daos_fs.h>
+
+int main(int argc, char **argv)
+{
+ daos_handle_t poh;
+ daos_handle_t coh;
+ dfs_t *dfs;
+
+ (void) dfs_mount(poh, coh, O_RDWR, &dfs);
+
+ return 0;
+}
+EOF
+ if compile_prog "" "-luuid -ldfs -ldaos" "dfs"; then
+ dfs="yes"
+ else
+ dfs="no"
+ fi
+fi
+print_config "DAOS File System (dfs) Engine" "$dfs"
+
##########################################
# Check if we have lex/yacc available
yacc="no"
if test "$clock_monotonic" = "yes" ; then
output_sym "CONFIG_CLOCK_MONOTONIC"
fi
-if test "$clock_monotonic_raw" = "yes" ; then
- output_sym "CONFIG_CLOCK_MONOTONIC_RAW"
-fi
-if test "$clock_monotonic_precise" = "yes" ; then
- output_sym "CONFIG_CLOCK_MONOTONIC_PRECISE"
-fi
if test "$clockid_t" = "yes"; then
output_sym "CONFIG_CLOCKID_T"
fi
if test "$libcufile" = "yes" ; then
output_sym "CONFIG_LIBCUFILE"
fi
+if test "$dfs" = "yes" ; then
+ output_sym "CONFIG_DFS"
+fi
if test "$march_set" = "no" && test "$build_native" = "yes" ; then
output_sym "CONFIG_BUILD_NATIVE"
fi
--- /dev/null
+/**
+ * FIO engine for DAOS File System (dfs).
+ *
+ * (C) Copyright 2020-2021 Intel Corporation.
+ */
+
+#include <fio.h>
+#include <optgroup.h>
+
+#include <daos.h>
+#include <daos_fs.h>
+
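+/*
+ * The DAOS stack and the pool/container/DFS handles below are shared by
+ * all fio jobs in the process. daos_mutex serializes global setup and
+ * teardown, and num_threads tracks how many jobs still reference them.
+ */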
+static bool daos_initialized;
+static int num_threads;
+static pthread_mutex_t daos_mutex = PTHREAD_MUTEX_INITIALIZER;
+daos_handle_t poh; /* pool handle */
+daos_handle_t coh; /* container handle */
+daos_oclass_id_t cid = OC_UNKNOWN; /* object class */
+dfs_t *dfs; /* dfs mount reference */
+
+struct daos_iou {
+ struct io_u *io_u;
+ daos_event_t ev;
+ d_sg_list_t sgl;
+ d_iov_t iov;
+ daos_size_t size;
+ bool complete;
+};
+
+struct daos_data {
+ daos_handle_t eqh;
+ dfs_obj_t *obj;
+ struct io_u **io_us;
+ int queued;
+ int num_ios;
+};
+
+struct daos_fio_options {
+ void *pad;
+ char *pool; /* Pool UUID */
+ char *cont; /* Container UUID */
+ daos_size_t chsz; /* Chunk size */
+ char *oclass; /* object class */
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ char *svcl; /* service replica list, deprecated */
+#endif
+};
+
+static struct fio_option options[] = {
+ {
+ .name = "pool",
+ .lname = "pool uuid",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, pool),
+ .help = "DAOS pool uuid",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+ {
+ .name = "cont",
+ .lname = "container uuid",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, cont),
+ .help = "DAOS container uuid",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+ {
+ .name = "chunk_size",
+ .lname = "DFS chunk size",
+ .type = FIO_OPT_ULL,
+ .off1 = offsetof(struct daos_fio_options, chsz),
+ .help = "DFS chunk size in bytes",
+ .def = "0", /* use container default */
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+ {
+ .name = "object_class",
+ .lname = "object class",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, oclass),
+ .help = "DAOS object class",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ {
+ .name = "svcl",
+ .lname = "List of service ranks",
+ .type = FIO_OPT_STR_STORE,
+ .off1 = offsetof(struct daos_fio_options, svcl),
+ .help = "List of pool replicated service ranks",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_DFS,
+ },
+#endif
+ {
+ .name = NULL,
+ },
+};
+
+static int daos_fio_global_init(struct thread_data *td)
+{
+ struct daos_fio_options *eo = td->eo;
+ uuid_t pool_uuid, co_uuid;
+ daos_pool_info_t pool_info;
+ daos_cont_info_t co_info;
+ int rc = 0;
+
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ if (!eo->pool || !eo->cont || !eo->svcl) {
+#else
+ if (!eo->pool || !eo->cont) {
+#endif
+ log_err("Missing required DAOS options\n");
+ return EINVAL;
+ }
+
+ rc = daos_init();
+ if (rc != -DER_ALREADY && rc) {
+ log_err("Failed to initialize daos %d\n", rc);
+ td_verror(td, rc, "daos_init");
+ return rc;
+ }
+
+ rc = uuid_parse(eo->pool, pool_uuid);
+ if (rc) {
+ log_err("Failed to parse 'Pool uuid': %s\n", eo->pool);
+ td_verror(td, EINVAL, "uuid_parse(eo->pool)");
+ return EINVAL;
+ }
+
+ rc = uuid_parse(eo->cont, co_uuid);
+ if (rc) {
+ log_err("Failed to parse 'Cont uuid': %s\n", eo->cont);
+ td_verror(td, EINVAL, "uuid_parse(eo->cont)");
+ return EINVAL;
+ }
+
+ /* Connect to the DAOS pool */
+#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1
+ d_rank_list_t *svcl = NULL;
+
+ svcl = daos_rank_list_parse(eo->svcl, ":");
+ if (svcl == NULL) {
+ log_err("Failed to parse svcl\n");
+ td_verror(td, EINVAL, "daos_rank_list_parse");
+ return EINVAL;
+ }
+
+ rc = daos_pool_connect(pool_uuid, NULL, svcl, DAOS_PC_RW,
+ &poh, &pool_info, NULL);
+ d_rank_list_free(svcl);
+#else
+ rc = daos_pool_connect(pool_uuid, NULL, DAOS_PC_RW, &poh, &pool_info,
+ NULL);
+#endif
+ if (rc) {
+ log_err("Failed to connect to pool %d\n", rc);
+ td_verror(td, rc, "daos_pool_connect");
+ return rc;
+ }
+
+ /* Open the DAOS container */
+ rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL);
+ if (rc) {
+ log_err("Failed to open container: %d\n", rc);
+ td_verror(td, rc, "daos_cont_open");
+ (void)daos_pool_disconnect(poh, NULL);
+ return rc;
+ }
+
+ /* Mount encapsulated filesystem */
+ rc = dfs_mount(poh, coh, O_RDWR, &dfs);
+ if (rc) {
+ log_err("Failed to mount DFS namespace: %d\n", rc);
+ td_verror(td, rc, "dfs_mount");
+ (void)daos_pool_disconnect(poh, NULL);
+ (void)daos_cont_close(coh, NULL);
+ return rc;
+ }
+
+ /* Retrieve object class to use, if specified */
+ if (eo->oclass)
+ cid = daos_oclass_name2id(eo->oclass);
+
+ return 0;
+}
+
+static int daos_fio_global_cleanup()
+{
+ int rc;
+ int ret = 0;
+
+ rc = dfs_umount(dfs);
+ if (rc) {
+ log_err("failed to umount dfs: %d\n", rc);
+ ret = rc;
+ }
+ rc = daos_cont_close(coh, NULL);
+ if (rc) {
+ log_err("failed to close container: %d\n", rc);
+ if (ret == 0)
+ ret = rc;
+ }
+ rc = daos_pool_disconnect(poh, NULL);
+ if (rc) {
+ log_err("failed to disconnect pool: %d\n", rc);
+ if (ret == 0)
+ ret = rc;
+ }
+ rc = daos_fini();
+ if (rc) {
+ log_err("failed to finalize daos: %d\n", rc);
+ if (ret == 0)
+ ret = rc;
+ }
+
+ return ret;
+}
+
+static int daos_fio_setup(struct thread_data *td)
+{
+ return 0;
+}
+
+static int daos_fio_init(struct thread_data *td)
+{
+ struct daos_data *dd;
+ int rc = 0;
+
+ pthread_mutex_lock(&daos_mutex);
+
+ dd = malloc(sizeof(*dd));
+ if (dd == NULL) {
+ log_err("Failed to allocate DAOS-private data\n");
+ rc = ENOMEM;
+ goto out;
+ }
+
+ dd->queued = 0;
+ dd->num_ios = td->o.iodepth;
+ dd->io_us = calloc(dd->num_ios, sizeof(struct io_u *));
+ if (dd->io_us == NULL) {
+ log_err("Failed to allocate IO queue\n");
+ rc = ENOMEM;
+ goto out;
+ }
+
+ /* initialize DAOS stack if not already up */
+ if (!daos_initialized) {
+ rc = daos_fio_global_init(td);
+ if (rc)
+ goto out;
+ daos_initialized = true;
+ }
+
+ rc = daos_eq_create(&dd->eqh);
+ if (rc) {
+ log_err("Failed to create event queue: %d\n", rc);
+ td_verror(td, rc, "daos_eq_create");
+ goto out;
+ }
+
+ td->io_ops_data = dd;
+ num_threads++;
+out:
+ if (rc) {
+ if (dd) {
+ free(dd->io_us);
+ free(dd);
+ }
+ if (num_threads == 0 && daos_initialized) {
+ /* don't clobber error return value */
+ (void)daos_fio_global_cleanup();
+ daos_initialized = false;
+ }
+ }
+ pthread_mutex_unlock(&daos_mutex);
+ return rc;
+}
+
+static void daos_fio_cleanup(struct thread_data *td)
+{
+ struct daos_data *dd = td->io_ops_data;
+ int rc;
+
+ if (dd == NULL)
+ return;
+
+ rc = daos_eq_destroy(dd->eqh, DAOS_EQ_DESTROY_FORCE);
+ if (rc < 0) {
+ log_err("failed to destroy event queue: %d\n", rc);
+ td_verror(td, rc, "daos_eq_destroy");
+ }
+
+ free(dd->io_us);
+ free(dd);
+
+ pthread_mutex_lock(&daos_mutex);
+ num_threads--;
+ if (daos_initialized && num_threads == 0) {
+ int ret;
+
+ ret = daos_fio_global_cleanup();
+ if (ret < 0 && rc == 0) {
+ log_err("failed to clean up: %d\n", ret);
+ td_verror(td, ret, "daos_fio_global_cleanup");
+ }
+ daos_initialized = false;
+ }
+ pthread_mutex_unlock(&daos_mutex);
+}
+
+static int daos_fio_get_file_size(struct thread_data *td, struct fio_file *f)
+{
+ char *file_name = f->file_name;
+ struct stat stbuf = {0};
+ int rc;
+
+ dprint(FD_FILE, "dfs stat %s\n", f->file_name);
+
+ if (!daos_initialized)
+ return 0;
+
+ rc = dfs_stat(dfs, NULL, file_name, &stbuf);
+ if (rc) {
+ log_err("Failed to stat %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_stat");
+ return rc;
+ }
+
+ f->real_file_size = stbuf.st_size;
+ return 0;
+}
+
+static int daos_fio_close(struct thread_data *td, struct fio_file *f)
+{
+ struct daos_data *dd = td->io_ops_data;
+ int rc;
+
+ dprint(FD_FILE, "dfs release %s\n", f->file_name);
+
+ rc = dfs_release(dd->obj);
+ if (rc) {
+ log_err("Failed to release %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_release");
+ return rc;
+ }
+
+ return 0;
+}
+
+static int daos_fio_open(struct thread_data *td, struct fio_file *f)
+{
+ struct daos_data *dd = td->io_ops_data;
+ struct daos_fio_options *eo = td->eo;
+ int flags = 0;
+ int rc;
+
+ dprint(FD_FILE, "dfs open %s (%s/%d/%d)\n",
+		f->file_name, td_write(td) && !read_only ? "rw" : "r",
+ td->o.create_on_open, td->o.allow_create);
+
+ if (td->o.create_on_open && td->o.allow_create)
+ flags |= O_CREAT;
+
+ if (td_write(td)) {
+ if (!read_only)
+ flags |= O_RDWR;
+ if (td->o.allow_create)
+ flags |= O_CREAT;
+ } else if (td_read(td)) {
+ flags |= O_RDONLY;
+ }
+
+ rc = dfs_open(dfs, NULL, f->file_name,
+ S_IFREG | S_IRUSR | S_IWUSR,
+ flags, cid, eo->chsz, NULL, &dd->obj);
+ if (rc) {
+ log_err("Failed to open %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_open");
+ return rc;
+ }
+
+ return 0;
+}
+
+static int daos_fio_unlink(struct thread_data *td, struct fio_file *f)
+{
+ int rc;
+
+ dprint(FD_FILE, "dfs remove %s\n", f->file_name);
+
+ rc = dfs_remove(dfs, NULL, f->file_name, false, NULL);
+ if (rc) {
+ log_err("Failed to remove %s: %d\n", f->file_name, rc);
+ td_verror(td, rc, "dfs_remove");
+ return rc;
+ }
+
+ return 0;
+}
+
+static int daos_fio_invalidate(struct thread_data *td, struct fio_file *f)
+{
+ dprint(FD_FILE, "dfs invalidate %s\n", f->file_name);
+ return 0;
+}
+
+static void daos_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
+{
+ struct daos_iou *io = io_u->engine_data;
+
+ if (io) {
+ io_u->engine_data = NULL;
+ free(io);
+ }
+}
+
+static int daos_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
+{
+ struct daos_iou *io;
+
+ io = malloc(sizeof(struct daos_iou));
+ if (!io) {
+ td_verror(td, ENOMEM, "malloc");
+ return ENOMEM;
+ }
+ io->io_u = io_u;
+ io_u->engine_data = io;
+ return 0;
+}
+
+static struct io_u * daos_fio_event(struct thread_data *td, int event)
+{
+ struct daos_data *dd = td->io_ops_data;
+
+ return dd->io_us[event];
+}
+
+static int daos_fio_getevents(struct thread_data *td, unsigned int min,
+ unsigned int max, const struct timespec *t)
+{
+ struct daos_data *dd = td->io_ops_data;
+ daos_event_t *evp[max];
+ unsigned int events = 0;
+ int i;
+ int rc;
+
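+	/* Busy-poll the event queue until at least min I/Os have completed. */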
+ while (events < min) {
+ rc = daos_eq_poll(dd->eqh, 0, DAOS_EQ_NOWAIT, max, evp);
+ if (rc < 0) {
+ log_err("Event poll failed: %d\n", rc);
+ td_verror(td, rc, "daos_eq_poll");
+ return events;
+ }
+
+ for (i = 0; i < rc; i++) {
+ struct daos_iou *io;
+ struct io_u *io_u;
+
+ io = container_of(evp[i], struct daos_iou, ev);
+ if (io->complete)
+ log_err("Completion on already completed I/O\n");
+
+ io_u = io->io_u;
+ if (io->ev.ev_error)
+ io_u->error = io->ev.ev_error;
+ else
+ io_u->resid = 0;
+
+ dd->io_us[events] = io_u;
+ dd->queued--;
+ daos_event_fini(&io->ev);
+ io->complete = true;
+ events++;
+ }
+ }
+
+	dprint(FD_IO, "dfs eq_poll returning %d (%u/%u)\n", events, min, max);
+
+ return events;
+}
+
+static enum fio_q_status daos_fio_queue(struct thread_data *td,
+ struct io_u *io_u)
+{
+ struct daos_data *dd = td->io_ops_data;
+ struct daos_iou *io = io_u->engine_data;
+ daos_off_t offset = io_u->offset;
+ int rc;
+
+ if (dd->queued == td->o.iodepth)
+ return FIO_Q_BUSY;
+
+ io->sgl.sg_nr = 1;
+ io->sgl.sg_nr_out = 0;
+ d_iov_set(&io->iov, io_u->xfer_buf, io_u->xfer_buflen);
+ io->sgl.sg_iovs = &io->iov;
+ io->size = io_u->xfer_buflen;
+
+ io->complete = false;
+ rc = daos_event_init(&io->ev, dd->eqh, NULL);
+ if (rc) {
+ log_err("Event init failed: %d\n", rc);
+ io_u->error = rc;
+ return FIO_Q_COMPLETED;
+ }
+
+ switch (io_u->ddir) {
+ case DDIR_WRITE:
+ rc = dfs_write(dfs, dd->obj, &io->sgl, offset, &io->ev);
+ if (rc) {
+ log_err("dfs_write failed: %d\n", rc);
+ io_u->error = rc;
+ return FIO_Q_COMPLETED;
+ }
+ break;
+ case DDIR_READ:
+ rc = dfs_read(dfs, dd->obj, &io->sgl, offset, &io->size,
+ &io->ev);
+ if (rc) {
+ log_err("dfs_read failed: %d\n", rc);
+ io_u->error = rc;
+ return FIO_Q_COMPLETED;
+ }
+ break;
+ case DDIR_SYNC:
+ io_u->error = 0;
+ return FIO_Q_COMPLETED;
+ default:
+ dprint(FD_IO, "Invalid IO type: %d\n", io_u->ddir);
+ io_u->error = -DER_INVAL;
+ return FIO_Q_COMPLETED;
+ }
+
+ dd->queued++;
+ return FIO_Q_QUEUED;
+}
+
+static int daos_fio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
+{
+ return 0;
+}
+
+/* ioengine_ops for get_ioengine() */
+FIO_STATIC struct ioengine_ops ioengine = {
+ .name = "dfs",
+ .version = FIO_IOOPS_VERSION,
+ .flags = FIO_DISKLESSIO | FIO_NODISKUTIL,
+
+ .setup = daos_fio_setup,
+ .init = daos_fio_init,
+ .prep = daos_fio_prep,
+ .cleanup = daos_fio_cleanup,
+
+ .open_file = daos_fio_open,
+ .invalidate = daos_fio_invalidate,
+ .get_file_size = daos_fio_get_file_size,
+ .close_file = daos_fio_close,
+ .unlink_file = daos_fio_unlink,
+
+ .queue = daos_fio_queue,
+ .getevents = daos_fio_getevents,
+ .event = daos_fio_event,
+ .io_u_init = daos_fio_io_u_init,
+ .io_u_free = daos_fio_io_u_free,
+
+ .option_struct_size = sizeof(struct daos_fio_options),
+ .options = options,
+};
+
+static void fio_init fio_dfs_register(void)
+{
+ register_ioengine(&ioengine);
+}
+
+static void fio_exit fio_dfs_unregister(void)
+{
+ unregister_ioengine(&ioengine);
+}
dprint(FD_FILE, "fd open %s\n", f->file_name);
- if (f->filetype != FIO_TYPE_FILE) {
- log_err("fio: only files are supported fallocate \n");
+ if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK) {
+		log_err("fio: only files and block devices are supported for fallocate\n");
return 1;
}
if (!strcmp(f->file_name, "-")) {
dprint(FD_FILE, "fd open %s\n", f->file_name);
if (f->filetype != FIO_TYPE_FILE) {
- log_err("fio: only files are supported fallocate \n");
+ log_err("fio: only files are supported\n");
return 1;
}
if (!strcmp(f->file_name, "-")) {
err = fio_ioring_queue_init(td);
if (err) {
- td_verror(td, errno, "io_queue_init");
+ int init_err = errno;
+
+ if (init_err == ENOSYS)
+ log_err("fio: your kernel doesn't support io_uring\n");
+ td_verror(td, init_err, "io_queue_init");
return 1;
}
.open_file = fio_libpmem_open_file,
.close_file = fio_libpmem_close_file,
.get_file_size = generic_get_file_size,
+ .prepopulate_file = generic_prepopulate_file,
.flags = FIO_SYNCIO | FIO_RAWIO | FIO_DISKLESSIO | FIO_NOEXTEND |
FIO_NODISKUTIL | FIO_BARRIER | FIO_MEMALIGN,
};
--- /dev/null
+[global]
+ioengine=dfs
+pool=${POOL}
+cont=${CONT}
+filename_format=fio-test.$jobnum
+
+cpus_allowed_policy=split
+group_reporting=1
+time_based=0
+percentile_list=99.0:99.9:99.99:99.999:99.9999:100
+disable_slat=1
+disable_clat=1
+
+bs=1M
+size=100G
+iodepth=16
+numjobs=16
+
+[daos-seqwrite]
+rw=write
+stonewall
+
+[daos-seqread]
+rw=read
+stonewall
+
+[daos-randwrite]
+rw=randwrite
+stonewall
+
+[daos-randread]
+rw=randread
+stonewall
extern int __must_check generic_open_file(struct thread_data *, struct fio_file *);
extern int __must_check generic_close_file(struct thread_data *, struct fio_file *);
extern int __must_check generic_get_file_size(struct thread_data *, struct fio_file *);
+extern int __must_check generic_prepopulate_file(struct thread_data *, struct fio_file *);
#ifdef __cplusplus
}
#endif
return ret;
}
+/*
+ * Generic function to prepopulate regular file with data.
+ * Useful if you want to make sure I/O engine has data to read.
+ * Leaves f->fd open on success, caller must close.
+ */
+int generic_prepopulate_file(struct thread_data *td, struct fio_file *f)
+{
+ int flags;
+ unsigned long long left, bs;
+ char *b = NULL;
+
+ /* generic function for regular files only */
+ assert(f->filetype == FIO_TYPE_FILE);
+
+ if (read_only) {
+ log_err("fio: refusing to write a file due to read-only\n");
+ return 0;
+ }
+
+ flags = O_WRONLY;
+ if (td->o.allow_create)
+ flags |= O_CREAT;
+
+#ifdef WIN32
+ flags |= _O_BINARY;
+#endif
+
+ dprint(FD_FILE, "open file %s, flags %x\n", f->file_name, flags);
+ f->fd = open(f->file_name, flags, 0644);
+ if (f->fd < 0) {
+ int err = errno;
+
+ if (err == ENOENT && !td->o.allow_create)
+ log_err("fio: file creation disallowed by "
+ "allow_file_create=0\n");
+ else
+ td_verror(td, err, "open");
+ return 1;
+ }
+
+ left = f->real_file_size;
+ bs = td->o.max_bs[DDIR_WRITE];
+ if (bs > left)
+ bs = left;
+
+ b = malloc(bs);
+ if (!b) {
+ td_verror(td, errno, "malloc");
+ goto err;
+ }
+
+ while (left && !td->terminate) {
+ ssize_t r;
+
+ if (bs > left)
+ bs = left;
+
+ fill_io_buffer(td, b, bs, bs);
+
+ r = write(f->fd, b, bs);
+
+ if (r > 0) {
+ left -= r;
+ } else {
+ td_verror(td, errno, "write");
+ goto err;
+ }
+ }
+
+ if (td->terminate) {
+ dprint(FD_FILE, "terminate unlink %s\n", f->file_name);
+ td_io_unlink_file(td, f);
+ } else if (td->o.create_fsync) {
+ if (fsync(f->fd) < 0) {
+ td_verror(td, errno, "fsync");
+ goto err;
+ }
+ }
+
+ free(b);
+ return 0;
+err:
+ close(f->fd);
+ f->fd = -1;
+ if (b)
+ free(b);
+ return 1;
+}
+
unsigned long long get_rand_file_size(struct thread_data *td)
{
unsigned long long ret, sized;
if (o->read_iolog_file)
goto done;
+ if (td->o.zone_mode == ZONE_MODE_ZBD) {
+ err = zbd_init_files(td);
+ if (err)
+ goto err_out;
+ }
+ zbd_recalc_options_with_zone_granularity(td);
+
/*
* check sizes. if the files/devices do not exist and the size
* isn't passed to fio, abort.
temp_stall_ts = 0;
}
+ if (err)
+ goto err_out;
+
+ /*
+ * Prepopulate files with data. It might be expected to read some
+ * "real" data instead of zero'ed files (if no writes to file occurred
+ * prior to a read job). Engine has to provide a way to do that.
+ */
+ if (td->io_ops->prepopulate_file) {
+ temp_stall_ts = 1;
+
+ for_each_file(td, f, i) {
+ if (output_format & FIO_OUTPUT_NORMAL) {
+ log_info("%s: Prepopulating IO file (%s)\n",
+ o->name, f->file_name);
+ }
+
+ err = td->io_ops->prepopulate_file(td, f);
+ if (err)
+ break;
+
+ err = __file_invalidate_cache(td, f, f->file_offset,
+ f->io_size);
+
+ /*
+ * Shut up static checker
+ */
+ if (f->fd != -1)
+ close(f->fd);
+
+ f->fd = -1;
+ if (err)
+ break;
+ }
+ temp_stall_ts = 0;
+ }
+
if (err)
goto err_out;
}
done:
- if (o->create_only)
- td->done = 1;
-
- td_restore_runstate(td, old_state);
-
if (td->o.zone_mode == ZONE_MODE_ZBD) {
err = zbd_setup_files(td);
if (err)
goto err_out;
}
+
+ if (o->create_only)
+ td->done = 1;
+
+ td_restore_runstate(td, old_state);
+
return 0;
err_offset:
.PD
.RE
.P
+The `z' suffix specifies that the value is measured in zones.
+The value is recalculated once the block device's zone size becomes known.
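+For example, on a device with 256 MiB zones, `size=2z' is recalculated
+to 512 MiB.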
+.P
If the option accepts an upper and lower range, use a colon ':' or
minus '\-' to separate such values. See \fIirange\fR parameter type.
If the lower value specified happens to be larger than the upper value
block device, the zone capacity is obtained from the device information and this
option is ignored.
.TP
-.BI zoneskip \fR=\fPint
+.BI zoneskip \fR=\fPint[z]
For \fBzonemode\fR=strided, the number of bytes to skip after \fBzonesize\fR
bytes of data have been transferred.
should be associated with them.
.RE
.TP
-.BI offset \fR=\fPint
+.BI offset \fR=\fPint[%|z]
Start I/O at the provided offset in the file, given as either a fixed size in
bytes or a percentage. If a percentage is given, the generated offset will be
aligned to the minimum \fBblocksize\fR or to the value of \fBoffset_align\fR if
is aligned upwards to this value. Defaults to 0 meaning that a percentage
offset is aligned to the minimum block size.
.TP
-.BI offset_increment \fR=\fPint
+.BI offset_increment \fR=\fPint[%|z]
If this is provided, then the real offset becomes `\fBoffset\fR + \fBoffset_increment\fR
* thread_number', where the thread number is a counter that starts at 0 and
is incremented for each sub-job (i.e. when \fBnumjobs\fR option is
simulate a smaller amount of memory. The amount specified is per worker.
.SS "I/O size"
.TP
-.BI size \fR=\fPint
+.BI size \fR=\fPint[%|z]
The total size of file I/O for each thread of this job. Fio will run until
this many bytes has been transferred, unless runtime is limited by other options
(such as \fBruntime\fR, for instance, or increased/decreased by \fBio_size\fR).
Can be combined with \fBoffset\fR to constrain the start and end range
that I/O will be done within.
.TP
-.BI io_size \fR=\fPint "\fR,\fB io_limit" \fR=\fPint
+.BI io_size \fR=\fPint[%|z] "\fR,\fB io_limit" \fR=\fPint[%|z]
Normally fio operates within the region set by \fBsize\fR, which means
that the \fBsize\fR option sets both the region and size of I/O to be
performed. Sometimes that is not what you want. With this option, it is
I/O without transferring buffers between user-space and the kernel,
unless \fBverify\fR is set or \fBcuda_io\fR is \fBposix\fR. \fBiomem\fR must
not be \fBcudamalloc\fR. This ioengine defines engine specific options.
+.TP
+.B dfs
+I/O engine supporting asynchronous read and write operations to the DAOS File
+System (DFS) via libdfs.
.SS "I/O engine specific parameters"
In addition, there are some parameters which are only valid when a specific
\fBioengine\fR is in use. These are used identically to normal parameters,
the use of cudaMemcpy.
.RE
.RE
+.TP
+.BI (dfs)pool
+Specify the UUID of the DAOS pool to connect to.
+.TP
+.BI (dfs)cont
+Specify the UUID of the DAOS container to open.
+.TP
+.BI (dfs)chunk_size
+Specify a different chunk size (in bytes) for the dfs file.
+Use the DAOS container's chunk size by default.
+.TP
+.BI (dfs)object_class
+Specify a different object class for the dfs file.
+Use the DAOS container's object class by default.
.SS "I/O depth"
.TP
.BI iodepth \fR=\fPint
queue depth that meets \fBlatency_target\fR and exit. If true, fio will continue
running and try to meet \fBlatency_target\fR by adjusting queue depth.
.TP
-.BI max_latency \fR=\fPtime
+.BI max_latency \fR=\fPtime[,time][,time]
If set, fio will exit the job with an ETIMEDOUT error if it exceeds this
maximum latency. When the unit is omitted, the value is interpreted in
-microseconds.
+microseconds. Comma-separated values may be specified for reads, writes,
+and trims as described in \fBblocksize\fR.
.TP
.BI rate_cycle \fR=\fPint
Average bandwidth for \fBrate\fR and \fBrate_min\fR over this number
}
}
-static void fio_dump_options_free(struct thread_data *td)
-{
- while (!flist_empty(&td->opt_list)) {
- struct print_option *p;
-
- p = flist_first_entry(&td->opt_list, struct print_option, list);
- flist_del_init(&p->list);
- free(p->name);
- free(p->value);
- free(p);
- }
-}
-
static void copy_opt_list(struct thread_data *dst, struct thread_data *src)
{
struct flist_head *entry;
/*
* Fix these up to be nsec internally
*/
- o->max_latency *= 1000ULL;
+ for_each_rw_ddir(ddir)
+ o->max_latency[ddir] *= 1000ULL;
+
o->latency_target *= 1000ULL;
return ret;
return 0;
}
-static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
+static void lat_fatal(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd,
unsigned long long tnsec, unsigned long long max_nsec)
{
- if (!td->error)
- log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec)\n", tnsec, max_nsec);
+ if (!td->error) {
+ log_err("fio: latency of %llu nsec exceeds specified max (%llu nsec): %s %s %llu %llu\n",
+ tnsec, max_nsec,
+ io_u->file->file_name,
+ io_ddir_name(io_u->ddir),
+ io_u->offset, io_u->buflen);
+ }
td_verror(td, ETIMEDOUT, "max latency exceeded");
icd->error = ETIMEDOUT;
}
icd->error = ops->io_u_lat(td, tnsec);
}
- if (td->o.max_latency && tnsec > td->o.max_latency)
- lat_fatal(td, icd, tnsec, td->o.max_latency);
- if (td->o.latency_target && tnsec > td->o.latency_target) {
- if (lat_target_failed(td))
- lat_fatal(td, icd, tnsec, td->o.latency_target);
+ if (ddir_rw(idx)) {
+ if (td->o.max_latency[idx] && tnsec > td->o.max_latency[idx])
+ lat_fatal(td, io_u, icd, tnsec, td->o.max_latency[idx]);
+ if (td->o.latency_target && tnsec > td->o.latency_target) {
+ if (lat_target_failed(td))
+ lat_fatal(td, io_u, icd, tnsec, td->o.latency_target);
+ }
}
}
#include "io_u.h"
#include "zbd_types.h"
-#define FIO_IOOPS_VERSION 28
+#define FIO_IOOPS_VERSION 29
#ifndef CONFIG_DYNAMIC_ENGINES
#define FIO_STATIC static
int (*invalidate)(struct thread_data *, struct fio_file *);
int (*unlink_file)(struct thread_data *, struct fio_file *);
int (*get_file_size)(struct thread_data *, struct fio_file *);
+ int (*prepopulate_file)(struct thread_data *, struct fio_file *);
void (*terminate)(struct thread_data *);
int (*iomem_alloc)(struct thread_data *, size_t);
void (*iomem_free)(struct thread_data *);
/*
* open iolog, check version, and call appropriate parser
*/
-static bool init_iolog_read(struct thread_data *td)
+static bool init_iolog_read(struct thread_data *td, char *fname)
{
- char buffer[256], *p, *fname;
+ char buffer[256], *p;
FILE *f = NULL;
- fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
dprint(FD_IO, "iolog: name=%s\n", fname);
if (is_socket(fname)) {
if (td->o.read_iolog_file) {
int need_swap;
+	char *fname = get_name_by_idx(td->o.read_iolog_file, td->subjob_number);
/*
* Check if it's a blktrace file and load that if possible.
* Otherwise assume it's a normal log file and load that.
*/
- if (is_blktrace(td->o.read_iolog_file, &need_swap))
- ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
+ if (is_blktrace(fname, &need_swap))
+ ret = load_blktrace(td, fname, need_swap);
else
- ret = init_iolog_read(td);
+ ret = init_iolog_read(td, fname);
} else if (td->o.write_iolog_file)
ret = init_iolog_write(td);
else
.name = "libcufile I/O engine", /* libcufile */
.mask = FIO_OPT_G_LIBCUFILE,
},
+ {
+ .name = "DAOS File System (dfs) I/O engine", /* dfs */
+ .mask = FIO_OPT_G_DFS,
+ },
{
.name = NULL,
},
__FIO_OPT_G_FILESTAT,
__FIO_OPT_G_NR,
__FIO_OPT_G_LIBCUFILE,
+ __FIO_OPT_G_DFS,
FIO_OPT_G_RATE = (1ULL << __FIO_OPT_G_RATE),
FIO_OPT_G_ZONE = (1ULL << __FIO_OPT_G_ZONE),
FIO_OPT_G_IOURING = (1ULL << __FIO_OPT_G_IOURING),
FIO_OPT_G_FILESTAT = (1ULL << __FIO_OPT_G_FILESTAT),
FIO_OPT_G_LIBCUFILE = (1ULL << __FIO_OPT_G_LIBCUFILE),
+ FIO_OPT_G_DFS = (1ULL << __FIO_OPT_G_DFS),
};
extern const struct opt_group *opt_group_from_mask(uint64_t *mask);
if (parse_is_percent(v)) {
td->o.start_offset = 0;
td->o.start_offset_percent = -1ULL - v;
+ td->o.start_offset_nz = 0;
dprint(FD_PARSE, "SET start_offset_percent %d\n",
td->o.start_offset_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.start_offset = 0;
+ td->o.start_offset_percent = 0;
+ td->o.start_offset_nz = v - ZONE_BASE_VAL;
} else
td->o.start_offset = v;
if (parse_is_percent(v)) {
td->o.offset_increment = 0;
td->o.offset_increment_percent = -1ULL - v;
+ td->o.offset_increment_nz = 0;
dprint(FD_PARSE, "SET offset_increment_percent %d\n",
td->o.offset_increment_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.offset_increment = 0;
+ td->o.offset_increment_percent = 0;
+ td->o.offset_increment_nz = v - ZONE_BASE_VAL;
} else
td->o.offset_increment = v;
td->o.size_percent = -1ULL - v;
dprint(FD_PARSE, "SET size_percent %d\n",
td->o.size_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.size = 0;
+ td->o.size_percent = 0;
+ td->o.size_nz = v - ZONE_BASE_VAL;
} else
td->o.size = v;
}
dprint(FD_PARSE, "SET io_size_percent %d\n",
td->o.io_size_percent);
+ } else if (parse_is_zone(v)) {
+ td->o.io_size = 0;
+ td->o.io_size_percent = 0;
+ td->o.io_size_nz = v - ZONE_BASE_VAL;
} else
td->o.io_size = v;
return 0;
}
+static int str_zoneskip_cb(void *data, unsigned long long *__val)
+{
+ struct thread_data *td = cb_data_to_td(data);
+ unsigned long long v = *__val;
+
+ if (parse_is_zone(v)) {
+ td->o.zone_skip = 0;
+ td->o.zone_skip_nz = v - ZONE_BASE_VAL;
+ } else
+ td->o.zone_skip = v;
+
+ return 0;
+}
+
static int str_write_bw_log_cb(void *data, const char *str)
{
struct thread_data *td = cb_data_to_td(data);
{ .ival = "nbd",
.help = "Network Block Device (NBD) IO engine"
},
+#ifdef CONFIG_DFS
+ { .ival = "dfs",
+ .help = "DAOS File System (dfs) IO engine",
+ },
+#endif
},
},
{
{
.name = "size",
.lname = "Size",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_size_cb,
.off1 = offsetof(struct thread_options, size),
.help = "Total size of device or files",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
.name = "io_size",
.alias = "io_limit",
.lname = "IO Size",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_io_size_cb,
.off1 = offsetof(struct thread_options, io_size),
.help = "Total size of I/O to be performed",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
.name = "offset",
.lname = "IO offset",
.alias = "fileoffset",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_offset_cb,
.off1 = offsetof(struct thread_options, start_offset),
.help = "Start IO from this offset",
.def = "0",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "offset_increment",
.lname = "IO offset increment",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
.cb = str_offset_increment_cb,
.off1 = offsetof(struct thread_options, offset_increment),
.help = "What is the increment from one offset to the next",
.parent = "offset",
.hide = 1,
.def = "0",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_INVALID,
},
{
.name = "zoneskip",
.lname = "Zone skip",
- .type = FIO_OPT_STR_VAL,
+ .type = FIO_OPT_STR_VAL_ZONE,
+ .cb = str_zoneskip_cb,
.off1 = offsetof(struct thread_options, zone_skip),
.help = "Space between IO zones",
.def = "0",
- .interval = 1024 * 1024,
.category = FIO_OPT_C_IO,
.group = FIO_OPT_G_ZONE,
},
{
.name = "max_latency",
.lname = "Max Latency (usec)",
- .type = FIO_OPT_STR_VAL_TIME,
- .off1 = offsetof(struct thread_options, max_latency),
+ .type = FIO_OPT_ULL,
+ .off1 = offsetof(struct thread_options, max_latency[DDIR_READ]),
+ .off2 = offsetof(struct thread_options, max_latency[DDIR_WRITE]),
+ .off3 = offsetof(struct thread_options, max_latency[DDIR_TRIM]),
.help = "Maximum tolerated IO latency (usec)",
.is_time = 1,
.category = FIO_OPT_C_IO,
}
}
+void fio_dump_options_free(struct thread_data *td)
+{
+ while (!flist_empty(&td->opt_list)) {
+ struct print_option *p;
+
+ p = flist_first_entry(&td->opt_list, struct print_option, list);
+ flist_del_init(&p->list);
+ free(p->name);
+ free(p->value);
+ free(p);
+ }
+}
+
struct fio_option *fio_option_find(const char *name)
{
return find_option(fio_options, name);
void del_opt_posval(const char *, const char *);
struct thread_data;
void fio_options_free(struct thread_data *);
+void fio_dump_options_free(struct thread_data *);
char *get_next_str(char **ptr);
int get_max_str_idx(char *input);
char* get_name_by_idx(char *input, int index);
"OPT_BOOL",
"OPT_FLOAT_LIST",
"OPT_STR_SET",
+ "OPT_STR_VAL_ZONE",
"OPT_DEPRECATED",
"OPT_SOFT_DEPRECATED",
"OPT_UNSUPPORTED",
fallthrough;
case FIO_OPT_ULL:
case FIO_OPT_INT:
- case FIO_OPT_STR_VAL: {
+ case FIO_OPT_STR_VAL:
+ case FIO_OPT_STR_VAL_ZONE:
+ {
fio_opt_str_val_fn *fn = o->cb;
char tmp[128], *p;
+ size_t len = strlen(ptr);
+
+ if (len > 0 && ptr[len - 1] == 'z') {
+ if (o->type == FIO_OPT_STR_VAL_ZONE) {
+ char *ep;
+ unsigned long long val;
+
+ errno = 0;
+ val = strtoul(ptr, &ep, 10);
+ if (errno == 0 && ep != ptr && *ep == 'z') {
+ ull = ZONE_BASE_VAL + (uint32_t)val;
+ ret = 0;
+ goto store_option_value;
+ } else {
+ log_err("%s: unexpected zone value '%s'\n",
+ o->name, ptr);
+ return 1;
+ }
+ } else {
+ log_err("%s: 'z' suffix isn't applicable\n",
+ o->name);
+ return 1;
+ }
+ }
if (!is_time && o->is_time)
is_time = o->is_time;
}
}
+store_option_value:
if (fn)
ret = fn(data, &ull);
else {
FIO_OPT_BOOL,
FIO_OPT_FLOAT_LIST,
FIO_OPT_STR_SET,
+ FIO_OPT_STR_VAL_ZONE,
FIO_OPT_DEPRECATED,
FIO_OPT_SOFT_DEPRECATED,
FIO_OPT_UNSUPPORTED, /* keep this last */
static inline int parse_is_percent(unsigned long long val)
{
- return val <= -1ULL && val >= (-1ULL - 100ULL);
+ return val >= -101;
}
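+/*
+ * Values given with a 'z' (zone) suffix are stored as ZONE_BASE_VAL plus a
+ * 32-bit zone count, i.e. in the top half of the u64 range, much like
+ * percentages are encoded near -1ULL. parse_is_zone() detects such values.
+ */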
+#define ZONE_BASE_VAL ((-1ULL >> 1) + 1)
static inline int parse_is_percent_uncapped(unsigned long long val)
{
- return (long long)val <= -1;
+ return ZONE_BASE_VAL + -1U < val;
+}
+
+static inline int parse_is_zone(unsigned long long val)
+{
+ return (val - ZONE_BASE_VAL) <= -1U;
}
struct print_option {
break;
}
flist_add_tail(&entry->list, &first->next);
- } while (ret != Z_STREAM_END);
+ }
ret = deflateEnd(&stream);
if (ret == Z_OK)
};
enum {
- FIO_SERVER_VER = 87,
+ FIO_SERVER_VER = 89,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
next_tail = tail = *ring->tail;
do {
next_tail++;
- read_barrier();
- if (next_tail == *ring->head)
+ if (next_tail == atomic_load_acquire(ring->head))
break;
index = tail & sq_ring_mask;
tail = next_tail;
} while (prepped < max_ios);
- if (*ring->tail != tail) {
- *ring->tail = tail;
- write_barrier();
- }
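+	/*
+	 * Publish the new tail with release semantics so the kernel side
+	 * sees the prepared SQEs before it observes the updated tail.
+	 */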
+ if (prepped)
+ atomic_store_release(ring->tail, tail);
return prepped;
}
struct file *f;
read_barrier();
- if (head == *ring->tail)
+ if (head == atomic_load_acquire(ring->tail))
break;
cqe = &ring->cqes[head & cq_ring_mask];
if (!do_nop) {
head++;
} while (1);
- s->inflight -= reaped;
- *ring->head = head;
- write_barrier();
+ if (reaped) {
+ s->inflight -= reaped;
+ atomic_store_release(ring->head, head);
+ }
return reaped;
}
prepped = 0;
do {
int to_wait, to_submit, this_reap, to_prep;
+ unsigned ring_flags = 0;
if (!prepped && s->inflight < depth) {
to_prep = min(depth - s->inflight, batch_submit);
* Only need to call io_uring_enter if we're not using SQ thread
* poll, or if IORING_SQ_NEED_WAKEUP is set.
*/
- if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) {
+ if (sq_thread_poll)
+ ring_flags = atomic_load_acquire(ring->flags);
+ if (!sq_thread_poll || ring_flags & IORING_SQ_NEED_WAKEUP) {
unsigned flags = 0;
if (to_wait)
flags = IORING_ENTER_GETEVENTS;
- if ((*ring->flags & IORING_SQ_NEED_WAKEUP))
+ if (ring_flags & IORING_SQ_NEED_WAKEUP)
flags |= IORING_ENTER_SQ_WAKEUP;
ret = io_uring_enter(s, to_submit, to_wait, flags);
s->calls++;
+ } else {
+ /* for SQPOLL, we submitted it all effectively */
+ ret = to_submit;
}
/*
>> "${logfile}.${test_number}" 2>&1 || return $?
}
+# test 'z' suffix parsing only
+test55() {
+ local bs
+ bs=$((logical_block_size))
+
+ require_zbd || return $SKIP_TESTCASE
+ # offset=1z + offset_increment=10z + size=2z
+ require_seq_zones 13 || return $SKIP_TESTCASE
+
+ run_fio --name=j \
+ --filename=${dev} \
+ --direct=1 \
+ "$(ioengine "psync")" \
+ --zonemode=zbd \
+ --zonesize=${zone_size} \
+ --rw=write \
+ --bs=${bs} \
+ --numjobs=2 \
+ --offset_increment=10z \
+ --offset=1z \
+ --size=2z \
+ --io_size=3z \
+ ${job_var_opts[@]} --debug=zbd \
+ >> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
+# test 'z' suffix parsing only
+test56() {
+ local bs
+ bs=$((logical_block_size))
+
+ require_regular_block_dev || return $SKIP_TESTCASE
+ require_seq_zones 10 || return $SKIP_TESTCASE
+
+ run_fio --name=j \
+ --filename=${dev} \
+ --direct=1 \
+ "$(ioengine "psync")" \
+ --zonemode=strided \
+ --zonesize=${zone_size} \
+ --rw=write \
+ --bs=${bs} \
+ --size=10z \
+ --zoneskip=2z \
+ ${job_var_opts[@]} --debug=zbd \
+ >> "${logfile}.${test_number}" 2>&1 || return $?
+}
+
SECONDS=0
tests=()
dynamic_analyzer=()
unsigned long long size;
unsigned long long io_size;
unsigned int size_percent;
+ unsigned int size_nz;
unsigned int io_size_percent;
+ unsigned int io_size_nz;
unsigned int fill_device;
unsigned int file_append;
unsigned long long file_size_low;
unsigned long long file_size_high;
unsigned long long start_offset;
unsigned long long start_offset_align;
+ unsigned int start_offset_nz;
unsigned long long bs[DDIR_RWDIR_CNT];
unsigned long long ba[DDIR_RWDIR_CNT];
unsigned long long zone_size;
unsigned long long zone_capacity;
unsigned long long zone_skip;
+ uint32_t zone_skip_nz;
enum fio_zone_mode zone_mode;
unsigned long long lockmem;
enum fio_memtype mem_type;
unsigned int mem_align;
- unsigned long long max_latency;
+ unsigned long long max_latency[DDIR_RWDIR_CNT];
unsigned int exit_what;
unsigned int stonewall;
unsigned int gid;
unsigned int offset_increment_percent;
+ unsigned int offset_increment_nz;
unsigned long long offset_increment;
unsigned long long number_ios;
uint64_t size;
uint64_t io_size;
uint32_t size_percent;
+ uint32_t size_nz;
uint32_t io_size_percent;
+ uint32_t io_size_nz;
uint32_t fill_device;
uint32_t file_append;
uint32_t unique_filename;
+ uint32_t pad3;
uint64_t file_size_low;
uint64_t file_size_high;
uint64_t start_offset;
uint64_t start_offset_align;
+ uint32_t start_offset_nz;
+ uint32_t pad4;
uint64_t bs[DDIR_RWDIR_CNT];
uint64_t ba[DDIR_RWDIR_CNT];
struct zone_split zone_split[DDIR_RWDIR_CNT][ZONESPLIT_MAX];
uint32_t zone_split_nr[DDIR_RWDIR_CNT];
- uint8_t pad1[4];
-
fio_fp64_t zipf_theta;
fio_fp64_t pareto_h;
fio_fp64_t gauss_dev;
uint64_t zone_capacity;
uint64_t zone_skip;
uint64_t lockmem;
+ uint32_t zone_skip_nz;
uint32_t mem_type;
uint32_t mem_align;
uint32_t new_group;
uint32_t numjobs;
- uint8_t pad3[4];
-
/*
* We currently can't convert these, so don't enable them
*/
uint32_t gid;
uint32_t offset_increment_percent;
+ uint32_t offset_increment_nz;
uint64_t offset_increment;
uint64_t number_ios;
uint64_t latency_target;
uint64_t latency_window;
- uint64_t max_latency;
+ uint64_t max_latency[DDIR_RWDIR_CNT];
+ uint32_t pad5;
fio_fp64_t latency_percentile;
uint32_t latency_run;
return false;
}
- if (td->o.zone_skip &&
- (td->o.zone_skip < td->o.zone_size ||
- td->o.zone_skip % td->o.zone_size)) {
+ if (td->o.zone_skip % td->o.zone_size) {
log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
f->file_name, (unsigned long long) td->o.zone_skip,
(unsigned long long) td->o.zone_size);
{
struct thread_data *td;
struct fio_file *f;
- uint32_t zone_size;
int i, j, k;
for_each_td(td, i) {
for_each_file(td, f, j) {
+ uint64_t zone_size;
+
if (!f->zbd_info)
continue;
zone_size = f->zbd_info->zone_size;
for (k = 0; k < FIO_ARRAY_SIZE(td->o.bs); k++) {
if (td->o.verify != VERIFY_NONE &&
zone_size % td->o.bs[k] != 0) {
- log_info("%s: block size %llu is not a divisor of the zone size %d\n",
+ log_info("%s: block size %llu is not a divisor of the zone size %llu\n",
f->file_name, td->o.bs[k],
- zone_size);
+ (unsigned long long)zone_size);
return false;
}
}
static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
struct fio_zone_info *z);
-int zbd_setup_files(struct thread_data *td)
+int zbd_init_files(struct thread_data *td)
{
struct fio_file *f;
int i;
if (zbd_init_zone_info(td, f))
return 1;
}
+ return 0;
+}
+
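+/*
+ * Convert the zone counts captured by 'z' suffix parsing into byte values,
+ * now that each file's zone size is known.
+ */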
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td)
+{
+ struct fio_file *f;
+ int i;
+
+ for_each_file(td, f, i) {
+ struct zoned_block_device_info *zbd = f->zbd_info;
+ // zonemode=strided doesn't get per-file zone size.
+ uint64_t zone_size = zbd ? zbd->zone_size : td->o.zone_size;
+
+ if (zone_size == 0)
+ continue;
+
+ if (td->o.size_nz > 0) {
+ td->o.size = td->o.size_nz * zone_size;
+ }
+ if (td->o.io_size_nz > 0) {
+ td->o.io_size = td->o.io_size_nz * zone_size;
+ }
+ if (td->o.start_offset_nz > 0) {
+ td->o.start_offset = td->o.start_offset_nz * zone_size;
+ }
+ if (td->o.offset_increment_nz > 0) {
+ td->o.offset_increment = td->o.offset_increment_nz * zone_size;
+ }
+ if (td->o.zone_skip_nz > 0) {
+ td->o.zone_skip = td->o.zone_skip_nz * zone_size;
+ }
+ }
+}
+
+int zbd_setup_files(struct thread_data *td)
+{
+ struct fio_file *f;
+ int i;
if (!zbd_using_direct_io()) {
log_err("Using direct I/O is mandatory for writing to ZBD drives\n\n");
struct fio_zone_info zone_info[0];
};
+int zbd_init_files(struct thread_data *td);
+void zbd_recalc_options_with_zone_granularity(struct thread_data *td);
int zbd_setup_files(struct thread_data *td);
void zbd_free_zone_info(struct fio_file *f);
void zbd_file_reset(struct thread_data *td, struct fio_file *f);