Merge branch 'master' of https://github.com/donny372/fio into master
authorJens Axboe <axboe@kernel.dk>
Sat, 8 Aug 2020 00:21:52 +0000 (18:21 -0600)
committerJens Axboe <axboe@kernel.dk>
Sat, 8 Aug 2020 00:21:52 +0000 (18:21 -0600)
* 'master' of https://github.com/donny372/fio:
  Add support for reading iolog from stdin.

67 files changed:
.appveyor.yml
.gitignore
.travis.yml
FIO-VERSION-GEN
HOWTO
Makefile
arch/arch.h
backend.c
cconv.c
ci/travis-build.sh
ci/travis-install.sh
compiler/compiler-gcc4.h [deleted file]
compiler/compiler.h
configure
engines/dev-dax.c
engines/guasi.c
engines/http.c
engines/io_uring.c
engines/libaio.c
engines/libhdfs.c
engines/libiscsi.c
engines/libpmem.c
engines/libzbc.c
engines/nbd.c
engines/pmemblk.c
engines/rados.c
engines/rbd.c
engines/rdma.c
fio.1
fio_sem.c
gettime-thread.c
gettime.h
init.c
io_u.c
ioengines.c
ioengines.h
iolog.c
lib/num2str.c
lib/rand.h
lib/roundup.h [new file with mode: 0644]
lib/seqlock.h
memory.c
options.c
os/os-linux.h
os/windows/posix.c
oslib/linux-blkzoned.c
rate-submit.c
stat.c
t/debug.c
t/io_uring.c
t/jobs/t0011-5d2788d5.fio
t/jobs/t0012.fio [new file with mode: 0644]
t/jobs/t0013.fio [new file with mode: 0644]
t/run-fio-tests.py
t/zbd/functions
t/zbd/run-tests-against-zoned-nullb
t/zbd/test-zbd-support
thread_options.h
tools/fio_generate_plots
unittests/lib/num2str.c [new file with mode: 0644]
unittests/unittest.c
unittests/unittest.h
verify.c
workqueue.c
zbd.c
zbd.h
zbd_types.h

index 70c337f8465cc3a10e9ac0c085d33a234a4ed282..5c0266a1ed40db9a90bdc6e325a0e2b9c0fcad57 100644 (file)
@@ -1,5 +1,8 @@
 clone_depth: 1 # NB: this stops FIO-VERSION-GEN making tag based versions
 
+image:
+  - Visual Studio 2019
+
 environment:
   CYG_MIRROR: http://cygwin.mirror.constant.com
   CYG_ROOT: C:\cygwin64
@@ -15,6 +18,7 @@ environment:
 install:
   - '%CYG_ROOT%\setup-x86_64.exe --quiet-mode --no-shortcuts --only-site --site "%CYG_MIRROR%" --packages "mingw64-%PACKAGE_ARCH%-zlib,mingw64-%PACKAGE_ARCH%-CUnit" > NUL'
   - SET PATH=C:\Python38-x64;%CYG_ROOT%\bin;%PATH% # NB: Changed env variables persist to later sections
+  - SET PYTHONUNBUFFERED=TRUE
   - python.exe -m pip install scipy six
 
 build_script:
index b84b0fda0aa71750c28a316322761af6d1aca0f6..0aa4a3611c031024f631418fee0fad1ba94d0cae 100644 (file)
@@ -1,5 +1,6 @@
 *.d
 *.o
+*.so
 *.exe
 /.depend
 /FIO-VERSION-FILE
index b64f0a958bbc418212e351e8bec4531c8e4ea18b..e35aff394b999f0cf6e54797c3cbb5bee1a0d029 100644 (file)
@@ -9,36 +9,28 @@ arch:
   - amd64
   - arm64
 env:
-  matrix:
-    - BUILD_ARCH="x86"
-    - BUILD_ARCH="x86_64"
   global:
     - MAKEFLAGS="-j 2"
 matrix:
   include:
+    - os: linux
+      compiler: gcc
+      arch: amd64
+      env: BUILD_ARCH="x86" # Only do the gcc x86 build to reduce clutter
     # Default xcode image
     - os: osx
       compiler: clang # Workaround travis setting CC=["clang", "gcc"]
-      env: BUILD_ARCH="x86_64"
       arch: amd64
     # Latest xcode image (needs periodic updating)
     - os: osx
       compiler: clang
       osx_image: xcode11.2
-      env: BUILD_ARCH="x86_64"
       arch: amd64
   exclude:
     - os: osx
       compiler: gcc
-    - os: linux
-      compiler: clang
-      arch: amd64
-      env: BUILD_ARCH="x86" # Only do the gcc x86 build to reduce clutter
-    - os: linux
-      env: BUILD_ARCH="x86"
-      arch: arm64
 
-before_install:
+install:
   - ci/travis-install.sh
 
 script:
index 7050f84edbbdb38f4f0f5207bd5fe06e51697d99..48e575fc53431e016db75f8811a7f51a06d1b0e8 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.20
+DEF_VER=fio-3.21
 
 LF='
 '
diff --git a/HOWTO b/HOWTO
index 3c8fbd83adde00d9fc65ae4d4365d11189f39e8e..e0403b0803f04cb04ef7a14832dd39b3803c34d8 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -970,14 +970,15 @@ Target file/device
        Accepted values are:
 
                **none**
-                               The :option:`zonerange`, :option:`zonesize` and
-                               :option:`zoneskip` parameters are ignored.
+                               The :option:`zonerange`, :option:`zonesize`,
+                               :option:`zonecapacity` and :option:`zoneskip`
+                               parameters are ignored.
                **strided**
                                I/O happens in a single zone until
                                :option:`zonesize` bytes have been transferred.
                                After that number of bytes has been
                                transferred processing of the next zone
-                               starts.
+                               starts. :option:`zonecapacity` is ignored.
                **zbd**
                                Zoned block device mode. I/O happens
                                sequentially in each zone, even if random I/O
@@ -1004,6 +1005,17 @@ Target file/device
        For :option:`zonemode` =zbd, this is the size of a single zone. The
        :option:`zonerange` parameter is ignored in this mode.
 
+
+.. option:: zonecapacity=int
+
+       For :option:`zonemode` =zbd, this defines the capacity of a single zone,
+       which is the accessible area starting from the zone start address.
+       This parameter only applies when using :option:`zonemode` =zbd in
+       combination with regular block devices. If not specified it defaults to
+       the zone size. If the target device is a zoned block device, the zone
+       capacity is obtained from the device information and this option is
+       ignored.
+
 .. option:: zoneskip=int
 
        For :option:`zonemode` =strided, the number of bytes to skip after
index 7eb5e899df994ee822ac1043737404e18c79026a..8e1ebc905fb259de72110df140cf9f7feb39281b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -60,15 +60,17 @@ ifdef CONFIG_LIBHDFS
 endif
 
 ifdef CONFIG_LIBISCSI
-  CFLAGS := $(LIBISCSI_CFLAGS) $(CFLAGS)
-  LIBS += $(LIBISCSI_LIBS)
-  SOURCE += engines/libiscsi.c
+  iscsi_SRCS = engines/libiscsi.c
+  iscsi_LIBS = $(LIBISCSI_LIBS)
+  iscsi_CFLAGS = $(LIBISCSI_CFLAGS)
+  ENGINES += iscsi
 endif
 
 ifdef CONFIG_LIBNBD
-  CFLAGS := $(LIBNBD_CFLAGS) $(CFLAGS)
-  LIBS += $(LIBNBD_LIBS)
-  SOURCE += engines/nbd.c
+  nbd_SRCS = engines/nbd.c
+  nbd_LIBS = $(LIBNBD_LIBS)
+  nbd_CFLAGS = $(LIBNBD_CFLAGS)
+  ENGINES += nbd
 endif
 
 ifdef CONFIG_64BIT
@@ -78,10 +80,19 @@ ifdef CONFIG_32BIT
   CFLAGS := -DBITS_PER_LONG=32 $(CFLAGS)
 endif
 ifdef CONFIG_LIBAIO
-  SOURCE += engines/libaio.c
+  aio_SRCS = engines/libaio.c
+  aio_LIBS = -laio
+  ifdef CONFIG_LIBAIO_URING
+    aio_LIBS = -luring
+  else
+    aio_LIBS = -laio
+  endif
+  ENGINES += aio
 endif
 ifdef CONFIG_RDMA
-  SOURCE += engines/rdma.c
+  rdma_SRCS = engines/rdma.c
+  rdma_LIBS = -libverbs -lrdmacm
+  ENGINES += rdma
 endif
 ifdef CONFIG_POSIXAIO
   SOURCE += engines/posixaio.c
@@ -96,7 +107,8 @@ ifdef CONFIG_LINUX_SPLICE
   SOURCE += engines/splice.c
 endif
 ifdef CONFIG_GUASI
-  SOURCE += engines/guasi.c
+  guasi_SRCS = engines/guasi.c
+  ENGINES += guasi
 endif
 ifdef CONFIG_SOLARISAIO
   SOURCE += engines/solarisaio.c
@@ -105,13 +117,19 @@ ifdef CONFIG_WINDOWSAIO
   SOURCE += engines/windowsaio.c
 endif
 ifdef CONFIG_RADOS
-  SOURCE += engines/rados.c
+  rados_SRCS = engines/rados.c
+  rados_LIBS = -lrados
+  ENGINES += rados
 endif
 ifdef CONFIG_RBD
-  SOURCE += engines/rbd.c
+  rbd_SRCS = engines/rbd.c
+  rbd_LIBS = -lrbd -lrados
+  ENGINES += rbd
 endif
 ifdef CONFIG_HTTP
-  SOURCE += engines/http.c
+  http_SRCS = engines/http.c
+  http_LIBS = -lcurl -lssl -lcrypto
+  ENGINES += http
 endif
 SOURCE += oslib/asprintf.c
 ifndef CONFIG_STRSEP
@@ -139,6 +157,7 @@ ifdef CONFIG_GFAPI
   SOURCE += engines/glusterfs.c
   SOURCE += engines/glusterfs_sync.c
   SOURCE += engines/glusterfs_async.c
+  LIBS += -lgfapi -lglusterfs
   ifdef CONFIG_GF_FADVISE
     CFLAGS := "-DGFAPI_USE_FADVISE" $(CFLAGS)
   endif
@@ -149,19 +168,27 @@ ifdef CONFIG_MTD
   SOURCE += oslib/libmtd_legacy.c
 endif
 ifdef CONFIG_PMEMBLK
-  SOURCE += engines/pmemblk.c
+  pmemblk_SRCS = engines/pmemblk.c
+  pmemblk_LIBS = -lpmemblk
+  ENGINES += pmemblk
 endif
 ifdef CONFIG_LINUX_DEVDAX
-  SOURCE += engines/dev-dax.c
+  dev-dax_SRCS = engines/dev-dax.c
+  dev-dax_LIBS = -lpmem
+  ENGINES += dev-dax
 endif
 ifdef CONFIG_LIBPMEM
-  SOURCE += engines/libpmem.c
+  pmem_SRCS = engines/libpmem.c
+  pmem_LIBS = -lpmem
+  ENGINES += pmem
 endif
 ifdef CONFIG_IME
   SOURCE += engines/ime.c
 endif
 ifdef CONFIG_LIBZBC
-  SOURCE += engines/libzbc.c
+  zbc_SRCS = engines/libzbc.c
+  zbc_LIBS = -lzbc
+  ENGINES += zbc
 endif
 
 ifeq ($(CONFIG_TARGET_OS), Linux)
@@ -223,6 +250,26 @@ ifneq (,$(findstring CYGWIN,$(CONFIG_TARGET_OS)))
   CFLAGS := -DPSAPI_VERSION=1 -Ios/windows/posix/include -Wno-format $(CFLAGS)
 endif
 
+ifdef CONFIG_DYNAMIC_ENGINES
+ DYNAMIC_ENGS := $(ENGINES)
+define engine_template =
+$(1)_OBJS := $$($(1)_SRCS:.c=.o)
+$$($(1)_OBJS): CFLAGS := -fPIC $$($(1)_CFLAGS) $(CFLAGS)
+engines/lib$(1).so: $$($(1)_OBJS)
+       $$(QUIET_LINK)$(CC) -shared -rdynamic -fPIC -Wl,-soname,lib$(1).so.1 $$($(1)_LIBS) -o $$@ $$<
+ENGS_OBJS += engines/lib$(1).so
+all install: $(ENGS_OBJS)
+endef
+else # !CONFIG_DYNAMIC_ENGINES
+define engine_template =
+SOURCE += $$($(1)_SRCS)
+LIBS += $$($(1)_LIBS)
+CFLAGS := $$($(1)_CFLAGS) $(CFLAGS)
+endef
+endif
+
+$(foreach eng,$(ENGINES),$(eval $(call engine_template,$(eng))))
+
 OBJS := $(SOURCE:.c=.o)
 
 FIO_OBJS = $(OBJS) fio.o
@@ -337,12 +384,14 @@ PROGS += $(T_PROGS)
 ifdef CONFIG_HAVE_CUNIT
 UT_OBJS = unittests/unittest.o
 UT_OBJS += unittests/lib/memalign.o
+UT_OBJS += unittests/lib/num2str.o
 UT_OBJS += unittests/lib/strntol.o
 UT_OBJS += unittests/oslib/strlcat.o
 UT_OBJS += unittests/oslib/strndup.o
 UT_OBJS += unittests/oslib/strcasestr.o
 UT_OBJS += unittests/oslib/strsep.o
 UT_TARGET_OBJS = lib/memalign.o
+UT_TARGET_OBJS += lib/num2str.o
 UT_TARGET_OBJS += lib/strntol.o
 UT_TARGET_OBJS += oslib/strlcat.o
 UT_TARGET_OBJS += oslib/strndup.o
@@ -372,6 +421,7 @@ else
 endif
 prefix = $(INSTALL_PREFIX)
 bindir = $(prefix)/bin
+libdir = $(prefix)/lib/fio
 
 ifeq ($(CONFIG_TARGET_OS), Darwin)
 mandir = /usr/share/man
@@ -520,7 +570,7 @@ unittests/unittest: $(UT_OBJS) $(UT_TARGET_OBJS)
 endif
 
 clean: FORCE
-       @rm -f .depend $(FIO_OBJS) $(GFIO_OBJS) $(OBJS) $(T_OBJS) $(UT_OBJS) $(PROGS) $(T_PROGS) $(T_TEST_PROGS) core.* core gfio unittests/unittest FIO-VERSION-FILE *.[do] lib/*.d oslib/*.[do] crc/*.d engines/*.[do] profiles/*.[do] t/*.[do] unittests/*.[do] unittests/*/*.[do] config-host.mak config-host.h y.tab.[ch] lex.yy.c exp/*.[do] lexer.h
+       @rm -f .depend $(FIO_OBJS) $(GFIO_OBJS) $(OBJS) $(T_OBJS) $(UT_OBJS) $(PROGS) $(T_PROGS) $(T_TEST_PROGS) core.* core gfio unittests/unittest FIO-VERSION-FILE *.[do] lib/*.d oslib/*.[do] crc/*.d engines/*.[do] engines/*.so profiles/*.[do] t/*.[do] unittests/*.[do] unittests/*/*.[do] config-host.mak config-host.h y.tab.[ch] lex.yy.c exp/*.[do] lexer.h
        @rm -f t/fio-btrace2fio t/io_uring t/read-to-pipe-async
        @rm -rf  doc/output
 
@@ -560,6 +610,10 @@ fulltest:
 install: $(PROGS) $(SCRIPTS) tools/plot/fio2gnuplot.1 FORCE
        $(INSTALL) -m 755 -d $(DESTDIR)$(bindir)
        $(INSTALL) $(PROGS) $(SCRIPTS) $(DESTDIR)$(bindir)
+ifdef CONFIG_DYNAMIC_ENGINES
+       $(INSTALL) -m 755 -d $(DESTDIR)$(libdir)
+       $(INSTALL) -m 755 $(SRCDIR)/engines/*.so $(DESTDIR)$(libdir)
+endif
        $(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man1
        $(INSTALL) -m 644 $(SRCDIR)/fio.1 $(DESTDIR)$(mandir)/man1
        $(INSTALL) -m 644 $(SRCDIR)/tools/fio_generate_plots.1 $(DESTDIR)$(mandir)/man1
index 30c0d2056d3eaa7abcffd5cd703f698d99790edd..08c3d7033d3037f854649b861cc81415469d7096 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef ARCH_H
 #define ARCH_H
 
+#include <stdatomic.h>
+
 #include "../lib/types.h"
 
 enum {
@@ -34,6 +36,13 @@ extern unsigned long arch_flags;
 
 #define ARCH_CPU_CLOCK_WRAPS
 
+#define atomic_load_acquire(p)                                 \
+       atomic_load_explicit((_Atomic typeof(*(p)) *)(p),       \
+                            memory_order_acquire)
+#define atomic_store_release(p, v)                             \
+       atomic_store_explicit((_Atomic typeof(*(p)) *)(p), (v), \
+                             memory_order_release)
+
 /* IWYU pragma: begin_exports */
 #if defined(__i386__)
 #include "arch-x86.h"
index 0075a733ffca7b2c5fd727f53d56d95e69592c17..0e454cdd1e9a4f2979320e7b913cc5f242d4b362 100644 (file)
--- a/backend.c
+++ b/backend.c
@@ -66,7 +66,11 @@ unsigned int stat_number = 0;
 int shm_id = 0;
 int temp_stall_ts;
 unsigned long done_secs = 0;
+#ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
+pthread_mutex_t overlap_check = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
 pthread_mutex_t overlap_check = PTHREAD_MUTEX_INITIALIZER;
+#endif
 
 #define JOB_START_TIMEOUT      (5 * 1000)
 
@@ -1535,7 +1539,7 @@ static void *thread_main(void *data)
        uint64_t bytes_done[DDIR_RWDIR_CNT];
        int deadlock_loop_cnt;
        bool clear_state;
-       int ret;
+       int res, ret;
 
        sk_out_assign(sk_out);
        free(fd);
@@ -1860,11 +1864,15 @@ static void *thread_main(void *data)
         * offload mode so that we don't clean up this job while
         * another thread is checking its io_u's for overlap
         */
-       if (td_offload_overlap(td))
-               pthread_mutex_lock(&overlap_check);
+       if (td_offload_overlap(td)) {
+               int res = pthread_mutex_lock(&overlap_check);
+               assert(res == 0);
+       }
        td_set_runstate(td, TD_FINISHING);
-       if (td_offload_overlap(td))
-               pthread_mutex_unlock(&overlap_check);
+       if (td_offload_overlap(td)) {
+               res = pthread_mutex_unlock(&overlap_check);
+               assert(res == 0);
+       }
 
        update_rusage_stat(td);
        td->ts.total_run_time = mtime_since_now(&td->epoch);
diff --git a/cconv.c b/cconv.c
index 449bcf7b1678bdd6443aafd760970c7f8d503795..2469389bdb62e2ca4410f37ffa4daac666df6da1 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -223,6 +223,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
        o->ss_limit.u.f = fio_uint64_to_double(le64_to_cpu(top->ss_limit.u.i));
        o->zone_range = le64_to_cpu(top->zone_range);
        o->zone_size = le64_to_cpu(top->zone_size);
+       o->zone_capacity = le64_to_cpu(top->zone_capacity);
        o->zone_skip = le64_to_cpu(top->zone_skip);
        o->zone_mode = le32_to_cpu(top->zone_mode);
        o->lockmem = le64_to_cpu(top->lockmem);
@@ -563,6 +564,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->ss_limit.u.i = __cpu_to_le64(fio_double_to_uint64(o->ss_limit.u.f));
        top->zone_range = __cpu_to_le64(o->zone_range);
        top->zone_size = __cpu_to_le64(o->zone_size);
+       top->zone_capacity = __cpu_to_le64(o->zone_capacity);
        top->zone_skip = __cpu_to_le64(o->zone_skip);
        top->zone_mode = __cpu_to_le32(o->zone_mode);
        top->lockmem = __cpu_to_le64(o->lockmem);
index fff9c088b35f01df0e517c1c27a54d1f46328118..231417e21a1458f581b27db0ec8eb4992a7f83da 100755 (executable)
@@ -1,15 +1,29 @@
 #!/bin/bash
 
+CI_TARGET_ARCH="${BUILD_ARCH:-$TRAVIS_CPU_ARCH}"
 EXTRA_CFLAGS="-Werror"
+PYTHONUNBUFFERED=TRUE
+CONFIGURE_FLAGS=()
 
-if [[ "$BUILD_ARCH" == "x86" ]]; then
-    EXTRA_CFLAGS="${EXTRA_CFLAGS} -m32"
-fi
+case "$TRAVIS_OS_NAME" in
+    "linux")
+        CONFIGURE_FLAGS+=(--enable-libiscsi)
+        case "$CI_TARGET_ARCH" in
+            "x86")
+                EXTRA_CFLAGS="${EXTRA_CFLAGS} -m32"
+                ;;
+            "amd64")
+                CONFIGURE_FLAGS+=(--enable-cuda)
+                ;;
+        esac
+    ;;
+esac
+CONFIGURE_FLAGS+=(--extra-cflags="${EXTRA_CFLAGS}")
 
-./configure --extra-cflags="${EXTRA_CFLAGS}" &&
+./configure "${CONFIGURE_FLAGS[@]}" &&
     make &&
     make test &&
-    if [[ "$TRAVIS_CPU_ARCH" == "arm64" ]]; then
+    if [[ "$CI_TARGET_ARCH" == "arm64" ]]; then
        sudo python3 t/run-fio-tests.py --skip 6 1007 1008 --debug -p 1010:"--skip 15 16 17 18 19 20"
     else
        sudo python3 t/run-fio-tests.py --skip 6 1007 1008 --debug
index 232ab6b10c3c74752ab413bbe0957af57ca23ead..b6895e82cb98daecc22b8eb1fba32a0540312598 100755 (executable)
@@ -1,13 +1,15 @@
 #!/bin/bash
+set -e
 
+CI_TARGET_ARCH="${BUILD_ARCH:-$TRAVIS_CPU_ARCH}"
 case "$TRAVIS_OS_NAME" in
     "linux")
        # Architecture-dependent packages.
        pkgs=(
            libaio-dev
-           libcunit1
            libcunit1-dev
-           libgoogle-perftools4
+           libfl-dev
+           libgoogle-perftools-dev
            libibverbs-dev
            libiscsi-dev
            libnuma-dev
@@ -15,15 +17,26 @@ case "$TRAVIS_OS_NAME" in
            librdmacm-dev
            libz-dev
        )
-       if [[ "$BUILD_ARCH" == "x86" ]]; then
-           pkgs=("${pkgs[@]/%/:i386}")
-           pkgs+=(gcc-multilib)
-       else
-           pkgs+=(glusterfs-common)
+       case "$CI_TARGET_ARCH" in
+           "x86")
+               pkgs=("${pkgs[@]/%/:i386}")
+               pkgs+=(
+                   gcc-multilib
+                   pkg-config:i386
+               )
+               ;;
+           "amd64")
+               pkgs+=(nvidia-cuda-dev)
+               ;;
+       esac
+       if [[ $CI_TARGET_ARCH != "x86" ]]; then
+               pkgs+=(glusterfs-common)
        fi
        # Architecture-independent packages and packages for which we don't
        # care about the architecture.
        pkgs+=(
+           bison
+           flex
            python3
            python3-scipy
            python3-six
@@ -34,8 +47,7 @@ case "$TRAVIS_OS_NAME" in
     "osx")
        brew update >/dev/null 2>&1
        brew install cunit
-       pip3 install scipy
-       pip3 install six
+       pip3 install scipy six
        ;;
 esac
 
diff --git a/compiler/compiler-gcc4.h b/compiler/compiler-gcc4.h
deleted file mode 100644 (file)
index e8701cf..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef FIO_COMPILER_GCC4_H
-#define FIO_COMPILER_GCC4_H
-
-#ifndef __must_check
-#define __must_check           __attribute__((warn_unused_result))
-#endif
-
-#define GCC_VERSION (__GNUC__ * 10000          \
-                       + __GNUC_MINOR__ * 100  \
-                       + __GNUC_PATCHLEVEL__)
-
-#if GCC_VERSION >= 40300
-#define __compiletime_warning(message) __attribute__((warning(message)))
-#define __compiletime_error(message)   __attribute__((error(message)))
-#endif
-
-#endif
index ddfbcc124e77ac2b1a5d7aea1640ccbc4577a331..8a784b9269b6a66419fc7f51cb453f047309653f 100644 (file)
@@ -1,17 +1,10 @@
 #ifndef FIO_COMPILER_H
 #define FIO_COMPILER_H
 
-/* IWYU pragma: begin_exports */
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
-#include "compiler-gcc4.h"
-#else
-#error Compiler too old, need at least gcc 4.1.0
-#endif
-/* IWYU pragma: end_exports */
+#define __must_check           __attribute__((warn_unused_result))
 
-#ifndef __must_check
-#define __must_check
-#endif
+#define __compiletime_warning(message) __attribute__((warning(message)))
+#define __compiletime_error(message)   __attribute__((error(message)))
 
 /*
  * Mark unused variables passed to ops functions as unused, to silence gcc
index 3ee8aaf2b8205617fa154cf3f8f8431965a6ae58..5925e94f7ac4624d2fb3e8e29001503e211f2a32 100755 (executable)
--- a/configure
+++ b/configure
@@ -144,6 +144,7 @@ libhdfs="no"
 pmemblk="no"
 devdax="no"
 pmem="no"
+cuda="no"
 disable_lex=""
 disable_pmem="no"
 disable_native="no"
@@ -151,6 +152,7 @@ march_set="no"
 libiscsi="no"
 libnbd="no"
 libaio_uring="no"
+dynamic_engines="no"
 prefix=/usr/local
 
 # parse options
@@ -201,7 +203,7 @@ for opt do
   ;;
   --disable-pmem) disable_pmem="yes"
   ;;
-  --enable-cuda) enable_cuda="yes"
+  --enable-cuda) cuda="yes"
   ;;
   --disable-native) disable_native="yes"
   ;;
@@ -215,6 +217,8 @@ for opt do
   ;;
   --enable-libaio-uring) libaio_uring="yes"
   ;;
+  --dynamic-libengines) dynamic_engines="yes"
+  ;;
   --help)
     show_help="yes"
     ;;
@@ -254,6 +258,7 @@ if test "$show_help" = "yes" ; then
   echo "--enable-libnbd         Enable libnbd (NBD engine) support"
   echo "--disable-tcmalloc     Disable tcmalloc support"
   echo "--enable-libaio-uring   Enable libaio emulated over io_uring"
+  echo "--dynamic-libengines   Lib-based ioengines as dynamic libraries"
   exit $exit_val
 fi
 
@@ -546,6 +551,25 @@ else
 fi
 print_config "Static build" "$build_static"
 
+##########################################
+# check for C11 atomics support
+cat > $TMPC <<EOF
+#include <stdatomic.h>
+int main(void)
+{
+  _Atomic unsigned v;
+  atomic_load(&v);
+  return 0;
+}
+EOF
+if ! compile_prog "" "" "C11 atomics"; then
+  echo
+  echo "Your compiler doesn't support C11 atomics. gcc 4.9/clang 3.6 are the"
+  echo "minimum versions with it - perhaps your compiler is too old?"
+  fatal "C11 atomics support not found"
+fi
+
+
 ##########################################
 # check for wordsize
 wordsize="0"
@@ -603,13 +627,16 @@ int main(void)
   return 0;
 }
 EOF
-  if test "$libaio_uring" = "yes" && compile_prog "" "-luring" "libaio io_uring" ; then
-    libaio=yes
-    LIBS="-luring $LIBS"
+  if test "$libaio_uring" = "yes"; then
+    if compile_prog "" "-luring" "libaio io_uring" ; then
+      libaio=yes
+      LIBS="-luring $LIBS"
+    else
+      feature_not_found "libaio io_uring" ""
+    fi
   elif compile_prog "" "-laio" "libaio" ; then
     libaio=yes
     libaio_uring=no
-    LIBS="-laio $LIBS"
   else
     if test "$libaio" = "yes" ; then
       feature_not_found "linux AIO" "libaio-dev or libaio-devel"
@@ -859,7 +886,6 @@ int main(int argc, char **argv)
 EOF
 if test "$disable_rdma" != "yes" && compile_prog "" "-libverbs" "libverbs" ; then
     libverbs="yes"
-    LIBS="-libverbs $LIBS"
 fi
 print_config "libverbs" "$libverbs"
 
@@ -879,7 +905,6 @@ int main(int argc, char **argv)
 EOF
 if test "$disable_rdma" != "yes" && compile_prog "" "-lrdmacm" "rdma"; then
     rdmacm="yes"
-    LIBS="-lrdmacm $LIBS"
 fi
 print_config "rdmacm" "$rdmacm"
 
@@ -1770,10 +1795,8 @@ if test "$disable_http" != "yes"; then
   if compile_prog "" "$HTTP_LIBS" "curl-new-ssl"; then
     output_sym "CONFIG_HAVE_OPAQUE_HMAC_CTX"
     http="yes"
-    LIBS="$HTTP_LIBS $LIBS"
   elif mv $TMPC2 $TMPC && compile_prog "" "$HTTP_LIBS" "curl-old-ssl"; then
     http="yes"
-    LIBS="$HTTP_LIBS $LIBS"
   fi
 fi
 print_config "http engine" "$http"
@@ -1802,7 +1825,6 @@ int main(int argc, char **argv)
 }
 EOF
 if test "$disable_rados" != "yes"  && compile_prog "" "-lrados" "rados"; then
-  LIBS="-lrados $LIBS"
   rados="yes"
 fi
 print_config "Rados engine" "$rados"
@@ -1833,7 +1855,6 @@ int main(int argc, char **argv)
 }
 EOF
 if test "$disable_rbd" != "yes"  && compile_prog "" "-lrbd -lrados" "rbd"; then
-  LIBS="-lrbd -lrados $LIBS"
   rbd="yes"
 fi
 print_config "Rados Block Device engine" "$rbd"
@@ -1924,7 +1945,6 @@ int main(int argc, char **argv)
 }
 EOF
 if test "$disable_gfapi" != "yes"  && compile_prog "" "-lgfapi -lglusterfs" "gfapi"; then
-  LIBS="-lgfapi -lglusterfs $LIBS"
   gfapi="yes"
 fi
 print_config "Gluster API engine" "$gfapi"
@@ -2038,7 +2058,7 @@ if test "$libhdfs" = "yes" ; then
     hdfs_conf_error=1
   fi
   if test "$hdfs_conf_error" = "1" ; then
-    exit 1
+    feature_not_found "libhdfs" ""
   fi
   FIO_HDFS_CPU=$cpu
   if test "$FIO_HDFS_CPU" = "x86_64" ; then
@@ -2086,7 +2106,6 @@ int main(int argc, char **argv)
 EOF
 if compile_prog "" "-lpmem" "libpmem"; then
   libpmem="yes"
-  LIBS="-lpmem $LIBS"
 fi
 print_config "libpmem" "$libpmem"
 
@@ -2108,7 +2127,6 @@ int main(int argc, char **argv)
 EOF
   if compile_prog "" "-lpmemblk" "libpmemblk"; then
     libpmemblk="yes"
-    LIBS="-lpmemblk $LIBS"
   fi
 fi
 print_config "libpmemblk" "$libpmemblk"
@@ -2158,15 +2176,16 @@ fi
 print_config "DDN's Infinite Memory Engine" "$libime"
 
 ##########################################
-# Check if we have required environment variables configured for libiscsi
-if test "$libiscsi" = "yes" ; then
-  if $(pkg-config --atleast-version=1.9.0 libiscsi); then
+# Check if we have libiscsi
+if test "$libiscsi" != "no" ; then
+  minimum_libiscsi=1.9.0
+  if $(pkg-config --atleast-version=$minimum_libiscsi libiscsi); then
     libiscsi="yes"
     libiscsi_cflags=$(pkg-config --cflags libiscsi)
     libiscsi_libs=$(pkg-config --libs libiscsi)
   else
     if test "$libiscsi" = "yes" ; then
-      echo "libiscsi" "Install libiscsi >= 1.9.0"
+      feature_not_found "libiscsi" "libiscsi >= $minimum_libiscsi"
     fi
     libiscsi="no"
   fi
@@ -2174,16 +2193,16 @@ fi
 print_config "iscsi engine" "$libiscsi"
 
 ##########################################
-# Check if we have libnbd (for NBD support).
-minimum_libnbd=0.9.8
-if test "$libnbd" = "yes" ; then
+# Check if we have libnbd (for NBD support)
+if test "$libnbd" != "no" ; then
+  minimum_libnbd=0.9.8
   if $(pkg-config --atleast-version=$minimum_libnbd libnbd); then
     libnbd="yes"
     libnbd_cflags=$(pkg-config --cflags libnbd)
     libnbd_libs=$(pkg-config --libs libnbd)
   else
     if test "$libnbd" = "yes" ; then
-      echo "libnbd" "Install libnbd >= $minimum_libnbd"
+      feature_not_found "libnbd" "libnbd >= $minimum_libnbd"
     fi
     libnbd="no"
   fi
@@ -2397,6 +2416,7 @@ if compile_prog "" "" "valgrind_dev"; then
 fi
 print_config "Valgrind headers" "$valgrind_dev"
 
+if test "$targetos" = "Linux" ; then
 ##########################################
 # <linux/blkzoned.h> probe
 if test "$linux_blkzoned" != "yes" ; then
@@ -2414,6 +2434,24 @@ if compile_prog "" "" "linux_blkzoned"; then
 fi
 print_config "Zoned block device support" "$linux_blkzoned"
 
+##########################################
+# Check BLK_ZONE_REP_CAPACITY
+cat > $TMPC << EOF
+#include <linux/blkzoned.h>
+int main(void)
+{
+  return BLK_ZONE_REP_CAPACITY;
+}
+EOF
+if compile_prog "" "" "blkzoned report capacity"; then
+  output_sym "CONFIG_HAVE_REP_CAPACITY"
+  rep_capacity="yes"
+else
+  rep_capacity="no"
+fi
+print_config "Zoned block device capacity" "$rep_capacity"
+fi
+
 ##########################################
 # libzbc probe
 if test "$libzbc" != "yes" ; then
@@ -2432,7 +2470,6 @@ if compile_prog "" "-lzbc" "libzbc"; then
   libzbcvermaj=$(pkg-config --modversion libzbc | sed 's/\.[0-9]*\.[0-9]*//')
   if test "$libzbcvermaj" -ge "5" ; then
     libzbc="yes"
-    LIBS="-lzbc $LIBS"
   else
     print_config "libzbc engine" "Unsupported libzbc version (version 5 or above required)"
     libzbc="no"
@@ -2476,9 +2513,7 @@ print_config "march_armv8_a_crc_crypto" "$march_armv8_a_crc_crypto"
 
 ##########################################
 # cuda probe
-if test "$cuda" != "yes" ; then
-  cuda="no"
-fi
+if test "$cuda" != "no" ; then
 cat > $TMPC << EOF
 #include <cuda.h>
 int main(int argc, char **argv)
@@ -2486,9 +2521,15 @@ int main(int argc, char **argv)
   return cuInit(0);
 }
 EOF
-if test "$enable_cuda" = "yes" && compile_prog "" "-lcuda" "cuda"; then
-  cuda="yes"
-  LIBS="-lcuda $LIBS"
+  if compile_prog "" "-lcuda" "cuda"; then
+    cuda="yes"
+    LIBS="-lcuda $LIBS"
+  else
+    if test "$cuda" = "yes" ; then
+      feature_not_found "cuda" ""
+    fi
+    cuda="no"
+  fi
 fi
 print_config "cuda" "$cuda"
 
@@ -2548,7 +2589,7 @@ fi
 print_config "__kernel_rwf_t" "$__kernel_rwf_t"
 
 ##########################################
-# check if gcc has -Wimplicit-fallthrough
+# check if gcc has -Wimplicit-fallthrough=2
 fallthrough="no"
 cat > $TMPC << EOF
 int main(int argc, char **argv)
@@ -2556,10 +2597,10 @@ int main(int argc, char **argv)
   return 0;
 }
 EOF
-if compile_prog "-Wimplicit-fallthrough" "" "-Wimplicit-fallthrough"; then
+if compile_prog "-Wimplicit-fallthrough=2" "" "-Wimplicit-fallthrough=2"; then
   fallthrough="yes"
 fi
-print_config "-Wimplicit-fallthrough" "$fallthrough"
+print_config "-Wimplicit-fallthrough=2" "$fallthrough"
 
 ##########################################
 # check for MADV_HUGEPAGE support
@@ -2966,17 +3007,26 @@ if test "$libnbd" = "yes" ; then
   echo "LIBNBD_CFLAGS=$libnbd_cflags" >> $config_host_mak
   echo "LIBNBD_LIBS=$libnbd_libs" >> $config_host_mak
 fi
+if test "$dynamic_engines" = "yes" ; then
+  output_sym "CONFIG_DYNAMIC_ENGINES"
+fi
+print_config "Lib-based ioengines dynamic" "$dynamic_engines"
 cat > $TMPC << EOF
 int main(int argc, char **argv)
 {
   return 0;
 }
 EOF
-if test "$disable_tcmalloc" != "yes"  && compile_prog "" "-ltcmalloc" "tcmalloc"; then
-  LIBS="-ltcmalloc $LIBS"
-  tcmalloc="yes"
-else
-  tcmalloc="no"
+if test "$disable_tcmalloc" != "yes"; then
+  if compile_prog "" "-ltcmalloc" "tcmalloc"; then
+    tcmalloc="yes"
+    LIBS="-ltcmalloc $LIBS"
+  elif compile_prog "" "-l:libtcmalloc_minimal.so.4" "tcmalloc_minimal4"; then
+    tcmalloc="yes"
+    LIBS="-l:libtcmalloc_minimal.so.4 $LIBS"
+  else
+    tcmalloc="no"
+  fi
 fi
 print_config "TCMalloc support" "$tcmalloc"
 
index 422ea634ff1d2530b6dae4ba40747417281ee27c..1d0f66cb1aba7ad57069cd742f93fb3783da567f 100644 (file)
@@ -328,7 +328,7 @@ fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f)
        return 0;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name           = "dev-dax",
        .version        = FIO_IOOPS_VERSION,
        .init           = fio_devdax_init,
index cb26802cce8b8f8acbbb510d715262f9b48fa2a9..d4121757e61975be59ebb1549241ab35aa8ab4fd 100644 (file)
@@ -242,7 +242,7 @@ static int fio_guasi_init(struct thread_data *td)
        return 0;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name           = "guasi",
        .version        = FIO_IOOPS_VERSION,
        .init           = fio_guasi_init,
index 275fcab561048f9e81908863cbda3366c28b73b1..7a61b132b92bd72b94ebdc9915e311235b53fc56 100644 (file)
@@ -639,7 +639,7 @@ static int fio_http_invalidate(struct thread_data *td, struct fio_file *f)
        return 0;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name = "http",
        .version                = FIO_IOOPS_VERSION,
        .flags                  = FIO_DISKLESSIO | FIO_SYNCIO,
index cab7ecaf1ac08a503ee0aa349892f6343dad69e4..0ccd23184fab3b17a9c68c93a9c30375b5673986 100644 (file)
@@ -17,6 +17,7 @@
 #include "../optgroup.h"
 #include "../lib/memalign.h"
 #include "../lib/fls.h"
+#include "../lib/roundup.h"
 
 #ifdef ARCH_HAVE_IOURING
 
@@ -301,15 +302,15 @@ static int fio_ioring_cqring_reap(struct thread_data *td, unsigned int events,
 
        head = *ring->head;
        do {
-               read_barrier();
-               if (head == *ring->tail)
+               if (head == atomic_load_acquire(ring->tail))
                        break;
                reaped++;
                head++;
        } while (reaped + events < max);
 
-       *ring->head = head;
-       write_barrier();
+       if (reaped)
+               atomic_store_release(ring->head, head);
+
        return reaped;
 }
 
@@ -384,15 +385,13 @@ static enum fio_q_status fio_ioring_queue(struct thread_data *td,
 
        tail = *ring->tail;
        next_tail = tail + 1;
-       read_barrier();
-       if (next_tail == *ring->head)
+       if (next_tail == atomic_load_acquire(ring->head))
                return FIO_Q_BUSY;
 
        if (o->cmdprio_percentage)
                fio_ioring_prio_prep(td, io_u);
        ring->array[tail & ld->sq_ring_mask] = io_u->index;
-       *ring->tail = next_tail;
-       write_barrier();
+       atomic_store_release(ring->tail, next_tail);
 
        ld->queued++;
        return FIO_Q_QUEUED;
@@ -656,11 +655,6 @@ static int fio_ioring_post_init(struct thread_data *td)
        return 0;
 }
 
-static unsigned roundup_pow2(unsigned depth)
-{
-       return 1UL << __fls(depth - 1);
-}
-
 static int fio_ioring_init(struct thread_data *td)
 {
        struct ioring_options *o = td->eo;
index daa576dad3e64b97f4af427633921f0ab9c61e6c..b909b79e9c7169f7898e2aa32be37f895cafe4a0 100644 (file)
@@ -195,8 +195,8 @@ static int user_io_getevents(io_context_t aio_ctx, unsigned int max,
                } else {
                        /* There is another completion to reap */
                        events[i] = ring->events[head];
-                       read_barrier();
-                       ring->head = (head + 1) % ring->nr;
+                       atomic_store_release(&ring->head,
+                                            (head + 1) % ring->nr);
                        i++;
                }
        }
@@ -445,7 +445,7 @@ static int fio_libaio_init(struct thread_data *td)
        return 0;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name                   = "libaio",
        .version                = FIO_IOOPS_VERSION,
        .flags                  = FIO_ASYNCIO_SYNC_TRIM,
index c57fcea6353821dbdd90db055561f0424f9cbcf0..9ca82f78cb067469190b2e393f7bc0dd7215df7c 100644 (file)
@@ -393,7 +393,7 @@ static void fio_hdfsio_io_u_free(struct thread_data *td, struct io_u *io_u)
        }
 }
 
-static struct ioengine_ops ioengine_hdfs = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name = "libhdfs",
        .version = FIO_IOOPS_VERSION,
        .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NODISKUTIL,
@@ -412,10 +412,10 @@ static struct ioengine_ops ioengine_hdfs = {
 
 static void fio_init fio_hdfsio_register(void)
 {
-       register_ioengine(&ioengine_hdfs);
+       register_ioengine(&ioengine);
 }
 
 static void fio_exit fio_hdfsio_unregister(void)
 {
-       unregister_ioengine(&ioengine_hdfs);
+       unregister_ioengine(&ioengine);
 }
index 35761a619f89099520d4f0b37730ed7f7324a42a..c97b5709ae779eda9af451c0418d352ad365e772 100644 (file)
@@ -383,7 +383,7 @@ static struct io_u *fio_iscsi_event(struct thread_data *td, int event)
        return io_u;
 }
 
-static struct ioengine_ops ioengine_iscsi = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name               = "libiscsi",
        .version            = FIO_IOOPS_VERSION,
        .flags              = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NODISKUTIL,
@@ -402,10 +402,10 @@ static struct ioengine_ops ioengine_iscsi = {
 
 static void fio_init fio_iscsi_register(void)
 {
-       register_ioengine(&ioengine_iscsi);
+       register_ioengine(&ioengine);
 }
 
 static void fio_exit fio_iscsi_unregister(void)
 {
-       unregister_ioengine(&ioengine_iscsi);
+       unregister_ioengine(&ioengine);
 }
index 99c7b50ddc23a1f188d1a30e0fb249a38afbdfae..3f63055c1d9759675d8db844dc77ed005b4cf476 100644 (file)
@@ -558,7 +558,7 @@ static int fio_libpmem_close_file(struct thread_data *td, struct fio_file *f)
        return generic_close_file(td, f);
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name           = "libpmem",
        .version        = FIO_IOOPS_VERSION,
        .init           = fio_libpmem_init,
index 9e5683349fb1c60d99c782d085de23cb60bafbb5..4b9002330dcfd66d106087d1dc9275c6402fe1d3 100644 (file)
@@ -235,6 +235,11 @@ static int libzbc_report_zones(struct thread_data *td, struct fio_file *f,
                zbdz->start = zones[i].zbz_start << 9;
                zbdz->len = zones[i].zbz_length << 9;
                zbdz->wp = zones[i].zbz_write_pointer << 9;
+               /*
+                * ZBC/ZAC do not define zone capacity, so use the zone size as
+                * the zone capacity.
+                */
+               zbdz->capacity = zbdz->len;
 
                switch (zones[i].zbz_type) {
                case ZBC_ZT_CONVENTIONAL:
@@ -397,7 +402,7 @@ static enum fio_q_status libzbc_queue(struct thread_data *td, struct io_u *io_u)
        return FIO_Q_COMPLETED;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name                   = "libzbc",
        .version                = FIO_IOOPS_VERSION,
        .open_file              = libzbc_open_file,
index 5323792907a9d3e505a5e27a970fd0cdaf327da6..b0ba75e69428f63324a764cc702f679bf6c1e5da 100644 (file)
@@ -328,7 +328,7 @@ static int nbd_invalidate(struct thread_data *td, struct fio_file *f)
        return 0;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name                   = "nbd",
        .version                = FIO_IOOPS_VERSION,
        .options                = options,
index 730f4d776f1eb46d8bbfc792ed87b103c078641b..fc6358e8e11f3730fc6163127830aaf5bc1ebf7a 100644 (file)
@@ -220,14 +220,14 @@ static fio_pmemblk_file_t pmb_open(const char *pathspec, int flags)
                pmb->pmb_nblocks = pmemblk_nblock(pmb->pmb_pool);
 
                fio_pmemblk_cache_insert(pmb);
+       } else {
+               free(path);
        }
 
        pmb->pmb_refcnt += 1;
 
        pthread_mutex_unlock(&CacheLock);
 
-       free(path);
-
        return pmb;
 
 error:
@@ -426,7 +426,7 @@ static int fio_pmemblk_unlink_file(struct thread_data *td, struct fio_file *f)
        return 0;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name = "pmemblk",
        .version = FIO_IOOPS_VERSION,
        .queue = fio_pmemblk_queue,
index d44134276b471010f9a96f5a0c75c547f38990ea..42ee48ff02b3f6371027ab4cbcbc304aefbbea10 100644 (file)
@@ -444,7 +444,7 @@ static int fio_rados_io_u_init(struct thread_data *td, struct io_u *io_u)
 }
 
 /* ioengine_ops for get_ioengine() */
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name = "rados",
        .version                = FIO_IOOPS_VERSION,
        .flags                  = FIO_DISKLESSIO,
index a08f47757acdfbe03e18f009d251cb7846272d4d..268b6ebdffad2b71e4da565ed90867e76f90947a 100644 (file)
@@ -668,7 +668,7 @@ static int fio_rbd_io_u_init(struct thread_data *td, struct io_u *io_u)
        return 0;
 }
 
-static struct ioengine_ops ioengine = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name                   = "rbd",
        .version                = FIO_IOOPS_VERSION,
        .setup                  = fio_rbd_setup,
index f192f432738da6e8fa448766348ae6e4578fee5d..f4471869813693eb140bec7d87a31123b539ea89 100644 (file)
@@ -226,7 +226,8 @@ static int client_recv(struct thread_data *td, struct ibv_wc *wc)
                rd->rmt_nr = ntohl(rd->recv_buf.nr);
 
                for (i = 0; i < rd->rmt_nr; i++) {
-                       rd->rmt_us[i].buf = be64_to_cpu(rd->recv_buf.rmt_us[i].buf);
+                       rd->rmt_us[i].buf = __be64_to_cpu(
+                                               rd->recv_buf.rmt_us[i].buf);
                        rd->rmt_us[i].rkey = ntohl(rd->recv_buf.rmt_us[i].rkey);
                        rd->rmt_us[i].size = ntohl(rd->recv_buf.rmt_us[i].size);
 
@@ -1389,7 +1390,7 @@ static int fio_rdmaio_setup(struct thread_data *td)
        return 0;
 }
 
-static struct ioengine_ops ioengine_rw = {
+FIO_STATIC struct ioengine_ops ioengine = {
        .name                   = "rdma",
        .version                = FIO_IOOPS_VERSION,
        .setup                  = fio_rdmaio_setup,
@@ -1410,10 +1411,10 @@ static struct ioengine_ops ioengine_rw = {
 
 static void fio_init fio_rdmaio_register(void)
 {
-       register_ioengine(&ioengine_rw);
+       register_ioengine(&ioengine);
 }
 
 static void fio_exit fio_rdmaio_unregister(void)
 {
-       unregister_ioengine(&ioengine_rw);
+       unregister_ioengine(&ioengine);
 }
diff --git a/fio.1 b/fio.1
index 71ea0731076a389ae1f1b506cf3386c1d676793b..cdd105d7b3ea1496b828c4e3ce0a6d14aae03b21 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -738,12 +738,13 @@ Accepted values are:
 .RS
 .TP
 .B none
-The \fBzonerange\fR, \fBzonesize\fR and \fBzoneskip\fR parameters are ignored.
+The \fBzonerange\fR, \fBzonesize\fR, \fBzonecapacity\fR and \fBzoneskip\fR
+parameters are ignored.
 .TP
 .B strided
 I/O happens in a single zone until \fBzonesize\fR bytes have been transferred.
 After that number of bytes has been transferred processing of the next zone
-starts.
+starts. The \fBzonecapacity\fR parameter is ignored.
 .TP
 .B zbd
 Zoned block device mode. I/O happens sequentially in each zone, even if random
@@ -771,6 +772,14 @@ zoned block device, the specified \fBzonesize\fR must be 0 or equal to the
 device zone size. For a regular block device or file, the specified
 \fBzonesize\fR must be at least 512B.
 .TP
+.BI zonecapacity \fR=\fPint
+For \fBzonemode\fR=zbd, this defines the capacity of a single zone, which is
+the accessible area starting from the zone start address. This parameter only
+applies when using \fBzonemode\fR=zbd in combination with regular block devices.
+If not specified it defaults to the zone size. If the target device is a zoned
+block device, the zone capacity is obtained from the device information and this
+option is ignored.
+.TP
 .BI zoneskip \fR=\fPint
 For \fBzonemode\fR=strided, the number of bytes to skip after \fBzonesize\fR
 bytes of data have been transferred.
index c34d8bf76565ab7f93be9b9d2f6bbf41808f3950..c7806acb26ae3ea51211e5488819ce24add451db 100644 (file)
--- a/fio_sem.c
+++ b/fio_sem.c
@@ -169,7 +169,6 @@ void fio_sem_up(struct fio_sem *sem)
        assert(sem->magic == FIO_SEM_MAGIC);
 
        pthread_mutex_lock(&sem->lock);
-       read_barrier();
        if (!sem->value && sem->waiters)
                do_wake = 1;
        sem->value++;
index 0a2cc6c451d533d1a5c56ce2ce857c85a7b31de2..953e4e67e36db81e39aae9bb073b332f8bd373ab 100644 (file)
@@ -2,9 +2,10 @@
 #include <time.h>
 
 #include "fio.h"
+#include "lib/seqlock.h"
 #include "smalloc.h"
 
-struct timespec *fio_ts = NULL;
+struct fio_ts *fio_ts;
 int fio_gtod_offload = 0;
 static pthread_t gtod_thread;
 static os_cpu_mask_t fio_gtod_cpumask;
@@ -19,15 +20,17 @@ void fio_gtod_init(void)
 
 static void fio_gtod_update(void)
 {
-       if (fio_ts) {
-               struct timeval __tv;
-
-               gettimeofday(&__tv, NULL);
-               fio_ts->tv_sec = __tv.tv_sec;
-               write_barrier();
-               fio_ts->tv_nsec = __tv.tv_usec * 1000;
-               write_barrier();
-       }
+       struct timeval __tv;
+
+       if (!fio_ts)
+               return;
+
+       gettimeofday(&__tv, NULL);
+
+       write_seqlock_begin(&fio_ts->seqlock);
+       fio_ts->ts.tv_sec = __tv.tv_sec;
+       fio_ts->ts.tv_nsec = __tv.tv_usec * 1000;
+       write_seqlock_end(&fio_ts->seqlock);
 }
 
 struct gtod_cpu_data {
index f92ee8c430c8d9cdb48f31effabd885af07c9257..c55f5cba779f5269e6fbc6b96c1e85e419d40683 100644 (file)
--- a/gettime.h
+++ b/gettime.h
@@ -4,6 +4,7 @@
 #include <sys/time.h>
 
 #include "arch/arch.h"
+#include "lib/seqlock.h"
 
 /*
  * Clock sources
@@ -22,20 +23,22 @@ extern int fio_start_gtod_thread(void);
 extern int fio_monotonic_clocktest(int debug);
 extern void fio_local_clock_init(void);
 
-extern struct timespec *fio_ts;
+extern struct fio_ts {
+       struct seqlock seqlock;
+       struct timespec ts;
+} *fio_ts;
 
 static inline int fio_gettime_offload(struct timespec *ts)
 {
-       time_t last_sec;
+       unsigned int seq;
 
        if (!fio_ts)
                return 0;
 
        do {
-               read_barrier();
-               last_sec = ts->tv_sec = fio_ts->tv_sec;
-               ts->tv_nsec = fio_ts->tv_nsec;
-       } while (fio_ts->tv_sec != last_sec);
+               seq = read_seqlock_begin(&fio_ts->seqlock);
+               *ts = fio_ts->ts;
+       } while (read_seqlock_retry(&fio_ts->seqlock, seq));
 
        return 1;
 }
diff --git a/init.c b/init.c
index e4a9ba6b57751f247bd4e942c19a2583a8c13c10..84325f1e86fc800cdef58140d677004b4eacf9f1 100644 (file)
--- a/init.c
+++ b/init.c
@@ -1099,6 +1099,9 @@ int ioengine_load(struct thread_data *td)
                 */
                dlhandle = td->io_ops_dlhandle;
                ops = load_ioengine(td);
+               if (!ops)
+                       goto fail;
+
                if (ops == td->io_ops && dlhandle == td->io_ops_dlhandle) {
                        if (dlhandle)
                                dlclose(dlhandle);
@@ -1113,10 +1116,8 @@ int ioengine_load(struct thread_data *td)
        }
 
        td->io_ops = load_ioengine(td);
-       if (!td->io_ops) {
-               log_err("fio: failed to load engine\n");
-               return 1;
-       }
+       if (!td->io_ops)
+               goto fail;
 
        if (td->io_ops->option_struct_size && td->io_ops->options) {
                /*
@@ -1155,6 +1156,11 @@ int ioengine_load(struct thread_data *td)
 
        td_set_ioengine_flags(td);
        return 0;
+
+fail:
+       log_err("fio: failed to load engine\n");
+       return 1;
+
 }
 
 static void init_flags(struct thread_data *td)
diff --git a/io_u.c b/io_u.c
index ae1438fd665673e3077cc41c8dae0e4ace1b01c4..6a729e513c60a8e6fcfa87fb7958c0b6af84efbf 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -680,7 +680,22 @@ static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
        if (td->o.io_submit_mode == IO_MODE_INLINE)
                io_u_quiesce(td);
 
+       if (td->o.timeout && ((usec + now) > td->o.timeout)) {
+               /*
+                * Cap the sleep at the timeout; bail out if it already passed
+                */
+               if (now > td->o.timeout) {
+                       ddir = DDIR_INVAL;
+                       return ddir;
+               }
+               usec = td->o.timeout - now;
+       }
        usec_sleep(td, usec);
+
+       now = utime_since_now(&td->epoch);
+       if ((td->o.timeout && (now > td->o.timeout)) || td->terminate)
+               ddir = DDIR_INVAL;
+
        return ddir;
 }
 
@@ -896,6 +911,10 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
 
        set_rw_ddir(td, io_u);
 
+       if (io_u->ddir == DDIR_INVAL) {
+               dprint(FD_IO, "invalid direction received ddir = %d", io_u->ddir);
+               return 1;
+       }
        /*
         * fsync() or fdatasync() or trim etc, we are done
         */
@@ -1934,8 +1953,8 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
                if (io_u->error)
                        unlog_io_piece(td, io_u);
                else {
-                       io_u->ipo->flags &= ~IP_F_IN_FLIGHT;
-                       write_barrier();
+                       atomic_store_release(&io_u->ipo->flags,
+                                       io_u->ipo->flags & ~IP_F_IN_FLIGHT);
                }
        }
 
index 2c7a0df9ed39fa02da08691c4fb85cdf6eadbb90..1c5970a4b5a188e5f4aa5f924b483c976a04142f 100644 (file)
@@ -75,6 +75,25 @@ static struct ioengine_ops *find_ioengine(const char *name)
        return NULL;
 }
 
+#ifdef CONFIG_DYNAMIC_ENGINES
+static void *dlopen_external(struct thread_data *td, const char *engine)
+{
+       char engine_path[PATH_MAX];
+       void *dlhandle;
+
+       sprintf(engine_path, "%s/lib%s.so", FIO_EXT_ENG_DIR, engine);
+
+       dlhandle = dlopen(engine_path, RTLD_LAZY);
+       if (!dlhandle)
+               log_info("Engine %s not found; Either name is invalid, was not built, or fio-engine-%s package is missing.\n",
+                        engine, engine);
+
+       return dlhandle;
+}
+#else
+#define dlopen_external(td, engine) (NULL)
+#endif
+
 static struct ioengine_ops *dlopen_ioengine(struct thread_data *td,
                                            const char *engine_lib)
 {
@@ -86,8 +105,11 @@ static struct ioengine_ops *dlopen_ioengine(struct thread_data *td,
        dlerror();
        dlhandle = dlopen(engine_lib, RTLD_LAZY);
        if (!dlhandle) {
-               td_vmsg(td, -1, dlerror(), "dlopen");
-               return NULL;
+               dlhandle = dlopen_external(td, engine_lib);
+               if (!dlhandle) {
+                       td_vmsg(td, -1, dlerror(), "dlopen");
+                       return NULL;
+               }
        }
 
        /*
@@ -291,8 +313,10 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
         * started the overlap check because the IO_U_F_FLIGHT
         * flag is now set
         */
-       if (td_offload_overlap(td))
-               pthread_mutex_unlock(&overlap_check);
+       if (td_offload_overlap(td)) {
+               int res = pthread_mutex_unlock(&overlap_check);
+               assert(res == 0);
+       }
 
        assert(fio_file_open(io_u->file));
 
index f48b4db934851fd944eba8e66f462858aed183a5..54dadba2cbce34921d6d46b05c233b4aeb8c4a8a 100644 (file)
 
 #define FIO_IOOPS_VERSION      26
 
+#ifndef CONFIG_DYNAMIC_ENGINES
+#define FIO_STATIC     static
+#else
+#define FIO_STATIC
+#endif
+
 /*
  * io_ops->queue() return values
  */
diff --git a/iolog.c b/iolog.c
index d5a18582c0301f8685566c72cc6603c316e90160..7f21be51942024e7a2daf199770082e77291b136 100644 (file)
--- a/iolog.c
+++ b/iolog.c
@@ -19,6 +19,7 @@
 #include "smalloc.h"
 #include "blktrace.h"
 #include "pshared.h"
+#include "lib/roundup.h"
 
 #include <netinet/in.h>
 #include <netinet/tcp.h>
@@ -750,10 +751,13 @@ void setup_log(struct io_log **log, struct log_params *p,
        }
 
        if (l->td && l->td->o.io_submit_mode != IO_MODE_OFFLOAD) {
+               unsigned int def_samples = DEF_LOG_ENTRIES;
                struct io_logs *__p;
 
                __p = calloc(1, sizeof(*l->pending));
-               __p->max_samples = DEF_LOG_ENTRIES;
+               if (l->td->o.iodepth > DEF_LOG_ENTRIES)
+                       def_samples = roundup_pow2(l->td->o.iodepth);
+               __p->max_samples = def_samples;
                __p->log = calloc(__p->max_samples, log_entry_sz(l));
                l->pending = __p;
        }
index 1abe22f33794c0ccf6b724e74aef7a1f62271166..726f1c44159fd8a16379777411030b262eeb6cb1 100644 (file)
@@ -4,6 +4,7 @@
 #include <string.h>
 
 #include "../compiler/compiler.h"
+#include "../oslib/asprintf.h"
 #include "num2str.h"
 
 #define ARRAY_SIZE(x)    (sizeof((x)) / (sizeof((x)[0])))
@@ -19,8 +20,8 @@
  */
 char *num2str(uint64_t num, int maxlen, int base, int pow2, enum n2s_unit units)
 {
-       const char *sistr[] = { "", "k", "M", "G", "T", "P" };
-       const char *iecstr[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi" };
+       const char *sistr[] = { "", "k", "M", "G", "T", "P", "E" };
+       const char *iecstr[] = { "", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei" };
        const char **unitprefix;
        static const char *const unitstr[] = {
                [N2S_NONE]      = "",
@@ -33,16 +34,12 @@ char *num2str(uint64_t num, int maxlen, int base, int pow2, enum n2s_unit units)
        const unsigned int thousand = pow2 ? 1024 : 1000;
        unsigned int modulo;
        int post_index, carry = 0;
-       char tmp[32], fmt[32];
+       char tmp[32];
        char *buf;
 
        compiletime_assert(sizeof(sistr) == sizeof(iecstr), "unit prefix arrays must be identical sizes");
        assert(units < ARRAY_SIZE(unitstr));
 
-       buf = malloc(128);
-       if (!buf)
-               return NULL;
-
        if (pow2)
                unitprefix = iecstr;
        else
@@ -83,16 +80,17 @@ char *num2str(uint64_t num, int maxlen, int base, int pow2, enum n2s_unit units)
                post_index++;
        }
 
+       if (post_index >= ARRAY_SIZE(sistr))
+               post_index = 0;
+
        /*
         * If no modulo, then we're done.
         */
        if (modulo == -1U) {
 done:
-               if (post_index >= ARRAY_SIZE(sistr))
-                       post_index = 0;
-
-               sprintf(buf, "%llu%s%s", (unsigned long long) num,
-                       unitprefix[post_index], unitstr[units]);
+               if (asprintf(&buf, "%llu%s%s", (unsigned long long) num,
+                            unitprefix[post_index], unitstr[units]) < 0)
+                       buf = NULL;
                return buf;
        }
 
@@ -111,10 +109,11 @@ done:
         */
        assert(maxlen - strlen(tmp) - 1 > 0);
        assert(modulo < thousand);
-       sprintf(fmt, "%%.%df", (int)(maxlen - strlen(tmp) - 1));
-       sprintf(tmp, fmt, (double)modulo / (double)thousand);
+       sprintf(tmp, "%.*f", (int)(maxlen - strlen(tmp) - 1),
+               (double)modulo / (double)thousand);
 
-       sprintf(buf, "%llu.%s%s%s", (unsigned long long) num, &tmp[2],
-                       unitprefix[post_index], unitstr[units]);
+       if (asprintf(&buf, "%llu.%s%s%s", (unsigned long long) num, &tmp[2],
+                    unitprefix[post_index], unitstr[units]) < 0)
+               buf = NULL;
        return buf;
 }
index 2ccc1b3723c42f4f90ab821bf2824611037d24dc..46c1c5e023a132513ded05d885bd5769dbf7ceb9 100644 (file)
@@ -6,7 +6,9 @@
 #include "types.h"
 
 #define FRAND32_MAX    (-1U)
+#define FRAND32_MAX_PLUS_ONE   (1.0 * (1ULL << 32))
 #define FRAND64_MAX    (-1ULL)
+#define FRAND64_MAX_PLUS_ONE   (1.0 * (1ULL << 32) * (1ULL << 32))
 
 struct taus88_state {
        unsigned int s1, s2, s3;
@@ -106,11 +108,11 @@ static inline double __rand_0_1(struct frand_state *state)
        if (state->use64) {
                uint64_t val = __rand64(&state->state64);
 
-               return (val + 1.0) / (FRAND64_MAX + 1.0);
+               return (val + 1.0) / FRAND64_MAX_PLUS_ONE;
        } else {
                uint32_t val = __rand32(&state->state32);
 
-               return (val + 1.0) / (FRAND32_MAX + 1.0);
+               return (val + 1.0) / FRAND32_MAX_PLUS_ONE;
        }
 }
 
@@ -122,7 +124,7 @@ static inline uint32_t rand32_upto(struct frand_state *state, uint32_t end)
 
        r = __rand32(&state->state32);
        end++;
-       return (int) ((double)end * (r / (FRAND32_MAX + 1.0)));
+       return (int) ((double)end * (r / FRAND32_MAX_PLUS_ONE));
 }
 
 static inline uint64_t rand64_upto(struct frand_state *state, uint64_t end)
@@ -133,7 +135,7 @@ static inline uint64_t rand64_upto(struct frand_state *state, uint64_t end)
 
        r = __rand64(&state->state64);
        end++;
-       return (uint64_t) ((double)end * (r / (FRAND64_MAX + 1.0)));
+       return (uint64_t) ((double)end * (r / FRAND64_MAX_PLUS_ONE));
 }
 
 /*
diff --git a/lib/roundup.h b/lib/roundup.h
new file mode 100644 (file)
index 0000000..5a99c8a
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef FIO_ROUNDUP_H
+#define FIO_ROUNDUP_H
+
+#include "lib/fls.h"
+
+static inline unsigned roundup_pow2(unsigned depth)
+{
+       return 1UL << __fls(depth - 1);
+}
+
+#endif
index 762b6ec1d2dc7fa7ba5df9fe75d4e28da9a1c53b..56f3e37dab5b35750a390714c3af2d9579de5628 100644 (file)
@@ -5,7 +5,7 @@
 #include "../arch/arch.h"
 
 struct seqlock {
-       volatile int sequence;
+       volatile unsigned int sequence;
 };
 
 static inline void seqlock_init(struct seqlock *s)
@@ -18,13 +18,12 @@ static inline unsigned int read_seqlock_begin(struct seqlock *s)
        unsigned int seq;
 
        do {
-               seq = s->sequence;
+               seq = atomic_load_acquire(&s->sequence);
                if (!(seq & 1))
                        break;
                nop;
        } while (1);
 
-       read_barrier();
        return seq;
 }
 
@@ -36,14 +35,12 @@ static inline bool read_seqlock_retry(struct seqlock *s, unsigned int seq)
 
 static inline void write_seqlock_begin(struct seqlock *s)
 {
-       s->sequence++;
-       write_barrier();
+       s->sequence = atomic_load_acquire(&s->sequence) + 1;
 }
 
 static inline void write_seqlock_end(struct seqlock *s)
 {
-       write_barrier();
-       s->sequence++;
+       atomic_store_release(&s->sequence, s->sequence + 1);
 }
 
 #endif
index 5f0225f71094d4352a61e99eccc12527ff5bd908..6cf7333375d035dd6eb36526eb338f355f310e78 100644 (file)
--- a/memory.c
+++ b/memory.c
@@ -274,7 +274,7 @@ static int alloc_mem_cudamalloc(struct thread_data *td, size_t total_mem)
 static void free_mem_cudamalloc(struct thread_data *td)
 {
 #ifdef CONFIG_CUDA
-       if (td->dev_mem_ptr != NULL)
+       if (td->dev_mem_ptr)
                cuMemFree(td->dev_mem_ptr);
 
        if (cuCtxDestroy(td->cu_ctx) != CUDA_SUCCESS)
index 85a0f490a0721d5ce3b54af6d890576c0b9b26de..251ad2c1adff50e1068c70eb9b929f7288cc079d 100644 (file)
--- a/options.c
+++ b/options.c
@@ -3327,6 +3327,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_ZONE,
        },
+       {
+               .name   = "zonecapacity",
+               .lname  = "Zone capacity",
+               .type   = FIO_OPT_STR_VAL,
+               .off1   = offsetof(struct thread_options, zone_capacity),
+               .help   = "Capacity per zone",
+               .def    = "0",
+               .interval = 1024 * 1024,
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_ZONE,
+       },
        {
                .name   = "zonerange",
                .lname  = "Zone range",
index 6ec7243d49a3fc62166ea4723d855d7d2c3a9f28..65d3b429a87f408786c5569bf2c3884e9ea4474c 100644 (file)
@@ -58,6 +58,8 @@
 
 #define OS_MAP_ANON            MAP_ANONYMOUS
 
+#define FIO_EXT_ENG_DIR        "/usr/lib/fio"
+
 typedef cpu_set_t os_cpu_mask_t;
 
 #ifdef CONFIG_3ARG_AFFINITY
index e36453e9e8c77d129e760fe31424cb8af88c57e6..31271de049133b8728c26f47f6d4e34a13208cc6 100644 (file)
@@ -750,7 +750,7 @@ int setgid(gid_t gid)
 int nice(int incr)
 {
        DWORD prioclass = NORMAL_PRIORITY_CLASS;
-       
+
        if (incr < -15)
                prioclass = HIGH_PRIORITY_CLASS;
        else if (incr < 0)
@@ -759,7 +759,7 @@ int nice(int incr)
                prioclass = IDLE_PRIORITY_CLASS;
        else if (incr > 0)
                prioclass = BELOW_NORMAL_PRIORITY_CLASS;
-       
+
        if (!SetPriorityClass(GetCurrentProcess(), prioclass))
                log_err("fio: SetPriorityClass failed\n");
 
@@ -883,7 +883,7 @@ int poll(struct pollfd fds[], nfds_t nfds, int timeout)
        FD_ZERO(&exceptfds);
 
        for (i = 0; i < nfds; i++) {
-               if (fds[i].fd < 0) {
+               if (fds[i].fd == INVALID_SOCKET) {
                        fds[i].revents = 0;
                        continue;
                }
@@ -900,7 +900,7 @@ int poll(struct pollfd fds[], nfds_t nfds, int timeout)
 
        if (rc != SOCKET_ERROR) {
                for (i = 0; i < nfds; i++) {
-                       if (fds[i].fd < 0)
+                       if (fds[i].fd == INVALID_SOCKET)
                                continue;
 
                        if ((fds[i].events & POLLIN) && FD_ISSET(fds[i].fd, &readfds))
index 61ea3a53d6d5ce1b029db3bef33a18ece1baa4ac..6fe78b9ce79dc24abe24af38b7784f34e1029648 100644 (file)
@@ -113,6 +113,16 @@ out:
        return 0;
 }
 
+static uint64_t zone_capacity(struct blk_zone_report *hdr,
+                             struct blk_zone *blkz)
+{
+#ifdef CONFIG_HAVE_REP_CAPACITY
+       if (hdr->flags & BLK_ZONE_REP_CAPACITY)
+               return blkz->capacity << 9;
+#endif
+       return blkz->len << 9;
+}
+
 int blkzoned_report_zones(struct thread_data *td, struct fio_file *f,
                          uint64_t offset, struct zbd_zone *zones,
                          unsigned int nr_zones)
@@ -143,12 +153,13 @@ int blkzoned_report_zones(struct thread_data *td, struct fio_file *f,
        }
 
        nr_zones = hdr->nr_zones;
-       blkz = &hdr->zones[0];
+       blkz = (void *) hdr + sizeof(*hdr);
        z = &zones[0];
        for (i = 0; i < nr_zones; i++, z++, blkz++) {
                z->start = blkz->start << 9;
                z->wp = blkz->wp << 9;
                z->len = blkz->len << 9;
+               z->capacity = zone_capacity(hdr, blkz);
 
                switch (blkz->type) {
                case BLK_ZONE_TYPE_CONVENTIONAL:
index cf00d9bc75c56f057b41a337ae8bdc740aebaa66..13dbe7a2e9ff8f3face1619e750b83018ba222df 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2015 Jens Axboe <axboe@kernel.dk>
  *
  */
+#include <assert.h>
 #include "fio.h"
 #include "ioengines.h"
 #include "lib/getrusage.h"
 
 static void check_overlap(struct io_u *io_u)
 {
-       int i;
+       int i, res;
        struct thread_data *td;
-       bool overlap = false;
 
-       do {
-               /*
-                * Allow only one thread to check for overlap at a
-                * time to prevent two threads from thinking the coast
-                * is clear and then submitting IOs that overlap with
-                * each other
-                *
-                * If an overlap is found, release the lock and
-                * re-acquire it before checking again to give other
-                * threads a chance to make progress
-                *
-                * If an overlap is not found, release the lock when the
-                * io_u's IO_U_F_FLIGHT flag is set so that this io_u
-                * can be checked by other threads as they assess overlap
-                */
-               pthread_mutex_lock(&overlap_check);
-               for_each_td(td, i) {
-                       if (td->runstate <= TD_SETTING_UP ||
-                               td->runstate >= TD_FINISHING ||
-                               !td->o.serialize_overlap ||
-                               td->o.io_submit_mode != IO_MODE_OFFLOAD)
-                               continue;
-
-                       overlap = in_flight_overlap(&td->io_u_all, io_u);
-                       if (overlap) {
-                               pthread_mutex_unlock(&overlap_check);
-                               break;
-                       }
-               }
-       } while (overlap);
+       /*
+        * Allow only one thread to check for overlap at a time to prevent two
+        * threads from thinking the coast is clear and then submitting IOs
+        * that overlap with each other.
+        *
+        * If an overlap is found, release the lock and re-acquire it before
+        * checking again to give other threads a chance to make progress.
+        *
+        * If no overlap is found, release the lock when the io_u's
+        * IO_U_F_FLIGHT flag is set so that this io_u can be checked by other
+        * threads as they assess overlap.
+        */
+       res = pthread_mutex_lock(&overlap_check);
+       assert(res == 0);
+
+retry:
+       for_each_td(td, i) {
+               if (td->runstate <= TD_SETTING_UP ||
+                   td->runstate >= TD_FINISHING ||
+                   !td->o.serialize_overlap ||
+                   td->o.io_submit_mode != IO_MODE_OFFLOAD)
+                       continue;
+
+               if (!in_flight_overlap(&td->io_u_all, io_u))
+                       continue;
+
+               res = pthread_mutex_unlock(&overlap_check);
+               assert(res == 0);
+               res = pthread_mutex_lock(&overlap_check);
+               assert(res == 0);
+               goto retry;
+       }
 }
 
 static int io_workqueue_fn(struct submit_worker *sw,
@@ -95,8 +97,11 @@ static int io_workqueue_fn(struct submit_worker *sw,
                        td->cur_depth -= ret;
        }
 
-       if (error || td->error)
+       if (error || td->error) {
+               pthread_mutex_lock(&td->io_u_lock);
                pthread_cond_signal(&td->parent->free_cond);
+               pthread_mutex_unlock(&td->io_u_lock);
+       }
 
        return 0;
 }
diff --git a/stat.c b/stat.c
index b3951199bcc81a28f4728f2c327a30071faa252f..23657cee6a7702c3aacd81783acb1516b3960a1b 100644 (file)
--- a/stat.c
+++ b/stat.c
@@ -414,6 +414,18 @@ static void display_lat(const char *name, unsigned long long min,
        free(maxp);
 }
 
+static double convert_agg_kbytes_percent(struct group_run_stats *rs, int ddir, int mean)
+{
+       double p_of_agg = 100.0;
+       if (rs && rs->agg[ddir] > 1024) {
+               p_of_agg = mean * 100 / (double) (rs->agg[ddir] / 1024.0);
+
+               if (p_of_agg > 100.0)
+                       p_of_agg = 100.0;
+       }
+       return p_of_agg;
+}
+
 static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
                             int ddir, struct buf_output *out)
 {
@@ -551,11 +563,7 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
                else
                        bw_str = "kB";
 
-               if (rs->agg[ddir]) {
-                       p_of_agg = mean * 100 / (double) (rs->agg[ddir] / 1024);
-                       if (p_of_agg > 100.0)
-                               p_of_agg = 100.0;
-               }
+               p_of_agg = convert_agg_kbytes_percent(rs, ddir, mean);
 
                if (rs->unit_base == 1) {
                        min *= 8.0;
@@ -1376,11 +1384,7 @@ static void add_ddir_status_json(struct thread_stat *ts,
        }
 
        if (calc_lat(&ts->bw_stat[ddir], &min, &max, &mean, &dev)) {
-               if (rs->agg[ddir]) {
-                       p_of_agg = mean * 100 / (double) (rs->agg[ddir] / 1024);
-                       if (p_of_agg > 100.0)
-                               p_of_agg = 100.0;
-               }
+               p_of_agg = convert_agg_kbytes_percent(rs, ddir, mean);
        } else {
                min = max = 0;
                p_of_agg = mean = dev = 0.0;
@@ -3130,3 +3134,4 @@ uint32_t *io_u_block_info(struct thread_data *td, struct io_u *io_u)
        assert(idx < td->ts.nr_block_infos);
        return info;
 }
+
index 8965cfbc4547241b5a6af4b66c03339d3f943696..0c913368262a709c3e13ed51c049d5284659c0c5 100644 (file)
--- a/t/debug.c
+++ b/t/debug.c
@@ -1,7 +1,7 @@
 #include <stdio.h>
 
 FILE *f_err;
-struct timespec *fio_ts = NULL;
+void *fio_ts;
 unsigned long fio_debug = 0;
 
 void __dprint(int type, const char *str, ...)
index d48db1e95d5ab5a94a7d529c293e95a2f37dd02d..7fa84f99075bb8fb2f7c9ad0120edba911889a72 100644 (file)
@@ -46,7 +46,6 @@ struct io_cq_ring {
 #define DEPTH                  128
 #define BATCH_SUBMIT           32
 #define BATCH_COMPLETE         32
-
 #define BS                     4096
 
 #define MAX_FDS                        16
@@ -86,6 +85,7 @@ static volatile int finish;
 static int depth = DEPTH;
 static int batch_submit = BATCH_SUBMIT;
 static int batch_complete = BATCH_COMPLETE;
+static int bs = BS;
 static int polled = 1;         /* use IO polling */
 static int fixedbufs = 1;      /* use fixed user buffers */
 static int register_files = 1; /* use fixed files */
@@ -170,7 +170,7 @@ static void init_io(struct submitter *s, unsigned index)
        f->pending_ios++;
 
        r = lrand48();
-       offset = (r % (f->max_blocks - 1)) * BS;
+       offset = (r % (f->max_blocks - 1)) * bs;
 
        if (register_files) {
                sqe->flags = IOSQE_FIXED_FILE;
@@ -182,7 +182,7 @@ static void init_io(struct submitter *s, unsigned index)
        if (fixedbufs) {
                sqe->opcode = IORING_OP_READ_FIXED;
                sqe->addr = (unsigned long) s->iovecs[index].iov_base;
-               sqe->len = BS;
+               sqe->len = bs;
                sqe->buf_index = index;
        } else {
                sqe->opcode = IORING_OP_READV;
@@ -233,10 +233,10 @@ static int get_file_size(struct file *f)
                if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0)
                        return -1;
 
-               f->max_blocks = bytes / BS;
+               f->max_blocks = bytes / bs;
                return 0;
        } else if (S_ISREG(st.st_mode)) {
-               f->max_blocks = st.st_size / BS;
+               f->max_blocks = st.st_size / bs;
                return 0;
        }
 
@@ -260,7 +260,7 @@ static int reap_events(struct submitter *s)
                if (!do_nop) {
                        f = (struct file *) (uintptr_t) cqe->user_data;
                        f->pending_ios--;
-                       if (cqe->res != BS) {
+                       if (cqe->res != bs) {
                                printf("io: unexpected ret=%d\n", cqe->res);
                                if (polled && cqe->res == -EOPNOTSUPP)
                                        printf("Your filesystem/driver/kernel doesn't support polled IO\n");
@@ -483,8 +483,10 @@ static void usage(char *argv)
        printf("%s [options] -- [filenames]\n"
                " -d <int> : IO Depth, default %d\n"
                " -s <int> : Batch submit, default %d\n"
-               " -c <int> : Batch complete, default %d\n",
-               argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE);
+               " -c <int> : Batch complete, default %d\n"
+               " -b <int> : Block size, default %d\n"
+               " -p <bool> : Polled IO, default %d\n",
+               argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled);
        exit(0);
 }
 
@@ -501,7 +503,7 @@ int main(int argc, char *argv[])
                return 1;
        }
 
-       while ((opt = getopt(argc, argv, "d:s:c:h?")) != -1) {
+       while ((opt = getopt(argc, argv, "d:s:c:b:p:h?")) != -1) {
                switch (opt) {
                case 'd':
                        depth = atoi(optarg);
@@ -512,6 +514,12 @@ int main(int argc, char *argv[])
                case 'c':
                        batch_complete = atoi(optarg);
                        break;
+               case 'b':
+                       bs = atoi(optarg);
+                       break;
+               case 'p':
+                       polled = !!atoi(optarg);
+                       break;
                case 'h':
                case '?':
                default:
@@ -575,12 +583,12 @@ int main(int argc, char *argv[])
        for (i = 0; i < depth; i++) {
                void *buf;
 
-               if (posix_memalign(&buf, BS, BS)) {
+               if (posix_memalign(&buf, bs, bs)) {
                        printf("failed alloc\n");
                        return 1;
                }
                s->iovecs[i].iov_base = buf;
-               s->iovecs[i].iov_len = BS;
+               s->iovecs[i].iov_len = bs;
        }
 
        err = setup_ring(s);
index 50daf612922e089f8d51f97c9e07ecf5f5655102..f90cee90ea27f300e62abbbcff0f29dac17b2c4a 100644 (file)
@@ -7,7 +7,7 @@
 bs=4k
 ioengine=null
 size=100g
-runtime=3
+runtime=10
 flow_id=1
 
 [flow1]
diff --git a/t/jobs/t0012.fio b/t/jobs/t0012.fio
new file mode 100644 (file)
index 0000000..03fea62
--- /dev/null
@@ -0,0 +1,20 @@
+# Expected result: no parse warnings; fio runs with an iops ratio of roughly
+#                      1:8 between the two jobs.
+# Buggy result: parse warning on flow value overflow; no 1:8 iops ratio between
+#                      the two jobs.
+#
+
+[global]
+bs=4k
+ioengine=null
+size=100g
+runtime=10
+flow_id=1
+gtod_cpu=1
+
+[flow1]
+flow=-8
+rate_iops=1000
+
+[flow2]
+flow=1
diff --git a/t/jobs/t0013.fio b/t/jobs/t0013.fio
new file mode 100644 (file)
index 0000000..b4ec1b4
--- /dev/null
@@ -0,0 +1,14 @@
+# Trigger the fio code that serializes overlapping I/O. The job size is very
+# small to make overlapping I/O more likely.
+
+[test]
+ioengine=null
+thread=1
+size=4K
+blocksize=4K
+io_submit_mode=offload
+iodepth=1
+serialize_overlap=1
+numjobs=8
+loops=1000000
+runtime=10
index c2352d80c2bf5ab339672ecc19768510971686e9..6f1fc092ae223481141f8231fb9470888f84e3c0 100755 (executable)
@@ -420,14 +420,14 @@ class FioJobTest_t0009(FioJobTest):
             self.passed = False
 
 
-class FioJobTest_t0011(FioJobTest):
+class FioJobTest_iops_rate(FioJobTest):
     """Test consists of fio test job t0011 or t0012
     Confirm that job0 iops == 1000
     and that job1_iops / job0_iops ~ 8
     With two runs of fio-3.16 I observed a ratio of 8.3"""
 
     def check_result(self):
-        super(FioJobTest_t0011, self).check_result()
+        super(FioJobTest_iops_rate, self).check_result()
 
         if not self.passed:
             return
@@ -438,7 +438,7 @@ class FioJobTest_t0011(FioJobTest):
         logging.debug("Test %d: iops1: %f", self.testnum, iops1)
         logging.debug("Test %d: ratio: %f", self.testnum, ratio)
 
-        if iops1 < 997 or iops1 > 1003:
+        if iops1 < 950 or iops1 > 1050:
             self.failure_reason = "{0} iops value mismatch,".format(self.failure_reason)
             self.passed = False
 
@@ -478,11 +478,14 @@ class Requirements(object):
 
             Requirements._root = (os.geteuid() == 0)
             if Requirements._zbd and Requirements._root:
-                subprocess.run(["modprobe", "null_blk"],
-                               stdout=subprocess.PIPE,
-                               stderr=subprocess.PIPE)
-                if os.path.exists("/sys/module/null_blk/parameters/zoned"):
-                    Requirements._zoned_nullb = True
+                try:
+                    subprocess.run(["modprobe", "null_blk"],
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+                    if os.path.exists("/sys/module/null_blk/parameters/zoned"):
+                        Requirements._zoned_nullb = True
+                except Exception:
+                    pass
 
         if platform.system() == "Windows":
             utest_exe = "unittest.exe"
@@ -667,7 +670,7 @@ TEST_LIST = [
     },
     {
         'test_id':          11,
-        'test_class':       FioJobTest_t0011,
+        'test_class':       FioJobTest_iops_rate,
         'job':              't0011-5d2788d5.fio',
         'success':          SUCCESS_DEFAULT,
         'pre_job':          None,
@@ -675,6 +678,28 @@ TEST_LIST = [
         'output_format':    'json',
         'requirements':     [],
     },
+    {
+        'test_id':          12,
+        'test_class':       FioJobTest_iops_rate,
+        'job':              't0012.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'output_format':    'json',
+        'requirements':     [Requirements.not_macos],
+        # mac os does not support CPU affinity
+        # which is required for gtod offloading
+    },
+    {
+        'test_id':          13,
+        'test_class':       FioJobTest,
+        'job':              't0013.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'output_format':    'json',
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,
@@ -817,9 +842,9 @@ def main():
                 print("Invalid --pass-through argument '%s'" % arg)
                 print("Syntax for --pass-through is TESTNUMBER:ARGUMENT")
                 return
-            split = arg.split(":",1)
+            split = arg.split(":", 1)
             pass_through[int(split[0])] = split[1]
-        logging.debug("Pass-through arguments: %s" % pass_through)
+        logging.debug("Pass-through arguments: %s", pass_through)
 
     if args.fio_root:
         fio_root = args.fio_root
@@ -879,6 +904,7 @@ def main():
                 fio_pre_job=fio_pre_job,
                 fio_pre_success=fio_pre_success,
                 output_format=output_format)
+            desc = config['job']
         elif issubclass(config['test_class'], FioExeTest):
             exe_path = os.path.join(fio_root, config['exe'])
             if config['parameters']:
@@ -892,6 +918,7 @@ def main():
                 parameters += pass_through[config['test_id']].split()
             test = config['test_class'](exe_path, parameters,
                                         config['success'])
+            desc = config['exe']
         else:
             print("Test {0} FAILED: unable to process test config".format(config['test_id']))
             failed = failed + 1
@@ -906,7 +933,7 @@ def main():
                 if not reqs_met:
                     break
             if not reqs_met:
-                print("Test {0} SKIPPED ({1})".format(config['test_id'], reason))
+                print("Test {0} SKIPPED ({1}) {2}".format(config['test_id'], reason, desc))
                 skipped = skipped + 1
                 continue
 
@@ -923,7 +950,7 @@ def main():
             logging.debug("Test %d: stderr:\n%s", config['test_id'], contents)
             contents, _ = FioJobTest.get_file(test.stdout_file)
             logging.debug("Test %d: stdout:\n%s", config['test_id'], contents)
-        print("Test {0} {1}".format(config['test_id'], result))
+        print("Test {0} {1} {2}".format(config['test_id'], result, desc))
 
     print("{0} test(s) passed, {1} failed, {2} skipped".format(passed, failed, skipped))
 
index 1bd22ec4259ddbcc86c5aa397d06ffe7fd21d7a6..81b6f3f7dd115a2df800773769dfc16cc20a3c9b 100644 (file)
@@ -19,6 +19,51 @@ if [ -n "${use_libzbc}" ] &&
     exit 1
 fi
 
+# Succeeds when ${blkzone} is set and its one-zone report at offset 0 of
+# device $1 contains a "cap " field.
+blkzone_reports_capacity() {
+       local target="${1}"
+
+       if [[ -z "${blkzone}" ]]; then
+               return 1
+       fi
+       "${blkzone}" report -c 1 -o 0 "${target}" | grep -q 'cap '
+}
+
+# Succeeds if $1 (/dev/...) is an NVMe device whose queue/zoned sysfs attribute
+# reads "host-managed", i.e. an NVMe ZNS device.
+is_nvme_zns() {
+       local name subsys
+
+       name=$(basename "${1}")
+       subsys=/sys/block/${name}/device/subsystem
+
+       # Require the device/subsystem symlink to resolve to /sys/class/nvme.
+       [[ -h "${subsys}" ]] || return 1
+       [[ $(realpath "${subsys}") == /sys/class/nvme ]] || return 1
+
+       [[ $(<"/sys/block/${name}/queue/zoned") == host-managed ]]
+}
+
+# Succeeds if $1 (/dev/...) has a readable null_blk configfs zone_capacity
+# attribute whose value is smaller than that of its zone_size attribute.
+is_nullb_with_zone_cap() {
+       local cfg cap size
+
+       cfg=/sys/kernel/config/nullb/$(basename "${1}")
+       [[ -r "${cfg}/zone_capacity" ]] || return 1
+
+       cap=$(<"${cfg}/zone_capacity")
+       size=$(<"${cfg}/zone_size")
+       [[ "${cap}" -lt "${size}" ]]
+}
+
+# Verify that blkzone can report zone capacity when device $1 needs it. A
+# device needs it if it is an NVMe ZNS device or a null_blk device with zone
+# capacity smaller than zone size. Prints an error and returns 1 when the
+# capacity report is missing; returns 0 otherwise.
+check_blkzone() {
+       local dev="${1}"
+
+       # Devices without zone capacity support need no capacity report.
+       if ! is_nvme_zns "${dev}" && ! is_nullb_with_zone_cap "${dev}"; then
+               return 0
+       fi
+
+       if blkzone_reports_capacity "${dev}"; then
+               return 0
+       fi
+
+       echo "Error: blkzone does not report zone capacity"
+       echo "Error: install latest util-linux with blkzone"
+       return 1
+}
+
 # Reports the starting sector and length of the first sequential zone of device
 # $1.
 first_sequential_zone() {
@@ -39,6 +84,43 @@ first_sequential_zone() {
     fi
 }
 
+# Print the summed zone capacity of $1 zones of device $3, starting at offset
+# $2. $2 is divided by 512 to get the start sector, and the sum of the
+# reported per-zone values is multiplied by 512 before it is printed.
+total_zone_capacity() {
+       local nr_zones=$1
+       local sector=$(($2 / 512))
+       local dev=$3
+       local sum=0 value
+       local pattern
+
+       # For regular block devices, handle zone size as zone capacity.
+       if [ -z "$is_zbd" ]; then
+               echo $((zone_size * nr_zones))
+               return
+       fi
+
+       if [ -z "${blkzone}" ] || [ -n "${use_libzbc}" ]; then
+               # ZBC devices do not have zone capacity. Use zone size.
+               while read value; do
+                       sum=$((sum + value))
+               done < <(${zbc_report_zones} -nz "$nr_zones" -start "$sector" \
+                               "$dev" | grep -Po 'sector [0-9]*, \K[0-9]*')
+       else
+               # If zone capacity is not reported, refer zone length.
+               pattern='len \K[0-9a-zA-Z]*'
+               if blkzone_reports_capacity "${dev}"; then
+                       pattern='cap \K[0-9a-zA-Z]*'
+               fi
+               while read value; do
+                       sum=$((sum + value))
+               done < <(${blkzone} report -c "$nr_zones" -o "$sector" "$dev" |
+                               grep -Po "${pattern}")
+       fi
+
+       echo $((sum * 512))
+}
+
 max_open_zones() {
     local dev=$1
 
index 53aee3e84caf9fa6c67a3a5817b18690c336d729..f9c9530ce68335addd2fbd482f196b0cbb7a89af 100755 (executable)
@@ -6,6 +6,21 @@
 
 scriptdir="$(cd "$(dirname "$0")" && pwd)"
 
+# Defaults; -zone-cap selects a zone capacity smaller than the zone size.
+zone_size=1
+zone_capacity=1
+case "${1}" in
+-h)
+    echo "Usage: ${0} [OPTIONS]"
+    echo "Options:"
+    echo -e "\t-h Show this message."
+    echo -e "\t-zone-cap Use null blk with zone capacity less than zone size."
+    echo -e "\tany option supported by test-zbd-support script."
+    exit 1
+    ;;
+-zone-cap)
+    zone_size=4
+    zone_capacity=3
+    # Drop the consumed option from the positional parameters.
+    shift
+    ;;
+esac
+
 for d in /sys/kernel/config/nullb/*; do [ -d "$d" ] && rmdir "$d"; done
 modprobe -r null_blk
 modprobe null_blk nr_devices=0 || exit $?
@@ -17,9 +32,18 @@ modprobe -r null_blk
 modprobe null_blk nr_devices=0 &&
     cd /sys/kernel/config/nullb &&
     mkdir nullb0 &&
-    cd nullb0 &&
-    echo 1 > zoned &&
-    echo 1 > zone_size &&
+    cd nullb0 || exit $?
+
+if ((zone_capacity < zone_size)); then
+    if [[ ! -w zone_capacity ]]; then
+        echo "null blk does not support zone capacity"
+        exit 1
+    fi
+    echo "${zone_capacity}" > zone_capacity
+fi
+
+echo 1 > zoned &&
+    echo "${zone_size}" > zone_size &&
     echo 0 > completion_nsec &&
     echo 4096 > blocksize &&
     echo 1024 > size &&
index 4001be3b6c3f0d1615aaf076729b6a6aaba72195..471a3487c52207b96bc28946512bba108851971f 100755 (executable)
@@ -109,6 +109,20 @@ run_one_fio_job() {
            --thread=1 --direct=1
 }
 
+# Run one fio invocation with two jobs on $dev: "write_job", a psync
+# --rw=write job with --offset=$1 and --size=$2, followed by a job named after
+# $dev that is given --wait_for="write_job" and all remaining arguments.
+write_and_run_one_fio_job() {
+    local wjob_off="${1}"
+    local wjob_size="${2}"
+    local wjob_seed
+    local wjob_args
+
+    shift 2
+    wjob_seed=$(((RANDOM << 16) | RANDOM))
+    # Options of the first job, in the order fio receives them.
+    wjob_args=(
+       --name="write_job" --rw=write
+       "$(ioengine "psync")" --bs="${logical_block_size}"
+       --zonemode=zbd --zonesize="${zone_size}" --thread=1 --direct=1
+       --offset="${wjob_off}" --size="${wjob_size}"
+    )
+    run_fio --filename="$dev" --randseed="$wjob_seed" "${wjob_args[@]}" \
+           --name="$dev" --wait_for="write_job" "$@" --thread=1 --direct=1
+}
+
 # Run fio on the first four sequential zones of the disk.
 run_fio_on_seq() {
     local opts=()
@@ -170,13 +184,7 @@ test3() {
        opts+=("--zonesize=${zone_size}")
     fi
     run_fio "${opts[@]}" >> "${logfile}.${test_number}" 2>&1 || return $?
-    grep -q 'READ:' "${logfile}.${test_number}"
-    rc=$?
-    if [ -n "$is_zbd" ]; then
-       [ $rc != 0 ]
-    else
-       [ $rc = 0 ]
-    fi
+    ! grep -q 'READ:' "${logfile}.${test_number}"
 }
 
 # Run fio with --read_beyond_wp=1 against an empty zone.
@@ -196,51 +204,64 @@ test4() {
 
 # Sequential write to sequential zones.
 test5() {
-    local size
+    local size off capacity
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     size=$((4 * zone_size))
     run_fio_on_seq "$(ioengine "psync")" --iodepth=1 --rw=write        \
                   --bs="$(max $((zone_size / 64)) "$logical_block_size")"\
                   --do_verify=1 --verify=md5                           \
                   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
-# Sequential read from sequential zones. Must be run after test5.
+# Sequential read from sequential zones.
 test6() {
-    local size
+    local size off capacity
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     size=$((4 * zone_size))
-    run_fio_on_seq "$(ioengine "psync")" --iodepth=1 --rw=read \
-                  --bs="$(max $((zone_size / 64)) "$logical_block_size")"\
-                  >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_read $size || return $?
+    write_and_run_one_fio_job \
+           $((first_sequential_zone_sector * 512)) "${size}" \
+           --offset="${off}" \
+           --size="${size}" --zonemode=zbd --zonesize="${zone_size}" \
+           "$(ioengine "psync")" --iodepth=1 --rw=read \
+           --bs="$(max $((zone_size / 64)) "$logical_block_size")" \
+           >>"${logfile}.${test_number}" 2>&1 || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 1.
 test7() {
     local size=$((zone_size))
+    local off capacity
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=1 --rw=randwrite   \
                   --bs="$(min 16384 "${zone_size}")"                   \
                   --do_verify=1 --verify=md5 --size="$size"            \
                   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 64.
 test8() {
-    local size
+    local size off capacity
 
     size=$((4 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite  \
                   --bs="$(min 16384 "${zone_size}")"                   \
                   --do_verify=1 --verify=md5                           \
                   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, sg, queue depth 1.
@@ -281,39 +302,45 @@ test10() {
 
 # Random write to sequential zones, libaio, queue depth 64, random block size.
 test11() {
-    local size
+    local size off capacity
 
     size=$((4 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite  \
                   --bsrange=4K-64K --do_verify=1 --verify=md5          \
                   --debug=zbd >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 64, max 1 open zone.
 test12() {
-    local size
+    local size off capacity
 
     size=$((8 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 8 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite --bs=16K \
                   --max_open_zones=1 --size=$size --do_verify=1 --verify=md5 \
                   --debug=zbd >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 64, max 4 open zones.
 test13() {
-    local size
+    local size off capacity
 
     size=$((8 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 8 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite --bs=16K \
                   --max_open_zones=4 --size=$size --do_verify=1 --verify=md5 \
                   --debug=zbd                                                \
                   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to conventional zones.
@@ -337,41 +364,47 @@ test14() {
 # Sequential read on a mix of empty and full zones.
 test15() {
     local i off size
+    local w_off w_size w_capacity
 
     for ((i=0;i<4;i++)); do
        [ -n "$is_zbd" ] &&
            reset_zone "$dev" $((first_sequential_zone_sector +
                                 i*sectors_per_zone))
     done
-    off=$(((first_sequential_zone_sector + 2 * sectors_per_zone) * 512))
-    size=$((2 * zone_size))
-    run_one_fio_job "$(ioengine "psync")" --rw=write --bs=$((zone_size / 16))\
-                   --zonemode=zbd --zonesize="${zone_size}" --offset=$off \
-                   --size=$size >>"${logfile}.${test_number}" 2>&1 ||
-       return $?
-    check_written $size || return $?
+    w_off=$(((first_sequential_zone_sector + 2 * sectors_per_zone) * 512))
+    w_size=$((2 * zone_size))
+    w_capacity=$(total_zone_capacity 2 $w_off $dev)
     off=$((first_sequential_zone_sector * 512))
     size=$((4 * zone_size))
-    run_one_fio_job "$(ioengine "psync")" --rw=read --bs=$((zone_size / 16)) \
+    write_and_run_one_fio_job "${w_off}" "${w_size}" \
+                   "$(ioengine "psync")" --rw=read --bs=$((zone_size / 16)) \
                    --zonemode=zbd --zonesize="${zone_size}" --offset=$off \
                    --size=$((size)) >>"${logfile}.${test_number}" 2>&1 ||
        return $?
-    if [ -n "$is_zbd" ]; then
-       check_read $((size / 2))
-    else
-       check_read $size
-    fi
+    check_written $((w_capacity)) || return $?
+    check_read $((w_capacity))
 }
 
-# Random read on a mix of empty and full zones. Must be run after test15.
+# Random read on a mix of empty and full zones.
 test16() {
     local off size
+    local i w_off w_size w_capacity
 
+    for ((i=0;i<4;i++)); do
+       [ -n "$is_zbd" ] &&
+           reset_zone "$dev" $((first_sequential_zone_sector +
+                                i*sectors_per_zone))
+    done
+    w_off=$(((first_sequential_zone_sector + 2 * sectors_per_zone) * 512))
+    w_size=$((2 * zone_size))
+    w_capacity=$(total_zone_capacity 2 $w_off $dev)
     off=$((first_sequential_zone_sector * 512))
     size=$((4 * zone_size))
-    run_one_fio_job "$(ioengine "libaio")" --iodepth=64 --rw=randread --bs=16K \
+    write_and_run_one_fio_job "${w_off}" "${w_size}" \
+                   "$(ioengine "libaio")" --iodepth=64 --rw=randread --bs=16K \
                    --zonemode=zbd --zonesize="${zone_size}" --offset=$off \
                    --size=$size >>"${logfile}.${test_number}" 2>&1 || return $?
+    check_written $w_capacity || return $?
     check_read $size || return $?
 }
 
@@ -381,15 +414,9 @@ test17() {
 
     off=$(((disk_size / zone_size - 1) * zone_size))
     size=$((disk_size - off))
-    # Overwrite the last zone to avoid that reading from that zone fails.
     if [ -n "$is_zbd" ]; then
        reset_zone "$dev" $((off / 512)) || return $?
     fi
-    run_one_fio_job "$(ioengine "psync")" --rw=write --offset="$off"   \
-                   --zonemode=zbd --zonesize="${zone_size}"            \
-                   --bs="$zone_size" --size="$zone_size"               \
-                   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written "$zone_size" || return $?
     run_one_fio_job "$(ioengine "libaio")" --iodepth=8 --rw=randrw --bs=4K \
                    --zonemode=zbd --zonesize="${zone_size}"            \
                    --offset=$off --loops=2 --norandommap=1\
@@ -441,13 +468,17 @@ test23() {
 
 test24() {
     local bs loops=9 size=$((zone_size))
+    local off capacity
+
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
 
     bs=$(min $((256*1024)) "$zone_size")
     run_fio_on_seq "$(ioengine "psync")" --rw=write --bs="$bs"         \
                   --size=$size --loops=$loops                          \
                   --zone_reset_frequency=.01 --zone_reset_threshold=.90 \
                   >> "${logfile}.${test_number}" 2>&1 || return $?
-    check_written $((size * loops)) || return $?
+    check_written $((capacity * loops)) || return $?
     check_reset_count -eq 8 ||
        check_reset_count -eq 9 ||
        check_reset_count -eq 10 || return $?
@@ -473,15 +504,19 @@ test25() {
 
 write_to_first_seq_zone() {
     local loops=4 r
+    local off capacity
+
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
 
     r=$(((RANDOM << 16) | RANDOM))
     run_fio --name="$dev" --filename="$dev" "$(ioengine "psync")" --rw="$1" \
            --thread=1 --do_verify=1 --verify=md5 --direct=1 --bs=4K    \
-           --offset=$((first_sequential_zone_sector * 512))            \
-           "--size=$zone_size" --loops=$loops --randseed="$r"          \
+           --offset=$off                                               \
+           --size=$zone_size --loops=$loops --randseed="$r"            \
            --zonemode=zbd --zonesize="${zone_size}" --group_reporting=1        \
            --gtod_reduce=1 >> "${logfile}.${test_number}" 2>&1 || return $?
-    check_written $((loops * zone_size)) || return $?
+    check_written $((loops * capacity)) || return $?
 }
 
 # Overwrite the first sequential zone four times sequentially.
@@ -501,15 +536,16 @@ test28() {
     off=$((first_sequential_zone_sector * 512 + 64 * zone_size))
     [ -n "$is_zbd" ] && reset_zone "$dev" $((off / 512))
     opts=("--debug=zbd")
+    capacity=$(total_zone_capacity 1 $off $dev)
     for ((i=0;i<jobs;i++)); do
        opts+=("--name=job$i" "--filename=$dev" "--offset=$off" "--bs=16K")
-       opts+=("--size=$zone_size" "$(ioengine "psync")" "--rw=randwrite")
+       opts+=("--size=$zone_size" "--io_size=$capacity" "$(ioengine "psync")" "--rw=randwrite")
        opts+=("--thread=1" "--direct=1" "--zonemode=zbd")
        opts+=("--zonesize=${zone_size}" "--group_reporting=1")
        opts+=(${var_opts[@]})
     done
     run_fio "${opts[@]}" >> "${logfile}.${test_number}" 2>&1 || return $?
-    check_written $((jobs * zone_size)) || return $?
+    check_written $((jobs * $capacity)) || return $?
     check_reset_count -eq $jobs ||
        check_reset_count -eq $((jobs - 1)) ||
        return $?
@@ -598,10 +634,13 @@ test32() {
 # zone size.
 test33() {
     local bs io_size size
+    local off capacity=0;
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
     size=$((2 * zone_size))
-    io_size=$((5 * zone_size))
-    bs=$((3 * zone_size / 4))
+    io_size=$((5 * capacity))
+    bs=$((3 * capacity / 4))
     run_fio_on_seq "$(ioengine "psync")" --iodepth=1 --rw=write        \
                   --size=$size --io_size=$io_size --bs=$bs     \
                   >> "${logfile}.${test_number}" 2>&1 || return $?
@@ -650,8 +689,9 @@ test36() {
 
 # Test 3/4 for the I/O boundary rounding code: $size > $zone_size.
 test37() {
-    local bs off size
+    local bs off size capacity
 
+    capacity=$(total_zone_capacity 1 $first_sequential_zone_sector $dev)
     if [ "$first_sequential_zone_sector" = 0 ]; then
        off=0
     else
@@ -663,7 +703,7 @@ test37() {
                    --iodepth=1 --rw=write --do_verify=1 --verify=md5   \
                    --bs=$bs --zonemode=zbd --zonesize="${zone_size}"   \
                    >> "${logfile}.${test_number}" 2>&1
-    check_written $((zone_size)) || return $?
+    check_written $capacity || return $?
 }
 
 # Test 4/4 for the I/O boundary rounding code: $offset > $disk_size - $zone_size
@@ -763,10 +803,8 @@ test46() {
 test47() {
     local bs
 
-    [ -z "$is_zbd" ] && return 0
     bs=$((logical_block_size))
-    run_one_fio_job "$(ioengine "psync")" --rw=write --bs=$bs \
-                   --zonemode=zbd --zoneskip=1          \
+    run_fio_on_seq "$(ioengine "psync")" --rw=write --bs=$bs --zoneskip=1 \
                    >> "${logfile}.${test_number}" 2>&1 && return 1
     grep -q 'zoneskip 1 is not a multiple of the device zone size' "${logfile}.${test_number}"
 }
@@ -801,6 +839,27 @@ test48() {
            >> "${logfile}.${test_number}" 2>&1 || return $?
 }
 
+# Check if fio handles --zonecapacity on a normal block device correctly.
+# Writes two zones with --zonecapacity set to 3/4 of the zone size and expects
+# the written and read totals to equal twice that capacity. Returns 0 without
+# running fio when $is_zbd is set.
+test49() {
+    # Keep the scratch variables out of the script's global scope, as the
+    # other tests do.
+    local size capacity
+
+    if [ -n "$is_zbd" ]; then
+       echo "$dev is not a regular block device" \
+            >>"${logfile}.${test_number}"
+       return 0
+    fi
+
+    size=$((2 * zone_size))
+    capacity=$((zone_size * 3 / 4))
+
+    run_one_fio_job "$(ioengine "psync")" --rw=write \
+                   --zonemode=zbd --zonesize="${zone_size}" \
+                   --zonecapacity=${capacity} \
+                   --verify=md5  --size=${size} >>"${logfile}.${test_number}" 2>&1 ||
+       return $?
+    check_written $((capacity * 2)) || return $?
+    check_read $((capacity * 2)) || return $?
+}
+
 tests=()
 dynamic_analyzer=()
 reset_all_zones=
@@ -855,6 +914,9 @@ if [[ -b "$realdev" ]]; then
        case "$(<"/sys/class/block/$basename/queue/zoned")" in
        host-managed|host-aware)
                is_zbd=true
+               if ! check_blkzone "${dev}"; then
+                       exit 1
+               fi
                if ! result=($(first_sequential_zone "$dev")); then
                        echo "Failed to determine first sequential zone"
                        exit 1
index 968ea0abc0b15a91bbc55fa4d04a231f5e3a3474..3fe48ecc5705b8368d9edefd84d2d603b15547ea 100644 (file)
@@ -193,6 +193,7 @@ struct thread_options {
        unsigned int loops;
        unsigned long long zone_range;
        unsigned long long zone_size;
+       unsigned long long zone_capacity;
        unsigned long long zone_skip;
        enum fio_zone_mode zone_mode;
        unsigned long long lockmem;
@@ -487,6 +488,7 @@ struct thread_options_pack {
        uint32_t loops;
        uint64_t zone_range;
        uint64_t zone_size;
+       uint64_t zone_capacity;
        uint64_t zone_skip;
        uint64_t lockmem;
        uint32_t mem_type;
index 8872206e5b5b87d4e5b4b062669da54a21f94aee..e4558788150d2cadfa9706a24a16cb75c5994246 100755 (executable)
@@ -123,10 +123,10 @@ plot () {
 # plot <sub title> <file name tag> <y axis label> <y axis scale>
 #
 
-plot "I/O Latency" lat "Time (msec)" 1000
+plot "I/O Latency" lat "Time (msec)" 1000000
 plot "I/O Operations Per Second" iops "IOPS" 1
-plot "I/O Submission Latency" slat "Time (μsec)" 1
-plot "I/O Completion Latency" clat "Time (msec)" 1000
+plot "I/O Submission Latency" slat "Time (μsec)" 1000
+plot "I/O Completion Latency" clat "Time (msec)" 1000000
 plot "I/O Bandwidth" bw "Throughput (KB/s)" 1
 
 
diff --git a/unittests/lib/num2str.c b/unittests/lib/num2str.c
new file mode 100644 (file)
index 0000000..a3492a8
--- /dev/null
@@ -0,0 +1,53 @@
+#include <limits.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include "../../compiler/compiler.h"
+#include "../../lib/num2str.h"
+#include "../unittest.h"
+
+struct testcase {
+       uint64_t num;                   /* 1st argument given to num2str() */
+       int maxlen;                     /* 2nd argument given to num2str() */
+       int base;                       /* 3rd argument given to num2str() */
+       int pow2;                       /* 4th argument given to num2str() */
+       enum n2s_unit unit;             /* 5th argument given to num2str() */
+       const char *expected;           /* string the result is compared with */
+};
+
+static const struct testcase testcases[] = {   /* num, maxlen, base, pow2, unit, expected */
+       { 1, 1, 1, 0, N2S_NONE, "1" },
+       { UINT64_MAX, 99, 1, 0, N2S_NONE, "18446744073709551615" },
+       { 18446744073709551, 2, 1, 0, N2S_NONE, "18P" },
+       { 18446744073709551, 4, 1, 0, N2S_NONE, "18.4P" },
+       { UINT64_MAX, 2, 1, 0, N2S_NONE, "18E" },
+       { UINT64_MAX, 4, 1, 0, N2S_NONE, "18.4E" },
+};
+
+static void test_num2str(void) /* checks num2str() against each testcases[] entry */
+{
+       const struct testcase *p;
+       char *str;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(testcases); ++i) {
+               p = &testcases[i];
+               str = num2str(p->num, p->maxlen, p->base, p->pow2, p->unit);
+               CU_ASSERT_STRING_EQUAL(str, p->expected);
+               free(str);      /* the string returned by num2str() is released here */
+       }
+}
+
+static struct fio_unittest_entry tests[] = {   /* handed to fio_unittest_add_suite() */
+       {
+               .name   = "num2str/1",
+               .fn     = test_num2str,
+       },
+       {
+               .name   = NULL, /* presumably the end-of-list marker - TODO confirm */
+       },
+};
+
+CU_ErrorCode fio_unittest_lib_num2str(void)    /* adds tests[] as suite "lib/num2str.c" */
+{
+       return fio_unittest_add_suite("lib/num2str.c", NULL, NULL, tests);
+}
index c37e1971a0518544c2cdc136b5e276f387b16d4e..f490b4852b2f0a0a5e7ebbb62be33f2039573fcc 100644 (file)
@@ -48,6 +48,7 @@ int main(void)
        }
 
        fio_unittest_register(fio_unittest_lib_memalign);
+       fio_unittest_register(fio_unittest_lib_num2str);
        fio_unittest_register(fio_unittest_lib_strntol);
        fio_unittest_register(fio_unittest_oslib_strlcat);
        fio_unittest_register(fio_unittest_oslib_strndup);
index 786c1c97ec8f149b8b43c4cd7b6dbe017966230c..ecb7d12415209464da4a6d13b488233108eb25d6 100644 (file)
@@ -15,6 +15,7 @@ CU_ErrorCode fio_unittest_add_suite(const char*, CU_InitializeFunc,
        CU_CleanupFunc, struct fio_unittest_entry*);
 
 CU_ErrorCode fio_unittest_lib_memalign(void);
+CU_ErrorCode fio_unittest_lib_num2str(void);
 CU_ErrorCode fio_unittest_lib_strntol(void);
 CU_ErrorCode fio_unittest_oslib_strlcat(void);
 CU_ErrorCode fio_unittest_oslib_strndup(void);
index b7fa6693068d851f2ffb4c48a3044cfe4eb11f34..5ee0029d130e2f891d48cbd6c1ee01203085a6c3 100644 (file)
--- a/verify.c
+++ b/verify.c
@@ -8,6 +8,7 @@
 #include <pthread.h>
 #include <libgen.h>
 
+#include "arch/arch.h"
 #include "fio.h"
 #include "verify.h"
 #include "trim.h"
@@ -1309,8 +1310,7 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u)
                /*
                 * Ensure that the associated IO has completed
                 */
-               read_barrier();
-               if (ipo->flags & IP_F_IN_FLIGHT)
+               if (atomic_load_acquire(&ipo->flags) & IP_F_IN_FLIGHT)
                        goto nothing;
 
                rb_erase(n, &td->io_hist_tree);
@@ -1322,8 +1322,7 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u)
                /*
                 * Ensure that the associated IO has completed
                 */
-               read_barrier();
-               if (ipo->flags & IP_F_IN_FLIGHT)
+               if (atomic_load_acquire(&ipo->flags) & IP_F_IN_FLIGHT)
                        goto nothing;
 
                flist_del(&ipo->list);
index b59595124913338bc59dc20fb6a036c895b4f1f1..9e6c41ff2f399172703b6e438061236670962df8 100644 (file)
@@ -85,15 +85,14 @@ static bool all_sw_idle(struct workqueue *wq)
  */
 void workqueue_flush(struct workqueue *wq)
 {
+       pthread_mutex_lock(&wq->flush_lock);
        wq->wake_idle = 1;
 
-       while (!all_sw_idle(wq)) {
-               pthread_mutex_lock(&wq->flush_lock);
+       while (!all_sw_idle(wq))
                pthread_cond_wait(&wq->flush_cond, &wq->flush_lock);
-               pthread_mutex_unlock(&wq->flush_lock);
-       }
 
        wq->wake_idle = 0;
+       pthread_mutex_unlock(&wq->flush_lock);
 }
 
 /*
@@ -159,12 +158,10 @@ static void *worker_thread(void *data)
        if (sw->flags & SW_F_ERROR)
                goto done;
 
+       pthread_mutex_lock(&sw->lock);
        while (1) {
-               pthread_mutex_lock(&sw->lock);
-
                if (flist_empty(&sw->work_list)) {
                        if (sw->flags & SW_F_EXIT) {
-                               pthread_mutex_unlock(&sw->lock);
                                break;
                        }
 
@@ -173,34 +170,41 @@ static void *worker_thread(void *data)
                                workqueue_pre_sleep(sw);
                                pthread_mutex_lock(&sw->lock);
                        }
-
-                       /*
-                        * We dropped and reaquired the lock, check
-                        * state again.
-                        */
-                       if (!flist_empty(&sw->work_list))
-                               goto handle_work;
-
+               }
+               /*
+                * We may have dropped and reacquired the lock, check state
+                * again.
+                */
+               if (flist_empty(&sw->work_list)) {
                        if (sw->flags & SW_F_EXIT) {
-                               pthread_mutex_unlock(&sw->lock);
                                break;
-                       } else if (!(sw->flags & SW_F_IDLE)) {
+                       }
+                       if (!(sw->flags & SW_F_IDLE)) {
                                sw->flags |= SW_F_IDLE;
                                wq->next_free_worker = sw->index;
+                               pthread_mutex_unlock(&sw->lock);
+                               pthread_mutex_lock(&wq->flush_lock);
                                if (wq->wake_idle)
                                        pthread_cond_signal(&wq->flush_cond);
+                               pthread_mutex_unlock(&wq->flush_lock);
+                               pthread_mutex_lock(&sw->lock);
+                       }
+               }
+               if (flist_empty(&sw->work_list)) {
+                       if (sw->flags & SW_F_EXIT) {
+                               break;
                        }
-
                        pthread_cond_wait(&sw->cond, &sw->lock);
                } else {
-handle_work:
                        flist_splice_init(&sw->work_list, &local_list);
                }
                pthread_mutex_unlock(&sw->lock);
                handle_list(sw, &local_list);
                if (wq->ops.update_acct_fn)
                        wq->ops.update_acct_fn(sw);
+               pthread_mutex_lock(&sw->lock);
        }
+       pthread_mutex_unlock(&sw->lock);
 
 done:
        sk_out_drop();
@@ -336,11 +340,11 @@ int workqueue_init(struct thread_data *td, struct workqueue *wq,
         * Wait for them all to be started and initialized
         */
        error = 0;
+       pthread_mutex_lock(&wq->flush_lock);
        do {
                struct submit_worker *sw;
 
                running = 0;
-               pthread_mutex_lock(&wq->flush_lock);
                for (i = 0; i < wq->max_workers; i++) {
                        sw = &wq->workers[i];
                        pthread_mutex_lock(&sw->lock);
@@ -351,14 +355,12 @@ int workqueue_init(struct thread_data *td, struct workqueue *wq,
                        pthread_mutex_unlock(&sw->lock);
                }
 
-               if (error || running == wq->max_workers) {
-                       pthread_mutex_unlock(&wq->flush_lock);
+               if (error || running == wq->max_workers)
                        break;
-               }
 
                pthread_cond_wait(&wq->flush_cond, &wq->flush_lock);
-               pthread_mutex_unlock(&wq->flush_lock);
        } while (1);
+       pthread_mutex_unlock(&wq->flush_lock);
 
        if (!error)
                return 0;
diff --git a/zbd.c b/zbd.c
index 8cf8f81213ebfcd8083a2f3602932deec9c276cc..3eac5df3bef7313261cc864f04fd8db408514fa7 100644 (file)
--- a/zbd.c
+++ b/zbd.c
@@ -140,6 +140,24 @@ static inline bool zbd_zone_swr(struct fio_zone_info *z)
        return z->type == ZBD_ZONE_TYPE_SWR;
 }
 
+/**
+ * zbd_zone_end - Return zone end location, i.e. the start of entry @z + 1
+ * @z: zone info pointer; the array entry that follows it is dereferenced.
+ */
+static inline uint64_t zbd_zone_end(const struct fio_zone_info *z)
+{
+       return (z+1)->start;
+}
+
+/**
+ * zbd_zone_capacity_end - Return zone capacity limit end location
+ * @z: zone info pointer; the result is @z->start + @z->capacity.
+ */
+static inline uint64_t zbd_zone_capacity_end(const struct fio_zone_info *z)
+{
+       return z->start + z->capacity;
+}
+
 /**
  * zbd_zone_full - verify whether a minimum number of bytes remain in a zone
  * @f: file pointer.
@@ -154,7 +172,7 @@ static bool zbd_zone_full(const struct fio_file *f, struct fio_zone_info *z,
        assert((required & 511) == 0);
 
        return zbd_zone_swr(z) &&
-               z->wp + required > z->start + f->zbd_info->zone_size;
+               z->wp + required > zbd_zone_capacity_end(z);
 }
 
 static void zone_lock(struct thread_data *td, struct fio_file *f, struct fio_zone_info *z)
@@ -271,7 +289,7 @@ static bool zbd_verify_sizes(void)
                        z = &f->zbd_info->zone_info[zone_idx];
                        if ((f->file_offset != z->start) &&
                            (td->o.td_ddir != TD_DDIR_READ)) {
-                               new_offset = (z+1)->start;
+                               new_offset = zbd_zone_end(z);
                                if (new_offset >= f->file_offset + f->io_size) {
                                        log_info("%s: io_size must be at least one zone\n",
                                                 f->file_name);
@@ -353,6 +371,7 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
        uint32_t nr_zones;
        struct fio_zone_info *p;
        uint64_t zone_size = td->o.zone_size;
+       uint64_t zone_capacity = td->o.zone_capacity;
        struct zoned_block_device_info *zbd_info = NULL;
        int i;
 
@@ -368,6 +387,16 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
                return 1;
        }
 
+       if (zone_capacity == 0)
+               zone_capacity = zone_size;
+
+       if (zone_capacity > zone_size) {
+               log_err("%s: job parameter zonecapacity %llu is larger than zone size %llu\n",
+                       f->file_name, (unsigned long long) td->o.zone_capacity,
+                       (unsigned long long) td->o.zone_size);
+               return 1;
+       }
+
        nr_zones = (f->real_file_size + zone_size - 1) / zone_size;
        zbd_info = scalloc(1, sizeof(*zbd_info) +
                           (nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
@@ -381,9 +410,10 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
                mutex_init_pshared_with_type(&p->mutex,
                                             PTHREAD_MUTEX_RECURSIVE);
                p->start = i * zone_size;
-               p->wp = p->start + zone_size;
+               p->wp = p->start;
                p->type = ZBD_ZONE_TYPE_SWR;
                p->cond = ZBD_ZONE_COND_EMPTY;
+               p->capacity = zone_capacity;
        }
        /* a sentinel */
        p->start = nr_zones * zone_size;
@@ -456,10 +486,11 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
                        mutex_init_pshared_with_type(&p->mutex,
                                                     PTHREAD_MUTEX_RECURSIVE);
                        p->start = z->start;
+                       p->capacity = z->capacity;
                        switch (z->cond) {
                        case ZBD_ZONE_COND_NOT_WP:
                        case ZBD_ZONE_COND_FULL:
-                               p->wp = p->start + zone_size;
+                               p->wp = p->start + p->capacity;
                                break;
                        default:
                                assert(z->start <= z->wp);
@@ -707,7 +738,7 @@ static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
        dprint(FD_ZBD, "%s: resetting wp of zone %u.\n", f->file_name,
                zbd_zone_nr(f->zbd_info, z));
 
-       return zbd_reset_range(td, f, z->start, (z+1)->start - z->start);
+       return zbd_reset_range(td, f, z->start, zbd_zone_end(z) - z->start);
 }
 
 /* The caller must hold f->zbd_info->mutex */
@@ -1068,7 +1099,7 @@ found_candidate_zone:
        /* Both z->mutex and f->zbd_info->mutex are held. */
 
 examine_zone:
-       if (z->wp + min_bs <= (z+1)->start) {
+       if (z->wp + min_bs <= zbd_zone_capacity_end(z)) {
                pthread_mutex_unlock(&f->zbd_info->mutex);
                goto out;
        }
@@ -1112,7 +1143,7 @@ examine_zone:
                z = &f->zbd_info->zone_info[zone_idx];
 
                zone_lock(td, f, z);
-               if (z->wp + min_bs <= (z+1)->start)
+               if (z->wp + min_bs <= zbd_zone_capacity_end(z))
                        goto out;
                pthread_mutex_lock(&f->zbd_info->mutex);
        }
@@ -1143,9 +1174,9 @@ static struct fio_zone_info *zbd_replay_write_order(struct thread_data *td,
                assert(z);
        }
 
-       if (z->verify_block * min_bs >= f->zbd_info->zone_size)
+       if (z->verify_block * min_bs >= z->capacity)
                log_err("%s: %d * %d >= %llu\n", f->file_name, z->verify_block,
-                       min_bs, (unsigned long long) f->zbd_info->zone_size);
+                       min_bs, (unsigned long long)z->capacity);
        io_u->offset = z->start + z->verify_block++ * min_bs;
        return z;
 }
@@ -1231,7 +1262,7 @@ static void zbd_queue_io(struct io_u *io_u, int q, bool success)
        switch (io_u->ddir) {
        case DDIR_WRITE:
                zone_end = min((uint64_t)(io_u->offset + io_u->buflen),
-                              (z + 1)->start);
+                              zbd_zone_capacity_end(z));
                pthread_mutex_lock(&zbd_info->mutex);
                /*
                 * z->wp > zone_end means that one or more I/O errors
@@ -1327,6 +1358,28 @@ void setup_zbd_zone_mode(struct thread_data *td, struct io_u *io_u)
        assert(td->o.zone_mode == ZONE_MODE_ZBD);
        assert(td->o.zone_size);
 
+       zone_idx = zbd_zone_idx(f, f->last_pos[ddir]);
+       z = &f->zbd_info->zone_info[zone_idx];
+
+       /*
+        * When the zone capacity is smaller than the zone size and the I/O is
+        * sequential write, skip to zone end if the latest position is at the
+        * zone capacity limit.
+        */
+       if (z->capacity < f->zbd_info->zone_size && !td_random(td) &&
+           ddir == DDIR_WRITE &&
+           f->last_pos[ddir] >= zbd_zone_capacity_end(z)) {
+               dprint(FD_ZBD,
+                      "%s: Jump from zone capacity limit to zone end:"
+                      " (%llu -> %llu) for zone %u (%llu)\n",
+                      f->file_name, (unsigned long long) f->last_pos[ddir],
+                      (unsigned long long) zbd_zone_end(z),
+                      zbd_zone_nr(f->zbd_info, z),
+                      (unsigned long long) z->capacity);
+               td->io_skip_bytes += zbd_zone_end(z) - f->last_pos[ddir];
+               f->last_pos[ddir] = zbd_zone_end(z);
+       }
+
        /*
         * zone_skip is valid only for sequential workloads.
         */
@@ -1340,11 +1393,8 @@ void setup_zbd_zone_mode(struct thread_data *td, struct io_u *io_u)
         * - For reads with td->o.read_beyond_wp == false, the last position
         *   reached the zone write pointer.
         */
-       zone_idx = zbd_zone_idx(f, f->last_pos[ddir]);
-       z = &f->zbd_info->zone_info[zone_idx];
-
        if (td->zone_bytes >= td->o.zone_size ||
-           f->last_pos[ddir] >= (z+1)->start ||
+           f->last_pos[ddir] >= zbd_zone_end(z) ||
            (ddir == DDIR_READ &&
             (!td->o.read_beyond_wp) && f->last_pos[ddir] >= z->wp)) {
                /*
@@ -1530,6 +1580,13 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
                        zb->reset_zone = 0;
                        if (zbd_reset_zone(td, f, zb) < 0)
                                goto eof;
+
+                       if (zb->capacity < min_bs) {
+                               log_err("zone capacity %llu smaller than minimum block size %d\n",
+                                       (unsigned long long)zb->capacity,
+                                       min_bs);
+                               goto eof;
+                       }
                }
                /* Make writes occur at the write pointer */
                assert(!zbd_zone_full(f, zb, min_bs));
@@ -1545,7 +1602,7 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
                 * small.
                 */
                new_len = min((unsigned long long)io_u->buflen,
-                             (zb + 1)->start - io_u->offset);
+                             zbd_zone_capacity_end(zb) - io_u->offset);
                new_len = new_len / min_bs * min_bs;
                if (new_len == io_u->buflen)
                        goto accept;
@@ -1556,7 +1613,7 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
                        goto accept;
                }
                log_err("Zone remainder %lld smaller than minimum block size %d\n",
-                       ((zb + 1)->start - io_u->offset),
+                       (zbd_zone_capacity_end(zb) - io_u->offset),
                        min_bs);
                goto eof;
        case DDIR_TRIM:
diff --git a/zbd.h b/zbd.h
index e942a7f61b0ce21e39e14c77ecafccd9f35ce712..021174c110ed459fea763338be666619dfd1965a 100644 (file)
--- a/zbd.h
+++ b/zbd.h
@@ -23,6 +23,7 @@ enum io_u_action {
  * struct fio_zone_info - information about a single ZBD zone
  * @start: zone start location (bytes)
  * @wp: zone write pointer location (bytes)
+ * @capacity: maximum size usable from the start of a zone (bytes)
  * @verify_block: number of blocks that have been verified for this zone
  * @mutex: protects the modifiable members in this structure
  * @type: zone type (BLK_ZONE_TYPE_*)
@@ -35,6 +36,7 @@ struct fio_zone_info {
        pthread_mutex_t         mutex;
        uint64_t                start;
        uint64_t                wp;
+       uint64_t                capacity;
        uint32_t                verify_block;
        enum zbd_zone_type      type:2;
        enum zbd_zone_cond      cond:4;
index d63c0d0a7262978db87315a1e7daea1461603c5a..5ed41aa06c8ac371e264b9fd62af3a33a848bf9f 100644 (file)
@@ -50,6 +50,7 @@ struct zbd_zone {
        uint64_t                start;
        uint64_t                wp;
        uint64_t                len;
+       uint64_t                capacity;
        enum zbd_zone_type      type;
        enum zbd_zone_cond      cond;
 };