Merge branch 'atomic-writes' master
authorJens Axboe <axboe@kernel.dk>
Tue, 17 Sep 2024 02:23:06 +0000 (20:23 -0600)
committerJens Axboe <axboe@kernel.dk>
Tue, 17 Sep 2024 02:23:06 +0000 (20:23 -0600)
Merge the atomic write support.

* atomic-writes:
  examples: Add example for atomic write verify
  fio: Support verify_write_sequence
  doc: Document atomic command
  tools/fiograph: Update for atomic support
  io_uring: Support RWF_ATOMIC
  libaio: Support RWF_ATOMIC
  pvsync2: Support RWF_ATOMIC
  os: Reintroduce atomic write support
  os-linux: Define RWF_ATOMIC

74 files changed:
.github/workflows/ci.yml
HOWTO.rst
Makefile
README.rst
cconv.c
ci/actions-build.sh
ci/actions-full-test.sh
ci/actions-install-librpma.sh [deleted file]
ci/actions-install.sh
client.c
configure
dataplacement.c
dataplacement.h
engines/e4defrag.c
engines/fileoperations.c
engines/io_uring.c
engines/libaio.c
engines/librpma_apm.c [deleted file]
engines/librpma_fio.c [deleted file]
engines/librpma_fio.h [deleted file]
engines/librpma_fio_pmem.h [deleted file]
engines/librpma_fio_pmem2.h [deleted file]
engines/librpma_gpspm.c [deleted file]
engines/librpma_gpspm_flush.pb-c.c [deleted file]
engines/librpma_gpspm_flush.pb-c.h [deleted file]
engines/librpma_gpspm_flush.proto [deleted file]
engines/nfs.c
engines/nvme.c
engines/nvme.h
engines/sg.c
engines/sync.c
engines/xnvme.c
examples/atomic-verify.fio [new file with mode: 0644]
examples/http-s3.fio
examples/librpma_apm-client.fio [deleted file]
examples/librpma_apm-client.png [deleted file]
examples/librpma_apm-server.fio [deleted file]
examples/librpma_apm-server.png [deleted file]
examples/librpma_gpspm-client.fio [deleted file]
examples/librpma_gpspm-client.png [deleted file]
examples/librpma_gpspm-server.fio [deleted file]
examples/librpma_gpspm-server.png [deleted file]
fio.1
fio.h
helper_thread.c
init.c
io_u.c
ioengines.c
ioengines.h
iolog.c
iolog.h
libfio.c
optgroup.c
optgroup.h
options.c
os/os-linux.h
os/os-qnx.h [new file with mode: 0755]
os/os.h
os/windows/posix.c
server.c
server.h
smalloc.c
stat.c
stat.h
t/jobs/t0033.fio [new file with mode: 0644]
t/jobs/t0034.fio [new file with mode: 0644]
t/jobs/t0035.fio [new file with mode: 0644]
t/nvmept.py
t/nvmept_fdp.py
t/run-fio-tests.py
t/stest.c
thread_options.h
tools/fiograph/fiograph.conf
verify.c

index e53082c302ecf6ffe41f94292265ab39c00a47b2..6fbba591f25e99da845ca0b408f509cbbb4c6f44 100644 (file)
@@ -5,6 +5,41 @@ on:
   pull_request:
 
 jobs:
+  build-containers:
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        container:
+        - {os: 'debian', dh: 'debian', ver: 'bookworm', target_arch: 'x86_64'}
+        - {os: 'fedora', dh: 'fedora', ver: '40', target_arch: 'x86_64'}
+        - {os: 'alma', dh: 'almalinux', ver: '9', target_arch: 'x86_64'}
+        - {os: 'oracle', dh: 'oraclelinux', ver: '9', target_arch: 'x86_64'}
+        - {os: 'rocky', dh: 'rockylinux', ver: '9', target_arch: 'x86_64'}
+        - {os: 'ubuntu', dh: 'ubuntu', ver: 'noble', target_arch: 'i686'}
+        - {os: 'ubuntu', dh: 'ubuntu', ver: 'noble', target_arch: 'x86_64'}
+
+    container:
+      image: ${{ matrix.container.dh }}:${{ matrix.container.ver }}
+      env:
+        CI_TARGET_BUILD: Linux
+        CI_TARGET_ARCH: ${{ matrix.container.target_arch }}
+        CI_TARGET_OS: ${{ matrix.container.os }}
+        CI_TARGET_OS_VER: ${{ matrix.container.ver }}
+
+    steps:
+    - name: Checkout repo
+      uses: actions/checkout@v4
+    - name: Install dependencies
+      run:  ./ci/actions-install.sh
+    - name: Build
+      run:  ./ci/actions-build.sh
+    - name: Smoke test
+      run:  ./ci/actions-smoke-test.sh
+    - name: Full test
+      run:  ./ci/actions-full-test.sh
+
   build:
     runs-on: ${{ matrix.os }}
     strategy:
@@ -95,6 +130,9 @@ jobs:
           mingw-w64-${{matrix.arch}}-python-statsmodels
           mingw-w64-${{matrix.arch}}-python-sphinx
 
+    - name: install bash 4 (macOS)
+      if: ${{ contains( matrix.build, 'macOS' ) }}
+      run:  HOMEBREW_NO_AUTO_UPDATE=1 brew install bash
     - name: Install dependencies
       run: ${{matrix.shell}} ./ci/actions-install.sh
       if: ${{ !contains( matrix.build, 'msys2' ) }}
index 3b262faeae7a7d340c816aaef21b97e129241cc0..4f071484bdc3fa7b02cc4dd51f87ec3ef97a601d 100644 (file)
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -805,6 +805,11 @@ Target file/device
        Note: Windows and FreeBSD (refer to geom(4)) prevent write access to areas
        of the disk containing in-use data (e.g. filesystems).
 
+       For HTTP and S3 access, specify a valid URL path or S3 key, respectively.
+       A filename for path-style S3 includes a bucket name (:file:`/bucket/k/e.y`)
+       while a virtual-hosted-style S3 filename :file:`/k/e.y` does not because 
+       its bucket name is specified in :option:`http_host`.
+
        The filename "`-`" is a reserved name, meaning *stdin* or *stdout*.  Which
        of the two depends on the read/write direction set.
 
@@ -2496,6 +2501,20 @@ with the caveat that when used on the command line, they must come after the
        For direct I/O, requests will only succeed if cache invalidation isn't required,
        file blocks are fully allocated and the disk request could be issued immediately.
 
+.. option:: atomic=bool : [pvsync2] [libaio] [io_uring]
+
+       This option means that writes are issued with torn-write protection, meaning
+       that after a power failure or kernel crash, either all or none of the data
+       from the write will be stored, but never a mix of old and new data.
+       Torn-write protection is also known as atomic writes.
+
+       This option sets the RWF_ATOMIC flag (supported from the 6.11 Linux kernel) on
+       a per-IO basis.
+
+       Writes with RWF_ATOMIC set will be rejected by the kernel when the file does
+       not support torn-write protection. To learn a file's torn-write limits, issue
+       statx with STATX_WRITE_ATOMIC.
+
 .. option:: fdp=bool : [io_uring_cmd] [xnvme]
 
        Enable Flexible Data Placement mode for write commands.
@@ -2538,12 +2557,17 @@ with the caveat that when used on the command line, they must come after the
 
 .. option:: plids=str, fdp_pli=str : [io_uring_cmd] [xnvme]
 
-        Select which Placement IDs (streams) or Placement ID Indices (FDP) this
-        job is allowed to use for writes. For FDP by default, the job will
-        cycle through all available Placement IDs, so use this to isolate these
-        identifiers to specific jobs. If you want fio to use FDP placement
-        identifiers only at indices 0, 2 and 5 specify ``plids=0,2,5``. For
-        streams this should be a comma-separated list of Stream IDs.
+        Select which Placement ID Indices (FDP) or Placement IDs (streams) this
+        job is allowed to use for writes. This option accepts a comma-separated
+        list of values or ranges (e.g., 1,2-4,5,6-8).
+
+        For FDP by default, the job will cycle through all available Placement
+        IDs, so use this option to be selective. The values specified here are
+        array indices for the list of placement IDs returned by the nvme-cli
+        command ``nvme fdp status``. If you want fio to use FDP placement
+        identifiers only at indices 0, 2 and 5, set ``plids=0,2,5``.
+
+        For streams this should be a list of Stream IDs.
 
 .. option:: dp_scheme=str : [io_uring_cmd] [xnvme]
 
@@ -2657,7 +2681,7 @@ with the caveat that when used on the command line, they must come after the
                this will be the starting port number since fio will use a range of
                ports.
 
-   [rdma], [librpma_*]
+   [rdma]
 
                The port to use for RDMA-CM communication. This should be the same value
                on the client and the server side.
@@ -2668,20 +2692,6 @@ with the caveat that when used on the command line, they must come after the
        is a TCP listener or UDP reader, the hostname is not used and must be omitted
        unless it is a valid UDP multicast address.
 
-.. option:: serverip=str : [librpma_*]
-
-       The IP address to be used for RDMA-CM based I/O.
-
-.. option:: direct_write_to_pmem=bool : [librpma_*]
-
-       Set to 1 only when Direct Write to PMem from the remote host is possible.
-       Otherwise, set to 0.
-
-.. option:: busy_wait_polling=bool : [librpma_*_server]
-
-       Set to 0 to wait for completion instead of busy-wait polling completion.
-       Default: 1.
-
 .. option:: interface=str : [netsplice] [net]
 
        The IP address of the network interface used to send or receive UDP
@@ -2847,16 +2857,32 @@ with the caveat that when used on the command line, they must come after the
        Specify stat system call type to measure lookup/getattr performance.
        Default is **stat** for :manpage:`stat(2)`.
 
-.. option:: readfua=bool : [sg]
+.. option:: readfua=bool : [sg] [io_uring_cmd]
 
        With readfua option set to 1, read operations include
        the force unit access (fua) flag. Default is 0.
 
-.. option:: writefua=bool : [sg]
+.. option:: writefua=bool : [sg] [io_uring_cmd]
 
        With writefua option set to 1, write operations include
        the force unit access (fua) flag. Default is 0.
 
+.. option:: write_mode=str : [io_uring_cmd]
+
+        Specifies the type of write operation.  Defaults to 'write'.
+
+                **write**
+                        Use Write commands for write operations
+
+                **uncor**
+                        Use Write Uncorrectable commands for write operations
+
+                **zeroes**
+                        Use Write Zeroes commands for write operations
+
+                **verify**
+                        Use Verify commands for write operations
+
 .. option:: sg_write_mode=str : [sg]
 
        Specify the type of write commands to issue. This option can take ten values:
@@ -2913,8 +2939,13 @@ with the caveat that when used on the command line, they must come after the
 
 .. option:: http_host=str : [http]
 
-       Hostname to connect to. For S3, this could be the bucket hostname.
-       Default is **localhost**
+       Hostname to connect to. HTTP port 80 is used automatically when the value of 
+       the https parameter is *off*, and HTTPS port 443 if it is *on*. A 
+       virtual-hosted-style S3 hostname starts with a bucket name, while a 
+       path-style S3 hostname does not. See 
+       https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html for 
+       detailed examples.
+       Default is **localhost** (path-style S3 hostname)
 
 .. option:: http_user=str : [http]
 
@@ -3971,6 +4002,17 @@ Verification
         instead resets the file after the write phase and then replays I/Os for
         the verification phase.
 
+.. option:: verify_write_sequence=bool
+
+        Verify the header write sequence number. In a scenario with multiple jobs,
+        verification of the write sequence number may fail. Disabling this option
+        will mean that write sequence number checking is skipped. Doing that can be
+        useful for testing atomic writes, as it means that checksum verification can
+        still be attempted. When :option:`atomic` is enabled, checksum
+        verification is expected to succeed (while write sequence checking can still
+        fail).
+        Defaults to true.
+
 .. option:: trim_percentage=int
 
        Number of verify blocks to discard/trim.
@@ -4206,6 +4248,21 @@ Measurements and reporting
        entry as well as the other data values. Defaults to 0 meaning that
        offsets are not present in logs. Also see `Log File Formats`_.
 
+.. option:: log_prio=bool
+
+       If this is set, the *Command priority* field in `Log File Formats`_
+       shows the priority value and the IO priority class of the command.
+       Otherwise, the field shows if the command has the highest RT
+       priority class or not. Also see `Log File Formats`_.
+
+.. option:: log_issue_time=bool
+
+       If this is set, the iolog options will include the command issue time
+       for the I/O entry as well as the other data values. Defaults to 0
+       meaning that command issue times are not present in logs. Also see
+       `Log File Formats`_. This option shall be set together with
+       :option:`write_lat_log` and :option:`log_offset`.
+
 .. option:: log_compression=int
 
        If this is set, fio will compress the I/O logs as it goes, to keep the
@@ -5171,7 +5228,7 @@ Fio supports a variety of log file formats, for logging latencies, bandwidth,
 and IOPS. The logs share a common format, which looks like this:
 
     *time* (`msec`), *value*, *data direction*, *block size* (`bytes`),
-    *offset* (`bytes`), *command priority*
+    *offset* (`bytes`), *command priority*, *issue time* (`nsec`)
 
 *Time* for the log entry is always in milliseconds. The *value* logged depends
 on the type of log, it will be one of the following:
@@ -5196,8 +5253,21 @@ The entry's *block size* is always in bytes. The *offset* is the position in byt
 from the start of the file for that particular I/O. The logging of the offset can be
 toggled with :option:`log_offset`.
 
-*Command priority* is 0 for normal priority and 1 for high priority. This is controlled
-by the ioengine specific :option:`cmdprio_percentage`.
+If :option:`log_prio` is not set, the entry's *Command priority* is 1 for an IO
+executed with the highest RT priority class (:option:`prioclass` =1 or
+:option:`cmdprio_class` =1) and 0 otherwise. This is controlled by the
+:option:`prioclass` option and the ioengine specific
+:option:`cmdprio_percentage` and :option:`cmdprio_class` options. If
+:option:`log_prio` is set, the entry's *Command priority* is the priority set
+for the IO, as a 16-bits hexadecimal number with the lowest 13 bits indicating
+the priority value (:option:`prio` and :option:`cmdprio` options) and the
+highest 3 bits indicating the IO priority class (:option:`prioclass` and
+:option:`cmdprio_class` options).
+
+The entry's *issue time* is the command issue time in nanoseconds. The logging
+of the issue time can be toggled with :option:`log_issue_time`. This field has
+valid values in the completion latency (clat) or submit latency (slat) log
+files. The field has value 0 in other log files.
 
 Fio defaults to logging every individual I/O but when windowed logging is set
 through :option:`log_avg_msec`, either the average (by default), the maximum
@@ -5207,12 +5277,12 @@ is set to both) is recorded. The log file format when both the values are report
 takes this form:
 
     *time* (`msec`), *value*, *value1*, *data direction*, *block size* (`bytes`),
-    *offset* (`bytes`), *command priority*
+    *offset* (`bytes`), *command priority*, *issue time* (`nsec`)
 
 
 Each *data direction* seen within the window period will aggregate its values in a
-separate row. Further, when using windowed logging the *block size* and *offset*
-entries will always contain 0.
+separate row. Further, when using windowed logging the *block size*, *offset*
+and *issue time* entries will always contain 0.
 
 
 Client/Server
index be57e296516d096c1e7a389d1a7d5f1362ddd953..746a27d4804573ef3cf39ec7c89a1a01e3236ad0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -108,29 +108,6 @@ ifdef CONFIG_RDMA
   rdma_LIBS = -libverbs -lrdmacm
   ENGINES += rdma
 endif
-ifdef CONFIG_LIBRPMA_APM
-  librpma_apm_SRCS = engines/librpma_apm.c
-  librpma_fio_SRCS = engines/librpma_fio.c
-  ifdef CONFIG_LIBPMEM2_INSTALLED
-    librpma_apm_LIBS = -lrpma -lpmem2
-  else
-    librpma_apm_LIBS = -lrpma -lpmem
-  endif
-  ENGINES += librpma_apm
-endif
-ifdef CONFIG_LIBRPMA_GPSPM
-  librpma_gpspm_SRCS = engines/librpma_gpspm.c engines/librpma_gpspm_flush.pb-c.c
-  librpma_fio_SRCS = engines/librpma_fio.c
-  ifdef CONFIG_LIBPMEM2_INSTALLED
-    librpma_gpspm_LIBS = -lrpma -lpmem2 -lprotobuf-c
-  else
-    librpma_gpspm_LIBS = -lrpma -lpmem -lprotobuf-c
-  endif
-  ENGINES += librpma_gpspm
-endif
-ifdef librpma_fio_SRCS
-  SOURCE += $(librpma_fio_SRCS)
-endif
 ifdef CONFIG_POSIXAIO
   SOURCE += engines/posixaio.c
 endif
index dd521daf9c49f39bf941dec39bbf9a4995d75049..e672ce6477953340d064ba271bce68d526989f31 100644 (file)
@@ -53,11 +53,10 @@ see REPORTING-BUGS.
 
 An automated mail detailing recent commits is automatically sent to the list at
 most daily. The list address is fio@vger.kernel.org, subscribe by sending an
-email to majordomo@vger.kernel.org with
+email to fio+subscribe@vger.kernel.org or visit
+https://subspace.kernel.org/vger.kernel.org.html.
 
-       subscribe fio
-
-in the body of the email. Archives can be found here:
+Archives can be found here:
 
        https://www.spinics.net/lists/fio/
 
diff --git a/cconv.c b/cconv.c
index 9b344940cb73f0676c71560f82b074ff3c26f8a1..9571f1a8ad509dd875c72ab989652d46686e2c41 100644 (file)
--- a/cconv.c
+++ b/cconv.c
@@ -216,6 +216,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
        o->log_max = le32_to_cpu(top->log_max);
        o->log_offset = le32_to_cpu(top->log_offset);
        o->log_prio = le32_to_cpu(top->log_prio);
+       o->log_issue_time = le32_to_cpu(top->log_issue_time);
        o->log_gz = le32_to_cpu(top->log_gz);
        o->log_gz_store = le32_to_cpu(top->log_gz_store);
        o->log_alternate_epoch = le32_to_cpu(top->log_alternate_epoch);
@@ -359,7 +360,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
        o->dp_id_select = le32_to_cpu(top->dp_id_select);
        o->dp_nr_ids = le32_to_cpu(top->dp_nr_ids);
        for (i = 0; i < o->dp_nr_ids; i++)
-               o->dp_ids[i] = le32_to_cpu(top->dp_ids[i]);
+               o->dp_ids[i] = le16_to_cpu(top->dp_ids[i]);
 #if 0
        uint8_t cpumask[FIO_TOP_STR_MAX];
        uint8_t verify_cpumask[FIO_TOP_STR_MAX];
@@ -458,6 +459,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->log_max = cpu_to_le32(o->log_max);
        top->log_offset = cpu_to_le32(o->log_offset);
        top->log_prio = cpu_to_le32(o->log_prio);
+       top->log_issue_time = cpu_to_le32(o->log_issue_time);
        top->log_gz = cpu_to_le32(o->log_gz);
        top->log_gz_store = cpu_to_le32(o->log_gz_store);
        top->log_alternate_epoch = cpu_to_le32(o->log_alternate_epoch);
@@ -659,7 +661,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
        top->dp_id_select = cpu_to_le32(o->dp_id_select);
        top->dp_nr_ids = cpu_to_le32(o->dp_nr_ids);
        for (i = 0; i < o->dp_nr_ids; i++)
-               top->dp_ids[i] = cpu_to_le32(o->dp_ids[i]);
+               top->dp_ids[i] = cpu_to_le16(o->dp_ids[i]);
 #if 0
        uint8_t cpumask[FIO_TOP_STR_MAX];
        uint8_t verify_cpumask[FIO_TOP_STR_MAX];
index 47d4f044ecc49b7cd3efa6ae061c8a06cb9260c0..861ed3a8d59481f45551dad10d39d6bf50ec76be 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # This script expects to be invoked from the base fio directory.
 set -eu
 
@@ -29,7 +29,16 @@ main() {
                 return 1
             fi
             ;;
-        */linux)
+        */linux | */ubuntu)
+            case "${CI_TARGET_ARCH}" in
+                "x86_64")
+                    configure_flags+=(
+                        "--enable-cuda"
+                    )
+                    ;;
+           esac
+           ;;&
+        */linux | */ubuntu | */debian | */fedora | */alma | */oracle | */rocky)
             case "${CI_TARGET_ARCH}" in
                 "i686")
                     extra_cflags="${extra_cflags} -m32"
@@ -37,7 +46,6 @@ main() {
                     ;;
                 "x86_64")
                     configure_flags+=(
-                        "--enable-cuda"
                         "--enable-libiscsi"
                         "--enable-libnbd"
                     )
index d2fb4201ae37ba1f4f9bcdce91acc8dcc3338733..23bdd219820fc0fdb872d81783111fe630a38a3c 100755 (executable)
@@ -10,11 +10,31 @@ main() {
 
     echo "Running long running tests..."
     export PYTHONUNBUFFERED="TRUE"
-    if [[ "${CI_TARGET_ARCH}" == "arm64" ]]; then
-        python3 t/run-fio-tests.py --skip 6 1007 1008 --debug -p 1010:"--skip 15 16 17 18 19 20"
-    else
-        python3 t/run-fio-tests.py --skip 6 1007 1008 --debug
+    skip=(
+        6
+       1007
+       1008
+    )
+    args=(
+        --debug
+    )
+    if [ "${GITHUB_JOB}" == "build-containers" ]; then
+        # io_uring is disabled in containers
+        # so skip the io_uring test
+        skip+=(
+            18
+        )
+       # cmd priority does not work in containers
+       # so skip the related latency test cases
+       args+=(
+           -p
+            "1010:--skip 15 16 17 18 19 20 21 22"
+        )
+
     fi
+
+    echo python3 t/run-fio-tests.py --skip "${skip[@]}" "${args[@]}"
+    python3 t/run-fio-tests.py --skip "${skip[@]}" "${args[@]}"
     make -C doc html
 }
 
diff --git a/ci/actions-install-librpma.sh b/ci/actions-install-librpma.sh
deleted file mode 100755 (executable)
index 31f9f71..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash -e
-
-LIBRPMA_VERSION="1.0.0"
-ZIP_FILE=rpma.zip
-
-WORKDIR=$(pwd)
-
-# install librpma
-wget -O $ZIP_FILE https://github.com/pmem/rpma/archive/${LIBRPMA_VERSION}.zip
-unzip $ZIP_FILE
-mkdir -p rpma-${LIBRPMA_VERSION}/build
-cd rpma-${LIBRPMA_VERSION}/build
-cmake .. -DCMAKE_BUILD_TYPE=Release \
-       -DCMAKE_INSTALL_PREFIX=/usr \
-       -DBUILD_DOC=OFF \
-       -DBUILD_EXAMPLES=OFF \
-       -DBUILD_TESTS=OFF
-make -j"$(nproc)"
-sudo make -j"$(nproc)" install
-cd "$WORKDIR"
-rm -rf $ZIP_FILE rpma-${LIBRPMA_VERSION}
index 6eb2d795e7d1d93b0327c3bbd3a3fbcb4b295129..7a87fbe3340cbcd58d9221648883d9e27911258d 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # This script expects to be invoked from the base fio directory.
 set -eu
 
@@ -9,6 +9,12 @@ SCRIPT_DIR=$(dirname "$0")
 install_ubuntu() {
     local pkgs
 
+    if [ "${GITHUB_JOB}" == "build-containers" ]; then
+        # containers run as root and do not have sudo
+        apt update
+        apt -y install sudo
+    fi
+
     cat <<DPKGCFG | sudo tee /etc/dpkg/dpkg.cfg.d/dpkg-speedup > /dev/null
 # Skip fsync
 force-unsafe-io
@@ -17,6 +23,7 @@ path-exclude=/usr/share/man/*
 path-exclude=/usr/share/locale/*/LC_MESSAGES/*.mo
 path-exclude=/usr/share/doc/*
 DPKGCFG
+
     # Packages available on i686 and x86_64
     pkgs=(
         libaio-dev
@@ -43,19 +50,24 @@ DPKGCFG
             pkgs+=(
                 libglusterfs-dev
                 libgoogle-perftools-dev
+                libisal-dev
                 libiscsi-dev
                 libnbd-dev
                 libpmem-dev
-                libpmem2-dev
-                libprotobuf-c-dev
                 librbd-dev
                 libtcmalloc-minimal4
-                nvidia-cuda-dev
                 libibverbs-dev
                 librdmacm-dev
+               pkg-config
             )
            echo "Removing libunwind-14-dev because of conflicts with libunwind-dev"
            sudo apt remove -y libunwind-14-dev
+           if [ "${CI_TARGET_OS}" == "linux" ] || [ "${CI_TARGET_OS}" == "ubuntu" ]; then
+               # Only for Ubuntu
+               pkgs+=(
+                  nvidia-cuda-dev
+               )
+           fi
             ;;
     esac
 
@@ -66,15 +78,104 @@ DPKGCFG
        python3-sphinx
        python3-statsmodels
     )
+    if [ "${GITHUB_JOB}" == "build-containers" ]; then
+        pkgs+=(
+            bison
+            build-essential
+            cmake
+            flex
+            unzip
+            wget
+            zlib1g-dev
+        )
+    fi
 
     echo "Updating APT..."
     sudo apt-get -qq update
     echo "Installing packages... ${pkgs[@]}"
     sudo apt-get install -o APT::Immediate-Configure=false --no-install-recommends -qq -y "${pkgs[@]}"
-    if [ "${CI_TARGET_ARCH}" == "x86_64" ]; then
-        # install librpma from sources
-        ci/actions-install-librpma.sh
-    fi
+}
+
+# Fedora and related distributions
+install_fedora() {
+    pkgs=(
+        bison-devel
+        git
+        cmake
+        flex-devel
+        gperftools
+        isa-l-devel
+        kernel-devel
+        libaio-devel
+        libibverbs-devel
+        libiscsi-devel
+        libnbd-devel
+        libnfs-devel
+        libpmem-devel
+        libpmem2-devel
+        librbd-devel
+        numactl-devel
+        protobuf-c-devel
+        python3-scipy
+        python3-sphinx
+        sudo
+        unzip
+        valgrind-devel
+        wget
+    )
+
+    case "${CI_TARGET_OS}" in
+        "fedora")
+            pkgs+=(
+                cunit-devel
+                libgfapi-devel
+                python3-statsmodels
+            )
+            ;;
+        "rocky" | "alma" | "oracle")
+            pkgs+=(
+                CUnit-devel
+                python-pip
+            )
+            ;;&
+        "rocky" | "alma")
+            pkgs+=(
+                glusterfs-api-devel
+            )
+            ;;
+    esac
+    dnf install -y "${pkgs[@]}"
+}
+
+install_rhel_clone() {
+    dnf install -y epel-release
+    install_fedora
+
+    # I could not find a python3-statsmodels package in the repos
+    pip3 install statsmodels
+}
+
+install_oracle() {
+    dnf config-manager --set-enabled ol9_codeready_builder
+    install_rhel_clone
+}
+
+install_alma() {
+    dnf install -y 'dnf-command(config-manager)'
+    dnf config-manager --set-enabled crb
+    dnf install -y almalinux-release-devel
+    install_rhel_clone
+}
+
+install_rocky() {
+    dnf install -y 'dnf-command(config-manager)'
+    dnf config-manager --set-enabled crb
+    dnf config-manager --set-enabled devel
+    install_rhel_clone
+}
+
+install_debian() {
+    install_ubuntu
 }
 
 install_linux() {
index 4cb7dffede753d52f641afd1d4f1a448588a8509..5964faba04104484022897793b46cd03eab3c96c 100644 (file)
--- a/client.c
+++ b/client.c
@@ -1388,8 +1388,8 @@ static void handle_eta(struct fio_client *client, struct fio_net_cmd *cmd)
 static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
                                      uint64_t sample_size)
 {
-       struct io_sample *s;
-       int log_offset;
+       struct io_sample *s, *s_tmp;
+       bool log_offset, log_issue_time;
        uint64_t i, j, nr_samples;
        struct io_u_plat_entry *entry;
        uint64_t *io_u_plat;
@@ -1399,15 +1399,17 @@ static void client_flush_hist_samples(FILE *f, int hist_coarseness, void *sample
        if (!sample_size)
                return;
 
-       s = __get_sample(samples, 0, 0);
+       s = __get_sample(samples, 0, 0, 0);
        log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+       log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
 
-       nr_samples = sample_size / __log_entry_sz(log_offset);
+       nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
 
        for (i = 0; i < nr_samples; i++) {
 
-               s = (struct io_sample *)((char *)__get_sample(samples, log_offset, i) +
-                       i * sizeof(struct io_u_plat_entry));
+               s_tmp = __get_sample(samples, log_offset, log_issue_time, i);
+               s = (struct io_sample *)((char *)s_tmp +
+                                        i * sizeof(struct io_u_plat_entry));
 
                entry = s->data.plat_entry;
                io_u_plat = entry->io_u_plat;
@@ -1595,6 +1597,7 @@ static struct cmd_iolog_pdu *convert_iolog_gz(struct fio_net_cmd *cmd,
        uint64_t nr_samples;
        size_t total;
        char *p;
+       size_t log_entry_size;
 
        stream.zalloc = Z_NULL;
        stream.zfree = Z_NULL;
@@ -1610,11 +1613,13 @@ static struct cmd_iolog_pdu *convert_iolog_gz(struct fio_net_cmd *cmd,
         */
        nr_samples = le64_to_cpu(pdu->nr_samples);
 
+       log_entry_size = __log_entry_sz(le32_to_cpu(pdu->log_offset),
+                                       le32_to_cpu(pdu->log_issue_time));
        if (pdu->log_type == IO_LOG_TYPE_HIST)
-               total = nr_samples * (__log_entry_sz(le32_to_cpu(pdu->log_offset)) +
-                                       sizeof(struct io_u_plat_entry));
+               total = nr_samples * (log_entry_size +
+                                     sizeof(struct io_u_plat_entry));
        else
-               total = nr_samples * __log_entry_sz(le32_to_cpu(pdu->log_offset));
+               total = nr_samples * log_entry_size;
        ret = malloc(total + sizeof(*pdu));
        ret->nr_samples = nr_samples;
 
@@ -1703,6 +1708,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
        ret->compressed         = le32_to_cpu(ret->compressed);
        ret->log_offset         = le32_to_cpu(ret->log_offset);
        ret->log_prio           = le32_to_cpu(ret->log_prio);
+       ret->log_issue_time     = le32_to_cpu(ret->log_issue_time);
        ret->log_hist_coarseness = le32_to_cpu(ret->log_hist_coarseness);
        ret->per_job_logs       = le32_to_cpu(ret->per_job_logs);
 
@@ -1713,7 +1719,7 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
        for (i = 0; i < ret->nr_samples; i++) {
                struct io_sample *s;
 
-               s = __get_sample(samples, ret->log_offset, i);
+               s = __get_sample(samples, ret->log_offset, ret->log_issue_time, i);
                if (ret->log_type == IO_LOG_TYPE_HIST)
                        s = (struct io_sample *)((char *)s + sizeof(struct io_u_plat_entry) * i);
 
@@ -1726,11 +1732,13 @@ static struct cmd_iolog_pdu *convert_iolog(struct fio_net_cmd *cmd,
                s->bs           = le64_to_cpu(s->bs);
                s->priority     = le16_to_cpu(s->priority);
 
-               if (ret->log_offset) {
-                       struct io_sample_offset *so = (void *) s;
+               if (ret->log_offset)
+                       s->aux[IOS_AUX_OFFSET_INDEX] =
+                               le64_to_cpu(s->aux[IOS_AUX_OFFSET_INDEX]);
 
-                       so->offset = le64_to_cpu(so->offset);
-               }
+               if (ret->log_issue_time)
+                       s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
+                               le64_to_cpu(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
 
                if (ret->log_type == IO_LOG_TYPE_HIST) {
                        s->data.plat_entry = (struct io_u_plat_entry *)(((char *)s) + sizeof(*s));
index 3eef022b909f48e5331c640af63f8cb674c28350..eb92cde322d6d32844dd852e357d82abb35c88c3 100755 (executable)
--- a/configure
+++ b/configure
@@ -366,6 +366,8 @@ elif check_define __sun__ ; then
   CFLAGS="$CFLAGS -D_REENTRANT"
 elif check_define _WIN32 ; then
   targetos='CYGWIN'
+elif check_define __QNX__ ; then
+  targetos='QNX'
 else
   targetos=`uname -s`
 fi
@@ -466,6 +468,9 @@ CYGWIN*)
   pthread_condattr_setclock="no"
   pthread_affinity="no"
   ;;
+QNX)
+  LIBS="-lsocket"
+  ;;
 esac
 
 # Now we know the target platform we can have another guess at the preferred
@@ -1024,48 +1029,6 @@ if test "$disable_rdma" != "yes" && compile_prog "" "-lrdmacm" "rdma"; then
 fi
 print_config "rdmacm" "$rdmacm"
 
-##########################################
-# librpma probe
-# The librpma engines require librpma>=v0.11.0 with rpma_cq_get_wc().
-if test "$librpma" != "yes" ; then
-  librpma="no"
-fi
-cat > $TMPC << EOF
-#include <librpma.h>
-int main(void)
-{
-  void *ptr = rpma_cq_get_wc;
-  (void) ptr; /* unused */
-  return 0;
-}
-EOF
-if test "$disable_rdma" != "yes" && compile_prog "" "-lrpma" "rpma"; then
-    librpma="yes"
-fi
-print_config "librpma" "$librpma"
-
-##########################################
-# libprotobuf-c probe
-if test "$libprotobuf_c" != "yes" ; then
-  libprotobuf_c="no"
-fi
-cat > $TMPC << EOF
-#include <stdio.h>
-#include <protobuf-c/protobuf-c.h>
-#if !defined(PROTOBUF_C_VERSION_NUMBER)
-# error PROTOBUF_C_VERSION_NUMBER is not defined!
-#endif
-int main(int argc, char **argv)
-{
-  (void)protobuf_c_message_check(NULL);
-  return 0;
-}
-EOF
-if compile_prog "" "-lprotobuf-c" "protobuf_c"; then
-    libprotobuf_c="yes"
-fi
-print_config "libprotobuf_c" "$libprotobuf_c"
-
 ##########################################
 # asprintf() and vasprintf() probes
 if test "$have_asprintf" != "yes" ; then
@@ -1090,6 +1053,7 @@ if test "$have_vasprintf" != "yes" ; then
 fi
 cat > $TMPC << EOF
 #include <stdio.h>
+#include <stdarg.h>
 
 int main(int argc, char **argv)
 {
@@ -3113,17 +3077,6 @@ fi
 if test "$libverbs" = "yes" -a "$rdmacm" = "yes" ; then
   output_sym "CONFIG_RDMA"
 fi
-# librpma is supported on the 'x86_64' architecture for now
-if test "$cpu" = "x86_64" -a "$libverbs" = "yes" -a "$rdmacm" = "yes" \
-    -a "$librpma" = "yes" \
-    && test "$libpmem" = "yes" -o "$libpmem2" = "yes" ; then
-  output_sym "CONFIG_LIBRPMA_APM"
-fi
-if test "$cpu" = "x86_64" -a "$libverbs" = "yes" -a "$rdmacm" = "yes" \
-    -a "$librpma" = "yes" -a "$libprotobuf_c" = "yes" \
-    && test "$libpmem" = "yes" -o "$libpmem2" = "yes" ; then
-  output_sym "CONFIG_LIBRPMA_GPSPM"
-fi
 if test "$clock_gettime" = "yes" ; then
   output_sym "CONFIG_CLOCK_GETTIME"
 fi
index 8a4c8e64419c33e0912b6e921fb6ec6561abbca5..fc45cd27d8d3ddc2c350ad590629eef71c74576e 100644 (file)
@@ -43,18 +43,19 @@ static int fdp_ruh_info(struct thread_data *td, struct fio_file *f,
 static int init_ruh_info(struct thread_data *td, struct fio_file *f)
 {
        struct fio_ruhs_info *ruhs, *tmp;
+       uint32_t nr_ruhs;
        int i, ret;
 
-       ruhs = scalloc(1, sizeof(*ruhs) + FDP_MAX_RUHS * sizeof(*ruhs->plis));
-       if (!ruhs)
-               return -ENOMEM;
-
        /* set up the data structure used for FDP to work with the supplied stream IDs */
        if (td->o.dp_type == FIO_DP_STREAMS) {
                if (!td->o.dp_nr_ids) {
                        log_err("fio: stream IDs must be provided for dataplacement=streams\n");
                        return -EINVAL;
                }
+               ruhs = scalloc(1, sizeof(*ruhs) + td->o.dp_nr_ids * sizeof(*ruhs->plis));
+               if (!ruhs)
+                       return -ENOMEM;
+
                ruhs->nr_ruhs = td->o.dp_nr_ids;
                for (int i = 0; i < ruhs->nr_ruhs; i++)
                        ruhs->plis[i] = td->o.dp_ids[i];
@@ -63,26 +64,49 @@ static int init_ruh_info(struct thread_data *td, struct fio_file *f)
                return 0;
        }
 
+       /*
+        * Since we don't know the actual number of ruhs. Only fetch the header.
+        * We will reallocate this buffer and then fetch all the ruhs again.
+        */
+       ruhs = calloc(1, sizeof(*ruhs));
        ret = fdp_ruh_info(td, f, ruhs);
        if (ret) {
-               log_info("fio: ruh info failed for %s (%d)\n",
-                        f->file_name, -ret);
+               log_err("fio: ruh info failed for %s (%d)\n",
+                       f->file_name, -ret);
                goto out;
        }
 
-       if (ruhs->nr_ruhs > FDP_MAX_RUHS)
-               ruhs->nr_ruhs = FDP_MAX_RUHS;
+       nr_ruhs = ruhs->nr_ruhs;
+       ruhs = realloc(ruhs, sizeof(*ruhs) + nr_ruhs * sizeof(*ruhs->plis));
+       if (!ruhs) {
+               log_err("fio: ruhs buffer realloc failed for %s\n",
+                       f->file_name);
+               ret = -ENOMEM;
+               goto out;
+       }
 
-       if (td->o.dp_nr_ids == 0) {
-               f->ruhs_info = ruhs;
-               return 0;
+       ruhs->nr_ruhs = nr_ruhs;
+       ret = fdp_ruh_info(td, f, ruhs);
+       if (ret) {
+               log_err("fio: ruh info failed for %s (%d)\n",
+                       f->file_name, -ret);
+               goto out;
        }
 
-       for (i = 0; i < td->o.dp_nr_ids; i++) {
-               if (td->o.dp_ids[i] >= ruhs->nr_ruhs) {
-                       ret = -EINVAL;
-                       goto out;
+       if (td->o.dp_nr_ids == 0) {
+               if (ruhs->nr_ruhs > FIO_MAX_DP_IDS)
+                       ruhs->nr_ruhs = FIO_MAX_DP_IDS;
+       } else {
+               for (i = 0; i < td->o.dp_nr_ids; i++) {
+                       if (td->o.dp_ids[i] >= ruhs->nr_ruhs) {
+                               log_err("fio: for %s PID index %d must be smaller than %d\n",
+                                       f->file_name, td->o.dp_ids[i],
+                                       ruhs->nr_ruhs);
+                               ret = -EINVAL;
+                               goto out;
+                       }
                }
+               ruhs->nr_ruhs = td->o.dp_nr_ids;
        }
 
        tmp = scalloc(1, sizeof(*tmp) + ruhs->nr_ruhs * sizeof(*tmp->plis));
@@ -91,12 +115,23 @@ static int init_ruh_info(struct thread_data *td, struct fio_file *f)
                goto out;
        }
 
+       if (td->o.dp_nr_ids == 0) {
+               for (i = 0; i < ruhs->nr_ruhs; i++)
+                       tmp->plis[i] = ruhs->plis[i];
+
+               tmp->nr_ruhs = ruhs->nr_ruhs;
+               f->ruhs_info = tmp;
+               free(ruhs);
+
+               return 0;
+       }
+
        tmp->nr_ruhs = td->o.dp_nr_ids;
        for (i = 0; i < td->o.dp_nr_ids; i++)
                tmp->plis[i] = ruhs->plis[td->o.dp_ids[i]];
        f->ruhs_info = tmp;
 out:
-       sfree(ruhs);
+       free(ruhs);
        return ret;
 }
 
@@ -116,7 +151,7 @@ static int init_ruh_scheme(struct thread_data *td, struct fio_file *f)
 
        if (!scheme_fp) {
                log_err("fio: ruh scheme failed to open scheme file %s\n",
-                        td->o.dp_scheme_file);
+                       td->o.dp_scheme_file);
                ret = -errno;
                goto out;
        }
index 71d19d69653c9e50eabc87cb3effa705c31f7e5d..84b7be5b407fe1811e77d2cfe5461d8c612a0de0 100644 (file)
@@ -5,8 +5,7 @@
 
 #define STREAMS_DIR_DTYPE      1
 #define FDP_DIR_DTYPE          2
-#define FDP_MAX_RUHS           128
-#define FIO_MAX_DP_IDS                 16
+#define FIO_MAX_DP_IDS                 128
 #define DP_MAX_SCHEME_ENTRIES  32
 
 /*
index 37cc2ada817dad98faa6968c9d3c8b2e8d0a8af9..65ef0da700bf2027d1cbab2637ee8d0a0d37d40f 100644 (file)
@@ -11,6 +11,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <stdint.h>
 
 #include "../fio.h"
 #include "../optgroup.h"
index c52f09004eefed0f0b04bca197d1ae3601271be1..e530335958f6ab532855a15fb0c808aaa89c36fa 100644 (file)
@@ -129,7 +129,7 @@ static int open_file(struct thread_data *td, struct fio_file *f)
                uint64_t nsec;
 
                nsec = ntime_since_now(&start);
-               add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+               add_clat_sample(td, data->stat_ddir, nsec, 0, NULL);
        }
 
        return 0;
@@ -200,7 +200,7 @@ static int stat_file(struct thread_data *td, struct fio_file *f)
                uint64_t nsec;
 
                nsec = ntime_since_now(&start);
-               add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+               add_clat_sample(td, data->stat_ddir, nsec, 0, NULL);
        }
 
        return 0;
@@ -250,7 +250,7 @@ static int delete_file(struct thread_data *td, struct fio_file *f)
                uint64_t nsec;
 
                nsec = ntime_since_now(&start);
-               add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
+               add_clat_sample(td, data->stat_ddir, nsec, 0, NULL);
        }
 
        return 0;
index 9069fa3e81a5caaf03c038481f072c25fcca3385..96a042a88820b801481c737cbd931b646d2a6d49 100644 (file)
@@ -34,6 +34,13 @@ enum uring_cmd_type {
        FIO_URING_CMD_NVME = 1,
 };
 
+enum uring_cmd_write_mode {
+       FIO_URING_CMD_WMODE_WRITE = 1,
+       FIO_URING_CMD_WMODE_UNCOR,
+       FIO_URING_CMD_WMODE_ZEROES,
+       FIO_URING_CMD_WMODE_VERIFY,
+};
+
 struct io_sq_ring {
        unsigned *head;
        unsigned *tail;
@@ -82,11 +89,16 @@ struct ioring_data {
        struct cmdprio cmdprio;
 
        struct nvme_dsm *dsm;
+       uint32_t cdw12_flags[DDIR_RWDIR_CNT];
+       uint8_t write_opcode;
 };
 
 struct ioring_options {
        struct thread_data *td;
        unsigned int hipri;
+       unsigned int readfua;
+       unsigned int writefua;
+       unsigned int write_mode;
        struct cmdprio_options cmdprio_options;
        unsigned int fixedbufs;
        unsigned int registerfiles;
@@ -94,7 +106,6 @@ struct ioring_options {
        unsigned int sqpoll_set;
        unsigned int sqpoll_cpu;
        unsigned int nonvectored;
-       unsigned int uncached;
        unsigned int nowait;
        unsigned int force_async;
        unsigned int md_per_io_size;
@@ -135,6 +146,54 @@ static struct fio_option options[] = {
                .category = FIO_OPT_C_ENGINE,
                .group  = FIO_OPT_G_IOURING,
        },
+       {
+               .name   = "readfua",
+               .lname  = "Read fua flag support",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct ioring_options, readfua),
+               .help   = "Set FUA flag (force unit access) for all Read operations",
+               .def    = "0",
+               .category = FIO_OPT_C_ENGINE,
+               .group  = FIO_OPT_G_IOURING,
+       },
+       {
+               .name   = "writefua",
+               .lname  = "Write fua flag support",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct ioring_options, writefua),
+               .help   = "Set FUA flag (force unit access) for all Write operations",
+               .def    = "0",
+               .category = FIO_OPT_C_ENGINE,
+               .group  = FIO_OPT_G_IOURING,
+       },
+       {
+               .name   = "write_mode",
+               .lname  = "Additional Write commands support (Write Uncorrectable, Write Zeores)",
+               .type   = FIO_OPT_STR,
+               .off1   = offsetof(struct ioring_options, write_mode),
+               .help   = "Issue Write Uncorrectable or Zeroes command instaed of Write command",
+               .def    = "write",
+               .posval = {
+                         { .ival = "write",
+                           .oval = FIO_URING_CMD_WMODE_WRITE,
+                           .help = "Issue Write commands for write operations"
+                         },
+                         { .ival = "uncor",
+                           .oval = FIO_URING_CMD_WMODE_UNCOR,
+                           .help = "Issue Write Uncorrectable commands for write operations"
+                         },
+                         { .ival = "zeroes",
+                           .oval = FIO_URING_CMD_WMODE_ZEROES,
+                           .help = "Issue Write Zeroes commands for write operations"
+                         },
+                         { .ival = "verify",
+                           .oval = FIO_URING_CMD_WMODE_VERIFY,
+                           .help = "Issue Verify commands for write operations"
+                         },
+               },
+               .category = FIO_OPT_C_ENGINE,
+               .group  = FIO_OPT_G_IOURING,
+       },
        {
                .name   = "fixedbufs",
                .lname  = "Fixed (pre-mapped) IO buffers",
@@ -184,11 +243,7 @@ static struct fio_option options[] = {
        {
                .name   = "uncached",
                .lname  = "Uncached",
-               .type   = FIO_OPT_INT,
-               .off1   = offsetof(struct ioring_options, uncached),
-               .help   = "Use RWF_UNCACHED for buffered read/writes",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_IOURING,
+               .type   = FIO_OPT_SOFT_DEPRECATED,
        },
        {
                .name   = "nowait",
@@ -335,10 +390,10 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
                        }
                }
                sqe->rw_flags = 0;
-               if (!td->o.odirect && o->uncached)
-                       sqe->rw_flags |= RWF_UNCACHED;
                if (o->nowait)
                        sqe->rw_flags |= RWF_NOWAIT;
+               if (td->o.oatomic && io_u->ddir == DDIR_WRITE)
+                       sqe->rw_flags |= RWF_ATOMIC;
 
                /*
                 * Since io_uring can have a submission context (sqthread_poll)
@@ -405,8 +460,6 @@ static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
                sqe->fd = f->fd;
        }
        sqe->rw_flags = 0;
-       if (!td->o.odirect && o->uncached)
-               sqe->rw_flags |= RWF_UNCACHED;
        if (o->nowait)
                sqe->rw_flags |= RWF_NOWAIT;
 
@@ -432,7 +485,7 @@ static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
 
        return fio_nvme_uring_cmd_prep(cmd, io_u,
                        o->nonvectored ? NULL : &ld->iovecs[io_u->index],
-                       dsm);
+                       dsm, ld->write_opcode, ld->cdw12_flags[io_u->ddir]);
 }
 
 static struct io_u *fio_ioring_event(struct thread_data *td, int event)
@@ -476,7 +529,7 @@ static struct io_u *fio_ioring_cmd_event(struct thread_data *td, int event)
        io_u = (struct io_u *) (uintptr_t) cqe->user_data;
 
        if (cqe->res != 0) {
-               io_u->error = -cqe->res;
+               io_u->error = abs(cqe->res);
                return io_u;
        } else {
                io_u->error = 0;
@@ -494,6 +547,39 @@ static struct io_u *fio_ioring_cmd_event(struct thread_data *td, int event)
        return io_u;
 }
 
+static char *fio_ioring_cmd_errdetails(struct thread_data *td,
+                                      struct io_u *io_u)
+{
+       struct ioring_options *o = td->eo;
+       unsigned int sct = (io_u->error >> 8) & 0x7;
+       unsigned int sc = io_u->error & 0xff;
+#define MAXERRDETAIL 1024
+#define MAXMSGCHUNK 128
+       char *msg, msgchunk[MAXMSGCHUNK];
+
+       msg = calloc(1, MAXERRDETAIL);
+       strcpy(msg, "io_uring_cmd: ");
+
+       snprintf(msgchunk, MAXMSGCHUNK, "%s: ", io_u->file->file_name);
+       strlcat(msg, msgchunk, MAXERRDETAIL);
+
+       if (o->cmd_type == FIO_URING_CMD_NVME) {
+               strlcat(msg, "cq entry status (", MAXERRDETAIL);
+
+               snprintf(msgchunk, MAXMSGCHUNK, "sct=0x%02x; ", sct);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+
+               snprintf(msgchunk, MAXMSGCHUNK, "sc=0x%02x)", sc);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+       } else {
+               /* Print status code in generic */
+               snprintf(msgchunk, MAXMSGCHUNK, "status=0x%x", io_u->error);
+               strlcat(msg, msgchunk, MAXERRDETAIL);
+       }
+
+       return msg;
+}
+
 static int fio_ioring_cqring_reap(struct thread_data *td, unsigned int events,
                                   unsigned int max)
 {
@@ -1219,6 +1305,30 @@ static int fio_ioring_init(struct thread_data *td)
                }
        }
 
+       if (!strcmp(td->io_ops->name, "io_uring_cmd")) {
+               if (td_write(td)) {
+                       switch (o->write_mode) {
+                       case FIO_URING_CMD_WMODE_UNCOR:
+                               ld->write_opcode = nvme_cmd_write_uncor;
+                               break;
+                       case FIO_URING_CMD_WMODE_ZEROES:
+                               ld->write_opcode = nvme_cmd_write_zeroes;
+                               break;
+                       case FIO_URING_CMD_WMODE_VERIFY:
+                               ld->write_opcode = nvme_cmd_verify;
+                               break;
+                       default:
+                               ld->write_opcode = nvme_cmd_write;
+                               break;
+                       }
+               }
+
+               if (o->readfua)
+                       ld->cdw12_flags[DDIR_READ] = 1 << 30;
+               if (o->writefua)
+                       ld->cdw12_flags[DDIR_WRITE] = 1 << 30;
+       }
+
        return 0;
 }
 
@@ -1341,6 +1451,14 @@ static int fio_ioring_cmd_open_file(struct thread_data *td, struct fio_file *f)
                        td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
                        return 1;
                }
+
+               if (o->write_mode != FIO_URING_CMD_WMODE_WRITE &&
+                   !td_write(td)) {
+                       log_err("%s: 'readwrite=|rw=' has no write\n",
+                                       f->file_name);
+                       td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
+                       return 1;
+               }
        }
        if (!ld || !o->registerfiles)
                return generic_open_file(td, f);
@@ -1441,10 +1559,12 @@ static int fio_ioring_cmd_fetch_ruhs(struct thread_data *td, struct fio_file *f,
                                     struct fio_ruhs_info *fruhs_info)
 {
        struct nvme_fdp_ruh_status *ruhs;
-       int bytes, ret, i;
+       int bytes, nr_ruhs, ret, i;
+
+       nr_ruhs = fruhs_info->nr_ruhs;
+       bytes = sizeof(*ruhs) + fruhs_info->nr_ruhs * sizeof(struct nvme_fdp_ruh_status_desc);
 
-       bytes = sizeof(*ruhs) + FDP_MAX_RUHS * sizeof(struct nvme_fdp_ruh_status_desc);
-       ruhs = scalloc(1, bytes);
+       ruhs = calloc(1, bytes);
        if (!ruhs)
                return -ENOMEM;
 
@@ -1453,10 +1573,10 @@ static int fio_ioring_cmd_fetch_ruhs(struct thread_data *td, struct fio_file *f,
                goto free;
 
        fruhs_info->nr_ruhs = le16_to_cpu(ruhs->nruhsd);
-       for (i = 0; i < fruhs_info->nr_ruhs; i++)
+       for (i = 0; i < nr_ruhs; i++)
                fruhs_info->plis[i] = le16_to_cpu(ruhs->ruhss[i].pid);
 free:
-       sfree(ruhs);
+       free(ruhs);
        return ret;
 }
 
@@ -1464,7 +1584,8 @@ static struct ioengine_ops ioengine_uring = {
        .name                   = "io_uring",
        .version                = FIO_IOOPS_VERSION,
        .flags                  = FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD |
-                                       FIO_ASYNCIO_SETS_ISSUE_TIME,
+                                       FIO_ASYNCIO_SETS_ISSUE_TIME |
+                                       FIO_ATOMICWRITES,
        .init                   = fio_ioring_init,
        .post_init              = fio_ioring_post_init,
        .io_u_init              = fio_ioring_io_u_init,
@@ -1496,6 +1617,7 @@ static struct ioengine_ops ioengine_uring_cmd = {
        .commit                 = fio_ioring_commit,
        .getevents              = fio_ioring_getevents,
        .event                  = fio_ioring_cmd_event,
+       .errdetails             = fio_ioring_cmd_errdetails,
        .cleanup                = fio_ioring_cleanup,
        .open_file              = fio_ioring_cmd_open_file,
        .close_file             = fio_ioring_cmd_close_file,
index aaccc7ce097fc6db724fdcea9f4e8b60cc42fa4a..c2d437938e2284d50737dbae4a9f7c43ffe9a194 100644 (file)
@@ -110,6 +110,10 @@ static int fio_libaio_prep(struct thread_data *td, struct io_u *io_u)
                io_prep_pwrite(iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
                if (o->nowait)
                        iocb->aio_rw_flags |= RWF_NOWAIT;
+#ifdef FIO_HAVE_RWF_ATOMIC
+               if (td->o.oatomic)
+                       iocb->aio_rw_flags |= RWF_ATOMIC;
+#endif
        } else if (ddir_sync(io_u->ddir))
                io_prep_fsync(iocb, f->fd);
 
@@ -440,7 +444,8 @@ FIO_STATIC struct ioengine_ops ioengine = {
        .name                   = "libaio",
        .version                = FIO_IOOPS_VERSION,
        .flags                  = FIO_ASYNCIO_SYNC_TRIM |
-                                       FIO_ASYNCIO_SETS_ISSUE_TIME,
+                                       FIO_ASYNCIO_SETS_ISSUE_TIME |
+                                       FIO_ATOMICWRITES,
        .init                   = fio_libaio_init,
        .post_init              = fio_libaio_post_init,
        .prep                   = fio_libaio_prep,
diff --git a/engines/librpma_apm.c b/engines/librpma_apm.c
deleted file mode 100644 (file)
index 896240d..0000000
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
-* librpma_apm: IO engine that uses PMDK librpma to read and write data,
- *             based on Appliance Persistency Method
- *
- * Copyright 2020-2021, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include "librpma_fio.h"
-
-/* client side implementation */
-
-static inline int client_io_flush(struct thread_data *td,
-               struct io_u *first_io_u, struct io_u *last_io_u,
-               unsigned long long int len);
-
-static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index);
-
-static int client_init(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd;
-       unsigned int sq_size;
-       uint32_t cq_size;
-       struct rpma_conn_cfg *cfg = NULL;
-       struct rpma_peer_cfg *pcfg = NULL;
-       int ret;
-
-       /* not supported readwrite = trim / randtrim / trimwrite */
-       if (td_trim(td)) {
-               td_verror(td, EINVAL, "Not supported mode.");
-               return -1;
-       }
-
-       /*
-        * Calculate the required queue sizes where:
-        * - the send queue (SQ) has to be big enough to accommodate
-        *   all io_us (WRITEs) and all flush requests (FLUSHes)
-        * - the completion queue (CQ) has to be big enough to accommodate all
-        *   success and error completions (cq_size = sq_size)
-        */
-       if (td_random(td) || td_rw(td)) {
-               /*
-                * sq_size = max(rand_read_sq_size, rand_write_sq_size)
-                * where rand_read_sq_size < rand_write_sq_size because read
-                * does not require flush afterwards
-                * rand_write_sq_size = N * (WRITE + FLUSH)
-                *
-                * Note: rw is no different from random write since having
-                * interleaved reads with writes in extreme forces you to flush
-                * as often as when the writes are random.
-                */
-               sq_size = 2 * td->o.iodepth;
-       } else if (td_write(td)) {
-               /* sequential TD_DDIR_WRITE only */
-               if (td->o.sync_io) {
-                       sq_size = 2; /* WRITE + FLUSH */
-               } else {
-                       /*
-                        * N * WRITE + B * FLUSH where:
-                        * - B == ceil(iodepth / iodepth_batch)
-                        *   which is the number of batches for N writes
-                        */
-                       sq_size = td->o.iodepth + LIBRPMA_FIO_CEIL(td->o.iodepth,
-                                       td->o.iodepth_batch);
-               }
-       } else {
-               /* TD_DDIR_READ only */
-               if (td->o.sync_io) {
-                       sq_size = 1; /* READ */
-               } else {
-                       sq_size = td->o.iodepth; /* N x READ */
-               }
-       }
-       cq_size = sq_size;
-
-       /* create a connection configuration object */
-       if ((ret = rpma_conn_cfg_new(&cfg))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_new");
-               return -1;
-       }
-
-       /* apply queue sizes */
-       if ((ret = rpma_conn_cfg_set_sq_size(cfg, sq_size))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_sq_size");
-               goto err_cfg_delete;
-       }
-       if ((ret = rpma_conn_cfg_set_cq_size(cfg, cq_size))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_cq_size");
-               goto err_cfg_delete;
-       }
-
-       if (librpma_fio_client_init(td, cfg))
-               goto err_cfg_delete;
-
-       ccd = td->io_ops_data;
-
-       if (ccd->server_mr_flush_type == RPMA_FLUSH_TYPE_PERSISTENT) {
-               if (!ccd->ws->direct_write_to_pmem) {
-                       if (td->thread_number == 1)
-                               log_err(
-                                       "Fio librpma engine will not work until the Direct Write to PMem on the server side is possible (direct_write_to_pmem)\n");
-                       goto err_cleanup_common;
-               }
-
-               /* configure peer's direct write to pmem support */
-               if ((ret = rpma_peer_cfg_new(&pcfg))) {
-                       librpma_td_verror(td, ret, "rpma_peer_cfg_new");
-                       goto err_cleanup_common;
-               }
-
-               if ((ret = rpma_peer_cfg_set_direct_write_to_pmem(pcfg, true))) {
-                       librpma_td_verror(td, ret,
-                               "rpma_peer_cfg_set_direct_write_to_pmem");
-                       (void) rpma_peer_cfg_delete(&pcfg);
-                       goto err_cleanup_common;
-               }
-
-               if ((ret = rpma_conn_apply_remote_peer_cfg(ccd->conn, pcfg))) {
-                       librpma_td_verror(td, ret,
-                               "rpma_conn_apply_remote_peer_cfg");
-                       (void) rpma_peer_cfg_delete(&pcfg);
-                       goto err_cleanup_common;
-               }
-
-               (void) rpma_peer_cfg_delete(&pcfg);
-       } else if (td->thread_number == 1) {
-               /* XXX log_info mixes with the JSON output */
-               log_err(
-                       "Note: Direct Write to PMem is not supported by default nor required if you use DRAM instead of PMem on the server side (direct_write_to_pmem).\n"
-                       "Remember that flushing to DRAM does not make your data persistent and may be used only for experimental purposes.\n");
-       }
-
-       if ((ret = rpma_conn_cfg_delete(&cfg))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_delete");
-               /* non fatal error - continue */
-       }
-
-       ccd->flush = client_io_flush;
-       ccd->get_io_u_index = client_get_io_u_index;
-
-       return 0;
-
-err_cleanup_common:
-       librpma_fio_client_cleanup(td);
-
-err_cfg_delete:
-       (void) rpma_conn_cfg_delete(&cfg);
-
-       return -1;
-}
-
-static void client_cleanup(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-
-       if (ccd == NULL)
-               return;
-
-       free(ccd->client_data);
-
-       librpma_fio_client_cleanup(td);
-}
-
-static inline int client_io_flush(struct thread_data *td,
-               struct io_u *first_io_u, struct io_u *last_io_u,
-               unsigned long long int len)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       size_t dst_offset = first_io_u->offset;
-       int ret;
-
-       if ((ret = rpma_flush(ccd->conn, ccd->server_mr, dst_offset, len,
-                       ccd->server_mr_flush_type, RPMA_F_COMPLETION_ALWAYS,
-                       (void *)(uintptr_t)last_io_u->index))) {
-               librpma_td_verror(td, ret, "rpma_flush");
-               return -1;
-       }
-
-       return 0;
-}
-
-static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index)
-{
-       memcpy(io_u_index, &wc->wr_id, sizeof(*io_u_index));
-
-       return 1;
-}
-
-FIO_STATIC struct ioengine_ops ioengine_client = {
-       .name                   = "librpma_apm_client",
-       .version                = FIO_IOOPS_VERSION,
-       .init                   = client_init,
-       .post_init              = librpma_fio_client_post_init,
-       .get_file_size          = librpma_fio_client_get_file_size,
-       .open_file              = librpma_fio_file_nop,
-       .queue                  = librpma_fio_client_queue,
-       .commit                 = librpma_fio_client_commit,
-       .getevents              = librpma_fio_client_getevents,
-       .event                  = librpma_fio_client_event,
-       .errdetails             = librpma_fio_client_errdetails,
-       .close_file             = librpma_fio_file_nop,
-       .cleanup                = client_cleanup,
-       .flags                  = FIO_DISKLESSIO | FIO_ASYNCIO_SETS_ISSUE_TIME,
-       .options                = librpma_fio_options,
-       .option_struct_size     = sizeof(struct librpma_fio_options_values),
-};
-
-/* server side implementation */
-
-static int server_open_file(struct thread_data *td, struct fio_file *f)
-{
-       return librpma_fio_server_open_file(td, f, NULL);
-}
-
-static enum fio_q_status server_queue(struct thread_data *td, struct io_u *io_u)
-{
-       return FIO_Q_COMPLETED;
-}
-
-FIO_STATIC struct ioengine_ops ioengine_server = {
-       .name                   = "librpma_apm_server",
-       .version                = FIO_IOOPS_VERSION,
-       .init                   = librpma_fio_server_init,
-       .open_file              = server_open_file,
-       .close_file             = librpma_fio_server_close_file,
-       .queue                  = server_queue,
-       .invalidate             = librpma_fio_file_nop,
-       .cleanup                = librpma_fio_server_cleanup,
-       .flags                  = FIO_SYNCIO,
-       .options                = librpma_fio_options,
-       .option_struct_size     = sizeof(struct librpma_fio_options_values),
-};
-
-/* register both engines */
-
-static void fio_init fio_librpma_apm_register(void)
-{
-       register_ioengine(&ioengine_client);
-       register_ioengine(&ioengine_server);
-}
-
-static void fio_exit fio_librpma_apm_unregister(void)
-{
-       unregister_ioengine(&ioengine_client);
-       unregister_ioengine(&ioengine_server);
-}
diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c
deleted file mode 100644 (file)
index 42d6163..0000000
+++ /dev/null
@@ -1,1079 +0,0 @@
-/*
- * librpma_fio: librpma_apm and librpma_gpspm engines' common part.
- *
- * Copyright 2021-2022, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#ifdef CONFIG_LIBPMEM2_INSTALLED
-#include "librpma_fio_pmem2.h"
-#else
-#include "librpma_fio_pmem.h"
-#endif /* CONFIG_LIBPMEM2_INSTALLED */
-
-struct fio_option librpma_fio_options[] = {
-       {
-               .name   = "serverip",
-               .lname  = "rpma_server_ip",
-               .type   = FIO_OPT_STR_STORE,
-               .off1   = offsetof(struct librpma_fio_options_values, server_ip),
-               .help   = "IP address the server is listening on",
-               .def    = "",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_LIBRPMA,
-       },
-       {
-               .name   = "port",
-               .lname  = "rpma_server port",
-               .type   = FIO_OPT_STR_STORE,
-               .off1   = offsetof(struct librpma_fio_options_values, port),
-               .help   = "port the server is listening on",
-               .def    = "7204",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_LIBRPMA,
-       },
-       {
-               .name   = "direct_write_to_pmem",
-               .lname  = "Direct Write to PMem (via RDMA) from the remote host is possible",
-               .type   = FIO_OPT_BOOL,
-               .off1   = offsetof(struct librpma_fio_options_values,
-                                       direct_write_to_pmem),
-               .help   = "Set to true ONLY when Direct Write to PMem from the remote host is possible (https://pmem.io/rpma/documentation/basic-direct-write-to-pmem.html)",
-               .def    = "",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_LIBRPMA,
-       },
-       {
-               .name   = "busy_wait_polling",
-               .lname  = "Set to 0 to wait for completion instead of busy-wait polling completion.",
-               .type   = FIO_OPT_BOOL,
-               .off1   = offsetof(struct librpma_fio_options_values,
-                                       busy_wait_polling),
-               .help   = "Set to false if you want to reduce CPU usage",
-               .def    = "1",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_LIBRPMA,
-       },
-       {
-               .name   = NULL,
-       },
-};
-
-int librpma_fio_td_port(const char *port_base_str, struct thread_data *td,
-               char *port_out)
-{
-       unsigned long int port_ul = strtoul(port_base_str, NULL, 10);
-       unsigned int port_new;
-
-       port_out[0] = '\0';
-
-       if (port_ul == ULONG_MAX) {
-               td_verror(td, errno, "strtoul");
-               return -1;
-       }
-       port_ul += td->thread_number - 1;
-       if (port_ul >= UINT_MAX) {
-               log_err("[%u] port number (%lu) bigger than UINT_MAX\n",
-                       td->thread_number, port_ul);
-               return -1;
-       }
-
-       port_new = port_ul;
-       snprintf(port_out, LIBRPMA_FIO_PORT_STR_LEN_MAX - 1, "%u", port_new);
-
-       return 0;
-}
-
-char *librpma_fio_allocate_dram(struct thread_data *td, size_t size,
-       struct librpma_fio_mem *mem)
-{
-       char *mem_ptr = NULL;
-       int ret;
-
-       if ((ret = posix_memalign((void **)&mem_ptr, page_size, size))) {
-               log_err("fio: posix_memalign() failed\n");
-               td_verror(td, ret, "posix_memalign");
-               return NULL;
-       }
-
-       mem->mem_ptr = mem_ptr;
-       mem->size_mmap = 0;
-
-       return mem_ptr;
-}
-
-char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f,
-               size_t size, struct librpma_fio_mem *mem)
-{
-       size_t ws_offset;
-       mem->mem_ptr = NULL;
-
-       if (size % page_size) {
-               log_err("fio: size (%zu) is not aligned to page size (%zu)\n",
-                       size, page_size);
-               return NULL;
-       }
-
-       if (f->filetype == FIO_TYPE_CHAR) {
-               /* Each thread uses a separate offset within DeviceDAX. */
-               ws_offset = (td->thread_number - 1) * size;
-       } else {
-               /* Each thread uses a separate FileSystemDAX file. No offset is needed. */
-               ws_offset = 0;
-       }
-
-       if (!f->file_name) {
-               log_err("fio: filename is not set\n");
-               return NULL;
-       }
-
-       if (librpma_fio_pmem_map_file(f, size, mem, ws_offset)) {
-               log_err("fio: librpma_fio_pmem_map_file(%s) failed\n",
-                       f->file_name);
-               return NULL;
-       }
-
-       log_info("fio: size of memory mapped from the file %s: %zu\n",
-               f->file_name, mem->size_mmap);
-
-       log_info("fio: library used to map PMem from file: %s\n", RPMA_PMEM_USED);
-
-       return mem->mem_ptr ? mem->mem_ptr + ws_offset : NULL;
-}
-
-void librpma_fio_free(struct librpma_fio_mem *mem)
-{
-       if (mem->size_mmap)
-               librpma_fio_unmap(mem);
-       else
-               free(mem->mem_ptr);
-}
-
-#define LIBRPMA_FIO_RETRY_MAX_NO       10
-#define LIBRPMA_FIO_RETRY_DELAY_S      5
-
-int librpma_fio_client_init(struct thread_data *td,
-               struct rpma_conn_cfg *cfg)
-{
-       struct librpma_fio_client_data *ccd;
-       struct librpma_fio_options_values *o = td->eo;
-       struct ibv_context *dev = NULL;
-       char port_td[LIBRPMA_FIO_PORT_STR_LEN_MAX];
-       struct rpma_conn_req *req = NULL;
-       enum rpma_conn_event event;
-       struct rpma_conn_private_data pdata;
-       enum rpma_log_level log_level_aux = RPMA_LOG_LEVEL_WARNING;
-       int remote_flush_type;
-       int retry;
-       int ret;
-
-       /* --debug=net sets RPMA_LOG_THRESHOLD_AUX to RPMA_LOG_LEVEL_INFO */
-#ifdef FIO_INC_DEBUG
-       if ((1UL << FD_NET) & fio_debug)
-               log_level_aux = RPMA_LOG_LEVEL_INFO;
-#endif
-
-       /* configure logging thresholds to see more details */
-       rpma_log_set_threshold(RPMA_LOG_THRESHOLD, RPMA_LOG_LEVEL_INFO);
-       rpma_log_set_threshold(RPMA_LOG_THRESHOLD_AUX, log_level_aux);
-
-       /* obtain an IBV context for a remote IP address */
-       if ((ret = rpma_utils_get_ibv_context(o->server_ip,
-                       RPMA_UTIL_IBV_CONTEXT_REMOTE, &dev))) {
-               librpma_td_verror(td, ret, "rpma_utils_get_ibv_context");
-               return -1;
-       }
-
-       /* allocate client's data */
-       ccd = calloc(1, sizeof(*ccd));
-       if (ccd == NULL) {
-               td_verror(td, errno, "calloc");
-               return -1;
-       }
-
-       /* allocate all in-memory queues */
-       ccd->io_us_queued = calloc(td->o.iodepth, sizeof(*ccd->io_us_queued));
-       if (ccd->io_us_queued == NULL) {
-               td_verror(td, errno, "calloc");
-               goto err_free_ccd;
-       }
-
-       ccd->io_us_flight = calloc(td->o.iodepth, sizeof(*ccd->io_us_flight));
-       if (ccd->io_us_flight == NULL) {
-               td_verror(td, errno, "calloc");
-               goto err_free_io_u_queues;
-       }
-
-       ccd->io_us_completed = calloc(td->o.iodepth,
-                       sizeof(*ccd->io_us_completed));
-       if (ccd->io_us_completed == NULL) {
-               td_verror(td, errno, "calloc");
-               goto err_free_io_u_queues;
-       }
-
-       /* create a new peer object */
-       if ((ret = rpma_peer_new(dev, &ccd->peer))) {
-               librpma_td_verror(td, ret, "rpma_peer_new");
-               goto err_free_io_u_queues;
-       }
-
-       /* create a connection request */
-       if (librpma_fio_td_port(o->port, td, port_td))
-               goto err_peer_delete;
-
-       for (retry = 0; retry < LIBRPMA_FIO_RETRY_MAX_NO; retry++) {
-               if ((ret = rpma_conn_req_new(ccd->peer, o->server_ip, port_td,
-                               cfg, &req))) {
-                       librpma_td_verror(td, ret, "rpma_conn_req_new");
-                       goto err_peer_delete;
-               }
-
-               /*
-                * Connect the connection request
-                * and obtain the connection object.
-                */
-               if ((ret = rpma_conn_req_connect(&req, NULL, &ccd->conn))) {
-                       librpma_td_verror(td, ret, "rpma_conn_req_connect");
-                       goto err_req_delete;
-               }
-
-               /* wait for the connection to establish */
-               if ((ret = rpma_conn_next_event(ccd->conn, &event))) {
-                       librpma_td_verror(td, ret, "rpma_conn_next_event");
-                       goto err_conn_delete;
-               } else if (event == RPMA_CONN_ESTABLISHED) {
-                       break;
-               } else if (event == RPMA_CONN_REJECTED) {
-                       (void) rpma_conn_disconnect(ccd->conn);
-                       (void) rpma_conn_delete(&ccd->conn);
-                       if (retry < LIBRPMA_FIO_RETRY_MAX_NO - 1) {
-                               log_err("Thread [%d]: Retrying (#%i) ...\n",
-                                       td->thread_number, retry + 1);
-                               sleep(LIBRPMA_FIO_RETRY_DELAY_S);
-                       } else {
-                               log_err(
-                                       "Thread [%d]: The maximum number of retries exceeded. Closing.\n",
-                                       td->thread_number);
-                       }
-               } else {
-                       log_err(
-                               "rpma_conn_next_event returned an unexptected event: (%s != RPMA_CONN_ESTABLISHED)\n",
-                               rpma_utils_conn_event_2str(event));
-                       goto err_conn_delete;
-               }
-       }
-
-       if (retry > 0)
-               log_err("Thread [%d]: Connected after retry #%i\n",
-                       td->thread_number, retry);
-
-       if (ccd->conn == NULL)
-               goto err_peer_delete;
-
-       /* get the connection's main CQ */
-       if ((ret = rpma_conn_get_cq(ccd->conn, &ccd->cq))) {
-               librpma_td_verror(td, ret, "rpma_conn_get_cq");
-               goto err_conn_delete;
-       }
-
-       /* get the connection's private data sent from the server */
-       if ((ret = rpma_conn_get_private_data(ccd->conn, &pdata))) {
-               librpma_td_verror(td, ret, "rpma_conn_get_private_data");
-               goto err_conn_delete;
-       }
-
-       /* get the server's workspace representation */
-       ccd->ws = pdata.ptr;
-
-       /* create the server's memory representation */
-       if ((ret = rpma_mr_remote_from_descriptor(&ccd->ws->descriptor[0],
-                       ccd->ws->mr_desc_size, &ccd->server_mr))) {
-               librpma_td_verror(td, ret, "rpma_mr_remote_from_descriptor");
-               goto err_conn_delete;
-       }
-
-       /* get the total size of the shared server memory */
-       if ((ret = rpma_mr_remote_get_size(ccd->server_mr, &ccd->ws_size))) {
-               librpma_td_verror(td, ret, "rpma_mr_remote_get_size");
-               goto err_conn_delete;
-       }
-
-       /* get flush type of the remote node */
-       if ((ret = rpma_mr_remote_get_flush_type(ccd->server_mr,
-                       &remote_flush_type))) {
-               librpma_td_verror(td, ret, "rpma_mr_remote_get_flush_type");
-               goto err_conn_delete;
-       }
-
-       ccd->server_mr_flush_type =
-               (remote_flush_type & RPMA_MR_USAGE_FLUSH_TYPE_PERSISTENT) ?
-               RPMA_FLUSH_TYPE_PERSISTENT : RPMA_FLUSH_TYPE_VISIBILITY;
-
-       /*
-        * Assure an io_us buffer allocation is page-size-aligned which is required
-        * to register for RDMA. User-provided value is intentionally ignored.
-        */
-       td->o.mem_align = page_size;
-
-       td->io_ops_data = ccd;
-
-       return 0;
-
-err_conn_delete:
-       (void) rpma_conn_disconnect(ccd->conn);
-       (void) rpma_conn_delete(&ccd->conn);
-
-err_req_delete:
-       (void) rpma_conn_req_delete(&req);
-
-err_peer_delete:
-       (void) rpma_peer_delete(&ccd->peer);
-
-err_free_io_u_queues:
-       free(ccd->io_us_queued);
-       free(ccd->io_us_flight);
-       free(ccd->io_us_completed);
-
-err_free_ccd:
-       free(ccd);
-
-       return -1;
-}
-
-void librpma_fio_client_cleanup(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       enum rpma_conn_event ev;
-       int ret;
-
-       if (ccd == NULL)
-               return;
-
-       /* delete the iou's memory registration */
-       if ((ret = rpma_mr_dereg(&ccd->orig_mr)))
-               librpma_td_verror(td, ret, "rpma_mr_dereg");
-       /* delete the iou's memory registration */
-       if ((ret = rpma_mr_remote_delete(&ccd->server_mr)))
-               librpma_td_verror(td, ret, "rpma_mr_remote_delete");
-       /* initiate disconnection */
-       if ((ret = rpma_conn_disconnect(ccd->conn)))
-               librpma_td_verror(td, ret, "rpma_conn_disconnect");
-       /* wait for disconnection to end up */
-       if ((ret = rpma_conn_next_event(ccd->conn, &ev))) {
-               librpma_td_verror(td, ret, "rpma_conn_next_event");
-       } else if (ev != RPMA_CONN_CLOSED) {
-               log_err(
-                       "client_cleanup received an unexpected event (%s != RPMA_CONN_CLOSED)\n",
-                       rpma_utils_conn_event_2str(ev));
-       }
-       /* delete the connection */
-       if ((ret = rpma_conn_delete(&ccd->conn)))
-               librpma_td_verror(td, ret, "rpma_conn_delete");
-       /* delete the peer */
-       if ((ret = rpma_peer_delete(&ccd->peer)))
-               librpma_td_verror(td, ret, "rpma_peer_delete");
-       /* free the software queues */
-       free(ccd->io_us_queued);
-       free(ccd->io_us_flight);
-       free(ccd->io_us_completed);
-       free(ccd);
-       td->io_ops_data = NULL; /* zero ccd */
-}
-
-int librpma_fio_file_nop(struct thread_data *td, struct fio_file *f)
-{
-       /* NOP */
-       return 0;
-}
-
-int librpma_fio_client_post_init(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd =  td->io_ops_data;
-       size_t io_us_size;
-       int ret;
-
-       /*
-        * td->orig_buffer is not aligned. The engine requires aligned io_us
-        * so FIO aligns up the address using the formula below.
-        */
-       ccd->orig_buffer_aligned = PTR_ALIGN(td->orig_buffer, page_mask) +
-                       td->o.mem_align;
-
-       /*
-        * td->orig_buffer_size beside the space really consumed by io_us
-        * has paddings which can be omitted for the memory registration.
-        */
-       io_us_size = (unsigned long long)td_max_bs(td) *
-                       (unsigned long long)td->o.iodepth;
-
-       if ((ret = rpma_mr_reg(ccd->peer, ccd->orig_buffer_aligned, io_us_size,
-                       RPMA_MR_USAGE_READ_DST | RPMA_MR_USAGE_READ_SRC |
-                       RPMA_MR_USAGE_WRITE_DST | RPMA_MR_USAGE_WRITE_SRC |
-                       RPMA_MR_USAGE_FLUSH_TYPE_PERSISTENT, &ccd->orig_mr)))
-               librpma_td_verror(td, ret, "rpma_mr_reg");
-       return ret;
-}
-
-int librpma_fio_client_get_file_size(struct thread_data *td,
-               struct fio_file *f)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-
-       f->real_file_size = ccd->ws_size;
-       fio_file_set_size_known(f);
-
-       return 0;
-}
-
-static enum fio_q_status client_queue_sync(struct thread_data *td,
-               struct io_u *io_u)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       struct ibv_wc wc;
-       unsigned io_u_index;
-       int ret;
-
-       /* execute io_u */
-       if (io_u->ddir == DDIR_READ) {
-               /* post an RDMA read operation */
-               if (librpma_fio_client_io_read(td, io_u,
-                               RPMA_F_COMPLETION_ALWAYS))
-                       goto err;
-       } else if (io_u->ddir == DDIR_WRITE) {
-               /* post an RDMA write operation */
-               if (librpma_fio_client_io_write(td, io_u))
-                       goto err;
-               if (ccd->flush(td, io_u, io_u, io_u->xfer_buflen))
-                       goto err;
-       } else {
-               log_err("unsupported IO mode: %s\n", io_ddir_name(io_u->ddir));
-               goto err;
-       }
-
-       do {
-               /* get a completion */
-               ret = rpma_cq_get_wc(ccd->cq, 1, &wc, NULL);
-               if (ret == RPMA_E_NO_COMPLETION) {
-                       /* lack of completion is not an error */
-                       continue;
-               } else if (ret != 0) {
-                       /* an error occurred */
-                       librpma_td_verror(td, ret, "rpma_cq_get_wc");
-                       goto err;
-               }
-
-               /* if io_us has completed with an error */
-               if (wc.status != IBV_WC_SUCCESS)
-                       goto err;
-
-               if (wc.opcode == IBV_WC_SEND)
-                       ++ccd->op_send_completed;
-               else {
-                       if (wc.opcode == IBV_WC_RECV)
-                               ++ccd->op_recv_completed;
-
-                       break;
-               }
-       } while (1);
-
-       if (ccd->get_io_u_index(&wc, &io_u_index) != 1)
-               goto err;
-
-       if (io_u->index != io_u_index) {
-               log_err(
-                       "no matching io_u for received completion found (io_u_index=%u)\n",
-                       io_u_index);
-               goto err;
-       }
-
-       /* make sure all SENDs are completed before exit - clean up SQ */
-       if (librpma_fio_client_io_complete_all_sends(td))
-               goto err;
-
-       return FIO_Q_COMPLETED;
-
-err:
-       io_u->error = -1;
-       return FIO_Q_COMPLETED;
-}
-
-enum fio_q_status librpma_fio_client_queue(struct thread_data *td,
-               struct io_u *io_u)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-
-       if (ccd->io_u_queued_nr == (int)td->o.iodepth)
-               return FIO_Q_BUSY;
-
-       if (td->o.sync_io)
-               return client_queue_sync(td, io_u);
-
-       /* io_u -> queued[] */
-       ccd->io_us_queued[ccd->io_u_queued_nr] = io_u;
-       ccd->io_u_queued_nr++;
-
-       return FIO_Q_QUEUED;
-}
-
-int librpma_fio_client_commit(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       int flags = RPMA_F_COMPLETION_ON_ERROR;
-       struct timespec now;
-       bool fill_time;
-       int i;
-       struct io_u *flush_first_io_u = NULL;
-       unsigned long long int flush_len = 0;
-
-       if (!ccd->io_us_queued)
-               return -1;
-
-       /* execute all io_us from queued[] */
-       for (i = 0; i < ccd->io_u_queued_nr; i++) {
-               struct io_u *io_u = ccd->io_us_queued[i];
-
-               if (io_u->ddir == DDIR_READ) {
-                       if (i + 1 == ccd->io_u_queued_nr ||
-                           ccd->io_us_queued[i + 1]->ddir == DDIR_WRITE)
-                               flags = RPMA_F_COMPLETION_ALWAYS;
-                       /* post an RDMA read operation */
-                       if (librpma_fio_client_io_read(td, io_u, flags))
-                               return -1;
-               } else if (io_u->ddir == DDIR_WRITE) {
-                       /* post an RDMA write operation */
-                       if (librpma_fio_client_io_write(td, io_u))
-                               return -1;
-
-                       /* cache the first io_u in the sequence */
-                       if (flush_first_io_u == NULL)
-                               flush_first_io_u = io_u;
-
-                       /*
-                        * the flush length is the sum of all io_u's creating
-                        * the sequence
-                        */
-                       flush_len += io_u->xfer_buflen;
-
-                       /*
-                        * if io_u's are random the rpma_flush is required
-                        * after each one of them
-                        */
-                       if (!td_random(td)) {
-                               /*
-                                * When the io_u's are sequential and
-                                * the current io_u is not the last one and
-                                * the next one is also a write operation
-                                * the flush can be postponed by one io_u and
-                                * cover all of them which build a continuous
-                                * sequence.
-                                */
-                               if ((i + 1 < ccd->io_u_queued_nr) &&
-                                   (ccd->io_us_queued[i + 1]->ddir == DDIR_WRITE))
-                                       continue;
-                       }
-
-                       /* flush all writes which build a continuous sequence */
-                       if (ccd->flush(td, flush_first_io_u, io_u, flush_len))
-                               return -1;
-
-                       /*
-                        * reset the flush parameters in preparation for
-                        * the next one
-                        */
-                       flush_first_io_u = NULL;
-                       flush_len = 0;
-               } else {
-                       log_err("unsupported IO mode: %s\n",
-                               io_ddir_name(io_u->ddir));
-                       return -1;
-               }
-       }
-
-       if ((fill_time = fio_fill_issue_time(td))) {
-               fio_gettime(&now, NULL);
-
-               /*
-                * only used for iolog
-                */
-               if (td->o.read_iolog_file)
-                       memcpy(&td->last_issue, &now, sizeof(now));
-
-       }
-       /* move executed io_us from queued[] to flight[] */
-       for (i = 0; i < ccd->io_u_queued_nr; i++) {
-               struct io_u *io_u = ccd->io_us_queued[i];
-
-               /* FIO does not do this if the engine is asynchronous */
-               if (fill_time)
-                       memcpy(&io_u->issue_time, &now, sizeof(now));
-
-               /* move executed io_us from queued[] to flight[] */
-               ccd->io_us_flight[ccd->io_u_flight_nr] = io_u;
-               ccd->io_u_flight_nr++;
-
-               /*
-                * FIO says:
-                * If an engine has the commit hook
-                * it has to call io_u_queued() itself.
-                */
-               io_u_queued(td, io_u);
-       }
-
-       /* FIO does not do this if an engine has the commit hook. */
-       io_u_mark_submit(td, ccd->io_u_queued_nr);
-       ccd->io_u_queued_nr = 0;
-
-       return 0;
-}
-
-/*
- * RETURN VALUE
- * - > 0  - a number of completed io_us
- * -   0  - when no complicitions received
- * - (-1) - when an error occurred
- */
-static int client_getevent_process(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       struct ibv_wc wc;
-       /* io_u->index of completed io_u (wc.wr_id) */
-       unsigned int io_u_index;
-       /* # of completed io_us */
-       int cmpl_num = 0;
-       /* helpers */
-       struct io_u *io_u;
-       int i;
-       int ret;
-
-       /* get a completion */
-       if ((ret = rpma_cq_get_wc(ccd->cq, 1, &wc, NULL))) {
-               /* lack of completion is not an error */
-               if (ret == RPMA_E_NO_COMPLETION) {
-                       /* lack of completion is not an error */
-                       return 0;
-               }
-
-               /* an error occurred */
-               librpma_td_verror(td, ret, "rpma_cq_get_wc");
-               return -1;
-       }
-
-       /* if io_us has completed with an error */
-       if (wc.status != IBV_WC_SUCCESS) {
-               td->error = wc.status;
-               return -1;
-       }
-
-       if (wc.opcode == IBV_WC_SEND)
-               ++ccd->op_send_completed;
-       else if (wc.opcode == IBV_WC_RECV)
-               ++ccd->op_recv_completed;
-
-       if ((ret = ccd->get_io_u_index(&wc, &io_u_index)) != 1)
-               return ret;
-
-       /* look for an io_u being completed */
-       for (i = 0; i < ccd->io_u_flight_nr; ++i) {
-               if (ccd->io_us_flight[i]->index == io_u_index) {
-                       cmpl_num = i + 1;
-                       break;
-               }
-       }
-
-       /* if no matching io_u has been found */
-       if (cmpl_num == 0) {
-               log_err(
-                       "no matching io_u for received completion found (io_u_index=%u)\n",
-                       io_u_index);
-               return -1;
-       }
-
-       /* move completed io_us to the completed in-memory queue */
-       for (i = 0; i < cmpl_num; ++i) {
-               /* get and prepare io_u */
-               io_u = ccd->io_us_flight[i];
-
-               /* append to the queue */
-               ccd->io_us_completed[ccd->io_u_completed_nr] = io_u;
-               ccd->io_u_completed_nr++;
-       }
-
-       /* remove completed io_us from the flight queue */
-       for (i = cmpl_num; i < ccd->io_u_flight_nr; ++i)
-               ccd->io_us_flight[i - cmpl_num] = ccd->io_us_flight[i];
-       ccd->io_u_flight_nr -= cmpl_num;
-
-       return cmpl_num;
-}
-
-int librpma_fio_client_getevents(struct thread_data *td, unsigned int min,
-               unsigned int max, const struct timespec *t)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       /* total # of completed io_us */
-       int cmpl_num_total = 0;
-       /* # of completed io_us from a single event */
-       int cmpl_num;
-
-       do {
-               cmpl_num = client_getevent_process(td);
-               if (cmpl_num > 0) {
-                       /* new completions collected */
-                       cmpl_num_total += cmpl_num;
-               } else if (cmpl_num == 0) {
-                       /*
-                        * It is required to make sure that CQEs for SENDs
-                        * will flow at least at the same pace as CQEs for RECVs.
-                        */
-                       if (cmpl_num_total >= min &&
-                           ccd->op_send_completed >= ccd->op_recv_completed)
-                               break;
-
-                       /*
-                        * To reduce CPU consumption one can use
-                        * the rpma_cq_wait() function.
-                        * Note this greatly increase the latency
-                        * and make the results less stable.
-                        * The bandwidth stays more or less the same.
-                        */
-               } else {
-                       /* an error occurred */
-                       return -1;
-               }
-
-               /*
-                * The expected max can be exceeded if CQEs for RECVs will come up
-                * faster than CQEs for SENDs. But it is required to make sure CQEs for
-                * SENDs will flow at least at the same pace as CQEs for RECVs.
-                */
-       } while (cmpl_num_total < max ||
-                       ccd->op_send_completed < ccd->op_recv_completed);
-
-       /*
-        * All posted SENDs are completed and RECVs for them (responses) are
-        * completed. This is the initial situation so the counters are reset.
-        */
-       if (ccd->op_send_posted == ccd->op_send_completed &&
-                       ccd->op_send_completed == ccd->op_recv_completed) {
-               ccd->op_send_posted = 0;
-               ccd->op_send_completed = 0;
-               ccd->op_recv_completed = 0;
-       }
-
-       return cmpl_num_total;
-}
-
-struct io_u *librpma_fio_client_event(struct thread_data *td, int event)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       struct io_u *io_u;
-       int i;
-
-       /* get the first io_u from the queue */
-       io_u = ccd->io_us_completed[0];
-
-       /* remove the first io_u from the queue */
-       for (i = 1; i < ccd->io_u_completed_nr; ++i)
-               ccd->io_us_completed[i - 1] = ccd->io_us_completed[i];
-       ccd->io_u_completed_nr--;
-
-       dprint_io_u(io_u, "client_event");
-
-       return io_u;
-}
-
-char *librpma_fio_client_errdetails(struct io_u *io_u)
-{
-       /* get the string representation of an error */
-       enum ibv_wc_status status = io_u->error;
-       const char *status_str = ibv_wc_status_str(status);
-
-       char *details = strdup(status_str);
-       if (details == NULL) {
-               fprintf(stderr, "Error: %s\n", status_str);
-               fprintf(stderr, "Fatal error: out of memory. Aborting.\n");
-               abort();
-       }
-
-       /* FIO frees the returned string when it becomes obsolete */
-       return details;
-}
-
-int librpma_fio_server_init(struct thread_data *td)
-{
-       struct librpma_fio_options_values *o = td->eo;
-       struct librpma_fio_server_data *csd;
-       struct ibv_context *dev = NULL;
-       enum rpma_log_level log_level_aux = RPMA_LOG_LEVEL_WARNING;
-       int ret = -1;
-
-       /* --debug=net sets RPMA_LOG_THRESHOLD_AUX to RPMA_LOG_LEVEL_INFO */
-#ifdef FIO_INC_DEBUG
-       if ((1UL << FD_NET) & fio_debug)
-               log_level_aux = RPMA_LOG_LEVEL_INFO;
-#endif
-
-       /* configure logging thresholds to see more details */
-       rpma_log_set_threshold(RPMA_LOG_THRESHOLD, RPMA_LOG_LEVEL_INFO);
-       rpma_log_set_threshold(RPMA_LOG_THRESHOLD_AUX, log_level_aux);
-
-
-       /* obtain an IBV context for a remote IP address */
-       if ((ret = rpma_utils_get_ibv_context(o->server_ip,
-                       RPMA_UTIL_IBV_CONTEXT_LOCAL, &dev))) {
-               librpma_td_verror(td, ret, "rpma_utils_get_ibv_context");
-               return -1;
-       }
-
-       /* allocate server's data */
-       csd = calloc(1, sizeof(*csd));
-       if (csd == NULL) {
-               td_verror(td, errno, "calloc");
-               return -1;
-       }
-
-       /* create a new peer object */
-       if ((ret = rpma_peer_new(dev, &csd->peer))) {
-               librpma_td_verror(td, ret, "rpma_peer_new");
-               goto err_free_csd;
-       }
-
-       td->io_ops_data = csd;
-
-       return 0;
-
-err_free_csd:
-       free(csd);
-
-       return -1;
-}
-
-void librpma_fio_server_cleanup(struct thread_data *td)
-{
-       struct librpma_fio_server_data *csd =  td->io_ops_data;
-       int ret;
-
-       if (csd == NULL)
-               return;
-
-       /* free the peer */
-       if ((ret = rpma_peer_delete(&csd->peer)))
-               librpma_td_verror(td, ret, "rpma_peer_delete");
-
-       free(csd);
-}
-
-int librpma_fio_server_open_file(struct thread_data *td, struct fio_file *f,
-               struct rpma_conn_cfg *cfg)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct librpma_fio_options_values *o = td->eo;
-       enum rpma_conn_event conn_event = RPMA_CONN_UNDEFINED;
-       struct librpma_fio_workspace ws = {0};
-       struct rpma_conn_private_data pdata;
-       uint32_t max_msg_num;
-       struct rpma_conn_req *conn_req;
-       struct rpma_conn *conn;
-       struct rpma_mr_local *mr;
-       char port_td[LIBRPMA_FIO_PORT_STR_LEN_MAX];
-       struct rpma_ep *ep;
-       size_t mem_size = td->o.size;
-       size_t mr_desc_size;
-       void *ws_ptr;
-       bool is_dram;
-       int usage_mem_type;
-       int ret;
-
-       if (!f->file_name) {
-               log_err("fio: filename is not set\n");
-               return -1;
-       }
-
-       /* start a listening endpoint at addr:port */
-       if (librpma_fio_td_port(o->port, td, port_td))
-               return -1;
-
-       if ((ret = rpma_ep_listen(csd->peer, o->server_ip, port_td, &ep))) {
-               librpma_td_verror(td, ret, "rpma_ep_listen");
-               return -1;
-       }
-
-       is_dram = !strcmp(f->file_name, "malloc");
-       if (is_dram) {
-               /* allocation from DRAM using posix_memalign() */
-               ws_ptr = librpma_fio_allocate_dram(td, mem_size, &csd->mem);
-               usage_mem_type = RPMA_MR_USAGE_FLUSH_TYPE_VISIBILITY;
-       } else {
-               /* allocation from PMEM using pmem_map_file() */
-               ws_ptr = librpma_fio_allocate_pmem(td, f, mem_size, &csd->mem);
-               usage_mem_type = RPMA_MR_USAGE_FLUSH_TYPE_PERSISTENT;
-       }
-
-       if (ws_ptr == NULL)
-               goto err_ep_shutdown;
-
-       f->real_file_size = mem_size;
-
-       if ((ret = rpma_mr_reg(csd->peer, ws_ptr, mem_size,
-                       RPMA_MR_USAGE_READ_DST | RPMA_MR_USAGE_READ_SRC |
-                       RPMA_MR_USAGE_WRITE_DST | RPMA_MR_USAGE_WRITE_SRC |
-                       usage_mem_type, &mr))) {
-               librpma_td_verror(td, ret, "rpma_mr_reg");
-               goto err_free;
-       }
-
-       if (!is_dram && f->filetype == FIO_TYPE_FILE) {
-               ret = rpma_mr_advise(mr, 0, mem_size,
-                               IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE,
-                               IBV_ADVISE_MR_FLAG_FLUSH);
-               if (ret) {
-                       librpma_td_verror(td, ret, "rpma_mr_advise");
-                       /* an invalid argument is an error */
-                       if (ret == RPMA_E_INVAL)
-                               goto err_mr_dereg;
-
-                       /* log_err used instead of log_info to avoid corruption of the JSON output */
-                       log_err("Note: having rpma_mr_advise(3) failed because of RPMA_E_NOSUPP or RPMA_E_PROVIDER may come with a performance penalty, but it is not a blocker for running the benchmark.\n");
-               }
-       }
-
-       /* get size of the memory region's descriptor */
-       if ((ret = rpma_mr_get_descriptor_size(mr, &mr_desc_size))) {
-               librpma_td_verror(td, ret, "rpma_mr_get_descriptor_size");
-               goto err_mr_dereg;
-       }
-
-       /* verify size of the memory region's descriptor */
-       if (mr_desc_size > LIBRPMA_FIO_DESCRIPTOR_MAX_SIZE) {
-               log_err(
-                       "size of the memory region's descriptor is too big (max=%i)\n",
-                       LIBRPMA_FIO_DESCRIPTOR_MAX_SIZE);
-               goto err_mr_dereg;
-       }
-
-       /* get the memory region's descriptor */
-       if ((ret = rpma_mr_get_descriptor(mr, &ws.descriptor[0]))) {
-               librpma_td_verror(td, ret, "rpma_mr_get_descriptor");
-               goto err_mr_dereg;
-       }
-
-       if (cfg != NULL) {
-               if ((ret = rpma_conn_cfg_get_rq_size(cfg, &max_msg_num))) {
-                       librpma_td_verror(td, ret, "rpma_conn_cfg_get_rq_size");
-                       goto err_mr_dereg;
-               }
-
-               /* verify whether iodepth fits into uint16_t */
-               if (max_msg_num > UINT16_MAX) {
-                       log_err("fio: iodepth too big (%u > %u)\n",
-                               max_msg_num, UINT16_MAX);
-                       return -1;
-               }
-
-               ws.max_msg_num = max_msg_num;
-       }
-
-       /* prepare a workspace description */
-       ws.direct_write_to_pmem = o->direct_write_to_pmem;
-       ws.mr_desc_size = mr_desc_size;
-       pdata.ptr = &ws;
-       pdata.len = sizeof(ws);
-
-       /* receive an incoming connection request */
-       if ((ret = rpma_ep_next_conn_req(ep, cfg, &conn_req))) {
-               librpma_td_verror(td, ret, "rpma_ep_next_conn_req");
-               goto err_mr_dereg;
-       }
-
-       if (csd->prepare_connection && csd->prepare_connection(td, conn_req))
-               goto err_req_delete;
-
-       /* accept the connection request and obtain the connection object */
-       if ((ret = rpma_conn_req_connect(&conn_req, &pdata, &conn))) {
-               librpma_td_verror(td, ret, "rpma_conn_req_connect");
-               goto err_req_delete;
-       }
-
-       /* wait for the connection to be established */
-       if ((ret = rpma_conn_next_event(conn, &conn_event))) {
-               librpma_td_verror(td, ret, "rpma_conn_next_event");
-               goto err_conn_delete;
-       } else if (conn_event != RPMA_CONN_ESTABLISHED) {
-               log_err("rpma_conn_next_event returned an unexptected event\n");
-               goto err_conn_delete;
-       }
-
-       /* end-point is no longer needed */
-       (void) rpma_ep_shutdown(&ep);
-
-       csd->ws_mr = mr;
-       csd->ws_ptr = ws_ptr;
-       csd->conn = conn;
-
-       /* get the connection's main CQ */
-       if ((ret = rpma_conn_get_cq(csd->conn, &csd->cq))) {
-               librpma_td_verror(td, ret, "rpma_conn_get_cq");
-               goto err_conn_delete;
-       }
-
-       return 0;
-
-err_conn_delete:
-       (void) rpma_conn_delete(&conn);
-
-err_req_delete:
-       (void) rpma_conn_req_delete(&conn_req);
-
-err_mr_dereg:
-       (void) rpma_mr_dereg(&mr);
-
-err_free:
-       librpma_fio_free(&csd->mem);
-
-err_ep_shutdown:
-       (void) rpma_ep_shutdown(&ep);
-
-       return -1;
-}
-
-int librpma_fio_server_close_file(struct thread_data *td, struct fio_file *f)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       enum rpma_conn_event conn_event = RPMA_CONN_UNDEFINED;
-       int rv = 0;
-       int ret;
-
-       /* wait for the connection to be closed */
-       ret = rpma_conn_next_event(csd->conn, &conn_event);
-       if (!ret && conn_event != RPMA_CONN_CLOSED) {
-               log_err("rpma_conn_next_event returned an unexptected event\n");
-               rv = -1;
-       }
-
-       if ((ret = rpma_conn_disconnect(csd->conn))) {
-               librpma_td_verror(td, ret, "rpma_conn_disconnect");
-               rv = -1;
-       }
-
-       if ((ret = rpma_conn_delete(&csd->conn))) {
-               librpma_td_verror(td, ret, "rpma_conn_delete");
-               rv = -1;
-       }
-
-       if ((ret = rpma_mr_dereg(&csd->ws_mr))) {
-               librpma_td_verror(td, ret, "rpma_mr_dereg");
-               rv = -1;
-       }
-
-       librpma_fio_free(&csd->mem);
-
-       return rv;
-}
diff --git a/engines/librpma_fio.h b/engines/librpma_fio.h
deleted file mode 100644 (file)
index 480ded1..0000000
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * librpma_fio: librpma_apm and librpma_gpspm engines' common header.
- *
- * Copyright 2021-2022, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#ifndef LIBRPMA_FIO_H
-#define LIBRPMA_FIO_H 1
-
-#include "../fio.h"
-#include "../optgroup.h"
-
-#include <librpma.h>
-
-/* servers' and clients' common */
-
-#define librpma_td_verror(td, err, func) \
-       td_vmsg((td), (err), rpma_err_2str(err), (func))
-
-/* ceil(a / b) = (a + b - 1) / b */
-#define LIBRPMA_FIO_CEIL(a, b) (((a) + (b) - 1) / (b))
-
-/* common option structure for server and client */
-struct librpma_fio_options_values {
-       /*
-        * FIO considers .off1 == 0 absent so the first meaningful field has to
-        * have padding ahead of it.
-        */
-       void *pad;
-       char *server_ip;
-       /* base server listening port */
-       char *port;
-       /* Direct Write to PMem is possible */
-       unsigned int direct_write_to_pmem;
-       /* Set to 0 to wait for completion instead of busy-wait polling completion. */
-       unsigned int busy_wait_polling;
-};
-
-extern struct fio_option librpma_fio_options[];
-
-/*
- * Limited by the maximum length of the private data
- * for rdma_connect() in case of RDMA_PS_TCP (28 bytes).
- */
-#define LIBRPMA_FIO_DESCRIPTOR_MAX_SIZE 24
-
-struct librpma_fio_workspace {
-       uint16_t max_msg_num;   /* # of RQ slots */
-       uint8_t direct_write_to_pmem; /* Direct Write to PMem is possible */
-       uint8_t mr_desc_size;   /* size of mr_desc in descriptor[] */
-       /* buffer containing mr_desc */
-       char descriptor[LIBRPMA_FIO_DESCRIPTOR_MAX_SIZE];
-};
-
-#define LIBRPMA_FIO_PORT_STR_LEN_MAX 12
-
-int librpma_fio_td_port(const char *port_base_str, struct thread_data *td,
-               char *port_out);
-
-struct librpma_fio_mem {
-       /* memory buffer */
-       char *mem_ptr;
-
-       /* size of the mapped persistent memory */
-       size_t size_mmap;
-
-#ifdef CONFIG_LIBPMEM2_INSTALLED
-       /* libpmem2 structure used for mapping PMem */
-       struct pmem2_map *map;
-#endif
-};
-
-char *librpma_fio_allocate_dram(struct thread_data *td, size_t size,
-               struct librpma_fio_mem *mem);
-
-char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f,
-               size_t size, struct librpma_fio_mem *mem);
-
-void librpma_fio_free(struct librpma_fio_mem *mem);
-
-/* clients' common */
-
-typedef int (*librpma_fio_flush_t)(struct thread_data *td,
-               struct io_u *first_io_u, struct io_u *last_io_u,
-               unsigned long long int len);
-
-/*
- * RETURN VALUE
- * - ( 1) - on success
- * - ( 0) - skip
- * - (-1) - on error
- */
-typedef int (*librpma_fio_get_io_u_index_t)(struct ibv_wc *wc,
-               unsigned int *io_u_index);
-
-struct librpma_fio_client_data {
-       struct rpma_peer *peer;
-       struct rpma_conn *conn;
-       struct rpma_cq *cq;
-
-       /* aligned td->orig_buffer */
-       char *orig_buffer_aligned;
-
-       /* ious's base address memory registration (cd->orig_buffer_aligned) */
-       struct rpma_mr_local *orig_mr;
-
-       struct librpma_fio_workspace *ws;
-
-       /* a server's memory representation */
-       struct rpma_mr_remote *server_mr;
-       enum rpma_flush_type server_mr_flush_type;
-
-       /* remote workspace description */
-       size_t ws_size;
-
-       /* in-memory queues */
-       struct io_u **io_us_queued;
-       int io_u_queued_nr;
-       struct io_u **io_us_flight;
-       int io_u_flight_nr;
-       struct io_u **io_us_completed;
-       int io_u_completed_nr;
-
-       /* SQ control. Note: all of them have to be kept in sync. */
-       uint32_t op_send_posted;
-       uint32_t op_send_completed;
-       uint32_t op_recv_completed;
-
-       librpma_fio_flush_t flush;
-       librpma_fio_get_io_u_index_t get_io_u_index;
-
-       /* engine-specific client data */
-       void *client_data;
-};
-
-int librpma_fio_client_init(struct thread_data *td,
-               struct rpma_conn_cfg *cfg);
-void librpma_fio_client_cleanup(struct thread_data *td);
-
-int librpma_fio_file_nop(struct thread_data *td, struct fio_file *f);
-int librpma_fio_client_get_file_size(struct thread_data *td,
-               struct fio_file *f);
-
-int librpma_fio_client_post_init(struct thread_data *td);
-
-enum fio_q_status librpma_fio_client_queue(struct thread_data *td,
-               struct io_u *io_u);
-
-int librpma_fio_client_commit(struct thread_data *td);
-
-int librpma_fio_client_getevents(struct thread_data *td, unsigned int min,
-               unsigned int max, const struct timespec *t);
-
-struct io_u *librpma_fio_client_event(struct thread_data *td, int event);
-
-char *librpma_fio_client_errdetails(struct io_u *io_u);
-
-static inline int librpma_fio_client_io_read(struct thread_data *td,
-               struct io_u *io_u, int flags)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       size_t dst_offset = (char *)(io_u->xfer_buf) - ccd->orig_buffer_aligned;
-       size_t src_offset = io_u->offset;
-       int ret;
-
-       if ((ret = rpma_read(ccd->conn, ccd->orig_mr, dst_offset,
-                       ccd->server_mr, src_offset, io_u->xfer_buflen,
-                       flags, (void *)(uintptr_t)io_u->index))) {
-               librpma_td_verror(td, ret, "rpma_read");
-               return -1;
-       }
-
-       return 0;
-}
-
-static inline int librpma_fio_client_io_write(struct thread_data *td,
-               struct io_u *io_u)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       size_t src_offset = (char *)(io_u->xfer_buf) - ccd->orig_buffer_aligned;
-       size_t dst_offset = io_u->offset;
-       int ret;
-
-       if ((ret = rpma_write(ccd->conn, ccd->server_mr, dst_offset,
-                       ccd->orig_mr, src_offset, io_u->xfer_buflen,
-                       RPMA_F_COMPLETION_ON_ERROR,
-                       (void *)(uintptr_t)io_u->index))) {
-               librpma_td_verror(td, ret, "rpma_write");
-               return -1;
-       }
-
-       return 0;
-}
-
-static inline int librpma_fio_client_io_complete_all_sends(
-               struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       struct ibv_wc wc;
-       int ret;
-
-       while (ccd->op_send_posted != ccd->op_send_completed) {
-               /* get a completion */
-               ret = rpma_cq_get_wc(ccd->cq, 1, &wc, NULL);
-               if (ret == RPMA_E_NO_COMPLETION) {
-                       /* lack of completion is not an error */
-                       continue;
-               } else if (ret != 0) {
-                       /* an error occurred */
-                       librpma_td_verror(td, ret, "rpma_cq_get_wc");
-                       break;
-               }
-
-               if (wc.status != IBV_WC_SUCCESS)
-                       return -1;
-
-               if (wc.opcode == IBV_WC_SEND)
-                       ++ccd->op_send_completed;
-               else {
-                       log_err(
-                               "A completion other than IBV_WC_SEND got during cleaning up the CQ from SENDs\n");
-                       return -1;
-               }
-       }
-
-       /*
-        * All posted SENDs are completed and RECVs for them (responses) are
-        * completed. This is the initial situation so the counters are reset.
-        */
-       if (ccd->op_send_posted == ccd->op_send_completed &&
-                       ccd->op_send_completed == ccd->op_recv_completed) {
-               ccd->op_send_posted = 0;
-               ccd->op_send_completed = 0;
-               ccd->op_recv_completed = 0;
-       }
-
-       return 0;
-}
-
-/* servers' common */
-
-typedef int (*librpma_fio_prepare_connection_t)(
-               struct thread_data *td,
-               struct rpma_conn_req *conn_req);
-
-struct librpma_fio_server_data {
-       struct rpma_peer *peer;
-
-       /* resources of an incoming connection */
-       struct rpma_conn *conn;
-       struct rpma_cq *cq;
-
-       char *ws_ptr;
-       struct rpma_mr_local *ws_mr;
-       struct librpma_fio_mem mem;
-
-       /* engine-specific server data */
-       void *server_data;
-
-       librpma_fio_prepare_connection_t prepare_connection;
-};
-
-int librpma_fio_server_init(struct thread_data *td);
-
-void librpma_fio_server_cleanup(struct thread_data *td);
-
-int librpma_fio_server_open_file(struct thread_data *td,
-               struct fio_file *f, struct rpma_conn_cfg *cfg);
-
-int librpma_fio_server_close_file(struct thread_data *td,
-               struct fio_file *f);
-
-#endif /* LIBRPMA_FIO_H */
diff --git a/engines/librpma_fio_pmem.h b/engines/librpma_fio_pmem.h
deleted file mode 100644 (file)
index 4854292..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * librpma_fio_pmem: allocates pmem using libpmem.
- *
- * Copyright 2022, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <libpmem.h>
-#include "librpma_fio.h"
-
-#define RPMA_PMEM_USED "libpmem"
-
-static int librpma_fio_pmem_map_file(struct fio_file *f, size_t size,
-               struct librpma_fio_mem *mem, size_t ws_offset)
-{
-       int is_pmem = 0;
-       size_t size_mmap = 0;
-
-       /* map the file */
-       mem->mem_ptr = pmem_map_file(f->file_name, 0 /* len */, 0 /* flags */,
-                       0 /* mode */, &size_mmap, &is_pmem);
-       if (mem->mem_ptr == NULL) {
-               /* pmem_map_file() sets errno on failure */
-               log_err("fio: pmem_map_file(%s) failed: %s (errno %i)\n",
-                       f->file_name, strerror(errno), errno);
-               return -1;
-       }
-
-       /* pmem is expected */
-       if (!is_pmem) {
-               log_err("fio: %s is not located in persistent memory\n",
-                       f->file_name);
-               goto err_unmap;
-       }
-
-       /* check size of allocated persistent memory */
-       if (size_mmap < ws_offset + size) {
-               log_err(
-                       "fio: %s is too small to handle so many threads (%zu < %zu)\n",
-                       f->file_name, size_mmap, ws_offset + size);
-               goto err_unmap;
-       }
-
-       log_info("fio: size of memory mapped from the file %s: %zu\n",
-               f->file_name, size_mmap);
-
-       mem->size_mmap = size_mmap;
-
-       return 0;
-
-err_unmap:
-       (void) pmem_unmap(mem->mem_ptr, size_mmap);
-       return -1;
-}
-
-static inline void librpma_fio_unmap(struct librpma_fio_mem *mem)
-{
-       (void) pmem_unmap(mem->mem_ptr, mem->size_mmap);
-}
diff --git a/engines/librpma_fio_pmem2.h b/engines/librpma_fio_pmem2.h
deleted file mode 100644 (file)
index 09a51f5..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * librpma_fio_pmem2: allocates pmem using libpmem2.
- *
- * Copyright 2022, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include <libpmem2.h>
-#include "librpma_fio.h"
-
-#define RPMA_PMEM_USED "libpmem2"
-
-static int librpma_fio_pmem_map_file(struct fio_file *f, size_t size,
-               struct librpma_fio_mem *mem, size_t ws_offset)
-{
-       int fd;
-       struct pmem2_config *cfg = NULL;
-       struct pmem2_map *map = NULL;
-       struct pmem2_source *src = NULL;
-
-       size_t size_mmap;
-
-       if((fd = open(f->file_name, O_RDWR)) < 0) {
-               log_err("fio: cannot open fio file\n");
-               return -1;
-       }
-
-       if (pmem2_source_from_fd(&src, fd) != 0) {
-               log_err("fio: pmem2_source_from_fd() failed\n");
-               goto err_close;
-       }
-
-       if (pmem2_config_new(&cfg) != 0) {
-               log_err("fio: pmem2_config_new() failed\n");
-               goto err_source_delete;
-       }
-
-       if (pmem2_config_set_required_store_granularity(cfg,
-                                       PMEM2_GRANULARITY_CACHE_LINE) != 0) {
-               log_err("fio: pmem2_config_set_required_store_granularity() failed: %s\n", pmem2_errormsg());
-               goto err_config_delete;
-       }
-
-       if (pmem2_map_new(&map, cfg, src) != 0) {
-               log_err("fio: pmem2_map_new(%s) failed: %s\n", f->file_name, pmem2_errormsg());
-               goto err_config_delete;
-       }
-
-       size_mmap = pmem2_map_get_size(map);
-
-       /* check size of allocated persistent memory */
-       if (size_mmap < ws_offset + size) {
-               log_err(
-                       "fio: %s is too small to handle so many threads (%zu < %zu)\n",
-                       f->file_name, size_mmap, ws_offset + size);
-               goto err_map_delete;
-       }
-
-       mem->mem_ptr = pmem2_map_get_address(map);
-       mem->size_mmap = size_mmap;
-       mem->map = map;
-       pmem2_config_delete(&cfg);
-       pmem2_source_delete(&src);
-       close(fd);
-
-       return 0;
-
-err_map_delete:
-       pmem2_map_delete(&map);
-err_config_delete:
-       pmem2_config_delete(&cfg);
-err_source_delete:
-       pmem2_source_delete(&src);
-err_close:
-       close(fd);
-
-       return -1;
-}
-
-static inline void librpma_fio_unmap(struct librpma_fio_mem *mem)
-{
-       (void) pmem2_map_delete(&mem->map);
-}
diff --git a/engines/librpma_gpspm.c b/engines/librpma_gpspm.c
deleted file mode 100644 (file)
index 70116d0..0000000
+++ /dev/null
@@ -1,784 +0,0 @@
-/*
- * librpma_gpspm: IO engine that uses PMDK librpma to write data,
- *             based on General Purpose Server Persistency Method
- *
- * Copyright 2020-2022, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#include "librpma_fio.h"
-
-#ifdef CONFIG_LIBPMEM2_INSTALLED
-#include <libpmem2.h>
-#else
-#include <libpmem.h>
-#endif
-
-/* Generated by the protocol buffer compiler from: librpma_gpspm_flush.proto */
-#include "librpma_gpspm_flush.pb-c.h"
-
-#define MAX_MSG_SIZE (512)
-#define IO_U_BUF_LEN (2 * MAX_MSG_SIZE)
-#define SEND_OFFSET (0)
-#define RECV_OFFSET (SEND_OFFSET + MAX_MSG_SIZE)
-
-#define GPSPM_FLUSH_REQUEST__LAST \
-       { PROTOBUF_C_MESSAGE_INIT(&gpspm_flush_request__descriptor), 0, 0, 0 }
-
-/*
- * 'Flush_req_last' is the last flush request
- * the client has to send to server to indicate
- * that the client is done.
- */
-static const GPSPMFlushRequest Flush_req_last = GPSPM_FLUSH_REQUEST__LAST;
-
-#define IS_NOT_THE_LAST_MESSAGE(flush_req) \
-       (flush_req->length != Flush_req_last.length || \
-       flush_req->offset != Flush_req_last.offset)
-
-/* client side implementation */
-
-/* get next io_u message buffer in the round-robin fashion */
-#define IO_U_NEXT_BUF_OFF_CLIENT(cd) \
-       (IO_U_BUF_LEN * ((cd->msg_curr++) % cd->msg_num))
-
-struct client_data {
-       /* memory for sending and receiving buffered */
-       char *io_us_msgs;
-
-       /* resources for messaging buffer */
-       uint32_t msg_num;
-       uint32_t msg_curr;
-       struct rpma_mr_local *msg_mr;
-};
-
-static inline int client_io_flush(struct thread_data *td,
-               struct io_u *first_io_u, struct io_u *last_io_u,
-               unsigned long long int len);
-
-static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index);
-
-static int client_init(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd;
-       struct client_data *cd;
-       uint32_t write_num;
-       struct rpma_conn_cfg *cfg = NULL;
-       int ret;
-
-       /*
-        * not supported:
-        * - readwrite = read / trim / randread / randtrim /
-        *               / rw / randrw / trimwrite
-        */
-       if (td_read(td) || td_trim(td)) {
-               td_verror(td, EINVAL, "Not supported mode.");
-               return -1;
-       }
-
-       /* allocate client's data */
-       cd = calloc(1, sizeof(*cd));
-       if (cd == NULL) {
-               td_verror(td, errno, "calloc");
-               return -1;
-       }
-
-       /*
-        * Calculate the required number of WRITEs and FLUSHes.
-        *
-        * Note: Each flush is a request (SEND) and response (RECV) pair.
-        */
-       if (td_random(td)) {
-               write_num = td->o.iodepth; /* WRITE * N */
-               cd->msg_num = td->o.iodepth; /* FLUSH * N */
-       } else {
-               if (td->o.sync_io) {
-                       write_num = 1; /* WRITE */
-                       cd->msg_num = 1; /* FLUSH */
-               } else {
-                       write_num = td->o.iodepth; /* WRITE * N */
-                       /*
-                        * FLUSH * B where:
-                        * - B == ceil(iodepth / iodepth_batch)
-                        *   which is the number of batches for N writes
-                        */
-                       cd->msg_num = LIBRPMA_FIO_CEIL(td->o.iodepth,
-                                       td->o.iodepth_batch);
-               }
-       }
-
-       /* create a connection configuration object */
-       if ((ret = rpma_conn_cfg_new(&cfg))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_new");
-               goto err_free_cd;
-       }
-
-       /*
-        * Calculate the required queue sizes where:
-        * - the send queue (SQ) has to be big enough to accommodate
-        *   all io_us (WRITEs) and all flush requests (SENDs)
-        * - the receive queue (RQ) has to be big enough to accommodate
-        *   all flush responses (RECVs)
-        * - the completion queue (CQ) has to be big enough to accommodate all
-        *   success and error completions (sq_size + rq_size)
-        */
-       if ((ret = rpma_conn_cfg_set_sq_size(cfg, write_num + cd->msg_num))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_sq_size");
-               goto err_cfg_delete;
-       }
-       if ((ret = rpma_conn_cfg_set_rq_size(cfg, cd->msg_num))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_rq_size");
-               goto err_cfg_delete;
-       }
-       if ((ret = rpma_conn_cfg_set_cq_size(cfg, write_num + cd->msg_num * 2))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_cq_size");
-               goto err_cfg_delete;
-       }
-
-       if (librpma_fio_client_init(td, cfg))
-               goto err_cfg_delete;
-
-       ccd = td->io_ops_data;
-
-       if (ccd->ws->direct_write_to_pmem &&
-           ccd->server_mr_flush_type == RPMA_FLUSH_TYPE_PERSISTENT &&
-           td->thread_number == 1) {
-               /* XXX log_info mixes with the JSON output */
-               log_err(
-                       "Note: The server side supports Direct Write to PMem and it is equipped with PMem (direct_write_to_pmem).\n"
-                       "You can use librpma_client and librpma_server engines for better performance instead of GPSPM.\n");
-       }
-
-       /* validate the server's RQ capacity */
-       if (cd->msg_num > ccd->ws->max_msg_num) {
-               log_err(
-                       "server's RQ size (iodepth) too small to handle the client's workspace requirements (%u < %u)\n",
-                       ccd->ws->max_msg_num, cd->msg_num);
-               goto err_cleanup_common;
-       }
-
-       if ((ret = rpma_conn_cfg_delete(&cfg))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_delete");
-               /* non fatal error - continue */
-       }
-
-       ccd->flush = client_io_flush;
-       ccd->get_io_u_index = client_get_io_u_index;
-       ccd->client_data = cd;
-
-       return 0;
-
-err_cleanup_common:
-       librpma_fio_client_cleanup(td);
-
-err_cfg_delete:
-       (void) rpma_conn_cfg_delete(&cfg);
-
-err_free_cd:
-       free(cd);
-
-       return -1;
-}
-
-static int client_post_init(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       struct client_data *cd = ccd->client_data;
-       unsigned int io_us_msgs_size;
-       int ret;
-
-       /* message buffers initialization and registration */
-       io_us_msgs_size = cd->msg_num * IO_U_BUF_LEN;
-       if ((ret = posix_memalign((void **)&cd->io_us_msgs, page_size,
-                       io_us_msgs_size))) {
-               td_verror(td, ret, "posix_memalign");
-               return ret;
-       }
-       if ((ret = rpma_mr_reg(ccd->peer, cd->io_us_msgs, io_us_msgs_size,
-                       RPMA_MR_USAGE_SEND | RPMA_MR_USAGE_RECV,
-                       &cd->msg_mr))) {
-               librpma_td_verror(td, ret, "rpma_mr_reg");
-               return ret;
-       }
-
-       return librpma_fio_client_post_init(td);
-}
-
-static void client_cleanup(struct thread_data *td)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       struct client_data *cd;
-       size_t flush_req_size;
-       size_t io_u_buf_off;
-       size_t send_offset;
-       void *send_ptr;
-       int ret;
-
-       if (ccd == NULL)
-               return;
-
-       cd = ccd->client_data;
-       if (cd == NULL) {
-               librpma_fio_client_cleanup(td);
-               return;
-       }
-
-       /*
-        * Make sure all SEND completions are collected ergo there are free
-        * slots in the SQ for the last SEND message.
-        *
-        * Note: If any operation will fail we still can send the termination
-        * notice.
-        */
-       (void) librpma_fio_client_io_complete_all_sends(td);
-
-       /* prepare the last flush message and pack it to the send buffer */
-       flush_req_size = gpspm_flush_request__get_packed_size(&Flush_req_last);
-       if (flush_req_size > MAX_MSG_SIZE) {
-               log_err(
-                       "Packed flush request size is bigger than available send buffer space (%zu > %d\n",
-                       flush_req_size, MAX_MSG_SIZE);
-       } else {
-               io_u_buf_off = IO_U_NEXT_BUF_OFF_CLIENT(cd);
-               send_offset = io_u_buf_off + SEND_OFFSET;
-               send_ptr = cd->io_us_msgs + send_offset;
-               (void) gpspm_flush_request__pack(&Flush_req_last, send_ptr);
-
-               /* send the flush message */
-               if ((ret = rpma_send(ccd->conn, cd->msg_mr, send_offset,
-                               flush_req_size, RPMA_F_COMPLETION_ALWAYS,
-                               NULL)))
-                       librpma_td_verror(td, ret, "rpma_send");
-
-               ++ccd->op_send_posted;
-
-               /* Wait for the SEND to complete */
-               (void) librpma_fio_client_io_complete_all_sends(td);
-       }
-
-       /* deregister the messaging buffer memory */
-       if ((ret = rpma_mr_dereg(&cd->msg_mr)))
-               librpma_td_verror(td, ret, "rpma_mr_dereg");
-
-       free(ccd->client_data);
-
-       librpma_fio_client_cleanup(td);
-}
-
-static inline int client_io_flush(struct thread_data *td,
-               struct io_u *first_io_u, struct io_u *last_io_u,
-               unsigned long long int len)
-{
-       struct librpma_fio_client_data *ccd = td->io_ops_data;
-       struct client_data *cd = ccd->client_data;
-       size_t io_u_buf_off = IO_U_NEXT_BUF_OFF_CLIENT(cd);
-       size_t send_offset = io_u_buf_off + SEND_OFFSET;
-       size_t recv_offset = io_u_buf_off + RECV_OFFSET;
-       void *send_ptr = cd->io_us_msgs + send_offset;
-       void *recv_ptr = cd->io_us_msgs + recv_offset;
-       GPSPMFlushRequest flush_req = GPSPM_FLUSH_REQUEST__INIT;
-       size_t flush_req_size = 0;
-       int ret;
-
-       /* prepare a response buffer */
-       if ((ret = rpma_recv(ccd->conn, cd->msg_mr, recv_offset, MAX_MSG_SIZE,
-                       recv_ptr))) {
-               librpma_td_verror(td, ret, "rpma_recv");
-               return -1;
-       }
-
-       /* prepare a flush message and pack it to a send buffer */
-       flush_req.offset = first_io_u->offset;
-       flush_req.length = len;
-       flush_req.op_context = last_io_u->index;
-       flush_req_size = gpspm_flush_request__get_packed_size(&flush_req);
-       if (flush_req_size > MAX_MSG_SIZE) {
-               log_err(
-                       "Packed flush request size is bigger than available send buffer space (%"
-                       PRIu64 " > %d\n", flush_req_size, MAX_MSG_SIZE);
-               return -1;
-       }
-       (void) gpspm_flush_request__pack(&flush_req, send_ptr);
-
-       /* send the flush message */
-       if ((ret = rpma_send(ccd->conn, cd->msg_mr, send_offset, flush_req_size,
-                       RPMA_F_COMPLETION_ALWAYS, NULL))) {
-               librpma_td_verror(td, ret, "rpma_send");
-               return -1;
-       }
-
-       ++ccd->op_send_posted;
-
-       return 0;
-}
-
-static int client_get_io_u_index(struct ibv_wc *wc, unsigned int *io_u_index)
-{
-       GPSPMFlushResponse *flush_resp;
-
-       if (wc->opcode != IBV_WC_RECV)
-               return 0;
-
-       /* unpack a response from the received buffer */
-       flush_resp = gpspm_flush_response__unpack(NULL,
-                       wc->byte_len, (void *)wc->wr_id);
-       if (flush_resp == NULL) {
-               log_err("Cannot unpack the flush response buffer\n");
-               return -1;
-       }
-
-       memcpy(io_u_index, &flush_resp->op_context, sizeof(*io_u_index));
-
-       gpspm_flush_response__free_unpacked(flush_resp, NULL);
-
-       return 1;
-}
-
-FIO_STATIC struct ioengine_ops ioengine_client = {
-       .name                   = "librpma_gpspm_client",
-       .version                = FIO_IOOPS_VERSION,
-       .init                   = client_init,
-       .post_init              = client_post_init,
-       .get_file_size          = librpma_fio_client_get_file_size,
-       .open_file              = librpma_fio_file_nop,
-       .queue                  = librpma_fio_client_queue,
-       .commit                 = librpma_fio_client_commit,
-       .getevents              = librpma_fio_client_getevents,
-       .event                  = librpma_fio_client_event,
-       .errdetails             = librpma_fio_client_errdetails,
-       .close_file             = librpma_fio_file_nop,
-       .cleanup                = client_cleanup,
-       .flags                  = FIO_DISKLESSIO | FIO_ASYNCIO_SETS_ISSUE_TIME,
-       .options                = librpma_fio_options,
-       .option_struct_size     = sizeof(struct librpma_fio_options_values),
-};
-
-/* server side implementation */
-
-#define IO_U_BUFF_OFF_SERVER(i) (i * IO_U_BUF_LEN)
-
-typedef void (*librpma_fio_persist_fn)(const void *ptr, size_t size);
-
-struct server_data {
-       /* aligned td->orig_buffer */
-       char *orig_buffer_aligned;
-
-       /* resources for messaging buffer from DRAM allocated by fio */
-       struct rpma_mr_local *msg_mr;
-
-       uint32_t msg_sqe_available; /* # of free SQ slots */
-
-       /* in-memory queues */
-       struct ibv_wc *msgs_queued;
-       uint32_t msg_queued_nr;
-
-       librpma_fio_persist_fn persist;
-};
-
-static int server_init(struct thread_data *td)
-{
-       struct librpma_fio_server_data *csd;
-       struct server_data *sd;
-       int ret = -1;
-
-       if ((ret = librpma_fio_server_init(td)))
-               return ret;
-
-       csd = td->io_ops_data;
-
-       /* allocate server's data */
-       sd = calloc(1, sizeof(*sd));
-       if (sd == NULL) {
-               td_verror(td, errno, "calloc");
-               goto err_server_cleanup;
-       }
-
-       /* allocate in-memory queue */
-       sd->msgs_queued = calloc(td->o.iodepth, sizeof(*sd->msgs_queued));
-       if (sd->msgs_queued == NULL) {
-               td_verror(td, errno, "calloc");
-               goto err_free_sd;
-       }
-
-#ifdef CONFIG_LIBPMEM2_INSTALLED
-       /* get libpmem2 persist function from pmem2_map */
-       sd->persist = pmem2_get_persist_fn(csd->mem.map);
-#else
-       sd->persist = pmem_persist;
-#endif
-
-       /*
-        * Assure a single io_u buffer can store both SEND and RECV messages and
-        * an io_us buffer allocation is page-size-aligned which is required
-        * to register for RDMA. User-provided values are intentionally ignored.
-        */
-       td->o.max_bs[DDIR_READ] = IO_U_BUF_LEN;
-       td->o.mem_align = page_size;
-
-       csd->server_data = sd;
-
-       return 0;
-
-err_free_sd:
-       free(sd);
-
-err_server_cleanup:
-       librpma_fio_server_cleanup(td);
-
-       return -1;
-}
-
-static int server_post_init(struct thread_data *td)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct server_data *sd = csd->server_data;
-       size_t io_us_size;
-       size_t io_u_buflen;
-       int ret;
-
-       /*
-        * td->orig_buffer is not aligned. The engine requires aligned io_us
-        * so FIO aligns up the address using the formula below.
-        */
-       sd->orig_buffer_aligned = PTR_ALIGN(td->orig_buffer, page_mask) +
-                       td->o.mem_align;
-
-       /*
-        * XXX
-        * Each io_u message buffer contains recv and send messages.
-        * Aligning each of those buffers may potentially give
-        * some performance benefits.
-        */
-       io_u_buflen = td_max_bs(td);
-
-       /* check whether io_u buffer is big enough */
-       if (io_u_buflen < IO_U_BUF_LEN) {
-               log_err(
-                       "blocksize too small to accommodate assumed maximal request/response pair size (%" PRIu64 " < %d)\n",
-                       io_u_buflen, IO_U_BUF_LEN);
-               return -1;
-       }
-
-       /*
-        * td->orig_buffer_size beside the space really consumed by io_us
-        * has paddings which can be omitted for the memory registration.
-        */
-       io_us_size = (unsigned long long)io_u_buflen *
-                       (unsigned long long)td->o.iodepth;
-
-       if ((ret = rpma_mr_reg(csd->peer, sd->orig_buffer_aligned, io_us_size,
-                       RPMA_MR_USAGE_SEND | RPMA_MR_USAGE_RECV,
-                       &sd->msg_mr))) {
-               librpma_td_verror(td, ret, "rpma_mr_reg");
-               return -1;
-       }
-
-       return 0;
-}
-
-static void server_cleanup(struct thread_data *td)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct server_data *sd;
-       int ret;
-
-       if (csd == NULL)
-               return;
-
-       sd = csd->server_data;
-
-       if (sd != NULL) {
-               /* rpma_mr_dereg(messaging buffer from DRAM) */
-               if ((ret = rpma_mr_dereg(&sd->msg_mr)))
-                       librpma_td_verror(td, ret, "rpma_mr_dereg");
-
-               free(sd->msgs_queued);
-               free(sd);
-       }
-
-       librpma_fio_server_cleanup(td);
-}
-
-static int prepare_connection(struct thread_data *td,
-               struct rpma_conn_req *conn_req)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct server_data *sd = csd->server_data;
-       int ret;
-       int i;
-
-       /* prepare buffers for a flush requests */
-       sd->msg_sqe_available = td->o.iodepth;
-       for (i = 0; i < td->o.iodepth; i++) {
-               size_t offset_recv_msg = IO_U_BUFF_OFF_SERVER(i) + RECV_OFFSET;
-               if ((ret = rpma_conn_req_recv(conn_req, sd->msg_mr,
-                               offset_recv_msg, MAX_MSG_SIZE,
-                               (const void *)(uintptr_t)i))) {
-                       librpma_td_verror(td, ret, "rpma_conn_req_recv");
-                       return ret;
-               }
-       }
-
-       return 0;
-}
-
-static int server_open_file(struct thread_data *td, struct fio_file *f)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct rpma_conn_cfg *cfg = NULL;
-       uint16_t max_msg_num = td->o.iodepth;
-       int ret;
-
-       csd->prepare_connection = prepare_connection;
-
-       /* create a connection configuration object */
-       if ((ret = rpma_conn_cfg_new(&cfg))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_new");
-               return -1;
-       }
-
-       /*
-        * Calculate the required queue sizes where:
-        * - the send queue (SQ) has to be big enough to accommodate
-        *   all possible flush requests (SENDs)
-        * - the receive queue (RQ) has to be big enough to accommodate
-        *   all flush responses (RECVs)
-        * - the completion queue (CQ) has to be big enough to accommodate
-        *   all success and error completions (sq_size + rq_size)
-        */
-       if ((ret = rpma_conn_cfg_set_sq_size(cfg, max_msg_num))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_sq_size");
-               goto err_cfg_delete;
-       }
-       if ((ret = rpma_conn_cfg_set_rq_size(cfg, max_msg_num))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_rq_size");
-               goto err_cfg_delete;
-       }
-       if ((ret = rpma_conn_cfg_set_cq_size(cfg, max_msg_num * 2))) {
-               librpma_td_verror(td, ret, "rpma_conn_cfg_set_cq_size");
-               goto err_cfg_delete;
-       }
-
-       ret = librpma_fio_server_open_file(td, f, cfg);
-
-err_cfg_delete:
-       (void) rpma_conn_cfg_delete(&cfg);
-
-       return ret;
-}
-
-static int server_qe_process(struct thread_data *td, struct ibv_wc *wc)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct server_data *sd = csd->server_data;
-       GPSPMFlushRequest *flush_req;
-       GPSPMFlushResponse flush_resp = GPSPM_FLUSH_RESPONSE__INIT;
-       size_t flush_resp_size = 0;
-       size_t send_buff_offset;
-       size_t recv_buff_offset;
-       size_t io_u_buff_offset;
-       void *send_buff_ptr;
-       void *recv_buff_ptr;
-       void *op_ptr;
-       int msg_index;
-       int ret;
-
-       /* calculate SEND/RECV pair parameters */
-       msg_index = (int)(uintptr_t)wc->wr_id;
-       io_u_buff_offset = IO_U_BUFF_OFF_SERVER(msg_index);
-       send_buff_offset = io_u_buff_offset + SEND_OFFSET;
-       recv_buff_offset = io_u_buff_offset + RECV_OFFSET;
-       send_buff_ptr = sd->orig_buffer_aligned + send_buff_offset;
-       recv_buff_ptr = sd->orig_buffer_aligned + recv_buff_offset;
-
-       /* unpack a flush request from the received buffer */
-       flush_req = gpspm_flush_request__unpack(NULL, wc->byte_len,
-                       recv_buff_ptr);
-       if (flush_req == NULL) {
-               log_err("cannot unpack the flush request buffer\n");
-               goto err_terminate;
-       }
-
-       if (IS_NOT_THE_LAST_MESSAGE(flush_req)) {
-               op_ptr = csd->ws_ptr + flush_req->offset;
-               sd->persist(op_ptr, flush_req->length);
-       } else {
-               /*
-                * This is the last message - the client is done.
-                */
-               gpspm_flush_request__free_unpacked(flush_req, NULL);
-               td->done = true;
-               return 0;
-       }
-
-       /* initiate the next receive operation */
-       if ((ret = rpma_recv(csd->conn, sd->msg_mr, recv_buff_offset,
-                       MAX_MSG_SIZE,
-                       (const void *)(uintptr_t)msg_index))) {
-               librpma_td_verror(td, ret, "rpma_recv");
-               goto err_free_unpacked;
-       }
-
-       /* prepare a flush response and pack it to a send buffer */
-       flush_resp.op_context = flush_req->op_context;
-       flush_resp_size = gpspm_flush_response__get_packed_size(&flush_resp);
-       if (flush_resp_size > MAX_MSG_SIZE) {
-               log_err(
-                       "Size of the packed flush response is bigger than the available space of the send buffer (%"
-                       PRIu64 " > %i\n", flush_resp_size, MAX_MSG_SIZE);
-               goto err_free_unpacked;
-       }
-
-       (void) gpspm_flush_response__pack(&flush_resp, send_buff_ptr);
-
-       /* send the flush response */
-       if ((ret = rpma_send(csd->conn, sd->msg_mr, send_buff_offset,
-                       flush_resp_size, RPMA_F_COMPLETION_ALWAYS, NULL))) {
-               librpma_td_verror(td, ret, "rpma_send");
-               goto err_free_unpacked;
-       }
-       --sd->msg_sqe_available;
-
-       gpspm_flush_request__free_unpacked(flush_req, NULL);
-
-       return 0;
-
-err_free_unpacked:
-       gpspm_flush_request__free_unpacked(flush_req, NULL);
-
-err_terminate:
-       td->terminate = true;
-
-       return -1;
-}
-
-static inline int server_queue_process(struct thread_data *td)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct server_data *sd = csd->server_data;
-       int ret;
-       int i;
-
-       /* min(# of queue entries, # of SQ entries available) */
-       uint32_t qes_to_process = min(sd->msg_queued_nr, sd->msg_sqe_available);
-       if (qes_to_process == 0)
-               return 0;
-
-       /* process queued completions */
-       for (i = 0; i < qes_to_process; ++i) {
-               if ((ret = server_qe_process(td, &sd->msgs_queued[i])))
-                       return ret;
-       }
-
-       /* progress the queue */
-       for (i = 0; i < sd->msg_queued_nr - qes_to_process; ++i) {
-               memcpy(&sd->msgs_queued[i],
-                       &sd->msgs_queued[qes_to_process + i],
-                       sizeof(sd->msgs_queued[i]));
-       }
-
-       sd->msg_queued_nr -= qes_to_process;
-
-       return 0;
-}
-
-static int server_cmpl_process(struct thread_data *td)
-{
-       struct librpma_fio_server_data *csd = td->io_ops_data;
-       struct server_data *sd = csd->server_data;
-       struct ibv_wc *wc = &sd->msgs_queued[sd->msg_queued_nr];
-       struct librpma_fio_options_values *o = td->eo;
-       int ret;
-
-       ret = rpma_cq_get_wc(csd->cq, 1, wc, NULL);
-       if (ret == RPMA_E_NO_COMPLETION) {
-               if (o->busy_wait_polling)
-                       return 0; /* lack of completion is not an error */
-
-               ret = rpma_cq_wait(csd->cq);
-               if (ret == RPMA_E_NO_COMPLETION)
-                       return 0; /* lack of completion is not an error */
-               if (ret) {
-                       librpma_td_verror(td, ret, "rpma_cq_wait");
-                       goto err_terminate;
-               }
-
-               ret = rpma_cq_get_wc(csd->cq, 1, wc, NULL);
-               if (ret == RPMA_E_NO_COMPLETION)
-                       return 0; /* lack of completion is not an error */
-               if (ret) {
-                       librpma_td_verror(td, ret, "rpma_cq_get_wc");
-                       goto err_terminate;
-               }
-       } else if (ret) {
-               librpma_td_verror(td, ret, "rpma_cq_get_wc");
-               goto err_terminate;
-       }
-
-       /* validate the completion */
-       if (wc->status != IBV_WC_SUCCESS)
-               goto err_terminate;
-
-       if (wc->opcode == IBV_WC_RECV)
-               ++sd->msg_queued_nr;
-       else if (wc->opcode == IBV_WC_SEND)
-               ++sd->msg_sqe_available;
-
-       return 0;
-
-err_terminate:
-       td->terminate = true;
-
-       return -1;
-}
-
-static enum fio_q_status server_queue(struct thread_data *td, struct io_u *io_u)
-{
-       do {
-               if (server_cmpl_process(td))
-                       return FIO_Q_BUSY;
-
-               if (server_queue_process(td))
-                       return FIO_Q_BUSY;
-
-       } while (!td->done);
-
-       return FIO_Q_COMPLETED;
-}
-
-FIO_STATIC struct ioengine_ops ioengine_server = {
-       .name                   = "librpma_gpspm_server",
-       .version                = FIO_IOOPS_VERSION,
-       .init                   = server_init,
-       .post_init              = server_post_init,
-       .open_file              = server_open_file,
-       .close_file             = librpma_fio_server_close_file,
-       .queue                  = server_queue,
-       .invalidate             = librpma_fio_file_nop,
-       .cleanup                = server_cleanup,
-       .flags                  = FIO_SYNCIO,
-       .options                = librpma_fio_options,
-       .option_struct_size     = sizeof(struct librpma_fio_options_values),
-};
-
-/* register both engines */
-
-static void fio_init fio_librpma_gpspm_register(void)
-{
-       register_ioengine(&ioengine_client);
-       register_ioengine(&ioengine_server);
-}
-
-static void fio_exit fio_librpma_gpspm_unregister(void)
-{
-       unregister_ioengine(&ioengine_client);
-       unregister_ioengine(&ioengine_server);
-}
diff --git a/engines/librpma_gpspm_flush.pb-c.c b/engines/librpma_gpspm_flush.pb-c.c
deleted file mode 100644 (file)
index 3ff2475..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright 2020, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-/* Generated by the protocol buffer compiler. DO NOT EDIT! */
-/* Generated from: librpma_gpspm_flush.proto */
-
-/* Do not generate deprecated warnings for self */
-#ifndef PROTOBUF_C__NO_DEPRECATED
-#define PROTOBUF_C__NO_DEPRECATED
-#endif
-
-#include "librpma_gpspm_flush.pb-c.h"
-void   gpspm_flush_request__init
-                     (GPSPMFlushRequest         *message)
-{
-  static const GPSPMFlushRequest init_value = GPSPM_FLUSH_REQUEST__INIT;
-  *message = init_value;
-}
-size_t gpspm_flush_request__get_packed_size
-                     (const GPSPMFlushRequest *message)
-{
-  assert(message->base.descriptor == &gpspm_flush_request__descriptor);
-  return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message));
-}
-size_t gpspm_flush_request__pack
-                     (const GPSPMFlushRequest *message,
-                      uint8_t       *out)
-{
-  assert(message->base.descriptor == &gpspm_flush_request__descriptor);
-  return protobuf_c_message_pack ((const ProtobufCMessage*)message, out);
-}
-size_t gpspm_flush_request__pack_to_buffer
-                     (const GPSPMFlushRequest *message,
-                      ProtobufCBuffer *buffer)
-{
-  assert(message->base.descriptor == &gpspm_flush_request__descriptor);
-  return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer);
-}
-GPSPMFlushRequest *
-       gpspm_flush_request__unpack
-                     (ProtobufCAllocator  *allocator,
-                      size_t               len,
-                      const uint8_t       *data)
-{
-  return (GPSPMFlushRequest *)
-     protobuf_c_message_unpack (&gpspm_flush_request__descriptor,
-                                allocator, len, data);
-}
-void   gpspm_flush_request__free_unpacked
-                     (GPSPMFlushRequest *message,
-                      ProtobufCAllocator *allocator)
-{
-  if(!message)
-    return;
-  assert(message->base.descriptor == &gpspm_flush_request__descriptor);
-  protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator);
-}
-void   gpspm_flush_response__init
-                     (GPSPMFlushResponse         *message)
-{
-  static const GPSPMFlushResponse init_value = GPSPM_FLUSH_RESPONSE__INIT;
-  *message = init_value;
-}
-size_t gpspm_flush_response__get_packed_size
-                     (const GPSPMFlushResponse *message)
-{
-  assert(message->base.descriptor == &gpspm_flush_response__descriptor);
-  return protobuf_c_message_get_packed_size ((const ProtobufCMessage*)(message));
-}
-size_t gpspm_flush_response__pack
-                     (const GPSPMFlushResponse *message,
-                      uint8_t       *out)
-{
-  assert(message->base.descriptor == &gpspm_flush_response__descriptor);
-  return protobuf_c_message_pack ((const ProtobufCMessage*)message, out);
-}
-size_t gpspm_flush_response__pack_to_buffer
-                     (const GPSPMFlushResponse *message,
-                      ProtobufCBuffer *buffer)
-{
-  assert(message->base.descriptor == &gpspm_flush_response__descriptor);
-  return protobuf_c_message_pack_to_buffer ((const ProtobufCMessage*)message, buffer);
-}
-GPSPMFlushResponse *
-       gpspm_flush_response__unpack
-                     (ProtobufCAllocator  *allocator,
-                      size_t               len,
-                      const uint8_t       *data)
-{
-  return (GPSPMFlushResponse *)
-     protobuf_c_message_unpack (&gpspm_flush_response__descriptor,
-                                allocator, len, data);
-}
-void   gpspm_flush_response__free_unpacked
-                     (GPSPMFlushResponse *message,
-                      ProtobufCAllocator *allocator)
-{
-  if(!message)
-    return;
-  assert(message->base.descriptor == &gpspm_flush_response__descriptor);
-  protobuf_c_message_free_unpacked ((ProtobufCMessage*)message, allocator);
-}
-static const ProtobufCFieldDescriptor gpspm_flush_request__field_descriptors[3] =
-{
-  {
-    "offset",
-    1,
-    PROTOBUF_C_LABEL_REQUIRED,
-    PROTOBUF_C_TYPE_FIXED64,
-    0,   /* quantifier_offset */
-    offsetof(GPSPMFlushRequest, offset),
-    NULL,
-    NULL,
-    0,             /* flags */
-    0,NULL,NULL    /* reserved1,reserved2, etc */
-  },
-  {
-    "length",
-    2,
-    PROTOBUF_C_LABEL_REQUIRED,
-    PROTOBUF_C_TYPE_FIXED64,
-    0,   /* quantifier_offset */
-    offsetof(GPSPMFlushRequest, length),
-    NULL,
-    NULL,
-    0,             /* flags */
-    0,NULL,NULL    /* reserved1,reserved2, etc */
-  },
-  {
-    "op_context",
-    3,
-    PROTOBUF_C_LABEL_REQUIRED,
-    PROTOBUF_C_TYPE_FIXED64,
-    0,   /* quantifier_offset */
-    offsetof(GPSPMFlushRequest, op_context),
-    NULL,
-    NULL,
-    0,             /* flags */
-    0,NULL,NULL    /* reserved1,reserved2, etc */
-  },
-};
-static const unsigned gpspm_flush_request__field_indices_by_name[] = {
-  1,   /* field[1] = length */
-  0,   /* field[0] = offset */
-  2,   /* field[2] = op_context */
-};
-static const ProtobufCIntRange gpspm_flush_request__number_ranges[1 + 1] =
-{
-  { 1, 0 },
-  { 0, 3 }
-};
-const ProtobufCMessageDescriptor gpspm_flush_request__descriptor =
-{
-  PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
-  "GPSPM_flush_request",
-  "GPSPMFlushRequest",
-  "GPSPMFlushRequest",
-  "",
-  sizeof(GPSPMFlushRequest),
-  3,
-  gpspm_flush_request__field_descriptors,
-  gpspm_flush_request__field_indices_by_name,
-  1,  gpspm_flush_request__number_ranges,
-  (ProtobufCMessageInit) gpspm_flush_request__init,
-  NULL,NULL,NULL    /* reserved[123] */
-};
-static const ProtobufCFieldDescriptor gpspm_flush_response__field_descriptors[1] =
-{
-  {
-    "op_context",
-    1,
-    PROTOBUF_C_LABEL_REQUIRED,
-    PROTOBUF_C_TYPE_FIXED64,
-    0,   /* quantifier_offset */
-    offsetof(GPSPMFlushResponse, op_context),
-    NULL,
-    NULL,
-    0,             /* flags */
-    0,NULL,NULL    /* reserved1,reserved2, etc */
-  },
-};
-static const unsigned gpspm_flush_response__field_indices_by_name[] = {
-  0,   /* field[0] = op_context */
-};
-static const ProtobufCIntRange gpspm_flush_response__number_ranges[1 + 1] =
-{
-  { 1, 0 },
-  { 0, 1 }
-};
-const ProtobufCMessageDescriptor gpspm_flush_response__descriptor =
-{
-  PROTOBUF_C__MESSAGE_DESCRIPTOR_MAGIC,
-  "GPSPM_flush_response",
-  "GPSPMFlushResponse",
-  "GPSPMFlushResponse",
-  "",
-  sizeof(GPSPMFlushResponse),
-  1,
-  gpspm_flush_response__field_descriptors,
-  gpspm_flush_response__field_indices_by_name,
-  1,  gpspm_flush_response__number_ranges,
-  (ProtobufCMessageInit) gpspm_flush_response__init,
-  NULL,NULL,NULL    /* reserved[123] */
-};
diff --git a/engines/librpma_gpspm_flush.pb-c.h b/engines/librpma_gpspm_flush.pb-c.h
deleted file mode 100644 (file)
index ad475a9..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2020, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License,
- * version 2 as published by the Free Software Foundation..
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-/* Generated by the protocol buffer compiler. DO NOT EDIT! */
-/* Generated from: librpma_gpspm_flush.proto */
-
-#ifndef PROTOBUF_C_GPSPM_5fflush_2eproto__INCLUDED
-#define PROTOBUF_C_GPSPM_5fflush_2eproto__INCLUDED
-
-#include <protobuf-c/protobuf-c.h>
-
-PROTOBUF_C__BEGIN_DECLS
-
-#if PROTOBUF_C_VERSION_NUMBER < 1000000
-# error This file was generated by a newer version of protoc-c which is incompatible with your libprotobuf-c headers. Please update your headers.
-#elif 1003003 < PROTOBUF_C_MIN_COMPILER_VERSION
-# error This file was generated by an older version of protoc-c which is incompatible with your libprotobuf-c headers. Please regenerate this file with a newer version of protoc-c.
-#endif
-
-
-typedef struct _GPSPMFlushRequest GPSPMFlushRequest;
-typedef struct _GPSPMFlushResponse GPSPMFlushResponse;
-
-
-/* --- enums --- */
-
-
-/* --- messages --- */
-
-struct  _GPSPMFlushRequest
-{
-  ProtobufCMessage base;
-  uint64_t offset;
-  uint64_t length;
-  uint64_t op_context;
-};
-#define GPSPM_FLUSH_REQUEST__INIT \
- { PROTOBUF_C_MESSAGE_INIT (&gpspm_flush_request__descriptor) \
-    , 0, 0, 0 }
-
-
-struct  _GPSPMFlushResponse
-{
-  ProtobufCMessage base;
-  uint64_t op_context;
-};
-#define GPSPM_FLUSH_RESPONSE__INIT \
- { PROTOBUF_C_MESSAGE_INIT (&gpspm_flush_response__descriptor) \
-    , 0 }
-
-
-/* GPSPMFlushRequest methods */
-void   gpspm_flush_request__init
-                     (GPSPMFlushRequest         *message);
-size_t gpspm_flush_request__get_packed_size
-                     (const GPSPMFlushRequest   *message);
-size_t gpspm_flush_request__pack
-                     (const GPSPMFlushRequest   *message,
-                      uint8_t             *out);
-size_t gpspm_flush_request__pack_to_buffer
-                     (const GPSPMFlushRequest   *message,
-                      ProtobufCBuffer     *buffer);
-GPSPMFlushRequest *
-       gpspm_flush_request__unpack
-                     (ProtobufCAllocator  *allocator,
-                      size_t               len,
-                      const uint8_t       *data);
-void   gpspm_flush_request__free_unpacked
-                     (GPSPMFlushRequest *message,
-                      ProtobufCAllocator *allocator);
-/* GPSPMFlushResponse methods */
-void   gpspm_flush_response__init
-                     (GPSPMFlushResponse         *message);
-size_t gpspm_flush_response__get_packed_size
-                     (const GPSPMFlushResponse   *message);
-size_t gpspm_flush_response__pack
-                     (const GPSPMFlushResponse   *message,
-                      uint8_t             *out);
-size_t gpspm_flush_response__pack_to_buffer
-                     (const GPSPMFlushResponse   *message,
-                      ProtobufCBuffer     *buffer);
-GPSPMFlushResponse *
-       gpspm_flush_response__unpack
-                     (ProtobufCAllocator  *allocator,
-                      size_t               len,
-                      const uint8_t       *data);
-void   gpspm_flush_response__free_unpacked
-                     (GPSPMFlushResponse *message,
-                      ProtobufCAllocator *allocator);
-/* --- per-message closures --- */
-
-typedef void (*GPSPMFlushRequest_Closure)
-                 (const GPSPMFlushRequest *message,
-                  void *closure_data);
-typedef void (*GPSPMFlushResponse_Closure)
-                 (const GPSPMFlushResponse *message,
-                  void *closure_data);
-
-/* --- services --- */
-
-
-/* --- descriptors --- */
-
-extern const ProtobufCMessageDescriptor gpspm_flush_request__descriptor;
-extern const ProtobufCMessageDescriptor gpspm_flush_response__descriptor;
-
-PROTOBUF_C__END_DECLS
-
-
-#endif  /* PROTOBUF_C_GPSPM_5fflush_2eproto__INCLUDED */
diff --git a/engines/librpma_gpspm_flush.proto b/engines/librpma_gpspm_flush.proto
deleted file mode 100644 (file)
index 91765a7..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-syntax = "proto2";
-
-message GPSPM_flush_request {
-    /* an offset of a region to be flushed within its memory registration */
-    required fixed64 offset = 1;
-    /* a length of a region to be flushed */
-    required fixed64 length = 2;
-    /* a user-defined operation context */
-    required fixed64 op_context = 3;
-}
-
-message GPSPM_flush_response {
-    /* the operation context of a completed request */
-    required fixed64 op_context = 1;
-}
index ce748d1448ef4b852aaa7cef0c2d2579129c70e9..6bc4af1fac76851c4498354bc643ea31bfcbca1a 100644 (file)
@@ -280,7 +280,7 @@ static int fio_libnfs_open(struct thread_data *td, struct fio_file *f)
        nfs_data = calloc(1, sizeof(struct nfs_data));
        nfs_data->options = options;
 
-       if (td->o.td_ddir == TD_DDIR_WRITE)
+       if (td_write(td))
                flags |= O_CREAT | O_RDWR;
        else
                flags |= O_RDWR;
index c6629e8644c67bef77c1d6deb399d4993b93831f..33d874773799d343f711cbf03ed7ac93db20f191 100644 (file)
@@ -362,7 +362,8 @@ void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
 }
 
 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
-                           struct iovec *iov, struct nvme_dsm *dsm)
+                           struct iovec *iov, struct nvme_dsm *dsm,
+                           uint8_t write_opcode, unsigned int cdw12_flags)
 {
        struct nvme_data *data = FILE_ENG_DATA(io_u->file);
        __u64 slba;
@@ -375,11 +376,16 @@ int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
                cmd->opcode = nvme_cmd_read;
                break;
        case DDIR_WRITE:
-               cmd->opcode = nvme_cmd_write;
+               cmd->opcode = write_opcode;
                break;
        case DDIR_TRIM:
                fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
                return 0;
+       case DDIR_SYNC:
+       case DDIR_DATASYNC:
+               cmd->opcode = nvme_cmd_flush;
+               cmd->nsid = data->nsid;
+               return 0;
        default:
                return -ENOTSUP;
        }
@@ -391,7 +397,7 @@ int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
        cmd->cdw10 = slba & 0xffffffff;
        cmd->cdw11 = slba >> 32;
        /* cdw12 represent number of lba's for read/write */
-       cmd->cdw12 = nlb | (io_u->dtype << 20);
+       cmd->cdw12 = nlb | (io_u->dtype << 20) | cdw12_flags;
        cmd->cdw13 = io_u->dspec << 16;
        if (iov) {
                iov->iov_base = io_u->xfer_buf;
index 2d5204fc01f9c6ce050937d90a45078fa261b14b..b5fef2fb2cf822887f57a11072b4a6754a5c8895 100644 (file)
@@ -73,9 +73,13 @@ enum nvme_admin_opcode {
 };
 
 enum nvme_io_opcode {
+       nvme_cmd_flush                  = 0x00,
        nvme_cmd_write                  = 0x01,
        nvme_cmd_read                   = 0x02,
+       nvme_cmd_write_uncor            = 0x04,
+       nvme_cmd_write_zeroes           = 0x08,
        nvme_cmd_dsm                    = 0x09,
+       nvme_cmd_verify                 = 0x0c,
        nvme_cmd_io_mgmt_recv           = 0x12,
        nvme_zns_cmd_mgmt_send          = 0x79,
        nvme_zns_cmd_mgmt_recv          = 0x7a,
@@ -426,7 +430,8 @@ int fio_nvme_get_info(struct fio_file *f, __u64 *nlba, __u32 pi_act,
                      struct nvme_data *data);
 
 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
-                           struct iovec *iov, struct nvme_dsm *dsm);
+                           struct iovec *iov, struct nvme_dsm *dsm,
+                           uint8_t write_opcode, unsigned int cdw12_flags);
 
 void fio_nvme_pi_fill(struct nvme_uring_cmd *cmd, struct io_u *io_u,
                      struct nvme_cmd_ext_io_opts *opts);
index 0bb5be4a9d64f6195a0165eee8f8e54e279f1ded..9df70bd28b75aefe8b7282cd2ec76bd54ecd64a7 100644 (file)
@@ -1154,7 +1154,7 @@ int fio_sgio_close(struct thread_data *td, struct fio_file *f)
  * Build an error string with details about the driver, host or scsi
  * error contained in the sg header Caller will use as necessary.
  */
-static char *fio_sgio_errdetails(struct io_u *io_u)
+static char *fio_sgio_errdetails(struct thread_data *td, struct io_u *io_u)
 {
        struct sg_io_hdr *hdr = &io_u->hdr;
 #define MAXERRDETAIL 1024
index d19991222e80ba46dd3decdb343704781439cc87..729d8a71cfea25f6827a6f70fff469493b473b25 100644 (file)
@@ -39,7 +39,6 @@ struct psyncv2_options {
        void *pad;
        unsigned int hipri;
        unsigned int hipri_percentage;
-       unsigned int uncached;
        unsigned int nowait;
 };
 
@@ -68,11 +67,7 @@ static struct fio_option options[] = {
        {
                .name   = "uncached",
                .lname  = "Uncached",
-               .type   = FIO_OPT_INT,
-               .off1   = offsetof(struct psyncv2_options, uncached),
-               .help   = "Use RWF_UNCACHED for buffered read/writes",
-               .category = FIO_OPT_C_ENGINE,
-               .group  = FIO_OPT_G_INVALID,
+               .type   = FIO_OPT_SOFT_DEPRECATED,
        },
        {
                .name   = "nowait",
@@ -172,8 +167,6 @@ static enum fio_q_status fio_pvsyncio2_queue(struct thread_data *td,
        if (o->hipri &&
            (rand_between(&sd->rand_state, 1, 100) <= o->hipri_percentage))
                flags |= RWF_HIPRI;
-       if (!td->o.odirect && o->uncached)
-               flags |= RWF_UNCACHED;
        if (o->nowait)
                flags |= RWF_NOWAIT;
 
@@ -182,9 +175,11 @@ static enum fio_q_status fio_pvsyncio2_queue(struct thread_data *td,
 
        if (io_u->ddir == DDIR_READ)
                ret = preadv2(f->fd, iov, 1, io_u->offset, flags);
-       else if (io_u->ddir == DDIR_WRITE)
+       else if (io_u->ddir == DDIR_WRITE) {
+               if (td->o.oatomic)
+                       flags |= RWF_ATOMIC;
                ret = pwritev2(f->fd, iov, 1, io_u->offset, flags);
-       else if (io_u->ddir == DDIR_TRIM) {
+       } else if (io_u->ddir == DDIR_TRIM) {
                do_io_u_trim(td, io_u);
                return FIO_Q_COMPLETED;
        } else
@@ -483,7 +478,8 @@ static struct ioengine_ops ioengine_pvrw2 = {
        .open_file      = generic_open_file,
        .close_file     = generic_close_file,
        .get_file_size  = generic_get_file_size,
-       .flags          = FIO_SYNCIO,
+       .flags          = FIO_SYNCIO |
+                         FIO_ATOMICWRITES,
        .options        = options,
        .option_struct_size     = sizeof(struct psyncv2_options),
 };
index 6ba4aa467d5501408c52604ddf7daa55e6d3fc10..5f1af78d3de34a0e3d7786c527b9a2ff63474957 100644 (file)
@@ -1253,7 +1253,7 @@ static int xnvme_fioe_fetch_ruhs(struct thread_data *td, struct fio_file *f,
        struct xnvme_dev *dev;
        struct xnvme_spec_ruhs *ruhs;
        struct xnvme_cmd_ctx ctx;
-       uint32_t ruhs_nbytes;
+       uint32_t ruhs_nbytes, nr_ruhs;
        uint32_t nsid;
        int err = 0, err_lock;
 
@@ -1276,7 +1276,8 @@ static int xnvme_fioe_fetch_ruhs(struct thread_data *td, struct fio_file *f,
                goto exit;
        }
 
-       ruhs_nbytes = sizeof(*ruhs) + (FDP_MAX_RUHS * sizeof(struct xnvme_spec_ruhs_desc));
+       nr_ruhs = fruhs_info->nr_ruhs;
+       ruhs_nbytes = sizeof(*ruhs) + (fruhs_info->nr_ruhs * sizeof(struct xnvme_spec_ruhs_desc));
        ruhs = xnvme_buf_alloc(dev, ruhs_nbytes);
        if (!ruhs) {
                err = -errno;
@@ -1296,7 +1297,7 @@ static int xnvme_fioe_fetch_ruhs(struct thread_data *td, struct fio_file *f,
        }
 
        fruhs_info->nr_ruhs = ruhs->nruhsd;
-       for (uint32_t idx = 0; idx < fruhs_info->nr_ruhs; ++idx) {
+       for (uint32_t idx = 0; idx < nr_ruhs; ++idx) {
                fruhs_info->plis[idx] = le16_to_cpu(ruhs->desc[idx].pi);
        }
 
diff --git a/examples/atomic-verify.fio b/examples/atomic-verify.fio
new file mode 100644 (file)
index 0000000..17bcd89
--- /dev/null
@@ -0,0 +1,36 @@
+# Data verification with atomic writes
+#
+# Some background on atomic writes:
+#
+# The main selling point of atomic writes is the guarantee that writes
+# to storage will not be torn by a power failure or kernel crash.
+
+# Another aspect of atomic writes is that they handle racing writes and
+# reads, such that a read racing with a write will see all the data from
+# the write or none. Well, SCSI and NVMe guarantee this if using
+# RWF_ATOMIC, but it is not formally stated as a feature of RWF_ATOMIC.
+#
+# Fio verify mode can be used to prove that atomic writes can make "safe"
+# racing reads and writes. This is done by having many jobs in a xsum verify
+# mode. In this way, xsums should be correct, although a job may be
+# reading a data block written by another job; however
+# verify_write_sequence must be disabled, as it cannot be helped that data
+# blocks will be out of sequence with many jobs.
+#
+# Atomic write limits:
+# For a block device, the max block size for atomic=1 is in
+# /sys/block/sdXXX/queue/atomic_write_unit_max_bytes
+# or this value can also be read with a statx syscall on the bdev file.
+
+[write-and-verify]
+rw=randwrite
+bs=4k
+direct=1
+ioengine=libaio
+iodepth=16
+verify=crc64
+atomic=1
+verify_write_sequence=0
+numjobs=10
+# Use /dev/XXX or filename
+filename=/dev/XXX
index 2dcae364a910c45fce925310cf297758daa8395c..c16e89b169a4dd7ffdcf7ac922172af211968cdd 100644 (file)
@@ -1,19 +1,29 @@
 # Example test for the HTTP engine's S3 support against Amazon AWS.
 # Obviously, you have to adjust the S3 credentials; for this example,
 # they're passed in via the environment.
-#
+# For non-AWS S3 implementations, refer to your S3 vendor's region 
+# settings. Note that the region value appears twice, in http_host and 
+# http_s3_region.
+# This example uses virtual-hosted-style requests:
+# https://bucket-name.s3.region-code.amazonaws.com/k/e.y
+# For path-style, prefix the key with the bucket name in the filename
+# so that filename=/bucket-name/k/e.y:
+# https://s3.region-code.amazonaws.com/bucket-name/k/e.y
+
+# Reference for Virtual-hosted-style vs. Path-style URLs:
+# https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html
 
 [global]
 ioengine=http
 name=test
 direct=1
-filename=/larsmb-fio-test/object
+filename=/k/e.y
 http_verbose=0
 https=on
 http_mode=s3
 http_s3_key=${S3_KEY}
 http_s3_keyid=${S3_ID}
-http_host=s3.eu-central-1.amazonaws.com
+http_host=bucket-name.s3.eu-central-1.amazonaws.com
 http_s3_region=eu-central-1
 group_reporting
 
diff --git a/examples/librpma_apm-client.fio b/examples/librpma_apm-client.fio
deleted file mode 100644 (file)
index 82a5d20..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-# Example of the librpma_apm_client job
-
-[global]
-ioengine=librpma_apm_client
-create_serialize=0 # (required) forces specific initiation sequence
-serverip=[serverip] #IP address the server is listening on
-port=7204 # port(s) the server will listen on, <port; port + numjobs - 1> will be used
-thread
-
-# The client will get a remote memory region description after establishing
-# a connection.
-
-[client]
-numjobs=1 # number of parallel connections
-group_reporting=1
-sync=1 # 1 is the best for latency measurements, 0 for bandwidth
-iodepth=2 # total number of ious
-iodepth_batch_submit=1 # number of ious to be submitted at once
-rw=write # read/write/randread/randwrite/readwrite/rw
-rwmixread=70 # % of a mixed workload that should be reads
-blocksize=4KiB
-ramp_time=15s # gives some time to stabilize the workload
-time_based
-runtime=60s # run the workload for the specified period of time
diff --git a/examples/librpma_apm-client.png b/examples/librpma_apm-client.png
deleted file mode 100644 (file)
index 2fe02cd..0000000
Binary files a/examples/librpma_apm-client.png and /dev/null differ
diff --git a/examples/librpma_apm-server.fio b/examples/librpma_apm-server.fio
deleted file mode 100644 (file)
index dc1ddba..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-# Example of the librpma_apm_server job
-
-[global]
-ioengine=librpma_apm_server
-create_serialize=0 # (required) forces specific initiation sequence
-kb_base=1000 # turn on the straight units handling (non-compatibility mode)
-serverip=[serverip] # IP address to listen on
-port=7204 # port(s) the server jobs will listen on, ports <port; port + numjobs - 1> will be used
-thread
-
-# The server side spawns one thread for each expected connection from
-# the client-side, opens and registers the range dedicated for this thread
-# (a workspace) from the provided memory.
-# Each of the server threads accepts a connection on the dedicated port
-# (different for each and every working thread) and waits for it to end up,
-# and closes itself.
-
-[server]
-# set to 1 (true) ONLY when Direct Write to PMem from the remote host is possible
-# (https://pmem.io/rpma/documentation/basic-direct-write-to-pmem.html)
-direct_write_to_pmem=0
-
-numjobs=1 # number of expected incoming connections
-size=100MiB # size of workspace for a single connection
-filename=malloc # device dax or an existing fsdax file or "malloc" for allocation from DRAM
-# filename=/dev/dax1.0
diff --git a/examples/librpma_apm-server.png b/examples/librpma_apm-server.png
deleted file mode 100644 (file)
index f78ae02..0000000
Binary files a/examples/librpma_apm-server.png and /dev/null differ
diff --git a/examples/librpma_gpspm-client.fio b/examples/librpma_gpspm-client.fio
deleted file mode 100644 (file)
index 843382d..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-# Example of the librpma_gpspm_client job
-
-[global]
-ioengine=librpma_gpspm_client
-create_serialize=0 # (required) forces specific initiation sequence
-serverip=[serverip] #IP address the server is listening on
-port=7204 # port(s) the server will listen on, <port; port + numjobs - 1> will be used
-thread
-
-# The client will get a remote memory region description after establishing
-# a connection.
-
-[client]
-numjobs=1 # number of parallel connections
-group_reporting=1
-sync=1 # 1 is the best for latency measurements, 0 for bandwidth
-iodepth=2 # total number of ious
-iodepth_batch_submit=1 # number of ious to be submitted at once
-rw=write # write/randwrite
-blocksize=4KiB
-ramp_time=15s # gives some time to stabilize the workload
-time_based
-runtime=60s # run the workload for the specified period of time
diff --git a/examples/librpma_gpspm-client.png b/examples/librpma_gpspm-client.png
deleted file mode 100644 (file)
index 0c975a2..0000000
Binary files a/examples/librpma_gpspm-client.png and /dev/null differ
diff --git a/examples/librpma_gpspm-server.fio b/examples/librpma_gpspm-server.fio
deleted file mode 100644 (file)
index 4555314..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-# Example of the librpma_gpspm_server job
-
-[global]
-ioengine=librpma_gpspm_server
-create_serialize=0 # (required) forces specific initiation sequence
-kb_base=1000 # turn on the straight units handling (non-compatibility mode)
-serverip=[serverip] #IP address to listen on
-port=7204 # port(s) the server jobs will listen on, ports <port; port + numjobs - 1> will be used
-thread
-
-# The server side spawns one thread for each expected connection from
-# the client-side, opens and registers the range dedicated for this thread
-# (a workspace) from the provided memory.
-# Each of the server threads accepts a connection on the dedicated port
-# (different for each and every working thread), accepts and executes flush
-# requests, and sends back a flush response for each of the requests.
-# When the client is done it sends the termination notice to the server's thread.
-
-[server]
-# set to 1 (true) ONLY when Direct Write to PMem from the remote host is possible
-# (https://pmem.io/rpma/documentation/basic-direct-write-to-pmem.html)
-direct_write_to_pmem=0
-# set to 0 (false) to wait for completion instead of busy-wait polling completion.
-busy_wait_polling=1
-numjobs=1 # number of expected incoming connections
-iodepth=2 # number of parallel GPSPM requests
-size=100MiB # size of workspace for a single connection
-filename=malloc # device dax or an existing fsdax file or "malloc" for allocation from DRAM
-# filename=/dev/dax1.0
-
-# The client will terminate the server when the client will end up its job.
-time_based
-runtime=365d
diff --git a/examples/librpma_gpspm-server.png b/examples/librpma_gpspm-server.png
deleted file mode 100644 (file)
index 5612453..0000000
Binary files a/examples/librpma_gpspm-server.png and /dev/null differ
diff --git a/fio.1 b/fio.1
index 1c8e3a56707e3e3700e3267bd1747786cd2540d0..0fd0fb25f288e6a027c6611e2c49c425577750f7 100644 (file)
--- a/fio.1
+++ b/fio.1
@@ -585,6 +585,11 @@ the first device, `\\\\.\\PhysicalDrive1' for the second etc.
 Note: Windows and FreeBSD prevent write access to areas
 of the disk containing in-use data (e.g. filesystems).
 .P
+For HTTP and S3 access, specify a valid URL path or S3 key, respectively. 
+A filename for path-style S3 includes a bucket name (`/bucket/k/e.y') 
+while a virtual-hosted-style S3 filename (`/k/e.y') does not because its 
+bucket name is specified in \fBhttp_host\fR.
+.P
 The filename `\-' is a reserved name, meaning *stdin* or *stdout*. Which
 of the two depends on the read/write direction set.
 .RE
@@ -2261,6 +2266,19 @@ cached data. Currently the RWF_NOWAIT flag does not supported for cached write.
 For direct I/O, requests will only succeed if cache invalidation isn't required,
 file blocks are fully allocated and the disk request could be issued immediately.
 .TP
+.BI (pvsync2,libaio,io_uring)atomic \fR=\fPbool
+This option means that writes are issued with torn-write protection, meaning
+that on a power failure or kernel crash, all or none of the data from the write
+will be stored, but never a mix of old and new data. Torn-write protection is
+also known as atomic writes.
+
+This option sets the RWF_ATOMIC flag (supported from the 6.11 Linux kernel) on
+a per-IO basis.
+
+Writes with RWF_ATOMIC set will be rejected by the kernel when the file does
+not support torn-write protection. To learn a file's torn-write limits, issue
+statx with STATX_WRITE_ATOMIC.
+.TP
 .BI (io_uring_cmd,xnvme)fdp \fR=\fPbool
 Enable Flexible Data Placement mode for write commands.
 .TP
@@ -2305,12 +2323,17 @@ The available placement ID (indices) are defined by \fBplids\fR or
 .RE
 .TP
 .BI (io_uring_cmd,xnvme)plids=str, fdp_pli \fR=\fPstr
-Select which Placement IDs (streams) or Placement ID Indicies (FDP) this job is
-allowed to use for writes.  For FDP by default, the job will cycle through all
-available Placement IDs, so use this to isolate these identifiers to specific
-jobs. If you want fio to use placement identifier only at indices 0, 2 and 5
-specify, you would set `plids=0,2,5`. For streams this should be a
-comma-separated list of Stream IDs.
+Select which Placement ID Indices (FDP) or Placement IDs (streams) this job is
+allowed to use for writes. This option accepts a comma-separated list of values
+or ranges (e.g., 1,2-4,5,6-8).
+
+For FDP by default, the job will cycle through all available Placement IDs, so
+use this option to be selective. The values specified here are array indices
+for the list of placement IDs returned by the nvme-cli command `nvme fdp
+status'. If you want fio to use FDP placement identifiers only at indices 0, 2
+and 5, set `plids=0,2,5'.
+
+For streams this should be a list of Stream IDs.
 .TP
 .BI (io_uring_cmd,xnvme)\fR\fBdp_scheme\fP=str
 Defines which placement ID (index) to be selected based on offset(LBA) range.
@@ -2421,7 +2444,7 @@ The TCP or UDP port to bind to or connect to. If this is used with
 this will be the starting port number since fio will use a range of
 ports.
 .TP
-.BI (rdma,librpma_*)port \fR=\fPint
+.BI (rdma)port \fR=\fPint
 The port to use for RDMA-CM communication. This should be the same
 value on the client and the server side.
 .TP
@@ -2430,16 +2453,6 @@ The hostname or IP address to use for TCP, UDP or RDMA-CM based I/O.
 If the job is a TCP listener or UDP reader, the hostname is not used
 and must be omitted unless it is a valid UDP multicast address.
 .TP
-.BI (librpma_*)serverip \fR=\fPstr
-The IP address to be used for RDMA-CM based I/O.
-.TP
-.BI (librpma_*_server)direct_write_to_pmem \fR=\fPbool
-Set to 1 only when Direct Write to PMem from the remote host is possible. Otherwise, set to 0.
-.TP
-.BI (librpma_*_server)busy_wait_polling \fR=\fPbool
-Set to 0 to wait for completion instead of busy-wait polling completion.
-Default: 1.
-.TP
 .BI (netsplice,net)interface \fR=\fPstr
 The IP address of the network interface used to send or receive UDP
 multicast.
@@ -2549,8 +2562,10 @@ Touching all objects affects ceph caches and likely impacts test results.
 Enabled by default.
 .TP
 .BI (http)http_host \fR=\fPstr
-Hostname to connect to. For S3, this could be the bucket name. Default
-is \fBlocalhost\fR
+Hostname to connect to.  HTTP port 80 is used automatically when the value 
+of the \fBhttps\fP parameter is \fRoff\fP, and HTTPS port 443 if it is \fRon\fP.
+A virtual-hosted-style S3 hostname starts with a bucket name, while a 
+path-style S3 hostname does not.  Default is \fBlocalhost\fR.
 .TP
 .BI (http)http_user \fR=\fPstr
 Username for HTTP authentication.
@@ -2632,14 +2647,34 @@ that "owns" the device also needs to support hipri (also known as iopoll
 and mq_poll). The MegaRAID driver is an example of a SCSI LLD.
 Default: clear (0) which does normal (interrupted based) IO.
 .TP
-.BI (sg)readfua \fR=\fPbool
+.BI (sg, io_uring_cmd)readfua \fR=\fPbool
 With readfua option set to 1, read operations include the force
 unit access (fua) flag. Default: 0.
 .TP
-.BI (sg)writefua \fR=\fPbool
+.BI (sg, io_uring_cmd)writefua \fR=\fPbool
 With writefua option set to 1, write operations include the force
 unit access (fua) flag. Default: 0.
 .TP
+.BI (io_uring_cmd)write_mode \fR=\fPstr
+Specifies the type of write operation.  Defaults to 'write'.
+.RS
+.RS
+.TP
+.B write
+Use Write commands for write operations
+.TP
+.B uncor
+Use Write Uncorrectable commands for write operations
+.TP
+.B zeroes
+Use Write Zeroes commands for write operations
+.TP
+.B verify
+Use Verify commands for write operations
+.TP
+.RE
+.RE
+.TP
 .BI (sg)sg_write_mode \fR=\fPstr
 Specify the type of write commands to issue. This option can take multiple
 values:
@@ -3691,6 +3726,14 @@ Enable experimental verification. Standard verify records I/O metadata for
 later use during the verification phase. Experimental verify instead resets the
 file after the write phase and then replays I/Os for the verification phase.
 .TP
+.BI verify_write_sequence \fR=\fPbool
+Verify the header write sequence number. In a scenario with multiple jobs,
+verification of the write sequence number may fail. Disabling this option
+will mean that write sequence number checking is skipped. Doing that can be
+useful for testing atomic writes, as it means that checksum verification can
+still be attempted. When \fBatomic\fR is enabled, checksum verification
+is expected to succeed (while write sequence checking can still fail).
+.TP
 .BI trim_percentage \fR=\fPint
 Number of verify blocks to discard/trim.
 .TP
@@ -3911,9 +3954,16 @@ entry as well as the other data values. Defaults to 0 meaning that
 offsets are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
 .TP
 .BI log_prio \fR=\fPbool
-If this is set, the iolog options will include the I/O priority for the I/O
-entry as well as the other data values. Defaults to 0 meaning that
-I/O priorities are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
+If this is set, the `Command priority` field in \fBLOG FILE FORMATS\fR
+shows the priority value and the IO priority class of the command.
+Otherwise, the field shows if the command has the highest RT priority
+class or not. Also see \fBLOG FILE FORMATS\fR section.
+.TP
+.BI log_issue_time \fR=\fPbool
+If this is set, the iolog options will include the command issue time for the
+I/O entry as well as the other data values. Defaults to 0 meaning that command
+issue times are not present in logs. Also see \fBLOG FILE FORMATS\fR section.
+This option shall be set together with \fBwrite_lat_log\fR and \fBlog_offset\fR.
 .TP
 .BI log_compression \fR=\fPint
 If this is set, fio will compress the I/O logs as it goes, to keep the
@@ -4910,7 +4960,7 @@ and IOPS. The logs share a common format, which looks like this:
 .RS
 .P
 time (msec), value, data direction, block size (bytes), offset (bytes),
-command priority
+command priority, issue time (nsec)
 .RE
 .P
 `Time' for the log entry is always in milliseconds. The `value' logged depends
@@ -4953,6 +5003,11 @@ number with the lowest 13 bits indicating the priority value (\fBprio\fR and
 \fBcmdprio\fR options) and the highest 3 bits indicating the IO priority class
 (\fBprioclass\fR and \fBcmdprio_class\fR options).
 .P
+The entry's `issue time` is the command issue time in nanoseconds. The logging
+of the issue time can be toggled with \fBlog_issue_time\fR. This field has valid
+values in the completion latency log file (clat) or the submit latency log file
+(slat). The field has value 0 in other log files.
+.P
 Fio defaults to logging every individual I/O but when windowed logging is set
 through \fBlog_avg_msec\fR, either the average (by default), the maximum
 (\fBlog_window_value\fR is set to max) `value' seen over the specified period of
@@ -4962,12 +5017,12 @@ takes this form:
 .RS
 .P
 time (msec), value, value1, data direction, block size (bytes), offset (bytes),
-command priority
+command priority, issue time (nsec)
 .RE
 .P
 Each `data direction' seen within the window period will aggregate its values
-in a separate row. Further, when using windowed logging the `block size' and
-`offset' entries will always contain 0.
+in a separate row. Further, when using windowed logging the `block size',
+`offset' and `issue time` entries will always contain 0.
 .SH CLIENT / SERVER
 Normally fio is invoked as a stand-alone application on the machine where the
 I/O workload should be generated. However, the backend and frontend of fio can
diff --git a/fio.h b/fio.h
index 7d9927a006551af0bfb6080e06c36844aeb1fce0..4bb6cfa7f39e0b1152cd62bbadd03d3e4744f62c 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -258,8 +258,9 @@ struct thread_data {
        size_t orig_buffer_size;
        volatile int runstate;
        volatile bool terminate;
-       bool last_was_sync;
-       enum fio_ddir last_ddir;
+
+       enum fio_ddir last_ddir_completed;
+       enum fio_ddir last_ddir_issued;
 
        int mmapfd;
 
@@ -629,7 +630,7 @@ static inline bool multi_range_trim(struct thread_data *td, struct io_u *io_u)
 
 static inline bool should_fsync(struct thread_data *td)
 {
-       if (td->last_was_sync)
+       if (ddir_sync(td->last_ddir_issued))
                return false;
        if (td_write(td) || td->o.override_sync)
                return true;
index 332ccb53c254e45087919f9a1f758390a9873f2f..fed21d1d61e7b5dcca82a51f4c343e5eff868cb8 100644 (file)
@@ -418,6 +418,8 @@ int helper_thread_create(struct fio_sem *startup_sem, struct sk_out *sk_out)
        int ret;
 
        hd = scalloc(1, sizeof(*hd));
+       if (!hd)
+               return 1;
 
        setup_disk_util();
        steadystate_setup();
diff --git a/init.c b/init.c
index ff3e9a90d551500b3880df95c2505e3a7df9e684..96a03d984b08b602ec42e1ac987d90bde322b221 100644 (file)
--- a/init.c
+++ b/init.c
@@ -853,6 +853,20 @@ static int fixup_options(struct thread_data *td)
                    (o->max_bs[DDIR_WRITE] % o->verify_interval))
                        o->verify_interval = gcd(o->min_bs[DDIR_WRITE],
                                                        o->max_bs[DDIR_WRITE]);
+
+               if (td->o.verify_only)
+                       o->verify_write_sequence = 0;
+       }
+
+       if (td->o.oatomic) {
+               if (!td_ioengine_flagged(td, FIO_ATOMICWRITES)) {
+                       log_err("fio: engine does not support atomic writes\n");
+                       td->o.oatomic = 0;
+                       ret |= 1;
+               }
+
+               if (!td_write(td))
+                       td->o.oatomic = 0;
        }
 
        if (o->pre_read) {
@@ -1621,12 +1635,18 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                        .log_type = IO_LOG_TYPE_LAT,
                        .log_offset = o->log_offset,
                        .log_prio = o->log_prio,
+                       .log_issue_time = o->log_issue_time,
                        .log_gz = o->log_gz,
                        .log_gz_store = o->log_gz_store,
                };
                const char *pre = make_log_name(o->lat_log_file, o->name);
                const char *suf;
 
+               if (o->log_issue_time && !o->log_offset) {
+                       log_err("fio: log_issue_time option requires write_lat_log and log_offset options\n");
+                       goto err;
+               }
+
                if (p.log_gz_store)
                        suf = "log.fz";
                else
@@ -1650,6 +1670,9 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                        setup_log(&td->clat_log, &p, logname);
                }
 
+       } else if (o->log_issue_time) {
+               log_err("fio: log_issue_time option requires write_lat_log and log_offset options\n");
+               goto err;
        }
 
        if (o->write_hist_log) {
@@ -1661,6 +1684,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                        .log_type = IO_LOG_TYPE_HIST,
                        .log_offset = o->log_offset,
                        .log_prio = o->log_prio,
+                       .log_issue_time = o->log_issue_time,
                        .log_gz = o->log_gz,
                        .log_gz_store = o->log_gz_store,
                };
@@ -1693,6 +1717,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                        .log_type = IO_LOG_TYPE_BW,
                        .log_offset = o->log_offset,
                        .log_prio = o->log_prio,
+                       .log_issue_time = o->log_issue_time,
                        .log_gz = o->log_gz,
                        .log_gz_store = o->log_gz_store,
                };
@@ -1725,6 +1750,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
                        .log_type = IO_LOG_TYPE_IOPS,
                        .log_offset = o->log_offset,
                        .log_prio = o->log_prio,
+                       .log_issue_time = o->log_issue_time,
                        .log_gz = o->log_gz,
                        .log_gz_store = o->log_gz_store,
                };
diff --git a/io_u.c b/io_u.c
index a090e12122642f238569f9fa663ab44d5091bef6..c49cd4df0237be5e1c25412d20390a0703eda04b 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -755,7 +755,7 @@ static enum fio_ddir get_rw_ddir(struct thread_data *td)
         * See if it's time to fsync/fdatasync/sync_file_range first,
         * and if not then move on to check regular I/Os.
         */
-       if (should_fsync(td)) {
+       if (should_fsync(td) && td->last_ddir_issued == DDIR_WRITE) {
                if (td->o.fsync_blocks && td->io_issues[DDIR_WRITE] &&
                    !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks))
                        return DDIR_SYNC;
@@ -815,7 +815,7 @@ static void set_rw_ddir(struct thread_data *td, struct io_u *io_u)
        if (td->o.zone_mode == ZONE_MODE_ZBD)
                ddir = zbd_adjust_ddir(td, io_u, ddir);
 
-       if (td_trimwrite(td)) {
+       if (td_trimwrite(td) && !ddir_sync(ddir)) {
                struct fio_file *f = io_u->file;
                if (f->last_start[DDIR_WRITE] == f->last_start[DDIR_TRIM])
                        ddir = DDIR_TRIM;
@@ -1757,7 +1757,7 @@ static bool check_get_trim(struct thread_data *td, struct io_u *io_u)
                if (get_next_trim(td, io_u))
                        return true;
        } else if (!(td->io_hist_len % td->o.trim_backlog) &&
-                    td->last_ddir != DDIR_READ) {
+                    td->last_ddir_completed != DDIR_READ) {
                td->trim_batch = td->o.trim_batch;
                if (!td->trim_batch)
                        td->trim_batch = td->o.trim_backlog;
@@ -1779,7 +1779,7 @@ static bool check_get_verify(struct thread_data *td, struct io_u *io_u)
                if (td->verify_batch)
                        get_verify = 1;
                else if (!(td->io_hist_len % td->o.verify_backlog) &&
-                        td->last_ddir != DDIR_READ) {
+                        td->last_ddir_completed != DDIR_READ) {
                        td->verify_batch = td->o.verify_batch;
                        if (!td->verify_batch)
                                td->verify_batch = td->o.verify_backlog;
@@ -1963,7 +1963,7 @@ static void __io_u_log_error(struct thread_data *td, struct io_u *io_u)
        zbd_log_err(td, io_u);
 
        if (td->io_ops->errdetails) {
-               char *err = td->io_ops->errdetails(io_u);
+               char *err = td->io_ops->errdetails(td, io_u);
 
                log_err("fio: %s\n", err);
                free(err);
@@ -2016,8 +2016,7 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
                unsigned long long tnsec;
 
                tnsec = ntime_since(&io_u->start_time, &icd->time);
-               add_lat_sample(td, idx, tnsec, bytes, io_u->offset,
-                              io_u->ioprio, io_u->clat_prio_index);
+               add_lat_sample(td, idx, tnsec, bytes, io_u);
 
                if (td->flags & TD_F_PROFILE_OPS) {
                        struct prof_io_ops *ops = &td->prof_io_ops;
@@ -2038,8 +2037,7 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
 
        if (ddir_rw(idx)) {
                if (!td->o.disable_clat) {
-                       add_clat_sample(td, idx, llnsec, bytes, io_u->offset,
-                                       io_u->ioprio, io_u->clat_prio_index);
+                       add_clat_sample(td, idx, llnsec, bytes, io_u);
                        io_u_mark_latency(td, llnsec);
                }
 
@@ -2122,7 +2120,7 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
                return;
        }
 
-       td->last_ddir = ddir;
+       td->last_ddir_completed = ddir;
 
        if (!io_u->error && ddir_rw(ddir)) {
                unsigned long long bytes = io_u->xfer_buflen - io_u->resid;
@@ -2301,15 +2299,9 @@ int io_u_queued_complete(struct thread_data *td, int min_evts)
 void io_u_queued(struct thread_data *td, struct io_u *io_u)
 {
        if (!td->o.disable_slat && ramp_time_over(td) && td->o.stats) {
-               unsigned long slat_time;
-
-               slat_time = ntime_since(&io_u->start_time, &io_u->issue_time);
-
                if (td->parent)
                        td = td->parent;
-
-               add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen,
-                               io_u->offset, io_u->ioprio);
+               add_slat_sample(td, io_u);
        }
 }
 
index 6b81dc772ad3284683eb1c668e24d899f758d0ec..dcd4164d4e8f00ec9b4de514cac9b8b6d844e1e7 100644 (file)
@@ -437,7 +437,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
                        td->ts.total_io_u[io_u->ddir]++;
                }
 
-               td->last_was_sync = ddir_sync(io_u->ddir);
+               td->last_ddir_issued = ddir;
        } else if (ret == FIO_Q_QUEUED) {
                td->io_u_queued++;
 
@@ -448,7 +448,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
                if (td->io_u_queued >= td->o.iodepth_batch)
                        td_io_commit(td);
 
-               td->last_was_sync = ddir_sync(io_u->ddir);
+               td->last_ddir_issued = ddir;
        }
 
        if (!td_ioengine_flagged(td, FIO_SYNCIO) &&
index d5b0cafe33773cfebe931ea3ff57e9a34efbc472..1531cd897739ae61805746b96555b76ff74fc8f7 100644 (file)
@@ -9,7 +9,7 @@
 #include "zbd_types.h"
 #include "dataplacement.h"
 
-#define FIO_IOOPS_VERSION      34
+#define FIO_IOOPS_VERSION      36
 
 #ifndef CONFIG_DYNAMIC_ENGINES
 #define FIO_STATIC     static
@@ -40,7 +40,7 @@ struct ioengine_ops {
        int (*commit)(struct thread_data *);
        int (*getevents)(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
        struct io_u *(*event)(struct thread_data *, int);
-       char *(*errdetails)(struct io_u *);
+       char *(*errdetails)(struct thread_data *, struct io_u *);
        int (*cancel)(struct thread_data *, struct io_u *);
        void (*cleanup)(struct thread_data *);
        int (*open_file)(struct thread_data *, struct fio_file *);
@@ -96,6 +96,7 @@ enum {
        __FIO_RO_NEEDS_RW_OPEN,         /* open files in rw mode even if we have a read job; only
                                           affects ioengines using generic_open_file */
        __FIO_MULTI_RANGE_TRIM,         /* ioengine supports trim with more than one range */
+       __FIO_ATOMICWRITES,             /* ioengine supports atomic writes */
        __FIO_IOENGINE_F_LAST,          /* not a real bit; used to count number of bits */
 };
 
@@ -120,6 +121,7 @@ enum fio_ioengine_flags {
        FIO_SKIPPABLE_IOMEM_ALLOC       = 1 << __FIO_SKIPPABLE_IOMEM_ALLOC,
        FIO_RO_NEEDS_RW_OPEN            = 1 << __FIO_RO_NEEDS_RW_OPEN,
        FIO_MULTI_RANGE_TRIM            = 1 << __FIO_MULTI_RANGE_TRIM,
+       FIO_ATOMICWRITES                = 1 << __FIO_ATOMICWRITES,
 };
 
 /*
diff --git a/iolog.c b/iolog.c
index 96af4f33e186dd4ec1a02b851a1c7ae4c0b1eaeb..ef173b092947fd4aad54f8b583af9a27f7526a76 100644 (file)
--- a/iolog.c
+++ b/iolog.c
@@ -140,8 +140,17 @@ static int ipo_special(struct thread_data *td, struct io_piece *ipo)
                        break;
                }
                ret = td_io_open_file(td, f);
-               if (!ret)
+               if (!ret) {
+                       if (td->o.dp_type != FIO_DP_NONE) {
+                               int dp_init_ret = dp_init(td);
+
+                               if (dp_init_ret != 0) {
+                                       td_verror(td, abs(dp_init_ret), "dp_init");
+                                       return -1;
+                               }
+                       }
                        break;
+               }
                td_verror(td, ret, "iolog open file");
                return -1;
        case FIO_LOG_CLOSE_FILE:
@@ -227,6 +236,9 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u)
                                                io_u->buflen, io_u->file->file_name);
                        if (ipo->delay)
                                iolog_delay(td, ipo->delay);
+
+                       if (td->o.dp_type != FIO_DP_NONE)
+                               dp_fill_dspec_data(td, io_u);
                } else {
                        elapsed = mtime_since_genesis();
                        if (ipo->delay > elapsed)
@@ -828,10 +840,12 @@ void setup_log(struct io_log **log, struct log_params *p,
        struct flist_head *list;
 
        l = scalloc(1, sizeof(*l));
+       assert(l);
        INIT_FLIST_HEAD(&l->io_logs);
        l->log_type = p->log_type;
        l->log_offset = p->log_offset;
        l->log_prio = p->log_prio;
+       l->log_issue_time = p->log_issue_time;
        l->log_gz = p->log_gz;
        l->log_gz_store = p->log_gz_store;
        l->avg_msec = p->avg_msec;
@@ -874,6 +888,9 @@ void setup_log(struct io_log **log, struct log_params *p,
        if (l->td && l->td->o.log_max == IO_LOG_SAMPLE_BOTH)
                l->log_ddir_mask |= LOG_AVG_MAX_SAMPLE_BIT;
 
+       if (l->log_issue_time)
+               l->log_ddir_mask |= LOG_ISSUE_TIME_SAMPLE_BIT;
+
        INIT_FLIST_HEAD(&l->chunk_list);
 
        if (l->log_gz && !p->td)
@@ -956,7 +973,7 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
                               uint64_t sample_size)
 {
        struct io_sample *s;
-       int log_offset;
+       bool log_offset, log_issue_time;
        uint64_t i, j, nr_samples;
        struct io_u_plat_entry *entry, *entry_before;
        uint64_t *io_u_plat;
@@ -967,13 +984,14 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
        if (!sample_size)
                return;
 
-       s = __get_sample(samples, 0, 0);
+       s = __get_sample(samples, 0, 0, 0);
        log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
+       log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
 
-       nr_samples = sample_size / __log_entry_sz(log_offset);
+       nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
 
        for (i = 0; i < nr_samples; i++) {
-               s = __get_sample(samples, log_offset, i);
+               s = __get_sample(samples, log_offset, log_issue_time, i);
 
                entry = s->data.plat_entry;
                io_u_plat = entry->io_u_plat;
@@ -996,91 +1014,101 @@ static void flush_hist_samples(FILE *f, int hist_coarseness, void *samples,
        }
 }
 
+/* Append printf-style text at *p, shrinking *left; 0 if it fit, -1 otherwise. */
+static int print_sample_fields(char **p, size_t *left, const char *fmt, ...) {
+       va_list args;
+       int len;
+
+       va_start(args, fmt);
+       len = vsnprintf(*p, *left, fmt, args);
+       va_end(args);
+       if (len < 0 || (size_t) len >= *left) {
+               log_err("sample file write failed: %d\n", len);
+               return -1;
+       }
+
+       /* Move the cursor past the text just written and account for it. */
+       *p += len;
+       *left -= len;
+       return 0;
+}
+
+/*
+ * flush_samples - Generate output for log samples
+ * Each sample's output is built in a temporary buffer. The buffer size
+ * rests on these assumptions:
+ * - Each sample has less than 10 fields
+ * - Each sample field fits in 25 characters (20 digits for 64 bit number
+ *   and a few bytes delimiter)
+ */
 void flush_samples(FILE *f, void *samples, uint64_t sample_size)
 {
        struct io_sample *s;
-       int log_offset, log_prio, log_avg_max;
+       bool log_offset, log_prio, log_avg_max, log_issue_time;
        uint64_t i, nr_samples;
-       unsigned int prio_val;
-       const char *fmt;
+       char buf[256];
+       char *p;
+       size_t left;
+       int ret;
 
        if (!sample_size)
                return;
 
-       s = __get_sample(samples, 0, 0);
+       s = __get_sample(samples, 0, 0, 0);
        log_offset = (s->__ddir & LOG_OFFSET_SAMPLE_BIT) != 0;
        log_prio = (s->__ddir & LOG_PRIO_SAMPLE_BIT) != 0;
        log_avg_max = (s->__ddir & LOG_AVG_MAX_SAMPLE_BIT) != 0;
+       log_issue_time = (s->__ddir & LOG_ISSUE_TIME_SAMPLE_BIT) != 0;
 
-       if (log_offset) {
-               if (log_prio) {
-                       if (log_avg_max)
-                               fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %llu, 0x%04x\n";
-                       else
-                               fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %llu, 0x%04x\n";
-               } else {
-                       if (log_avg_max)
-                               fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %llu, %u\n";
-                       else
-                               fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %llu, %u\n";
-               }
-       } else {
-               if (log_prio) {
-                       if (log_avg_max)
-                               fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, 0x%04x\n";
-                       else
-                               fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, 0x%04x\n";
-               } else {
-                       if (log_avg_max)
-                               fmt = "%" PRIu64 ", %" PRId64 ", %" PRId64 ", %u, %llu, %u\n";
-                       else
-                               fmt = "%" PRIu64 ", %" PRId64 ", %u, %llu, %u\n";
-               }
-       }
-
-       nr_samples = sample_size / __log_entry_sz(log_offset);
+       nr_samples = sample_size / __log_entry_sz(log_offset, log_issue_time);
 
        for (i = 0; i < nr_samples; i++) {
-               s = __get_sample(samples, log_offset, i);
+               s = __get_sample(samples, log_offset, log_issue_time, i);
+               p = buf;
+               left = sizeof(buf);
+
+               ret = print_sample_fields(&p, &left, "%" PRIu64 ", %" PRId64,
+                                         s->time, s->data.val.val0);
+               if (ret)
+                       return;
+
+               if (log_avg_max) {
+                       ret = print_sample_fields(&p, &left, ", %" PRId64,
+                                                 s->data.val.val1);
+                       if (ret)
+                               return;
+               }
+
+               ret = print_sample_fields(&p, &left, ", %u, %llu",
+                                         io_sample_ddir(s),
+                                         (unsigned long long) s->bs);
+               if (ret)
+                       return;
+
+               if (log_offset) {
+                       ret = print_sample_fields(&p, &left, ", %llu",
+                                                 (unsigned long long) s->aux[IOS_AUX_OFFSET_INDEX]);
+                       if (ret)
+                               return;
+               }
 
                if (log_prio)
-                       prio_val = s->priority;
+                       ret = print_sample_fields(&p, &left, ", 0x%04x",
+                                                 s->priority);
                else
-                       prio_val = ioprio_value_is_class_rt(s->priority);
-
-               if (!log_offset) {
-                       if (log_avg_max)
-                               fprintf(f, fmt,
-                                       s->time,
-                                       s->data.val.val0,
-                                       s->data.val.val1,
-                                       io_sample_ddir(s), (unsigned long long) s->bs,
-                                       prio_val);
-                       else
-                               fprintf(f, fmt,
-                                       s->time,
-                                       s->data.val.val0,
-                                       io_sample_ddir(s), (unsigned long long) s->bs,
-                                       prio_val);
-               } else {
-                       struct io_sample_offset *so = (void *) s;
-
-                       if (log_avg_max)
-                               fprintf(f, fmt,
-                                       s->time,
-                                       s->data.val.val0,
-                                       s->data.val.val1,
-                                       io_sample_ddir(s), (unsigned long long) s->bs,
-                                       (unsigned long long) so->offset,
-                                       prio_val);
-                       else
-                               fprintf(f, fmt,
-                                       s->time,
-                                       s->data.val.val0,
-                                       io_sample_ddir(s), (unsigned long long) s->bs,
-                                       (unsigned long long) so->offset,
-                                       prio_val);
+                       ret = print_sample_fields(&p, &left, ", %u",
+                                                 ioprio_value_is_class_rt(s->priority));
+               if (ret)
+                       return;
+
+               if (log_issue_time) {
+                       ret = print_sample_fields(&p, &left, ", %llu",
+                                                 (unsigned long long) s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
+                       if (ret)
+                               return;
                }
+
+               fprintf(f, "%s\n", buf);
        }
 }
 
diff --git a/iolog.h b/iolog.h
index 26dd5cca81f388e194199caf4197dcf9368592b7..e864d169900526d5535ab4311359df1ae88ae32b 100644 (file)
--- a/iolog.h
+++ b/iolog.h
@@ -54,11 +54,15 @@ struct io_sample {
        uint32_t __ddir;
        uint16_t priority;
        uint64_t bs;
+       uint64_t aux[];
 };
 
-struct io_sample_offset {
-       struct io_sample s;
-       uint64_t offset;
+/*
+ * Enumerate indexes of auxiliary log data in struct io_sample aux[] array
+ */
+enum {
+       IOS_AUX_OFFSET_INDEX,
+       IOS_AUX_ISSUE_TIME_INDEX,
 };
 
 enum {
@@ -119,6 +123,11 @@ struct io_log {
         */
        unsigned int log_prio;
 
+       /*
+        * Log I/O issuing time
+        */
+       unsigned int log_issue_time;
+
        /*
         * Max size of log entries before a chunk is compressed
         */
@@ -168,9 +177,14 @@ struct io_log {
  * If the bit following prioity sample vit is set, we report both avg and max
  */
 #define LOG_AVG_MAX_SAMPLE_BIT 0x20000000U
+/*
+ * If the bit following AVG_MAX_SAMPLE_BIT is set, we report the issue time also
+ */
+#define LOG_ISSUE_TIME_SAMPLE_BIT      0x10000000U
 
 #define LOG_SAMPLE_BITS                (LOG_OFFSET_SAMPLE_BIT | LOG_PRIO_SAMPLE_BIT |\
-                                       LOG_AVG_MAX_SAMPLE_BIT)
+                                       LOG_AVG_MAX_SAMPLE_BIT |\
+                                       LOG_ISSUE_TIME_SAMPLE_BIT)
 #define io_sample_ddir(io)     ((io)->__ddir & ~LOG_SAMPLE_BITS)
 
 static inline void io_sample_set_ddir(struct io_log *log,
@@ -180,17 +194,22 @@ static inline void io_sample_set_ddir(struct io_log *log,
        io->__ddir = ddir | log->log_ddir_mask;
 }
 
-static inline size_t __log_entry_sz(int log_offset)
+static inline size_t __log_entry_sz(bool log_offset, bool log_issue_time)
 {
+       size_t ret = sizeof(struct io_sample);
+
        if (log_offset)
-               return sizeof(struct io_sample_offset);
-       else
-               return sizeof(struct io_sample);
+               ret += sizeof(uint64_t);
+
+       if (log_issue_time)
+               ret += sizeof(uint64_t);
+
+       return ret;
 }
 
 static inline size_t log_entry_sz(struct io_log *log)
 {
-       return __log_entry_sz(log->log_offset);
+       return __log_entry_sz(log->log_offset, log->log_issue_time);
 }
 
 static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
@@ -198,10 +217,12 @@ static inline size_t log_sample_sz(struct io_log *log, struct io_logs *cur_log)
        return cur_log->nr_samples * log_entry_sz(log);
 }
 
-static inline struct io_sample *__get_sample(void *samples, int log_offset,
+static inline struct io_sample *__get_sample(void *samples, bool log_offset,
+                                            bool log_issue_time,
                                             uint64_t sample)
 {
-       uint64_t sample_offset = sample * __log_entry_sz(log_offset);
+       uint64_t sample_offset = sample *
+               __log_entry_sz(log_offset, log_issue_time);
        return (struct io_sample *) ((char *) samples + sample_offset);
 }
 
@@ -214,7 +235,8 @@ static inline struct io_sample *get_sample(struct io_log *iolog,
                                           struct io_logs *cur_log,
                                           uint64_t sample)
 {
-       return __get_sample(cur_log->log, iolog->log_offset, sample);
+       return __get_sample(cur_log->log,
+                           iolog->log_offset, iolog->log_issue_time, sample);
 }
 
 enum {
@@ -290,6 +312,7 @@ struct log_params {
        int log_type;
        int log_offset;
        int log_prio;
+       int log_issue_time;
        int log_gz;
        int log_gz_store;
        int log_compress;
index d0c6bf8f5667ba69bb42f54a5e4ea4a0d5fe211a..2596ae5a98900d5a5b8e7507b9025cddb3cbdd16 100644 (file)
--- a/libfio.c
+++ b/libfio.c
@@ -101,7 +101,6 @@ static void reset_io_counters(struct thread_data *td, int all)
 
        td->zone_bytes = 0;
 
-       td->last_was_sync = false;
        td->rwmix_issues = 0;
 
        /*
index bebb4a5133a8615fa341c83d0dddcc80c38ba1b4..f6acf88fefe4aced2474022a692aa04ae11f60fc 100644 (file)
@@ -141,10 +141,6 @@ static const struct opt_group fio_opt_cat_groups[] = {
                .name   = "RDMA I/O engine", /* rdma */
                .mask   = FIO_OPT_G_RDMA,
        },
-       {
-               .name   = "librpma I/O engines", /* librpma_apm && librpma_gpspm */
-               .mask   = FIO_OPT_G_LIBRPMA,
-       },
        {
                .name   = "libaio I/O engine", /* libaio */
                .mask   = FIO_OPT_G_LIBAIO,
index 024b902f63ae5734190f08ad4faa206372d61faa..eb5e6f35eb2ffdca5638c33c3604fdf05e6a0946 100644 (file)
@@ -52,7 +52,6 @@ enum opt_category_group {
        __FIO_OPT_G_E4DEFRAG,
        __FIO_OPT_G_NETIO,
        __FIO_OPT_G_RDMA,
-       __FIO_OPT_G_LIBRPMA,
        __FIO_OPT_G_LIBAIO,
        __FIO_OPT_G_ACT,
        __FIO_OPT_G_LATPROF,
@@ -100,7 +99,6 @@ enum opt_category_group {
        FIO_OPT_G_E4DEFRAG      = (1ULL << __FIO_OPT_G_E4DEFRAG),
        FIO_OPT_G_NETIO         = (1ULL << __FIO_OPT_G_NETIO),
        FIO_OPT_G_RDMA          = (1ULL << __FIO_OPT_G_RDMA),
-       FIO_OPT_G_LIBRPMA       = (1ULL << __FIO_OPT_G_LIBRPMA),
        FIO_OPT_G_LIBAIO        = (1ULL << __FIO_OPT_G_LIBAIO),
        FIO_OPT_G_ACT           = (1ULL << __FIO_OPT_G_ACT),
        FIO_OPT_G_LATPROF       = (1ULL << __FIO_OPT_G_LATPROF),
index f5d221c776317fe50e2e0229a8e9564d244b8db4..c35878f7bf6d9c9f3287e7d13e6e447ed7b89f68 100644 (file)
--- a/options.c
+++ b/options.c
@@ -263,28 +263,61 @@ static int fio_fdp_cmp(const void *p1, const void *p2)
 static int str_fdp_pli_cb(void *data, const char *input)
 {
        struct thread_data *td = cb_data_to_td(data);
-       char *str, *p, *v;
-       int i = 0;
+       char *str, *p, *id1;
+       int i = 0, ret = 0;
 
        p = str = strdup(input);
        strip_blank_front(&str);
        strip_blank_end(str);
 
-       while ((v = strsep(&str, ",")) != NULL && i < FIO_MAX_DP_IDS) {
-               unsigned long long id = strtoull(v, NULL, 0);
-               if (id > 0xFFFF) {
-                       log_err("Placement IDs cannot exceed 0xFFFF\n");
-                       free(p);
-                       return 1;
+       while ((id1 = strsep(&str, ",")) != NULL) {
+               char *str2, *id2;
+               unsigned int start, end;
+
+               if (!strlen(id1))
+                       break;
+
+               str2 = id1;
+               end = -1;
+               while ((id2 = strsep(&str2, "-")) != NULL) {
+                       if (!strlen(id2))
+                               break;
+
+                       end = strtoull(id2, NULL, 0);
                }
-               td->o.dp_ids[i++] = id;
+
+               start = strtoull(id1, NULL, 0);
+               if (end == -1)
+                       end = start;
+               if (start > end) {
+                       ret = 1;
+                       break;
+               }
+
+               while (start <= end) {
+                       if (i >= FIO_MAX_DP_IDS) {
+                               log_err("fio: only %d IDs supported\n", FIO_MAX_DP_IDS);
+                               ret = 1;
+                               break;
+                       }
+                       if (start > 0xFFFF) {
+                               log_err("Placement IDs cannot exceed 0xFFFF\n");
+                               ret = 1;
+                               break;
+                       }
+                       td->o.dp_ids[i++] = start++;
+               }
+
+               if (ret)
+                       break;
        }
+
        free(p);
 
        qsort(td->o.dp_ids, i, sizeof(*td->o.dp_ids), fio_fdp_cmp);
        td->o.dp_nr_ids = i;
 
-       return 0;
+       return ret;
 }
 
 /* str_dp_scheme_cb() is a callback function for parsing the fdp_scheme option
@@ -532,7 +565,11 @@ static int ignore_error_type(struct thread_data *td, enum error_type_bit etype,
                if (fname[0] == 'E') {
                        error[i] = str2error(fname);
                } else {
-                       error[i] = atoi(fname);
+                       int base = 10;
+                       if (!strncmp(fname, "0x", 2) ||
+                                       !strncmp(fname, "0X", 2))
+                               base = 16;
+                       error[i] = strtol(fname, NULL, base);
                        if (error[i] < 0)
                                error[i] = -error[i];
                }
@@ -2155,16 +2192,6 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                            .help = "RDMA IO engine",
                          },
 #endif
-#ifdef CONFIG_LIBRPMA_APM
-                         { .ival = "librpma_apm",
-                           .help = "librpma IO engine in APM mode",
-                         },
-#endif
-#ifdef CONFIG_LIBRPMA_GPSPM
-                         { .ival = "librpma_gpspm",
-                           .help = "librpma IO engine in GPSPM mode",
-                         },
-#endif
 #ifdef CONFIG_LINUX_EXT4_MOVE_EXTENT
                          { .ival = "e4defrag",
                            .help = "ext4 defrag engine",
@@ -2899,6 +2926,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_IO_TYPE,
        },
+#ifdef FIO_HAVE_RWF_ATOMIC
        {
                .name   = "atomic",
                .lname  = "Atomic I/O",
@@ -2909,6 +2937,7 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_IO_TYPE,
        },
+#endif
        {
                .name   = "buffered",
                .lname  = "Buffered I/O",
@@ -3368,6 +3397,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_IO,
                .group  = FIO_OPT_G_VERIFY,
        },
+       {
+               .name   = "verify_write_sequence",
+               .lname  = "Verify write sequence number",
+               .off1   = offsetof(struct thread_options, verify_write_sequence),
+               .type   = FIO_OPT_BOOL,
+               .def    = "1",
+               .help   = "Verify header write sequence number",
+               .parent = "verify",
+               .category = FIO_OPT_C_IO,
+               .group  = FIO_OPT_G_VERIFY,
+       },
 #ifdef FIO_HAVE_TRIM
        {
                .name   = "trim_percentage",
@@ -4690,6 +4730,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
                .category = FIO_OPT_C_LOG,
                .group  = FIO_OPT_G_INVALID,
        },
+       {
+               .name   = "log_issue_time",
+               .lname  = "Log IO issue time",
+               .type   = FIO_OPT_BOOL,
+               .off1   = offsetof(struct thread_options, log_issue_time),
+               .help   = "Include IO issue time for each log entry",
+               .def    = "0",
+               .category = FIO_OPT_C_LOG,
+               .group  = FIO_OPT_G_INVALID,
+       },
 #ifdef CONFIG_ZLIB
        {
                .name   = "log_compression",
index c5cd6515813146d4f63e0d17427a1b8ee2d9efe1..ead8295c4413545324056bdce6f2839e9cb365c1 100644 (file)
@@ -62,6 +62,7 @@
 #define FIO_HAVE_BYTEORDER_FUNCS
 #define FIO_HAVE_PWRITEV2
 #define FIO_HAVE_SHM_ATTACH_REMOVED
+#define FIO_HAVE_RWF_ATOMIC
 
 #ifdef MAP_HUGETLB
 #define FIO_HAVE_MMAP_HUGE
@@ -328,8 +329,8 @@ static inline int fio_set_sched_idle(void)
 #define RWF_NOWAIT     0x00000008
 #endif
 
-#ifndef RWF_UNCACHED
-#define RWF_UNCACHED   0x00000040
+#ifndef RWF_ATOMIC
+#define RWF_ATOMIC     0x00000040
 #endif
 
 #ifndef RWF_WRITE_LIFE_SHIFT
diff --git a/os/os-qnx.h b/os/os-qnx.h
new file mode 100755 (executable)
index 0000000..8ae9695
--- /dev/null
@@ -0,0 +1,105 @@
+#ifndef FIO_OS_QNX_H
+#define FIO_OS_QNX_H
+
+#define        FIO_OS  os_qnx
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/param.h>
+#include <sys/statvfs.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <sys/syspage.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/dcmd_cam.h>
+
+/* XXX hack to avoid conflicts between rbtree.h and <sys/tree.h> */
+#undef RB_BLACK
+#undef RB_RED
+#undef RB_ROOT
+
+#include "../file.h"
+
+/* QNX does not support SA_RESTART; substitute SA_NOCLDSTOP for it. */
+#ifndef SA_RESTART
+#define SA_RESTART SA_NOCLDSTOP
+#endif
+
+#define FIO_NO_HAVE_SHM_H
+
+typedef uint64_t __u64;
+typedef unsigned int __u32;
+
+#define FIO_USE_GENERIC_INIT_RANDOM_STATE
+#define FIO_HAVE_FS_STAT
+#define FIO_HAVE_GETTID
+
+#define OS_MAP_ANON            MAP_ANON
+
+#ifndef PTHREAD_STACK_MIN
+#define PTHREAD_STACK_MIN 4096
+#endif
+
+#define fio_swap16(x)  swap16(x)
+#define fio_swap32(x)  swap32(x)
+#define fio_swap64(x)  swap64(x)
+
+#ifdef CONFIG_PTHREAD_GETAFFINITY
+#define FIO_HAVE_GET_THREAD_AFFINITY
+#define fio_get_thread_affinity(mask)  \
+       pthread_getaffinity_np(pthread_self(), sizeof(mask), &(mask))
+#endif
+
+/* Size f via fstat(): 0 and *bytes on success, errno and *bytes = 0 on failure. */
+static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
+{
+       struct stat statbuf;
+
+       *bytes = 0;
+       if (fstat(f->fd, &statbuf) == -1)
+               return errno;
+       /* Widen before multiplying so the product is not formed in a narrower type. */
+       *bytes = (unsigned long long) statbuf.st_blocksize * statbuf.st_nblocks;
+       return 0;
+}
+
+static inline int blockdev_invalidate_cache(struct fio_file *f)
+{
+       return ENOTSUP; /* nothing is invalidated; f is unused and ENOTSUP is always returned */
+}
+
+/* Total the byte spans of every "ram" entry in the syspage asinfo table. */
+static inline unsigned long long os_phys_mem(void)
+{
+       const char *const names = SYSPAGE_ENTRY(strings)->data;
+       const struct asinfo_entry *const first = SYSPAGE_ENTRY(asinfo);
+       const struct asinfo_entry *const limit = first + SYSPAGE_ENTRY_SIZE(asinfo) / SYSPAGE_ELEMENT_SIZE(asinfo);
+       const struct asinfo_entry *cur = first;
+       uint64_t total = 0;
+
+       assert(SYSPAGE_ELEMENT_SIZE(asinfo) == sizeof(struct asinfo_entry));
+       for (; cur < limit; ++cur)
+               if (strcmp(names + cur->name, "ram") == 0)
+                       total += cur->end - cur->start + 1; /* +1 treats end as inclusive */
+       return total;
+}
+
+/* Free bytes on the filesystem containing path, or -1ULL if statvfs() fails. */
+static inline unsigned long long get_fs_free_size(const char *path)
+{
+       struct statvfs vfs;
+
+       if (statvfs(path, &vfs) < 0)
+               return -1ULL;
+
+       /* Both operands are widened so the product is formed in unsigned long long. */
+       return (unsigned long long) vfs.f_frsize *
+               (unsigned long long) vfs.f_bfree;
+}
+
+#ifdef MADV_FREE
+#define FIO_MADV_FREE  MADV_FREE
+#endif
+
+#endif
diff --git a/os/os.h b/os/os.h
index 0f1823240faa9f11e638b95c2207f03f5480b4ff..d54e7c0dd7402c4d4f653dd3f456c0e9a06763c3 100644 (file)
--- a/os/os.h
+++ b/os/os.h
@@ -24,6 +24,7 @@ enum {
        os_windows,
        os_android,
        os_dragonfly,
+       os_qnx,
 
        os_nr,
 };
@@ -39,6 +40,8 @@ typedef enum {
 #include "os-freebsd.h"
 #elif defined(__OpenBSD__)
 #include "os-openbsd.h"
+#elif defined(__QNX__)
+#include "os-qnx.h"
 #elif defined(__NetBSD__)
 #include "os-netbsd.h"
 #elif defined(__sun__)
index a47223daaf761e0a5ae5f0843195ea93de62d011..2ce18b8bdb8dae781f52dcae15d4393bd184d34f 100644 (file)
@@ -873,10 +873,12 @@ ssize_t writev(int fildes, const struct iovec *iov, int iovcnt)
        return bytes_written;
 }
 
+#ifndef _WIN32
 long long strtoll(const char *restrict str, char **restrict endptr, int base)
 {
        return _strtoi64(str, endptr, base);
 }
+#endif
 
 int poll(struct pollfd fds[], nfds_t nfds, int timeout)
 {
index afaeb3482b0fd0072c123bf78b925b5a35176bf1..5967f421d4868fe9d05f060c23a262d0e40d1e71 100644 (file)
--- a/server.c
+++ b/server.c
@@ -2295,11 +2295,13 @@ int fio_send_iolog(struct thread_data *td, struct io_log *log, const char *name)
                        s->__ddir       = __cpu_to_le32(s->__ddir);
                        s->bs           = cpu_to_le64(s->bs);
 
-                       if (log->log_offset) {
-                               struct io_sample_offset *so = (void *) s;
+                       if (log->log_offset)
+                               s->aux[IOS_AUX_OFFSET_INDEX] =
+                                       cpu_to_le64(s->aux[IOS_AUX_OFFSET_INDEX]);
 
-                               so->offset = cpu_to_le64(so->offset);
-                       }
+                       if (log->log_issue_time)
+                               s->aux[IOS_AUX_ISSUE_TIME_INDEX] =
+                                       cpu_to_le64(s->aux[IOS_AUX_ISSUE_TIME_INDEX]);
                }
        }
 
index e8659f79209a46c9696f9d4c4fcdf7d99f6e2fe2..449c18cfbcb663ed974941cbf64ddac6892553bf 100644 (file)
--- a/server.h
+++ b/server.h
@@ -51,7 +51,7 @@ struct fio_net_cmd_reply {
 };
 
 enum {
-       FIO_SERVER_VER                  = 105,
+       FIO_SERVER_VER                  = 107,
 
        FIO_SERVER_MAX_FRAGMENT_PDU     = 1024,
        FIO_SERVER_MAX_CMD_MB           = 2048,
@@ -197,6 +197,7 @@ struct cmd_iolog_pdu {
        uint32_t compressed;
        uint32_t log_offset;
        uint32_t log_prio;
+       uint32_t log_issue_time;
        uint32_t log_hist_coarseness;
        uint32_t per_job_logs;
        uint8_t name[FIO_NET_NAME_MAX];
index 23243054ec7ab401ee7d89d73db81d3d809dd612..ac7ef70168cf4bb8a5c260e1b1ffac5fd7badc8b 100644 (file)
--- a/smalloc.c
+++ b/smalloc.c
@@ -566,6 +566,10 @@ void *smalloc(size_t size)
 
 void *scalloc(size_t nmemb, size_t size)
 {
+       /*
+        * smalloc_pool (called by smalloc) will zero the memory, so we don't
+        * need to do it here.
+        */
        return smalloc(nmemb * size);
 }
 
diff --git a/stat.c b/stat.c
index b98e8b27c3b0a70b2cb220411406665ac96d68f5..c5413f2f138e6b231bf4114796873da3aa11874d 100644 (file)
--- a/stat.c
+++ b/stat.c
 #define LOG_MSEC_SLACK 1
 #endif
 
+struct log_sample {                    /* values for one log entry, consumed by __add_log_sample() */
+       union io_sample_data data;      /* copied into the stored io_sample's data */
+       uint32_t ddir;                  /* direction; stored through io_sample_set_ddir() */
+       uint64_t bs;                    /* copied into the stored io_sample's bs */
+       uint64_t offset;                /* written to aux[] only when the log has log_offset set */
+       uint16_t priority;              /* copied into the stored io_sample's priority */
+       uint64_t issue_time;            /* written to aux[] only when the log has log_issue_time set */
+};
+
 struct fio_sem *stat_sem;
 
 void clear_rusage_stat(struct thread_data *td)
@@ -3031,17 +3040,15 @@ static struct io_logs *get_cur_log(struct io_log *iolog)
        return iolog->pending;
 }
 
-static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
-                            enum fio_ddir ddir, unsigned long long bs,
-                            unsigned long t, uint64_t offset,
-                            unsigned int priority)
+static void __add_log_sample(struct io_log *iolog, unsigned long t,
+                            struct log_sample *sample)
 {
        struct io_logs *cur_log;
 
        if (iolog->disabled)
                return;
        if (flist_empty(&iolog->io_logs))
-               iolog->avg_last[ddir] = t;
+               iolog->avg_last[sample->ddir] = t;
 
        cur_log = get_cur_log(iolog);
        if (cur_log) {
@@ -3049,19 +3056,19 @@ static void __add_log_sample(struct io_log *iolog, union io_sample_data data,
 
                s = get_sample(iolog, cur_log, cur_log->nr_samples);
 
-               s->data = data;
+               s->data = sample->data;
                s->time = t;
                if (iolog->td && iolog->td->o.log_alternate_epoch)
                        s->time += iolog->td->alternate_epoch;
-               io_sample_set_ddir(iolog, s, ddir);
-               s->bs = bs;
-               s->priority = priority;
+               io_sample_set_ddir(iolog, s, sample->ddir);
+               s->bs = sample->bs;
+               s->priority = sample->priority;
 
-               if (iolog->log_offset) {
-                       struct io_sample_offset *so = (void *) s;
+               if (iolog->log_offset)
+                       s->aux[IOS_AUX_OFFSET_INDEX] = sample->offset;
 
-                       so->offset = offset;
-               }
+               if (iolog->log_issue_time)
+                       s->aux[IOS_AUX_ISSUE_TIME_INDEX] = sample->issue_time;
 
                cur_log->nr_samples++;
                return;
@@ -3157,20 +3164,21 @@ static void __add_stat_to_log(struct io_log *iolog, enum fio_ddir ddir,
         * had actual samples done.
         */
        if (iolog->avg_window[ddir].samples) {
-               union io_sample_data data;
+               struct log_sample sample = { {{ 0, 0 }}, ddir, 0, 0, 0, 0 };
+               union io_sample_data *d = &sample.data;
 
                if (log_max == IO_LOG_SAMPLE_AVG) {
-                       data.val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
-                       data.val.val1 = 0;
+                       d->val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
+                       d->val.val1 = 0;
                } else if (log_max == IO_LOG_SAMPLE_MAX) {
-                       data.val.val0 = iolog->avg_window[ddir].max_val;
-                       data.val.val1 = 0;
+                       d->val.val0 = iolog->avg_window[ddir].max_val;
+                       d->val.val1 = 0;
                } else {
-                       data.val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
-                       data.val.val1 = iolog->avg_window[ddir].max_val;
+                       d->val.val0 = iolog->avg_window[ddir].mean.u.f + 0.50;
+                       d->val.val1 = iolog->avg_window[ddir].max_val;
                }
 
-               __add_log_sample(iolog, data, ddir, 0, elapsed, 0, 0);
+               __add_log_sample(iolog, elapsed, &sample);
        }
 
        reset_io_stat(&iolog->avg_window[ddir]);
@@ -3187,11 +3195,10 @@ static void _add_stat_to_log(struct io_log *iolog, unsigned long elapsed,
 
 static unsigned long add_log_sample(struct thread_data *td,
                                    struct io_log *iolog,
-                                   union io_sample_data data,
-                                   enum fio_ddir ddir, unsigned long long bs,
-                                   uint64_t offset, unsigned int ioprio)
+                                   struct log_sample *sample)
 {
        unsigned long elapsed, this_window;
+       enum fio_ddir ddir = sample->ddir;
 
        if (!ddir_rw(ddir))
                return 0;
@@ -3202,8 +3209,7 @@ static unsigned long add_log_sample(struct thread_data *td,
         * If no time averaging, just add the log sample.
         */
        if (!iolog->avg_msec) {
-               __add_log_sample(iolog, data, ddir, bs, elapsed, offset,
-                                ioprio);
+               __add_log_sample(iolog, elapsed, sample);
                return 0;
        }
 
@@ -3211,7 +3217,7 @@ static unsigned long add_log_sample(struct thread_data *td,
         * Add the sample. If the time period has passed, then
         * add that entry to the log and clear.
         */
-       add_stat_sample(&iolog->avg_window[ddir], data.val.val0);
+       add_stat_sample(&iolog->avg_window[ddir], sample->data.val.val0);
 
        /*
         * If period hasn't passed, adding the above sample is all we
@@ -3256,12 +3262,13 @@ void add_agg_sample(union io_sample_data data, enum fio_ddir ddir,
                    unsigned long long bs)
 {
        struct io_log *iolog;
+       struct log_sample sample = { data, ddir, bs, 0, 0, 0 };
 
        if (!ddir_rw(ddir))
                return;
 
        iolog = agg_io_log[ddir];
-       __add_log_sample(iolog, data, ddir, bs, mtime_since_genesis(), 0, 0);
+       __add_log_sample(iolog, mtime_since_genesis(), &sample);
 }
 
 void add_sync_clat_sample(struct thread_stat *ts, unsigned long long nsec)
@@ -3297,17 +3304,25 @@ add_lat_percentile_prio_sample(struct thread_stat *ts, unsigned long long nsec,
 
 void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                     unsigned long long nsec, unsigned long long bs,
-                    uint64_t offset, unsigned int ioprio,
-                    unsigned short clat_prio_index)
+                    struct io_u *io_u)
 {
        const bool needs_lock = td_async_processing(td);
        unsigned long elapsed, this_window;
        struct thread_stat *ts = &td->ts;
        struct io_log *iolog = td->clat_hist_log;
+       uint64_t offset = 0;
+       unsigned int ioprio = 0;
+       unsigned short clat_prio_index = 0;
 
        if (needs_lock)
                __td_io_u_lock(td);
 
+       if (io_u) {
+               offset = io_u->offset;
+               ioprio = io_u->ioprio;
+               clat_prio_index = io_u->clat_prio_index;
+       }
+
        add_stat_sample(&ts->clat_stat[ddir], nsec);
 
        /*
@@ -3323,9 +3338,16 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index,
                                     nsec);
 
-       if (td->clat_log)
-               add_log_sample(td, td->clat_log, sample_val(nsec), ddir, bs,
-                              offset, ioprio);
+       if (td->clat_log) {
+               struct log_sample sample = { sample_val(nsec), ddir, bs,
+                       offset, ioprio, 0 };
+
+               if (io_u)
+                       sample.issue_time =
+                               ntime_since(&td->epoch, &io_u->issue_time);
+
+               add_log_sample(td, td->clat_log, &sample);
+       }
 
        if (ts->clat_percentiles) {
                /*
@@ -3351,6 +3373,8 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                if (this_window >= iolog->hist_msec) {
                        uint64_t *io_u_plat;
                        struct io_u_plat_entry *dst;
+                       struct log_sample sample = { {{ 0, 0 }}, ddir, bs,
+                               offset, ioprio, 0 };
 
                        /*
                         * Make a byte-for-byte copy of the latency histogram
@@ -3364,8 +3388,9 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                        memcpy(&(dst->io_u_plat), io_u_plat,
                                FIO_IO_U_PLAT_NR * sizeof(uint64_t));
                        flist_add(&dst->list, &hw->list);
-                       __add_log_sample(iolog, sample_plat(dst), ddir, bs,
-                                        elapsed, offset, ioprio);
+
+                       sample.data = sample_plat(dst);
+                       __add_log_sample(iolog, elapsed, &sample);
 
                        /*
                         * Update the last time we recorded as being now, minus
@@ -3381,24 +3406,31 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
                __td_io_u_unlock(td);
 }
 
-void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
-                    unsigned long long nsec, unsigned long long bs,
-                    uint64_t offset, unsigned int ioprio)
+void add_slat_sample(struct thread_data *td, struct io_u *io_u)
 {
        const bool needs_lock = td_async_processing(td);
        struct thread_stat *ts = &td->ts;
+       enum fio_ddir ddir;
+       unsigned long long nsec;
 
+       ddir = io_u->ddir;
        if (!ddir_rw(ddir))
                return;
 
        if (needs_lock)
                __td_io_u_lock(td);
 
+       nsec = ntime_since(&io_u->start_time, &io_u->issue_time);
+
        add_stat_sample(&ts->slat_stat[ddir], nsec);
 
-       if (td->slat_log)
-               add_log_sample(td, td->slat_log, sample_val(nsec), ddir, bs,
-                              offset, ioprio);
+       if (td->slat_log) {
+               struct log_sample sample = { sample_val(nsec), ddir,
+                       io_u->xfer_buflen, io_u->offset, io_u->ioprio,
+                       ntime_since(&td->epoch, &io_u->issue_time) };
+
+               add_log_sample(td, td->slat_log, &sample);
+       }
 
        if (ts->slat_percentiles)
                add_lat_percentile_sample(ts, nsec, ddir, FIO_SLAT);
@@ -3409,8 +3441,7 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir,
 
 void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
                    unsigned long long nsec, unsigned long long bs,
-                   uint64_t offset, unsigned int ioprio,
-                   unsigned short clat_prio_index)
+                   struct io_u * io_u)
 {
        const bool needs_lock = td_async_processing(td);
        struct thread_stat *ts = &td->ts;
@@ -3423,9 +3454,12 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
 
        add_stat_sample(&ts->lat_stat[ddir], nsec);
 
-       if (td->lat_log)
-               add_log_sample(td, td->lat_log, sample_val(nsec), ddir, bs,
-                              offset, ioprio);
+       if (td->lat_log) {
+               struct log_sample sample = { sample_val(nsec), ddir, bs,
+                       io_u->offset, io_u->ioprio, 0 };
+
+               add_log_sample(td, td->lat_log, &sample);
+       }
 
        /*
         * When lat_percentiles=1 (default 0), the reported per priority
@@ -3439,8 +3473,9 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
         */
        if (ts->lat_percentiles) {
                add_lat_percentile_sample(ts, nsec, ddir, FIO_LAT);
-               add_lat_percentile_prio_sample(ts, nsec, ddir, clat_prio_index);
-               add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index,
+               add_lat_percentile_prio_sample(ts, nsec, ddir,
+                                              io_u->clat_prio_index);
+               add_stat_prio_sample(ts->clat_prio[ddir], io_u->clat_prio_index,
                                     nsec);
        }
        if (needs_lock)
@@ -3464,9 +3499,12 @@ void add_bw_sample(struct thread_data *td, struct io_u *io_u,
 
        add_stat_sample(&ts->bw_stat[io_u->ddir], rate);
 
-       if (td->bw_log)
-               add_log_sample(td, td->bw_log, sample_val(rate), io_u->ddir,
-                              bytes, io_u->offset, io_u->ioprio);
+       if (td->bw_log) {
+               struct log_sample sample = { sample_val(rate), io_u->ddir,
+                       bytes, io_u->offset, io_u->ioprio, 0 };
+
+               add_log_sample(td, td->bw_log, &sample);
+       }
 
        td->stat_io_bytes[io_u->ddir] = td->this_io_bytes[io_u->ddir];
 
@@ -3515,13 +3553,12 @@ static int __add_samples(struct thread_data *td, struct timespec *parent_tv,
                add_stat_sample(&stat[ddir], rate);
 
                if (log) {
-                       unsigned long long bs = 0;
+                       struct log_sample sample = {
+                               sample_val(rate), ddir, 0, 0, 0, 0 };
 
                        if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
-                               bs = td->o.min_bs[ddir];
-
-                       next = add_log_sample(td, log, sample_val(rate), ddir,
-                                             bs, 0, 0);
+                               sample.bs = td->o.min_bs[ddir];
+                       next = add_log_sample(td, log, &sample);
                        next_log = min(next_log, next);
                }
 
@@ -3559,9 +3596,12 @@ void add_iops_sample(struct thread_data *td, struct io_u *io_u,
 
        add_stat_sample(&ts->iops_stat[io_u->ddir], 1);
 
-       if (td->iops_log)
-               add_log_sample(td, td->iops_log, sample_val(1), io_u->ddir,
-                              bytes, io_u->offset, io_u->ioprio);
+       if (td->iops_log) {
+               struct log_sample sample = { sample_val(1), io_u->ddir, bytes,
+                       io_u->offset, io_u->ioprio, 0 };
+
+               add_log_sample(td, td->iops_log, &sample);
+       }
 
        td->stat_io_blocks[io_u->ddir] = td->this_io_blocks[io_u->ddir];
 
diff --git a/stat.h b/stat.h
index 0d57cceb217543e596364cfeeefd75edad142dbd..ac74d6c26b4ae7f277f42ef00626ec4f5788b8e7 100644 (file)
--- a/stat.h
+++ b/stat.h
@@ -366,12 +366,13 @@ extern void reset_io_stats(struct thread_data *);
 extern void update_rusage_stat(struct thread_data *);
 extern void clear_rusage_stat(struct thread_data *);
 
-extern void add_lat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
-                          unsigned long long, uint64_t, unsigned int, unsigned short);
-extern void add_clat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
-                           unsigned long long, uint64_t, unsigned int, unsigned short);
-extern void add_slat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
-                               unsigned long long, uint64_t, unsigned int);
+extern void add_lat_sample(struct thread_data *, enum fio_ddir,
+                          unsigned long long, unsigned long long,
+                          struct io_u *);
+extern void add_clat_sample(struct thread_data *, enum fio_ddir,
+                           unsigned long long, unsigned long long,
+                           struct io_u *);
+extern void add_slat_sample(struct thread_data *, struct io_u *);
 extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned long long);
 extern void add_iops_sample(struct thread_data *, struct io_u *,
                                unsigned int);
diff --git a/t/jobs/t0033.fio b/t/jobs/t0033.fio
new file mode 100644 (file)
index 0000000..156bdad
--- /dev/null
@@ -0,0 +1,28 @@
+[global]
+rw=read
+filename=t0033file
+size=8k
+time_based
+runtime=2s
+ioengine=libaio
+iodepth=1
+
+[job1]
+write_bw_log=log
+log_prio=1
+
+[job2]
+write_lat_log=log
+log_avg_msec=100
+log_window_value=both
+
+[job3]
+write_iops_log=log
+log_offset=1
+log_prio=1
+
+[job4]
+write_iops_log=log
+log_avg_msec=100
+log_window_value=both
+log_offset=1
diff --git a/t/jobs/t0034.fio b/t/jobs/t0034.fio
new file mode 100644 (file)
index 0000000..2b6c4b2
--- /dev/null
@@ -0,0 +1,27 @@
+[global]
+rw=read
+filename=t0034file
+size=8k
+time_based
+runtime=2s
+ioengine=libaio
+iodepth=1
+
+[job1]
+write_lat_log=log
+log_offset=1
+log_issue_time=1
+
+[job2]
+write_lat_log=log
+log_offset=1
+log_issue_time=1
+log_avg_msec=100
+log_window_value=both
+
+[job3]
+write_lat_log=log
+write_bw_log=log
+write_iops_log=log
+log_offset=1
+log_issue_time=1
diff --git a/t/jobs/t0035.fio b/t/jobs/t0035.fio
new file mode 100644 (file)
index 0000000..fd3488f
--- /dev/null
@@ -0,0 +1,27 @@
+[global]
+size=4k
+write_lat_log=log
+
+[job1]
+ioengine=filecreate
+filename=t0035file1
+
+[job2]
+ioengine=filestat
+filename=t0035file2
+
+[job3]
+ioengine=filedelete
+filename=t0035file3
+
+[job4]
+ioengine=dircreate
+filename=t0035dir1
+
+[job5]
+ioengine=dirstat
+filename=t0035dir2
+
+[job6]
+ioengine=dirdelete
+filename=t0035dir3
index 1ade64dc572a68a691c483b588db21f355399a87..3d90f4bfcd02bd77f49e50fc083bf5a4ad13bda3 100755 (executable)
@@ -87,6 +87,53 @@ class PassThruTest(FioJobCmdTest):
             self.passed = False
 
 
+class FlushTest(FioJobCmdTest):  # checks the reported sync count against writes // fsync
+    def setup(self, parameters):  # builds the fio arguments from self.fio_opts
+        fio_args = [
+            "--name=nvmept-flush",
+            "--ioengine=io_uring_cmd",
+            "--cmd_type=nvme",
+            "--randrepeat=0",
+            f"--filename={self.fio_opts['filename']}",
+            f"--rw={self.fio_opts['rw']}",
+            f"--output={self.filenames['output']}",
+            f"--output-format={self.fio_opts['output-format']}",
+        ]
+
+        for opt in ['fixedbufs', 'nonvectored', 'force_async', 'registerfiles',
+                    'sqthread_poll', 'sqthread_poll_cpu', 'hipri', 'nowait',
+                    'time_based', 'runtime', 'verify', 'io_size', 'num_range',
+                    'iodepth', 'iodepth_batch', 'iodepth_batch_complete',
+                    'size', 'rate', 'bs', 'bssplit', 'bsrange', 'randrepeat',
+                    'buffer_pattern', 'verify_pattern', 'offset', 'fdp',
+                    'fdp_pli', 'fdp_pli_select', 'dataplacement', 'plid_select',
+                    'plids', 'dp_scheme', 'number_ios', 'read_iolog', 'fsync']:
+            if opt in self.fio_opts:  # forward only the options this test sets
+                option = f"--{opt}={self.fio_opts[opt]}"
+                fio_args.append(option)
+
+        super().setup(fio_args)
+
+    def check_result(self):
+        super().check_result()
+
+        job = self.json_data['jobs'][0]  # only the first job is inspected
+
+        rw = self.fio_opts['rw']
+        fsync = self.fio_opts['fsync']
+
+        nr_write = job['write']['total_ios']
+        nr_sync = job['sync']['total_ios']
+
+        nr_sync_exp = nr_write // fsync  # expected number of sync commands
+
+        # The number of DDIR_SYNC commands actually issued may be one short of
+        # the expected count when the last command issued was a DDIR_WRITE.
+        if not ((nr_sync == nr_sync_exp) or (nr_sync + 1 == nr_sync_exp)):
+            logging.error(f"nr_write={nr_write}, nr_sync={nr_sync}, fsync={fsync}")
+            self.passed = False
+
+
 TEST_LIST = [
     {
         "test_id": 1,
@@ -255,6 +302,50 @@ TEST_LIST = [
             },
         "test_class": PassThruTest,
     },
+    {
+        "test_id": 16,
+        "fio_opts": {
+            "rw": 'read',
+            "bs": 4096,
+            "number_ios": 10,
+            "fsync": 1,
+            "output-format": "json",
+            },
+        "test_class": FlushTest,
+    },
+    {
+        "test_id": 17,
+        "fio_opts": {
+            "rw": 'write',
+            "bs": 4096,
+            "number_ios": 10,
+            "fsync": 1,
+            "output-format": "json",
+            },
+        "test_class": FlushTest,
+    },
+    {
+        "test_id": 18,
+        "fio_opts": {
+            "rw": 'readwrite',
+            "bs": 4096,
+            "number_ios": 10,
+            "fsync": 1,
+            "output-format": "json",
+            },
+        "test_class": FlushTest,
+    },
+    {
+        "test_id": 19,
+        "fio_opts": {
+            "rw": 'trimwrite',
+            "bs": 4096,
+            "number_ios": 10,
+            "fsync": 1,
+            "output-format": "json",
+            },
+        "test_class": FlushTest,
+    },
 ]
 
 def parse_args():
index d6a543f286073806533c98e63a0312e6f48cfeed..31a54a1efa6029870098be59a0affdd93e219a85 100755 (executable)
@@ -36,6 +36,10 @@ from pathlib import Path
 from fiotestlib import FioJobCmdTest, run_fio_tests
 from fiotestcommon import SUCCESS_NONZERO
 
+# This needs to match FIO_MAX_DP_IDS and DP_MAX_SCHEME_ENTRIES in
+# dataplacement.h
+FIO_MAX_DP_IDS = 128
+DP_MAX_SCHEME_ENTRIES = 32
 
 class FDPTest(FioJobCmdTest):
     """
@@ -64,7 +68,7 @@ class FDPTest(FioJobCmdTest):
                     'size', 'rate', 'bs', 'bssplit', 'bsrange', 'randrepeat',
                     'buffer_pattern', 'verify_pattern', 'offset', 'fdp',
                     'fdp_pli', 'fdp_pli_select', 'dataplacement', 'plid_select',
-                    'plids', 'dp_scheme', 'number_ios']:
+                    'plids', 'dp_scheme', 'number_ios', 'read_iolog']:
             if opt in self.fio_opts:
                 option = f"--{opt}={self.fio_opts[opt]}"
                 fio_args.append(option)
@@ -130,9 +134,10 @@ class FDPMultiplePLIDTest(FDPTest):
         mapping = {
                     'nruhsd': FIO_FDP_NUMBER_PLIDS,
                     'max_ruamw': FIO_FDP_MAX_RUAMW,
+                    'maxplid': FIO_FDP_NUMBER_PLIDS-1,
                     # parameters for 400, 401 tests
                     'hole_size': 64*1024,
-                    'nios_for_scheme': FIO_FDP_NUMBER_PLIDS//2,
+                    'nios_for_scheme': min(FIO_FDP_NUMBER_PLIDS//2, DP_MAX_SCHEME_ENTRIES),
                 }
         if 'number_ios' in self.fio_opts and isinstance(self.fio_opts['number_ios'], str):
             self.fio_opts['number_ios'] = eval(self.fio_opts['number_ios'].format(**mapping))
@@ -140,6 +145,10 @@ class FDPMultiplePLIDTest(FDPTest):
             self.fio_opts['bs'] = eval(self.fio_opts['bs'].format(**mapping))
         if 'rw' in self.fio_opts and isinstance(self.fio_opts['rw'], str):
             self.fio_opts['rw'] = self.fio_opts['rw'].format(**mapping)
+        if 'plids' in self.fio_opts and isinstance(self.fio_opts['plids'], str):
+            self.fio_opts['plids'] = self.fio_opts['plids'].format(**mapping)
+        if 'fdp_pli' in self.fio_opts and isinstance(self.fio_opts['fdp_pli'], str):
+            self.fio_opts['fdp_pli'] = self.fio_opts['fdp_pli'].format(**mapping)
 
         super().setup(parameters)
         
@@ -148,6 +157,18 @@ class FDPMultiplePLIDTest(FDPTest):
             with open(scheme_path, mode='w') as f:
                 for i in range(mapping['nios_for_scheme']):
                     f.write(f'{mapping["hole_size"] * 2 * i}, {mapping["hole_size"] * 2 * (i+1)}, {i}\n')
+
+        if 'read_iolog' in self.fio_opts:
+            read_iolog_path = os.path.join(self.paths['test_dir'], self.fio_opts['read_iolog'])
+            with open(read_iolog_path, mode='w') as f:
+                f.write('fio version 2 iolog\n')
+                f.write(f'{self.fio_opts["filename"]} add\n')
+                f.write(f'{self.fio_opts["filename"]} open\n')
+
+                for i in range(mapping['nios_for_scheme']):
+                    f.write(f'{self.fio_opts["filename"]} write {mapping["hole_size"] * 2 * i} {mapping["hole_size"]}\n')
+
+                f.write(f'{self.fio_opts["filename"]} close')
  
     def _check_result(self):
         if 'fdp_pli' in self.fio_opts:
@@ -155,9 +176,17 @@ class FDPMultiplePLIDTest(FDPTest):
         elif 'plids' in self.fio_opts:
             plid_list = self.fio_opts['plids'].split(',')
         else:
-            plid_list = list(range(FIO_FDP_NUMBER_PLIDS))
+            plid_list = [str(i) for i in range(FIO_FDP_NUMBER_PLIDS)]
 
-        plid_list = sorted([int(i) for i in plid_list])
+        range_ids = []
+        for plid in plid_list:
+            if '-' in plid:
+                [start, end] = plid.split('-')
+                range_ids.extend(list(range(int(start), int(end)+1)))
+            else:
+                range_ids.append(int(plid))
+
+        plid_list = sorted(range_ids)
         logging.debug("plid_list: %s", str(plid_list))
 
         fdp_status = get_fdp_status(self.fio_opts['filename'])
@@ -187,9 +216,10 @@ class FDPMultiplePLIDTest(FDPTest):
         """
         ruamw = [FIO_FDP_MAX_RUAMW] * FIO_FDP_NUMBER_PLIDS
 
-        remainder = int(self.fio_opts['number_ios'] % len(plid_list))
-        whole = int((self.fio_opts['number_ios'] - remainder) / len(plid_list))
-        logging.debug("PLIDs in the list should receive %d writes; %d PLIDs will receive one extra",
+        number_ios = self.fio_opts['number_ios'] % (len(plid_list)*FIO_FDP_MAX_RUAMW)
+        remainder = int(number_ios % len(plid_list))
+        whole = int((number_ios - remainder) / len(plid_list))
+        logging.debug("PLIDs in the list should show they have received %d writes; %d PLIDs will receive one extra",
                       whole, remainder)
 
         for plid in plid_list:
@@ -200,6 +230,9 @@ class FDPMultiplePLIDTest(FDPTest):
         logging.debug("Expected ruamw values: %s", str(ruamw))
 
         for idx, ruhs in enumerate(fdp_status['ruhss']):
+            if idx >= FIO_FDP_NUMBER_PLIDS:
+                break
+
             if ruhs['ruamw'] != ruamw[idx]:
                 logging.error("RUAMW mismatch with idx %d, pid %d, expected %d, observed %d", idx,
                               ruhs['pid'], ruamw[idx], ruhs['ruamw'])
@@ -695,6 +728,121 @@ TEST_LIST = [
             },
         "test_class": FDPMultiplePLIDTest,
     },
+    ### use 3-4 to specify plids
+    {
+        "test_id": 204,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "fdp": 1,
+            "fdp_pli": "1,3-4",
+            "fdp_pli_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
+    {
+        "test_id": 205,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "dataplacement": "fdp",
+            "plids": "1,3-4",
+            "plid_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
+    ### use 1-3 to specify plids
+    {
+        "test_id": 206,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "fdp": 1,
+            "fdp_pli": "1-3",
+            "fdp_pli_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
+    {
+        "test_id": 207,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "dataplacement": "fdp",
+            "plids": "1-3",
+            "plid_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
+    ### use multiple ranges to specify plids
+    {
+        "test_id": 208,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "fdp": 1,
+            "fdp_pli": "1-2,3-3",
+            "fdp_pli_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
+    {
+        "test_id": 209,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "dataplacement": "fdp",
+            "plids": "1-2,3-3",
+            "plid_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
+    {
+        "test_id": 210,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "fdp": 1,
+            "fdp_pli": "0-{maxplid}",
+            "fdp_pli_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
+    {
+        "test_id": 211,
+        "fio_opts": {
+            "rw": 'randwrite',
+            "bs": 4096,
+            "number_ios": "{max_ruamw}-1",
+            "verify": "crc32c",
+            "dataplacement": "fdp",
+            "fdp_pli": "0-{maxplid}",
+            "plid_select": "random",
+            "output-format": "json",
+            },
+        "test_class": FDPMultiplePLIDTest,
+    },
     # Specify invalid options fdp=1 and dataplacement=none
     {
         "test_id": 300,
@@ -759,6 +907,64 @@ TEST_LIST = [
         "test_class": FDPTest,
         "success": SUCCESS_NONZERO,
     },
+    ## Specify invalid ranges with start > end
+    {
+        "test_id": 304,
+        "fio_opts": {
+            "rw": 'write',
+            "bs": 4096,
+            "io_size": 4096,
+            "verify": "crc32c",
+            "fdp": 1,
+            "plids": "3-1",
+            "output-format": "normal",
+            },
+        "test_class": FDPTest,
+        "success": SUCCESS_NONZERO,
+    },
+    {
+        "test_id": 305,
+        "fio_opts": {
+            "rw": 'write',
+            "bs": 4096,
+            "io_size": 4096,
+            "verify": "crc32c",
+            "fdp": 1,
+            "fdp_pli": "3-1",
+            "output-format": "normal",
+            },
+        "test_class": FDPTest,
+        "success": SUCCESS_NONZERO,
+    },
+    ## Specify too many plids
+    {
+        "test_id": 306,
+        "fio_opts": {
+            "rw": 'write',
+            "bs": 4096,
+            "io_size": 4096,
+            "verify": "crc32c",
+            "fdp": 1,
+            "plids": "0-65535",
+            "output-format": "normal",
+            },
+        "test_class": FDPTest,
+        "success": SUCCESS_NONZERO,
+    },
+    {
+        "test_id": 307,
+        "fio_opts": {
+            "rw": 'write',
+            "bs": 4096,
+            "io_size": 4096,
+            "verify": "crc32c",
+            "fdp": 1,
+            "fdp_pli": "0-65535",
+            "output-format": "normal",
+            },
+        "test_class": FDPTest,
+        "success": SUCCESS_NONZERO,
+    },
     # write to multiple PLIDs using scheme selection of PLIDs
     ## using old and new sets of options
     {
@@ -789,6 +995,22 @@ TEST_LIST = [
             },
         "test_class": FDPMultiplePLIDTest,
     },
+    # check whether dataplacement works while replaying iologs
+    {
+        "test_id": 402,
+        "fio_opts": {
+            "rw": "write:{hole_size}",
+            "bs": "{hole_size}",
+            "number_ios": "{nios_for_scheme}",
+            "verify": "crc32c",
+            "read_iolog": "iolog",
+            "dataplacement": "fdp",
+            "plid_select": "scheme",
+            "dp_scheme": "lba.scheme",
+            "output-format": "json",
+        },
+        "test_class": FDPMultiplePLIDTest,
+    },
 ]
 
 def parse_args():
@@ -839,7 +1061,7 @@ def main():
         test['fio_opts']['filename'] = args.dut
 
     fdp_status = get_fdp_status(args.dut)
-    FIO_FDP_NUMBER_PLIDS = fdp_status['nruhsd']
+    FIO_FDP_NUMBER_PLIDS = min(fdp_status['nruhsd'], 128)
     update_all_ruhs(args.dut)
     FIO_FDP_MAX_RUAMW = check_all_ruhs(args.dut)
     if not FIO_FDP_MAX_RUAMW:
index 225806134e59a6ea3d181500e1f7c1680717363c..d713c1c424f09475047073f27a0ae3633d22679c 100755 (executable)
@@ -47,6 +47,7 @@ import time
 import shutil
 import logging
 import argparse
+import re
 from pathlib import Path
 from statsmodels.sandbox.stats.runs import runstest_1samp
 from fiotestlib import FioExeTest, FioJobFileTest, run_fio_tests
@@ -553,6 +554,51 @@ class FioJobFileTest_t0029(FioJobFileTest):
         if self.json_data['jobs'][1]['read']['io_kbytes'] != 8:
             self.passed = False
 
+class FioJobFileTest_LogFileFormat(FioJobFileTest):
+    """Check the first line of each log file against an expected regex."""
+    def setup(self, *args, **kws):
+        super().setup(*args, **kws)
+        self.patterns = {}  # log file name -> regex; filled in by subclasses
+
+    def check_result(self):
+        super().check_result()
+
+        if not self.passed:  # skip the format check if the base check failed
+            return
+
+        for logfile in self.patterns.keys():
+            file_path = os.path.join(self.paths['test_dir'], logfile)
+            with open(file_path, "r") as f:
+                line = f.readline()  # only the first line is examined
+                if not re.match(self.patterns[logfile], line):
+                    self.passed = False
+                    self.failure_reason = "wrong log file format: " + logfile
+                    return
+
+class FioJobFileTest_t0033(FioJobFileTest_LogFileFormat):
+    """Expected first-line formats for the bw, clat and iops logs of t0033"""
+    def setup(self, *args, **kws):
+        super().setup(*args, **kws)
+        self.patterns = {
+            'log_bw.1.log': '\\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
+            'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, \\d+\\n',
+            'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, 0x[\\da-f]+\\n',
+            'log_iops.4.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+\\n',
+        }
+
+class FioJobFileTest_t0034(FioJobFileTest_LogFileFormat):
+    """Expected first-line formats for the latency, bw and iops logs of t0034"""
+    def setup(self, *args, **kws):
+        super().setup(*args, **kws)
+        self.patterns = {
+            'log_clat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, \\d+\\n',
+            'log_slat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, \\d+\\n',
+            'log_lat.1.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
+            'log_clat.2.log': '\\d+, \\d+, \\d+, \\d+, 0, 0, \\d+, 0\\n',
+            'log_bw.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
+            'log_iops.3.log': '\\d+, \\d+, \\d+, \\d+, \\d+, \\d+, 0\\n',
+        }
+
 class FioJobFileTest_iops_rate(FioJobFileTest):
     """Test consists of fio test job t0011
     Confirm that job0 iops == 1000
@@ -878,6 +924,36 @@ TEST_LIST = [
         'pre_success':      SUCCESS_DEFAULT,
         'requirements':     [Requirements.linux, Requirements.libaio],
     },
+    {
+        'test_id':          33,
+        'test_class':       FioJobFileTest_t0033,
+        'job':              't0033.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'pre_success':      SUCCESS_DEFAULT,
+        'requirements':     [Requirements.linux, Requirements.libaio],
+    },
+    {
+        'test_id':          34,
+        'test_class':       FioJobFileTest_t0034,
+        'job':              't0034.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'pre_success':      SUCCESS_DEFAULT,
+        'requirements':     [Requirements.linux, Requirements.libaio],
+    },
+    {
+        'test_id':          35,
+        'test_class':       FioJobFileTest,
+        'job':              't0035.fio',
+        'success':          SUCCESS_DEFAULT,
+        'pre_job':          None,
+        'pre_success':      None,
+        'pre_success':      SUCCESS_DEFAULT,
+        'requirements':     [],
+    },
     {
         'test_id':          1000,
         'test_class':       FioExeTest,
index c6bf2d1efa8e86899c3a4e4f54b442d801d53533..16ce6923231d7bd3c5c6993cb632bd9566b8136b 100644 (file)
--- a/t/stest.c
+++ b/t/stest.c
@@ -25,10 +25,11 @@ static FLIST_HEAD(list);
 
 static int do_rand_allocs(void)
 {
-       unsigned int size, nr, rounds = 0, ret = 0;
+       unsigned int i, size, nr, rounds = 0, ret = 0;
        unsigned long total;
        struct elem *e;
        bool error;
+       char *c;
 
        while (rounds++ < LOOPS) {
 #ifdef STEST_SEED
@@ -38,12 +39,26 @@ static int do_rand_allocs(void)
                nr = total = 0;
                while (total < MAXSMALLOC) {
                        size = 8 * sizeof(struct elem) + (int) (999.0 * (rand() / (RAND_MAX + 1.0)));
-                       e = smalloc(size);
+                       e = scalloc(1, size);
                        if (!e) {
                                printf("fail at %lu, size %u\n", total, size);
                                ret++;
                                break;
                        }
+
+                       c = (char *)e;
+                       for (i = 0; i < size; i++) {
+                               if (*(c+i) != 0) {
+                                       printf("buffer not cleared at %lu, size %u\n", total, size);
+                                       ret++;
+                                       break;
+                               }
+                       }
+
+                       /* stop the while loop if buffer was not cleared */
+                       if (i < size)
+                               break;
+
                        e->magic1 = MAGIC1;
                        e->magic2 = MAGIC2;
                        e->size = size;
@@ -63,15 +78,25 @@ static int do_rand_allocs(void)
                        sfree(e);
 
                        if (!error) {
-                               e = smalloc(LARGESMALLOC);
+                               e = scalloc(1, LARGESMALLOC);
                                if (!e) {
-                                       error = true;
                                        ret++;
                                        printf("failure allocating %u bytes at %lu allocated during sfree phase\n",
                                                LARGESMALLOC, total);
+                                       break;
                                }
-                               else
-                                       sfree(e);
+
+                               c = (char *)e;
+                               for (i = 0; i < LARGESMALLOC; i++) {
+                                       if (*(c+i) != 0) {
+                                               error = true;
+                                               ret++;
+                                               printf("large buffer not cleared at %lu, size %u\n", total, size);
+                                               break;
+                                       }
+                               }
+
+                               sfree(e);
                        }
                }
        }
index ccd0c064b928e081b392c03a3c11bee334902745..d0e0a4aea2d3222f5eac37c96006e1ab04f5eed4 100644 (file)
@@ -156,6 +156,7 @@ struct thread_options {
        unsigned int experimental_verify;
        unsigned int verify_state;
        unsigned int verify_state_save;
+       unsigned int verify_write_sequence;
        unsigned int use_thread;
        unsigned int unlink;
        unsigned int unlink_each_loop;
@@ -394,12 +395,13 @@ struct thread_options {
        unsigned int fdp;
        unsigned int dp_type;
        unsigned int dp_id_select;
-       unsigned int dp_ids[FIO_MAX_DP_IDS];
+       uint16_t dp_ids[FIO_MAX_DP_IDS];
        unsigned int dp_nr_ids;
        char *dp_scheme_file;
 
        unsigned int log_entries;
        unsigned int log_prio;
+       unsigned int log_issue_time;
 };
 
 #define FIO_TOP_STR_MAX                256
@@ -708,11 +710,12 @@ struct thread_options_pack {
 
        uint32_t log_entries;
        uint32_t log_prio;
+       uint32_t log_issue_time;
 
        uint32_t fdp;
        uint32_t dp_type;
        uint32_t dp_id_select;
-       uint32_t dp_ids[FIO_MAX_DP_IDS];
+       uint16_t dp_ids[FIO_MAX_DP_IDS];
        uint32_t dp_nr_ids;
        uint8_t dp_scheme_file[FIO_TOP_STR_MAX];
 
index 123c39ae71568165f73e5bd8eeb9ba66542a7a8e..757121806e28f03f48b2c3e18461ae1348ddfdec 100644 (file)
@@ -51,13 +51,13 @@ specific_options=https  http_host  http_user  http_pass  http_s3_key  http_s3_ke
 specific_options=ime_psync  ime_psyncv
 
 [ioengine_io_uring]
-specific_options=hipri  cmdprio_percentage  cmdprio_class  cmdprio  cmdprio_bssplit  fixedbufs  registerfiles  sqthread_poll  sqthread_poll_cpu  nonvectored  uncached  nowait  force_async
+specific_options=hipri  cmdprio_percentage  cmdprio_class  cmdprio  cmdprio_bssplit  fixedbufs  registerfiles  sqthread_poll  sqthread_poll_cpu  nonvectored  nowait  force_async atomic
 
 [ioengine_io_uring_cmd]
-specific_options=hipri  cmdprio_percentage  cmdprio_class  cmdprio  cmdprio_bssplit  fixedbufs  registerfiles  sqthread_poll  sqthread_poll_cpu  nonvectored  uncached  nowait  force_async  cmd_type  md_per_io_size  pi_act  pi_chk  apptag  apptag_mask
+specific_options=hipri  cmdprio_percentage  cmdprio_class  cmdprio  cmdprio_bssplit  fixedbufs  registerfiles  sqthread_poll  sqthread_poll_cpu  nonvectored  nowait  force_async  cmd_type  md_per_io_size  pi_act  pi_chk  apptag  apptag_mask
 
 [ioengine_libaio]
-specific_options=userspace_reap  cmdprio_percentage  cmdprio_class  cmdprio  cmdprio_bssplit  nowait
+specific_options=userspace_reap  cmdprio_percentage  cmdprio_class  cmdprio  cmdprio_bssplit  nowait atomic
 
 [ioengine_libblkio]
 specific_options=libblkio_driver  libblkio_path  libblkio_pre_connect_props  libblkio_num_entries  libblkio_queue_size  libblkio_pre_start_props  hipri  libblkio_vectored  libblkio_write_zeroes_on_trim  libblkio_wait_mode  libblkio_force_enable_completion_eventfd
@@ -71,15 +71,6 @@ specific_options=namenode  hostname  port  hdfsdirectory  chunk_size  single_ins
 [ioengine_libiscsi]
 specific_options=initiator
 
-[ioengine_librpma_apm_server]
-specific_options=librpma_apm_client
-
-[ioengine_busy_wait_polling]
-specific_options=serverip  port  direct_write_to_pmem
-
-[ioengine_librpma_gpspm_server]
-specific_options=librpma_gpspm_client
-
 [ioengine_mmap]
 specific_options=thp
 
@@ -108,7 +99,7 @@ specific_options=hostname  bindname  port  verb
 specific_options=hipri  readfua  writefua  sg_write_mode  stream_id
 
 [ioengine_pvsync2]
-specific_options=hipri  hipri_percentage  uncached  nowait  sync  psync  vsync  pvsync
+specific_options=hipri  hipri_percentage  nowait  sync  psync  vsync  pvsync atomic
 
 [ioengine_xnvme]
 specific_options=hipri  sqthread_poll  xnvme_be  xnvme_async  xnvme_sync  xnvme_admin  xnvme_dev_nsid  xnvme_iovec
index b2fede24710fd3fcedfbcda890b3b9def9a957be..f3d228ba7df8472c9fcbde72c776ff618b2a9486 100644 (file)
--- a/verify.c
+++ b/verify.c
@@ -848,12 +848,13 @@ static int verify_header(struct io_u *io_u, struct thread_data *td,
        /*
         * For read-only workloads, the program cannot be certain of the
         * last numberio written to a block. Checking of numberio will be
-        * done only for workloads that write data.  For verify_only,
-        * numberio check is skipped.
+        * done only for workloads that write data.  For verify_only or
+        * any mode de-selecting verify_write_sequence, numberio check is
+        * skipped.
         */
        if (td_write(td) && (td_min_bs(td) == td_max_bs(td)) &&
            !td->o.time_based)
-               if (!td->o.verify_only)
+               if (td->o.verify_write_sequence)
                        if (hdr->numberio != io_u->numberio) {
                                log_err("verify: bad header numberio %"PRIu16
                                        ", wanted %"PRIu16,