os: ubuntu-20.04
cc: clang
- build: macos
- os: macos-10.15
+ os: macos-11
- build: linux-i686-gcc
os: ubuntu-20.04
arch: i686
--- /dev/null
+name: CIFuzz
+on: [pull_request]
+jobs:
+ Fuzzing:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Build Fuzzers
+ id: build
+ uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+ with:
+ oss-fuzz-project-name: 'fio'
+ dry-run: false
+ - name: Run Fuzzers
+ uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+ with:
+ oss-fuzz-project-name: 'fio'
+ fuzz-seconds: 600
+ dry-run: false
+ - name: Upload Crash
+ uses: actions/upload-artifact@v1
+ if: failure() && steps.build.outcome == 'success'
+ with:
+ name: artifacts
+ path: ./out/artifacts
#!/bin/sh
GVF=FIO-VERSION-FILE
-DEF_VER=fio-3.28
+DEF_VER=fio-3.29
LF='
'
Limit on the number of simultaneously opened zones per single
thread/process.
+.. option:: ignore_zone_limits=bool
+ If this option is used, fio will ignore the maximum number of open
+ zones limit of the zoned block device in use, thus allowing the
+ option :option:`max_open_zones` value to be larger than the device
+ reported limit. Default: false.
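+
+	For example, a purely illustrative zoned job (the device path and the
+	limit value are placeholders) might combine these options as::
+
+		zonemode=zbd
+		filename=/dev/nullb0
+		max_open_zones=512
+		ignore_zone_limits=1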
+
.. option:: zone_reset_threshold=float
A number between zero and one that indicates the ratio of logical
.. option:: fdatasync=int
Like :option:`fsync` but uses :manpage:`fdatasync(2)` to only sync data and
- not metadata blocks. In Windows, FreeBSD, DragonFlyBSD or OSX there is no
+ not metadata blocks. In Windows, DragonFlyBSD or OSX there is no
:manpage:`fdatasync(2)` so this falls back to using :manpage:`fsync(2)`.
Defaults to 0, which means fio does not periodically issue and wait for a
data-only sync to complete.
.. option:: filesize=irange(int)
- Individual file sizes. May be a range, in which case fio will select sizes
- for files at random within the given range and limited to :option:`size` in
- total (if that is given). If not given, each created file is the same size.
- This option overrides :option:`size` in terms of file size, which means
- this value is used as a fixed size or possible range of each file.
+ Individual file sizes. May be a range, in which case fio will select sizes for
+ files at random within the given range. If not given, each created file is the
+ same size. This option overrides :option:`size` in terms of file size, i.e. if
+ :option:`filesize` is specified then :option:`size` becomes merely the default
+ for :option:`io_size` and has no effect at all if :option:`io_size` is set
+ explicitly.
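+
+	As a hedged illustration (the file count and size range are arbitrary), a
+	job could create eight files whose sizes are picked at random between 4k
+	and 1m::
+
+		nrfiles=8
+		filesize=4k-1m
+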
.. option:: file_append=bool
depending on the block size of the IO. This option is useful only
when used together with the :option:`bssplit` option, that is,
multiple different block sizes are used for reads and writes.
- The format for this option is the same as the format of the
- :option:`bssplit` option, with the exception that values for
- trim IOs are ignored. This option is mutually exclusive with the
- :option:`cmdprio_percentage` option.
+
+ The first accepted format for this option is the same as the format of
+ the :option:`bssplit` option:
+
+ cmdprio_bssplit=blocksize/percentage:blocksize/percentage
+
+ In this case, each entry will use the priority class and priority
+ level defined by the options :option:`cmdprio_class` and
+ :option:`cmdprio` respectively.
+
+ The second accepted format for this option is:
+
+ cmdprio_bssplit=blocksize/percentage/class/level:blocksize/percentage/class/level
+
+ In this case, the priority class and priority level is defined inside
+ each entry. In comparison with the first accepted format, the second
+ accepted format does not restrict all entries to have the same priority
+ class and priority level.
+
+ For both formats, only the read and write data directions are supported,
+ values for trim IOs are ignored. This option is mutually exclusive with
+ the :option:`cmdprio_percentage` option.
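+
+	For example, a purely illustrative job using the second format (the
+	engine, block sizes, percentages and priorities are placeholders) might
+	set::
+
+		ioengine=libaio
+		cmdprio_bssplit=4k/10/1/0:64k/25/3/0
+
+	meaning 10% of 4k I/Os are issued with priority class 1, level 0, and 25%
+	of 64k I/Os with priority class 3, level 0; the remaining I/Os use the
+	default context priority.
+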
.. option:: fixedbufs : [io_uring]
**write**
This is the default where write opcodes are issued as usual.
- **verify**
+ **write_and_verify**
Issue WRITE AND VERIFY commands. The BYTCHK bit is set to 0. This
directs the device to carry out a medium verification with no data
comparison. The writefua option is ignored with this selection.
- **same**
+ **verify**
+ This option is deprecated. Use write_and_verify instead.
+ **write_same**
Issue WRITE SAME commands. This transfers a single block to the device
and writes this same block of data to a contiguous sequence of LBAs
beginning at the specified offset. fio's block size parameter specifies
for each command but only the first 512 bytes will be used and
transferred to the device. The writefua option is ignored with this
selection.
+ **same**
+ This option is deprecated. Use write_same instead.
+ **write_same_ndob**
+ Issue WRITE SAME(16) commands as above but with the No Data Output
+ Buffer (NDOB) bit set. No data will be transferred to the device with
+ this bit set. Data written will be a pre-determined pattern such as
+ all zeroes.
+ **write_stream**
+ Issue WRITE STREAM(16) commands. Use the **stream_id** option to specify
+ the stream identifier.
+ **verify_bytchk_00**
+ Issue VERIFY commands with BYTCHK set to 00. This directs the
+ device to carry out a medium verification with no data comparison.
+ **verify_bytchk_01**
+ Issue VERIFY commands with BYTCHK set to 01. This directs the device to
+ compare the data on the device with the data transferred to the device.
+ **verify_bytchk_11**
+ Issue VERIFY commands with BYTCHK set to 11. This transfers a
+ single block to the device and compares the contents of this block with the
+ data on the device beginning at the specified offset. fio's block size
+ parameter specifies the total amount of data compared with this command.
+ However, only one block (sector) worth of data is transferred to the device.
+ This is similar to the WRITE SAME command except that data is compared instead
+ of written.
+
+.. option:: stream_id=int : [sg]
+
+ Set the stream identifier for WRITE STREAM commands. If this is set to 0 (which is not
+ a valid stream identifier) fio will open a stream and then close it when done. Default
+ is 0.
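+
+	A minimal sketch (device path and identifier are illustrative only) of a
+	streamed write job::
+
+		ioengine=sg
+		filename=/dev/sg0
+		rw=write
+		sg_write_mode=write_stream
+		stream_id=1
+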
.. option:: hipri : [sg]
ifdef CONFIG_LIBAIO
libaio_SRCS = engines/libaio.c
cmdprio_SRCS = engines/cmdprio.c
+ LIBS += -laio
libaio_LIBS = -laio
ENGINES += libaio
endif
$(1)_OBJS := $$($(1)_SRCS:.c=.o)
$$($(1)_OBJS): CFLAGS := -fPIC $$($(1)_CFLAGS) $(CFLAGS)
engines/fio-$(1).so: $$($(1)_OBJS)
- $$(QUIET_LINK)$(CC) -shared -rdynamic -fPIC -Wl,-soname,fio-$(1).so.1 -o $$@ $$< $$($(1)_LIBS)
+ $$(QUIET_LINK)$(CC) $(DYNAMIC) -shared -rdynamic -fPIC -Wl,-soname,fio-$(1).so.1 -o $$@ $$< $$($(1)_LIBS)
ENGS_OBJS += engines/fio-$(1).so
endef
else # !CONFIG_DYNAMIC_ENGINES
T_TEST_PROGS += $(T_LFSR_TEST_PROGS)
T_TEST_PROGS += $(T_GEN_RAND_PROGS)
T_PROGS += $(T_BTRACE_FIO_PROGS)
+ifdef CONFIG_ZLIB
T_PROGS += $(T_DEDUPE_PROGS)
+endif
T_PROGS += $(T_VS_PROGS)
T_TEST_PROGS += $(T_MEMLOCK_PROGS)
ifdef CONFIG_PREAD
$(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $(T_BTRACE_FIO_OBJS) $(LIBS)
endif
+ifdef CONFIG_ZLIB
t/fio-dedupe: $(T_DEDUPE_OBJS)
$(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $(T_DEDUPE_OBJS) $(LIBS)
+endif
t/fio-verify-state: $(T_VS_OBJS)
$(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $(T_VS_OBJS) $(LIBS)
+++ /dev/null
-Overview and history
---------------------
-
-Fio was originally written to save me the hassle of writing special test case
-programs when I wanted to test a specific workload, either for performance
-reasons or to find/reproduce a bug. The process of writing such a test app can
-be tiresome, especially if you have to do it often. Hence I needed a tool that
-would be able to simulate a given I/O workload without resorting to writing a
-tailored test case again and again.
-
-A test work load is difficult to define, though. There can be any number of
-processes or threads involved, and they can each be using their own way of
-generating I/O. You could have someone dirtying large amounts of memory in a
-memory mapped file, or maybe several threads issuing reads using asynchronous
-I/O. fio needed to be flexible enough to simulate both of these cases, and many
-more.
-
-Fio spawns a number of threads or processes doing a particular type of I/O
-action as specified by the user. fio takes a number of global parameters, each
-inherited by the thread unless otherwise parameters given to them overriding
-that setting is given. The typical use of fio is to write a job file matching
-the I/O load one wants to simulate.
-
-
-Source
-------
-
-Fio resides in a git repo, the canonical place is:
-
- git://git.kernel.dk/fio.git
-
-When inside a corporate firewall, git:// URL sometimes does not work.
-If git:// does not work, use the http protocol instead:
-
- http://git.kernel.dk/fio.git
-
-Snapshots are frequently generated and :file:`fio-git-*.tar.gz` include the git
-meta data as well. Other tarballs are archives of official fio releases.
-Snapshots can download from:
-
- http://brick.kernel.dk/snaps/
-
-There are also two official mirrors. Both of these are automatically synced with
-the main repository, when changes are pushed. If the main repo is down for some
-reason, either one of these is safe to use as a backup:
-
- git://git.kernel.org/pub/scm/linux/kernel/git/axboe/fio.git
-
- https://git.kernel.org/pub/scm/linux/kernel/git/axboe/fio.git
-
-or
-
- git://github.com/axboe/fio.git
-
- https://github.com/axboe/fio.git
-
-
-Mailing list
-------------
-
-The fio project mailing list is meant for anything related to fio including
-general discussion, bug reporting, questions, and development. For bug reporting,
-see REPORTING-BUGS.
-
-An automated mail detailing recent commits is automatically sent to the list at
-most daily. The list address is fio@vger.kernel.org, subscribe by sending an
-email to majordomo@vger.kernel.org with
-
- subscribe fio
-
-in the body of the email. Archives can be found here:
-
- http://www.spinics.net/lists/fio/
-
-or here:
-
- https://lore.kernel.org/fio/
-
-and archives for the old list can be found here:
-
- http://maillist.kernel.dk/fio-devel/
-
-
-Author
-------
-
-Fio was written by Jens Axboe <axboe@kernel.dk> to enable flexible testing of
-the Linux I/O subsystem and schedulers. He got tired of writing specific test
-applications to simulate a given workload, and found that the existing I/O
-benchmark/test tools out there weren't flexible enough to do what he wanted.
-
-Jens Axboe <axboe@kernel.dk> 20060905
-
-
-Binary packages
----------------
-
-Debian:
- Starting with Debian "Squeeze", fio packages are part of the official
- Debian repository. http://packages.debian.org/search?keywords=fio .
-
-Ubuntu:
- Starting with Ubuntu 10.04 LTS (aka "Lucid Lynx"), fio packages are part
- of the Ubuntu "universe" repository.
- http://packages.ubuntu.com/search?keywords=fio .
-
-Red Hat, Fedora, CentOS & Co:
- Starting with Fedora 9/Extra Packages for Enterprise Linux 4, fio
- packages are part of the Fedora/EPEL repositories.
- https://apps.fedoraproject.org/packages/fio .
-
-Mandriva:
- Mandriva has integrated fio into their package repository, so installing
- on that distro should be as easy as typing ``urpmi fio``.
-
-Arch Linux:
- An Arch Linux package is provided under the Community sub-repository:
- https://www.archlinux.org/packages/?sort=&q=fio
-
-Solaris:
- Packages for Solaris are available from OpenCSW. Install their pkgutil
- tool (http://www.opencsw.org/get-it/pkgutil/) and then install fio via
- ``pkgutil -i fio``.
-
-Windows:
- Rebecca Cran <rebecca@bsdio.com> has fio packages for Windows at
- https://bsdio.com/fio/ . The latest builds for Windows can also
- be grabbed from https://ci.appveyor.com/project/axboe/fio by clicking
- the latest x86 or x64 build, then selecting the ARTIFACTS tab.
-
-BSDs:
- Packages for BSDs may be available from their binary package repositories.
- Look for a package "fio" using their binary package managers.
-
-
-Building
---------
-
-Just type::
-
- $ ./configure
- $ make
- $ make install
-
-Note that GNU make is required. On BSDs it's available from devel/gmake within
-ports directory; on Solaris it's in the SUNWgmake package. On platforms where
-GNU make isn't the default, type ``gmake`` instead of ``make``.
-
-Configure will print the enabled options. Note that on Linux based platforms,
-the libaio development packages must be installed to use the libaio
-engine. Depending on distro, it is usually called libaio-devel or libaio-dev.
-
-For gfio, gtk 2.18 (or newer), associated glib threads, and cairo are required
-to be installed. gfio isn't built automatically and can be enabled with a
-``--enable-gfio`` option to configure.
-
-To build fio with a cross-compiler::
-
- $ make clean
- $ make CROSS_COMPILE=/path/to/toolchain/prefix
-
-Configure will attempt to determine the target platform automatically.
-
-It's possible to build fio for ESX as well, use the ``--esx`` switch to
-configure.
-
-
-Windows
-~~~~~~~
-
-The minimum versions of Windows for building/runing fio are Windows 7/Windows
-Server 2008 R2. On Windows, Cygwin (https://www.cygwin.com/) is required in
-order to build fio. To create an MSI installer package install WiX from
-https://wixtoolset.org and run :file:`dobuild.cmd` from the :file:`os/windows`
-directory.
-
-How to compile fio on 64-bit Windows:
-
- 1. Install Cygwin (http://www.cygwin.com/). Install **make** and all
- packages starting with **mingw64-x86_64**. Ensure
- **mingw64-x86_64-zlib** are installed if you wish
- to enable fio's log compression functionality.
- 2. Open the Cygwin Terminal.
- 3. Go to the fio directory (source files).
- 4. Run ``make clean && make -j``.
-
-To build fio for 32-bit Windows, ensure the -i686 versions of the previously
-mentioned -x86_64 packages are installed and run ``./configure
---build-32bit-win`` before ``make``.
-
-It's recommended that once built or installed, fio be run in a Command Prompt or
-other 'native' console such as console2, since there are known to be display and
-signal issues when running it under a Cygwin shell (see
-https://github.com/mintty/mintty/issues/56 and
-https://github.com/mintty/mintty/wiki/Tips#inputoutput-interaction-with-alien-programs
-for details).
-
-
-Documentation
-~~~~~~~~~~~~~
-
-Fio uses Sphinx_ to generate documentation from the reStructuredText_ files.
-To build HTML formatted documentation run ``make -C doc html`` and direct your
-browser to :file:`./doc/output/html/index.html`. To build manual page run
-``make -C doc man`` and then ``man doc/output/man/fio.1``. To see what other
-output formats are supported run ``make -C doc help``.
-
-.. _reStructuredText: http://www.sphinx-doc.org/rest.html
-.. _Sphinx: http://www.sphinx-doc.org
-
-
-Platforms
----------
-
-Fio works on (at least) Linux, Solaris, AIX, HP-UX, OSX, NetBSD, OpenBSD,
-Windows, FreeBSD, and DragonFly. Some features and/or options may only be
-available on some of the platforms, typically because those features only apply
-to that platform (like the solarisaio engine, or the splice engine on Linux).
-
-Some features are not available on FreeBSD/Solaris even if they could be
-implemented, I'd be happy to take patches for that. An example of that is disk
-utility statistics and (I think) huge page support, support for that does exist
-in FreeBSD/Solaris.
-
-Fio uses pthread mutexes for signalling and locking and some platforms do not
-support process shared pthread mutexes. As a result, on such platforms only
-threads are supported. This could be fixed with sysv ipc locking or other
-locking alternatives.
-
-Other \*BSD platforms are untested, but fio should work there almost out of the
-box. Since I don't do test runs or even compiles on those platforms, your
-mileage may vary. Sending me patches for other platforms is greatly
-appreciated. There's a lot of value in having the same test/benchmark tool
-available on all platforms.
-
-Note that POSIX aio is not enabled by default on AIX. Messages like these::
-
- Symbol resolution failed for /usr/lib/libc.a(posix_aio.o) because:
- Symbol _posix_kaio_rdwr (number 2) is not exported from dependent module /unix.
-
-indicate one needs to enable POSIX aio. Run the following commands as root::
-
- # lsdev -C -l posix_aio0
- posix_aio0 Defined Posix Asynchronous I/O
- # cfgmgr -l posix_aio0
- # lsdev -C -l posix_aio0
- posix_aio0 Available Posix Asynchronous I/O
-
-POSIX aio should work now. To make the change permanent::
-
- # chdev -l posix_aio0 -P -a autoconfig='available'
- posix_aio0 changed
-
-
-Running fio
------------
-
-Running fio is normally the easiest part - you just give it the job file
-(or job files) as parameters::
-
- $ fio [options] [jobfile] ...
-
-and it will start doing what the *jobfile* tells it to do. You can give more
-than one job file on the command line, fio will serialize the running of those
-files. Internally that is the same as using the :option:`stonewall` parameter
-described in the parameter section.
-
-If the job file contains only one job, you may as well just give the parameters
-on the command line. The command line parameters are identical to the job
-parameters, with a few extra that control global parameters. For example, for
-the job file parameter :option:`iodepth=2 <iodepth>`, the mirror command line
-option would be :option:`--iodepth 2 <iodepth>` or :option:`--iodepth=2
-<iodepth>`. You can also use the command line for giving more than one job
-entry. For each :option:`--name <name>` option that fio sees, it will start a
-new job with that name. Command line entries following a
-:option:`--name <name>` entry will apply to that job, until there are no more
-entries or a new :option:`--name <name>` entry is seen. This is similar to the
-job file options, where each option applies to the current job until a new []
-job entry is seen.
-
-fio does not need to run as root, except if the files or devices specified in
-the job section requires that. Some other options may also be restricted, such
-as memory locking, I/O scheduler switching, and decreasing the nice value.
-
-If *jobfile* is specified as ``-``, the job file will be read from standard
-input.
--- /dev/null
+Overview and history
+--------------------
+
+Fio was originally written to save me the hassle of writing special test case
+programs when I wanted to test a specific workload, either for performance
+reasons or to find/reproduce a bug. The process of writing such a test app can
+be tiresome, especially if you have to do it often. Hence I needed a tool that
+would be able to simulate a given I/O workload without resorting to writing a
+tailored test case again and again.
+
+A test work load is difficult to define, though. There can be any number of
+processes or threads involved, and they can each be using their own way of
+generating I/O. You could have someone dirtying large amounts of memory in a
+memory mapped file, or maybe several threads issuing reads using asynchronous
+I/O. fio needed to be flexible enough to simulate both of these cases, and many
+more.
+
+Fio spawns a number of threads or processes doing a particular type of I/O
+action as specified by the user. fio takes a number of global parameters, each
+inherited by the thread unless other parameters given to that thread override
+the setting. The typical use of fio is to write a job file matching
+the I/O load one wants to simulate.
+
+
+Source
+------
+
+Fio resides in a git repo, the canonical place is:
+
+ git://git.kernel.dk/fio.git
+
+When inside a corporate firewall, the git:// URL sometimes does not work.
+If git:// does not work, use the http protocol instead:
+
+ http://git.kernel.dk/fio.git
+
+Snapshots are frequently generated and the :file:`fio-git-*.tar.gz` tarballs
+include the git metadata as well. Other tarballs are archives of official fio
+releases. Snapshots can be downloaded from:
+
+ http://brick.kernel.dk/snaps/
+
+There are also two official mirrors. Both of these are automatically synced with
+the main repository, when changes are pushed. If the main repo is down for some
+reason, either one of these is safe to use as a backup:
+
+ git://git.kernel.org/pub/scm/linux/kernel/git/axboe/fio.git
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/axboe/fio.git
+
+or
+
+ git://github.com/axboe/fio.git
+
+ https://github.com/axboe/fio.git
+
+
+Mailing list
+------------
+
+The fio project mailing list is meant for anything related to fio including
+general discussion, bug reporting, questions, and development. For bug reporting,
+see REPORTING-BUGS.
+
+An automated mail detailing recent commits is automatically sent to the list at
+most daily. The list address is fio@vger.kernel.org, subscribe by sending an
+email to majordomo@vger.kernel.org with
+
+ subscribe fio
+
+in the body of the email. Archives can be found here:
+
+ http://www.spinics.net/lists/fio/
+
+or here:
+
+ https://lore.kernel.org/fio/
+
+and archives for the old list can be found here:
+
+ http://maillist.kernel.dk/fio-devel/
+
+
+Author
+------
+
+Fio was written by Jens Axboe <axboe@kernel.dk> to enable flexible testing of
+the Linux I/O subsystem and schedulers. He got tired of writing specific test
+applications to simulate a given workload, and found that the existing I/O
+benchmark/test tools out there weren't flexible enough to do what he wanted.
+
+Jens Axboe <axboe@kernel.dk> 20060905
+
+
+Binary packages
+---------------
+
+Debian:
+ Starting with Debian "Squeeze", fio packages are part of the official
+ Debian repository. http://packages.debian.org/search?keywords=fio .
+
+Ubuntu:
+ Starting with Ubuntu 10.04 LTS (aka "Lucid Lynx"), fio packages are part
+ of the Ubuntu "universe" repository.
+ http://packages.ubuntu.com/search?keywords=fio .
+
+Red Hat, Fedora, CentOS & Co:
+ Starting with Fedora 9/Extra Packages for Enterprise Linux 4, fio
+ packages are part of the Fedora/EPEL repositories.
+ https://apps.fedoraproject.org/packages/fio .
+
+Mandriva:
+ Mandriva has integrated fio into their package repository, so installing
+ on that distro should be as easy as typing ``urpmi fio``.
+
+Arch Linux:
+ An Arch Linux package is provided under the Community sub-repository:
+ https://www.archlinux.org/packages/?sort=&q=fio
+
+Solaris:
+ Packages for Solaris are available from OpenCSW. Install their pkgutil
+ tool (http://www.opencsw.org/get-it/pkgutil/) and then install fio via
+ ``pkgutil -i fio``.
+
+Windows:
+ Rebecca Cran <rebecca@bsdio.com> has fio packages for Windows at
+ https://bsdio.com/fio/ . The latest builds for Windows can also
+ be grabbed from https://ci.appveyor.com/project/axboe/fio by clicking
+ the latest x86 or x64 build, then selecting the ARTIFACTS tab.
+
+BSDs:
+ Packages for BSDs may be available from their binary package repositories.
+ Look for a package "fio" using their binary package managers.
+
+
+Building
+--------
+
+Just type::
+
+ $ ./configure
+ $ make
+ $ make install
+
+Note that GNU make is required. On BSDs it's available from devel/gmake within
+the ports directory; on Solaris it's in the SUNWgmake package. On platforms where
+GNU make isn't the default, type ``gmake`` instead of ``make``.
+
+Configure will print the enabled options. Note that on Linux based platforms,
+the libaio development packages must be installed to use the libaio
+engine. Depending on distro, it is usually called libaio-devel or libaio-dev.
+
+For gfio, gtk 2.18 (or newer), associated glib threads, and cairo are required
+to be installed. gfio isn't built automatically and can be enabled with a
+``--enable-gfio`` option to configure.
+
+To build fio with a cross-compiler::
+
+ $ make clean
+ $ make CROSS_COMPILE=/path/to/toolchain/prefix
+
+Configure will attempt to determine the target platform automatically.
+
+It's possible to build fio for ESX as well, use the ``--esx`` switch to
+configure.
+
+
+Windows
+~~~~~~~
+
+The minimum versions of Windows for building/running fio are Windows 7/Windows
+Server 2008 R2. On Windows, Cygwin (https://www.cygwin.com/) is required in
+order to build fio. To create an MSI installer package install WiX from
+https://wixtoolset.org and run :file:`dobuild.cmd` from the :file:`os/windows`
+directory.
+
+How to compile fio on 64-bit Windows:
+
+ 1. Install Cygwin (http://www.cygwin.com/). Install **make** and all
+ packages starting with **mingw64-x86_64**. Ensure
+ **mingw64-x86_64-zlib** are installed if you wish
+ to enable fio's log compression functionality.
+ 2. Open the Cygwin Terminal.
+ 3. Go to the fio directory (source files).
+ 4. Run ``make clean && make -j``.
+
+To build fio for 32-bit Windows, ensure the -i686 versions of the previously
+mentioned -x86_64 packages are installed and run ``./configure
+--build-32bit-win`` before ``make``.
+
+It's recommended that once built or installed, fio be run in a Command Prompt or
+other 'native' console such as console2, since there are known to be display and
+signal issues when running it under a Cygwin shell (see
+https://github.com/mintty/mintty/issues/56 and
+https://github.com/mintty/mintty/wiki/Tips#inputoutput-interaction-with-alien-programs
+for details).
+
+
+Documentation
+~~~~~~~~~~~~~
+
+Fio uses Sphinx_ to generate documentation from the reStructuredText_ files.
+To build HTML formatted documentation run ``make -C doc html`` and direct your
+browser to :file:`./doc/output/html/index.html`. To build manual page run
+``make -C doc man`` and then ``man doc/output/man/fio.1``. To see what other
+output formats are supported run ``make -C doc help``.
+
+.. _reStructuredText: http://www.sphinx-doc.org/rest.html
+.. _Sphinx: http://www.sphinx-doc.org
+
+
+Platforms
+---------
+
+Fio works on (at least) Linux, Solaris, AIX, HP-UX, OSX, NetBSD, OpenBSD,
+Windows, FreeBSD, and DragonFly. Some features and/or options may only be
+available on some of the platforms, typically because those features only apply
+to that platform (like the solarisaio engine, or the splice engine on Linux).
+
+Some features are not available on FreeBSD/Solaris even if they could be
+implemented, I'd be happy to take patches for that. An example of that is disk
+utility statistics and (I think) huge page support, support for that does exist
+in FreeBSD/Solaris.
+
+Fio uses pthread mutexes for signalling and locking and some platforms do not
+support process shared pthread mutexes. As a result, on such platforms only
+threads are supported. This could be fixed with sysv ipc locking or other
+locking alternatives.
+
+Other \*BSD platforms are untested, but fio should work there almost out of the
+box. Since I don't do test runs or even compiles on those platforms, your
+mileage may vary. Sending me patches for other platforms is greatly
+appreciated. There's a lot of value in having the same test/benchmark tool
+available on all platforms.
+
+Note that POSIX aio is not enabled by default on AIX. Messages like these::
+
+ Symbol resolution failed for /usr/lib/libc.a(posix_aio.o) because:
+ Symbol _posix_kaio_rdwr (number 2) is not exported from dependent module /unix.
+
+indicate one needs to enable POSIX aio. Run the following commands as root::
+
+ # lsdev -C -l posix_aio0
+ posix_aio0 Defined Posix Asynchronous I/O
+ # cfgmgr -l posix_aio0
+ # lsdev -C -l posix_aio0
+ posix_aio0 Available Posix Asynchronous I/O
+
+POSIX aio should work now. To make the change permanent::
+
+ # chdev -l posix_aio0 -P -a autoconfig='available'
+ posix_aio0 changed
+
+
+Running fio
+-----------
+
+Running fio is normally the easiest part - you just give it the job file
+(or job files) as parameters::
+
+ $ fio [options] [jobfile] ...
+
+and it will start doing what the *jobfile* tells it to do. You can give more
+than one job file on the command line, fio will serialize the running of those
+files. Internally that is the same as using the :option:`stonewall` parameter
+described in the parameter section.
+
+If the job file contains only one job, you may as well just give the parameters
+on the command line. The command line parameters are identical to the job
+parameters, with a few extra that control global parameters. For example, for
+the job file parameter :option:`iodepth=2 <iodepth>`, the mirror command line
+option would be :option:`--iodepth 2 <iodepth>` or :option:`--iodepth=2
+<iodepth>`. You can also use the command line for giving more than one job
+entry. For each :option:`--name <name>` option that fio sees, it will start a
+new job with that name. Command line entries following a
+:option:`--name <name>` entry will apply to that job, until there are no more
+entries or a new :option:`--name <name>` entry is seen. This is similar to the
+job file options, where each option applies to the current job until a new []
+job entry is seen.
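+
+For example, a hypothetical invocation defining two jobs directly on the
+command line (all option values are illustrative) could be::
+
+   $ fio --name=readers --rw=randread --size=64m --name=writers --rw=write --size=64m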
+
+fio does not need to run as root, except if the files or devices specified in
+the job section require that. Some other options may also be restricted, such
+as memory locking, I/O scheduler switching, and decreasing the nice value.
+
+If *jobfile* is specified as ``-``, the job file will be read from standard
+input.
if (!init_iolog(td))
goto err;
+ /* ioprio_set() has to be done before td_io_init() */
+ if (fio_option_is_set(o, ioprio) ||
+ fio_option_is_set(o, ioprio_class)) {
+ ret = ioprio_set(IOPRIO_WHO_PROCESS, 0, o->ioprio_class, o->ioprio);
+ if (ret == -1) {
+ td_verror(td, errno, "ioprio_set");
+ goto err;
+ }
+ td->ioprio = ioprio_value(o->ioprio_class, o->ioprio);
+ td->ts.ioprio = td->ioprio;
+ }
+
if (td_io_init(td))
goto err;
if (o->verify_async && verify_async_init(td))
goto err;
- if (fio_option_is_set(o, ioprio) ||
- fio_option_is_set(o, ioprio_class)) {
- ret = ioprio_set(IOPRIO_WHO_PROCESS, 0, o->ioprio_class, o->ioprio);
- if (ret == -1) {
- td_verror(td, errno, "ioprio_set");
- goto err;
- }
- td->ioprio = ioprio_value(o->ioprio_class, o->ioprio);
- }
-
if (o->cgroup && cgroup_setup(td, cgroup_list, &cgroup_mnt))
goto err;
}
for_each_td(td, i) {
+ struct thread_stat *ts = &td->ts;
+
+ free_clat_prio_stats(ts);
steadystate_free(td);
fio_options_free(td);
fio_dump_options_free(td);
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <errno.h>
#include "flist.h"
#include "fio.h"
+#include "iolog.h"
#include "blktrace.h"
#include "blktrace_api.h"
#include "oslib/linux-dev-lookup.h"
-#define TRACE_FIFO_SIZE 8192
-
-/*
- * fifo refill frontend, to avoid reading data in trace sized bites
- */
-static int refill_fifo(struct thread_data *td, struct fifo *fifo, int fd)
-{
- char buf[TRACE_FIFO_SIZE];
- unsigned int total;
- int ret;
-
- total = sizeof(buf);
- if (total > fifo_room(fifo))
- total = fifo_room(fifo);
-
- ret = read(fd, buf, total);
- if (ret < 0) {
- int read_err = errno;
-
- assert(read_err > 0);
- td_verror(td, read_err, "read blktrace file");
- return -read_err;
- }
-
- if (ret > 0)
- ret = fifo_put(fifo, buf, ret);
-
- dprint(FD_BLKTRACE, "refill: filled %d bytes\n", ret);
- return ret;
-}
-
-/*
- * Retrieve 'len' bytes from the fifo, refilling if necessary.
- */
-static int trace_fifo_get(struct thread_data *td, struct fifo *fifo, int fd,
- void *buf, unsigned int len)
-{
- if (fifo_len(fifo) < len) {
- int ret = refill_fifo(td, fifo, fd);
-
- if (ret < 0)
- return ret;
- }
-
- return fifo_get(fifo, buf, len);
-}
+struct file_cache {
+ unsigned int maj;
+ unsigned int min;
+ unsigned int fileno;
+};
/*
* Just discard the pdu by seeking past it.
*/
-static int discard_pdu(struct thread_data *td, struct fifo *fifo, int fd,
- struct blk_io_trace *t)
+static int discard_pdu(FILE* f, struct blk_io_trace *t)
{
if (t->pdu_len == 0)
return 0;
dprint(FD_BLKTRACE, "discard pdu len %u\n", t->pdu_len);
- return trace_fifo_get(td, fifo, fd, NULL, t->pdu_len);
+ if (fseek(f, t->pdu_len, SEEK_CUR) < 0)
+ return -errno;
+
+ return t->pdu_len;
}
/*
flist_add_tail(&ipo->list, &td->io_log_list);
}
-static int trace_add_file(struct thread_data *td, __u32 device)
+static int trace_add_file(struct thread_data *td, __u32 device,
+ struct file_cache *cache)
{
- static unsigned int last_maj, last_min, last_fileno;
unsigned int maj = FMAJOR(device);
unsigned int min = FMINOR(device);
struct fio_file *f;
char dev[256];
unsigned int i;
- if (last_maj == maj && last_min == min)
- return last_fileno;
+ if (cache->maj == maj && cache->min == min)
+ return cache->fileno;
- last_maj = maj;
- last_min = min;
+ cache->maj = maj;
+ cache->min = min;
/*
* check for this file in our list
*/
for_each_file(td, f, i)
if (f->major == maj && f->minor == min) {
- last_fileno = f->fileno;
- return last_fileno;
+ cache->fileno = f->fileno;
+ return cache->fileno;
}
strcpy(dev, "/dev");
td->files[fileno]->major = maj;
td->files[fileno]->minor = min;
trace_add_open_close_event(td, fileno, FIO_LOG_OPEN_FILE);
- last_fileno = fileno;
+ cache->fileno = fileno;
}
- return last_fileno;
+ return cache->fileno;
}
static void t_bytes_align(struct thread_options *o, struct blk_io_trace *t)
queue_io_piece(td, ipo);
}
-static void handle_trace_notify(struct blk_io_trace *t)
+static bool handle_trace_notify(struct blk_io_trace *t)
{
switch (t->action) {
case BLK_TN_PROCESS:
dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action);
break;
}
+ return false;
}
-static void handle_trace_discard(struct thread_data *td,
+static bool handle_trace_discard(struct thread_data *td,
struct blk_io_trace *t,
unsigned long long ttime,
- unsigned long *ios, unsigned int *bs)
+ unsigned long *ios, unsigned long long *bs,
+ struct file_cache *cache)
{
struct io_piece *ipo;
int fileno;
if (td->o.replay_skip & (1u << DDIR_TRIM))
- return;
+ return false;
ipo = calloc(1, sizeof(*ipo));
init_ipo(ipo);
- fileno = trace_add_file(td, t->device);
+ fileno = trace_add_file(td, t->device, cache);
ios[DDIR_TRIM]++;
if (t->bytes > bs[DDIR_TRIM])
ipo->offset, ipo->len,
ipo->delay);
queue_io_piece(td, ipo);
+ return true;
}
static void dump_trace(struct blk_io_trace *t)
log_err("blktrace: ignoring zero byte trace: action=%x\n", t->action);
}
-static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
+static bool handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
unsigned long long ttime, unsigned long *ios,
- unsigned int *bs)
+ unsigned long long *bs, struct file_cache *cache)
{
int rw;
int fileno;
- fileno = trace_add_file(td, t->device);
+ fileno = trace_add_file(td, t->device, cache);
rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
if (rw) {
if (td->o.replay_skip & (1u << DDIR_WRITE))
- return;
+ return false;
} else {
if (td->o.replay_skip & (1u << DDIR_READ))
- return;
+ return false;
}
if (!t->bytes) {
if (!fio_did_warn(FIO_WARN_BTRACE_ZERO))
dump_trace(t);
- return;
+ return false;
}
if (t->bytes > bs[rw])
ios[rw]++;
td->o.size += t->bytes;
store_ipo(td, t->sector, t->bytes, rw, ttime, fileno);
+ return true;
}
-static void handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
- unsigned long long ttime, unsigned long *ios)
+static bool handle_trace_flush(struct thread_data *td, struct blk_io_trace *t,
+ unsigned long long ttime, unsigned long *ios,
+ struct file_cache *cache)
{
struct io_piece *ipo;
int fileno;
if (td->o.replay_skip & (1u << DDIR_SYNC))
- return;
+ return false;
ipo = calloc(1, sizeof(*ipo));
init_ipo(ipo);
- fileno = trace_add_file(td, t->device);
+ fileno = trace_add_file(td, t->device, cache);
ipo->delay = ttime / 1000;
ipo->ddir = DDIR_SYNC;
ios[DDIR_SYNC]++;
dprint(FD_BLKTRACE, "store flush delay=%lu\n", ipo->delay);
queue_io_piece(td, ipo);
+ return true;
}
/*
* We only care for queue traces, most of the others are side effects
* due to internal workings of the block layer.
*/
-static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
- unsigned long *ios, unsigned int *bs)
+static bool queue_trace(struct thread_data *td, struct blk_io_trace *t,
+ unsigned long *ios, unsigned long long *bs,
+ struct file_cache *cache)
{
- static unsigned long long last_ttime;
+ unsigned long long *last_ttime = &td->io_log_blktrace_last_ttime;
unsigned long long delay = 0;
if ((t->action & 0xffff) != __BLK_TA_QUEUE)
- return;
+ return false;
if (!(t->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
- if (!last_ttime || td->o.no_stall)
+ if (!*last_ttime || td->o.no_stall || t->time < *last_ttime)
delay = 0;
else if (td->o.replay_time_scale == 100)
- delay = t->time - last_ttime;
+ delay = t->time - *last_ttime;
else {
- double tmp = t->time - last_ttime;
+ double tmp = t->time - *last_ttime;
double scale;
scale = (double) 100.0 / (double) td->o.replay_time_scale;
tmp *= scale;
delay = tmp;
}
- last_ttime = t->time;
+ *last_ttime = t->time;
}
t_bytes_align(&td->o, t);
if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
- handle_trace_notify(t);
+ return handle_trace_notify(t);
else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
- handle_trace_discard(td, t, delay, ios, bs);
+ return handle_trace_discard(td, t, delay, ios, bs, cache);
else if (t->action & BLK_TC_ACT(BLK_TC_FLUSH))
- handle_trace_flush(td, t, delay, ios);
+ return handle_trace_flush(td, t, delay, ios, cache);
else
- handle_trace_fs(td, t, delay, ios, bs);
+ return handle_trace_fs(td, t, delay, ios, bs, cache);
}
static void byteswap_trace(struct blk_io_trace *t)
* Load a blktrace file by reading all the blk_io_trace entries, and storing
* them as io_pieces like the fio text version would do.
*/
-bool load_blktrace(struct thread_data *td, const char *filename, int need_swap)
+bool init_blktrace_read(struct thread_data *td, const char *filename, int need_swap)
+{
+ int old_state;
+
+ td->io_log_rfile = fopen(filename, "rb");
+ if (!td->io_log_rfile) {
+ td_verror(td, errno, "open blktrace file");
+ goto err;
+ }
+ td->io_log_blktrace_swap = need_swap;
+ td->io_log_blktrace_last_ttime = 0;
+ td->o.size = 0;
+
+ free_release_files(td);
+
+ old_state = td_bump_runstate(td, TD_SETTING_UP);
+
+ if (!read_blktrace(td)) {
+ goto err;
+ }
+
+ td_restore_runstate(td, old_state);
+
+ if (!td->files_index) {
+ log_err("fio: did not find replay device(s)\n");
+ return false;
+ }
+
+ return true;
+
+err:
+ if (td->io_log_rfile) {
+ fclose(td->io_log_rfile);
+ td->io_log_rfile = NULL;
+ }
+ return false;
+}
+
+bool read_blktrace(struct thread_data* td)
{
struct blk_io_trace t;
+ struct file_cache cache = { };
unsigned long ios[DDIR_RWDIR_SYNC_CNT] = { };
- unsigned int rw_bs[DDIR_RWDIR_CNT] = { };
+ unsigned long long rw_bs[DDIR_RWDIR_CNT] = { };
unsigned long skipped_writes;
- struct fifo *fifo;
- int fd, i, old_state, max_depth;
- struct fio_file *f;
+ FILE *f = td->io_log_rfile;
+ int i, max_depth;
+ struct fio_file *fiof;
int this_depth[DDIR_RWDIR_CNT] = { };
int depth[DDIR_RWDIR_CNT] = { };
+ int64_t items_to_fetch = 0;
- fd = open(filename, O_RDONLY);
- if (fd < 0) {
- td_verror(td, errno, "open blktrace file");
- return false;
+ if (td->o.read_iolog_chunked) {
+ items_to_fetch = iolog_items_to_fetch(td);
+ if (!items_to_fetch)
+ return true;
}
- fifo = fifo_alloc(TRACE_FIFO_SIZE);
-
- old_state = td_bump_runstate(td, TD_SETTING_UP);
-
- td->o.size = 0;
skipped_writes = 0;
do {
- int ret = trace_fifo_get(td, fifo, fd, &t, sizeof(t));
+ int ret = fread(&t, 1, sizeof(t), f);
- if (ret < 0)
+ if (ferror(f)) {
+ td_verror(td, errno, "read blktrace file");
goto err;
- else if (!ret)
+ } else if (feof(f)) {
break;
- else if (ret < (int) sizeof(t)) {
- log_err("fio: short fifo get\n");
+ } else if (ret < (int) sizeof(t)) {
+ log_err("fio: iolog short read\n");
break;
}
- if (need_swap)
+ if (td->io_log_blktrace_swap)
byteswap_trace(&t);
if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
t.magic & 0xff);
goto err;
}
- ret = discard_pdu(td, fifo, fd, &t);
+ ret = discard_pdu(f, &t);
if (ret < 0) {
td_verror(td, -ret, "blktrace lseek");
goto err;
- } else if (t.pdu_len != ret) {
- log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
- goto err;
}
if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
if ((t.action & 0xffff) == __BLK_TA_QUEUE)
}
}
- handle_trace(td, &t, ios, rw_bs);
- } while (1);
+ if (!queue_trace(td, &t, ios, rw_bs, &cache))
+ continue;
- for_each_file(td, f, i)
- trace_add_open_close_event(td, f->fileno, FIO_LOG_CLOSE_FILE);
+ if (td->o.read_iolog_chunked) {
+ td->io_log_current++;
+ items_to_fetch--;
+ if (items_to_fetch == 0)
+ break;
+ }
+ } while (1);
- fifo_free(fifo);
- close(fd);
+ if (td->o.read_iolog_chunked) {
+ td->io_log_highmark = td->io_log_current;
+ td->io_log_checkmark = (td->io_log_highmark + 1) / 2;
+ fio_gettime(&td->io_log_highmark_time, NULL);
+ }
- td_restore_runstate(td, old_state);
+ if (skipped_writes)
+ log_err("fio: %s skips replay of %lu writes due to read-only\n",
+ td->o.name, skipped_writes);
- if (!td->files_index) {
- log_err("fio: did not find replay device(s)\n");
- return false;
+ if (td->o.read_iolog_chunked) {
+ if (td->io_log_current == 0) {
+ return false;
+ }
+ td->o.td_ddir = TD_DDIR_RW;
+ if ((rw_bs[DDIR_READ] > td->o.max_bs[DDIR_READ] ||
+ rw_bs[DDIR_WRITE] > td->o.max_bs[DDIR_WRITE] ||
+ rw_bs[DDIR_TRIM] > td->o.max_bs[DDIR_TRIM]) &&
+ td->orig_buffer)
+ {
+ td->o.max_bs[DDIR_READ] = max(td->o.max_bs[DDIR_READ], rw_bs[DDIR_READ]);
+ td->o.max_bs[DDIR_WRITE] = max(td->o.max_bs[DDIR_WRITE], rw_bs[DDIR_WRITE]);
+ td->o.max_bs[DDIR_TRIM] = max(td->o.max_bs[DDIR_TRIM], rw_bs[DDIR_TRIM]);
+ io_u_quiesce(td);
+ free_io_mem(td);
+ init_io_u_buffers(td);
+ }
+ return true;
}
+ for_each_file(td, fiof, i)
+ trace_add_open_close_event(td, fiof->fileno, FIO_LOG_CLOSE_FILE);
+
+ fclose(td->io_log_rfile);
+ td->io_log_rfile = NULL;
+
/*
* For stacked devices, we don't always get a COMPLETE event so
* the depth grows to insane values. Limit it to something sane(r).
max_depth = max(depth[i], max_depth);
}
- if (skipped_writes)
- log_err("fio: %s skips replay of %lu writes due to read-only\n",
- td->o.name, skipped_writes);
-
if (!ios[DDIR_READ] && !ios[DDIR_WRITE] && !ios[DDIR_TRIM] &&
!ios[DDIR_SYNC]) {
log_err("fio: found no ios in blktrace data\n");
td->o.max_bs[DDIR_TRIM] = rw_bs[DDIR_TRIM];
}
- /*
- * We need to do direct/raw ios to the device, to avoid getting
- * read-ahead in our way. But only do so if the minimum block size
- * is a multiple of 4k, otherwise we don't know if it's safe to do so.
- */
- if (!fio_option_is_set(&td->o, odirect) && !(td_min_bs(td) & 4095))
- td->o.odirect = 1;
-
/*
* If depth wasn't manually set, use probed depth
*/
return true;
err:
- close(fd);
- fifo_free(fifo);
+ fclose(f);
return false;
}
{
bcs[i].iter++;
if (bcs[i].iter < bcs[i].nr_iter) {
- lseek(bcs[i].fd, 0, SEEK_SET);
+ fseek(bcs[i].f, 0, SEEK_SET);
return;
}
*nr_logs -= 1;
/* close file */
- fifo_free(bcs[i].fifo);
- close(bcs[i].fd);
+ fclose(bcs[i].f);
/* keep active files contiguous */
memmove(&bcs[i], &bcs[*nr_logs], sizeof(bcs[i]));
read_skip:
/* read an io trace */
- ret = trace_fifo_get(td, bc->fifo, bc->fd, t, sizeof(*t));
- if (ret < 0) {
+	ret = fread(t, 1, sizeof(*t), bc->f);
+ if (ferror(bc->f)) {
+ td_verror(td, errno, "read blktrace file");
return ret;
- } else if (!ret) {
+ } else if (feof(bc->f)) {
if (!bc->length)
bc->length = bc->t.time;
return ret;
} else if (ret < (int) sizeof(*t)) {
- log_err("fio: short fifo get\n");
+ log_err("fio: iolog short read\n");
return -1;
}
/* skip over actions that fio does not care about */
if ((t->action & 0xffff) != __BLK_TA_QUEUE ||
t_get_ddir(t) == DDIR_INVAL) {
- ret = discard_pdu(td, bc->fifo, bc->fd, t);
+ ret = discard_pdu(bc->f, t);
if (ret < 0) {
td_verror(td, -ret, "blktrace lseek");
return ret;
- } else if (t->pdu_len != ret) {
- log_err("fio: discarded %d of %d\n", ret,
- t->pdu_len);
- return -1;
}
goto read_skip;
}
str = ptr = strdup(td->o.read_iolog_file);
nr_logs = 0;
for (i = 0; (name = get_next_str(&ptr)) != NULL; i++) {
- bcs[i].fd = open(name, O_RDONLY);
- if (bcs[i].fd < 0) {
+ bcs[i].f = fopen(name, "rb");
+ if (!bcs[i].f) {
log_err("fio: could not open file: %s\n", name);
- ret = bcs[i].fd;
+ ret = -errno;
free(str);
goto err_file;
}
- bcs[i].fifo = fifo_alloc(TRACE_FIFO_SIZE);
nr_logs++;
if (!is_blktrace(name, &bcs[i].swap)) {
i = find_earliest_io(bcs, nr_logs);
bc = &bcs[i];
/* skip over the pdu */
- ret = discard_pdu(td, bc->fifo, bc->fd, &bc->t);
+ ret = discard_pdu(bc->f, &bc->t);
if (ret < 0) {
td_verror(td, -ret, "blktrace lseek");
goto err_file;
- } else if (bc->t.pdu_len != ret) {
- log_err("fio: discarded %d of %d\n", ret,
- bc->t.pdu_len);
- goto err_file;
}
ret = write_trace(merge_fp, &bc->t);
err_file:
/* cleanup */
for (i = 0; i < nr_logs; i++) {
- fifo_free(bcs[i].fifo);
- close(bcs[i].fd);
+ fclose(bcs[i].f);
}
err_merge_buf:
free(merge_buf);
struct blktrace_cursor {
struct fifo *fifo; // fifo queue for reading
- int fd; // blktrace file
+ FILE *f; // blktrace file
__u64 length; // length of trace
struct blk_io_trace t; // current io trace
int swap; // bitwise reverse required
};
bool is_blktrace(const char *, int *);
-bool load_blktrace(struct thread_data *, const char *, int);
+bool init_blktrace_read(struct thread_data *, const char *, int);
+bool read_blktrace(struct thread_data* td);
+
int merge_blktrace_iologs(struct thread_data *td);
#else
return false;
}
-static inline bool load_blktrace(struct thread_data *td, const char *fname,
+static inline bool init_blktrace_read(struct thread_data *td, const char *fname,
int need_swap)
{
return false;
}
+static inline bool read_blktrace(struct thread_data* td)
+{
+ return false;
+}
+
+
static inline int merge_blktrace_iologs(struct thread_data *td)
{
return false;
case "${CI_TARGET_ARCH}" in
"i686")
sudo dpkg --add-architecture i386
+ opts="--allow-downgrades"
pkgs=("${pkgs[@]/%/:i386}")
pkgs+=(
gcc-multilib
pkg-config:i386
zlib1g-dev:i386
+ libpcre2-8-0=10.34-7
)
;;
"x86_64")
+ opts=""
pkgs+=(
libglusterfs-dev
libgoogle-perftools-dev
echo "Updating APT..."
sudo apt-get -qq update
echo "Installing packages..."
- sudo apt-get install -o APT::Immediate-Configure=false --no-install-recommends -qq -y "${pkgs[@]}"
+ sudo apt-get install $opts -o APT::Immediate-Configure=false --no-install-recommends -qq -y "${pkgs[@]}"
}
install_linux() {
static void fio_drain_client_text(struct fio_client *client)
{
do {
- struct fio_net_cmd *cmd;
+ struct fio_net_cmd *cmd = NULL;
- cmd = fio_net_recv_cmd(client->fd, false);
+ if (fio_server_poll_fd(client->fd, POLLIN, 0))
+ cmd = fio_net_recv_cmd(client->fd, false);
if (!cmd)
break;
dst->pid = le32_to_cpu(src->pid);
dst->members = le32_to_cpu(src->members);
dst->unified_rw_rep = le32_to_cpu(src->unified_rw_rep);
+ dst->ioprio = le32_to_cpu(src->ioprio);
+ dst->disable_prio_stat = le32_to_cpu(src->disable_prio_stat);
for (i = 0; i < DDIR_RWDIR_CNT; i++) {
convert_io_stat(&dst->clat_stat[i], &src->clat_stat[i]);
dst->nr_block_infos = le64_to_cpu(src->nr_block_infos);
for (i = 0; i < dst->nr_block_infos; i++)
dst->block_infos[i] = le32_to_cpu(src->block_infos[i]);
- for (i = 0; i < DDIR_RWDIR_CNT; i++) {
- for (j = 0; j < FIO_IO_U_PLAT_NR; j++) {
- dst->io_u_plat_high_prio[i][j] = le64_to_cpu(src->io_u_plat_high_prio[i][j]);
- dst->io_u_plat_low_prio[i][j] = le64_to_cpu(src->io_u_plat_low_prio[i][j]);
- }
- convert_io_stat(&dst->clat_high_prio_stat[i], &src->clat_high_prio_stat[i]);
- convert_io_stat(&dst->clat_low_prio_stat[i], &src->clat_low_prio_stat[i]);
- }
dst->ss_dur = le64_to_cpu(src->ss_dur);
dst->ss_state = le32_to_cpu(src->ss_state);
dst->ss_deviation.u.f = fio_uint64_to_double(le64_to_cpu(src->ss_deviation.u.i));
dst->ss_criterion.u.f = fio_uint64_to_double(le64_to_cpu(src->ss_criterion.u.i));
+ for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+ dst->nr_clat_prio[i] = le32_to_cpu(src->nr_clat_prio[i]);
+ for (j = 0; j < dst->nr_clat_prio[i]; j++) {
+ for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
+ dst->clat_prio[i][j].io_u_plat[k] =
+ le64_to_cpu(src->clat_prio[i][j].io_u_plat[k]);
+ convert_io_stat(&dst->clat_prio[i][j].clat_stat,
+ &src->clat_prio[i][j].clat_stat);
+ dst->clat_prio[i][j].ioprio =
+				le32_to_cpu(src->clat_prio[i][j].ioprio);
+ }
+ }
+
if (dst->ss_state & FIO_SS_DATA) {
for (i = 0; i < dst->ss_dur; i++ ) {
dst->ss_iops_data[i] = le64_to_cpu(src->ss_iops_data[i]);
if (sum_stat_clients <= 1)
return;
- sum_thread_stats(&client_ts, &p->ts, sum_stat_nr == 1);
+ sum_thread_stats(&client_ts, &p->ts);
sum_group_stats(&client_gs, &p->rs);
client_ts.members++;
{
struct client_ops *ops = client->ops;
struct fio_net_cmd *cmd;
- int size;
dprint(FD_NET, "client: handle %s\n", client->hostname);
}
case FIO_NET_CMD_TS: {
struct cmd_ts_pdu *p = (struct cmd_ts_pdu *) cmd->payload;
+ uint64_t offset;
+ int i;
+
+ for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+ if (le32_to_cpu(p->ts.nr_clat_prio[i])) {
+ offset = le64_to_cpu(p->ts.clat_prio_offset[i]);
+ p->ts.clat_prio[i] =
+ (struct clat_prio_stat *)((char *)p + offset);
+ }
+ }
dprint(FD_NET, "client: ts->ss_state = %u\n", (unsigned int) le32_to_cpu(p->ts.ss_state));
if (le32_to_cpu(p->ts.ss_state) & FIO_SS_DATA) {
dprint(FD_NET, "client: received steadystate ring buffers\n");
- size = le64_to_cpu(p->ts.ss_dur);
- p->ts.ss_iops_data = (uint64_t *) ((struct cmd_ts_pdu *)cmd->payload + 1);
- p->ts.ss_bw_data = p->ts.ss_iops_data + size;
+ offset = le64_to_cpu(p->ts.ss_iops_data_offset);
+ p->ts.ss_iops_data = (uint64_t *)((char *)p + offset);
+
+ offset = le64_to_cpu(p->ts.ss_bw_data_offset);
+ p->ts.ss_bw_data = (uint64_t *)((char *)p + offset);
}
convert_ts(&p->ts, &p->ts);
fio_client_json_fini();
+ free_clat_prio_stats(&client_ts);
free(pfds);
return retval || error_clients;
}
##########################################
# librpma probe
+# The librpma engine requires librpma>=v0.10.0 with rpma_mr_advise().
if test "$librpma" != "yes" ; then
librpma="no"
fi
cat > $TMPC << EOF
-#include <stdio.h>
#include <librpma.h>
-int main(int argc, char **argv)
+int main(void)
{
- enum rpma_conn_event event = RPMA_CONN_REJECTED;
- (void) event; /* unused */
- rpma_log_set_threshold(RPMA_LOG_THRESHOLD, RPMA_LOG_LEVEL_INFO);
+ void *ptr = rpma_mr_advise;
+ (void) ptr; /* unused */
return 0;
}
EOF
========================================
-.. include:: ../README
+.. include:: ../README.rst
.. include:: ../HOWTO
(rev. |release|)
-.. include:: ../README
+.. include:: ../README.rst
.. include:: ../HOWTO
#include "cmdprio.h"
-static int fio_cmdprio_bssplit_ddir(struct thread_options *to, void *cb_arg,
- enum fio_ddir ddir, char *str, bool data)
+/*
+ * Temporary array used during parsing. Will be freed after the corresponding
+ * struct bsprio_desc has been generated and saved in cmdprio->bsprio_desc.
+ */
+struct cmdprio_parse_result {
+ struct split_prio *entries;
+ int nr_entries;
+};
+
+/*
+ * Temporary array used during init. Will be freed after the corresponding
+ * struct clat_prio_stat array has been saved in td->ts.clat_prio and the
+ * matching clat_prio_indexes have been saved in each struct cmdprio_prio.
+ */
+struct cmdprio_values {
+ unsigned int *prios;
+ int nr_prios;
+};
+
+static int find_clat_prio_index(unsigned int *all_prios, int nr_prios,
+ int32_t prio)
{
- struct cmdprio *cmdprio = cb_arg;
- struct split split;
- unsigned int i;
+ int i;
- if (ddir == DDIR_TRIM)
- return 0;
+ for (i = 0; i < nr_prios; i++) {
+ if (all_prios[i] == prio)
+ return i;
+ }
- memset(&split, 0, sizeof(split));
+ return -1;
+}
- if (split_parse_ddir(to, &split, str, data, BSSPLIT_MAX))
+/**
+ * assign_clat_prio_index - So that stat.c does not need to loop through all
+ * possible priorities each time add_clat_sample() / add_lat_sample() is
+ * called, save which index to use in each cmdprio_prio. This will later be
+ * propagated to the io_u, if the specific io_u was determined to use a cmdprio
+ * priority value.
+ */
+static void assign_clat_prio_index(struct cmdprio_prio *prio,
+ struct cmdprio_values *values)
+{
+ int clat_prio_index = find_clat_prio_index(values->prios,
+ values->nr_prios,
+ prio->prio);
+ if (clat_prio_index == -1) {
+ clat_prio_index = values->nr_prios;
+ values->prios[clat_prio_index] = prio->prio;
+ values->nr_prios++;
+ }
+ prio->clat_prio_index = clat_prio_index;
+}
+
+/**
+ * init_cmdprio_values - Allocate a temporary array that can hold all unique
+ * priorities (per ddir), so that we can assign_clat_prio_index() for each
+ * cmdprio_prio during setup. This temporary array is freed after setup.
+ */
+static int init_cmdprio_values(struct cmdprio_values *values,
+ int max_unique_prios, struct thread_stat *ts)
+{
+ values->prios = calloc(max_unique_prios + 1,
+ sizeof(*values->prios));
+ if (!values->prios)
return 1;
- if (!split.nr)
- return 0;
- cmdprio->bssplit_nr[ddir] = split.nr;
- cmdprio->bssplit[ddir] = malloc(split.nr * sizeof(struct bssplit));
- if (!cmdprio->bssplit[ddir])
+ /* td->ioprio/ts->ioprio is always stored at index 0. */
+ values->prios[0] = ts->ioprio;
+ values->nr_prios++;
+
+ return 0;
+}
+
+/**
+ * init_ts_clat_prio - Allocates and fills a clat_prio_stat array which holds
+ * all unique priorities (per ddir).
+ */
+static int init_ts_clat_prio(struct thread_stat *ts, enum fio_ddir ddir,
+ struct cmdprio_values *values)
+{
+ int i;
+
+ if (alloc_clat_prio_stat_ddir(ts, ddir, values->nr_prios))
return 1;
- for (i = 0; i < split.nr; i++) {
- cmdprio->bssplit[ddir][i].bs = split.val1[i];
- if (split.val2[i] == -1U) {
- cmdprio->bssplit[ddir][i].perc = 0;
- } else {
- if (split.val2[i] > 100)
- cmdprio->bssplit[ddir][i].perc = 100;
- else
- cmdprio->bssplit[ddir][i].perc = split.val2[i];
+ for (i = 0; i < values->nr_prios; i++)
+ ts->clat_prio[ddir][i].ioprio = values->prios[i];
+
+ return 0;
+}
+
+static int fio_cmdprio_fill_bsprio(struct cmdprio_bsprio *bsprio,
+ struct split_prio *entries,
+ struct cmdprio_values *values,
+ int implicit_cmdprio, int start, int end)
+{
+ struct cmdprio_prio *prio;
+ int i = end - start + 1;
+
+ bsprio->prios = calloc(i, sizeof(*bsprio->prios));
+ if (!bsprio->prios)
+ return 1;
+
+ bsprio->bs = entries[start].bs;
+ bsprio->nr_prios = 0;
+ for (i = start; i <= end; i++) {
+ prio = &bsprio->prios[bsprio->nr_prios];
+ prio->perc = entries[i].perc;
+ if (entries[i].prio == -1)
+ prio->prio = implicit_cmdprio;
+ else
+ prio->prio = entries[i].prio;
+ assign_clat_prio_index(prio, values);
+ bsprio->tot_perc += entries[i].perc;
+ if (bsprio->tot_perc > 100) {
+ log_err("fio: cmdprio_bssplit total percentage "
+ "for bs: %"PRIu64" exceeds 100\n",
+ bsprio->bs);
+ free(bsprio->prios);
+ return 1;
}
+ bsprio->nr_prios++;
+ }
+
+ return 0;
+}
+
+static int
+fio_cmdprio_generate_bsprio_desc(struct cmdprio_bsprio_desc *bsprio_desc,
+ struct cmdprio_parse_result *parse_res,
+ struct cmdprio_values *values,
+ int implicit_cmdprio)
+{
+ struct split_prio *entries = parse_res->entries;
+ int nr_entries = parse_res->nr_entries;
+ struct cmdprio_bsprio *bsprio;
+ int i, start, count = 0;
+
+ /*
+ * The parsed result is sorted by blocksize, so count only the number
+ * of different blocksizes, to know how many cmdprio_bsprio we need.
+ */
+ for (i = 0; i < nr_entries; i++) {
+ while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs)
+ i++;
+ count++;
+ }
+
+ /*
+ * This allocation is not freed on error. Instead, the calling function
+ * is responsible for calling fio_cmdprio_cleanup() on error.
+ */
+ bsprio_desc->bsprios = calloc(count, sizeof(*bsprio_desc->bsprios));
+ if (!bsprio_desc->bsprios)
+ return 1;
+
+ start = 0;
+ bsprio_desc->nr_bsprios = 0;
+ for (i = 0; i < nr_entries; i++) {
+ while (i + 1 < nr_entries && entries[i].bs == entries[i + 1].bs)
+ i++;
+ bsprio = &bsprio_desc->bsprios[bsprio_desc->nr_bsprios];
+ /*
+ * All parsed entries with the same blocksize get saved in the
+ * same cmdprio_bsprio, to expedite the search in the hot path.
+ */
+ if (fio_cmdprio_fill_bsprio(bsprio, entries, values,
+ implicit_cmdprio, start, i))
+ return 1;
+
+ start = i + 1;
+ bsprio_desc->nr_bsprios++;
}
return 0;
}
-int fio_cmdprio_bssplit_parse(struct thread_data *td, const char *input,
- struct cmdprio *cmdprio)
+static int fio_cmdprio_bssplit_ddir(struct thread_options *to, void *cb_arg,
+ enum fio_ddir ddir, char *str, bool data)
+{
+ struct cmdprio_parse_result *parse_res_arr = cb_arg;
+ struct cmdprio_parse_result *parse_res = &parse_res_arr[ddir];
+
+ if (ddir == DDIR_TRIM)
+ return 0;
+
+ if (split_parse_prio_ddir(to, &parse_res->entries,
+ &parse_res->nr_entries, str))
+ return 1;
+
+ return 0;
+}
+
+static int fio_cmdprio_bssplit_parse(struct thread_data *td, const char *input,
+ struct cmdprio_parse_result *parse_res)
{
char *str, *p;
int ret = 0;
strip_blank_front(&str);
strip_blank_end(str);
- ret = str_split_parse(td, str, fio_cmdprio_bssplit_ddir, cmdprio,
+ ret = str_split_parse(td, str, fio_cmdprio_bssplit_ddir, parse_res,
false);
free(p);
return ret;
}
-static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u)
+/**
+ * fio_cmdprio_percentage - Returns the percentage of I/Os that should
+ * use a cmdprio priority value (rather than the default context priority).
+ *
+ * For CMDPRIO_MODE_BSSPLIT, if the percentage is non-zero, we will also
+ * return the matching bsprio, to avoid the same linear search elsewhere.
+ * For CMDPRIO_MODE_PERC, we will never return a bsprio.
+ */
+static int fio_cmdprio_percentage(struct cmdprio *cmdprio, struct io_u *io_u,
+ struct cmdprio_bsprio **bsprio)
{
+ struct cmdprio_bsprio *bsprio_entry;
enum fio_ddir ddir = io_u->ddir;
- struct cmdprio_options *options = cmdprio->options;
int i;
switch (cmdprio->mode) {
case CMDPRIO_MODE_PERC:
- return options->percentage[ddir];
+ *bsprio = NULL;
+ return cmdprio->perc_entry[ddir].perc;
case CMDPRIO_MODE_BSSPLIT:
- for (i = 0; i < cmdprio->bssplit_nr[ddir]; i++) {
- if (cmdprio->bssplit[ddir][i].bs == io_u->buflen)
- return cmdprio->bssplit[ddir][i].perc;
+ for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++) {
+ bsprio_entry = &cmdprio->bsprio_desc[ddir].bsprios[i];
+ if (bsprio_entry->bs == io_u->buflen) {
+ *bsprio = bsprio_entry;
+ return bsprio_entry->tot_perc;
+ }
}
break;
default:
assert(0);
}
+ /*
+ * This is totally fine, the given blocksize simply does not
+ * have any (non-zero) cmdprio_bssplit entries defined.
+ */
+ *bsprio = NULL;
return 0;
}
bool fio_cmdprio_set_ioprio(struct thread_data *td, struct cmdprio *cmdprio,
struct io_u *io_u)
{
- enum fio_ddir ddir = io_u->ddir;
- struct cmdprio_options *options = cmdprio->options;
- unsigned int p;
- unsigned int cmdprio_value =
- ioprio_value(options->class[ddir], options->level[ddir]);
-
- p = fio_cmdprio_percentage(cmdprio, io_u);
- if (p && rand_between(&td->prio_state, 0, 99) < p) {
- io_u->ioprio = cmdprio_value;
- if (!td->ioprio || cmdprio_value < td->ioprio) {
- /*
- * The async IO priority is higher (has a lower value)
- * than the default priority (which is either 0 or the
- * value set by "prio" and "prioclass" options).
- */
- io_u->flags |= IO_U_F_HIGH_PRIO;
- }
+ struct cmdprio_bsprio *bsprio;
+ unsigned int p, rand;
+ uint32_t perc = 0;
+ int i;
+
+ p = fio_cmdprio_percentage(cmdprio, io_u, &bsprio);
+ if (!p)
+ return false;
+
+ rand = rand_between(&td->prio_state, 0, 99);
+ if (rand >= p)
+ return false;
+
+ switch (cmdprio->mode) {
+ case CMDPRIO_MODE_PERC:
+ io_u->ioprio = cmdprio->perc_entry[io_u->ddir].prio;
+ io_u->clat_prio_index =
+ cmdprio->perc_entry[io_u->ddir].clat_prio_index;
return true;
+ case CMDPRIO_MODE_BSSPLIT:
+ assert(bsprio);
+ for (i = 0; i < bsprio->nr_prios; i++) {
+ struct cmdprio_prio *prio = &bsprio->prios[i];
+
+ perc += prio->perc;
+ if (rand < perc) {
+ io_u->ioprio = prio->prio;
+ io_u->clat_prio_index = prio->clat_prio_index;
+ return true;
+ }
+ }
+ break;
+ default:
+ assert(0);
}
- if (td->ioprio && td->ioprio < cmdprio_value) {
+ /* When rand < p (total perc), we should always find a cmdprio_prio. */
+ assert(0);
+ return false;
+}
+
+static int fio_cmdprio_gen_perc(struct thread_data *td, struct cmdprio *cmdprio)
+{
+ struct cmdprio_options *options = cmdprio->options;
+ struct cmdprio_prio *prio;
+ struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {0};
+ struct thread_stat *ts = &td->ts;
+ enum fio_ddir ddir;
+ int ret;
+
+ for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
/*
- * The IO will be executed with the default priority (which is
- * either 0 or the value set by "prio" and "prioclass options),
- * and this priority is higher (has a lower value) than the
- * async IO priority.
+ * Do not allocate a clat_prio array nor set the cmdprio struct
+ * if zero percent of the I/Os (for the ddir) should use a
+ * cmdprio priority value, or when the ddir is not enabled.
*/
- io_u->flags |= IO_U_F_HIGH_PRIO;
+ if (!options->percentage[ddir] ||
+ (ddir == DDIR_READ && !td_read(td)) ||
+ (ddir == DDIR_WRITE && !td_write(td)))
+ continue;
+
+ ret = init_cmdprio_values(&values[ddir], 1, ts);
+ if (ret)
+ goto err;
+
+ prio = &cmdprio->perc_entry[ddir];
+ prio->perc = options->percentage[ddir];
+ prio->prio = ioprio_value(options->class[ddir],
+ options->level[ddir]);
+ assign_clat_prio_index(prio, &values[ddir]);
+
+ ret = init_ts_clat_prio(ts, ddir, &values[ddir]);
+ if (ret)
+ goto err;
+
+ free(values[ddir].prios);
+ values[ddir].prios = NULL;
+ values[ddir].nr_prios = 0;
}
- return false;
+ return 0;
+
+err:
+ for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++)
+ free(values[ddir].prios);
+ free_clat_prio_stats(ts);
+
+ return ret;
}
static int fio_cmdprio_parse_and_gen_bssplit(struct thread_data *td,
struct cmdprio *cmdprio)
{
struct cmdprio_options *options = cmdprio->options;
- int ret;
-
- ret = fio_cmdprio_bssplit_parse(td, options->bssplit_str, cmdprio);
+ struct cmdprio_parse_result parse_res[CMDPRIO_RWDIR_CNT] = {0};
+ struct cmdprio_values values[CMDPRIO_RWDIR_CNT] = {0};
+ struct thread_stat *ts = &td->ts;
+ int ret, implicit_cmdprio;
+ enum fio_ddir ddir;
+
+ ret = fio_cmdprio_bssplit_parse(td, options->bssplit_str,
+ &parse_res[0]);
if (ret)
goto err;
+ for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
+ /*
+ * Do not allocate a clat_prio array nor set the cmdprio structs
+ * if there are no non-zero entries (for the ddir), or when the
+ * ddir is not enabled.
+ */
+ if (!parse_res[ddir].nr_entries ||
+ (ddir == DDIR_READ && !td_read(td)) ||
+ (ddir == DDIR_WRITE && !td_write(td))) {
+ free(parse_res[ddir].entries);
+ parse_res[ddir].entries = NULL;
+ parse_res[ddir].nr_entries = 0;
+ continue;
+ }
+
+ ret = init_cmdprio_values(&values[ddir],
+ parse_res[ddir].nr_entries, ts);
+ if (ret)
+ goto err;
+
+ implicit_cmdprio = ioprio_value(options->class[ddir],
+ options->level[ddir]);
+
+ ret = fio_cmdprio_generate_bsprio_desc(&cmdprio->bsprio_desc[ddir],
+ &parse_res[ddir],
+ &values[ddir],
+ implicit_cmdprio);
+ if (ret)
+ goto err;
+
+ free(parse_res[ddir].entries);
+ parse_res[ddir].entries = NULL;
+ parse_res[ddir].nr_entries = 0;
+
+ ret = init_ts_clat_prio(ts, ddir, &values[ddir]);
+ if (ret)
+ goto err;
+
+ free(values[ddir].prios);
+ values[ddir].prios = NULL;
+ values[ddir].nr_prios = 0;
+ }
+
return 0;
err:
+ for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
+ free(parse_res[ddir].entries);
+ free(values[ddir].prios);
+ }
+ free_clat_prio_stats(ts);
fio_cmdprio_cleanup(cmdprio);
return ret;
struct cmdprio_options *options = cmdprio->options;
int i, ret;
+ /*
+ * If cmdprio_percentage/cmdprio_bssplit is set and cmdprio_class
+ * is not set, default to RT priority class.
+ */
+ for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) {
+ /*
+ * A cmdprio value is only used when fio_cmdprio_percentage()
+ * returns non-zero, so it is safe to set a class even for a
+ * DDIR that will never use it.
+ */
+ if (!options->class[i])
+ options->class[i] = IOPRIO_CLASS_RT;
+ }
+
switch (cmdprio->mode) {
case CMDPRIO_MODE_BSSPLIT:
ret = fio_cmdprio_parse_and_gen_bssplit(td, cmdprio);
break;
case CMDPRIO_MODE_PERC:
- ret = 0;
+ ret = fio_cmdprio_gen_perc(td, cmdprio);
break;
default:
assert(0);
return 1;
}
- /*
- * If cmdprio_percentage/cmdprio_bssplit is set and cmdprio_class
- * is not set, default to RT priority class.
- */
- for (i = 0; i < CMDPRIO_RWDIR_CNT; i++) {
- if (options->percentage[i] || cmdprio->bssplit_nr[i]) {
- if (!options->class[i])
- options->class[i] = IOPRIO_CLASS_RT;
- }
- }
-
return ret;
}
void fio_cmdprio_cleanup(struct cmdprio *cmdprio)
{
- int ddir;
+ enum fio_ddir ddir;
+ int i;
for (ddir = 0; ddir < CMDPRIO_RWDIR_CNT; ddir++) {
- free(cmdprio->bssplit[ddir]);
- cmdprio->bssplit[ddir] = NULL;
- cmdprio->bssplit_nr[ddir] = 0;
+ for (i = 0; i < cmdprio->bsprio_desc[ddir].nr_bsprios; i++)
+ free(cmdprio->bsprio_desc[ddir].bsprios[i].prios);
+ free(cmdprio->bsprio_desc[ddir].bsprios);
+ cmdprio->bsprio_desc[ddir].bsprios = NULL;
+ cmdprio->bsprio_desc[ddir].nr_bsprios = 0;
}
/*
CMDPRIO_MODE_BSSPLIT,
};
+struct cmdprio_prio {
+ int32_t prio;
+ uint32_t perc;
+ uint16_t clat_prio_index;
+};
+
+struct cmdprio_bsprio {
+ uint64_t bs;
+ uint32_t tot_perc;
+ unsigned int nr_prios;
+ struct cmdprio_prio *prios;
+};
+
+struct cmdprio_bsprio_desc {
+ struct cmdprio_bsprio *bsprios;
+ unsigned int nr_bsprios;
+};
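+
+/*
+ * Illustrative mapping (hypothetical values): parsing the per-direction value
+ * 64k/25/1/1:64k/75/3/2 produces a cmdprio_bsprio_desc with nr_bsprios == 1,
+ * whose single cmdprio_bsprio has bs == 65536, tot_perc == 100 and two
+ * cmdprio_prio entries:
+ * { .perc = 25, .prio = ioprio_value(1, 1) } and
+ * { .perc = 75, .prio = ioprio_value(3, 2) }.
+ */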
+
struct cmdprio_options {
unsigned int percentage[CMDPRIO_RWDIR_CNT];
unsigned int class[CMDPRIO_RWDIR_CNT];
struct cmdprio {
struct cmdprio_options *options;
- unsigned int bssplit_nr[CMDPRIO_RWDIR_CNT];
- struct bssplit *bssplit[CMDPRIO_RWDIR_CNT];
+ struct cmdprio_prio perc_entry[CMDPRIO_RWDIR_CNT];
+ struct cmdprio_bsprio_desc bsprio_desc[CMDPRIO_RWDIR_CNT];
unsigned int mode;
};
uint64_t nsec;
nsec = ntime_since_now(&start);
- add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false);
+ add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
}
return 0;
uint64_t nsec;
nsec = ntime_since_now(&start);
- add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false);
+ add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
}
return 0;
uint64_t nsec;
nsec = ntime_since_now(&start);
- add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false);
+ add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0);
}
return 0;
p.flags |= IORING_SETUP_CQSIZE;
p.cq_entries = depth;
+retry:
ret = syscall(__NR_io_uring_setup, depth, &p);
- if (ret < 0)
+ if (ret < 0) {
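+ /*
+ * An EINVAL here most likely means that the kernel predates
+ * IORING_SETUP_CQSIZE; retry the setup without the flag and
+ * accept the default CQ ring size.
+ */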
+ if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
+ p.flags &= ~IORING_SETUP_CQSIZE;
+ goto retry;
+ }
return ret;
+ }
ld->ring_fd = ret;
return mem_ptr;
}
-char *librpma_fio_allocate_pmem(struct thread_data *td, const char *filename,
+char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f,
size_t size, struct librpma_fio_mem *mem)
{
size_t size_mmap = 0;
return NULL;
}
- ws_offset = (td->thread_number - 1) * size;
+ if (f->filetype == FIO_TYPE_CHAR) {
+ /* Each thread uses a separate offset within DeviceDAX. */
+ ws_offset = (td->thread_number - 1) * size;
+ } else {
+ /* Each thread uses a separate FileSystemDAX file. No offset is needed. */
+ ws_offset = 0;
+ }
- if (!filename) {
+ if (!f->file_name) {
log_err("fio: filename is not set\n");
return NULL;
}
/* map the file */
- mem_ptr = pmem_map_file(filename, 0 /* len */, 0 /* flags */,
+ mem_ptr = pmem_map_file(f->file_name, 0 /* len */, 0 /* flags */,
0 /* mode */, &size_mmap, &is_pmem);
if (mem_ptr == NULL) {
- log_err("fio: pmem_map_file(%s) failed\n", filename);
+ log_err("fio: pmem_map_file(%s) failed\n", f->file_name);
/* pmem_map_file() sets errno on failure */
td_verror(td, errno, "pmem_map_file");
return NULL;
/* pmem is expected */
if (!is_pmem) {
log_err("fio: %s is not located in persistent memory\n",
- filename);
+ f->file_name);
goto err_unmap;
}
if (size_mmap < ws_offset + size) {
log_err(
"fio: %s is too small to handle so many threads (%zu < %zu)\n",
- filename, size_mmap, ws_offset + size);
+ f->file_name, size_mmap, ws_offset + size);
goto err_unmap;
}
log_info("fio: size of memory mapped from the file %s: %zu\n",
- filename, size_mmap);
+ f->file_name, size_mmap);
mem->mem_ptr = mem_ptr;
mem->size_mmap = size_mmap;
size_t mem_size = td->o.size;
size_t mr_desc_size;
void *ws_ptr;
+ bool is_dram;
int usage_mem_type;
int ret;
return -1;
}
- if (strcmp(f->file_name, "malloc") == 0) {
+ is_dram = !strcmp(f->file_name, "malloc");
+ if (is_dram) {
/* allocation from DRAM using posix_memalign() */
ws_ptr = librpma_fio_allocate_dram(td, mem_size, &csd->mem);
usage_mem_type = RPMA_MR_USAGE_FLUSH_TYPE_VISIBILITY;
} else {
/* allocation from PMEM using pmem_map_file() */
- ws_ptr = librpma_fio_allocate_pmem(td, f->file_name,
- mem_size, &csd->mem);
+ ws_ptr = librpma_fio_allocate_pmem(td, f, mem_size, &csd->mem);
usage_mem_type = RPMA_MR_USAGE_FLUSH_TYPE_PERSISTENT;
}
goto err_free;
}
+ if (!is_dram && f->filetype == FIO_TYPE_FILE) {
+ ret = rpma_mr_advise(mr, 0, mem_size,
+ IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE,
+ IBV_ADVISE_MR_FLAG_FLUSH);
+ if (ret) {
+ librpma_td_verror(td, ret, "rpma_mr_advise");
+ /* an invalid argument is an error */
+ if (ret == RPMA_E_INVAL)
+ goto err_mr_dereg;
+
+ /* log_err used instead of log_info to avoid corruption of the JSON output */
+ log_err("Note: having rpma_mr_advise(3) failed because of RPMA_E_NOSUPP or RPMA_E_PROVIDER may come with a performance penalty, but it is not a blocker for running the benchmark.\n");
+ }
+ }
+
/* get size of the memory region's descriptor */
if ((ret = rpma_mr_get_descriptor_size(mr, &mr_desc_size))) {
librpma_td_verror(td, ret, "rpma_mr_get_descriptor_size");
char *librpma_fio_allocate_dram(struct thread_data *td, size_t size,
struct librpma_fio_mem *mem);
-char *librpma_fio_allocate_pmem(struct thread_data *td, const char *filename,
+char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f,
size_t size, struct librpma_fio_mem *mem);
void librpma_fio_free(struct librpma_fio_mem *mem);
enum {
FIO_SG_WRITE = 1,
- FIO_SG_WRITE_VERIFY = 2,
- FIO_SG_WRITE_SAME = 3
+ FIO_SG_WRITE_VERIFY,
+ FIO_SG_WRITE_SAME,
+ FIO_SG_WRITE_SAME_NDOB,
+ FIO_SG_WRITE_STREAM,
+ FIO_SG_VERIFY_BYTCHK_00,
+ FIO_SG_VERIFY_BYTCHK_01,
+ FIO_SG_VERIFY_BYTCHK_11,
};
struct sg_options {
unsigned int readfua;
unsigned int writefua;
unsigned int write_mode;
+ uint16_t stream_id;
};
static struct fio_option options[] = {
.oval = FIO_SG_WRITE,
.help = "Issue standard SCSI WRITE commands",
},
- { .ival = "verify",
+ { .ival = "write_and_verify",
.oval = FIO_SG_WRITE_VERIFY,
.help = "Issue SCSI WRITE AND VERIFY commands",
},
- { .ival = "same",
+ { .ival = "verify",
+ .oval = FIO_SG_WRITE_VERIFY,
+ .help = "Issue SCSI WRITE AND VERIFY commands. This "
+ "option is deprecated. Use write_and_verify instead.",
+ },
+ { .ival = "write_same",
.oval = FIO_SG_WRITE_SAME,
.help = "Issue SCSI WRITE SAME commands",
},
+ { .ival = "same",
+ .oval = FIO_SG_WRITE_SAME,
+ .help = "Issue SCSI WRITE SAME commands. This "
+ "option is deprecated. Use write_same instead.",
+ },
+ { .ival = "write_same_ndob",
+ .oval = FIO_SG_WRITE_SAME_NDOB,
+ .help = "Issue SCSI WRITE SAME(16) commands with NDOB flag set",
+ },
+ { .ival = "verify_bytchk_00",
+ .oval = FIO_SG_VERIFY_BYTCHK_00,
+ .help = "Issue SCSI VERIFY commands with BYTCHK set to 00",
+ },
+ { .ival = "verify_bytchk_01",
+ .oval = FIO_SG_VERIFY_BYTCHK_01,
+ .help = "Issue SCSI VERIFY commands with BYTCHK set to 01",
+ },
+ { .ival = "verify_bytchk_11",
+ .oval = FIO_SG_VERIFY_BYTCHK_11,
+ .help = "Issue SCSI VERIFY commands with BYTCHK set to 11",
+ },
+ { .ival = "write_stream",
+ .oval = FIO_SG_WRITE_STREAM,
+ .help = "Issue SCSI WRITE STREAM(16) commands",
+ },
},
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_SG,
},
+ {
+ .name = "stream_id",
+ .lname = "stream id for WRITE STREAM(16) commands",
+ .type = FIO_OPT_INT,
+ .off1 = offsetof(struct sg_options, stream_id),
+ .help = "Stream ID for WRITE STREAM(16) commands",
+ .def = "0",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_SG,
+ },
{
.name = NULL,
},
#endif
};
+static inline uint16_t sgio_get_be16(uint8_t *buf)
+{
+ return be16_to_cpu(*((uint16_t *) buf));
+}
+
static inline uint32_t sgio_get_be32(uint8_t *buf)
{
return be32_to_cpu(*((uint32_t *) buf));
}
static void fio_sgio_rw_lba(struct sg_io_hdr *hdr, unsigned long long lba,
- unsigned long long nr_blocks)
+ unsigned long long nr_blocks, bool override16)
{
- if (lba < MAX_10B_LBA) {
+ if (lba < MAX_10B_LBA && !override16) {
sgio_set_be32((uint32_t) lba, &hdr->cmdp[2]);
sgio_set_be16((uint16_t) nr_blocks, &hdr->cmdp[7]);
} else {
if (o->readfua)
hdr->cmdp[1] |= 0x08;
- fio_sgio_rw_lba(hdr, lba, nr_blocks);
+ fio_sgio_rw_lba(hdr, lba, nr_blocks, false);
} else if (io_u->ddir == DDIR_WRITE) {
sgio_hdr_init(sd, hdr, io_u, 1);
else
hdr->cmdp[0] = 0x93; // write same(16)
break;
+ case FIO_SG_WRITE_SAME_NDOB:
+ hdr->cmdp[0] = 0x93; // write same(16)
+ hdr->cmdp[1] |= 0x1; // no data output buffer
+ hdr->dxfer_len = 0;
+ break;
+ case FIO_SG_WRITE_STREAM:
+ hdr->cmdp[0] = 0x9a; // write stream (16)
+ if (o->writefua)
+ hdr->cmdp[1] |= 0x08;
+ sgio_set_be64(lba, &hdr->cmdp[2]);
+ sgio_set_be16((uint16_t) io_u->file->engine_pos, &hdr->cmdp[10]);
+ sgio_set_be16((uint16_t) nr_blocks, &hdr->cmdp[12]);
+ break;
+ case FIO_SG_VERIFY_BYTCHK_00:
+ if (lba < MAX_10B_LBA)
+ hdr->cmdp[0] = 0x2f; // VERIFY(10)
+ else
+ hdr->cmdp[0] = 0x8f; // VERIFY(16)
+ hdr->dxfer_len = 0;
+ break;
+ case FIO_SG_VERIFY_BYTCHK_01:
+ if (lba < MAX_10B_LBA)
+ hdr->cmdp[0] = 0x2f; // VERIFY(10)
+ else
+ hdr->cmdp[0] = 0x8f; // VERIFY(16)
+ hdr->cmdp[1] |= 0x02; // BYTCHK = 01b
+ break;
+ case FIO_SG_VERIFY_BYTCHK_11:
+ if (lba < MAX_10B_LBA)
+ hdr->cmdp[0] = 0x2f; // VERIFY(10)
+ else
+ hdr->cmdp[0] = 0x8f; // VERIFY(16)
+ hdr->cmdp[1] |= 0x06; // BYTCHK = 11b
+ hdr->dxfer_len = sd->bs;
+ break;
};
- fio_sgio_rw_lba(hdr, lba, nr_blocks);
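+ /*
+ * WRITE STREAM(16) already filled in its LBA and length fields
+ * above. WRITE SAME(16) with NDOB always uses a 16-byte CDB, so
+ * force the 16-byte LBA/length layout even for small LBAs.
+ */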
+ if (o->write_mode != FIO_SG_WRITE_STREAM)
+ fio_sgio_rw_lba(hdr, lba, nr_blocks,
+ o->write_mode == FIO_SG_WRITE_SAME_NDOB);
} else if (io_u->ddir == DDIR_TRIM) {
struct sgio_trim *st;
return 0;
}
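+
+/*
+ * Open a write stream on the device (storing the assigned stream identifier
+ * in *stream_id) or close the stream identified by *stream_id.
+ */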
+static int fio_sgio_stream_control(struct fio_file *f, bool open_stream, uint16_t *stream_id)
+{
+ struct sg_io_hdr hdr;
+ unsigned char cmd[16];
+ unsigned char sb[64];
+ unsigned char buf[8];
+ int ret;
+
+ memset(&hdr, 0, sizeof(hdr));
+ memset(cmd, 0, sizeof(cmd));
+ memset(sb, 0, sizeof(sb));
+ memset(buf, 0, sizeof(buf));
+
+ hdr.interface_id = 'S';
+ hdr.cmdp = cmd;
+ hdr.cmd_len = 16;
+ hdr.sbp = sb;
+ hdr.mx_sb_len = sizeof(sb);
+ hdr.timeout = SCSI_TIMEOUT_MS;
+ hdr.cmdp[0] = 0x9e;
+ hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+ hdr.dxferp = buf;
+ hdr.dxfer_len = sizeof(buf);
+ sgio_set_be32(sizeof(buf), &hdr.cmdp[10]);
+
+ if (open_stream)
+ hdr.cmdp[1] = 0x34;
+ else {
+ hdr.cmdp[1] = 0x54;
+ sgio_set_be16(*stream_id, &hdr.cmdp[4]);
+ }
+
+ ret = ioctl(f->fd, SG_IO, &hdr);
+
+ if (ret < 0)
+ return ret;
+
+ if (hdr.info & SG_INFO_CHECK)
+ return 1;
+
+ if (open_stream) {
+ *stream_id = sgio_get_be16(&buf[4]);
+ dprint(FD_FILE, "sgio_stream_control: opened stream %u\n", (unsigned int) *stream_id);
+ assert(*stream_id != 0);
+ } else
+ dprint(FD_FILE, "sgio_stream_control: closed stream %u\n", (unsigned int) *stream_id);
+
+ return 0;
+}
+
static int fio_sgio_open(struct thread_data *td, struct fio_file *f)
{
struct sgio_data *sd = td->io_ops_data;
+ struct sg_options *o = td->eo;
int ret;
ret = generic_open_file(td, f);
return ret;
}
+ if (o->write_mode == FIO_SG_WRITE_STREAM) {
+ if (o->stream_id)
+ f->engine_pos = o->stream_id;
+ else {
+ ret = fio_sgio_stream_control(f, true, (uint16_t *) &f->engine_pos);
+ if (ret)
+ return ret;
+ }
+ }
+
return 0;
}
+int fio_sgio_close(struct thread_data *td, struct fio_file *f)
+{
+ struct sg_options *o = td->eo;
+ int ret;
+
+ if (!o->stream_id && o->write_mode == FIO_SG_WRITE_STREAM) {
+ ret = fio_sgio_stream_control(f, false, (uint16_t *) &f->engine_pos);
+ if (ret)
+ return ret;
+ }
+
+ return generic_close_file(td, f);
+}
+
/*
* Build an error string with details about the driver, host or scsi
* error contained in the sg header Caller will use as necessary.
.event = fio_sgio_event,
.cleanup = fio_sgio_cleanup,
.open_file = fio_sgio_open,
- .close_file = generic_close_file,
+ .close_file = fio_sgio_close,
.get_file_size = fio_sgio_get_file_size,
.flags = FIO_SYNCIO | FIO_RAWIO,
.options = options,
#include <errno.h>
#include "../fio.h"
+#include "../optgroup.h"
typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped);
struct windowsaio_data *wd;
};
+struct windowsaio_options {
+ struct thread_data *td;
+ unsigned int no_completion_thread;
+};
+
+static struct fio_option options[] = {
+ {
+ .name = "no_completion_thread",
+ .lname = "No completion polling thread",
+ .type = FIO_OPT_STR_SET,
+ .off1 = offsetof(struct windowsaio_options, no_completion_thread),
+ .help = "Use to avoid separate completion polling thread",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_WINDOWSAIO,
+ },
+ {
+ .name = NULL,
+ },
+};
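+
+/*
+ * A hypothetical job snippet using the option above to reap completions
+ * inline, without the IoCompletionRoutine helper thread:
+ *
+ * [inline-reap]
+ * ioengine=windowsaio
+ * no_completion_thread
+ * iodepth=16
+ * rw=randread
+ * filename=fio.tmp
+ */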
+
static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
static int fio_windowsaio_init(struct thread_data *td)
struct thread_ctx *ctx;
struct windowsaio_data *wd;
HANDLE hFile;
+ struct windowsaio_options *o = td->eo;
hFile = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
if (hFile == INVALID_HANDLE_VALUE) {
wd->iothread_running = TRUE;
wd->iocp = hFile;
- if (!rc)
- ctx = malloc(sizeof(struct thread_ctx));
+ if (o->no_completion_thread == 0) {
+ if (!rc)
+ ctx = malloc(sizeof(struct thread_ctx));
- if (!rc && ctx == NULL) {
- log_err("windowsaio: failed to allocate memory for thread context structure\n");
- CloseHandle(hFile);
- rc = 1;
- }
+ if (!rc && ctx == NULL) {
+ log_err("windowsaio: failed to allocate memory for thread context structure\n");
+ CloseHandle(hFile);
+ rc = 1;
+ }
- if (!rc) {
- DWORD threadid;
+ if (!rc) {
+ DWORD threadid;
- ctx->iocp = hFile;
- ctx->wd = wd;
- wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, &threadid);
- if (!wd->iothread)
- log_err("windowsaio: failed to create io completion thread\n");
- else if (fio_option_is_set(&td->o, cpumask))
- fio_setaffinity(threadid, td->o.cpumask);
+ ctx->iocp = hFile;
+ ctx->wd = wd;
+ wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, &threadid);
+ if (!wd->iothread)
+ log_err("windowsaio: failed to create io completion thread\n");
+ else if (fio_option_is_set(&td->o, cpumask))
+ fio_setaffinity(threadid, td->o.cpumask);
+ }
+ if (rc || wd->iothread == NULL)
+ rc = 1;
}
-
- if (rc || wd->iothread == NULL)
- rc = 1;
}
return rc;
return wd->aio_events[event];
}
-static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
- unsigned int max,
- const struct timespec *t)
+/* dequeue completion entries directly (no separate completion thread) */
+static int fio_windowsaio_getevents_nothread(struct thread_data *td, unsigned int min,
+ unsigned int max, const struct timespec *t)
+{
+ struct windowsaio_data *wd = td->io_ops_data;
+ unsigned int dequeued = 0;
+ struct io_u *io_u;
+ DWORD start_count = 0;
+ DWORD end_count = 0;
+ DWORD mswait = 250;
+ struct fio_overlapped *fov;
+
+ if (t != NULL) {
+ mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
+ start_count = GetTickCount();
+ end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
+ }
+
+ do {
+ BOOL ret;
+ OVERLAPPED *ovl;
+
+ ULONG entries = min(16, max-dequeued);
+ OVERLAPPED_ENTRY oe[16];
+ ret = GetQueuedCompletionStatusEx(wd->iocp, oe, entries, &entries, mswait, 0);
+ if (ret && entries) {
+ int entry_num;
+
+ for (entry_num=0; entry_num<entries; entry_num++) {
+ ovl = oe[entry_num].lpOverlapped;
+ fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
+ io_u = fov->io_u;
+
+ if (ovl->Internal == ERROR_SUCCESS) {
+ io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
+ io_u->error = 0;
+ } else {
+ io_u->resid = io_u->xfer_buflen;
+ io_u->error = win_to_posix_error(GetLastError());
+ }
+
+ fov->io_complete = FALSE;
+ wd->aio_events[dequeued] = io_u;
+ dequeued++;
+ }
+ }
+
+ if (dequeued >= min ||
+ (t != NULL && timeout_expired(start_count, end_count)))
+ break;
+ } while (1);
+ return dequeued;
+}
+
+/* dequeue completion entries created by the separate IoCompletionRoutine thread */
+static int fio_windowsaio_getevents_thread(struct thread_data *td, unsigned int min,
+ unsigned int max, const struct timespec *t)
{
struct windowsaio_data *wd = td->io_ops_data;
unsigned int dequeued = 0;
wd->aio_events[dequeued] = io_u;
dequeued++;
}
-
}
if (dequeued >= min)
break;
return dequeued;
}
+static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
+ unsigned int max, const struct timespec *t)
+{
+ struct windowsaio_options *o = td->eo;
+
+ if (o->no_completion_thread)
+ return fio_windowsaio_getevents_nothread(td, min, max, t);
+ return fio_windowsaio_getevents_thread(td, min, max, t);
+}
+
static enum fio_q_status fio_windowsaio_queue(struct thread_data *td,
struct io_u *io_u)
{
.get_file_size = generic_get_file_size,
.io_u_init = fio_windowsaio_io_u_init,
.io_u_free = fio_windowsaio_io_u_free,
+ .options = options,
+ .option_struct_size = sizeof(struct windowsaio_options),
};
static void fio_init fio_windowsaio_register(void)
; Randomly read/write a block device file at queue depth 16.
-; 40 % of read IOs are 64kB and 60% are 1MB. 100% of writes are 1MB.
-; 100% of the 64kB reads are executed at the highest priority and
-; all other IOs executed without a priority set.
[global]
filename=/dev/sda
direct=1
write_lat_log=prio-run.log
log_prio=1
-
-[randrw]
rw=randrw
-bssplit=64k/40:1024k/60,1024k/100
ioengine=libaio
iodepth=16
+
+; Simple cmdprio_bssplit format. All non-zero percentage entries will
+; use the same prio class and prio level defined by the cmdprio_class
+; and cmdprio options.
+[cmdprio]
+; 40% of read IOs are 64kB and 60% are 1MB. 100% of writes are 1MB.
+; 100% of the 64kB reads are executed with prio class 1 and prio level 0.
+; All other IOs are executed without a priority set.
+bssplit=64k/40:1024k/60,1024k/100
cmdprio_bssplit=64k/100:1024k/0,1024k/0
cmdprio_class=1
+cmdprio=0
+
+; Advanced cmdprio_bssplit format. Each non-zero percentage entry can
+; use a different prio class and prio level (appended to each entry).
+[cmdprio-adv]
+; 40% of read IOs are 64kB and 60% are 1MB. 100% of writes are 1MB.
+; 25% of the 64kB reads are executed with prio class 1 and prio level 1,
+; 75% of the 64kB reads are executed with prio class 3 and prio level 2.
+; All other IOs are executed without a priority set.
+stonewall
+bssplit=64k/40:1024k/60,1024k/100
+cmdprio_bssplit=64k/25/1/1:64k/75/3/2:1024k/0,1024k/0
+
+; Identical to the previous example, but with a default priority defined.
+[cmdprio-adv-def]
+; 40% of read IOs are 64kB and 60% are 1MB. 100% of writes are 1MB.
+; 25% of the 64kB reads are executed with prio class 1 and prio level 1,
+; 75% of the 64kB reads are executed with prio class 3 and prio level 2.
+; All other IOs are executed with prio class 2 and prio level 7.
+stonewall
+prioclass=2
+prio=7
+bssplit=64k/40:1024k/60,1024k/100
+cmdprio_bssplit=64k/25/1/1:64k/75/3/2:1024k/0,1024k/0
--- /dev/null
+#
+# **********************************
+# * !!THIS IS A DESTRUCTIVE TEST!! *
+# * IF NOT CHANGED THIS TEST WILL *
+# * DESTROY DATA ON /dev/sdb *
+# **********************************
+#
+# Test SCSI VERIFY commands issued via the sg ioengine
+# The jobs with 'fail' in the name should produce errors
+#
+# job description
+# precon precondition the device by writing with a known
+# pattern
+# verify01 verify each block one at a time by comparing to known
+# pattern
+# verify01-fail verifying one too many blocks should produce a failure
+# verify11-one_ios verify all 20 blocks by sending only 512 bytes
+# verify11-fail verifying beyond the preconditioned region should
+# produce a failure
+
+[global]
+filename=/dev/sdb
+buffer_pattern=0x01
+ioengine=sg
+rw=write
+bs=512
+number_ios=20
+stonewall
+
+[precon]
+
+[verify01]
+sg_write_mode=verify_bytchk_01
+number_ios=20
+
+[verify01-fail]
+sg_write_mode=verify_bytchk_01
+number_ios=21
+
+[verify11-one_ios]
+sg_write_mode=verify_bytchk_11
+number_ios=1
+bs=10240
+
+[verify11-fail]
+sg_write_mode=verify_bytchk_11
+number_ios=1
+bs=10752
--- /dev/null
+#
+# **********************************
+# * !!THIS IS A DESTRUCTIVE TEST!! *
+# * IF NOT CHANGED THIS TEST WILL *
+# * DESTROY DATA ON /dev/sdb *
+# **********************************
+#
+# Test SCSI VERIFY commands issued via the sg ioengine
+# All of the jobs below should complete without error
+#
+# job description
+# precon precondition the device by writing with a known
+# pattern
+# verify00 verify written data on medium only
+# verify01 verify each block one at a time by comparing to known
+# pattern
+# verify01-two_ios verify same data but with only two VERIFY operations
+# verify11 verify each block one at a time
+# verify11-five_ios verify data with five IOs, four blocks at a time,
+# sending 512 bytes for each IO
+# verify11-one_ios verify all 20 blocks by sending only 512 bytes
+#
+
+[global]
+filename=/dev/sdb
+buffer_pattern=0x01
+ioengine=sg
+rw=write
+bs=512
+number_ios=20
+stonewall
+
+[precon]
+
+[verify00]
+sg_write_mode=verify_bytchk_00
+
+[verify01]
+sg_write_mode=verify_bytchk_01
+
+[verify01-two_ios]
+sg_write_mode=verify_bytchk_01
+bs=5120
+number_ios=2
+
+[verify11]
+sg_write_mode=verify_bytchk_11
+
+[verify11-five_ios]
+sg_write_mode=verify_bytchk_11
+bs=2048
+number_ios=5
+
+[verify11-one_ios]
+sg_write_mode=verify_bytchk_11
+bs=10240
+number_ios=1
--- /dev/null
+#
+# **********************************
+# * !!THIS IS A DESTRUCTIVE TEST!! *
+# * IF NOT CHANGED THIS TEST WILL *
+# * DESTROY DATA ON /dev/sdb *
+# **********************************
+#
+# Test WRITE SAME commands with the NDOB flag set
+# issued via the sg ioengine
+# All of the jobs below should complete without error
+# except the last one
+#
+# job description
+# precon Precondition the device by writing 20 blocks with a
+# known pattern
+# write_same_ndob Write 19 sectors of all zeroes with the NDOB flag set
+# verify-pass Verify 19 blocks of all zeroes
+# verify-fail Verify 20 blocks of all zeroes. This should fail.
+#
+
+[global]
+filename=/dev/sdb
+buffer_pattern=0x01
+ioengine=sg
+rw=write
+bs=512
+stonewall
+
+[precon]
+number_ios=20
+
+[write_same_ndob]
+sg_write_mode=write_same_ndob
+number_ios=19
+
+[verify-pass]
+sg_write_mode=verify_bytchk_01
+buffer_pattern=0x00
+number_ios=19
+
+[verify-fail]
+sg_write_mode=verify_bytchk_01
+buffer_pattern=0x00
+number_ios=20
Limit on the number of simultaneously opened zones per single thread/process.
.TP
.BI ignore_zone_limits \fR=\fPbool
-If this isn't set, fio will query the max open zones limit from the zoned block
-device, and exit if the specified \fBmax_open_zones\fR value is larger than the
-limit reported by the device. Default: false.
+If this option is used, fio will ignore the maximum number of open zones limit
+of the zoned block device in use, thus allowing the option \fBmax_open_zones\fR
+value to be larger than the device reported limit. Default: false.
.TP
.BI zone_reset_threshold \fR=\fPfloat
A number between zero and one that indicates the ratio of logical blocks with
.TP
.BI fdatasync \fR=\fPint
Like \fBfsync\fR but uses \fBfdatasync\fR\|(2) to only sync data and
-not metadata blocks. In Windows, FreeBSD, DragonFlyBSD or OSX there is no
+not metadata blocks. In Windows, DragonFlyBSD or OSX there is no
\fBfdatasync\fR\|(2) so this falls back to using \fBfsync\fR\|(2).
Defaults to 0, which means fio does not periodically issue and wait for a
data-only sync to complete.
.TP
.BI filesize \fR=\fPirange(int)
Individual file sizes. May be a range, in which case fio will select sizes
-for files at random within the given range and limited to \fBsize\fR in
-total (if that is given). If not given, each created file is the same size.
-This option overrides \fBsize\fR in terms of file size, which means
-this value is used as a fixed size or possible range of each file.
+for files at random within the given range. If not given, each created file
+is the same size. This option overrides \fBsize\fR in terms of file size,
+i.e. \fBsize\fR becomes merely the default for \fBio_size\fR (and
+has no effect at all if \fBio_size\fR is set explicitly).
.TP
.BI file_append \fR=\fPbool
Perform I/O after the end of the file. Normally fio will operate within the
the percentage of IOs that must have a priority set depending on the block
size of the IO. This option is useful only when used together with the option
\fBbssplit\fR, that is, multiple different block sizes are used for reads and
-writes. The format for this option is the same as the format of the
-\fBbssplit\fR option, with the exception that values for trim IOs are
-ignored. This option is mutually exclusive with the \fBcmdprio_percentage\fR
-option.
+writes.
+.RS
+.P
+The first accepted format for this option is the same as the format of the
+\fBbssplit\fR option:
+.RS
+.P
+cmdprio_bssplit=blocksize/percentage:blocksize/percentage
+.RE
+.P
+In this case, each entry will use the priority class and priority level defined
+by the options \fBcmdprio_class\fR and \fBcmdprio\fR respectively.
+.P
+The second accepted format for this option is:
+.RS
+.P
+cmdprio_bssplit=blocksize/percentage/class/level:blocksize/percentage/class/level
+.RE
+.P
+In this case, the priority class and priority level are defined inside each
+entry. In comparison with the first accepted format, the second accepted format
+does not restrict all entries to have the same priority class and priority
+level.
+.P
+For both formats, only the read and write data directions are supported; values
+for trim IOs are ignored. This option is mutually exclusive with the
+\fBcmdprio_percentage\fR option.
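+.P
+For example, the purely illustrative value
+cmdprio_bssplit=64k/25/1/1:64k/75/3/2 issues 25% of the 64kB I/Os with
+priority class 1 and priority level 1, 75% of them with priority class 3 and
+priority level 2, and leaves I/Os of any other block size at the default
+context priority.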
+.RE
.TP
.BI (io_uring)fixedbufs
If fio is asked to do direct IO, then Linux will map pages for each IO call, and
unit access (fua) flag. Default: 0.
.TP
.BI (sg)sg_write_mode \fR=\fPstr
-Specify the type of write commands to issue. This option can take three
+Specify the type of write commands to issue. This option can take one of the following
values:
.RS
.RS
.B write (default)
Write opcodes are issued as usual
.TP
+.B write_and_verify
+Issue WRITE AND VERIFY commands. The BYTCHK field is set to 00b. This directs the
+device to carry out a medium verification with no data comparison for the data
+that was written. The writefua option is ignored with this selection.
+.TP
.B verify
-Issue WRITE AND VERIFY commands. The BYTCHK bit is set to 0. This
-directs the device to carry out a medium verification with no data
-comparison. The writefua option is ignored with this selection.
+This option is deprecated. Use write_and_verify instead.
.TP
-.B same
+.B write_same
Issue WRITE SAME commands. This transfers a single block to the device
and writes this same block of data to a contiguous sequence of LBAs
beginning at the specified offset. fio's block size parameter
generate 8k of data for each command but only the first 512 bytes will
be used and transferred to the device. The writefua option is ignored
with this selection.
+.TP
+.B same
+This option is deprecated. Use write_same instead.
+.TP
+.B write_same_ndob
+Issue WRITE SAME(16) commands as above but with the No Data Output
+Buffer (NDOB) bit set. No data will be transferred to the device with
+this bit set. Data written will be a pre-determined pattern such as
+all zeroes.
+.TP
+.B write_stream
+Issue WRITE STREAM(16) commands. Use the stream_id option to specify
+the stream identifier.
+.TP
+.B verify_bytchk_00
+Issue VERIFY commands with BYTCHK set to 00. This directs the device to carry
+out a medium verification with no data comparison.
+.TP
+.B verify_bytchk_01
+Issue VERIFY commands with BYTCHK set to 01. This directs the device to
+compare the data on the device with the data transferred to the device.
+.TP
+.B verify_bytchk_11
+Issue VERIFY commands with BYTCHK set to 11. This transfers a single block to
+the device and compares the contents of this block with the data on the device
+beginning at the specified offset. fio's block size parameter specifies the
+total amount of data compared with this command. However, only one block
+(sector) worth of data is transferred to the device. This is similar to the
+WRITE SAME command except that data is compared instead of written.
.RE
.RE
.TP
+.BI (sg)stream_id \fR=\fPint
+Set the stream identifier for WRITE STREAM commands. If this is set to 0 (which is not
+a valid stream identifier) fio will open a stream and then close it when done. Default
+is 0.
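+.P
+A hypothetical job snippet combining these options could look like:
+.RS
+.P
+ioengine=sg
+.br
+sg_write_mode=write_stream
+.br
+stream_id=1
+.RE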
+.TP
.BI (nbd)uri \fR=\fPstr
Specify the NBD URI of the server to test.
The string is a standard NBD URI (see
struct flist_head io_log_list;
FILE *io_log_rfile;
unsigned int io_log_blktrace;
+ unsigned int io_log_blktrace_swap;
+ unsigned long long io_log_blktrace_last_ttime;
unsigned int io_log_current;
unsigned int io_log_checkmark;
unsigned int io_log_highmark;
if (sum_stat_clients == 1)
return;
- sum_thread_stats(&client_ts, &p->ts, sum_stat_nr == 1);
+ sum_thread_stats(&client_ts, &p->ts);
sum_group_stats(&client_gs, &p->rs);
client_ts.members++;
#define GFIO_CLAT 1
#define GFIO_SLAT 2
#define GFIO_LAT 4
-#define GFIO_HILAT 8
-#define GFIO_LOLAT 16
static void gfio_show_ddir_status(struct gfio_client *gc, GtkWidget *mbox,
struct group_run_stats *rs,
struct thread_stat *ts, int ddir)
{
const char *ddir_label[3] = { "Read", "Write", "Trim" };
- const char *hilat, *lolat;
GtkWidget *frame, *label, *box, *vbox, *main_vbox;
- unsigned long long min[5], max[5];
+ unsigned long long min[3], max[3];
unsigned long runt;
unsigned long long bw, iops;
unsigned int flags = 0;
- double mean[5], dev[5];
+ double mean[3], dev[3];
char *io_p, *io_palt, *bw_p, *bw_palt, *iops_p;
char tmp[128];
int i2p;
flags |= GFIO_CLAT;
if (calc_lat(&ts->lat_stat[ddir], &min[2], &max[2], &mean[2], &dev[2]))
flags |= GFIO_LAT;
- if (calc_lat(&ts->clat_high_prio_stat[ddir], &min[3], &max[3], &mean[3], &dev[3])) {
- flags |= GFIO_HILAT;
- if (calc_lat(&ts->clat_low_prio_stat[ddir], &min[4], &max[4], &mean[4], &dev[4]))
- flags |= GFIO_LOLAT;
- /* we only want to print low priority statistics if other IOs were
- * submitted with the priority bit set
- */
- }
if (flags) {
frame = gtk_frame_new("Latency");
vbox = gtk_vbox_new(FALSE, 3);
gtk_container_add(GTK_CONTAINER(frame), vbox);
- if (ts->lat_percentiles) {
- hilat = "High priority total latency";
- lolat = "Low priority total latency";
- } else {
- hilat = "High priority completion latency";
- lolat = "Low priority completion latency";
- }
-
if (flags & GFIO_SLAT)
gfio_show_lat(vbox, "Submission latency", min[0], max[0], mean[0], dev[0]);
if (flags & GFIO_CLAT)
gfio_show_lat(vbox, "Completion latency", min[1], max[1], mean[1], dev[1]);
if (flags & GFIO_LAT)
gfio_show_lat(vbox, "Total latency", min[2], max[2], mean[2], dev[2]);
- if (flags & GFIO_HILAT)
- gfio_show_lat(vbox, hilat, min[3], max[3], mean[3], dev[3]);
- if (flags & GFIO_LOLAT)
- gfio_show_lat(vbox, lolat, min[4], max[4], mean[4], dev[4]);
}
if (ts->slat_percentiles && flags & GFIO_SLAT)
ts->io_u_plat[FIO_SLAT][ddir],
ts->slat_stat[ddir].samples,
"Submission");
- if (ts->clat_percentiles && flags & GFIO_CLAT) {
+ if (ts->clat_percentiles && flags & GFIO_CLAT)
gfio_show_clat_percentiles(gc, main_vbox, ts, ddir,
ts->io_u_plat[FIO_CLAT][ddir],
ts->clat_stat[ddir].samples,
"Completion");
- if (!ts->lat_percentiles) {
- if (flags & GFIO_HILAT)
- gfio_show_clat_percentiles(gc, main_vbox, ts, ddir,
- ts->io_u_plat_high_prio[ddir],
- ts->clat_high_prio_stat[ddir].samples,
- "High priority completion");
- if (flags & GFIO_LOLAT)
- gfio_show_clat_percentiles(gc, main_vbox, ts, ddir,
- ts->io_u_plat_low_prio[ddir],
- ts->clat_low_prio_stat[ddir].samples,
- "Low priority completion");
- }
- }
- if (ts->lat_percentiles && flags & GFIO_LAT) {
+ if (ts->lat_percentiles && flags & GFIO_LAT)
gfio_show_clat_percentiles(gc, main_vbox, ts, ddir,
ts->io_u_plat[FIO_LAT][ddir],
ts->lat_stat[ddir].samples,
"Total");
- if (flags & GFIO_HILAT)
- gfio_show_clat_percentiles(gc, main_vbox, ts, ddir,
- ts->io_u_plat_high_prio[ddir],
- ts->clat_high_prio_stat[ddir].samples,
- "High priority total");
- if (flags & GFIO_LOLAT)
- gfio_show_clat_percentiles(gc, main_vbox, ts, ddir,
- ts->io_u_plat_low_prio[ddir],
- ts->clat_low_prio_stat[ddir].samples,
- "Low priority total");
- }
free(io_p);
free(bw_p);
.has_arg = optional_argument,
.val = 'S',
},
+#ifdef WIN32
+ {
+ .name = (char *) "server-internal",
+ .has_arg = required_argument,
+ .val = 'N',
+ },
+#endif
{ .name = (char *) "daemonize",
.has_arg = required_argument,
.val = 'D',
return true;
}
+static int verify_per_group_options(struct thread_data *td, const char *jobname)
+{
+ struct thread_data *td2;
+ int i;
+
+ for_each_td(td2, i) {
+ if (td->groupid != td2->groupid)
+ continue;
+
+ if (td->o.stats &&
+ td->o.lat_percentiles != td2->o.lat_percentiles) {
+ log_err("fio: lat_percentiles in job: %s differs from group\n",
+ jobname);
+ return 1;
+ }
+ }
+
+ return 0;
+}
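+
+/*
+ * Hypothetical example of a configuration rejected by the check above: two
+ * jobs that share a reporting group but disagree on lat_percentiles, e.g.
+ *
+ * [global]
+ * group_reporting
+ * [job1]
+ * lat_percentiles=1
+ * [job2]
+ * lat_percentiles=0
+ */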
+
/*
* Treat an empty log file name the same as a one not given
*/
td->groupid = groupid;
prev_group_jobs++;
+ if (td->o.group_reporting && prev_group_jobs > 1 &&
+ verify_per_group_options(td, jobname))
+ goto err;
+
if (setup_rate(td))
goto err;
else
suf = "log";
- gen_log_name(logname, sizeof(logname), "lat", pre,
- td->thread_number, suf, o->per_job_logs);
- setup_log(&td->lat_log, &p, logname);
+ if (!o->disable_lat) {
+ gen_log_name(logname, sizeof(logname), "lat", pre,
+ td->thread_number, suf, o->per_job_logs);
+ setup_log(&td->lat_log, &p, logname);
+ }
- gen_log_name(logname, sizeof(logname), "slat", pre,
- td->thread_number, suf, o->per_job_logs);
- setup_log(&td->slat_log, &p, logname);
+ if (!o->disable_slat) {
+ gen_log_name(logname, sizeof(logname), "slat", pre,
+ td->thread_number, suf, o->per_job_logs);
+ setup_log(&td->slat_log, &p, logname);
+ }
- gen_log_name(logname, sizeof(logname), "clat", pre,
- td->thread_number, suf, o->per_job_logs);
- setup_log(&td->clat_log, &p, logname);
+ if (!o->disable_clat) {
+ gen_log_name(logname, sizeof(logname), "clat", pre,
+ td->thread_number, suf, o->per_job_logs);
+ setup_log(&td->clat_log, &p, logname);
+ }
}
exit_val = 1;
#endif
break;
+#ifdef WIN32
+ case 'N':
+ did_arg = true;
+ fio_server_internal_set(optarg);
+ break;
+#endif
case 'D':
if (pid_file)
free(pid_file);
assert(io_u->flags & IO_U_F_FREE);
io_u_clear(td, io_u, IO_U_F_FREE | IO_U_F_NO_FILE_PUT |
IO_U_F_TRIMMED | IO_U_F_BARRIER |
- IO_U_F_VER_LIST | IO_U_F_HIGH_PRIO);
+ IO_U_F_VER_LIST);
io_u->error = 0;
io_u->acct_ddir = -1;
* Remember the issuing context priority. The IO engine may change this.
*/
io_u->ioprio = td->ioprio;
+ io_u->clat_prio_index = 0;
out:
assert(io_u->file);
if (!td_io_prep(td, io_u)) {
tnsec = ntime_since(&io_u->start_time, &icd->time);
add_lat_sample(td, idx, tnsec, bytes, io_u->offset,
- io_u->ioprio, io_u_is_high_prio(io_u));
+ io_u->ioprio, io_u->clat_prio_index);
if (td->flags & TD_F_PROFILE_OPS) {
struct prof_io_ops *ops = &td->prof_io_ops;
if (ddir_rw(idx)) {
if (!td->o.disable_clat) {
add_clat_sample(td, idx, llnsec, bytes, io_u->offset,
- io_u->ioprio, io_u_is_high_prio(io_u));
+ io_u->ioprio, io_u->clat_prio_index);
io_u_mark_latency(td, llnsec);
}
IO_U_F_TRIMMED = 1 << 5,
IO_U_F_BARRIER = 1 << 6,
IO_U_F_VER_LIST = 1 << 7,
- IO_U_F_HIGH_PRIO = 1 << 8,
};
/*
* IO priority.
*/
unsigned short ioprio;
+ unsigned short clat_prio_index;
/*
* Allocated/set buffer and length
td_flags_clear((td), &(io_u->flags), (val))
#define io_u_set(td, io_u, val) \
td_flags_set((td), &(io_u)->flags, (val))
-#define io_u_is_high_prio(io_u) (io_u->flags & IO_U_F_HIGH_PRIO)
#endif
while (!flist_empty(&td->io_log_list)) {
int ret;
- if (!td->io_log_blktrace && td->o.read_iolog_chunked) {
+ if (td->o.read_iolog_chunked) {
if (td->io_log_checkmark == td->io_log_current) {
- if (!read_iolog2(td))
- return 1;
+ if (td->io_log_blktrace) {
+ if (!read_blktrace(td))
+ return 1;
+ } else {
+ if (!read_iolog2(td))
+ return 1;
+ }
}
td->io_log_current--;
}
td->iolog_buf = NULL;
}
-static int64_t iolog_items_to_fetch(struct thread_data *td)
+int64_t iolog_items_to_fetch(struct thread_data *td)
{
struct timespec now;
uint64_t elapsed;
} else
f = fopen(fname, "r");
- free(fname);
-
if (!f) {
perror("fopen read iolog");
return false;
*/
if (is_blktrace(fname, &need_swap)) {
td->io_log_blktrace = 1;
- ret = load_blktrace(td, fname, need_swap);
+ ret = init_blktrace_read(td, fname, need_swap);
} else {
td->io_log_blktrace = 0;
ret = init_iolog_read(td, fname);
}
+ free(fname);
} else if (td->o.write_iolog_file)
ret = init_iolog_write(td);
else
extern void queue_io_piece(struct thread_data *, struct io_piece *);
extern void prune_io_piece_log(struct thread_data *);
extern void write_iolog_close(struct thread_data *);
+int64_t iolog_items_to_fetch(struct thread_data *td);
extern int iolog_compress_init(struct thread_data *, struct sk_out *);
extern void iolog_compress_exit(struct thread_data *);
extern size_t log_chunk_sizes(struct io_log *);
__FIO_OPT_G_LIBCUFILE,
__FIO_OPT_G_DFS,
__FIO_OPT_G_NFS,
+ __FIO_OPT_G_WINDOWSAIO,
FIO_OPT_G_RATE = (1ULL << __FIO_OPT_G_RATE),
FIO_OPT_G_ZONE = (1ULL << __FIO_OPT_G_ZONE),
FIO_OPT_G_FILESTAT = (1ULL << __FIO_OPT_G_FILESTAT),
FIO_OPT_G_LIBCUFILE = (1ULL << __FIO_OPT_G_LIBCUFILE),
FIO_OPT_G_DFS = (1ULL << __FIO_OPT_G_DFS),
+ FIO_OPT_G_WINDOWSAIO = (1ULL << __FIO_OPT_G_WINDOWSAIO),
};
extern const struct opt_group *opt_group_from_mask(uint64_t *mask);
return ret;
}
+static int parse_cmdprio_bssplit_entry(struct thread_options *o,
+ struct split_prio *entry, char *str)
+{
+ int matches = 0;
+ char *bs_str = NULL;
+ long long bs_val;
+ unsigned int perc = 0, class, level;
+
+ /*
+ * valid entry formats:
+ * bs/ - %s/ - set perc to 0, prio to -1.
+ * bs/perc - %s/%u - set prio to -1.
+ * bs/perc/class/level - %s/%u/%u/%u
+ */
+ matches = sscanf(str, "%m[^/]/%u/%u/%u", &bs_str, &perc, &class, &level);
+ if (matches < 1) {
+ log_err("fio: invalid cmdprio_bssplit format\n");
+ return 1;
+ }
+
+ if (str_to_decimal(bs_str, &bs_val, 1, o, 0, 0)) {
+ log_err("fio: split conversion failed\n");
+ free(bs_str);
+ return 1;
+ }
+ free(bs_str);
+
+ entry->bs = bs_val;
+ entry->perc = min(perc, 100u);
+ entry->prio = -1;
+ switch (matches) {
+ case 1: /* bs/ case */
+ case 2: /* bs/perc case */
+ break;
+ case 4: /* bs/perc/class/level case */
+ class = min(class, (unsigned int) IOPRIO_MAX_PRIO_CLASS);
+ level = min(level, (unsigned int) IOPRIO_MAX_PRIO);
+ entry->prio = ioprio_value(class, level);
+ break;
+ default:
+ log_err("fio: invalid cmdprio_bssplit format\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Returns a negative integer if the first argument should be before the second
+ * argument in the sorted list. A positive integer if the first argument should
+ * be after the second argument in the sorted list. A zero if they are equal.
+ */
+static int fio_split_prio_cmp(const void *p1, const void *p2)
+{
+ const struct split_prio *tmp1 = p1;
+ const struct split_prio *tmp2 = p2;
+
+ if (tmp1->bs > tmp2->bs)
+ return 1;
+ if (tmp1->bs < tmp2->bs)
+ return -1;
+ return 0;
+}
+
+int split_parse_prio_ddir(struct thread_options *o, struct split_prio **entries,
+ int *nr_entries, char *str)
+{
+ struct split_prio *tmp_entries;
+ unsigned int nr_bssplits;
+ char *str_cpy, *p, *fname;
+
+ /* strsep modifies the string, dup it so that we can use strsep twice */
+ p = str_cpy = strdup(str);
+ if (!p)
+ return 1;
+
+ nr_bssplits = 0;
+ while ((fname = strsep(&str_cpy, ":")) != NULL) {
+ if (!strlen(fname))
+ break;
+ nr_bssplits++;
+ }
+ free(p);
+
+ if (nr_bssplits > BSSPLIT_MAX) {
+ log_err("fio: too many cmdprio_bssplit entries\n");
+ return 1;
+ }
+
+ tmp_entries = calloc(nr_bssplits, sizeof(*tmp_entries));
+ if (!tmp_entries)
+ return 1;
+
+ nr_bssplits = 0;
+ while ((fname = strsep(&str, ":")) != NULL) {
+ struct split_prio *entry;
+
+ if (!strlen(fname))
+ break;
+
+ entry = &tmp_entries[nr_bssplits];
+
+ if (parse_cmdprio_bssplit_entry(o, entry, fname)) {
+ log_err("fio: failed to parse cmdprio_bssplit entry\n");
+ free(tmp_entries);
+ return 1;
+ }
+
+ /* skip zero perc entries, they provide no useful information */
+ if (entry->perc)
+ nr_bssplits++;
+ }
+
+ qsort(tmp_entries, nr_bssplits, sizeof(*tmp_entries),
+ fio_split_prio_cmp);
+
+ *entries = tmp_entries;
+ *nr_entries = nr_bssplits;
+
+ return 0;
+}
+
static int str2error(char *str)
{
const char *err[] = { "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO",
ssize_t pread(int fildes, void *buf, size_t nbyte, off_t offset);
ssize_t pwrite(int fildes, const void *buf, size_t nbyte,
off_t offset);
+HANDLE windows_handle_connection(HANDLE hjob, int sk);
+HANDLE windows_create_job(void);
static inline int blockdev_size(struct fio_file *f, unsigned long long *bytes)
{
#ifndef FIO_HAVE_IOPRIO_CLASS
#define ioprio_value_is_class_rt(prio) (false)
+#define IOPRIO_MIN_PRIO_CLASS 0
+#define IOPRIO_MAX_PRIO_CLASS 0
#endif
#ifndef FIO_HAVE_IOPRIO
#define ioprio_value(prioclass, prio) (0)
#define ioprio_set(which, who, prioclass, prio) (0)
+#define IOPRIO_MIN_PRIO 0
+#define IOPRIO_MAX_PRIO 0
#endif
#ifndef FIO_HAVE_ODIRECT
hbo = ((nbo & 0xFF) << 24) + ((nbo & 0xFF00) << 8) + ((nbo & 0xFF0000) >> 8) + ((nbo & 0xFF000000) >> 24);
return hbo;
}
+
+static HANDLE create_named_pipe(char *pipe_name, int wait_connect_time)
+{
+ HANDLE hpipe;
+
+ hpipe = CreateNamedPipe (
+ pipe_name,
+ PIPE_ACCESS_DUPLEX,
+ PIPE_WAIT | PIPE_TYPE_BYTE,
+ 1, 0, 0, wait_connect_time, NULL);
+
+ if (hpipe == INVALID_HANDLE_VALUE) {
+ log_err("ConnectNamedPipe failed (%lu).\n", GetLastError());
+ return INVALID_HANDLE_VALUE;
+ }
+
+ if (!ConnectNamedPipe(hpipe, NULL)) {
+ log_err("ConnectNamedPipe failed (%lu).\n", GetLastError());
+ CloseHandle(hpipe);
+ return INVALID_HANDLE_VALUE;
+ }
+
+ return hpipe;
+}
+
+static BOOL windows_create_process(PROCESS_INFORMATION *pi, const char *args, HANDLE *hjob)
+{
+ LPSTR this_cmd_line = GetCommandLine();
+ LPSTR new_process_cmd_line = malloc(strlen(this_cmd_line) + strlen(args) + 1);
+ STARTUPINFO si = {0};
+ DWORD flags = 0;
+
+ strcpy(new_process_cmd_line, this_cmd_line);
+ strcat(new_process_cmd_line, args);
+
+ si.cb = sizeof(si);
+ memset(pi, 0, sizeof(*pi));
+
+ if ((hjob != NULL) && (*hjob != INVALID_HANDLE_VALUE))
+ flags = CREATE_SUSPENDED | CREATE_BREAKAWAY_FROM_JOB;
+
+ flags |= CREATE_NEW_CONSOLE;
+
+ if( !CreateProcess( NULL,
+ new_process_cmd_line,
+ NULL, /* Process handle not inherited */
+ NULL, /* Thread handle not inherited */
+ TRUE, /* inherit handles */
+ flags,
+ NULL, /* Use parent's environment block */
+ NULL, /* Use parent's starting directory */
+ &si,
+ pi )
+ )
+ {
+ log_err("CreateProcess failed (%lu).\n", GetLastError() );
+ free(new_process_cmd_line);
+ return 1;
+ }
+ if ((hjob != NULL) && (*hjob != INVALID_HANDLE_VALUE)) {
+ BOOL ret = AssignProcessToJobObject(*hjob, pi->hProcess);
+ if (!ret) {
+ log_err("AssignProcessToJobObject failed (%lu).\n", GetLastError() );
+ return 1;
+ }
+
+ ResumeThread(pi->hThread);
+ }
+
+ free(new_process_cmd_line);
+ return 0;
+}
+
+HANDLE windows_create_job(void)
+{
+ JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli = { 0 };
+ BOOL success;
+ HANDLE hjob = CreateJobObject(NULL, NULL);
+
+ jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
+ success = SetInformationJobObject(hjob, JobObjectExtendedLimitInformation, &jeli, sizeof(jeli));
+ if ( success == 0 ) {
+ log_err( "SetInformationJobObject failed: error %lu\n", GetLastError() );
+ return INVALID_HANDLE_VALUE;
+ }
+ return hjob;
+}
+
+/* wait for a child process to either exit or connect to a client */
+static bool monitor_process_till_connect(PROCESS_INFORMATION *pi, HANDLE *hpipe)
+{
+ bool connected = FALSE;
+ bool process_alive = TRUE;
+ char buffer[32] = {0};
+ DWORD bytes_read;
+
+ do {
+ DWORD exit_code;
+ GetExitCodeProcess(pi->hProcess, &exit_code);
+ if (exit_code != STILL_ACTIVE) {
+ dprint(FD_PROCESS, "process %u exited %d\n", GetProcessId(pi->hProcess), exit_code);
+ break;
+ }
+
+ memset(buffer, 0, sizeof(buffer));
+ ReadFile(*hpipe, &buffer, sizeof(buffer) - 1, &bytes_read, NULL);
+ if (bytes_read && strstr(buffer, "connected")) {
+ dprint(FD_PROCESS, "process %u connected to client\n", GetProcessId(pi->hProcess));
+ connected = TRUE;
+ }
+ usleep(10*1000);
+ } while (process_alive && !connected);
+ return connected;
+}
+
+/* create a process with --server-internal to emulate fork() */
+HANDLE windows_handle_connection(HANDLE hjob, int sk)
+{
+ char pipe_name[64] = "\\\\.\\pipe\\fiointernal-";
+ char args[128] = " --server-internal=";
+ PROCESS_INFORMATION pi;
+ HANDLE hpipe = INVALID_HANDLE_VALUE;
+ WSAPROTOCOL_INFO protocol_info;
+ HANDLE ret;
+
+ sprintf(pipe_name+strlen(pipe_name), "%d", GetCurrentProcessId());
+ sprintf(args+strlen(args), "%s", pipe_name);
+
+ if (windows_create_process(&pi, args, &hjob) != 0)
+ return INVALID_HANDLE_VALUE;
+ else
+ ret = pi.hProcess;
+
+ /* duplicate socket and write the protocol_info to pipe so child can
+ * duplicate the communication socket */
+ if (WSADuplicateSocket(sk, GetProcessId(pi.hProcess), &protocol_info)) {
+ log_err("WSADuplicateSocket failed (%lu).\n", GetLastError());
+ ret = INVALID_HANDLE_VALUE;
+ goto cleanup;
+ }
+
+ /* make a pipe with a unique name based upon processid */
+ hpipe = create_named_pipe(pipe_name, 1000);
+ if (hpipe == INVALID_HANDLE_VALUE) {
+ ret = INVALID_HANDLE_VALUE;
+ goto cleanup;
+ }
+
+ if (!WriteFile(hpipe, &protocol_info, sizeof(protocol_info), NULL, NULL)) {
+ log_err("WriteFile failed (%lu).\n", GetLastError());
+ ret = INVALID_HANDLE_VALUE;
+ goto cleanup;
+ }
+
+ dprint(FD_PROCESS, "process %d created child process %u\n", GetCurrentProcessId(), GetProcessId(pi.hProcess));
+
+ /* monitor the process until it either exits or connects. This level
+ * doesn't care which of those occurs because the result is that it
+ * needs to loop around and create another child process to monitor */
+ if (!monitor_process_till_connect(&pi, &hpipe))
+ ret = INVALID_HANDLE_VALUE;
+
+cleanup:
+ /* close the handles and pipes because this thread is done monitoring them */
+ if (ret == INVALID_HANDLE_VALUE)
+ CloseHandle(pi.hProcess);
+ CloseHandle(pi.hThread);
+ DisconnectNamedPipe(hpipe);
+ CloseHandle(hpipe);
+ return ret;
+}
\ No newline at end of file
int found = 0;
DIR *D;
+ /*
+ * If replay_redirect is set then always return this device
+ * upon lookup which overrides the device lookup based on
+ * major minor in the actual blktrace
+ */
+ if (redirect) {
+ strcpy(path, redirect);
+ return 1;
+ }
+
D = opendir(path);
if (!D)
return 0;
if (!S_ISBLK(st.st_mode))
continue;
- /*
- * If replay_redirect is set then always return this device
- * upon lookup which overrides the device lookup based on
- * major minor in the actual blktrace
- */
- if (redirect) {
- strcpy(path, redirect);
- found = 1;
- break;
- }
-
if (maj == major(st.st_rdev) && min == minor(st.st_rdev)) {
strcpy(path, full_path);
found = 1;
struct thread_data *td = sw->priv;
(*sum_cnt)++;
- sum_thread_stats(&sw->wq->td->ts, &td->ts, *sum_cnt == 1);
+
+ /*
+ * io_workqueue_update_acct_fn() doesn't support per prio stats, and
+ * even if it did, offload can't be used with all async IO engines.
+ * If group reporting is set in the parent td, the group result
+ * generated by __show_run_stats() can still contain multiple prios
+ * from different offloaded jobs.
+ */
+ sw->wq->td->ts.disable_prio_stat = 1;
+ sum_thread_stats(&sw->wq->td->ts, &td->ts);
fio_options_free(td);
close_and_free_files(td);
static pthread_key_t sk_out_key;
+#ifdef WIN32
+static char *fio_server_pipe_name = NULL;
+static HANDLE hjob = INVALID_HANDLE_VALUE;
+struct ffi_element {
+ union {
+ pthread_t thread;
+ HANDLE hProcess;
+ };
+ bool is_thread;
+};
+#endif
+
struct fio_fork_item {
struct flist_head list;
int exitval;
int signal;
int exited;
+#ifdef WIN32
+ struct ffi_element element;
+#else
pid_t pid;
+#endif
};
struct cmd_reply {
return fio_sendv_data(sk, &iov, 1);
}
+bool fio_server_poll_fd(int fd, short events, int timeout)
+{
+ struct pollfd pfd = {
+ .fd = fd,
+ .events = events,
+ };
+ int ret;
+
+ ret = poll(&pfd, 1, timeout);
+ if (ret < 0) {
+ if (errno == EINTR)
+ return false;
+ log_err("fio: poll: %s\n", strerror(errno));
+ return false;
+ } else if (!ret) {
+ return false;
+ }
+ if (pfd.revents & events)
+ return true;
+ return false;
+}
+
static int fio_recv_data(int sk, void *buf, unsigned int len, bool wait)
{
int flags;
return fio_net_send_ack(NULL, error, signal);
}
+#ifdef WIN32
+static void fio_server_add_fork_item(struct ffi_element *element, struct flist_head *list)
+{
+ struct fio_fork_item *ffi;
+
+ ffi = malloc(sizeof(*ffi));
+ ffi->exitval = 0;
+ ffi->signal = 0;
+ ffi->exited = 0;
+ ffi->element = *element;
+ flist_add_tail(&ffi->list, list);
+}
+
+static void fio_server_add_conn_pid(struct flist_head *conn_list, HANDLE hProcess)
+{
+ struct ffi_element element = {.hProcess = hProcess, .is_thread=FALSE};
+ dprint(FD_NET, "server: forked off connection job (tid=%u)\n", (int) element.thread);
+
+ fio_server_add_fork_item(&element, conn_list);
+}
+
+static void fio_server_add_job_pid(struct flist_head *job_list, pthread_t thread)
+{
+ struct ffi_element element = {.thread = thread, .is_thread=TRUE};
+ dprint(FD_NET, "server: forked off job job (tid=%u)\n", (int) element.thread);
+ fio_server_add_fork_item(&element, job_list);
+}
+
+static void fio_server_check_fork_item(struct fio_fork_item *ffi)
+{
+ int ret;
+
+ if (ffi->element.is_thread) {
+
+ ret = pthread_kill(ffi->element.thread, 0);
+ if (ret) {
+ int rev_val;
+ pthread_join(ffi->element.thread, (void**) &rev_val); /*if the thread is dead, then join it to get status*/
+
+ ffi->exitval = rev_val;
+ if (ffi->exitval)
+ log_err("thread (tid=%u) exited with %x\n", (int) ffi->element.thread, (int) ffi->exitval);
+ dprint(FD_PROCESS, "thread (tid=%u) exited with %x\n", (int) ffi->element.thread, (int) ffi->exitval);
+ ffi->exited = 1;
+ }
+ } else {
+ DWORD exit_val;
+ GetExitCodeProcess(ffi->element.hProcess, &exit_val);
+
+ if (exit_val != STILL_ACTIVE) {
+ dprint(FD_PROCESS, "process %u exited with %d\n", GetProcessId(ffi->element.hProcess), exit_val);
+ ffi->exited = 1;
+ ffi->exitval = exit_val;
+ }
+ }
+}
+#else
static void fio_server_add_fork_item(pid_t pid, struct flist_head *list)
{
struct fio_fork_item *ffi;
}
}
}
+#endif
static void fio_server_fork_item_done(struct fio_fork_item *ffi, bool stop)
{
+#ifdef WIN32
+ if (ffi->element.is_thread)
+ dprint(FD_NET, "tid %u exited, sig=%u, exitval=%d\n", (int) ffi->element.thread, ffi->signal, ffi->exitval);
+ else {
+ dprint(FD_NET, "pid %u exited, sig=%u, exitval=%d\n", (int) GetProcessId(ffi->element.hProcess), ffi->signal, ffi->exitval);
+ CloseHandle(ffi->element.hProcess);
+ ffi->element.hProcess = INVALID_HANDLE_VALUE;
+ }
+#else
dprint(FD_NET, "pid %u exited, sig=%u, exitval=%d\n", (int) ffi->pid, ffi->signal, ffi->exitval);
+#endif
/*
* Fold STOP and QUIT...
return 0;
}
-static int handle_run_cmd(struct sk_out *sk_out, struct flist_head *job_list,
- struct fio_net_cmd *cmd)
+#ifdef WIN32
+static void *fio_backend_thread(void *data)
{
- pid_t pid;
int ret;
+ struct sk_out *sk_out = (struct sk_out *) data;
sk_out_assign(sk_out);
+ ret = fio_backend(sk_out);
+ sk_out_drop();
+
+ pthread_exit((void*) (intptr_t) ret);
+ return NULL;
+}
+#endif
+
+static int handle_run_cmd(struct sk_out *sk_out, struct flist_head *job_list,
+ struct fio_net_cmd *cmd)
+{
+ int ret;
+
fio_time_init();
set_genesis_time();
- pid = fork();
- if (pid) {
- fio_server_add_job_pid(job_list, pid);
- return 0;
+#ifdef WIN32
+ {
+ pthread_t thread;
+ /*
+ * Both this thread and backend_thread call sk_out_assign(), so the ref
+ * count is incremented twice. This ensures the struct stays valid until
+ * both threads are done with it.
+ */
+ sk_out_assign(sk_out);
+ ret = pthread_create(&thread, NULL, fio_backend_thread, sk_out);
+ if (ret) {
+ log_err("pthread_create: %s\n", strerror(ret));
+ return ret;
+ }
+
+ fio_server_add_job_pid(job_list, thread);
+ return ret;
}
+#else
+ {
+ pid_t pid;
+ sk_out_assign(sk_out);
+ pid = fork();
+ if (pid) {
+ fio_server_add_job_pid(job_list, pid);
+ return 0;
+ }
- ret = fio_backend(sk_out);
- free_threads_shm();
- sk_out_drop();
- _exit(ret);
+ ret = fio_backend(sk_out);
+ free_threads_shm();
+ sk_out_drop();
+ _exit(ret);
+ }
+#endif
}
static int handle_job_cmd(struct fio_net_cmd *cmd)
if (ret < 0)
break;
- cmd = fio_net_recv_cmd(sk_out->sk, true);
+ if (pfd.revents & POLLIN)
+ cmd = fio_net_recv_cmd(sk_out->sk, true);
if (!cmd) {
ret = -1;
break;
return 0;
}
+#ifdef WIN32
+static int handle_connection_process(void)
+{
+ WSAPROTOCOL_INFO protocol_info;
+ DWORD bytes_read;
+ HANDLE hpipe;
+ int sk;
+ struct sk_out *sk_out;
+ int ret;
+ char *msg = (char *) "connected";
+
+ log_info("server enter accept loop. ProcessID %d\n", GetCurrentProcessId());
+
+ hpipe = CreateFile(
+ fio_server_pipe_name,
+ GENERIC_READ | GENERIC_WRITE,
+ 0, NULL,
+ OPEN_EXISTING,
+ 0, NULL);
+
+ if (hpipe == INVALID_HANDLE_VALUE) {
+ log_err("couldnt open pipe %s error %lu\n",
+ fio_server_pipe_name, GetLastError());
+ return -1;
+ }
+
+ if (!ReadFile(hpipe, &protocol_info, sizeof(protocol_info), &bytes_read, NULL)) {
+ log_err("couldnt read pi from pipe %s error %lu\n", fio_server_pipe_name,
+ GetLastError());
+ }
+
+ if (use_ipv6) /* use protocol_info to create a duplicate of the parent's socket */
+ sk = WSASocket(AF_INET6, SOCK_STREAM, 0, &protocol_info, 0, 0);
+ else
+ sk = WSASocket(AF_INET, SOCK_STREAM, 0, &protocol_info, 0, 0);
+
+ sk_out = scalloc(1, sizeof(*sk_out));
+ if (!sk_out) {
+ CloseHandle(hpipe);
+ close(sk);
+ return -1;
+ }
+
+ sk_out->sk = sk;
+ sk_out->hProcess = INVALID_HANDLE_VALUE;
+ INIT_FLIST_HEAD(&sk_out->list);
+ __fio_sem_init(&sk_out->lock, FIO_SEM_UNLOCKED);
+ __fio_sem_init(&sk_out->wait, FIO_SEM_LOCKED);
+ __fio_sem_init(&sk_out->xmit, FIO_SEM_UNLOCKED);
+
+ get_my_addr_str(sk);
+
+ /* WriteFile() requires a byte count pointer when no OVERLAPPED is given */
+ if (!WriteFile(hpipe, msg, strlen(msg), &bytes_read, NULL)) {
+ log_err("couldnt write pipe\n");
+ close(sk);
+ return -1;
+ }
+ CloseHandle(hpipe);
+
+ sk_out_assign(sk_out);
+
+ ret = handle_connection(sk_out);
+ __sk_out_drop(sk_out);
+ return ret;
+}
+#endif
+
static int accept_loop(int listen_sk)
{
struct sockaddr_in addr;
struct sk_out *sk_out;
const char *from;
char buf[64];
+#ifdef WIN32
+ HANDLE hProcess;
+#else
pid_t pid;
-
+#endif
pfd.fd = listen_sk;
pfd.events = POLLIN;
do {
__fio_sem_init(&sk_out->wait, FIO_SEM_LOCKED);
__fio_sem_init(&sk_out->xmit, FIO_SEM_UNLOCKED);
+#ifdef WIN32
+ hProcess = windows_handle_connection(hjob, sk);
+ if (hProcess == INVALID_HANDLE_VALUE)
+ return -1;
+ sk_out->hProcess = hProcess;
+ fio_server_add_conn_pid(&conn_list, hProcess);
+#else
pid = fork();
if (pid) {
close(sk);
*/
sk_out_assign(sk_out);
handle_connection(sk_out);
+#endif
}
return exitval;
{
struct cmd_ts_pdu p;
int i, j, k;
- void *ss_buf;
- uint64_t *ss_iops, *ss_bw;
+ size_t clat_prio_stats_extra_size = 0;
+ size_t ss_extra_size = 0;
+ size_t extended_buf_size = 0;
+ void *extended_buf;
+ void *extended_buf_wp;
dprint(FD_NET, "server sending end stats\n");
p.ts.pid = cpu_to_le32(ts->pid);
p.ts.members = cpu_to_le32(ts->members);
p.ts.unified_rw_rep = cpu_to_le32(ts->unified_rw_rep);
+ p.ts.ioprio = cpu_to_le32(ts->ioprio);
+ p.ts.disable_prio_stat = cpu_to_le32(ts->disable_prio_stat);
for (i = 0; i < DDIR_RWDIR_CNT; i++) {
convert_io_stat(&p.ts.clat_stat[i], &ts->clat_stat[i]);
p.ts.cachehit = cpu_to_le64(ts->cachehit);
p.ts.cachemiss = cpu_to_le64(ts->cachemiss);
+ convert_gs(&p.rs, rs);
+
for (i = 0; i < DDIR_RWDIR_CNT; i++) {
- for (j = 0; j < FIO_IO_U_PLAT_NR; j++) {
- p.ts.io_u_plat_high_prio[i][j] = cpu_to_le64(ts->io_u_plat_high_prio[i][j]);
- p.ts.io_u_plat_low_prio[i][j] = cpu_to_le64(ts->io_u_plat_low_prio[i][j]);
+ if (ts->nr_clat_prio[i])
+ clat_prio_stats_extra_size += ts->nr_clat_prio[i] * sizeof(*ts->clat_prio[i]);
+ }
+ extended_buf_size += clat_prio_stats_extra_size;
+
+ dprint(FD_NET, "ts->ss_state = %d\n", ts->ss_state);
+ if (ts->ss_state & FIO_SS_DATA)
+ ss_extra_size = 2 * ts->ss_dur * sizeof(uint64_t);
+
+ extended_buf_size += ss_extra_size;
+ if (!extended_buf_size) {
+ fio_net_queue_cmd(FIO_NET_CMD_TS, &p, sizeof(p), NULL, SK_F_COPY);
+ return;
+ }
+
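+ /*
+ * The extended payload starts with the fixed size cmd_ts_pdu, followed
+ * by the per priority clat stats for each data direction and then the
+ * steady state iops/bw ring buffers. The locations of the variable
+ * sized parts are recorded in the pdu as offsets from the start of the
+ * payload (clat_prio_offset[], ss_iops_data_offset and
+ * ss_bw_data_offset) so that the receiver can find them again.
+ */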
+ extended_buf_size += sizeof(p);
+ extended_buf = calloc(1, extended_buf_size);
+ if (!extended_buf) {
+ log_err("fio: failed to allocate FIO_NET_CMD_TS buffer\n");
+ return;
+ }
+
+ memcpy(extended_buf, &p, sizeof(p));
+ extended_buf_wp = (struct cmd_ts_pdu *)extended_buf + 1;
+
+ if (clat_prio_stats_extra_size) {
+ for (i = 0; i < DDIR_RWDIR_CNT; i++) {
+ struct clat_prio_stat *prio = (struct clat_prio_stat *) extended_buf_wp;
+
+ for (j = 0; j < ts->nr_clat_prio[i]; j++) {
+ for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
+ prio->io_u_plat[k] =
+ cpu_to_le64(ts->clat_prio[i][j].io_u_plat[k]);
+ convert_io_stat(&prio->clat_stat,
+ &ts->clat_prio[i][j].clat_stat);
+ prio->ioprio = cpu_to_le32(ts->clat_prio[i][j].ioprio);
+ prio++;
+ }
+
+ if (ts->nr_clat_prio[i]) {
+ uint64_t offset = (char *)extended_buf_wp - (char *)extended_buf;
+ struct cmd_ts_pdu *ptr = extended_buf;
+
+ ptr->ts.clat_prio_offset[i] = cpu_to_le64(offset);
+ ptr->ts.nr_clat_prio[i] = cpu_to_le32(ts->nr_clat_prio[i]);
+ }
+
+ extended_buf_wp = prio;
}
- convert_io_stat(&p.ts.clat_high_prio_stat[i], &ts->clat_high_prio_stat[i]);
- convert_io_stat(&p.ts.clat_low_prio_stat[i], &ts->clat_low_prio_stat[i]);
}
- convert_gs(&p.rs, rs);
+ if (ss_extra_size) {
+ uint64_t *ss_iops, *ss_bw;
+ uint64_t offset;
+ struct cmd_ts_pdu *ptr = extended_buf;
- dprint(FD_NET, "ts->ss_state = %d\n", ts->ss_state);
- if (ts->ss_state & FIO_SS_DATA) {
dprint(FD_NET, "server sending steadystate ring buffers\n");
- ss_buf = malloc(sizeof(p) + 2*ts->ss_dur*sizeof(uint64_t));
+ /* ss iops */
+ ss_iops = (uint64_t *) extended_buf_wp;
+ for (i = 0; i < ts->ss_dur; i++)
+ ss_iops[i] = cpu_to_le64(ts->ss_iops_data[i]);
- memcpy(ss_buf, &p, sizeof(p));
+ offset = (char *)extended_buf_wp - (char *)extended_buf;
+ ptr->ts.ss_iops_data_offset = cpu_to_le64(offset);
+ extended_buf_wp = ss_iops + (int) ts->ss_dur;
- ss_iops = (uint64_t *) ((struct cmd_ts_pdu *)ss_buf + 1);
- ss_bw = ss_iops + (int) ts->ss_dur;
- for (i = 0; i < ts->ss_dur; i++) {
- ss_iops[i] = cpu_to_le64(ts->ss_iops_data[i]);
+ /* ss bw */
+ ss_bw = extended_buf_wp;
+ for (i = 0; i < ts->ss_dur; i++)
ss_bw[i] = cpu_to_le64(ts->ss_bw_data[i]);
- }
-
- fio_net_queue_cmd(FIO_NET_CMD_TS, ss_buf, sizeof(p) + 2*ts->ss_dur*sizeof(uint64_t), NULL, SK_F_COPY);
- free(ss_buf);
+ offset = (char *)extended_buf_wp - (char *)extended_buf;
+ ptr->ts.ss_bw_data_offset = cpu_to_le64(offset);
+ extended_buf_wp = ss_bw + (int) ts->ss_dur;
}
- else
- fio_net_queue_cmd(FIO_NET_CMD_TS, &p, sizeof(p), NULL, SK_F_COPY);
+
+ fio_net_queue_cmd(FIO_NET_CMD_TS, extended_buf, extended_buf_size, NULL, SK_F_COPY);
+ free(extended_buf);
}
void fio_server_send_gs(struct group_run_stats *rs)
if (fio_handle_server_arg())
return -1;
+ set_sig_handlers();
+
+#ifdef WIN32
+ /* if this is a child process, go handle the connection */
+ if (fio_server_pipe_name != NULL) {
+ ret = handle_connection_process();
+ return ret;
+ }
+
+ /* create a job object to link child processes so they terminate together */
+ hjob = windows_create_job();
+ if (hjob == INVALID_HANDLE_VALUE)
+ return -1;
+#endif
+
sk = fio_init_server_connection();
if (sk < 0)
return -1;
- set_sig_handlers();
-
ret = accept_loop(sk);
close(sk);
{
fio_server_arg = strdup(arg);
}
+
+#ifdef WIN32
+void fio_server_internal_set(const char *arg)
+{
+ fio_server_pipe_name = strdup(arg);
+}
+#endif
unsigned int refs; /* frees sk_out when it drops to zero.
* protected by below ->lock */
+#ifdef WIN32
+ HANDLE hProcess; /* process handle of handle_connection_process */
+#endif
int sk; /* socket fd to talk to client */
struct fio_sem lock; /* protects ref and below list */
struct flist_head list; /* list of pending transmit work */
};
enum {
- FIO_SERVER_VER = 95,
+ FIO_SERVER_VER = 96,
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
FIO_SERVER_MAX_CMD_MB = 2048,
extern int fio_net_send_cmd(int, uint16_t, const void *, off_t, uint64_t *, struct flist_head *);
extern int fio_net_send_simple_cmd(int, uint16_t, uint64_t, struct flist_head *);
extern void fio_server_set_arg(const char *);
+extern void fio_server_internal_set(const char *);
extern int fio_server_parse_string(const char *, char **, bool *, int *, struct in_addr *, struct in6_addr *, int *);
extern int fio_server_parse_host(const char *, int, struct in_addr *, struct in6_addr *);
extern const char *fio_server_op(unsigned int);
extern void fio_server_send_du(void);
extern void fio_server_send_job_options(struct flist_head *, unsigned int);
extern int fio_server_get_verify_state(const char *, int, void **);
+extern bool fio_server_poll_fd(int fd, short events, int timeout);
extern struct fio_net_cmd *fio_net_recv_cmd(int sk, bool wait);
free(ovals);
}
+static int get_nr_prios_with_samples(struct thread_stat *ts, enum fio_ddir ddir)
+{
+ int i, nr_prios_with_samples = 0;
+
+ for (i = 0; i < ts->nr_clat_prio[ddir]; i++) {
+ if (ts->clat_prio[ddir][i].clat_stat.samples)
+ nr_prios_with_samples++;
+ }
+
+ return nr_prios_with_samples;
+}
+
bool calc_lat(struct io_stat *is, unsigned long long *min,
unsigned long long *max, double *mean, double *dev)
{
{
char *io, *agg, *min, *max;
char *ioalt, *aggalt, *minalt, *maxalt;
- uint64_t io_mix = 0, agg_mix = 0, min_mix = -1, max_mix = 0, min_run = -1, max_run = 0;
- int i;
+ uint64_t io_mix = 0, agg_mix = 0, min_mix = -1, max_mix = 0;
+ uint64_t min_run = -1, max_run = 0;
const int i2p = is_power_of_2(rs->kb_base);
+ int i;
for (i = 0; i < DDIR_RWDIR_CNT; i++) {
if (!rs->max_run[i])
free(minalt);
free(maxalt);
}
-
+
/* Need to aggregate statistics to show mixed values */
- if (rs->unified_rw_rep == UNIFIED_BOTH)
+ if (rs->unified_rw_rep == UNIFIED_BOTH)
show_mixed_group_stats(rs, out);
}
free(maxp);
}
-static double convert_agg_kbytes_percent(struct group_run_stats *rs, int ddir, int mean)
+static struct thread_stat *gen_mixed_ddir_stats_from_ts(struct thread_stat *ts)
{
- double p_of_agg = 100.0;
- if (rs && rs->agg[ddir] > 1024) {
- p_of_agg = mean * 100.0 / (double) (rs->agg[ddir] / 1024.0);
-
- if (p_of_agg > 100.0)
- p_of_agg = 100.0;
- }
- return p_of_agg;
-}
-
-static void show_mixed_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
- struct buf_output *out)
-{
- unsigned long runt;
- unsigned long long min, max, bw, iops;
- double mean, dev;
- char *io_p, *bw_p, *bw_p_alt, *iops_p, *post_st = NULL;
struct thread_stat *ts_lcl;
- int i2p;
- int ddir = 0;
-
- /* Handle aggregation of Reads (ddir = 0), Writes (ddir = 1), and Trims (ddir = 2) */
+ /*
+ * Handle aggregation of Reads (ddir = 0), Writes (ddir = 1), and
+ * Trims (ddir = 2)
+ */
ts_lcl = malloc(sizeof(struct thread_stat));
- memset((void *)ts_lcl, 0, sizeof(struct thread_stat));
- ts_lcl->unified_rw_rep = UNIFIED_MIXED; /* calculate mixed stats */
- init_thread_stat_min_vals(ts_lcl);
-
- sum_thread_stats(ts_lcl, ts, 1);
-
- assert(ddir_rw(ddir));
-
- if (!ts_lcl->runtime[ddir])
- return;
-
- i2p = is_power_of_2(rs->kb_base);
- runt = ts_lcl->runtime[ddir];
-
- bw = (1000 * ts_lcl->io_bytes[ddir]) / runt;
- io_p = num2str(ts_lcl->io_bytes[ddir], ts->sig_figs, 1, i2p, N2S_BYTE);
- bw_p = num2str(bw, ts->sig_figs, 1, i2p, ts->unit_base);
- bw_p_alt = num2str(bw, ts->sig_figs, 1, !i2p, ts->unit_base);
-
- iops = (1000 * ts_lcl->total_io_u[ddir]) / runt;
- iops_p = num2str(iops, ts->sig_figs, 1, 0, N2S_NONE);
-
- log_buf(out, " mixed: IOPS=%s, BW=%s (%s)(%s/%llumsec)%s\n",
- iops_p, bw_p, bw_p_alt, io_p,
- (unsigned long long) ts_lcl->runtime[ddir],
- post_st ? : "");
-
- free(post_st);
- free(io_p);
- free(bw_p);
- free(bw_p_alt);
- free(iops_p);
-
- if (calc_lat(&ts_lcl->slat_stat[ddir], &min, &max, &mean, &dev))
- display_lat("slat", min, max, mean, dev, out);
- if (calc_lat(&ts_lcl->clat_stat[ddir], &min, &max, &mean, &dev))
- display_lat("clat", min, max, mean, dev, out);
- if (calc_lat(&ts_lcl->lat_stat[ddir], &min, &max, &mean, &dev))
- display_lat(" lat", min, max, mean, dev, out);
- if (calc_lat(&ts_lcl->clat_high_prio_stat[ddir], &min, &max, &mean, &dev)) {
- display_lat(ts_lcl->lat_percentiles ? "high prio_lat" : "high prio_clat",
- min, max, mean, dev, out);
- if (calc_lat(&ts_lcl->clat_low_prio_stat[ddir], &min, &max, &mean, &dev))
- display_lat(ts_lcl->lat_percentiles ? "low prio_lat" : "low prio_clat",
- min, max, mean, dev, out);
- }
-
- if (ts->slat_percentiles && ts_lcl->slat_stat[ddir].samples > 0)
- show_clat_percentiles(ts_lcl->io_u_plat[FIO_SLAT][ddir],
- ts_lcl->slat_stat[ddir].samples,
- ts->percentile_list,
- ts->percentile_precision, "slat", out);
- if (ts->clat_percentiles && ts_lcl->clat_stat[ddir].samples > 0)
- show_clat_percentiles(ts_lcl->io_u_plat[FIO_CLAT][ddir],
- ts_lcl->clat_stat[ddir].samples,
- ts->percentile_list,
- ts->percentile_precision, "clat", out);
- if (ts->lat_percentiles && ts_lcl->lat_stat[ddir].samples > 0)
- show_clat_percentiles(ts_lcl->io_u_plat[FIO_LAT][ddir],
- ts_lcl->lat_stat[ddir].samples,
- ts->percentile_list,
- ts->percentile_precision, "lat", out);
-
- if (ts->clat_percentiles || ts->lat_percentiles) {
- const char *name = ts->lat_percentiles ? "lat" : "clat";
- char prio_name[32];
- uint64_t samples;
-
- if (ts->lat_percentiles)
- samples = ts_lcl->lat_stat[ddir].samples;
- else
- samples = ts_lcl->clat_stat[ddir].samples;
-
- /* Only print this if some high and low priority stats were collected */
- if (ts_lcl->clat_high_prio_stat[ddir].samples > 0 &&
- ts_lcl->clat_low_prio_stat[ddir].samples > 0)
- {
- sprintf(prio_name, "high prio (%.2f%%) %s",
- 100. * (double) ts_lcl->clat_high_prio_stat[ddir].samples / (double) samples,
- name);
- show_clat_percentiles(ts_lcl->io_u_plat_high_prio[ddir],
- ts_lcl->clat_high_prio_stat[ddir].samples,
- ts->percentile_list,
- ts->percentile_precision, prio_name, out);
-
- sprintf(prio_name, "low prio (%.2f%%) %s",
- 100. * (double) ts_lcl->clat_low_prio_stat[ddir].samples / (double) samples,
- name);
- show_clat_percentiles(ts_lcl->io_u_plat_low_prio[ddir],
- ts_lcl->clat_low_prio_stat[ddir].samples,
- ts->percentile_list,
- ts->percentile_precision, prio_name, out);
- }
+ if (!ts_lcl) {
+ log_err("fio: failed to allocate local thread stat\n");
+ return NULL;
}
- if (calc_lat(&ts_lcl->bw_stat[ddir], &min, &max, &mean, &dev)) {
- double p_of_agg = 100.0, fkb_base = (double)rs->kb_base;
- const char *bw_str;
+ init_thread_stat(ts_lcl);
- if ((rs->unit_base == 1) && i2p)
- bw_str = "Kibit";
- else if (rs->unit_base == 1)
- bw_str = "kbit";
- else if (i2p)
- bw_str = "KiB";
- else
- bw_str = "kB";
+ /* calculate mixed stats */
+ ts_lcl->unified_rw_rep = UNIFIED_MIXED;
+ ts_lcl->lat_percentiles = ts->lat_percentiles;
+ ts_lcl->clat_percentiles = ts->clat_percentiles;
+ ts_lcl->slat_percentiles = ts->slat_percentiles;
+ ts_lcl->percentile_precision = ts->percentile_precision;
+ memcpy(ts_lcl->percentile_list, ts->percentile_list, sizeof(ts->percentile_list));
- p_of_agg = convert_agg_kbytes_percent(rs, ddir, mean);
+ sum_thread_stats(ts_lcl, ts);
- if (rs->unit_base == 1) {
- min *= 8.0;
- max *= 8.0;
- mean *= 8.0;
- dev *= 8.0;
- }
+ return ts_lcl;
+}
- if (mean > fkb_base * fkb_base) {
- min /= fkb_base;
- max /= fkb_base;
- mean /= fkb_base;
- dev /= fkb_base;
- bw_str = (rs->unit_base == 1 ? "Mibit" : "MiB");
- }
+static double convert_agg_kbytes_percent(struct group_run_stats *rs,
+ enum fio_ddir ddir, int mean)
+{
+ double p_of_agg = 100.0;
+ if (rs && rs->agg[ddir] > 1024) {
+ p_of_agg = mean * 100.0 / (double) (rs->agg[ddir] / 1024.0);
- log_buf(out, " bw (%5s/s): min=%5llu, max=%5llu, per=%3.2f%%, "
- "avg=%5.02f, stdev=%5.02f, samples=%" PRIu64 "\n",
- bw_str, min, max, p_of_agg, mean, dev,
- (&ts_lcl->bw_stat[ddir])->samples);
- }
- if (calc_lat(&ts_lcl->iops_stat[ddir], &min, &max, &mean, &dev)) {
- log_buf(out, " iops : min=%5llu, max=%5llu, "
- "avg=%5.02f, stdev=%5.02f, samples=%" PRIu64 "\n",
- min, max, mean, dev, (&ts_lcl->iops_stat[ddir])->samples);
+ if (p_of_agg > 100.0)
+ p_of_agg = 100.0;
}
-
- free(ts_lcl);
+ return p_of_agg;
}
static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts,
- int ddir, struct buf_output *out)
+ enum fio_ddir ddir, struct buf_output *out)
{
unsigned long runt;
unsigned long long min, max, bw, iops;
double mean, dev;
char *io_p, *bw_p, *bw_p_alt, *iops_p, *post_st = NULL;
- int i2p;
+ int i2p, i;
+ const char *clat_type = ts->lat_percentiles ? "lat" : "clat";
if (ddir_sync(ddir)) {
if (calc_lat(&ts->sync_stat, &min, &max, &mean, &dev)) {
display_lat("clat", min, max, mean, dev, out);
if (calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev))
display_lat(" lat", min, max, mean, dev, out);
- if (calc_lat(&ts->clat_high_prio_stat[ddir], &min, &max, &mean, &dev)) {
- display_lat(ts->lat_percentiles ? "high prio_lat" : "high prio_clat",
- min, max, mean, dev, out);
- if (calc_lat(&ts->clat_low_prio_stat[ddir], &min, &max, &mean, &dev))
- display_lat(ts->lat_percentiles ? "low prio_lat" : "low prio_clat",
- min, max, mean, dev, out);
+
+ /* Only print per prio stats if there are >= 2 prios with samples */
+ if (get_nr_prios_with_samples(ts, ddir) >= 2) {
+ for (i = 0; i < ts->nr_clat_prio[ddir]; i++) {
+ if (calc_lat(&ts->clat_prio[ddir][i].clat_stat, &min,
+ &max, &mean, &dev)) {
+ char buf[64];
+
+ snprintf(buf, sizeof(buf),
+ "%s prio %u/%u",
+ clat_type,
+ ts->clat_prio[ddir][i].ioprio >> 13,
+ ts->clat_prio[ddir][i].ioprio & 7);
+ display_lat(buf, min, max, mean, dev, out);
+ }
+ }
}
if (ts->slat_percentiles && ts->slat_stat[ddir].samples > 0)
ts->percentile_precision, "lat", out);
if (ts->clat_percentiles || ts->lat_percentiles) {
- const char *name = ts->lat_percentiles ? "lat" : "clat";
- char prio_name[32];
+ char prio_name[64];
uint64_t samples;
if (ts->lat_percentiles)
else
samples = ts->clat_stat[ddir].samples;
- /* Only print this if some high and low priority stats were collected */
- if (ts->clat_high_prio_stat[ddir].samples > 0 &&
- ts->clat_low_prio_stat[ddir].samples > 0)
- {
- sprintf(prio_name, "high prio (%.2f%%) %s",
- 100. * (double) ts->clat_high_prio_stat[ddir].samples / (double) samples,
- name);
- show_clat_percentiles(ts->io_u_plat_high_prio[ddir],
- ts->clat_high_prio_stat[ddir].samples,
- ts->percentile_list,
- ts->percentile_precision, prio_name, out);
-
- sprintf(prio_name, "low prio (%.2f%%) %s",
- 100. * (double) ts->clat_low_prio_stat[ddir].samples / (double) samples,
- name);
- show_clat_percentiles(ts->io_u_plat_low_prio[ddir],
- ts->clat_low_prio_stat[ddir].samples,
- ts->percentile_list,
- ts->percentile_precision, prio_name, out);
+ /* Only print per prio stats if there are >= 2 prios with samples */
+ if (get_nr_prios_with_samples(ts, ddir) >= 2) {
+ for (i = 0; i < ts->nr_clat_prio[ddir]; i++) {
+ uint64_t prio_samples = ts->clat_prio[ddir][i].clat_stat.samples;
+
+ if (prio_samples > 0) {
+ snprintf(prio_name, sizeof(prio_name),
+ "%s prio %u/%u (%.2f%% of IOs)",
+ clat_type,
+ ts->clat_prio[ddir][i].ioprio >> 13,
+ ts->clat_prio[ddir][i].ioprio & 7,
+ 100. * (double) prio_samples / (double) samples);
+ show_clat_percentiles(ts->clat_prio[ddir][i].io_u_plat,
+ prio_samples, ts->percentile_list,
+ ts->percentile_precision,
+ prio_name, out);
+ }
+ }
}
}
}
}
+static void show_mixed_ddir_status(struct group_run_stats *rs,
+ struct thread_stat *ts,
+ struct buf_output *out)
+{
+ struct thread_stat *ts_lcl = gen_mixed_ddir_stats_from_ts(ts);
+
+ if (ts_lcl)
+ show_ddir_status(rs, ts_lcl, DDIR_READ, out);
+
+ free_clat_prio_stats(ts_lcl);
+ free(ts_lcl);
+}
+
static bool show_lat(double *io_u_lat, int nr, const char **ranges,
const char *msg, struct buf_output *out)
{
if (!is_running_backend())
return;
- if (flist_empty(&disk_list)) {
+ if (flist_empty(&disk_list))
return;
- }
if ((output_format & FIO_OUTPUT_JSON) && parent)
do_json = true;
if (!terse && !do_json)
log_buf(out, "\nDisk stats (read/write):\n");
- if (do_json)
+ if (do_json) {
json_object_add_disk_utils(parent, &disk_list);
- else if (output_format & ~(FIO_OUTPUT_JSON | FIO_OUTPUT_JSON_PLUS)) {
+ } else if (output_format & ~(FIO_OUTPUT_JSON | FIO_OUTPUT_JSON_PLUS)) {
flist_for_each(entry, &disk_list) {
du = flist_entry(entry, struct disk_util, list);
}
static void show_ddir_status_terse(struct thread_stat *ts,
- struct group_run_stats *rs, int ddir,
- int ver, struct buf_output *out)
+ struct group_run_stats *rs,
+ enum fio_ddir ddir, int ver,
+ struct buf_output *out)
{
unsigned long long min, max, minv, maxv, bw, iops;
unsigned long long *ovals = NULL;
else
log_buf(out, ";%llu;%llu;%f;%f", 0ULL, 0ULL, 0.0, 0.0);
- if (ts->lat_percentiles)
+ if (ts->lat_percentiles) {
len = calc_clat_percentiles(ts->io_u_plat[FIO_LAT][ddir],
ts->lat_stat[ddir].samples,
ts->percentile_list, &ovals, &maxv,
&minv);
- else if (ts->clat_percentiles)
+ } else if (ts->clat_percentiles) {
len = calc_clat_percentiles(ts->io_u_plat[FIO_CLAT][ddir],
ts->clat_stat[ddir].samples,
ts->percentile_list, &ovals, &maxv,
&minv);
- else
+ } else {
len = 0;
-
+ }
+
for (i = 0; i < FIO_IO_U_LIST_MAX_LEN; i++) {
if (i >= len) {
log_buf(out, ";0%%=0");
}
log_buf(out, ";%llu;%llu;%f%%;%f;%f", min, max, p_of_agg, mean, dev);
- } else
+ } else {
log_buf(out, ";%llu;%llu;%f%%;%f;%f", 0ULL, 0ULL, 0.0, 0.0, 0.0);
+ }
if (ver == 5) {
if (bw_stat)
struct group_run_stats *rs,
int ver, struct buf_output *out)
{
- struct thread_stat *ts_lcl;
+ struct thread_stat *ts_lcl = gen_mixed_ddir_stats_from_ts(ts);
- /* Handle aggregation of Reads (ddir = 0), Writes (ddir = 1), and Trims (ddir = 2) */
- ts_lcl = malloc(sizeof(struct thread_stat));
- memset((void *)ts_lcl, 0, sizeof(struct thread_stat));
- ts_lcl->unified_rw_rep = UNIFIED_MIXED; /* calculate mixed stats */
- init_thread_stat_min_vals(ts_lcl);
- ts_lcl->lat_percentiles = ts->lat_percentiles;
- ts_lcl->clat_percentiles = ts->clat_percentiles;
- ts_lcl->slat_percentiles = ts->slat_percentiles;
- ts_lcl->percentile_precision = ts->percentile_precision;
- memcpy(ts_lcl->percentile_list, ts->percentile_list, sizeof(ts->percentile_list));
-
- sum_thread_stats(ts_lcl, ts, 1);
+ if (ts_lcl)
+ show_ddir_status_terse(ts_lcl, rs, DDIR_READ, ver, out);
- /* add the aggregated stats to json parent */
- show_ddir_status_terse(ts_lcl, rs, DDIR_READ, ver, out);
+ free_clat_prio_stats(ts_lcl);
free(ts_lcl);
}
-static struct json_object *add_ddir_lat_json(struct thread_stat *ts, uint32_t percentiles,
- struct io_stat *lat_stat, uint64_t *io_u_plat)
+static struct json_object *add_ddir_lat_json(struct thread_stat *ts,
+ uint32_t percentiles,
+ struct io_stat *lat_stat,
+ uint64_t *io_u_plat)
{
char buf[120];
double mean, dev;
}
static void add_ddir_status_json(struct thread_stat *ts,
- struct group_run_stats *rs, int ddir, struct json_object *parent)
+ struct group_run_stats *rs, enum fio_ddir ddir,
+ struct json_object *parent)
{
unsigned long long min, max;
unsigned long long bw_bytes, bw;
if (!ddir_rw(ddir))
return;
- /* Only print PRIO latencies if some high priority samples were gathered */
- if (ts->clat_high_prio_stat[ddir].samples > 0) {
- const char *high, *low;
+ /* Only include per prio stats if there are >= 2 prios with samples */
+ if (get_nr_prios_with_samples(ts, ddir) >= 2) {
+ struct json_array *array = json_create_array();
+ const char *obj_name;
+ int i;
- if (ts->lat_percentiles) {
- high = "lat_high_prio";
- low = "lat_low_prio";
- } else {
- high = "clat_high_prio";
- low = "clat_low_prio";
+ if (ts->lat_percentiles)
+ obj_name = "lat_ns";
+ else
+ obj_name = "clat_ns";
+
+ json_object_add_value_array(dir_object, "prios", array);
+
+ for (i = 0; i < ts->nr_clat_prio[ddir]; i++) {
+ if (ts->clat_prio[ddir][i].clat_stat.samples > 0) {
+ struct json_object *obj = json_create_object();
+ unsigned long long class, level;
+
+ class = ts->clat_prio[ddir][i].ioprio >> 13;
+ json_object_add_value_int(obj, "prioclass", class);
+ level = ts->clat_prio[ddir][i].ioprio & 7;
+ json_object_add_value_int(obj, "prio", level);
+
+ tmp_object = add_ddir_lat_json(ts,
+ ts->clat_percentiles | ts->lat_percentiles,
+ &ts->clat_prio[ddir][i].clat_stat,
+ ts->clat_prio[ddir][i].io_u_plat);
+ json_object_add_value_object(obj, obj_name, tmp_object);
+ json_array_add_value_object(array, obj);
+ }
}
-
- tmp_object = add_ddir_lat_json(ts, ts->clat_percentiles | ts->lat_percentiles,
- &ts->clat_high_prio_stat[ddir], ts->io_u_plat_high_prio[ddir]);
- json_object_add_value_object(dir_object, high, tmp_object);
-
- tmp_object = add_ddir_lat_json(ts, ts->clat_percentiles | ts->lat_percentiles,
- &ts->clat_low_prio_stat[ddir], ts->io_u_plat_low_prio[ddir]);
- json_object_add_value_object(dir_object, low, tmp_object);
}
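+ /*
+ * For each data direction this emits a fragment along the lines of
+ * (values illustrative):
+ *
+ *   "prios" : [
+ *     { "prioclass" : 1, "prio" : 2, "clat_ns" : { ... } },
+ *     { "prioclass" : 2, "prio" : 0, "clat_ns" : { ... } }
+ *   ]
+ *
+ * with "lat_ns" used instead of "clat_ns" when lat_percentiles is set.
+ */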
if (calc_lat(&ts->bw_stat[ddir], &min, &max, &mean, &dev)) {
static void add_mixed_ddir_status_json(struct thread_stat *ts,
struct group_run_stats *rs, struct json_object *parent)
{
- struct thread_stat *ts_lcl;
-
- /* Handle aggregation of Reads (ddir = 0), Writes (ddir = 1), and Trims (ddir = 2) */
- ts_lcl = malloc(sizeof(struct thread_stat));
- memset((void *)ts_lcl, 0, sizeof(struct thread_stat));
- ts_lcl->unified_rw_rep = UNIFIED_MIXED; /* calculate mixed stats */
- init_thread_stat_min_vals(ts_lcl);
- ts_lcl->lat_percentiles = ts->lat_percentiles;
- ts_lcl->clat_percentiles = ts->clat_percentiles;
- ts_lcl->slat_percentiles = ts->slat_percentiles;
- ts_lcl->percentile_precision = ts->percentile_precision;
- memcpy(ts_lcl->percentile_list, ts->percentile_list, sizeof(ts->percentile_list));
-
- sum_thread_stats(ts_lcl, ts, 1);
+ struct thread_stat *ts_lcl = gen_mixed_ddir_stats_from_ts(ts);
/* add the aggregated stats to json parent */
- add_ddir_status_json(ts_lcl, rs, DDIR_READ, parent);
+ if (ts_lcl)
+ add_ddir_status_json(ts_lcl, rs, DDIR_READ, parent);
+
+ free_clat_prio_stats(ts_lcl);
free(ts_lcl);
}
* numbers. For group_reporting, we should just add those up, not make
* them the mean of everything.
*/
-static void sum_stat(struct io_stat *dst, struct io_stat *src, bool first,
- bool pure_sum)
+static void sum_stat(struct io_stat *dst, struct io_stat *src, bool pure_sum)
{
+ bool first = dst->samples == 0;
+
if (src->samples == 0)
return;
dst->sig_figs = src->sig_figs;
}
-void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src,
- bool first)
+/*
+ * Free the clat_prio_stat arrays allocated by alloc_clat_prio_stat_ddir().
+ */
+void free_clat_prio_stats(struct thread_stat *ts)
+{
+ enum fio_ddir ddir;
+
+ if (!ts)
+ return;
+
+ for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+ sfree(ts->clat_prio[ddir]);
+ ts->clat_prio[ddir] = NULL;
+ ts->nr_clat_prio[ddir] = 0;
+ }
+}
+
+/*
+ * Allocate a clat_prio_stat array. The array has to be allocated/freed using
+ * smalloc/sfree, so that it is accessible by the process/thread summing the
+ * thread_stats.
+ */
+int alloc_clat_prio_stat_ddir(struct thread_stat *ts, enum fio_ddir ddir,
+ int nr_prios)
+{
+ struct clat_prio_stat *clat_prio;
+ int i;
+
+ clat_prio = scalloc(nr_prios, sizeof(*ts->clat_prio[ddir]));
+ if (!clat_prio) {
+ log_err("fio: failed to allocate ts clat data\n");
+ return 1;
+ }
+
+ for (i = 0; i < nr_prios; i++)
+ clat_prio[i].clat_stat.min_val = ULONG_MAX;
+
+ ts->clat_prio[ddir] = clat_prio;
+ ts->nr_clat_prio[ddir] = nr_prios;
+
+ return 0;
+}
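+
+/*
+ * Illustrative use: a caller that knows up front how many distinct
+ * priorities will be used per data direction can reserve the slots with
+ * something like:
+ *
+ *	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
+ *		if (alloc_clat_prio_stat_ddir(&td->ts, ddir, nr_prios))
+ *			return 1;
+ *
+ * Otherwise the array is grown on demand while summing stats.
+ */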
+
+static int grow_clat_prio_stat(struct thread_stat *dst, enum fio_ddir ddir)
+{
+ int curr_len = dst->nr_clat_prio[ddir];
+ void *new_arr;
+
+ new_arr = scalloc(curr_len + 1, sizeof(*dst->clat_prio[ddir]));
+ if (!new_arr) {
+ log_err("fio: failed to grow clat prio array\n");
+ return 1;
+ }
+
+ memcpy(new_arr, dst->clat_prio[ddir],
+ curr_len * sizeof(*dst->clat_prio[ddir]));
+ sfree(dst->clat_prio[ddir]);
+
+ dst->clat_prio[ddir] = new_arr;
+ dst->clat_prio[ddir][curr_len].clat_stat.min_val = ULONG_MAX;
+ dst->nr_clat_prio[ddir]++;
+
+ return 0;
+}
+
+static int find_clat_prio_index(struct thread_stat *dst, enum fio_ddir ddir,
+ uint32_t ioprio)
+{
+ int i, nr_prios = dst->nr_clat_prio[ddir];
+
+ for (i = 0; i < nr_prios; i++) {
+ if (dst->clat_prio[ddir][i].ioprio == ioprio)
+ return i;
+ }
+
+ return -1;
+}
+
+static int alloc_or_get_clat_prio_index(struct thread_stat *dst,
+ enum fio_ddir ddir, uint32_t ioprio,
+ int *idx)
+{
+ int index = find_clat_prio_index(dst, ddir, ioprio);
+
+ if (index == -1) {
+ index = dst->nr_clat_prio[ddir];
+
+ if (grow_clat_prio_stat(dst, ddir))
+ return 1;
+
+ dst->clat_prio[ddir][index].ioprio = ioprio;
+ }
+
+ *idx = index;
+
+ return 0;
+}
+
+static int clat_prio_stats_copy(struct thread_stat *dst, struct thread_stat *src,
+ enum fio_ddir dst_ddir, enum fio_ddir src_ddir)
+{
+ size_t sz = sizeof(*src->clat_prio[src_ddir]) *
+ src->nr_clat_prio[src_ddir];
+
+ dst->clat_prio[dst_ddir] = smalloc(sz);
+ if (!dst->clat_prio[dst_ddir]) {
+ log_err("fio: failed to alloc clat prio array\n");
+ return 1;
+ }
+
+ memcpy(dst->clat_prio[dst_ddir], src->clat_prio[src_ddir], sz);
+ dst->nr_clat_prio[dst_ddir] = src->nr_clat_prio[src_ddir];
+
+ return 0;
+}
+
+static int clat_prio_stat_add_samples(struct thread_stat *dst,
+ enum fio_ddir dst_ddir, uint32_t ioprio,
+ struct io_stat *io_stat,
+ uint64_t *io_u_plat)
+{
+ int i, dst_index;
+
+ if (!io_stat->samples)
+ return 0;
+
+ if (alloc_or_get_clat_prio_index(dst, dst_ddir, ioprio, &dst_index))
+ return 1;
+
+ sum_stat(&dst->clat_prio[dst_ddir][dst_index].clat_stat, io_stat,
+ false);
+
+ for (i = 0; i < FIO_IO_U_PLAT_NR; i++)
+ dst->clat_prio[dst_ddir][dst_index].io_u_plat[i] += io_u_plat[i];
+
+ return 0;
+}
+
+static int sum_clat_prio_stats_src_single_prio(struct thread_stat *dst,
+ struct thread_stat *src,
+ enum fio_ddir dst_ddir,
+ enum fio_ddir src_ddir)
+{
+ struct io_stat *io_stat;
+ uint64_t *io_u_plat;
+
+ /*
+ * If src ts has no clat_prio_stat array, then all I/Os were submitted
+ * using src->ioprio. Thus, the global samples in src->clat_stat (or
+ * src->lat_stat) can be used as the 'per prio' samples for src->ioprio.
+ */
+ assert(!src->clat_prio[src_ddir]);
+ assert(src->nr_clat_prio[src_ddir] == 0);
+
+ if (src->lat_percentiles) {
+ io_u_plat = src->io_u_plat[FIO_LAT][src_ddir];
+ io_stat = &src->lat_stat[src_ddir];
+ } else {
+ io_u_plat = src->io_u_plat[FIO_CLAT][src_ddir];
+ io_stat = &src->clat_stat[src_ddir];
+ }
+
+ return clat_prio_stat_add_samples(dst, dst_ddir, src->ioprio, io_stat,
+ io_u_plat);
+}
+
+static int sum_clat_prio_stats_src_multi_prio(struct thread_stat *dst,
+ struct thread_stat *src,
+ enum fio_ddir dst_ddir,
+ enum fio_ddir src_ddir)
+{
+ int i;
+
+ /*
+ * If src ts has a clat_prio_stat array, then there are multiple prios
+ * in use (i.e. src ts had cmdprio_percentage or cmdprio_bssplit set).
+ * The samples for the default prio will exist in the src->clat_prio
+ * array, just like the samples for any other prio.
+ */
+ assert(src->clat_prio[src_ddir]);
+ assert(src->nr_clat_prio[src_ddir]);
+
+ /* If the dst ts doesn't yet have a clat_prio array, simply memcpy. */
+ if (!dst->clat_prio[dst_ddir])
+ return clat_prio_stats_copy(dst, src, dst_ddir, src_ddir);
+
+ /* The dst ts already has a clat_prio_array, add src stats into it. */
+ for (i = 0; i < src->nr_clat_prio[src_ddir]; i++) {
+ struct io_stat *io_stat = &src->clat_prio[src_ddir][i].clat_stat;
+ uint64_t *io_u_plat = src->clat_prio[src_ddir][i].io_u_plat;
+ uint32_t ioprio = src->clat_prio[src_ddir][i].ioprio;
+
+ if (clat_prio_stat_add_samples(dst, dst_ddir, ioprio, io_stat, io_u_plat))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int sum_clat_prio_stats(struct thread_stat *dst, struct thread_stat *src,
+ enum fio_ddir dst_ddir, enum fio_ddir src_ddir)
+{
+ if (dst->disable_prio_stat)
+ return 0;
+
+ if (!src->clat_prio[src_ddir])
+ return sum_clat_prio_stats_src_single_prio(dst, src, dst_ddir,
+ src_ddir);
+
+ return sum_clat_prio_stats_src_multi_prio(dst, src, dst_ddir, src_ddir);
+}
+
+void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src)
{
int k, l, m;
for (l = 0; l < DDIR_RWDIR_CNT; l++) {
- if (!(dst->unified_rw_rep == UNIFIED_MIXED)) {
- sum_stat(&dst->clat_stat[l], &src->clat_stat[l], first, false);
- sum_stat(&dst->clat_high_prio_stat[l], &src->clat_high_prio_stat[l], first, false);
- sum_stat(&dst->clat_low_prio_stat[l], &src->clat_low_prio_stat[l], first, false);
- sum_stat(&dst->slat_stat[l], &src->slat_stat[l], first, false);
- sum_stat(&dst->lat_stat[l], &src->lat_stat[l], first, false);
- sum_stat(&dst->bw_stat[l], &src->bw_stat[l], first, true);
- sum_stat(&dst->iops_stat[l], &src->iops_stat[l], first, true);
+ if (dst->unified_rw_rep != UNIFIED_MIXED) {
+ sum_stat(&dst->clat_stat[l], &src->clat_stat[l], false);
+ sum_stat(&dst->slat_stat[l], &src->slat_stat[l], false);
+ sum_stat(&dst->lat_stat[l], &src->lat_stat[l], false);
+ sum_stat(&dst->bw_stat[l], &src->bw_stat[l], true);
+ sum_stat(&dst->iops_stat[l], &src->iops_stat[l], true);
+ sum_clat_prio_stats(dst, src, l, l);
dst->io_bytes[l] += src->io_bytes[l];
if (dst->runtime[l] < src->runtime[l])
dst->runtime[l] = src->runtime[l];
} else {
- sum_stat(&dst->clat_stat[0], &src->clat_stat[l], first, false);
- sum_stat(&dst->clat_high_prio_stat[0], &src->clat_high_prio_stat[l], first, false);
- sum_stat(&dst->clat_low_prio_stat[0], &src->clat_low_prio_stat[l], first, false);
- sum_stat(&dst->slat_stat[0], &src->slat_stat[l], first, false);
- sum_stat(&dst->lat_stat[0], &src->lat_stat[l], first, false);
- sum_stat(&dst->bw_stat[0], &src->bw_stat[l], first, true);
- sum_stat(&dst->iops_stat[0], &src->iops_stat[l], first, true);
+ sum_stat(&dst->clat_stat[0], &src->clat_stat[l], false);
+ sum_stat(&dst->slat_stat[0], &src->slat_stat[l], false);
+ sum_stat(&dst->lat_stat[0], &src->lat_stat[l], false);
+ sum_stat(&dst->bw_stat[0], &src->bw_stat[l], true);
+ sum_stat(&dst->iops_stat[0], &src->iops_stat[l], true);
+ sum_clat_prio_stats(dst, src, 0, l);
dst->io_bytes[0] += src->io_bytes[l];
if (dst->runtime[0] < src->runtime[l])
dst->runtime[0] = src->runtime[l];
-
- /*
- * We're summing to the same destination, so override
- * 'first' after the first iteration of the loop
- */
- first = false;
}
}
- sum_stat(&dst->sync_stat, &src->sync_stat, first, false);
+ sum_stat(&dst->sync_stat, &src->sync_stat, false);
dst->usr_time += src->usr_time;
dst->sys_time += src->sys_time;
dst->ctx += src->ctx;
dst->io_u_lat_m[k] += src->io_u_lat_m[k];
for (k = 0; k < DDIR_RWDIR_CNT; k++) {
- if (!(dst->unified_rw_rep == UNIFIED_MIXED)) {
+ if (dst->unified_rw_rep != UNIFIED_MIXED) {
dst->total_io_u[k] += src->total_io_u[k];
dst->short_io_u[k] += src->short_io_u[k];
dst->drop_io_u[k] += src->drop_io_u[k];
for (k = 0; k < FIO_LAT_CNT; k++)
for (l = 0; l < DDIR_RWDIR_CNT; l++)
for (m = 0; m < FIO_IO_U_PLAT_NR; m++)
- if (!(dst->unified_rw_rep == UNIFIED_MIXED))
+ if (dst->unified_rw_rep != UNIFIED_MIXED)
dst->io_u_plat[k][l][m] += src->io_u_plat[k][l][m];
else
dst->io_u_plat[k][0][m] += src->io_u_plat[k][l][m];
for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
dst->io_u_sync_plat[k] += src->io_u_sync_plat[k];
- for (k = 0; k < DDIR_RWDIR_CNT; k++) {
- for (m = 0; m < FIO_IO_U_PLAT_NR; m++) {
- if (!(dst->unified_rw_rep == UNIFIED_MIXED)) {
- dst->io_u_plat_high_prio[k][m] += src->io_u_plat_high_prio[k][m];
- dst->io_u_plat_low_prio[k][m] += src->io_u_plat_low_prio[k][m];
- } else {
- dst->io_u_plat_high_prio[0][m] += src->io_u_plat_high_prio[k][m];
- dst->io_u_plat_low_prio[0][m] += src->io_u_plat_low_prio[k][m];
- }
-
- }
- }
-
dst->total_run_time += src->total_run_time;
dst->total_submit += src->total_submit;
dst->total_complete += src->total_complete;
ts->lat_stat[i].min_val = ULONG_MAX;
ts->bw_stat[i].min_val = ULONG_MAX;
ts->iops_stat[i].min_val = ULONG_MAX;
- ts->clat_high_prio_stat[i].min_val = ULONG_MAX;
- ts->clat_low_prio_stat[i].min_val = ULONG_MAX;
}
ts->sync_stat.min_val = ULONG_MAX;
}
ts->groupid = -1;
}
+static void init_per_prio_stats(struct thread_stat *threadstats, int nr_ts)
+{
+ struct thread_data *td;
+ struct thread_stat *ts;
+ int i, j, last_ts, idx;
+ enum fio_ddir ddir;
+
+ j = 0;
+ last_ts = -1;
+ idx = 0;
+
+ /*
+ * Loop through all tds. If a td requires per prio stats, temporarily
+ * store a 1 in ts->disable_prio_stat. A second loop at the end then
+ * inverts the ts->disable_prio_stat values.
+ */
+ for_each_td(td, i) {
+ if (!td->o.stats)
+ continue;
+ if (idx &&
+ (!td->o.group_reporting ||
+ (td->o.group_reporting && last_ts != td->groupid))) {
+ idx = 0;
+ j++;
+ }
+
+ last_ts = td->groupid;
+ ts = &threadstats[j];
+
+ /* idx == 0 means first td in group, or td is not in a group. */
+ if (idx == 0)
+ ts->ioprio = td->ioprio;
+ else if (td->ioprio != ts->ioprio)
+ ts->disable_prio_stat = 1;
+
+ for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+ if (td->ts.clat_prio[ddir]) {
+ ts->disable_prio_stat = 1;
+ break;
+ }
+ }
+
+ idx++;
+ }
+
+ /* Loop through all dst threadstats and fixup the values. */
+ for (i = 0; i < nr_ts; i++) {
+ ts = &threadstats[i];
+ ts->disable_prio_stat = !ts->disable_prio_stat;
+ }
+}
+
void __show_run_stats(void)
{
struct group_run_stats *runstats, *rs;
opt_lists[i] = NULL;
}
+ init_per_prio_stats(threadstats, nr_ts);
+
j = 0;
last_ts = -1;
idx = 0;
opt_lists[j] = &td->opt_list;
idx++;
- ts->members++;
if (ts->groupid == -1) {
/*
for (k = 0; k < ts->nr_block_infos; k++)
ts->block_infos[k] = td->ts.block_infos[k];
- sum_thread_stats(ts, &td->ts, idx == 1);
+ sum_thread_stats(ts, &td->ts);
+
+ ts->members++;
if (td->o.ss_dur) {
ts->ss_state = td->ss.state;
}
for (i = 0; i < groupid + 1; i++) {
- int ddir;
+ enum fio_ddir ddir;
rs = &runstats[i];
log_info_flush();
free(runstats);
+
+ /* free arrays allocated by sum_thread_stats(), if any */
+ for (i = 0; i < nr_ts; i++) {
+ ts = &threadstats[i];
+ free_clat_prio_stats(ts);
+ }
free(threadstats);
free(opt_lists);
}
is->samples++;
}
+static inline void add_stat_prio_sample(struct clat_prio_stat *clat_prio,
+ unsigned short clat_prio_index,
+ unsigned long long nsec)
+{
+ if (clat_prio)
+ add_stat_sample(&clat_prio[clat_prio_index].clat_stat, nsec);
+}
+
/*
* Return a struct io_logs, which is added to the tail of the log
* list for 'iolog'.
ios->mean.u.f = ios->S.u.f = 0;
}
+static inline void reset_io_u_plat(uint64_t *io_u_plat)
+{
+ int i;
+
+ for (i = 0; i < FIO_IO_U_PLAT_NR; i++)
+ io_u_plat[i] = 0;
+}
+
+static inline void reset_clat_prio_stats(struct thread_stat *ts)
+{
+ enum fio_ddir ddir;
+ int i;
+
+ for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
+ if (!ts->clat_prio[ddir])
+ continue;
+
+ for (i = 0; i < ts->nr_clat_prio[ddir]; i++) {
+ reset_io_stat(&ts->clat_prio[ddir][i].clat_stat);
+ reset_io_u_plat(ts->clat_prio[ddir][i].io_u_plat);
+ }
+ }
+}
+
void reset_io_stats(struct thread_data *td)
{
struct thread_stat *ts = &td->ts;
- int i, j, k;
+ int i, j;
for (i = 0; i < DDIR_RWDIR_CNT; i++) {
- reset_io_stat(&ts->clat_high_prio_stat[i]);
- reset_io_stat(&ts->clat_low_prio_stat[i]);
reset_io_stat(&ts->clat_stat[i]);
reset_io_stat(&ts->slat_stat[i]);
reset_io_stat(&ts->lat_stat[i]);
ts->total_io_u[i] = 0;
ts->short_io_u[i] = 0;
ts->drop_io_u[i] = 0;
-
- for (j = 0; j < FIO_IO_U_PLAT_NR; j++) {
- ts->io_u_plat_high_prio[i][j] = 0;
- ts->io_u_plat_low_prio[i][j] = 0;
- if (!i)
- ts->io_u_sync_plat[j] = 0;
- }
}
for (i = 0; i < FIO_LAT_CNT; i++)
for (j = 0; j < DDIR_RWDIR_CNT; j++)
- for (k = 0; k < FIO_IO_U_PLAT_NR; k++)
- ts->io_u_plat[i][j][k] = 0;
+ reset_io_u_plat(ts->io_u_plat[i][j]);
+
+ reset_clat_prio_stats(ts);
ts->total_io_u[DDIR_SYNC] = 0;
+ reset_io_u_plat(ts->io_u_sync_plat);
for (i = 0; i < FIO_IO_U_MAP_NR; i++) {
ts->io_u_map[i] = 0;
static void _add_stat_to_log(struct io_log *iolog, unsigned long elapsed,
bool log_max)
{
- int ddir;
+ enum fio_ddir ddir;
for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++)
__add_stat_to_log(iolog, ddir, elapsed, log_max);
ts->io_u_plat[lat][ddir][idx]++;
}
-static inline void add_lat_percentile_prio_sample(struct thread_stat *ts,
- unsigned long long nsec,
- enum fio_ddir ddir,
- bool high_prio)
+static inline void
+add_lat_percentile_prio_sample(struct thread_stat *ts, unsigned long long nsec,
+ enum fio_ddir ddir,
+ unsigned short clat_prio_index)
{
unsigned int idx = plat_val_to_idx(nsec);
- if (!high_prio)
- ts->io_u_plat_low_prio[ddir][idx]++;
- else
- ts->io_u_plat_high_prio[ddir][idx]++;
+ if (ts->clat_prio[ddir])
+ ts->clat_prio[ddir][clat_prio_index].io_u_plat[idx]++;
}
void add_clat_sample(struct thread_data *td, enum fio_ddir ddir,
unsigned long long nsec, unsigned long long bs,
- uint64_t offset, unsigned int ioprio, bool high_prio)
+ uint64_t offset, unsigned int ioprio,
+ unsigned short clat_prio_index)
{
const bool needs_lock = td_async_processing(td);
unsigned long elapsed, this_window;
add_stat_sample(&ts->clat_stat[ddir], nsec);
/*
- * When lat_percentiles=1 (default 0), the reported high/low priority
+ * When lat_percentiles=1 (default 0), the reported per priority
* percentiles and stats are used for describing total latency values,
* even though the variable names themselves start with clat_.
*
* lat_percentiles=0. add_lat_sample() will add the prio stat sample
* when lat_percentiles=1.
*/
- if (!ts->lat_percentiles) {
- if (high_prio)
- add_stat_sample(&ts->clat_high_prio_stat[ddir], nsec);
- else
- add_stat_sample(&ts->clat_low_prio_stat[ddir], nsec);
- }
+ if (!ts->lat_percentiles)
+ add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index,
+ nsec);
if (td->clat_log)
add_log_sample(td, td->clat_log, sample_val(nsec), ddir, bs,
add_lat_percentile_sample(ts, nsec, ddir, FIO_CLAT);
if (!ts->lat_percentiles)
add_lat_percentile_prio_sample(ts, nsec, ddir,
- high_prio);
+ clat_prio_index);
}
if (iolog && iolog->hist_msec) {
void add_lat_sample(struct thread_data *td, enum fio_ddir ddir,
unsigned long long nsec, unsigned long long bs,
- uint64_t offset, unsigned int ioprio, bool high_prio)
+ uint64_t offset, unsigned int ioprio,
+ unsigned short clat_prio_index)
{
const bool needs_lock = td_async_processing(td);
struct thread_stat *ts = &td->ts;
offset, ioprio);
/*
- * When lat_percentiles=1 (default 0), the reported high/low priority
+ * When lat_percentiles=1 (default 0), the reported per priority
* percentiles and stats are used for describing total latency values,
* even though the variable names themselves start with clat_.
*
*/
if (ts->lat_percentiles) {
add_lat_percentile_sample(ts, nsec, ddir, FIO_LAT);
- add_lat_percentile_prio_sample(ts, nsec, ddir, high_prio);
- if (high_prio)
- add_stat_sample(&ts->clat_high_prio_stat[ddir], nsec);
- else
- add_stat_sample(&ts->clat_low_prio_stat[ddir], nsec);
-
+ add_lat_percentile_prio_sample(ts, nsec, ddir, clat_prio_index);
+ add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index,
+ nsec);
}
if (needs_lock)
__td_io_u_unlock(td);
FIO_LAT_CNT = 3,
};
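+
+/*
+ * The ioprio values tracked below use the Linux per-I/O priority encoding:
+ * the reporting code extracts the priority class as ioprio >> 13 and the
+ * priority level as ioprio & 7 when printing "prio <class>/<level>".
+ */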
+struct clat_prio_stat {
+ uint64_t io_u_plat[FIO_IO_U_PLAT_NR];
+ struct io_stat clat_stat;
+ uint32_t ioprio;
+};
+
struct thread_stat {
char name[FIO_JOBNAME_SIZE];
char verror[FIO_VERROR_SIZE];
char description[FIO_JOBDESC_SIZE];
uint32_t members;
uint32_t unified_rw_rep;
+ uint32_t disable_prio_stat;
/*
* bandwidth and latency stats
fio_fp64_t ss_deviation;
fio_fp64_t ss_criterion;
- uint64_t io_u_plat_high_prio[DDIR_RWDIR_CNT][FIO_IO_U_PLAT_NR] __attribute__((aligned(8)));;
- uint64_t io_u_plat_low_prio[DDIR_RWDIR_CNT][FIO_IO_U_PLAT_NR];
- struct io_stat clat_high_prio_stat[DDIR_RWDIR_CNT] __attribute__((aligned(8)));
- struct io_stat clat_low_prio_stat[DDIR_RWDIR_CNT];
+ /* A mirror of td->ioprio. */
+ uint32_t ioprio;
union {
uint64_t *ss_iops_data;
+ /*
+ * For FIO_NET_CMD_TS, the pointed to data will temporarily
+ * be stored at this offset from the start of the payload.
+ */
+ uint64_t ss_iops_data_offset;
uint64_t pad4;
};
union {
uint64_t *ss_bw_data;
+ /*
+ * For FIO_NET_CMD_TS, the pointed to data will temporarily
+ * be stored at this offset from the start of the payload.
+ */
+ uint64_t ss_bw_data_offset;
uint64_t pad5;
};
+ union {
+ struct clat_prio_stat *clat_prio[DDIR_RWDIR_CNT];
+ /*
+ * For FIO_NET_CMD_TS, the pointed to data will temporarily
+ * be stored at this offset from the start of the payload.
+ */
+ uint64_t clat_prio_offset[DDIR_RWDIR_CNT];
+ uint64_t pad6;
+ };
+ uint32_t nr_clat_prio[DDIR_RWDIR_CNT];
+
uint64_t cachehit;
uint64_t cachemiss;
} __attribute__((packed));
extern int __show_running_run_stats(void);
extern void show_running_run_stats(void);
extern void check_for_running_stats(void);
-extern void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src, bool first);
+extern void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src);
extern void sum_group_stats(struct group_run_stats *dst, struct group_run_stats *src);
extern void init_thread_stat_min_vals(struct thread_stat *ts);
extern void init_thread_stat(struct thread_stat *ts);
extern void clear_rusage_stat(struct thread_data *);
extern void add_lat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
- unsigned long long, uint64_t, unsigned int, bool);
+ unsigned long long, uint64_t, unsigned int, unsigned short);
extern void add_clat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
- unsigned long long, uint64_t, unsigned int, bool);
+ unsigned long long, uint64_t, unsigned int, unsigned short);
extern void add_slat_sample(struct thread_data *, enum fio_ddir, unsigned long long,
unsigned long long, uint64_t, unsigned int);
extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned long long);
extern void add_sync_clat_sample(struct thread_stat *ts,
unsigned long long nsec);
extern int calc_log_samples(void);
+extern void free_clat_prio_stats(struct thread_stat *);
+extern int alloc_clat_prio_stat_ddir(struct thread_stat *, enum fio_ddir, int);
extern void print_disk_util(struct disk_util_stat *, struct disk_util_agg *, int terse, struct buf_output *);
extern void json_array_add_disk_util(struct disk_util_stat *dus,
#ifdef CONFIG_LIBAIO
static int prep_more_ios_aio(struct submitter *s, int max_ios, struct iocb *iocbs)
{
- unsigned long offset, data;
+ uint64_t data;
+ long long offset;
struct file *f;
unsigned index;
long r;
data = f->fileno;
if (stats && stats_running)
- data |= ((unsigned long) s->clock_index << 32);
+ data |= (((uint64_t) s->clock_index) << 32);
iocb->data = (void *) (uintptr_t) data;
index++;
}
int reaped = 0;
while (evs) {
- unsigned long data = (uintptr_t) events[reaped].data;
+ uint64_t data = (uintptr_t) events[reaped].data;
struct file *f = &s->files[data & 0xffffffff];
f->pending_ios--;
" -a <bool> : Use legacy aio, default %d\n",
argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
fixedbufs, dma_map, register_files, nthreads, !buffered, do_nop,
- stats, runtime == 0 ? "unlimited" : runtime_str, aio, random_io);
+ stats, runtime == 0 ? "unlimited" : runtime_str, random_io, aio);
exit(status);
}
import argparse
import platform
import subprocess
+from collections import Counter
from pathlib import Path
"--output-format={output-format}".format(**self.test_options),
]
for opt in ['slat_percentiles', 'clat_percentiles', 'lat_percentiles',
- 'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs', 'cmdprio_percentage']:
+ 'unified_rw_reporting', 'fsync', 'fdatasync', 'numjobs',
+ 'cmdprio_percentage', 'bssplit', 'cmdprio_bssplit']:
if opt in self.test_options:
option = '--{0}={{{0}}}'.format(opt)
fio_args.append(option.format(**self.test_options))
def check_nocmdprio_lat(self, job):
"""
- Make sure no high/low priority latencies appear.
+ Make sure no per priority latencies appear.
job JSON object to check
"""
for ddir in ['read', 'write', 'trim']:
if ddir in job:
- if 'lat_high_prio' in job[ddir] or 'lat_low_prio' in job[ddir] or \
- 'clat_high_prio' in job[ddir] or 'clat_low_prio' in job[ddir]:
- print("Unexpected high/low priority latencies found in %s output" % ddir)
+ if 'prios' in job[ddir]:
+ print("Unexpected per priority latencies found in %s output" % ddir)
return False
if self.debug:
- print("No high/low priority latencies found")
+ print("No per priority latencies found")
return True
return retval
def check_prio_latencies(self, jsondata, clat=True, plus=False):
- """Check consistency of high/low priority latencies.
+ """Check consistency of per priority latencies.
clat True if we should check clat data; otherwise check lat data
plus True if we have json+ format data where additional checks can
"""
if clat:
- high = 'clat_high_prio'
- low = 'clat_low_prio'
- combined = 'clat_ns'
+ obj = combined = 'clat_ns'
else:
- high = 'lat_high_prio'
- low = 'lat_low_prio'
- combined = 'lat_ns'
+ obj = combined = 'lat_ns'
- if not high in jsondata or not low in jsondata or not combined in jsondata:
- print("Error identifying high/low priority latencies")
+ if not 'prios' in jsondata or not combined in jsondata:
+ print("Error identifying per priority latencies")
return False
- if jsondata[high]['N'] + jsondata[low]['N'] != jsondata[combined]['N']:
- print("High %d + low %d != combined sample size %d" % \
- (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
+ sum_sample_size = sum([x[obj]['N'] for x in jsondata['prios']])
+ if sum_sample_size != jsondata[combined]['N']:
+ print("Per prio sample size sum %d != combined sample size %d" %
+ (sum_sample_size, jsondata[combined]['N']))
return False
elif self.debug:
- print("High %d + low %d == combined sample size %d" % \
- (jsondata[high]['N'], jsondata[low]['N'], jsondata[combined]['N']))
+ print("Per prio sample size sum %d == combined sample size %d" %
+ (sum_sample_size, jsondata[combined]['N']))
- if min(jsondata[high]['min'], jsondata[low]['min']) != jsondata[combined]['min']:
- print("Min of high %d, low %d min latencies does not match min %d from combined data" % \
- (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
+ min_val = min([x[obj]['min'] for x in jsondata['prios']])
+ if min_val != jsondata[combined]['min']:
+ print("Min per prio min latency %d does not match min %d from combined data" %
+ (min_val, jsondata[combined]['min']))
return False
elif self.debug:
- print("Min of high %d, low %d min latencies matches min %d from combined data" % \
- (jsondata[high]['min'], jsondata[low]['min'], jsondata[combined]['min']))
+ print("Min per prio min latency %d matches min %d from combined data" %
+ (min_val, jsondata[combined]['min']))
- if max(jsondata[high]['max'], jsondata[low]['max']) != jsondata[combined]['max']:
- print("Max of high %d, low %d max latencies does not match max %d from combined data" % \
- (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
+ max_val = max([x[obj]['max'] for x in jsondata['prios']])
+ if max_val != jsondata[combined]['max']:
+ print("Max per prio max latency %d does not match max %d from combined data" %
+ (max_val, jsondata[combined]['max']))
return False
elif self.debug:
- print("Max of high %d, low %d max latencies matches max %d from combined data" % \
- (jsondata[high]['max'], jsondata[low]['max'], jsondata[combined]['max']))
+ print("Max per prio max latency %d matches max %d from combined data" %
+ (max_val, jsondata[combined]['max']))
- weighted_avg = (jsondata[high]['mean'] * jsondata[high]['N'] + \
- jsondata[low]['mean'] * jsondata[low]['N']) / jsondata[combined]['N']
+ weighted_vals = [x[obj]['mean'] * x[obj]['N'] for x in jsondata['prios']]
+ weighted_avg = sum(weighted_vals) / jsondata[combined]['N']
delta = abs(weighted_avg - jsondata[combined]['mean'])
if (delta / jsondata[combined]['mean']) > 0.0001:
- print("Difference between weighted average %f of high, low means "
+ print("Difference between merged per prio weighted average %f mean "
"and actual mean %f exceeds 0.01%%" % (weighted_avg, jsondata[combined]['mean']))
return False
elif self.debug:
- print("Weighted average %f of high, low means matches actual mean %f" % \
- (weighted_avg, jsondata[combined]['mean']))
+ print("Merged per prio weighted average %f mean matches actual mean %f" %
+ (weighted_avg, jsondata[combined]['mean']))
if plus:
- if not self.check_jsonplus(jsondata[high]):
- return False
- if not self.check_jsonplus(jsondata[low]):
- return False
+ for prio in jsondata['prios']:
+ if not self.check_jsonplus(prio[obj]):
+ return False
- bins = {**jsondata[high]['bins'], **jsondata[low]['bins']}
- for duration in bins.keys():
- if duration in jsondata[high]['bins'] and duration in jsondata[low]['bins']:
- bins[duration] = jsondata[high]['bins'][duration] + \
- jsondata[low]['bins'][duration]
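+ # Counter.update() with a dict adds the per-bin counts, so after
+ # looping over every priority, 'counter' holds the merged histogram.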
+ counter = Counter()
+ for prio in jsondata['prios']:
+ counter.update(prio[obj]['bins'])
+
+ bins = dict(counter)
if len(bins) != len(jsondata[combined]['bins']):
- print("Number of combined high/low bins does not match number of overall bins")
+ print("Number of merged bins %d does not match number of overall bins %d" %
+ (len(bins), len(jsondata[combined]['bins'])))
return False
elif self.debug:
- print("Number of bins from merged high/low data matches number of overall bins")
+ print("Number of merged bins %d matches number of overall bins %d" %
+ (len(bins), len(jsondata[combined]['bins'])))
for duration in bins.keys():
if bins[duration] != jsondata[combined]['bins'][duration]:
- print("Merged high/low count does not match overall count for duration %d" \
- % duration)
+ print("Merged per prio count does not match overall count for duration %d" %
+ duration)
return False
- print("Merged high/low priority latency data match combined latency data")
+ print("Merged per priority latency data match combined latency data")
return True
def check(self):
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['read'], 0, slat=False)
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['write'], 1, slat=False, clat=False)
print("Unexpected write data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['trim'], 2, slat=False, tlat=False)
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['read'], 0, plus=True)
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['read'], 0, clat=False, tlat=False, plus=True)
job = self.json_data['jobs'][0]
retval = True
- if 'read' in job or 'write'in job or 'trim' in job:
+ if 'read' in job or 'write' in job or 'trim' in job:
print("Unexpected data direction found in fio output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['mixed'], 0, plus=True, unified=True)
print("Error checking fsync latency data")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['write'], 1, slat=False, plus=True)
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['read'], 0, plus=True)
print("Unexpected trim data found in output")
retval = False
if not self.check_nocmdprio_lat(job):
- print("Unexpected high/low priority latencies found")
+ print("Unexpected per priority latencies found")
retval = False
retval &= self.check_latencies(job['read'], 0, slat=False, clat=False, plus=True)
job = self.json_data['jobs'][0]
retval = True
- if 'read' in job or 'write'in job or 'trim' in job:
+ if 'read' in job or 'write' in job or 'trim' in job:
print("Unexpected data direction found in fio output")
retval = False
return retval
+class Test021(FioLatTest):
+ """Test object for Test 21."""
+
+ def check(self):
+ """Check Test 21 output."""
+
+ job = self.json_data['jobs'][0]
+
+ retval = True
+ if not self.check_empty(job['trim']):
+ print("Unexpected trim data found in output")
+ retval = False
+
+ retval &= self.check_latencies(job['read'], 0, slat=False, tlat=False, plus=True)
+ retval &= self.check_latencies(job['write'], 1, slat=False, tlat=False, plus=True)
+ retval &= self.check_prio_latencies(job['read'], clat=True, plus=True)
+ retval &= self.check_prio_latencies(job['write'], clat=True, plus=True)
+
+ return retval
+
+
def parse_args():
"""Parse command-line arguments."""
# randread, null
# enable slat, clat, lat
# only clat and lat will appear because
- # because the null ioengine is syncrhonous
+ # the null ioengine is synchronous
"test_id": 1,
"runtime": 2,
"output-format": "json",
{
# randread, aio
# enable slat, clat, lat
- # all will appear because liaio is asynchronous
+ # all will appear because libaio is asynchronous
"test_id": 4,
"runtime": 5,
"output-format": "json+",
# randread, null
# enable slat, clat, lat
# only clat and lat will appear because
- # because the null ioengine is syncrhonous
- # same as Test 1 except
- # numjobs = 4 to test sum_thread_stats() changes
+ # the null ioengine is synchronous
+ # same as Test 1 except add numjobs = 4 to test
+ # sum_thread_stats() changes
"test_id": 12,
"runtime": 2,
"output-format": "json",
{
# randread, aio
# enable slat, clat, lat
- # all will appear because liaio is asynchronous
- # same as Test 4 except
- # numjobs = 4 to test sum_thread_stats() changes
+ # all will appear because libaio is asynchronous
+ # same as Test 4 except add numjobs = 4 to test
+ # sum_thread_stats() changes
"test_id": 13,
"runtime": 5,
"output-format": "json+",
{
# 50/50 r/w, aio, unified_rw_reporting
# enable slat, clat, lat
- # same as Test 8 except
- # numjobs = 4 to test sum_thread_stats() changes
+ # same as Test 8 except add numjobs = 4 to test
+ # sum_thread_stats() changes
"test_id": 14,
"runtime": 5,
"output-format": "json+",
{
# randread, aio
# enable slat, clat, lat
- # all will appear because liaio is asynchronous
+ # all will appear because libaio is asynchronous
# same as Test 4 except add cmdprio_percentage
"test_id": 15,
"runtime": 5,
{
# 50/50 r/w, aio, unified_rw_reporting
# enable slat, clat, lat
- # same as Test 19 except
- # add numjobs = 4 to test sum_thread_stats() changes
+ # same as Test 19 except add numjobs = 4 to test
+ # sum_thread_stats() changes
"test_id": 20,
"runtime": 5,
"output-format": "json+",
'numjobs': 4,
"test_obj": Test019,
},
+ {
+ # r/w, aio
+ # enable only clat
+ # test bssplit and cmdprio_bssplit
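+ # (25% of the 64k I/Os at class 1/level 1, 75% at class 3/level 2,
+ # 1024k I/Os left at the default priority)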
+ "test_id": 21,
+ "runtime": 5,
+ "output-format": "json+",
+ "slat_percentiles": 0,
+ "clat_percentiles": 1,
+ "lat_percentiles": 0,
+ "ioengine": aio,
+ 'rw': 'randrw',
+ 'bssplit': '64k/40:1024k/60',
+ 'cmdprio_bssplit': '64k/25/1/1:64k/75/3/2:1024k/0',
+ "test_obj": Test021,
+ },
+ {
+ # r/w, aio
+ # enable only clat
+ # same as Test 21 except add numjobs = 4 to test
+ # sum_thread_stats() changes
+ "test_id": 22,
+ "runtime": 5,
+ "output-format": "json+",
+ "slat_percentiles": 0,
+ "clat_percentiles": 1,
+ "lat_percentiles": 0,
+ "ioengine": aio,
+ 'rw': 'randrw',
+ 'bssplit': '64k/40:1024k/60',
+ 'cmdprio_bssplit': '64k/25/1/1:64k/75/3/2:1024k/0',
+ 'numjobs': 4,
+ "test_obj": Test021,
+ },
]
passed = 0
(args.run_only and test['test_id'] not in args.run_only):
skipped = skipped + 1
outcome = 'SKIPPED (User request)'
- elif (platform.system() != 'Linux' or os.geteuid() != 0) and 'cmdprio_percentage' in test:
+ elif (platform.system() != 'Linux' or os.geteuid() != 0) and \
+ ('cmdprio_percentage' in test or 'cmdprio_bssplit' in test):
skipped = skipped + 1
- outcome = 'SKIPPED (Linux root required for cmdprio_percentage tests)'
+ outcome = 'SKIPPED (Linux root required for cmdprio tests)'
else:
test_obj = test['test_obj'](artifact_root, test, args.debug)
status = test_obj.run_fio(fio)
local sed_str='s/.*len \([0-9A-Za-z]*\), cap \([0-9A-Za-z]*\).*/\1 \2/p'
local cap bs="$zone_size"
- # When blkzone is not available or blkzone does not report capacity,
+ # When the blkzone command is not available or not applicable to the
+ # test device, or when it does not report zone capacity,
# assume that zone capacity is same as zone size for all zones.
- if [ -z "${blkzone}" ] || ! blkzone_reports_capacity "${dev}"; then
+ if [ -z "${blkzone}" ] || [ -z "$is_zbd" ] || [ -c "$dev" ] ||
+ ! blkzone_reports_capacity "${dev}"; then
echo "$zone_size"
return
fi
unsigned long long val2[ZONESPLIT_MAX];
};
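+/*
+ * One parsed cmdprio_bssplit entry: the block size it applies to, the
+ * percentage of I/Os of that size to prioritize, and the priority to use
+ * for them.
+ */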
+struct split_prio {
+ uint64_t bs;
+ int32_t prio;
+ uint32_t perc;
+};
+
struct bssplit {
uint64_t bs;
uint32_t perc;
extern int split_parse_ddir(struct thread_options *o, struct split *split,
char *str, bool absolute, unsigned int max_splits);
+extern int split_parse_prio_ddir(struct thread_options *o,
+ struct split_prio **entries, int *nr_entries,
+ char *str);
+
#endif
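
As a purely illustrative aside (this is not fio code, and prio_split below is
not fio's struct split_prio), the following standalone sketch spells out the
records implied by the Test 21 string '64k/25/1/1:64k/75/3/2:1024k/0' under the
documented bs/percentage[/class/level] format; the -1 sentinel used here for
"no explicit class/level" is an assumption made for readability.

#include <stdio.h>

struct prio_split {
	unsigned long long bs;	/* block size in bytes */
	unsigned int perc;	/* % of I/Os of this size to prioritize */
	int prio_class;		/* -1: fall back to the cmdprio_class option */
	int prio_level;		/* -1: fall back to the cmdprio option */
};

int main(void)
{
	/* '64k/25/1/1:64k/75/3/2:1024k/0' written out as records */
	const struct prio_split split[] = {
		{   64 * 1024, 25,  1,  1 },	/* 25% of 64k I/Os: class 1, level 1 */
		{   64 * 1024, 75,  3,  2 },	/* 75% of 64k I/Os: class 3, level 2 */
		{ 1024 * 1024,  0, -1, -1 },	/* 1024k I/Os: no explicit priority */
	};
	size_t i;

	for (i = 0; i < sizeof(split) / sizeof(split[0]); i++)
		printf("bs=%llu perc=%u class=%d level=%d\n",
		       split[i].bs, split[i].perc,
		       split[i].prio_class, split[i].prio_level);

	return 0;
}
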
#include "pshared.h"
#include "zbd.h"
+static bool is_valid_offset(const struct fio_file *f, uint64_t offset)
+{
+ return (uint64_t)(offset - f->file_offset) < f->io_size;
+}
+
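+/* Convert a zone info pointer into its index within the zone_info array. */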
+static inline unsigned int zbd_zone_idx(const struct fio_file *f,
+ struct fio_zone_info *zone)
+{
+ return zone - f->zbd_info->zone_info;
+}
+
+/**
+ * zbd_offset_to_zone_idx - convert an offset into a zone number
+ * @f: file pointer.
+ * @offset: offset in bytes. If this offset is in the first zone_size bytes
+ * past the disk size then the index of the sentinel is returned.
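+ *
+ * For example (values for illustration only): with a 256 MiB zone size,
+ * offset 768 MiB maps to zone index 3, and offsets in the first zone_size
+ * bytes past the device end map to the sentinel index nr_zones.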
+ */
+static unsigned int zbd_offset_to_zone_idx(const struct fio_file *f,
+ uint64_t offset)
+{
+ uint32_t zone_idx;
+
+ if (f->zbd_info->zone_size_log2 > 0)
+ zone_idx = offset >> f->zbd_info->zone_size_log2;
+ else
+ zone_idx = offset / f->zbd_info->zone_size;
+
+ return min(zone_idx, f->zbd_info->nr_zones);
+}
+
+/**
+ * zbd_zone_end - Return zone end location
+ * @z: zone info pointer.
+ */
+static inline uint64_t zbd_zone_end(const struct fio_zone_info *z)
+{
+ return (z+1)->start;
+}
+
+/**
+ * zbd_zone_capacity_end - Return zone capacity limit end location
+ * @z: zone info pointer.
+ */
+static inline uint64_t zbd_zone_capacity_end(const struct fio_zone_info *z)
+{
+ return z->start + z->capacity;
+}
+
+/**
+ * zbd_zone_full - verify whether a minimum number of bytes remain in a zone
+ * @f: file pointer.
+ * @z: zone info pointer.
+ * @required: minimum number of bytes that must remain in a zone.
+ *
+ * The caller must hold z->mutex.
+ */
+static bool zbd_zone_full(const struct fio_file *f, struct fio_zone_info *z,
+ uint64_t required)
+{
+ assert((required & 511) == 0);
+
+ return z->has_wp &&
+ z->wp + required > zbd_zone_capacity_end(z);
+}
+
+static void zone_lock(struct thread_data *td, const struct fio_file *f,
+ struct fio_zone_info *z)
+{
+ struct zoned_block_device_info *zbd = f->zbd_info;
+ uint32_t nz = z - zbd->zone_info;
+
+ /* A thread should never lock zones outside its working area. */
+ assert(f->min_zone <= nz && nz < f->max_zone);
+
+ assert(z->has_wp);
+
+ /*
+ * Lock the io_u target zone. The zone will be unlocked if io_u offset
+ * is changed or when io_u completes and zbd_put_io() is executed.
+ * To avoid multiple jobs doing asynchronous I/Os from deadlocking each
+ * other waiting for zone locks when building an io_u batch, first
+ * only trylock the zone. If the zone is already locked by another job,
+ * process the currently queued I/Os so that I/O progress is made and
+ * zones unlocked.
+ */
+ if (pthread_mutex_trylock(&z->mutex) != 0) {
+ if (!td_ioengine_flagged(td, FIO_SYNCIO))
+ io_u_quiesce(td);
+ pthread_mutex_lock(&z->mutex);
+ }
+}
+
+static inline void zone_unlock(struct fio_zone_info *z)
+{
+ int ret;
+
+ assert(z->has_wp);
+ ret = pthread_mutex_unlock(&z->mutex);
+ assert(!ret);
+}
+
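+/* Return the zone info entry for the given zone index. */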
+static inline struct fio_zone_info *zbd_get_zone(const struct fio_file *f,
+ unsigned int zone_idx)
+{
+ return &f->zbd_info->zone_info[zone_idx];
+}
+
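+/* Return the zone info entry that contains the given byte offset. */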
+static inline struct fio_zone_info *
+zbd_offset_to_zone(const struct fio_file *f, uint64_t offset)
+{
+ return zbd_get_zone(f, zbd_offset_to_zone_idx(f, offset));
+}
+
/**
* zbd_get_zoned_model - Get a device zoned model
* @td: FIO thread data
* @f: FIO file for which to get model information
*/
-int zbd_get_zoned_model(struct thread_data *td, struct fio_file *f,
- enum zbd_zoned_model *model)
+static int zbd_get_zoned_model(struct thread_data *td, struct fio_file *f,
+ enum zbd_zoned_model *model)
{
int ret;
* upon failure. If the zone report is empty, always assume an error (device
* problem) and return -EIO.
*/
-int zbd_report_zones(struct thread_data *td, struct fio_file *f,
- uint64_t offset, struct zbd_zone *zones,
- unsigned int nr_zones)
+static int zbd_report_zones(struct thread_data *td, struct fio_file *f,
+ uint64_t offset, struct zbd_zone *zones,
+ unsigned int nr_zones)
{
int ret;
* Reset the write pointer of all zones in the range @offset...@offset+@length.
* Returns 0 upon success and a negative error code upon failure.
*/
-int zbd_reset_wp(struct thread_data *td, struct fio_file *f,
- uint64_t offset, uint64_t length)
+static int zbd_reset_wp(struct thread_data *td, struct fio_file *f,
+ uint64_t offset, uint64_t length)
{
int ret;
}
/**
- * zbd_get_max_open_zones - Get the maximum number of open zones
- * @td: FIO thread data
- * @f: FIO file for which to get max open zones
- * @max_open_zones: Upon success, result will be stored here.
- *
- * A @max_open_zones value set to zero means no limit.
+ * zbd_reset_zone - reset the write pointer of a single zone
+ * @td: FIO thread data.
+ * @f: FIO file associated with the disk for which to reset a write pointer.
+ * @z: Zone to reset.
*
* Returns 0 upon success and a negative error code upon failure.
+ *
+ * The caller must hold z->mutex.
*/
-int zbd_get_max_open_zones(struct thread_data *td, struct fio_file *f,
- unsigned int *max_open_zones)
+static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
+ struct fio_zone_info *z)
{
- int ret;
+ uint64_t offset = z->start;
+ uint64_t length = (z+1)->start - offset;
+ uint64_t data_in_zone = z->wp - z->start;
+ int ret = 0;
- if (td->io_ops && td->io_ops->get_max_open_zones)
- ret = td->io_ops->get_max_open_zones(td, f, max_open_zones);
- else
- ret = blkzoned_get_max_open_zones(td, f, max_open_zones);
- if (ret < 0) {
- td_verror(td, errno, "get max open zones failed");
- log_err("%s: get max open zones failed (%d).\n",
- f->file_name, errno);
+ if (!data_in_zone)
+ return 0;
+
+ assert(is_valid_offset(f, offset + length - 1));
+
+ dprint(FD_ZBD, "%s: resetting wp of zone %u.\n",
+ f->file_name, zbd_zone_idx(f, z));
+
+ switch (f->zbd_info->model) {
+ case ZBD_HOST_AWARE:
+ case ZBD_HOST_MANAGED:
+ ret = zbd_reset_wp(td, f, offset, length);
+ if (ret < 0)
+ return ret;
+ break;
+ default:
+ break;
}
+ pthread_mutex_lock(&f->zbd_info->mutex);
+ f->zbd_info->sectors_with_data -= data_in_zone;
+ f->zbd_info->wp_sectors_with_data -= data_in_zone;
+ pthread_mutex_unlock(&f->zbd_info->mutex);
+
+ z->wp = z->start;
+ z->verify_block = 0;
+
+ td->ts.nr_zone_resets++;
+
return ret;
}
/**
- * zbd_zone_idx - convert an offset into a zone number
- * @f: file pointer.
- * @offset: offset in bytes. If this offset is in the first zone_size bytes
- * past the disk size then the index of the sentinel is returned.
+ * zbd_close_zone - Remove a zone from the open zones array.
+ * @td: FIO thread data.
+ * @f: FIO file that has the zone to close.
+ * @z: Zone to remove from the open zones array.
+ *
+ * The caller must hold f->zbd_info->mutex.
*/
-static uint32_t zbd_zone_idx(const struct fio_file *f, uint64_t offset)
+static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
+ struct fio_zone_info *z)
{
- uint32_t zone_idx;
+ uint32_t ozi;
- if (f->zbd_info->zone_size_log2 > 0)
- zone_idx = offset >> f->zbd_info->zone_size_log2;
- else
- zone_idx = offset / f->zbd_info->zone_size;
+ if (!z->open)
+ return;
- return min(zone_idx, f->zbd_info->nr_zones);
-}
+ for (ozi = 0; ozi < f->zbd_info->num_open_zones; ozi++) {
+ if (zbd_get_zone(f, f->zbd_info->open_zones[ozi]) == z)
+ break;
+ }
+ if (ozi == f->zbd_info->num_open_zones)
+ return;
-/**
- * zbd_zone_end - Return zone end location
- * @z: zone info pointer.
- */
-static inline uint64_t zbd_zone_end(const struct fio_zone_info *z)
-{
- return (z+1)->start;
+ dprint(FD_ZBD, "%s: closing zone %u\n",
+ f->file_name, zbd_zone_idx(f, z));
+
+ memmove(f->zbd_info->open_zones + ozi,
+ f->zbd_info->open_zones + ozi + 1,
+ (ZBD_MAX_OPEN_ZONES - (ozi + 1)) *
+ sizeof(f->zbd_info->open_zones[0]));
+
+ f->zbd_info->num_open_zones--;
+ td->num_open_zones--;
+ z->open = 0;
}
/**
- * zbd_zone_capacity_end - Return zone capacity limit end location
- * @z: zone info pointer.
+ * zbd_reset_zones - Reset a range of zones.
+ * @td: fio thread data.
+ * @f: fio file for which to reset zones
+ * @zb: first zone to reset.
+ * @ze: first zone not to reset.
+ *
+ * Returns 0 upon success and 1 upon failure.
*/
-static inline uint64_t zbd_zone_capacity_end(const struct fio_zone_info *z)
+static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
+ struct fio_zone_info *const zb,
+ struct fio_zone_info *const ze)
{
- return z->start + z->capacity;
+ struct fio_zone_info *z;
+ const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
+ int res = 0;
+
+ assert(min_bs);
+
+ dprint(FD_ZBD, "%s: examining zones %u .. %u\n",
+ f->file_name, zbd_zone_idx(f, zb), zbd_zone_idx(f, ze));
+
+ for (z = zb; z < ze; z++) {
+ if (!z->has_wp)
+ continue;
+
+ zone_lock(td, f, z);
+ pthread_mutex_lock(&f->zbd_info->mutex);
+ zbd_close_zone(td, f, z);
+ pthread_mutex_unlock(&f->zbd_info->mutex);
+
+ if (z->wp != z->start) {
+ dprint(FD_ZBD, "%s: resetting zone %u\n",
+ f->file_name, zbd_zone_idx(f, z));
+ if (zbd_reset_zone(td, f, z) < 0)
+ res = 1;
+ }
+
+ zone_unlock(z);
+ }
+
+ return res;
}
/**
- * zbd_zone_full - verify whether a minimum number of bytes remain in a zone
- * @f: file pointer.
- * @z: zone info pointer.
- * @required: minimum number of bytes that must remain in a zone.
+ * zbd_get_max_open_zones - Get the maximum number of open zones
+ * @td: FIO thread data
+ * @f: FIO file for which to get max open zones
+ * @max_open_zones: Upon success, result will be stored here.
*
- * The caller must hold z->mutex.
+ * A @max_open_zones value set to zero means no limit.
+ *
+ * Returns 0 upon success and a negative error code upon failure.
*/
-static bool zbd_zone_full(const struct fio_file *f, struct fio_zone_info *z,
- uint64_t required)
+static int zbd_get_max_open_zones(struct thread_data *td, struct fio_file *f,
+ unsigned int *max_open_zones)
{
- assert((required & 511) == 0);
+ int ret;
- return z->has_wp &&
- z->wp + required > zbd_zone_capacity_end(z);
+ if (td->io_ops && td->io_ops->get_max_open_zones)
+ ret = td->io_ops->get_max_open_zones(td, f, max_open_zones);
+ else
+ ret = blkzoned_get_max_open_zones(td, f, max_open_zones);
+ if (ret < 0) {
+ td_verror(td, errno, "get max open zones failed");
+ log_err("%s: get max open zones failed (%d).\n",
+ f->file_name, errno);
+ }
+
+ return ret;
}
-static void zone_lock(struct thread_data *td, const struct fio_file *f,
- struct fio_zone_info *z)
+/**
+ * zbd_open_zone - Add a zone to the array of open zones.
+ * @td: fio thread data.
+ * @f: fio file that has the zone to open.
+ * @z: Zone to add to the array of open zones.
+ *
+ * Open a ZBD zone if it is not already open. Returns true if either the zone
+ * was already open or if the zone was successfully added to the array of open
+ * zones without exceeding the maximum number of open zones. Returns false if
+ * the zone was not already open and opening the zone would cause the zone limit
+ * to be exceeded.
+ */
+static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
+ struct fio_zone_info *z)
{
- struct zoned_block_device_info *zbd = f->zbd_info;
- uint32_t nz = z - zbd->zone_info;
+ const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
+ struct zoned_block_device_info *zbdi = f->zbd_info;
+ uint32_t zone_idx = zbd_zone_idx(f, z);
+ bool res = true;
- /* A thread should never lock zones outside its working area. */
- assert(f->min_zone <= nz && nz < f->max_zone);
+ if (z->cond == ZBD_ZONE_COND_OFFLINE)
+ return false;
- assert(z->has_wp);
+ /*
+ * Skip full zones with data verification enabled because resetting a
+ * zone causes data loss and hence causes verification to fail.
+ */
+ if (td->o.verify != VERIFY_NONE && zbd_zone_full(f, z, min_bs))
+ return false;
/*
- * Lock the io_u target zone. The zone will be unlocked if io_u offset
- * is changed or when io_u completes and zbd_put_io() executed.
- * To avoid multiple jobs doing asynchronous I/Os from deadlocking each
- * other waiting for zone locks when building an io_u batch, first
- * only trylock the zone. If the zone is already locked by another job,
- * process the currently queued I/Os so that I/O progress is made and
- * zones unlocked.
+ * zbdi->max_open_zones == 0 means that there is no limit on the maximum
+ * number of open zones. In this case, do not track open zones in the
+ * zbdi->open_zones array.
*/
- if (pthread_mutex_trylock(&z->mutex) != 0) {
- if (!td_ioengine_flagged(td, FIO_SYNCIO))
- io_u_quiesce(td);
- pthread_mutex_lock(&z->mutex);
+ if (!zbdi->max_open_zones)
+ return true;
+
+ pthread_mutex_lock(&zbdi->mutex);
+
+ if (z->open) {
+ /*
+ * If the zone is going to be completely filled by writes
+ * already in-flight, handle it as a full zone instead of an
+ * open zone.
+ */
+ if (z->wp >= zbd_zone_capacity_end(z))
+ res = false;
+ goto out;
}
-}
-static inline void zone_unlock(struct fio_zone_info *z)
-{
- int ret;
+ res = false;
+ /* Zero means no limit */
+ if (td->o.job_max_open_zones > 0 &&
+ td->num_open_zones >= td->o.job_max_open_zones)
+ goto out;
+ if (zbdi->num_open_zones >= zbdi->max_open_zones)
+ goto out;
- assert(z->has_wp);
- ret = pthread_mutex_unlock(&z->mutex);
- assert(!ret);
-}
+ dprint(FD_ZBD, "%s: opening zone %u\n",
+ f->file_name, zone_idx);
-static bool is_valid_offset(const struct fio_file *f, uint64_t offset)
-{
- return (uint64_t)(offset - f->file_offset) < f->io_size;
-}
+ zbdi->open_zones[zbdi->num_open_zones++] = zone_idx;
+ td->num_open_zones++;
+ z->open = 1;
+ res = true;
-static inline struct fio_zone_info *get_zone(const struct fio_file *f,
- unsigned int zone_nr)
-{
- return &f->zbd_info->zone_info[zone_nr];
+out:
+ pthread_mutex_unlock(&zbdi->mutex);
+ return res;
}
/* Verify whether direct I/O is used for all host-managed zoned drives. */
uint32_t zone_idx, zone_idx_b, zone_idx_e;
assert(f->zbd_info);
+
if (f->io_size == 0)
return false;
- zone_idx_b = zbd_zone_idx(f, f->file_offset);
- zone_idx_e = zbd_zone_idx(f, f->file_offset + f->io_size - 1);
+
+ zone_idx_b = zbd_offset_to_zone_idx(f, f->file_offset);
+ zone_idx_e =
+ zbd_offset_to_zone_idx(f, f->file_offset + f->io_size - 1);
for (zone_idx = zone_idx_b; zone_idx <= zone_idx_e; zone_idx++)
- if (get_zone(f, zone_idx)->has_wp)
+ if (zbd_get_zone(f, zone_idx)->has_wp)
return true;
- return false;
+ return false;
+}
+
+/*
+ * Verify whether the file offset and size parameters are aligned with zone
+ * boundaries. If the file offset is not aligned, round it up to the start of
+ * the next zone, and round the end of the I/O region down to a zone boundary,
+ * adjusting the file io_size parameter accordingly.
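+ *
+ * For example (values for illustration only): with 256 MiB zones, a write
+ * job with offset=100M and io_size=1G ends up with offset=256M and
+ * io_size=768M, i.e. exactly three whole zones.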
+ */
+static bool zbd_zone_align_file_sizes(struct thread_data *td,
+ struct fio_file *f)
+{
+ const struct fio_zone_info *z;
+ uint64_t new_offset, new_end;
+
+ if (!f->zbd_info)
+ return true;
+ if (f->file_offset >= f->real_file_size)
+ return true;
+ if (!zbd_is_seq_job(f))
+ return true;
+
+ if (!td->o.zone_size) {
+ td->o.zone_size = f->zbd_info->zone_size;
+ if (!td->o.zone_size) {
+ log_err("%s: invalid 0 zone size\n",
+ f->file_name);
+ return false;
+ }
+ } else if (td->o.zone_size != f->zbd_info->zone_size) {
+ log_err("%s: zonesize %llu does not match the device zone size %"PRIu64".\n",
+ f->file_name, td->o.zone_size,
+ f->zbd_info->zone_size);
+ return false;
+ }
+
+ if (td->o.zone_skip % td->o.zone_size) {
+ log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
+ f->file_name, td->o.zone_skip,
+ td->o.zone_size);
+ return false;
+ }
+
+ z = zbd_offset_to_zone(f, f->file_offset);
+ if ((f->file_offset != z->start) &&
+ (td->o.td_ddir != TD_DDIR_READ)) {
+ new_offset = zbd_zone_end(z);
+ if (new_offset >= f->file_offset + f->io_size) {
+ log_info("%s: io_size must be at least one zone\n",
+ f->file_name);
+ return false;
+ }
+ log_info("%s: rounded up offset from %"PRIu64" to %"PRIu64"\n",
+ f->file_name, f->file_offset,
+ new_offset);
+ f->io_size -= (new_offset - f->file_offset);
+ f->file_offset = new_offset;
+ }
+
+ z = zbd_offset_to_zone(f, f->file_offset + f->io_size);
+ new_end = z->start;
+ if ((td->o.td_ddir != TD_DDIR_READ) &&
+ (f->file_offset + f->io_size != new_end)) {
+ if (new_end <= f->file_offset) {
+ log_info("%s: io_size must be at least one zone\n",
+ f->file_name);
+ return false;
+ }
+ log_info("%s: rounded down io_size from %"PRIu64" to %"PRIu64"\n",
+ f->file_name, f->io_size,
+ new_end - f->file_offset);
+ f->io_size = new_end - f->file_offset;
+ }
+
+ return true;
}
/*
*/
static bool zbd_verify_sizes(void)
{
- const struct fio_zone_info *z;
struct thread_data *td;
struct fio_file *f;
- uint64_t new_offset, new_end;
- uint32_t zone_idx;
int i, j;
for_each_td(td, i) {
for_each_file(td, f, j) {
- if (!f->zbd_info)
- continue;
- if (f->file_offset >= f->real_file_size)
- continue;
- if (!zbd_is_seq_job(f))
- continue;
-
- if (!td->o.zone_size) {
- td->o.zone_size = f->zbd_info->zone_size;
- if (!td->o.zone_size) {
- log_err("%s: invalid 0 zone size\n",
- f->file_name);
- return false;
- }
- } else if (td->o.zone_size != f->zbd_info->zone_size) {
- log_err("%s: job parameter zonesize %llu does not match disk zone size %"PRIu64".\n",
- f->file_name, td->o.zone_size,
- f->zbd_info->zone_size);
- return false;
- }
-
- if (td->o.zone_skip % td->o.zone_size) {
- log_err("%s: zoneskip %llu is not a multiple of the device zone size %llu.\n",
- f->file_name, td->o.zone_skip,
- td->o.zone_size);
+ if (!zbd_zone_align_file_sizes(td, f))
return false;
- }
-
- zone_idx = zbd_zone_idx(f, f->file_offset);
- z = get_zone(f, zone_idx);
- if ((f->file_offset != z->start) &&
- (td->o.td_ddir != TD_DDIR_READ)) {
- new_offset = zbd_zone_end(z);
- if (new_offset >= f->file_offset + f->io_size) {
- log_info("%s: io_size must be at least one zone\n",
- f->file_name);
- return false;
- }
- log_info("%s: rounded up offset from %"PRIu64" to %"PRIu64"\n",
- f->file_name, f->file_offset,
- new_offset);
- f->io_size -= (new_offset - f->file_offset);
- f->file_offset = new_offset;
- }
- zone_idx = zbd_zone_idx(f, f->file_offset + f->io_size);
- z = get_zone(f, zone_idx);
- new_end = z->start;
- if ((td->o.td_ddir != TD_DDIR_READ) &&
- (f->file_offset + f->io_size != new_end)) {
- if (new_end <= f->file_offset) {
- log_info("%s: io_size must be at least one zone\n",
- f->file_name);
- return false;
- }
- log_info("%s: rounded down io_size from %"PRIu64" to %"PRIu64"\n",
- f->file_name, f->io_size,
- new_end - f->file_offset);
- f->io_size = new_end - f->file_offset;
- }
}
}
if (!f->zbd_info)
continue;
+
zone_size = f->zbd_info->zone_size;
if (td_trim(td) && td->o.bs[DDIR_TRIM] != zone_size) {
log_info("%s: trim block size %llu is not the zone size %"PRIu64"\n",
goto out;
}
- dprint(FD_ZBD, "Device %s has %d zones of size %"PRIu64" KB\n", f->file_name,
- nr_zones, zone_size / 1024);
+ dprint(FD_ZBD, "Device %s has %d zones of size %"PRIu64" KB\n",
+ f->file_name, nr_zones, zone_size / 1024);
zbd_info = scalloc(1, sizeof(*zbd_info) +
(nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
PTHREAD_MUTEX_RECURSIVE);
p->start = z->start;
p->capacity = z->capacity;
+
switch (z->cond) {
case ZBD_ZONE_COND_NOT_WP:
case ZBD_ZONE_COND_FULL:
offset = z->start + z->len;
if (j >= nr_zones)
break;
+
nrz = zbd_report_zones(td, f, offset, zones,
min((uint32_t)(nr_zones - j),
ZBD_REPORT_MAX_ZONES));
/* Ensure that the limit is not larger than FIO's internal limit */
if (zbd->max_open_zones > ZBD_MAX_OPEN_ZONES) {
td_verror(td, EINVAL, "'max_open_zones' value is too large");
- log_err("'max_open_zones' value is larger than %u\n", ZBD_MAX_OPEN_ZONES);
+ log_err("'max_open_zones' value is larger than %u\n",
+ ZBD_MAX_OPEN_ZONES);
return -EINVAL;
}
ret = zbd_create_zone_info(td, file);
if (ret < 0)
td_verror(td, -ret, "zbd_create_zone_info() failed");
+
return ret;
}
-static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
- uint32_t zone_idx);
-static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
- struct fio_zone_info *z);
-
int zbd_init_files(struct thread_data *td)
{
struct fio_file *f;
if (zbd_init_zone_info(td, f))
return 1;
}
+
return 0;
}
for_each_file(td, f, i) {
struct zoned_block_device_info *zbd = f->zbd_info;
- // zonemode=strided doesn't get per-file zone size.
- uint64_t zone_size = zbd ? zbd->zone_size : td->o.zone_size;
+ uint64_t zone_size;
+ /* zonemode=strided doesn't get per-file zone size. */
+ zone_size = zbd ? zbd->zone_size : td->o.zone_size;
if (zone_size == 0)
continue;
- if (td->o.size_nz > 0) {
+ if (td->o.size_nz > 0)
td->o.size = td->o.size_nz * zone_size;
- }
- if (td->o.io_size_nz > 0) {
+ if (td->o.io_size_nz > 0)
td->o.io_size = td->o.io_size_nz * zone_size;
- }
- if (td->o.start_offset_nz > 0) {
+ if (td->o.start_offset_nz > 0)
td->o.start_offset = td->o.start_offset_nz * zone_size;
- }
- if (td->o.offset_increment_nz > 0) {
- td->o.offset_increment = td->o.offset_increment_nz * zone_size;
- }
- if (td->o.zone_skip_nz > 0) {
+ if (td->o.offset_increment_nz > 0)
+ td->o.offset_increment =
+ td->o.offset_increment_nz * zone_size;
+ if (td->o.zone_skip_nz > 0)
td->o.zone_skip = td->o.zone_skip_nz * zone_size;
- }
}
}
assert(zbd);
- f->min_zone = zbd_zone_idx(f, f->file_offset);
- f->max_zone = zbd_zone_idx(f, f->file_offset + f->io_size);
+ f->min_zone = zbd_offset_to_zone_idx(f, f->file_offset);
+ f->max_zone =
+ zbd_offset_to_zone_idx(f, f->file_offset + f->io_size);
/*
* When all zones in the I/O range are conventional, io_size
if (z->cond != ZBD_ZONE_COND_IMP_OPEN &&
z->cond != ZBD_ZONE_COND_EXP_OPEN)
continue;
- if (zbd_open_zone(td, f, zi))
+ if (zbd_open_zone(td, f, z))
continue;
/*
* If the number of open zones exceeds specified limits,
return 0;
}
-static inline unsigned int zbd_zone_nr(const struct fio_file *f,
- struct fio_zone_info *zone)
-{
- return zone - f->zbd_info->zone_info;
-}
-
-/**
- * zbd_reset_zone - reset the write pointer of a single zone
- * @td: FIO thread data.
- * @f: FIO file associated with the disk for which to reset a write pointer.
- * @z: Zone to reset.
- *
- * Returns 0 upon success and a negative error code upon failure.
- *
- * The caller must hold z->mutex.
- */
-static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
- struct fio_zone_info *z)
-{
- uint64_t offset = z->start;
- uint64_t length = (z+1)->start - offset;
- uint64_t data_in_zone = z->wp - z->start;
- int ret = 0;
-
- if (!data_in_zone)
- return 0;
-
- assert(is_valid_offset(f, offset + length - 1));
-
- dprint(FD_ZBD, "%s: resetting wp of zone %u.\n", f->file_name,
- zbd_zone_nr(f, z));
- switch (f->zbd_info->model) {
- case ZBD_HOST_AWARE:
- case ZBD_HOST_MANAGED:
- ret = zbd_reset_wp(td, f, offset, length);
- if (ret < 0)
- return ret;
- break;
- default:
- break;
- }
-
- pthread_mutex_lock(&f->zbd_info->mutex);
- f->zbd_info->sectors_with_data -= data_in_zone;
- f->zbd_info->wp_sectors_with_data -= data_in_zone;
- pthread_mutex_unlock(&f->zbd_info->mutex);
- z->wp = z->start;
- z->verify_block = 0;
-
- td->ts.nr_zone_resets++;
-
- return ret;
-}
-
-/* The caller must hold f->zbd_info->mutex */
-static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
- unsigned int zone_idx)
-{
- uint32_t open_zone_idx = 0;
-
- for (; open_zone_idx < f->zbd_info->num_open_zones; open_zone_idx++) {
- if (f->zbd_info->open_zones[open_zone_idx] == zone_idx)
- break;
- }
- if (open_zone_idx == f->zbd_info->num_open_zones)
- return;
-
- dprint(FD_ZBD, "%s: closing zone %d\n", f->file_name, zone_idx);
- memmove(f->zbd_info->open_zones + open_zone_idx,
- f->zbd_info->open_zones + open_zone_idx + 1,
- (ZBD_MAX_OPEN_ZONES - (open_zone_idx + 1)) *
- sizeof(f->zbd_info->open_zones[0]));
- f->zbd_info->num_open_zones--;
- td->num_open_zones--;
- get_zone(f, zone_idx)->open = 0;
-}
-
-/*
- * Reset a range of zones. Returns 0 upon success and 1 upon failure.
- * @td: fio thread data.
- * @f: fio file for which to reset zones
- * @zb: first zone to reset.
- * @ze: first zone not to reset.
- */
-static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
- struct fio_zone_info *const zb,
- struct fio_zone_info *const ze)
-{
- struct fio_zone_info *z;
- const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
- int res = 0;
-
- assert(min_bs);
-
- dprint(FD_ZBD, "%s: examining zones %u .. %u\n", f->file_name,
- zbd_zone_nr(f, zb), zbd_zone_nr(f, ze));
- for (z = zb; z < ze; z++) {
- uint32_t nz = zbd_zone_nr(f, z);
-
- if (!z->has_wp)
- continue;
- zone_lock(td, f, z);
- pthread_mutex_lock(&f->zbd_info->mutex);
- zbd_close_zone(td, f, nz);
- pthread_mutex_unlock(&f->zbd_info->mutex);
- if (z->wp != z->start) {
- dprint(FD_ZBD, "%s: resetting zone %u\n",
- f->file_name, zbd_zone_nr(f, z));
- if (zbd_reset_zone(td, f, z) < 0)
- res = 1;
- }
- zone_unlock(z);
- }
-
- return res;
-}
-
/*
* Reset zbd_info.write_cnt, the counter that counts down towards the next
* zone reset.
uint64_t swd = 0;
uint64_t wp_swd = 0;
- zb = get_zone(f, f->min_zone);
- ze = get_zone(f, f->max_zone);
+ zb = zbd_get_zone(f, f->min_zone);
+ ze = zbd_get_zone(f, f->max_zone);
for (z = zb; z < ze; z++) {
if (z->has_wp) {
zone_lock(td, f, z);
}
swd += z->wp - z->start;
}
+
pthread_mutex_lock(&f->zbd_info->mutex);
switch (a) {
case CHECK_SWD:
break;
}
pthread_mutex_unlock(&f->zbd_info->mutex);
+
for (z = zb; z < ze; z++)
if (z->has_wp)
zone_unlock(z);
if (!f->zbd_info || !td_write(td))
return;
- zb = get_zone(f, f->min_zone);
- ze = get_zone(f, f->max_zone);
+ zb = zbd_get_zone(f, f->min_zone);
+ ze = zbd_get_zone(f, f->max_zone);
swd = zbd_process_swd(td, f, SET_SWD);
- dprint(FD_ZBD, "%s(%s): swd = %" PRIu64 "\n", __func__, f->file_name,
- swd);
+
+ dprint(FD_ZBD, "%s(%s): swd = %" PRIu64 "\n",
+ __func__, f->file_name, swd);
+
/*
* If data verification is enabled reset the affected zones before
* writing any data to avoid that a zone reset has to be issued while
zbd_reset_write_cnt(td, f);
}
-/* The caller must hold f->zbd_info->mutex. */
-static bool is_zone_open(const struct thread_data *td, const struct fio_file *f,
- unsigned int zone_idx)
-{
- struct zoned_block_device_info *zbdi = f->zbd_info;
- int i;
-
- /* This function should never be called when zbdi->max_open_zones == 0 */
- assert(zbdi->max_open_zones);
- assert(td->o.job_max_open_zones == 0 || td->num_open_zones <= td->o.job_max_open_zones);
- assert(td->o.job_max_open_zones <= zbdi->max_open_zones);
- assert(zbdi->num_open_zones <= zbdi->max_open_zones);
-
- for (i = 0; i < zbdi->num_open_zones; i++)
- if (zbdi->open_zones[i] == zone_idx)
- return true;
-
- return false;
-}
-
-/*
- * Open a ZBD zone if it was not yet open. Returns true if either the zone was
- * already open or if opening a new zone is allowed. Returns false if the zone
- * was not yet open and opening a new zone would cause the zone limit to be
- * exceeded.
- */
-static bool zbd_open_zone(struct thread_data *td, const struct fio_file *f,
- uint32_t zone_idx)
-{
- const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
- struct zoned_block_device_info *zbdi = f->zbd_info;
- struct fio_zone_info *z = get_zone(f, zone_idx);
- bool res = true;
-
- if (z->cond == ZBD_ZONE_COND_OFFLINE)
- return false;
-
- /*
- * Skip full zones with data verification enabled because resetting a
- * zone causes data loss and hence causes verification to fail.
- */
- if (td->o.verify != VERIFY_NONE && zbd_zone_full(f, z, min_bs))
- return false;
-
- /*
- * zbdi->max_open_zones == 0 means that there is no limit on the maximum
- * number of open zones. In this case, do no track open zones in
- * zbdi->open_zones array.
- */
- if (!zbdi->max_open_zones)
- return true;
-
- pthread_mutex_lock(&zbdi->mutex);
- if (is_zone_open(td, f, zone_idx)) {
- /*
- * If the zone is already open and going to be full by writes
- * in-flight, handle it as a full zone instead of an open zone.
- */
- if (z->wp >= zbd_zone_capacity_end(z))
- res = false;
- goto out;
- }
- res = false;
- /* Zero means no limit */
- if (td->o.job_max_open_zones > 0 &&
- td->num_open_zones >= td->o.job_max_open_zones)
- goto out;
- if (zbdi->num_open_zones >= zbdi->max_open_zones)
- goto out;
- dprint(FD_ZBD, "%s: opening zone %d\n", f->file_name, zone_idx);
- zbdi->open_zones[zbdi->num_open_zones++] = zone_idx;
- td->num_open_zones++;
- z->open = 1;
- res = true;
-
-out:
- pthread_mutex_unlock(&zbdi->mutex);
- return res;
-}
-
/* Return random zone index for one of the open zones. */
static uint32_t pick_random_zone_idx(const struct fio_file *f,
const struct io_u *io_u)
{
- return (io_u->offset - f->file_offset) * f->zbd_info->num_open_zones /
- f->io_size;
+ return (io_u->offset - f->file_offset) *
+ f->zbd_info->num_open_zones / f->io_size;
}
static bool any_io_in_flight(void)
*/
zone_idx = zbdi->open_zones[pick_random_zone_idx(f, io_u)];
} else {
- zone_idx = zbd_zone_idx(f, io_u->offset);
+ zone_idx = zbd_offset_to_zone_idx(f, io_u->offset);
}
if (zone_idx < f->min_zone)
zone_idx = f->min_zone;
else if (zone_idx >= f->max_zone)
zone_idx = f->max_zone - 1;
- dprint(FD_ZBD, "%s(%s): starting from zone %d (offset %lld, buflen %lld)\n",
+
+ dprint(FD_ZBD,
+ "%s(%s): starting from zone %d (offset %lld, buflen %lld)\n",
__func__, f->file_name, zone_idx, io_u->offset, io_u->buflen);
/*
for (;;) {
uint32_t tmp_idx;
- z = get_zone(f, zone_idx);
+ z = zbd_get_zone(f, zone_idx);
if (z->has_wp)
zone_lock(td, f, z);
+
pthread_mutex_lock(&zbdi->mutex);
+
if (z->has_wp) {
if (z->cond != ZBD_ZONE_COND_OFFLINE &&
- zbdi->max_open_zones == 0 && td->o.job_max_open_zones == 0)
+ zbdi->max_open_zones == 0 &&
+ td->o.job_max_open_zones == 0)
goto examine_zone;
if (zbdi->num_open_zones == 0) {
dprint(FD_ZBD, "%s(%s): no zones are open\n",
}
/*
- * List of opened zones is per-device, shared across all threads.
- * Start with quasi-random candidate zone.
- * Ignore zones which don't belong to thread's offset/size area.
+ * List of opened zones is per-device, shared across all
+ * threads. Start with quasi-random candidate zone. Ignore
+ * zones which don't belong to thread's offset/size area.
*/
open_zone_idx = pick_random_zone_idx(f, io_u);
assert(!open_zone_idx ||
open_zone_idx < zbdi->num_open_zones);
tmp_idx = open_zone_idx;
+
for (i = 0; i < zbdi->num_open_zones; i++) {
uint32_t tmpz;
dprint(FD_ZBD, "%s(%s): no candidate zone\n",
__func__, f->file_name);
+
pthread_mutex_unlock(&zbdi->mutex);
+
if (z->has_wp)
zone_unlock(z);
+
return NULL;
found_candidate_zone:
if (new_zone_idx == zone_idx)
break;
zone_idx = new_zone_idx;
+
pthread_mutex_unlock(&zbdi->mutex);
+
if (z->has_wp)
zone_unlock(z);
}
* zone close before opening a new zone.
*/
if (wait_zone_close) {
- dprint(FD_ZBD, "%s(%s): quiesce to allow open zones to close\n",
+ dprint(FD_ZBD,
+ "%s(%s): quiesce to allow open zones to close\n",
__func__, f->file_name);
io_u_quiesce(td);
}
if (!is_valid_offset(f, z->start)) {
/* Wrap-around. */
zone_idx = f->min_zone;
- z = get_zone(f, zone_idx);
+ z = zbd_get_zone(f, zone_idx);
}
assert(is_valid_offset(f, z->start));
if (!z->has_wp)
zone_lock(td, f, z);
if (z->open)
continue;
- if (zbd_open_zone(td, f, zone_idx))
+ if (zbd_open_zone(td, f, z))
goto out;
}
pthread_mutex_unlock(&zbdi->mutex);
zone_unlock(z);
- z = get_zone(f, zone_idx);
+ z = zbd_get_zone(f, zone_idx);
zone_lock(td, f, z);
if (z->wp + min_bs <= zbd_zone_capacity_end(z))
*/
in_flight = any_io_in_flight();
if (in_flight || should_retry) {
- dprint(FD_ZBD, "%s(%s): wait zone close and retry open zones\n",
+ dprint(FD_ZBD,
+ "%s(%s): wait zone close and retry open zones\n",
__func__, f->file_name);
pthread_mutex_unlock(&zbdi->mutex);
zone_unlock(z);
}
pthread_mutex_unlock(&zbdi->mutex);
+
zone_unlock(z);
- dprint(FD_ZBD, "%s(%s): did not open another zone\n", __func__,
- f->file_name);
+
+ dprint(FD_ZBD, "%s(%s): did not open another zone\n",
+ __func__, f->file_name);
+
return NULL;
out:
- dprint(FD_ZBD, "%s(%s): returning zone %d\n", __func__, f->file_name,
- zone_idx);
+ dprint(FD_ZBD, "%s(%s): returning zone %d\n",
+ __func__, f->file_name, zone_idx);
+
io_u->offset = z->start;
assert(z->has_wp);
assert(z->cond != ZBD_ZONE_COND_OFFLINE);
+
return z;
}
const struct fio_file *f = io_u->file;
const uint64_t min_bs = td->o.min_bs[DDIR_WRITE];
- if (!zbd_open_zone(td, f, zbd_zone_nr(f, z))) {
+ if (!zbd_open_zone(td, f, z)) {
zone_unlock(z);
z = zbd_convert_to_open_zone(td, io_u);
assert(z);
}
if (z->verify_block * min_bs >= z->capacity) {
- log_err("%s: %d * %"PRIu64" >= %"PRIu64"\n", f->file_name, z->verify_block,
- min_bs, z->capacity);
+ log_err("%s: %d * %"PRIu64" >= %"PRIu64"\n",
+ f->file_name, z->verify_block, min_bs, z->capacity);
/*
* If the assertion below fails during a test run, adding
* "--experimental_verify=1" to the command line may help.
*/
assert(false);
}
+
io_u->offset = z->start + z->verify_block * min_bs;
if (io_u->offset + io_u->buflen >= zbd_zone_capacity_end(z)) {
- log_err("%s: %llu + %llu >= %"PRIu64"\n", f->file_name, io_u->offset,
- io_u->buflen, zbd_zone_capacity_end(z));
+ log_err("%s: %llu + %llu >= %"PRIu64"\n",
+ f->file_name, io_u->offset, io_u->buflen,
+ zbd_zone_capacity_end(z));
assert(false);
}
z->verify_block += io_u->buflen / min_bs;
{
struct fio_file *f = io_u->file;
struct fio_zone_info *z1, *z2;
- const struct fio_zone_info *const zf = get_zone(f, f->min_zone);
+ const struct fio_zone_info *const zf = zbd_get_zone(f, f->min_zone);
/*
* Skip to the next non-empty zone in case of sequential I/O and to
} else if (!td_random(td)) {
break;
}
+
if (td_random(td) && z2 >= zf &&
z2->cond != ZBD_ZONE_COND_OFFLINE) {
if (z2->has_wp)
zone_unlock(z2);
}
}
- dprint(FD_ZBD, "%s: no zone has %"PRIu64" bytes of readable data\n",
+
+ dprint(FD_ZBD,
+ "%s: no zone has %"PRIu64" bytes of readable data\n",
f->file_name, min_bytes);
+
return NULL;
}
if (io_u->ddir == DDIR_WRITE &&
io_u->offset + io_u->buflen >= zbd_zone_capacity_end(z)) {
pthread_mutex_lock(&f->zbd_info->mutex);
- zbd_close_zone(td, f, zbd_zone_nr(f, z));
+ zbd_close_zone(td, f, z);
pthread_mutex_unlock(&f->zbd_info->mutex);
}
}
const struct fio_file *f = io_u->file;
struct zoned_block_device_info *zbd_info = f->zbd_info;
struct fio_zone_info *z;
- uint32_t zone_idx;
uint64_t zone_end;
assert(zbd_info);
- zone_idx = zbd_zone_idx(f, io_u->offset);
- assert(zone_idx < zbd_info->nr_zones);
- z = get_zone(f, zone_idx);
-
+ z = zbd_offset_to_zone(f, io_u->offset);
assert(z->has_wp);
if (!success)
dprint(FD_ZBD,
"%s: queued I/O (%lld, %llu) for zone %u\n",
- f->file_name, io_u->offset, io_u->buflen, zone_idx);
+ f->file_name, io_u->offset, io_u->buflen, zbd_zone_idx(f, z));
switch (io_u->ddir) {
case DDIR_WRITE:
zone_end = min((uint64_t)(io_u->offset + io_u->buflen),
zbd_zone_capacity_end(z));
- pthread_mutex_lock(&zbd_info->mutex);
+
/*
* z->wp > zone_end means that one or more I/O errors
* have occurred.
*/
+ pthread_mutex_lock(&zbd_info->mutex);
if (z->wp <= zone_end) {
zbd_info->sectors_with_data += zone_end - z->wp;
zbd_info->wp_sectors_with_data += zone_end - z->wp;
const struct fio_file *f = io_u->file;
struct zoned_block_device_info *zbd_info = f->zbd_info;
struct fio_zone_info *z;
- uint32_t zone_idx;
assert(zbd_info);
- zone_idx = zbd_zone_idx(f, io_u->offset);
- assert(zone_idx < zbd_info->nr_zones);
- z = get_zone(f, zone_idx);
-
+ z = zbd_offset_to_zone(f, io_u->offset);
assert(z->has_wp);
dprint(FD_ZBD,
"%s: terminate I/O (%lld, %llu) for zone %u\n",
- f->file_name, io_u->offset, io_u->buflen, zone_idx);
+ f->file_name, io_u->offset, io_u->buflen, zbd_zone_idx(f, z));
zbd_end_zone_io(td, io_u, z);
struct fio_file *f = io_u->file;
enum fio_ddir ddir = io_u->ddir;
struct fio_zone_info *z;
- uint32_t zone_idx;
assert(td->o.zone_mode == ZONE_MODE_ZBD);
assert(td->o.zone_size);
assert(f->zbd_info);
- zone_idx = zbd_zone_idx(f, f->last_pos[ddir]);
- z = get_zone(f, zone_idx);
+ z = zbd_offset_to_zone(f, f->last_pos[ddir]);
/*
* When the zone capacity is smaller than the zone size and the I/O is
* sequential write, skip to zone end if the latest position is at the
* zone capacity limit.
*/
- if (z->capacity < f->zbd_info->zone_size && !td_random(td) &&
- ddir == DDIR_WRITE &&
+ if (z->capacity < f->zbd_info->zone_size &&
+ !td_random(td) && ddir == DDIR_WRITE &&
f->last_pos[ddir] >= zbd_zone_capacity_end(z)) {
dprint(FD_ZBD,
"%s: Jump from zone capacity limit to zone end:"
" (%"PRIu64" -> %"PRIu64") for zone %u (%"PRIu64")\n",
f->file_name, f->last_pos[ddir],
- zbd_zone_end(z), zone_idx, z->capacity);
+ zbd_zone_end(z), zbd_zone_idx(f, z), z->capacity);
td->io_skip_bytes += zbd_zone_end(z) - f->last_pos[ddir];
f->last_pos[ddir] = zbd_zone_end(z);
}
{
struct fio_file *f = io_u->file;
struct zoned_block_device_info *zbdi = f->zbd_info;
- uint32_t zone_idx_b;
struct fio_zone_info *zb, *zl, *orig_zb;
uint32_t orig_len = io_u->buflen;
uint64_t min_bs = td->o.min_bs[io_u->ddir];
assert(min_bs);
assert(is_valid_offset(f, io_u->offset));
assert(io_u->buflen);
- zone_idx_b = zbd_zone_idx(f, io_u->offset);
- zb = get_zone(f, zone_idx_b);
+
+ zb = zbd_offset_to_zone(f, io_u->offset);
orig_zb = zb;
if (!zb->has_wp) {
/* Accept non-write I/Os for conventional zones. */
if (io_u->ddir != DDIR_WRITE)
return io_u_accept;
+
/*
* Make sure that writes to conventional zones
* don't cross over to any sequential zones.
"%s: off=%llu + min_bs=%"PRIu64" > next zone %"PRIu64"\n",
f->file_name, io_u->offset,
min_bs, (zb + 1)->start);
- io_u->offset = zb->start + (zb + 1)->start - io_u->offset;
- new_len = min(io_u->buflen, (zb + 1)->start - io_u->offset);
+ io_u->offset =
+ zb->start + (zb + 1)->start - io_u->offset;
+ new_len = min(io_u->buflen,
+ (zb + 1)->start - io_u->offset);
} else {
new_len = (zb + 1)->start - io_u->offset;
}
+
io_u->buflen = new_len / min_bs * min_bs;
+
return io_u_accept;
}
zb = zbd_replay_write_order(td, io_u, zb);
goto accept;
}
+
/*
* Check that there is enough written data in the zone to do an
* I/O of at least min_bs B. If there isn't, find a new zone for
if (range < min_bs ||
((!td_random(td)) && (io_u->offset + min_bs > zb->wp))) {
zone_unlock(zb);
- zl = get_zone(f, f->max_zone);
+ zl = zbd_get_zone(f, f->max_zone);
zb = zbd_find_zone(td, io_u, min_bs, zb, zl);
if (!zb) {
dprint(FD_ZBD,
if (!td_random(td))
io_u->offset = zb->start;
}
+
/*
* Make sure the I/O is within the zone valid data range while
* maximizing the I/O size and preserving randomness.
io_u->offset = zb->start +
((io_u->offset - orig_zb->start) %
(range - io_u->buflen)) / min_bs * min_bs;
+
/*
* When zbd_find_zone() returns a conventional zone,
* we can simply accept the new i/o offset here.
*/
if (!zb->has_wp)
return io_u_accept;
+
/*
* Make sure the I/O does not cross over the zone wp position.
*/
dprint(FD_IO, "Changed length from %u into %llu\n",
orig_len, io_u->buflen);
}
+
assert(zb->start <= io_u->offset);
assert(io_u->offset + io_u->buflen <= zb->wp);
+
goto accept;
+
case DDIR_WRITE:
if (io_u->buflen > zbdi->zone_size) {
td_verror(td, EINVAL, "I/O buflen exceeds zone size");
f->file_name, io_u->buflen, zbdi->zone_size);
goto eof;
}
- if (!zbd_open_zone(td, f, zone_idx_b)) {
+
+ if (!zbd_open_zone(td, f, zb)) {
zone_unlock(zb);
zb = zbd_convert_to_open_zone(td, io_u);
if (!zb) {
goto eof;
}
}
+
/* Check whether the zone reset threshold has been exceeded */
if (td->o.zrf.u.f) {
- if (zbdi->wp_sectors_with_data >=
- f->io_size * td->o.zrt.u.f &&
- zbd_dec_and_reset_write_cnt(td, f)) {
+ if (zbdi->wp_sectors_with_data >= f->io_size * td->o.zrt.u.f &&
+ zbd_dec_and_reset_write_cnt(td, f))
zb->reset_zone = 1;
- }
}
+
/* Reset the zone pointer if necessary */
if (zb->reset_zone || zbd_zone_full(f, zb, min_bs)) {
assert(td->o.verify == VERIFY_NONE);
goto eof;
}
}
+
/* Make writes occur at the write pointer */
assert(!zbd_zone_full(f, zb, min_bs));
io_u->offset = zb->wp;
f->file_name, io_u->offset);
goto eof;
}
+
/*
* Make sure that the buflen is a multiple of the minimal
* block size. Give up if shrinking would make the request too
orig_len, io_u->buflen);
goto accept;
}
+
td_verror(td, EIO, "zone remainder too small");
log_err("zone remainder %lld smaller than min block size %"PRIu64"\n",
(zbd_zone_capacity_end(zb) - io_u->offset), min_bs);
+
goto eof;
+
case DDIR_TRIM:
/* Check random trim targets a non-empty zone */
if (!td_random(td) || zb->wp > zb->start)
/* Find out a non-empty zone to trim */
zone_unlock(zb);
- zl = get_zone(f, f->max_zone);
+ zl = zbd_get_zone(f, f->max_zone);
zb = zbd_find_zone(td, io_u, 1, zb, zl);
if (zb) {
io_u->offset = zb->start;
f->file_name, io_u->offset);
goto accept;
}
+
goto eof;
+
case DDIR_SYNC:
/* fall-through */
case DDIR_DATASYNC:
assert(zb->cond != ZBD_ZONE_COND_OFFLINE);
assert(!io_u->zbd_queue_io);
assert(!io_u->zbd_put_io);
+
io_u->zbd_queue_io = zbd_queue_io;
io_u->zbd_put_io = zbd_put_io;
+
/*
* Since we return with the zone lock still held,
* add an annotation to let Coverity know that it
* is intentional.
*/
/* coverity[missing_unlock] */
+
return io_u_accept;
eof:
if (zb && zb->has_wp)
zone_unlock(zb);
+
return io_u_eof;
}
{
struct fio_file *f = io_u->file;
struct fio_zone_info *z;
- uint32_t zone_idx;
int ret;
- zone_idx = zbd_zone_idx(f, io_u->offset);
- z = get_zone(f, zone_idx);
-
+ z = zbd_offset_to_zone(f, io_u->offset);
if (!z->has_wp)
return 0;
if (io_u->offset != z->start) {
- log_err("Trim offset not at zone start (%lld)\n", io_u->offset);
+ log_err("Trim offset not at zone start (%lld)\n",
+ io_u->offset);
return -EINVAL;
}