From 7d42e66ec189ed87b247cef20797d84bf6ec4e0e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 25 May 2020 20:29:22 +0300 Subject: [PATCH] engines: pvsync2 libaio io_uring: add support for RWF_NOWAIT Add bool option "nowait" into engines which could support this feature. Add test for libaio >= 0.3.111 into configure script to be compatible with prior versions where field iocb.aio_rw_flags was declared as "__pad2". By default if a request cannot be executed immediately (e.g. resource starvation, waiting on locks) it is queued and the initiating process will be blocked until the required resource becomes free. This option sets the RWF_NOWAIT flag (supported from the 4.14 Linux kernel) and the call will return instantly with EAGAIN or a partial result rather than waiting. It is useful to also use ignore_error=EAGAIN when using this option. Note: glibc 2.27, 2.28 have a bug in syscall wrappers preadv2, pwritev2. They return EOPNOTSUPP instead of EAGAIN. For cached I/O, using this option usually means a request operates only with cached data. Currently the RWF_NOWAIT flag is not supported for cached writes. For direct I/O, requests will only succeed if cache invalidation isn't required, file blocks are fully allocated and the disk request could be issued immediately. Signed-off-by: Konstantin Khlebnikov Link: https://lwn.net/Articles/724631/ --- HOWTO | 20 ++++++++++++++++++++ configure | 20 ++++++++++++++++++++ engines/io_uring.c | 12 ++++++++++++ engines/libaio.c | 22 +++++++++++++++++++++- engines/sync.c | 12 ++++++++++++ fio.1 | 16 ++++++++++++++++ os/os-linux.h | 3 +++ 7 files changed, 104 insertions(+), 1 deletion(-) diff --git a/HOWTO b/HOWTO index 9e71a619..9e339bb8 100644 --- a/HOWTO +++ b/HOWTO @@ -2097,6 +2097,26 @@ with the caveat that when used on the command line, they must come after the When hipri is set this determines the probability of a pvsync2 I/O being high priority. The default is 100%. +.. 
option:: nowait : [pvsync2] [libaio] [io_uring] + + By default if a request cannot be executed immediately (e.g. resource starvation, + waiting on locks) it is queued and the initiating process will be blocked until + the required resource becomes free. + + This option sets the RWF_NOWAIT flag (supported from the 4.14 Linux kernel) and + the call will return instantly with EAGAIN or a partial result rather than waiting. + + It is useful to also use ignore_error=EAGAIN when using this option. + + Note: glibc 2.27, 2.28 have a bug in syscall wrappers preadv2, pwritev2. + They return EOPNOTSUPP instead of EAGAIN. + + For cached I/O, using this option usually means a request operates only with + cached data. Currently the RWF_NOWAIT flag is not supported for cached writes. + + For direct I/O, requests will only succeed if cache invalidation isn't required, + file blocks are fully allocated and the disk request could be issued immediately. + .. option:: cpuload=int : [cpuio] Attempt to use the specified percentage of CPU cycles. 
This is a mandatory diff --git a/configure b/configure index cf8b88e4..3ee8aaf2 100755 --- a/configure +++ b/configure @@ -617,8 +617,25 @@ EOF libaio=no libaio_uring=no fi + + cat > $TMPC < +#include +int main(void) +{ + io_prep_preadv2(NULL, 0, NULL, 0, 0, 0); + io_prep_pwritev2(NULL, 0, NULL, 0, 0, 0); + return 0; +} +EOF + if compile_prog "" "" "libaio rw flags" ; then + libaio_rw_flags=yes + else + libaio_rw_flags=no + fi fi print_config "Linux AIO support" "$libaio" +print_config "Linux AIO support rw flags" "$libaio_rw_flags" print_config "Linux AIO over io_uring" "$libaio_uring" ########################################## @@ -2646,6 +2663,9 @@ if test "$zlib" = "yes" ; then fi if test "$libaio" = "yes" ; then output_sym "CONFIG_LIBAIO" + if test "$libaio_rw_flags" = "yes" ; then + output_sym "CONFIG_LIBAIO_RW_FLAGS" + fi if test "$libaio_uring" = "yes" ; then output_sym "CONFIG_LIBAIO_URING" fi diff --git a/engines/io_uring.c b/engines/io_uring.c index ac57af8f..cab7ecaf 100644 --- a/engines/io_uring.c +++ b/engines/io_uring.c @@ -80,6 +80,7 @@ struct ioring_options { unsigned int sqpoll_cpu; unsigned int nonvectored; unsigned int uncached; + unsigned int nowait; }; static const int ddir_to_op[2][2] = { @@ -185,6 +186,15 @@ static struct fio_option options[] = { .category = FIO_OPT_C_ENGINE, .group = FIO_OPT_G_IOURING, }, + { + .name = "nowait", + .lname = "RWF_NOWAIT", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct ioring_options, nowait), + .help = "Use RWF_NOWAIT for reads/writes", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_IOURING, + }, { .name = NULL, }, @@ -235,6 +245,8 @@ static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u) } if (!td->o.odirect && o->uncached) sqe->rw_flags = RWF_UNCACHED; + if (o->nowait) + sqe->rw_flags |= RWF_NOWAIT; if (ld->ioprio_class_set) sqe->ioprio = td->o.ioprio_class << 13; if (ld->ioprio_set) diff --git a/engines/libaio.c b/engines/libaio.c index 299798ae..daa576da 100644 --- 
a/engines/libaio.c +++ b/engines/libaio.c @@ -21,6 +21,11 @@ #define IOCB_FLAG_IOPRIO (1 << 1) #endif +/* Hack for libaio < 0.3.111 */ +#ifndef CONFIG_LIBAIO_RW_FLAGS +#define aio_rw_flags __pad2 +#endif + static int fio_libaio_commit(struct thread_data *td); static int fio_libaio_init(struct thread_data *td); @@ -51,6 +56,7 @@ struct libaio_options { void *pad; unsigned int userspace_reap; unsigned int cmdprio_percentage; + unsigned int nowait; }; static struct fio_option options[] = { @@ -83,6 +89,15 @@ static struct fio_option options[] = { .help = "Your platform does not support I/O priority classes", }, #endif + { + .name = "nowait", + .lname = "RWF_NOWAIT", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct libaio_options, nowait), + .help = "Set RWF_NOWAIT for reads/writes", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_LIBAIO, + }, { .name = NULL, }, @@ -97,15 +112,20 @@ static inline void ring_inc(struct libaio_data *ld, unsigned int *val, *val = (*val + add) % ld->entries; } -static int fio_libaio_prep(struct thread_data fio_unused *td, struct io_u *io_u) +static int fio_libaio_prep(struct thread_data *td, struct io_u *io_u) { + struct libaio_options *o = td->eo; struct fio_file *f = io_u->file; struct iocb *iocb = &io_u->iocb; if (io_u->ddir == DDIR_READ) { io_prep_pread(iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset); + if (o->nowait) + iocb->aio_rw_flags |= RWF_NOWAIT; } else if (io_u->ddir == DDIR_WRITE) { io_prep_pwrite(iocb, f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset); + if (o->nowait) + iocb->aio_rw_flags |= RWF_NOWAIT; } else if (ddir_sync(io_u->ddir)) io_prep_fsync(iocb, f->fd); diff --git a/engines/sync.c b/engines/sync.c index 65fd210c..339ba999 100644 --- a/engines/sync.c +++ b/engines/sync.c @@ -40,6 +40,7 @@ struct psyncv2_options { unsigned int hipri; unsigned int hipri_percentage; unsigned int uncached; + unsigned int nowait; }; static struct fio_option options[] = { @@ -73,6 +74,15 @@ static struct 
fio_option options[] = { .category = FIO_OPT_C_ENGINE, .group = FIO_OPT_G_INVALID, }, + { + .name = "nowait", + .lname = "RWF_NOWAIT", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct psyncv2_options, nowait), + .help = "Set RWF_NOWAIT for pwritev2/preadv2", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, { .name = NULL, }, @@ -164,6 +174,8 @@ static enum fio_q_status fio_pvsyncio2_queue(struct thread_data *td, flags |= RWF_HIPRI; if (!td->o.odirect && o->uncached) flags |= RWF_UNCACHED; + if (o->nowait) + flags |= RWF_NOWAIT; iov->iov_base = io_u->xfer_buf; iov->iov_len = io_u->xfer_buflen; diff --git a/fio.1 b/fio.1 index 47bc1592..f469c46e 100644 --- a/fio.1 +++ b/fio.1 @@ -1857,6 +1857,22 @@ than normal. When hipri is set this determines the probability of a pvsync2 I/O being high priority. The default is 100%. .TP +.BI (pvsync2,libaio,io_uring)nowait +By default if a request cannot be executed immediately (e.g. resource starvation, +waiting on locks) it is queued and the initiating process will be blocked until +the required resource becomes free. +This option sets the RWF_NOWAIT flag (supported from the 4.14 Linux kernel) and +the call will return instantly with EAGAIN or a partial result rather than waiting. + +It is useful to also use \fBignore_error\fR=EAGAIN when using this option. +Note: glibc 2.27, 2.28 have a bug in syscall wrappers preadv2, pwritev2. +They return EOPNOTSUPP instead of EAGAIN. + +For cached I/O, using this option usually means a request operates only with +cached data. Currently the RWF_NOWAIT flag is not supported for cached writes. +For direct I/O, requests will only succeed if cache invalidation isn't required, +file blocks are fully allocated and the disk request could be issued immediately. +.TP .BI (cpuio)cpuload \fR=\fPint Attempt to use the specified percentage of CPU cycles. This is a mandatory option when using cpuio I/O engine. 
diff --git a/os/os-linux.h b/os/os-linux.h index 0f0bcc3a..6ec7243d 100644 --- a/os/os-linux.h +++ b/os/os-linux.h @@ -325,6 +325,9 @@ static inline int fio_set_sched_idle(void) #ifndef RWF_SYNC #define RWF_SYNC 0x00000004 #endif +#ifndef RWF_NOWAIT +#define RWF_NOWAIT 0x00000008 +#endif #ifndef RWF_UNCACHED #define RWF_UNCACHED 0x00000040 -- 2.25.1