summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/build.yml52
-rw-r--r--.github/workflows/shellcheck.yml20
-rw-r--r--.gitignore14
-rw-r--r--.travis.yml22
-rwxr-xr-xconfigure60
-rwxr-xr-xdebian/rules2
-rw-r--r--examples/Makefile8
-rw-r--r--examples/io_uring-cp.c5
-rw-r--r--examples/link-cp.c4
-rw-r--r--liburing.spec2
-rw-r--r--man/io_uring.740
-rw-r--r--man/io_uring_enter.2319
-rw-r--r--man/io_uring_register.2216
-rw-r--r--man/io_uring_setup.267
-rw-r--r--src/Makefile9
-rw-r--r--src/include/liburing.h279
-rw-r--r--src/include/liburing/io_uring.h87
-rw-r--r--src/liburing.map7
-rw-r--r--src/queue.c14
-rw-r--r--src/register.c110
-rw-r--r--src/syscall.c16
-rw-r--r--src/syscall.h14
-rw-r--r--test/35fa71a030ca-test.c3
-rw-r--r--test/Makefile88
-rw-r--r--test/accept.c77
-rw-r--r--test/connect.c55
-rw-r--r--test/cq-overflow.c13
-rw-r--r--test/d4ae271dfaae-test.c9
-rw-r--r--test/defer.c39
-rw-r--r--test/double-poll-crash.c4
-rw-r--r--test/eeed8b54e0df-test.c11
-rw-r--r--test/empty-eownerdead.c45
-rw-r--r--test/exec-target.c4
-rw-r--r--test/fadvise.c4
-rw-r--r--test/fallocate.c9
-rw-r--r--test/fc2a85cb02ef-test.c7
-rw-r--r--test/file-register.c22
-rw-r--r--test/file-update.c1
-rw-r--r--test/file-verify.c628
-rw-r--r--test/fsync.c17
-rw-r--r--test/hardlink.c136
-rw-r--r--test/helpers.c19
-rw-r--r--test/helpers.h6
-rw-r--r--test/io-cancel.c30
-rw-r--r--test/io_uring_enter.c5
-rw-r--r--test/io_uring_register.c146
-rw-r--r--test/io_uring_setup.c10
-rw-r--r--test/iopoll.c66
-rw-r--r--test/link-timeout.c25
-rw-r--r--test/link.c53
-rw-r--r--test/link_drain.c3
-rw-r--r--test/madvise.c3
-rw-r--r--test/mkdir.c108
-rw-r--r--test/multicqes_drain.c2
-rw-r--r--test/openat2.c225
-rw-r--r--test/poll-cancel-ton.c5
-rw-r--r--test/poll-mshot-update.c74
-rw-r--r--test/read-write.c55
-rw-r--r--test/ring-leak2.c1
-rw-r--r--test/rsrc_tags.c449
-rwxr-xr-xtest/runtests-loop.sh4
-rwxr-xr-xtest/runtests.sh81
-rw-r--r--test/send_recv.c3
-rw-r--r--test/send_recvmsg.c1
-rw-r--r--test/sendmsg_fs_cve.c8
-rw-r--r--test/socket-rw-eagain.c6
-rw-r--r--test/sq-poll-dup.c7
-rw-r--r--test/sq-poll-kthread.c14
-rw-r--r--test/sq-poll-share.c6
-rw-r--r--test/sqpoll-cancel-hang.c156
-rw-r--r--test/submit-link-fail.c154
-rw-r--r--test/submit-reuse.c25
-rw-r--r--test/symlink.c116
-rw-r--r--test/thread-exit.c10
-rw-r--r--test/timeout-new.c4
-rw-r--r--test/timeout.c119
76 files changed, 3908 insertions, 630 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..a8c0503
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,52 @@
+name: Build test
+
+on:
+ # Trigger the workflow on push or pull requests.
+ push:
+ pull_request:
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - cc: gcc
+ cxx: g++
+ - cc: clang
+ cxx: clang++
+
+ env:
+ FLAGS: -g -O2 -Wall -Wextra -Werror
+
+ steps:
+ - name: Checkout source
+ uses: actions/checkout@v2
+
+ - name: Display compiler versions
+ run: |
+ ${{matrix.cc}} --version;
+ ${{matrix.cxx}} --version;
+
+ - name: Build
+ run: |
+ ./configure --cc=${{matrix.cc}} --cxx=${{matrix.cxx}};
+ make V=1 -j$(nproc) \
+ CPPFLAGS="-Werror" \
+ CFLAGS="$FLAGS" \
+ CXXFLAGS="$FLAGS";
+
+ - name: Build (32 bit)
+ run: |
+ sudo apt-get install libc6-dev-i386 gcc-multilib g++-multilib -y;
+ make clean;
+ make V=1 -j$(nproc) \
+ CPPFLAGS="-Werror" \
+ CFLAGS="$FLAGS -m32" \
+ CXXFLAGS="$FLAGS -m32";
+
+ - name: Test install command
+ run: |
+ sudo make install;
diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
new file mode 100644
index 0000000..8873f0b
--- /dev/null
+++ b/.github/workflows/shellcheck.yml
@@ -0,0 +1,20 @@
+name: Shellcheck
+
+on:
+ # Trigger the workflow on push or pull requests.
+ push:
+ pull_request:
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout source
+ uses: actions/checkout@v2
+
+ - name: Display shellcheck version
+ run: shellcheck --version
+
+ - name: Shellcheck execution
+ run: shellcheck test/runtest*.sh
diff --git a/.gitignore b/.gitignore
index 17ec415..fb3a859 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@
/test/defer
/test/double-poll-crash
/test/eeed8b54e0df-test
+/test/empty-eownerdead
/test/eventfd
/test/eventfd-disable
/test/eventfd-ring
@@ -53,10 +54,12 @@
/test/fc2a85cb02ef-test
/test/file-register
/test/file-update
+/test/file-verify
/test/files-exit-hang-poll
/test/files-exit-hang-timeout
/test/fixed-link
/test/fsync
+/test/hardlink
/test/io-cancel
/test/io_uring_enter
/test/io_uring_register
@@ -68,6 +71,7 @@
/test/link-timeout
/test/link_drain
/test/madvise
+/test/mkdir
/test/nop
/test/nop-all-sizes
/test/open-close
@@ -111,6 +115,7 @@
/test/statx
/test/stdout
/test/submit-reuse
+/test/symlink
/test/teardowns
/test/thread-exit
/test/timeout
@@ -118,7 +123,16 @@
/test/timeout-overflow
/test/unlink
/test/wakeup-hang
+/test/multicqes_drain
+/test/poll-mshot-update
+/test/rsrc_tags
+/test/rw_merge_test
+/test/sqpoll-cancel-hang
+/test/testfile
+/test/submit-link-fail
+/test/exec-target
/test/*.dmesg
+/test/output/
config-host.h
config-host.mak
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index e02fdd0..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-language: cpp
-os:
- - linux
-compiler:
- - clang
- - gcc
-env:
- matrix:
- - BUILD_ARCH="x86"
- - BUILD_ARCH="x86_64"
- global:
- - MAKEFLAGS="-j 2"
-matrix:
- exclude:
- - os: linux
- compiler: clang
- env: BUILD_ARCH="x86" # Only do the gcc x86 build to reduce clutter
-before_install:
- - EXTRA_CFLAGS="-Werror"
-script:
- - ./configure && make
- - sudo make runtests || true
diff --git a/configure b/configure
index 3b96cde..a7caa07 100755
--- a/configure
+++ b/configure
@@ -69,6 +69,8 @@ Options: [defaults in brackets after descriptions]
--libdevdir=PATH install development libraries in PATH [$libdevdir]
--mandir=PATH install man pages in PATH [$mandir]
--datadir=PATH install shared data in PATH [$datadir]
+ --cc=CMD use CMD as the C compiler
+ --cxx=CMD use CMD as the C++ compiler
EOF
exit 0
fi
@@ -192,6 +194,37 @@ print_and_output_mak "mandir" "$mandir"
print_and_output_mak "datadir" "$datadir"
##########################################
+# check for compiler -Wstringop-overflow
+stringop_overflow="no"
+cat > $TMPC << EOF
+#include <linux/fs.h>
+int main(int argc, char **argv)
+{
+ return 0;
+}
+EOF
+if compile_prog "-Werror -Wstringop-overflow=0" "" "stringop_overflow"; then
+ stringop_overflow="yes"
+fi
+print_config "stringop_overflow" "$stringop_overflow"
+
+##########################################
+# check for compiler -Warryr-bounds
+array_bounds="no"
+cat > $TMPC << EOF
+#include <linux/fs.h>
+int main(int argc, char **argv)
+{
+ return 0;
+}
+EOF
+if compile_prog "-Werror -Warray-bounds=0" "" "array_bounds"; then
+ array_bounds="yes"
+fi
+print_config "array_bounds" "$array_bounds"
+
+
+##########################################
# check for __kernel_rwf_t
__kernel_rwf_t="no"
cat > $TMPC << EOF
@@ -296,6 +329,7 @@ int main(int argc, char **argv)
{
ucontext_t ctx;
getcontext(&ctx);
+ makecontext(&ctx, 0, 0);
return 0;
}
EOF
@@ -304,6 +338,23 @@ if compile_prog "" "" "has_ucontext"; then
fi
print_config "has_ucontext" "$has_ucontext"
+##########################################
+# check for memfd_create(2)
+has_memfd_create="no"
+cat > $TMPC << EOF
+#define _GNU_SOURCE
+#include <sys/mman.h>
+int main(int argc, char **argv)
+{
+ int memfd = memfd_create("test", 0);
+ return 0;
+}
+EOF
+if compile_prog "-Werror=implicit-function-declaration" "" "has_memfd_create"; then
+ has_memfd_create="yes"
+fi
+print_config "has_memfd_create" "$has_memfd_create"
+
#############################################################################
@@ -325,6 +376,15 @@ fi
if test "$has_ucontext" = "yes"; then
output_sym "CONFIG_HAVE_UCONTEXT"
fi
+if test "$stringop_overflow" = "yes"; then
+ output_sym "CONFIG_HAVE_STRINGOP_OVERFLOW"
+fi
+if test "$array_bounds" = "yes"; then
+ output_sym "CONFIG_HAVE_ARRAY_BOUNDS"
+fi
+if test "$has_memfd_create" = "yes"; then
+ output_sym "CONFIG_HAVE_MEMFD_CREATE"
+fi
echo "CC=$cc" >> $config_host_mak
print_config "CC" "$cc"
diff --git a/debian/rules b/debian/rules
index 385755c..1a334b3 100755
--- a/debian/rules
+++ b/debian/rules
@@ -51,7 +51,7 @@ install-arch: check-arch
DESTDIR=$(CURDIR)/debian/tmp \
libdir=/lib/$(DEB_HOST_MULTIARCH) \
libdevdir=/usr/lib/$(DEB_HOST_MULTIARCH) \
- relativelibdir=/lib/$(DEB_HOST_MULTIARCH)
+ relativelibdir=/lib/$(DEB_HOST_MULTIARCH)/
binary: binary-indep binary-arch
diff --git a/examples/Makefile b/examples/Makefile
index 60c1b71..d3c5000 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,8 +1,8 @@
CPPFLAGS ?=
override CPPFLAGS += -D_GNU_SOURCE -I../src/include/
-CFLAGS ?= -g -O2
-XCFLAGS =
-override CFLAGS += -Wall -L../src/
+CFLAGS ?= -g -O2 -Wall
+LDFLAGS ?=
+override LDFLAGS += -L../src/ -luring
include ../Makefile.quiet
@@ -23,7 +23,7 @@ test_srcs := io_uring-test.c io_uring-cp.c link-cp.c
test_objs := $(patsubst %.c,%.ol,$(test_srcs))
%: %.c
- $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< -luring $(XCFLAGS)
+ $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(LDFLAGS)
clean:
@rm -f $(all_targets) $(test_objs)
diff --git a/examples/io_uring-cp.c b/examples/io_uring-cp.c
index 2a44c30..9322575 100644
--- a/examples/io_uring-cp.c
+++ b/examples/io_uring-cp.c
@@ -127,7 +127,8 @@ static int copy_file(struct io_uring *ring, off_t insize)
writes = reads = offset = 0;
while (insize || write_left) {
- int had_reads, got_comp;
+ unsigned long had_reads;
+ int got_comp;
/*
* Queue up as many reads as we can
@@ -194,7 +195,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
fprintf(stderr, "cqe failed: %s\n",
strerror(-cqe->res));
return 1;
- } else if (cqe->res != data->iov.iov_len) {
+ } else if ((size_t)cqe->res != data->iov.iov_len) {
/* Short read/write, adjust and requeue */
data->iov.iov_base += cqe->res;
data->iov.iov_len -= cqe->res;
diff --git a/examples/link-cp.c b/examples/link-cp.c
index e15dfc3..244d21a 100644
--- a/examples/link-cp.c
+++ b/examples/link-cp.c
@@ -26,7 +26,7 @@ struct io_data {
};
static int infd, outfd;
-static unsigned inflight;
+static int inflight;
static int setup_context(unsigned entries, struct io_uring *ring)
{
@@ -115,7 +115,7 @@ static int handle_cqe(struct io_uring *ring, struct io_uring_cqe *cqe)
static int copy_file(struct io_uring *ring, off_t insize)
{
struct io_uring_cqe *cqe;
- size_t this_size;
+ off_t this_size;
off_t offset;
offset = 0;
diff --git a/liburing.spec b/liburing.spec
index 0268d23..7eb5731 100644
--- a/liburing.spec
+++ b/liburing.spec
@@ -1,5 +1,5 @@
Name: liburing
-Version: 2.0
+Version: 2.1
Release: 1%{?dist}
Summary: Linux-native io_uring I/O access library
License: (GPLv2 with exceptions and LGPLv2+) or MIT
diff --git a/man/io_uring.7 b/man/io_uring.7
index a63b3e9..dc76f42 100644
--- a/man/io_uring.7
+++ b/man/io_uring.7
@@ -84,17 +84,35 @@ a read operation under
.BR io_uring ,
started with the
.BR IORING_OP_READ
-operation,
-which issues the equivalent of the
+operation, issues the equivalent of the
.BR read (2)
-system call,
-would return as part of
+system call. In practice, it mixes the semantics of
+.BR pread (2)
+and
+.BR preadv2 (2)
+in that it takes an explicit offset, and supports using -1 for the offset to
+indicate that the current file position should be used instead of passing in
+an explicit offset. See the opcode documentation for more details. Given that
+io_uring is an async interface,
+.I errno
+is never used for passing back error information. Instead,
.I res
-what
-.BR read (2)
-would have returned if called directly,
-without using
-.BR io_uring .
+will contain what the equivalent system call would have returned in case
+of success, and in case of error
+.I res
+will contain
+.I -errno .
+For example, if the normal read system call would have returned -1 and set
+.I errno
+to
+.B EINVAL ,
+then
+.I res
+would contain
+.B -EINVAL .
+If the normal system call would have returned a read size of 1024, then
+.I res
+would contain 1024.
.IP \(bu
Optionally,
.BR io_uring_enter (2)
@@ -425,7 +443,7 @@ successful read and update of the head.
Because of the shared ring buffers between kernel and user space,
.B io_uring
can be a zero-copy system.
-Copying buffers to and fro becomes necessary when system calls that
+Copying buffers to and from becomes necessary when system calls that
transfer data between kernel and user space are involved.
But since the bulk of the communication in
.B io_uring
@@ -435,7 +453,7 @@ this huge performance overhead is completely avoided.
While system calls may not seem like a significant overhead,
in high performance applications,
making a lot of them will begin to matter.
-While workarounds the operating system has in place to deal with Specter
+While workarounds the operating system has in place to deal with Spectre
and Meltdown are ideally best done away with,
unfortunately,
some of these workarounds are around the system call interface,
diff --git a/man/io_uring_enter.2 b/man/io_uring_enter.2
index f898ffd..6b31061 100644
--- a/man/io_uring_enter.2
+++ b/man/io_uring_enter.2
@@ -55,6 +55,45 @@ application can no longer get a free SQE entry to submit, without knowing
when it one becomes available as the SQ kernel thread consumes them. If
the system call is used with this flag set, then it will wait until at least
one entry is free in the SQ ring.
+.TP
+.B IORING_ENTER_EXT_ARG
+Since kernel 5.11, the system calls arguments have been modified to look like
+the following:
+
+.nf
+.BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
+.BI " unsigned int " min_complete ", unsigned int " flags ,
+.BI " const void *" arg ", size_t " argsz );
+.fi
+
+which is behaves just like the original definition by default. However, if
+.B IORING_ENTER_EXT_ARG
+is set, then instead of a
+.I sigset_t
+being passed in, a pointer to a
+.I struct io_uring_getevents_arg
+is used instead and
+.I argsz
+must be set to the size of this structure. The definition is as follows:
+
+.nf
+.BI "struct io_uring_getevents_args {
+.BI " __u64 sigmask;
+.BI " __u32 sigmask_sz;
+.BI " __u32 pad;
+.BI " __u64 ts;
+.BI "};
+.fi
+
+which allows passing in both a signal mask as well as pointer to a
+.I struct __kernel_timespec
+timeout value. If
+.I ts
+is set to a valid pointer, then this time value indicates the timeout for
+waiting on events. If an application is waiting on events and wishes to
+stop waiting after a specified amount of time, then this can be accomplished
+directly in version 5.11 and newer by using this feature.
+
.PP
.PP
If the io_uring instance was configured for polling, by specifying
@@ -159,22 +198,28 @@ struct io_uring_sqe {
__u32 statx_flags;
__u32 fadvise_advice;
__u32 splice_flags;
+ __u32 rename_flags;
+ __u32 unlink_flags;
+ __u32 hardlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
- struct {
- /* index into fixed buffers, if used */
+ struct {
+ /* index into fixed buffers, if used */
union {
/* index into fixed buffers, if used */
__u16 buf_index;
/* for grouped buffer selection */
__u16 buf_group;
}
- /* personality to use, if used */
- __u16 personality;
+ /* personality to use, if used */
+ __u16 personality;
+ union {
__s32 splice_fd_in;
+ __u32 file_index;
};
- __u64 __pad2[3];
+ };
+ __u64 __pad2[3];
};
};
.EE
@@ -195,6 +240,9 @@ Vectored read and write operations, similar to
.BR preadv2 (2)
and
.BR pwritev2 (2).
+If the file is not seekable,
+.I off
+must be set to zero.
.TP
.B IORING_OP_READ_FIXED
@@ -225,11 +273,55 @@ specified in the
.I poll_events
field. Unlike poll or epoll without
.BR EPOLLONESHOT ,
-this interface always works in one shot mode. That is, once the poll
-operation is completed, it will have to be resubmitted. This command works like
+by default this interface always works in one shot mode. That is, once the poll
+operation is completed, it will have to be resubmitted.
+
+If
+.B IORING_POLL_ADD_MULTI
+is set in the SQE
+.I len
+field, then the poll will work in multi shot mode instead. That means it'll
+repatedly trigger when the requested event becomes true, and hence multiple
+CQEs can be generated from this single SQE. The CQE
+.I flags
+field will have
+.B IORING_CQE_F_MORE
+set on completion if the application should expect further CQE entries from
+the original request. If this flag isn't set on completion, then the poll
+request has been terminated and no further events will be generated. This mode
+is available since 5.13.
+
+If
+.B IORING_POLL_UPDATE_EVENTS
+is set in the SQE
+.I len
+field, then the request will update an existing poll request with the mask of
+events passed in with this request. The lookup is based on the
+.I user_data
+field of the original SQE submitted, and this values is passed in the
+.I addr
+field of the SQE. This mode is available since 5.13.
+
+If
+.B IORING_POLL_UPDATE_USER_DATA
+is set in the SQE
+.I len
+field, then the request will update the
+.I user_data
+of an existing poll request based on the value passed in the
+.I off
+field. This mode is available since 5.13.
+
+This command works like
an async
.BR poll(2)
-and the completion event result is the returned mask of events.
+and the completion event result is the returned mask of events. For the
+variants that update
+.I user_data
+or
+.I events
+, the completion result will be similar to
+.B IORING_OP_POLL_REMOVE.
.TP
.B IORING_OP_POLL_REMOVE
@@ -240,7 +332,10 @@ field of the
will contain 0. If not found,
.I res
will contain
-.B -ENOENT.
+.B -ENOENT,
+or
+.B -EALREADY
+if the poll request was in the process of completing already.
.TP
.B IORING_OP_EPOLL_CTL
@@ -343,6 +438,28 @@ the timeout was cancelled before it expired, the request will complete with
.I -ECANCELED.
Available since 5.4.
+Since 5.15, this command also supports the following modifiers in
+.I timeout_flags:
+
+.PP
+.in +12
+.B IORING_TIMEOUT_BOOTTIME
+If set, then the clocksource used is
+.I CLOCK_BOOTTIME
+instead of
+.I CLOCK_MONOTONIC.
+This clocksource differs in that it includes time elapsed if the system was
+suspend while having a timeout request in-flight.
+
+.B IORING_TIMEOUT_REALTIME
+If set, then the clocksource used is
+.I CLOCK_BOOTTIME
+instead of
+.I CLOCK_MONOTONIC.
+.EE
+.in
+.PP
+
.TP
.B IORING_OP_TIMEOUT_REMOVE
If
@@ -367,13 +484,14 @@ If
.I timeout_flags
contain
.I IORING_TIMEOUT_UPDATE,
-instead of removing an existing operation it updates it.
+instead of removing an existing operation, it updates it.
.I addr
and return values are same as before.
.I addr2
field must contain a pointer to a struct timespec64 structure.
.I timeout_flags
-may also contain IORING_TIMEOUT_ABS.
+may also contain IORING_TIMEOUT_ABS, in which case the value given is an
+absolute one, not a relative one.
Available since 5.11.
.TP
@@ -386,10 +504,31 @@ must be set to the socket file descriptor,
.I addr
must contain the pointer to the sockaddr structure, and
.I addr2
-must contain a pointer to the socklen_t addrlen field. See also
+must contain a pointer to the socklen_t addrlen field. Flags can be passed using
+the
+.I accept_flags
+field. See also
.BR accept4(2)
for the general description of the related system call. Available since 5.5.
+If the
+.I file_index
+field is set to a positive number, the file won't be installed into the
+normal file table as usual but will be placed into the fixed file table at index
+.I file_index - 1.
+In this case, instead of returning a file descriptor, the result will contain
+either 0 on success or an error. If the index points to a valid empty slot, the
+installation is guaranteed to not fail. If there is already a file in the slot,
+it will be replaced, similar to
+.B IORING_OP_FILES_UPDATE.
+Please note that only io_uring has access to such files and no other syscall
+can use them. See
+.B IOSQE_FIXED_FILE
+and
+.B IORING_REGISTER_FILES.
+
+Available since 5.15.
+
.TP
.B IORING_OP_ASYNC_CANCEL
Attempt to cancel an already issued request.
@@ -513,6 +652,24 @@ is access mode of the file. See also
.BR openat(2)
for the general description of the related system call. Available since 5.6.
+If the
+.I file_index
+field is set to a positive number, the file won't be installed into the
+normal file table as usual but will be placed into the fixed file table at index
+.I file_index - 1.
+In this case, instead of returning a file descriptor, the result will contain
+either 0 on success or an error. If the index points to a valid empty slot, the
+installation is guaranteed to not fail. If there is already a file in the slot,
+it will be replaced, similar to
+.B IORING_OP_FILES_UPDATE.
+Please note that only io_uring has access to such files and no other syscall
+can use them. See
+.B IOSQE_FIXED_FILE
+and
+.B IORING_REGISTER_FILES.
+
+Available since 5.15.
+
.TP
.B IORING_OP_OPENAT2
Issue the equivalent of a
@@ -533,6 +690,24 @@ should be set to the address of the open_how structure. See also
.BR openat2(2)
for the general description of the related system call. Available since 5.6.
+If the
+.I file_index
+field is set to a positive number, the file won't be installed into the
+normal file table as usual but will be placed into the fixed file table at index
+.I file_index - 1.
+In this case, instead of returning a file descriptor, the result will contain
+either 0 on success or an error. If the index points to a valid empty slot, the
+installation is guaranteed to not fail. If there is already a file in the slot,
+it will be replaced, similar to
+.B IORING_OP_FILES_UPDATE.
+Please note that only io_uring has access to such files and no other syscall
+can use them. See
+.B IOSQE_FIXED_FILE
+and
+.B IORING_REGISTER_FILES.
+
+Available since 5.15.
+
.TP
.B IORING_OP_CLOSE
Issue the equivalent of a
@@ -576,16 +751,28 @@ for the general description of the related system call. Available since 5.6.
.TP
.B IORING_OP_WRITE
Issue the equivalent of a
-.BR read(2)
+.BR pread(2)
or
-.BR write(2)
+.BR pwrite(2)
system call.
.I fd
is the file descriptor to be operated on,
.I addr
-contains the buffer in question, and
+contains the buffer in question,
.I len
-contains the length of the IO operation. These are non-vectored versions of the
+contains the length of the IO operation, and
+.I offs
+contains the read or write offset. If
+.I fd
+does not refer to a seekable file,
+.I off
+must be set to zero. If
+.I offs
+is set to -1, the offset will use (and advance) the file position, like the
+.BR read(2)
+and
+.BR write(2)
+system calls. These are non-vectored versions of the
.B IORING_OP_READV
and
.B IORING_OP_WRITEV
@@ -759,6 +946,69 @@ being passed in to
.BR unlinkat(2).
Available since 5.11.
+.TP
+.B IORING_OP_MKDIRAT
+Issue the equivalent of a
+.BR mkdirat2(2)
+system call.
+.I fd
+should be set to the
+.I dirfd,
+.I addr
+should be set to the
+.I pathname,
+and
+.I len
+should be set to the
+.I mode
+being passed in to
+.BR mkdirat(2).
+Available since 5.15.
+
+.TP
+.B IORING_OP_SYMLINKAT
+Issue the equivalent of a
+.BR symlinkat2(2)
+system call.
+.I fd
+should be set to the
+.I newdirfd,
+.I addr
+should be set to the
+.I target
+and
+.I addr2
+should be set to the
+.I linkpath
+being passed in to
+.BR symlinkat(2).
+Available since 5.15.
+
+.TP
+.B IORING_OP_LINKAT
+Issue the equivalent of a
+.BR linkat2(2)
+system call.
+.I fd
+should be set to the
+.I olddirfd,
+.I addr
+should be set to the
+.I oldpath,
+.I len
+should be set to the
+.I newdirfd,
+.I addr2
+should be set to the
+.I newpath,
+and
+.I hardlink_flags
+should be set to the
+.I flags
+being passed in to
+.BR linkat(2).
+Available since 5.15.
+
.PP
The
.I flags
@@ -771,7 +1021,10 @@ is an index into the files array registered with the io_uring instance (see the
.B IORING_REGISTER_FILES
section of the
.BR io_uring_register (2)
-man page). Available since 5.1.
+man page). Note that this isn't always available for all commands. If used on
+a command that doesn't support fixed files, the SQE will error with
+.B -EBADF.
+Available since 5.1.
.TP
.B IOSQE_IO_DRAIN
When this flag is specified, the SQE will not be started before previously
@@ -814,7 +1067,7 @@ Used in conjunction with the
command, which registers a pool of buffers to be used by commands that read
or receive data. When buffers are registered for this use case, and this
flag is set in the command, io_uring will grab a buffer from this pool when
-the request is ready to receive or read data. If succesful, the resulting CQE
+the request is ready to receive or read data. If successful, the resulting CQE
will have
.B IORING_CQE_F_BUFFER
set in the flags part of the struct, and the upper
@@ -918,7 +1171,11 @@ is copied from the field of the same name in the submission queue
entry. The primary use case is to store data that the application
will need to access upon completion of this particular I/O. The
.I flags
-is reserved for future use.
+is used for certain commands, like
+.B IORING_OP_POLL_ADD
+or in conjunction with
+.B IOSQE_BUFFER_SELECT
+, see those entries.
.I res
is the operation-specific result, but io_uring-specific errors
(e.g. flags or opcode invalid) are returned through this field.
@@ -926,20 +1183,34 @@ They are described in section
.B CQE ERRORS.
.PP
For read and write opcodes, the
-return values match those documented in the
+return values match
+.I errno
+values documented in the
.BR preadv2 (2)
and
.BR pwritev2 (2)
-man pages.
-Return codes for the io_uring-specific opcodes are documented in the
-description of the opcodes above.
+man pages, with
+.I
+res
+holding the equivalent of
+.I -errno
+for error cases, or the transferred number of bytes in case the operation
+is successful. Hence both error and success return can be found in that
+field in the CQE. For other request types, the return values are documented
+in the matching man page for that type, or in the opcodes section above for
+io_uring-specific opcodes.
.PP
.SH RETURN VALUE
.BR io_uring_enter ()
returns the number of I/Os successfully consumed. This can be zero
if
.I to_submit
-was zero or if the submission queue was empty.
+was zero or if the submission queue was empty. Note that if the ring was
+created with
+.B IORING_SETUP_SQPOLL
+specified, then the return value will generally be the same as
+.I to_submit
+as submission happens outside the context of the system call.
The errors related to a submission queue entry will be returned through a
completion queue entry (see section
diff --git a/man/io_uring_register.2 b/man/io_uring_register.2
index 5326a87..887567f 100644
--- a/man/io_uring_register.2
+++ b/man/io_uring_register.2
@@ -88,14 +88,107 @@ then issuing a new call to
.BR io_uring_register ()
with the new buffers.
-Note that registering buffers will wait for the ring to idle. If the application
-currently has requests in-flight, the registration will wait for those to
-finish before proceeding.
+Note that before 5.13 registering buffers would wait for the ring to idle.
+If the application currently has requests in-flight, the registration will
+wait for those to finish before proceeding.
An application need not unregister buffers explicitly before shutting
down the io_uring instance. Available since 5.1.
.TP
+.B IORING_REGISTER_BUFFERS2
+Register buffers for I/O. Similar to
+.B IORING_REGISTER_BUFFERS
+but aims to have a more extensible ABI.
+
+.I arg
+points to a
+.I struct io_uring_rsrc_register,
+and
+.I nr_args
+should be set to the number of bytes in the structure.
+
+.PP
+.in +8n
+.EX
+struct io_uring_rsrc_register {
+ __u32 nr;
+ __u32 resv;
+ __u64 resv2;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+};
+
+.EE
+.in
+.PP
+
+.in +8n
+
+The
+.I data
+field contains a pointer to a
+.I struct iovec
+array of
+.I nr
+entries.
+The
+.I tags
+field should either be 0, then tagging is disabled, or point to an array
+of
+.I nr
+"tags" (unsigned 64 bit integers). If a tag is zero, then tagging for this
+particular resource (a buffer in this case) is disabled. Otherwise, after the
+resource had been unregistered and it's not used anymore, a CQE will be
+posted with
+.I user_data
+set to the specified tag and all other fields zeroed.
+
+Note that resource updates, e.g.
+.B IORING_REGISTER_BUFFERS_UPDATE,
+don't necessarily deallocate resources by the time it returns, but they might
+be held alive until all requests using it complete.
+
+Available since 5.13.
+
+.TP
+.B IORING_REGISTER_BUFFERS_UPDATE
+Updates registered buffers with new ones, either turning a sparse entry into
+a real one, or replacing an existing entry.
+
+.I arg
+must contain a pointer to a struct io_uring_rsrc_update2, which contains
+an offset on which to start the update, and an array of
+.I struct iovec.
+.I tags
+points to an array of tags.
+.I nr
+must contain the number of descriptors in the passed in arrays.
+See
+.B IORING_REGISTER_BUFFERS2
+for the resource tagging description.
+
+.PP
+.in +8n
+.EX
+
+struct io_uring_rsrc_update2 {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+ __u32 nr;
+ __u32 resv2;
+};
+.EE
+.in
+.PP
+
+.in +8n
+
+Available since 5.13.
+
+.TP
.B IORING_UNREGISTER_BUFFERS
This operation takes no argument, and
.I arg
@@ -128,17 +221,48 @@ See
.B IORING_REGISTER_FILES_UPDATE
for how to update files in place.
-Note that registering files will wait for the ring to idle. If the application
-currently has requests in-flight, the registration will wait for those to
-finish before proceeding. See
+Note that before 5.13 registering files would wait for the ring to idle.
+If the application currently has requests in-flight, the registration will
+wait for those to finish before proceeding. See
.B IORING_REGISTER_FILES_UPDATE
for how to update an existing set without that limitation.
Files are automatically unregistered when the io_uring instance is
-torn down. An application need only unregister if it wishes to
+torn down. An application needs only unregister if it wishes to
register a new set of fds. Available since 5.1.
.TP
+.B IORING_REGISTER_FILES2
+Register files for I/O. Similar to
+.B IORING_REGISTER_FILES.
+
+.I arg
+points to a
+.I struct io_uring_rsrc_register,
+and
+.I nr_args
+should be set to the number of bytes in the structure.
+
+The
+.I data
+field contains a pointer to an array of
+.I nr
+file descriptors (signed 32 bit integers).
+.I tags
+field should either be 0 or or point to an array of
+.I nr
+"tags" (unsigned 64 bit integers). See
+.B IORING_REGISTER_BUFFERS2
+for more info on resource tagging.
+
+Note that resource updates, e.g.
+.B IORING_REGISTER_FILES_UPDATE,
+don't necessarily deallocate resources, they might be held until all requests
+using that resource complete.
+
+Available since 5.13.
+
+.TP
.B IORING_REGISTER_FILES_UPDATE
This operation replaces existing files in the registered file set with new
ones, either turning a sparse entry (one where fd is equal to -1) into a
@@ -146,7 +270,9 @@ real one, removing an existing entry (new one is set to -1), or replacing
an existing entry with a new existing entry.
.I arg
-must contain a pointer to a struct io_uring_files_update, which contains
+must contain a pointer to a
+.I struct io_uring_files_update,
+which contains
an offset on which to start the update, and an array of file descriptors to
use for the update.
.I nr_args
@@ -158,6 +284,29 @@ File descriptors can be skipped if they are set to
Skipping an fd will not touch the file associated with the previous
fd at that index. Available since 5.12.
+.TP
+.B IORING_REGISTER_FILES_UPDATE2
+Similar to IORING_REGISTER_FILES_UPDATE, replaces existing files in the
+registered file set with new ones, either turning a sparse entry (one where
+fd is equal to -1) into a real one, removing an existing entry (new one is
+set to -1), or replacing an existing entry with a new existing entry.
+
+.I arg
+must contain a pointer to a
+.I struct io_uring_rsrc_update2,
+which contains
+an offset on which to start the update, and an array of file descriptors to
+use for the update stored in
+.I data.
+.I tags
+points to an array of tags.
+.I nr
+must contain the number of descriptors in the passed in arrays.
+See
+.B IORING_REGISTER_BUFFERS2
+for the resource tagging description.
+
+Available since 5.13.
.TP
.B IORING_UNREGISTER_FILES
@@ -292,6 +441,57 @@ must be specified in the call to
Available since 5.10.
+.TP
+.B IORING_REGISTER_IOWQ_AFF
+By default, async workers created by io_uring will inherit the CPU mask of its
+parent. This is usually all the CPUs in the system, unless the parent is being
+run with a limited set. If this isn't the desired outcome, the application
+may explicitly tell io_uring what CPUs the async workers may run on.
+.I arg
+must point to a
+.B cpu_set_t
+mask, and
+.I nr_args
+the byte size of that mask.
+
+Available since 5.14.
+
+.TP
+.B IORING_UNREGISTER_IOWQ_AFF
+Undoes a CPU mask previously set with
+.B IORING_REGISTER_IOWQ_AFF.
+Must not have
+.I arg
+or
+.I nr_args
+set.
+
+Available since 5.14.
+
+.TP
+.B IORING_REGISTER_IOWQ_MAX_WORKERS
+By default, io_uring limits the unbounded workers created to the maximum
+processor count set by
+.I RLIMIT_NPROC
+and the bounded workers is a function of the SQ ring size and the number
+of CPUs in the system. Sometimes this can be excessive (or too little, for
+bounded), and this command provides a way to change the count per ring (per NUMA
+node) instead.
+
+.I arg
+must be set to an
+.I unsigned int
+pointer to an array of two values, with the values in the array being set to
+the maximum count of workers per NUMA node. Index 0 holds the bounded worker
+count, and index 1 holds the unbounded worker count. On successful return, the
+passed in array will contain the previous maximum valyes for each type. If the
+count being passed in is 0, then this command returns the current maximum values
+and doesn't modify the current setting.
+.I nr_args
+must be set to 2, as the command takes two values.
+
+Available since 5.15.
+
.SH RETURN VALUE
On success,
diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index 3122313..88ce1f9 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -139,7 +139,10 @@ field of the
.IR "struct io_uring_params" .
This flag is only meaningful when
.B IORING_SETUP_SQPOLL
-is specified.
+is specified. When cgroup setting
+.I cpuset.cpus
+changes (typically in container environment), the bounded cpu set may be
+changed as well.
.TP
.B IORING_SETUP_CQSIZE
Create the completion queue with
@@ -199,22 +202,23 @@ If this flag is set, the two SQ and CQ rings can be mapped with a single
.I mmap(2)
call. The SQEs must still be allocated separately. This brings the necessary
.I mmap(2)
-calls down from three to two.
+calls down from three to two. Available since kernel 5.4.
.TP
.B IORING_FEAT_NODROP
If this flag is set, io_uring supports never dropping completion events.
If a completion event occurs and the CQ ring is full, the kernel stores
the event internally until such a time that the CQ ring has room for more
entries. If this overflow condition is entered, attempting to submit more
-IO with fail with the
+IO will fail with the
.B -EBUSY
error value, if it can't flush the overflown events to the CQ ring. If this
happens, the application must reap events from the CQ ring and attempt the
-submit again.
+submit again. Available since kernel 5.5.
.TP
.B IORING_FEAT_SUBMIT_STABLE
If this flag is set, applications can be certain that any data for
-async offload has been consumed when the kernel has consumed the SQE.
+async offload has been consumed when the kernel has consumed the SQE. Available
+since kernel 5.5.
.TP
.B IORING_FEAT_RW_CUR_POS
If this flag is set, applications can specify
@@ -234,7 +238,8 @@ with
== -1. It'll use (and update) the current file position. This obviously comes
with the caveat that if the application has multiple reads or writes in flight,
then the end result will not be as expected. This is similar to threads sharing
-a file descriptor and doing IO using the current file position.
+a file descriptor and doing IO using the current file position. Available since
+kernel 5.6.
.TP
.B IORING_FEAT_CUR_PERSONALITY
If this flag is set, then io_uring guarantees that both sync and async
@@ -250,7 +255,7 @@ still register different personalities through
io_uring_register(2)
with
.B IORING_REGISTER_PERSONALITY
-and specify the personality to use in the sqe.
+and specify the personality to use in the sqe. Available since kernel 5.6.
.TP
.B IORING_FEAT_FAST_POLL
If this flag is set, then io_uring supports using an internal poll mechanism
@@ -259,20 +264,62 @@ write data to a file no longer need to be punted to an async thread for
handling, instead they will begin operation when the file is ready. This is
similar to doing poll + read/write in userspace, but eliminates the need to do
so. If this flag is set, requests waiting on space/data consume a lot less
-resources doing so as they are not blocking a thread.
+resources doing so as they are not blocking a thread. Available since kernel
+5.7.
.TP
.B IORING_FEAT_POLL_32BITS
If this flag is set, the
.B IORING_OP_POLL_ADD
command accepts the full 32-bit range of epoll based flags. Most notably
.B EPOLLEXCLUSIVE
-which allows exclusive (waking single waiters) behavior.
+which allows exclusive (waking single waiters) behavior. Available since kernel
+5.9.
.TP
.B IORING_FEAT_SQPOLL_NONFIXED
If this flag is set, the
.B IORING_SETUP_SQPOLL
feature no longer requires the use of fixed files. Any normal file descriptor
-can be used for IO commands without needing registration.
+can be used for IO commands without needing registration. Available since
+kernel 5.11.
+.TP
+.B IORING_FEAT_ENTER_EXT_ARG
+If this flag is set, then the
+.BR io_uring_enter (2)
+system call supports passing in an extended argument instead of just the
+.IR "sigset_t"
+of earlier kernels. This.
+extended argument is of type
+.IR "struct io_uring_getevents_arg"
+and allows the caller to pass in both a
+.IR "sigset_t"
+and a timeout argument for waiting on events. The struct layout is as follows:
+.TP
+.in +8n
+.EX
+struct io_uring_getevents_arg {
+ __u64 sigmask;
+ __u32 sigmask_sz;
+ __u32 pad;
+ __u64 ts;
+};
+.EE
+
+and a pointer to this struct must be passed in if
+.B IORING_ENTER_EXT_ARG
+is set in the flags for the enter system call. Available since kernel 5.11.
+.TP
+.B IORING_FEAT_NATIVE_WORKERS
+If this flag is set, io_uring is using native workers for its async helpers.
+Previous kernels used kernel threads that assumed the identity of the
+original io_uring owning task, but later kernels will actively create what
+looks more like regular process threads instead. Available since kernel
+5.12.
+.TP
+.B IORING_FEAT_RSRC_TAGS
+If this flag is set, then io_uring supports a variety of features related
+to fixed files and buffers. In particular, it indicates that registered
+buffers can be updated in-place, whereas before the full set would have to
+be unregistered first. Available since kernel 5.13.
.PP
The rest of the fields in the
diff --git a/src/Makefile b/src/Makefile
index dfca826..5e46a9d 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,9 +4,10 @@ libdir ?= $(prefix)/lib
libdevdir ?= $(prefix)/lib
CPPFLAGS ?=
-override CPPFLAGS += -Iinclude/ -include ../config-host.h
-CFLAGS ?= -g -fomit-frame-pointer -O2
-override CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare
+override CPPFLAGS += -D_GNU_SOURCE \
+ -Iinclude/ -include ../config-host.h
+CFLAGS ?= -g -fomit-frame-pointer -O2 -Wall -Wextra
+override CFLAGS += -Wno-unused-parameter -Wno-sign-compare
SO_CFLAGS=-fPIC $(CFLAGS)
L_CFLAGS=$(CFLAGS)
LINK_FLAGS=
@@ -14,7 +15,7 @@ LINK_FLAGS+=$(LDFLAGS)
ENABLE_SHARED ?= 1
soname=liburing.so.2
-minor=0
+minor=1
micro=0
libname=$(soname).$(minor).$(micro)
all_targets += liburing.a
diff --git a/src/include/liburing.h b/src/include/liburing.h
index d3f8f91..38dbbfe 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -14,6 +14,7 @@
#include <stdbool.h>
#include <inttypes.h>
#include <time.h>
+#include <sched.h>
#include <linux/swab.h>
#include "liburing/compat.h"
#include "liburing/io_uring.h"
@@ -86,15 +87,15 @@ struct io_uring {
* return an allocated io_uring_probe structure, or NULL if probe fails (for
* example, if it is not available). The caller is responsible for freeing it
*/
-extern struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
+struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
/* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
-extern struct io_uring_probe *io_uring_get_probe(void);
+struct io_uring_probe *io_uring_get_probe(void);
/*
* frees a probe allocated through io_uring_get_probe() or
* io_uring_get_probe_ring()
*/
-extern void io_uring_free_probe(struct io_uring_probe *probe);
+void io_uring_free_probe(struct io_uring_probe *probe);
static inline int io_uring_opcode_supported(const struct io_uring_probe *p, int op)
{
@@ -103,54 +104,73 @@ static inline int io_uring_opcode_supported(const struct io_uring_probe *p, int
return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
}
-extern int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
- struct io_uring_params *p);
-extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
- unsigned flags);
-extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
- struct io_uring *ring);
-extern int io_uring_ring_dontfork(struct io_uring *ring);
-extern void io_uring_queue_exit(struct io_uring *ring);
+int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
+ struct io_uring_params *p);
+int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+ unsigned flags);
+int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+ struct io_uring *ring);
+int io_uring_ring_dontfork(struct io_uring *ring);
+void io_uring_queue_exit(struct io_uring *ring);
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
struct io_uring_cqe **cqes, unsigned count);
-extern int io_uring_wait_cqes(struct io_uring *ring,
- struct io_uring_cqe **cqe_ptr, unsigned wait_nr,
- struct __kernel_timespec *ts, sigset_t *sigmask);
-extern int io_uring_wait_cqe_timeout(struct io_uring *ring,
- struct io_uring_cqe **cqe_ptr, struct __kernel_timespec *ts);
-extern int io_uring_submit(struct io_uring *ring);
-extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
-extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
-
-extern int io_uring_register_buffers(struct io_uring *ring,
- const struct iovec *iovecs,
- unsigned nr_iovecs);
-extern int io_uring_unregister_buffers(struct io_uring *ring);
-extern int io_uring_register_files(struct io_uring *ring, const int *files,
- unsigned nr_files);
-extern int io_uring_unregister_files(struct io_uring *ring);
-extern int io_uring_register_files_update(struct io_uring *ring, unsigned off,
- int *files, unsigned nr_files);
-extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
-extern int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
-extern int io_uring_unregister_eventfd(struct io_uring *ring);
-extern int io_uring_register_probe(struct io_uring *ring,
- struct io_uring_probe *p, unsigned nr);
-extern int io_uring_register_personality(struct io_uring *ring);
-extern int io_uring_unregister_personality(struct io_uring *ring, int id);
-extern int io_uring_register_restrictions(struct io_uring *ring,
- struct io_uring_restriction *res,
- unsigned int nr_res);
-extern int io_uring_enable_rings(struct io_uring *ring);
-extern int __io_uring_sqring_wait(struct io_uring *ring);
+int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr, struct __kernel_timespec *ts,
+ sigset_t *sigmask);
+int io_uring_wait_cqe_timeout(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ struct __kernel_timespec *ts);
+int io_uring_submit(struct io_uring *ring);
+int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
+struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
+
+int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
+ unsigned nr_iovecs);
+int io_uring_register_buffers_tags(struct io_uring *ring,
+ const struct iovec *iovecs,
+ const __u64 *tags, unsigned nr);
+int io_uring_register_buffers_update_tag(struct io_uring *ring,
+ unsigned off,
+ const struct iovec *iovecs,
+ const __u64 *tags, unsigned nr);
+int io_uring_unregister_buffers(struct io_uring *ring);
+
+int io_uring_register_files(struct io_uring *ring, const int *files,
+ unsigned nr_files);
+int io_uring_register_files_tags(struct io_uring *ring, const int *files,
+ const __u64 *tags, unsigned nr);
+int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
+ const int *files, const __u64 *tags,
+ unsigned nr_files);
+
+int io_uring_unregister_files(struct io_uring *ring);
+int io_uring_register_files_update(struct io_uring *ring, unsigned off,
+ int *files, unsigned nr_files);
+int io_uring_register_eventfd(struct io_uring *ring, int fd);
+int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
+int io_uring_unregister_eventfd(struct io_uring *ring);
+int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p,
+ unsigned nr);
+int io_uring_register_personality(struct io_uring *ring);
+int io_uring_unregister_personality(struct io_uring *ring, int id);
+int io_uring_register_restrictions(struct io_uring *ring,
+ struct io_uring_restriction *res,
+ unsigned int nr_res);
+int io_uring_enable_rings(struct io_uring *ring);
+int __io_uring_sqring_wait(struct io_uring *ring);
+int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
+ const cpu_set_t *mask);
+int io_uring_unregister_iowq_aff(struct io_uring *ring);
+int io_uring_register_iowq_max_workers(struct io_uring *ring,
+ unsigned int *values);
/*
* Helper for the peek/wait single cqe functions. Exported because of that,
* but probably shouldn't be used directly in an application.
*/
-extern int __io_uring_get_cqe(struct io_uring *ring,
- struct io_uring_cqe **cqe_ptr, unsigned submit,
- unsigned wait_nr, sigset_t *sigmask);
+int __io_uring_get_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr, unsigned submit,
+ unsigned wait_nr, sigset_t *sigmask);
#define LIBURING_UDATA_TIMEOUT ((__u64) -1)
@@ -208,14 +228,21 @@ static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
unsigned flags)
{
- sqe->flags = flags;
+ sqe->flags = (__u8) flags;
+}
+
+static inline void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
+ unsigned int file_index)
+{
+ /* 0 means no fixed files, indexes should be encoded as "index + 1" */
+ sqe->file_index = file_index + 1;
}
static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
const void *addr, unsigned len,
__u64 offset)
{
- sqe->opcode = op;
+ sqe->opcode = (__u8) op;
sqe->flags = 0;
sqe->ioprio = 0;
sqe->fd = fd;
@@ -224,7 +251,10 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
sqe->len = len;
sqe->rw_flags = 0;
sqe->user_data = 0;
- sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
+ sqe->buf_index = 0;
+ sqe->personality = 0;
+ sqe->file_index = 0;
+ sqe->__pad2[0] = sqe->__pad2[1] = 0;
}
/**
@@ -252,8 +282,9 @@ static inline void io_uring_prep_splice(struct io_uring_sqe *sqe,
unsigned int nbytes,
unsigned int splice_flags)
{
- io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes, off_out);
- sqe->splice_off_in = off_in;
+ io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes,
+ (__u64) off_out);
+ sqe->splice_off_in = (__u64) off_in;
sqe->splice_fd_in = fd_in;
sqe->splice_flags = splice_flags;
}
@@ -271,32 +302,32 @@ static inline void io_uring_prep_tee(struct io_uring_sqe *sqe,
static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
const struct iovec *iovecs,
- unsigned nr_vecs, off_t offset)
+ unsigned nr_vecs, __u64 offset)
{
io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
}
static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
void *buf, unsigned nbytes,
- off_t offset, int buf_index)
+ __u64 offset, int buf_index)
{
io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
- sqe->buf_index = buf_index;
+ sqe->buf_index = (__u16) buf_index;
}
static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
const struct iovec *iovecs,
- unsigned nr_vecs, off_t offset)
+ unsigned nr_vecs, __u64 offset)
{
io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
}
static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
const void *buf, unsigned nbytes,
- off_t offset, int buf_index)
+ __u64 offset, int buf_index)
{
io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
- sqe->buf_index = buf_index;
+ sqe->buf_index = (__u16) buf_index;
}
static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
@@ -313,14 +344,26 @@ static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
sqe->msg_flags = flags;
}
-static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
- unsigned poll_mask)
+static inline unsigned __io_uring_prep_poll_mask(unsigned poll_mask)
{
- io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
#if __BYTE_ORDER == __BIG_ENDIAN
poll_mask = __swahw32(poll_mask);
#endif
- sqe->poll32_events = poll_mask;
+ return poll_mask;
+}
+
+static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
+ unsigned poll_mask)
+{
+ io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
+ sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
+}
+
+static inline void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe,
+ int fd, unsigned poll_mask)
+{
+ io_uring_prep_poll_add(sqe, fd, poll_mask);
+ sqe->len = IORING_POLL_ADD_MULTI;
}
static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
@@ -335,11 +378,8 @@ static inline void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
unsigned poll_mask, unsigned flags)
{
io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, old_user_data, flags,
- (__u64)new_user_data);
-#if __BYTE_ORDER == __BIG_ENDIAN
- poll_mask = __swahw32(poll_mask);
-#endif
- sqe->poll32_events = poll_mask;
+ (__u64)(uintptr_t)new_user_data);
+ sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
}
static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
@@ -386,14 +426,24 @@ static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
{
io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
(__u64) (unsigned long) addrlen);
- sqe->accept_flags = flags;
+ sqe->accept_flags = (__u32) flags;
+}
+
+/* accept directly into the fixed file table */
+static inline void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd,
+ struct sockaddr *addr,
+ socklen_t *addrlen, int flags,
+ unsigned int file_index)
+{
+ io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
+ __io_uring_set_target_fixed_file(sqe, file_index);
}
static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe, void *user_data,
int flags)
{
io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, user_data, 0, 0);
- sqe->cancel_flags = flags;
+ sqe->cancel_flags = (__u32) flags;
}
static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
@@ -415,7 +465,8 @@ static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe,
int *fds, unsigned nr_fds,
int offset)
{
- io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds, offset);
+ io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds,
+ (__u64) offset);
}
static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
@@ -423,29 +474,41 @@ static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
{
io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
- (const uintptr_t *) (unsigned long) len, mode, offset);
+ (const uintptr_t *) (unsigned long) len,
+ (unsigned int) mode, (__u64) offset);
}
static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
const char *path, int flags, mode_t mode)
{
io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
- sqe->open_flags = flags;
+ sqe->open_flags = (__u32) flags;
}
+/* open directly into the fixed file table */
+static inline void io_uring_prep_openat_direct(struct io_uring_sqe *sqe,
+ int dfd, const char *path,
+ int flags, mode_t mode,
+ unsigned file_index)
+{
+ io_uring_prep_openat(sqe, dfd, path, flags, mode);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+}
+
+
static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
{
io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
}
static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
- void *buf, unsigned nbytes, off_t offset)
+ void *buf, unsigned nbytes, __u64 offset)
{
io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
}
static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
- const void *buf, unsigned nbytes, off_t offset)
+ const void *buf, unsigned nbytes, __u64 offset)
{
io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
}
@@ -457,35 +520,35 @@ static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
{
io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
(__u64) (unsigned long) statxbuf);
- sqe->statx_flags = flags;
+ sqe->statx_flags = (__u32) flags;
}
static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
- off_t offset, off_t len, int advice)
+ __u64 offset, off_t len, int advice)
{
- io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, len, offset);
- sqe->fadvise_advice = advice;
+ io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset);
+ sqe->fadvise_advice = (__u32) advice;
}
static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
off_t length, int advice)
{
- io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, length, 0);
- sqe->fadvise_advice = advice;
+ io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0);
+ sqe->fadvise_advice = (__u32) advice;
}
static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
const void *buf, size_t len, int flags)
{
- io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, len, 0);
- sqe->msg_flags = flags;
+ io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0);
+ sqe->msg_flags = (__u32) flags;
}
static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
void *buf, size_t len, int flags)
{
- io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, len, 0);
- sqe->msg_flags = flags;
+ io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, (__u32) len, 0);
+ sqe->msg_flags = (__u32) flags;
}
static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
@@ -495,57 +558,91 @@ static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
(uint64_t) (uintptr_t) how);
}
+/* open directly into the fixed file table */
+static inline void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe,
+ int dfd, const char *path,
+ struct open_how *how,
+ unsigned file_index)
+{
+ io_uring_prep_openat2(sqe, dfd, path, how);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+}
+
struct epoll_event;
static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
int fd, int op,
struct epoll_event *ev)
{
- io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev, op, fd);
+ io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev,
+ (__u32) op, (__u32) fd);
}
static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
void *addr, int len, int nr,
int bgid, int bid)
{
- io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, len, bid);
- sqe->buf_group = bgid;
+ io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, (__u32) len,
+ (__u64) bid);
+ sqe->buf_group = (__u16) bgid;
}
static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
int nr, int bgid)
{
io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
- sqe->buf_group = bgid;
+ sqe->buf_group = (__u16) bgid;
}
static inline void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
int how)
{
- io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, how, 0);
+ io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, (__u32) how, 0);
}
static inline void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
const char *path, int flags)
{
io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0);
- sqe->unlink_flags = flags;
+ sqe->unlink_flags = (__u32) flags;
}
static inline void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
const char *oldpath, int newdfd,
const char *newpath, int flags)
{
- io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath, newdfd,
+ io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath, (__u32) newdfd,
(uint64_t) (uintptr_t) newpath);
- sqe->rename_flags = flags;
+ sqe->rename_flags = (__u32) flags;
}
static inline void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
int fd, unsigned len,
- off_t offset, int flags)
+ __u64 offset, int flags)
{
io_uring_prep_rw(IORING_OP_SYNC_FILE_RANGE, sqe, fd, NULL, len, offset);
- sqe->sync_range_flags = flags;
+ sqe->sync_range_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
+ const char *path, mode_t mode)
+{
+ io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0);
+}
+
+static inline void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
+ const char *target, int newdirfd, const char *linkpath)
+{
+ io_uring_prep_rw(IORING_OP_SYMLINKAT, sqe, newdirfd, target, 0,
+ (uint64_t) (uintptr_t) linkpath);
+}
+
+static inline void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
+ const char *oldpath, int newdfd,
+ const char *newpath, int flags)
+{
+ io_uring_prep_rw(IORING_OP_LINKAT, sqe, olddfd, oldpath, (__u32) newdfd,
+ (uint64_t) (uintptr_t) newpath);
+ sqe->hardlink_flags = (__u32) flags;
}
/*
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index eed991d..69f23a9 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -48,23 +48,23 @@ struct io_uring_sqe {
__u32 splice_flags;
__u32 rename_flags;
__u32 unlink_flags;
+ __u32 hardlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
+ /* pack this to avoid bogus arm OABI complaints */
union {
- struct {
- /* pack this to avoid bogus arm OABI complaints */
- union {
- /* index into fixed buffers, if used */
- __u16 buf_index;
- /* for grouped buffer selection */
- __u16 buf_group;
- } __attribute__((packed));
- /* personality to use, if used */
- __u16 personality;
- __s32 splice_fd_in;
- };
- __u64 __pad2[3];
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
+ /* personality to use, if used */
+ __u16 personality;
+ union {
+ __s32 splice_fd_in;
+ __u32 file_index;
};
+ __u64 __pad2[2];
};
enum {
@@ -142,6 +142,8 @@ enum {
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
IORING_OP_MKDIRAT,
+ IORING_OP_SYMLINKAT,
+ IORING_OP_LINKAT,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -155,9 +157,13 @@ enum {
/*
* sqe->timeout_flags
*/
-#define IORING_TIMEOUT_ABS (1U << 0)
-#define IORING_TIMEOUT_UPDATE (1U << 1)
-
+#define IORING_TIMEOUT_ABS (1U << 0)
+#define IORING_TIMEOUT_UPDATE (1U << 1)
+#define IORING_TIMEOUT_BOOTTIME (1U << 2)
+#define IORING_TIMEOUT_REALTIME (1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
* sqe->splice_flags
* extends splice(2) flags
@@ -168,16 +174,16 @@ enum {
* POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
* command flags for POLL_ADD are stored in sqe->len.
*
- * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if
- * the poll handler will continue to report
- * CQEs on behalf of the same SQE.
+ * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if
+ * the poll handler will continue to report
+ * CQEs on behalf of the same SQE.
*
- * IORING_POLL_UPDATE Update existing poll request, matching
- * sqe->addr as the old user_data field.
+ * IORING_POLL_UPDATE Update existing poll request, matching
+ * sqe->addr as the old user_data field.
*/
-#define IORING_POLL_ADD_MULTI (1U << 0)
-#define IORING_POLL_UPDATE_EVENTS (1U << 1)
-#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
+#define IORING_POLL_ADD_MULTI (1U << 0)
+#define IORING_POLL_UPDATE_EVENTS (1U << 1)
+#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
/*
* IO completion data structure (Completion Queue Entry)
@@ -192,8 +198,10 @@ struct io_uring_cqe {
* cqe->flags
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
*/
#define IORING_CQE_F_BUFFER (1U << 0)
+#define IORING_CQE_F_MORE (1U << 1)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
@@ -283,6 +291,7 @@ struct io_uring_params {
#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
#define IORING_FEAT_EXT_ARG (1U << 8)
#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
+#define IORING_FEAT_RSRC_TAGS (1U << 10)
/*
* io_uring_register(2) opcodes and arguments
@@ -302,6 +311,19 @@ enum {
IORING_REGISTER_RESTRICTIONS = 11,
IORING_REGISTER_ENABLE_RINGS = 12,
+ /* extended with tagging */
+ IORING_REGISTER_FILES2 = 13,
+ IORING_REGISTER_FILES_UPDATE2 = 14,
+ IORING_REGISTER_BUFFERS2 = 15,
+ IORING_REGISTER_BUFFERS_UPDATE = 16,
+
+ /* set/clear io-wq thread affinities */
+ IORING_REGISTER_IOWQ_AFF = 17,
+ IORING_UNREGISTER_IOWQ_AFF = 18,
+
+ /* set/get max number of async workers */
+ IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
+
/* this goes last */
IORING_REGISTER_LAST
};
@@ -313,12 +335,29 @@ struct io_uring_files_update {
__aligned_u64 /* __s32 * */ fds;
};
+struct io_uring_rsrc_register {
+ __u32 nr;
+ __u32 resv;
+ __u64 resv2;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+};
+
struct io_uring_rsrc_update {
__u32 offset;
__u32 resv;
__aligned_u64 data;
};
+struct io_uring_rsrc_update2 {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+ __u32 nr;
+ __u32 resv2;
+};
+
/* Skip updating fd indexes set to this value in the fd table */
#define IORING_REGISTER_FILES_SKIP (-2)
diff --git a/src/liburing.map b/src/liburing.map
index 012ac4e..6692a3b 100644
--- a/src/liburing.map
+++ b/src/liburing.map
@@ -36,4 +36,11 @@ LIBURING_2.1 {
global:
io_uring_mlock_size_params;
io_uring_mlock_size;
+ io_uring_register_buffers_tags;
+ io_uring_register_buffers_update_tag;
+ io_uring_register_files_tags;
+ io_uring_register_files_update_tag;
+ io_uring_register_iowq_aff;
+ io_uring_unregister_iowq_aff;
+ io_uring_register_iowq_max_workers;
} LIBURING_2.0;
diff --git a/src/queue.c b/src/queue.c
index ce5d237..5d2c2a5 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -198,23 +198,20 @@ int __io_uring_flush_sq(struct io_uring *ring)
{
struct io_uring_sq *sq = &ring->sq;
const unsigned mask = *sq->kring_mask;
- unsigned ktail, to_submit;
+ unsigned ktail = *sq->ktail;
+ unsigned to_submit = sq->sqe_tail - sq->sqe_head;
- if (sq->sqe_head == sq->sqe_tail) {
- ktail = *sq->ktail;
+ if (!to_submit)
goto out;
- }
/*
* Fill in sqes that we have queued up, adding them to the kernel ring
*/
- ktail = *sq->ktail;
- to_submit = sq->sqe_tail - sq->sqe_head;
- while (to_submit--) {
+ do {
sq->array[ktail & mask] = sq->sqe_head & mask;
ktail++;
sq->sqe_head++;
- }
+ } while (--to_submit);
/*
* Ensure that the kernel sees the SQE updates before it sees the tail
@@ -251,7 +248,6 @@ static int io_uring_wait_cqes_new(struct io_uring *ring,
.ts = (unsigned long) ts
};
struct get_data data = {
- .submit = __io_uring_flush_sq(ring),
.wait_nr = wait_nr,
.get_flags = IORING_ENTER_EXT_ARG,
.sz = sizeof(arg),
diff --git a/src/register.c b/src/register.c
index 994aaff..a4e8c37 100644
--- a/src/register.c
+++ b/src/register.c
@@ -14,6 +14,42 @@
#include "syscall.h"
+int io_uring_register_buffers_update_tag(struct io_uring *ring, unsigned off,
+ const struct iovec *iovecs,
+ const __u64 *tags,
+ unsigned nr)
+{
+ struct io_uring_rsrc_update2 up = {
+ .offset = off,
+ .data = (unsigned long)iovecs,
+ .tags = (unsigned long)tags,
+ .nr = nr,
+ };
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_BUFFERS_UPDATE,
+ &up, sizeof(up));
+ return ret < 0 ? -errno : ret;
+}
+
+int io_uring_register_buffers_tags(struct io_uring *ring,
+ const struct iovec *iovecs,
+ const __u64 *tags,
+ unsigned nr)
+{
+ struct io_uring_rsrc_register reg = {
+ .nr = nr,
+ .data = (unsigned long)iovecs,
+ .tags = (unsigned long)tags,
+ };
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_BUFFERS2,
+ &reg, sizeof(reg));
+ return ret < 0 ? -errno : ret;
+}
+
int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
unsigned nr_iovecs)
{
@@ -39,6 +75,24 @@ int io_uring_unregister_buffers(struct io_uring *ring)
return 0;
}
+int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
+ const int *files, const __u64 *tags,
+ unsigned nr_files)
+{
+ struct io_uring_rsrc_update2 up = {
+ .offset = off,
+ .data = (unsigned long)files,
+ .tags = (unsigned long)tags,
+ .nr = nr_files,
+ };
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_FILES_UPDATE2,
+ &up, sizeof(up));
+ return ret < 0 ? -errno : ret;
+}
+
/*
* Register an update for an existing file set. The updates will start at
* 'off' in the original array, and 'nr_files' is the number of files we'll
@@ -64,6 +118,23 @@ int io_uring_register_files_update(struct io_uring *ring, unsigned off,
return ret;
}
+
+int io_uring_register_files_tags(struct io_uring *ring,
+ const int *files, const __u64 *tags,
+ unsigned nr)
+{
+ struct io_uring_rsrc_register reg = {
+ .nr = nr,
+ .data = (unsigned long)files,
+ .tags = (unsigned long)tags,
+ };
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_FILES2,
+ &reg, sizeof(reg));
+ return ret < 0 ? -errno : ret;
+}
+
int io_uring_register_files(struct io_uring *ring, const int *files,
unsigned nr_files)
{
@@ -187,3 +258,42 @@ int io_uring_enable_rings(struct io_uring *ring)
return ret;
}
+
+int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
+ const cpu_set_t *mask)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_IOWQ_AFF, mask, cpusz);
+ if (ret < 0)
+ return -errno;
+
+ return ret;
+}
+
+int io_uring_unregister_iowq_aff(struct io_uring *ring)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_IOWQ_AFF, NULL, 0);
+ if (ret < 0)
+ return -errno;
+
+ return ret;
+}
+
+int io_uring_register_iowq_max_workers(struct io_uring *ring, unsigned int *val)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_IOWQ_MAX_WORKERS,
+ val, 2);
+ if (ret < 0)
+ return -errno;
+
+ return ret;
+
+}
diff --git a/src/syscall.c b/src/syscall.c
index 2fd3dd4..69027e5 100644
--- a/src/syscall.c
+++ b/src/syscall.c
@@ -13,8 +13,8 @@
#ifdef __alpha__
/*
- * alpha is the only exception, all other architectures
- * have common numbers for new system calls.
+ * alpha and mips are exception, other architectures have
+ * common numbers for new system calls.
*/
# ifndef __NR_io_uring_setup
# define __NR_io_uring_setup 535
@@ -25,7 +25,17 @@
# ifndef __NR_io_uring_register
# define __NR_io_uring_register 537
# endif
-#else /* !__alpha__ */
+#elif defined __mips__
+# ifndef __NR_io_uring_setup
+# define __NR_io_uring_setup (__NR_Linux + 425)
+# endif
+# ifndef __NR_io_uring_enter
+# define __NR_io_uring_enter (__NR_Linux + 426)
+# endif
+# ifndef __NR_io_uring_register
+# define __NR_io_uring_register (__NR_Linux + 427)
+# endif
+#else /* !__alpha__ and !__mips__ */
# ifndef __NR_io_uring_setup
# define __NR_io_uring_setup 425
# endif
diff --git a/src/syscall.h b/src/syscall.h
index 3b94efc..2368f83 100644
--- a/src/syscall.h
+++ b/src/syscall.h
@@ -9,12 +9,12 @@ struct io_uring_params;
/*
* System calls
*/
-extern int __sys_io_uring_setup(unsigned entries, struct io_uring_params *p);
-extern int __sys_io_uring_enter(int fd, unsigned to_submit,
- unsigned min_complete, unsigned flags, sigset_t *sig);
-extern int __sys_io_uring_enter2(int fd, unsigned to_submit,
- unsigned min_complete, unsigned flags, sigset_t *sig, int sz);
-extern int __sys_io_uring_register(int fd, unsigned int opcode, const void *arg,
- unsigned int nr_args);
+int __sys_io_uring_setup(unsigned entries, struct io_uring_params *p);
+int __sys_io_uring_enter(int fd, unsigned to_submit, unsigned min_complete,
+ unsigned flags, sigset_t *sig);
+int __sys_io_uring_enter2(int fd, unsigned to_submit, unsigned min_complete,
+ unsigned flags, sigset_t *sig, int sz);
+int __sys_io_uring_register(int fd, unsigned int opcode, const void *arg,
+ unsigned int nr_args);
#endif
diff --git a/test/35fa71a030ca-test.c b/test/35fa71a030ca-test.c
index f5fcc4d..f83cc9d 100644
--- a/test/35fa71a030ca-test.c
+++ b/test/35fa71a030ca-test.c
@@ -321,6 +321,9 @@ int main(int argc, char *argv[])
return 0;
signal(SIGINT, sig_int);
mmap((void *) 0x20000000, 0x1000000, 3, 0x32, -1, 0);
+ signal(SIGALRM, sig_int);
+ alarm(5);
+
loop();
return 0;
}
diff --git a/test/Makefile b/test/Makefile
index 2a1985b..2936469 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -3,15 +3,28 @@ datadir ?= $(prefix)/share
INSTALL=install
+ifneq ($(MAKECMDGOALS),clean)
+include ../config-host.mak
+endif
+
CPPFLAGS ?=
override CPPFLAGS += -D_GNU_SOURCE -D__SANE_USERSPACE_TYPES__ \
-I../src/include/ -include ../config-host.h
-CFLAGS ?= -g -O2
-XCFLAGS =
-override CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-sign-compare \
- -L../src/
-CXXFLAGS ?=
-override CXXFLAGS += $(CFLAGS) -std=c++11
+CFLAGS ?= -g -O2 -Wall -Wextra
+
+XCFLAGS = -Wno-unused-parameter -Wno-sign-compare
+ifdef CONFIG_HAVE_STRINGOP_OVERFLOW
+ XCFLAGS += -Wstringop-overflow=0
+endif
+ifdef CONFIG_HAVE_ARRAY_BOUNDS
+ XCFLAGS += -Warray-bounds=0
+endif
+
+CXXFLAGS ?= $(CFLAGS)
+override CFLAGS += $(XCFLAGS)
+override CXXFLAGS += $(XCFLAGS) -std=c++11
+LDFLAGS ?=
+override LDFLAGS += -L../src/ -luring
test_targets += \
232c93d07b74-test \
@@ -42,6 +55,7 @@ test_targets += \
defer \
double-poll-crash \
eeed8b54e0df-test \
+ empty-eownerdead \
eventfd \
eventfd-disable \
eventfd-ring \
@@ -49,11 +63,13 @@ test_targets += \
fallocate \
fc2a85cb02ef-test \
file-register \
+ file-verify \
file-update \
files-exit-hang-poll \
files-exit-hang-timeout \
fixed-link \
fsync \
+ hardlink \
io-cancel \
io_uring_enter \
io_uring_register \
@@ -65,6 +81,7 @@ test_targets += \
link-timeout \
link_drain \
madvise \
+ mkdir \
multicqes_drain \
nop \
nop-all-sizes \
@@ -103,10 +120,13 @@ test_targets += \
sq-poll-share \
sqpoll-disable-exit \
sqpoll-exit-hang \
+ sqpoll-cancel-hang \
sqpoll-sleep \
sq-space_left \
stdout \
submit-reuse \
+ submit-link-fail \
+ symlink \
teardowns \
thread-exit \
timeout \
@@ -115,16 +135,14 @@ test_targets += \
unlink \
wakeup-hang \
sendmsg_fs_cve \
+ rsrc_tags \
+ exec-target \
# EOL
all_targets += $(test_targets)
include ../Makefile.quiet
-ifneq ($(MAKECMDGOALS),clean)
-include ../config-host.mak
-endif
-
ifdef CONFIG_HAVE_STATX
test_targets += statx
endif
@@ -140,13 +158,13 @@ helpers = helpers.o
all: ${helpers} $(test_targets)
helpers.o: helpers.c helpers.c
- $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $< -luring
+ $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $<
%: %.c ${helpers} helpers.h
- $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< ${helpers} -luring $(XCFLAGS)
+ $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< ${helpers} $(LDFLAGS)
%: %.cc ${helpers} helpers.h
- $(QUIET_CXX)$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< ${helpers} -luring $(XCFLAGS)
+ $(QUIET_CXX)$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $< ${helpers} $(LDFLAGS)
test_srcs := \
helpers.c \
@@ -178,6 +196,7 @@ test_srcs := \
defer.c \
double-poll-crash.c \
eeed8b54e0df-test.c \
+ empty-eownerdead.c \
eventfd-disable.c \
eventfd-ring.c \
eventfd.c \
@@ -185,11 +204,13 @@ test_srcs := \
fallocate.c \
fc2a85cb02ef-test.c \
file-register.c \
+ file-verify.c \
file-update.c \
files-exit-hang-poll.c \
files-exit-hang-timeout.c \
fixed-link.c \
fsync.c \
+ hardlink.c \
io-cancel.c \
io_uring_enter.c \
io_uring_register.c \
@@ -201,6 +222,7 @@ test_srcs := \
link.c \
link_drain.c \
madvise.c \
+ mkdir.c \
multicqes_drain.c \
nop-all-sizes.c \
nop.c \
@@ -240,11 +262,14 @@ test_srcs := \
sq-poll-share.c \
sqpoll-disable-exit.c \
sqpoll-exit-hang.c \
+ sqpoll-cancel-hang.c \
sqpoll-sleep.c \
sq-space_left.c \
statx.c \
stdout.c \
submit-reuse.c \
+ submit-link-fail.c \
+ symlink.c \
teardowns.c \
thread-exit.c \
timeout-new.c \
@@ -253,26 +278,28 @@ test_srcs := \
unlink.c \
wakeup-hang.c \
sendmsg_fs_cve.c \
+ rsrc_tags.c \
+ exec-target.c \
# EOL
test_objs := $(patsubst %.c,%.ol,$(patsubst %.cc,%.ol,$(test_srcs)))
-35fa71a030ca-test: XCFLAGS = -lpthread
-232c93d07b74-test: XCFLAGS = -lpthread
-send_recv: XCFLAGS = -lpthread
-send_recvmsg: XCFLAGS = -lpthread
-poll-link: XCFLAGS = -lpthread
-accept-link: XCFLAGS = -lpthread
-submit-reuse: XCFLAGS = -lpthread
-poll-v-poll: XCFLAGS = -lpthread
-across-fork: XCFLAGS = -lpthread
-ce593a6c480a-test: XCFLAGS = -lpthread
-wakeup-hang: XCFLAGS = -lpthread
-pipe-eof: XCFLAGS = -lpthread
-timeout-new: XCFLAGS = -lpthread
-thread-exit: XCFLAGS = -lpthread
-ring-leak2: XCFLAGS = -lpthread
-poll-mshot-update: XCFLAGS = -lpthread
+35fa71a030ca-test: override LDFLAGS += -lpthread
+232c93d07b74-test: override LDFLAGS += -lpthread
+send_recv: override LDFLAGS += -lpthread
+send_recvmsg: override LDFLAGS += -lpthread
+poll-link: override LDFLAGS += -lpthread
+accept-link: override LDFLAGS += -lpthread
+submit-reuse: override LDFLAGS += -lpthread
+poll-v-poll: override LDFLAGS += -lpthread
+across-fork: override LDFLAGS += -lpthread
+ce593a6c480a-test: override LDFLAGS += -lpthread
+wakeup-hang: override LDFLAGS += -lpthread
+pipe-eof: override LDFLAGS += -lpthread
+timeout-new: override LDFLAGS += -lpthread
+thread-exit: override LDFLAGS += -lpthread
+ring-leak2: override LDFLAGS += -lpthread
+poll-mshot-update: override LDFLAGS += -lpthread
install: $(test_targets) runtests.sh runtests-loop.sh
$(INSTALL) -D -d -m 755 $(datadir)/liburing-test/
@@ -280,7 +307,8 @@ install: $(test_targets) runtests.sh runtests-loop.sh
$(INSTALL) -D -m 755 runtests.sh $(datadir)/liburing-test/
$(INSTALL) -D -m 755 runtests-loop.sh $(datadir)/liburing-test/
clean:
- @rm -f $(all_targets) $(test_objs) helpers.o
+ @rm -f $(all_targets) $(test_objs) helpers.o output/*
+ @rm -rf output/
runtests: all
@./runtests.sh $(test_targets)
diff --git a/test/accept.c b/test/accept.c
index f096f8a..0c69b98 100644
--- a/test/accept.c
+++ b/test/accept.c
@@ -39,9 +39,10 @@ static void queue_send(struct io_uring *ring, int fd)
sqe = io_uring_get_sqe(ring);
io_uring_prep_writev(sqe, fd, &d->iov, 1, 0);
+ sqe->user_data = 1;
}
-static void queue_recv(struct io_uring *ring, int fd)
+static void queue_recv(struct io_uring *ring, int fd, bool fixed)
{
struct io_uring_sqe *sqe;
struct data *d;
@@ -52,16 +53,22 @@ static void queue_recv(struct io_uring *ring, int fd)
sqe = io_uring_get_sqe(ring);
io_uring_prep_readv(sqe, fd, &d->iov, 1, 0);
+ sqe->user_data = 2;
+ if (fixed)
+ sqe->flags |= IOSQE_FIXED_FILE;
}
-static int accept_conn(struct io_uring *ring, int fd)
+static int accept_conn(struct io_uring *ring, int fd, bool fixed)
{
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
- int ret;
+ int ret, fixed_idx = 0;
sqe = io_uring_get_sqe(ring);
- io_uring_prep_accept(sqe, fd, NULL, NULL, 0);
+ if (!fixed)
+ io_uring_prep_accept(sqe, fd, NULL, NULL, 0);
+ else
+ io_uring_prep_accept_direct(sqe, fd, NULL, NULL, 0, fixed_idx);
ret = io_uring_submit(ring);
assert(ret != -1);
@@ -70,6 +77,15 @@ static int accept_conn(struct io_uring *ring, int fd)
assert(!ret);
ret = cqe->res;
io_uring_cqe_seen(ring, cqe);
+
+ if (fixed) {
+ if (ret > 0) {
+ close(ret);
+ return -EINVAL;
+ } else if (!ret) {
+ ret = fixed_idx;
+ }
+ }
return ret;
}
@@ -102,15 +118,12 @@ static int start_accept_listen(struct sockaddr_in *addr, int port_off)
return fd;
}
-static int test(struct io_uring *ring, int accept_should_error)
+static int test(struct io_uring *ring, int accept_should_error, bool fixed)
{
struct io_uring_cqe *cqe;
struct sockaddr_in addr;
- uint32_t head;
- uint32_t count = 0;
- int done = 0;
- int p_fd[2];
- int ret;
+ uint32_t head, count = 0;
+ int ret, p_fd[2], done = 0;
int32_t val, recv_s0 = start_accept_listen(&addr, 0);
@@ -137,11 +150,14 @@ static int test(struct io_uring *ring, int accept_should_error)
ret = fcntl(p_fd[1], F_SETFL, flags);
assert(ret != -1);
- p_fd[0] = accept_conn(ring, recv_s0);
+ p_fd[0] = accept_conn(ring, recv_s0, fixed);
if (p_fd[0] == -EINVAL) {
if (accept_should_error)
goto out;
- fprintf(stdout, "Accept not supported, skipping\n");
+ if (fixed)
+ fprintf(stdout, "Fixed accept not supported, skipping\n");
+ else
+ fprintf(stdout, "Accept not supported, skipping\n");
no_accept = 1;
goto out;
} else if (p_fd[0] < 0) {
@@ -153,7 +169,7 @@ static int test(struct io_uring *ring, int accept_should_error)
}
queue_send(ring, p_fd[1]);
- queue_recv(ring, p_fd[0]);
+ queue_recv(ring, p_fd[0], fixed);
ret = io_uring_submit_and_wait(ring, 2);
assert(ret != -1);
@@ -161,7 +177,8 @@ static int test(struct io_uring *ring, int accept_should_error)
while (count < 2) {
io_uring_for_each_cqe(ring, head, cqe) {
if (cqe->res < 0) {
- fprintf(stderr, "Got cqe res %d\n", cqe->res);
+ fprintf(stderr, "Got cqe res %d, user_data %i\n",
+ cqe->res, (int)cqe->user_data);
done = 1;
break;
}
@@ -176,12 +193,14 @@ static int test(struct io_uring *ring, int accept_should_error)
}
out:
- close(p_fd[0]);
+ if (!fixed)
+ close(p_fd[0]);
close(p_fd[1]);
close(recv_s0);
return 0;
err:
- close(p_fd[0]);
+ if (!fixed)
+ close(p_fd[0]);
close(p_fd[1]);
close(recv_s0);
return 1;
@@ -302,7 +321,7 @@ static int test_accept_cancel(unsigned usecs)
sqe = io_uring_get_sqe(&m_io_uring);
io_uring_prep_accept(sqe, fd, NULL, NULL, 0);
sqe->user_data = 1;
- ret = io_uring_submit(&m_io_uring);
+ ret = io_uring_submit(&m_io_uring);
assert(ret == 1);
if (usecs)
@@ -355,7 +374,21 @@ static int test_accept(void)
ret = io_uring_queue_init(32, &m_io_uring, 0);
assert(ret >= 0);
- ret = test(&m_io_uring, 0);
+ ret = test(&m_io_uring, 0, false);
+ io_uring_queue_exit(&m_io_uring);
+ return ret;
+}
+
+static int test_accept_fixed(void)
+{
+ struct io_uring m_io_uring;
+ int ret, fd = -1;
+
+ ret = io_uring_queue_init(32, &m_io_uring, 0);
+ assert(ret >= 0);
+ ret = io_uring_register_files(&m_io_uring, &fd, 1);
+ assert(ret == 0);
+ ret = test(&m_io_uring, 0, true);
io_uring_queue_exit(&m_io_uring);
return ret;
}
@@ -377,7 +410,7 @@ static int test_accept_sqpoll(void)
if (p.features & IORING_FEAT_SQPOLL_NONFIXED)
should_fail = 0;
- ret = test(&m_io_uring, should_fail);
+ ret = test(&m_io_uring, should_fail, false);
io_uring_queue_exit(&m_io_uring);
return ret;
}
@@ -397,6 +430,12 @@ int main(int argc, char *argv[])
if (no_accept)
return 0;
+ ret = test_accept_fixed();
+ if (ret) {
+ fprintf(stderr, "test_accept_fixed failed\n");
+ return ret;
+ }
+
ret = test_accept_sqpoll();
if (ret) {
fprintf(stderr, "test_accept_sqpoll failed\n");
diff --git a/test/connect.c b/test/connect.c
index ab81bb8..3ae10de 100644
--- a/test/connect.c
+++ b/test/connect.c
@@ -14,11 +14,13 @@
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
+#include <arpa/inet.h>
#include "liburing.h"
static int no_connect;
-static int use_port;
+static unsigned short use_port;
+static unsigned int use_addr;
static int create_socket(void)
{
@@ -89,7 +91,7 @@ static int listen_on_socket(int fd)
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_port = use_port;
- addr.sin_addr.s_addr = 0x0100007fU;
+ addr.sin_addr.s_addr = use_addr;
ret = bind(fd, (struct sockaddr*)&addr, sizeof(addr));
if (ret == -1) {
@@ -125,9 +127,8 @@ static int configure_connect(int fd, struct sockaddr_in* addr)
memset(addr, 0, sizeof(*addr));
addr->sin_family = AF_INET;
addr->sin_port = use_port;
- addr->sin_addr.s_addr = 0x0100007fU;
-
- return 0;
+ ret = inet_aton("127.0.0.1", &addr->sin_addr);
+ return ret;
}
static int connect_socket(struct io_uring *ring, int fd, int *code)
@@ -248,21 +249,29 @@ err1:
static int test_connect_timeout(struct io_uring *ring)
{
- int connect_fd = -1, accept_fd = -1;
- int ret;
+ int connect_fd[2] = {-1, -1};
+ int accept_fd = -1;
+ int ret, code;
struct sockaddr_in addr;
struct io_uring_sqe *sqe;
struct __kernel_timespec ts = {.tv_sec = 0, .tv_nsec = 100000};
- connect_fd = create_socket();
- if (connect_fd == -1)
+ connect_fd[0] = create_socket();
+ if (connect_fd[0] == -1)
return -1;
+ connect_fd[1] = create_socket();
+ if (connect_fd[1] == -1)
+ goto err;
+
accept_fd = create_socket();
if (accept_fd == -1)
goto err;
- if (configure_connect(connect_fd, &addr) == -1)
+ if (configure_connect(connect_fd[0], &addr) == -1)
+ goto err;
+
+ if (configure_connect(connect_fd[1], &addr) == -1)
goto err;
ret = bind(accept_fd, (struct sockaddr*)&addr, sizeof(addr));
@@ -271,19 +280,28 @@ static int test_connect_timeout(struct io_uring *ring)
goto err;
}
- ret = listen(accept_fd, 0); // no backlog in order to block connect_fd
+ ret = listen(accept_fd, 0); // no backlog in order to block connect_fd[1]
if (ret == -1) {
perror("listen()");
goto err;
}
+ // We first connect with one client socket in order to fill the accept queue.
+ ret = connect_socket(ring, connect_fd[0], &code);
+ if (ret == -1 || code != 0) {
+ fprintf(stderr, "unable to connect\n");
+ goto err;
+ }
+
+ // We do not offload completion events from listening socket on purpose.
+ // This way we create a state where the second connect request being stalled by OS.
sqe = io_uring_get_sqe(ring);
if (!sqe) {
fprintf(stderr, "unable to get sqe\n");
goto err;
}
- io_uring_prep_connect(sqe, connect_fd, (struct sockaddr*)&addr, sizeof(addr));
+ io_uring_prep_connect(sqe, connect_fd[1], (struct sockaddr*)&addr, sizeof(addr));
sqe->user_data = 1;
sqe->flags |= IOSQE_IO_LINK;
@@ -292,9 +310,9 @@ static int test_connect_timeout(struct io_uring *ring)
fprintf(stderr, "unable to get sqe\n");
goto err;
}
+ io_uring_prep_link_timeout(sqe, &ts, 0);
sqe->user_data = 2;
- io_uring_prep_link_timeout(sqe, &ts, 0);
ret = io_uring_submit(ring);
if (ret != 2) {
fprintf(stderr, "submitted %d\n", ret);
@@ -320,12 +338,17 @@ static int test_connect_timeout(struct io_uring *ring)
io_uring_cqe_seen(ring, cqe);
}
- close(connect_fd);
+ close(connect_fd[0]);
+ close(connect_fd[1]);
close(accept_fd);
return 0;
err:
- close(connect_fd);
+ if (connect_fd[0] != -1)
+ close(connect_fd[0]);
+ if (connect_fd[1] != -1)
+ close(connect_fd[1]);
+
if (accept_fd != -1)
close(accept_fd);
return -1;
@@ -347,6 +370,8 @@ int main(int argc, char *argv[])
srand(getpid());
use_port = (rand() % 61440) + 4096;
+ use_port = htons(use_port);
+ use_addr = inet_addr("127.0.0.1");
ret = test_connect_with_no_peer(&ring);
if (ret == -1) {
diff --git a/test/cq-overflow.c b/test/cq-overflow.c
index 945dc93..057570e 100644
--- a/test/cq-overflow.c
+++ b/test/cq-overflow.c
@@ -243,6 +243,7 @@ err:
int main(int argc, char *argv[])
{
+ const char *fname = ".cq-overflow";
unsigned iters, drops;
unsigned long usecs;
int ret;
@@ -256,7 +257,7 @@ int main(int argc, char *argv[])
return ret;
}
- t_create_file(".basic-rw", FILE_SIZE);
+ t_create_file(fname, FILE_SIZE);
vecs = t_create_buffers(BUFFERS, BS);
@@ -265,7 +266,7 @@ int main(int argc, char *argv[])
do {
drops = 0;
- if (test_io(".basic-rw", usecs, &drops, 0)) {
+ if (test_io(fname, usecs, &drops, 0)) {
fprintf(stderr, "test_io nofault failed\n");
goto err;
}
@@ -275,19 +276,19 @@ int main(int argc, char *argv[])
iters++;
} while (iters < 40);
- if (test_io(".basic-rw", usecs, &drops, 0)) {
+ if (test_io(fname, usecs, &drops, 0)) {
fprintf(stderr, "test_io nofault failed\n");
goto err;
}
- if (test_io(".basic-rw", usecs, &drops, 1)) {
+ if (test_io(fname, usecs, &drops, 1)) {
fprintf(stderr, "test_io fault failed\n");
goto err;
}
- unlink(".basic-rw");
+ unlink(fname);
return 0;
err:
- unlink(".basic-rw");
+ unlink(fname);
return 1;
}
diff --git a/test/d4ae271dfaae-test.c b/test/d4ae271dfaae-test.c
index 80d3f71..397b94b 100644
--- a/test/d4ae271dfaae-test.c
+++ b/test/d4ae271dfaae-test.c
@@ -27,11 +27,6 @@ int main(int argc, char *argv[])
char *fname;
void *buf;
- if (geteuid()) {
- fprintf(stdout, "Test requires root, skipping\n");
- return 0;
- }
-
memset(&p, 0, sizeof(p));
p.flags = IORING_SETUP_SQPOLL;
ret = t_create_ring_params(4, &ring, &p);
@@ -48,6 +43,8 @@ int main(int argc, char *argv[])
}
fd = open(fname, O_RDONLY | O_DIRECT);
+ if (fname != argv[1])
+ unlink(fname);
if (fd < 0) {
perror("open");
goto out;
@@ -94,8 +91,6 @@ int main(int argc, char *argv[])
close(fd);
out:
- if (fname != argv[1])
- unlink(fname);
io_uring_queue_exit(&ring);
return ret;
}
diff --git a/test/defer.c b/test/defer.c
index 885cf5c..825b69f 100644
--- a/test/defer.c
+++ b/test/defer.c
@@ -11,6 +11,8 @@
#include "helpers.h"
#include "liburing.h"
+#define RING_SIZE 128
+
struct test_context {
struct io_uring *ring;
struct io_uring_sqe **sqes;
@@ -243,30 +245,24 @@ int main(int argc, char *argv[])
{
struct io_uring ring, poll_ring, sqthread_ring;
struct io_uring_params p;
- int ret, no_sqthread = 0;
+ int ret;
if (argc > 1)
return 0;
memset(&p, 0, sizeof(p));
- ret = io_uring_queue_init_params(1000, &ring, &p);
+ ret = io_uring_queue_init_params(RING_SIZE, &ring, &p);
if (ret) {
- printf("ring setup failed\n");
+ printf("ring setup failed %i\n", ret);
return 1;
}
- ret = io_uring_queue_init(1000, &poll_ring, IORING_SETUP_IOPOLL);
+ ret = io_uring_queue_init(RING_SIZE, &poll_ring, IORING_SETUP_IOPOLL);
if (ret) {
printf("poll_ring setup failed\n");
return 1;
}
- ret = t_create_ring(1000, &sqthread_ring,
- IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL);
- if (ret == T_SETUP_SKIP)
- return 0;
- else if (ret < 0)
- return 1;
ret = test_cancelled_userdata(&poll_ring);
if (ret) {
@@ -274,16 +270,6 @@ int main(int argc, char *argv[])
return ret;
}
- if (no_sqthread) {
- printf("test_thread_link_cancel: skipped, not root\n");
- } else {
- ret = test_thread_link_cancel(&sqthread_ring);
- if (ret) {
- printf("test_thread_link_cancel failed\n");
- return ret;
- }
- }
-
if (!(p.features & IORING_FEAT_NODROP)) {
ret = test_overflow_hung(&ring);
if (ret) {
@@ -304,5 +290,18 @@ int main(int argc, char *argv[])
return ret;
}
+ ret = t_create_ring(RING_SIZE, &sqthread_ring,
+ IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ else if (ret < 0)
+ return 1;
+
+ ret = test_thread_link_cancel(&sqthread_ring);
+ if (ret) {
+ printf("test_thread_link_cancel failed\n");
+ return ret;
+ }
+
return 0;
}
diff --git a/test/double-poll-crash.c b/test/double-poll-crash.c
index 2a012e5..d9277be 100644
--- a/test/double-poll-crash.c
+++ b/test/double-poll-crash.c
@@ -103,10 +103,6 @@ static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2)
}
}
-#ifndef __NR_io_uring_enter
-#define __NR_io_uring_enter 426
-#endif
-
uint64_t r[4] = {0xffffffffffffffff, 0x0, 0x0, 0xffffffffffffffff};
int main(int argc, char *argv[])
diff --git a/test/eeed8b54e0df-test.c b/test/eeed8b54e0df-test.c
index b6e27cc..62f6f45 100644
--- a/test/eeed8b54e0df-test.c
+++ b/test/eeed8b54e0df-test.c
@@ -26,6 +26,7 @@ static int get_file_fd(void)
int fd;
fd = open("testfile", O_RDWR | O_CREAT, 0644);
+ unlink("testfile");
if (fd < 0) {
perror("open file");
return -1;
@@ -54,12 +55,6 @@ err:
return fd;
}
-static void put_file_fd(int fd)
-{
- close(fd);
- unlink("testfile");
-}
-
int main(int argc, char *argv[])
{
struct io_uring ring;
@@ -111,9 +106,9 @@ int main(int argc, char *argv[])
goto err;
}
- put_file_fd(fd);
+ close(fd);
return 0;
err:
- put_file_fd(fd);
+ close(fd);
return 1;
}
diff --git a/test/empty-eownerdead.c b/test/empty-eownerdead.c
new file mode 100644
index 0000000..40f854f
--- /dev/null
+++ b/test/empty-eownerdead.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Test if entering with nothing to submit/wait for SQPOLL returns an error.
+ */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "liburing.h"
+#include "helpers.h"
+#include "../src/syscall.h"
+
+int main(int argc, char *argv[])
+{
+ struct io_uring_params p = {};
+ struct io_uring ring;
+ int ret;
+
+ if (argc > 1)
+ return 0;
+
+ p.flags = IORING_SETUP_SQPOLL;
+ p.sq_thread_idle = 100;
+
+ ret = t_create_ring_params(1, &ring, &p);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ else if (ret < 0)
+ goto err;
+
+ ret = __sys_io_uring_enter(ring.ring_fd, 0, 0, 0, NULL);
+ if (ret < 0) {
+ int __e = errno;
+
+ if (__e == EOWNERDEAD)
+ fprintf(stderr, "sqe submit unexpected failure due old kernel bug: %s\n", strerror(__e));
+ else
+ fprintf(stderr, "sqe submit unexpected failure: %s\n", strerror(__e));
+ goto err;
+ }
+
+ return 0;
+err:
+ return 1;
+}
diff --git a/test/exec-target.c b/test/exec-target.c
new file mode 100644
index 0000000..50bc2c9
--- /dev/null
+++ b/test/exec-target.c
@@ -0,0 +1,4 @@
+int main(int argc, char *argv[])
+{
+ return 0;
+}
diff --git a/test/fadvise.c b/test/fadvise.c
index b6d4462..278a045 100644
--- a/test/fadvise.c
+++ b/test/fadvise.c
@@ -184,7 +184,9 @@ int main(int argc, char *argv[])
if (i >= MIN_LOOPS && !bad)
break;
}
- if (bad > good) {
+
+ /* too hard to reliably test, just ignore */
+ if (0 && bad > good) {
fprintf(stderr, "Suspicious timings\n");
goto err;
}
diff --git a/test/fallocate.c b/test/fallocate.c
index da90be8..ddb53a6 100644
--- a/test/fallocate.c
+++ b/test/fallocate.c
@@ -42,6 +42,7 @@ static int test_fallocate_rlimit(struct io_uring *ring)
perror("open");
return 1;
}
+ unlink(buf);
sqe = io_uring_get_sqe(ring);
if (!sqe) {
@@ -72,10 +73,8 @@ static int test_fallocate_rlimit(struct io_uring *ring)
}
io_uring_cqe_seen(ring, cqe);
out:
- unlink(buf);
return 0;
err:
- unlink(buf);
return 1;
}
@@ -93,6 +92,7 @@ static int test_fallocate(struct io_uring *ring)
perror("open");
return 1;
}
+ unlink(buf);
sqe = io_uring_get_sqe(ring);
if (!sqe) {
@@ -136,10 +136,8 @@ static int test_fallocate(struct io_uring *ring)
}
out:
- unlink(buf);
return 0;
err:
- unlink(buf);
return 1;
}
@@ -160,6 +158,7 @@ static int test_fallocate_fsync(struct io_uring *ring)
perror("open");
return 1;
}
+ unlink(buf);
sqe = io_uring_get_sqe(ring);
if (!sqe) {
@@ -209,10 +208,8 @@ static int test_fallocate_fsync(struct io_uring *ring)
goto err;
}
- unlink(buf);
return 0;
err:
- unlink(buf);
return 1;
}
diff --git a/test/fc2a85cb02ef-test.c b/test/fc2a85cb02ef-test.c
index 35addf5..bdc3c48 100644
--- a/test/fc2a85cb02ef-test.c
+++ b/test/fc2a85cb02ef-test.c
@@ -78,13 +78,6 @@ static int setup_fault()
return 0;
}
-#ifndef __NR_io_uring_register
-#define __NR_io_uring_register 427
-#endif
-#ifndef __NR_io_uring_setup
-#define __NR_io_uring_setup 425
-#endif
-
uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff};
int main(int argc, char *argv[])
diff --git a/test/file-register.c b/test/file-register.c
index c5c5507..bd15408 100644
--- a/test/file-register.c
+++ b/test/file-register.c
@@ -9,6 +9,7 @@
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
+#include <sys/resource.h>
#include "helpers.h"
#include "liburing.h"
@@ -351,8 +352,9 @@ static int test_basic(struct io_uring *ring, int fail)
{
int *files;
int ret;
+ int nr_files = fail ? 10 : 100;
- files = open_files(fail ? 10 : 100, 0, 0);
+ files = open_files(nr_files, 0, 0);
ret = io_uring_register_files(ring, files, 100);
if (ret) {
if (fail) {
@@ -371,10 +373,10 @@ static int test_basic(struct io_uring *ring, int fail)
fprintf(stderr, "%s: unregister %d\n", __FUNCTION__, ret);
goto err;
}
- close_files(files, 100, 0);
+ close_files(files, nr_files, 0);
return 0;
err:
- close_files(files, 100, 0);
+ close_files(files, nr_files, 0);
return 1;
}
@@ -493,6 +495,18 @@ static int test_fixed_read_write(struct io_uring *ring, int index)
return 0;
}
+static void adjust_nfiles(int want_files)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0)
+ return;
+ if (rlim.rlim_cur >= want_files)
+ return;
+ rlim.rlim_cur = want_files;
+ setrlimit(RLIMIT_NOFILE, &rlim);
+}
+
/*
* Register 8K of sparse files, update one at a random spot, then do some
* file IO to verify it works.
@@ -502,6 +516,8 @@ static int test_huge(struct io_uring *ring)
int *files;
int ret;
+ adjust_nfiles(16384);
+
files = open_files(0, 8192, 0);
ret = io_uring_register_files(ring, files, 8192);
if (ret) {
diff --git a/test/file-update.c b/test/file-update.c
index 38059d4..578017e 100644
--- a/test/file-update.c
+++ b/test/file-update.c
@@ -128,6 +128,7 @@ static int test_sqe_update(struct io_uring *ring)
ret = cqe->res;
io_uring_cqe_seen(ring, cqe);
+ free(fds);
if (ret == -EINVAL) {
fprintf(stdout, "IORING_OP_FILES_UPDATE not supported, skipping\n");
return 0;
diff --git a/test/file-verify.c b/test/file-verify.c
new file mode 100644
index 0000000..50cad45
--- /dev/null
+++ b/test/file-verify.c
@@ -0,0 +1,628 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: run various reads tests, verifying data
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+
+#include "helpers.h"
+#include "liburing.h"
+
+#define FSIZE 128*1024*1024
+#define CHUNK_SIZE 131072
+#define PUNCH_SIZE 32768
+
+/*
+ * 8 because it fits within the on-stack iov, 16 because it's larger than 8
+ */
+#define MIN_VECS 8
+#define MAX_VECS 16
+
+/*
+ * Can be anything, let's just do something for a bit of parallellism
+ */
+#define READ_BATCH 16
+
+/*
+ * Each offset in the file has the offset / sizeof(int) stored for every
+ * sizeof(int) address.
+ */
+static int verify_buf(void *buf, size_t size, off_t off)
+{
+ int i, u_in_buf = size / sizeof(unsigned int);
+ unsigned int *ptr;
+
+ off /= sizeof(unsigned int);
+ ptr = buf;
+ for (i = 0; i < u_in_buf; i++) {
+ if (off != *ptr) {
+ fprintf(stderr, "Found %u, wanted %lu\n", *ptr, off);
+ return 1;
+ }
+ ptr++;
+ off++;
+ }
+
+ return 0;
+}
+
+static int test_truncate(struct io_uring *ring, const char *fname, int buffered,
+ int vectored, int provide_buf)
+{
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct iovec vec;
+ struct stat sb;
+ off_t punch_off, off, file_size;
+ void *buf = NULL;
+ int u_in_buf, i, ret, fd, first_pass = 1;
+ unsigned int *ptr;
+
+ if (buffered)
+ fd = open(fname, O_RDWR);
+ else
+ fd = open(fname, O_DIRECT | O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ return 1;
+ }
+
+ if (fstat(fd, &sb) < 0) {
+ perror("stat");
+ close(fd);
+ return 1;
+ }
+
+ if (S_ISREG(sb.st_mode)) {
+ file_size = sb.st_size;
+ } else if (S_ISBLK(sb.st_mode)) {
+ unsigned long long bytes;
+
+ if (ioctl(fd, BLKGETSIZE64, &bytes) < 0) {
+ perror("ioctl");
+ close(fd);
+ return 1;
+ }
+ file_size = bytes;
+ } else {
+ goto out;
+ }
+
+ if (file_size < CHUNK_SIZE)
+ goto out;
+
+ t_posix_memalign(&buf, 4096, CHUNK_SIZE);
+
+ off = file_size - (CHUNK_SIZE / 2);
+ punch_off = off + CHUNK_SIZE / 4;
+
+ u_in_buf = CHUNK_SIZE / sizeof(unsigned int);
+ ptr = buf;
+ for (i = 0; i < u_in_buf; i++) {
+ *ptr = i;
+ ptr++;
+ }
+ ret = pwrite(fd, buf, CHUNK_SIZE / 2, off);
+ if (ret < 0) {
+ perror("pwrite");
+ goto err;
+ } else if (ret != CHUNK_SIZE / 2)
+ goto out;
+
+again:
+ /*
+ * Read in last bit of file so it's known cached, then remove half of that
+ * last bit so we get a short read that needs retry
+ */
+ ret = pread(fd, buf, CHUNK_SIZE / 2, off);
+ if (ret < 0) {
+ perror("pread");
+ goto err;
+ } else if (ret != CHUNK_SIZE / 2)
+ goto out;
+
+ if (posix_fadvise(fd, punch_off, CHUNK_SIZE / 4, POSIX_FADV_DONTNEED) < 0) {
+ perror("posix_fadivse");
+ goto err;
+ }
+
+ if (provide_buf) {
+ sqe = io_uring_get_sqe(ring);
+ io_uring_prep_provide_buffers(sqe, buf, CHUNK_SIZE, 1, 0, 0);
+ ret = io_uring_submit(ring);
+ if (ret != 1) {
+ fprintf(stderr, "submit failed %d\n", ret);
+ goto err;
+ }
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret < 0) {
+ fprintf(stderr, "wait completion %d\n", ret);
+ goto err;
+ }
+ ret = cqe->res;
+ io_uring_cqe_seen(ring, cqe);
+ if (ret) {
+ fprintf(stderr, "Provide buffer failed %d\n", ret);
+ goto err;
+ }
+ }
+
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "get sqe failed\n");
+ goto err;
+ }
+
+ if (vectored) {
+ assert(!provide_buf);
+ vec.iov_base = buf;
+ vec.iov_len = CHUNK_SIZE;
+ io_uring_prep_readv(sqe, fd, &vec, 1, off);
+ } else {
+ if (provide_buf) {
+ io_uring_prep_read(sqe, fd, NULL, CHUNK_SIZE, off);
+ sqe->flags |= IOSQE_BUFFER_SELECT;
+ } else {
+ io_uring_prep_read(sqe, fd, buf, CHUNK_SIZE, off);
+ }
+ }
+ memset(buf, 0, CHUNK_SIZE);
+
+ ret = io_uring_submit(ring);
+ if (ret != 1) {
+ fprintf(stderr, "Submit failed %d\n", ret);
+ goto err;
+ }
+
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret < 0) {
+ fprintf(stderr, "wait completion %d\n", ret);
+ goto err;
+ }
+
+ ret = cqe->res;
+ io_uring_cqe_seen(ring, cqe);
+ if (ret != CHUNK_SIZE / 2) {
+ fprintf(stderr, "Unexpected truncated read %d\n", ret);
+ goto err;
+ }
+
+ if (verify_buf(buf, CHUNK_SIZE / 2, 0))
+ goto err;
+
+ /*
+ * Repeat, but punch first part instead of last
+ */
+ if (first_pass) {
+ punch_off = file_size - CHUNK_SIZE / 4;
+ first_pass = 0;
+ goto again;
+ }
+
+out:
+ free(buf);
+ close(fd);
+ return 0;
+err:
+ free(buf);
+ close(fd);
+ return 1;
+}
+
+enum {
+ PUNCH_NONE,
+ PUNCH_FRONT,
+ PUNCH_MIDDLE,
+ PUNCH_END,
+};
+
+/*
+ * For each chunk in file, DONTNEED a start, end, or middle segment of it.
+ * We enter here with the file fully cached every time, either freshly
+ * written or after other reads. This forces (at least) the buffered reads
+ * to be handled incrementally, exercising that path.
+ */
+static int do_punch(int fd)
+{
+ off_t offset = 0;
+ int punch_type;
+
+ while (offset + CHUNK_SIZE <= FSIZE) {
+ off_t punch_off;
+
+ punch_type = rand() % (PUNCH_END + 1);
+ switch (punch_type) {
+ default:
+ case PUNCH_NONE:
+ punch_off = -1; /* gcc... */
+ break;
+ case PUNCH_FRONT:
+ punch_off = offset;
+ break;
+ case PUNCH_MIDDLE:
+ punch_off = offset + PUNCH_SIZE;
+ break;
+ case PUNCH_END:
+ punch_off = offset + CHUNK_SIZE - PUNCH_SIZE;
+ break;
+ }
+
+ offset += CHUNK_SIZE;
+ if (punch_type == PUNCH_NONE)
+ continue;
+ if (posix_fadvise(fd, punch_off, PUNCH_SIZE, POSIX_FADV_DONTNEED) < 0) {
+ perror("posix_fadivse");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int provide_buffers(struct io_uring *ring, void **buf)
+{
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ int i, ret;
+
+ /* real use case would have one buffer chopped up, but... */
+ for (i = 0; i < READ_BATCH; i++) {
+ sqe = io_uring_get_sqe(ring);
+ io_uring_prep_provide_buffers(sqe, buf[i], CHUNK_SIZE, 1, 0, i);
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != READ_BATCH) {
+ fprintf(stderr, "Submit failed %d\n", ret);
+ return 1;
+ }
+
+ for (i = 0; i < READ_BATCH; i++) {
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait cqe %d\n", ret);
+ return 1;
+ }
+ if (cqe->res < 0) {
+ fprintf(stderr, "cqe res provide %d\n", cqe->res);
+ return 1;
+ }
+ io_uring_cqe_seen(ring, cqe);
+ }
+
+ return 0;
+}
+
+static int test(struct io_uring *ring, const char *fname, int buffered,
+ int vectored, int small_vecs, int registered, int provide)
+{
+ struct iovec vecs[READ_BATCH][MAX_VECS];
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ void *buf[READ_BATCH];
+ int ret, fd, flags;
+ int i, j, nr_vecs;
+ off_t off, voff;
+ size_t left;
+
+ if (registered) {
+ assert(!provide);
+ assert(!vectored && !small_vecs);
+ }
+ if (provide) {
+ assert(!registered);
+ assert(!vectored && !small_vecs);
+ }
+
+ flags = O_RDONLY;
+ if (!buffered)
+ flags |= O_DIRECT;
+ fd = open(fname, flags);
+ if (fd < 0) {
+ perror("open");
+ return 1;
+ }
+
+ if (do_punch(fd))
+ return 1;
+
+ if (vectored) {
+ if (small_vecs)
+ nr_vecs = MIN_VECS;
+ else
+ nr_vecs = MAX_VECS;
+
+ for (j = 0; j < READ_BATCH; j++) {
+ for (i = 0; i < nr_vecs; i++) {
+ void *ptr;
+
+ t_posix_memalign(&ptr, 4096, CHUNK_SIZE / nr_vecs);
+ vecs[j][i].iov_base = ptr;
+ vecs[j][i].iov_len = CHUNK_SIZE / nr_vecs;
+ }
+ }
+ } else {
+ for (j = 0; j < READ_BATCH; j++)
+ t_posix_memalign(&buf[j], 4096, CHUNK_SIZE);
+ nr_vecs = 0;
+ }
+
+ if (registered) {
+ struct iovec v[READ_BATCH];
+
+ for (i = 0; i < READ_BATCH; i++) {
+ v[i].iov_base = buf[i];
+ v[i].iov_len = CHUNK_SIZE;
+ }
+ ret = io_uring_register_buffers(ring, v, READ_BATCH);
+ if (ret) {
+ fprintf(stderr, "Error buffer reg %d\n", ret);
+ goto err;
+ }
+ }
+
+ i = 0;
+ left = FSIZE;
+ off = 0;
+ while (left) {
+ int pending = 0;
+
+ if (provide && provide_buffers(ring, buf))
+ goto err;
+
+ for (i = 0; i < READ_BATCH; i++) {
+ size_t this = left;
+
+ if (this > CHUNK_SIZE)
+ this = CHUNK_SIZE;
+
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "get sqe failed\n");
+ goto err;
+ }
+
+ if (vectored) {
+ io_uring_prep_readv(sqe, fd, vecs[i], nr_vecs, off);
+ } else {
+ if (registered) {
+ io_uring_prep_read_fixed(sqe, fd, buf[i], this, off, i);
+ } else if (provide) {
+ io_uring_prep_read(sqe, fd, NULL, this, off);
+ sqe->flags |= IOSQE_BUFFER_SELECT;
+ } else {
+ io_uring_prep_read(sqe, fd, buf[i], this, off);
+ }
+ }
+ sqe->user_data = ((uint64_t)off << 32) | i;
+ off += this;
+ left -= this;
+ pending++;
+ if (!left)
+ break;
+ }
+
+ ret = io_uring_submit(ring);
+ if (ret != pending) {
+ fprintf(stderr, "sqe submit failed: %d\n", ret);
+ goto err;
+ }
+
+ for (i = 0; i < pending; i++) {
+ int index;
+
+ ret = io_uring_wait_cqe(ring, &cqe);
+ if (ret < 0) {
+ fprintf(stderr, "wait completion %d\n", ret);
+ goto err;
+ }
+ if (cqe->res < 0) {
+ fprintf(stderr, "bad read %d, read %d\n", cqe->res, i);
+ goto err;
+ }
+ if (cqe->flags & IORING_CQE_F_BUFFER)
+ index = cqe->flags >> 16;
+ else
+ index = cqe->user_data & 0xffffffff;
+ voff = cqe->user_data >> 32;
+ io_uring_cqe_seen(ring, cqe);
+ if (vectored) {
+ for (j = 0; j < nr_vecs; j++) {
+ void *buf = vecs[index][j].iov_base;
+ size_t len = vecs[index][j].iov_len;
+
+ if (verify_buf(buf, len, voff))
+ goto err;
+ voff += len;
+ }
+ } else {
+ if (verify_buf(buf[index], CHUNK_SIZE, voff))
+ goto err;
+ }
+ }
+ }
+
+ ret = 0;
+done:
+ if (registered)
+ io_uring_unregister_buffers(ring);
+ if (vectored) {
+ for (j = 0; j < READ_BATCH; j++)
+ for (i = 0; i < nr_vecs; i++)
+ free(vecs[j][i].iov_base);
+ } else {
+ for (j = 0; j < READ_BATCH; j++)
+ free(buf[j]);
+ }
+ close(fd);
+ return ret;
+err:
+ ret = 1;
+ goto done;
+}
+
+static int fill_pattern(const char *fname)
+{
+ size_t left = FSIZE;
+ unsigned int val, *ptr;
+ void *buf;
+ int fd, i;
+
+ fd = open(fname, O_WRONLY);
+ if (fd < 0) {
+ perror("open");
+ return 1;
+ }
+
+ val = 0;
+ buf = t_malloc(4096);
+ while (left) {
+ int u_in_buf = 4096 / sizeof(val);
+ size_t this = left;
+
+ if (this > 4096)
+ this = 4096;
+ ptr = buf;
+ for (i = 0; i < u_in_buf; i++) {
+ *ptr = val;
+ val++;
+ ptr++;
+ }
+ if (write(fd, buf, 4096) != 4096)
+ return 1;
+ left -= 4096;
+ }
+
+ fsync(fd);
+ close(fd);
+ free(buf);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct io_uring ring;
+ const char *fname;
+ char buf[32];
+ int ret;
+
+ srand(getpid());
+
+ if (argc > 1) {
+ fname = argv[1];
+ } else {
+ sprintf(buf, ".file-verify.%d", getpid());
+ fname = buf;
+ t_create_file(fname, FSIZE);
+ }
+
+ ret = io_uring_queue_init(READ_BATCH, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "ring setup failed: %d\n", ret);
+ goto err;
+ }
+
+ if (fill_pattern(fname))
+ goto err;
+
+ ret = test(&ring, fname, 1, 0, 0, 0, 0);
+ if (ret) {
+ fprintf(stderr, "Buffered novec test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 1, 0, 0, 1, 0);
+ if (ret) {
+ fprintf(stderr, "Buffered novec reg test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 1, 0, 0, 0, 1);
+ if (ret) {
+ fprintf(stderr, "Buffered novec provide test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 1, 1, 0, 0, 0);
+ if (ret) {
+ fprintf(stderr, "Buffered vec test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 1, 1, 1, 0, 0);
+ if (ret) {
+ fprintf(stderr, "Buffered small vec test failed\n");
+ goto err;
+ }
+
+ ret = test(&ring, fname, 0, 0, 0, 0, 0);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT novec test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 0, 0, 0, 1, 0);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT novec reg test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 0, 0, 0, 0, 1);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT novec provide test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 0, 1, 0, 0, 0);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT vec test failed\n");
+ goto err;
+ }
+ ret = test(&ring, fname, 0, 1, 1, 0, 0);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT small vec test failed\n");
+ goto err;
+ }
+
+ ret = test_truncate(&ring, fname, 1, 0, 0);
+ if (ret) {
+ fprintf(stderr, "Buffered end truncate read failed\n");
+ goto err;
+ }
+ ret = test_truncate(&ring, fname, 1, 1, 0);
+ if (ret) {
+ fprintf(stderr, "Buffered end truncate vec read failed\n");
+ goto err;
+ }
+ ret = test_truncate(&ring, fname, 1, 0, 1);
+ if (ret) {
+ fprintf(stderr, "Buffered end truncate pbuf read failed\n");
+ goto err;
+ }
+
+ ret = test_truncate(&ring, fname, 0, 0, 0);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT end truncate read failed\n");
+ goto err;
+ }
+ ret = test_truncate(&ring, fname, 0, 1, 0);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT end truncate vec read failed\n");
+ goto err;
+ }
+ ret = test_truncate(&ring, fname, 0, 0, 1);
+ if (ret) {
+ fprintf(stderr, "O_DIRECT end truncate pbuf read failed\n");
+ goto err;
+ }
+
+ if (buf == fname)
+ unlink(fname);
+ return 0;
+err:
+ if (buf == fname)
+ unlink(fname);
+ return 1;
+}
diff --git a/test/fsync.c b/test/fsync.c
index 7e93ecc..5ae8441 100644
--- a/test/fsync.c
+++ b/test/fsync.c
@@ -63,13 +63,14 @@ static int test_barrier_fsync(struct io_uring *ring)
int i, fd, ret;
off_t off;
- fd = open("testfile", O_WRONLY | O_CREAT, 0644);
+ fd = open("fsync-testfile", O_WRONLY | O_CREAT, 0644);
if (fd < 0) {
perror("open");
return 1;
}
+ unlink("fsync-testfile");
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < ARRAY_SIZE(iovecs); i++) {
iovecs[i].iov_base = t_malloc(4096);
iovecs[i].iov_len = 4096;
}
@@ -129,11 +130,15 @@ static int test_barrier_fsync(struct io_uring *ring)
io_uring_cqe_seen(ring, cqe);
}
- unlink("testfile");
- return 0;
+
+ ret = 0;
+ goto out;
err:
- unlink("testfile");
- return 1;
+ ret = 1;
+out:
+ for (i = 0; i < ARRAY_SIZE(iovecs); i++)
+ free(iovecs[i].iov_base);
+ return ret;
}
#define FILE_SIZE 1024
diff --git a/test/hardlink.c b/test/hardlink.c
new file mode 100644
index 0000000..634b8ed
--- /dev/null
+++ b/test/hardlink.c
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test io_uring linkat handling
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "liburing.h"
+
+
+static int do_linkat(struct io_uring *ring, const char *oldname, const char *newname)
+{
+ int ret;
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ io_uring_prep_linkat(sqe, AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+
+ ret = io_uring_submit(ring);
+ if (ret != 1) {
+ fprintf(stderr, "submit failed: %d\n", ret);
+ goto err;
+ }
+
+ ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0);
+ if (ret) {
+ fprintf(stderr, "wait_cqe failed: %d\n", ret);
+ goto err;
+ }
+ ret = cqe->res;
+ io_uring_cqe_seen(ring, cqe);
+ return ret;
+err:
+ return 1;
+}
+
+int files_linked_ok(const char* fn1, const char *fn2)
+{
+ struct stat s1, s2;
+
+ if (stat(fn1, &s1)) {
+ fprintf(stderr, "stat(%s): %s\n", fn1, strerror(errno));
+ return 0;
+ }
+ if (stat(fn2, &s2)) {
+ fprintf(stderr, "stat(%s): %s\n", fn2, strerror(errno));
+ return 0;
+ }
+ if (s1.st_dev != s2.st_dev || s1.st_ino != s2.st_ino) {
+ fprintf(stderr, "linked files have different device / inode numbers\n");
+ return 0;
+ }
+ if (s1.st_nlink != 2 || s2.st_nlink != 2) {
+ fprintf(stderr, "linked files have unexpected links count\n");
+ return 0;
+ }
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ static const char target[] = "io_uring-linkat-test-target";
+ static const char linkname[] = "io_uring-linkat-test-link";
+ int ret;
+ struct io_uring ring;
+
+ if (argc > 1)
+ return 0;
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "queue init failed: %d\n", ret);
+ return ret;
+ }
+
+ ret = open(target, O_CREAT | O_RDWR | O_EXCL, 0600);
+ if (ret < 0) {
+ perror("open");
+ goto err;
+ }
+ if (write(ret, "linktest", 8) != 8) {
+ close(ret);
+ goto err1;
+ }
+ close(ret);
+
+ ret = do_linkat(&ring, target, linkname);
+ if (ret < 0) {
+ if (ret == -EBADF || ret == -EINVAL) {
+ fprintf(stdout, "linkat not supported, skipping\n");
+ goto out;
+ }
+ fprintf(stderr, "linkat: %s\n", strerror(-ret));
+ goto err1;
+ } else if (ret) {
+ goto err1;
+ }
+
+ if (!files_linked_ok(linkname, target))
+ goto err2;
+
+ ret = do_linkat(&ring, target, linkname);
+ if (ret != -EEXIST) {
+ fprintf(stderr, "test_linkat linkname already exists failed: %d\n", ret);
+ goto err2;
+ }
+
+ ret = do_linkat(&ring, target, "surely/this/does/not/exist");
+ if (ret != -ENOENT) {
+ fprintf(stderr, "test_linkat no parent failed: %d\n", ret);
+ goto err2;
+ }
+
+out:
+ unlinkat(AT_FDCWD, linkname, 0);
+ unlinkat(AT_FDCWD, target, 0);
+ io_uring_queue_exit(&ring);
+ return 0;
+err2:
+ unlinkat(AT_FDCWD, linkname, 0);
+err1:
+ unlinkat(AT_FDCWD, target, 0);
+err:
+ io_uring_queue_exit(&ring);
+ return 1;
+}
+
diff --git a/test/helpers.c b/test/helpers.c
index 930d82a..975e7cb 100644
--- a/test/helpers.c
+++ b/test/helpers.c
@@ -114,3 +114,22 @@ enum t_setup_ret t_create_ring(int depth, struct io_uring *ring,
p.flags = flags;
return t_create_ring_params(depth, ring, &p);
}
+
+enum t_setup_ret t_register_buffers(struct io_uring *ring,
+ const struct iovec *iovecs,
+ unsigned nr_iovecs)
+{
+ int ret;
+
+ ret = io_uring_register_buffers(ring, iovecs, nr_iovecs);
+ if (!ret)
+ return T_SETUP_OK;
+
+ if ((ret == -EPERM || ret == -ENOMEM) && geteuid()) {
+ fprintf(stdout, "too large non-root buffer registration, skip\n");
+ return T_SETUP_SKIP;
+ }
+
+ fprintf(stderr, "buffer register failed: %s\n", strerror(-ret));
+ return ret;
+}
diff --git a/test/helpers.h b/test/helpers.h
index 74fe162..7526d46 100644
--- a/test/helpers.h
+++ b/test/helpers.h
@@ -54,6 +54,12 @@ enum t_setup_ret t_create_ring_params(int depth, struct io_uring *ring,
enum t_setup_ret t_create_ring(int depth, struct io_uring *ring,
unsigned int flags);
+enum t_setup_ret t_register_buffers(struct io_uring *ring,
+ const struct iovec *iovecs,
+ unsigned nr_iovecs);
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
#ifdef __cplusplus
}
#endif
diff --git a/test/io-cancel.c b/test/io-cancel.c
index 9a36dd9..b5b443d 100644
--- a/test/io-cancel.c
+++ b/test/io-cancel.c
@@ -115,7 +115,7 @@ static int do_io(struct io_uring *ring, int fd, int do_write)
return 0;
}
-static int start_cancel(struct io_uring *ring, int do_partial)
+static int start_cancel(struct io_uring *ring, int do_partial, int async_cancel)
{
struct io_uring_sqe *sqe;
int i, ret, submitted = 0;
@@ -129,6 +129,8 @@ static int start_cancel(struct io_uring *ring, int do_partial)
goto err;
}
io_uring_prep_cancel(sqe, (void *) (unsigned long) i + 1, 0);
+ if (async_cancel)
+ sqe->flags |= IOSQE_ASYNC;
sqe->user_data = 0;
submitted++;
}
@@ -148,7 +150,8 @@ err:
* the submitted IO. This is done to verify that cancelling one piece of IO doesn't
* impact others.
*/
-static int test_io_cancel(const char *file, int do_write, int do_partial)
+static int test_io_cancel(const char *file, int do_write, int do_partial,
+ int async_cancel)
{
struct io_uring ring;
struct timeval start_tv;
@@ -179,7 +182,7 @@ static int test_io_cancel(const char *file, int do_write, int do_partial)
goto err;
/* sleep for 1/3 of the total time, to allow some to start/complete */
usleep(usecs / 3);
- if (start_cancel(&ring, do_partial))
+ if (start_cancel(&ring, do_partial, async_cancel))
goto err;
to_wait = BUFFERS;
if (do_partial)
@@ -339,7 +342,7 @@ static int test_cancel_req_across_fork(void)
return 1;
}
if ((cqe->user_data == 1 && cqe->res != -EINTR) ||
- (cqe->user_data == 2 && cqe->res != -EALREADY)) {
+ (cqe->user_data == 2 && cqe->res != -EALREADY && cqe->res)) {
fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res);
exit(1);
}
@@ -483,6 +486,7 @@ static int test_sqpoll_cancel_iowq_requests(void)
int main(int argc, char *argv[])
{
+ const char *fname = ".io-cancel-test";
int i, ret;
if (argc > 1)
@@ -508,24 +512,26 @@ int main(int argc, char *argv[])
return 1;
}
- t_create_file(".basic-rw", FILE_SIZE);
+ t_create_file(fname, FILE_SIZE);
vecs = t_create_buffers(BUFFERS, BS);
- for (i = 0; i < 4; i++) {
- int v1 = (i & 1) != 0;
- int v2 = (i & 2) != 0;
+ for (i = 0; i < 8; i++) {
+ int write = (i & 1) != 0;
+ int partial = (i & 2) != 0;
+ int async = (i & 4) != 0;
- ret = test_io_cancel(".basic-rw", v1, v2);
+ ret = test_io_cancel(fname, write, partial, async);
if (ret) {
- fprintf(stderr, "test_io_cancel %d %d failed\n", v1, v2);
+ fprintf(stderr, "test_io_cancel %d %d %d failed\n",
+ write, partial, async);
goto err;
}
}
- unlink(".basic-rw");
+ unlink(fname);
return 0;
err:
- unlink(".basic-rw");
+ unlink(fname);
return 1;
}
diff --git a/test/io_uring_enter.c b/test/io_uring_enter.c
index a6bb8f5..4ea990c 100644
--- a/test/io_uring_enter.c
+++ b/test/io_uring_enter.c
@@ -30,6 +30,7 @@
#include "../src/syscall.h"
#define IORING_MAX_ENTRIES 4096
+#define IORING_MAX_ENTRIES_FALLBACK 128
int
expect_failed_submit(struct io_uring *ring, int error)
@@ -218,6 +219,8 @@ main(int argc, char **argv)
return 0;
ret = io_uring_queue_init(IORING_MAX_ENTRIES, &ring, 0);
+ if (ret == -ENOMEM)
+ ret = io_uring_queue_init(IORING_MAX_ENTRIES_FALLBACK, &ring, 0);
if (ret < 0) {
perror("io_uring_queue_init");
exit(1);
@@ -234,7 +237,7 @@ main(int argc, char **argv)
status |= try_io_uring_enter(0, 0, 0, 0, NULL, -1, EOPNOTSUPP);
/* to_submit: 0, flags: 0; should get back 0. */
- status |= try_io_uring_enter(ring.ring_fd, 1, 0, 0, NULL, 0, 0);
+ status |= try_io_uring_enter(ring.ring_fd, 0, 0, 0, NULL, 0, 0);
/* fill the sq ring */
sq_entries = *ring.sq.kring_entries;
diff --git a/test/io_uring_register.c b/test/io_uring_register.c
index 7bcb036..b8a4ea5 100644
--- a/test/io_uring_register.c
+++ b/test/io_uring_register.c
@@ -31,6 +31,17 @@ static int pagesize;
static rlim_t mlock_limit;
static int devnull;
+#if !defined(CONFIG_HAVE_MEMFD_CREATE)
+#include <sys/syscall.h>
+#include <linux/memfd.h>
+
+static int memfd_create(const char *name, unsigned int flags)
+{
+ return (int)syscall(SYS_memfd_create, name, flags);
+}
+#endif
+
+
int
expect_fail(int fd, unsigned int opcode, void *arg,
unsigned int nr_args, int error)
@@ -254,6 +265,10 @@ test_memlock_exceeded(int fd)
iov.iov_len /= 2;
continue;
}
+ if (errno == EFAULT) {
+ free(buf);
+ return 0;
+ }
printf("expected success or EFAULT, got %d\n", errno);
free(buf);
return 1;
@@ -280,12 +295,16 @@ int
test_iovec_nr(int fd)
{
int i, ret, status = 0;
- unsigned int nr = UIO_MAXIOV + 1;
+ unsigned int nr = 1000000;
struct iovec *iovs;
void *buf;
+ iovs = malloc(nr * sizeof(struct iovec));
+ if (!iovs) {
+ fprintf(stdout, "can't allocate iovecs, skip\n");
+ return 0;
+ }
buf = t_malloc(pagesize);
- iovs = t_malloc(nr * sizeof(struct iovec));
for (i = 0; i < nr; i++) {
iovs[i].iov_base = buf;
@@ -295,16 +314,18 @@ test_iovec_nr(int fd)
status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL);
/* reduce to UIO_MAXIOV */
- nr--;
+ nr = UIO_MAXIOV;
printf("io_uring_register(%d, %u, %p, %u)\n",
fd, IORING_REGISTER_BUFFERS, iovs, nr);
ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
- if (ret != 0) {
+ if (ret && (errno == ENOMEM || errno == EPERM) && geteuid()) {
+ printf("can't register large iovec for regular users, skip\n");
+ } else if (ret != 0) {
printf("expected success, got %d\n", errno);
status = 1;
- } else
+ } else {
__sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
-
+ }
free(buf);
free(iovs);
return status;
@@ -481,6 +502,113 @@ test_poll_ringfd(void)
return status;
}
+static int test_shmem(void)
+{
+ const char pattern = 0xEA;
+ const int len = 4096;
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ struct io_uring ring;
+ struct iovec iov;
+ int memfd, ret, i;
+ char *mem;
+ int pipefd[2] = {-1, -1};
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret)
+ return 1;
+
+ if (pipe(pipefd)) {
+ perror("pipe");
+ return 1;
+ }
+ memfd = memfd_create("uring-shmem-test", 0);
+ if (memfd < 0) {
+ fprintf(stderr, "memfd_create() failed %i\n", -errno);
+ return 1;
+ }
+ if (ftruncate(memfd, len)) {
+ fprintf(stderr, "can't truncate memfd\n");
+ return 1;
+ }
+ mem = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
+ if (!mem) {
+ fprintf(stderr, "mmap failed\n");
+ return 1;
+ }
+ for (i = 0; i < len; i++)
+ mem[i] = pattern;
+
+ iov.iov_base = mem;
+ iov.iov_len = len;
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ fprintf(stdout, "memfd registration isn't supported, "
+ "skip\n");
+ goto out;
+ }
+
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ return 1;
+ }
+
+ /* check that we can read and write from/to shmem reg buffer */
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_write_fixed(sqe, pipefd[1], mem, 512, 0, 0);
+ sqe->user_data = 1;
+
+ ret = io_uring_submit(&ring);
+ if (ret != 1) {
+ fprintf(stderr, "submit write failed\n");
+ return 1;
+ }
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0 || cqe->user_data != 1 || cqe->res != 512) {
+ fprintf(stderr, "reading from shmem failed\n");
+ return 1;
+ }
+ io_uring_cqe_seen(&ring, cqe);
+
+ /* clean it, should be populated with the pattern back from the pipe */
+ memset(mem, 0, 512);
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_read_fixed(sqe, pipefd[0], mem, 512, 0, 0);
+ sqe->user_data = 2;
+
+ ret = io_uring_submit(&ring);
+ if (ret != 1) {
+ fprintf(stderr, "submit write failed\n");
+ return 1;
+ }
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0 || cqe->user_data != 2 || cqe->res != 512) {
+ fprintf(stderr, "reading from shmem failed\n");
+ return 1;
+ }
+ io_uring_cqe_seen(&ring, cqe);
+
+ for (i = 0; i < 512; i++) {
+ if (mem[i] != pattern) {
+ fprintf(stderr, "data integrity fail\n");
+ return 1;
+ }
+ }
+
+ ret = io_uring_unregister_buffers(&ring);
+ if (ret) {
+ fprintf(stderr, "buffer unreg failed: %d\n", ret);
+ return 1;
+ }
+out:
+ io_uring_queue_exit(&ring);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ munmap(mem, len);
+ close(memfd);
+ return 0;
+}
+
int
main(int argc, char **argv)
{
@@ -537,5 +665,11 @@ main(int argc, char **argv)
else
printf("FAIL\n");
+ ret = test_shmem();
+ if (ret) {
+ fprintf(stderr, "test_shmem() failed\n");
+ status |= 1;
+ }
+
return status;
}
diff --git a/test/io_uring_setup.c b/test/io_uring_setup.c
index a0709a7..94b54fd 100644
--- a/test/io_uring_setup.c
+++ b/test/io_uring_setup.c
@@ -99,7 +99,7 @@ dump_resv(struct io_uring_params *p)
int
try_io_uring_setup(unsigned entries, struct io_uring_params *p, int expect, int error)
{
- int ret, __errno;
+ int ret, err;
printf("io_uring_setup(%u, %p), flags: %s, feat: %s, resv: %s, sq_thread_cpu: %u\n",
entries, p, flags_string(p), features_string(p), dump_resv(p),
@@ -113,13 +113,13 @@ try_io_uring_setup(unsigned entries, struct io_uring_params *p, int expect, int
close(ret);
return 1;
}
- __errno = errno;
- if (expect == -1 && error != __errno) {
- if (__errno == EPERM && geteuid() != 0) {
+ err = errno;
+ if (expect == -1 && error != err) {
+ if (err == EPERM && geteuid() != 0) {
printf("Needs root, not flagging as an error\n");
return 0;
}
- printf("expected errno %d, got %d\n", error, __errno);
+ printf("expected errno %d, got %d\n", error, err);
return 1;
}
diff --git a/test/iopoll.c b/test/iopoll.c
index 3d94dfe..de36473 100644
--- a/test/iopoll.c
+++ b/test/iopoll.c
@@ -60,14 +60,13 @@ static int __test_io(const char *file, struct io_uring *ring, int write, int sqt
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
int open_flags;
- int i, fd, ret;
+ int i, fd = -1, ret;
off_t offset;
- if (buf_select && write)
+ if (buf_select) {
write = 0;
- if (buf_select && fixed)
fixed = 0;
-
+ }
if (buf_select && provide_buffers(ring))
return 1;
@@ -77,19 +76,20 @@ static int __test_io(const char *file, struct io_uring *ring, int write, int sqt
open_flags = O_RDONLY;
open_flags |= O_DIRECT;
- fd = open(file, open_flags);
- if (fd < 0) {
- perror("file open");
- goto err;
- }
-
if (fixed) {
- ret = io_uring_register_buffers(ring, vecs, BUFFERS);
- if (ret) {
+ ret = t_register_buffers(ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
fprintf(stderr, "buffer reg failed: %d\n", ret);
goto err;
}
}
+ fd = open(file, open_flags);
+ if (fd < 0) {
+ perror("file open");
+ goto err;
+ }
if (sqthread) {
ret = io_uring_register_files(ring, &fd, 1);
if (ret) {
@@ -271,31 +271,19 @@ static int test_io(const char *file, int write, int sqthread, int fixed,
int buf_select)
{
struct io_uring ring;
- int ret, ring_flags;
+ int ret, ring_flags = IORING_SETUP_IOPOLL;
if (no_iopoll)
return 0;
- ring_flags = IORING_SETUP_IOPOLL;
- if (sqthread) {
- static int warned;
-
- if (geteuid()) {
- if (!warned)
- fprintf(stdout, "SQPOLL requires root, skipping\n");
- warned = 1;
- return 0;
- }
- }
-
- ret = io_uring_queue_init(64, &ring, ring_flags);
- if (ret) {
+ ret = t_create_ring(64, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
fprintf(stderr, "ring create failed: %d\n", ret);
return 1;
}
-
ret = __test_io(file, &ring, write, sqthread, fixed, buf_select);
-
io_uring_queue_exit(&ring);
return ret;
}
@@ -325,6 +313,7 @@ static int probe_buf_select(void)
int main(int argc, char *argv[])
{
int i, ret, nr;
+ char buf[256];
char *fname;
if (probe_buf_select())
@@ -333,7 +322,10 @@ int main(int argc, char *argv[])
if (argc > 1) {
fname = argv[1];
} else {
- fname = ".iopoll-rw";
+ srand((unsigned)time(NULL));
+ snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
+ (unsigned)rand(), (unsigned)getpid());
+ fname = buf;
t_create_file(fname, FILE_SIZE);
}
@@ -343,15 +335,15 @@ int main(int argc, char *argv[])
if (no_buf_select)
nr = 8;
for (i = 0; i < nr; i++) {
- int v1, v2, v3, v4;
+ int write = (i & 1) != 0;
+ int sqthread = (i & 2) != 0;
+ int fixed = (i & 4) != 0;
+ int buf_select = (i & 8) != 0;
- v1 = (i & 1) != 0;
- v2 = (i & 2) != 0;
- v3 = (i & 4) != 0;
- v4 = (i & 8) != 0;
- ret = test_io(fname, v1, v2, v3, v4);
+ ret = test_io(fname, write, sqthread, fixed, buf_select);
if (ret) {
- fprintf(stderr, "test_io failed %d/%d/%d/%d\n", v1, v2, v3, v4);
+ fprintf(stderr, "test_io failed %d/%d/%d/%d\n",
+ write, sqthread, fixed, buf_select);
goto err;
}
if (no_iopoll)
diff --git a/test/link-timeout.c b/test/link-timeout.c
index 5d8417f..c8c289c 100644
--- a/test/link-timeout.c
+++ b/test/link-timeout.c
@@ -63,7 +63,7 @@ static int test_fail_two_link_timeouts(struct io_uring *ring)
struct __kernel_timespec ts;
struct io_uring_cqe *cqe;
struct io_uring_sqe *sqe;
- int ret, i;
+ int ret, i, nr_wait;
ts.tv_sec = 1;
ts.tv_nsec = 0;
@@ -114,12 +114,13 @@ static int test_fail_two_link_timeouts(struct io_uring *ring)
sqe->user_data = 4;
ret = io_uring_submit(ring);
- if (ret != 4) {
+ if (ret < 3) {
printf("sqe submit failed: %d\n", ret);
goto err;
}
+ nr_wait = ret;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < nr_wait; i++) {
ret = io_uring_wait_cqe(ring, &cqe);
if (ret < 0) {
printf("wait completion %d\n", ret);
@@ -619,6 +620,8 @@ static int test_timeout_link_chain1(struct io_uring *ring)
io_uring_cqe_seen(ring, cqe);
}
+ close(fds[0]);
+ close(fds[1]);
return 0;
err:
return 1;
@@ -713,6 +716,8 @@ static int test_timeout_link_chain2(struct io_uring *ring)
io_uring_cqe_seen(ring, cqe);
}
+ close(fds[0]);
+ close(fds[1]);
return 0;
err:
return 1;
@@ -833,6 +838,8 @@ static int test_timeout_link_chain3(struct io_uring *ring)
io_uring_cqe_seen(ring, cqe);
}
+ close(fds[0]);
+ close(fds[1]);
return 0;
err:
return 1;
@@ -917,6 +924,8 @@ static int test_timeout_link_chain4(struct io_uring *ring)
io_uring_cqe_seen(ring, cqe);
}
+ close(fds[0]);
+ close(fds[1]);
return 0;
err:
return 1;
@@ -973,14 +982,16 @@ static int test_timeout_link_chain5(struct io_uring *ring)
}
switch (cqe->user_data) {
case 1:
- if (cqe->res) {
- fprintf(stderr, "Timeout got %d, wanted -EINVAL\n",
+ case 2:
+ if (cqe->res && cqe->res != -ECANCELED) {
+ fprintf(stderr, "Request got %d, wanted -EINVAL "
+ "or -ECANCELED\n",
cqe->res);
goto err;
}
break;
- case 2:
- if (cqe->res != -ECANCELED) {
+ case 3:
+ if (cqe->res != -ECANCELED && cqe->res != -EINVAL) {
fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res);
goto err;
}
diff --git a/test/link.c b/test/link.c
index fadd0b5..c89d6b2 100644
--- a/test/link.c
+++ b/test/link.c
@@ -429,53 +429,6 @@ err:
return 1;
}
-static int test_link_fail_ordering(struct io_uring *ring)
-{
- struct io_uring_cqe *cqe;
- struct io_uring_sqe *sqe;
- int ret, i, nr_compl;
-
- sqe = io_uring_get_sqe(ring);
- io_uring_prep_nop(sqe);
- sqe->flags |= IOSQE_IO_LINK;
- sqe->user_data = 0;
-
- sqe = io_uring_get_sqe(ring);
- io_uring_prep_write(sqe, -1, NULL, 100, 0);
- sqe->flags |= IOSQE_IO_LINK;
- sqe->user_data = 1;
-
- sqe = io_uring_get_sqe(ring);
- io_uring_prep_nop(sqe);
- sqe->flags |= IOSQE_IO_LINK;
- sqe->user_data = 2;
-
- nr_compl = ret = io_uring_submit(ring);
- /* at least the first nop should have been submitted */
- if (ret < 1) {
- fprintf(stderr, "sqe submit failed: %d\n", ret);
- goto err;
- }
-
- for (i = 0; i < nr_compl; i++) {
- ret = io_uring_wait_cqe(ring, &cqe);
- if (ret) {
- fprintf(stderr, "wait completion %d\n", ret);
- goto err;
- }
- if (cqe->user_data != i) {
- fprintf(stderr, "wrong CQE order, got %i, expected %i\n",
- (int)cqe->user_data, i);
- goto err;
- }
- io_uring_cqe_seen(ring, cqe);
- }
-
- return 0;
-err:
- return 1;
-}
-
int main(int argc, char *argv[])
{
struct io_uring ring, poll_ring;
@@ -539,11 +492,5 @@ int main(int argc, char *argv[])
return ret;
}
- ret = test_link_fail_ordering(&ring);
- if (ret) {
- fprintf(stderr, "test_link_fail_ordering last failed\n");
- return ret;
- }
-
return 0;
}
diff --git a/test/link_drain.c b/test/link_drain.c
index a50fe88..b95168d 100644
--- a/test/link_drain.c
+++ b/test/link_drain.c
@@ -111,6 +111,7 @@ int test_link_drain_multi(struct io_uring *ring)
perror("open");
return 1;
}
+ unlink("testfile");
iovecs.iov_base = t_malloc(4096);
iovecs.iov_len = 4096;
@@ -189,12 +190,10 @@ int test_link_drain_multi(struct io_uring *ring)
free(iovecs.iov_base);
close(fd);
- unlink("testfile");
return 0;
err:
free(iovecs.iov_base);
close(fd);
- unlink("testfile");
return 1;
}
diff --git a/test/madvise.c b/test/madvise.c
index 89057af..b85aba8 100644
--- a/test/madvise.c
+++ b/test/madvise.c
@@ -181,7 +181,8 @@ int main(int argc, char *argv[])
break;
}
- if (bad > good)
+ /* too hard to reliably test, just ignore */
+ if (0 && bad > good)
fprintf(stderr, "Suspicious timings (%u > %u)\n", bad, good);
if (fname != argv[1])
unlink(fname);
diff --git a/test/mkdir.c b/test/mkdir.c
new file mode 100644
index 0000000..363fe1e
--- /dev/null
+++ b/test/mkdir.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test io_uring mkdirat handling
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "liburing.h"
+
+static int do_mkdirat(struct io_uring *ring, const char *fn)
+{
+ int ret;
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ io_uring_prep_mkdirat(sqe, AT_FDCWD, fn, 0700);
+
+ ret = io_uring_submit(ring);
+ if (ret != 1) {
+ fprintf(stderr, "submit failed: %d\n", ret);
+ goto err;
+ }
+
+ ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0);
+ if (ret) {
+ fprintf(stderr, "wait_cqe failed: %d\n", ret);
+ goto err;
+ }
+ ret = cqe->res;
+ io_uring_cqe_seen(ring, cqe);
+ return ret;
+err:
+ return 1;
+}
+
+static int stat_file(const char *fn)
+{
+ struct stat sb;
+
+ if (!stat(fn, &sb))
+ return 0;
+
+ return errno;
+}
+
+int main(int argc, char *argv[])
+{
+ static const char fn[] = "io_uring-mkdirat-test";
+ int ret;
+ struct io_uring ring;
+
+ if (argc > 1)
+ return 0;
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "queue init failed: %d\n", ret);
+ return ret;
+ }
+
+ ret = do_mkdirat(&ring, fn);
+ if (ret < 0) {
+ if (ret == -EBADF || ret == -EINVAL) {
+ fprintf(stdout, "mkdirat not supported, skipping\n");
+ goto out;
+ }
+ fprintf(stderr, "mkdirat: %s\n", strerror(-ret));
+ goto err;
+ } else if (ret) {
+ goto err;
+ }
+
+ if (stat_file(fn)) {
+ perror("stat");
+ goto err;
+ }
+
+ ret = do_mkdirat(&ring, fn);
+ if (ret != -EEXIST) {
+ fprintf(stderr, "do_mkdirat already exists failed: %d\n", ret);
+ goto err1;
+ }
+
+ ret = do_mkdirat(&ring, "surely/this/wont/exist");
+ if (ret != -ENOENT) {
+ fprintf(stderr, "do_mkdirat no parent failed: %d\n", ret);
+ goto err1;
+ }
+
+out:
+ unlinkat(AT_FDCWD, fn, AT_REMOVEDIR);
+ io_uring_queue_exit(&ring);
+ return 0;
+err1:
+ unlinkat(AT_FDCWD, fn, AT_REMOVEDIR);
+err:
+ io_uring_queue_exit(&ring);
+ return 1;
+}
diff --git a/test/multicqes_drain.c b/test/multicqes_drain.c
index 609d583..d04cf37 100644
--- a/test/multicqes_drain.c
+++ b/test/multicqes_drain.c
@@ -157,7 +157,7 @@ int generate_opcode(int i, int pre_flags)
return sqe_op;
}
-inline void add_multishot_sqe(int index)
+static inline void add_multishot_sqe(int index)
{
multi_sqes[cnt++] = index;
}
diff --git a/test/openat2.c b/test/openat2.c
index 65f81b1..379c61e 100644
--- a/test/openat2.c
+++ b/test/openat2.c
@@ -9,11 +9,13 @@
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
+#include <sys/uio.h>
#include "helpers.h"
#include "liburing.h"
-static int test_openat2(struct io_uring *ring, const char *path, int dfd)
+static int test_openat2(struct io_uring *ring, const char *path, int dfd,
+ bool direct, int fixed_index)
{
struct io_uring_cqe *cqe;
struct io_uring_sqe *sqe;
@@ -23,28 +25,212 @@ static int test_openat2(struct io_uring *ring, const char *path, int dfd)
sqe = io_uring_get_sqe(ring);
if (!sqe) {
fprintf(stderr, "get sqe failed\n");
- goto err;
+ return -1;
}
memset(&how, 0, sizeof(how));
- how.flags = O_RDONLY;
- io_uring_prep_openat2(sqe, dfd, path, &how);
+ how.flags = O_RDWR;
+
+ if (!direct)
+ io_uring_prep_openat2(sqe, dfd, path, &how);
+ else
+ io_uring_prep_openat2_direct(sqe, dfd, path, &how, fixed_index);
ret = io_uring_submit(ring);
if (ret <= 0) {
fprintf(stderr, "sqe submit failed: %d\n", ret);
- goto err;
+ return -1;
}
ret = io_uring_wait_cqe(ring, &cqe);
if (ret < 0) {
fprintf(stderr, "wait completion %d\n", ret);
- goto err;
+ return -1;
}
ret = cqe->res;
io_uring_cqe_seen(ring, cqe);
+
+ if (direct && ret > 0) {
+ close(ret);
+ return -EINVAL;
+ }
return ret;
-err:
- return -1;
+}
+
+static int test_open_fixed(const char *path, int dfd)
+{
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct io_uring ring;
+ const char pattern = 0xac;
+ char buffer[] = { 0, 0 };
+ int i, ret, fd = -1;
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "ring setup failed\n");
+ return -1;
+ }
+ ret = io_uring_register_files(&ring, &fd, 1);
+ if (ret) {
+ fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret);
+ return -1;
+ }
+
+ ret = test_openat2(&ring, path, dfd, true, 0);
+ if (ret == -EINVAL) {
+ printf("fixed open isn't supported\n");
+ return 1;
+ } else if (ret) {
+ fprintf(stderr, "direct open failed %d\n", ret);
+ return -1;
+ }
+
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_write(sqe, 0, &pattern, 1, 0);
+ sqe->user_data = 1;
+ sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK;
+
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_read(sqe, 0, buffer, 1, 0);
+ sqe->user_data = 2;
+ sqe->flags |= IOSQE_FIXED_FILE;
+
+ ret = io_uring_submit(&ring);
+ if (ret != 2) {
+ fprintf(stderr, "%s: got %d, wanted 2\n", __FUNCTION__, ret);
+ return -1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ fprintf(stderr, "wait completion %d\n", ret);
+ return -1;
+ }
+ if (cqe->res != 1) {
+ fprintf(stderr, "unexpectetd ret %d\n", cqe->res);
+ return -1;
+ }
+ io_uring_cqe_seen(&ring, cqe);
+ }
+ if (memcmp(&pattern, buffer, 1) != 0) {
+ fprintf(stderr, "buf validation failed\n");
+ return -1;
+ }
+
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+static int test_open_fixed_fail(const char *path, int dfd)
+{
+ struct io_uring ring;
+ int ret, fd = -1;
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "ring setup failed\n");
+ return -1;
+ }
+
+ ret = test_openat2(&ring, path, dfd, true, 0);
+ if (ret != -ENXIO) {
+ fprintf(stderr, "install into not existing table, %i\n", ret);
+ return 1;
+ }
+
+ ret = io_uring_register_files(&ring, &fd, 1);
+ if (ret) {
+ fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret);
+ return -1;
+ }
+
+ ret = test_openat2(&ring, path, dfd, true, 1);
+ if (ret != -EINVAL) {
+ fprintf(stderr, "install out of bounds, %i\n", ret);
+ return -1;
+ }
+
+ ret = test_openat2(&ring, path, dfd, true, (1u << 16));
+ if (ret != -EINVAL) {
+ fprintf(stderr, "install out of bounds or u16 overflow, %i\n", ret);
+ return -1;
+ }
+
+ ret = test_openat2(&ring, path, dfd, true, (1u << 16) + 1);
+ if (ret != -EINVAL) {
+ fprintf(stderr, "install out of bounds or u16 overflow, %i\n", ret);
+ return -1;
+ }
+
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+static int test_direct_reinstall(const char *path, int dfd)
+{
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ char buf[1] = { 0xfa };
+ struct io_uring ring;
+ int ret, pipe_fds[2];
+ ssize_t ret2;
+
+ if (pipe2(pipe_fds, O_NONBLOCK)) {
+ fprintf(stderr, "pipe() failed\n");
+ return -1;
+ }
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "ring setup failed\n");
+ return -1;
+ }
+ ret = io_uring_register_files(&ring, pipe_fds, 2);
+ if (ret) {
+ fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret);
+ return -1;
+ }
+
+ /* reinstall into the second slot */
+ ret = test_openat2(&ring, path, dfd, true, 1);
+ if (ret != 0) {
+ fprintf(stderr, "reinstall failed, %i\n", ret);
+ return -1;
+ }
+
+ /* verify it's reinstalled, first write into the slot... */
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_write(sqe, 1, buf, sizeof(buf), 0);
+ sqe->flags |= IOSQE_FIXED_FILE;
+
+ ret = io_uring_submit(&ring);
+ if (ret != 1) {
+ fprintf(stderr, "sqe submit failed: %d\n", ret);
+ return -1;
+ }
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ fprintf(stderr, "wait completion %d\n", ret);
+ return ret;
+ }
+ ret = cqe->res;
+ io_uring_cqe_seen(&ring, cqe);
+ if (ret != 1) {
+ fprintf(stderr, "invalid write %i\n", ret);
+ return -1;
+ }
+
+ /* ... and make sure nothing has been written to the pipe */
+ ret2 = read(pipe_fds[0], buf, 1);
+ if (ret2 != 0 && !(ret2 < 0 && errno == EAGAIN)) {
+ fprintf(stderr, "invalid pipe read, %d %d\n", errno, (int)ret2);
+ return -1;
+ }
+
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+ io_uring_queue_exit(&ring);
+ return 0;
}
int main(int argc, char *argv[])
@@ -74,7 +260,7 @@ int main(int argc, char *argv[])
if (do_unlink)
t_create_file(path_rel, 4096);
- ret = test_openat2(&ring, path, -1);
+ ret = test_openat2(&ring, path, -1, false, 0);
if (ret < 0) {
if (ret == -EINVAL) {
fprintf(stdout, "openat2 not supported, skipping\n");
@@ -84,12 +270,31 @@ int main(int argc, char *argv[])
goto err;
}
- ret = test_openat2(&ring, path_rel, AT_FDCWD);
+ ret = test_openat2(&ring, path_rel, AT_FDCWD, false, 0);
if (ret < 0) {
fprintf(stderr, "test_openat2 relative failed: %d\n", ret);
goto err;
}
+ ret = test_open_fixed(path, -1);
+ if (ret > 0)
+ goto done;
+ if (ret) {
+ fprintf(stderr, "test_open_fixed failed\n");
+ goto err;
+ }
+ ret = test_open_fixed_fail(path, -1);
+ if (ret) {
+ fprintf(stderr, "test_open_fixed_fail failed\n");
+ goto err;
+ }
+
+ ret = test_direct_reinstall(path, -1);
+ if (ret) {
+ fprintf(stderr, "test_direct_reinstall failed\n");
+ goto err;
+ }
+
done:
unlink(path);
if (do_unlink)
diff --git a/test/poll-cancel-ton.c b/test/poll-cancel-ton.c
index e9d612e..f0875cd 100644
--- a/test/poll-cancel-ton.c
+++ b/test/poll-cancel-ton.c
@@ -71,10 +71,10 @@ static int del_polls(struct io_uring *ring, int fd, int nr)
static int add_polls(struct io_uring *ring, int fd, int nr)
{
- int pending, batch, i, count, ret;
+ int batch, i, count, ret;
struct io_uring_sqe *sqe;
- pending = count = 0;
+ count = 0;
while (nr) {
batch = 1024;
if (batch > nr)
@@ -93,7 +93,6 @@ static int add_polls(struct io_uring *ring, int fd, int nr)
return 1;
}
nr -= batch;
- pending += batch;
reap_events(ring, batch, 1);
}
return 0;
diff --git a/test/poll-mshot-update.c b/test/poll-mshot-update.c
index 1a9ea0a..75ee52f 100644
--- a/test/poll-mshot-update.c
+++ b/test/poll-mshot-update.c
@@ -28,7 +28,37 @@ struct p {
};
static struct p p[NFILES];
-static int no_update;
+
+static int has_poll_update(void)
+{
+ struct io_uring ring;
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ bool has_update = false;
+ int ret;
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret)
+ return -1;
+
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_poll_update(sqe, NULL, NULL, POLLIN, IORING_TIMEOUT_UPDATE);
+
+ ret = io_uring_submit(&ring);
+ if (ret != 1)
+ return -1;
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (!ret) {
+ if (cqe->res == -ENOENT)
+ has_update = true;
+ else if (cqe->res != -EINVAL)
+ return -1;
+ io_uring_cqe_seen(&ring, cqe);
+ }
+ io_uring_queue_exit(&ring);
+ return has_update;
+}
static int arm_poll(struct io_uring *ring, int off)
{
@@ -40,8 +70,7 @@ static int arm_poll(struct io_uring *ring, int off)
return 1;
}
- io_uring_prep_poll_add(sqe, p[off].fd[0], POLLIN);
- sqe->len = 1;
+ io_uring_prep_poll_multishot(sqe, p[off].fd[0], POLLIN);
sqe->user_data = off;
return 0;
}
@@ -58,7 +87,7 @@ static int reap_polls(struct io_uring *ring)
sqe = io_uring_get_sqe(ring);
/* update event */
io_uring_prep_poll_update(sqe, (void *)(unsigned long)i, NULL,
- POLLIN, 2);
+ POLLIN, IORING_POLL_UPDATE_EVENTS);
sqe->user_data = 0x12345678;
}
@@ -77,7 +106,6 @@ static int reap_polls(struct io_uring *ring)
off = cqe->user_data;
if (off == 0x12345678)
goto seen;
- p[off].triggered = 0;
ret = read(p[off].fd[0], &c, 1);
if (ret != 1) {
if (ret == -1 && errno == EAGAIN)
@@ -128,19 +156,6 @@ static void *trigger_polls_fn(void *data)
return NULL;
}
-static int check_no_update(struct io_uring *ring)
-{
- struct io_uring_cqe *cqe;
- int ret;
-
- ret = io_uring_wait_cqe(ring, &cqe);
- if (ret)
- return 0;
- ret = cqe->res;
- io_uring_cqe_seen(ring, cqe);
- return ret == -EINVAL;
-}
-
static int arm_polls(struct io_uring *ring)
{
int ret, to_arm = NFILES, i, off;
@@ -163,10 +178,6 @@ static int arm_polls(struct io_uring *ring)
ret = io_uring_submit(ring);
if (ret != this_arm) {
- if (ret > 0 && check_no_update(ring)) {
- no_update = 1;
- return 0;
- }
fprintf(stderr, "submitted %d, %d\n", ret, this_arm);
return 1;
}
@@ -182,11 +193,20 @@ int main(int argc, char *argv[])
struct io_uring_params params = { };
struct rlimit rlim;
pthread_t thread;
- int i, ret;
+ int i, j, ret;
if (argc > 1)
return 0;
+ ret = has_poll_update();
+ if (ret < 0) {
+ fprintf(stderr, "poll update check failed %i\n", ret);
+ return -1;
+ } else if (!ret) {
+ fprintf(stderr, "no poll update, skip\n");
+ return 0;
+ }
+
if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
perror("getrlimit");
goto err_noring;
@@ -227,10 +247,6 @@ int main(int argc, char *argv[])
if (arm_polls(&ring))
goto err;
- if (no_update) {
- printf("No poll update support, skipping\n");
- goto done;
- }
for (i = 0; i < NLOOPS; i++) {
pthread_create(&thread, NULL, trigger_polls_fn, NULL);
@@ -238,9 +254,11 @@ int main(int argc, char *argv[])
if (ret)
goto err;
pthread_join(thread, NULL);
+
+ for (j = 0; j < NFILES; j++)
+ p[j].triggered = 0;
}
-done:
io_uring_queue_exit(&ring);
return 0;
err:
diff --git a/test/read-write.c b/test/read-write.c
index d0a77fa..885905b 100644
--- a/test/read-write.c
+++ b/test/read-write.c
@@ -49,7 +49,7 @@ static int __test_io(const char *file, struct io_uring *ring, int write,
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
int open_flags;
- int i, fd, ret;
+ int i, fd = -1, ret;
off_t offset;
#ifdef VERBOSE
@@ -57,13 +57,6 @@ static int __test_io(const char *file, struct io_uring *ring, int write,
buffered, sqthread,
fixed, nonvec);
#endif
- if (sqthread && geteuid()) {
-#ifdef VERBOSE
- fprintf(stdout, "SKIPPED (not root)\n");
-#endif
- return 0;
- }
-
if (write)
open_flags = O_WRONLY;
else
@@ -71,19 +64,22 @@ static int __test_io(const char *file, struct io_uring *ring, int write,
if (!buffered)
open_flags |= O_DIRECT;
+ if (fixed) {
+ ret = t_register_buffers(ring, vecs, BUFFERS);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
+ fprintf(stderr, "buffer reg failed: %d\n", ret);
+ goto err;
+ }
+ }
+
fd = open(file, open_flags);
if (fd < 0) {
perror("file open");
goto err;
}
- if (fixed) {
- ret = io_uring_register_buffers(ring, vecs, BUFFERS);
- if (ret) {
- fprintf(stderr, "buffer reg failed: %d\n", ret);
- goto err;
- }
- }
if (sqthread) {
ret = io_uring_register_files(ring, &fd, 1);
if (ret) {
@@ -235,30 +231,21 @@ static int test_io(const char *file, int write, int buffered, int sqthread,
int fixed, int nonvec, int exp_len)
{
struct io_uring ring;
- int ret, ring_flags;
+ int ret, ring_flags = 0;
- if (sqthread) {
- if (geteuid()) {
- if (!warned) {
- fprintf(stderr, "SQPOLL requires root, skipping\n");
- warned = 1;
- }
- return 0;
- }
+ if (sqthread)
ring_flags = IORING_SETUP_SQPOLL;
- } else {
- ring_flags = 0;
- }
- ret = io_uring_queue_init(64, &ring, ring_flags);
- if (ret) {
+ ret = t_create_ring(64, &ring, ring_flags);
+ if (ret == T_SETUP_SKIP)
+ return 0;
+ if (ret != T_SETUP_OK) {
fprintf(stderr, "ring create failed: %d\n", ret);
return 1;
}
ret = __test_io(file, &ring, write, buffered, sqthread, fixed, nonvec,
0, 0, exp_len);
-
io_uring_queue_exit(&ring);
return ret;
}
@@ -683,6 +670,7 @@ static int test_write_efbig(void)
perror("file open");
goto err;
}
+ unlink(".efbig");
ret = io_uring_queue_init(32, &ring, 0);
if (ret) {
@@ -739,19 +727,22 @@ static int test_write_efbig(void)
err:
if (fd != -1)
close(fd);
- unlink(".efbig");
return 1;
}
int main(int argc, char *argv[])
{
int i, ret, nr;
+ char buf[256];
char *fname;
if (argc > 1) {
fname = argv[1];
} else {
- fname = ".basic-rw";
+ srand((unsigned)time(NULL));
+ snprintf(buf, sizeof(buf), ".basic-rw-%u-%u",
+ (unsigned)rand(), (unsigned)getpid());
+ fname = buf;
t_create_file(fname, FILE_SIZE);
}
diff --git a/test/ring-leak2.c b/test/ring-leak2.c
index d9bfe0f..77e7d82 100644
--- a/test/ring-leak2.c
+++ b/test/ring-leak2.c
@@ -197,6 +197,7 @@ static void *client_thread(void *arg)
// connection closed or error
shutdown(conn_i.fd, SHUT_RDWR);
} else {
+ pthread_mutex_unlock(&lock);
break;
}
add_socket_pollin(&ring, conn_i.fd);
diff --git a/test/rsrc_tags.c b/test/rsrc_tags.c
new file mode 100644
index 0000000..a3fec0c
--- /dev/null
+++ b/test/rsrc_tags.c
@@ -0,0 +1,449 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: run various file registration tests
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <assert.h>
+
+#include "../src/syscall.h"
+#include "helpers.h"
+#include "liburing.h"
+
+static int pipes[2];
+
+enum {
+ TEST_IORING_RSRC_FILE = 0,
+ TEST_IORING_RSRC_BUFFER = 1,
+};
+
+static bool check_cq_empty(struct io_uring *ring)
+{
+ struct io_uring_cqe *cqe = NULL;
+ int ret;
+
+ sleep(1); /* doesn't happen immediately, so wait */
+ ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */
+ return ret == -EAGAIN;
+}
+
+/*
+ * There are io_uring_register_buffers_tags() and other wrappers,
+ * but they may change, so hand-code to specifically test this ABI.
+ */
+static int register_rsrc(struct io_uring *ring, int type, int nr,
+ const void *arg, const __u64 *tags)
+{
+ struct io_uring_rsrc_register reg;
+ int ret, reg_type;
+
+ memset(&reg, 0, sizeof(reg));
+ reg.nr = nr;
+ reg.data = (__u64)(uintptr_t)arg;
+ reg.tags = (__u64)(uintptr_t)tags;
+
+ reg_type = IORING_REGISTER_FILES2;
+ if (type != TEST_IORING_RSRC_FILE)
+ reg_type = IORING_REGISTER_BUFFERS2;
+
+ ret = __sys_io_uring_register(ring->ring_fd, reg_type,
+ &reg, sizeof(reg));
+ return ret ? -errno : 0;
+}
+
+/*
+ * There are io_uring_register_buffers_update_tag() and other wrappers,
+ * but they may change, so hand-code to specifically test this ABI.
+ */
+static int update_rsrc(struct io_uring *ring, int type, int nr, int off,
+ const void *arg, const __u64 *tags)
+{
+ struct io_uring_rsrc_update2 up;
+ int ret, up_type;
+
+ memset(&up, 0, sizeof(up));
+ up.offset = off;
+ up.data = (__u64)(uintptr_t)arg;
+ up.tags = (__u64)(uintptr_t)tags;
+ up.nr = nr;
+
+ up_type = IORING_REGISTER_FILES_UPDATE2;
+ if (type != TEST_IORING_RSRC_FILE)
+ up_type = IORING_REGISTER_BUFFERS_UPDATE;
+ ret = __sys_io_uring_register(ring->ring_fd, up_type,
+ &up, sizeof(up));
+ return ret < 0 ? -errno : ret;
+}
+
+static bool has_rsrc_update(void)
+{
+ struct io_uring ring;
+ int ret;
+
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "io_uring_queue_init() failed, %d\n", ret);
+ exit(1);
+ }
+
+ ret = ring.features & IORING_FEAT_RSRC_TAGS;
+ io_uring_queue_exit(&ring);
+ return ret;
+}
+
+static int test_tags_generic(int nr, int type, void *rsrc, int ring_flags)
+{
+ struct io_uring_cqe *cqe = NULL;
+ struct io_uring ring;
+ int i, ret;
+ __u64 *tags;
+
+ tags = malloc(nr * sizeof(*tags));
+ if (!tags)
+ return 1;
+ for (i = 0; i < nr; i++)
+ tags[i] = i + 1;
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret) {
+ printf("ring setup failed\n");
+ return 1;
+ }
+
+ ret = register_rsrc(&ring, type, nr, rsrc, tags);
+ if (ret) {
+ fprintf(stderr, "rsrc register failed %i\n", ret);
+ return 1;
+ }
+
+ /* test that tags are set */
+ tags[0] = 666;
+ ret = update_rsrc(&ring, type, 1, 0, rsrc, &tags[0]);
+ assert(ret == 1);
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ assert(!ret && cqe->user_data == 1);
+ io_uring_cqe_seen(&ring, cqe);
+
+ /* test that tags are updated */
+ tags[0] = 0;
+ ret = update_rsrc(&ring, type, 1, 0, rsrc, &tags[0]);
+ assert(ret == 1);
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ assert(!ret && cqe->user_data == 666);
+ io_uring_cqe_seen(&ring, cqe);
+
+ /* test tag=0 doesn't emit CQE */
+ tags[0] = 1;
+ ret = update_rsrc(&ring, type, 1, 0, rsrc, &tags[0]);
+ assert(ret == 1);
+ assert(check_cq_empty(&ring));
+
+ free(tags);
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+static int test_buffers_update(void)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe = NULL;
+ struct io_uring ring;
+ const int nr = 5;
+ int buf_idx = 1, i, ret;
+ int pipes[2];
+ char tmp_buf[1024];
+ char tmp_buf2[1024];
+ struct iovec vecs[nr];
+ __u64 tags[nr];
+
+ for (i = 0; i < nr; i++) {
+ vecs[i].iov_base = tmp_buf;
+ vecs[i].iov_len = 1024;
+ tags[i] = i + 1;
+ }
+
+ ret = test_tags_generic(nr, TEST_IORING_RSRC_BUFFER, vecs, 0);
+ if (ret)
+ return 1;
+
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret) {
+ printf("ring setup failed\n");
+ return 1;
+ }
+ if (pipe(pipes) < 0) {
+ perror("pipe");
+ return 1;
+ }
+ ret = register_rsrc(&ring, TEST_IORING_RSRC_BUFFER, nr, vecs, tags);
+ if (ret) {
+ fprintf(stderr, "rsrc register failed %i\n", ret);
+ return 1;
+ }
+
+ /* test that CQE is not emmited before we're done with a buffer */
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_read_fixed(sqe, pipes[0], tmp_buf, 10, 0, 0);
+ sqe->user_data = 100;
+ ret = io_uring_submit(&ring);
+ if (ret != 1) {
+ fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret);
+ return 1;
+ }
+ ret = io_uring_peek_cqe(&ring, &cqe);
+ assert(ret == -EAGAIN);
+
+ vecs[buf_idx].iov_base = tmp_buf2;
+ ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, buf_idx,
+ &vecs[buf_idx], &tags[buf_idx]);
+ if (ret != 1) {
+ fprintf(stderr, "rsrc update failed %i %i\n", ret, errno);
+ return 1;
+ }
+
+ ret = io_uring_peek_cqe(&ring, &cqe); /* nothing should be there */
+ assert(ret == -EAGAIN);
+ close(pipes[0]);
+ close(pipes[1]);
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ assert(!ret && cqe->user_data == 100);
+ io_uring_cqe_seen(&ring, cqe);
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ assert(!ret && cqe->user_data == buf_idx + 1);
+ io_uring_cqe_seen(&ring, cqe);
+
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+static int test_buffers_empty_buffers(void)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe = NULL;
+ struct io_uring ring;
+ const int nr = 5;
+ int ret, i;
+ char tmp_buf[1024];
+ struct iovec vecs[nr];
+
+ for (i = 0; i < nr; i++) {
+ vecs[i].iov_base = 0;
+ vecs[i].iov_len = 0;
+ }
+ vecs[0].iov_base = tmp_buf;
+ vecs[0].iov_len = 10;
+
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret) {
+ printf("ring setup failed\n");
+ return 1;
+ }
+
+ ret = register_rsrc(&ring, TEST_IORING_RSRC_BUFFER, nr, vecs, NULL);
+ if (ret) {
+ fprintf(stderr, "rsrc register failed %i\n", ret);
+ return 1;
+ }
+
+ /* empty to buffer */
+ vecs[1].iov_base = tmp_buf;
+ vecs[1].iov_len = 10;
+ ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 1, &vecs[1], NULL);
+ if (ret != 1) {
+ fprintf(stderr, "rsrc update failed %i %i\n", ret, errno);
+ return 1;
+ }
+
+ /* buffer to empty */
+ vecs[0].iov_base = 0;
+ vecs[0].iov_len = 0;
+ ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 0, &vecs[0], NULL);
+ if (ret != 1) {
+ fprintf(stderr, "rsrc update failed %i %i\n", ret, errno);
+ return 1;
+ }
+
+ /* zero to zero is ok */
+ ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 2, &vecs[2], NULL);
+ if (ret != 1) {
+ fprintf(stderr, "rsrc update failed %i %i\n", ret, errno);
+ return 1;
+ }
+
+ /* empty buf with non-zero len fails */
+ vecs[3].iov_base = 0;
+ vecs[3].iov_len = 1;
+ ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 3, &vecs[3], NULL);
+ if (ret >= 0) {
+ fprintf(stderr, "rsrc update failed %i %i\n", ret, errno);
+ return 1;
+ }
+
+ /* test rw on empty ubuf is failed */
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_read_fixed(sqe, pipes[0], tmp_buf, 10, 0, 2);
+ sqe->user_data = 100;
+ ret = io_uring_submit(&ring);
+ if (ret != 1) {
+ fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret);
+ return 1;
+ }
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ assert(!ret && cqe->user_data == 100);
+ assert(cqe->res);
+ io_uring_cqe_seen(&ring, cqe);
+
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_read_fixed(sqe, pipes[0], tmp_buf, 0, 0, 2);
+ sqe->user_data = 100;
+ ret = io_uring_submit(&ring);
+ if (ret != 1) {
+ fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret);
+ return 1;
+ }
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ assert(!ret && cqe->user_data == 100);
+ assert(cqe->res);
+ io_uring_cqe_seen(&ring, cqe);
+
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+
+static int test_files(int ring_flags)
+{
+ struct io_uring_cqe *cqe = NULL;
+ struct io_uring ring;
+ const int nr = 50;
+ int off = 5, i, ret, fd;
+ int files[nr];
+ __u64 tags[nr], tag;
+
+ for (i = 0; i < nr; ++i) {
+ files[i] = pipes[0];
+ tags[i] = i + 1;
+ }
+
+ ret = test_tags_generic(nr, TEST_IORING_RSRC_FILE, files, ring_flags);
+ if (ret)
+ return 1;
+
+ ret = io_uring_queue_init(1, &ring, ring_flags);
+ if (ret) {
+ printf("ring setup failed\n");
+ return 1;
+ }
+ ret = register_rsrc(&ring, TEST_IORING_RSRC_FILE, nr, files, tags);
+ if (ret) {
+ fprintf(stderr, "rsrc register failed %i\n", ret);
+ return 1;
+ }
+
+ /* check update did update tag */
+ fd = -1;
+ ret = io_uring_register_files_update(&ring, off, &fd, 1);
+ assert(ret == 1);
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ assert(!ret && cqe->user_data == tags[off]);
+ io_uring_cqe_seen(&ring, cqe);
+
+ /* remove removed file, shouldn't emit old tag */
+ ret = io_uring_register_files_update(&ring, off, &fd, 1);
+ assert(ret <= 1);
+ assert(check_cq_empty(&ring));
+
+ /* non-zero tag with remove update is disallowed */
+ tag = 1;
+ fd = -1;
+ ret = update_rsrc(&ring, TEST_IORING_RSRC_FILE, 1, off + 1, &fd, &tag);
+ assert(ret);
+
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+static int test_notag(void)
+{
+ struct io_uring_cqe *cqe = NULL;
+ struct io_uring ring;
+ int i, ret, fd;
+ const int nr = 50;
+ int files[nr];
+
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret) {
+ printf("ring setup failed\n");
+ return 1;
+ }
+ for (i = 0; i < nr; ++i)
+ files[i] = pipes[0];
+
+ ret = io_uring_register_files(&ring, files, nr);
+ assert(!ret);
+
+ /* default register, update shouldn't emit CQE */
+ fd = -1;
+ ret = io_uring_register_files_update(&ring, 0, &fd, 1);
+ assert(ret == 1);
+ assert(check_cq_empty(&ring));
+
+ ret = io_uring_unregister_files(&ring);
+ assert(!ret);
+ ret = io_uring_peek_cqe(&ring, &cqe); /* nothing should be there */
+ assert(ret);
+
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ring_flags[] = {0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL};
+ int i, ret;
+
+ if (argc > 1)
+ return 0;
+ if (!has_rsrc_update()) {
+ fprintf(stderr, "doesn't support rsrc tags, skip\n");
+ return 0;
+ }
+
+ if (pipe(pipes) < 0) {
+ perror("pipe");
+ return 1;
+ }
+
+ ret = test_notag();
+ if (ret) {
+ printf("test_notag failed\n");
+ return ret;
+ }
+
+ for (i = 0; i < sizeof(ring_flags) / sizeof(ring_flags[0]); i++) {
+ ret = test_files(ring_flags[i]);
+ if (ret) {
+ printf("test_tag failed, type %i\n", i);
+ return ret;
+ }
+ }
+
+ ret = test_buffers_update();
+ if (ret) {
+ printf("test_buffers_update failed\n");
+ return ret;
+ }
+
+ ret = test_buffers_empty_buffers();
+ if (ret) {
+ printf("test_buffers_empty_buffers failed\n");
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/test/runtests-loop.sh b/test/runtests-loop.sh
index 4019eba..f56d26d 100755
--- a/test/runtests-loop.sh
+++ b/test/runtests-loop.sh
@@ -1,10 +1,10 @@
#!/bin/bash
-TESTS="$@"
+TESTS=("$@")
ITER=0
while true; do
- ./runtests.sh "$TESTS"
+ ./runtests.sh "${TESTS[@]}"
RET="$?"
if [ "${RET}" -ne 0 ]; then
echo "Tests failed at loop $ITER"
diff --git a/test/runtests.sh b/test/runtests.sh
index e8f4ae5..c77df6a 100755
--- a/test/runtests.sh
+++ b/test/runtests.sh
@@ -1,13 +1,13 @@
#!/bin/bash
-TESTS="$@"
+TESTS=("$@")
RET=0
TIMEOUT=60
DMESG_FILTER="cat"
-TEST_DIR=$(dirname $0)
+TEST_DIR=$(dirname "$0")
FAILED=""
SKIPPED=""
-MAYBE_FAILED=""
+TIMED_OUT=""
TEST_FILES=""
declare -A TEST_MAP
@@ -17,14 +17,15 @@ DO_KMSG="1"
# Include config.local if exists and check TEST_FILES for valid devices
if [ -f "$TEST_DIR/config.local" ]; then
- . $TEST_DIR/config.local
+ # shellcheck source=/dev/null disable=SC1091
+ . "$TEST_DIR/config.local"
for dev in $TEST_FILES; do
if [ ! -e "$dev" ]; then
echo "Test file $dev not valid"
exit 1
fi
done
- for dev in ${TEST_MAP[@]}; do
+ for dev in "${TEST_MAP[@]}"; do
if [ ! -e "$dev" ]; then
echo "Test file in map $dev not valid"
exit 1
@@ -37,7 +38,7 @@ _check_dmesg()
local dmesg_marker="$1"
local seqres="$2.seqres"
- if [ $DO_KMSG -eq 0 ]; then
+ if [ "$DO_KMSG" -eq 0 ]; then
return 0
fi
@@ -66,24 +67,31 @@ run_test()
{
local test_name="$1"
local dev="$2"
- local test_string=$test_name
+ local test_exec=("./$test_name")
+ local test_string="$test_name"
+ local out_name="$test_name"
# Specify test string to print
if [ -n "$dev" ]; then
+ test_exec+=("$dev")
test_string="$test_name $dev"
+ local suffix
+ suffix=$(basename "$dev")
+ out_name="$out_name.$suffix"
fi
# Log start of the test
if [ "$DO_KMSG" -eq 1 ]; then
local dmesg_marker="Running test $test_string:"
- echo $dmesg_marker | tee /dev/kmsg
+ echo "$dmesg_marker" > /dev/kmsg
else
local dmesg_marker=""
- echo Running test $test_name $dev
fi
+ printf "Running test %-55s" "$test_string"
# Do we have to exclude the test ?
- echo $TEST_EXCLUDE | grep -w "$test_name" > /dev/null 2>&1
+ echo "$TEST_EXCLUDE" | grep -w "$test_name" > /dev/null 2>&1
+ # shellcheck disable=SC2181
if [ $? -eq 0 ]; then
echo "Test skipped"
SKIPPED="$SKIPPED <$test_string>"
@@ -91,12 +99,19 @@ run_test()
fi
# Run the test
- timeout -s INT -k $TIMEOUT $TIMEOUT ./$test_name $dev
+ T_START=$(date +%s)
+ timeout -s INT -k $TIMEOUT $TIMEOUT "${test_exec[@]}"
local status=$?
+ T_END=$(date +%s)
+
+ if [ -e ./core ]; then
+ mv core "core-$test_name"
+ fi
# Check test status
if [ "$status" -eq 124 ]; then
echo "Test $test_name timed out (may not be a failure)"
+ TIMED_OUT="$TIMED_OUT <$test_string>"
elif [ "$status" -ne 0 ]; then
echo "Test $test_name failed with ret $status"
FAILED="$FAILED <$test_string>"
@@ -105,26 +120,36 @@ run_test()
echo "Test $test_name failed dmesg check"
FAILED="$FAILED <$test_string>"
RET=1
- elif [ -n "$dev" ]; then
- sleep .1
- ps aux | grep "\[io_wq_manager\]" > /dev/null
- if [ $? -eq 0 ]; then
- MAYBE_FAILED="$MAYBE_FAILED $test_string"
+ else
+ if [ -f "output/$out_name" ]; then
+ T_PREV=$(cat "output/$out_name")
+ else
+ T_PREV=""
+ fi
+ T_DIFF=$((T_END-T_START))
+ if [ -n "$T_PREV" ]; then
+ echo "$T_DIFF sec [$T_PREV]"
+ else
+ echo "$T_DIFF sec"
fi
+ echo $T_DIFF > "output/$out_name"
fi
}
# Run all specified tests
-for tst in $TESTS; do
- if [ ! -n "${TEST_MAP[$tst]}" ]; then
- run_test $tst
- if [ ! -z "$TEST_FILES" ]; then
+for tst in "${TESTS[@]}"; do
+ if [ ! -d output ]; then
+ mkdir output
+ fi
+ if [ -z "${TEST_MAP[$tst]}" ]; then
+ run_test "$tst"
+ if [ -n "$TEST_FILES" ]; then
for dev in $TEST_FILES; do
- run_test $tst $dev
+ run_test "$tst" "$dev"
done
fi
else
- run_test $tst ${TEST_MAP[$tst]}
+ run_test "$tst" "${TEST_MAP[$tst]}"
fi
done
@@ -132,18 +157,14 @@ if [ -n "$SKIPPED" ]; then
echo "Tests skipped: $SKIPPED"
fi
+if [ -n "$TIMED_OUT" ]; then
+ echo "Tests timed out: $TIMED_OUT"
+fi
+
if [ "${RET}" -ne 0 ]; then
echo "Tests failed: $FAILED"
exit $RET
else
- sleep 1
- ps aux | grep "\[io_wq_manager\]" > /dev/null
- if [ $? -ne 0 ]; then
- MAYBE_FAILED=""
- fi
- if [ ! -z "$MAYBE_FAILED" ]; then
- echo "Tests _maybe_ failed: $MAYBE_FAILED"
- fi
echo "All tests passed"
exit 0
fi
diff --git a/test/send_recv.c b/test/send_recv.c
index 19adbdd..1ee0234 100644
--- a/test/send_recv.c
+++ b/test/send_recv.c
@@ -200,7 +200,7 @@ static int do_send(void)
return 1;
}
- ret = connect(sockfd, &saddr, sizeof(saddr));
+ ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr));
if (ret < 0) {
perror("connect");
return 1;
@@ -252,6 +252,7 @@ static int test(int use_sqthread, int regfiles)
ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
if (ret) {
fprintf(stderr, "Thread create failed: %d\n", ret);
+ pthread_mutex_unlock(&rd.mutex);
return 1;
}
diff --git a/test/send_recvmsg.c b/test/send_recvmsg.c
index 6b513bc..2ff8d9d 100644
--- a/test/send_recvmsg.c
+++ b/test/send_recvmsg.c
@@ -297,6 +297,7 @@ static int test(int buf_select, int no_buf_add, int iov_count)
rd.iov_count = iov_count;
ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
if (ret) {
+ pthread_mutex_unlock(&mutex);
fprintf(stderr, "Thread create failed\n");
return 1;
}
diff --git a/test/sendmsg_fs_cve.c b/test/sendmsg_fs_cve.c
index 8de220a..3866e5d 100644
--- a/test/sendmsg_fs_cve.c
+++ b/test/sendmsg_fs_cve.c
@@ -154,7 +154,13 @@ int main(int argc, char *argv[])
if (!c) {
close(rcv_sock);
- if (chroot(tmpdir)) {
+ r = chroot(tmpdir);
+ if (r) {
+ if (errno == EPERM) {
+ fprintf(stderr, "chroot not allowed, skip\n");
+ return 0;
+ }
+
perror("chroot()");
return 1;
}
diff --git a/test/socket-rw-eagain.c b/test/socket-rw-eagain.c
index f15c0c1..cc87aca 100644
--- a/test/socket-rw-eagain.c
+++ b/test/socket-rw-eagain.c
@@ -92,10 +92,14 @@ int main(int argc, char *argv[])
}
struct io_uring m_io_uring;
+ struct io_uring_params p = { };
- ret = io_uring_queue_init(32, &m_io_uring, 0);
+ ret = io_uring_queue_init_params(32, &m_io_uring, &p);
assert(ret >= 0);
+ if (p.features & IORING_FEAT_FAST_POLL)
+ return 0;
+
char recv_buff[128];
char send_buff[128];
diff --git a/test/sq-poll-dup.c b/test/sq-poll-dup.c
index eeb619c..e688c9f 100644
--- a/test/sq-poll-dup.c
+++ b/test/sq-poll-dup.c
@@ -167,6 +167,9 @@ int main(int argc, char *argv[])
vecs = t_create_buffers(BUFFERS, BS);
fd = open(fname, O_RDONLY | O_DIRECT);
+ if (fname != argv[1])
+ unlink(fname);
+
if (fd < 0) {
perror("open");
return -1;
@@ -191,11 +194,7 @@ int main(int argc, char *argv[])
goto err;
}
- if (fname != argv[1])
- unlink(fname);
return 0;
err:
- if (fname != argv[1])
- unlink(fname);
return 1;
}
diff --git a/test/sq-poll-kthread.c b/test/sq-poll-kthread.c
index ed7d0bf..0a0a75a 100644
--- a/test/sq-poll-kthread.c
+++ b/test/sq-poll-kthread.c
@@ -17,6 +17,7 @@
#include <sys/epoll.h>
#include "liburing.h"
+#include "helpers.h"
#define SQ_THREAD_IDLE 2000
#define BUF_SIZE 128
@@ -38,23 +39,20 @@ static int do_test_sq_poll_kthread_stopped(bool do_exit)
uint8_t buf[BUF_SIZE];
struct iovec iov;
- if (geteuid()) {
- fprintf(stderr, "sqpoll requires root!\n");
- return TEST_SKIPPED;
- }
-
if (pipe(pipe1) != 0) {
perror("pipe");
return TEST_FAILED;
}
memset(&param, 0, sizeof(param));
-
param.flags |= IORING_SETUP_SQPOLL;
param.sq_thread_idle = SQ_THREAD_IDLE;
- ret = io_uring_queue_init_params(16, &ring, &param);
- if (ret) {
+ ret = t_create_ring_params(16, &ring, &param);
+ if (ret == T_SETUP_SKIP) {
+ ret = TEST_FAILED;
+ goto err_pipe;
+ } else if (ret != T_SETUP_OK) {
fprintf(stderr, "ring setup failed\n");
ret = TEST_FAILED;
goto err_pipe;
diff --git a/test/sq-poll-share.c b/test/sq-poll-share.c
index a46b94f..99227d5 100644
--- a/test/sq-poll-share.c
+++ b/test/sq-poll-share.c
@@ -89,6 +89,8 @@ int main(int argc, char *argv[])
vecs = t_create_buffers(BUFFERS, BS);
fd = open(fname, O_RDONLY | O_DIRECT);
+ if (fname != argv[1])
+ unlink(fname);
if (fd < 0) {
perror("open");
return -1;
@@ -129,11 +131,7 @@ int main(int argc, char *argv[])
ios += BUFFERS;
}
- if (fname != argv[1])
- unlink(fname);
return 0;
err:
- if (fname != argv[1])
- unlink(fname);
return 1;
}
diff --git a/test/sqpoll-cancel-hang.c b/test/sqpoll-cancel-hang.c
new file mode 100644
index 0000000..e561478
--- /dev/null
+++ b/test/sqpoll-cancel-hang.c
@@ -0,0 +1,156 @@
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include "liburing.h"
+#include "../src/syscall.h"
+
+static uint64_t current_time_ms(void)
+{
+ struct timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC, &ts))
+ exit(1);
+ return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
+}
+
+#define SIZEOF_IO_URING_SQE 64
+#define SIZEOF_IO_URING_CQE 16
+#define SQ_TAIL_OFFSET 64
+#define SQ_RING_MASK_OFFSET 256
+#define SQ_RING_ENTRIES_OFFSET 264
+#define CQ_RING_ENTRIES_OFFSET 268
+#define CQ_CQES_OFFSET 320
+
+#define IORING_OFF_SQES 0x10000000ULL
+
+static void kill_and_wait(int pid, int* status)
+{
+ kill(-pid, SIGKILL);
+ kill(pid, SIGKILL);
+ while (waitpid(-1, status, __WALL) != pid) {
+ }
+}
+
+#define WAIT_FLAGS __WALL
+
+uint64_t r[3] = {0xffffffffffffffff, 0x0, 0x0};
+
+static long syz_io_uring_setup(volatile long a0, volatile long a1,
+volatile long a2, volatile long a3, volatile long a4, volatile long
+a5)
+{
+ uint32_t entries = (uint32_t)a0;
+ struct io_uring_params* setup_params = (struct io_uring_params*)a1;
+ void* vma1 = (void*)a2;
+ void* vma2 = (void*)a3;
+ void** ring_ptr_out = (void**)a4;
+ void** sqes_ptr_out = (void**)a5;
+ uint32_t fd_io_uring = __sys_io_uring_setup(entries, setup_params);
+ uint32_t sq_ring_sz = setup_params->sq_off.array +
+setup_params->sq_entries * sizeof(uint32_t);
+ uint32_t cq_ring_sz = setup_params->cq_off.cqes +
+setup_params->cq_entries * SIZEOF_IO_URING_CQE;
+ uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
+ *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE,
+MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring,
+IORING_OFF_SQ_RING);
+ uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
+ *sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE,
+MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES);
+ return fd_io_uring;
+}
+
+static long syz_io_uring_submit(volatile long a0, volatile long a1,
+volatile long a2, volatile long a3)
+{
+ char* ring_ptr = (char*)a0;
+ char* sqes_ptr = (char*)a1;
+ char* sqe = (char*)a2;
+ uint32_t sqes_index = (uint32_t)a3;
+ uint32_t sq_ring_entries = *(uint32_t*)(ring_ptr + SQ_RING_ENTRIES_OFFSET);
+ uint32_t cq_ring_entries = *(uint32_t*)(ring_ptr + CQ_RING_ENTRIES_OFFSET);
+ uint32_t sq_array_off = (CQ_CQES_OFFSET + cq_ring_entries *
+SIZEOF_IO_URING_CQE + 63) & ~63;
+ if (sq_ring_entries)
+ sqes_index %= sq_ring_entries;
+ char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE;
+ memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
+ uint32_t sq_ring_mask = *(uint32_t*)(ring_ptr + SQ_RING_MASK_OFFSET);
+ uint32_t* sq_tail_ptr = (uint32_t*)(ring_ptr + SQ_TAIL_OFFSET);
+ uint32_t sq_tail = *sq_tail_ptr & sq_ring_mask;
+ uint32_t sq_tail_next = *sq_tail_ptr + 1;
+ uint32_t* sq_array = (uint32_t*)(ring_ptr + sq_array_off);
+ *(sq_array + sq_tail) = sqes_index;
+ __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
+ return 0;
+}
+
+
+void trigger_bug(void)
+{
+ intptr_t res = 0;
+ *(uint32_t*)0x20000204 = 0;
+ *(uint32_t*)0x20000208 = 2;
+ *(uint32_t*)0x2000020c = 0;
+ *(uint32_t*)0x20000210 = 0;
+ *(uint32_t*)0x20000218 = -1;
+ memset((void*)0x2000021c, 0, 12);
+ res = -1;
+ res = syz_io_uring_setup(0x7987, 0x20000200, 0x20400000, 0x20ffd000, 0x200000c0, 0x200001c0);
+ if (res != -1) {
+ r[0] = res;
+ r[1] = *(uint64_t*)0x200000c0;
+ r[2] = *(uint64_t*)0x200001c0;
+ }
+ *(uint8_t*)0x20000180 = 0xb;
+ *(uint8_t*)0x20000181 = 1;
+ *(uint16_t*)0x20000182 = 0;
+ *(uint32_t*)0x20000184 = 0;
+ *(uint64_t*)0x20000188 = 4;
+ *(uint64_t*)0x20000190 = 0x20000140;
+ *(uint64_t*)0x20000140 = 0x77359400;
+ *(uint64_t*)0x20000148 = 0;
+ *(uint32_t*)0x20000198 = 1;
+ *(uint32_t*)0x2000019c = 0;
+ *(uint64_t*)0x200001a0 = 0;
+ *(uint16_t*)0x200001a8 = 0;
+ *(uint16_t*)0x200001aa = 0;
+ memset((void*)0x200001ac, 0, 20);
+ syz_io_uring_submit(r[1], r[2], 0x20000180, 1);
+ *(uint32_t*)0x20000544 = 0;
+ *(uint32_t*)0x20000548 = 0x36;
+ *(uint32_t*)0x2000054c = 0;
+ *(uint32_t*)0x20000550 = 0;
+ *(uint32_t*)0x20000558 = r[0];
+ memset((void*)0x2000055c, 0, 12);
+
+}
+int main(void)
+{
+ mmap((void *)0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
+ int pid = fork();
+ if (pid < 0)
+ exit(1);
+ if (pid == 0) {
+ trigger_bug();
+ exit(0);
+ }
+ int status = 0;
+ uint64_t start = current_time_ms();
+ for (;;) {
+ if (current_time_ms() - start < 1000) {
+ continue;
+ }
+ kill_and_wait(pid, &status);
+ break;
+ }
+ return 0;
+}
+
+
+
diff --git a/test/submit-link-fail.c b/test/submit-link-fail.c
new file mode 100644
index 0000000..a12bdae
--- /dev/null
+++ b/test/submit-link-fail.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: tests linked requests failing during submission
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <assert.h>
+
+#include "liburing.h"
+
+#define DRAIN_USER_DATA 42
+
+static int test_underprep_fail(bool hardlink, bool drain, bool link_last,
+ int link_size, int fail_idx)
+{
+ const int invalid_fd = 42;
+ int link_flags = IOSQE_IO_LINK;
+ int total_submit = link_size;
+ struct io_uring ring;
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ char buffer[1];
+ int i, ret, fds[2];
+
+ if (drain)
+ link_flags |= IOSQE_IO_DRAIN;
+ if (hardlink)
+ link_flags |= IOSQE_IO_HARDLINK;
+
+ assert(fail_idx < link_size);
+ assert(link_size < 40);
+
+ /* create a new ring as it leaves it dirty */
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ printf("ring setup failed\n");
+ return -1;
+ }
+ if (pipe(fds)) {
+ perror("pipe");
+ return -1;
+ }
+
+ if (drain) {
+ /* clog drain, so following reqs sent to draining */
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_read(sqe, fds[0], buffer, sizeof(buffer), 0);
+ sqe->user_data = DRAIN_USER_DATA;
+ sqe->flags |= IOSQE_IO_DRAIN;
+ total_submit++;
+ }
+
+ for (i = 0; i < link_size; i++) {
+ sqe = io_uring_get_sqe(&ring);
+ if (i == fail_idx)
+ io_uring_prep_read(sqe, invalid_fd, buffer, 1, 0);
+ else
+ io_uring_prep_nop(sqe);
+
+ if (i != link_size - 1 || !link_last)
+ sqe->flags |= link_flags;
+ sqe->user_data = i;
+ }
+
+ ret = io_uring_submit(&ring);
+ if (ret != total_submit) {
+ /* Old behaviour, failed early and under-submitted */
+ if (ret == fail_idx + 1 + drain)
+ goto out;
+ fprintf(stderr, "submit failed: %d\n", ret);
+ return -1;
+ }
+
+ if (drain) {
+ /* unclog drain */
+ ret = write(fds[1], buffer, sizeof(buffer));
+ if (ret < 0) {
+ perror("write");
+ return 1;
+ }
+ }
+
+ for (i = 0; i < total_submit; i++) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ return 1;
+ }
+
+ ret = cqe->res;
+ if (cqe->user_data == DRAIN_USER_DATA) {
+ if (ret != 1) {
+ fprintf(stderr, "drain failed %d\n", ret);
+ return 1;
+ }
+ } else if (cqe->user_data == fail_idx) {
+ if (ret == 0 || ret == -ECANCELED) {
+ fprintf(stderr, "half-prep req unexpected return %d\n", ret);
+ return 1;
+ }
+ } else {
+ if (ret != -ECANCELED) {
+ fprintf(stderr, "cancel failed %d, ud %d\n", ret, (int)cqe->user_data);
+ return 1;
+ }
+ }
+ io_uring_cqe_seen(&ring, cqe);
+ }
+out:
+ close(fds[0]);
+ close(fds[1]);
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, link_size, fail_idx, i;
+
+ if (argc > 1)
+ return 0;
+
+ /*
+ * hardlink, size=3, fail_idx=1, drain=false -- kernel fault
+ * link, size=3, fail_idx=0, drain=true -- kernel fault
+ * link, size=3, fail_idx=1, drain=true -- invalid cqe->res
+ */
+ for (link_size = 0; link_size < 3; link_size++) {
+ for (fail_idx = 0; fail_idx < link_size; fail_idx++) {
+ for (i = 0; i < 8; i++) {
+ bool hardlink = (i & 1) != 0;
+ bool drain = (i & 2) != 0;
+ bool link_last = (i & 4) != 0;
+
+ ret = test_underprep_fail(hardlink, drain, link_last,
+ link_size, fail_idx);
+ if (!ret)
+ continue;
+
+ fprintf(stderr, "failed %d, hard %d, drain %d,"
+ "link_last %d, size %d, idx %d\n",
+ ret, hardlink, drain, link_last,
+ link_size, fail_idx);
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/test/submit-reuse.c b/test/submit-reuse.c
index 74ba769..ca30e98 100644
--- a/test/submit-reuse.c
+++ b/test/submit-reuse.c
@@ -140,11 +140,6 @@ static int test_reuse(int argc, char *argv[], int split, int async)
int do_unlink = 1;
void *tret;
- if (argc > 1) {
- fname1 = argv[1];
- do_unlink = 0;
- }
-
ret = io_uring_queue_init_params(32, &ring, &p);
if (ret) {
fprintf(stderr, "io_uring_queue_init: %d\n", ret);
@@ -153,21 +148,29 @@ static int test_reuse(int argc, char *argv[], int split, int async)
if (!(p.features & IORING_FEAT_SUBMIT_STABLE)) {
fprintf(stdout, "FEAT_SUBMIT_STABLE not there, skipping\n");
+ io_uring_queue_exit(&ring);
no_stable = 1;
return 0;
}
- if (do_unlink)
+ if (argc > 1) {
+ fname1 = argv[1];
+ do_unlink = 0;
+ } else {
t_create_file(fname1, FILE_SIZE);
-
- t_create_file(".reuse.2", FILE_SIZE);
+ }
fd1 = open(fname1, O_RDONLY);
+ if (do_unlink)
+ unlink(fname1);
if (fd1 < 0) {
perror("open fname1");
goto err;
}
+
+ t_create_file(".reuse.2", FILE_SIZE);
fd2 = open(".reuse.2", O_RDONLY);
+ unlink(".reuse.2");
if (fd2 < 0) {
perror("open .reuse.2");
goto err;
@@ -206,15 +209,9 @@ static int test_reuse(int argc, char *argv[], int split, int async)
close(fd2);
close(fd1);
io_uring_queue_exit(&ring);
- if (do_unlink)
- unlink(fname1);
- unlink(".reuse.2");
return 0;
err:
io_uring_queue_exit(&ring);
- if (do_unlink)
- unlink(fname1);
- unlink(".reuse.2");
return 1;
}
diff --git a/test/symlink.c b/test/symlink.c
new file mode 100644
index 0000000..cf4aa96
--- /dev/null
+++ b/test/symlink.c
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test io_uring symlinkat handling
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "liburing.h"
+
+
+static int do_symlinkat(struct io_uring *ring, const char *oldname, const char *newname)
+{
+ int ret;
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ fprintf(stderr, "sqe get failed\n");
+ goto err;
+ }
+ io_uring_prep_symlinkat(sqe, oldname, AT_FDCWD, newname);
+
+ ret = io_uring_submit(ring);
+ if (ret != 1) {
+ fprintf(stderr, "submit failed: %d\n", ret);
+ goto err;
+ }
+
+ ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0);
+ if (ret) {
+ fprintf(stderr, "wait_cqe failed: %d\n", ret);
+ goto err;
+ }
+ ret = cqe->res;
+ io_uring_cqe_seen(ring, cqe);
+ return ret;
+err:
+ return 1;
+}
+
+int test_link_contents(const char* linkname, const char *expected_contents)
+{
+ char buf[128];
+ int ret = readlink(linkname, buf, 127);
+ if (ret < 0) {
+ perror("readlink");
+ return ret;
+ }
+ buf[ret] = 0;
+ if (strncmp(buf, expected_contents, 128)) {
+ fprintf(stderr, "link contents differs from expected: '%s' vs '%s'",
+ buf, expected_contents);
+ return -1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ static const char target[] = "io_uring-symlinkat-test-target";
+ static const char linkname[] = "io_uring-symlinkat-test-link";
+ int ret;
+ struct io_uring ring;
+
+ if (argc > 1)
+ return 0;
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "queue init failed: %d\n", ret);
+ return ret;
+ }
+
+ ret = do_symlinkat(&ring, target, linkname);
+ if (ret < 0) {
+ if (ret == -EBADF || ret == -EINVAL) {
+ fprintf(stdout, "symlinkat not supported, skipping\n");
+ goto out;
+ }
+ fprintf(stderr, "symlinkat: %s\n", strerror(-ret));
+ goto err;
+ } else if (ret) {
+ goto err;
+ }
+
+ ret = test_link_contents(linkname, target);
+ if (ret < 0)
+ goto err1;
+
+ ret = do_symlinkat(&ring, target, linkname);
+ if (ret != -EEXIST) {
+ fprintf(stderr, "test_symlinkat linkname already exists failed: %d\n", ret);
+ goto err1;
+ }
+
+ ret = do_symlinkat(&ring, target, "surely/this/does/not/exist");
+ if (ret != -ENOENT) {
+ fprintf(stderr, "test_symlinkat no parent failed: %d\n", ret);
+ goto err1;
+ }
+
+out:
+ unlinkat(AT_FDCWD, linkname, 0);
+ io_uring_queue_exit(&ring);
+ return 0;
+err1:
+ unlinkat(AT_FDCWD, linkname, 0);
+err:
+ io_uring_queue_exit(&ring);
+ return 1;
+}
diff --git a/test/thread-exit.c b/test/thread-exit.c
index c2f2148..7f66028 100644
--- a/test/thread-exit.c
+++ b/test/thread-exit.c
@@ -86,12 +86,12 @@ int main(int argc, char *argv[])
} else {
fname = ".thread.exit";
do_unlink = 1;
- }
-
- if (do_unlink)
t_create_file(fname, 4096);
+ }
fd = open(fname, O_WRONLY);
+ if (do_unlink)
+ unlink(fname);
if (fd < 0) {
perror("open");
return 1;
@@ -125,11 +125,7 @@ int main(int argc, char *argv[])
io_uring_cqe_seen(&ring, cqe);
}
- if (do_unlink)
- unlink(fname);
return d.err;
err:
- if (do_unlink)
- unlink(fname);
return 1;
}
diff --git a/test/timeout-new.c b/test/timeout-new.c
index 45b9a14..19c5ac3 100644
--- a/test/timeout-new.c
+++ b/test/timeout-new.c
@@ -202,8 +202,8 @@ int main(int argc, char *argv[])
return 1;
}
if (!(ring_normal.features & IORING_FEAT_EXT_ARG)) {
- fprintf(stderr, "feature IORING_FEAT_EXT_ARG not supported.\n");
- return 1;
+ fprintf(stderr, "feature IORING_FEAT_EXT_ARG not supported, skipping.\n");
+ return 0;
}
ret = test_return_before_timeout(&ring_normal);
diff --git a/test/timeout.c b/test/timeout.c
index a28d599..775063f 100644
--- a/test/timeout.c
+++ b/test/timeout.c
@@ -10,6 +10,9 @@
#include <string.h>
#include <fcntl.h>
#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include "liburing.h"
#include "../src/syscall.h"
@@ -180,7 +183,8 @@ err:
return 1;
}
-static int test_single_timeout_wait(struct io_uring *ring)
+static int test_single_timeout_wait(struct io_uring *ring,
+ struct io_uring_params *p)
{
struct io_uring_cqe *cqe;
struct io_uring_sqe *sqe;
@@ -195,6 +199,15 @@ static int test_single_timeout_wait(struct io_uring *ring)
io_uring_prep_nop(sqe);
io_uring_sqe_set_data(sqe, (void *) 1);
+ /* no implied submit for newer kernels */
+ if (p->features & IORING_FEAT_EXT_ARG) {
+ ret = io_uring_submit(ring);
+ if (ret != 2) {
+ fprintf(stderr, "%s: submit %d\n", __FUNCTION__, ret);
+ return 1;
+ }
+ }
+
msec_to_ts(&ts, 1000);
i = 0;
@@ -1161,16 +1174,110 @@ err:
return 1;
}
+static int fill_exec_target(char *dst, char *path)
+{
+ struct stat sb;
+
+ /*
+ * Should either be ./exec-target or test/exec-target
+ */
+ sprintf(dst, "%s", path);
+ return stat(dst, &sb);
+}
+
+static int test_timeout_link_cancel(void)
+{
+ struct io_uring ring;
+ struct io_uring_cqe *cqe;
+ char prog_path[PATH_MAX];
+ pid_t p;
+ int ret, i, wstatus;
+
+ if (fill_exec_target(prog_path, "./exec-target") &&
+ fill_exec_target(prog_path, "test/exec-target")) {
+ fprintf(stdout, "Can't find exec-target, skipping\n");
+ return 0;
+ }
+
+ ret = io_uring_queue_init(8, &ring, 0);
+ if (ret) {
+ fprintf(stderr, "ring create failed: %d\n", ret);
+ return 1;
+ }
+
+ p = fork();
+ if (p == -1) {
+ fprintf(stderr, "fork() failed\n");
+ return 1;
+ }
+
+ if (p == 0) {
+ struct io_uring_sqe *sqe;
+ struct __kernel_timespec ts;
+
+ msec_to_ts(&ts, 10000);
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_timeout(sqe, &ts, 0, 0);
+ sqe->flags |= IOSQE_IO_LINK;
+ sqe->user_data = 0;
+
+ sqe = io_uring_get_sqe(&ring);
+ io_uring_prep_nop(sqe);
+ sqe->user_data = 1;
+
+ ret = io_uring_submit(&ring);
+ if (ret != 2) {
+ fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret);
+ exit(1);
+ }
+
+ /* trigger full cancellation */
+ ret = execl(prog_path, prog_path, NULL);
+ if (ret) {
+ fprintf(stderr, "exec failed %i\n", errno);
+ exit(1);
+ }
+ exit(0);
+ }
+
+ if (waitpid(p, &wstatus, 0) == (pid_t)-1) {
+ perror("waitpid()");
+ return 1;
+ }
+ if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus)) {
+ fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus));
+ return 1;
+ }
+
+ for (i = 0; i < 2; ++i) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret) {
+ fprintf(stderr, "wait_cqe=%d\n", ret);
+ return 1;
+ }
+ if (cqe->res != -ECANCELED) {
+ fprintf(stderr, "invalid result, user_data: %i res: %i\n",
+ (int)cqe->user_data, cqe->res);
+ return 1;
+ }
+ io_uring_cqe_seen(&ring, cqe);
+ }
+
+ io_uring_queue_exit(&ring);
+ return 0;
+}
+
int main(int argc, char *argv[])
{
struct io_uring ring, sqpoll_ring;
bool has_timeout_update, sqpoll;
+ struct io_uring_params p = { };
int ret;
if (argc > 1)
return 0;
- ret = io_uring_queue_init(8, &ring, 0);
+ ret = io_uring_queue_init_params(8, &ring, &p);
if (ret) {
fprintf(stderr, "ring setup failed\n");
return 1;
@@ -1252,7 +1359,7 @@ int main(int argc, char *argv[])
return ret;
}
- ret = test_single_timeout_wait(&ring);
+ ret = test_single_timeout_wait(&ring, &p);
if (ret) {
fprintf(stderr, "test_single_timeout_wait failed\n");
return ret;
@@ -1337,6 +1444,12 @@ int main(int argc, char *argv[])
return ret;
}
+ ret = test_timeout_link_cancel();
+ if (ret) {
+ fprintf(stderr, "test_timeout_link_cancel failed\n");
+ return ret;
+ }
+
if (sqpoll)
io_uring_queue_exit(&sqpoll_ring);
return 0;