summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Moyer <jmoyer@redhat.com>2019-03-04 17:35:49 -0500
committerJens Axboe <axboe@kernel.dk>2019-03-04 15:37:40 -0700
commit765ba233c88f6dc932c61a5b66c26db0ebd082ba (patch)
treea503c79c355de8fbe9e6cc142936f10e29238dce
parent432fa1d3d1818bbbcfac0710c28a17d62a31f719 (diff)
downloadliburing-765ba233c88f6dc932c61a5b66c26db0ebd082ba.tar.gz
liburing-765ba233c88f6dc932c61a5b66c26db0ebd082ba.tar.bz2
add syscall unit tests
Add tests for io_uring_setup, io_uring_register and io_uring_enter. The test coverage is nowhere near complete and the reporting is not uniform. But, it's a start. Signed-off-by: Jeff Moyer <jmoyer@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--test/Makefile12
-rw-r--r--test/io_uring_enter.c282
-rw-r--r--test/io_uring_register.c525
-rw-r--r--test/io_uring_setup.c161
4 files changed, 978 insertions, 2 deletions
diff --git a/test/Makefile b/test/Makefile
index 8d8a65f..e1af59a 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,10 +1,12 @@
CFLAGS ?= -g -O2 -Wall -D_GNU_SOURCE -L../src/
-all_targets += io_uring-test io_uring-cp poll poll-cancel ring-leak fsync
+all_targets += io_uring-test io_uring-cp poll poll-cancel ring-leak fsync \
+ io_uring_setup io_uring_register io_uring_enter
all: $(all_targets)
-test_srcs := io_uring-test.c io_uring-cp.c poll.c poll-cancel.c ring-leak.c fsync.c
+test_srcs := io_uring-test.c io_uring-cp.c poll.c poll-cancel.c ring-leak.c \
+ fsync.c io_uring_setup.c io_uring_register.c io_uring_enter.c
test_objs := $(patsubst %.c,%.ol,$(test_srcs))
@@ -20,5 +22,11 @@ ring-leak: ring-leak.c
$(CC) $(CFLAGS) -o $@ ring-leak.c -luring
fsync: fsync.c
$(CC) $(CFLAGS) -o $@ fsync.c -luring
+io_uring_setup: io_uring_setup.c
+ $(CC) $(CFLAGS) -o $@ io_uring_setup.c -luring
+io_uring_register: io_uring_register.c
+ $(CC) $(CFLAGS) -o $@ io_uring_register.c -luring
+io_uring_enter: io_uring_enter.c
+ $(CC) $(CFLAGS) -o $@ io_uring_enter.c -luring
clean:
rm -f $(all_targets) $(test_objs)
diff --git a/test/io_uring_enter.c b/test/io_uring_enter.c
new file mode 100644
index 0000000..a86eeaa
--- /dev/null
+++ b/test/io_uring_enter.c
@@ -0,0 +1,282 @@
+/*
+ * io_uring_enter.c
+ *
+ * Description: Unit tests for the io_uring_enter system call.
+ *
+ * Copyright 2019, Red Hat, Inc.
+ * Author: Jeff Moyer <jmoyer@redhat.com>
+ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <poll.h>
+#include <assert.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <limits.h>
+#include <sys/time.h>
+#include "../src/liburing.h"
+#include "../src/barrier.h"
+
+#define IORING_MAX_ENTRIES 4096
+
+/*
+ * Submit whatever is queued on 'ring' and check that the submission did
+ * not go through.  A return value of exactly 1 from io_uring_submit is
+ * treated as an unexpected success; otherwise errno must equal 'error'.
+ *
+ * Returns 0 if the failure matched, 1 otherwise.
+ */
+int
+expect_failed_submit(struct io_uring *ring, int error)
+{
+	int submitted = io_uring_submit(ring);
+
+	if (submitted == 1) {
+		printf("expected failure, but io_uring_submit succeeded.\n");
+		return 1;
+	}
+
+	if (errno == error)
+		return 0;
+
+	printf("expected %d, got %d\n", error, errno);
+	return 1;
+}
+
+/*
+ * Issue io_uring_enter with the given arguments and require it to fail
+ * (return -1) with errno equal to 'error'.
+ *
+ * Returns 0 when the call failed as expected, 1 otherwise.
+ */
+int
+expect_fail(int fd, unsigned int to_submit, unsigned int min_complete,
+	    unsigned int flags, sigset_t *sig, int error)
+{
+	if (io_uring_enter(fd, to_submit, min_complete, flags, sig) != -1) {
+		printf("expected %s, but call succeeded\n", strerror(error));
+		return 1;
+	}
+
+	/* errno is inspected before anything else can overwrite it */
+	if (errno == error)
+		return 0;
+
+	printf("expected %d, got %d\n", error, errno);
+	return 1;
+}
+
+/*
+ * Log and issue one io_uring_enter call.
+ *
+ * When 'expect' is -1 the call must fail with errno == 'error' (checked
+ * by expect_fail).  Otherwise the return value must equal 'expect' and
+ * 'error' is ignored.
+ *
+ * Returns 0 when the outcome matched, 1 otherwise.
+ */
+int
+try_io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete,
+		   unsigned int flags, sigset_t *sig, int expect, int error)
+{
+	int ret;
+
+	printf("io_uring_enter(%d, %u, %u, %u, %p)\n", fd, to_submit,
+	       min_complete, flags, (void *)sig);
+
+	if (expect == -1)
+		return expect_fail(fd, to_submit, min_complete,
+				   flags, sig, error);
+
+	ret = io_uring_enter(fd, to_submit, min_complete, flags, sig);
+	if (ret != expect) {
+		/*
+		 * Report the value that was actually compared (ret); errno
+		 * is only meaningful, and only shown, when the call failed.
+		 */
+		if (ret < 0)
+			printf("Expected %d, got %d (errno %d)\n",
+			       expect, ret, errno);
+		else
+			printf("Expected %d, got %d\n", expect, ret);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a temporary file of 'len' bytes under /tmp and return an open
+ * read/write descriptor for it.  The first 4096 bytes are read once
+ * through the descriptor before returning, so 'len' must be at least
+ * 4096.  Any failure terminates the test with exit(1).
+ *
+ * The file name is unlinked immediately, so nothing is left behind in
+ * /tmp once the descriptor is closed or the process exits.
+ */
+int
+setup_file(off_t len)
+{
+	int fd, ret;
+	/*
+	 * mkstemp rewrites the trailing XXXXXX in place, so the template
+	 * must be a fresh copy on every call (a static buffer would make
+	 * any second call fail).
+	 */
+	char template[] = "/tmp/io_uring_enter-test.XXXXXX";
+	char buf[4096];
+
+	fd = mkstemp(template);
+	if (fd < 0) {
+		perror("mkstemp");
+		exit(1);
+	}
+	/* the descriptor keeps the file alive; the name is not needed */
+	unlink(template);
+
+	ret = ftruncate(fd, len);
+	if (ret < 0) {
+		perror("ftruncate");
+		exit(1);
+	}
+
+	ret = read(fd, buf, 4096);
+	if (ret != 4096) {
+		printf("read returned %d, expected 4096\n", ret);
+		exit(1);
+	}
+
+	return fd;
+}
+
+/*
+ * Fill 'sqe' with a single-segment readv of 'len' bytes from 'fd' at
+ * 'offset'.  A heap iovec and a heap data buffer are allocated for the
+ * request; the iovec pointer is stored as the sqe's user data so that
+ * whoever reaps the completion can free both.
+ */
+void
+io_prep_read(struct io_uring_sqe *sqe, int fd, off_t offset, size_t len)
+{
+	struct iovec *vec;
+
+	vec = malloc(sizeof(struct iovec));
+	assert(vec);
+
+	vec->iov_len = len;
+	vec->iov_base = malloc(len);
+	assert(vec->iov_base);
+
+	io_uring_prep_readv(sqe, fd, vec, 1, offset);
+	/* released by the completion handler */
+	io_uring_sqe_set_data(sqe, vec);
+}
+
+/*
+ * Wait for and consume up to 'nr' completions from 'ring'.
+ *
+ * Each cqe's user_data is interpreted as a pointer to the heap iovec
+ * attached when the read was prepared; the data buffer and the iovec
+ * are freed here.  A result other than 4096 is reported but does not
+ * stop the loop.  A failing io_uring_wait_completion ends the test with
+ * exit(1).
+ *
+ * The 10 second limit is only evaluated after a completion has been
+ * returned, so it bounds the total time spent reaping rather than the
+ * time spent blocked in a single wait.
+ */
+void
+reap_events(struct io_uring *ring, unsigned nr)
+{
+	int ret;
+	unsigned left = nr;
+	struct io_uring_cqe *cqe;
+	struct iovec *iov;
+	struct timeval start, now, elapsed;
+
+	printf("Reaping %u I/Os\n", nr);
+	gettimeofday(&start, NULL);
+	while (left) {
+		ret = io_uring_wait_completion(ring, &cqe);
+		if (ret < 0) {
+			printf("io_uring_wait_completion returned %d\n", ret);
+			printf("expected success\n");
+			exit(1);
+		}
+		if (cqe->res != 4096)
+			printf("cqe->res: %d, expected 4096\n", cqe->res);
+		iov = (struct iovec *)cqe->user_data;
+		free(iov->iov_base);
+		free(iov);
+		left--;
+
+		gettimeofday(&now, NULL);
+		timersub(&now, &start, &elapsed);
+		if (elapsed.tv_sec > 10) {
+			printf("Timed out waiting for I/Os to complete.\n");
+			/* 'left' counts what is still outstanding */
+			printf("%u expected, %u completed\n", nr, nr - left);
+			break;
+		}
+	}
+}
+
+/*
+ * Queue 'nr' 4096-byte reads against a freshly created temporary file
+ * (one read per 4096-byte block) and submit them all with a single
+ * io_uring_submit call.  Any failure terminates the test with exit(1).
+ *
+ * The file descriptor returned by setup_file is not closed here.
+ */
+void
+submit_io(struct io_uring *ring, unsigned nr)
+{
+	int fd, ret;
+	off_t file_len;
+	unsigned i;
+	struct io_uring_sqe *sqe;
+
+	printf("Allocating %u sqes\n", nr);
+	/* widen before multiplying so the size is computed in off_t */
+	file_len = (off_t)nr * 4096;
+	fd = setup_file(file_len);
+	for (i = 0; i < nr; i++) {
+		/* allocate an sqe */
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			/* ring smaller than nr: nothing sane to fill in */
+			printf("io_uring_get_sqe failed for sqe %u of %u\n",
+			       i, nr);
+			exit(1);
+		}
+		/* fill it in */
+		io_prep_read(sqe, fd, (off_t)i * 4096, 4096);
+	}
+
+	/* submit the I/Os */
+	printf("Submitting %u I/Os\n", nr);
+	ret = io_uring_submit(ring);
+	if (ret < 0) {
+		perror("io_uring_enter");
+		exit(1);
+	}
+	printf("Done\n");
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+ unsigned int status = 0;
+ struct io_uring ring;
+ struct io_uring_sq *sq = &ring.sq;
+ unsigned ktail, mask, index;
+ unsigned sq_entries;
+ unsigned completed, dropped;
+ /*
+  * Test plan: create a ring of IORING_MAX_ENTRIES entries, run a few
+  * argument-validation cases through try_io_uring_enter, fill the
+  * submission ring with reads and wait for all of their completions,
+  * then place an out-of-range index in the SQ array and check that
+  * the dropped counter changes.  Prints PASS and returns 0 when every
+  * check passed; prints FAIL and returns -1 otherwise.
+  */
+ ret = io_uring_queue_init(IORING_MAX_ENTRIES, &ring, 0);
+ if (ret < 0) {
+ perror("io_uring_queue_init");
+ exit(1);
+ }
+ mask = *sq->kring_mask;
+
+ /* invalid flags */
+ status |= try_io_uring_enter(ring.ring_fd, 1, 0, ~0U, NULL, -1, EINVAL);
+
+ /* invalid fd, EBADF */
+ status |= try_io_uring_enter(-1, 0, 0, 0, NULL, -1, EBADF);
+
+ /* valid, non-ring fd, EOPNOTSUPP */
+ status |= try_io_uring_enter(0, 0, 0, 0, NULL, -1, EOPNOTSUPP);
+
+ /*
+  * to_submit: 1, flags: 0, with no sqe queued on the ring yet;
+  * the call is expected to return 0.
+  */
+ status |= try_io_uring_enter(ring.ring_fd, 1, 0, 0, NULL, 0, 0);
+
+ /* fill the sq ring */
+ sq_entries = *ring.sq.kring_entries;
+ submit_io(&ring, sq_entries);
+ printf("Waiting for %u events\n", sq_entries);
+ ret = io_uring_enter(ring.ring_fd, 0, sq_entries,
+ IORING_ENTER_GETEVENTS, NULL);
+ if (ret < 0) {
+ perror("io_uring_enter");
+ status = 1;
+ } else {
+ /*
+ * This is a non-IOPOLL ring, which means that io_uring_enter
+ * should not return until min_complete events are available
+ * in the completion queue.
+ */
+ completed = *ring.cq.ktail - *ring.cq.khead;
+ if (completed != sq_entries) {
+ printf("Submitted %u I/Os, but only got %u completions\n",
+ sq_entries, completed);
+ status = 1;
+ }
+ reap_events(&ring, sq_entries);
+ }
+
+ /*
+ * Add an invalid index to the submission queue. This should
+ * result in the dropped counter increasing.
+ *
+ * The index is stored in the SQ array slot selected by the current
+ * tail, and the incremented tail is then written back, with a
+ * write barrier before and after the tail store.
+ */
+ printf("Submitting invalid sqe index.\n");
+ index = *sq->kring_entries + 1; // beyond the number of ring entries, hence invalid
+ dropped = *sq->kdropped;
+ ktail = *sq->ktail;
+ sq->array[ktail & mask] = index;
+ ++ktail;
+ write_barrier();
+ *sq->ktail = ktail;
+ write_barrier();
+
+ ret = io_uring_enter(ring.ring_fd, 1, 0, 0, NULL);
+ /*
+  * now check to see if our sqe was dropped; the return value of the
+  * call above is not examined, only the dropped counter is.
+  */
+ if (*sq->kdropped == dropped) {
+ printf("dropped counter did not increase\n");
+ status = 1;
+ }
+
+ if (!status) {
+ printf("PASS\n");
+ return 0;
+ }
+
+ printf("FAIL\n");
+ return -1;
+}
diff --git a/test/io_uring_register.c b/test/io_uring_register.c
new file mode 100644
index 0000000..30a225b
--- /dev/null
+++ b/test/io_uring_register.c
@@ -0,0 +1,525 @@
+/*
+ * io_uring_register.c
+ *
+ * Description: Unit tests for the io_uring_register system call.
+ *
+ * Copyright 2019, Red Hat, Inc.
+ * Author: Jeff Moyer <jmoyer@redhat.com>
+ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <poll.h>
+#include <assert.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <limits.h>
+#include "../src/liburing.h"
+
+static int pagesize;
+static rlim_t mlock_limit;
+static int devnull;
+
+/*
+ * Log and issue io_uring_register, requiring it to fail (return -1)
+ * with errno equal to 'error'.
+ *
+ * If the call unexpectedly succeeds for a buffer or file registration,
+ * the registration is undone so later tests start clean; failing to
+ * undo it terminates the test with exit(1).
+ *
+ * Returns 0 when the call failed as expected, 1 otherwise.
+ */
+int
+expect_fail(int fd, unsigned int opcode, void *arg,
+	    unsigned int nr_args, int error)
+{
+	int undo_ret = 0;
+
+	printf("io_uring_register(%d, %u, %p, %u)\n",
+	       fd, opcode, arg, nr_args);
+
+	if (io_uring_register(fd, opcode, arg, nr_args) == -1) {
+		if (errno == error)
+			return 0;
+		printf("expected %d, got %d\n", error, errno);
+		return 1;
+	}
+
+	printf("expected %s, but call succeeded\n", strerror(error));
+	switch (opcode) {
+	case IORING_REGISTER_BUFFERS:
+		undo_ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
+					     0, 0);
+		break;
+	case IORING_REGISTER_FILES:
+		undo_ret = io_uring_register(fd, IORING_UNREGISTER_FILES,
+					     0, 0);
+		break;
+	default:
+		break;
+	}
+	if (undo_ret) {
+		printf("internal error: failed to unregister\n");
+		exit(1);
+	}
+	return 1;
+}
+
+/*
+ * Create an io_uring instance with 'entries' entries and parameters 'p'.
+ * Returns the ring file descriptor; a failing io_uring_setup is reported
+ * with perror and ends the test with exit(1).
+ */
+int
+new_io_uring(int entries, struct io_uring_params *p)
+{
+	int ring_fd = io_uring_setup(entries, p);
+
+	if (ring_fd >= 0)
+		return ring_fd;
+
+	perror("io_uring_setup");
+	exit(1);
+}
+
+#define MAXFDS (UINT_MAX * sizeof(int))
+
+/*
+ * Map 'size' bytes of a new, already-unlinked temporary file (created
+ * in the current directory) as a shared read/write mapping.
+ *
+ * Returns the mapping address, or NULL after reporting the failing step
+ * with perror.  The backing descriptor is closed before returning on
+ * every path past mkstemp.
+ */
+void *
+map_filebacked(size_t size)
+{
+	char name[32] = "io_uring_register-test-XXXXXXXX";
+	void *mapping = NULL;
+	int backing_fd;
+
+	backing_fd = mkstemp(name);
+	if (backing_fd < 0) {
+		perror("mkstemp");
+		return NULL;
+	}
+	unlink(name);
+
+	if (ftruncate(backing_fd, size) < 0) {
+		perror("ftruncate");
+	} else {
+		mapping = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED,
+			       backing_fd, 0);
+		if (mapping == MAP_FAILED) {
+			perror("mmap");
+			mapping = NULL;
+		}
+	}
+
+	close(backing_fd);
+	return mapping;
+}
+
+/*
+ * NOTE: this is now limited by SCM_MAX_FD (253).  Keep the code for now,
+ * but probably should augment it to test 253 and 254, specifically.
+ *
+ * Build an array of UINT_MAX file descriptors (all referring to one
+ * open of /dev/null) and try to register it with IORING_REGISTER_FILES,
+ * halving the count until a registration succeeds.  The array is not
+ * backed by that much memory: a single 128MiB file holding the
+ * descriptor value is mapped repeatedly over the whole range.
+ *
+ * Returns 0 if some count could be registered (and then unregistered),
+ * 1 if no count worked.  Setup failures end the test with exit(1).
+ *
+ * NOTE(review): the UINT_MAX * sizeof(int) size presumably assumes a
+ * 64-bit size_t -- confirm before running on 32-bit targets.
+ */
+int
+test_max_fds(int uring_fd)
+{
+	int status = 1;
+	int ret;
+	void *fd_as; /* file descriptor address space */
+	int fdtable_fd; /* fd for the file that will be mapped over and over */
+	int io_fd; /* the valid fd for I/O -- /dev/null */
+	int *fds; /* used to map the file into the address space */
+	char *chunk; /* target address of the next MAP_FIXED mapping */
+	char template[32] = "io_uring_register-test-XXXXXXXX";
+	unsigned long long i, nr_maps, nr_fds;
+	const size_t as_size = UINT_MAX * sizeof(int);
+	const size_t chunk_size = 128*1024*1024;
+
+	/*
+	 * First, mmap anonymous the full size.  That will guarantee the
+	 * mapping will fit in the memory area selected by mmap.  Then,
+	 * over-write that mapping using a file-backed mapping, 128MiB at
+	 * a time using MAP_FIXED.
+	 */
+	fd_as = mmap(NULL, as_size, PROT_READ|PROT_WRITE,
+		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (fd_as == MAP_FAILED) {
+		perror("mmap fd_as");
+		exit(1);
+	}
+	printf("allocated %zu bytes of address space\n", as_size);
+
+	fdtable_fd = mkstemp(template);
+	if (fdtable_fd < 0) {
+		perror("mkstemp");
+		exit(1);
+	}
+	unlink(template);
+	ret = ftruncate(fdtable_fd, chunk_size);
+	if (ret < 0) {
+		perror("ftruncate");
+		exit(1);
+	}
+
+	io_fd = open("/dev/null", O_RDWR);
+	if (io_fd < 0) {
+		perror("open /dev/null");
+		exit(1);
+	}
+	fds = mmap(fd_as, chunk_size, PROT_READ|PROT_WRITE,
+		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
+	if (fds == MAP_FAILED) {
+		perror("mmap fdtable");
+		exit(1);
+	}
+
+	/* fill the fd table */
+	nr_fds = chunk_size / sizeof(int);
+	for (i = 0; i < nr_fds; i++)
+		fds[i] = io_fd;
+
+	/* map the file through the rest of the address space */
+	nr_maps = as_size / chunk_size;
+	chunk = fd_as;
+	for (i = 0; i < nr_maps; i++) {
+		chunk += chunk_size; /* advance by 128MiB */
+		/*
+		 * The target address is tracked separately from mmap's
+		 * return value so that a failure can still report where
+		 * in the range it happened.
+		 */
+		if (mmap(chunk, chunk_size, PROT_READ|PROT_WRITE,
+			 MAP_SHARED|MAP_FIXED, fdtable_fd, 0) == MAP_FAILED) {
+			printf("mmap failed at offset %zu\n",
+			       (size_t)(chunk - (char *)fd_as));
+			exit(1);
+		}
+	}
+
+	/* Now fd_as points to the file descriptor array. */
+	/*
+	 * We may not be able to map all of these files.  Let's back off
+	 * until success.
+	 */
+	nr_fds = UINT_MAX;
+	while (nr_fds) {
+		ret = io_uring_register(uring_fd, IORING_REGISTER_FILES,
+					fd_as, nr_fds);
+		if (ret != 0) {
+			nr_fds /= 2;
+			continue;
+		}
+		printf("io_uring_register(%d, IORING_REGISTER_FILES, %p, %llu)"
+		       "...succeeded\n", uring_fd, fd_as, nr_fds);
+		status = 0;
+		printf("io_uring_register(%d, IORING_UNREGISTER_FILES, 0, 0)...",
+		       uring_fd);
+		ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0);
+		if (ret < 0) {
+			/* keep errno intact across the printf */
+			ret = errno;
+			printf("failed\n");
+			errno = ret;
+			perror("io_uring_register UNREGISTER_FILES");
+			exit(1);
+		}
+		printf("succeeded\n");
+		break;
+	}
+
+	close(io_fd);
+	close(fdtable_fd);
+	ret = munmap(fd_as, as_size);
+	if (ret != 0) {
+		printf("munmap(%zu) failed\n", as_size);
+		exit(1);
+	}
+
+	return status;
+}
+
+/*
+ * Try to register a buffer twice the size of the RLIMIT_MEMLOCK soft
+ * limit (mlock_limit) and halve the length on every ENOMEM until a
+ * registration succeeds or the length reaches zero.
+ *
+ * Returns 1 if a registration fails with anything other than ENOMEM or
+ * if unregistering fails, 0 otherwise (including when nothing could be
+ * registered at all, which is only reported).  The test is skipped when
+ * the limit is RLIM_INFINITY, since doubling it would wrap around.
+ */
+int
+test_memlock_exceeded(int fd)
+{
+	int ret;
+	void *buf;
+	struct iovec iov;
+
+	if (mlock_limit == RLIM_INFINITY) {
+		printf("RLIMIT_MEMLOCK is unlimited, skipping the memlock "
+		       "test\n");
+		return 0;
+	}
+
+	iov.iov_len = mlock_limit * 2;
+	buf = malloc(iov.iov_len);
+	assert(buf);
+	iov.iov_base = buf;
+
+	while (iov.iov_len) {
+		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
+		if (ret < 0) {
+			if (errno == ENOMEM) {
+				printf("io_uring_register of %zu bytes failed "
+				       "with ENOMEM (expected).\n", iov.iov_len);
+				iov.iov_len /= 2;
+				continue;
+			}
+			/* ENOMEM is the only failure tolerated above */
+			printf("expected success or ENOMEM, got %d\n", errno);
+			free(buf);
+			return 1;
+		}
+		printf("successfully registered %zu bytes (%d).\n",
+		       iov.iov_len, ret);
+		ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
+		if (ret != 0) {
+			printf("error: unregister failed with %d\n", errno);
+			free(buf);
+			return 1;
+		}
+		break;
+	}
+	if (!iov.iov_len)
+		printf("Unable to register buffers.  Check memlock rlimit.\n");
+
+	free(buf);
+	return 0;
+}
+
+/*
+ * Check the limit on the number of iovecs accepted by
+ * IORING_REGISTER_BUFFERS: UIO_MAXIOV + 1 entries must be rejected with
+ * EINVAL, while exactly UIO_MAXIOV entries must register successfully.
+ * Every iovec points at the same page-sized heap buffer.
+ *
+ * Returns 0 when both checks pass, non-zero otherwise.
+ */
+int
+test_iovec_nr(int fd)
+{
+	unsigned int count = UIO_MAXIOV + 1;
+	unsigned int idx;
+	int status = 0;
+	struct iovec *vecs;
+	void *page;
+
+	page = malloc(pagesize);
+	assert(page);
+
+	vecs = malloc(count * sizeof(struct iovec));
+	assert(vecs);
+
+	for (idx = 0; idx < count; idx++) {
+		vecs[idx].iov_base = page;
+		vecs[idx].iov_len = pagesize;
+	}
+
+	/* one entry too many */
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, vecs, count, EINVAL);
+
+	/* exactly UIO_MAXIOV entries */
+	count--;
+	printf("io_uring_register(%d, %u, %p, %u)\n",
+	       fd, IORING_REGISTER_BUFFERS, vecs, count);
+	if (io_uring_register(fd, IORING_REGISTER_BUFFERS, vecs, count) == 0) {
+		io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
+	} else {
+		printf("expected success, got %d\n", errno);
+		status = 1;
+	}
+
+	free(page);
+	free(vecs);
+	return status;
+}
+
+/*
+ * io_uring limit is 1G.  iov_len limit is ~0UL, I think
+ *
+ * Exercise IORING_REGISTER_BUFFERS with a series of single iovecs:
+ * NULL base, zero length, a range that runs into an unmapped page, a
+ * 2MB huge page (skipped when none can be mapped), a file-backed
+ * mapping, and, when not running as root, a buffer larger than the
+ * memlock limit.
+ *
+ * Returns 0 when every check passed, non-zero otherwise.
+ */
+int
+test_iovec_size(int fd)
+{
+	unsigned int status = 0;
+	int ret;
+	struct iovec iov;
+	void *buf;
+
+	/* NULL pointer for base */
+	iov.iov_base = 0;
+	iov.iov_len = 4096;
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
+
+	/* valid base, 0 length */
+	iov.iov_base = &buf;
+	iov.iov_len = 0;
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
+
+	/* valid base, length exceeds size */
+	/* this requires an unmapped page directly after buf */
+	buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	assert(buf != MAP_FAILED);
+	ret = munmap((char *)buf + pagesize, pagesize);
+	assert(ret == 0);
+	iov.iov_base = buf;
+	iov.iov_len = 2 * pagesize;
+	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
+	munmap(buf, pagesize);
+
+	/* huge page */
+	buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
+		   MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
+		   -1, 0);
+	if (buf == MAP_FAILED) {
+		printf("Unable to map a huge page.  Try increasing "
+		       "/proc/sys/vm/nr_hugepages by at least 1.\n");
+		printf("Skipping the hugepage test\n");
+	} else {
+		/*
+		 * This should succeed, so long as RLIMIT_MEMLOCK is
+		 * not exceeded
+		 */
+		iov.iov_base = buf;
+		iov.iov_len = 2*1024*1024;
+		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
+		if (ret < 0) {
+			if (errno == ENOMEM)
+				printf("Unable to test registering of a huge "
+				       "page.  Try increasing the "
+				       "RLIMIT_MEMLOCK resource limit by at "
+				       "least 2MB.\n");
+			else {
+				printf("expected success, got %d\n", errno);
+				status = 1;
+			}
+		} else {
+			printf("Success!\n");
+			ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
+						0, 0);
+			if (ret < 0) {
+				perror("io_uring_unregister");
+				status = 1;
+			}
+		}
+		/*
+		 * Only unmap what was actually mapped: when the huge page
+		 * mmap fails, iov still describes the previous test's
+		 * already-released range.
+		 */
+		ret = munmap(buf, 2*1024*1024);
+		assert(ret == 0);
+	}
+
+	/* file-backed buffers -- not supported */
+	buf = map_filebacked(2*1024*1024);
+	if (!buf) {
+		/* nothing to register or unmap without a mapping */
+		status = 1;
+	} else {
+		iov.iov_base = buf;
+		iov.iov_len = 2*1024*1024;
+		printf("reserve file-backed buffers\n");
+		status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1,
+				      EOPNOTSUPP);
+		munmap(buf, 2*1024*1024);
+	}
+
+	/* bump up against the soft limit and make sure registration is
+	 * refused with ENOMEM until the size fits.  NOTE: this requires
+	 * running the test as non-root. */
+	if (getuid() != 0)
+		status |= test_memlock_exceeded(fd);
+
+	return status;
+}
+
+/*
+ * Print the opcode, flags and fd of 'sqe', one tab-indented field per
+ * line; poll_events is printed as well for IORING_OP_POLL_ADD requests.
+ */
+void
+dump_sqe(struct io_uring_sqe *sqe)
+{
+	printf("\topcode: %d\n"
+	       "\tflags: 0x%.8x\n"
+	       "\tfd: %d\n",
+	       sqe->opcode, sqe->flags, sqe->fd);
+
+	if (sqe->opcode != IORING_OP_POLL_ADD)
+		return;
+
+	printf("\tpoll_events: 0x%.8x\n", sqe->poll_events);
+}
+
+/*
+ * Submit a single IORING_OP_POLL_ADD for POLLIN|POLLOUT on 'fd' and
+ * wait for its completion.  When 'fixed' is non-zero the request is
+ * flagged IOSQE_FIXED_FILE.
+ *
+ * Returns 0 if the completion result is exactly POLLOUT, 1 on any
+ * failure (no sqe available, submit failure, wait failure, or a
+ * different result).
+ */
+int
+ioring_poll(struct io_uring *ring, int fd, int fixed)
+{
+	int ret;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		/* never hand a NULL sqe to memset */
+		printf("failed to get an sqe for the poll request.\n");
+		return 1;
+	}
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->opcode = IORING_OP_POLL_ADD;
+	if (fixed)
+		sqe->flags = IOSQE_FIXED_FILE;
+	sqe->fd = fd;
+	sqe->poll_events = POLLIN|POLLOUT;
+
+	printf("io_uring_submit:\n");
+	dump_sqe(sqe);
+	ret = io_uring_submit(ring);
+	if (ret != 1) {
+		printf("failed to submit poll sqe: %d.\n", errno);
+		return 1;
+	}
+
+	ret = io_uring_wait_completion(ring, &cqe);
+	if (ret < 0) {
+		printf("io_uring_wait_completion failed with %d\n", ret);
+		return 1;
+	}
+	if (cqe->res != POLLOUT) {
+		printf("io_uring_wait_completion: expected 0x%.8x, got 0x%.8x\n",
+		       POLLOUT, cqe->res);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a one-entry ring, poll the ring's own file descriptor through
+ * the ring, and then check that registering the ring fd with
+ * IORING_REGISTER_FILES is refused with EBADF.  The ring is torn down
+ * before returning.
+ *
+ * Returns 0 when both steps behave as expected, non-zero otherwise
+ * (including when the ring cannot be created).
+ */
+int
+test_poll_ringfd(void)
+{
+	struct io_uring ring;
+	int ring_fd;
+	int result;
+
+	if (io_uring_queue_init(1, &ring, 0)) {
+		perror("io_uring_queue_init");
+		return 1;
+	}
+	ring_fd = ring.ring_fd;
+
+	/* poll the ring fd as an ordinary (non-fixed) file */
+	result = ioring_poll(&ring, ring_fd, 0);
+
+	/* the kernel is expected to reject registering the ring fd */
+	result |= expect_fail(ring_fd, IORING_REGISTER_FILES, &ring_fd, 1,
+			      EBADF);
+
+	io_uring_queue_exit(&ring);
+	return result;
+}
+
+/*
+ * Entry point for the io_uring_register tests.
+ *
+ * Initializes the globals (pagesize, mlock_limit, devnull), checks the
+ * basic argument validation of io_uring_register (bad fd, non-ring fd,
+ * invalid opcode), then runs the buffer, file and ring-fd poll tests.
+ * Prints PASS or FAIL and returns the accumulated status (0 on success).
+ */
+int
+main(int argc, char **argv)
+{
+	int fd, ret;
+	unsigned int status = 0;
+	struct io_uring_params p;
+	struct rlimit rlim;
+
+	/* setup globals */
+	pagesize = getpagesize();
+	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
+	if (ret < 0) {
+		perror("getrlimit");
+		return 1;
+	}
+	mlock_limit = rlim.rlim_cur;
+	printf("RELIMIT_MEMLOCK: %lu (%lu)\n", rlim.rlim_cur, rlim.rlim_max);
+	devnull = open("/dev/null", O_RDWR);
+	if (devnull < 0) {
+		perror("open /dev/null");
+		exit(1);
+	}
+
+	/* invalid fd */
+	status |= expect_fail(-1, 0, NULL, 0, EBADF);
+	/* valid fd that is not an io_uring fd */
+	status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP);
+
+	/* invalid opcode */
+	memset(&p, 0, sizeof(p));
+	fd = new_io_uring(1, &p);
+	ret = expect_fail(fd, ~0U, NULL, 0, EINVAL);
+	if (ret) {
+		/* a mismatch here is a test failure like any other */
+		status |= ret;
+		/* if this succeeds, tear down the io_uring instance
+		 * and start clean for the next test. */
+		close(fd);
+		fd = new_io_uring(1, &p);
+	}
+
+	/* IORING_REGISTER_BUFFERS */
+	status |= test_iovec_size(fd);
+	status |= test_iovec_nr(fd);
+	/* IORING_REGISTER_FILES */
+	status |= test_max_fds(fd);
+	close(fd);
+	/* uring poll on the uring fd */
+	status |= test_poll_ringfd();
+
+	close(devnull);
+
+	if (!status)
+		printf("PASS\n");
+	else
+		printf("FAIL\n");
+
+	return status;
+}
diff --git a/test/io_uring_setup.c b/test/io_uring_setup.c
new file mode 100644
index 0000000..2b76402
--- /dev/null
+++ b/test/io_uring_setup.c
@@ -0,0 +1,161 @@
+/*
+ * io_uring_setup.c
+ *
+ * Description: Unit tests for the io_uring_setup system call.
+ *
+ * Copyright 2019, Red Hat, Inc.
+ * Author: Jeff Moyer <jmoyer@redhat.com>
+ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include "../src/liburing.h"
+
+/*
+ * Render p->flags for logging.
+ *
+ * Returns "none" when 'p' is NULL or no flag is set, the raw bitmask in
+ * hex when any bit other than IOPOLL/SQPOLL/SQ_AFF is present, and
+ * otherwise the names of the set flags joined by '|'.  Except for
+ * "none", the result lives in a static buffer that is overwritten by
+ * the next call.
+ */
+char *
+flags_string(struct io_uring_params *p)
+{
+	static char flagstr[64];
+	static const struct {
+		unsigned int bit;
+		const char *name;
+	} known[] = {
+		{ IORING_SETUP_IOPOLL, "IORING_SETUP_IOPOLL" },
+		{ IORING_SETUP_SQPOLL, "IORING_SETUP_SQPOLL" },
+		{ IORING_SETUP_SQ_AFF, "IORING_SETUP_SQ_AFF" },
+	};
+	size_t used = 0;
+	unsigned int i;
+
+	memset(flagstr, 0, sizeof(flagstr));
+
+	if (!p || p->flags == 0)
+		return "none";
+
+	/*
+	 * If unsupported flags are present, just print the bitmask.
+	 */
+	if (p->flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
+			 IORING_SETUP_SQ_AFF)) {
+		snprintf(flagstr, sizeof(flagstr), "0x%.8x", p->flags);
+		return flagstr;
+	}
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		int n;
+
+		if (!(p->flags & known[i].bit))
+			continue;
+		/*
+		 * A separator goes in front of every name but the first,
+		 * whichever flag happens to be first.
+		 */
+		n = snprintf(flagstr + used, sizeof(flagstr) - used, "%s%s",
+			     used ? "|" : "", known[i].name);
+		if (n < 0 || (size_t)n >= sizeof(flagstr) - used)
+			break;	/* truncated; keep what fits */
+		used += n;
+	}
+
+	return flagstr;
+}
+
+/*
+ * Format the first five words of p->resv as space-separated hex values.
+ * Returns an empty string when 'p' is NULL; otherwise the result lives
+ * in a static buffer that is overwritten by the next call.
+ */
+char *
+dump_resv(struct io_uring_params *p)
+{
+	static char resvstr[4096];
+
+	if (p) {
+		sprintf(resvstr, "0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x",
+			p->resv[0], p->resv[1], p->resv[2], p->resv[3],
+			p->resv[4]);
+		return resvstr;
+	}
+
+	return "";
+}
+
+/*
+ * Log and attempt io_uring_setup(entries, p).
+ *
+ * The call's return value must equal 'expect'; when 'expect' is -1,
+ * errno must additionally equal 'error'.  Returns 0 on a match and 1
+ * otherwise.  A ring that was created although a different result was
+ * expected is closed again.
+ *
+ * bogus: setup returns a valid fd on success... expect can't predict the
+ * fd we'll get, so this really only takes 1 parameter: error
+ */
+int
+try_io_uring_setup(unsigned entries, struct io_uring_params *p, int expect, int error)
+{
+	int ret;
+
+	printf("io_uring_setup(%u, %p), flags: %s, resv: %s, sq_thread_cpu: %u\n",
+	       entries, (void *)p, flags_string(p), dump_resv(p),
+	       p ? p->sq_thread_cpu : 0);
+
+	ret = io_uring_setup(entries, p);
+	if (ret != expect) {
+		printf("expected %d, got %d\n", expect, ret);
+		/* if we got a valid uring, close it (0 is a valid fd too) */
+		if (ret >= 0)
+			close(ret);
+		return 1;
+	}
+	if (expect == -1 && error != errno) {
+		printf("expected errno %d, got %d\n", error, errno);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Entry point for the io_uring_setup tests.
+ *
+ * Runs a series of setups that are expected to be rejected (zero
+ * entries, NULL params, non-zero resv, invalid flags, SQ_AFF without
+ * SQPOLL, out-of-range sq_thread_cpu), then creates a valid ring and
+ * checks that read() on its fd fails.  Prints PASS and returns 0 when
+ * everything matched; prints FAIL and returns -1 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+	int fd;
+	unsigned int status = 0;
+	struct io_uring_params p;
+
+	memset(&p, 0, sizeof(p));
+	status |= try_io_uring_setup(0, &p, -1, EINVAL);
+	status |= try_io_uring_setup(1, NULL, -1, EFAULT);
+
+	/* resv array is non-zero */
+	memset(&p, 0, sizeof(p));
+	p.resv[0] = p.resv[1] = p.resv[2] = p.resv[3] = p.resv[4] = 1;
+	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+
+	/* invalid flags */
+	memset(&p, 0, sizeof(p));
+	p.flags = ~0U;
+	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+
+	/* IORING_SETUP_SQ_AFF set but not IORING_SETUP_SQPOLL */
+	memset(&p, 0, sizeof(p));
+	p.flags = IORING_SETUP_SQ_AFF;
+	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+
+	/* attempt to bind to invalid cpu */
+	memset(&p, 0, sizeof(p));
+	p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
+	p.sq_thread_cpu = get_nprocs_conf();
+	status |= try_io_uring_setup(1, &p, -1, EINVAL);
+
+	/* I think we can limit a process to a set of cpus.  I assume
+	 * we shouldn't be able to setup a kernel thread outside of that.
+	 * try to do that. (task->cpus_allowed) */
+
+	/* read/write on io_uring_fd */
+	memset(&p, 0, sizeof(p));
+	fd = io_uring_setup(1, &p);
+	if (fd < 0) {
+		printf("io_uring_setup failed with %d, expected success\n",
+		       errno);
+		status = 1;
+	} else {
+		char buf[4096];
+		int ret;
+
+		ret = read(fd, buf, 4096);
+		if (ret >= 0) {
+			printf("read from io_uring fd succeeded.  expected fail\n");
+			status = 1;
+		}
+		/* done with the ring; do not leak its descriptor */
+		close(fd);
+	}
+
+	if (!status) {
+		printf("PASS\n");
+		return 0;
+	}
+
+	printf("FAIL\n");
+	return -1;
+}