diff options
author | Jens Axboe <axboe@kernel.dk> | 2022-05-16 09:34:02 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2022-05-16 09:34:02 -0600 |
commit | db2153d86f04fbfccfb67e52caafdeeb658d15e4 (patch) | |
tree | d3290a1d561507311a4014b4d603d3e0cd9e92a4 | |
parent | 4d6a12e5d6bd7d7260d202cf888c911a039376ea (diff) | |
parent | 797439af0962b65451425032c1559f0800fca13e (diff) | |
download | liburing-db2153d86f04fbfccfb67e52caafdeeb658d15e4.tar.gz liburing-db2153d86f04fbfccfb67e52caafdeeb658d15e4.tar.bz2 |
Merge branch 'big-sqe'
* big-sqe:
test/nop: make less verbose and don't fail on older kernels
liburing: Update io_uring.h
liburing: Test all configurations with NOP test
liburing: add large CQE tests to nop test
liburing: index large CQE's correctly
liburing: return correct ring size for large CQE's
liburing: increase mmap size for large CQE's
liburing: Update io_uring.h with large CQE kernel changes
test/nop: add basic IORING_SETUP_SQE128 tests
setup: add basic support for SQE128
io_uring.h: add IORING_SETUP_SQE128
-rw-r--r-- | src/include/liburing.h | 31 | ||||
-rw-r--r-- | src/include/liburing/io_uring.h | 25 | ||||
-rw-r--r-- | src/queue.c | 6 | ||||
-rw-r--r-- | src/setup.c | 41 | ||||
-rw-r--r-- | test/nop.c | 79 | ||||
-rw-r--r-- | test/test.h | 35 |
6 files changed, 192 insertions, 25 deletions
diff --git a/src/include/liburing.h b/src/include/liburing.h index 89b2e5b..1aedc35 100644 --- a/src/include/liburing.h +++ b/src/include/liburing.h @@ -189,6 +189,16 @@ int __io_uring_get_cqe(struct io_uring *ring, #define LIBURING_UDATA_TIMEOUT ((__u64) -1) +/* + * Calculates the step size for CQE iteration. + * For standard CQE's its 1, for big CQE's its two. + */ +#define io_uring_cqe_shift(ring) \ + (!!((ring)->flags & IORING_SETUP_CQE32)) + +#define io_uring_cqe_index(ring,ptr,mask) \ + (((ptr) & (mask)) << io_uring_cqe_shift(ring)) + #define io_uring_for_each_cqe(ring, head, cqe) \ /* \ * io_uring_smp_load_acquire() enforces the order of tail \ @@ -196,7 +206,7 @@ int __io_uring_get_cqe(struct io_uring *ring, */ \ for (head = *(ring)->cq.khead; \ (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \ - &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \ + &(ring)->cq.cqes[io_uring_cqe_index(ring, head, *(ring)->cq.kring_mask)] : NULL)); \ head++) \ /* @@ -901,6 +911,10 @@ static inline int __io_uring_peek_cqe(struct io_uring *ring, int err = 0; unsigned available; unsigned mask = *ring->cq.kring_mask; + int shift = 0; + + if (ring->flags & IORING_SETUP_CQE32) + shift = 1; do { unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail); @@ -911,7 +925,7 @@ static inline int __io_uring_peek_cqe(struct io_uring *ring, if (!available) break; - cqe = &ring->cq.cqes[head & mask]; + cqe = &ring->cq.cqes[(head & mask) << shift]; if (!(ring->features & IORING_FEAT_EXT_ARG) && cqe->user_data == LIBURING_UDATA_TIMEOUT) { if (cqe->res < 0) @@ -969,13 +983,20 @@ static inline struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring) struct io_uring_sq *sq = &ring->sq; unsigned int head = io_uring_smp_load_acquire(sq->khead); unsigned int next = sq->sqe_tail + 1; - struct io_uring_sqe *sqe = NULL; + int shift = 0; + + if (ring->flags & IORING_SETUP_SQE128) + shift = 1; if (next - head <= *sq->kring_entries) { - sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask]; + struct io_uring_sqe *sqe; + + sqe = &sq->sqes[(sq->sqe_tail & *sq->kring_mask) << shift]; sq->sqe_tail = next; + return sqe; } - return sqe; + + return NULL; } #ifndef LIBURING_INTERNAL diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h index 166d8ca..0acf05d 100644 --- a/src/include/liburing/io_uring.h +++ b/src/include/liburing/io_uring.h @@ -108,8 +108,25 @@ enum { #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ +/* + * Cooperative task running. When requests complete, they often require + * forcing the submitter to transition to the kernel to complete. If this + * flag is set, work will be done when the task transitions anyway, rather + * than force an inter-processor interrupt reschedule. This avoids interrupting + * a task running in userspace, and saves an IPI. + */ +#define IORING_SETUP_COOP_TASKRUN (1U << 8) +/* + * If COOP_TASKRUN is set, get notified if task work is available for + * running and a kernel transition would be needed to run it. This sets + * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. + */ +#define IORING_SETUP_TASKRUN_FLAG (1U << 9) -enum { +#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ +#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */ + +enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV, @@ -226,6 +243,12 @@ struct io_uring_cqe { __u64 user_data; /* sqe->data submission passed back */ __s32 res; /* result code for this event */ __u32 flags; + + /* + * If the ring is initialized with IORING_SETUP_CQE32, then this field + * contains 16-bytes of padding, doubling the size of the CQE. + */ + __u64 big_cqe[]; }; /* diff --git a/src/queue.c b/src/queue.c index 36b4b29..ce0ecf6 100644 --- a/src/queue.c +++ b/src/queue.c @@ -133,6 +133,10 @@ unsigned io_uring_peek_batch_cqe(struct io_uring *ring, { unsigned ready; bool overflow_checked = false; + int shift = 0; + + if (ring->flags & IORING_SETUP_CQE32) + shift = 1; again: ready = io_uring_cq_ready(ring); @@ -145,7 +149,7 @@ again: count = count > ready ? ready : count; last = head + count; for (;head != last; head++, i++) - cqes[i] = &ring->cq.cqes[head & mask]; + cqes[i] = &ring->cq.cqes[(head & mask) << shift]; return count; } diff --git a/src/setup.c b/src/setup.c index 35981da..d2adc7f 100644 --- a/src/setup.c +++ b/src/setup.c @@ -21,8 +21,12 @@ static int io_uring_mmap(int fd, struct io_uring_params *p, size_t size; int ret; + size = sizeof(struct io_uring_cqe); + if (p->flags & IORING_SETUP_CQE32) + size += sizeof(struct io_uring_cqe); + sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); - cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + cq->ring_sz = p->cq_off.cqes + p->cq_entries * size; if (p->features & IORING_FEAT_SINGLE_MMAP) { if (cq->ring_sz > sq->ring_sz) @@ -56,8 +60,10 @@ static int io_uring_mmap(int fd, struct io_uring_params *p, sq->kdropped = sq->ring_ptr + p->sq_off.dropped; sq->array = sq->ring_ptr + p->sq_off.array; - size = p->sq_entries * sizeof(struct io_uring_sqe); - sq->sqes = __sys_mmap(0, size, PROT_READ | PROT_WRITE, + size = sizeof(struct io_uring_sqe); + if (p->flags & IORING_SETUP_SQE128) + size += 64; + sq->sqes = __sys_mmap(0, size * p->sq_entries, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); if (IS_ERR(sq->sqes)) { ret = PTR_ERR(sq->sqes); @@ -109,7 +115,10 @@ int io_uring_ring_dontfork(struct io_uring *ring) if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr) return -EINVAL; - len = *ring->sq.kring_entries * sizeof(struct io_uring_sqe); + len = sizeof(struct io_uring_sqe); + if (ring->flags & IORING_SETUP_SQE128) + len += 64; + len *= *ring->sq.kring_entries; ret = __sys_madvise(ring->sq.sqes, len, MADV_DONTFORK); if (ret < 0) return ret; @@ -166,8 +175,12 @@ void io_uring_queue_exit(struct io_uring *ring) { struct io_uring_sq *sq = &ring->sq; struct io_uring_cq *cq = &ring->cq; + size_t sqe_size; - __sys_munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); + sqe_size = sizeof(struct io_uring_sqe); + if (ring->flags & IORING_SETUP_SQE128) + sqe_size += 64; + __sys_munmap(sq->sqes, sqe_size * *sq->kring_entries); io_uring_unmap_rings(sq, cq); /* * Not strictly required, but frees up the slot we used now rather @@ -239,17 +252,23 @@ static size_t npages(size_t size, unsigned page_size) #define KRING_SIZE 320 -static size_t rings_size(unsigned entries, unsigned cq_entries, - unsigned page_size) +static size_t rings_size(struct io_uring_params *p, unsigned entries, + unsigned cq_entries, unsigned page_size) { size_t pages, sq_size, cq_size; - cq_size = KRING_SIZE; - cq_size += cq_entries * sizeof(struct io_uring_cqe); + cq_size = sizeof(struct io_uring_cqe); + if (p->flags & IORING_SETUP_CQE32) + cq_size += sizeof(struct io_uring_cqe); + cq_size *= cq_entries; + cq_size += KRING_SIZE; cq_size = (cq_size + 63) & ~63UL; pages = (size_t) 1 << npages(cq_size, page_size); - sq_size = sizeof(struct io_uring_sqe) * entries; + sq_size = sizeof(struct io_uring_sqe); + if (p->flags & IORING_SETUP_SQE128) + sq_size += 64; + sq_size *= entries; pages += (size_t) 1 << npages(sq_size, page_size); return pages * page_size; } @@ -317,7 +336,7 @@ ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p) } page_size = get_page_size(); - return rings_size(entries, cq_entries, page_size); + return rings_size(p, entries, cq_entries, page_size); } /* @@ -11,12 +11,16 @@ #include <fcntl.h> #include "liburing.h" +#include "test.h" + +static int seq; static int test_single_nop(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; + bool cqe32 = (ring->flags & IORING_SETUP_CQE32); sqe = io_uring_get_sqe(ring); if (!sqe) { @@ -25,6 +29,11 @@ static int test_single_nop(struct io_uring *ring) } io_uring_prep_nop(sqe); + if (cqe32) { + sqe->addr = 1234; + sqe->addr2 = 5678; + } + sqe->user_data = ++seq; ret = io_uring_submit(ring); if (ret <= 0) { @@ -37,7 +46,21 @@ static int test_single_nop(struct io_uring *ring) fprintf(stderr, "wait completion %d\n", ret); goto err; } + if (!cqe->user_data) { + fprintf(stderr, "Unexpected 0 user_data\n"); + goto err; + } + if (cqe32) { + if (cqe->big_cqe[0] != 1234) { + fprintf(stderr, "Unexpected extra1\n"); + goto err; + } + if (cqe->big_cqe[1] != 5678) { + fprintf(stderr, "Unexpected extra2\n"); + goto err; + } + } io_uring_cqe_seen(ring, cqe); return 0; err: @@ -49,6 +72,7 @@ static int test_barrier_nop(struct io_uring *ring) struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; + bool cqe32 = (ring->flags & IORING_SETUP_CQE32); for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(ring); @@ -60,6 +84,11 @@ static int test_barrier_nop(struct io_uring *ring) io_uring_prep_nop(sqe); if (i == 4) sqe->flags = IOSQE_IO_DRAIN; + if (cqe32) { + sqe->addr = 1234; + sqe->addr2 = 5678; + } + sqe->user_data = ++seq; } ret = io_uring_submit(ring); @@ -77,6 +106,20 @@ static int test_barrier_nop(struct io_uring *ring) fprintf(stderr, "wait completion %d\n", ret); goto err; } + if (!cqe->user_data) { + fprintf(stderr, "Unexpected 0 user_data\n"); + goto err; + } + if (cqe32) { + if (cqe->big_cqe[0] != 1234) { + fprintf(stderr, "Unexpected extra1\n"); + goto err; + } + if (cqe->big_cqe[1] != 5678) { + fprintf(stderr, "Unexpected extra2\n"); + goto err; + } + } io_uring_cqe_seen(ring, cqe); } @@ -85,16 +128,17 @@ err: return 1; } -int main(int argc, char *argv[]) +static int test_ring(unsigned flags) { struct io_uring ring; + struct io_uring_params p = { }; int ret; - if (argc > 1) - return 0; - - ret = io_uring_queue_init(8, &ring, 0); + p.flags = flags; + ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { + if (ret == -EINVAL) + return 0; fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } @@ -102,13 +146,34 @@ int main(int argc, char *argv[]) ret = test_single_nop(&ring); if (ret) { fprintf(stderr, "test_single_nop failed\n"); - return ret; + goto err; } ret = test_barrier_nop(&ring); if (ret) { fprintf(stderr, "test_barrier_nop failed\n"); - return ret; + goto err; + } + +err: + io_uring_queue_exit(&ring); + return ret; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc > 1) + return 0; + + FOR_ALL_TEST_CONFIGS { + ret = test_ring(IORING_GET_TEST_CONFIG_FLAGS()); + if (ret) { + fprintf(stderr, "Normal ring test failed: %s\n", + IORING_GET_TEST_CONFIG_DESCRIPTION()); + return ret; + } } return 0; diff --git a/test/test.h b/test/test.h new file mode 100644 index 0000000..3628163 --- /dev/null +++ b/test/test.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Description: Test configs for tests. + */ +#ifndef LIBURING_TEST_H +#define LIBURING_TEST_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct io_uring_test_config { + unsigned int flags; + const char *description; +} io_uring_test_config; + +io_uring_test_config io_uring_test_configs[] = { + { 0, "default" }, + { IORING_SETUP_SQE128, "large SQE"}, + { IORING_SETUP_CQE32, "large CQE"}, + { IORING_SETUP_SQE128 | IORING_SETUP_CQE32, "large SQE/CQE" }, +}; + +#define FOR_ALL_TEST_CONFIGS \ + for (int i = 0; i < sizeof(io_uring_test_configs) / sizeof(io_uring_test_configs[0]); i++) + +#define IORING_GET_TEST_CONFIG_FLAGS() (io_uring_test_configs[i].flags) +#define IORING_GET_TEST_CONFIG_DESCRIPTION() (io_uring_test_configs[i].description) + + +#ifdef __cplusplus +} +#endif + +#endif |