summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXiaoguang Wang <xiaoguang.wang@linux.alibaba.com>2020-03-02 12:18:11 +0800
committerJens Axboe <axboe@kernel.dk>2020-03-02 07:03:12 -0700
commit8a031500a30416844efdcf3b30e42b7cc1dece5b (patch)
tree11335f699395ea585032758bf9520f27d5fb8aa3
parent4c7ae1de848dc852bcc5853ab0a012102764fa93 (diff)
downloadliburing-8a031500a30416844efdcf3b30e42b7cc1dece5b.tar.gz
liburing-8a031500a30416844efdcf3b30e42b7cc1dece5b.tar.bz2
__io_uring_get_cqe: eliminate unnecessary io_uring_enter() syscalls
When a user application follows a programming model such as submitting one sqe and then waiting for its completion event, __io_uring_get_cqe() will result in many unnecessary syscalls; see the test program below: int main(int argc, char *argv[]) { struct io_uring ring; int fd, ret; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec iov; off_t offset, filesize = 0; void *buf; if (argc < 2) { printf("%s: file\n", argv[0]); return 1; } ret = io_uring_queue_init(4, &ring, 0); if (ret < 0) { fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return 1; } fd = open(argv[1], O_RDONLY | O_DIRECT); if (fd < 0) { perror("open"); return 1; } if (posix_memalign(&buf, 4096, 4096)) return 1; iov.iov_base = buf; iov.iov_len = 4096; offset = 0; do { sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("here\n"); break; } io_uring_prep_readv(sqe, fd, &iov, 1, offset); ret = io_uring_submit(&ring); if (ret < 0) { fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret)); return 1; } if (cqe->res <= 0) { if (cqe->res < 0) { fprintf(stderr, "got eror: %d\n", cqe->res); ret = 1; } io_uring_cqe_seen(&ring, cqe); break; } offset += cqe->res; filesize += cqe->res; io_uring_cqe_seen(&ring, cqe); } while (1); printf("filesize: %ld\n", filesize); close(fd); io_uring_queue_exit(&ring); return 0; } Run: dd if=/dev/zero of=testfile bs=4096 count=16 ./test testfile and use bpftrace to trace io_uring_enter syscalls. With the original code: [lege@localhost ~]$ sudo bpftrace -e "tracepoint:syscalls:sys_enter_io_uring_enter {@c[tid] = count();}" Attaching 1 probe... @c[11184]: 49 The above test issues 49 syscalls, which is counterintuitive. After looking into the code, this is because __io_uring_get_cqe issues one more syscall than necessary: indeed, when __io_uring_get_cqe issues the first syscall, one cqe should already be ready, so we don't need to wait again.
To fix this issue, set wait_nr to zero after the first syscall. With this patch, bpftrace shows that the number of io_uring_enter syscalls is 33. Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--src/queue.c2
1 files changed, 2 insertions, 0 deletions
diff --git a/src/queue.c b/src/queue.c
index ebe85ac..a6376a6 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -54,6 +54,8 @@ int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
if (wait_nr || submit)
ret = __sys_io_uring_enter(ring->ring_fd, submit,
wait_nr, flags, sigmask);
+ if (wait_nr)
+ wait_nr = 0;
if (ret < 0)
err = -errno;
submit -= ret;