summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXiaoguang Wang <xiaoguang.wang@linux.alibaba.com>2020-03-11 09:26:09 +0800
committerJens Axboe <axboe@kernel.dk>2020-03-11 07:14:12 -0600
commit32b2244a840a90ea94ba42392de5c48d53f521f5 (patch)
treeb397304758f842a8b77ff0cef06165978be535ae
parent469956e853ccdba72bb82ad2eea6e8ab6b15791f (diff)
io_uring: io_uring_enter(2) don't poll while SETUP_IOPOLL|SETUP_SQPOLL enabled
When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, applications don't need to do io completion events polling again, they can rely on io_sq_thread to do polling work, which can reduce cpu usage and uring_lock contention. I modify fio io_uring engine codes a bit to evaluate the performance: static int fio_ioring_getevents(struct thread_data *td, unsigned int min, continue; } - if (!o->sqpoll_thread) { + if (o->sqpoll_thread && o->hipri) { r = io_uring_enter(ld, 0, actual_min, IORING_ENTER_GETEVENTS); if (r < 0) { and use "fio -name=fiotest -filename=/dev/nvme0n1 -iodepth=$depth -thread -rw=read -ioengine=io_uring -hipri=1 -sqthread_poll=1 -direct=1 -bs=4k -size=10G -numjobs=1 -time_based -runtime=120" original codes -------------------------------------------------------------------- iodepth | 4 | 8 | 16 | 32 | 64 bw | 1133MB/s | 1519MB/s | 2090MB/s | 2710MB/s | 3012MB/s fio cpu usage | 100% | 100% | 100% | 100% | 100% -------------------------------------------------------------------- with patch -------------------------------------------------------------------- iodepth | 4 | 8 | 16 | 32 | 64 bw | 1196MB/s | 1721MB/s | 2351MB/s | 2977MB/s | 3357MB/s fio cpu usage | 63.8% | 74.4%% | 81.1% | 83.7% | 82.4% -------------------------------------------------------------------- bw improve | 5.5% | 13.2% | 12.3% | 9.8% | 11.5% -------------------------------------------------------------------- From above test results, we can see that bw has above 5.5%~13% improvement, and fio process's cpu usage also drops much. Note this won't improve io_sq_thread's cpu usage when SETUP_IOPOLL|SETUP_SQPOLL are both enabled, in this case, io_sq_thread always has 100% cpu usage. I think this patch will be friendly to applications which will often use io_uring_wait_cqe() or similar from liburing. Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--fs/io_uring.c11
1 files changed, 10 insertions, 1 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index eedcf9aaee3c..9f1a462eb780 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1732,6 +1732,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
io_commit_cqring(ctx);
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ io_cqring_ev_posted(ctx);
io_free_req_many(ctx, &rb);
}
@@ -7408,7 +7410,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
min_complete = min(min_complete, ctx->cq_entries);
- if (ctx->flags & IORING_SETUP_IOPOLL) {
+ /*
+ * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
+ * space applications don't need to do io completion events
+ * polling again, they can rely on io_sq_thread to do polling
+ * work, which can reduce cpu usage and uring_lock contention.
+ */
+ if (ctx->flags & IORING_SETUP_IOPOLL &&
+ !(ctx->flags & IORING_SETUP_SQPOLL)) {
ret = io_iopoll_check(ctx, &nr_events, min_complete);
} else {
ret = io_cqring_wait(ctx, min_complete, sig, sigsz);