author | Jens Axboe <axboe@kernel.dk> | 2021-02-19 12:33:30 -0700
---|---|---
committer | Jens Axboe <axboe@kernel.dk> | 2021-02-19 13:07:17 -0700
commit | 779f4c5754c5a9d30dfe909fad2a2546afd70621 |
tree | e8b130b8c1e4fbdb732b590b61ac2d69791537ef |
parent | 0ed35371334de739d8a3293408be41f97a8f93bf |
io-wq: make hashed work map + lock per-ctx (io_uring-worker.v2)
Before the io-wq thread change, we maintained a hash work map and lock
per-node per-ring. That wasn't ideal, as we really wanted it to be per
ring. But now that we have per-task workers, the hash map ends up being
just per-task. That'll work just fine for the normal case of having
one task use a ring, but if you share the ring between tasks, then it's
considerably worse than it was before.
Make the hash map per ctx instead, which provides full per-ctx buffered
write serialization on hashed writes.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
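For readers outside the tree: hashed work serializes buffered writes to the same file by letting only one worker run a given hash bucket at a time. Below is a minimal userspace sketch of that claim/release pattern, assuming GCC/clang __atomic builtins as stand-ins for the kernel's test_and_set_bit()/clear_bit(); the bucket numbers and helper names are illustrative, not io-wq's.

/*
 * Minimal sketch of the hashed-work claim/release pattern, in userspace.
 * Assumptions: __atomic builtins replace the kernel's atomic bitops, and
 * the buckets fit one 64-bit word.
 */
#include <stdio.h>

static unsigned long hash_map;	/* one bit per hash bucket, shared by workers */

/* returns 1 if the caller now owns the bucket and may run the work */
static int claim_bucket(unsigned int hash)
{
	unsigned long bit = 1UL << hash;

	/* like test_and_set_bit(): set atomically, report the old state */
	return !(__atomic_fetch_or(&hash_map, bit, __ATOMIC_ACQUIRE) & bit);
}

static void release_bucket(unsigned int hash)
{
	/* like clear_bit(): the next worker may now claim the bucket */
	__atomic_fetch_and(&hash_map, ~(1UL << hash), __ATOMIC_RELEASE);
}

int main(void)
{
	unsigned int a = 3, b = 17;	/* pretend two inodes hashed here */

	printf("claim a: %d\n", claim_bucket(a));	/* 1: bucket was free */
	printf("claim a again: %d\n", claim_bucket(a));	/* 0: must wait */
	printf("claim b: %d\n", claim_bucket(b));	/* 1: independent bucket */
	release_bucket(a);
	printf("claim a after release: %d\n", claim_bucket(a));	/* 1 again */
	return 0;
}

Work that loses the claim stays queued; in io-wq the winner also cuts everything with the same hash out of the list in one go via hash_tail, per the "[work, tail]" comment in the diff below.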
-rw-r--r-- | fs/io-wq.c | 10
-rw-r--r-- | fs/io-wq.h | 2
-rw-r--r-- | fs/io_uring.c | 4
3 files changed, 12 insertions, 4 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index b0d09f60200b..9d5ab6d744dc 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -85,7 +85,6 @@ struct io_wqe {
 	struct {
 		raw_spinlock_t lock;
 		struct io_wq_work_list work_list;
-		unsigned long hash_map;
 		unsigned flags;
 	} ____cacheline_aligned_in_smp;
 
@@ -111,6 +110,9 @@ struct io_wq {
 
 	struct task_struct *manager;
 	struct user_struct *user;
+
+	unsigned long *hash_map;
+
 	refcount_t refs;
 	struct completion done;
 
@@ -353,8 +355,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
 
 		/* hashed, can run if not already running */
 		hash = io_get_work_hash(work);
-		if (!(wqe->hash_map & BIT(hash))) {
-			wqe->hash_map |= BIT(hash);
+		if (!test_and_set_bit(hash, wqe->wq->hash_map)) {
 			/* all items with this hash lie in [work, tail] */
 			tail = wqe->hash_tail[hash];
 			wqe->hash_tail[hash] = NULL;
@@ -452,7 +453,7 @@ get_next:
 
 		if (hash != -1U && !next_hashed) {
 			raw_spin_lock_irq(&wqe->lock);
-			wqe->hash_map &= ~BIT_ULL(hash);
+			clear_bit(hash, wq->hash_map);
 			wqe->flags &= ~IO_WQE_FLAG_STALLED;
 			/* skip unnecessary unlock-lock wqe->lock */
 			if (!work)
@@ -975,6 +976,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	if (ret)
 		goto err_wqes;
 
+	wq->hash_map = data->hash_map;
 	wq->free_work = data->free_work;
 	wq->do_work = data->do_work;
 
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 3c63a99d1629..d5f4e1ae2d5f 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -96,6 +96,8 @@ typedef void (io_wq_work_fn)(struct io_wq_work *);
 
 struct io_wq_data {
 	struct user_struct *user;
+	unsigned long *hash_map;
+
 	io_wq_work_fn *do_work;
 	free_work_fn *free_work;
 };
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 239eacec3f3a..e71bc4e3bf08 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -359,6 +359,9 @@ struct io_ring_ctx {
 		unsigned		cached_cq_overflow;
 		unsigned long		sq_check_overflow;
 
+		/* hashed buffered write serialization */
+		unsigned long		hash_map;
+
 		struct list_head	defer_list;
 		struct list_head	timeout_list;
 		struct list_head	cq_overflow_list;
@@ -7796,6 +7799,7 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx)
 	unsigned int concurrency;
 
 	data.user = ctx->user;
+	data.hash_map = &ctx->hash_map;
 	data.free_work = io_free_work;
 	data.do_work = io_wq_submit_work;
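To see the ownership change at a glance, here is a hedged structural sketch (trimmed stand-in types, not the kernel structs): the bitmap now lives in the per-ring context, and each task's io-wq merely borrows a pointer to it, so tasks sharing a ring serialize against the same map.

/*
 * Sketch of the ownership change this patch makes. The struct and
 * function names below are illustrative stand-ins for io_ring_ctx,
 * io_wq_data, io_wq, and io_wq_create().
 */
#include <stdio.h>

struct ring_ctx {			/* cf. io_ring_ctx */
	unsigned long hash_map;		/* owned here, one per ring */
};

struct wq_data {			/* cf. io_wq_data */
	unsigned long *hash_map;
};

struct wq {				/* cf. io_wq */
	unsigned long *hash_map;	/* borrowed from the ctx */
};

static void wq_create(struct wq *wq, const struct wq_data *data)
{
	wq->hash_map = data->hash_map;	/* mirrors the io_wq_create() hunk */
}

int main(void)
{
	struct ring_ctx ctx = { .hash_map = 0 };
	struct wq_data data = { .hash_map = &ctx.hash_map };
	struct wq wq_task1, wq_task2;

	/* two tasks sharing one ring each get their own io-wq ... */
	wq_create(&wq_task1, &data);
	wq_create(&wq_task2, &data);

	/* ... but both point at the single per-ctx map */
	printf("shared: %d\n", wq_task1.hash_map == wq_task2.hash_map);
	return 0;
}

This sharing is also why the io-wq.c hunks switch from plain bit twiddling under wqe->lock to test_and_set_bit()/clear_bit(): the map is no longer private to one wqe, so updates must be atomic.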