summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2020-09-01 10:41:46 +0200
committerJens Axboe <axboe@kernel.dk>2020-10-12 14:59:33 -0600
commitece95827da13d9f9ded011777c103dc4762b5d94 (patch)
tree4785070ce6542d298eebacd561dddbe452ea4e54
parent291017f8ad7f58e52072d8b061dece287c908206 (diff)
io_wq: Make io_wqe::lock a raw_spinlock_t
commit 95da84659226d75698a1ab958be0af21d9cc2a9c upstream. During a context switch the scheduler invokes wq_worker_sleeping() with disabled preemption. Disabling preemption is needed because it protects access to `worker->sleeping'. As an optimisation it avoids invoking schedule() within the schedule path as part of possible wake up (thus preempt_enable_no_resched() afterwards). The io-wq has been added to the mix in the same section with disabled preemption. This breaks on PREEMPT_RT because io_wq_worker_sleeping() acquires a spinlock_t. Also within the schedule() the spinlock_t must be acquired after tsk_is_pi_blocked() otherwise it will block on the sleeping lock again while scheduling out. While playing with `io_uring-bench' I didn't notice a significant latency spike after converting io_wqe::lock to a raw_spinlock_t. The latency was more or less the same. In order to keep the spinlock_t it would have to be moved after the tsk_is_pi_blocked() check which would introduce a branch instruction into the hot path. The lock is used to maintain the `work_list' and wakes one task up at most. Should io_wqe_cancel_pending_work() cause latency spikes, while searching for a specific item, then it would need to drop the lock during iterations. revert_creds() is also invoked under the lock. According to debug cred::non_rcu is 0. Otherwise it should be moved outside of the locked section because put_cred_rcu()->free_uid() acquires a sleeping lock. Convert io_wqe::lock to a raw_spinlock_t.c Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--fs/io-wq.c52
1 files changed, 26 insertions, 26 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5257bb943d76..19f9072f310a 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -88,7 +88,7 @@ enum {
*/
struct io_wqe {
struct {
- spinlock_t lock;
+ raw_spinlock_t lock;
struct io_wq_work_list work_list;
unsigned long hash_map;
unsigned flags;
@@ -149,7 +149,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
if (current->files != worker->restore_files) {
__acquire(&wqe->lock);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
dropped_lock = true;
task_lock(current);
@@ -168,7 +168,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
if (worker->mm) {
if (!dropped_lock) {
__acquire(&wqe->lock);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
dropped_lock = true;
}
__set_current_state(TASK_RUNNING);
@@ -222,17 +222,17 @@ static void io_worker_exit(struct io_worker *worker)
worker->flags = 0;
preempt_enable();
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
if (__io_worker_unuse(wqe, worker)) {
__release(&wqe->lock);
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
}
acct->nr_workers--;
nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers +
wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
/* all workers gone, wq exit can proceed */
if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
@@ -508,7 +508,7 @@ get_next:
else if (!wq_list_empty(&wqe->work_list))
wqe->flags |= IO_WQE_FLAG_STALLED;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (!work)
break;
io_assign_current_work(worker, work);
@@ -543,7 +543,7 @@ get_next:
io_wqe_enqueue(wqe, linked);
if (hash != -1U && !next_hashed) {
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
wqe->hash_map &= ~BIT_ULL(hash);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
/* dependent work is not hashed */
@@ -551,11 +551,11 @@ get_next:
/* skip unnecessary unlock-lock wqe->lock */
if (!work)
goto get_next;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}
} while (work);
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
} while (1);
}
@@ -570,7 +570,7 @@ static int io_wqe_worker(void *data)
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
set_current_state(TASK_INTERRUPTIBLE);
loop:
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
__set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
@@ -581,7 +581,7 @@ loop:
__release(&wqe->lock);
goto loop;
}
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (signal_pending(current))
flush_signals(current);
if (schedule_timeout(WORKER_IDLE_TIMEOUT))
@@ -593,11 +593,11 @@ loop:
}
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (!wq_list_empty(&wqe->work_list))
io_worker_handle_work(worker);
else
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}
io_worker_exit(worker);
@@ -637,9 +637,9 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
worker->flags &= ~IO_WORKER_F_RUNNING;
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
io_wqe_dec_running(wqe, worker);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
@@ -663,7 +663,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
return false;
}
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
@@ -672,7 +672,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
acct->nr_workers++;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (index == IO_WQ_ACCT_UNBOUND)
atomic_inc(&wq->user->processes);
@@ -727,12 +727,12 @@ static int io_wq_manager(void *data)
if (!node_online(node))
continue;
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
fork_worker[IO_WQ_ACCT_BOUND] = true;
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
fork_worker[IO_WQ_ACCT_UNBOUND] = true;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (fork_worker[IO_WQ_ACCT_BOUND])
create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
if (fork_worker[IO_WQ_ACCT_UNBOUND])
@@ -829,10 +829,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
}
work_flags = work->flags;
- spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock_irqsave(&wqe->lock, flags);
io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);
if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
!atomic_read(&acct->nr_running))
@@ -959,13 +959,13 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
unsigned long flags;
retry:
- spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock_irqsave(&wqe->lock, flags);
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
io_wqe_remove_pending(wqe, work, prev);
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);
io_run_cancel(work, wqe);
match->nr_pending++;
if (!match->cancel_all)
@@ -974,7 +974,7 @@ retry:
/* not safe to continue after unlock */
goto retry;
}
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1082,7 +1082,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
}
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
wqe->wq = wq;
- spin_lock_init(&wqe->lock);
+ raw_spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
INIT_LIST_HEAD(&wqe->all_list);