// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/debug.h>
#include <linux/timer.h>

#include "rtmutex_common.h"

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner        bit0
 * NULL         0       lock is free (fast acquire possible)
 * NULL         1       lock is free and has waiters and the top waiter
 *                      is going to take the lock*
 * taskpointer  0       lock is held (fast release possible)
 * taskpointer  1       lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
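
/*
 * Illustrative sketch (not part of the upstream code): the encoding described
 * above packs the owner pointer and the waiters flag into a single word.
 * Assuming RT_MUTEX_HAS_WAITERS is bit 0 (1UL), as stated above, a plain
 * model of the encode/decode steps looks like:
 *
 *      unsigned long val = (unsigned long)owner_task;  // bit 0 is clear,
 *                                                      // pointers are aligned
 *      if (has_waiters)
 *              val |= 1UL;                             // set the waiters bit
 *
 *      struct task_struct *owner = (struct task_struct *)(val & ~1UL);
 *      bool waiters = val & 1UL;
 */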

static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
        unsigned long val = (unsigned long)owner;

        if (rt_mutex_has_waiters(lock))
                val |= RT_MUTEX_HAS_WAITERS;

        lock->owner = (struct task_struct *)val;
}

static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        if (rt_mutex_has_waiters(lock))
                return;

        /*
         * The rbtree has no waiters enqueued, now make sure that the
         * lock->owner still has the waiters bit set, otherwise the
         * following can happen:
         *
         * CPU 0                CPU 1                   CPU2
         * l->owner=T1
         *              rt_mutex_lock(l)
         *              lock(l->lock)
         *              l->owner = T1 | HAS_WAITERS;
         *              enqueue(T2)
         *              boost()
         *                unlock(l->lock)
         *              block()
         *
         *                                      rt_mutex_lock(l)
         *                                      lock(l->lock)
         *                                      l->owner = T1 | HAS_WAITERS;
         *                                      enqueue(T3)
         *                                      boost()
         *                                        unlock(l->lock)
         *                                      block()
         *              signal(->T2)            signal(->T3)
         *              lock(l->lock)
         *              dequeue(T2)
         *              deboost()
         *                unlock(l->lock)
         *                                      lock(l->lock)
         *                                      dequeue(T3)
         *                                       ==> wait list is empty
         *                                      deboost()
         *                                       unlock(l->lock)
         *              lock(l->lock)
         *              fixup_rt_mutex_waiters()
         *                if (wait_list_empty(l) {
         *                  owner = l->owner & ~HAS_WAITERS;
         *                  l->owner = owner
         *                    ==> l->owner = T1
         *                }
         *                                      lock(l->lock)
         * rt_mutex_unlock(l)                   fixup_rt_mutex_waiters()
         *                                        if (wait_list_empty(l) {
         *                                          owner = l->owner & ~HAS_WAITERS;
         * cmpxchg(l->owner, T1, NULL)
         *  ===> Success (l->owner = NULL)
         *
         *                                          l->owner = owner
         *                                            ==> l->owner = T1
         *                                        }
         *
         * With the check for the waiter bit in place T3 on CPU2 will not
         * overwrite. All tasks fiddling with the waiters bit are
         * serialized by l->lock, so nothing else can modify the waiters
         * bit. If the bit is set then nothing can change l->owner either
         * so the simple RMW is safe. The cmpxchg() will simply fail if it
         * happens in the middle of the RMW because the waiters bit is
         * still set.
         */
        owner = READ_ONCE(*p);
        if (owner & RT_MUTEX_HAS_WAITERS)
                WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        do {
                owner = *p;
        } while (cmpxchg_relaxed(p, owner,
                                 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
                                        unsigned long flags)
        __releases(lock->wait_lock)
{
        struct task_struct *owner = rt_mutex_owner(lock);

        clear_rt_mutex_waiters(lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         * cmpxchg(p, owner, 0) == owner
         *                                      mark_rt_mutex_waiters(lock);
         *                                      acquire(lock);
         * or:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      mark_rt_mutex_waiters(lock);
         *
         * cmpxchg(p, owner, 0) != owner
         *                                      enqueue_waiter();
         *                                      unlock(wait_lock);
         * lock(wait_lock);
         * wake waiter();
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      acquire(lock);
         */
        return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
# define rt_mutex_cmpxchg_relaxed(l,c,n)        (0)
# define rt_mutex_cmpxchg_acquire(l,c,n)        (0)
# define rt_mutex_cmpxchg_release(l,c,n)        (0)

static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
                                        unsigned long flags)
        __releases(lock->wait_lock)
{
        lock->owner = NULL;
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
}
#endif

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p) \
        &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }

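/*
 * Usage sketch (illustrative): task_to_waiter() builds an anonymous on-stack
 * rt_mutex_waiter from a task so a task can be compared against real queued
 * waiters with the helpers below, e.g.:
 *
 *      if (rt_mutex_waiter_equal(waiter, task_to_waiter(task)))
 *              return;         // nothing to adjust, priorities already match
 */
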
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
                     struct rt_mutex_waiter *right)
{
        if (left->prio < right->prio)
                return 1;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 1 above,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
                return dl_time_before(left->deadline, right->deadline);

        return 0;
}

static inline int
rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
                      struct rt_mutex_waiter *right)
{
        if (left->prio != right->prio)
                return 0;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 0 above,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
                return left->deadline == right->deadline;

        return 1;
}

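/*
 * Worked example (illustrative): kernel-internal prio values order "lower is
 * more important", so a waiter with prio 10 sorts before one with prio 20.
 * Deadline tasks all share the same prio value, so for two dl_prio() waiters
 * the earlier absolute deadline wins the tie-break:
 *
 *      left->deadline  = 1000000;              // hypothetical values, in ns
 *      right->deadline = 2000000;
 *      rt_mutex_waiter_less(left, right);      // == 1, earlier deadline
 *      rt_mutex_waiter_equal(left, right);     // == 0, deadlines differ
 */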
fb00aca4
PZ
272static void
273rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
274{
a23ba907 275 struct rb_node **link = &lock->waiters.rb_root.rb_node;
fb00aca4
PZ
276 struct rb_node *parent = NULL;
277 struct rt_mutex_waiter *entry;
a23ba907 278 bool leftmost = true;
fb00aca4
PZ
279
280 while (*link) {
281 parent = *link;
282 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
283 if (rt_mutex_waiter_less(waiter, entry)) {
284 link = &parent->rb_left;
285 } else {
286 link = &parent->rb_right;
a23ba907 287 leftmost = false;
fb00aca4
PZ
288 }
289 }
290
fb00aca4 291 rb_link_node(&waiter->tree_entry, parent, link);
a23ba907 292 rb_insert_color_cached(&waiter->tree_entry, &lock->waiters, leftmost);
fb00aca4
PZ
293}
294
295static void
296rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
297{
298 if (RB_EMPTY_NODE(&waiter->tree_entry))
299 return;
300
a23ba907 301 rb_erase_cached(&waiter->tree_entry, &lock->waiters);
fb00aca4
PZ
302 RB_CLEAR_NODE(&waiter->tree_entry);
303}
304
305static void
306rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
307{
a23ba907 308 struct rb_node **link = &task->pi_waiters.rb_root.rb_node;
fb00aca4
PZ
309 struct rb_node *parent = NULL;
310 struct rt_mutex_waiter *entry;
a23ba907 311 bool leftmost = true;
fb00aca4
PZ
312
313 while (*link) {
314 parent = *link;
315 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
316 if (rt_mutex_waiter_less(waiter, entry)) {
317 link = &parent->rb_left;
318 } else {
319 link = &parent->rb_right;
a23ba907 320 leftmost = false;
fb00aca4
PZ
321 }
322 }
323
fb00aca4 324 rb_link_node(&waiter->pi_tree_entry, parent, link);
a23ba907 325 rb_insert_color_cached(&waiter->pi_tree_entry, &task->pi_waiters, leftmost);
fb00aca4
PZ
326}
327
328static void
329rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
330{
331 if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
332 return;
333
a23ba907 334 rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
fb00aca4
PZ
335 RB_CLEAR_NODE(&waiter->pi_tree_entry);
336}
337
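/*
 * The four helpers above all follow the same rb_root_cached pattern: walk
 * down from the root comparing with rt_mutex_waiter_less(), remember whether
 * we ever went right (leftmost tracking), then link and recolor. A generic
 * sketch of that insertion idiom, assuming a hypothetical "struct item" with
 * an rb_node member "rb" and a less() predicate:
 *
 *      struct rb_node **link = &root->rb_root.rb_node, *parent = NULL;
 *      bool leftmost = true;
 *
 *      while (*link) {
 *              parent = *link;
 *              if (less(new, rb_entry(parent, struct item, rb))) {
 *                      link = &parent->rb_left;
 *              } else {
 *                      link = &parent->rb_right;
 *                      leftmost = false;
 *              }
 *      }
 *      rb_link_node(&new->rb, parent, link);
 *      rb_insert_color_cached(&new->rb, root, leftmost);
 */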
acd58620 338static void rt_mutex_adjust_prio(struct task_struct *p)
c365c292 339{
acd58620 340 struct task_struct *pi_task = NULL;
e96a7705 341
acd58620 342 lockdep_assert_held(&p->pi_lock);
c365c292 343
acd58620
PZ
344 if (task_has_pi_waiters(p))
345 pi_task = task_top_pi_waiter(p)->task;
c365c292 346
acd58620 347 rt_mutex_setprio(p, pi_task);
23f78d4a
IM
348}
349
8930ed80
TG
350/*
351 * Deadlock detection is conditional:
352 *
353 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
354 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
355 *
356 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
357 * conducted independent of the detect argument.
358 *
359 * If the waiter argument is NULL this indicates the deboost path and
360 * deadlock detection is disabled independent of the detect argument
361 * and the config settings.
362 */
363static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
364 enum rtmutex_chainwalk chwalk)
365{
366 /*
367 * This is just a wrapper function for the following call,
368 * because debug_rt_mutex_detect_deadlock() smells like a magic
369 * debug feature and I wanted to keep the cond function in the
370 * main source file along with the comments instead of having
371 * two of the same in the headers.
372 */
373 return debug_rt_mutex_detect_deadlock(waiter, chwalk);
374}
375
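/*
 * Decision sketch (illustrative, mirroring the rules spelled out above):
 * with CONFIG_DEBUG_RT_MUTEXES=n this effectively evaluates to
 *
 *      chwalk == RT_MUTEX_FULL_CHAINWALK
 *
 * while with CONFIG_DEBUG_RT_MUTEXES=y it effectively evaluates to
 *
 *      waiter != NULL
 */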
23f78d4a
IM
376/*
377 * Max number of times we'll walk the boosting chain:
378 */
379int max_lock_depth = 1024;
380
82084984
TG
381static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
382{
383 return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
384}
385
23f78d4a
IM
386/*
387 * Adjust the priority chain. Also used for deadlock detection.
388 * Decreases task's usage by one - may thus free the task.
0c106173 389 *
82084984
TG
390 * @task: the task owning the mutex (owner) for which a chain walk is
391 * probably needed
e6beaa36 392 * @chwalk: do we have to carry out deadlock detection?
82084984
TG
393 * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
394 * things for a task that has just got its priority adjusted, and
395 * is waiting on a mutex)
396 * @next_lock: the mutex on which the owner of @orig_lock was blocked before
397 * we dropped its pi_lock. Is never dereferenced, only used for
398 * comparison to detect lock chain changes.
0c106173 399 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
82084984
TG
400 * its priority to the mutex owner (can be NULL in the case
401 * depicted above or if the top waiter is gone away and we are
402 * actually deboosting the owner)
403 * @top_task: the current top waiter
0c106173 404 *
23f78d4a 405 * Returns 0 or -EDEADLK.
3eb65aea
TG
406 *
407 * Chain walk basics and protection scope
408 *
409 * [R] refcount on task
410 * [P] task->pi_lock held
411 * [L] rtmutex->wait_lock held
412 *
413 * Step Description Protected by
414 * function arguments:
415 * @task [R]
416 * @orig_lock if != NULL @top_task is blocked on it
417 * @next_lock Unprotected. Cannot be
418 * dereferenced. Only used for
419 * comparison.
420 * @orig_waiter if != NULL @top_task is blocked on it
421 * @top_task current, or in case of proxy
422 * locking protected by calling
423 * code
424 * again:
425 * loop_sanity_check();
426 * retry:
427 * [1] lock(task->pi_lock); [R] acquire [P]
428 * [2] waiter = task->pi_blocked_on; [P]
429 * [3] check_exit_conditions_1(); [P]
430 * [4] lock = waiter->lock; [P]
431 * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
432 * unlock(task->pi_lock); release [P]
433 * goto retry;
434 * }
435 * [6] check_exit_conditions_2(); [P] + [L]
436 * [7] requeue_lock_waiter(lock, waiter); [P] + [L]
437 * [8] unlock(task->pi_lock); release [P]
438 * put_task_struct(task); release [R]
439 * [9] check_exit_conditions_3(); [L]
440 * [10] task = owner(lock); [L]
441 * get_task_struct(task); [L] acquire [R]
442 * lock(task->pi_lock); [L] acquire [P]
443 * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
444 * [12] check_exit_conditions_4(); [P] + [L]
445 * [13] unlock(task->pi_lock); release [P]
446 * unlock(lock->wait_lock); release [L]
447 * goto again;
23f78d4a 448 */
bd197234 449static int rt_mutex_adjust_prio_chain(struct task_struct *task,
8930ed80 450 enum rtmutex_chainwalk chwalk,
bd197234 451 struct rt_mutex *orig_lock,
82084984 452 struct rt_mutex *next_lock,
bd197234
TG
453 struct rt_mutex_waiter *orig_waiter,
454 struct task_struct *top_task)
23f78d4a 455{
23f78d4a 456 struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
a57594a1 457 struct rt_mutex_waiter *prerequeue_top_waiter;
8930ed80 458 int ret = 0, depth = 0;
a57594a1 459 struct rt_mutex *lock;
8930ed80 460 bool detect_deadlock;
67792e2c 461 bool requeue = true;
23f78d4a 462
8930ed80 463 detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
23f78d4a
IM
464
465 /*
466 * The (de)boosting is a step by step approach with a lot of
467 * pitfalls. We want this to be preemptible and we want to hold a
468 * maximum of two locks per step. So we have to check
469 * carefully whether things change under us.
470 */
471 again:
3eb65aea
TG
472 /*
473 * We limit the lock chain length for each invocation.
474 */
23f78d4a
IM
475 if (++depth > max_lock_depth) {
476 static int prev_max;
477
478 /*
479 * Print this only once. If the admin changes the limit,
480 * print a new message when reaching the limit again.
481 */
482 if (prev_max != max_lock_depth) {
483 prev_max = max_lock_depth;
484 printk(KERN_WARNING "Maximum lock depth %d reached "
485 "task: %s (%d)\n", max_lock_depth,
ba25f9dc 486 top_task->comm, task_pid_nr(top_task));
23f78d4a
IM
487 }
488 put_task_struct(task);
489
3d5c9340 490 return -EDEADLK;
23f78d4a 491 }
3eb65aea
TG
492
493 /*
494 * We are fully preemptible here and only hold the refcount on
495 * @task. So everything can have changed under us since the
496 * caller or our own code below (goto retry/again) dropped all
497 * locks.
498 */
23f78d4a
IM
499 retry:
500 /*
3eb65aea 501 * [1] Task cannot go away as we did a get_task() before !
23f78d4a 502 */
b4abf910 503 raw_spin_lock_irq(&task->pi_lock);
23f78d4a 504
3eb65aea
TG
505 /*
506 * [2] Get the waiter on which @task is blocked on.
507 */
23f78d4a 508 waiter = task->pi_blocked_on;
3eb65aea
TG
509
510 /*
511 * [3] check_exit_conditions_1() protected by task->pi_lock.
512 */
513
23f78d4a
IM
514 /*
515 * Check whether the end of the boosting chain has been
516 * reached or the state of the chain has changed while we
517 * dropped the locks.
518 */
8161239a 519 if (!waiter)
23f78d4a
IM
520 goto out_unlock_pi;
521
1a539a87
TG
522 /*
523 * Check the orig_waiter state. After we dropped the locks,
8161239a 524 * the previous owner of the lock might have released the lock.
1a539a87 525 */
8161239a 526 if (orig_waiter && !rt_mutex_owner(orig_lock))
1a539a87
TG
527 goto out_unlock_pi;
528
82084984
TG
529 /*
530 * We dropped all locks after taking a refcount on @task, so
531 * the task might have moved on in the lock chain or even left
532 * the chain completely and blocks now on an unrelated lock or
533 * on @orig_lock.
534 *
535 * We stored the lock on which @task was blocked in @next_lock,
536 * so we can detect the chain change.
537 */
538 if (next_lock != waiter->lock)
539 goto out_unlock_pi;
540
1a539a87
TG
541 /*
542 * Drop out, when the task has no waiters. Note,
543 * top_waiter can be NULL, when we are in the deboosting
544 * mode!
545 */
397335f0
TG
546 if (top_waiter) {
547 if (!task_has_pi_waiters(task))
548 goto out_unlock_pi;
549 /*
550 * If deadlock detection is off, we stop here if we
67792e2c
TG
551 * are not the top pi waiter of the task. If deadlock
552 * detection is enabled we continue, but stop the
553 * requeueing in the chain walk.
397335f0 554 */
67792e2c
TG
555 if (top_waiter != task_top_pi_waiter(task)) {
556 if (!detect_deadlock)
557 goto out_unlock_pi;
558 else
559 requeue = false;
560 }
397335f0 561 }
23f78d4a
IM
562
563 /*
67792e2c
TG
564 * If the waiter priority is the same as the task priority
565 * then there is no further priority adjustment necessary. If
566 * deadlock detection is off, we stop the chain walk. If it's
567 * enabled we continue, but stop the requeueing in the chain
568 * walk.
23f78d4a 569 */
19830e55 570 if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
67792e2c
TG
571 if (!detect_deadlock)
572 goto out_unlock_pi;
573 else
574 requeue = false;
575 }
23f78d4a 576
3eb65aea
TG
577 /*
578 * [4] Get the next lock
579 */
23f78d4a 580 lock = waiter->lock;
3eb65aea
TG
581 /*
582 * [5] We need to trylock here as we are holding task->pi_lock,
583 * which is the reverse lock order versus the other rtmutex
584 * operations.
585 */
d209d74d 586 if (!raw_spin_trylock(&lock->wait_lock)) {
b4abf910 587 raw_spin_unlock_irq(&task->pi_lock);
23f78d4a
IM
588 cpu_relax();
589 goto retry;
590 }
591
397335f0 592 /*
3eb65aea
TG
593 * [6] check_exit_conditions_2() protected by task->pi_lock and
594 * lock->wait_lock.
595 *
397335f0
TG
596 * Deadlock detection. If the lock is the same as the original
597 * lock which caused us to walk the lock chain or if the
598 * current lock is owned by the task which initiated the chain
599 * walk, we detected a deadlock.
600 */
95e02ca9 601 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
8930ed80 602 debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
d209d74d 603 raw_spin_unlock(&lock->wait_lock);
3d5c9340 604 ret = -EDEADLK;
23f78d4a
IM
605 goto out_unlock_pi;
606 }
607
67792e2c
TG
608 /*
609 * If we just follow the lock chain for deadlock detection, no
610 * need to do all the requeue operations. To avoid a truckload
611 * of conditionals around the various places below, just do the
612 * minimum chain walk checks.
613 */
614 if (!requeue) {
615 /*
616 * No requeue[7] here. Just release @task [8]
617 */
b4abf910 618 raw_spin_unlock(&task->pi_lock);
67792e2c
TG
619 put_task_struct(task);
620
621 /*
622 * [9] check_exit_conditions_3 protected by lock->wait_lock.
623 * If there is no owner of the lock, end of chain.
624 */
625 if (!rt_mutex_owner(lock)) {
b4abf910 626 raw_spin_unlock_irq(&lock->wait_lock);
67792e2c
TG
627 return 0;
628 }
629
630 /* [10] Grab the next task, i.e. owner of @lock */
7b3c92b8 631 task = get_task_struct(rt_mutex_owner(lock));
b4abf910 632 raw_spin_lock(&task->pi_lock);
67792e2c
TG
633
634 /*
635 * No requeue [11] here. We just do deadlock detection.
636 *
637 * [12] Store whether owner is blocked
638 * itself. Decision is made after dropping the locks
639 */
640 next_lock = task_blocked_on_lock(task);
641 /*
642 * Get the top waiter for the next iteration
643 */
644 top_waiter = rt_mutex_top_waiter(lock);
645
646 /* [13] Drop locks */
b4abf910
TG
647 raw_spin_unlock(&task->pi_lock);
648 raw_spin_unlock_irq(&lock->wait_lock);
67792e2c
TG
649
650 /* If owner is not blocked, end of chain. */
651 if (!next_lock)
652 goto out_put_task;
653 goto again;
654 }
655
a57594a1
TG
656 /*
657 * Store the current top waiter before doing the requeue
658 * operation on @lock. We need it for the boost/deboost
659 * decision below.
660 */
661 prerequeue_top_waiter = rt_mutex_top_waiter(lock);
23f78d4a 662
9f40a51a 663 /* [7] Requeue the waiter in the lock waiter tree. */
fb00aca4 664 rt_mutex_dequeue(lock, waiter);
e0aad5b4
PZ
665
666 /*
667 * Update the waiter prio fields now that we're dequeued.
668 *
669 * These values can have changed through either:
670 *
671 * sys_sched_set_scheduler() / sys_sched_setattr()
672 *
673 * or
674 *
675 * DL CBS enforcement advancing the effective deadline.
676 *
677 * Even though pi_waiters also uses these fields, and that tree is only
678 * updated in [11], we can do this here, since we hold [L], which
679 * serializes all pi_waiters access and rb_erase() does not care about
680 * the values of the node being removed.
681 */
2d3d891d 682 waiter->prio = task->prio;
e0aad5b4
PZ
683 waiter->deadline = task->dl.deadline;
684
fb00aca4 685 rt_mutex_enqueue(lock, waiter);
23f78d4a 686
3eb65aea 687 /* [8] Release the task */
b4abf910 688 raw_spin_unlock(&task->pi_lock);
2ffa5a5c
TG
689 put_task_struct(task);
690
a57594a1 691 /*
3eb65aea
TG
692 * [9] check_exit_conditions_3 protected by lock->wait_lock.
693 *
a57594a1
TG
694 * We must abort the chain walk if there is no lock owner even
695 * in the deadlock detection case, as we have nothing to
696 * follow here. This is the end of the chain we are walking.
697 */
8161239a
LJ
698 if (!rt_mutex_owner(lock)) {
699 /*
3eb65aea
TG
700 * If the requeue [7] above changed the top waiter,
701 * then we need to wake the new top waiter up to try
702 * to get the lock.
8161239a 703 */
a57594a1 704 if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
8161239a 705 wake_up_process(rt_mutex_top_waiter(lock)->task);
b4abf910 706 raw_spin_unlock_irq(&lock->wait_lock);
2ffa5a5c 707 return 0;
8161239a 708 }
23f78d4a 709
3eb65aea 710 /* [10] Grab the next task, i.e. the owner of @lock */
7b3c92b8 711 task = get_task_struct(rt_mutex_owner(lock));
b4abf910 712 raw_spin_lock(&task->pi_lock);
23f78d4a 713
3eb65aea 714 /* [11] requeue the pi waiters if necessary */
23f78d4a 715 if (waiter == rt_mutex_top_waiter(lock)) {
a57594a1
TG
716 /*
717 * The waiter became the new top (highest priority)
718 * waiter on the lock. Replace the previous top waiter
9f40a51a 719 * in the owner task's pi waiters tree with this waiter
a57594a1
TG
720 * and adjust the priority of the owner.
721 */
722 rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
fb00aca4 723 rt_mutex_enqueue_pi(task, waiter);
acd58620 724 rt_mutex_adjust_prio(task);
23f78d4a 725
a57594a1
TG
726 } else if (prerequeue_top_waiter == waiter) {
727 /*
728 * The waiter was the top waiter on the lock, but is
729 * no longer the top priority waiter. Replace waiter in
9f40a51a 730 * the owner task's pi waiters tree with the new top
a57594a1
TG
731 * (highest priority) waiter and adjust the priority
732 * of the owner.
733 * The new top waiter is stored in @waiter so that
734 * @waiter == @top_waiter evaluates to true below and
735 * we continue to deboost the rest of the chain.
736 */
fb00aca4 737 rt_mutex_dequeue_pi(task, waiter);
23f78d4a 738 waiter = rt_mutex_top_waiter(lock);
fb00aca4 739 rt_mutex_enqueue_pi(task, waiter);
acd58620 740 rt_mutex_adjust_prio(task);
a57594a1
TG
741 } else {
742 /*
743 * Nothing changed. No need to do any priority
744 * adjustment.
745 */
23f78d4a
IM
746 }
747
82084984 748 /*
3eb65aea
TG
749 * [12] check_exit_conditions_4() protected by task->pi_lock
750 * and lock->wait_lock. The actual decisions are made after we
751 * dropped the locks.
752 *
82084984
TG
753 * Check whether the task which owns the current lock is pi
754 * blocked itself. If yes we store a pointer to the lock for
755 * the lock chain change detection above. After we dropped
756 * task->pi_lock next_lock cannot be dereferenced anymore.
757 */
758 next_lock = task_blocked_on_lock(task);
a57594a1
TG
759 /*
760 * Store the top waiter of @lock for the end of chain walk
761 * decision below.
762 */
23f78d4a 763 top_waiter = rt_mutex_top_waiter(lock);
3eb65aea
TG
764
765 /* [13] Drop the locks */
b4abf910
TG
766 raw_spin_unlock(&task->pi_lock);
767 raw_spin_unlock_irq(&lock->wait_lock);
23f78d4a 768
82084984 769 /*
3eb65aea
TG
770 * Make the actual exit decisions [12], based on the stored
771 * values.
772 *
82084984
TG
773 * We reached the end of the lock chain. Stop right here. No
774 * point to go back just to figure that out.
775 */
776 if (!next_lock)
777 goto out_put_task;
778
a57594a1
TG
779 /*
780 * If the current waiter is not the top waiter on the lock,
781 * then we can stop the chain walk here if we are not in full
782 * deadlock detection mode.
783 */
23f78d4a
IM
784 if (!detect_deadlock && waiter != top_waiter)
785 goto out_put_task;
786
787 goto again;
788
789 out_unlock_pi:
b4abf910 790 raw_spin_unlock_irq(&task->pi_lock);
23f78d4a
IM
791 out_put_task:
792 put_task_struct(task);
36c8b586 793
23f78d4a
IM
794 return ret;
795}
796
23f78d4a
IM
797/*
798 * Try to take an rt-mutex
799 *
b4abf910 800 * Must be called with lock->wait_lock held and interrupts disabled
8161239a 801 *
358c331f
TG
802 * @lock: The lock to be acquired.
803 * @task: The task which wants to acquire the lock
9f40a51a 804 * @waiter: The waiter that is queued to the lock's wait tree if the
358c331f 805 * callsite called task_blocked_on_lock(), otherwise NULL
23f78d4a 806 */
8161239a 807static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
358c331f 808 struct rt_mutex_waiter *waiter)
23f78d4a 809{
e0aad5b4
PZ
810 lockdep_assert_held(&lock->wait_lock);
811
23f78d4a 812 /*
358c331f
TG
813 * Before testing whether we can acquire @lock, we set the
814 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
815 * other tasks which try to modify @lock into the slow path
816 * and they serialize on @lock->wait_lock.
23f78d4a 817 *
358c331f
TG
818 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
819 * as explained at the top of this file if and only if:
23f78d4a 820 *
358c331f
TG
821 * - There is a lock owner. The caller must fixup the
822 * transient state if it does a trylock or leaves the lock
823 * function due to a signal or timeout.
824 *
825 * - @task acquires the lock and there are no other
826 * waiters. This is undone in rt_mutex_set_owner(@task) at
827 * the end of this function.
23f78d4a
IM
828 */
829 mark_rt_mutex_waiters(lock);
830
358c331f
TG
831 /*
832 * If @lock has an owner, give up.
833 */
8161239a 834 if (rt_mutex_owner(lock))
23f78d4a
IM
835 return 0;
836
8161239a 837 /*
358c331f 838 * If @waiter != NULL, @task has already enqueued the waiter
9f40a51a 839 * into @lock waiter tree. If @waiter == NULL then this is a
358c331f 840 * trylock attempt.
8161239a 841 */
358c331f
TG
842 if (waiter) {
843 /*
844 * If waiter is not the highest priority waiter of
845 * @lock, give up.
846 */
847 if (waiter != rt_mutex_top_waiter(lock))
848 return 0;
8161239a 849
358c331f
TG
850 /*
851 * We can acquire the lock. Remove the waiter from the
9f40a51a 852 * lock waiters tree.
358c331f
TG
853 */
854 rt_mutex_dequeue(lock, waiter);
8161239a 855
358c331f 856 } else {
8161239a 857 /*
358c331f
TG
858 * If the lock has waiters already we check whether @task is
859 * eligible to take over the lock.
860 *
861 * If there are no other waiters, @task can acquire
862 * the lock. @task->pi_blocked_on is NULL, so it does
863 * not need to be dequeued.
8161239a
LJ
864 */
865 if (rt_mutex_has_waiters(lock)) {
358c331f
TG
866 /*
867 * If @task->prio is greater than or equal to
868 * the top waiter priority (kernel view),
869 * @task lost.
870 */
19830e55
PZ
871 if (!rt_mutex_waiter_less(task_to_waiter(task),
872 rt_mutex_top_waiter(lock)))
358c331f
TG
873 return 0;
874
875 /*
876 * The current top waiter stays enqueued. We
877 * don't have to change anything in the lock
878 * waiters order.
879 */
880 } else {
881 /*
882 * No waiters. Take the lock without the
883 * pi_lock dance. @task->pi_blocked_on is NULL
884 * and we have no waiters to enqueue in @task
9f40a51a 885 * pi waiters tree.
358c331f
TG
886 */
887 goto takeit;
8161239a 888 }
8161239a
LJ
889 }
890
358c331f
TG
891 /*
892 * Clear @task->pi_blocked_on. Requires protection by
893 * @task->pi_lock. Redundant operation for the @waiter == NULL
894 * case, but conditionals are more expensive than a redundant
895 * store.
896 */
b4abf910 897 raw_spin_lock(&task->pi_lock);
358c331f
TG
898 task->pi_blocked_on = NULL;
899 /*
900 * Finish the lock acquisition. @task is the new owner. If
901 * other waiters exist we have to insert the highest priority
9f40a51a 902 * waiter into @task->pi_waiters tree.
358c331f
TG
903 */
904 if (rt_mutex_has_waiters(lock))
905 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
b4abf910 906 raw_spin_unlock(&task->pi_lock);
358c331f
TG
907
908takeit:
23f78d4a 909 /* We got the lock. */
9a11b49a 910 debug_rt_mutex_lock(lock);
23f78d4a 911
358c331f
TG
912 /*
913 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
914 * are still waiters or clears it.
915 */
8161239a 916 rt_mutex_set_owner(lock, task);
23f78d4a 917
23f78d4a
IM
918 return 1;
919}
920
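/*
 * Call-mode sketch (illustrative): the two ways this helper is used are
 *
 *      try_to_take_rt_mutex(lock, current, NULL);      // trylock / first
 *                                                      // slowpath attempt
 *      try_to_take_rt_mutex(lock, current, &waiter);   // from the wait loop,
 *                                                      // waiter already queued
 */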
921/*
922 * Task blocks on lock.
923 *
924 * Prepare waiter and propagate pi chain
925 *
b4abf910 926 * This must be called with lock->wait_lock held and interrupts disabled
23f78d4a
IM
927 */
928static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
929 struct rt_mutex_waiter *waiter,
8dac456a 930 struct task_struct *task,
8930ed80 931 enum rtmutex_chainwalk chwalk)
23f78d4a 932{
36c8b586 933 struct task_struct *owner = rt_mutex_owner(lock);
23f78d4a 934 struct rt_mutex_waiter *top_waiter = waiter;
82084984 935 struct rt_mutex *next_lock;
db630637 936 int chain_walk = 0, res;
23f78d4a 937
e0aad5b4
PZ
938 lockdep_assert_held(&lock->wait_lock);
939
397335f0
TG
940 /*
941 * Early deadlock detection. We really don't want the task to
942 * enqueue on itself just to untangle the mess later. It's not
943 * only an optimization. We drop the locks, so another waiter
944 * can come in before the chain walk detects the deadlock. So
945 * the other will detect the deadlock and return -EDEADLOCK,
946 * which is wrong, as the other waiter is not in a deadlock
947 * situation.
948 */
3d5c9340 949 if (owner == task)
397335f0
TG
950 return -EDEADLK;
951
b4abf910 952 raw_spin_lock(&task->pi_lock);
8dac456a 953 waiter->task = task;
23f78d4a 954 waiter->lock = lock;
2d3d891d 955 waiter->prio = task->prio;
e0aad5b4 956 waiter->deadline = task->dl.deadline;
23f78d4a
IM
957
958 /* Get the top priority waiter on the lock */
959 if (rt_mutex_has_waiters(lock))
960 top_waiter = rt_mutex_top_waiter(lock);
fb00aca4 961 rt_mutex_enqueue(lock, waiter);
23f78d4a 962
8dac456a 963 task->pi_blocked_on = waiter;
23f78d4a 964
b4abf910 965 raw_spin_unlock(&task->pi_lock);
23f78d4a 966
8161239a
LJ
967 if (!owner)
968 return 0;
969
b4abf910 970 raw_spin_lock(&owner->pi_lock);
23f78d4a 971 if (waiter == rt_mutex_top_waiter(lock)) {
fb00aca4
PZ
972 rt_mutex_dequeue_pi(owner, top_waiter);
973 rt_mutex_enqueue_pi(owner, waiter);
23f78d4a 974
acd58620 975 rt_mutex_adjust_prio(owner);
db630637
SR
976 if (owner->pi_blocked_on)
977 chain_walk = 1;
8930ed80 978 } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
db630637 979 chain_walk = 1;
82084984 980 }
db630637 981
82084984
TG
982 /* Store the lock on which owner is blocked or NULL */
983 next_lock = task_blocked_on_lock(owner);
984
b4abf910 985 raw_spin_unlock(&owner->pi_lock);
82084984
TG
986 /*
987 * Even if full deadlock detection is on, if the owner is not
988 * blocked itself, we can avoid finding this out in the chain
989 * walk.
990 */
991 if (!chain_walk || !next_lock)
23f78d4a
IM
992 return 0;
993
db630637
SR
994 /*
995 * The owner can't disappear while holding a lock,
996 * so the owner struct is protected by wait_lock.
997 * Gets dropped in rt_mutex_adjust_prio_chain()!
998 */
999 get_task_struct(owner);
1000
b4abf910 1001 raw_spin_unlock_irq(&lock->wait_lock);
23f78d4a 1002
8930ed80 1003 res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
82084984 1004 next_lock, waiter, task);
23f78d4a 1005
b4abf910 1006 raw_spin_lock_irq(&lock->wait_lock);
23f78d4a
IM
1007
1008 return res;
1009}
1010
1011/*
9f40a51a 1012 * Remove the top waiter from the current task's pi waiter tree and
45ab4eff 1013 * queue it up.
23f78d4a 1014 *
b4abf910 1015 * Called with lock->wait_lock held and interrupts disabled.
23f78d4a 1016 */
45ab4eff
DB
1017static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
1018 struct rt_mutex *lock)
23f78d4a
IM
1019{
1020 struct rt_mutex_waiter *waiter;
23f78d4a 1021
b4abf910 1022 raw_spin_lock(&current->pi_lock);
23f78d4a
IM
1023
1024 waiter = rt_mutex_top_waiter(lock);
23f78d4a
IM
1025
1026 /*
acd58620
PZ
1027 * Remove it from current->pi_waiters and deboost.
1028 *
1029 * We must in fact deboost here in order to ensure we call
1030 * rt_mutex_setprio() to update p->pi_top_task before the
1031 * task unblocks.
23f78d4a 1032 */
fb00aca4 1033 rt_mutex_dequeue_pi(current, waiter);
acd58620 1034 rt_mutex_adjust_prio(current);
23f78d4a 1035
27e35715
TG
1036 /*
1037 * As we are waking up the top waiter, and the waiter stays
1038 * queued on the lock until it gets the lock, this lock
1039 * obviously has waiters. Just set the bit here and this has
1040 * the added benefit of forcing all new tasks into the
1041 * slow path making sure no task of lower priority than
1042 * the top waiter can steal this lock.
1043 */
1044 lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
23f78d4a 1045
acd58620
PZ
1046 /*
1047 * We deboosted before waking the top waiter task such that we don't
1048 * run two tasks with the 'same' priority (and ensure the
1049 * p->pi_top_task pointer points to a blocked task). This however can
1050 * lead to priority inversion if we would get preempted after the
1051 * deboost but before waking our donor task, hence the preempt_disable()
1052 * before unlock.
1053 *
1054 * Pairs with preempt_enable() in rt_mutex_postunlock();
1055 */
1056 preempt_disable();
45ab4eff 1057 wake_q_add(wake_q, waiter->task);
acd58620 1058 raw_spin_unlock(&current->pi_lock);
23f78d4a
IM
1059}
1060
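/*
 * Pairing sketch (illustrative): the slow unlock path therefore looks like
 *
 *      raw_spin_lock_irqsave(&lock->wait_lock, flags);
 *      mark_wakeup_next_waiter(&wake_q, lock); // preempt_disable() inside
 *      raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 *      rt_mutex_postunlock(&wake_q);           // wake_up_q() + preempt_enable()
 */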
1061/*
8161239a 1062 * Remove a waiter from a lock and give up
23f78d4a 1063 *
b4abf910 1064 * Must be called with lock->wait_lock held and interrupts disabled. The caller
8161239a 1065 * must have just failed to try_to_take_rt_mutex().
23f78d4a 1066 */
bd197234
TG
1067static void remove_waiter(struct rt_mutex *lock,
1068 struct rt_mutex_waiter *waiter)
23f78d4a 1069{
1ca7b860 1070 bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
36c8b586 1071 struct task_struct *owner = rt_mutex_owner(lock);
1ca7b860 1072 struct rt_mutex *next_lock;
23f78d4a 1073
e0aad5b4
PZ
1074 lockdep_assert_held(&lock->wait_lock);
1075
b4abf910 1076 raw_spin_lock(&current->pi_lock);
fb00aca4 1077 rt_mutex_dequeue(lock, waiter);
23f78d4a 1078 current->pi_blocked_on = NULL;
b4abf910 1079 raw_spin_unlock(&current->pi_lock);
23f78d4a 1080
1ca7b860
TG
1081 /*
1082 * Only update priority if the waiter was the highest priority
1083 * waiter of the lock and there is an owner to update.
1084 */
1085 if (!owner || !is_top_waiter)
8161239a
LJ
1086 return;
1087
b4abf910 1088 raw_spin_lock(&owner->pi_lock);
23f78d4a 1089
1ca7b860 1090 rt_mutex_dequeue_pi(owner, waiter);
23f78d4a 1091
1ca7b860
TG
1092 if (rt_mutex_has_waiters(lock))
1093 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
23f78d4a 1094
acd58620 1095 rt_mutex_adjust_prio(owner);
23f78d4a 1096
1ca7b860
TG
1097 /* Store the lock on which owner is blocked or NULL */
1098 next_lock = task_blocked_on_lock(owner);
db630637 1099
b4abf910 1100 raw_spin_unlock(&owner->pi_lock);
23f78d4a 1101
1ca7b860
TG
1102 /*
1103 * Don't walk the chain, if the owner task is not blocked
1104 * itself.
1105 */
82084984 1106 if (!next_lock)
23f78d4a
IM
1107 return;
1108
db630637
SR
1109 /* gets dropped in rt_mutex_adjust_prio_chain()! */
1110 get_task_struct(owner);
1111
b4abf910 1112 raw_spin_unlock_irq(&lock->wait_lock);
23f78d4a 1113
8930ed80
TG
1114 rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1115 next_lock, NULL, current);
23f78d4a 1116
b4abf910 1117 raw_spin_lock_irq(&lock->wait_lock);
23f78d4a
IM
1118}
1119
95e02ca9
TG
1120/*
1121 * Recheck the pi chain, in case we got a priority setting
1122 *
1123 * Called from sched_setscheduler
1124 */
1125void rt_mutex_adjust_pi(struct task_struct *task)
1126{
1127 struct rt_mutex_waiter *waiter;
82084984 1128 struct rt_mutex *next_lock;
95e02ca9
TG
1129 unsigned long flags;
1130
1d615482 1131 raw_spin_lock_irqsave(&task->pi_lock, flags);
95e02ca9
TG
1132
1133 waiter = task->pi_blocked_on;
19830e55 1134 if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
1d615482 1135 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
95e02ca9
TG
1136 return;
1137 }
82084984 1138 next_lock = waiter->lock;
1d615482 1139 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
95e02ca9 1140
db630637
SR
1141 /* gets dropped in rt_mutex_adjust_prio_chain()! */
1142 get_task_struct(task);
82084984 1143
8930ed80
TG
1144 rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
1145 next_lock, NULL, task);
95e02ca9
TG
1146}
1147
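/*
 * Trigger sketch (illustrative): a priority change such as
 *
 *      struct sched_param sp = { .sched_priority = 80 };
 *      sched_setscheduler(task, SCHED_FIFO, &sp);
 *
 * ends up calling rt_mutex_adjust_pi(task) so that a task currently blocked
 * on an rt_mutex has the new priority propagated down the lock chain.
 */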
50809358
PZ
1148void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
1149{
1150 debug_rt_mutex_init_waiter(waiter);
1151 RB_CLEAR_NODE(&waiter->pi_tree_entry);
1152 RB_CLEAR_NODE(&waiter->tree_entry);
1153 waiter->task = NULL;
1154}
1155
8dac456a
DH
1156/**
1157 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
1158 * @lock: the rt_mutex to take
1159 * @state: the state the task should block in (TASK_INTERRUPTIBLE
b4abf910 1160 * or TASK_UNINTERRUPTIBLE)
8dac456a
DH
1161 * @timeout: the pre-initialized and started timer, or NULL for none
1162 * @waiter: the pre-initialized rt_mutex_waiter
8dac456a 1163 *
b4abf910 1164 * Must be called with lock->wait_lock held and interrupts disabled
23f78d4a
IM
1165 */
1166static int __sched
8dac456a
DH
1167__rt_mutex_slowlock(struct rt_mutex *lock, int state,
1168 struct hrtimer_sleeper *timeout,
8161239a 1169 struct rt_mutex_waiter *waiter)
23f78d4a 1170{
23f78d4a
IM
1171 int ret = 0;
1172
23f78d4a
IM
1173 for (;;) {
1174 /* Try to acquire the lock: */
8161239a 1175 if (try_to_take_rt_mutex(lock, current, waiter))
23f78d4a
IM
1176 break;
1177
1178 /*
1179 * TASK_INTERRUPTIBLE checks for signals and
1180 * timeout. Ignored otherwise.
1181 */
4009f4b3 1182 if (likely(state == TASK_INTERRUPTIBLE)) {
23f78d4a
IM
1183 /* Signal pending? */
1184 if (signal_pending(current))
1185 ret = -EINTR;
1186 if (timeout && !timeout->task)
1187 ret = -ETIMEDOUT;
1188 if (ret)
1189 break;
1190 }
1191
b4abf910 1192 raw_spin_unlock_irq(&lock->wait_lock);
23f78d4a 1193
8dac456a 1194 debug_rt_mutex_print_deadlock(waiter);
23f78d4a 1195
1b0b7c17 1196 schedule();
23f78d4a 1197
b4abf910 1198 raw_spin_lock_irq(&lock->wait_lock);
23f78d4a
IM
1199 set_current_state(state);
1200 }
1201
afffc6c1 1202 __set_current_state(TASK_RUNNING);
8dac456a
DH
1203 return ret;
1204}
1205
3d5c9340
TG
1206static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
1207 struct rt_mutex_waiter *w)
1208{
1209 /*
1210 * If the result is not -EDEADLOCK or the caller requested
1211 * deadlock detection, nothing to do here.
1212 */
1213 if (res != -EDEADLOCK || detect_deadlock)
1214 return;
1215
1216 /*
1217 * Yell loudly and stop the task right here.
1218 */
1219 rt_mutex_print_deadlock(w);
1220 while (1) {
1221 set_current_state(TASK_INTERRUPTIBLE);
1222 schedule();
1223 }
1224}
1225
8dac456a
DH
1226/*
1227 * Slow path lock function:
1228 */
1229static int __sched
1230rt_mutex_slowlock(struct rt_mutex *lock, int state,
1231 struct hrtimer_sleeper *timeout,
8930ed80 1232 enum rtmutex_chainwalk chwalk)
8dac456a
DH
1233{
1234 struct rt_mutex_waiter waiter;
b4abf910 1235 unsigned long flags;
8dac456a
DH
1236 int ret = 0;
1237
50809358 1238 rt_mutex_init_waiter(&waiter);
8dac456a 1239
b4abf910
TG
1240 /*
1241 * Technically we could use raw_spin_[un]lock_irq() here, but this can
1242 * be called in early boot if the cmpxchg() fast path is disabled
1243 * (debug, no architecture support). In this case we will acquire the
1244 * rtmutex with lock->wait_lock held. But we cannot unconditionally
1245 * enable interrupts in that early boot case. So we need to use the
1246 * irqsave/restore variants.
1247 */
1248 raw_spin_lock_irqsave(&lock->wait_lock, flags);
8dac456a
DH
1249
1250 /* Try to acquire the lock again: */
8161239a 1251 if (try_to_take_rt_mutex(lock, current, NULL)) {
b4abf910 1252 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
8dac456a
DH
1253 return 0;
1254 }
1255
1256 set_current_state(state);
1257
1258 /* Setup the timer, when timeout != NULL */
ccdd92c1 1259 if (unlikely(timeout))
8dac456a 1260 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
8dac456a 1261
8930ed80 1262 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
8161239a
LJ
1263
1264 if (likely(!ret))
afffc6c1 1265 /* sleep on the mutex */
8161239a 1266 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
8dac456a 1267
3d5c9340 1268 if (unlikely(ret)) {
9d3e2d02 1269 __set_current_state(TASK_RUNNING);
c28d62cf 1270 remove_waiter(lock, &waiter);
8930ed80 1271 rt_mutex_handle_deadlock(ret, chwalk, &waiter);
3d5c9340 1272 }
23f78d4a
IM
1273
1274 /*
1275 * try_to_take_rt_mutex() sets the waiter bit
1276 * unconditionally. We might have to fix that up.
1277 */
1278 fixup_rt_mutex_waiters(lock);
1279
b4abf910 1280 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
23f78d4a
IM
1281
1282 /* Remove pending timer: */
1283 if (unlikely(timeout))
1284 hrtimer_cancel(&timeout->timer);
1285
23f78d4a
IM
1286 debug_rt_mutex_free_waiter(&waiter);
1287
1288 return ret;
1289}
1290
c1e2f0ea
PZ
1291static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
1292{
1293 int ret = try_to_take_rt_mutex(lock, current, NULL);
1294
1295 /*
1296 * try_to_take_rt_mutex() sets the lock waiters bit
1297 * unconditionally. Clean this up.
1298 */
1299 fixup_rt_mutex_waiters(lock);
1300
1301 return ret;
1302}
1303
23f78d4a
IM
1304/*
1305 * Slow path try-lock function:
1306 */
88f2b4c1 1307static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
23f78d4a 1308{
b4abf910 1309 unsigned long flags;
88f2b4c1
TG
1310 int ret;
1311
1312 /*
1313 * If the lock already has an owner we fail to get the lock.
1314 * This can be done without taking the @lock->wait_lock as
1315 * it is only being read, and this is a trylock anyway.
1316 */
1317 if (rt_mutex_owner(lock))
1318 return 0;
23f78d4a 1319
88f2b4c1 1320 /*
b4abf910
TG
1321 * The mutex has currently no owner. Lock the wait lock and try to
1322 * acquire the lock. We use irqsave here to support early boot calls.
88f2b4c1 1323 */
b4abf910 1324 raw_spin_lock_irqsave(&lock->wait_lock, flags);
23f78d4a 1325
c1e2f0ea 1326 ret = __rt_mutex_slowtrylock(lock);
23f78d4a 1327
b4abf910 1328 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
23f78d4a
IM
1329
1330 return ret;
1331}
1332
1333/*
802ab58d 1334 * Slow path to release a rt-mutex.
aa2bfe55
PZ
1335 *
1336 * Return whether the current task needs to call rt_mutex_postunlock().
23f78d4a 1337 */
802ab58d
SAS
1338static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
1339 struct wake_q_head *wake_q)
23f78d4a 1340{
b4abf910
TG
1341 unsigned long flags;
1342
1343 /* irqsave required to support early boot calls */
1344 raw_spin_lock_irqsave(&lock->wait_lock, flags);
23f78d4a
IM
1345
1346 debug_rt_mutex_unlock(lock);
1347
27e35715
TG
1348 /*
1349 * We must be careful here if the fast path is enabled. If we
1350 * have no waiters queued we cannot set owner to NULL here
1351 * because of:
1352 *
1353 * foo->lock->owner = NULL;
1354 * rtmutex_lock(foo->lock); <- fast path
1355 * free = atomic_dec_and_test(foo->refcnt);
1356 * rtmutex_unlock(foo->lock); <- fast path
1357 * if (free)
1358 * kfree(foo);
1359 * raw_spin_unlock(foo->lock->wait_lock);
1360 *
1361 * So for the fastpath enabled kernel:
1362 *
1363 * Nothing can set the waiters bit as long as we hold
1364 * lock->wait_lock. So we do the following sequence:
1365 *
1366 * owner = rt_mutex_owner(lock);
1367 * clear_rt_mutex_waiters(lock);
1368 * raw_spin_unlock(&lock->wait_lock);
1369 * if (cmpxchg(&lock->owner, owner, 0) == owner)
1370 * return;
1371 * goto retry;
1372 *
1373 * The fastpath disabled variant is simple as all access to
1374 * lock->owner is serialized by lock->wait_lock:
1375 *
1376 * lock->owner = NULL;
1377 * raw_spin_unlock(&lock->wait_lock);
1378 */
1379 while (!rt_mutex_has_waiters(lock)) {
1380 /* Drops lock->wait_lock ! */
b4abf910 1381 if (unlock_rt_mutex_safe(lock, flags) == true)
802ab58d 1382 return false;
27e35715 1383 /* Relock the rtmutex and try again */
b4abf910 1384 raw_spin_lock_irqsave(&lock->wait_lock, flags);
23f78d4a
IM
1385 }
1386
27e35715
TG
1387 /*
1388 * The wakeup next waiter path does not suffer from the above
1389 * race. See the comments there.
45ab4eff
DB
1390 *
1391 * Queue the next waiter for wakeup once we release the wait_lock.
27e35715 1392 */
802ab58d 1393 mark_wakeup_next_waiter(wake_q, lock);
b4abf910 1394 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
23f78d4a 1395
aa2bfe55 1396 return true; /* call rt_mutex_postunlock() */
23f78d4a
IM
1397}
1398
1399/*
1400 * debug aware fast / slowpath lock,trylock,unlock
1401 *
1402 * The atomic acquire/release ops are compiled away, when either the
1403 * architecture does not support cmpxchg or when debugging is enabled.
1404 */
1405static inline int
1406rt_mutex_fastlock(struct rt_mutex *lock, int state,
23f78d4a
IM
1407 int (*slowfn)(struct rt_mutex *lock, int state,
1408 struct hrtimer_sleeper *timeout,
8930ed80 1409 enum rtmutex_chainwalk chwalk))
23f78d4a 1410{
fffa954f 1411 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
23f78d4a 1412 return 0;
fffa954f
PZ
1413
1414 return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
23f78d4a
IM
1415}
1416
1417static inline int
1418rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
8930ed80
TG
1419 struct hrtimer_sleeper *timeout,
1420 enum rtmutex_chainwalk chwalk,
23f78d4a
IM
1421 int (*slowfn)(struct rt_mutex *lock, int state,
1422 struct hrtimer_sleeper *timeout,
8930ed80 1423 enum rtmutex_chainwalk chwalk))
23f78d4a 1424{
8930ed80 1425 if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
fffa954f 1426 likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
23f78d4a 1427 return 0;
fffa954f
PZ
1428
1429 return slowfn(lock, state, timeout, chwalk);
23f78d4a
IM
1430}
1431
1432static inline int
1433rt_mutex_fasttrylock(struct rt_mutex *lock,
9a11b49a 1434 int (*slowfn)(struct rt_mutex *lock))
23f78d4a 1435{
fffa954f 1436 if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
23f78d4a 1437 return 1;
fffa954f 1438
9a11b49a 1439 return slowfn(lock);
23f78d4a
IM
1440}
1441
2a1c6029 1442/*
aa2bfe55 1443 * Performs the wakeup of the top-waiter and re-enables preemption.
2a1c6029 1444 */
aa2bfe55 1445void rt_mutex_postunlock(struct wake_q_head *wake_q)
2a1c6029
XP
1446{
1447 wake_up_q(wake_q);
1448
1449 /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
aa2bfe55 1450 preempt_enable();
2a1c6029
XP
1451}
1452
23f78d4a
IM
1453static inline void
1454rt_mutex_fastunlock(struct rt_mutex *lock,
802ab58d
SAS
1455 bool (*slowfn)(struct rt_mutex *lock,
1456 struct wake_q_head *wqh))
23f78d4a 1457{
194a6b5b 1458 DEFINE_WAKE_Q(wake_q);
802ab58d 1459
fffa954f
PZ
1460 if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1461 return;
802ab58d 1462
aa2bfe55
PZ
1463 if (slowfn(lock, &wake_q))
1464 rt_mutex_postunlock(&wake_q);
23f78d4a
IM
1465}
1466
62cedf3e
PR
1467static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass)
1468{
1469 might_sleep();
1470
1471 mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
1472 rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
1473}
1474
1475#ifdef CONFIG_DEBUG_LOCK_ALLOC
1476/**
1477 * rt_mutex_lock_nested - lock a rt_mutex
1478 *
1479 * @lock: the rt_mutex to be locked
1480 * @subclass: the lockdep subclass
1481 */
1482void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
1483{
1484 __rt_mutex_lock(lock, subclass);
1485}
1486EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
62cedf3e 1487
84818af2
SRV
1488#else /* !CONFIG_DEBUG_LOCK_ALLOC */
1489
23f78d4a
IM
1490/**
1491 * rt_mutex_lock - lock a rt_mutex
1492 *
1493 * @lock: the rt_mutex to be locked
1494 */
1495void __sched rt_mutex_lock(struct rt_mutex *lock)
1496{
62cedf3e 1497 __rt_mutex_lock(lock, 0);
23f78d4a
IM
1498}
1499EXPORT_SYMBOL_GPL(rt_mutex_lock);
62cedf3e 1500#endif
23f78d4a
IM
1501
1502/**
1503 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
1504 *
c051b21f 1505 * @lock: the rt_mutex to be locked
23f78d4a
IM
1506 *
1507 * Returns:
c051b21f
TG
1508 * 0 on success
1509 * -EINTR when interrupted by a signal
23f78d4a 1510 */
c051b21f 1511int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
23f78d4a 1512{
f5694788
PZ
1513 int ret;
1514
23f78d4a
IM
1515 might_sleep();
1516
f5694788
PZ
1517 mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
1518 ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
1519 if (ret)
1520 mutex_release(&lock->dep_map, 1, _RET_IP_);
1521
1522 return ret;
23f78d4a
IM
1523}
1524EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
1525
5293c2ef
PZ
1526/*
1527 * Futex variant, must not use fastpath.
1528 */
1529int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
1530{
1531 return rt_mutex_slowtrylock(lock);
c051b21f
TG
1532}
1533
c1e2f0ea
PZ
1534int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
1535{
1536 return __rt_mutex_slowtrylock(lock);
1537}
1538
23f78d4a 1539/**
23b94b96
LH
1540 * rt_mutex_timed_lock - lock a rt_mutex interruptible
1541 * the timeout structure is provided
1542 * by the caller
23f78d4a 1543 *
c051b21f 1544 * @lock: the rt_mutex to be locked
23f78d4a 1545 * @timeout: timeout structure or NULL (no timeout)
23f78d4a
IM
1546 *
1547 * Returns:
c051b21f
TG
1548 * 0 on success
1549 * -EINTR when interrupted by a signal
3ac49a1c 1550 * -ETIMEDOUT when the timeout expired
23f78d4a
IM
1551 */
1552int
c051b21f 1553rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
23f78d4a 1554{
f5694788
PZ
1555 int ret;
1556
23f78d4a
IM
1557 might_sleep();
1558
f5694788
PZ
1559 mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
1560 ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
8930ed80 1561 RT_MUTEX_MIN_CHAINWALK,
c051b21f 1562 rt_mutex_slowlock);
f5694788
PZ
1563 if (ret)
1564 mutex_release(&lock->dep_map, 1, _RET_IP_);
1565
1566 return ret;
23f78d4a
IM
1567}
1568EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
1569
1570/**
1571 * rt_mutex_trylock - try to lock a rt_mutex
1572 *
1573 * @lock: the rt_mutex to be locked
1574 *
6ce47fd9
TG
1575 * This function can only be called in thread context. It's safe to
1576 * call it from atomic regions, but not from hard interrupt or soft
1577 * interrupt context.
1578 *
23f78d4a
IM
1579 * Returns 1 on success and 0 on contention
1580 */
1581int __sched rt_mutex_trylock(struct rt_mutex *lock)
1582{
f5694788
PZ
1583 int ret;
1584
a461d587 1585 if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
6ce47fd9
TG
1586 return 0;
1587
f5694788
PZ
1588 ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
1589 if (ret)
1590 mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
1591
1592 return ret;
23f78d4a
IM
1593}
1594EXPORT_SYMBOL_GPL(rt_mutex_trylock);
1595
1596/**
1597 * rt_mutex_unlock - unlock a rt_mutex
1598 *
1599 * @lock: the rt_mutex to be unlocked
1600 */
1601void __sched rt_mutex_unlock(struct rt_mutex *lock)
1602{
f5694788 1603 mutex_release(&lock->dep_map, 1, _RET_IP_);
23f78d4a
IM
1604 rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
1605}
1606EXPORT_SYMBOL_GPL(rt_mutex_unlock);
1607
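/*
 * Minimal usage sketch for the public API (illustrative only, with a
 * hypothetical driver-private lock):
 *
 *      static DEFINE_RT_MUTEX(my_lock);
 *
 *      static void my_work(void)
 *      {
 *              rt_mutex_lock(&my_lock);
 *              // critical section; priority inheritance applies if a
 *              // higher priority task blocks on my_lock meanwhile
 *              rt_mutex_unlock(&my_lock);
 *      }
 */
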
802ab58d 1608/**
5293c2ef
PZ
1609 * Futex variant: since futex variants do not use the fast-path, this can be
1610 * simple and will not need to retry.
802ab58d 1611 */
5293c2ef
PZ
1612bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
1613 struct wake_q_head *wake_q)
802ab58d 1614{
5293c2ef
PZ
1615 lockdep_assert_held(&lock->wait_lock);
1616
1617 debug_rt_mutex_unlock(lock);
1618
1619 if (!rt_mutex_has_waiters(lock)) {
1620 lock->owner = NULL;
1621 return false; /* done */
1622 }
1623
2a1c6029 1624 /*
def34eaa
MG
1625 * We've already deboosted, mark_wakeup_next_waiter() will
1626 * retain preempt_disabled when we drop the wait_lock, to
1627 * avoid inversion prior to the wakeup. preempt_disable()
1628 * therein pairs with rt_mutex_postunlock().
2a1c6029 1629 */
def34eaa 1630 mark_wakeup_next_waiter(wake_q, lock);
2a1c6029 1631
aa2bfe55 1632 return true; /* call postunlock() */
5293c2ef 1633}
fffa954f 1634
5293c2ef
PZ
1635void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
1636{
1637 DEFINE_WAKE_Q(wake_q);
6b0ef92f 1638 unsigned long flags;
aa2bfe55 1639 bool postunlock;
5293c2ef 1640
6b0ef92f 1641 raw_spin_lock_irqsave(&lock->wait_lock, flags);
aa2bfe55 1642 postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
6b0ef92f 1643 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
5293c2ef 1644
aa2bfe55
PZ
1645 if (postunlock)
1646 rt_mutex_postunlock(&wake_q);
802ab58d
SAS
1647}
1648
23b94b96 1649/**
23f78d4a
IM
1650 * rt_mutex_destroy - mark a mutex unusable
1651 * @lock: the mutex to be destroyed
1652 *
1653 * This function marks the mutex uninitialized, and any subsequent
1654 * use of the mutex is forbidden. The mutex must not be locked when
1655 * this function is called.
1656 */
1657void rt_mutex_destroy(struct rt_mutex *lock)
1658{
1659 WARN_ON(rt_mutex_is_locked(lock));
1660#ifdef CONFIG_DEBUG_RT_MUTEXES
1661 lock->magic = NULL;
1662#endif
1663}
23f78d4a
IM
1664EXPORT_SYMBOL_GPL(rt_mutex_destroy);
1665
1666/**
1667 * __rt_mutex_init - initialize the rt lock
1668 *
1669 * @lock: the rt lock to be initialized
1670 *
1671 * Initialize the rt lock to unlocked state.
1672 *
1673 * Initializing of a locked rt lock is not allowed
1674 */
f5694788
PZ
1675void __rt_mutex_init(struct rt_mutex *lock, const char *name,
1676 struct lock_class_key *key)
23f78d4a
IM
1677{
1678 lock->owner = NULL;
d209d74d 1679 raw_spin_lock_init(&lock->wait_lock);
a23ba907 1680 lock->waiters = RB_ROOT_CACHED;
23f78d4a 1681
cde50a67
LASL
1682 if (name && key)
1683 debug_rt_mutex_init(lock, name, key);
23f78d4a
IM
1684}
1685EXPORT_SYMBOL_GPL(__rt_mutex_init);

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock: the rt_mutex to be locked
 * @proxy_owner: the task to set as owner
 *
 * No locking. The caller has to do the serializing itself.
 *
 * Special API call for PI-futex support. This initializes the rtmutex and
 * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
 * possible at this point because the pi_state which contains the rtmutex
 * is not yet visible to other tasks.
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	__rt_mutex_init(lock, NULL, NULL);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock: the rt_mutex to be unlocked
 * @proxy_owner: the current proxy owner of @lock
 *
 * No locking. The caller has to do the serializing itself.
 *
 * Special API call for PI-futex support. This merrily cleans up the rtmutex
 * (debugging) state. Concurrent operations on this rt_mutex are not
 * possible because it belongs to the pi_state which is about to be freed
 * and it is no longer visible to other tasks.
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
}

/**
 * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * NOTE: does _NOT_ remove the @waiter on failure; must either call
 * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
				struct rt_mutex_waiter *waiter,
				struct task_struct *task)
{
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL))
		return 1;

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task,
				      RT_MUTEX_FULL_CHAINWALK);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain. Let the waiter sort it out.
		 */
		ret = 0;
	}

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
 * on failure.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
	if (unlikely(ret))
		remove_waiter(lock, waiter);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}
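
/*
 * Editor's illustrative sketch, not part of the original source: how the two
 * start_proxy_lock() variants differ. A caller that already holds
 * lock->wait_lock (as the PI-futex code does) uses the __ variant and must
 * later resolve the waiter itself via rt_mutex_wait_proxy_lock() or
 * rt_mutex_cleanup_proxy_lock(); the wrapper above takes wait_lock and
 * removes the waiter again on failure:
 *
 *	raw_spin_lock_irq(&lock->wait_lock);
 *	ret = __rt_mutex_start_proxy_lock(lock, &waiter, task);
 *	raw_spin_unlock_irq(&lock->wait_lock);
 *	... even if ret < 0, the waiter is still enqueued here ...
 */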

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt_mutex to query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}

/**
 * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:		the timeout, NULL if none. The hrtimer should already
 *			have been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Wait for the lock acquisition started on our behalf by
 * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
 * rt_mutex_cleanup_proxy_lock().
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex support
 */
int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
			     struct hrtimer_sleeper *to,
			     struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	/* sleep on the mutex */
	set_current_state(TASK_INTERRUPTIBLE);
	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

/**
 * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
 * rt_mutex_wait_proxy_lock().
 *
 * Unless we acquired the lock, we're still enqueued on the wait-list and can
 * in fact still be granted ownership until we're removed. Therefore we can
 * find that we are in fact the owner and must disregard the
 * rt_mutex_wait_proxy_lock() failure.
 *
 * Returns:
 *  true  - did the cleanup, we are done.
 *  false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
 *          the caller should disregard its return value.
 *
 * Special API call for PI-futex support
 */
bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
				 struct rt_mutex_waiter *waiter)
{
	bool cleanup = false;

	raw_spin_lock_irq(&lock->wait_lock);
	/*
	 * Do an unconditional try-lock, this deals with the lock stealing
	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
	 * sets a NULL owner.
	 *
	 * We're not interested in the return value, because the subsequent
	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
	 * we will own the lock and it will have removed the waiter. If we
	 * failed the trylock, we're still not the owner and we need to remove
	 * ourselves.
	 */
	try_to_take_rt_mutex(lock, current, waiter);
	/*
	 * Unless we're the owner, we're still enqueued on the wait_list.
	 * So check if we became owner, and if not, take us off the wait_list.
	 */
	if (rt_mutex_owner(lock) != current) {
		remove_waiter(lock, waiter);
		cleanup = true;
	}
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irq(&lock->wait_lock);

	return cleanup;
}
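
/*
 * Editor's illustrative sketch, not part of the original source: how the
 * proxy-lock calls above fit together for a PI-futex style handoff. One task
 * starts the acquisition on behalf of the sleeping @task, the woken task then
 * waits for it and, on failure, cleans up after itself. "lock", "waiter",
 * "to" and "task" are caller-provided; error handling is elided.
 *
 *	rt_mutex_init_waiter(&waiter);
 *
 *	... on the side doing the handoff ...
 *	ret = rt_mutex_start_proxy_lock(lock, &waiter, task);
 *	if (ret == 1)
 *		... lock was acquired for @task, wake it up ...
 *
 *	... later, in the context of the woken @task ...
 *	ret = rt_mutex_wait_proxy_lock(lock, to, &waiter);
 *	if (ret && !rt_mutex_cleanup_proxy_lock(lock, &waiter))
 *		ret = 0;	... we own the lock after all ...
 */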