Commit | Line | Data |
---|---|---|
457c8996 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
6053ee3b | 2 | /* |
67a6de49 | 3 | * kernel/locking/mutex.c |
6053ee3b IM | 4 | * |
| 5 | * Mutexes: blocking mutual exclusion locks |
| 6 | * |
| 7 | * Started by Ingo Molnar: |
| 8 | * |
| 9 | * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
| 10 | * |
| 11 | * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and |
| 12 | * David Howells for suggestions and improvements. |
| 13 | * |
0d66bf6d PZ | 14 | * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline |
| 15 | * from the -rt tree, where it was originally implemented for rtmutexes |
| 16 | * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale |
| 17 | * and Sven Dietrich. |
| 18 | * |
387b1468 | 19 | * Also see Documentation/locking/mutex-design.rst. |
6053ee3b IM | 20 | */ |
| 21 | #include <linux/mutex.h> |
1b375dc3 | 22 | #include <linux/ww_mutex.h> |
174cd4b1 | 23 | #include <linux/sched/signal.h> |
8bd75c77 | 24 | #include <linux/sched/rt.h> |
84f001e1 | 25 | #include <linux/sched/wake_q.h> |
b17b0153 | 26 | #include <linux/sched/debug.h> |
9984de1a | 27 | #include <linux/export.h> |
6053ee3b IM | 28 | #include <linux/spinlock.h> |
| 29 | #include <linux/interrupt.h> |
9a11b49a | 30 | #include <linux/debug_locks.h> |
7a215f89 | 31 | #include <linux/osq_lock.h> |
6053ee3b | 32 | |
6053ee3b IM | 33 | #ifdef CONFIG_DEBUG_MUTEXES |
| 34 | # include "mutex-debug.h" |
6053ee3b IM | 35 | #else |
| 36 | # include "mutex.h" |
6053ee3b IM | 37 | #endif |
| 38 | |
ef5d4707 IM | 39 | void |
| 40 | __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) |
6053ee3b | 41 | { |
3ca0ff57 | 42 | atomic_long_set(&lock->owner, 0); |
6053ee3b IM | 43 | spin_lock_init(&lock->wait_lock); |
| 44 | INIT_LIST_HEAD(&lock->wait_list); |
2bd2c92c | 45 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
4d9d951e | 46 | osq_lock_init(&lock->osq); |
2bd2c92c | 47 | #endif |
6053ee3b | 48 | |
ef5d4707 | 49 | debug_mutex_init(lock, name, key); |
6053ee3b | 50 | } |
6053ee3b IM | 51 | EXPORT_SYMBOL(__mutex_init); |
| 52 | |
3ca0ff57 PZ | 53 | /* |
| 54 | * @owner: contains a 'struct task_struct *' pointing to the current lock owner, |
| 55 | * NULL means not owned. Since task_struct pointers are aligned to |
e274795e | 56 | * at least L1_CACHE_BYTES, we have low bits to store extra state. |
3ca0ff57 PZ | 57 | * |
| 58 | * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. |
9d659ae1 | 59 | * Bit1 indicates unlock needs to hand the lock to the top-waiter. |
e274795e | 60 | * Bit2 indicates handoff has been done and we're waiting for pickup. |
3ca0ff57 PZ | 61 | */ |
| 62 | #define MUTEX_FLAG_WAITERS 0x01 |
9d659ae1 | 63 | #define MUTEX_FLAG_HANDOFF 0x02 |
e274795e | 64 | #define MUTEX_FLAG_PICKUP 0x04 |
3ca0ff57 | 65 | |
e274795e | 66 | #define MUTEX_FLAGS 0x07 |
3ca0ff57 | 67 | |
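
The comment at lines 53-60 describes the trick the rest of this file relies on: a single atomic word holds both the owner task pointer and a few status bits, because the pointer's low bits are guaranteed to be zero. A minimal userspace sketch of the same packing arithmetic; FLAG_WAITERS, FLAGS_MASK and struct task below are illustrative stand-ins, not kernel API:

```c
/*
 * Userspace illustration (not kernel code) of packing an owner pointer and
 * flag bits into one word, as done for lock->owner above.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_WAITERS	0x01UL		/* illustrative stand-ins for MUTEX_FLAG_* */
#define FLAG_HANDOFF	0x02UL
#define FLAGS_MASK	0x07UL

/* Aligned the way task_struct is (to at least L1_CACHE_BYTES), so bits 0-2 are free. */
struct task {
	char name[16];
} __attribute__((aligned(64)));

static struct task *owner_task(uintptr_t word)
{
	return (struct task *)(word & ~FLAGS_MASK);	/* mirrors __owner_task() */
}

static uintptr_t owner_flags(uintptr_t word)
{
	return word & FLAGS_MASK;			/* mirrors __owner_flags() */
}

int main(void)
{
	static struct task t = { "worker" };
	uintptr_t word = (uintptr_t)&t | FLAG_WAITERS;	/* owner set, waiters queued */

	assert(owner_task(word) == &t);
	assert(owner_flags(word) == FLAG_WAITERS);
	printf("owner=%s flags=%#lx\n", owner_task(word)->name,
	       (unsigned long)owner_flags(word));
	return 0;
}
```
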
5f35d5a6 MO |
68 | /* |
69 | * Internal helper function; C doesn't allow us to hide it :/ | |
70 | * | |
71 | * DO NOT USE (outside of mutex code). | |
72 | */ | |
73 | static inline struct task_struct *__mutex_owner(struct mutex *lock) | |
74 | { | |
a037d269 | 75 | return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); |
5f35d5a6 MO |
76 | } |
77 | ||
3ca0ff57 PZ |
78 | static inline struct task_struct *__owner_task(unsigned long owner) |
79 | { | |
80 | return (struct task_struct *)(owner & ~MUTEX_FLAGS); | |
81 | } | |
82 | ||
5f35d5a6 MO |
83 | bool mutex_is_locked(struct mutex *lock) |
84 | { | |
85 | return __mutex_owner(lock) != NULL; | |
86 | } | |
87 | EXPORT_SYMBOL(mutex_is_locked); | |
88 | ||
3ca0ff57 PZ |
89 | static inline unsigned long __owner_flags(unsigned long owner) |
90 | { | |
91 | return owner & MUTEX_FLAGS; | |
92 | } | |
93 | ||
94 | /* | |
e2db7592 | 95 | * Trylock variant that returns the owning task on failure. |
3ca0ff57 | 96 | */ |
e274795e | 97 | static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) |
3ca0ff57 PZ |
98 | { |
99 | unsigned long owner, curr = (unsigned long)current; | |
100 | ||
101 | owner = atomic_long_read(&lock->owner); | |
102 | for (;;) { /* must loop, can race against a flag */ | |
ab4e4d9f | 103 | unsigned long flags = __owner_flags(owner); |
e274795e | 104 | unsigned long task = owner & ~MUTEX_FLAGS; |
9d659ae1 | 105 | |
e274795e PZ |
106 | if (task) { |
107 | if (likely(task != curr)) | |
108 | break; | |
3ca0ff57 | 109 | |
e274795e PZ |
110 | if (likely(!(flags & MUTEX_FLAG_PICKUP))) |
111 | break; | |
9d659ae1 | 112 | |
e274795e PZ |
113 | flags &= ~MUTEX_FLAG_PICKUP; |
114 | } else { | |
115 | #ifdef CONFIG_DEBUG_MUTEXES | |
116 | DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP); | |
117 | #endif | |
9d659ae1 PZ |
118 | } |
119 | ||
120 | /* | |
121 | * If we set the HANDOFF bit, we must make sure it doesn't live | |
122 | * past the point where we acquire it. This would be possible | |
123 | * if we (accidentally) set the bit on an unlocked mutex. | |
124 | */ | |
e274795e | 125 | flags &= ~MUTEX_FLAG_HANDOFF; |
3ca0ff57 | 126 | |
ab4e4d9f | 127 | if (atomic_long_try_cmpxchg_acquire(&lock->owner, &owner, curr | flags)) |
e274795e | 128 | return NULL; |
3ca0ff57 | 129 | } |
e274795e PZ |
130 | |
131 | return __owner_task(owner); | |
132 | } | |
133 | ||
134 | /* | |
135 | * Actual trylock that will work on any unlocked state. | |
136 | */ | |
137 | static inline bool __mutex_trylock(struct mutex *lock) | |
138 | { | |
139 | return !__mutex_trylock_or_owner(lock); | |
3ca0ff57 PZ |
140 | } |
141 | ||
142 | #ifndef CONFIG_DEBUG_LOCK_ALLOC | |
143 | /* | |
144 | * Lockdep annotations are confined to the slow paths for simplicity. | |
145 | * There is nothing that would stop spreading the lockdep annotations outwards | |
146 | * except more code. | |
147 | */ | |
148 | ||
149 | /* | |
150 | * Optimistic trylock that only works in the uncontended case. Make sure to | |
151 | * follow with a __mutex_trylock() before failing. | |
152 | */ | |
153 | static __always_inline bool __mutex_trylock_fast(struct mutex *lock) | |
154 | { | |
155 | unsigned long curr = (unsigned long)current; | |
c427f695 | 156 | unsigned long zero = 0UL; |
3ca0ff57 | 157 | |
c427f695 | 158 | if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr)) |
3ca0ff57 PZ |
159 | return true; |
160 | ||
161 | return false; | |
162 | } | |
163 | ||
164 | static __always_inline bool __mutex_unlock_fast(struct mutex *lock) | |
165 | { | |
166 | unsigned long curr = (unsigned long)current; | |
167 | ||
ab4e4d9f | 168 | return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL); |
3ca0ff57 PZ |
169 | } |
170 | #endif | |
171 | ||
172 | static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag) | |
173 | { | |
174 | atomic_long_or(flag, &lock->owner); | |
175 | } | |
176 | ||
177 | static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) | |
178 | { | |
179 | atomic_long_andnot(flag, &lock->owner); | |
180 | } | |
181 | ||
9d659ae1 PZ |
182 | static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter) |
183 | { | |
184 | return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter; | |
185 | } | |
186 | ||
08295b3b TH |
187 | /* |
188 | * Add @waiter to a given location in the lock wait_list and set the | |
189 | * FLAG_WAITERS flag if it's the first waiter. | |
190 | */ | |
191 | static void __sched | |
192 | __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, | |
193 | struct list_head *list) | |
194 | { | |
195 | debug_mutex_add_waiter(lock, waiter, current); | |
196 | ||
197 | list_add_tail(&waiter->list, list); | |
198 | if (__mutex_waiter_is_first(lock, waiter)) | |
199 | __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); | |
200 | } | |
201 | ||
9d659ae1 PZ |
202 | /* |
203 | * Give up ownership to a specific task; when @task = NULL, this is equivalent | |
e2db7592 | 204 | * to a regular unlock. Sets PICKUP on a handoff, clears HANDOFF, preserves |
e274795e PZ |
205 | * WAITERS. Provides RELEASE semantics like a regular unlock, the |
206 | * __mutex_trylock() provides matching ACQUIRE semantics for the handoff. |
9d659ae1 PZ |
207 | */ |
208 | static void __mutex_handoff(struct mutex *lock, struct task_struct *task) | |
209 | { | |
210 | unsigned long owner = atomic_long_read(&lock->owner); | |
211 | ||
212 | for (;;) { | |
ab4e4d9f | 213 | unsigned long new; |
9d659ae1 PZ |
214 | |
215 | #ifdef CONFIG_DEBUG_MUTEXES | |
216 | DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); | |
e274795e | 217 | DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); |
9d659ae1 PZ |
218 | #endif |
219 | ||
220 | new = (owner & MUTEX_FLAG_WAITERS); | |
221 | new |= (unsigned long)task; | |
e274795e PZ |
222 | if (task) |
223 | new |= MUTEX_FLAG_PICKUP; | |
9d659ae1 | 224 | |
ab4e4d9f | 225 | if (atomic_long_try_cmpxchg_release(&lock->owner, &owner, new)) |
9d659ae1 | 226 | break; |
9d659ae1 PZ |
227 | } |
228 | } | |
229 | ||
e4564f79 | 230 | #ifndef CONFIG_DEBUG_LOCK_ALLOC |
6053ee3b IM |
231 | /* |
232 | * We split the mutex lock/unlock logic into separate fastpath and | |
233 | * slowpath functions, to reduce the register pressure on the fastpath. | |
234 | * We also put the fastpath first in the kernel image, to make sure the | |
235 | * branch is predicted by the CPU as default-untaken. | |
236 | */ | |
3ca0ff57 | 237 | static void __sched __mutex_lock_slowpath(struct mutex *lock); |
6053ee3b | 238 | |
ef5dc121 | 239 | /** |
6053ee3b IM |
240 | * mutex_lock - acquire the mutex |
241 | * @lock: the mutex to be acquired | |
242 | * | |
243 | * Lock the mutex exclusively for this task. If the mutex is not | |
244 | * available right now, it will sleep until it can get it. | |
245 | * | |
246 | * The mutex must later be released by the same task that | |
247 | * acquired it. Recursive locking is not allowed. The task | |
248 | * may not exit without first unlocking the mutex. Also, kernel | |
139b6fd2 | 249 | * memory where the mutex resides must not be freed with |
6053ee3b IM |
250 | * the mutex still locked. The mutex must first be initialized |
251 | * (or statically defined) before it can be locked. memset()-ing | |
252 | * the mutex to 0 is not allowed. | |
253 | * | |
7b4ff1ad MCC |
254 | * (The CONFIG_DEBUG_MUTEXES .config option turns on debugging |
255 | * checks that will enforce the restrictions and will also do | |
256 | * deadlock debugging) | |
6053ee3b IM |
257 | * |
258 | * This function is similar to (but not equivalent to) down(). | |
259 | */ | |
b09d2501 | 260 | void __sched mutex_lock(struct mutex *lock) |
6053ee3b | 261 | { |
c544bdb1 | 262 | might_sleep(); |
6053ee3b | 263 | |
3ca0ff57 PZ |
264 | if (!__mutex_trylock_fast(lock)) |
265 | __mutex_lock_slowpath(lock); | |
266 | } | |
6053ee3b | 267 | EXPORT_SYMBOL(mutex_lock); |
e4564f79 | 268 | #endif |
6053ee3b | 269 | |
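
The kerneldoc above spells out the usage rules for mutex_lock()/mutex_unlock(): release by the same task, no recursion, never from interrupt context. A minimal sketch of typical usage, assuming ordinary process context in kernel code; stats_lock, stats_count and update_stats() are illustrative names, not part of this file:

```c
/* Illustrative only: one mutex serializing updates to shared state. */
#include <linux/mutex.h>

static DEFINE_MUTEX(stats_lock);
static unsigned long stats_count;

void update_stats(unsigned long delta)
{
	mutex_lock(&stats_lock);	/* may sleep; never call from interrupt context */
	stats_count += delta;
	mutex_unlock(&stats_lock);	/* must be released by the same task that locked it */
}
```
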
55f036ca PZ |
270 | /* |
271 | * Wait-Die: | |
272 | * The newer transactions are killed when: | |
273 | * It (the new transaction) makes a request for a lock being held | |
274 | * by an older transaction. | |
08295b3b TH |
275 | * |
276 | * Wound-Wait: | |
277 | * The newer transactions are wounded when: | |
278 | * An older transaction makes a request for a lock being held by | |
279 | * the newer transaction. | |
55f036ca PZ |
280 | */ |
281 | ||
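
The two comment blocks above define the policies purely in terms of transaction age. As a hedged, standalone model (not kernel code) of just the decision rule, with a smaller stamp meaning an older transaction:

```c
/* Standalone model of Wait-Die vs. Wound-Wait; smaller stamp = older transaction. */
#include <stdio.h>

enum action { WAIT, DIE, WOUND_HOLDER };

/* Wait-Die: an older requester waits, a younger requester dies. */
static enum action wait_die(unsigned long requester, unsigned long holder)
{
	return requester < holder ? WAIT : DIE;
}

/* Wound-Wait: an older requester wounds the holder, a younger requester waits. */
static enum action wound_wait(unsigned long requester, unsigned long holder)
{
	return requester < holder ? WOUND_HOLDER : WAIT;
}

int main(void)
{
	printf("wait-die,   younger requester: %s\n",
	       wait_die(2, 1) == DIE ? "dies" : "waits");
	printf("wound-wait, older requester:   %s\n",
	       wound_wait(1, 2) == WOUND_HOLDER ? "wounds holder" : "waits");
	return 0;
}
```
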
282 | /* | |
283 | * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired | |
284 | * it. | |
285 | */ | |
427b1820 PZ |
286 | static __always_inline void |
287 | ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) | |
76916515 DB |
288 | { |
289 | #ifdef CONFIG_DEBUG_MUTEXES | |
290 | /* | |
291 | * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | |
292 | * but released with a normal mutex_unlock in this call. | |
293 | * | |
294 | * This should never happen, always use ww_mutex_unlock. | |
295 | */ | |
296 | DEBUG_LOCKS_WARN_ON(ww->ctx); | |
297 | ||
298 | /* | |
299 | * Not quite done after calling ww_acquire_done() ? | |
300 | */ | |
301 | DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | |
302 | ||
303 | if (ww_ctx->contending_lock) { | |
304 | /* | |
305 | * After -EDEADLK you tried to | |
306 | * acquire a different ww_mutex? Bad! | |
307 | */ | |
308 | DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | |
309 | ||
310 | /* | |
311 | * You called ww_mutex_lock after receiving -EDEADLK, | |
312 | * but 'forgot' to unlock everything else first? | |
313 | */ | |
314 | DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | |
315 | ww_ctx->contending_lock = NULL; | |
316 | } | |
317 | ||
318 | /* | |
319 | * Naughty, using a different class will lead to undefined behavior! | |
320 | */ | |
321 | DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | |
322 | #endif | |
323 | ww_ctx->acquired++; | |
55f036ca | 324 | ww->ctx = ww_ctx; |
76916515 DB |
325 | } |
326 | ||
55f036ca PZ |
327 | /* |
328 | * Determine if context @a is 'after' context @b. IOW, @a is a younger | |
329 | * transaction than @b and depending on algorithm either needs to wait for | |
330 | * @b or die. | |
331 | */ | |
3822da3e NH |
332 | static inline bool __sched |
333 | __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) | |
334 | { | |
55f036ca PZ |
335 | |
336 | return (signed long)(a->stamp - b->stamp) > 0; | |
337 | } | |
338 | ||
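
__ww_ctx_stamp_after() decides relative age with a signed view of the unsigned stamp difference, so the comparison keeps working when the stamp counter wraps. A standalone sketch of the same idiom (illustrative names, not kernel code):

```c
/* Standalone illustration of the wrap-safe stamp comparison. */
#include <assert.h>
#include <limits.h>

/* "Is a younger than b?" -- the signed difference survives counter wrap-around. */
static int stamp_after(unsigned long a, unsigned long b)
{
	return (long)(a - b) > 0;
}

int main(void)
{
	assert(stamp_after(2, 1));		/* plain case: 2 was issued after 1 */
	assert(!stamp_after(1, 2));
	assert(stamp_after(0, ULONG_MAX));	/* 0 was issued just after the counter wrapped */
	return 0;
}
```
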
339 | /* | |
340 | * Wait-Die; wake a younger waiter context (when locks held) such that it can | |
341 | * die. | |
342 | * | |
343 | * Among waiters with context, only the first one can have other locks acquired | |
344 | * already (ctx->acquired > 0), because __ww_mutex_add_waiter() and | |
345 | * __ww_mutex_check_kill() wake any but the earliest context. | |
346 | */ | |
347 | static bool __sched | |
348 | __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, | |
349 | struct ww_acquire_ctx *ww_ctx) | |
350 | { | |
08295b3b TH |
351 | if (!ww_ctx->is_wait_die) |
352 | return false; | |
353 | ||
55f036ca PZ |
354 | if (waiter->ww_ctx->acquired > 0 && |
355 | __ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) { | |
356 | debug_mutex_wake_waiter(lock, waiter); | |
357 | wake_up_process(waiter->task); | |
358 | } | |
359 | ||
360 | return true; | |
3822da3e NH |
361 | } |
362 | ||
08295b3b TH |
363 | /* |
364 | * Wound-Wait; wound a younger @hold_ctx if it holds the lock. | |
365 | * | |
366 | * Wound the lock holder if there are waiters with older transactions than | |
367 | * the lock holders. Even if multiple waiters may wound the lock holder, | |
368 | * it's sufficient that only one does. | |
369 | */ | |
370 | static bool __ww_mutex_wound(struct mutex *lock, | |
371 | struct ww_acquire_ctx *ww_ctx, | |
372 | struct ww_acquire_ctx *hold_ctx) | |
373 | { | |
374 | struct task_struct *owner = __mutex_owner(lock); | |
375 | ||
376 | lockdep_assert_held(&lock->wait_lock); | |
377 | ||
378 | /* | |
379 | * Possible through __ww_mutex_add_waiter() when we race with | |
380 | * ww_mutex_set_context_fastpath(). In that case we'll get here again | |
381 | * through __ww_mutex_check_waiters(). | |
382 | */ | |
383 | if (!hold_ctx) | |
384 | return false; | |
385 | ||
386 | /* | |
387 | * Can have !owner because of __mutex_unlock_slowpath(), but if owner, | |
388 | * it cannot go away because we'll have FLAG_WAITERS set and hold | |
389 | * wait_lock. | |
390 | */ | |
391 | if (!owner) | |
392 | return false; | |
393 | ||
394 | if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) { | |
395 | hold_ctx->wounded = 1; | |
396 | ||
397 | /* | |
398 | * wake_up_process() paired with set_current_state() | |
399 | * inserts sufficient barriers to make sure @owner either sees | |
e13e2366 | 400 | * it's wounded in __ww_mutex_check_kill() or has a |
08295b3b TH |
401 | * wakeup pending to re-read the wounded state. |
402 | */ | |
403 | if (owner != current) | |
404 | wake_up_process(owner); | |
405 | ||
406 | return true; | |
407 | } | |
408 | ||
409 | return false; | |
410 | } | |
411 | ||
659cf9f5 | 412 | /* |
55f036ca | 413 | * We just acquired @lock under @ww_ctx, if there are later contexts waiting |
08295b3b | 414 | * behind us on the wait-list, check if they need to die, or wound us. |
659cf9f5 | 415 | * |
55f036ca PZ |
416 | * See __ww_mutex_add_waiter() for the list-order construction; basically the |
417 | * list is ordered by stamp, smallest (oldest) first. | |
659cf9f5 | 418 | * |
08295b3b TH |
419 | * This relies on never mixing wait-die/wound-wait on the same wait-list; |
420 | * which is currently ensured by that being a ww_class property. | |
421 | * | |
659cf9f5 NH |
422 | * The current task must not be on the wait list. |
423 | */ | |
424 | static void __sched | |
55f036ca | 425 | __ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) |
659cf9f5 NH |
426 | { |
427 | struct mutex_waiter *cur; | |
428 | ||
429 | lockdep_assert_held(&lock->wait_lock); | |
430 | ||
431 | list_for_each_entry(cur, &lock->wait_list, list) { | |
432 | if (!cur->ww_ctx) | |
433 | continue; | |
434 | ||
08295b3b TH |
435 | if (__ww_mutex_die(lock, cur, ww_ctx) || |
436 | __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) | |
55f036ca | 437 | break; |
659cf9f5 NH |
438 | } |
439 | } | |
440 | ||
76916515 | 441 | /* |
55f036ca PZ |
442 | * After acquiring lock with fastpath, where we do not hold wait_lock, set ctx |
443 | * and wake up any waiters so they can recheck. | |
76916515 DB |
444 | */ |
445 | static __always_inline void | |
427b1820 | 446 | ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
76916515 | 447 | { |
76916515 DB |
448 | ww_mutex_lock_acquired(lock, ctx); |
449 | ||
76916515 DB |
450 | /* |
451 | * The lock->ctx update should be visible on all cores before | |
55f036ca | 452 | * the WAITERS check is done, otherwise contended waiters might be |
76916515 DB |
453 | * missed. The contended waiters will either see ww_ctx == NULL |
454 | * and keep spinning, or they will acquire wait_lock, add themselves | |
455 | * to the waiter list and sleep. | |
456 | */ | |
08295b3b | 457 | smp_mb(); /* See comments above and below. */ |
76916515 DB |
458 | |
459 | /* | |
08295b3b TH |
460 | * [W] ww->ctx = ctx [W] MUTEX_FLAG_WAITERS |
461 | * MB MB | |
462 | * [R] MUTEX_FLAG_WAITERS [R] ww->ctx | |
463 | * | |
464 | * The memory barrier above pairs with the memory barrier in | |
465 | * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx | |
466 | * and/or !empty list. | |
76916515 | 467 | */ |
3ca0ff57 | 468 | if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) |
76916515 DB |
469 | return; |
470 | ||
471 | /* | |
55f036ca | 472 | * Uh oh, we raced in fastpath, check if any of the waiters need to |
08295b3b | 473 | * die or wound us. |
76916515 | 474 | */ |
b9c16a0e | 475 | spin_lock(&lock->base.wait_lock); |
55f036ca | 476 | __ww_mutex_check_waiters(&lock->base, ctx); |
b9c16a0e | 477 | spin_unlock(&lock->base.wait_lock); |
76916515 DB |
478 | } |
479 | ||
41fcb9f2 | 480 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
c516df97 NH |
481 | |
482 | static inline | |
483 | bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, | |
484 | struct mutex_waiter *waiter) | |
485 | { | |
486 | struct ww_mutex *ww; | |
487 | ||
488 | ww = container_of(lock, struct ww_mutex, base); | |
4bd19084 DB |
489 | |
490 | /* | |
c516df97 NH |
491 | * If ww->ctx is set, its contents are undefined; only |
492 | * by acquiring wait_lock is there a guarantee that | |
493 | * they are valid when read. | |
494 | * | |
495 | * As such, when deadlock detection needs to be | |
496 | * performed the optimistic spinning cannot be done. | |
497 | * | |
498 | * Check this in every inner iteration because we may | |
499 | * be racing against another thread's ww_mutex_lock. | |
4bd19084 | 500 | */ |
c516df97 NH |
501 | if (ww_ctx->acquired > 0 && READ_ONCE(ww->ctx)) |
502 | return false; | |
503 | ||
504 | /* | |
505 | * If we aren't on the wait list yet, cancel the spin | |
506 | * if there are waiters. We want to avoid stealing the | |
507 | * lock from a waiter with an earlier stamp, since the | |
508 | * other thread may already own a lock that we also | |
509 | * need. | |
510 | */ | |
511 | if (!waiter && (atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS)) | |
512 | return false; | |
513 | ||
514 | /* | |
515 | * Similarly, stop spinning if we are no longer the | |
516 | * first waiter. | |
517 | */ | |
518 | if (waiter && !__mutex_waiter_is_first(lock, waiter)) | |
519 | return false; | |
520 | ||
521 | return true; | |
4bd19084 | 522 | } |
76916515 | 523 | |
41fcb9f2 | 524 | /* |
25f13b40 NH |
525 | * Look out! "owner" is an entirely speculative pointer access and not |
526 | * reliable. | |
527 | * | |
528 | * "noinline" so that this function shows up on perf profiles. | |
41fcb9f2 WL |
529 | */ |
530 | static noinline | |
25f13b40 | 531 | bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, |
c516df97 | 532 | struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter) |
41fcb9f2 | 533 | { |
01ac33c1 | 534 | bool ret = true; |
be1f7bf2 | 535 | |
41fcb9f2 | 536 | rcu_read_lock(); |
3ca0ff57 | 537 | while (__mutex_owner(lock) == owner) { |
be1f7bf2 JL |
538 | /* |
539 | * Ensure we emit the owner->on_cpu dereference _after_ | |
01ac33c1 JL |
540 | * checking lock->owner still matches owner. If that fails, |
541 | * owner might point to freed memory. If it still matches, | |
be1f7bf2 JL |
542 | * the rcu_read_lock() ensures the memory stays valid. |
543 | */ | |
544 | barrier(); | |
545 | ||
05ffc951 PX |
546 | /* |
547 | * Use vcpu_is_preempted() to detect lock holder preemption. | |
548 | */ | |
549 | if (!owner->on_cpu || need_resched() || | |
550 | vcpu_is_preempted(task_cpu(owner))) { | |
be1f7bf2 JL |
551 | ret = false; |
552 | break; | |
553 | } | |
41fcb9f2 | 554 | |
c516df97 NH |
555 | if (ww_ctx && !ww_mutex_spin_on_owner(lock, ww_ctx, waiter)) { |
556 | ret = false; | |
557 | break; | |
25f13b40 NH |
558 | } |
559 | ||
f2f09a4c | 560 | cpu_relax(); |
41fcb9f2 WL |
561 | } |
562 | rcu_read_unlock(); | |
563 | ||
be1f7bf2 | 564 | return ret; |
41fcb9f2 | 565 | } |
2bd2c92c WL |
566 | |
567 | /* | |
568 | * Initial check for entering the mutex spinning loop | |
569 | */ | |
570 | static inline int mutex_can_spin_on_owner(struct mutex *lock) | |
571 | { | |
1e40c2ed | 572 | struct task_struct *owner; |
2bd2c92c WL |
573 | int retval = 1; |
574 | ||
46af29e4 JL |
575 | if (need_resched()) |
576 | return 0; | |
577 | ||
2bd2c92c | 578 | rcu_read_lock(); |
3ca0ff57 | 579 | owner = __mutex_owner(lock); |
05ffc951 PX |
580 | |
581 | /* | |
582 | * Due to the lock holder preemption issue, we skip spinning if the task is | |
583 | * not on a CPU or its CPU is preempted. | |
584 | */ | |
1e40c2ed | 585 | if (owner) |
05ffc951 | 586 | retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); |
2bd2c92c | 587 | rcu_read_unlock(); |
3ca0ff57 | 588 | |
2bd2c92c | 589 | /* |
3ca0ff57 PZ |
590 | * If lock->owner is not set, the mutex has been released. Return true |
591 | * such that we'll trylock in the spin path, which is a faster option | |
592 | * than the blocking slow path. | |
2bd2c92c WL |
593 | */ |
594 | return retval; | |
595 | } | |
76916515 | 596 | |
76916515 DB |
597 | /* |
598 | * Optimistic spinning. | |
599 | * | |
600 | * We try to spin for acquisition when we find that the lock owner | |
601 | * is currently running on a (different) CPU and while we don't | |
602 | * need to reschedule. The rationale is that if the lock owner is | |
603 | * running, it is likely to release the lock soon. | |
604 | * | |
76916515 DB |
605 | * The mutex spinners are queued up using MCS lock so that only one |
606 | * spinner can compete for the mutex. However, if mutex spinning isn't | |
607 | * going to happen, there is no point in going through the lock/unlock | |
608 | * overhead. | |
609 | * | |
610 | * Returns true when the lock was taken, otherwise false, indicating | |
611 | * that we need to jump to the slowpath and sleep. | |
b341afb3 WL |
612 | * |
613 | * The waiter flag is set to true if the spinner is a waiter in the wait | |
614 | * queue. The waiter-spinner will spin on the lock directly and concurrently | |
615 | * with the spinner at the head of the OSQ, if present, until the owner is | |
616 | * changed to itself. | |
76916515 | 617 | */ |
427b1820 PZ |
618 | static __always_inline bool |
619 | mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, | |
5de2055d | 620 | struct mutex_waiter *waiter) |
76916515 | 621 | { |
b341afb3 WL |
622 | if (!waiter) { |
623 | /* | |
624 | * The purpose of the mutex_can_spin_on_owner() function is | |
625 | * to eliminate the overhead of osq_lock() and osq_unlock() | |
626 | * in case spinning isn't possible. As a waiter-spinner | |
627 | * is not going to take OSQ lock anyway, there is no need | |
628 | * to call mutex_can_spin_on_owner(). | |
629 | */ | |
630 | if (!mutex_can_spin_on_owner(lock)) | |
631 | goto fail; | |
76916515 | 632 | |
b341afb3 WL |
633 | /* |
634 | * In order to avoid a stampede of mutex spinners trying to | |
635 | * acquire the mutex all at once, the spinners need to take a | |
636 | * MCS (queued) lock first before spinning on the owner field. | |
637 | */ | |
638 | if (!osq_lock(&lock->osq)) | |
639 | goto fail; | |
640 | } | |
76916515 | 641 | |
b341afb3 | 642 | for (;;) { |
76916515 DB |
643 | struct task_struct *owner; |
644 | ||
e274795e PZ |
645 | /* Try to acquire the mutex... */ |
646 | owner = __mutex_trylock_or_owner(lock); | |
647 | if (!owner) | |
648 | break; | |
76916515 DB |
649 | |
650 | /* | |
e274795e | 651 | * There's an owner, wait for it to either |
76916515 DB |
652 | * release the lock or go to sleep. |
653 | */ | |
c516df97 | 654 | if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter)) |
e274795e | 655 | goto fail_unlock; |
b341afb3 | 656 | |
76916515 DB |
657 | /* |
658 | * The cpu_relax() call is a compiler barrier which forces | |
659 | * everything in this loop to be re-loaded. We don't need | |
660 | * memory barriers as we'll eventually observe the right | |
661 | * values at the cost of a few extra spins. | |
662 | */ | |
f2f09a4c | 663 | cpu_relax(); |
76916515 DB |
664 | } |
665 | ||
b341afb3 WL |
666 | if (!waiter) |
667 | osq_unlock(&lock->osq); | |
668 | ||
669 | return true; | |
670 | ||
671 | ||
672 | fail_unlock: | |
673 | if (!waiter) | |
674 | osq_unlock(&lock->osq); | |
675 | ||
676 | fail: | |
76916515 DB |
677 | /* |
678 | * If we fell out of the spin path because of need_resched(), | |
679 | * reschedule now, before we try-lock the mutex. This avoids getting | |
680 | * scheduled out right after we obtained the mutex. | |
681 | */ | |
6f942a1f PZ |
682 | if (need_resched()) { |
683 | /* | |
684 | * We _should_ have TASK_RUNNING here, but just in case | |
685 | * we do not, make it so, otherwise we might get stuck. | |
686 | */ | |
687 | __set_current_state(TASK_RUNNING); | |
76916515 | 688 | schedule_preempt_disabled(); |
6f942a1f | 689 | } |
76916515 DB |
690 | |
691 | return false; | |
692 | } | |
693 | #else | |
427b1820 PZ |
694 | static __always_inline bool |
695 | mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, | |
5de2055d | 696 | struct mutex_waiter *waiter) |
76916515 DB |
697 | { |
698 | return false; | |
699 | } | |
41fcb9f2 WL |
700 | #endif |
701 | ||
3ca0ff57 | 702 | static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip); |
6053ee3b | 703 | |
ef5dc121 | 704 | /** |
6053ee3b IM |
705 | * mutex_unlock - release the mutex |
706 | * @lock: the mutex to be released | |
707 | * | |
708 | * Unlock a mutex that has been locked by this task previously. | |
709 | * | |
710 | * This function must not be used in interrupt context. Unlocking | |
711 | * a mutex that is not locked is not allowed. | |
712 | * | |
713 | * This function is similar to (but not equivalent to) up(). | |
714 | */ | |
7ad5b3a5 | 715 | void __sched mutex_unlock(struct mutex *lock) |
6053ee3b | 716 | { |
3ca0ff57 PZ |
717 | #ifndef CONFIG_DEBUG_LOCK_ALLOC |
718 | if (__mutex_unlock_fast(lock)) | |
719 | return; | |
0d66bf6d | 720 | #endif |
3ca0ff57 | 721 | __mutex_unlock_slowpath(lock, _RET_IP_); |
6053ee3b | 722 | } |
6053ee3b IM |
723 | EXPORT_SYMBOL(mutex_unlock); |
724 | ||
040a0a37 ML |
725 | /** |
726 | * ww_mutex_unlock - release the w/w mutex | |
727 | * @lock: the mutex to be released | |
728 | * | |
729 | * Unlock a mutex that has been locked by this task previously with any of the | |
730 | * ww_mutex_lock* functions (with or without an acquire context). It is | |
731 | * forbidden to release the locks after releasing the acquire context. | |
732 | * | |
733 | * This function must not be used in interrupt context. Unlocking | |
734 | * of an unlocked mutex is not allowed. | |
735 | */ | |
736 | void __sched ww_mutex_unlock(struct ww_mutex *lock) | |
737 | { | |
738 | /* | |
739 | * The unlocking fastpath is the 0->1 transition from 'locked' | |
740 | * into 'unlocked' state: | |
741 | */ | |
742 | if (lock->ctx) { | |
743 | #ifdef CONFIG_DEBUG_MUTEXES | |
744 | DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | |
745 | #endif | |
746 | if (lock->ctx->acquired > 0) | |
747 | lock->ctx->acquired--; | |
748 | lock->ctx = NULL; | |
749 | } | |
750 | ||
3ca0ff57 | 751 | mutex_unlock(&lock->base); |
040a0a37 ML |
752 | } |
753 | EXPORT_SYMBOL(ww_mutex_unlock); | |
754 | ||
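
ww_mutex_lock()/ww_mutex_unlock() are meant to be used under an acquire context with an -EDEADLK backoff loop, as described in Documentation/locking/ww-mutex-design.rst. A hedged sketch of that pattern for two locks; my_class, struct buf and lock_pair() are invented for illustration:

```c
/*
 * Hedged sketch of the documented acquire pattern (two locks, -EDEADLK
 * backoff). Illustrative names only.
 */
#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(my_class);

struct buf {
	struct ww_mutex lock;
	/* ... payload ... */
};

static int lock_pair(struct buf *a, struct buf *b, struct ww_acquire_ctx *ctx)
{
	struct buf *tmp;
	int ret;

	ww_acquire_init(ctx, &my_class);

	ret = ww_mutex_lock(&a->lock, ctx);
	if (ret)				/* -EDEADLK; nothing is held yet */
		goto out_fini;

	while ((ret = ww_mutex_lock(&b->lock, ctx)) == -EDEADLK) {
		/*
		 * We are the younger transaction and lost: drop everything,
		 * sleep on the contended lock, then retry the other one.
		 */
		ww_mutex_unlock(&a->lock);
		ww_mutex_lock_slow(&b->lock, ctx);
		tmp = a;
		a = b;				/* the lock we now hold is "a" again */
		b = tmp;
	}
	if (!ret) {
		ww_acquire_done(ctx);		/* both held; no more locks in this ctx */
		return 0;
	}

	ww_mutex_unlock(&a->lock);
out_fini:
	ww_acquire_fini(ctx);
	return ret;
}
```

On success the context stays live: the caller uses the buffers, then calls ww_mutex_unlock() on both locks and finally ww_acquire_fini(ctx).
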
55f036ca PZ |
755 | |
756 | static __always_inline int __sched | |
757 | __ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
758 | { | |
759 | if (ww_ctx->acquired > 0) { | |
760 | #ifdef CONFIG_DEBUG_MUTEXES | |
761 | struct ww_mutex *ww; | |
762 | ||
763 | ww = container_of(lock, struct ww_mutex, base); | |
764 | DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock); | |
765 | ww_ctx->contending_lock = ww; | |
766 | #endif | |
767 | return -EDEADLK; | |
768 | } | |
769 | ||
770 | return 0; | |
771 | } | |
772 | ||
773 | ||
774 | /* | |
08295b3b TH |
775 | * Check the wound condition for the current lock acquire. |
776 | * | |
777 | * Wound-Wait: If we're wounded, kill ourselves. | |
55f036ca PZ |
778 | * |
779 | * Wait-Die: If we're trying to acquire a lock already held by an older | |
780 | * context, kill ourselves. | |
781 | * | |
782 | * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to | |
783 | * look at waiters before us in the wait-list. | |
784 | */ | |
040a0a37 | 785 | static inline int __sched |
55f036ca PZ |
786 | __ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, |
787 | struct ww_acquire_ctx *ctx) | |
040a0a37 ML |
788 | { |
789 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); | |
4d3199e4 | 790 | struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); |
200b1874 | 791 | struct mutex_waiter *cur; |
040a0a37 | 792 | |
55f036ca PZ |
793 | if (ctx->acquired == 0) |
794 | return 0; | |
795 | ||
08295b3b TH |
796 | if (!ctx->is_wait_die) { |
797 | if (ctx->wounded) | |
798 | return __ww_mutex_kill(lock, ctx); | |
799 | ||
800 | return 0; | |
801 | } | |
802 | ||
200b1874 | 803 | if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) |
55f036ca | 804 | return __ww_mutex_kill(lock, ctx); |
040a0a37 | 805 | |
200b1874 NH |
806 | /* |
807 | * If there is a waiter in front of us that has a context, then its | |
55f036ca | 808 | * stamp is earlier than ours and we must kill ourselves. |
200b1874 NH |
809 | */ |
810 | cur = waiter; | |
811 | list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) { | |
55f036ca PZ |
812 | if (!cur->ww_ctx) |
813 | continue; | |
814 | ||
815 | return __ww_mutex_kill(lock, ctx); | |
040a0a37 ML |
816 | } |
817 | ||
818 | return 0; | |
819 | } | |
820 | ||
55f036ca PZ |
821 | /* |
822 | * Add @waiter to the wait-list, keep the wait-list ordered by stamp, smallest | |
823 | * first. Such that older contexts are preferred to acquire the lock over | |
824 | * younger contexts. | |
825 | * | |
826 | * Waiters without context are interspersed in FIFO order. | |
827 | * | |
828 | * Furthermore, for Wait-Die, kill ourselves immediately when possible (there are | |
08295b3b TH |
829 | * older contexts already waiting) to avoid unnecessary waiting and for |
830 | * Wound-Wait ensure we wound the owning context when it is younger. | |
55f036ca | 831 | */ |
6baa5c60 NH |
832 | static inline int __sched |
833 | __ww_mutex_add_waiter(struct mutex_waiter *waiter, | |
834 | struct mutex *lock, | |
835 | struct ww_acquire_ctx *ww_ctx) | |
836 | { | |
837 | struct mutex_waiter *cur; | |
838 | struct list_head *pos; | |
08295b3b | 839 | bool is_wait_die; |
6baa5c60 NH |
840 | |
841 | if (!ww_ctx) { | |
08295b3b | 842 | __mutex_add_waiter(lock, waiter, &lock->wait_list); |
040a0a37 | 843 | return 0; |
6baa5c60 | 844 | } |
040a0a37 | 845 | |
08295b3b TH |
846 | is_wait_die = ww_ctx->is_wait_die; |
847 | ||
6baa5c60 NH |
848 | /* |
849 | * Add the waiter before the first waiter with a higher stamp. | |
850 | * Waiters without a context are skipped to avoid starving | |
08295b3b TH |
851 | * them. Wait-Die waiters may die here. Wound-Wait waiters |
852 | * never die here, but they are sorted in stamp order and | |
853 | * may wound the lock holder. | |
6baa5c60 NH |
854 | */ |
855 | pos = &lock->wait_list; | |
856 | list_for_each_entry_reverse(cur, &lock->wait_list, list) { | |
857 | if (!cur->ww_ctx) | |
858 | continue; | |
859 | ||
860 | if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { | |
55f036ca PZ |
861 | /* |
862 | * Wait-Die: if we find an older context waiting, there | |
863 | * is no point in queueing behind it, as we'd have to | |
864 | * die the moment it would acquire the lock. | |
865 | */ | |
08295b3b TH |
866 | if (is_wait_die) { |
867 | int ret = __ww_mutex_kill(lock, ww_ctx); | |
6baa5c60 | 868 | |
08295b3b TH |
869 | if (ret) |
870 | return ret; | |
871 | } | |
6baa5c60 NH |
872 | |
873 | break; | |
874 | } | |
875 | ||
876 | pos = &cur->list; | |
200b1874 | 877 | |
55f036ca PZ |
878 | /* Wait-Die: ensure younger waiters die. */ |
879 | __ww_mutex_die(lock, cur, ww_ctx); | |
040a0a37 ML |
880 | } |
881 | ||
08295b3b TH |
882 | __mutex_add_waiter(lock, waiter, pos); |
883 | ||
884 | /* | |
885 | * Wound-Wait: if we're blocking on a mutex owned by a younger context, | |
886 | * wound it so that we might proceed. | |
887 | */ | |
888 | if (!is_wait_die) { | |
889 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); | |
890 | ||
891 | /* | |
892 | * See ww_mutex_set_context_fastpath(). Orders setting | |
893 | * MUTEX_FLAG_WAITERS vs the ww->ctx load, | |
894 | * such that either we or the fastpath will wound @ww->ctx. | |
895 | */ | |
896 | smp_mb(); | |
897 | __ww_mutex_wound(lock, ww_ctx, ww->ctx); | |
898 | } | |
55f036ca | 899 | |
040a0a37 ML |
900 | return 0; |
901 | } | |
902 | ||
6053ee3b IM |
903 | /* |
904 | * Lock a mutex (possibly interruptible), slowpath: | |
905 | */ | |
040a0a37 | 906 | static __always_inline int __sched |
e4564f79 | 907 | __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, |
040a0a37 | 908 | struct lockdep_map *nest_lock, unsigned long ip, |
b0267507 | 909 | struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) |
6053ee3b | 910 | { |
6053ee3b | 911 | struct mutex_waiter waiter; |
9d659ae1 | 912 | bool first = false; |
a40ca565 | 913 | struct ww_mutex *ww; |
040a0a37 | 914 | int ret; |
6053ee3b | 915 | |
5de2055d WL |
916 | if (!use_ww_ctx) |
917 | ww_ctx = NULL; | |
918 | ||
427b1820 | 919 | might_sleep(); |
ea9e0fb8 | 920 | |
6c11c6e3 SAS |
921 | #ifdef CONFIG_DEBUG_MUTEXES |
922 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | |
923 | #endif | |
924 | ||
427b1820 | 925 | ww = container_of(lock, struct ww_mutex, base); |
5de2055d | 926 | if (ww_ctx) { |
0422e83d CW |
927 | if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) |
928 | return -EALREADY; | |
08295b3b TH |
929 | |
930 | /* | |
931 | * Reset the wounded flag after a kill. No other process can | |
932 | * race and wound us here since they can't have a valid owner | |
933 | * pointer if we don't have any locks held. | |
934 | */ | |
935 | if (ww_ctx->acquired == 0) | |
936 | ww_ctx->wounded = 0; | |
0422e83d CW |
937 | } |
938 | ||
41719b03 | 939 | preempt_disable(); |
e4c70a66 | 940 | mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); |
c0226027 | 941 | |
e274795e | 942 | if (__mutex_trylock(lock) || |
5de2055d | 943 | mutex_optimistic_spin(lock, ww_ctx, NULL)) { |
76916515 | 944 | /* got the lock, yay! */ |
3ca0ff57 | 945 | lock_acquired(&lock->dep_map, ip); |
5de2055d | 946 | if (ww_ctx) |
3ca0ff57 | 947 | ww_mutex_set_context_fastpath(ww, ww_ctx); |
76916515 DB |
948 | preempt_enable(); |
949 | return 0; | |
0d66bf6d | 950 | } |
76916515 | 951 | |
b9c16a0e | 952 | spin_lock(&lock->wait_lock); |
1e820c96 | 953 | /* |
3ca0ff57 | 954 | * After waiting to acquire the wait_lock, try again. |
1e820c96 | 955 | */ |
659cf9f5 | 956 | if (__mutex_trylock(lock)) { |
5de2055d | 957 | if (ww_ctx) |
55f036ca | 958 | __ww_mutex_check_waiters(lock, ww_ctx); |
659cf9f5 | 959 | |
ec83f425 | 960 | goto skip_wait; |
659cf9f5 | 961 | } |
ec83f425 | 962 | |
9a11b49a | 963 | debug_mutex_lock_common(lock, &waiter); |
6053ee3b | 964 | |
6baa5c60 NH |
965 | lock_contended(&lock->dep_map, ip); |
966 | ||
967 | if (!use_ww_ctx) { | |
968 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | |
08295b3b TH |
969 | __mutex_add_waiter(lock, &waiter, &lock->wait_list); |
970 | ||
977625a6 NH |
971 | |
972 | #ifdef CONFIG_DEBUG_MUTEXES | |
973 | waiter.ww_ctx = MUTEX_POISON_WW_CTX; | |
974 | #endif | |
6baa5c60 | 975 | } else { |
55f036ca PZ |
976 | /* |
977 | * Add in stamp order, waking up waiters that must kill | |
978 | * themselves. | |
979 | */ | |
6baa5c60 NH |
980 | ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); |
981 | if (ret) | |
55f036ca | 982 | goto err_early_kill; |
6baa5c60 NH |
983 | |
984 | waiter.ww_ctx = ww_ctx; | |
985 | } | |
986 | ||
d269a8b8 | 987 | waiter.task = current; |
6053ee3b | 988 | |
642fa448 | 989 | set_current_state(state); |
6053ee3b | 990 | for (;;) { |
5bbd7e64 PZ |
991 | /* |
992 | * Once we hold wait_lock, we're serialized against | |
993 | * mutex_unlock() handing the lock off to us; do a trylock | |
994 | * before testing the error conditions to make sure we pick up | |
995 | * the handoff. | |
996 | */ | |
e274795e | 997 | if (__mutex_trylock(lock)) |
5bbd7e64 | 998 | goto acquired; |
6053ee3b IM |
999 | |
1000 | /* | |
55f036ca | 1001 | * Check for signals and kill conditions while holding |
5bbd7e64 PZ |
1002 | * wait_lock. This ensures the lock cancellation is ordered |
1003 | * against mutex_unlock() and wake-ups do not go missing. | |
6053ee3b | 1004 | */ |
3bb5f4ac | 1005 | if (signal_pending_state(state, current)) { |
040a0a37 ML |
1006 | ret = -EINTR; |
1007 | goto err; | |
1008 | } | |
6053ee3b | 1009 | |
5de2055d | 1010 | if (ww_ctx) { |
55f036ca | 1011 | ret = __ww_mutex_check_kill(lock, &waiter, ww_ctx); |
040a0a37 ML |
1012 | if (ret) |
1013 | goto err; | |
6053ee3b | 1014 | } |
040a0a37 | 1015 | |
b9c16a0e | 1016 | spin_unlock(&lock->wait_lock); |
bd2f5536 | 1017 | schedule_preempt_disabled(); |
9d659ae1 | 1018 | |
6baa5c60 NH |
1019 | /* |
1020 | * ww_mutex needs to always recheck its position since its waiter | |
1021 | * list is not FIFO ordered. | |
1022 | */ | |
5de2055d | 1023 | if (ww_ctx || !first) { |
6baa5c60 NH |
1024 | first = __mutex_waiter_is_first(lock, &waiter); |
1025 | if (first) | |
1026 | __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); | |
9d659ae1 | 1027 | } |
5bbd7e64 | 1028 | |
642fa448 | 1029 | set_current_state(state); |
5bbd7e64 PZ |
1030 | /* |
1031 | * Here we order against unlock; we must either see it change | |
1032 | * state back to RUNNING and fall through the next schedule(), | |
1033 | * or we must see its unlock and acquire. | |
1034 | */ | |
e274795e | 1035 | if (__mutex_trylock(lock) || |
5de2055d | 1036 | (first && mutex_optimistic_spin(lock, ww_ctx, &waiter))) |
5bbd7e64 PZ |
1037 | break; |
1038 | ||
b9c16a0e | 1039 | spin_lock(&lock->wait_lock); |
6053ee3b | 1040 | } |
b9c16a0e | 1041 | spin_lock(&lock->wait_lock); |
5bbd7e64 | 1042 | acquired: |
642fa448 | 1043 | __set_current_state(TASK_RUNNING); |
51587bcf | 1044 | |
5de2055d | 1045 | if (ww_ctx) { |
08295b3b TH |
1046 | /* |
1047 | * Wound-Wait; we stole the lock (!first_waiter), check the | |
1048 | * waiters as anyone might want to wound us. | |
1049 | */ | |
1050 | if (!ww_ctx->is_wait_die && | |
1051 | !__mutex_waiter_is_first(lock, &waiter)) | |
1052 | __ww_mutex_check_waiters(lock, ww_ctx); | |
1053 | } | |
1054 | ||
d269a8b8 | 1055 | mutex_remove_waiter(lock, &waiter, current); |
ec83f425 | 1056 | if (likely(list_empty(&lock->wait_list))) |
9d659ae1 | 1057 | __mutex_clear_flag(lock, MUTEX_FLAGS); |
3ca0ff57 | 1058 | |
ec83f425 | 1059 | debug_mutex_free_waiter(&waiter); |
6053ee3b | 1060 | |
ec83f425 DB |
1061 | skip_wait: |
1062 | /* got the lock - cleanup and rejoice! */ | |
c7e78cff | 1063 | lock_acquired(&lock->dep_map, ip); |
6053ee3b | 1064 | |
5de2055d | 1065 | if (ww_ctx) |
55f036ca | 1066 | ww_mutex_lock_acquired(ww, ww_ctx); |
040a0a37 | 1067 | |
b9c16a0e | 1068 | spin_unlock(&lock->wait_lock); |
41719b03 | 1069 | preempt_enable(); |
6053ee3b | 1070 | return 0; |
040a0a37 ML |
1071 | |
1072 | err: | |
642fa448 | 1073 | __set_current_state(TASK_RUNNING); |
d269a8b8 | 1074 | mutex_remove_waiter(lock, &waiter, current); |
55f036ca | 1075 | err_early_kill: |
b9c16a0e | 1076 | spin_unlock(&lock->wait_lock); |
040a0a37 | 1077 | debug_mutex_free_waiter(&waiter); |
5facae4f | 1078 | mutex_release(&lock->dep_map, ip); |
040a0a37 ML |
1079 | preempt_enable(); |
1080 | return ret; | |
6053ee3b IM |
1081 | } |
1082 | ||
427b1820 PZ |
1083 | static int __sched |
1084 | __mutex_lock(struct mutex *lock, long state, unsigned int subclass, | |
1085 | struct lockdep_map *nest_lock, unsigned long ip) | |
1086 | { | |
1087 | return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false); | |
1088 | } | |
1089 | ||
1090 | static int __sched | |
1091 | __ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass, | |
1092 | struct lockdep_map *nest_lock, unsigned long ip, | |
1093 | struct ww_acquire_ctx *ww_ctx) | |
1094 | { | |
1095 | return __mutex_lock_common(lock, state, subclass, nest_lock, ip, ww_ctx, true); | |
1096 | } | |
1097 | ||
ef5d4707 IM |
1098 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
1099 | void __sched | |
1100 | mutex_lock_nested(struct mutex *lock, unsigned int subclass) | |
1101 | { | |
427b1820 | 1102 | __mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); |
ef5d4707 IM |
1103 | } |
1104 | ||
1105 | EXPORT_SYMBOL_GPL(mutex_lock_nested); | |
d63a5a74 | 1106 | |
e4c70a66 PZ |
1107 | void __sched |
1108 | _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | |
1109 | { | |
427b1820 | 1110 | __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); |
e4c70a66 | 1111 | } |
e4c70a66 PZ |
1112 | EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); |
1113 | ||
ad776537 LH |
1114 | int __sched |
1115 | mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) | |
1116 | { | |
427b1820 | 1117 | return __mutex_lock(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); |
ad776537 LH |
1118 | } |
1119 | EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); | |
1120 | ||
d63a5a74 N |
1121 | int __sched |
1122 | mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) | |
1123 | { | |
427b1820 | 1124 | return __mutex_lock(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_); |
d63a5a74 | 1125 | } |
d63a5a74 | 1126 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); |
040a0a37 | 1127 | |
1460cb65 TH |
1128 | void __sched |
1129 | mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) | |
1130 | { | |
1131 | int token; | |
1132 | ||
1133 | might_sleep(); | |
1134 | ||
1135 | token = io_schedule_prepare(); | |
1136 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, | |
1137 | subclass, NULL, _RET_IP_, NULL, 0); | |
1138 | io_schedule_finish(token); | |
1139 | } | |
1140 | EXPORT_SYMBOL_GPL(mutex_lock_io_nested); | |
1141 | ||
23010027 DV |
1142 | static inline int |
1143 | ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
1144 | { | |
1145 | #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH | |
1146 | unsigned tmp; | |
1147 | ||
1148 | if (ctx->deadlock_inject_countdown-- == 0) { | |
1149 | tmp = ctx->deadlock_inject_interval; | |
1150 | if (tmp > UINT_MAX/4) | |
1151 | tmp = UINT_MAX; | |
1152 | else | |
1153 | tmp = tmp*2 + tmp + tmp/2; | |
1154 | ||
1155 | ctx->deadlock_inject_interval = tmp; | |
1156 | ctx->deadlock_inject_countdown = tmp; | |
1157 | ctx->contending_lock = lock; | |
1158 | ||
1159 | ww_mutex_unlock(lock); | |
1160 | ||
1161 | return -EDEADLK; | |
1162 | } | |
1163 | #endif | |
1164 | ||
1165 | return 0; | |
1166 | } | |
040a0a37 ML |
1167 | |
1168 | int __sched | |
c5470b22 | 1169 | ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
040a0a37 | 1170 | { |
23010027 DV |
1171 | int ret; |
1172 | ||
040a0a37 | 1173 | might_sleep(); |
427b1820 PZ |
1174 | ret = __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, |
1175 | 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, | |
1176 | ctx); | |
ea9e0fb8 | 1177 | if (!ret && ctx && ctx->acquired > 1) |
23010027 DV |
1178 | return ww_mutex_deadlock_injection(lock, ctx); |
1179 | ||
1180 | return ret; | |
040a0a37 | 1181 | } |
c5470b22 | 1182 | EXPORT_SYMBOL_GPL(ww_mutex_lock); |
040a0a37 ML |
1183 | |
1184 | int __sched | |
c5470b22 | 1185 | ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
040a0a37 | 1186 | { |
23010027 DV |
1187 | int ret; |
1188 | ||
040a0a37 | 1189 | might_sleep(); |
427b1820 PZ |
1190 | ret = __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, |
1191 | 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, | |
1192 | ctx); | |
23010027 | 1193 | |
ea9e0fb8 | 1194 | if (!ret && ctx && ctx->acquired > 1) |
23010027 DV |
1195 | return ww_mutex_deadlock_injection(lock, ctx); |
1196 | ||
1197 | return ret; | |
040a0a37 | 1198 | } |
c5470b22 | 1199 | EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); |
040a0a37 | 1200 | |
ef5d4707 IM |
1201 | #endif |
1202 | ||
6053ee3b IM |
1203 | /* |
1204 | * Release the lock, slowpath: | |
1205 | */ | |
3ca0ff57 | 1206 | static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) |
6053ee3b | 1207 | { |
9d659ae1 | 1208 | struct task_struct *next = NULL; |
194a6b5b | 1209 | DEFINE_WAKE_Q(wake_q); |
b9c16a0e | 1210 | unsigned long owner; |
6053ee3b | 1211 | |
5facae4f | 1212 | mutex_release(&lock->dep_map, ip); |
3ca0ff57 | 1213 | |
6053ee3b | 1214 | /* |
9d659ae1 PZ |
1215 | * Release the lock before (potentially) taking the spinlock such that |
1216 | * other contenders can get on with things ASAP. | |
1217 | * | |
1218 | * Except when HANDOFF, in that case we must not clear the owner field, | |
1219 | * but instead set it to the top waiter. | |
6053ee3b | 1220 | */ |
9d659ae1 PZ |
1221 | owner = atomic_long_read(&lock->owner); |
1222 | for (;;) { | |
9d659ae1 PZ |
1223 | #ifdef CONFIG_DEBUG_MUTEXES |
1224 | DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); | |
e274795e | 1225 | DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); |
9d659ae1 PZ |
1226 | #endif |
1227 | ||
1228 | if (owner & MUTEX_FLAG_HANDOFF) | |
1229 | break; | |
1230 | ||
ab4e4d9f | 1231 | if (atomic_long_try_cmpxchg_release(&lock->owner, &owner, __owner_flags(owner))) { |
9d659ae1 PZ |
1232 | if (owner & MUTEX_FLAG_WAITERS) |
1233 | break; | |
1234 | ||
1235 | return; | |
1236 | } | |
9d659ae1 | 1237 | } |
6053ee3b | 1238 | |
b9c16a0e | 1239 | spin_lock(&lock->wait_lock); |
1d8fe7dc | 1240 | debug_mutex_unlock(lock); |
6053ee3b IM |
1241 | if (!list_empty(&lock->wait_list)) { |
1242 | /* get the first entry from the wait-list: */ | |
1243 | struct mutex_waiter *waiter = | |
9d659ae1 PZ |
1244 | list_first_entry(&lock->wait_list, |
1245 | struct mutex_waiter, list); | |
1246 | ||
1247 | next = waiter->task; | |
6053ee3b IM |
1248 | |
1249 | debug_mutex_wake_waiter(lock, waiter); | |
9d659ae1 | 1250 | wake_q_add(&wake_q, next); |
6053ee3b IM |
1251 | } |
1252 | ||
9d659ae1 PZ |
1253 | if (owner & MUTEX_FLAG_HANDOFF) |
1254 | __mutex_handoff(lock, next); | |
1255 | ||
b9c16a0e | 1256 | spin_unlock(&lock->wait_lock); |
9d659ae1 | 1257 | |
1329ce6f | 1258 | wake_up_q(&wake_q); |
6053ee3b IM |
1259 | } |
1260 | ||
e4564f79 | 1261 | #ifndef CONFIG_DEBUG_LOCK_ALLOC |
6053ee3b IM |
1262 | /* |
1263 | * Here come the less common (and hence less performance-critical) APIs: | |
1264 | * mutex_lock_interruptible() and mutex_trylock(). | |
1265 | */ | |
7ad5b3a5 | 1266 | static noinline int __sched |
a41b56ef | 1267 | __mutex_lock_killable_slowpath(struct mutex *lock); |
ad776537 | 1268 | |
7ad5b3a5 | 1269 | static noinline int __sched |
a41b56ef | 1270 | __mutex_lock_interruptible_slowpath(struct mutex *lock); |
6053ee3b | 1271 | |
ef5dc121 | 1272 | /** |
45dbac0e MW |
1273 | * mutex_lock_interruptible() - Acquire the mutex, interruptible by signals. |
1274 | * @lock: The mutex to be acquired. | |
6053ee3b | 1275 | * |
45dbac0e MW |
1276 | * Lock the mutex like mutex_lock(). If a signal is delivered while the |
1277 | * process is sleeping, this function will return without acquiring the | |
1278 | * mutex. | |
6053ee3b | 1279 | * |
45dbac0e MW |
1280 | * Context: Process context. |
1281 | * Return: 0 if the lock was successfully acquired or %-EINTR if a | |
1282 | * signal arrived. | |
6053ee3b | 1283 | */ |
7ad5b3a5 | 1284 | int __sched mutex_lock_interruptible(struct mutex *lock) |
6053ee3b | 1285 | { |
c544bdb1 | 1286 | might_sleep(); |
3ca0ff57 PZ |
1287 | |
1288 | if (__mutex_trylock_fast(lock)) | |
a41b56ef | 1289 | return 0; |
3ca0ff57 PZ |
1290 | |
1291 | return __mutex_lock_interruptible_slowpath(lock); | |
6053ee3b IM |
1292 | } |
1293 | ||
1294 | EXPORT_SYMBOL(mutex_lock_interruptible); | |
1295 | ||
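
A sketch of how callers typically handle the -EINTR return from mutex_lock_interruptible() (mutex_lock_killable() is handled the same way, for fatal signals only); dev_lock and my_dev_op() are illustrative names:

```c
/* Illustrative only: bail out with -EINTR if a signal interrupts the wait. */
#include <linux/errno.h>
#include <linux/mutex.h>

long my_dev_op(struct mutex *dev_lock)
{
	if (mutex_lock_interruptible(dev_lock))
		return -EINTR;		/* a signal arrived before the lock was acquired */

	/* ... critical section ... */

	mutex_unlock(dev_lock);
	return 0;
}
```
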
45dbac0e MW |
1296 | /** |
1297 | * mutex_lock_killable() - Acquire the mutex, interruptible by fatal signals. | |
1298 | * @lock: The mutex to be acquired. | |
1299 | * | |
1300 | * Lock the mutex like mutex_lock(). If a signal which will be fatal to | |
1301 | * the current process is delivered while the process is sleeping, this | |
1302 | * function will return without acquiring the mutex. | |
1303 | * | |
1304 | * Context: Process context. | |
1305 | * Return: 0 if the lock was successfully acquired or %-EINTR if a | |
1306 | * fatal signal arrived. | |
1307 | */ | |
7ad5b3a5 | 1308 | int __sched mutex_lock_killable(struct mutex *lock) |
ad776537 LH |
1309 | { |
1310 | might_sleep(); | |
3ca0ff57 PZ |
1311 | |
1312 | if (__mutex_trylock_fast(lock)) | |
a41b56ef | 1313 | return 0; |
3ca0ff57 PZ |
1314 | |
1315 | return __mutex_lock_killable_slowpath(lock); | |
ad776537 LH |
1316 | } |
1317 | EXPORT_SYMBOL(mutex_lock_killable); | |
1318 | ||
45dbac0e MW |
1319 | /** |
1320 | * mutex_lock_io() - Acquire the mutex and mark the process as waiting for I/O | |
1321 | * @lock: The mutex to be acquired. | |
1322 | * | |
1323 | * Lock the mutex like mutex_lock(). While the task is waiting for this | |
1324 | * mutex, it will be accounted as being in the IO wait state by the | |
1325 | * scheduler. | |
1326 | * | |
1327 | * Context: Process context. | |
1328 | */ | |
1460cb65 TH |
1329 | void __sched mutex_lock_io(struct mutex *lock) |
1330 | { | |
1331 | int token; | |
1332 | ||
1333 | token = io_schedule_prepare(); | |
1334 | mutex_lock(lock); | |
1335 | io_schedule_finish(token); | |
1336 | } | |
1337 | EXPORT_SYMBOL_GPL(mutex_lock_io); | |
1338 | ||
3ca0ff57 PZ |
1339 | static noinline void __sched |
1340 | __mutex_lock_slowpath(struct mutex *lock) | |
e4564f79 | 1341 | { |
427b1820 | 1342 | __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); |
e4564f79 PZ |
1343 | } |
1344 | ||
7ad5b3a5 | 1345 | static noinline int __sched |
a41b56ef | 1346 | __mutex_lock_killable_slowpath(struct mutex *lock) |
ad776537 | 1347 | { |
427b1820 | 1348 | return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); |
ad776537 LH |
1349 | } |
1350 | ||
7ad5b3a5 | 1351 | static noinline int __sched |
a41b56ef | 1352 | __mutex_lock_interruptible_slowpath(struct mutex *lock) |
6053ee3b | 1353 | { |
427b1820 | 1354 | return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); |
040a0a37 ML |
1355 | } |
1356 | ||
1357 | static noinline int __sched | |
1358 | __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
1359 | { | |
427b1820 PZ |
1360 | return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, NULL, |
1361 | _RET_IP_, ctx); | |
6053ee3b | 1362 | } |
040a0a37 ML |
1363 | |
1364 | static noinline int __sched | |
1365 | __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, | |
1366 | struct ww_acquire_ctx *ctx) | |
1367 | { | |
427b1820 PZ |
1368 | return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, NULL, |
1369 | _RET_IP_, ctx); | |
040a0a37 ML |
1370 | } |
1371 | ||
e4564f79 | 1372 | #endif |
6053ee3b | 1373 | |
ef5dc121 RD |
1374 | /** |
1375 | * mutex_trylock - try to acquire the mutex, without waiting | |
6053ee3b IM |
1376 | * @lock: the mutex to be acquired |
1377 | * | |
1378 | * Try to acquire the mutex atomically. Returns 1 if the mutex | |
1379 | * has been acquired successfully, and 0 on contention. | |
1380 | * | |
1381 | * NOTE: this function follows the spin_trylock() convention, so | |
ef5dc121 | 1382 | * it is negated from the down_trylock() return values! Be careful |
6053ee3b IM |
1383 | * about this when converting semaphore users to mutexes. |
1384 | * | |
1385 | * This function must not be used in interrupt context. The | |
1386 | * mutex must be released by the same task that acquired it. | |
1387 | */ | |
7ad5b3a5 | 1388 | int __sched mutex_trylock(struct mutex *lock) |
6053ee3b | 1389 | { |
6c11c6e3 SAS |
1390 | bool locked; |
1391 | ||
1392 | #ifdef CONFIG_DEBUG_MUTEXES | |
1393 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | |
1394 | #endif | |
0d66bf6d | 1395 | |
6c11c6e3 | 1396 | locked = __mutex_trylock(lock); |
3ca0ff57 PZ |
1397 | if (locked) |
1398 | mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
0d66bf6d | 1399 | |
3ca0ff57 | 1400 | return locked; |
6053ee3b | 1401 | } |
6053ee3b | 1402 | EXPORT_SYMBOL(mutex_trylock); |
a511e3f9 | 1403 | |
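
Because mutex_trylock() follows the spin_trylock() convention (1 on success, 0 on contention), callers branch on a zero return and skip the work rather than block. A short illustrative sketch (try_flush() is an invented name):

```c
/* Illustrative only: opportunistic work that is skipped when the lock is busy. */
#include <linux/mutex.h>
#include <linux/types.h>

bool try_flush(struct mutex *lock)
{
	if (!mutex_trylock(lock))
		return false;		/* contended: someone else will get to it */

	/* ... do the optional work ... */

	mutex_unlock(lock);
	return true;
}
```
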
040a0a37 ML |
1404 | #ifndef CONFIG_DEBUG_LOCK_ALLOC |
1405 | int __sched | |
c5470b22 | 1406 | ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
040a0a37 | 1407 | { |
040a0a37 ML |
1408 | might_sleep(); |
1409 | ||
3ca0ff57 | 1410 | if (__mutex_trylock_fast(&lock->base)) { |
ea9e0fb8 NH |
1411 | if (ctx) |
1412 | ww_mutex_set_context_fastpath(lock, ctx); | |
3ca0ff57 PZ |
1413 | return 0; |
1414 | } | |
1415 | ||
1416 | return __ww_mutex_lock_slowpath(lock, ctx); | |
040a0a37 | 1417 | } |
c5470b22 | 1418 | EXPORT_SYMBOL(ww_mutex_lock); |
040a0a37 ML |
1419 | |
1420 | int __sched | |
c5470b22 | 1421 | ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) |
040a0a37 | 1422 | { |
040a0a37 ML |
1423 | might_sleep(); |
1424 | ||
3ca0ff57 | 1425 | if (__mutex_trylock_fast(&lock->base)) { |
ea9e0fb8 NH |
1426 | if (ctx) |
1427 | ww_mutex_set_context_fastpath(lock, ctx); | |
3ca0ff57 PZ |
1428 | return 0; |
1429 | } | |
1430 | ||
1431 | return __ww_mutex_lock_interruptible_slowpath(lock, ctx); | |
040a0a37 | 1432 | } |
c5470b22 | 1433 | EXPORT_SYMBOL(ww_mutex_lock_interruptible); |
040a0a37 ML |
1434 | |
1435 | #endif | |
1436 | ||
a511e3f9 AM |
1437 | /** |
1438 | * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | |
1439 | * @cnt: the atomic which we are to dec | |
1440 | * @lock: the mutex to return holding if we dec to 0 | |
1441 | * | |
1442 | * return true and hold the lock if we dec to 0; return false otherwise | |
1443 | */ | |
1444 | int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |
1445 | { | |
1446 | /* dec if we can't possibly hit 0 */ | |
1447 | if (atomic_add_unless(cnt, -1, 1)) | |
1448 | return 0; | |
1449 | /* we might hit 0, so take the lock */ | |
1450 | mutex_lock(lock); | |
1451 | if (!atomic_dec_and_test(cnt)) { | |
1452 | /* when we actually did the dec, we didn't hit 0 */ | |
1453 | mutex_unlock(lock); | |
1454 | return 0; | |
1455 | } | |
1456 | /* we hit 0, and we hold the lock */ | |
1457 | return 1; | |
1458 | } | |
1459 | EXPORT_SYMBOL(atomic_dec_and_mutex_lock); |
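
A sketch of the usual pattern behind atomic_dec_and_mutex_lock(): drop a reference cheaply in the common case and take the lock only for the final teardown; struct obj, obj_list_lock and obj_put() are illustrative names:

```c
/* Illustrative only: put a reference, tearing the object down under the lock
 * when the count reaches zero. */
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct obj {
	atomic_t refcount;
	/* ... payload, plus linkage into some shared structure ... */
};

static DEFINE_MUTEX(obj_list_lock);	/* protects whatever the object is linked into */

void obj_put(struct obj *o)
{
	if (!atomic_dec_and_mutex_lock(&o->refcount, &obj_list_lock))
		return;			/* other references remain; nothing to do */

	/* Count hit zero and obj_list_lock is held: unlink from shared state here. */
	mutex_unlock(&obj_list_lock);
	kfree(o);
}
```
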