Commit | Line | Data |
---|---|---|
85dc28fa PZ |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | ||
3 | #include <linux/slab.h> | |
4 | #include <linux/sched/task.h> | |
5 | ||
6 | #include "futex.h" | |
7 | #include "../locking/rtmutex_common.h" | |
8 | ||
9 | /* | |
10 | * PI code: | |
11 | */ | |
12 | int refill_pi_state_cache(void) | |
13 | { | |
14 | struct futex_pi_state *pi_state; | |
15 | ||
16 | if (likely(current->pi_state_cache)) | |
17 | return 0; | |
18 | ||
19 | pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL); | |
20 | ||
21 | if (!pi_state) | |
22 | return -ENOMEM; | |
23 | ||
24 | INIT_LIST_HEAD(&pi_state->list); | |
25 | /* pi_mutex gets initialized later */ | |
26 | pi_state->owner = NULL; | |
27 | refcount_set(&pi_state->refcount, 1); | |
28 | pi_state->key = FUTEX_KEY_INIT; | |
29 | ||
30 | current->pi_state_cache = pi_state; | |
31 | ||
32 | return 0; | |
33 | } | |
34 | ||
35 | static struct futex_pi_state *alloc_pi_state(void) | |
36 | { | |
37 | struct futex_pi_state *pi_state = current->pi_state_cache; | |
38 | ||
39 | WARN_ON(!pi_state); | |
40 | current->pi_state_cache = NULL; | |
41 | ||
42 | return pi_state; | |
43 | } | |
44 | ||
/*
 * Re-home @pi_state from its current owner's pi_state_list to
 * @new_owner's list; either task may be NULL (detach only / attach
 * only). Must be called with pi_mutex.wait_lock held; the per-task
 * pi_lock is taken inside, matching the documented lock order
 * pi_mutex->wait_lock -> p->pi_lock.
 */
static void pi_state_update_owner(struct futex_pi_state *pi_state,
				  struct task_struct *new_owner)
{
	struct task_struct *old_owner = pi_state->owner;

	lockdep_assert_held(&pi_state->pi_mutex.wait_lock);

	if (old_owner) {
		raw_spin_lock(&old_owner->pi_lock);
		/* A pi_state with an owner must be on that owner's list. */
		WARN_ON(list_empty(&pi_state->list));
		list_del_init(&pi_state->list);
		raw_spin_unlock(&old_owner->pi_lock);
	}

	if (new_owner) {
		raw_spin_lock(&new_owner->pi_lock);
		/* Must have been detached (or never attached) at this point. */
		WARN_ON(!list_empty(&pi_state->list));
		list_add(&pi_state->list, &new_owner->pi_state_list);
		pi_state->owner = new_owner;
		raw_spin_unlock(&new_owner->pi_lock);
	}
}
67 | ||
/*
 * Take an additional reference on @pi_state. The caller must already
 * hold a reference, i.e. the refcount cannot be zero here - hence the
 * WARN_ON_ONCE() when the saturating increment observes zero.
 */
void get_pi_state(struct futex_pi_state *pi_state)
{
	WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
}
72 | ||
/*
 * Drops a reference to the pi_state object and frees or caches it
 * when the last reference is gone.
 */
void put_pi_state(struct futex_pi_state *pi_state)
{
	if (!pi_state)
		return;

	if (!refcount_dec_and_test(&pi_state->refcount))
		return;

	/*
	 * If pi_state->owner is NULL, the owner is most probably dying
	 * and has cleaned up the pi_state already
	 */
	if (pi_state->owner) {
		unsigned long flags;

		raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
		pi_state_update_owner(pi_state, NULL);
		rt_mutex_proxy_unlock(&pi_state->pi_mutex);
		raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
	}

	/*
	 * Either free the object or use it to refill the task's single
	 * slot pi_state cache, avoiding a future allocation.
	 */
	if (current->pi_state_cache) {
		kfree(pi_state);
	} else {
		/*
		 * pi_state->list is already empty.
		 * clear pi_state->owner.
		 * refcount is at 0 - put it back to 1.
		 */
		pi_state->owner = NULL;
		refcount_set(&pi_state->refcount, 1);
		current->pi_state_cache = pi_state;
	}
}
111 | ||
112 | /* | |
113 | * We need to check the following states: | |
114 | * | |
115 | * Waiter | pi_state | pi->owner | uTID | uODIED | ? | |
116 | * | |
117 | * [1] NULL | --- | --- | 0 | 0/1 | Valid | |
118 | * [2] NULL | --- | --- | >0 | 0/1 | Valid | |
119 | * | |
120 | * [3] Found | NULL | -- | Any | 0/1 | Invalid | |
121 | * | |
122 | * [4] Found | Found | NULL | 0 | 1 | Valid | |
123 | * [5] Found | Found | NULL | >0 | 1 | Invalid | |
124 | * | |
125 | * [6] Found | Found | task | 0 | 1 | Valid | |
126 | * | |
127 | * [7] Found | Found | NULL | Any | 0 | Invalid | |
128 | * | |
129 | * [8] Found | Found | task | ==taskTID | 0/1 | Valid | |
130 | * [9] Found | Found | task | 0 | 0 | Invalid | |
131 | * [10] Found | Found | task | !=taskTID | 0/1 | Invalid | |
132 | * | |
133 | * [1] Indicates that the kernel can acquire the futex atomically. We | |
134 | * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. | |
135 | * | |
136 | * [2] Valid, if TID does not belong to a kernel thread. If no matching | |
137 | * thread is found then it indicates that the owner TID has died. | |
138 | * | |
139 | * [3] Invalid. The waiter is queued on a non PI futex | |
140 | * | |
141 | * [4] Valid state after exit_robust_list(), which sets the user space | |
142 | * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. | |
143 | * | |
144 | * [5] The user space value got manipulated between exit_robust_list() | |
145 | * and exit_pi_state_list() | |
146 | * | |
147 | * [6] Valid state after exit_pi_state_list() which sets the new owner in | |
148 | * the pi_state but cannot access the user space value. | |
149 | * | |
150 | * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. | |
151 | * | |
152 | * [8] Owner and user space value match | |
153 | * | |
154 | * [9] There is no transient state which sets the user space TID to 0 | |
155 | * except exit_robust_list(), but this is indicated by the | |
156 | * FUTEX_OWNER_DIED bit. See [4] | |
157 | * | |
158 | * [10] There is no transient state which leaves owner and user space | |
159 | * TID out of sync. Except one error case where the kernel is denied | |
160 | * write access to the user address, see fixup_pi_state_owner(). | |
161 | * | |
162 | * | |
163 | * Serialization and lifetime rules: | |
164 | * | |
165 | * hb->lock: | |
166 | * | |
167 | * hb -> futex_q, relation | |
168 | * futex_q -> pi_state, relation | |
169 | * | |
170 | * (cannot be raw because hb can contain arbitrary amount | |
171 | * of futex_q's) | |
172 | * | |
173 | * pi_mutex->wait_lock: | |
174 | * | |
175 | * {uval, pi_state} | |
176 | * | |
177 | * (and pi_mutex 'obviously') | |
178 | * | |
179 | * p->pi_lock: | |
180 | * | |
181 | * p->pi_state_list -> pi_state->list, relation | |
182 | * pi_mutex->owner -> pi_state->owner, relation | |
183 | * | |
184 | * pi_state->refcount: | |
185 | * | |
186 | * pi_state lifetime | |
187 | * | |
188 | * | |
189 | * Lock order: | |
190 | * | |
191 | * hb->lock | |
192 | * pi_mutex->wait_lock | |
193 | * p->pi_lock | |
194 | * | |
195 | */ | |
196 | ||
/*
 * Validate that the existing waiter has a pi_state and sanity check
 * the pi_state against the user space value. If correct, attach to
 * it.
 *
 * Returns 0 on success and stores a referenced pi_state in @ps.
 * Returns -EINVAL, -EAGAIN or -EFAULT when the state is invalid per
 * the [1]..[10] table above, @uval went stale, or the user page
 * faulted, respectively.
 */
static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
			      struct futex_pi_state *pi_state,
			      struct futex_pi_state **ps)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	u32 uval2;
	int ret;

	/*
	 * Userspace might have messed up non-PI and PI futexes [3]
	 */
	if (unlikely(!pi_state))
		return -EINVAL;

	/*
	 * We get here with hb->lock held, and having found a
	 * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
	 * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(),
	 * which in turn means that futex_lock_pi() still has a reference on
	 * our pi_state.
	 *
	 * The waiter holding a reference on @pi_state also protects against
	 * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
	 * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
	 * free pi_state before we can take a reference ourselves.
	 */
	WARN_ON(!refcount_read(&pi_state->refcount));

	/*
	 * Now that we have a pi_state, we can acquire wait_lock
	 * and do the state validation.
	 */
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Since {uval, pi_state} is serialized by wait_lock, and our current
	 * uval was read without holding it, it can have changed. Verify it
	 * still is what we expect it to be, otherwise retry the entire
	 * operation.
	 */
	if (futex_get_value_locked(&uval2, uaddr))
		goto out_efault;

	if (uval != uval2)
		goto out_eagain;

	/*
	 * Handle the owner died case:
	 */
	if (uval & FUTEX_OWNER_DIED) {
		/*
		 * exit_pi_state_list sets owner to NULL and wakes the
		 * topmost waiter. The task which acquires the
		 * pi_state->rt_mutex will fixup owner.
		 */
		if (!pi_state->owner) {
			/*
			 * No pi state owner, but the user space TID
			 * is not 0. Inconsistent state. [5]
			 */
			if (pid)
				goto out_einval;
			/*
			 * Take a ref on the state and return success. [4]
			 */
			goto out_attach;
		}

		/*
		 * If TID is 0, then either the dying owner has not
		 * yet executed exit_pi_state_list() or some waiter
		 * acquired the rtmutex in the pi state, but did not
		 * yet fixup the TID in user space.
		 *
		 * Take a ref on the state and return success. [6]
		 */
		if (!pid)
			goto out_attach;
	} else {
		/*
		 * If the owner died bit is not set, then the pi_state
		 * must have an owner. [7]
		 */
		if (!pi_state->owner)
			goto out_einval;
	}

	/*
	 * Bail out if user space manipulated the futex value. If pi
	 * state exists then the owner TID must be the same as the
	 * user space TID. [9/10]
	 */
	if (pid != task_pid_vnr(pi_state->owner))
		goto out_einval;

out_attach:
	get_pi_state(pi_state);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	*ps = pi_state;
	return 0;

out_einval:
	ret = -EINVAL;
	goto out_error;

out_eagain:
	ret = -EAGAIN;
	goto out_error;

out_efault:
	ret = -EFAULT;
	goto out_error;

out_error:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
319 | ||
320 | static int handle_exit_race(u32 __user *uaddr, u32 uval, | |
321 | struct task_struct *tsk) | |
322 | { | |
323 | u32 uval2; | |
324 | ||
325 | /* | |
326 | * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the | |
327 | * caller that the alleged owner is busy. | |
328 | */ | |
329 | if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) | |
330 | return -EBUSY; | |
331 | ||
332 | /* | |
333 | * Reread the user space value to handle the following situation: | |
334 | * | |
335 | * CPU0 CPU1 | |
336 | * | |
337 | * sys_exit() sys_futex() | |
338 | * do_exit() futex_lock_pi() | |
339 | * futex_lock_pi_atomic() | |
340 | * exit_signals(tsk) No waiters: | |
341 | * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID | |
342 | * mm_release(tsk) Set waiter bit | |
343 | * exit_robust_list(tsk) { *uaddr = 0x80000PID; | |
344 | * Set owner died attach_to_pi_owner() { | |
345 | * *uaddr = 0xC0000000; tsk = get_task(PID); | |
346 | * } if (!tsk->flags & PF_EXITING) { | |
347 | * ... attach(); | |
348 | * tsk->futex_state = } else { | |
349 | * FUTEX_STATE_DEAD; if (tsk->futex_state != | |
350 | * FUTEX_STATE_DEAD) | |
351 | * return -EAGAIN; | |
352 | * return -ESRCH; <--- FAIL | |
353 | * } | |
354 | * | |
355 | * Returning ESRCH unconditionally is wrong here because the | |
356 | * user space value has been changed by the exiting task. | |
357 | * | |
358 | * The same logic applies to the case where the exiting task is | |
359 | * already gone. | |
360 | */ | |
361 | if (futex_get_value_locked(&uval2, uaddr)) | |
362 | return -EFAULT; | |
363 | ||
364 | /* If the user space value has changed, try again. */ | |
365 | if (uval2 != uval) | |
366 | return -EAGAIN; | |
367 | ||
368 | /* | |
369 | * The exiting task did not have a robust list, the robust list was | |
370 | * corrupted or the user space value in *uaddr is simply bogus. | |
371 | * Give up and tell user space. | |
372 | */ | |
373 | return -ESRCH; | |
374 | } | |
375 | ||
/*
 * Allocate a new pi_state for @key, make @p the rtmutex owner and
 * store the result in @ps. Callers hold @p->pi_lock (required for the
 * list_add() to p->pi_state_list) - see attach_to_pi_owner() and
 * futex_lock_pi_atomic().
 */
static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
				 struct futex_pi_state **ps)
{
	/*
	 * No existing pi state. First waiter. [2]
	 *
	 * This creates pi_state, we have hb->lock held, this means nothing can
	 * observe this state, wait_lock is irrelevant.
	 */
	struct futex_pi_state *pi_state = alloc_pi_state();

	/*
	 * Initialize the pi_mutex in locked state and make @p
	 * the owner of it:
	 */
	rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);

	/* Store the key for possible exit cleanups: */
	pi_state->key = *key;

	WARN_ON(!list_empty(&pi_state->list));
	list_add(&pi_state->list, &p->pi_state_list);
	/*
	 * Assignment without holding pi_state->pi_mutex.wait_lock is safe
	 * because there is no concurrency as the object is not published yet.
	 */
	pi_state->owner = p;

	*ps = pi_state;
}
/*
 * Lookup the task for the TID provided from user space and attach to
 * it after doing proper sanity checks.
 *
 * Returns 0 on success with a new pi_state stored in @ps. On failure
 * returns -EAGAIN, -EPERM, or a handle_exit_race() code; for -EBUSY
 * the owner task is stored in @exiting with a reference held that the
 * caller must drop after the exit completes.
 */
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
			      struct futex_pi_state **ps,
			      struct task_struct **exiting)
{
	pid_t pid = uval & FUTEX_TID_MASK;
	struct task_struct *p;

	/*
	 * We are the first waiter - try to look up the real owner and attach
	 * the new pi_state to it, but bail out when TID = 0 [1]
	 *
	 * The !pid check is paranoid. None of the call sites should end up
	 * with pid == 0, but better safe than sorry. Let the caller retry
	 */
	if (!pid)
		return -EAGAIN;
	p = find_get_task_by_vpid(pid);
	if (!p)
		return handle_exit_race(uaddr, uval, NULL);

	/* Kernel threads can never own a PI futex. */
	if (unlikely(p->flags & PF_KTHREAD)) {
		put_task_struct(p);
		return -EPERM;
	}

	/*
	 * We need to look at the task state to figure out, whether the
	 * task is exiting. To protect against the change of the task state
	 * in futex_exit_release(), we do this protected by p->pi_lock:
	 */
	raw_spin_lock_irq(&p->pi_lock);
	if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
		/*
		 * The task is on the way out. When the futex state is
		 * FUTEX_STATE_DEAD, we know that the task has finished
		 * the cleanup:
		 */
		int ret = handle_exit_race(uaddr, uval, p);

		raw_spin_unlock_irq(&p->pi_lock);
		/*
		 * If the owner task is between FUTEX_STATE_EXITING and
		 * FUTEX_STATE_DEAD then store the task pointer and keep
		 * the reference on the task struct. The calling code will
		 * drop all locks, wait for the task to reach
		 * FUTEX_STATE_DEAD and then drop the refcount. This is
		 * required to prevent a live lock when the current task
		 * preempted the exiting task between the two states.
		 */
		if (ret == -EBUSY)
			*exiting = p;
		else
			put_task_struct(p);
		return ret;
	}

	__attach_to_pi_owner(p, key, ps);
	raw_spin_unlock_irq(&p->pi_lock);

	put_task_struct(p);

	return 0;
}
473 | ||
474 | static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) | |
475 | { | |
476 | int err; | |
477 | u32 curval; | |
478 | ||
479 | if (unlikely(should_fail_futex(true))) | |
480 | return -EFAULT; | |
481 | ||
482 | err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval); | |
483 | if (unlikely(err)) | |
484 | return err; | |
485 | ||
486 | /* If user space value changed, let the caller retry */ | |
487 | return curval != uval ? -EAGAIN : 0; | |
488 | } | |
489 | ||
/**
 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
 * @uaddr:		the pi futex user address
 * @hb:			the pi futex hash bucket
 * @key:		the futex key associated with uaddr and hb
 * @ps:			the pi_state pointer where we store the result of the
 *			lookup
 * @task:		the task to perform the atomic lock work for.  This will
 *			be "current" except in the case of requeue pi.
 * @exiting:		Pointer to store the task pointer of the owner task
 *			which is in the middle of exiting
 * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)
 *
 * Return:
 *  -  0 - ready to wait;
 *  -  1 - acquired the lock;
 *  - <0 - error
 *
 * The hb->lock must be held by the caller.
 *
 * @exiting is only set when the return value is -EBUSY. If so, this holds
 * a refcount on the exiting task on return and the caller needs to drop it
 * after waiting for the exit to complete.
 */
int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
			 union futex_key *key,
			 struct futex_pi_state **ps,
			 struct task_struct *task,
			 struct task_struct **exiting,
			 int set_waiters)
{
	u32 uval, newval, vpid = task_pid_vnr(task);
	struct futex_q *top_waiter;
	int ret;

	/*
	 * Read the user space value first so we can validate a few
	 * things before proceeding further.
	 */
	if (futex_get_value_locked(&uval, uaddr))
		return -EFAULT;

	if (unlikely(should_fail_futex(true)))
		return -EFAULT;

	/*
	 * Detect deadlocks: the futex word already holds @task's TID,
	 * i.e. it would block on itself.
	 */
	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
		return -EDEADLK;

	if ((unlikely(should_fail_futex(true))))
		return -EDEADLK;

	/*
	 * Lookup existing state first. If it exists, try to attach to
	 * its pi_state.
	 */
	top_waiter = futex_top_waiter(hb, key);
	if (top_waiter)
		return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);

	/*
	 * No waiter and user TID is 0. We are here because the
	 * waiters or the owner died bit is set or called from
	 * requeue_cmp_pi or for whatever reason something took the
	 * syscall.
	 */
	if (!(uval & FUTEX_TID_MASK)) {
		/*
		 * We take over the futex. No other waiters and the user space
		 * TID is 0. We preserve the owner died bit.
		 */
		newval = uval & FUTEX_OWNER_DIED;
		newval |= vpid;

		/* The futex requeue_pi code can enforce the waiters bit */
		if (set_waiters)
			newval |= FUTEX_WAITERS;

		ret = lock_pi_update_atomic(uaddr, uval, newval);
		if (ret)
			return ret;

		/*
		 * If the waiter bit was requested the caller also needs PI
		 * state attached to the new owner of the user space futex.
		 *
		 * @task is guaranteed to be alive and it cannot be exiting
		 * because it is either sleeping or waiting in
		 * futex_requeue_pi_wakeup_sync().
		 *
		 * No need to do the full attach_to_pi_owner() exercise
		 * because @task is known and valid.
		 */
		if (set_waiters) {
			raw_spin_lock_irq(&task->pi_lock);
			__attach_to_pi_owner(task, key, ps);
			raw_spin_unlock_irq(&task->pi_lock);
		}
		return 1;
	}

	/*
	 * First waiter. Set the waiters bit before attaching ourself to
	 * the owner. If owner tries to unlock, it will be forced into
	 * the kernel and blocked on hb->lock.
	 */
	newval = uval | FUTEX_WAITERS;
	ret = lock_pi_update_atomic(uaddr, uval, newval);
	if (ret)
		return ret;
	/*
	 * If the update of the user space value succeeded, we try to
	 * attach to the owner. If that fails, no harm done, we only
	 * set the FUTEX_WAITERS bit in the user space variable.
	 */
	return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}
609 | ||
/*
 * Hand the futex and the rtmutex over to the top waiter.
 *
 * Caller must hold a reference on @pi_state and must enter with
 * pi_state->pi_mutex.wait_lock held (irqs disabled); the lock is
 * dropped before returning.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
{
	struct rt_mutex_waiter *top_waiter;
	struct task_struct *new_owner;
	bool postunlock = false;
	DEFINE_RT_WAKE_Q(wqh);
	u32 curval, newval;
	int ret = 0;

	top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
	if (WARN_ON_ONCE(!top_waiter)) {
		/*
		 * As per the comment in futex_unlock_pi() this should not happen.
		 *
		 * When this happens, give up our locks and try again, giving
		 * the futex_lock_pi() instance time to complete, either by
		 * waiting on the rtmutex or removing itself from the futex
		 * queue.
		 */
		ret = -EAGAIN;
		goto out_unlock;
	}

	new_owner = top_waiter->task;

	/*
	 * We pass it to the next owner. The WAITERS bit is always kept
	 * enabled while there is PI state around. We cleanup the owner
	 * died bit, because we are the owner.
	 */
	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);

	if (unlikely(should_fail_futex(true))) {
		ret = -EFAULT;
		goto out_unlock;
	}

	ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
	if (!ret && (curval != uval)) {
		/*
		 * If a unconditional UNLOCK_PI operation (user space did not
		 * try the TID->0 transition) raced with a waiter setting the
		 * FUTEX_WAITERS flag between get_user() and locking the hash
		 * bucket lock, retry the operation.
		 */
		if ((FUTEX_TID_MASK & curval) == uval)
			ret = -EAGAIN;
		else
			ret = -EINVAL;
	}

	if (!ret) {
		/*
		 * This is a point of no return; once we modified the uval
		 * there is no going back and subsequent operations must
		 * not fail.
		 */
		pi_state_update_owner(pi_state, new_owner);
		postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
	}

out_unlock:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);

	if (postunlock)
		rt_mutex_postunlock(&wqh);

	return ret;
}
682 | ||
/*
 * Fix up pi_state->owner and the user space TID after a lock steal in
 * either direction. Called with q->lock_ptr and pi_mutex.wait_lock
 * held (see fixup_pi_state_owner()); both locks may be dropped and
 * reacquired to handle faults.
 *
 * Returns 1 when current ends up owning the lock, 0 when another task
 * does, or a negative error when user space could not be fixed up.
 */
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				  struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	struct task_struct *oldowner, *newowner;
	u32 uval, curval, newval, newtid;
	int err = 0;

	oldowner = pi_state->owner;

	/*
	 * We are here because either:
	 *
	 *  - we stole the lock and pi_state->owner needs updating to reflect
	 *    that (@argowner == current),
	 *
	 * or:
	 *
	 *  - someone stole our lock and we need to fix things to point to the
	 *    new owner (@argowner == NULL).
	 *
	 * Either way, we have to replace the TID in the user space variable.
	 * This must be atomic as we have to preserve the owner died bit here.
	 *
	 * Note: We write the user space value _before_ changing the pi_state
	 * because we can fault here. Imagine swapped out pages or a fork
	 * that marked all the anonymous memory readonly for cow.
	 *
	 * Modifying pi_state _before_ the user space value would leave the
	 * pi_state in an inconsistent state when we fault here, because we
	 * need to drop the locks to handle the fault. This might be observed
	 * in the PID checks when attaching to PI state .
	 */
retry:
	if (!argowner) {
		if (oldowner != current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			return 0;
		}

		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
			/* We got the lock. pi_state is correct. Tell caller. */
			return 1;
		}

		/*
		 * The trylock just failed, so either there is an owner or
		 * there is a higher priority waiter than this one.
		 */
		newowner = rt_mutex_owner(&pi_state->pi_mutex);
		/*
		 * If the higher priority waiter has not yet taken over the
		 * rtmutex then newowner is NULL. We can't return here with
		 * that state because it's inconsistent vs. the user space
		 * state. So drop the locks and try again. It's a valid
		 * situation and not any different from the other retry
		 * conditions.
		 */
		if (unlikely(!newowner)) {
			err = -EAGAIN;
			goto handle_err;
		}
	} else {
		WARN_ON_ONCE(argowner != current);
		if (oldowner == current) {
			/*
			 * We raced against a concurrent self; things are
			 * already fixed up. Nothing to do.
			 */
			return 1;
		}
		newowner = argowner;
	}

	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
	/* Owner died? */
	if (!pi_state->owner)
		newtid |= FUTEX_OWNER_DIED;

	err = futex_get_value_locked(&uval, uaddr);
	if (err)
		goto handle_err;

	/* cmpxchg loop: preserve OWNER_DIED while installing the new TID. */
	for (;;) {
		newval = (uval & FUTEX_OWNER_DIED) | newtid;

		err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
		if (err)
			goto handle_err;

		if (curval == uval)
			break;
		uval = curval;
	}

	/*
	 * We fixed up user space. Now we need to fix the pi_state
	 * itself.
	 */
	pi_state_update_owner(pi_state, newowner);

	return argowner == current;

	/*
	 * In order to reschedule or handle a page fault, we need to drop the
	 * locks here. In the case of a fault, this gives the other task
	 * (either the highest priority waiter itself or the task which stole
	 * the rtmutex) the chance to try the fixup of the pi_state. So once we
	 * are back from handling the fault we need to check the pi_state after
	 * reacquiring the locks and before trying to do another fixup. When
	 * the fixup has been done already we simply return.
	 *
	 * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
	 * drop hb->lock since the caller owns the hb -> futex_q relation.
	 * Dropping the pi_mutex->wait_lock requires the state revalidate.
	 */
handle_err:
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	spin_unlock(q->lock_ptr);

	switch (err) {
	case -EFAULT:
		err = fault_in_user_writeable(uaddr);
		break;

	case -EAGAIN:
		cond_resched();
		err = 0;
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	spin_lock(q->lock_ptr);
	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

	/*
	 * Check if someone else fixed it for us:
	 */
	if (pi_state->owner != oldowner)
		return argowner == current;

	/* Retry if err was -EAGAIN or the fault in succeeded */
	if (!err)
		goto retry;

	/*
	 * fault_in_user_writeable() failed so user state is immutable. At
	 * best we can make the kernel state consistent but user state will
	 * be most likely hosed and any subsequent unlock operation will be
	 * rejected due to PI futex rule [10].
	 *
	 * Ensure that the rtmutex owner is also the pi_state owner despite
	 * the user space value claiming something different. There is no
	 * point in unlocking the rtmutex if current is the owner as it
	 * would need to wait until the next waiter has taken the rtmutex
	 * to guarantee consistent state. Keep it simple. Userspace asked
	 * for this wrecked state.
	 *
	 * The rtmutex has an owner - either current or some other
	 * task. See the EAGAIN loop above.
	 */
	pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));

	return err;
}
854 | ||
/*
 * Locked wrapper around __fixup_pi_state_owner(): takes and releases
 * pi_mutex.wait_lock around the fixup. The caller must hold
 * q->lock_ptr (asserted below); return value semantics are those of
 * __fixup_pi_state_owner().
 */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
				struct task_struct *argowner)
{
	struct futex_pi_state *pi_state = q->pi_state;
	int ret;

	lockdep_assert_held(q->lock_ptr);

	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
	ret = __fixup_pi_state_owner(uaddr, q, argowner);
	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
	return ret;
}
868 | ||
/**
 * fixup_pi_owner() - Post lock pi_state and corner case management
 * @uaddr:	user address of the futex
 * @q:		futex_q (contains pi_state and access to the rt_mutex)
 * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
 * After attempting to lock an rt_mutex, this function is called to cleanup
 * the pi_state owner as well as handle race conditions that may allow us to
 * acquire the lock. Must be called with the hb lock held.
 *
 * Return:
 *  - 1 - success, lock taken;
 *  - 0 - success, lock not taken;
 *  - <0 - on error (-EFAULT)
 */
int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
	if (locked) {
		/*
		 * Got the lock. We might not be the anticipated owner if we
		 * did a lock-steal - fix up the PI-state in that case:
		 *
		 * Speculative pi_state->owner read (we don't hold wait_lock);
		 * since we own the lock pi_state->owner == current is the
		 * stable state, anything else needs more attention.
		 */
		if (q->pi_state->owner != current)
			return fixup_pi_state_owner(uaddr, q, current);
		/* Owner already consistent; report the lock as taken. */
		return 1;
	}

	/*
	 * If we didn't get the lock; check if anybody stole it from us. In
	 * that case, we need to fix up the uval to point to them instead of
	 * us, otherwise bad things happen. [10]
	 * (NOTE(review): "[10]" presumably refers to the numbered state table
	 * in the futex core's ordering-guarantees comment — confirm in futex.c.)
	 *
	 * Another speculative read; pi_state->owner == current is unstable
	 * but needs our attention.
	 */
	if (q->pi_state->owner == current)
		return fixup_pi_state_owner(uaddr, q, NULL);

	/*
	 * Paranoia check. If we did not take the lock, then we should not be
	 * the owner of the rt_mutex. Warn and establish consistent state.
	 */
	if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
		return fixup_pi_state_owner(uaddr, q, current);

	return 0;
}
920 | ||
/*
 * Userspace tried a 0 -> TID atomic transition of the futex value
 * and failed. The kernel side here does the whole locking operation:
 * if there are waiters then it will block as a consequence of relying
 * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
 * a 0 value of the futex too.)
 *
 * Also implements the trylock variant: with @trylock set the rt_mutex is
 * only try-acquired and -EWOULDBLOCK is returned instead of blocking.
 */
int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock)
{
	struct hrtimer_sleeper timeout, *to;
	struct task_struct *exiting = NULL;
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

	/* Ensure a pi_state can be allocated later without sleeping. */
	if (refill_pi_state_cache())
		return -ENOMEM;

	/* NULL when @time is NULL; otherwise an armed-later hrtimer sleeper. */
	to = futex_setup_timer(time, &timeout, flags, 0);

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

retry_private:
	hb = futex_q_lock(&q);

	/* Try the userspace-visible atomic acquisition under hb->lock. */
	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
				   &exiting, 0);
	if (unlikely(ret)) {
		/*
		 * Atomic work succeeded and we got the lock,
		 * or failed. Either way, we do _not_ block.
		 */
		switch (ret) {
		case 1:
			/* We got the lock. */
			ret = 0;
			goto out_unlock_put_key;
		case -EFAULT:
			goto uaddr_faulted;
		case -EBUSY:
		case -EAGAIN:
			/*
			 * Two reasons for this:
			 * - EBUSY: Task is exiting and we just wait for the
			 *   exit to complete.
			 * - EAGAIN: The user space value changed.
			 */
			futex_q_unlock(hb);
			/*
			 * Handle the case where the owner is in the middle of
			 * exiting. Wait for the exit to complete otherwise
			 * this task might loop forever, aka. live lock.
			 */
			wait_for_owner_exiting(ret, exiting);
			cond_resched();
			goto retry;
		default:
			goto out_unlock_put_key;
		}
	}

	WARN_ON(!q.pi_state);

	/*
	 * Only actually queue now that the atomic ops are done:
	 */
	__futex_queue(&q, hb);

	if (trylock) {
		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
		/* Fixup the trylock return value: */
		ret = ret ? 0 : -EWOULDBLOCK;
		goto no_block;
	}

	rt_mutex_init_waiter(&rt_waiter);

	/*
	 * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
	 * hold it while doing rt_mutex_start_proxy(), because then it will
	 * include hb->lock in the blocking chain, even though we'll not in
	 * fact hold it while blocking. This will lead it to report -EDEADLK
	 * and BUG when futex_unlock_pi() interleaves with this.
	 *
	 * Therefore acquire wait_lock while holding hb->lock, but drop the
	 * latter before calling __rt_mutex_start_proxy_lock(). This
	 * interleaves with futex_unlock_pi() -- which does a similar lock
	 * handoff -- such that the latter can observe the futex_q::pi_state
	 * before __rt_mutex_start_proxy_lock() is done.
	 */
	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
	spin_unlock(q.lock_ptr);
	/*
	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
	 * such that futex_unlock_pi() is guaranteed to observe the waiter when
	 * it sees the futex_q::pi_state.
	 */
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);

	if (ret) {
		/* ret == 1: proxy lock already acquired — success, not an error. */
		if (ret == 1)
			ret = 0;
		goto cleanup;
	}

	if (unlikely(to))
		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

	/* Block on the rt_mutex; may return -ETIMEDOUT/-EINTR/-EDEADLK. */
	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
	spin_lock(q.lock_ptr);
	/*
	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
	 * first acquire the hb->lock before removing the lock from the
	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
	 * lists consistent.
	 *
	 * In particular; it is important that futex_unlock_pi() can not
	 * observe this inconsistency.
	 */
	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
		ret = 0;	/* Lock was handed to us after all. */

no_block:
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we
	 * haven't already.
	 */
	res = fixup_pi_owner(uaddr, &q, !ret);
	/*
	 * If fixup_pi_owner() returned an error, propagate that. If it acquired
	 * the lock, clear our -ETIMEDOUT or -EINTR.
	 */
	if (res)
		ret = (res < 0) ? res : 0;

	futex_unqueue_pi(&q);
	spin_unlock(q.lock_ptr);
	goto out;

out_unlock_put_key:
	futex_q_unlock(hb);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	}
	/* -EINTR becomes -ERESTARTNOINTR: restart the syscall transparently. */
	return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
	futex_q_unlock(hb);

	/* Fault the page in writable and redo the whole operation. */
	ret = fault_in_user_writeable(uaddr);
	if (ret)
		goto out;

	if (!(flags & FLAGS_SHARED))
		goto retry_private;

	goto retry;
}
1094 | ||
/*
 * Userspace attempted a TID -> 0 atomic transition, and failed.
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 *
 * Return: 0 on success; -EPERM if current does not own the futex;
 * -EFAULT/-EAGAIN after exhausting the fault/retry paths; -EINVAL if
 * kernel and user space state are inconsistent.
 */
int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
	u32 curval, uval, vpid = task_pid_vnr(current);
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *hb;
	struct futex_q *top_waiter;
	int ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
		return -ENOSYS;

retry:
	if (get_user(uval, uaddr))
		return -EFAULT;
	/*
	 * We release only a lock we actually own:
	 */
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
	if (ret)
		return ret;

	hb = futex_hash(&key);
	spin_lock(&hb->lock);

	/*
	 * Check waiters first. We do not trust user space values at
	 * all and we at least want to know if user space fiddled
	 * with the futex value instead of blindly unlocking.
	 */
	top_waiter = futex_top_waiter(hb, &key);
	if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;

		/* A waiter without pi_state means corrupted kernel state. */
		ret = -EINVAL;
		if (!pi_state)
			goto out_unlock;

		/*
		 * If current does not own the pi_state then the futex is
		 * inconsistent and user space fiddled with the futex value.
		 */
		if (pi_state->owner != current)
			goto out_unlock;

		/* Hold a reference across the hb->lock drop below. */
		get_pi_state(pi_state);
		/*
		 * By taking wait_lock while still holding hb->lock, we ensure
		 * there is no point where we hold neither; and therefore
		 * wake_futex_pi() must observe a state consistent with what we
		 * observed.
		 *
		 * In particular; this forces __rt_mutex_start_proxy_lock() to
		 * complete such that we're guaranteed to observe the
		 * rt_waiter. Also see the WARN in wake_futex_pi().
		 */
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
		spin_unlock(&hb->lock);

		/* drops pi_state->pi_mutex.wait_lock */
		ret = wake_futex_pi(uaddr, uval, pi_state);

		put_pi_state(pi_state);

		/*
		 * Success, we're done! No tricky corner cases.
		 */
		if (!ret)
			return ret;
		/*
		 * The atomic access to the futex value generated a
		 * pagefault, so retry the user-access and the wakeup:
		 */
		if (ret == -EFAULT)
			goto pi_faulted;
		/*
		 * An unconditional UNLOCK_PI op raced against a waiter
		 * setting the FUTEX_WAITERS bit. Try again.
		 */
		if (ret == -EAGAIN)
			goto pi_retry;
		/*
		 * wake_futex_pi has detected invalid state. Tell user
		 * space.
		 */
		return ret;
	}

	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
	 * on hb->lock. So we can safely ignore them. We do neither
	 * preserve the WAITERS bit nor the OWNER_DIED one. We are the
	 * owner.
	 */
	if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
		spin_unlock(&hb->lock);
		switch (ret) {
		case -EFAULT:
			goto pi_faulted;

		case -EAGAIN:
			goto pi_retry;

		default:
			/* Unexpected cmpxchg error — report it upward. */
			WARN_ON_ONCE(1);
			return ret;
		}
	}

	/*
	 * If uval has changed, let user space handle it.
	 */
	ret = (curval == uval) ? 0 : -EAGAIN;

out_unlock:
	spin_unlock(&hb->lock);
	return ret;

pi_retry:
	cond_resched();
	goto retry;

pi_faulted:
	/* Fault the page in writable, then redo the whole unlock. */
	ret = fault_in_user_writeable(uaddr);
	if (!ret)
		goto retry;

	return ret;
}
1233 |