Merge tag 'y2038-for-4.21' of ssh://gitolite.kernel.org:/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / kernel / futex.c
index b305beaab739d2b6dfe65655a269ef40e80911c8..054105854e0e38fbab1fbaee8fc743729526de2c 100644 (file)
@@ -1151,11 +1151,65 @@ out_error:
        return ret;
 }
 
+static int handle_exit_race(u32 __user *uaddr, u32 uval,
+                           struct task_struct *tsk)
+{
+       u32 uval2;
+
+       /*
+        * @tsk may be NULL. If it is set but not yet PF_EXITPIDONE, try again.
+        */
+       if (tsk && !(tsk->flags & PF_EXITPIDONE))
+               return -EAGAIN;
+
+       /*
+        * Reread the user space value to handle the following situation:
+        *
+        * CPU0                         CPU1
+        *
+        * sys_exit()                   sys_futex()
+        *  do_exit()                    futex_lock_pi()
+        *                                futex_lock_pi_atomic()
+        *   exit_signals(tsk)              No waiters:
+        *    tsk->flags |= PF_EXITING;     *uaddr == 0x00000PID
+        *  mm_release(tsk)                 Set waiter bit
+        *   exit_robust_list(tsk) {        *uaddr = 0x80000PID;
+        *      Set owner died              attach_to_pi_owner() {
+        *    *uaddr = 0xC0000000;           tsk = get_task(PID);
+        *   }                               if (!(tsk->flags & PF_EXITING)) {
+        *  ...                                attach();
+        *  tsk->flags |= PF_EXITPIDONE;     } else {
+        *                                     if (!(tsk->flags & PF_EXITPIDONE))
+        *                                       return -EAGAIN;
+        *                                     return -ESRCH; <--- FAIL
+        *                                   }
+        *
+        * Returning ESRCH unconditionally is wrong here because the
+        * user space value has been changed by the exiting task.
+        *
+        * The same logic applies to the case where the exiting task is
+        * already gone.
+        */
+       if (get_futex_value_locked(&uval2, uaddr))
+               return -EFAULT;
+
+       /* If the value at @uaddr no longer matches @uval, try again. */
+       if (uval2 != uval)
+               return -EAGAIN;
+
+       /*
+        * The exiting task did not have a robust list, the robust list was
+        * corrupted or the user space value in *uaddr is simply bogus.
+        * Give up and tell user space.
+        */
+       return -ESRCH;
+}
+
 /*
  * Lookup the task for the TID provided from user space and attach to
  * it after doing proper sanity checks.
  */
-static int attach_to_pi_owner(u32 uval, union futex_key *key,
+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
                              struct futex_pi_state **ps)
 {
        pid_t pid = uval & FUTEX_TID_MASK;
@@ -1165,12 +1219,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
        /*
         * We are the first waiter - try to look up the real owner and attach
         * the new pi_state to it, but bail out when TID = 0 [1]
+        *
+        * The !pid check is paranoid. None of the call sites should end up
+        * with pid == 0, but better safe than sorry. Let the caller retry.
         */
        if (!pid)
-               return -ESRCH;
+               return -EAGAIN;
        p = find_get_task_by_vpid(pid);
        if (!p)
-               return -ESRCH;
+               return handle_exit_race(uaddr, uval, NULL);
 
        if (unlikely(p->flags & PF_KTHREAD)) {
                put_task_struct(p);
@@ -1190,7 +1247,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
                 * set, we know that the task has finished the
                 * cleanup:
                 */
-               int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+               int ret = handle_exit_race(uaddr, uval, p);
 
                raw_spin_unlock_irq(&p->pi_lock);
                put_task_struct(p);
@@ -1247,7 +1304,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
         * We are the first waiter - try to look up the owner based on
         * @uval and attach to it.
         */
-       return attach_to_pi_owner(uval, key, ps);
+       return attach_to_pi_owner(uaddr, uval, key, ps);
 }
 
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1355,7 +1412,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
         * attach to the owner. If that fails, no harm done, we only
         * set the FUTEX_WAITERS bit in the user space variable.
         */
-       return attach_to_pi_owner(uval, key, ps);
+       return attach_to_pi_owner(uaddr, newval, key, ps);
 }
 
 /**