serial: imx: support RS-485 Rx disable on Tx
[linux-2.6-block.git] / kernel / futex.c
index 5d6ce6413ef1d227b32c99a4330bee1289f9f571..c20f06f38ef35a4776a61c3579a6de813e466ce0 100644 (file)
  *   futex_wait(futex, val);
  *
  *   waiters++; (a)
- *   mb(); (A) <-- paired with -.
- *                              |
- *   lock(hash_bucket(futex));  |
- *                              |
- *   uval = *futex;             |
- *                              |        *futex = newval;
- *                              |        sys_futex(WAKE, futex);
- *                              |          futex_wake(futex);
- *                              |
- *                              `------->  mb(); (B)
+ *   smp_mb(); (A) <-- paired with -.
+ *                                  |
+ *   lock(hash_bucket(futex));      |
+ *                                  |
+ *   uval = *futex;                 |
+ *                                  |        *futex = newval;
+ *                                  |        sys_futex(WAKE, futex);
+ *                                  |          futex_wake(futex);
+ *                                  |
+ *                                  `--------> smp_mb(); (B)
  *   if (uval == val)
  *     queue();
  *     unlock(hash_bucket(futex));
@@ -334,7 +334,7 @@ static inline void futex_get_mm(union futex_key *key)
        /*
         * Ensure futex_get_mm() implies a full barrier such that
         * get_futex_key() implies a full barrier. This is relied upon
-        * as full barrier (B), see the ordering comment above.
+        * as smp_mb(); (B), see the ordering comment above.
         */
        smp_mb__after_atomic();
 }
@@ -407,10 +407,10 @@ static void get_futex_key_refs(union futex_key *key)
 
        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
        case FUT_OFF_INODE:
-               ihold(key->shared.inode); /* implies MB (B) */
+               ihold(key->shared.inode); /* implies smp_mb(); (B) */
                break;
        case FUT_OFF_MMSHARED:
-               futex_get_mm(key); /* implies MB (B) */
+               futex_get_mm(key); /* implies smp_mb(); (B) */
                break;
        default:
                /*
@@ -418,7 +418,7 @@ static void get_futex_key_refs(union futex_key *key)
                 * mm, therefore the only purpose of calling get_futex_key_refs
                 * is because we need the barrier for the lockless waiter check.
                 */
-               smp_mb(); /* explicit MB (B) */
+               smp_mb(); /* explicit smp_mb(); (B) */
        }
 }
 
@@ -497,7 +497,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
        if (!fshared) {
                key->private.mm = mm;
                key->private.address = address;
-               get_futex_key_refs(key);  /* implies MB (B) */
+               get_futex_key_refs(key);  /* implies smp_mb(); (B) */
                return 0;
        }
 
@@ -520,7 +520,20 @@ again:
        else
                err = 0;
 
-       lock_page(page);
+       /*
+        * The treatment of mapping from this point on is critical. The page
+        * lock protects many things but in this context the page lock
+        * stabilizes mapping, prevents inode freeing in the shared
+        * file-backed region case and guards against movement to swap cache.
+        *
+        * Strictly speaking the page lock is not needed in all cases being
+        * considered here and page lock forces unnecessary serialization.
+        * From this point on, mapping will be re-verified if necessary and
+        * page lock will be acquired only if it is unavoidable.
+        */
+       page = compound_head(page);
+       mapping = READ_ONCE(page->mapping);
+
        /*
         * If page->mapping is NULL, then it cannot be a PageAnon
         * page; but it might be the ZERO_PAGE or in the gate area or
@@ -536,19 +549,31 @@ again:
         * shmem_writepage move it from filecache to swapcache beneath us:
         * an unlikely race, but we do need to retry for page->mapping.
         */
-       mapping = compound_head(page)->mapping;
-       if (!mapping) {
-               int shmem_swizzled = PageSwapCache(page);
+       if (unlikely(!mapping)) {
+               int shmem_swizzled;
+
+               /*
+                * Page lock is required to identify which special case above
+                * applies. If this is really a shmem page then the page lock
+                * will prevent unexpected transitions.
+                */
+               lock_page(page);
+               shmem_swizzled = PageSwapCache(page) || page->mapping;
                unlock_page(page);
                put_page(page);
+
                if (shmem_swizzled)
                        goto again;
+
                return -EFAULT;
        }
 
        /*
         * Private mappings are handled in a simple way.
         *
+        * If the futex key is stored on an anonymous page, then the associated
+        * object is the mm which is implicitly pinned by the calling process.
+        *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
         * the object not the particular process.
@@ -566,16 +591,74 @@ again:
                key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
+
+               get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
        } else {
+               struct inode *inode;
+
+               /*
+                * The associated futex object in this case is the inode and
+                * the page->mapping must be traversed. Ordinarily this should
+                * be stabilised under page lock but it's not strictly
+                * necessary in this case as we just want to pin the inode, not
+                * update the radix tree or anything like that.
+                *
+                * The RCU read lock is taken as the inode is finally freed
+                * under RCU. If the mapping still matches expectations then the
+                * mapping->host can be safely accessed as being a valid inode.
+                */
+               rcu_read_lock();
+
+               if (READ_ONCE(page->mapping) != mapping) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+
+               inode = READ_ONCE(mapping->host);
+               if (!inode) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+
+               /*
+                * Take a reference unless it is about to be freed. Previously
+                * this reference was taken by ihold under the page lock
+                * pinning the inode in place so i_lock was unnecessary. The
+                * only way for this check to fail is if the inode was
+                * truncated in parallel so warn for now if this happens.
+                *
+                * We are not calling into get_futex_key_refs() in file-backed
+                * cases, therefore a successful atomic_inc return below will
+                * guarantee that get_futex_key() will still imply smp_mb(); (B).
+                */
+               if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+
+               /* Should be impossible but let's be paranoid for now */
+               if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+                       err = -EFAULT;
+                       rcu_read_unlock();
+                       iput(inode);
+
+                       goto out;
+               }
+
                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-               key->shared.inode = mapping->host;
+               key->shared.inode = inode;
                key->shared.pgoff = basepage_index(page);
+               rcu_read_unlock();
        }
 
-       get_futex_key_refs(key); /* implies MB (B) */
-
 out:
-       unlock_page(page);
        put_page(page);
        return err;
 }
@@ -1212,10 +1295,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
        if (unlikely(should_fail_futex(true)))
                ret = -EFAULT;
 
-       if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+       if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
                ret = -EFAULT;
-       else if (curval != uval)
-               ret = -EINVAL;
+       } else if (curval != uval) {
+               /*
+                * If an unconditional UNLOCK_PI operation (user space did not
+                * try the TID->0 transition) raced with a waiter setting the
+                * FUTEX_WAITERS flag between get_user() and locking the hash
+                * bucket lock, retry the operation.
+                */
+               if ((FUTEX_TID_MASK & curval) == uval)
+                       ret = -EAGAIN;
+               else
+                       ret = -EINVAL;
+       }
        if (ret) {
                raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
                return ret;
@@ -1442,8 +1535,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
        if (likely(&hb1->chain != &hb2->chain)) {
                plist_del(&q->list, &hb1->chain);
                hb_waiters_dec(hb1);
-               plist_add(&q->list, &hb2->chain);
                hb_waiters_inc(hb2);
+               plist_add(&q->list, &hb2->chain);
                q->lock_ptr = &hb2->lock;
        }
        get_futex_key_refs(key2);
@@ -1864,7 +1957,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 
        q->lock_ptr = &hb->lock;
 
-       spin_lock(&hb->lock); /* implies MB (A) */
+       spin_lock(&hb->lock); /* implies smp_mb(); (A) */
        return hb;
 }
 
@@ -1927,8 +2020,12 @@ static int unqueue_me(struct futex_q *q)
 
        /* In the common case we don't take the spinlock, which is nice. */
 retry:
-       lock_ptr = q->lock_ptr;
-       barrier();
+       /*
+        * q->lock_ptr can change between this read and the following spin_lock.
+        * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+        * optimizing lock_ptr out of the logic below.
+        */
+       lock_ptr = READ_ONCE(q->lock_ptr);
        if (lock_ptr != NULL) {
                spin_lock(lock_ptr);
                /*
@@ -2535,6 +2632,15 @@ retry:
                 */
                if (ret == -EFAULT)
                        goto pi_faulted;
+               /*
+                * An unconditional UNLOCK_PI op raced against a waiter
+                * setting the FUTEX_WAITERS bit. Try again.
+                */
+               if (ret == -EAGAIN) {
+                       spin_unlock(&hb->lock);
+                       put_futex_key(&key);
+                       goto retry;
+               }
                /*
                 * wake_futex_pi has detected invalid state. Tell user
                 * space.