#include <linux/fault-inject.h>
#include <linux/slab.h>
#include <linux/prctl.h>
+#include <linux/rcuref.h>
#include "futex.h"
#include "../locking/rtmutex_common.h"
#define futex_hashmask (__futex_data.hashmask)
struct futex_private_hash {
+ rcuref_t users;
unsigned int hash_mask;
+ struct rcu_head rcu;
void *mm;
bool custom;
struct futex_hash_bucket queues[];
bool futex_private_hash_get(struct futex_private_hash *fph)
{
- return false;
+ return rcuref_get(&fph->users);
}
void futex_private_hash_put(struct futex_private_hash *fph)
{
+ /*
+ * The last put does not free the hash here; the final reference is
+ * detected via rcuref_is_dead() by the pivot path. Only wake a
+ * possible waiter in futex_hash_allocate().
+ */
+ if (rcuref_put(&fph->users))
+ wake_up_var(fph->mm);
}
/**
 * Obtain an additional reference on a hash bucket that is already held. The
 * caller must already own a reference.
*/
-void futex_hash_get(struct futex_hash_bucket *hb) { }
-void futex_hash_put(struct futex_hash_bucket *hb) { }
+void futex_hash_get(struct futex_hash_bucket *hb)
+{
+ struct futex_private_hash *fph = hb->priv;
+
+ if (!fph)
+ return;
+ WARN_ON_ONCE(!futex_private_hash_get(fph));
+}
+
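+ /*
+ * Drop a reference obtained via futex_hash() or futex_hash_get().
+ */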
+void futex_hash_put(struct futex_hash_bucket *hb)
+{
+ struct futex_private_hash *fph = hb->priv;
+
+ if (!fph)
+ return;
+ futex_private_hash_put(fph);
+}
static struct futex_hash_bucket *
__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
return NULL;
if (!fph)
- fph = key->private.mm->futex_phash;
+ fph = rcu_dereference(key->private.mm->futex_phash);
if (!fph || !fph->hash_mask)
return NULL;
return &fph->queues[hash & fph->hash_mask];
}
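+ /*
+ * Move every queued waiter from the buckets of the old private hash to
+ * the corresponding buckets of the new one. Runs under
+ * mm_struct::futex_hash_lock, once the old hash can no longer gain new
+ * references.
+ */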
+static void futex_rehash_private(struct futex_private_hash *old,
+ struct futex_private_hash *new)
+{
+ struct futex_hash_bucket *hb_old, *hb_new;
+ unsigned int slots = old->hash_mask + 1;
+ unsigned int i;
+
+ for (i = 0; i < slots; i++) {
+ struct futex_q *this, *tmp;
+
+ hb_old = &old->queues[i];
+
+ spin_lock(&hb_old->lock);
+ plist_for_each_entry_safe(this, tmp, &hb_old->chain, list) {
+
+ plist_del(&this->list, &hb_old->chain);
+ futex_hb_waiters_dec(hb_old);
+
+ WARN_ON_ONCE(this->lock_ptr != &hb_old->lock);
+
+ hb_new = __futex_hash(&this->key, new);
+ futex_hb_waiters_inc(hb_new);
+ /*
+ * The new pointer isn't published yet but an already
+ * moved user can be unqueued due to timeout or signal.
+ */
+ spin_lock_nested(&hb_new->lock, SINGLE_DEPTH_NESTING);
+ plist_add(&this->list, &hb_new->chain);
+ this->lock_ptr = &hb_new->lock;
+ spin_unlock(&hb_new->lock);
+ }
+ spin_unlock(&hb_old->lock);
+ }
+}
+
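+ /*
+ * Try to replace mm->futex_phash with @new. If the current hash still
+ * has users, @new is parked in mm->futex_phash_new and false is
+ * returned; otherwise queued waiters are rehashed and the new hash is
+ * published.
+ */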
+static bool __futex_pivot_hash(struct mm_struct *mm,
+ struct futex_private_hash *new)
+{
+ struct futex_private_hash *fph;
+
+ WARN_ON_ONCE(mm->futex_phash_new);
+
+ fph = rcu_dereference_protected(mm->futex_phash,
+ lockdep_is_held(&mm->futex_hash_lock));
+ if (fph) {
+ if (!rcuref_is_dead(&fph->users)) {
+ mm->futex_phash_new = new;
+ return false;
+ }
+
+ futex_rehash_private(fph, new);
+ }
+ rcu_assign_pointer(mm->futex_phash, new);
+ kvfree_rcu(fph, rcu);
+ return true;
+}
+
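+ /*
+ * Install a queued replacement hash (mm->futex_phash_new), if any,
+ * under mm_struct::futex_hash_lock.
+ */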
+static void futex_pivot_hash(struct mm_struct *mm)
+{
+ scoped_guard(mutex, &mm->futex_hash_lock) {
+ struct futex_private_hash *fph;
+
+ fph = mm->futex_phash_new;
+ if (fph) {
+ mm->futex_phash_new = NULL;
+ __futex_pivot_hash(mm, fph);
+ }
+ }
+}
+
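+ /*
+ * Return the mm's private hash with a reference held, or NULL if the
+ * task uses the global hash. May sleep on mm_struct::futex_hash_lock
+ * if a resize is in progress.
+ */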
struct futex_private_hash *futex_private_hash(void)
{
struct mm_struct *mm = current->mm;
- struct futex_private_hash *fph;
+ /*
+ * Ideally we don't loop. If there is a replacement in progress
+ * then a new private hash is already prepared and a reference can't be
+ * obtained once the last user dropped its reference.
+ * In that case we block on mm_struct::futex_hash_lock and either have
+ * to perform the replacement or wait while someone else is doing the
+ * job. Either way, on the second iteration we acquire a reference on the
+ * new private hash or loop again because a new replacement has been
+ * requested.
+ */
+again:
+ scoped_guard(rcu) {
+ struct futex_private_hash *fph;
- fph = mm->futex_phash;
- return fph;
+ fph = rcu_dereference(mm->futex_phash);
+ if (!fph)
+ return NULL;
+
+ if (rcuref_get(&fph->users))
+ return fph;
+ }
+ futex_pivot_hash(mm);
+ goto again;
}
struct futex_hash_bucket *futex_hash(union futex_key *key)
{
+ struct futex_private_hash *fph;
struct futex_hash_bucket *hb;
- hb = __futex_hash(key, NULL);
- return hb;
+again:
+ scoped_guard(rcu) {
+ hb = __futex_hash(key, NULL);
+ fph = hb->priv;
+
+ if (!fph || futex_private_hash_get(fph))
+ return hb;
+ }
+ futex_pivot_hash(key->private.mm);
+ goto again;
}
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
spinlock_t *lock_ptr;
int ret = 0;
+ /* RCU so lock_ptr is not going away during locking. */
+ guard(rcu)();
/* In the common case we don't take the spinlock, which is nice. */
retry:
/*
struct futex_pi_state *pi_state;
union futex_key key = FUTEX_KEY_INIT;
+ /*
+ * The mutex mm_struct::futex_hash_lock might be acquired.
+ */
+ might_sleep();
/*
* Ensure the hash remains stable (no resize) during the while loop
* below. The hb pointer is acquired under the pi_lock so we can't block
#ifdef CONFIG_FUTEX_PRIVATE_HASH
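+ /*
+ * Release the private hash on mm teardown; no users can remain at this
+ * point, hence the plain rcu_dereference_raw() and kvfree().
+ */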
void futex_hash_free(struct mm_struct *mm)
{
- kvfree(mm->futex_phash);
+ struct futex_private_hash *fph;
+
+ kvfree(mm->futex_phash_new);
+ fph = rcu_dereference_raw(mm->futex_phash);
+ if (fph) {
+ WARN_ON_ONCE(rcuref_read(&fph->users) > 1);
+ kvfree(fph);
+ }
+}
+
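+ /*
+ * Wait condition for futex_hash_allocate(): true once there is no
+ * queued replacement or the current hash has dropped its last
+ * reference, i.e. the pending pivot can make progress.
+ */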
+static bool futex_pivot_pending(struct mm_struct *mm)
+{
+ struct futex_private_hash *fph;
+
+ guard(rcu)();
+
+ if (!mm->futex_phash_new)
+ return true;
+
+ fph = rcu_dereference(mm->futex_phash);
+ return rcuref_is_dead(&fph->users);
+}
+
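+ /*
+ * Compare two pending replacement requests: returns true if @a loses
+ * against @b and should be discarded.
+ */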
+static bool futex_hash_less(struct futex_private_hash *a,
+ struct futex_private_hash *b)
+{
+ /* user provided always wins */
+ if (!a->custom && b->custom)
+ return true;
+ if (a->custom && !b->custom)
+ return false;
+
+ /* zero-sized hash wins */
+ if (!b->hash_mask)
+ return true;
+ if (!a->hash_mask)
+ return false;
+
+ /* keep the biggest */
+ if (a->hash_mask < b->hash_mask)
+ return true;
+ if (a->hash_mask > b->hash_mask)
+ return false;
+
+ return false; /* equal */
}
static int futex_hash_allocate(unsigned int hash_slots, bool custom)
if (hash_slots && (hash_slots == 1 || !is_power_of_2(hash_slots)))
return -EINVAL;
- if (mm->futex_phash)
- return -EALREADY;
-
- if (!thread_group_empty(current))
- return -EINVAL;
+ /*
+ * Once we've disabled the global hash there is no way back.
+ */
+ scoped_guard(rcu) {
+ fph = rcu_dereference(mm->futex_phash);
+ if (fph && !fph->hash_mask) {
+ if (custom)
+ return -EBUSY;
+ return 0;
+ }
+ }
fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!fph)
return -ENOMEM;
+ rcuref_init(&fph->users, 1);
fph->hash_mask = hash_slots ? hash_slots - 1 : 0;
fph->custom = custom;
fph->mm = mm;
for (i = 0; i < hash_slots; i++)
futex_hash_bucket_init(&fph->queues[i], fph);
- mm->futex_phash = fph;
+ if (custom) {
+ /*
+ * Only let prctl() wait / retry; don't unduly delay clone().
+ */
+again:
+ wait_var_event(mm, futex_pivot_pending(mm));
+ }
+
+ scoped_guard(mutex, &mm->futex_hash_lock) {
+ struct futex_private_hash *free __free(kvfree) = NULL;
+ struct futex_private_hash *cur, *new;
+
+ cur = rcu_dereference_protected(mm->futex_phash,
+ lockdep_is_held(&mm->futex_hash_lock));
+ new = mm->futex_phash_new;
+ mm->futex_phash_new = NULL;
+
+ if (fph) {
+ if (cur && !new) {
+ /*
+ * If we have an existing hash, but have not yet
+ * allocated a replacement hash, drop the initial
+ * reference on the existing hash.
+ */
+ futex_private_hash_put(cur);
+ }
+
+ if (new) {
+ /*
+ * Two updates raced; throw out the lesser one.
+ */
+ if (futex_hash_less(new, fph)) {
+ free = new;
+ new = fph;
+ } else {
+ free = fph;
+ }
+ } else {
+ new = fph;
+ }
+ fph = NULL;
+ }
+
+ if (new) {
+ /*
+ * Will set mm->futex_phash_new on failure;
+ * futex_private_hash_get() will try again.
+ */
+ if (!__futex_pivot_hash(mm, new) && custom)
+ goto again;
+ }
+ }
return 0;
}
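+ /*
+ * Resize the default private hash as the number of threads grows. The
+ * hash is never shrunk and a user-requested (custom) size is never
+ * overridden.
+ */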
int futex_hash_allocate_default(void)
{
+ unsigned int threads, buckets, current_buckets = 0;
+ struct futex_private_hash *fph;
+
if (!current->mm)
return 0;
- if (current->mm->futex_phash)
+ scoped_guard(rcu) {
+ threads = min_t(unsigned int,
+ get_nr_threads(current),
+ num_online_cpus());
+
+ fph = rcu_dereference(current->mm->futex_phash);
+ if (fph) {
+ if (fph->custom)
+ return 0;
+
+ current_buckets = fph->hash_mask + 1;
+ }
+ }
+
+ /*
+ * The default allocation will remain within
+ * 16 <= threads * 4 <= global hash size
+ */
+ buckets = roundup_pow_of_two(4 * threads);
+ buckets = clamp(buckets, 16, futex_hashmask + 1);
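+ /*
+ * Example: 6 threads on an 8-CPU system give 4 * 6 = 24, rounded up
+ * to 32 buckets (capped by the global hash size); a single thread
+ * still gets the minimum of 16.
+ */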
+
+ if (current_buckets >= buckets)
return 0;
- return futex_hash_allocate(16, false);
+ return futex_hash_allocate(buckets, false);
}
static int futex_hash_get_slots(void)
{
struct futex_private_hash *fph;
- fph = current->mm->futex_phash;
+ guard(rcu)();
+ fph = rcu_dereference(current->mm->futex_phash);
if (fph && fph->hash_mask)
return fph->hash_mask + 1;
return 0;