bcachefs: don't use rht_bucket() in btree_key_cache_scan()
author	Kent Overstreet <kent.overstreet@linux.dev>
Mon, 19 Aug 2024 20:41:00 +0000 (16:41 -0400)
committer	Kent Overstreet <kent.overstreet@linux.dev>
Thu, 22 Aug 2024 14:04:41 +0000 (10:04 -0400)
rht_bucket() does strange, complicated things when a rehash is in
progress.
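
For reference, the helper in question looks roughly like this (paraphrased
from include/linux/rhashtable.h, not line-for-line): when the bucket table
is nested (tbl->nest != 0), rht_bucket() cannot simply index tbl->buckets[]
and instead takes the multi-level rht_bucket_nested() path.

  /* Simplified sketch of the upstream helper, for illustration only: */
  static inline struct rhash_lock_head __rcu *const *rht_bucket(
          const struct bucket_table *tbl, unsigned int hash)
  {
          return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash)
                                     : &tbl->buckets[hash];
  }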

Instead, just skip scanning when a rehash is in progress: scanning is
going to be more expensive (many more empty slots to cover), and some
sort of infinite loop has been observed.
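
The shrinker-side change below boils down to the shape sketched here (a
distilled illustration, not the actual bcachefs function; example_scan()
is a made-up name): check tbl->nest under RCU, and if the table is nested,
return SHRINK_STOP so the scan is simply retried on a later shrinker call.

  #include <linux/rhashtable.h>
  #include <linux/shrinker.h>

  /* Illustrative only: skip the walk entirely when the table is nested. */
  static unsigned long example_scan(struct rhashtable *ht)
  {
          struct bucket_table *tbl;
          unsigned long scanned = 0;

          rcu_read_lock();
          tbl = rht_dereference_rcu(ht->tbl, ht);
          if (unlikely(tbl->nest)) {
                  rcu_read_unlock();
                  return SHRINK_STOP;     /* retry on a later invocation */
          }

          for (unsigned int i = 0; i < tbl->size; i++) {
                  struct rhash_head *pos = rht_ptr_rcu(&tbl->buckets[i]);

                  while (!rht_is_a_nulls(pos)) {
                          /* evict/count the entry here */
                          scanned++;
                          pos = rht_dereference_bucket_rcu(pos->next, tbl, i);
                  }
          }
          rcu_read_unlock();
          return scanned;
  }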

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c

index 9b3ec2a3b8cecfd31427c5ff17ac18ce82897746..fda7998734cbc1acd00e0c4a171ded929c9f8fda 100644
@@ -778,6 +778,20 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 
        rcu_read_lock();
        tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
+
+       /*
+        * Scanning is expensive while a rehash is in progress - most elements
+        * will be on the new hashtable, if it's in progress
+        *
+        * A rehash could still start while we're scanning - that's ok, we'll
+        * still see most elements.
+        */
+       if (unlikely(tbl->nest)) {
+               rcu_read_unlock();
+               srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+               return SHRINK_STOP;
+       }
+
        if (bc->shrink_iter >= tbl->size)
                bc->shrink_iter = 0;
        start = bc->shrink_iter;
@@ -785,7 +799,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
        do {
                struct rhash_head *pos, *next;
 
-               pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
+               pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]);
 
                while (!rht_is_a_nulls(pos)) {
                        next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
@@ -866,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        while (atomic_long_read(&bc->nr_keys)) {
                rcu_read_lock();
                tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
-               if (tbl)
+               if (tbl) {
+                       if (tbl->nest) {
+                               /* wait for in progress rehash */
+                               rcu_read_unlock();
+                               mutex_lock(&bc->table.mutex);
+                               mutex_unlock(&bc->table.mutex);
+                               rcu_read_lock();
+                               continue;
+                       }
                        for (i = 0; i < tbl->size; i++)
-                               rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
+                               while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) {
+                                       ck = container_of(pos, struct bkey_cached, hash);
                                        bkey_cached_evict(bc, ck);
                                        list_add(&ck->list, &items);
                                }
+               }
                rcu_read_unlock();
        }