bcachefs: key cache can now allocate from pending
author Kent Overstreet <kent.overstreet@linux.dev>
Thu, 13 Jun 2024 19:35:47 +0000 (15:35 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 9 Sep 2024 13:41:47 +0000 (09:41 -0400)
btree_trans objects can hold the btree_trans_barrier srcu read lock for
an extended amount of time (they shouldn't, but it's difficult to
guarantee).

The srcu barrier blocks memory reclaim, so to avoid too many stranded
key cache items, this uses the new pending_rcu_items (struct rcu_pending)
to allocate from pending items - like we did before, but now without a
global lock on the key cache.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_key_cache_types.h
fs/bcachefs/btree_types.h

index dfaeb0810c5e4332e6ce5f0a1ecbc98d8c49dc04..3048adde9284e7d79fa6776dfa8d4227738524ed 100644 (file)
@@ -92,18 +92,18 @@ static bool bkey_cached_evict(struct btree_key_cache *c,
        return ret;
 }
 
-static void __bkey_cached_free(struct rcu_head *rcu)
+static void __bkey_cached_free(struct rcu_pending *pending, struct rcu_head *rcu)
 {
+       struct bch_fs *c = container_of(pending->srcu, struct bch_fs, btree_trans_barrier);
        struct bkey_cached *ck = container_of(rcu, struct bkey_cached, rcu);
 
+       this_cpu_dec(*c->btree_key_cache.nr_pending);
        kmem_cache_free(bch2_key_cache, ck);
 }
 
 static void bkey_cached_free(struct btree_key_cache *bc,
                             struct bkey_cached *ck)
 {
-       struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
-
        kfree(ck->k);
        ck->k           = NULL;
        ck->u64s        = 0;
@@ -111,7 +111,9 @@ static void bkey_cached_free(struct btree_key_cache *bc,
        six_unlock_write(&ck->c.lock);
        six_unlock_intent(&ck->c.lock);
 
-       call_srcu(&c->btree_trans_barrier, &ck->rcu, __bkey_cached_free);
+       bool pcpu_readers = ck->c.lock.readers != NULL;
+       rcu_pending_enqueue(&bc->pending[pcpu_readers], &ck->rcu);
+       this_cpu_inc(*bc->nr_pending);
 }
 
 static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp)
@@ -131,10 +133,18 @@ static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp)
 static struct bkey_cached *
 bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned key_u64s)
 {
+       struct bch_fs *c = trans->c;
+       struct btree_key_cache *bc = &c->btree_key_cache;
        bool pcpu_readers = btree_uses_pcpu_readers(path->btree_id);
        int ret;
 
-       struct bkey_cached *ck = allocate_dropping_locks(trans, ret,
+       struct bkey_cached *ck = container_of_or_null(
+                               rcu_pending_dequeue(&bc->pending[pcpu_readers]),
+                               struct bkey_cached, rcu);
+       if (ck)
+               goto lock;
+
+       ck = allocate_dropping_locks(trans, ret,
                                     __bkey_cached_alloc(key_u64s, _gfp));
        if (ret) {
                if (ck)
@@ -143,14 +153,19 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
                return ERR_PTR(ret);
        }
 
-       if (!ck)
-               return NULL;
-
-       bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
+       if (ck) {
+               bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
+               ck->c.cached = true;
+               goto lock;
+       }
 
-       ck->c.cached = true;
-       BUG_ON(!six_trylock_intent(&ck->c.lock));
-       BUG_ON(!six_trylock_write(&ck->c.lock));
+       ck = container_of_or_null(rcu_pending_dequeue_from_all(&bc->pending[pcpu_readers]),
+                                 struct bkey_cached, rcu);
+       if (ck)
+               goto lock;
+lock:
+       six_lock_intent(&ck->c.lock, NULL, NULL);
+       six_lock_write(&ck->c.lock, NULL, NULL);
        return ck;
 }
 
@@ -720,6 +735,11 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 
        if (bc->table_init_done)
                rhashtable_destroy(&bc->table);
+
+       rcu_pending_exit(&bc->pending[0]);
+       rcu_pending_exit(&bc->pending[1]);
+
+       free_percpu(bc->nr_pending);
 }
 
 void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
@@ -731,6 +751,14 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
        struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
        struct shrinker *shrink;
 
+       bc->nr_pending = alloc_percpu(size_t);
+       if (!bc->nr_pending)
+               return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+
+       if (rcu_pending_init(&bc->pending[0], &c->btree_trans_barrier, __bkey_cached_free) ||
+           rcu_pending_init(&bc->pending[1], &c->btree_trans_barrier, __bkey_cached_free))
+               return -BCH_ERR_ENOMEM_fs_btree_cache_init;
+
        if (rhashtable_init(&bc->table, &bch2_btree_key_cache_params))
                return -BCH_ERR_ENOMEM_fs_btree_cache_init;
 
@@ -757,13 +785,15 @@ void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *
        prt_printf(out, "keys:\t%lu\r\n",               atomic_long_read(&bc->nr_keys));
        prt_printf(out, "dirty:\t%lu\r\n",              atomic_long_read(&bc->nr_dirty));
        prt_printf(out, "table size:\t%u\r\n",          bc->table.tbl->size);
-
-       prt_printf(out, "\nshrinker:\n");
+       prt_newline(out);
+       prt_printf(out, "shrinker:\n");
        prt_printf(out, "requested_to_free:\t%lu\r\n",  bc->requested_to_free);
        prt_printf(out, "freed:\t%lu\r\n",              bc->freed);
        prt_printf(out, "skipped_dirty:\t%lu\r\n",      bc->skipped_dirty);
        prt_printf(out, "skipped_accessed:\t%lu\r\n",   bc->skipped_accessed);
        prt_printf(out, "skipped_lock_fail:\t%lu\r\n",  bc->skipped_lock_fail);
+       prt_newline(out);
+       prt_printf(out, "pending:\t%lu\r\n",            per_cpu_sum(bc->nr_pending));
 }
 
 void bch2_btree_key_cache_exit(void)
index e026c65f54e150943e4cdaf2d0bdf9e906ce154b..722f1ed1055152b930fa22d41162e8405ac3288c 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
 #define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
 
+#include "rcu_pending.h"
+
 struct btree_key_cache {
        struct rhashtable       table;
        bool                    table_init_done;
@@ -9,6 +11,10 @@ struct btree_key_cache {
        struct shrinker         *shrink;
        unsigned                shrink_iter;
 
+       /* 0: non pcpu reader locks, 1: pcpu reader locks */
+       struct rcu_pending      pending[2];
+       size_t __percpu         *nr_pending;
+
        atomic_long_t           nr_keys;
        atomic_long_t           nr_dirty;
 
index bca56b6359e771c2bada2bcb336c927b02783a3b..0df07929c545bb62272373fc6de8a25d94fbd498 100644 (file)
@@ -395,7 +395,6 @@ struct bkey_cached {
        u64                     seq;
 
        struct bkey_i           *k;
-
        struct rcu_head         rcu;
 };