bcachefs: Run btree key cache shrinker less aggressively
author     Kent Overstreet <kent.overstreet@linux.dev>
           Mon, 6 Nov 2023 14:53:14 +0000 (09:53 -0500)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Tue, 14 Nov 2023 02:45:01 +0000 (21:45 -0500)
The btree key cache maintains lists of items that have been freed, but
can't yet be reclaimed because a bch2_trans_relock() call might still
find them - we have to wait for outstanding SRCU readers to finish.

Previously, we wouldn't count these items against the number we're
attempting to scan, which meant we'd evict more live key cache
entries - doing quite a bit of potentially unnecessary work.

With recent work to make sure we don't hold SRCU locks for too long, it
should be safe to count all the items on the freelists against the
number to scan - even if we can't reclaim them yet, we will be able to
soon.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_key_cache_types.h

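The effect of the change on the shrinker's accounting can be sketched in
simplified form as follows. This is a rough illustration only: struct
freelist_counts and scan_budget_after_freelists() are made-up names for
this sketch, not bcachefs code; the real logic lives in
bch2_btree_key_cache_scan(), shown in the hunks below, where the new
counters feed directly into the existing 'scanned' variable.

/*
 * Sketch only, with hypothetical names: charge everything parked on the
 * freelists against the shrinker's scan target first, so only whatever
 * budget remains may be spent evicting live key cache entries.
 */
#include <stddef.h>

struct freelist_counts {
	size_t nr_freed_pcpu;
	size_t nr_freed_nonpcpu;
};

static unsigned long scan_budget_after_freelists(const struct freelist_counts *fc,
						 unsigned long nr_to_scan)
{
	unsigned long scanned = 0;

	/* freed-but-not-yet-reclaimable entries now count as scanned work */
	scanned += fc->nr_freed_nonpcpu;
	if (scanned >= nr_to_scan)
		return 0;

	scanned += fc->nr_freed_pcpu;
	if (scanned >= nr_to_scan)
		return 0;

	/* remaining budget that may go toward evicting live entries */
	return nr_to_scan - scanned;
}

The point is that entries waiting on an SRCU grace period now consume
scan budget, so the shrinker can bail out early instead of falling
through to evicting live key cache entries.
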
index 9b78f78a75b59c0cb28dac46fc4e107f0a9cbca1..b3305a04d808640a2eb1cf735e9d7d970104c4b7 100644 (file)
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
        ck->btree_trans_barrier_seq =
                start_poll_synchronize_srcu(&c->btree_trans_barrier);
 
-       if (ck->c.lock.readers)
+       if (ck->c.lock.readers) {
                list_move_tail(&ck->list, &bc->freed_pcpu);
-       else
+               bc->nr_freed_pcpu++;
+       } else {
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
+       }
        atomic_long_inc(&bc->nr_freed);
 
        kfree(ck->k);
@@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
 {
        struct bkey_cached *pos;
 
+       bc->nr_freed_nonpcpu++;
+
        list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
                if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
                                 pos->btree_trans_barrier_seq)) {
@@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 #else
                mutex_lock(&bc->lock);
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
                mutex_unlock(&bc->lock);
 #endif
        } else {
@@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                               f->nr < ARRAY_SIZE(f->objs) / 2) {
                                ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                                list_del_init(&ck->list);
+                               bc->nr_freed_nonpcpu--;
                                f->objs[f->nr++] = ck;
                        }
 
@@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                if (!list_empty(&bc->freed_nonpcpu)) {
                        ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                        list_del_init(&ck->list);
+                       bc->nr_freed_nonpcpu--;
                }
                mutex_unlock(&bc->lock);
 #endif
@@ -850,6 +858,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
         * Newest freed entries are at the end of the list - once we hit one
         * that's too new to be freed, we can bail out:
         */
+       scanned += bc->nr_freed_nonpcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -859,13 +869,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_nonpcpu--;
        }
 
        if (scanned >= nr)
                goto out;
 
+       scanned += bc->nr_freed_pcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -875,8 +887,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_pcpu--;
        }
 
        if (scanned >= nr)
@@ -982,6 +994,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        }
 #endif
 
+       BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+       BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
        list_splice(&bc->freed_pcpu,    &items);
        list_splice(&bc->freed_nonpcpu, &items);
 
index 0f967808d766d76f5e9cc558cc4c8d16ce20f8d7..290e4e57df5bbcfeffb38d666aa18c89a7c1c5a6 100644 (file)
--- a/fs/bcachefs/btree_key_cache_types.h
+++ b/fs/bcachefs/btree_key_cache_types.h
@@ -11,8 +11,12 @@ struct btree_key_cache {
        struct mutex            lock;
        struct rhashtable       table;
        bool                    table_init_done;
+
        struct list_head        freed_pcpu;
+       size_t                  nr_freed_pcpu;
        struct list_head        freed_nonpcpu;
+       size_t                  nr_freed_nonpcpu;
+
        struct shrinker         *shrink;
        unsigned                shrink_iter;
        struct btree_key_cache_freelist __percpu *pcpu_freed;
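
The new counters are meant to stay in lockstep with their lists: every
move onto a freelist increments the matching counter and every removal
decrements it, which is what the BUG_ON(list_count_nodes(...) != ...)
checks in the earlier hunk assert at shutdown. A hypothetical pair of
helpers would make that discipline explicit - freelist_push() and
freelist_pop() are illustration names only; the actual patch open-codes
these updates at each call site, as the hunks above show.

/*
 * Hypothetical helpers, not part of bcachefs: they only illustrate the
 * "counter mirrors list length" rule for the non-pcpu freelist.
 * Caller must hold bc->lock, as in the real call sites.
 */
static void freelist_push(struct btree_key_cache *bc, struct bkey_cached *ck)
{
	list_move_tail(&ck->list, &bc->freed_nonpcpu);
	bc->nr_freed_nonpcpu++;
}

static struct bkey_cached *freelist_pop(struct btree_key_cache *bc)
{
	struct bkey_cached *ck = NULL;

	if (!list_empty(&bc->freed_nonpcpu)) {
		ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
		list_del_init(&ck->list);
		bc->nr_freed_nonpcpu--;
	}
	return ck;
}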