bcachefs: add counters for failed shrinker reclaim
author: Daniel Hill <daniel@gluo.nz>
Fri, 30 Sep 2022 03:37:15 +0000 (16:37 +1300)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 9 May 2024 20:24:29 +0000 (16:24 -0400)
This adds distinct counters for every reason the btree node shrinker can
fail to free an object - if our shrinker isn't making progress, this
will tell us why.

Signed-off-by: Daniel Hill <daniel@gluo.nz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_cache.h
fs/bcachefs/btree_types.h
fs/bcachefs/sysfs.c

index 7dd35d6224d94445df935363daca40fe22e3d70a..9e4ed75d3675620736c83dd9c02c86b663117e21 100644 (file)
 #include <linux/prefetch.h>
 #include <linux/sched/mm.h>
 
+#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
+do {                                            \
+       if (shrinker_counter)                    \
+               bc->not_freed_##counter++;       \
+} while (0)
+
 const char * const bch2_btree_node_flags[] = {
 #define x(f)   #f,
        BTREE_FLAGS()
@@ -238,7 +244,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
  * this version is for btree nodes that have already been freed (we're not
  * reaping a real btree node)
  */
-static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
+static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
 {
        struct btree_cache *bc = &c->btree_cache;
        int ret = 0;
@@ -260,38 +266,64 @@ wait_on_io:
        if (b->flags & ((1U << BTREE_NODE_dirty)|
                        (1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush)
+               if (!flush) {
+                       if (btree_node_dirty(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
+                       else if (btree_node_read_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+                       else if (btree_node_write_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
                        return -BCH_ERR_ENOMEM_btree_node_reclaim;
+               }
 
                /* XXX: waiting on IO with btree cache lock held */
                bch2_btree_node_wait_on_read(b);
                bch2_btree_node_wait_on_write(b);
        }
 
-       if (!six_trylock_intent(&b->c.lock))
+       if (!six_trylock_intent(&b->c.lock)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
                return -BCH_ERR_ENOMEM_btree_node_reclaim;
+       }
 
-       if (!six_trylock_write(&b->c.lock))
+       if (!six_trylock_write(&b->c.lock)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
                goto out_unlock_intent;
+       }
 
        /* recheck under lock */
        if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
                        (1U << BTREE_NODE_write_in_flight))) {
-               if (!flush)
+               if (!flush) {
+                       if (btree_node_read_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
+                       else if (btree_node_write_in_flight(b))
+                               BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
                        goto out_unlock;
+               }
                six_unlock_write(&b->c.lock);
                six_unlock_intent(&b->c.lock);
                goto wait_on_io;
        }
 
-       if (btree_node_noevict(b) ||
-           btree_node_write_blocked(b) ||
-           btree_node_will_make_reachable(b))
+       if (btree_node_noevict(b)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
+               goto out_unlock;
+       }
+       if (btree_node_write_blocked(b)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
                goto out_unlock;
+       }
+       if (btree_node_will_make_reachable(b)) {
+               BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
+               goto out_unlock;
+       }
 
        if (btree_node_dirty(b)) {
-               if (!flush)
+               if (!flush) {
+                       BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
                        goto out_unlock;
+               }
                /*
                 * Using the underscore version because we don't want to compact
                 * bsets after the write, since this node is about to be evicted
@@ -321,14 +353,14 @@ out_unlock_intent:
        goto out;
 }
 
-static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
+static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
 {
-       return __btree_node_reclaim(c, b, false);
+       return __btree_node_reclaim(c, b, false, shrinker_counter);
 }
 
 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
 {
-       return __btree_node_reclaim(c, b, true);
+       return __btree_node_reclaim(c, b, true, false);
 }
 
 static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@@ -376,11 +408,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
                if (touched >= nr)
                        goto out;
 
-               if (!btree_node_reclaim(c, b)) {
+               if (!btree_node_reclaim(c, b, true)) {
                        btree_node_data_free(c, b);
                        six_unlock_write(&b->c.lock);
                        six_unlock_intent(&b->c.lock);
                        freed++;
+                       bc->freed++;
                }
        }
 restart:
@@ -389,9 +422,11 @@ restart:
 
                if (btree_node_accessed(b)) {
                        clear_btree_node_accessed(b);
-               } else if (!btree_node_reclaim(c, b)) {
+                       bc->not_freed_access_bit++;
+               } else if (!btree_node_reclaim(c, b, true)) {
                        freed++;
                        btree_node_data_free(c, b);
+                       bc->freed++;
 
                        bch2_btree_node_hash_remove(bc, b);
                        six_unlock_write(&b->c.lock);
@@ -599,7 +634,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
        struct btree *b;
 
        list_for_each_entry_reverse(b, &bc->live, list)
-               if (!btree_node_reclaim(c, b))
+               if (!btree_node_reclaim(c, b, false))
                        return b;
 
        while (1) {
@@ -635,7 +670,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
         * disk node. Check the freed list before allocating a new one:
         */
        list_for_each_entry(b, freed, list)
-               if (!btree_node_reclaim(c, b)) {
+               if (!btree_node_reclaim(c, b, false)) {
                        list_del_init(&b->list);
                        goto got_node;
                }
@@ -661,7 +696,7 @@ got_node:
         * the list. Check if there's any freed nodes there:
         */
        list_for_each_entry(b2, &bc->freeable, list)
-               if (!btree_node_reclaim(c, b2)) {
+               if (!btree_node_reclaim(c, b2, false)) {
                        swap(b->data, b2->data);
                        swap(b->aux_data, b2->aux_data);
                        btree_node_to_freedlist(bc, b2);
@@ -1280,12 +1315,12 @@ static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c,
        prt_printf(out, " (%u)\n", nr);
 }
 
-void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
+void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
 {
-       const struct btree_cache *bc = &c->btree_cache;
+       struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
 
        if (!out->nr_tabstops)
-               printbuf_tabstop_push(out, 24);
+               printbuf_tabstop_push(out, 32);
 
        prt_btree_cache_line(out, c, "total:",          bc->used);
        prt_btree_cache_line(out, c, "nr dirty:",       atomic_read(&bc->dirty));
@@ -1294,4 +1329,17 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct bch_fs *c)
 
        for (unsigned i = 0; i < ARRAY_SIZE(bc->used_by_btree); i++)
                prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->used_by_btree[i]);
+
+       prt_newline(out);
+       prt_printf(out, "freed:\t%u\n", bc->freed);
+       prt_printf(out, "not freed:\n");
+       prt_printf(out, "  dirty\t%u\n", bc->not_freed_dirty);
+       prt_printf(out, "  write in flight\t%u\n", bc->not_freed_write_in_flight);
+       prt_printf(out, "  read in flight\t%u\n", bc->not_freed_read_in_flight);
+       prt_printf(out, "  lock intent failed\t%u\n", bc->not_freed_lock_intent);
+       prt_printf(out, "  lock write failed\t%u\n", bc->not_freed_lock_write);
+       prt_printf(out, "  access bit\t%u\n", bc->not_freed_access_bit);
+       prt_printf(out, "  no evict failed\t%u\n", bc->not_freed_noevict);
+       prt_printf(out, "  write blocked\t%u\n", bc->not_freed_write_blocked);
+       prt_printf(out, "  will make reachable\t%u\n", bc->not_freed_will_make_reachable);
 }
index 6fe91d1c0fd4c9714180662daec60f50dae98853..fed35de3e4de7caa5cc298a7a2edb7f93459fd4a 100644 (file)
@@ -134,6 +134,6 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
 const char *bch2_btree_id_str(enum btree_id);
 void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
 void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *);
-void bch2_btree_cache_to_text(struct printbuf *, const struct bch_fs *);
+void bch2_btree_cache_to_text(struct printbuf *, const struct btree_cache *);
 
 #endif /* _BCACHEFS_BTREE_CACHE_H */
index 76364bd4347e36c8cab0a8cda45200e9cce5c6fd..d63db4fefe73433078634b75748f97083564985d 100644 (file)
@@ -163,6 +163,16 @@ struct btree_cache {
        /* Number of elements in live + freeable lists */
        unsigned                used;
        unsigned                reserve;
+       unsigned                freed;
+       unsigned                not_freed_lock_intent;
+       unsigned                not_freed_lock_write;
+       unsigned                not_freed_dirty;
+       unsigned                not_freed_read_in_flight;
+       unsigned                not_freed_write_in_flight;
+       unsigned                not_freed_noevict;
+       unsigned                not_freed_write_blocked;
+       unsigned                not_freed_will_make_reachable;
+       unsigned                not_freed_access_bit;
        atomic_t                dirty;
        struct shrinker         *shrink;
 
index df3d28659bfd4a216267f95286eb8fa82c36c930..93ca74d108b17e8709254d2c2b5e44763d29c3cb 100644 (file)
@@ -383,7 +383,7 @@ SHOW(bch2_fs)
                bch2_journal_debug_to_text(out, &c->journal);
 
        if (attr == &sysfs_btree_cache)
-               bch2_btree_cache_to_text(out, c);
+               bch2_btree_cache_to_text(out, &c->btree_cache);
 
        if (attr == &sysfs_btree_key_cache)
                bch2_btree_key_cache_to_text(out, &c->btree_key_cache);