bcachefs: Add accounting for dirty btree nodes/keys
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 9 Nov 2020 18:01:52 +0000 (13:01 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:46 +0000 (17:08 -0400)
This lets us improve journal reclaim: it now tries to ensure that no
more than 3/4 of the btree node cache and the btree key cache are
dirty, so that the shrinkers can free memory.
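
The 3/4 threshold is evaluated with integer math in journal_reclaim.c
below: "dirty * 4 > used * 3" is the cross-multiplied form of
"dirty / used > 3/4", which plain integer division would collapse
(dirty/used is 0 for any dirty < used). A hypothetical standalone
helper, not part of this patch, spelling that out:

#include <stdbool.h>
#include <stddef.h>

/* True when more than num/den of total is dirty. Cross-multiplied so
 * no division or floating point is needed; num = 3, den = 4 gives
 * exactly the check added to bch2_journal_reclaim() below. */
static inline bool above_fraction(size_t dirty, size_t total,
				  size_t num, size_t den)
{
	return dirty * den > total * num;
}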

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_io.h
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/journal_reclaim.c

diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 229841c2ef0c7291bc67088ab498c81879cf7965..d130447e34779feaca9f054a70ad5b275a092aeb 100644
@@ -382,11 +382,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 
                if (btree_node_dirty(b))
                        bch2_btree_complete_write(c, b, btree_current_write(b));
-               clear_btree_node_dirty(b);
+               clear_btree_node_dirty(c, b);
 
                btree_node_data_free(c, b);
        }
 
+       BUG_ON(atomic_read(&c->btree_cache.dirty));
+
        while (!list_empty(&bc->freed)) {
                b = list_first_entry(&bc->freed, struct btree, list);
                list_del(&b->list);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index c1293709eb013dcb39ca28dcbf2dc8235c1faec6..0de703c5b4b79d981474d58586bf566a13a72cf9 100644
@@ -1498,6 +1498,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
                new ^=  (1 << BTREE_NODE_write_idx);
        } while (cmpxchg_acquire(&b->flags, old, new) != old);
 
+       atomic_dec(&c->btree_cache.dirty);
+
        BUG_ON(btree_node_fake(b));
        BUG_ON((b->will_make_reachable != 0) != !b->written);
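
The atomic_dec() above is unconditional because the full cmpxchg loop
(only its tail appears in this hunk) returns early unless it atomically
clears a set dirty bit; reaching this point therefore corresponds to
exactly one 1 -> 0 transition. A userspace sketch of that
claim-then-account shape, with illustrative names rather than the
kernel API:

#include <stdatomic.h>
#include <stdbool.h>

#define NODE_DIRTY 1u

/* Either bail out because the dirty bit was already clear, or
 * atomically clear it: a caller that sees true owns one decrement. */
static bool claim_dirty(atomic_uint *flags)
{
	unsigned int old = atomic_load(flags), new;

	do {
		if (!(old & NODE_DIRTY))
			return false;	/* lost the race: already clean */
		new = old & ~NODE_DIRTY;
	} while (!atomic_compare_exchange_weak(flags, &old, new));

	return true;
}

A writer would then pair it with the counter (b_flags and cache_dirty
being the stand-ins here):

	if (claim_dirty(&b_flags))
		atomic_fetch_sub(&cache_dirty, 1);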
 
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 626d0f071b7008d7f9f76f89df0a1bda34adb2b0..1a4b11e99cc40367457a69259860a3cff462eae6 100644
@@ -14,6 +14,23 @@ struct btree_write;
 struct btree;
 struct btree_iter;
 
+static inline bool btree_node_dirty(struct btree *b)
+{
+       return test_bit(BTREE_NODE_dirty, &b->flags);
+}
+
+static inline void set_btree_node_dirty(struct bch_fs *c, struct btree *b)
+{
+       if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags))
+               atomic_inc(&c->btree_cache.dirty);
+}
+
+static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b)
+{
+       if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags))
+               atomic_dec(&c->btree_cache.dirty);
+}
+
 struct btree_read_bio {
        struct bch_fs           *c;
        u64                     start_time;
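
These helpers are why the counter stays balanced: test_and_set_bit()
and test_and_clear_bit() return the bit's previous value, so the
atomic_t only moves on a real 0 <-> 1 transition of the per-node dirty
bit, and redundant calls are harmless. A self-contained userspace
rendering of the same invariant, with stdatomic stand-ins for the
kernel bitops:

#include <assert.h>
#include <stdatomic.h>

#define NODE_DIRTY 1u

static atomic_int cache_dirty;	/* plays the role of c->btree_cache.dirty */

/* fetch_or/fetch_and return the old flags, so each helper knows
 * whether the bit actually flipped; the counter moves only then. */
static void node_set_dirty(atomic_uint *flags)
{
	if (!(atomic_fetch_or(flags, NODE_DIRTY) & NODE_DIRTY))
		atomic_fetch_add(&cache_dirty, 1);
}

static void node_clear_dirty(atomic_uint *flags)
{
	if (atomic_fetch_and(flags, ~NODE_DIRTY) & NODE_DIRTY)
		atomic_fetch_sub(&cache_dirty, 1);
}

int main(void)
{
	atomic_uint flags = 0;

	node_set_dirty(&flags);
	node_set_dirty(&flags);		/* redundant: counter unchanged */
	node_clear_dirty(&flags);
	node_clear_dirty(&flags);	/* redundant: counter unchanged */
	assert(atomic_load(&cache_dirty) == 0);
	return 0;
}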
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 8b43460c9c9bef7ff933f2bd3b3afc9dd2d4f1ae..4c61324f59d440797a462dd5dd2fbe15d35b7049 100644
@@ -65,6 +65,8 @@ static void bkey_cached_evict(struct btree_key_cache *c,
        BUG_ON(rhashtable_remove_fast(&c->table, &ck->hash,
                                      bch2_btree_key_cache_params));
        memset(&ck->key, ~0, sizeof(ck->key));
+
+       c->nr_keys--;
 }
 
 static void bkey_cached_free(struct btree_key_cache *c,
@@ -135,6 +137,8 @@ btree_key_cache_create(struct btree_key_cache *c,
                return NULL;
        }
 
+       c->nr_keys++;
+
        list_move(&ck->list, &c->clean);
        six_unlock_write(&ck->c.lock);
 
@@ -355,10 +359,14 @@ err:
 
        bch2_journal_pin_drop(j, &ck->journal);
        bch2_journal_preres_put(j, &ck->res);
-       clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
 
        if (!evict) {
                mutex_lock(&c->btree_key_cache.lock);
+               if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+                       clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+                       c->btree_key_cache.nr_dirty--;
+               }
+
                list_move_tail(&ck->list, &c->btree_key_cache.clean);
                mutex_unlock(&c->btree_key_cache.lock);
        } else {
@@ -371,6 +379,11 @@ evict:
                six_lock_write(&ck->c.lock, NULL, NULL);
 
                mutex_lock(&c->btree_key_cache.lock);
+               if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+                       clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+                       c->btree_key_cache.nr_dirty--;
+               }
+
                bkey_cached_evict(&c->btree_key_cache, ck);
                bkey_cached_free(&c->btree_key_cache, ck);
                mutex_unlock(&c->btree_key_cache.lock);
@@ -448,9 +461,10 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 
        if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                mutex_lock(&c->btree_key_cache.lock);
-               list_del_init(&ck->list);
+               list_move(&ck->list, &c->btree_key_cache.dirty);
 
                set_bit(BKEY_CACHED_DIRTY, &ck->flags);
+               c->btree_key_cache.nr_dirty++;
                mutex_unlock(&c->btree_key_cache.lock);
        }
 
@@ -467,20 +481,28 @@ void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
 }
 #endif
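
bch2_btree_insert_key_cached() above is where a cached key goes dirty
(the only set_bit(BKEY_CACHED_DIRTY) in this patch): under
btree_key_cache.lock it moves the key from the clean list to the new
dirty list, sets the flag, and bumps nr_dirty. Because every
nr_keys/nr_dirty update happens under that one mutex, plain size_t
counters suffice here, whereas btree node dirtying has no single
serializing lock and needs the atomic_t. A minimal sketch of the
transition, with a pthread mutex and ad-hoc names standing in for the
kernel types:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct key_cache {
	pthread_mutex_t lock;
	size_t nr_keys;		/* keys on the clean + dirty lists */
	size_t nr_dirty;	/* keys on the dirty list only */
};

struct cached_key {
	bool dirty;		/* stands in for BKEY_CACHED_DIRTY */
};

/* First dirtying of a key accounts for it under the cache lock; the
 * unlocked pre-check mirrors the kernel's test_bit(), safe there
 * because the key's own lock is held across the transition. */
static void mark_key_dirty(struct key_cache *kc, struct cached_key *ck)
{
	if (ck->dirty)
		return;		/* already on the dirty list and counted */

	pthread_mutex_lock(&kc->lock);
	/* the kernel also does list_move(&ck->list, &dirty) here */
	ck->dirty = true;
	kc->nr_dirty++;
	pthread_mutex_unlock(&kc->lock);
}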
 
-void bch2_fs_btree_key_cache_exit(struct btree_key_cache *c)
+void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
 {
+       struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
        struct bkey_cached *ck, *n;
 
-       mutex_lock(&c->lock);
-       list_for_each_entry_safe(ck, n, &c->clean, list) {
+       mutex_lock(&bc->lock);
+       list_splice(&bc->dirty, &bc->clean);
+
+       list_for_each_entry_safe(ck, n, &bc->clean, list) {
                kfree(ck->k);
                kfree(ck);
+               bc->nr_keys--;
        }
-       list_for_each_entry_safe(ck, n, &c->freed, list)
+
+       BUG_ON(bc->nr_dirty && !bch2_journal_error(&c->journal));
+       BUG_ON(bc->nr_keys);
+
+       list_for_each_entry_safe(ck, n, &bc->freed, list)
                kfree(ck);
-       mutex_unlock(&c->lock);
+       mutex_unlock(&bc->lock);
 
-       rhashtable_destroy(&c->table);
+       rhashtable_destroy(&bc->table);
 }
 
 void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
@@ -488,6 +510,7 @@ void bch2_fs_btree_key_cache_init_early(struct btree_key_cache *c)
        mutex_init(&c->lock);
        INIT_LIST_HEAD(&c->freed);
        INIT_LIST_HEAD(&c->clean);
+       INIT_LIST_HEAD(&c->dirty);
 }
 
 int bch2_fs_btree_key_cache_init(struct btree_key_cache *c)
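
The rewritten exit path doubles as a leak check: dirty keys are spliced
onto the clean list, every key freed decrements nr_keys, and the
BUG_ON()s fire if dirty keys survive without a journal error or if the
count fails to return to zero, i.e. if some create/evict pair was
unbalanced. A userspace rendering of that bookkeeping, with a singly
linked list standing in for list_head:

#include <assert.h>
#include <stdlib.h>

struct ck { struct ck *next; };

struct key_cache {
	struct ck *clean, *dirty;
	size_t nr_keys;
};

static void key_cache_exit(struct key_cache *kc)
{
	struct ck **tail = &kc->clean, *ck, *n;

	/* fold the dirty list into the clean one, as list_splice() does */
	while (*tail)
		tail = &(*tail)->next;
	*tail = kc->dirty;
	kc->dirty = NULL;

	for (ck = kc->clean; ck; ck = n) {
		n = ck->next;
		free(ck);
		kc->nr_keys--;	/* must balance every key created */
	}
	kc->clean = NULL;

	assert(kc->nr_keys == 0);	/* the BUG_ON(bc->nr_keys) above */
}

int main(void)
{
	struct key_cache kc = { .nr_keys = 2 };

	kc.clean = calloc(1, sizeof(struct ck));
	kc.dirty = calloc(1, sizeof(struct ck));
	key_cache_exit(&kc);
	return 0;
}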
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 55ea028d242e0ca56d93fb5e8f085696b8f362e5..de287f91ac28aebe01bdcf76ba52e8fe3149ae2d 100644
@@ -158,6 +158,7 @@ struct btree_cache {
        /* Number of elements in live + freeable lists */
        unsigned                used;
        unsigned                reserve;
+       atomic_t                dirty;
        struct shrinker         shrink;
 
        /*
@@ -294,6 +295,10 @@ struct btree_key_cache {
        struct rhashtable       table;
        struct list_head        freed;
        struct list_head        clean;
+       struct list_head        dirty;
+
+       size_t                  nr_keys;
+       size_t                  nr_dirty;
 };
 
 struct bkey_cached_key {
@@ -411,7 +416,6 @@ enum btree_flags {
 
 BTREE_FLAG(read_in_flight);
 BTREE_FLAG(read_error);
-BTREE_FLAG(dirty);
 BTREE_FLAG(need_write);
 BTREE_FLAG(noevict);
 BTREE_FLAG(write_idx);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 78b8e2d00fd94e0d5146f1c19f23814eed2af5d0..c1f822b96c48aeb5db7d26db9057bd6e395cd8c0 100644
@@ -149,7 +149,7 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
 
        b->ob.nr = 0;
 
-       clear_btree_node_dirty(b);
+       clear_btree_node_dirty(c, b);
 
        btree_node_lock_type(c, b, SIX_LOCK_write);
        __btree_node_free(c, b);
@@ -264,7 +264,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
        b = as->prealloc_nodes[--as->nr_prealloc_nodes];
 
        set_btree_node_accessed(b);
-       set_btree_node_dirty(b);
+       set_btree_node_dirty(c, b);
        set_btree_node_need_write(b);
 
        bch2_bset_init_first(b, &b->data->keys);
@@ -827,7 +827,7 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
                closure_wake_up(&c->btree_interior_update_wait);
        }
 
-       clear_btree_node_dirty(b);
+       clear_btree_node_dirty(c, b);
        clear_btree_node_need_write(b);
 
        /*
@@ -1034,7 +1034,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
                bch2_btree_node_iter_advance(node_iter, b);
 
        bch2_btree_bset_insert_key(iter, b, node_iter, insert);
-       set_btree_node_dirty(b);
+       set_btree_node_dirty(as->c, b);
        set_btree_node_need_write(b);
 }
 
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 3122256cc6ca7ec932ec87cefaf534e142609fb8..4ab12a9db2f4ad752f9edbf481323e8b1abaa593 100644
@@ -191,7 +191,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
        bch2_btree_add_journal_pin(c, b, trans->journal_res.seq);
 
        if (unlikely(!btree_node_dirty(b)))
-               set_btree_node_dirty(b);
+               set_btree_node_dirty(c, b);
 
        live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
        u64s_added = (int) bset_u64s(t) - old_u64s;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 3f57f498ce0b4a8e61cffc0715b521deebcd1482..da28761e7942ebcde677e9b094499209fc865c04 100644
@@ -547,6 +547,12 @@ void bch2_journal_reclaim(struct journal *j)
 
                if (j->prereserved.reserved * 2 > j->prereserved.remaining)
                        min_nr = 1;
+
+               if ((atomic_read(&c->btree_cache.dirty) * 4 >
+                    c->btree_cache.used  * 3) ||
+                   (c->btree_key_cache.nr_dirty * 4 >
+                    c->btree_key_cache.nr_keys))
+                       min_nr = 1;
        } while (journal_flush_pins(j, seq_to_flush, min_nr));
 
        if (!bch2_journal_error(j))
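
The shape of the surrounding loop matters: bch2_journal_reclaim() keeps
calling journal_flush_pins() until it stops making progress, and
setting min_nr forces at least some flushing on every pass while either
cache is more than 3/4 dirty; flushing pins is what lets dirty nodes
and keys be written back and become reclaimable by the shrinkers. A
self-contained toy model of that loop (every name a stand-in, the cache
reduced to two counters):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static size_t dirty = 90, total = 100;	/* toy cache: 90% dirty */

/* Stand-in for journal_flush_pins(): "flushes" one entry when forced
 * to make progress, and reports whether it did anything. */
static bool flush_pins(size_t min_nr)
{
	if (!min_nr || !dirty)
		return false;
	dirty--;		/* writing an entry back cleans it */
	return true;
}

int main(void)
{
	size_t min_nr;

	do {
		min_nr = 0;
		if (dirty * 4 > total * 3)	/* more than 3/4 dirty? */
			min_nr = 1;
	} while (flush_pins(min_nr));

	/* reclaim stops exactly at the 3/4 watermark: 75/100 */
	printf("dirty after reclaim: %zu/%zu\n", dirty, total);
	return 0;
}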