bcachefs: Fix reading of alloc info after unclean shutdown
author: Kent Overstreet <kent.overstreet@gmail.com>
Sun, 24 May 2020 18:06:10 +0000 (14:06 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:40 +0000 (17:08 -0400)
When updates to interior nodes started being journalled, that meant that
after an unclean shutdown, until journal replay is done we can't walk
the btree without overlaying the updates from the journal.

The initial btree gc was changed to walk the btree overlaying keys from
the journal - but bch2_alloc_read() and bch2_stripes_read() were missed.
Major whoops...

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/ec.c
fs/bcachefs/ec.h
fs/bcachefs/recovery.c
fs/bcachefs/recovery.h
fs/bcachefs/super.c

index 559b9be50952423c4f4b4c2a6e2d0953ab400dc3..a08ae42cc073ca4ad0fb272b1a72fd37451e266a 100644 (file)
@@ -208,29 +208,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
                               get_alloc_field(a.v, &d, i));
 }
 
-int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
+static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
+                             unsigned level, struct bkey_s_c k)
 {
-       struct btree_trans trans;
-       struct btree_and_journal_iter iter;
-       struct bkey_s_c k;
-       struct bch_dev *ca;
-       unsigned i;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c, 0, 0);
-
-       bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
-                                        BTREE_ID_ALLOC, POS_MIN);
-
-       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+       if (!level)
                bch2_mark_key(c, k, 0, 0, NULL, 0,
                              BTREE_TRIGGER_ALLOC_READ|
                              BTREE_TRIGGER_NOATOMIC);
 
-               bch2_btree_and_journal_iter_advance(&iter);
-       }
+       return 0;
+}
+
+int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
+{
+       struct bch_dev *ca;
+       unsigned i;
+       int ret = 0;
 
-       ret = bch2_trans_exit(&trans) ?: ret;
+       ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
+                                         NULL, bch2_alloc_read_fn);
        if (ret) {
                bch_err(c, "error reading alloc info: %i", ret);
                return ret;
index 909a4a5036ab24ab477a6be8935417eb55c5820b..074b811e90436d6fc687bb05aabd732126548811 100644 (file)
@@ -1273,38 +1273,28 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
        return ret;
 }
 
-int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
+static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
+                             unsigned level, struct bkey_s_c k)
 {
-       struct btree_trans trans;
-       struct btree_and_journal_iter iter;
-       struct bkey_s_c k;
-       int ret;
-
-       ret = bch2_fs_ec_start(c);
-       if (ret)
-               return ret;
-
-       bch2_trans_init(&trans, c, 0, 0);
-
-       bch2_btree_and_journal_iter_init(&iter, &trans, journal_keys,
-                                        BTREE_ID_EC, POS_MIN);
-
+       int ret = 0;
 
-       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
-               bch2_mark_key(c, k, 0, 0, NULL, 0,
-                             BTREE_TRIGGER_ALLOC_READ|
-                             BTREE_TRIGGER_NOATOMIC);
+       if (k.k->type == KEY_TYPE_stripe)
+               ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
+                       bch2_mark_key(c, k, 0, 0, NULL, 0,
+                                     BTREE_TRIGGER_ALLOC_READ|
+                                     BTREE_TRIGGER_NOATOMIC);
 
-               bch2_btree_and_journal_iter_advance(&iter);
-       }
+       return ret;
+}
 
-       ret = bch2_trans_exit(&trans) ?: ret;
-       if (ret) {
+int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
+{
+       int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC,
+                                         NULL, bch2_stripes_read_fn);
+       if (ret)
                bch_err(c, "error reading stripes: %i", ret);
-               return ret;
-       }
 
-       return 0;
+       return ret;
 }
 
 int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
@@ -1343,11 +1333,6 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
        return 0;
 }
 
-int bch2_fs_ec_start(struct bch_fs *c)
-{
-       return bch2_ec_mem_alloc(c, false);
-}
-
 void bch2_fs_ec_exit(struct bch_fs *c)
 {
        struct ec_stripe_head *h;
index cf67abd4849007281415f65ca5c5eb20d3319379..4dfaac0348869eb82640650c03d6709307d9fe1e 100644 (file)
@@ -157,8 +157,6 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
 
 int bch2_ec_mem_alloc(struct bch_fs *, bool);
 
-int bch2_fs_ec_start(struct bch_fs *);
-
 void bch2_fs_ec_exit(struct bch_fs *);
 int bch2_fs_ec_init(struct bch_fs *);
 
index 8e9d412a6000ce0d1b20202d40a7470d36a4d6f9..95265f1c2b217bb3104b702328ee6001479c085d 100644 (file)
@@ -191,6 +191,78 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *i
                               b->c.btree_id, b->c.level, b->data->min_key);
 }
 
+/* Walk btree, overlaying keys from the journal: */
+
+static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
+                               struct journal_keys *journal_keys,
+                               enum btree_id btree_id,
+                               btree_walk_node_fn node_fn,
+                               btree_walk_key_fn key_fn)
+{
+       struct btree_and_journal_iter iter;
+       struct bkey_s_c k;
+       int ret = 0;
+
+       bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
+
+       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+               ret = key_fn(c, btree_id, b->c.level, k);
+               if (ret)
+                       break;
+
+               if (b->c.level) {
+                       struct btree *child;
+                       BKEY_PADDED(k) tmp;
+
+                       bkey_reassemble(&tmp.k, k);
+                       k = bkey_i_to_s_c(&tmp.k);
+
+                       bch2_btree_and_journal_iter_advance(&iter);
+
+                       if (b->c.level > 0) {
+                               child = bch2_btree_node_get_noiter(c, &tmp.k,
+                                                       b->c.btree_id, b->c.level - 1);
+                               ret = PTR_ERR_OR_ZERO(child);
+                               if (ret)
+                                       break;
+
+                               ret   = (node_fn ? node_fn(c, b) : 0) ?:
+                                       bch2_btree_and_journal_walk_recurse(c, child,
+                                               journal_keys, btree_id, node_fn, key_fn);
+                               six_unlock_read(&child->c.lock);
+
+                               if (ret)
+                                       break;
+                       }
+               } else {
+                       bch2_btree_and_journal_iter_advance(&iter);
+               }
+       }
+
+       return ret;
+}
+
+int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys,
+                               enum btree_id btree_id,
+                               btree_walk_node_fn node_fn,
+                               btree_walk_key_fn key_fn)
+{
+       struct btree *b = c->btree_roots[btree_id].b;
+       int ret = 0;
+
+       if (btree_node_fake(b))
+               return 0;
+
+       six_lock_read(&b->c.lock, NULL, NULL);
+       ret   = (node_fn ? node_fn(c, b) : 0) ?:
+               bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
+                                                   node_fn, key_fn) ?:
+               key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key));
+       six_unlock_read(&b->c.lock);
+
+       return ret;
+}
+
 /* sort and dedup all keys in the journal: */
 
 void bch2_journal_entries_free(struct list_head *list)
index 19f2f172a26b327709279b4cdcaac5bcbfbf58c0..a66827c9addf71a4b3eaeb08a0151e0d0c5cd9c2 100644 (file)
@@ -44,6 +44,13 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
                                                struct journal_keys *,
                                                struct btree *);
 
+typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
+typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id,
+                                unsigned level, struct bkey_s_c k);
+
+int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id,
+                               btree_walk_node_fn, btree_walk_key_fn);
+
 void bch2_journal_keys_free(struct journal_keys *);
 void bch2_journal_entries_free(struct list_head *);
 
index 6b5ab579a25c10c962821d7cbe557db98d3e150d..165163f3896e5cd9a35ead05681f69a026f0fb3f 100644 (file)
@@ -199,6 +199,8 @@ static void __bch2_fs_read_only(struct bch_fs *c)
        if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
                goto nowrote_alloc;
 
+       bch_verbose(c, "writing alloc info");
+
        do {
                wrote = false;
 
@@ -229,6 +231,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
                clean_passes = wrote ? 0 : clean_passes + 1;
        } while (clean_passes < 2);
 
+       bch_verbose(c, "writing alloc info complete");
        set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
 nowrote_alloc:
        for_each_member_device(ca, c, i)
@@ -313,8 +316,10 @@ void bch2_fs_read_only(struct bch_fs *c)
            !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
            test_bit(BCH_FS_STARTED, &c->flags) &&
            test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) &&
-           !c->opts.norecovery)
+           !c->opts.norecovery) {
+               bch_verbose(c, "marking filesystem clean");
                bch2_fs_mark_clean(c);
+       }
 
        clear_bit(BCH_FS_RW, &c->flags);
 }