bcachefs: Rewrite btree nodes with errors
author Kent Overstreet <kent.overstreet@gmail.com>
Sat, 24 Apr 2021 06:47:41 +0000 (02:47 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:02 +0000 (17:09 -0400)
This patch adds self-healing functionality for btree nodes - if we
notice a problem when reading a btree node, we just rewrite it.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_io.c
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_interior.c

index 2f5b7c629a9ceb01fb1f9c238b9abf1081abd390..cea151a5d4f807037f9e71ca94367e1b64b03ca3 100644 (file)
@@ -986,6 +986,7 @@ static void btree_node_read_work(struct work_struct *work)
        struct bch_io_failures failed = { .nr = 0 };
        char buf[200];
        struct printbuf out;
+       bool saw_error = false;
        bool can_retry;
 
        goto start;
@@ -1022,6 +1023,8 @@ start:
                    !bch2_btree_node_read_done(c, ca, b, can_retry))
                        break;
 
+               saw_error = true;
+
                if (!can_retry) {
                        set_btree_node_read_error(b);
                        break;
@@ -1031,6 +1034,10 @@ start:
        bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
                               rb->start_time);
        bio_put(&rb->bio);
+
+       if (saw_error && !btree_node_read_error(b))
+               bch2_btree_node_rewrite_async(c, b);
+
        clear_btree_node_read_in_flight(b);
        wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
 }
index 0c7caa7e91a0ac7836a638e599a82b628580aef2..56131ac516ce4b74e5f357f1f923b4ab7cdad7fd 100644 (file)
@@ -72,6 +72,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
 
 int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
                            __le64, unsigned);
+void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
 int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
                               struct btree *, struct bkey_i *);
 
index 986b396ba177c06e16ab442448c343ec78d460b1..3ca4114c74ad953dd7caebfc4382f6ff2ecfb211 100644 (file)
@@ -1797,6 +1797,56 @@ out:
        return ret;
 }
 
+struct async_btree_rewrite {   /* parameters of one queued btree node rewrite */
+       struct bch_fs           *c;     /* filesystem handle supplied by the caller */
+       struct work_struct      work;   /* work item; the handler recovers this struct via container_of() */
+       enum btree_id           btree_id;       /* copied from b->c.btree_id when queued */
+       unsigned                level;  /* copied from b->c.level when queued */
+       struct bpos             pos;    /* copied from b->key.k.p when queued */
+       __le64                  seq;    /* copied from b->data->keys.seq; handed to bch2_btree_node_rewrite() */
+};
+
+/*
+ * Work handler for a rewrite queued by bch2_btree_node_rewrite_async().
+ *
+ * Looks up the node described by the request (btree id, position, level)
+ * inside a fresh btree transaction and asks bch2_btree_node_rewrite() to
+ * rewrite it. The request's seq is passed along; presumably it lets the
+ * rewrite notice that the node has since been replaced - TODO confirm.
+ *
+ * The handler owns the request: it drops the c->writes reference taken
+ * when the request was queued and frees the request before returning.
+ *
+ * Only ever referenced through INIT_WORK() in this file, hence static.
+ */
+static void async_btree_node_rewrite_work(struct work_struct *work)
+{
+       struct async_btree_rewrite *a =
+               container_of(work, struct async_btree_rewrite, work);
+       struct bch_fs *c = a->c;
+       struct btree_trans trans;
+       struct btree_iter *iter;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       iter = bch2_trans_get_node_iter(&trans, a->btree_id, a->pos,
+                                       BTREE_MAX_DEPTH, a->level, 0);
+       /*
+        * The result is not checked: nothing is reported if the rewrite
+        * fails.
+        */
+       bch2_btree_node_rewrite(c, iter, a->seq, 0);
+       bch2_trans_iter_put(&trans, iter);
+       bch2_trans_exit(&trans);
+
+       /* Pairs with percpu_ref_tryget() in bch2_btree_node_rewrite_async(). */
+       percpu_ref_put(&c->writes);
+       kfree(a);
+}
+
+/*
+ * Schedule btree node @b for rewriting from a workqueue.
+ *
+ * Records the node's btree id, level, key position and seq in a freshly
+ * allocated request and queues it on system_long_wq. A reference on
+ * c->writes is held for the lifetime of the request; the work handler
+ * releases it.
+ *
+ * Nothing is queued, and no error is reported, if the c->writes reference
+ * cannot be taken or the request cannot be allocated.
+ */
+void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
+{
+       struct async_btree_rewrite *req;
+
+       if (!percpu_ref_tryget(&c->writes))
+               return;
+
+       req = kmalloc(sizeof(*req), GFP_NOFS);
+       if (!req)
+               goto err_put_ref;
+
+       /* Snapshot what the worker needs to find this node again. */
+       req->c          = c;
+       req->btree_id   = b->c.btree_id;
+       req->level      = b->c.level;
+       req->pos        = b->key.k.p;
+       req->seq        = b->data->keys.seq;
+
+       INIT_WORK(&req->work, async_btree_node_rewrite_work);
+       queue_work(system_long_wq, &req->work);
+       return;
+
+err_put_ref:
+       percpu_ref_put(&c->writes);
+}
+
+
 static void __bch2_btree_node_update_key(struct bch_fs *c,
                                         struct btree_update *as,
                                         struct btree_iter *iter,