bcachefs: Plumb printbuf through bch2_btree_lost_data()
author: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 24 Apr 2025 13:27:10 +0000 (09:27 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 22 May 2025 00:14:34 +0000 (20:14 -0400)
Part of the ongoing project to improve error messages by building them
up in printbufs and emitting them all at once, so that we can easily see
what events are related in the log.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_io.c
fs/bcachefs/recovery.c
fs/bcachefs/recovery.h
fs/bcachefs/recovery_passes.c
fs/bcachefs/recovery_passes.h

index 84dae4c1ec13c90d3de70d31e0f67f393cbd96b4..41df1035ba2fea0777cb222b3bfe03814725e0d5 100644 (file)
@@ -1304,7 +1304,6 @@ fsck_err:
                retry_read = 1;
        } else {
                set_btree_node_read_error(b);
-               bch2_btree_lost_data(c, b->c.btree_id);
        }
        goto out;
 }
@@ -1372,15 +1371,16 @@ start:
 
                if (!can_retry) {
                        set_btree_node_read_error(b);
-                       bch2_btree_lost_data(c, b->c.btree_id);
                        break;
                }
        }
-
-       async_object_list_del(c, btree_read_bio, rb->list_idx);
-       bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
-                              rb->start_time);
-       bio_put(&rb->bio);
+       if (btree_node_read_error(b)) {
+               struct printbuf buf = PRINTBUF;
+               bch2_btree_lost_data(c, &buf, b->c.btree_id);
+               if (buf.pos)
+                       bch_err(c, "%s", buf.buf);
+               printbuf_exit(&buf);
+       }
 
        if ((saw_error ||
             btree_node_need_rewrite(b)) &&
@@ -1398,6 +1398,10 @@ start:
                bch2_btree_node_rewrite_async(c, b);
        }
 
+       async_object_list_del(c, btree_read_bio, rb->list_idx);
+       bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+                              rb->start_time);
+       bio_put(&rb->bio);
        printbuf_exit(&buf);
        clear_btree_node_read_in_flight(b);
        smp_mb__after_atomic();
@@ -1587,7 +1591,12 @@ fsck_err:
 
        if (ret) {
                set_btree_node_read_error(b);
-               bch2_btree_lost_data(c, b->c.btree_id);
+
+               struct printbuf buf = PRINTBUF;
+               bch2_btree_lost_data(c, &buf, b->c.btree_id);
+               if (buf.pos)
+                       bch_err(c, "%s", buf.buf);
+               printbuf_exit(&buf);
        } else if (*saw_error)
                bch2_btree_node_rewrite_async(c, b);
 
@@ -1721,6 +1730,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 
                prt_str(&buf, "btree node read error: no device to read from\n at ");
                bch2_btree_pos_to_text(&buf, c, b);
+               prt_newline(&buf);
+               bch2_btree_lost_data(c, &buf, b->c.btree_id);
                bch_err_ratelimited(c, "%s", buf.buf);
 
                if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
@@ -1728,7 +1739,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
                        bch2_fatal_error(c);
 
                set_btree_node_read_error(b);
-               bch2_btree_lost_data(c, b->c.btree_id);
                clear_btree_node_read_in_flight(b);
                smp_mb__after_atomic();
                wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
index b1afbe446d9e58123d0768df1ca7085a50ce2710..d13a6df289c7c3351495c85411bee8197867845d 100644 (file)
@@ -33,7 +33,9 @@
 #include <linux/sort.h>
 #include <linux/stat.h>
 
-int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_lost_data(struct bch_fs *c,
+                        struct printbuf *msg,
+                        enum btree_id btree)
 {
        u64 b = BIT_ULL(btree);
        int ret = 0;
@@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
        struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
 
        if (!(c->sb.btrees_lost_data & b)) {
-               struct printbuf buf = PRINTBUF;
-               bch2_btree_id_to_text(&buf, btree);
-               bch_err(c, "flagging btree %s lost data", buf.buf);
-               printbuf_exit(&buf);
+               prt_printf(msg, "flagging btree ");
+               bch2_btree_id_to_text(msg, btree);
+               prt_printf(msg, " lost data\n");
+
                ext->btrees_lost_data |= cpu_to_le64(b);
        }
 
        /* Once we have runtime self healing for topology errors we won't need this: */
-       ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
+       ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret;
 
        /* Btree node accounting will be off: */
        __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
-       ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+       ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;
 
 #ifdef CONFIG_BCACHEFS_DEBUG
        /*
         * These are much more minor, and don't need to be corrected right away,
         * but in debug mode we want the next fsck run to be clean:
         */
-       ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_lrus) ?: ret;
-       ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
+       ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret;
+       ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
 #endif
 
        switch (btree) {
        case BTREE_ID_alloc:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
 
                __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
                __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
@@ -77,30 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
                __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
                goto out;
        case BTREE_ID_backpointers:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
                goto out;
        case BTREE_ID_need_discard:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
                goto out;
        case BTREE_ID_freespace:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
                goto out;
        case BTREE_ID_bucket_gens:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
                goto out;
        case BTREE_ID_lru:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
                goto out;
        case BTREE_ID_accounting:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret;
                goto out;
        case BTREE_ID_snapshots:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
                goto out;
        default:
-               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
+               ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
                goto out;
        }
 out:
index d858ba674eaa457fcc7856c40363d1a5285ab176..c023f52fc2d6dc4496f38385409109f10d9e135e 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _BCACHEFS_RECOVERY_H
 #define _BCACHEFS_RECOVERY_H
 
-int bch2_btree_lost_data(struct bch_fs *, enum btree_id);
+int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id);
 void bch2_reconstruct_alloc(struct bch_fs *);
 
 int bch2_journal_replay(struct bch_fs *);
index 9be715a49454fa1d22b6d62cb97ad5d1113a3f9b..347e17fe7901a5f5c6e786dbb50ee2a92e047781 100644 (file)
@@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
        if (pass < BCH_RECOVERY_PASS_set_may_go_rw &&
            c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) {
                if (print)
-                       prt_printf(out, "need recovery pass %s (%u), but already rw",
+                       prt_printf(out, "need recovery pass %s (%u), but already rw\n",
                                   bch2_recovery_passes[pass], pass);
                return -BCH_ERR_cannot_rewind_recovery;
        }
 
        if (print)
-               prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)",
+               prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n",
                           bch2_recovery_passes[pass], pass,
                           bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
 
@@ -194,14 +194,15 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
 }
 
 int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
-                                              enum bch_recovery_pass pass)
+                                               struct printbuf *out,
+                                               enum bch_recovery_pass pass)
 {
        lockdep_assert_held(&c->sb_lock);
 
        struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
        __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
 
-       return bch2_run_explicit_recovery_pass(c, pass);
+       return bch2_run_explicit_recovery_pass_printbuf(c, out, pass);
 }
 
 static void bch2_clear_recovery_pass_required(struct bch_fs *c,
index 62957e268a661e3772a4fb4ddb03e218961e87f6..1f91be4258c56624b282e2bb5df59aefc6638f1b 100644 (file)
@@ -12,7 +12,9 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *,
                                    struct printbuf *,
                                    enum bch_recovery_pass);
 int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
-int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
+                                              enum bch_recovery_pass);
 
 int bch2_run_online_recovery_passes(struct bch_fs *);
 int bch2_run_recovery_passes(struct bch_fs *);