bcachefs: btree read retry fixes
author Kent Overstreet <kent.overstreet@linux.dev>
Thu, 3 Jul 2025 23:19:21 +0000 (19:19 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 5 Jul 2025 16:42:41 +0000 (12:42 -0400)
Fix btree node read retries after validate errors:

__btree_err() is the wrong place to flag a topology error: that is done
by btree_lost_data().

Additionally, some calls to bch2_bkey_pick_read_device() were not
updated in the 6.16 rework for improved log messages; we were failing to
signal that we still had a retry.

Cc: Nikita Ofitserov <himikof@gmail.com>
Cc: Alan Huang <mmpgouride@gmail.com>
Reported-and-tested-by: Edoardo Codeglia <bcachefs@404.blue>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_io.c
fs/bcachefs/errcode.h
fs/bcachefs/error.c

index e874a4357f64c4a20fee88f2d810de1139050cf5..a4cc72986e3642a20cad3fc3edd00f05beda899e 100644 (file)
@@ -568,9 +568,9 @@ static int __btree_err(int ret,
                bch2_mark_btree_validate_failure(failed, ca->dev_idx);
 
                struct extent_ptr_decoded pick;
-               have_retry = !bch2_bkey_pick_read_device(c,
+               have_retry = bch2_bkey_pick_read_device(c,
                                        bkey_i_to_s_c(&b->key),
-                                       failed, &pick, -1);
+                                       failed, &pick, -1) == 1;
        }
 
        if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
@@ -615,7 +615,6 @@ static int __btree_err(int ret,
                        goto out;
                case -BCH_ERR_btree_node_read_err_bad_node:
                        prt_str(&out, ", ");
-                       ret = __bch2_topology_error(c, &out);
                        break;
                }
 
@@ -644,7 +643,6 @@ static int __btree_err(int ret,
                goto out;
        case -BCH_ERR_btree_node_read_err_bad_node:
                prt_str(&out, ", ");
-               ret = __bch2_topology_error(c, &out);
                break;
        }
 print:
@@ -1408,7 +1406,7 @@ static void btree_node_read_work(struct work_struct *work)
                ret = bch2_bkey_pick_read_device(c,
                                        bkey_i_to_s_c(&b->key),
                                        &failed, &rb->pick, -1);
-               if (ret) {
+               if (ret <= 0) {
                        set_btree_node_read_error(b);
                        break;
                }
index 86a842f1e88e69f67ef2a46b66431d0793ebfbf6..acc3b7b677041ded8daf3b9edb2b337e7de67433 100644 (file)
        x(EIO,                          sb_not_downgraded)                      \
        x(EIO,                          btree_node_write_all_failed)            \
        x(EIO,                          btree_node_read_error)                  \
-       x(EIO,                          btree_node_read_validate_error)         \
        x(EIO,                          btree_need_topology_repair)             \
        x(EIO,                          bucket_ref_update)                      \
        x(EIO,                          trigger_alloc)                          \
index b2a6c041e165dfdd84cfb9f8e4ae5b6987ea6990..ea37f5af1800c8848fd2c4cafb49e998e64f1de4 100644 (file)
@@ -103,7 +103,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
                return bch_err_throw(c, btree_need_topology_repair);
        } else {
                return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?:
-                       bch_err_throw(c, btree_node_read_validate_error);
+                       bch_err_throw(c, btree_need_topology_repair);
        }
 }