bcachefs: bch2_fpunch_snapshot()
author Kent Overstreet <kent.overstreet@linux.dev>
Wed, 2 Jul 2025 17:28:55 +0000 (13:28 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Fri, 4 Jul 2025 19:45:22 +0000 (15:45 -0400)
Add a new version of fpunch for operating on a snapshot ID, not a
subvolume - and use it for "extent past end of inode" repair.

Previously, repair would try to delete everything at once, but deleting
too many extents at once can overflow the btree_trans bump allocator, as
well as causing other problems - the new helper properly uses
bch2_extent_trim_atomic().

Reported-and-tested-by: Edoardo Codeglia <bcachefs@404.blue>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fsck.c
fs/bcachefs/io_misc.c
fs/bcachefs/io_misc.h

index dbf161e4311aead7db7d876d7494626f3ee0a6dd..856eb2b4189683377d8e8a6cd891d4b7d100f954 100644 (file)
@@ -12,6 +12,7 @@
 #include "fs.h"
 #include "fsck.h"
 #include "inode.h"
+#include "io_misc.h"
 #include "keylist.h"
 #include "namei.h"
 #include "recovery_passes.h"
@@ -1919,33 +1920,11 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                                        "extent type past end of inode %llu:%u, i_size %llu\n%s",
                                        i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
                                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-                               struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout));
-                               ret = PTR_ERR_OR_ZERO(whiteout);
-                               if (ret)
-                                       goto err;
-
-                               bkey_init(&whiteout->k);
-                               whiteout->k.p = SPOS(k.k->p.inode,
-                                                    last_block,
-                                                    i->inode.bi_snapshot);
-                               bch2_key_resize(&whiteout->k,
-                                               min(KEY_SIZE_MAX & (~0 << c->block_bits),
-                                                   U64_MAX - whiteout->k.p.offset));
-
-
-                               /*
-                                * Need a normal (not BTREE_ITER_all_snapshots)
-                                * iterator, if we're deleting in a different
-                                * snapshot and need to emit a whiteout
-                                */
-                               struct btree_iter iter2;
-                               bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents,
-                                                    bkey_start_pos(&whiteout->k),
-                                                    BTREE_ITER_intent);
-                               ret =   bch2_btree_iter_traverse(trans, &iter2) ?:
-                                       bch2_trans_update(trans, &iter2, whiteout,
-                                               BTREE_UPDATE_internal_snapshot_node);
-                               bch2_trans_iter_exit(trans, &iter2);
+                               ret = bch2_fpunch_snapshot(trans,
+                                                          SPOS(i->inode.bi_inum,
+                                                               last_block,
+                                                               i->inode.bi_snapshot),
+                                                          POS(i->inode.bi_inum, U64_MAX));
                                if (ret)
                                        goto err;
 
index bf72b1d2e2cb7e497bc932e49faff10b0b94d77a..07023667a475f6ec4579d18476194ee5b23b2960 100644 (file)
@@ -135,6 +135,33 @@ err_noprint:
        return ret;
 }
 
+/*
+ * For fsck: punch out (delete) extents within a single snapshot.
+ *
+ * Walks BTREE_ID_extents from @start up to @end and, for each key visited,
+ * queues a deletion key that begins at the iterator's current position.
+ * Unlike the subvolume-based fpunch helpers, the positions are taken as-is;
+ * the fsck caller in this patch encodes the snapshot ID in @start.
+ *
+ * The deletion key is trimmed with bch2_extent_trim_atomic() before being
+ * queued, and the loop is driven by for_each_btree_key_max_commit(), so the
+ * range is removed piecewise rather than as one huge update.
+ *
+ * Returns the loop's error if it is nonzero, otherwise the result of
+ * trans_was_restarted() against the restart count sampled on entry.
+ */
+int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bpos end)
+{
+       /* Sampled up front so the final trans_was_restarted() check has a baseline */
+       u32 restart_count = trans->restart_count;
+       struct bch_fs *c = trans->c;
+       struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0);
+       /*
+        * KEY_SIZE_MAX with the low c->block_bits bits cleared (presumably
+        * so the key size stays block aligned - confirm).
+        * NOTE(review): ~0 is a signed int; left-shifting a negative value is
+        * undefined in ISO C - confirm the build flags define it, or use ~0U.
+        */
+       unsigned max_sectors    = KEY_SIZE_MAX & (~0 << c->block_bits);
+       /* Scratch key, re-initialized on every loop iteration */
+       struct bkey_i delete;
+
+       int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
+                       start, end, 0, k,
+                       &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+               bkey_init(&delete.k);
+               delete.k.p = iter.pos;
+
+               /* create the biggest key we can */
+               bch2_key_resize(&delete.k, max_sectors);
+               /* ...but never let it extend past the requested end */
+               bch2_cut_back(end, &delete);
+
+               /*
+                * Value of the statement expression: the trim error if any,
+                * otherwise the result of queueing the update.
+                */
+               bch2_extent_trim_atomic(trans, &iter, &delete) ?:
+               bch2_trans_update(trans, &iter, &delete, 0);
+       }));
+
+       bch2_disk_reservation_put(c, &disk_res);
+       return ret ?: trans_was_restarted(trans, restart_count);
+}
+
 /*
  * Returns -BCH_ERR_transacton_restart if we had to drop locks:
  */
index 9cb44a7c43c1714678ef27d4ead33b29ac567849..b93e4d4b3c0c503d8a2bd37aa2fe64d30cad22c7 100644 (file)
@@ -5,6 +5,8 @@
 int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
                          u64, struct bch_io_opts, s64 *,
                          struct write_point_specifier);
+
+int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos);
 int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
                   subvol_inum, u64, s64 *);
 int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);