btrfs: introduce mount option rescue=ignorebadroots
author Josef Bacik <josef@toxicpanda.com>
Fri, 16 Oct 2020 15:29:18 +0000 (11:29 -0400)
committer David Sterba <dsterba@suse.com>
Tue, 8 Dec 2020 14:53:41 +0000 (15:53 +0100)
In the face of extent root corruption, or any other core fs-wide root
corruption, we will fail to mount the file system.  This makes recovery
kind of a pain, because you need to fall back to userspace tools to
scrape off data.  Instead, provide a mechanism to gracefully handle bad
roots, so we can at least mount read-only and possibly recover data from
the file system.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c
fs/btrfs/block-rsv.c
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/file-item.c
fs/btrfs/inode.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/volumes.c

index 3ba6f3839d3929e5ab6d7d60ce76ccd8b44a5387..bb6685711824cd95be65a4f5a2ff2fc3f25e4a49 100644 (file)
@@ -1985,6 +1985,51 @@ error:
        return ret;
 }
 
+/* No extent root: fake one fully-used block group per mapping_tree entry. */
+static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
+{
+       struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+       struct btrfs_space_info *space_info;
+       struct rb_node *node;
+       int ret = 0;
+
+       for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
+               struct extent_map *em;
+               struct map_lookup *map;
+               struct btrfs_block_group *bg;
+
+               em = rb_entry(node, struct extent_map, rb_node);
+               map = em->map_lookup;
+               bg = btrfs_create_block_group_cache(fs_info, em->start);
+               if (!bg) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               /* Dummy group is FULL: used == length, caching marked finished */
+               bg->length = em->len;
+               bg->flags = map->type;
+               bg->last_byte_to_unpin = (u64)-1;
+               bg->cached = BTRFS_CACHE_FINISHED;
+               bg->used = em->len;
+               ret = btrfs_add_block_group_cache(fs_info, bg);
+               if (ret) {
+                       btrfs_remove_free_space_cache(bg);
+                       btrfs_put_block_group(bg);
+                       break;
+               }
+               btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
+                                       0, &space_info);
+               bg->space_info = space_info;
+               link_block_group(bg);
+               set_avail_alloc_bits(fs_info, bg->flags);
+       }
+       /* Only initialize the global block reserve if every group was added */
+       if (!ret)
+               btrfs_init_global_block_rsv(fs_info);
+       return ret;
+}
+
 int btrfs_read_block_groups(struct btrfs_fs_info *info)
 {
        struct btrfs_path *path;
@@ -1995,6 +2040,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
        int need_clear = 0;
        u64 cache_gen;
 
+       if (!info->extent_root)
+               return fill_dummy_bgs(info);
+
        key.objectid = 0;
        key.offset = 0;
        key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
index bc920afe23bf09681cacf51b4db0f10648c3f974..04a6226e0388860024a1bf2f751e727bf68b6616 100644 (file)
@@ -426,6 +426,14 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
        fs_info->delayed_block_rsv.space_info = space_info;
        fs_info->delayed_refs_rsv.space_info = space_info;
 
+       /*
+        * Our various recovery options can leave us with NULL roots, so check
+        * here and just bail before we go dereferencing NULLs everywhere.
+        */
+       if (!fs_info->extent_root || !fs_info->csum_root ||
+           !fs_info->dev_root || !fs_info->chunk_root || !fs_info->tree_root)
+               return;
+
        fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
        fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
        fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
index 7e1eb57b923c4c5cc5b70f398f3148b9aa9c1efc..972fb68a85ace457436b5adc2189d03ef6b2f0cb 100644 (file)
@@ -150,7 +150,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
        struct compressed_bio *cb = bio->bi_private;
        u8 *cb_sum = cb->sums;
 
-       if (inode->flags & BTRFS_INODE_NODATASUM)
+       if (!fs_info->csum_root || (inode->flags & BTRFS_INODE_NODATASUM))
                return 0;
 
        shash->tfm = fs_info->csum_shash;
index 0738ec94d8064984ef6c4d2244cd2cb54515287d..683dcb58eaa923141a716fb0e9a37b95bba7f7ae 100644 (file)
@@ -1298,6 +1298,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_MOUNT_NOLOGREPLAY                (1 << 27)
 #define BTRFS_MOUNT_REF_VERIFY         (1 << 28)
 #define BTRFS_MOUNT_DISCARD_ASYNC      (1 << 29)
+#define BTRFS_MOUNT_IGNOREBADROOTS     (1 << 30)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL  (30)
 #define BTRFS_DEFAULT_MAX_INLINE       (2048)
index 137c4d5eaa8dbe8f43dc5e92e47180d4f396e511..d229f6c25f2938ad2fe2dfb06b43bfed0328380d 100644 (file)
@@ -2307,30 +2307,39 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 
        root = btrfs_read_tree_root(tree_root, &location);
        if (IS_ERR(root)) {
-               ret = PTR_ERR(root);
-               goto out;
+               if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+                       ret = PTR_ERR(root);
+                       goto out;
+               }
+       } else {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->extent_root = root;
        }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-       fs_info->extent_root = root;
 
        location.objectid = BTRFS_DEV_TREE_OBJECTID;
        root = btrfs_read_tree_root(tree_root, &location);
        if (IS_ERR(root)) {
-               ret = PTR_ERR(root);
-               goto out;
+               if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+                       ret = PTR_ERR(root);
+                       goto out;
+               }
+       } else {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->dev_root = root;
+               btrfs_init_devices_late(fs_info);
        }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-       fs_info->dev_root = root;
-       btrfs_init_devices_late(fs_info);
 
        location.objectid = BTRFS_CSUM_TREE_OBJECTID;
        root = btrfs_read_tree_root(tree_root, &location);
        if (IS_ERR(root)) {
-               ret = PTR_ERR(root);
-               goto out;
+               if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+                       ret = PTR_ERR(root);
+                       goto out;
+               }
+       } else {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->csum_root = root;
        }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-       fs_info->csum_root = root;
 
        /*
         * This tree can share blocks with some other fs tree during relocation
@@ -2339,11 +2348,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
        root = btrfs_get_fs_root(tree_root->fs_info,
                                 BTRFS_DATA_RELOC_TREE_OBJECTID, true);
        if (IS_ERR(root)) {
-               ret = PTR_ERR(root);
-               goto out;
+               if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+                       ret = PTR_ERR(root);
+                       goto out;
+               }
+       } else {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->data_reloc_root = root;
        }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-       fs_info->data_reloc_root = root;
 
        location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
        root = btrfs_read_tree_root(tree_root, &location);
@@ -2356,9 +2368,11 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
        location.objectid = BTRFS_UUID_TREE_OBJECTID;
        root = btrfs_read_tree_root(tree_root, &location);
        if (IS_ERR(root)) {
-               ret = PTR_ERR(root);
-               if (ret != -ENOENT)
-                       goto out;
+               if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+                       ret = PTR_ERR(root);
+                       if (ret != -ENOENT)
+                               goto out;
+               }
        } else {
                set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
                fs_info->uuid_root = root;
@@ -2368,11 +2382,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
                location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
                root = btrfs_read_tree_root(tree_root, &location);
                if (IS_ERR(root)) {
-                       ret = PTR_ERR(root);
-                       goto out;
+                       if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+                               ret = PTR_ERR(root);
+                               goto out;
+                       }
+               }  else {
+                       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+                       fs_info->free_space_root = root;
                }
-               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-               fs_info->free_space_root = root;
        }
 
        return 0;
index 8083d71d6af62a840ee134cb9f2282073bd34506..816f57d52fc9072691a6700d053cf93124e09377 100644 (file)
@@ -272,7 +272,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
        int count = 0;
        u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 
-       if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+       if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
                return BLK_STS_OK;
 
        path = btrfs_alloc_path();
index c8cda5df9fb0d31b9c718a60e374b4a7d9a6e183..21a354dad6f21a669da4a9ba666c4d5d0b09d011 100644 (file)
@@ -2187,7 +2187,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
        int skip_sum;
        int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
-       skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+       skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+                  !fs_info->csum_root;
 
        if (btrfs_is_free_space_inode(BTRFS_I(inode)))
                metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
@@ -2902,6 +2903,9 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
                return 0;
 
+       if (!root->fs_info->csum_root)
+               return 0;
+
        if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
            test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
                clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
index b9d5d610682fe03a8dc430ba2624e04ea6d94b73..9bc46c0489789e512549e07a571089bc46f57bbf 100644 (file)
@@ -360,6 +360,7 @@ enum {
        Opt_rescue,
        Opt_usebackuproot,
        Opt_nologreplay,
+       Opt_ignorebadroots,
 
        /* Deprecated options */
        Opt_recovery,
@@ -455,6 +456,8 @@ static const match_table_t tokens = {
 static const match_table_t rescue_tokens = {
        {Opt_usebackuproot, "usebackuproot"},
        {Opt_nologreplay, "nologreplay"},
+       {Opt_ignorebadroots, "ignorebadroots"},
+       {Opt_ignorebadroots, "ibadroots"},      /* short alias, same token */
        {Opt_err, NULL},
 };
 
@@ -498,6 +501,10 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
                        btrfs_set_and_info(info, NOLOGREPLAY,
                                           "disabling log replay at mount time");
                        break;
+               case Opt_ignorebadroots:
+                       btrfs_set_and_info(info, IGNOREBADROOTS,
+                                          "ignoring bad roots");
+                       break;
                case Opt_err:
                        btrfs_info(info, "unrecognized rescue option '%s'", p);
                        ret = -EINVAL;
@@ -983,7 +990,8 @@ check:
        if (new_flags & SB_RDONLY)
                goto out;
 
-       if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay"))
+       if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
+           check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots"))
                ret = -EINVAL;
 out:
        if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
@@ -1439,6 +1447,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                print_rescue_option(seq, "nologreplay", &printed);
        if (btrfs_test_opt(info, USEBACKUPROOT))
                print_rescue_option(seq, "usebackuproot", &printed);
+       if (btrfs_test_opt(info, IGNOREBADROOTS))
+               print_rescue_option(seq, "ignorebadroots", &printed);
        if (btrfs_test_opt(info, FLUSHONCOMMIT))
                seq_puts(seq, ",flushoncommit");
        if (btrfs_test_opt(info, DISCARD_SYNC))
index 8f0462d6855d3d56fac63a10f79d5451887df5b7..e9f4829894151e8d95cfdc94f89f5ab9517a73c1 100644 (file)
@@ -332,6 +332,7 @@ BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);
 static const char *rescue_opts[] = {
        "usebackuproot",
        "nologreplay",
+       "ignorebadroots",
 };
 
 static ssize_t supported_rescue_options_show(struct kobject *kobj,
index 78637665166e05cdbbd4cb8e110bccc1f6fefdf4..07c6b0c853396867b88ab147e4c3b34f5d7e113d 100644 (file)
@@ -7659,6 +7659,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
        u64 prev_dev_ext_end = 0;
        int ret = 0;
 
+       /*
+        * We don't have a dev_root because we mounted with ignorebadroots and
+        * failed to load the root, so we want to skip the verification in this
+        * case for sure.
+        *
+        * However if the dev root is fine, but the tree itself is corrupted
+        * we'd still fail to mount.  This verification is only to make sure
+        * writes can happen safely, so instead just bypass this check
+        * completely in the case of IGNOREBADROOTS.
+        */
+       if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
+               return 0;
+
        key.objectid = 1;
        key.type = BTRFS_DEV_EXTENT_KEY;
        key.offset = 0;