btrfs: add support for multiple global roots
author Josef Bacik <josef@toxicpanda.com>
Wed, 15 Dec 2021 20:40:08 +0000 (15:40 -0500)
committer David Sterba <dsterba@suse.com>
Mon, 14 Mar 2022 12:13:49 +0000 (13:13 +0100)
With extent tree v2 you will be able to create multiple csum, extent,
and free space trees.  They will be used based on the block group, which
will now use the block_group_item->chunk_objectid to point to the set of
global roots that it will use.  When allocating new block groups we'll
simply mod the gigabyte offset of the block group against the number of
global roots we have and that will be the block group's global id.

From there we can take the bytenr that we're modifying in the respective
tree, look up the block group and get that block group's corresponding
global root id.  From there we can get to the appropriate global root
for that bytenr.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/free-space-tree.c
fs/btrfs/transaction.c
fs/btrfs/tree-checker.c

index 8202ad6aa131740ec44474f0f82cb451f43eaebc..3113f6d7f335f95a36d379bf9ff4ee23751be7d1 100644 (file)
@@ -2006,6 +2006,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
        cache->length = key->offset;
        cache->used = btrfs_stack_block_group_used(bgi);
        cache->flags = btrfs_stack_block_group_flags(bgi);
+       cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
 
        set_free_space_tree_thresholds(cache);
 
@@ -2288,7 +2289,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
        spin_lock(&block_group->lock);
        btrfs_set_stack_block_group_used(&bgi, block_group->used);
        btrfs_set_stack_block_group_chunk_objectid(&bgi,
-                               BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+                                                  block_group->global_root_id);
        btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
        key.objectid = block_group->start;
        key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2444,6 +2445,27 @@ next:
        btrfs_trans_release_chunk_metadata(trans);
 }
 
+/*
+ * For extent tree v2 we use the block_group_item->chunk_objectid to point at
+ * our global root id.  For v1 it's always BTRFS_FIRST_CHUNK_TREE_OBJECTID.
+ */
+static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
+{
+       u64 div = SZ_1G;
+       u64 index;
+
+       if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+               return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+
+       /* Smaller filesystems (<= 10GiB) index by 128MiB instead of 1GiB. */
+       if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
+               div = SZ_128M;
+
+       offset = div64_u64(offset, div);
+       div64_u64_rem(offset, fs_info->nr_global_roots, &index);
+       return index;
+}
+
 struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
                                                 u64 bytes_used, u64 type,
                                                 u64 chunk_offset, u64 size)
@@ -2464,6 +2486,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
        cache->flags = type;
        cache->last_byte_to_unpin = (u64)-1;
        cache->cached = BTRFS_CACHE_FINISHED;
+       cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
+
        if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
                cache->needs_free_space = 1;
 
@@ -2693,7 +2717,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
        bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
        btrfs_set_stack_block_group_used(&bgi, cache->used);
        btrfs_set_stack_block_group_chunk_objectid(&bgi,
-                       BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+                                                  cache->global_root_id);
        btrfs_set_stack_block_group_flags(&bgi, cache->flags);
        write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
        btrfs_mark_buffer_dirty(leaf);
index 5878b7ce3b78ed10fab7e357eda42c49b709a6ef..93aabc68bb6a8e9c104d72474b12adaa9d08a546 100644 (file)
@@ -68,6 +68,7 @@ struct btrfs_block_group {
        u64 bytes_super;
        u64 flags;
        u64 cache_generation;
+       u64 global_root_id;
 
        /*
         * If the free space extent count exceeds this number, convert the block
index f460a7bb9ae8f860f867217ad1c0886a7ab8022a..54513c4e891a8226ae9d90c4ff23970e0f585685 100644 (file)
@@ -1058,6 +1058,8 @@ struct btrfs_fs_info {
        spinlock_t relocation_bg_lock;
        u64 data_reloc_bg;
 
+       u64 nr_global_roots;
+
        spinlock_t zone_active_bgs_lock;
        struct list_head zone_active_bgs;
 
index fe1349737edbcdcdf868ec9232cf86d4ba4db0c1..ed62e81c0b669aaf11c75bd28eb201c93eb531e3 100644 (file)
@@ -1289,12 +1289,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
        return root;
 }
 
+static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+       struct btrfs_block_group *block_group;
+       u64 ret;
+
+       if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+               return 0;       /* without v2 the root key offset stays 0 */
+
+       if (bytenr)
+               block_group = btrfs_lookup_block_group(fs_info, bytenr);
+       else    /* bytenr == 0: use the "first block group" lookup instead */
+               block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
+       ASSERT(block_group);
+       if (!block_group)       /* presumably for builds where ASSERT() is a no-op */
+               return 0;
+       ret = block_group->global_root_id;
+       btrfs_put_block_group(block_group);     /* release the looked-up bg */
+
+       return ret;
+}
+
 struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
 {
        struct btrfs_key key = {
                .objectid = BTRFS_CSUM_TREE_OBJECTID,
                .type = BTRFS_ROOT_ITEM_KEY,
-               .offset = 0,
+               .offset = btrfs_global_root_id(fs_info, bytenr),
        };
 
        return btrfs_global_root(fs_info, &key);
@@ -1305,7 +1326,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
        struct btrfs_key key = {
                .objectid = BTRFS_EXTENT_TREE_OBJECTID,
                .type = BTRFS_ROOT_ITEM_KEY,
-               .offset = 0,
+               .offset = btrfs_global_root_id(fs_info, bytenr),
        };
 
        return btrfs_global_root(fs_info, &key);
@@ -2096,7 +2117,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 {
        const int next_backup = info->backup_root_index;
        struct btrfs_root_backup *root_backup;
-       struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
 
        root_backup = info->super_for_commit->super_roots + next_backup;
 
@@ -2130,6 +2150,7 @@ static void backup_super_roots(struct btrfs_fs_info *info)
                        btrfs_header_level(info->block_group_root->node));
        } else {
                struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+               struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
 
                btrfs_set_backup_extent_root(root_backup,
                                             extent_root->node->start);
@@ -2137,6 +2158,12 @@ static void backup_super_roots(struct btrfs_fs_info *info)
                                btrfs_header_generation(extent_root->node));
                btrfs_set_backup_extent_root_level(root_backup,
                                        btrfs_header_level(extent_root->node));
+
+               btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
+               btrfs_set_backup_csum_root_gen(root_backup,
+                                              btrfs_header_generation(csum_root->node));
+               btrfs_set_backup_csum_root_level(root_backup,
+                                                btrfs_header_level(csum_root->node));
        }
 
        /*
@@ -2158,12 +2185,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
        btrfs_set_backup_dev_root_level(root_backup,
                                       btrfs_header_level(info->dev_root->node));
 
-       btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
-       btrfs_set_backup_csum_root_gen(root_backup,
-                                      btrfs_header_generation(csum_root->node));
-       btrfs_set_backup_csum_root_level(root_backup,
-                                        btrfs_header_level(csum_root->node));
-
        btrfs_set_backup_total_bytes(root_backup,
                             btrfs_super_total_bytes(info->super_copy));
        btrfs_set_backup_bytes_used(root_backup,
@@ -2546,6 +2567,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
 {
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
        struct btrfs_root *root;
+       u64 max_global_id = 0;
        int ret;
        struct btrfs_key key = {
                .objectid = objectid,
@@ -2581,6 +2603,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
                        break;
                btrfs_release_path(path);
 
+               /*
+                * Just worry about this for extent tree, it'll be the same for
+                * everybody.
+                */
+               if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+                       max_global_id = max(max_global_id, key.offset);
+
                found = true;
                root = read_tree_root_path(tree_root, path, &key);
                if (IS_ERR(root)) {
@@ -2598,6 +2627,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
        }
        btrfs_release_path(path);
 
+       if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+               fs_info->nr_global_roots = max_global_id + 1;
+
        if (!found || ret) {
                if (objectid == BTRFS_CSUM_TREE_OBJECTID)
                        set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
index 655aad0f9e1c8cc4de27afaf8222959be2bf04cf..0ae54d8c10d646cd8e22bb7d8a452f55669a69f8 100644 (file)
@@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root(
                .offset = 0,
        };
 
+       if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
+               key.offset = block_group->global_root_id;
        return btrfs_global_root(block_group->fs_info, &key);
 }
 
index 1f1c25db6f6b8a395b912d7c9e7855a6cc98d673..37f6ec2a3c5695cdffc320bf0cb8d2affff0337c 100644 (file)
@@ -1911,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
                super->cache_generation = 0;
        if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
                super->uuid_tree_generation = root_item->generation;
+
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               root_item = &fs_info->block_group_root->root_item;
+
+               super->block_group_root = root_item->bytenr;
+               super->block_group_root_generation = root_item->generation;
+               super->block_group_root_level = root_item->level;
+       }
 }
 
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -2362,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
        list_add_tail(&fs_info->chunk_root->dirty_list,
                      &cur_trans->switch_commits);
 
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_set_root_node(&fs_info->block_group_root->root_item,
+                                   fs_info->block_group_root->node);
+               list_add_tail(&fs_info->block_group_root->dirty_list,
+                             &cur_trans->switch_commits);
+       }
+
        switch_commit_roots(trans);
 
        ASSERT(list_empty(&cur_trans->dirty_bgs));
index 64c7d2a2bb3e4244b9bc02e49a8f44e489c0f052..e56c0107eea3ac94d30b341a62c10b415a6f2254 100644 (file)
@@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
 static int check_block_group_item(struct extent_buffer *leaf,
                                  struct btrfs_key *key, int slot)
 {
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_block_group_item bgi;
        u32 item_size = btrfs_item_size(leaf, slot);
+       u64 chunk_objectid;
        u64 flags;
        u64 type;
 
@@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
 
        read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
                           sizeof(bgi));
-       if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
-                    BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
+       chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               /*
+                * We don't init the nr_global_roots until we load the global
+                * roots, so this could be 0 at mount time.  If it's 0 we'll
+                * just assume we're fine, and later we'll check against our
+                * actual value.
+                */
+               if (unlikely(fs_info->nr_global_roots &&
+                            chunk_objectid >= fs_info->nr_global_roots)) {
+                       block_group_err(leaf, slot,
+       "invalid block group global root id, have %llu, needs to be <= %llu",
+                                       chunk_objectid,
+                                       fs_info->nr_global_roots);
+                       return -EUCLEAN;
+               }
+       } else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
                block_group_err(leaf, slot,
                "invalid block group chunk objectid, have %llu expect %llu",
                                btrfs_stack_block_group_chunk_objectid(&bgi),