btrfs: zoned: fix data relocation block group reservation
author: Naohiro Aota <naohiro.aota@wdc.com>
Wed, 16 Jul 2025 07:59:53 +0000 (16:59 +0900)
committer: David Sterba <dsterba@suse.com>
Wed, 13 Aug 2025 10:28:48 +0000 (12:28 +0200)
btrfs_zoned_reserve_data_reloc_bg() is called on mount, and at that point
all data block groups belong to the primary data space_info. So, we don't
find any candidate block group in the data relocation space_info.

Also, the condition "bg->used > 0" can select a block group that is full of
zone_unusable bytes as the candidate. As we cannot allocate from such a block
group, it is useless to reserve it as the data relocation block group.

Furthermore, because of the space_info separation, we need to migrate the
selected block group to the data relocation space_info. If not, the extent
allocator cannot use the block group to do the allocation.

This commit fixes these three issues.

Fixes: e606ff985ec7 ("btrfs: zoned: reserve data_reloc block group on mount")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/zoned.c

index 36de6d0d595f8097b0e2339f7de155d6f7de736a..7a3351b1b0c612a4439022f7f0d38427db0b2eb6 100644 (file)
@@ -17,6 +17,7 @@
 #include "accessors.h"
 #include "bio.h"
 #include "transaction.h"
+#include "sysfs.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES   4096
@@ -2519,12 +2520,12 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
 void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
-       struct btrfs_space_info *space_info = data_sinfo->sub_group[0];
+       struct btrfs_space_info *space_info = data_sinfo;
        struct btrfs_trans_handle *trans;
        struct btrfs_block_group *bg;
        struct list_head *bg_list;
        u64 alloc_flags;
-       bool initial = false;
+       bool first = true;
        bool did_chunk_alloc = false;
        int index;
        int ret;
@@ -2538,21 +2539,52 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
        if (sb_rdonly(fs_info->sb))
                return;
 
-       ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
        alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
        index = btrfs_bg_flags_to_raid_index(alloc_flags);
 
-       bg_list = &data_sinfo->block_groups[index];
+       /* Scan the data space_info to find empty block groups. Take the second one. */
 again:
+       bg_list = &space_info->block_groups[index];
        list_for_each_entry(bg, bg_list, list) {
-               if (bg->used > 0)
+               if (bg->alloc_offset != 0)
                        continue;
 
-               if (!initial) {
-                       initial = true;
+               if (first) {
+                       first = false;
                        continue;
                }
 
+               if (space_info == data_sinfo) {
+                       /* Migrate the block group to the data relocation space_info. */
+                       struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0];
+                       int factor;
+
+                       ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+                       factor = btrfs_bg_type_to_factor(bg->flags);
+
+                       down_write(&space_info->groups_sem);
+                       list_del_init(&bg->list);
+                       /* We can assume this as we choose the second empty one. */
+                       ASSERT(!list_empty(&space_info->block_groups[index]));
+                       up_write(&space_info->groups_sem);
+
+                       spin_lock(&space_info->lock);
+                       space_info->total_bytes -= bg->length;
+                       space_info->disk_total -= bg->length * factor;
+                       /* There is no allocation ever happened. */
+                       ASSERT(bg->used == 0);
+                       ASSERT(bg->zone_unusable == 0);
+                       /* No super block in a block group on the zoned setup. */
+                       ASSERT(bg->bytes_super == 0);
+                       spin_unlock(&space_info->lock);
+
+                       bg->space_info = reloc_sinfo;
+                       if (reloc_sinfo->block_group_kobjs[index] == NULL)
+                               btrfs_sysfs_add_block_group_type(bg);
+
+                       btrfs_add_bg_to_space_info(fs_info, bg);
+               }
+
                fs_info->data_reloc_bg = bg->start;
                set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
                btrfs_zone_activate(bg);
@@ -2567,11 +2599,18 @@ again:
        if (IS_ERR(trans))
                return;
 
+       /* Allocate new BG in the data relocation space_info. */
+       space_info = data_sinfo->sub_group[0];
+       ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
        ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
        btrfs_end_transaction(trans);
        if (ret == 1) {
+               /*
+                * We allocated a new block group in the data relocation space_info. We
+                * can take that one.
+                */
+               first = false;
                did_chunk_alloc = true;
-               bg_list = &space_info->block_groups[index];
                goto again;
        }
 }