Merge tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Sep 2022 11:54:19 +0000 (07:54 -0400)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Sep 2022 11:54:19 +0000 (07:54 -0400)
Pull btrfs fixes from David Sterba:
 "A few more fixes to zoned mode and one regression fix for chunk limit:

   - Zoned mode fixes:
       - fix how wait/wake up is done when finishing a zone
       - fix the zone append limit in emulated mode
       - fix mount on devices with conventional zones

   - fix a regression: the user-settable data chunk limit was
     accidentally lowered and causes allocation problems on some
     profiles (raid0, raid1)"

* tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix the max chunk size and stripe length calculation
  btrfs: zoned: fix mounting with conventional zones
  btrfs: zoned: set pseudo max append zone limit in zone emulation mode
  btrfs: zoned: fix API misuse of zone finish waiting

fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/inode.c
fs/btrfs/space-info.c
fs/btrfs/volumes.c
fs/btrfs/zoned.c

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9ef162dbd4bc11fd84c649d6b7c8b453313a8571..df8c99c99df9278f874c053f0ed7f34987fa1385 100644
@@ -1088,8 +1088,6 @@ struct btrfs_fs_info {
 
        spinlock_t zone_active_bgs_lock;
        struct list_head zone_active_bgs;
-       /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
-       wait_queue_head_t zone_finish_wait;
 
        /* Updates are not protected by any lock */
        struct btrfs_commit_stats commit_stats;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 820b1f1e6b6723dbd6ffcb4da6255331c319a26e..1af28b066b42a1a54c78951de8a0839b6a545532 100644
@@ -3068,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
        init_waitqueue_head(&fs_info->transaction_blocked_wait);
        init_waitqueue_head(&fs_info->async_submit_wait);
        init_waitqueue_head(&fs_info->delayed_iputs_wait);
-       init_waitqueue_head(&fs_info->zone_finish_wait);
 
        /* Usable values until the real ones are cached from the superblock */
        fs_info->nodesize = 4096;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ad250892028d6e0387ff8ad101e0ee2a9b9452f2..1372210869b14cda075d3e10fb583ac98d2d4a07 100644
@@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
                        done_offset = end;
 
                if (done_offset == start) {
-                       struct btrfs_fs_info *info = inode->root->fs_info;
-
-                       wait_var_event(&info->zone_finish_wait,
-                                      !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
+                       wait_on_bit_io(&inode->root->fs_info->flags,
+                                      BTRFS_FS_NEED_ZONE_FINISH,
+                                      TASK_UNINTERRUPTIBLE);
                        continue;
                }
 
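A minimal kernel-style sketch of the waiting pattern the fix switches to (the names below are hypothetical stand-ins for BTRFS_FS_NEED_ZONE_FINISH and struct btrfs_fs_info, not btrfs code): the waiter sleeps on the bit waitqueue derived from the flag word and bit number, and the finishing side wakes that same waitqueue with clear_and_wake_up_bit(), as the do_zone_finish() hunk further below does.

#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/wait_bit.h>

/* Hypothetical flag bit and context, for illustration only. */
#define EXAMPLE_NEED_FINISH	0

struct example_ctx {
        unsigned long flags;
};

/* Producer: record that a zone finish is required. */
static void example_mark_need_finish(struct example_ctx *ctx)
{
        set_bit(EXAMPLE_NEED_FINISH, &ctx->flags);
}

/* Waiter: sleep uninterruptibly until the bit is cleared. */
static void example_wait_for_finish(struct example_ctx *ctx)
{
        wait_on_bit_io(&ctx->flags, EXAMPLE_NEED_FINISH, TASK_UNINTERRUPTIBLE);
}

/* Finisher: clear the bit and wake the same bit waitqueue in one call. */
static void example_finish(struct example_ctx *ctx)
{
        clear_and_wake_up_bit(EXAMPLE_NEED_FINISH, &ctx->flags);
}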
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index d0cbeb7ae81c12ba4bf69fe9cb5c1669b56b3890..435559ba94fa00d31f21fa56ec97ec40c55282bc 100644
@@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
        ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
 
        if (flags & BTRFS_BLOCK_GROUP_DATA)
-               return SZ_1G;
+               return BTRFS_MAX_DATA_CHUNK_SIZE;
        else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
                return SZ_32M;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 064ab2a79c805f5f07921a4b1f3641d6bb6736a8..f63ff91e28837a0d5b13d09a239b89c44e4ca099 100644
@@ -5267,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
                                       ctl->stripe_size);
        }
 
+       /* Stripe size should not go beyond 1G. */
+       ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G);
+
        /* Align to BTRFS_STRIPE_LEN */
        ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
        ctl->chunk_size = ctl->stripe_size * data_stripes;
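Taken together with the calc_chunk_size() change above, the effect of the clamp can be sketched with a small standalone program. This mirrors only the arithmetic, not the real decide_stripe_size_regular() control flow; the four data stripes are an assumed example, and the constants correspond to the 10 GiB BTRFS_MAX_DATA_CHUNK_SIZE and 64 KiB BTRFS_STRIPE_LEN values from the btrfs headers.

/* Sketch only: standalone arithmetic, not btrfs code. */
#include <stdint.h>
#include <stdio.h>

#define SZ_1G			(1024ULL * 1024 * 1024)
#define MAX_DATA_CHUNK_SIZE	(10ULL * SZ_1G)	/* BTRFS_MAX_DATA_CHUNK_SIZE */
#define STRIPE_LEN		(64ULL * 1024)	/* BTRFS_STRIPE_LEN */

int main(void)
{
        uint64_t data_stripes = 4;	/* assumed: e.g. raid0 across 4 devices */
        uint64_t stripe_size = MAX_DATA_CHUNK_SIZE / data_stripes; /* 2.5 GiB */

        /* The new clamp: a single stripe never exceeds 1 GiB. */
        if (stripe_size > SZ_1G)
                stripe_size = SZ_1G;

        /* Align down to the stripe length, as round_down() does. */
        stripe_size -= stripe_size % STRIPE_LEN;

        printf("stripe_size = %llu MiB, chunk_size = %llu GiB\n",
               (unsigned long long)(stripe_size >> 20),
               (unsigned long long)((stripe_size * data_stripes) >> 30));
        return 0;
}

With the clamp, this example ends up with a 1 GiB stripe and a 4 GiB chunk instead of a 2.5 GiB stripe, while restoring BTRFS_MAX_DATA_CHUNK_SIZE in calc_chunk_size() lifts the data chunk limit back from the accidental 1 GiB to 10 GiB.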
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b150b07ba1a7663c9b04c85307324d6819bd1b73..62e7007a7e46c6ea8bc04f4ba48638498052c11b 100644
@@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
         * since btrfs adds the pages one by one to a bio, and btrfs cannot
         * increase the metadata reservation even if it increases the number of
         * extents, it is safe to stick with the limit.
+        *
+        * With the zoned emulation, we can have a non-zoned device in zoned
+        * mode. In this case, we don't have a valid max zone append size, so
+        * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
         */
-       zone_info->max_zone_append_size =
-               min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
-                     (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+       if (bdev_is_zoned(bdev)) {
+               zone_info->max_zone_append_size = min_t(u64,
+                       (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
+                       (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+       } else {
+               zone_info->max_zone_append_size =
+                       (u64)bdev_max_segments(bdev) << PAGE_SHIFT;
+       }
        if (!IS_ALIGNED(nr_sectors, zone_sectors))
                zone_info->nr_zones++;
 
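As a rough illustration with assumed numbers: an emulated (non-zoned) device reporting 128 max segments with 4 KiB pages now gets a pseudo limit of 128 * 4096 = 512 KiB, whereas the previous unconditional min() against bdev_max_zone_append_sectors(), which is 0 for a non-zoned device, collapsed max_zone_append_size to 0.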
@@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
  * offset.
  */
 static int calculate_alloc_pointer(struct btrfs_block_group *cache,
-                                  u64 *offset_ret)
+                                  u64 *offset_ret, bool new)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_root *root;
@@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
        int ret;
        u64 length;
 
+       /*
+        * Avoid tree lookups for a new block group, there's no use for it.
+        * It must always be 0.
+        *
+        * Also, we have a lock chain of extent buffer lock -> chunk mutex.
+        * For a new block group, this function is called from
+        * btrfs_make_block_group() which is already taking the chunk mutex.
+        * Thus, we cannot call calculate_alloc_pointer() which takes extent
+        * buffer locks to avoid deadlock.
+        */
+       if (new) {
+               *offset_ret = 0;
+               return 0;
+       }
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                else
                        num_conventional++;
 
+               /*
+                * Consider a zone as active if we can allow any number of
+                * active zones.
+                */
+               if (!device->zone_info->max_active_zones)
+                       __set_bit(i, active);
+
                if (!is_sequential) {
                        alloc_offsets[i] = WP_CONVENTIONAL;
                        continue;
@@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                        __set_bit(i, active);
                        break;
                }
-
-               /*
-                * Consider a zone as active if we can allow any number of
-                * active zones.
-                */
-               if (!device->zone_info->max_active_zones)
-                       __set_bit(i, active);
        }
 
        if (num_sequential > 0)
                cache->seq_zone = true;
 
        if (num_conventional > 0) {
-               /*
-                * Avoid calling calculate_alloc_pointer() for new BG. It
-                * is no use for new BG. It must be always 0.
-                *
-                * Also, we have a lock chain of extent buffer lock ->
-                * chunk mutex.  For new BG, this function is called from
-                * btrfs_make_block_group() which is already taking the
-                * chunk mutex. Thus, we cannot call
-                * calculate_alloc_pointer() which takes extent buffer
-                * locks to avoid deadlock.
-                */
-
                /* Zone capacity is always zone size in emulation */
                cache->zone_capacity = cache->length;
-               if (new) {
-                       cache->alloc_offset = 0;
-                       goto out;
-               }
-               ret = calculate_alloc_pointer(cache, &last_alloc);
-               if (ret || map->num_stripes == num_conventional) {
-                       if (!ret)
-                               cache->alloc_offset = last_alloc;
-                       else
-                               btrfs_err(fs_info,
+               ret = calculate_alloc_pointer(cache, &last_alloc, new);
+               if (ret) {
+                       btrfs_err(fs_info,
                        "zoned: failed to determine allocation offset of bg %llu",
-                                         cache->start);
+                                 cache->start);
+                       goto out;
+               } else if (map->num_stripes == num_conventional) {
+                       cache->alloc_offset = last_alloc;
+                       cache->zone_is_active = 1;
                        goto out;
                }
        }
@@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                goto out;
        }
 
-       if (cache->zone_is_active) {
-               btrfs_get_block_group(cache);
-               spin_lock(&fs_info->zone_active_bgs_lock);
-               list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
-               spin_unlock(&fs_info->zone_active_bgs_lock);
-       }
-
 out:
        if (cache->alloc_offset > fs_info->zone_size) {
                btrfs_err(fs_info,
@@ -1526,10 +1528,16 @@ out:
                ret = -EIO;
        }
 
-       if (!ret)
+       if (!ret) {
                cache->meta_write_pointer = cache->alloc_offset + cache->start;
-
-       if (ret) {
+               if (cache->zone_is_active) {
+                       btrfs_get_block_group(cache);
+                       spin_lock(&fs_info->zone_active_bgs_lock);
+                       list_add_tail(&cache->active_bg_list,
+                                     &fs_info->zone_active_bgs);
+                       spin_unlock(&fs_info->zone_active_bgs_lock);
+               }
+       } else {
                kfree(cache->physical_map);
                cache->physical_map = NULL;
        }
@@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
        /* For active_bg_list */
        btrfs_put_block_group(block_group);
 
-       clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
-       wake_up_all(&fs_info->zone_finish_wait);
+       clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
 
        return 0;
 }
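The clear_and_wake_up_bit() call pairs with the wait_on_bit_io() sleeper added in the run_delalloc_zoned() hunk above: both use the bit waitqueue derived from fs_info->flags and the BTRFS_FS_NEED_ZONE_FINISH bit, so the wake-up actually reaches the waiters. The removed wait_var_event()/wake_up_all() combination slept on a hashed per-variable waitqueue while waking the now-deleted wait_queue_head, which is the API misuse named in the commit subject.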