btrfs: use rbtree with leftmost node cached for tracking lowest block group
author Filipe Manana <fdmanana@suse.com>
Wed, 13 Apr 2022 15:20:40 +0000 (16:20 +0100)
committer David Sterba <dsterba@suse.com>
Mon, 16 May 2022 15:03:13 +0000 (17:03 +0200)
We keep track of the start offset of the block group with the lowest start
offset in fs_info->first_logical_byte. This requires explicitly updating
that field every time we add a block group to, delete a block group from,
or look up a block group in the red-black tree at
fs_info->block_group_cache_tree.

Since the block group with the lowest start address is always the leftmost
node of the tree, we can instead use a red-black tree that caches its
leftmost node. Whenever we need the start address of that block group, we
can quickly get the leftmost node of the tree and extract the start offset
from that node's block group. This avoids the need to explicitly keep
track of that address in the dedicated member fs_info->first_logical_byte,
and it also allows the next patch in the series to switch the lock that
protects the red-black tree from a spin lock to a read/write lock -
without this change that would be tricky, because block group searches
also update fs_info->first_logical_byte.
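
For illustration, the following is a minimal sketch of the cached-leftmost
rbtree pattern this patch switches to. The rbtree API used (rb_root_cached,
RB_ROOT_CACHED, rb_link_node(), rb_insert_color_cached(), rb_first_cached())
comes from <linux/rbtree.h>; the example_* names are hypothetical and not
btrfs code, and duplicate keys are simply sent right for brevity (btrfs
itself returns -EEXIST in that case):

  #include <linux/rbtree.h>
  #include <linux/types.h>

  struct example_group {
          u64 start;
          struct rb_node node;
  };

  static struct rb_root_cached example_tree = RB_ROOT_CACHED;

  static void example_insert(struct example_group *eg)
  {
          struct rb_node **p = &example_tree.rb_root.rb_node;
          struct rb_node *parent = NULL;
          bool leftmost = true;

          while (*p) {
                  struct example_group *cur;

                  parent = *p;
                  cur = rb_entry(parent, struct example_group, node);
                  if (eg->start < cur->start) {
                          p = &(*p)->rb_left;
                  } else {
                          /*
                           * We descended right at least once, so the new
                           * node cannot be the leftmost one.
                           */
                          p = &(*p)->rb_right;
                          leftmost = false;
                  }
          }

          rb_link_node(&eg->node, parent, p);
          /* The hint keeps the tree's cached leftmost pointer current. */
          rb_insert_color_cached(&eg->node, &example_tree, leftmost);
  }

  /* O(1): rb_first_cached() just reads the cached rb_leftmost pointer. */
  static u64 example_lowest_start(void)
  {
          struct rb_node *l = rb_first_cached(&example_tree);

          return l ? rb_entry(l, struct example_group, node)->start : 0;
  }

Since rb_first_cached() is a plain read of the cached pointer, getting the
lowest start offset stays constant time without a dedicated field like
fs_info->first_logical_byte.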

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-tree.c

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 7bf10afab89c77afe3a950aad3c13aa050267b4a..a91938ab7ff876944815485f95696050d1010f77 100644
@@ -168,11 +168,12 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_block_group *cache;
+       bool leftmost = true;
 
        ASSERT(block_group->length != 0);
 
        spin_lock(&info->block_group_cache_lock);
-       p = &info->block_group_cache_tree.rb_node;
+       p = &info->block_group_cache_tree.rb_root.rb_node;
 
        while (*p) {
                parent = *p;
@@ -181,6 +182,7 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
                        p = &(*p)->rb_left;
                } else if (block_group->start > cache->start) {
                        p = &(*p)->rb_right;
+                       leftmost = false;
                } else {
                        spin_unlock(&info->block_group_cache_lock);
                        return -EEXIST;
@@ -188,11 +190,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
        }
 
        rb_link_node(&block_group->cache_node, parent, p);
-       rb_insert_color(&block_group->cache_node,
-                       &info->block_group_cache_tree);
-
-       if (info->first_logical_byte > block_group->start)
-               info->first_logical_byte = block_group->start;
+       rb_insert_color_cached(&block_group->cache_node,
+                              &info->block_group_cache_tree, leftmost);
 
        spin_unlock(&info->block_group_cache_lock);
 
@@ -211,7 +210,7 @@ static struct btrfs_block_group *block_group_cache_tree_search(
        u64 end, start;
 
        spin_lock(&info->block_group_cache_lock);
-       n = info->block_group_cache_tree.rb_node;
+       n = info->block_group_cache_tree.rb_root.rb_node;
 
        while (n) {
                cache = rb_entry(n, struct btrfs_block_group, cache_node);
@@ -233,11 +232,8 @@ static struct btrfs_block_group *block_group_cache_tree_search(
                        break;
                }
        }
-       if (ret) {
+       if (ret)
                btrfs_get_block_group(ret);
-               if (bytenr == 0 && info->first_logical_byte > ret->start)
-                       info->first_logical_byte = ret->start;
-       }
        spin_unlock(&info->block_group_cache_lock);
 
        return ret;
@@ -958,15 +954,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                goto out;
 
        spin_lock(&fs_info->block_group_cache_lock);
-       rb_erase(&block_group->cache_node,
-                &fs_info->block_group_cache_tree);
+       rb_erase_cached(&block_group->cache_node,
+                       &fs_info->block_group_cache_tree);
        RB_CLEAR_NODE(&block_group->cache_node);
 
        /* Once for the block groups rbtree */
        btrfs_put_block_group(block_group);
 
-       if (fs_info->first_logical_byte == block_group->start)
-               fs_info->first_logical_byte = (u64)-1;
        spin_unlock(&fs_info->block_group_cache_lock);
 
        down_write(&block_group->space_info->groups_sem);
@@ -4014,11 +4008,11 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        spin_unlock(&info->zone_active_bgs_lock);
 
        spin_lock(&info->block_group_cache_lock);
-       while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
+       while ((n = rb_last(&info->block_group_cache_tree.rb_root)) != NULL) {
                block_group = rb_entry(n, struct btrfs_block_group,
                                       cache_node);
-               rb_erase(&block_group->cache_node,
-                        &info->block_group_cache_tree);
+               rb_erase_cached(&block_group->cache_node,
+                               &info->block_group_cache_tree);
                RB_CLEAR_NODE(&block_group->cache_node);
                spin_unlock(&info->block_group_cache_lock);
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 63fb20d3fcc6cd5e1634f833cc7116eed3966338..ae8a083aa1de0819c62a400e9c900e7e18054364 100644
@@ -680,8 +680,7 @@ struct btrfs_fs_info {
 
        /* block group cache stuff */
        spinlock_t block_group_cache_lock;
-       u64 first_logical_byte;
-       struct rb_root block_group_cache_tree;
+       struct rb_root_cached block_group_cache_tree;
 
        /* keep track of unallocated space */
        atomic64_t free_chunk_space;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c8661709a425ba1cf399243cbc2e7a2f25674221..7e8bb00720eb2025cc2a830ce6c081b5d145615f 100644
@@ -3232,8 +3232,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
        btrfs_init_async_reclaim_work(fs_info);
 
        spin_lock_init(&fs_info->block_group_cache_lock);
-       fs_info->block_group_cache_tree = RB_ROOT;
-       fs_info->first_logical_byte = (u64)-1;
+       fs_info->block_group_cache_tree = RB_ROOT_CACHED;
 
        extent_io_tree_init(fs_info, &fs_info->excluded_extents,
                            IO_TREE_FS_EXCLUDED_EXTENTS, NULL);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2a718727541c52bc860f7376f99308cb0a1a47a4..cd79a5f4c643b95277e23c528be0007133462e17 100644
@@ -2494,23 +2494,19 @@ static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
 
 static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
 {
-       struct btrfs_block_group *cache;
-       u64 bytenr;
+       struct rb_node *leftmost;
+       u64 bytenr = 0;
 
        spin_lock(&fs_info->block_group_cache_lock);
-       bytenr = fs_info->first_logical_byte;
-       spin_unlock(&fs_info->block_group_cache_lock);
-
-       if (bytenr < (u64)-1)
-               return bytenr;
-
        /* Get the block group with the lowest logical start address. */
-       cache = btrfs_lookup_first_block_group(fs_info, 0);
-       if (!cache)
-               return 0;
+       leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
+       if (leftmost) {
+               struct btrfs_block_group *bg;
 
-       bytenr = cache->start;
-       btrfs_put_block_group(cache);
+               bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
+               bytenr = bg->start;
+       }
+       spin_unlock(&fs_info->block_group_cache_lock);
 
        return bytenr;
 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ef84bc5030cd8e8565242fbea8f29da06664781f..f7adee6fa05ecea67aae04f30f1f1bb290caae75 100644
@@ -4072,7 +4072,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
 
        btrfs_info(fs_info, "cleaning free space cache v1");
 
-       node = rb_first(&fs_info->block_group_cache_tree);
+       node = rb_first_cached(&fs_info->block_group_cache_tree);
        while (node) {
                block_group = rb_entry(node, struct btrfs_block_group, cache_node);
                ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 0ae54d8c10d646cd8e22bb7d8a452f55669a69f8..1bf89aa672160b264469c59e486bbcaf3a7768b5 100644
@@ -1178,7 +1178,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
                goto abort;
        }
 
-       node = rb_first(&fs_info->block_group_cache_tree);
+       node = rb_first_cached(&fs_info->block_group_cache_tree);
        while (node) {
                block_group = rb_entry(node, struct btrfs_block_group,
                                       cache_node);