btrfs: lookup physical address from stripe extent
author Johannes Thumshirn <johannes.thumshirn@wdc.com>
Thu, 14 Sep 2023 16:07:00 +0000 (09:07 -0700)
committer David Sterba <dsterba@suse.com>
Thu, 12 Oct 2023 14:44:09 +0000 (16:44 +0200)
Look up the physical address in the raid stripe tree when a read is
attempted on a RAID volume formatted with the raid stripe tree.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/raid-stripe-tree.c
fs/btrfs/raid-stripe-tree.h
fs/btrfs/volumes.c

index f2e052dfb0a5d9846e6ec559e48300a386323d19..c7e18a85f72336006f89e7f4544f2cd7fe46ddc5 100644 (file)
@@ -142,3 +142,120 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
 
        return ret;
 }
+/*
+ * Look up, in the raid stripe tree, the physical address at which @logical is
+ * stored on the device already recorded in @stripe->dev.
+ *
+ * @fs_info:      filesystem whose stripe_root is searched
+ * @logical:      logical start address of the range being mapped
+ * @length:       in: length of that range; out: shortened when the range runs
+ *                past the end of the stripe extent item containing @logical
+ * @map_type:     block group flags; used for the encoding check, the DUP test
+ *                and the profile name in the error message
+ * @stripe_index: only consulted for DUP, where the stride index must equal it
+ * @stripe:       @stripe->dev must be set by the caller; on success
+ *                @stripe->physical is filled in
+ *
+ * Return: 0 on success, -ENOMEM if no path can be allocated, a negative error
+ * from btrfs_search_slot() or btrfs_next_item(), -EUCLEAN if the item's
+ * encoding does not match @map_type, or -ENOENT if no matching item or stride
+ * is found.
+ *
+ * NOTE(review): the error message at the out label prints logical + *length
+ * after *length may already have been shortened, and it is also emitted on
+ * the -EUCLEAN path.
+ */
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+                                u64 logical, u64 *length, u64 map_type,
+                                u32 stripe_index, struct btrfs_io_stripe *stripe)
+{
+       struct btrfs_root *stripe_root = fs_info->stripe_root;
+       struct btrfs_stripe_extent *stripe_extent;
+       struct btrfs_key stripe_key;
+       struct btrfs_key found_key;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       const u64 end = logical + *length;      /* end of the requested range */
+       int num_stripes;
+       u8 encoding;
+       u64 offset;             /* distance of @logical into the found item */
+       u64 found_logical;
+       u64 found_length;
+       u64 found_end;
+       int slot;
+       int ret;
+
+       stripe_key.objectid = logical;
+       stripe_key.type = BTRFS_RAID_STRIPE_KEY;
+       stripe_key.offset = 0;  /* found items carry their length here, see below */
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
+       if (ret < 0)
+               goto free_path;
+       if (ret) {              /* presumably: no exact key match */
+               if (path->slots[0] != 0)
+                       path->slots[0]--;       /* start from the preceding item */
+       }
+
+       while (1) {             /* walk forward until an item passes in_range() */
+               leaf = path->nodes[0];
+               slot = path->slots[0];
+
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+               found_logical = found_key.objectid;     /* start of the item's range */
+               found_length = found_key.offset;        /* length of the item's range */
+               found_end = found_logical + found_length;
+
+               if (found_logical > end) {      /* item starts beyond the request */
+                       ret = -ENOENT;
+                       goto out;
+               }
+
+               if (in_range(logical, found_logical, found_length))
+                       break;
+
+               ret = btrfs_next_item(stripe_root, path);
+               if (ret)        /* error, or (presumably) no further items if > 0 */
+                       goto out;
+       }
+
+       offset = logical - found_logical;
+
+       /*
+        * If we have a logically contiguous, but physically non-contiguous
+        * range, we need to split the bio. Record the length after which we
+        * must split the bio: *length is cut back so that the range ends at
+        * found_end.
+        */
+       if (end > found_end)
+               *length -= end - found_end;
+
+       num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
+       stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
+       encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
+
+       if (encoding != btrfs_bg_flags_to_raid_index(map_type)) {
+               ret = -EUCLEAN;
+               btrfs_handle_fs_error(fs_info, ret,
+                                     "on-disk stripe encoding %d doesn't match RAID index %d",
+                                     encoding,
+                                     btrfs_bg_flags_to_raid_index(map_type));
+               goto out;
+       }
+
+       for (int i = 0; i < num_stripes; i++) {
+               struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
+               u64 devid = btrfs_raid_stride_devid(leaf, stride);
+               u64 physical = btrfs_raid_stride_physical(leaf, stride);
+
+               if (devid != stripe->dev->devid)
+                       continue;       /* stride belongs to another device */
+
+               if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
+                       continue;       /* DUP: index must match too (copies presumably share a devid) */
+
+               stripe->physical = physical + offset;
+
+               ret = 0;
+               goto free_path; /* success skips the error reporting below */
+       }
+
+       /* If we're here, we haven't found the requested devid in the stripe. */
+       ret = -ENOENT;
+out:
+       if (ret > 0)            /* positive value from btrfs_next_item() */
+               ret = -ENOENT;
+       if (ret && ret != -EIO) {       /* every error except -EIO is logged */
+               if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+                       btrfs_print_tree(leaf, 1);
+               btrfs_err(fs_info,
+               "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
+                         logical, logical + *length, stripe->dev->devid,
+                         btrfs_bg_type_to_raid_name(map_type));
+       }
+free_path:
+       btrfs_free_path(path);
+
+       return ret;
+}
index 9476131a64972d79d40e4bd5cd9721bf0154866e..f0f3c99b8a6f29ebda61114edd35f9c02ab25198 100644 (file)
@@ -12,6 +12,9 @@ struct btrfs_ordered_extent;
 struct btrfs_trans_handle;
 
 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length);
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+                                u64 logical, u64 *length, u64 map_type,
+                                u32 stripe_index, struct btrfs_io_stripe *stripe);
 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_ordered_extent *ordered_extent);
 
@@ -33,4 +36,10 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
        return false;
 }
 
+/*
+ * Number of struct btrfs_raid_stride entries that fit in a stripe extent item
+ * of @item_size bytes, after the part of struct btrfs_stripe_extent that
+ * precedes its strides[] member.
+ */
+static inline int btrfs_num_raid_stripes(u32 item_size)
+{
+       const size_t header_size = offsetof(struct btrfs_stripe_extent, strides);
+
+       return (item_size - header_size) / sizeof(struct btrfs_raid_stride);
+}
+
 #endif
index 4f01dec76f1d1e7a49b3a0d61b0a1a4696a9a626..af0abc775b7fdfb3fbfe6cfbd7b478e1dafa5cf3 100644 (file)
@@ -35,6 +35,7 @@
 #include "relocation.h"
 #include "scrub.h"
 #include "super.h"
+#include "raid-stripe-tree.h"
 
 #define BTRFS_BLOCK_GROUP_STRIPE_MASK  (BTRFS_BLOCK_GROUP_RAID0 | \
                                         BTRFS_BLOCK_GROUP_RAID10 | \
@@ -6231,12 +6232,20 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
        return U64_MAX;
 }
 
-static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
-                         u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
+static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+                        u64 logical, u64 *length, struct btrfs_io_stripe *dst,
+                        struct map_lookup *map, u32 stripe_index,
+                        u64 stripe_offset, u64 stripe_nr)
 {
        dst->dev = map->stripes[stripe_index].dev;
+       /*
+        * Reads for which btrfs_need_stripe_tree_update() is true take their
+        * physical address from the raid stripe tree instead of the chunk
+        * map. The lookup reads dst->dev, which was set just above, and may
+        * shorten *length; its return value is handed straight back to the
+        * caller. Every other case computes the address from the chunk map
+        * below and returns 0.
+        */
+       if (op == BTRFS_MAP_READ && btrfs_need_stripe_tree_update(fs_info, map->type))
+               return btrfs_get_raid_extent_offset(fs_info, logical, length,
+                                                   map->type, stripe_index, dst);
+
        dst->physical = map->stripes[stripe_index].physical +
                        stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
+       return 0;
 }
 
 /*
@@ -6445,11 +6454,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
         */
        if (smap && num_alloc_stripes == 1 &&
            !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
-               set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
+               ret = set_io_stripe(fs_info, op, logical, length, smap, map,
+                                   stripe_index, stripe_offset, stripe_nr);
                if (mirror_num_ret)
                        *mirror_num_ret = mirror_num;
                *bioc_ret = NULL;
-               ret = 0;
                goto out;
        }
 
@@ -6479,22 +6488,35 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                 */
                bioc->full_stripe_logical = em->start +
                        btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
-               for (i = 0; i < num_stripes; i++)
-                       set_io_stripe(&bioc->stripes[i], map,
-                                     (i + stripe_nr) % num_stripes,
-                                     stripe_offset, stripe_nr);
+               for (int i = 0; i < num_stripes; i++) {
+                       ret = set_io_stripe(fs_info, op, logical, length,
+                                           &bioc->stripes[i], map,
+                                           (i + stripe_nr) % num_stripes,
+                                           stripe_offset, stripe_nr);
+                       if (ret < 0)
+                               break;
+               }
        } else {
                /*
                 * For all other non-RAID56 profiles, just copy the target
                 * stripe into the bioc.
                 */
                for (i = 0; i < num_stripes; i++) {
-                       set_io_stripe(&bioc->stripes[i], map, stripe_index,
-                                     stripe_offset, stripe_nr);
+                       ret = set_io_stripe(fs_info, op, logical, length,
+                                           &bioc->stripes[i], map, stripe_index,
+                                           stripe_offset, stripe_nr);
+                       if (ret < 0)
+                               break;
                        stripe_index++;
                }
        }
 
+       if (ret) {
+               *bioc_ret = NULL;
+               btrfs_put_bioc(bioc);
+               goto out;
+       }
+
        if (op != BTRFS_MAP_READ)
                max_errors = btrfs_chunk_max_errors(map);