btrfs: replace btrfs_io_context::raid_map with a fixed u64 value
author Qu Wenruo <wqu@suse.com>
Fri, 17 Feb 2023 05:37:03 +0000 (13:37 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 17 Apr 2023 16:01:14 +0000 (18:01 +0200)
In btrfs_io_context structure, we have a pointer raid_map, which
indicates the logical bytenr for each stripe.

But since we always call sort_parity_stripes(), the resulting
raid_map[] is always sorted, thus raid_map[0] is always the logical
bytenr of the full stripe.

So why do we waste space and time (for sorting) on raid_map?

This patch will replace btrfs_io_context::raid_map with a single u64
number, full_stripe_logical, by:

- Replace btrfs_io_context::raid_map with full_stripe_logical

- Replace call sites using raid_map[0] to use full_stripe_logical

- Replace call sites using raid_map[i] with comparisons against
  nr_data_stripes.

The benefits are:

- Less memory wasted on raid_map
  It's sizeof(u64) * num_stripes vs sizeof(u64).
  It'll always save at least one u64, and the benefit grows larger with
  num_stripes.

- No more weird alloc_btrfs_io_context() behavior
  The allocation is now just the fixed-size structure plus one
  variable-length array (stripes[]).

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/raid56.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
include/trace/events/btrfs.h

index 0ac1fc7896dd4c9a32d44ac56631674215644215..6cbbaa6c06ca6c8e45a853a8cee08cfe95d6fe9b 100644 (file)
@@ -202,7 +202,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
  */
 static int rbio_bucket(struct btrfs_raid_bio *rbio)
 {
-       u64 num = rbio->bioc->raid_map[0];
+       u64 num = rbio->bioc->full_stripe_logical;
 
        /*
         * we shift down quite a bit.  We're using byte
@@ -567,7 +567,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
            test_bit(RBIO_CACHE_BIT, &cur->flags))
                return 0;
 
-       if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
+       if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical)
                return 0;
 
        /* we can't merge with different operations */
@@ -661,7 +661,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 
        spin_lock(&h->lock);
        list_for_each_entry(cur, &h->hash_list, hash_list) {
-               if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
+               if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical)
                        continue;
 
                spin_lock(&cur->bio_list_lock);
@@ -1113,7 +1113,7 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
        struct bio_vec bvec;
        struct bvec_iter iter;
        u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
-                    rbio->bioc->raid_map[0];
+                    rbio->bioc->full_stripe_logical;
 
        bio_for_each_segment(bvec, bio, iter) {
                u32 bvec_offset;
@@ -1337,7 +1337,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
 {
        struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
        u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
-                    rbio->bioc->raid_map[0];
+                    rbio->bioc->full_stripe_logical;
        int total_nr_sector = offset >> fs_info->sectorsize_bits;
 
        ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors);
@@ -1614,7 +1614,7 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
 {
        const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
        const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
-       const u64 full_stripe_start = rbio->bioc->raid_map[0];
+       const u64 full_stripe_start = rbio->bioc->full_stripe_logical;
        const u32 orig_len = orig_bio->bi_iter.bi_size;
        const u32 sectorsize = fs_info->sectorsize;
        u64 cur_logical;
@@ -1801,9 +1801,8 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
                 * here due to a crc mismatch and we can't give them the
                 * data they want.
                 */
-               if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
-                       if (rbio->bioc->raid_map[faila] ==
-                           RAID5_P_STRIPE)
+               if (failb == rbio->real_stripes - 1) {
+                       if (faila == rbio->real_stripes - 2)
                                /*
                                 * Only P and Q are corrupted.
                                 * We only care about data stripes recovery,
@@ -1817,7 +1816,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
                        goto pstripe;
                }
 
-               if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
+               if (failb == rbio->real_stripes - 2) {
                        raid6_datap_recov(rbio->real_stripes, sectorsize,
                                          faila, pointers);
                } else {
@@ -2080,8 +2079,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
 {
        struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
        struct btrfs_root *csum_root = btrfs_csum_root(fs_info,
-                                                      rbio->bioc->raid_map[0]);
-       const u64 start = rbio->bioc->raid_map[0];
+                                                      rbio->bioc->full_stripe_logical);
+       const u64 start = rbio->bioc->full_stripe_logical;
        const u32 len = (rbio->nr_data * rbio->stripe_nsectors) <<
                        fs_info->sectorsize_bits;
        int ret;
@@ -2129,7 +2128,7 @@ error:
         */
        btrfs_warn_rl(fs_info,
 "sub-stripe write for full stripe %llu is not safe, failed to get csum: %d",
-                       rbio->bioc->raid_map[0], ret);
+                       rbio->bioc->full_stripe_logical, ret);
 no_csum:
        kfree(rbio->csum_buf);
        bitmap_free(rbio->csum_bitmap);
@@ -2385,10 +2384,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
        int stripe_offset;
        int index;
 
-       ASSERT(logical >= rbio->bioc->raid_map[0]);
-       ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] +
+       ASSERT(logical >= rbio->bioc->full_stripe_logical);
+       ASSERT(logical + sectorsize <= rbio->bioc->full_stripe_logical +
                                       BTRFS_STRIPE_LEN * rbio->nr_data);
-       stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
+       stripe_offset = (int)(logical - rbio->bioc->full_stripe_logical);
        index = stripe_offset / sectorsize;
        rbio->bio_sectors[index].page = page;
        rbio->bio_sectors[index].pgoff = pgoff;
index 64b52be6bf0b819d04e6641ef4e77829ace959d4..91aeac36ebc93ea1938c23df04a770b8652312f2 100644 (file)
@@ -1430,7 +1430,7 @@ static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
 }
 
 static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
-                                                u64 *raid_map,
+                                                u64 full_stripe_logical,
                                                 int nstripes, int mirror,
                                                 int *stripe_index,
                                                 u64 *stripe_offset)
@@ -1438,19 +1438,22 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
        int i;
 
        if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+               const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ?
+                                           nstripes - 1 : nstripes - 2;
+
                /* RAID5/6 */
-               for (i = 0; i < nstripes; i++) {
-                       if (raid_map[i] == RAID6_Q_STRIPE ||
-                           raid_map[i] == RAID5_P_STRIPE)
-                               continue;
+               for (i = 0; i < nr_data_stripes; i++) {
+                       const u64 data_stripe_start = full_stripe_logical +
+                                               (i * BTRFS_STRIPE_LEN);
 
-                       if (logical >= raid_map[i] &&
-                           logical < raid_map[i] + BTRFS_STRIPE_LEN)
+                       if (logical >= data_stripe_start &&
+                           logical < data_stripe_start + BTRFS_STRIPE_LEN)
                                break;
                }
 
                *stripe_index = i;
-               *stripe_offset = logical - raid_map[i];
+               *stripe_offset = (logical - full_stripe_logical) &
+                                BTRFS_STRIPE_LEN_MASK;
        } else {
                /* The other RAID type */
                *stripe_index = mirror;
@@ -1538,7 +1541,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 
                        scrub_stripe_index_and_offset(logical,
                                                      bioc->map_type,
-                                                     bioc->raid_map,
+                                                     bioc->full_stripe_logical,
                                                      bioc->num_stripes -
                                                      bioc->replace_nr_stripes,
                                                      mirror_index,
@@ -2398,7 +2401,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
        btrfs_bio_counter_inc_blocked(fs_info);
        ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
                               &length, &bioc);
-       if (ret || !bioc || !bioc->raid_map)
+       if (ret || !bioc)
                goto bioc_out;
 
        if (WARN_ON(!sctx->is_dev_replace ||
@@ -3007,7 +3010,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
        btrfs_bio_counter_inc_blocked(fs_info);
        ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
                               &length, &bioc);
-       if (ret || !bioc || !bioc->raid_map)
+       if (ret || !bioc)
                goto bioc_out;
 
        bio = bio_alloc(NULL, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS);
index 8f06f0e47ba8197257033fc74481f572e8c8fcb7..b7e1d7dc4509a7698e3b6ceaa3524b0594b656cb 100644 (file)
@@ -5894,25 +5894,6 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
        return preferred_mirror;
 }
 
-/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
-static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes)
-{
-       int i;
-       int again = 1;
-
-       while (again) {
-               again = 0;
-               for (i = 0; i < num_stripes - 1; i++) {
-                       /* Swap if parity is on a smaller index */
-                       if (bioc->raid_map[i] > bioc->raid_map[i + 1]) {
-                               swap(bioc->stripes[i], bioc->stripes[i + 1]);
-                               swap(bioc->raid_map[i], bioc->raid_map[i + 1]);
-                               again = 1;
-                       }
-               }
-       }
-}
-
 static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
                                                       u16 total_stripes)
 {
@@ -5922,12 +5903,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
                 /* The size of btrfs_io_context */
                sizeof(struct btrfs_io_context) +
                /* Plus the variable array for the stripes */
-               sizeof(struct btrfs_io_stripe) * (total_stripes) +
-               /*
-                * Plus the raid_map, which includes both the tgt dev
-                * and the stripes.
-                */
-               sizeof(u64) * (total_stripes),
+               sizeof(struct btrfs_io_stripe) * (total_stripes),
                GFP_NOFS);
 
        if (!bioc)
@@ -5936,8 +5912,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
        refcount_set(&bioc->refs, 1);
 
        bioc->fs_info = fs_info;
-       bioc->raid_map = (u64 *)(bioc->stripes + total_stripes);
        bioc->replace_stripe_src = -1;
+       bioc->full_stripe_logical = (u64)-1;
 
        return bioc;
 }
@@ -6541,33 +6517,39 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        }
        bioc->map_type = map->type;
 
-       for (i = 0; i < num_stripes; i++) {
-               set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset,
-                             stripe_nr);
-               stripe_index++;
-       }
-
-       /* Build raid_map */
+       /*
+        * For RAID56 full map, we need to make sure the stripes[] follows the
+        * rule that data stripes are all ordered, then followed with P and Q
+        * (if we have).
+        *
+        * It's still mostly the same as other profiles, just with extra rotation.
+        */
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
            (need_full_stripe(op) || mirror_num > 1)) {
-               u64 tmp;
-               unsigned rot;
-
-               /* Work out the disk rotation on this stripe-set */
-               rot = stripe_nr % num_stripes;
-
-               /* Fill in the logical address of each stripe */
-               tmp = stripe_nr * data_stripes;
-               for (i = 0; i < data_stripes; i++)
-                       bioc->raid_map[(i + rot) % num_stripes] =
-                               em->start + ((tmp + i) << BTRFS_STRIPE_LEN_SHIFT);
-
-               bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE;
-               if (map->type & BTRFS_BLOCK_GROUP_RAID6)
-                       bioc->raid_map[(i + rot + 1) % num_stripes] =
-                               RAID6_Q_STRIPE;
-
-               sort_parity_stripes(bioc, num_stripes);
+               /*
+                * For RAID56 @stripe_nr is already the number of full stripes
+                * before us, which is also the rotation value (needs to modulo
+                * with num_stripes).
+                *
+                * In this case, we just add @stripe_nr with @i, then do the
+                * modulo, to reduce one modulo call.
+                */
+               bioc->full_stripe_logical = em->start +
+                       ((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT);
+               for (i = 0; i < num_stripes; i++)
+                       set_io_stripe(&bioc->stripes[i], map,
+                                     (i + stripe_nr) % num_stripes,
+                                     stripe_offset, stripe_nr);
+       } else {
+               /*
+                * For all other non-RAID56 profiles, just copy the target
+                * stripe into the bioc.
+                */
+               for (i = 0; i < num_stripes; i++) {
+                       set_io_stripe(&bioc->stripes[i], map, stripe_index,
+                                     stripe_offset, stripe_nr);
+                       stripe_index++;
+               }
        }
 
        if (need_full_stripe(op))
index e86e9f25ba0fc4be9beff42789ad2a212bec25a6..650e131d079e477017a203958199923e278d0d2f 100644 (file)
@@ -460,11 +460,22 @@ struct btrfs_io_context {
        u16 replace_nr_stripes;
        s16 replace_stripe_src;
        /*
-        * logical block numbers for the start of each stripe
-        * The last one or two are p/q.  These are sorted,
-        * so raid_map[0] is the start of our full stripe
+        * Logical bytenr of the full stripe start, only for RAID56 cases.
+        *
+        * When this value is set to other than (u64)-1, the stripes[] should
+        * follow this pattern:
+        *
+        * (real_stripes = num_stripes - replace_nr_stripes)
+        * (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1))
+        *
+        * stripes[0]:                  The first data stripe
+        * stripes[1]:                  The second data stripe
+        * ...
+        * stripes[data_stripes - 1]:   The last data stripe
+        * stripes[data_stripes]:       The P stripe
+        * stripes[data_stripes + 1]:   The Q stripe (only for RAID6).
         */
-       u64 *raid_map;
+       u64 full_stripe_logical;
        struct btrfs_io_stripe stripes[];
 };
 
index 75d7d22c3a276c5a16b8a44b7a5dbe89c1e6cfb4..8ea9cea9bfeb4dd44b5886c128cc4e5752664897 100644 (file)
@@ -2422,7 +2422,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio,
        ),
 
        TP_fast_assign_btrfs(rbio->bioc->fs_info,
-               __entry->full_stripe    = rbio->bioc->raid_map[0];
+               __entry->full_stripe    = rbio->bioc->full_stripe_logical;
                __entry->physical       = bio->bi_iter.bi_sector << SECTOR_SHIFT;
                __entry->len            = bio->bi_iter.bi_size;
                __entry->opf            = bio_op(bio);