Merge tag 'i2c-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa...
[linux-block.git] / fs / btrfs / volumes.c
index c6d59287040019a17ac4796f1f69f105a316178e..03f52e4a20aa9b695c7bb498e1e4a9e16b25488b 100644 (file)
@@ -395,7 +395,6 @@ void btrfs_free_device(struct btrfs_device *device)
 {
        WARN_ON(!list_empty(&device->post_commit_list));
        rcu_string_free(device->name);
-       extent_io_tree_release(&device->alloc_state);
        btrfs_destroy_dev_zone_info(device);
        kfree(device);
 }
@@ -1150,10 +1149,10 @@ static void btrfs_close_one_device(struct btrfs_device *device)
        device->last_flush_error = 0;
 
        /* Verify the device is back in a pristine state  */
-       ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
-       ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
-       ASSERT(list_empty(&device->dev_alloc_list));
-       ASSERT(list_empty(&device->post_commit_list));
+       WARN_ON(test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
+       WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
+       WARN_ON(!list_empty(&device->dev_alloc_list));
+       WARN_ON(!list_empty(&device->post_commit_list));
 }
 
 static void close_fs_devices(struct btrfs_fs_devices *fs_devices)
@@ -2618,7 +2617,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        struct block_device *bdev;
        struct super_block *sb = fs_info->sb;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
-       struct btrfs_fs_devices *seed_devices;
+       struct btrfs_fs_devices *seed_devices = NULL;
        u64 orig_super_total_bytes;
        u64 orig_super_num_devices;
        int ret = 0;
@@ -5125,7 +5124,7 @@ static void init_alloc_chunk_ctl_policy_regular(
        /* We don't want a chunk larger than 10% of writable space */
        ctl->max_chunk_size = min(mult_perc(fs_devices->total_rw_bytes, 10),
                                  ctl->max_chunk_size);
-       ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
+       ctl->dev_extent_min = ctl->dev_stripes << BTRFS_STRIPE_LEN_SHIFT;
 }
 
 static void init_alloc_chunk_ctl_policy_zoned(
@@ -5407,7 +5406,6 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
                                                   j * ctl->stripe_size;
                }
        }
-       map->stripe_len = BTRFS_STRIPE_LEN;
        map->io_align = BTRFS_STRIPE_LEN;
        map->io_width = BTRFS_STRIPE_LEN;
        map->type = type;
@@ -5438,7 +5436,7 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
        }
        write_unlock(&em_tree->lock);
 
-       block_group = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size);
+       block_group = btrfs_make_block_group(trans, type, start, ctl->chunk_size);
        if (IS_ERR(block_group))
                goto error_del_extent;
 
@@ -5615,11 +5613,11 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
 
        btrfs_set_stack_chunk_length(chunk, bg->length);
        btrfs_set_stack_chunk_owner(chunk, BTRFS_EXTENT_TREE_OBJECTID);
-       btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
+       btrfs_set_stack_chunk_stripe_len(chunk, BTRFS_STRIPE_LEN);
        btrfs_set_stack_chunk_type(chunk, map->type);
        btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
-       btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
-       btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
+       btrfs_set_stack_chunk_io_align(chunk, BTRFS_STRIPE_LEN);
+       btrfs_set_stack_chunk_io_width(chunk, BTRFS_STRIPE_LEN);
        btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
        btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
 
@@ -5784,13 +5782,6 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
                 */
                ret = map->num_stripes;
        free_extent_map(em);
-
-       down_read(&fs_info->dev_replace.rwsem);
-       if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
-           fs_info->dev_replace.tgtdev)
-               ret++;
-       up_read(&fs_info->dev_replace.rwsem);
-
        return ret;
 }
 
@@ -5809,7 +5800,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
        if (!WARN_ON(IS_ERR(em))) {
                map = em->map_lookup;
                if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
-                       len = map->stripe_len * nr_data_stripes(map);
+                       len = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT;
                free_extent_map(em);
        }
        return len;
@@ -5895,41 +5886,16 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
        return preferred_mirror;
 }
 
-/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
-static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes)
-{
-       int i;
-       int again = 1;
-
-       while (again) {
-               again = 0;
-               for (i = 0; i < num_stripes - 1; i++) {
-                       /* Swap if parity is on a smaller index */
-                       if (bioc->raid_map[i] > bioc->raid_map[i + 1]) {
-                               swap(bioc->stripes[i], bioc->stripes[i + 1]);
-                               swap(bioc->raid_map[i], bioc->raid_map[i + 1]);
-                               again = 1;
-                       }
-               }
-       }
-}
-
 static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
-                                                      int total_stripes,
-                                                      int real_stripes)
+                                                      u16 total_stripes)
 {
-       struct btrfs_io_context *bioc = kzalloc(
+       struct btrfs_io_context *bioc;
+
+       bioc = kzalloc(
                 /* The size of btrfs_io_context */
                sizeof(struct btrfs_io_context) +
                /* Plus the variable array for the stripes */
-               sizeof(struct btrfs_io_stripe) * (total_stripes) +
-               /* Plus the variable array for the tgt dev */
-               sizeof(int) * (real_stripes) +
-               /*
-                * Plus the raid_map, which includes both the tgt dev
-                * and the stripes.
-                */
-               sizeof(u64) * (total_stripes),
+               sizeof(struct btrfs_io_stripe) * (total_stripes),
                GFP_NOFS);
 
        if (!bioc)
@@ -5938,8 +5904,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
        refcount_set(&bioc->refs, 1);
 
        bioc->fs_info = fs_info;
-       bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes);
-       bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes);
+       bioc->replace_stripe_src = -1;
+       bioc->full_stripe_logical = (u64)-1;
 
        return bioc;
 }
@@ -5971,16 +5937,15 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
        struct btrfs_discard_stripe *stripes;
        u64 length = *length_ret;
        u64 offset;
-       u64 stripe_nr;
-       u64 stripe_nr_end;
+       u32 stripe_nr;
+       u32 stripe_nr_end;
+       u32 stripe_cnt;
        u64 stripe_end_offset;
-       u64 stripe_cnt;
-       u64 stripe_len;
        u64 stripe_offset;
        u32 stripe_index;
        u32 factor = 0;
        u32 sub_stripes = 0;
-       u64 stripes_per_dev = 0;
+       u32 stripes_per_dev = 0;
        u32 remaining_stripes = 0;
        u32 last_stripe = 0;
        int ret;
@@ -5996,26 +5961,25 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                ret = -EOPNOTSUPP;
                goto out_free_map;
-}
+       }
 
        offset = logical - em->start;
        length = min_t(u64, em->start + em->len - logical, length);
        *length_ret = length;
 
-       stripe_len = map->stripe_len;
        /*
         * stripe_nr counts the total number of stripes we have to stride
         * to get to this block
         */
-       stripe_nr = div64_u64(offset, stripe_len);
+       stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
 
        /* stripe_offset is the offset of this block in its stripe */
-       stripe_offset = offset - stripe_nr * stripe_len;
+       stripe_offset = offset - (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
 
-       stripe_nr_end = round_up(offset + length, map->stripe_len);
-       stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
+       stripe_nr_end = round_up(offset + length, BTRFS_STRIPE_LEN) >>
+                       BTRFS_STRIPE_LEN_SHIFT;
        stripe_cnt = stripe_nr_end - stripe_nr;
-       stripe_end_offset = stripe_nr_end * map->stripe_len -
+       stripe_end_offset = (stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) -
                            (offset + length);
        /*
         * after this, stripe_nr is the number of stripes on this
@@ -6034,18 +5998,19 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
                factor = map->num_stripes / sub_stripes;
                *num_stripes = min_t(u64, map->num_stripes,
                                    sub_stripes * stripe_cnt);
-               stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+               stripe_index = stripe_nr % factor;
+               stripe_nr /= factor;
                stripe_index *= sub_stripes;
-               stripes_per_dev = div_u64_rem(stripe_cnt, factor,
-                                             &remaining_stripes);
-               div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
-               last_stripe *= sub_stripes;
+
+               remaining_stripes = stripe_cnt % factor;
+               stripes_per_dev = stripe_cnt / factor;
+               last_stripe = ((stripe_nr_end - 1) % factor) * sub_stripes;
        } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
                                BTRFS_BLOCK_GROUP_DUP)) {
                *num_stripes = map->num_stripes;
        } else {
-               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
-                                       &stripe_index);
+               stripe_index = stripe_nr % map->num_stripes;
+               stripe_nr /= map->num_stripes;
        }
 
        stripes = kcalloc(*num_stripes, sizeof(*stripes), GFP_NOFS);
@@ -6057,15 +6022,15 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
        for (i = 0; i < *num_stripes; i++) {
                stripes[i].physical =
                        map->stripes[stripe_index].physical +
-                       stripe_offset + stripe_nr * map->stripe_len;
+                       stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
                stripes[i].dev = map->stripes[stripe_index].dev;
 
                if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
                                 BTRFS_BLOCK_GROUP_RAID10)) {
-                       stripes[i].length = stripes_per_dev * map->stripe_len;
+                       stripes[i].length = stripes_per_dev << BTRFS_STRIPE_LEN_SHIFT;
 
                        if (i / sub_stripes < remaining_stripes)
-                               stripes[i].length += map->stripe_len;
+                               stripes[i].length += BTRFS_STRIPE_LEN;
 
                        /*
                         * Special for the first stripe and
@@ -6103,83 +6068,6 @@ out_free_map:
        return ERR_PTR(ret);
 }
 
-/*
- * In dev-replace case, for repair case (that's the only case where the mirror
- * is selected explicitly when calling btrfs_map_block), blocks left of the
- * left cursor can also be read from the target drive.
- *
- * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
- * array of stripes.
- * For READ, it also needs to be supported using the same mirror number.
- *
- * If the requested block is not left of the left cursor, EIO is returned. This
- * can happen because btrfs_num_copies() returns one more in the dev-replace
- * case.
- */
-static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
-                                        u64 logical, u64 length,
-                                        u64 srcdev_devid, int *mirror_num,
-                                        u64 *physical)
-{
-       struct btrfs_io_context *bioc = NULL;
-       int num_stripes;
-       int index_srcdev = 0;
-       int found = 0;
-       u64 physical_of_found = 0;
-       int i;
-       int ret = 0;
-
-       ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-                               logical, &length, &bioc, NULL, NULL, 0);
-       if (ret) {
-               ASSERT(bioc == NULL);
-               return ret;
-       }
-
-       num_stripes = bioc->num_stripes;
-       if (*mirror_num > num_stripes) {
-               /*
-                * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
-                * that means that the requested area is not left of the left
-                * cursor
-                */
-               btrfs_put_bioc(bioc);
-               return -EIO;
-       }
-
-       /*
-        * process the rest of the function using the mirror_num of the source
-        * drive. Therefore look it up first.  At the end, patch the device
-        * pointer to the one of the target drive.
-        */
-       for (i = 0; i < num_stripes; i++) {
-               if (bioc->stripes[i].dev->devid != srcdev_devid)
-                       continue;
-
-               /*
-                * In case of DUP, in order to keep it simple, only add the
-                * mirror with the lowest physical address
-                */
-               if (found &&
-                   physical_of_found <= bioc->stripes[i].physical)
-                       continue;
-
-               index_srcdev = i;
-               found = 1;
-               physical_of_found = bioc->stripes[i].physical;
-       }
-
-       btrfs_put_bioc(bioc);
-
-       ASSERT(found);
-       if (!found)
-               return -EIO;
-
-       *mirror_num = index_srcdev + 1;
-       *physical = physical_of_found;
-       return ret;
-}
-
 static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
 {
        struct btrfs_block_group *cache;
@@ -6198,101 +6086,80 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
 }
 
 static void handle_ops_on_dev_replace(enum btrfs_map_op op,
-                                     struct btrfs_io_context **bioc_ret,
+                                     struct btrfs_io_context *bioc,
                                      struct btrfs_dev_replace *dev_replace,
                                      u64 logical,
                                      int *num_stripes_ret, int *max_errors_ret)
 {
-       struct btrfs_io_context *bioc = *bioc_ret;
        u64 srcdev_devid = dev_replace->srcdev->devid;
-       int tgtdev_indexes = 0;
+       /*
+        * At this stage, num_stripes is still the real number of stripes,
+        * excluding the duplicated stripes.
+        */
        int num_stripes = *num_stripes_ret;
+       int nr_extra_stripes = 0;
        int max_errors = *max_errors_ret;
        int i;
 
-       if (op == BTRFS_MAP_WRITE) {
-               int index_where_to_add;
+       /*
+        * A block group which has "to_copy" set will eventually be copied by
+        * the dev-replace process. We can avoid cloning IO here.
+        */
+       if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical))
+               return;
 
-               /*
-                * A block group which have "to_copy" set will eventually
-                * copied by dev-replace process. We can avoid cloning IO here.
-                */
-               if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical))
-                       return;
+       /*
+        * Duplicate the write operations while the dev-replace procedure is
+        * running. Since the copying of the old disk to the new disk takes
+        * place at run time while the filesystem is mounted writable, the
+        * regular write operations to the old disk have to be duplicated to go
+        * to the new disk as well.
+        *
+        * Note that device->missing is handled by the caller, and that the
+        * write to the old disk is already set up in the stripes array.
+        */
+       for (i = 0; i < num_stripes; i++) {
+               struct btrfs_io_stripe *old = &bioc->stripes[i];
+               struct btrfs_io_stripe *new = &bioc->stripes[num_stripes + nr_extra_stripes];
 
-               /*
-                * duplicate the write operations while the dev replace
-                * procedure is running. Since the copying of the old disk to
-                * the new disk takes place at run time while the filesystem is
-                * mounted writable, the regular write operations to the old
-                * disk have to be duplicated to go to the new disk as well.
-                *
-                * Note that device->missing is handled by the caller, and that
-                * the write to the old disk is already set up in the stripes
-                * array.
-                */
-               index_where_to_add = num_stripes;
-               for (i = 0; i < num_stripes; i++) {
-                       if (bioc->stripes[i].dev->devid == srcdev_devid) {
-                               /* write to new disk, too */
-                               struct btrfs_io_stripe *new =
-                                       bioc->stripes + index_where_to_add;
-                               struct btrfs_io_stripe *old =
-                                       bioc->stripes + i;
-
-                               new->physical = old->physical;
-                               new->dev = dev_replace->tgtdev;
-                               bioc->tgtdev_map[i] = index_where_to_add;
-                               index_where_to_add++;
-                               max_errors++;
-                               tgtdev_indexes++;
-                       }
-               }
-               num_stripes = index_where_to_add;
-       } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
-               int index_srcdev = 0;
-               int found = 0;
-               u64 physical_of_found = 0;
+               if (old->dev->devid != srcdev_devid)
+                       continue;
 
-               /*
-                * During the dev-replace procedure, the target drive can also
-                * be used to read data in case it is needed to repair a corrupt
-                * block elsewhere. This is possible if the requested area is
-                * left of the left cursor. In this area, the target drive is a
-                * full copy of the source drive.
-                */
-               for (i = 0; i < num_stripes; i++) {
-                       if (bioc->stripes[i].dev->devid == srcdev_devid) {
-                               /*
-                                * In case of DUP, in order to keep it simple,
-                                * only add the mirror with the lowest physical
-                                * address
-                                */
-                               if (found &&
-                                   physical_of_found <= bioc->stripes[i].physical)
-                                       continue;
-                               index_srcdev = i;
-                               found = 1;
-                               physical_of_found = bioc->stripes[i].physical;
-                       }
-               }
-               if (found) {
-                       struct btrfs_io_stripe *tgtdev_stripe =
-                               bioc->stripes + num_stripes;
+               new->physical = old->physical;
+               new->dev = dev_replace->tgtdev;
+               if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+                       bioc->replace_stripe_src = i;
+               nr_extra_stripes++;
+       }
+
+       /* We can have at most 2 extra stripes (for DUP). */
+       ASSERT(nr_extra_stripes <= 2);
+       /*
+        * For GET_READ_MIRRORS, we can only return at most 1 extra stripe for
+        * replace.
+        * If we have 2 extra stripes, only choose the one with the smaller physical address.
+        */
+       if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) {
+               struct btrfs_io_stripe *first = &bioc->stripes[num_stripes];
+               struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1];
 
-                       tgtdev_stripe->physical = physical_of_found;
-                       tgtdev_stripe->dev = dev_replace->tgtdev;
-                       bioc->tgtdev_map[index_srcdev] = num_stripes;
+               /* Only DUP can have two extra stripes. */
+               ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP);
 
-                       tgtdev_indexes++;
-                       num_stripes++;
+               /*
+                * Swap the last two stripes and reduce @nr_extra_stripes.
+                * The extra stripe would still be there, but won't be accessed.
+                */
+               if (first->physical > second->physical) {
+                       swap(second->physical, first->physical);
+                       swap(second->dev, first->dev);
+                       nr_extra_stripes--;
                }
        }
 
-       *num_stripes_ret = num_stripes;
-       *max_errors_ret = max_errors;
-       bioc->num_tgtdevs = tgtdev_indexes;
-       *bioc_ret = bioc;
+       *num_stripes_ret = num_stripes + nr_extra_stripes;
+       *max_errors_ret = max_errors + nr_extra_stripes;
+       bioc->replace_nr_stripes = nr_extra_stripes;
 }
 
 static bool need_full_stripe(enum btrfs_map_op op)
@@ -6301,25 +6168,35 @@ static bool need_full_stripe(enum btrfs_map_op op)
 }
 
 static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
-                           u64 offset, u64 *stripe_nr, u64 *stripe_offset,
+                           u64 offset, u32 *stripe_nr, u64 *stripe_offset,
                            u64 *full_stripe_start)
 {
-       u32 stripe_len = map->stripe_len;
-
        ASSERT(op != BTRFS_MAP_DISCARD);
 
        /*
         * Stripe_nr is the stripe where this block falls.  stripe_offset is
         * the offset of this block in its stripe.
         */
-       *stripe_nr = div64_u64_rem(offset, stripe_len, stripe_offset);
+       *stripe_offset = offset & BTRFS_STRIPE_LEN_MASK;
+       *stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
        ASSERT(*stripe_offset < U32_MAX);
 
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
+               unsigned long full_stripe_len = nr_data_stripes(map) <<
+                                               BTRFS_STRIPE_LEN_SHIFT;
 
+               /*
+                * For the full stripe start, reuse the previously calculated
+                * @stripe_nr: align it down to nr_data_stripes, then multiply
+                * by STRIPE_LEN.
+                *
+                * This avoids u64 division completely.  We have to use
+                * rounddown(), not round_down(), as nr_data_stripes is not
+                * guaranteed to be a power of 2.
+                */
                *full_stripe_start =
-                       div64_u64(offset, full_stripe_len) * full_stripe_len;
+                       rounddown(*stripe_nr, nr_data_stripes(map)) <<
+                       BTRFS_STRIPE_LEN_SHIFT;
 
                /*
                 * For writes to RAID56, allow to write a full stripe set, but
@@ -6334,16 +6211,16 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
         * a single disk).
         */
        if (map->type & BTRFS_BLOCK_GROUP_STRIPE_MASK)
-               return stripe_len - *stripe_offset;
+               return BTRFS_STRIPE_LEN - *stripe_offset;
        return U64_MAX;
 }
 
 static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
-                         u32 stripe_index, u64 stripe_offset, u64 stripe_nr)
+                         u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
 {
        dst->dev = map->stripes[stripe_index].dev;
        dst->physical = map->stripes[stripe_index].physical +
-                       stripe_offset + stripe_nr * map->stripe_len;
+                       stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
 }
 
 int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
@@ -6356,35 +6233,35 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        struct map_lookup *map;
        u64 map_offset;
        u64 stripe_offset;
-       u64 stripe_nr;
-       u64 stripe_len;
+       u32 stripe_nr;
        u32 stripe_index;
        int data_stripes;
        int i;
        int ret = 0;
        int mirror_num = (mirror_num_ret ? *mirror_num_ret : 0);
        int num_stripes;
+       int num_copies;
        int max_errors = 0;
-       int tgtdev_indexes = 0;
        struct btrfs_io_context *bioc = NULL;
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
        int dev_replace_is_ongoing = 0;
-       int num_alloc_stripes;
-       int patch_the_first_stripe_for_dev_replace = 0;
-       u64 physical_to_patch_in_first_stripe = 0;
+       u16 num_alloc_stripes;
        u64 raid56_full_stripe_start = (u64)-1;
        u64 max_len;
 
        ASSERT(bioc_ret);
        ASSERT(op != BTRFS_MAP_DISCARD);
 
+       num_copies = btrfs_num_copies(fs_info, logical, fs_info->sectorsize);
+       if (mirror_num > num_copies)
+               return -EINVAL;
+
        em = btrfs_get_chunk_map(fs_info, logical, *length);
        if (IS_ERR(em))
                return PTR_ERR(em);
 
        map = em->map_lookup;
        data_stripes = nr_data_stripes(map);
-       stripe_len = map->stripe_len;
 
        map_offset = logical - em->start;
        max_len = btrfs_max_io_len(map, op, map_offset, &stripe_nr,
@@ -6400,25 +6277,11 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        if (!dev_replace_is_ongoing)
                up_read(&dev_replace->rwsem);
 
-       if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
-           !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
-               ret = get_extra_mirror_from_replace(fs_info, logical, *length,
-                                                   dev_replace->srcdev->devid,
-                                                   &mirror_num,
-                                           &physical_to_patch_in_first_stripe);
-               if (ret)
-                       goto out;
-               else
-                       patch_the_first_stripe_for_dev_replace = 1;
-       } else if (mirror_num > map->num_stripes) {
-               mirror_num = 0;
-       }
-
        num_stripes = 1;
        stripe_index = 0;
        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
-                               &stripe_index);
+               stripe_index = stripe_nr % map->num_stripes;
+               stripe_nr /= map->num_stripes;
                if (!need_full_stripe(op))
                        mirror_num = 1;
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
@@ -6444,8 +6307,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                u32 factor = map->num_stripes / map->sub_stripes;
 
-               stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
-               stripe_index *= map->sub_stripes;
+               stripe_index = (stripe_nr % factor) * map->sub_stripes;
+               stripe_nr /= factor;
 
                if (need_full_stripe(op))
                        num_stripes = map->sub_stripes;
@@ -6460,11 +6323,17 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               ASSERT(map->stripe_len == BTRFS_STRIPE_LEN);
                if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
-                       /* push stripe_nr back to the start of the full stripe */
-                       stripe_nr = div64_u64(raid56_full_stripe_start,
-                                       stripe_len * data_stripes);
+                       /*
+                        * Push stripe_nr back to the start of the full stripe.
+                        * For those cases needing a full stripe, @stripe_nr
+                        * is the full stripe number.
+                        *
+                        * Originally this was raid56_full_stripe_start /
+                        * full_stripe_len, but that can be expensive.  Here we
+                        * just divide @stripe_nr by @data_stripes.
+                        */
+                       stripe_nr /= data_stripes;
 
                        /* RAID[56] write or recovery. Return all stripes */
                        num_stripes = map->num_stripes;
@@ -6473,7 +6342,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                        /* Return the length to the full stripe end */
                        *length = min(logical + *length,
                                      raid56_full_stripe_start + em->start +
-                                     data_stripes * stripe_len) - logical;
+                                     (data_stripes << BTRFS_STRIPE_LEN_SHIFT)) - logical;
                        stripe_index = 0;
                        stripe_offset = 0;
                } else {
@@ -6482,25 +6351,24 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                         * Mirror #2 is RAID5 parity block.
                         * Mirror #3 is RAID6 Q block.
                         */
-                       stripe_nr = div_u64_rem(stripe_nr,
-                                       data_stripes, &stripe_index);
+                       stripe_index = stripe_nr % data_stripes;
+                       stripe_nr /= data_stripes;
                        if (mirror_num > 1)
                                stripe_index = data_stripes + mirror_num - 2;
 
                        /* We distribute the parity blocks across stripes */
-                       div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
-                                       &stripe_index);
+                       stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
                        if (!need_full_stripe(op) && mirror_num <= 1)
                                mirror_num = 1;
                }
        } else {
                /*
-                * after this, stripe_nr is the number of stripes on this
+                * After this, stripe_nr is the number of stripes on this
                 * device we have to walk to find the data, and stripe_index is
                 * the number of our device in the stripe array
                 */
-               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
-                               &stripe_index);
+               stripe_index = stripe_nr % map->num_stripes;
+               stripe_nr /= map->num_stripes;
                mirror_num = stripe_index + 1;
        }
        if (stripe_index >= map->num_stripes) {
@@ -6512,13 +6380,16 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        }
 
        num_alloc_stripes = num_stripes;
-       if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
-               if (op == BTRFS_MAP_WRITE)
-                       num_alloc_stripes <<= 1;
-               if (op == BTRFS_MAP_GET_READ_MIRRORS)
-                       num_alloc_stripes++;
-               tgtdev_indexes = num_stripes;
-       }
+       if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
+           op != BTRFS_MAP_READ)
+               /*
+                * For replace case, we need to add extra stripes for extra
+                * duplicated stripes.
+                *
+                * For both WRITE and GET_READ_MIRRORS, we may have at most
+                * 2 more stripes (DUP types, otherwise 1).
+                */
+               num_alloc_stripes += 2;
 
        /*
         * If this I/O maps to a single device, try to return the device and
@@ -6529,53 +6400,53 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
            !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
            (!need_full_stripe(op) || !dev_replace_is_ongoing ||
             !dev_replace->tgtdev)) {
-               if (patch_the_first_stripe_for_dev_replace) {
-                       smap->dev = dev_replace->tgtdev;
-                       smap->physical = physical_to_patch_in_first_stripe;
-                       *mirror_num_ret = map->num_stripes + 1;
-               } else {
-                       set_io_stripe(smap, map, stripe_index, stripe_offset,
-                                     stripe_nr);
-                       *mirror_num_ret = mirror_num;
-               }
+               set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
+               *mirror_num_ret = mirror_num;
                *bioc_ret = NULL;
                ret = 0;
                goto out;
        }
 
-       bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes);
+       bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes);
        if (!bioc) {
                ret = -ENOMEM;
                goto out;
        }
+       bioc->map_type = map->type;
 
-       for (i = 0; i < num_stripes; i++) {
-               set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset,
-                             stripe_nr);
-               stripe_index++;
-       }
-
-       /* Build raid_map */
+       /*
+        * For RAID56 full map, we need to make sure the stripes[] follows the
+        * rule that data stripes are all ordered, then followed with P and Q
+        * (if we have).
+        *
+        * It's still mostly the same as other profiles, just with extra rotation.
+        */
        if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
            (need_full_stripe(op) || mirror_num > 1)) {
-               u64 tmp;
-               unsigned rot;
-
-               /* Work out the disk rotation on this stripe-set */
-               div_u64_rem(stripe_nr, num_stripes, &rot);
-
-               /* Fill in the logical address of each stripe */
-               tmp = stripe_nr * data_stripes;
-               for (i = 0; i < data_stripes; i++)
-                       bioc->raid_map[(i + rot) % num_stripes] =
-                               em->start + (tmp + i) * map->stripe_len;
-
-               bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE;
-               if (map->type & BTRFS_BLOCK_GROUP_RAID6)
-                       bioc->raid_map[(i + rot + 1) % num_stripes] =
-                               RAID6_Q_STRIPE;
-
-               sort_parity_stripes(bioc, num_stripes);
+               /*
+                * For RAID56 @stripe_nr is already the number of full stripes
+                * before us, which is also the rotation value (needs to modulo
+                * with num_stripes).
+                *
+                * In this case, we just add @stripe_nr with @i, then do the
+                * modulo, to reduce one modulo call.
+                */
+               bioc->full_stripe_logical = em->start +
+                       ((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT);
+               for (i = 0; i < num_stripes; i++)
+                       set_io_stripe(&bioc->stripes[i], map,
+                                     (i + stripe_nr) % num_stripes,
+                                     stripe_offset, stripe_nr);
+       } else {
+               /*
+                * For all other non-RAID56 profiles, just copy the target
+                * stripe into the bioc.
+                */
+               for (i = 0; i < num_stripes; i++) {
+                       set_io_stripe(&bioc->stripes[i], map, stripe_index,
+                                     stripe_offset, stripe_nr);
+                       stripe_index++;
+               }
        }
 
        if (need_full_stripe(op))
@@ -6583,27 +6454,15 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 
        if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
            need_full_stripe(op)) {
-               handle_ops_on_dev_replace(op, &bioc, dev_replace, logical,
+               handle_ops_on_dev_replace(op, bioc, dev_replace, logical,
                                          &num_stripes, &max_errors);
        }
 
        *bioc_ret = bioc;
-       bioc->map_type = map->type;
        bioc->num_stripes = num_stripes;
        bioc->max_errors = max_errors;
        bioc->mirror_num = mirror_num;
 
-       /*
-        * this is the case that REQ_READ && dev_replace_is_ongoing &&
-        * mirror_num == num_stripes + 1 && dev_replace target drive is
-        * available as a mirror
-        */
-       if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
-               WARN_ON(num_stripes > 1);
-               bioc->stripes[0].dev = dev_replace->tgtdev;
-               bioc->stripes[0].physical = physical_to_patch_in_first_stripe;
-               bioc->mirror_num = map->num_stripes + 1;
-       }
 out:
        if (dev_replace_is_ongoing) {
                lockdep_assert_held(&dev_replace->rwsem);
@@ -6941,7 +6800,6 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
        map->num_stripes = num_stripes;
        map->io_width = btrfs_chunk_io_width(leaf, chunk);
        map->io_align = btrfs_chunk_io_align(leaf, chunk);
-       map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
        map->type = type;
        /*
         * We can't use the sub_stripes value, as for profiles other than
@@ -8161,3 +8019,76 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
 
        return true;
 }
+
+static void map_raid56_repair_block(struct btrfs_io_context *bioc,
+                                   struct btrfs_io_stripe *smap,
+                                   u64 logical)
+{
+       int data_stripes = nr_bioc_data_stripes(bioc);
+       int i;
+
+       for (i = 0; i < data_stripes; i++) { /* Find the data stripe that contains @logical. */
+               u64 stripe_start = bioc->full_stripe_logical +
+                                  (i << BTRFS_STRIPE_LEN_SHIFT);
+
+               if (logical >= stripe_start &&
+                   logical < stripe_start + BTRFS_STRIPE_LEN)
+                       break;
+       }
+       ASSERT(i < data_stripes); /* @logical must lie inside one of the data stripes. */
+       smap->dev = bioc->stripes[i].dev;
+       smap->physical = bioc->stripes[i].physical +
+                       ((logical - bioc->full_stripe_logical) &
+                        BTRFS_STRIPE_LEN_MASK); /* Presumably the offset inside the stripe. */
+}
+
+/*
+ * Map a repair write to a single device, filling @smap with its dev/physical.
+ *
+ * A repair write is triggered by read-time repair or scrub and only updates
+ * the contents of a single device; it does not update any other mirror and
+ * does not go through the RMW path. Returns 0, or the mapping error (< 0).
+ *
+ * Callers must ensure that:
+ *
+ * - btrfs_bio_counter_inc_blocked() has been called first
+ * - the range does not cross a stripe boundary
+ * - a valid @mirror_num (> 0) is passed in
+ */
+int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
+                          struct btrfs_io_stripe *smap, u64 logical,
+                          u32 length, int mirror_num)
+{
+       struct btrfs_io_context *bioc = NULL;
+       u64 map_length = length;
+       int mirror_ret = mirror_num;
+       int ret;
+
+       ASSERT(mirror_num > 0);
+
+       ret = __btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
+                               &bioc, smap, &mirror_ret, true);
+       if (ret < 0)
+               return ret;
+
+       /* The mapping must cover @length: no stripe boundary is crossed. */
+       ASSERT(map_length >= length);
+
+       /* Already mapped to single stripe. */
+       if (!bioc)
+               goto out;
+
+       /* RAID56: reduce the multi-stripe mapping to the one data stripe hit. */
+       if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+               map_raid56_repair_block(bioc, smap, logical);
+               goto out;
+       }
+
+       ASSERT(mirror_num <= bioc->num_stripes); /* @mirror_num is 1-based. */
+       smap->dev = bioc->stripes[mirror_num - 1].dev;
+       smap->physical = bioc->stripes[mirror_num - 1].physical;
+out:
+       btrfs_put_bioc(bioc); /* Also reached with bioc == NULL; presumably a no-op then -- verify. */
+       ASSERT(smap->dev);
+       return 0;
+}