btrfs: raid56: store a physical address in structure sector_ptr
author: Qu Wenruo <wqu@suse.com>
Wed, 9 Apr 2025 11:10:40 +0000 (13:10 +0200)
committer: David Sterba <dsterba@suse.com>
Thu, 15 May 2025 12:30:46 +0000 (14:30 +0200)
Instead of using a @page + @pg_offset pair inside the sector_ptr
structure, use a single physical address.

This allows us to grab both the page and offset from a single u64 value.
Although we still need an extra bool value, @has_paddr, to distinguish
whether the sector is properly mapped, since a physical address of 0 is
perfectly valid.

This change doesn't alter the size of structure sector_ptr, but reduces
the number of parameters of several functions.

Note: the original idea and patch is from Christoph Hellwig
(https://lore.kernel.org/linux-btrfs/20250409111055.3640328-7-hch@lst.de/)
but the final implementation is different.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[ Use physical addresses instead to handle highmem. ]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/raid56.c

index 6e1d9cdd59c185ea539da7ac1a6ffc07410886ac..09ae1018221e81ed4591329462a229193fac2817 100644 (file)
@@ -134,14 +134,17 @@ struct btrfs_stripe_hash_table {
 };
 
 /*
- * A bvec like structure to present a sector inside a page.
- *
- * Unlike bvec we don't need bvlen, as it's fixed to sectorsize.
+ * A structure to present a sector inside a page, the length is fixed to
+ * sectorsize;
  */
 struct sector_ptr {
-       struct page *page;
-       unsigned int pgoff:24;
-       unsigned int uptodate:8;
+       /*
+        * Blocks from the bio list can still be highmem.
+        * So here we use physical address to present a page and the offset inside it.
+        */
+       phys_addr_t paddr;
+       bool has_paddr;
+       bool uptodate;
 };
 
 static void rmw_rbio_work(struct work_struct *work);
@@ -233,6 +236,14 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
        return 0;
 }
 
+static void memcpy_sectors(const struct sector_ptr *dst,
+                          const struct sector_ptr *src, u32 blocksize)
+{
+       memcpy_page(phys_to_page(dst->paddr), offset_in_page(dst->paddr),
+                   phys_to_page(src->paddr), offset_in_page(src->paddr),
+                   blocksize);
+}
+
 /*
  * caching an rbio means to copy anything from the
  * bio_sectors array into the stripe_pages array.  We
@@ -253,7 +264,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
 
        for (i = 0; i < rbio->nr_sectors; i++) {
                /* Some range not covered by bio (partial write), skip it */
-               if (!rbio->bio_sectors[i].page) {
+               if (!rbio->bio_sectors[i].has_paddr) {
                        /*
                         * Even if the sector is not covered by bio, if it is
                         * a data sector it should still be uptodate as it is
@@ -264,12 +275,8 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
                        continue;
                }
 
-               ASSERT(rbio->stripe_sectors[i].page);
-               memcpy_page(rbio->stripe_sectors[i].page,
-                           rbio->stripe_sectors[i].pgoff,
-                           rbio->bio_sectors[i].page,
-                           rbio->bio_sectors[i].pgoff,
-                           rbio->bioc->fs_info->sectorsize);
+               memcpy_sectors(&rbio->stripe_sectors[i], &rbio->bio_sectors[i],
+                               rbio->bioc->fs_info->sectorsize);
                rbio->stripe_sectors[i].uptodate = 1;
        }
        set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -326,8 +333,13 @@ static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
                int page_index = offset >> PAGE_SHIFT;
 
                ASSERT(page_index < rbio->nr_pages);
-               rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
-               rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
+               if (!rbio->stripe_pages[page_index])
+                       continue;
+
+               rbio->stripe_sectors[i].has_paddr = true;
+               rbio->stripe_sectors[i].paddr =
+                       page_to_phys(rbio->stripe_pages[page_index]) +
+                       offset_in_page(offset);
        }
 }
 
@@ -962,9 +974,9 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
 
        spin_lock(&rbio->bio_list_lock);
        sector = &rbio->bio_sectors[index];
-       if (sector->page || bio_list_only) {
+       if (sector->has_paddr || bio_list_only) {
                /* Don't return sector without a valid page pointer */
-               if (!sector->page)
+               if (!sector->has_paddr)
                        sector = NULL;
                spin_unlock(&rbio->bio_list_lock);
                return sector;
@@ -1142,7 +1154,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
                           rbio, stripe_nr);
        ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
                           rbio, sector_nr);
-       ASSERT(sector->page);
+       ASSERT(sector->has_paddr);
 
        stripe = &rbio->bioc->stripes[stripe_nr];
        disk_start = stripe->physical + sector_nr * sectorsize;
@@ -1173,8 +1185,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
                 */
                if (last_end == disk_start && !last->bi_status &&
                    last->bi_bdev == stripe->dev->bdev) {
-                       ret = bio_add_page(last, sector->page, sectorsize,
-                                          sector->pgoff);
+                       ret = bio_add_page(last, phys_to_page(sector->paddr),
+                                          sectorsize, offset_in_page(sector->paddr));
                        if (ret == sectorsize)
                                return 0;
                }
@@ -1187,7 +1199,8 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
        bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
        bio->bi_private = rbio;
 
-       __bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
+       __bio_add_page(bio, phys_to_page(sector->paddr), sectorsize,
+                      offset_in_page(sector->paddr));
        bio_list_add(bio_list, bio);
        return 0;
 }
@@ -1205,10 +1218,8 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
                struct sector_ptr *sector = &rbio->bio_sectors[index];
                struct bio_vec bv = bio_iter_iovec(bio, iter);
 
-               sector->page = bv.bv_page;
-               sector->pgoff = bv.bv_offset;
-               ASSERT(sector->pgoff < PAGE_SIZE);
-
+               sector->has_paddr = true;
+               sector->paddr = bvec_phys(&bv);
                bio_advance_iter_single(bio, &iter, sectorsize);
                offset += sectorsize;
        }
@@ -1288,6 +1299,15 @@ static void assert_rbio(struct btrfs_raid_bio *rbio)
        ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio);
 }
 
+static inline void *kmap_local_sector(const struct sector_ptr *sector)
+{
+       /* The sector pointer must have a page mapped to it. */
+       ASSERT(sector->has_paddr);
+
+       return kmap_local_page(phys_to_page(sector->paddr)) +
+              offset_in_page(sector->paddr);
+}
+
 /* Generate PQ for one vertical stripe. */
 static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
 {
@@ -1300,14 +1320,13 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
        /* First collect one sector from each data stripe */
        for (stripe = 0; stripe < rbio->nr_data; stripe++) {
                sector = sector_in_rbio(rbio, stripe, sectornr, 0);
-               pointers[stripe] = kmap_local_page(sector->page) +
-                                  sector->pgoff;
+               pointers[stripe] = kmap_local_sector(sector);
        }
 
        /* Then add the parity stripe */
        sector = rbio_pstripe_sector(rbio, sectornr);
        sector->uptodate = 1;
-       pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
+       pointers[stripe++] = kmap_local_sector(sector);
 
        if (has_qstripe) {
                /*
@@ -1316,8 +1335,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
                 */
                sector = rbio_qstripe_sector(rbio, sectornr);
                sector->uptodate = 1;
-               pointers[stripe++] = kmap_local_page(sector->page) +
-                                    sector->pgoff;
+               pointers[stripe++] = kmap_local_sector(sector);
 
                assert_rbio(rbio);
                raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
@@ -1476,15 +1494,14 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
  * stripe_pages[], thus we need to locate the sector.
  */
 static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
-                                            struct page *page,
-                                            unsigned int pgoff)
+                                            phys_addr_t paddr)
 {
        int i;
 
        for (i = 0; i < rbio->nr_sectors; i++) {
                struct sector_ptr *sector = &rbio->stripe_sectors[i];
 
-               if (sector->page == page && sector->pgoff == pgoff)
+               if (sector->has_paddr && sector->paddr == paddr)
                        return sector;
        }
        return NULL;
@@ -1504,11 +1521,10 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
 
        bio_for_each_segment_all(bvec, bio, iter_all) {
                struct sector_ptr *sector;
-               int pgoff;
+               phys_addr_t paddr = bvec_phys(bvec);
 
-               for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
-                    pgoff += sectorsize) {
-                       sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
+               for (u32 off = 0; off < bvec->bv_len; off += sectorsize) {
+                       sector = find_stripe_sector(rbio, paddr + off);
                        ASSERT(sector);
                        if (sector)
                                sector->uptodate = 1;
@@ -1518,17 +1534,14 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
 
 static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
 {
-       struct bio_vec *bv = bio_first_bvec_all(bio);
+       phys_addr_t bvec_paddr = bvec_phys(bio_first_bvec_all(bio));
        int i;
 
        for (i = 0; i < rbio->nr_sectors; i++) {
-               struct sector_ptr *sector;
-
-               sector = &rbio->stripe_sectors[i];
-               if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
+               if (rbio->stripe_sectors[i].paddr == bvec_paddr)
                        break;
-               sector = &rbio->bio_sectors[i];
-               if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
+               if (rbio->bio_sectors[i].has_paddr &&
+                   rbio->bio_sectors[i].paddr == bvec_paddr)
                        break;
        }
        ASSERT(i < rbio->nr_sectors);
@@ -1810,12 +1823,10 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
                sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
        }
 
-       ASSERT(sector->page);
-
-       kaddr = kmap_local_page(sector->page) + sector->pgoff;
        csum_expected = rbio->csum_buf +
                        (stripe_nr * rbio->stripe_nsectors + sector_nr) *
                        fs_info->csum_size;
+       kaddr = kmap_local_sector(sector);
        ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected);
        kunmap_local(kaddr);
        return ret;
@@ -1874,9 +1885,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
                } else {
                        sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
                }
-               ASSERT(sector->page);
-               pointers[stripe_nr] = kmap_local_page(sector->page) +
-                                  sector->pgoff;
+               pointers[stripe_nr] = kmap_local_sector(sector);
                unmap_array[stripe_nr] = pointers[stripe_nr];
        }
 
@@ -2328,7 +2337,7 @@ static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
                 * thus this rbio can not be cached one, as cached one must
                 * have all its data sectors present and uptodate.
                 */
-               if (!sector->page || !sector->uptodate)
+               if (!sector->has_paddr || !sector->uptodate)
                        return true;
        }
        return false;
@@ -2518,6 +2527,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
        int stripe;
        int sectornr;
        bool has_qstripe;
+       struct page *page;
        struct sector_ptr p_sector = { 0 };
        struct sector_ptr q_sector = { 0 };
        struct bio_list bio_list;
@@ -2549,29 +2559,33 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
         */
        clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-       p_sector.page = alloc_page(GFP_NOFS);
-       if (!p_sector.page)
+       page = alloc_page(GFP_NOFS);
+       if (!page)
                return -ENOMEM;
-       p_sector.pgoff = 0;
+       p_sector.has_paddr = true;
+       p_sector.paddr = page_to_phys(page);
        p_sector.uptodate = 1;
+       page = NULL;
 
        if (has_qstripe) {
                /* RAID6, allocate and map temp space for the Q stripe */
-               q_sector.page = alloc_page(GFP_NOFS);
-               if (!q_sector.page) {
-                       __free_page(p_sector.page);
-                       p_sector.page = NULL;
+               page = alloc_page(GFP_NOFS);
+               if (!page) {
+                       __free_page(phys_to_page(p_sector.paddr));
+                       p_sector.has_paddr = false;
                        return -ENOMEM;
                }
-               q_sector.pgoff = 0;
+               q_sector.has_paddr = true;
+               q_sector.paddr = page_to_phys(page);
                q_sector.uptodate = 1;
-               pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
+               page = NULL;
+               pointers[rbio->real_stripes - 1] = kmap_local_sector(&q_sector);
        }
 
        bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
 
        /* Map the parity stripe just once */
-       pointers[nr_data] = kmap_local_page(p_sector.page);
+       pointers[nr_data] = kmap_local_sector(&p_sector);
 
        for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
                struct sector_ptr *sector;
@@ -2580,8 +2594,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
                /* first collect one page from each data stripe */
                for (stripe = 0; stripe < nr_data; stripe++) {
                        sector = sector_in_rbio(rbio, stripe, sectornr, 0);
-                       pointers[stripe] = kmap_local_page(sector->page) +
-                                          sector->pgoff;
+                       pointers[stripe] = kmap_local_sector(sector);
                }
 
                if (has_qstripe) {
@@ -2597,7 +2610,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
 
                /* Check scrubbing parity and repair it */
                sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
-               parity = kmap_local_page(sector->page) + sector->pgoff;
+               parity = kmap_local_sector(sector);
                if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
                        memcpy(parity, pointers[rbio->scrubp], sectorsize);
                else
@@ -2610,12 +2623,11 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
        }
 
        kunmap_local(pointers[nr_data]);
-       __free_page(p_sector.page);
-       p_sector.page = NULL;
-       if (q_sector.page) {
-               kunmap_local(pointers[rbio->real_stripes - 1]);
-               __free_page(q_sector.page);
-               q_sector.page = NULL;
+       __free_page(phys_to_page(p_sector.paddr));
+       p_sector.has_paddr = false;
+       if (q_sector.has_paddr) {
+               __free_page(phys_to_page(q_sector.paddr));
+               q_sector.has_paddr = false;
        }
 
        /*