btrfs: migrate btrfs_repair_io_failure() to folio interfaces
author Qu Wenruo <wqu@suse.com>
Tue, 12 Dec 2023 05:24:10 +0000 (15:54 +1030)
committer David Sterba <dsterba@suse.com>
Fri, 15 Dec 2023 22:03:58 +0000 (23:03 +0100)
[BUG]
Test case btrfs/124 fails if larger metadata folios are enabled. The
dying message looks like this:

 BTRFS error (device dm-2): bad tree block start, mirror 2 want 31686656 have 0
 BTRFS info (device dm-2): read error corrected: ino 0 off 31686656 (dev /dev/mapper/test-scratch2 sector 20928)
 BUG: kernel NULL pointer dereference, address: 0000000000000020
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 CPU: 6 PID: 350881 Comm: btrfs Tainted: G           OE      6.7.0-rc3-custom+ #128
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022
 RIP: 0010:btrfs_read_extent_buffer+0x106/0x180 [btrfs]
 PKRU: 55555554
 Call Trace:
  <TASK>
  read_tree_block+0x33/0xb0 [btrfs]
  read_block_for_search+0x23e/0x340 [btrfs]
  btrfs_search_slot+0x2f9/0xe60 [btrfs]
  btrfs_lookup_csum+0x75/0x160 [btrfs]
  btrfs_lookup_bio_sums+0x21a/0x560 [btrfs]
  btrfs_submit_chunk+0x152/0x680 [btrfs]
  btrfs_submit_bio+0x1c/0x50 [btrfs]
  submit_one_bio+0x40/0x80 [btrfs]
  submit_extent_page+0x158/0x390 [btrfs]
  btrfs_do_readpage+0x330/0x740 [btrfs]
  extent_readahead+0x38d/0x6c0 [btrfs]
  read_pages+0x94/0x2c0
  page_cache_ra_unbounded+0x12d/0x190
  relocate_file_extent_cluster+0x7c1/0x9d0 [btrfs]
  relocate_block_group+0x2d3/0x560 [btrfs]
  btrfs_relocate_block_group+0x2c7/0x4b0 [btrfs]
  btrfs_relocate_chunk+0x4c/0x1a0 [btrfs]
  btrfs_balance+0x925/0x13c0 [btrfs]
  btrfs_ioctl+0x19f1/0x25d0 [btrfs]
  __x64_sys_ioctl+0x90/0xd0
  do_syscall_64+0x3f/0xf0
  entry_SYSCALL_64_after_hwframe+0x6e/0x76

[CAUSE]
The dying line is the btrfs_repair_io_failure() call inside
btrfs_repair_eb_io_failure().

The function still relies on the extent buffer using page-sized
folios.
When the extent buffer uses a larger folio, we go into the 2nd slot of
folios[], which is not populated, and trigger the NULL pointer
dereference.

[FIX]
Migrate btrfs_repair_io_failure() to folio interfaces, so that when we
hit a larger folio, we just submit the whole folio in one go.

This also affects the data repair path through btrfs_end_repair_bio().
Thankfully, data is still fully page based, so we can just add an
ASSERT() and use page_folio() to convert the page to a folio.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/bio.c
fs/btrfs/bio.h
fs/btrfs/disk-io.c

index 67a885d3f9a875d3da05a5019f42c7a934ba314f..928f512cdb4a7496ba3dfcc712b8d2571841eacd 100644 (file)
@@ -194,6 +194,12 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
        struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
        int mirror = repair_bbio->mirror_num;
 
+       /*
+        * We can only trigger this for data bio, which doesn't support larger
+        * folios yet.
+        */
+       ASSERT(folio_order(page_folio(bv->bv_page)) == 0);
+
        if (repair_bbio->bio.bi_status ||
            !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
                bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
@@ -215,7 +221,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
                btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
                                  repair_bbio->file_offset, fs_info->sectorsize,
                                  repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
-                                 bv->bv_page, bv->bv_offset, mirror);
+                                 page_folio(bv->bv_page), bv->bv_offset, mirror);
        } while (mirror != fbio->bbio->mirror_num);
 
 done:
@@ -767,8 +773,8 @@ void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
  * freeing the bio.
  */
 int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-                           u64 length, u64 logical, struct page *page,
-                           unsigned int pg_offset, int mirror_num)
+                           u64 length, u64 logical, struct folio *folio,
+                           unsigned int folio_offset, int mirror_num)
 {
        struct btrfs_io_stripe smap = { 0 };
        struct bio_vec bvec;
@@ -799,7 +805,8 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 
        bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
        bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
-       __bio_add_page(&bio, page, length, pg_offset);
+       ret = bio_add_folio(&bio, folio, length, folio_offset);
+       ASSERT(ret);
        ret = submit_bio_wait(&bio);
        if (ret) {
                /* try to remap that extent elsewhere? */
index ca79decee0607fb0df6c46967bbdc20cc05b2022..bbaed317161a4ccf3c6bfc118c6ed4fccb8be9fc 100644 (file)
@@ -105,7 +105,7 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
 void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
 void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
 int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-                           u64 length, u64 logical, struct page *page,
-                           unsigned int pg_offset, int mirror_num);
+                           u64 length, u64 logical, struct folio *folio,
+                           unsigned int folio_offset, int mirror_num);
 
 #endif
index 2c83da36a9c75e56ad9baa7d0c8d62f03c7d7d2e..c6907d533fe83912576fd92283658539e0abbb81 100644 (file)
@@ -183,21 +183,22 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
                                      int mirror_num)
 {
        struct btrfs_fs_info *fs_info = eb->fs_info;
-       int i, num_pages = num_extent_pages(eb);
+       int num_folios = num_extent_folios(eb);
        int ret = 0;
 
        if (sb_rdonly(fs_info->sb))
                return -EROFS;
 
-       for (i = 0; i < num_pages; i++) {
-               u64 start = max_t(u64, eb->start, folio_pos(eb->folios[i]));
+       for (int i = 0; i < num_folios; i++) {
+               struct folio *folio = eb->folios[i];
+               u64 start = max_t(u64, eb->start, folio_pos(folio));
                u64 end = min_t(u64, eb->start + eb->len,
-                               folio_pos(eb->folios[i]) + PAGE_SIZE);
+                               folio_pos(folio) + folio_size(folio));
                u32 len = end - start;
 
                ret = btrfs_repair_io_failure(fs_info, 0, start, len,
-                                             start, folio_page(eb->folios[i], 0),
-                                             offset_in_page(start), mirror_num);
+                                             start, folio, offset_in_folio(folio, start),
+                                             mirror_num);
                if (ret)
                        break;
        }