From: Matthew Wilcox (Oracle) Date: Sat, 20 Apr 2024 02:49:59 +0000 (+0100) Subject: btrfs: count super block write errors in device instead of tracking folio error state X-Git-Tag: io_uring-6.10-20240523~95^2~1 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=bc00965dbff7a8612c8ec0005b3bc943d7196629;p=linux-2.6-block.git btrfs: count super block write errors in device instead of tracking folio error state Currently the error status of a super block write is tracked in the page/folio Error status bit. For that we need to keep a reference to the folio for the whole duration of the write and the wait. Instead, count the number of super block writeback errors in the btrfs_device. That means we don't need the folio to stay around until it's waited for, and can avoid the extra call to folio_get/put. Also remove a mention of PageError in a comment as it's the last mention of the page Error state. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Sterba Signed-off-by: David Sterba --- diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 90c54466ecc3..a91a8056758a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3634,11 +3634,15 @@ static void btrfs_end_super_write(struct bio *bio) "lost super block write due to IO error on %s (%d)", btrfs_dev_name(device), blk_status_to_errno(bio->bi_status)); - folio_set_error(fi.folio); btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); + /* Ensure failure if the primary sb fails. 
*/ + if (bio->bi_opf & REQ_FUA) + atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR, + &device->sb_write_errors); + else + atomic_inc(&device->sb_write_errors); } - folio_unlock(fi.folio); folio_put(fi.folio); } @@ -3742,10 +3746,11 @@ static int write_dev_supers(struct btrfs_device *device, struct address_space *mapping = device->bdev->bd_inode->i_mapping; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); int i; - int errors = 0; int ret; u64 bytenr, bytenr_orig; + atomic_set(&device->sb_write_errors, 0); + if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; @@ -3765,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_err(device->fs_info, "couldn't get super block location for mirror %d", i); - errors++; + atomic_inc(&device->sb_write_errors); continue; } if (bytenr + BTRFS_SUPER_INFO_SIZE >= @@ -3785,14 +3790,11 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_err(device->fs_info, "couldn't get super block page for bytenr %llu", bytenr); - errors++; + atomic_inc(&device->sb_write_errors); continue; } ASSERT(folio_order(folio) == 0); - /* Bump the refcount for wait_dev_supers() */ - folio_get(folio); - offset = offset_in_folio(folio, bytenr); disk_super = folio_address(folio) + offset; memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE); @@ -3820,16 +3822,17 @@ static int write_dev_supers(struct btrfs_device *device, submit_bio(bio); if (btrfs_advance_sb_log(device, i)) - errors++; + atomic_inc(&device->sb_write_errors); } - return errors < i ? 0 : -1; + return atomic_read(&device->sb_write_errors) < i ? 0 : -1; } /* * Wait for write completion of superblocks done by write_dev_supers, * @max_mirrors same for write and wait phases. * - * Return number of errors when folio is not found or not marked up to date. + * Return -1 if primary super block write failed or when there were no super block + * copies written. Otherwise 0. 
*/ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) { @@ -3860,30 +3863,19 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) folio = filemap_get_folio(device->bdev->bd_inode->i_mapping, bytenr >> PAGE_SHIFT); - if (IS_ERR(folio)) { - errors++; - if (i == 0) - primary_failed = true; + /* If the folio has been removed, then we know it completed. */ + if (IS_ERR(folio)) continue; - } ASSERT(folio_order(folio) == 0); /* Folio will be unlocked once the write completes. */ folio_wait_locked(folio); - if (folio_test_error(folio)) { - errors++; - if (i == 0) - primary_failed = true; - } - - /* Drop our reference */ - folio_put(folio); - - /* Drop the reference from the writing run */ folio_put(folio); } - /* log error, force error return */ + errors += atomic_read(&device->sb_write_errors); + if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR) + primary_failed = true; if (primary_failed) { btrfs_err(device->fs_info, "error writing primary super block to device %llu", device->devid); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 47a5bb95a994..597387e9f040 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1602,7 +1602,7 @@ static void set_btree_ioerr(struct extent_buffer *eb) * can be no longer dirty nor marked anymore for writeback (if a * subsequent modification to the extent buffer didn't happen before the * transaction commit), which makes filemap_fdata[write|wait]_range not - * able to find the pages tagged with SetPageError at transaction + * able to find the pages which contain errors at transaction * commit time. 
So if this happens we must abort the transaction, * otherwise we commit a super block with btree roots that point to * btree nodes/leafs whose content on disk is invalid - either garbage diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index cf555f5b47ce..66e6fc481ecd 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -92,6 +92,9 @@ enum btrfs_raid_types { #define BTRFS_DEV_STATE_FLUSH_SENT (4) #define BTRFS_DEV_STATE_NO_READA (5) +/* Special value encoding failure to write primary super block. */ +#define BTRFS_SUPER_PRIMARY_WRITE_ERROR (INT_MAX / 2) + struct btrfs_fs_devices; struct btrfs_device { @@ -142,6 +145,12 @@ struct btrfs_device { /* type and info about this device */ u64 type; + /* + * Counter of super block write errors, values larger than + * BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure. + */ + atomic_t sb_write_errors; + /* minimal io size for this device */ u32 sector_size;