btrfs: count super block write errors in device instead of tracking folio error state
author: Matthew Wilcox (Oracle) <willy@infradead.org>
Sat, 20 Apr 2024 02:49:59 +0000 (03:49 +0100)
committer: David Sterba <dsterba@suse.com>
Tue, 7 May 2024 19:31:11 +0000 (21:31 +0200)
Currently the error status of a super block write is tracked in the
page/folio Error status bit. For that we need to keep a reference to the
folio for the whole duration of the write and the wait.

Instead, count the number of super block write errors in the btrfs_device.
That means we don't need the folio to stay around until it's waited for,
and can avoid the extra calls to folio_get()/folio_put().

Also remove a mention of SetPageError in a comment, as it's the last
mention of the page Error state.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/volumes.h

index 90c54466ecc3a3b9029d09c7ca8e1bc072bfb822..a91a8056758a3286d3a99900a115be70a92a8a60 100644 (file)
@@ -3634,11 +3634,15 @@ static void btrfs_end_super_write(struct bio *bio)
                                "lost super block write due to IO error on %s (%d)",
                                btrfs_dev_name(device),
                                blk_status_to_errno(bio->bi_status));
-                       folio_set_error(fi.folio);
                        btrfs_dev_stat_inc_and_print(device,
                                                     BTRFS_DEV_STAT_WRITE_ERRS);
+                       /* Ensure failure if the primary sb fails. */
+                       if (bio->bi_opf & REQ_FUA)
+                               atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
+                                          &device->sb_write_errors);
+                       else
+                               atomic_inc(&device->sb_write_errors);
                }
-
                folio_unlock(fi.folio);
                folio_put(fi.folio);
        }
@@ -3742,10 +3746,11 @@ static int write_dev_supers(struct btrfs_device *device,
        struct address_space *mapping = device->bdev->bd_inode->i_mapping;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        int i;
-       int errors = 0;
        int ret;
        u64 bytenr, bytenr_orig;
 
+       atomic_set(&device->sb_write_errors, 0);
+
        if (max_mirrors == 0)
                max_mirrors = BTRFS_SUPER_MIRROR_MAX;
 
@@ -3765,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device,
                        btrfs_err(device->fs_info,
                                "couldn't get super block location for mirror %d",
                                i);
-                       errors++;
+                       atomic_inc(&device->sb_write_errors);
                        continue;
                }
                if (bytenr + BTRFS_SUPER_INFO_SIZE >=
@@ -3785,14 +3790,11 @@ static int write_dev_supers(struct btrfs_device *device,
                        btrfs_err(device->fs_info,
                            "couldn't get super block page for bytenr %llu",
                            bytenr);
-                       errors++;
+                       atomic_inc(&device->sb_write_errors);
                        continue;
                }
                ASSERT(folio_order(folio) == 0);
 
-               /* Bump the refcount for wait_dev_supers() */
-               folio_get(folio);
-
                offset = offset_in_folio(folio, bytenr);
                disk_super = folio_address(folio) + offset;
                memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
@@ -3820,16 +3822,17 @@ static int write_dev_supers(struct btrfs_device *device,
                submit_bio(bio);
 
                if (btrfs_advance_sb_log(device, i))
-                       errors++;
+                       atomic_inc(&device->sb_write_errors);
        }
-       return errors < i ? 0 : -1;
+       return atomic_read(&device->sb_write_errors) < i ? 0 : -1;
 }
 
 /*
  * Wait for write completion of superblocks done by write_dev_supers,
  * @max_mirrors same for write and wait phases.
  *
- * Return number of errors when folio is not found or not marked up to date.
+ * Return -1 if primary super block write failed or when there were no super block
+ * copies written. Otherwise 0.
  */
 static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
 {
@@ -3860,30 +3863,19 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
 
                folio = filemap_get_folio(device->bdev->bd_inode->i_mapping,
                                          bytenr >> PAGE_SHIFT);
-               if (IS_ERR(folio)) {
-                       errors++;
-                       if (i == 0)
-                               primary_failed = true;
+               /* If the folio has been removed, then we know it completed. */
+               if (IS_ERR(folio))
                        continue;
-               }
                ASSERT(folio_order(folio) == 0);
 
                /* Folio will be unlocked once the write completes. */
                folio_wait_locked(folio);
-               if (folio_test_error(folio)) {
-                       errors++;
-                       if (i == 0)
-                               primary_failed = true;
-               }
-
-               /* Drop our reference */
-               folio_put(folio);
-
-               /* Drop the reference from the writing run */
                folio_put(folio);
        }
 
-       /* log error, force error return */
+       errors += atomic_read(&device->sb_write_errors);
+       if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR)
+               primary_failed = true;
        if (primary_failed) {
                btrfs_err(device->fs_info, "error writing primary super block to device %llu",
                          device->devid);
index 47a5bb95a994932bc0c5aca2dbdc6658869fa32b..597387e9f040075ac46c02c107337b67762cb76c 100644 (file)
@@ -1602,7 +1602,7 @@ static void set_btree_ioerr(struct extent_buffer *eb)
         * can be no longer dirty nor marked anymore for writeback (if a
         * subsequent modification to the extent buffer didn't happen before the
         * transaction commit), which makes filemap_fdata[write|wait]_range not
-        * able to find the pages tagged with SetPageError at transaction
+        * able to find the pages which contain errors at transaction
         * commit time. So if this happens we must abort the transaction,
         * otherwise we commit a super block with btree roots that point to
         * btree nodes/leafs whose content on disk is invalid - either garbage
index cf555f5b47ce63f1c9bf0797cbaf1386ae14a01e..66e6fc481ecd2968bdf6e036bfae61a9d9ecc41c 100644 (file)
@@ -92,6 +92,9 @@ enum btrfs_raid_types {
 #define BTRFS_DEV_STATE_FLUSH_SENT     (4)
 #define BTRFS_DEV_STATE_NO_READA       (5)
 
+/* Special value encoding failure to write primary super block. */
+#define BTRFS_SUPER_PRIMARY_WRITE_ERROR                (INT_MAX / 2)
+
 struct btrfs_fs_devices;
 
 struct btrfs_device {
@@ -142,6 +145,12 @@ struct btrfs_device {
        /* type and info about this device */
        u64 type;
 
+       /*
+        * Counter of super block write errors, values greater than or equal to
+        * BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure.
+        */
+       atomic_t sb_write_errors;
+
        /* minimal io size for this device */
        u32 sector_size;