btrfs: do proper folio cleanup when run_delalloc_nocow() failed

author Qu Wenruo <wqu@suse.com>

Thu, 12 Dec 2024 06:13:59 +0000 (16:43 +1030)

committer David Sterba <dsterba@suse.com>

Mon, 13 Jan 2025 14:52:17 +0000 (15:52 +0100)
author Qu Wenruo <wqu@suse.com>
Thu, 12 Dec 2024 06:13:59 +0000 (16:43 +1030)
committer David Sterba <dsterba@suse.com>
Mon, 13 Jan 2025 14:52:17 +0000 (15:52 +0100)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 9bb8c447cde19fcd0fb47f1e30c8297441bb6aaf..7aa178e728cfaf762bf9017d28aa9ade4487327b 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1954,6 +1954,53 @@ static int can_nocow_file_extent(struct btrfs_path *path,
         return ret < 0 ? ret : can_nocow;
  }
  
+/*
+ * Clean up the dirty folios which will never be submitted due to error.
+ *
+ * When running a delalloc range we may split it (fragmentation or NOCOW). If a
+ * later part fails, the already processed part [@start, @end] (inclusive) will
+ * never be submitted, so clear the dirty flag and start + finish writeback on
+ * its folios, then record @error on the mapping. @locked_folio is handled
+ * directly; other folios are looked up with FGP_LOCK and unlocked afterwards.
+ */
+static void cleanup_dirty_folios(struct btrfs_inode *inode,
+                                struct folio *locked_folio,
+                                u64 start, u64 end, int error)
+{
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       struct address_space *mapping = inode->vfs_inode.i_mapping;
+       pgoff_t start_index = start >> PAGE_SHIFT;
+       pgoff_t end_index = end >> PAGE_SHIFT;
+       u32 len;
+
+       ASSERT(end + 1 - start < U32_MAX);
+       ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
+              IS_ALIGNED(end + 1, fs_info->sectorsize));
+       len = end + 1 - start;
+
+       /*
+        * Handle the locked folio first.
+        * The btrfs_folio_clamp_*() helpers can handle range out of the folio case.
+        */
+       btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
+
+       for (pgoff_t index = start_index; index <= end_index; index++) {
+               struct folio *folio;
+
+               /* Already handled at the beginning. */
+               if (index == locked_folio->index)
+                       continue;
+               folio = __filemap_get_folio(mapping, index, FGP_LOCK, GFP_NOFS);
+               /* Cache already dropped, no need to do any cleanup. */
+               if (IS_ERR(folio))
+                       continue;
+               btrfs_folio_clamp_finish_io(fs_info, folio, start, len);
+               folio_unlock(folio);
+               folio_put(folio);
+       }
+       mapping_set_error(mapping, error);
+}
+
  /*
   * when nowcow writeback call back.  This checks for snapshots or COW copies
   * of the extents that exist in the file, and COWs the file as required.
@@ -1969,6 +2016,11 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
         struct btrfs_root *root = inode->root;
         struct btrfs_path *path;
         u64 cow_start = (u64)-1;
+       /*
+        * If not 0, represents the inclusive end of the last fallback_to_cow()
+        * range. Only for error handling.
+        */
+       u64 cow_end = 0;
         u64 cur_offset = start;
         int ret;
         bool check_prev = true;
@@ -2129,6 +2181,7 @@ must_cow:
                                               found_key.offset - 1);
                         cow_start = (u64)-1;
                         if (ret) {
+                               cow_end = found_key.offset - 1;
                                 btrfs_dec_nocow_writers(nocow_bg);
                                 goto error;
                         }
@@ -2202,24 +2255,54 @@ must_cow:
                 cow_start = cur_offset;
  
         if (cow_start != (u64)-1) {
-               cur_offset = end;
                 ret = fallback_to_cow(inode, locked_folio, cow_start, end);
                 cow_start = (u64)-1;
-               if (ret)
+               if (ret) {
+                       cow_end = end;
                         goto error;
+               }
         }
  
         btrfs_free_path(path);
         return 0;
  
  error:
+       /*
+        * There are several error cases:
+        *
+        * 1) Failed without falling back to COW
+        *    start         cur_offset             end
+        *    |/////////////|                      |
+        *
+        *    For range [start, cur_offset) the folios are already unlocked (except
+        *    @locked_folio), EXTENT_DELALLOC already removed.
+        *    Only need to clear the dirty flag as they will never be submitted.
+        *    Ordered extent and extent maps are handled by
+        *    btrfs_mark_ordered_io_finished() inside run_delalloc_range().
+        *
+        * 2) Failed with error from fallback_to_cow()
+        *    start         cur_offset  cow_end    end
+        *    |/////////////|-----------|          |
+        *
+        *    For range [start, cur_offset) it's the same as case 1).
+        *    But for range [cur_offset, cow_end], the folios have dirty flag
+        *    cleared and unlocked, EXTENT_DELALLOC cleared by cow_file_range().
+        *
+        *    Thus we should not call extent_clear_unlock_delalloc() on range
+        *    [cur_offset, cow_end], as the folios are already unlocked.
+        *
+        * So clear the folio dirty flags for [start, cur_offset) first.
+        */
+       if (cur_offset > start)
+               cleanup_dirty_folios(inode, locked_folio, start, cur_offset - 1, ret);
+
         /*
          * If an error happened while a COW region is outstanding, cur_offset
-        * needs to be reset to cow_start to ensure the COW region is unlocked
-        * as well.
+        * needs to be reset to @cow_end + 1 to skip the COW range, as
+        * cow_file_range() will do the proper cleanup at error.
          */
-       if (cow_start != (u64)-1)
-               cur_offset = cow_start;
+       if (cow_end)
+               cur_offset = cow_end + 1;
  
         /*
          * We need to lock the extent here because we're clearing DELALLOC and
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h

index 428fa9389fd49e30962ff6b7d32feef61c7bf4d4..44fff1f4eac48205efe035293cd2d59fcdfd1d35 100644 (file)
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -137,6 +137,19 @@ DECLARE_BTRFS_SUBPAGE_OPS(writeback);
  DECLARE_BTRFS_SUBPAGE_OPS(ordered);
  DECLARE_BTRFS_SUBPAGE_OPS(checked);
  
+/*
+ * Error cleanup helper: clear dirty, then set and clear writeback (in this
+ * order) on @locked_folio for [@start, @start + @len) via the clamp helpers.
+ */
+static inline void btrfs_folio_clamp_finish_io(struct btrfs_fs_info *fs_info,
+                                              struct folio *locked_folio,
+                                              u64 start, u32 len)
+{
+       btrfs_folio_clamp_clear_dirty(fs_info, locked_folio, start, len);
+       btrfs_folio_clamp_set_writeback(fs_info, locked_folio, start, len);
+       btrfs_folio_clamp_clear_writeback(fs_info, locked_folio, start, len);
+}
+
  bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
                                         struct folio *folio, u64 start, u32 len);
author	Qu Wenruo <wqu@suse.com>
	Thu, 12 Dec 2024 06:13:59 +0000 (16:43 +1030)
committer	David Sterba <dsterba@suse.com>
	Mon, 13 Jan 2025 14:52:17 +0000 (15:52 +0100)
fs/btrfs/inode.c		patch \| blob \| blame \| history
fs/btrfs/subpage.h		patch \| blob \| blame \| history