btrfs: detect nocow for swap after snapshot delete
authorBoris Burkov <boris@bur.io>
Tue, 18 Aug 2020 18:00:05 +0000 (11:00 -0700)
committerDavid Sterba <dsterba@suse.com>
Fri, 21 Aug 2020 10:21:23 +0000 (12:21 +0200)
can_nocow_extent and btrfs_cross_ref_exist both rely on a heuristic for
detecting a must cow condition which is not exactly accurate, but saves
unnecessary tree traversal. The incorrect assumption is that if the
extent was created in a generation smaller than the last snapshot
generation, it must be referenced by that snapshot. That is true, except
the snapshot could have since been deleted, without affecting the last
snapshot generation.

The original patch claimed a performance win from this check, but it
also leads to a bug where you are unable to use a swapfile if you ever
snapshotted the subvolume it's in. Make the check slower and more strict
for the swapon case, without modifying the general cow checks as a
compromise. Turning swap on does not seem to be a particularly
performance sensitive operation, so incurring a possibly unnecessary
btrfs_search_slot seems worthwhile for the added usability.

Note: Until the snapshot is competely cleaned after deletion,
check_committed_refs will still cause the logic to think that cow is
necessary, so the user must until 'btrfs subvolu sync' finished before
activating the swapfile swapon.

CC: stable@vger.kernel.org # 5.4+
Suggested-by: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c

index 729b5b80014abcc965e4c9e1f702f99f112c9cdc..9a72896bed2ee464140734188771a9b319c46b92 100644 (file)
@@ -2518,7 +2518,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
                                    u64 bytenr, u64 num_bytes);
 int btrfs_exclude_logged_extents(struct extent_buffer *eb);
 int btrfs_cross_ref_exist(struct btrfs_root *root,
-                         u64 objectid, u64 offset, u64 bytenr);
+                         u64 objectid, u64 offset, u64 bytenr, bool strict);
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
                                             struct btrfs_root *root,
                                             u64 parent, u64 root_objectid,
@@ -2934,7 +2934,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
                                           u64 start, u64 len);
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
-                             u64 *ram_bytes);
+                             u64 *ram_bytes, bool strict);
 
 void __btrfs_del_delalloc_inode(struct btrfs_root *root,
                                struct btrfs_inode *inode);
index de6fe176fdfb3400c4b0786c9b032f972069d20e..5871ef78edbac4bcc363347f8875b266015cda94 100644 (file)
@@ -2306,7 +2306,8 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
 
 static noinline int check_committed_ref(struct btrfs_root *root,
                                        struct btrfs_path *path,
-                                       u64 objectid, u64 offset, u64 bytenr)
+                                       u64 objectid, u64 offset, u64 bytenr,
+                                       bool strict)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *extent_root = fs_info->extent_root;
@@ -2348,9 +2349,13 @@ static noinline int check_committed_ref(struct btrfs_root *root,
            btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
                goto out;
 
-       /* If extent created before last snapshot => it's definitely shared */
-       if (btrfs_extent_generation(leaf, ei) <=
-           btrfs_root_last_snapshot(&root->root_item))
+       /*
+        * If extent created before last snapshot => it's shared unless the
+        * snapshot has been deleted. Use the heuristic if strict is false.
+        */
+       if (!strict &&
+           (btrfs_extent_generation(leaf, ei) <=
+            btrfs_root_last_snapshot(&root->root_item)))
                goto out;
 
        iref = (struct btrfs_extent_inline_ref *)(ei + 1);
@@ -2375,7 +2380,7 @@ out:
 }
 
 int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
-                         u64 bytenr)
+                         u64 bytenr, bool strict)
 {
        struct btrfs_path *path;
        int ret;
@@ -2386,7 +2391,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
 
        do {
                ret = check_committed_ref(root, path, objectid,
-                                         offset, bytenr);
+                                         offset, bytenr, strict);
                if (ret && ret != -ENOENT)
                        goto out;
 
index 841c516079a9626696d13d3da8a0d05041d25b3f..3abaff4d2cfffa6d0064d6f3a18367502e35ddf8 100644 (file)
@@ -1571,7 +1571,7 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
        }
 
        ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
-                       NULL, NULL, NULL);
+                       NULL, NULL, NULL, false);
        if (ret <= 0) {
                ret = 0;
                if (!nowait)
index 4ad0323b36845e95767df0001342a7da76dcbfb7..46e04247193abbeea5024f583a9280091f6dc22d 100644 (file)
@@ -1609,7 +1609,7 @@ next_slot:
                                goto out_check;
                        ret = btrfs_cross_ref_exist(root, ino,
                                                    found_key.offset -
-                                                   extent_offset, disk_bytenr);
+                                                   extent_offset, disk_bytenr, false);
                        if (ret) {
                                /*
                                 * ret could be -EIO if the above fails to read
@@ -6949,6 +6949,8 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
  * @orig_start:        (optional) Return the original file offset of the file extent
  * @orig_len:  (optional) Return the original on-disk length of the file extent
  * @ram_bytes: (optional) Return the ram_bytes of the file extent
+ * @strict:    if true, omit optimizations that might force us into unnecessary
+ *             cow. e.g., don't trust generation number.
  *
  * This function will flush ordered extents in the range to ensure proper
  * nocow checks for (nowait == false) case.
@@ -6963,7 +6965,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
  */
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
-                             u64 *ram_bytes)
+                             u64 *ram_bytes, bool strict)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_path *path;
@@ -7041,8 +7043,9 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
         * Do the same check as in btrfs_cross_ref_exist but without the
         * unnecessary search.
         */
-       if (btrfs_file_extent_generation(leaf, fi) <=
-           btrfs_root_last_snapshot(&root->root_item))
+       if (!strict &&
+           (btrfs_file_extent_generation(leaf, fi) <=
+            btrfs_root_last_snapshot(&root->root_item)))
                goto out;
 
        backref_offset = btrfs_file_extent_offset(leaf, fi);
@@ -7078,7 +7081,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
         */
 
        ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
-                                   key.offset - backref_offset, disk_bytenr);
+                                   key.offset - backref_offset, disk_bytenr,
+                                   strict);
        if (ret) {
                ret = 0;
                goto out;
@@ -7299,7 +7303,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
                block_start = em->block_start + (start - em->start);
 
                if (can_nocow_extent(inode, start, &len, &orig_start,
-                                    &orig_block_len, &ram_bytes) == 1 &&
+                                    &orig_block_len, &ram_bytes, false) == 1 &&
                    btrfs_inc_nocow_writers(fs_info, block_start)) {
                        struct extent_map *em2;
 
@@ -10130,7 +10134,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                free_extent_map(em);
                em = NULL;
 
-               ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
+               ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
                if (ret < 0) {
                        goto out;
                } else if (ret) {