Merge tag 'for-6.1-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Nov 2022 21:24:05 +0000 (13:24 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Nov 2022 21:24:05 +0000 (13:24 -0800)
Pull btrfs fixes from David Sterba:

 - fix a regression in nowait + buffered write

 - in zoned mode fix endianness when comparing super block generation

 - locking and lockdep fixes:
     - fix potential sleeping under spinlock when setting qgroup limit
     - lockdep warning fixes when btrfs_path is freed after copy_to_user
     - do not modify log tree while holding a leaf from fs tree locked

 - fix freeing of sysfs files of static features on error

 - use kvcalloc for zone map allocation as a fallback to avoid warnings
   due to high-order allocation

 - send, avoid unaligned encoded writes when attempting to clone range

* tag 'for-6.1-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: sysfs: normalize the error handling branch in btrfs_init_sysfs()
  btrfs: do not modify log tree while holding a leaf from fs tree locked
  btrfs: use kvcalloc in btrfs_get_dev_zone_info
  btrfs: qgroup: fix sleep from invalid context bug in btrfs_qgroup_inherit()
  btrfs: send: avoid unaligned encoded writes when attempting to clone range
  btrfs: zoned: fix missing endianness conversion in sb_write_pointer
  btrfs: free btrfs_path before copying subvol info to userspace
  btrfs: free btrfs_path before copying fspath to userspace
  btrfs: free btrfs_path before copying inodes to userspace
  btrfs: free btrfs_path before copying root refs to userspace
  btrfs: fix assertion failure and blocking during nowait buffered write

fs/btrfs/ctree.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/send.c
fs/btrfs/sysfs.c
fs/btrfs/tree-log.c
fs/btrfs/zoned.c

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a9543f01184cba9cf490843a51f391d909789c95..dcb510f38dda045b9951de1ad2eadd418c3aa948 100644
@@ -4663,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
        int ret;
        int i;
 
-       ASSERT(!path->nowait);
+       /*
+        * The nowait semantics are used only for write paths, where we don't
+        * use the tree mod log and sequence numbers.
+        */
+       if (time_seq)
+               ASSERT(!path->nowait);
 
        nritems = btrfs_header_nritems(path->nodes[0]);
        if (nritems == 0)
@@ -4683,7 +4688,14 @@ again:
                if (path->need_commit_sem) {
                        path->need_commit_sem = 0;
                        need_commit_sem = true;
-                       down_read(&fs_info->commit_root_sem);
+                       if (path->nowait) {
+                               if (!down_read_trylock(&fs_info->commit_root_sem)) {
+                                       ret = -EAGAIN;
+                                       goto done;
+                               }
+                       } else {
+                               down_read(&fs_info->commit_root_sem);
+                       }
                }
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        }
@@ -4759,7 +4771,7 @@ again:
                next = c;
                ret = read_block_for_search(root, path, &next, level,
                                            slot, &key);
-               if (ret == -EAGAIN)
+               if (ret == -EAGAIN && !path->nowait)
                        goto again;
 
                if (ret < 0) {
@@ -4769,6 +4781,10 @@ again:
 
                if (!path->skip_locking) {
                        ret = btrfs_try_tree_read_lock(next);
+                       if (!ret && path->nowait) {
+                               ret = -EAGAIN;
+                               goto done;
+                       }
                        if (!ret && time_seq) {
                                /*
                                 * If we don't get the lock, we may be racing
@@ -4799,7 +4815,7 @@ again:
 
                ret = read_block_for_search(root, path, &next, level,
                                            0, &key);
-               if (ret == -EAGAIN)
+               if (ret == -EAGAIN && !path->nowait)
                        goto again;
 
                if (ret < 0) {
@@ -4807,8 +4823,16 @@ again:
                        goto done;
                }
 
-               if (!path->skip_locking)
-                       btrfs_tree_read_lock(next);
+               if (!path->skip_locking) {
+                       if (path->nowait) {
+                               if (!btrfs_try_tree_read_lock(next)) {
+                                       ret = -EAGAIN;
+                                       goto done;
+                               }
+                       } else {
+                               btrfs_tree_read_lock(next);
+                       }
+               }
        }
        ret = 0;
 done:
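
The ctree.c hunks all apply the same pattern: try to take the lock, and if that would block, bail out with -EAGAIN instead of sleeping. A minimal userspace sketch of that shape, using a POSIX rwlock in place of the kernel's extent buffer locks (the function and variable names here are illustrative, not btrfs APIs):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Take the read lock. In "nowait" mode never block: if the lock is
 * contended, return -EAGAIN so the caller can retry from a context
 * where blocking is allowed, mirroring how btrfs_try_tree_read_lock()
 * is used in the hunks above.
 */
static int read_lock_maybe_nowait(int nowait)
{
	if (nowait)
		return pthread_rwlock_tryrdlock(&lock) ? -EAGAIN : 0;
	pthread_rwlock_rdlock(&lock);
	return 0;
}

static void *writer(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&lock);	/* hold the lock so readers contend */
	sleep(1);
	pthread_rwlock_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	int ret;

	pthread_create(&t, NULL, writer, NULL);
	usleep(100 * 1000);		/* crude: give the writer time to grab the lock */

	ret = read_lock_maybe_nowait(1);
	printf("nowait attempt while contended: %d\n", ret);	/* -EAGAIN */

	ret = read_lock_maybe_nowait(0);	/* blocks until the writer is done */
	printf("blocking attempt: %d\n", ret);	/* 0 */
	pthread_rwlock_unlock(&lock);

	pthread_join(t, NULL);
	return 0;
}
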
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d5dd8bed1488a807d9ec4e5bb0834b8b6c0b4f1e..5ba2e810dc6e0eb07180fa34a8db0cdb0d75f78d 100644
@@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
                }
        }
 
+       btrfs_free_path(path);
+       path = NULL;
        if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
                ret = -EFAULT;
 
@@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
        }
 
 out:
+       btrfs_free_path(path);
+
        if (!ret || ret == -EOVERFLOW) {
                rootrefs->num_items = found;
                /* update min_treeid for next search */
@@ -3205,7 +3209,6 @@ out:
        }
 
        kfree(rootrefs);
-       btrfs_free_path(path);
 
        return ret;
 }
@@ -4231,6 +4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
                ipath->fspath->val[i] = rel_ptr;
        }
 
+       btrfs_free_path(path);
+       path = NULL;
        ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
                           ipath->fspath, size);
        if (ret) {
@@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
                size = min_t(u32, loi->size, SZ_16M);
        }
 
-       path = btrfs_alloc_path();
-       if (!path) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
        inodes = init_data_container(size);
        if (IS_ERR(inodes)) {
                ret = PTR_ERR(inodes);
-               inodes = NULL;
-               goto out;
+               goto out_loi;
        }
 
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
        ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
                                          inodes, ignore_offset);
+       btrfs_free_path(path);
        if (ret == -EINVAL)
                ret = -ENOENT;
        if (ret < 0)
@@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
                ret = -EFAULT;
 
 out:
-       btrfs_free_path(path);
        kvfree(inodes);
 out_loi:
        kfree(loi);
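
The ioctl hunks all make the same move: drop the btrfs_path (and with it any locks it pins) before copy_to_user(), which can fault and sleep, and set the pointer to NULL so the shared cleanup path remains a no-op. A small sketch of that ordering, with plain malloc/free standing in for btrfs_alloc_path()/btrfs_free_path() and a memcpy standing in for copy_to_user() (illustrative only):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int copy_out(char *dst, const char *src, size_t len)
{
	/* In the kernel this step could fault and block; here it is just a copy. */
	memcpy(dst, src, len);
	return 0;
}

static int get_info(char *user_buf, size_t len)
{
	char *path = malloc(64);	/* stand-in for btrfs_alloc_path() */
	char result[64] = "subvolume info";
	int ret = 0;

	if (!path)
		return -1;

	/* ... use "path" for the lookup and fill "result" ... */

	free(path);			/* release it before copying out ... */
	path = NULL;			/* ... so the cleanup below stays harmless */

	if (copy_out(user_buf, result, len < sizeof(result) ? len : sizeof(result)))
		ret = -1;

	free(path);			/* free(NULL) is defined and does nothing */
	return ret;
}

int main(void)
{
	char buf[64];

	if (get_info(buf, sizeof(buf)) == 0)
		printf("%s\n", buf);
	return 0;
}
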
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9334c3157c22e89ca242daad82705e5c5229d8c3..b74105a10f16c10de23ec0a9b1ab1f4d561a01b3 100644
@@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
                dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
                dstgroup->rsv_excl = inherit->lim.rsv_excl;
 
-               ret = update_qgroup_limit_item(trans, dstgroup);
-               if (ret) {
-                       qgroup_mark_inconsistent(fs_info);
-                       btrfs_info(fs_info,
-                                  "unable to update quota limit for %llu",
-                                  dstgroup->qgroupid);
-                       goto unlock;
-               }
+               qgroup_dirty(fs_info, dstgroup);
        }
 
        if (srcid) {
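
The qgroup hunk replaces a call that can sleep (updating the limit item on disk) with qgroup_dirty(), because the surrounding code runs under a spinlock; the real update happens later from a sleepable context. A sketch of that defer-while-atomic pattern using a POSIX spinlock (the names are illustrative, not btrfs APIs):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct group {
	long limit;
	int dirty;
};

static pthread_spinlock_t lock;

static void write_limit_item(struct group *g)
{
	usleep(1000);			/* stands in for a sleeping on-disk update */
	printf("limit %ld written\n", g->limit);
}

static void set_limit(struct group *g, long limit)
{
	pthread_spin_lock(&lock);
	g->limit = limit;
	g->dirty = 1;			/* must not sleep here: only mark dirty */
	pthread_spin_unlock(&lock);
}

static void flush_dirty(struct group *g)
{
	/* Sleepable context: safe to do the deferred update now. */
	if (g->dirty) {
		write_limit_item(g);
		g->dirty = 0;
	}
}

int main(void)
{
	struct group g = { 0, 0 };

	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	set_limit(&g, 4096);
	flush_dirty(&g);
	pthread_spin_destroy(&lock);
	return 0;
}
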
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 145c84b44fd0b8c9d8e20614e0b284a34e62b8ec..1c4b693ee4a3aeb849558dbcf523e427ba542f39 100644
@@ -5702,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
                u64 ext_len;
                u64 clone_len;
                u64 clone_data_offset;
+               bool crossed_src_i_size = false;
 
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(clone_root->root, path);
@@ -5759,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
                if (key.offset >= clone_src_i_size)
                        break;
 
-               if (key.offset + ext_len > clone_src_i_size)
+               if (key.offset + ext_len > clone_src_i_size) {
                        ext_len = clone_src_i_size - key.offset;
+                       crossed_src_i_size = true;
+               }
 
                clone_data_offset = btrfs_file_extent_offset(leaf, ei);
                if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
@@ -5821,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
                                ret = send_clone(sctx, offset, clone_len,
                                                 clone_root);
                        }
+               } else if (crossed_src_i_size && clone_len < len) {
+                       /*
+                        * If we are at i_size of the clone source inode and we
+                        * can not clone from it, terminate the loop. This is
+                        * to avoid sending two write operations, one with a
+                        * length matching clone_len and the final one after
+                        * this loop with a length of len - clone_len.
+                        *
+                        * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
+                        * was passed to the send ioctl), this helps avoid
+                        * sending an encoded write for an offset that is not
+                        * sector size aligned, in case the i_size of the source
+                        * inode is not sector size aligned. That will make the
+                        * receiver fallback to decompression of the data and
+                        * writing it using regular buffered IO, therefore while
+                        * not incorrect, it's not optimal due to decompression and
+                        * possible re-compression at the receiver.
+                        */
+                       break;
                } else {
                        ret = send_extent_data(sctx, dst_path, offset,
                                               clone_len);
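
For the send change, "sector size aligned" means the offset or length is a multiple of the (power-of-two) sector size. A tiny sketch of the check, mirroring the idea behind the kernel's IS_ALIGNED() macro, and showing how an i_size that is not a multiple of the sector size yields an unaligned final extent length (the values are made up for illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_aligned(uint64_t value, uint64_t sectorsize)
{
	/* Works because sectorsize is a power of two. */
	return (value & (sectorsize - 1)) == 0;
}

int main(void)
{
	const uint64_t sectorsize = 4096;
	/* An i_size that is not a multiple of the sector size ... */
	const uint64_t clone_src_i_size = 1000000;

	/* ... makes the clamped extent length unaligned as well. */
	printf("i_size aligned: %d\n", is_aligned(clone_src_i_size, sectorsize));
	printf("i_size rounded down to a sector boundary: %llu\n",
	       (unsigned long long)(clone_src_i_size & ~(sectorsize - 1)));
	return 0;
}
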
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 699b54b3acaae0b6e8e31f69e34066631b1aaa9c..74fef1f49c358cdc70f6ee2ada45d2c699194b80 100644
@@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void)
 
 #ifdef CONFIG_BTRFS_DEBUG
        ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group);
-       if (ret)
-               goto out2;
+       if (ret) {
+               sysfs_unmerge_group(&btrfs_kset->kobj,
+                                   &btrfs_static_feature_attr_group);
+               goto out_remove_group;
+       }
 #endif
 
        return 0;
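
The sysfs change follows the usual kernel init idiom: when a later setup step fails, undo the earlier steps in reverse order before returning the error. A compact sketch of that goto ladder, with malloc standing in for each registration step (the names are illustrative):

#include <stdio.h>
#include <stdlib.h>

static void *step_a, *step_b, *step_c;

static int init_subsystem(void)
{
	step_a = malloc(16);		/* e.g. create the first attribute group */
	if (!step_a)
		return -1;

	step_b = malloc(16);		/* e.g. merge the static feature group */
	if (!step_b)
		goto undo_a;

	step_c = malloc(16);		/* e.g. create the debug feature group */
	if (!step_c)
		goto undo_b;		/* unwinds step 2, then step 1 */

	return 0;			/* everything stays registered until exit */

undo_b:
	free(step_b);
undo_a:
	free(step_a);
	return -1;
}

static void exit_subsystem(void)
{
	/* Tear down in reverse order of setup, like the error path does. */
	free(step_c);
	free(step_b);
	free(step_a);
}

int main(void)
{
	int ret = init_subsystem();

	printf("init: %d\n", ret);
	if (ret == 0)
		exit_subsystem();
	return 0;
}
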
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 813986e38258b78ceed6f4207dd1e38b3e66a8b2..c3cf3dabe0b1b625c1229bcb5a922f6d66bcc86b 100644
@@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
                                  u64 *last_old_dentry_offset)
 {
        struct btrfs_root *log = inode->root->log_root;
-       struct extent_buffer *src = path->nodes[0];
-       const int nritems = btrfs_header_nritems(src);
+       struct extent_buffer *src;
+       const int nritems = btrfs_header_nritems(path->nodes[0]);
        const u64 ino = btrfs_ino(inode);
        bool last_found = false;
        int batch_start = 0;
        int batch_size = 0;
        int i;
 
-       for (i = path->slots[0]; i < nritems; i++) {
+       /*
+        * We need to clone the leaf, release the read lock on it, and use the
+        * clone before modifying the log tree. See the comment at copy_items()
+        * about why we need to do this.
+        */
+       src = btrfs_clone_extent_buffer(path->nodes[0]);
+       if (!src)
+               return -ENOMEM;
+
+       i = path->slots[0];
+       btrfs_release_path(path);
+       path->nodes[0] = src;
+       path->slots[0] = i;
+
+       for (; i < nritems; i++) {
                struct btrfs_dir_item *di;
                struct btrfs_key key;
                int ret;
@@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 {
        struct btrfs_root *log = inode->root->log_root;
        struct btrfs_file_extent_item *extent;
-       struct extent_buffer *src = src_path->nodes[0];
+       struct extent_buffer *src;
        int ret = 0;
        struct btrfs_key *ins_keys;
        u32 *ins_sizes;
@@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
        const u64 i_size = i_size_read(&inode->vfs_inode);
 
+       /*
+        * To keep lockdep happy and avoid deadlocks, clone the source leaf and
+        * use the clone. This is because otherwise we would be changing the log
+        * tree, to insert items from the subvolume tree or insert csum items,
+        * while holding a read lock on a leaf from the subvolume tree, which
+        * creates a nasty lock dependency when COWing log tree nodes/leaves:
+        *
+        * 1) Modifying the log tree triggers an extent buffer allocation while
+        *    holding a write lock on a parent extent buffer from the log tree.
+        *    Allocating the pages for an extent buffer, or the extent buffer
+        *    struct, can trigger inode eviction and finally the inode eviction
+        *    will trigger a release/remove of a delayed node, which requires
+        *    taking the delayed node's mutex;
+        *
+        * 2) Allocating a metadata extent for a log tree can trigger the async
+        *    reclaim thread and make us wait for it to release enough space and
+        *    unblock our reservation ticket. The reclaim thread can start
+        *    flushing delayed items, and that in turn results in the need to
+        *    lock delayed node mutexes and in the need to write lock extent
+        *    buffers of a subvolume tree - all this while holding a write lock
+        *    on the parent extent buffer in the log tree.
+        *
+        * So one task in scenario 1) running in parallel with another task in
+        * scenario 2) could lead to a deadlock, one wanting to lock a delayed
+        * node mutex while having a read lock on a leaf from the subvolume,
+        * while the other is holding the delayed node's mutex and wants to
+        * write lock the same subvolume leaf for flushing delayed items.
+        */
+       src = btrfs_clone_extent_buffer(src_path->nodes[0]);
+       if (!src)
+               return -ENOMEM;
+
+       i = src_path->slots[0];
+       btrfs_release_path(src_path);
+       src_path->nodes[0] = src;
+       src_path->slots[0] = i;
+
        ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
                           nr * sizeof(u32), GFP_NOFS);
        if (!ins_data)
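
The comment above explains the deadlock scenario; the actual change is a clone-and-release pattern: copy the source leaf while holding only its read lock, drop the lock, and then do the work that takes other locks (modifying the log tree) against the private copy. A userspace sketch of the same shape, with a rwlock-protected buffer standing in for the subvolume leaf (illustrative names, not btrfs APIs):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_rwlock_t leaf_lock = PTHREAD_RWLOCK_INITIALIZER;
static char leaf[64] = "dir item: foo -> inode 257";

static void copy_items_to_log(char *log, size_t log_size)
{
	char clone[sizeof(leaf)];

	/* Clone the shared leaf while holding its read lock ... */
	pthread_rwlock_rdlock(&leaf_lock);
	memcpy(clone, leaf, sizeof(clone));
	pthread_rwlock_unlock(&leaf_lock);

	/*
	 * ... and only then do the work that takes other locks or may
	 * block (inserting into the log tree, csum lookups, ...), reading
	 * from the private clone instead of the locked source leaf.
	 */
	snprintf(log, log_size, "logged: %s", clone);
}

int main(void)
{
	char log[128];

	copy_items_to_log(log, sizeof(log));
	printf("%s\n", log);
	return 0;
}
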
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1912abf6d02075cb2e21aff34c37d957fb75deda..c9e2b0c85309971b016704150361f2994afb2f4b 100644
@@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
                        super[i] = page_address(page[i]);
                }
 
-               if (super[0]->generation > super[1]->generation)
+               if (btrfs_super_generation(super[0]) >
+                   btrfs_super_generation(super[1]))
                        sector = zones[1].start;
                else
                        sector = zones[0].start;
@@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
                goto out;
        }
 
-       zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
+       zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
        if (!zones) {
                ret = -ENOMEM;
                goto out;
@@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
        }
 
 
-       kfree(zones);
+       kvfree(zones);
 
        switch (bdev_zoned_model(bdev)) {
        case BLK_ZONED_HM:
@@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
        return 0;
 
 out:
-       kfree(zones);
+       kvfree(zones);
 out_free_zone_info:
        btrfs_destroy_dev_zone_info(device);
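
The first zoned.c hunk matters because the superblock generation is stored on disk in little-endian byte order; comparing the raw fields only happens to work on little-endian hosts, and btrfs_super_generation() performs the le64-to-CPU conversion. A userspace sketch of the same idea using le64toh() from glibc's <endian.h> (the byte values are made up for illustration):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Two on-disk generation fields, stored little-endian. */
	unsigned char disk0[8] = { 0x01, 0x01, 0, 0, 0, 0, 0, 0 }; /* 257 */
	unsigned char disk1[8] = { 0x00, 0x02, 0, 0, 0, 0, 0, 0 }; /* 512 */
	uint64_t raw0, raw1;

	memcpy(&raw0, disk0, sizeof(raw0));
	memcpy(&raw1, disk1, sizeof(raw1));

	/*
	 * Comparing raw0 > raw1 directly only gives the right answer on a
	 * little-endian host; converting first is correct everywhere.
	 */
	printf("gen0=%llu gen1=%llu, newer is super %d\n",
	       (unsigned long long)le64toh(raw0),
	       (unsigned long long)le64toh(raw1),
	       le64toh(raw0) > le64toh(raw1) ? 0 : 1);
	return 0;
}

The remaining zoned.c hunks switch the zone array from kcalloc() to kvcalloc(), so a large allocation can fall back to vmalloc instead of triggering high-order allocation warnings, with the matching frees changed to kvfree().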