Merge tag 'for-4.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 20 May 2018 19:04:27 +0000 (12:04 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 20 May 2018 19:04:27 +0000 (12:04 -0700)
Pull btrfs fixes from David Sterba:
 "We've accumulated some fixes during the last week, some of them were
  in the works for a longer time but there are some newer ones too.

  Most of the fixes have a reproducer and fix user visible problems,
  also candidates for stable kernels. They IMHO qualify for a late rc,
  though I did not expect that many"

* tag 'for-4.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix crash when trying to resume balance without the resume flag
  btrfs: Fix delalloc inodes invalidation during transaction abort
  btrfs: Split btrfs_del_delalloc_inode into 2 functions
  btrfs: fix reading stale metadata blocks after degraded raid1 mounts
  btrfs: property: Set incompat flag if lzo/zstd compression is set
  Btrfs: fix duplicate extents after fsync of file with prealloc extents
  Btrfs: fix xattr loss after power failure
  Btrfs: send, fix invalid access to commit roots due to concurrent snapshotting

fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/inode.c
fs/btrfs/props.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c

index 3fd44835b3869effbfe6ced6c3b8b373617698e5..8c68961925b1482517bfe2c96635f5ee1cc79694 100644 (file)
@@ -2436,10 +2436,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
        if (p->reada != READA_NONE)
                reada_for_search(fs_info, p, level, slot, key->objectid);
 
-       btrfs_release_path(p);
-
        ret = -EAGAIN;
-       tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
+       tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
                              &first_key);
        if (!IS_ERR(tmp)) {
                /*
@@ -2454,6 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
        } else {
                ret = PTR_ERR(tmp);
        }
+
+       btrfs_release_path(p);
        return ret;
 }
 
@@ -5414,12 +5414,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        down_read(&fs_info->commit_root_sem);
        left_level = btrfs_header_level(left_root->commit_root);
        left_root_level = left_level;
-       left_path->nodes[left_level] = left_root->commit_root;
+       left_path->nodes[left_level] =
+                       btrfs_clone_extent_buffer(left_root->commit_root);
+       if (!left_path->nodes[left_level]) {
+               up_read(&fs_info->commit_root_sem);
+               ret = -ENOMEM;
+               goto out;
+       }
        extent_buffer_get(left_path->nodes[left_level]);
 
        right_level = btrfs_header_level(right_root->commit_root);
        right_root_level = right_level;
-       right_path->nodes[right_level] = right_root->commit_root;
+       right_path->nodes[right_level] =
+                       btrfs_clone_extent_buffer(right_root->commit_root);
+       if (!right_path->nodes[right_level]) {
+               up_read(&fs_info->commit_root_sem);
+               ret = -ENOMEM;
+               goto out;
+       }
        extent_buffer_get(right_path->nodes[right_level]);
        up_read(&fs_info->commit_root_sem);
 
index 2771cc56a622f50cdf834a291aa4f43a86798cb7..0d422c9908b8085f531a752a4bd6d5bf1b430e02 100644 (file)
@@ -3182,6 +3182,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
                              u64 *ram_bytes);
 
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+                               struct btrfs_inode *inode);
 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
 int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
index 60caa68c3618d5a072f332be97e7db4ea7ba67b0..c3504b4d281b5cd76bb0861b781e228bcec4b3bb 100644 (file)
@@ -3818,6 +3818,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
        set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
 
        btrfs_free_qgroup_config(fs_info);
+       ASSERT(list_empty(&fs_info->delalloc_roots));
 
        if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
                btrfs_info(fs_info, "at unmount delalloc count %lld",
@@ -4125,15 +4126,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
 
 static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
 {
+       /* cleanup FS via transaction */
+       btrfs_cleanup_transaction(fs_info);
+
        mutex_lock(&fs_info->cleaner_mutex);
        btrfs_run_delayed_iputs(fs_info);
        mutex_unlock(&fs_info->cleaner_mutex);
 
        down_write(&fs_info->cleanup_work_sem);
        up_write(&fs_info->cleanup_work_sem);
-
-       /* cleanup FS via transaction */
-       btrfs_cleanup_transaction(fs_info);
 }
 
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
@@ -4258,19 +4259,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
        list_splice_init(&root->delalloc_inodes, &splice);
 
        while (!list_empty(&splice)) {
+               struct inode *inode = NULL;
                btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
                                               delalloc_inodes);
-
-               list_del_init(&btrfs_inode->delalloc_inodes);
-               clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
-                         &btrfs_inode->runtime_flags);
+               __btrfs_del_delalloc_inode(root, btrfs_inode);
                spin_unlock(&root->delalloc_lock);
 
-               btrfs_invalidate_inodes(btrfs_inode->root);
-
+               /*
+                * Make sure we get a live inode and that it'll not disappear
+                * meanwhile.
+                */
+               inode = igrab(&btrfs_inode->vfs_inode);
+               if (inode) {
+                       invalidate_inode_pages2(inode->i_mapping);
+                       iput(inode);
+               }
                spin_lock(&root->delalloc_lock);
        }
-
        spin_unlock(&root->delalloc_lock);
 }
 
@@ -4286,7 +4291,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
        while (!list_empty(&splice)) {
                root = list_first_entry(&splice, struct btrfs_root,
                                         delalloc_root);
-               list_del_init(&root->delalloc_root);
                root = btrfs_grab_fs_root(root);
                BUG_ON(!root);
                spin_unlock(&fs_info->delalloc_root_lock);
index d241285a0d2a8dea0ff858f58a5590c18298f2b0..8e604e7071f14cf166652369e6f167fab5b5f462 100644 (file)
@@ -1742,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
        spin_unlock(&root->delalloc_lock);
 }
 
-static void btrfs_del_delalloc_inode(struct btrfs_root *root,
-                                    struct btrfs_inode *inode)
+
+void __btrfs_del_delalloc_inode(struct btrfs_root *root,
+                               struct btrfs_inode *inode)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 
-       spin_lock(&root->delalloc_lock);
        if (!list_empty(&inode->delalloc_inodes)) {
                list_del_init(&inode->delalloc_inodes);
                clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
@@ -1760,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
                        spin_unlock(&fs_info->delalloc_root_lock);
                }
        }
+}
+
+static void btrfs_del_delalloc_inode(struct btrfs_root *root,
+                                    struct btrfs_inode *inode)
+{
+       spin_lock(&root->delalloc_lock);
+       __btrfs_del_delalloc_inode(root, inode);
        spin_unlock(&root->delalloc_lock);
 }
 
index 53a8c95828e33a537525a193dfaf20e030505bbc..dc6140013ae8194739a8aa6a387f12c35794bdf9 100644 (file)
@@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode,
                                  const char *value,
                                  size_t len)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        int type;
 
        if (len == 0) {
@@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode,
                return 0;
        }
 
-       if (!strncmp("lzo", value, 3))
+       if (!strncmp("lzo", value, 3)) {
                type = BTRFS_COMPRESS_LZO;
-       else if (!strncmp("zlib", value, 4))
+               btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+       } else if (!strncmp("zlib", value, 4)) {
                type = BTRFS_COMPRESS_ZLIB;
-       else if (!strncmp("zstd", value, len))
+       } else if (!strncmp("zstd", value, len)) {
                type = BTRFS_COMPRESS_ZSTD;
-       else
+               btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
+       } else {
                return -EINVAL;
+       }
 
        BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
        BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
index 43758e30aa7a97cb240c7b12146f040f5ce8d3d9..8f23a94dab770c00933f39a4ed1fb845edac22dd 100644 (file)
@@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * Log all prealloc extents beyond the inode's i_size to make sure we do not
+ * lose them after doing a fast fsync and replaying the log. We scan the
+ * subvolume's root instead of iterating the inode's extent map tree because
+ * otherwise we can log incorrect extent items based on extent map conversion.
+ * That can happen due to the fact that extent maps are merged when they
+ * are not in the extent map tree's list of modified extents.
+ */
+static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
+                                     struct btrfs_inode *inode,
+                                     struct btrfs_path *path)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_key key;
+       const u64 i_size = i_size_read(&inode->vfs_inode);
+       const u64 ino = btrfs_ino(inode);
+       struct btrfs_path *dst_path = NULL;
+       u64 last_extent = (u64)-1;
+       int ins_nr = 0;
+       int start_slot;
+       int ret;
+
+       if (!(inode->flags & BTRFS_INODE_PREALLOC))
+               return 0;
+
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = i_size;
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+
+       while (true) {
+               struct extent_buffer *leaf = path->nodes[0];
+               int slot = path->slots[0];
+
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       if (ins_nr > 0) {
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, start_slot,
+                                                ins_nr, 1, 0);
+                               if (ret < 0)
+                                       goto out;
+                               ins_nr = 0;
+                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out;
+                       if (ret > 0) {
+                               ret = 0;
+                               break;
+                       }
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid > ino)
+                       break;
+               if (WARN_ON_ONCE(key.objectid < ino) ||
+                   key.type < BTRFS_EXTENT_DATA_KEY ||
+                   key.offset < i_size) {
+                       path->slots[0]++;
+                       continue;
+               }
+               if (last_extent == (u64)-1) {
+                       last_extent = key.offset;
+                       /*
+                        * Avoid logging extent items logged in past fsync calls
+                        * and leading to duplicate keys in the log tree.
+                        */
+                       do {
+                               ret = btrfs_truncate_inode_items(trans,
+                                                        root->log_root,
+                                                        &inode->vfs_inode,
+                                                        i_size,
+                                                        BTRFS_EXTENT_DATA_KEY);
+                       } while (ret == -EAGAIN);
+                       if (ret)
+                               goto out;
+               }
+               if (ins_nr == 0)
+                       start_slot = slot;
+               ins_nr++;
+               path->slots[0]++;
+               if (!dst_path) {
+                       dst_path = btrfs_alloc_path();
+                       if (!dst_path) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+               }
+       }
+       if (ins_nr > 0) {
+               ret = copy_items(trans, inode, dst_path, path, &last_extent,
+                                start_slot, ins_nr, 1, 0);
+               if (ret > 0)
+                       ret = 0;
+       }
+out:
+       btrfs_release_path(path);
+       btrfs_free_path(dst_path);
+       return ret;
+}
+
 static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct btrfs_inode *inode,
@@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                if (em->generation <= test_gen)
                        continue;
 
+               /* We log prealloc extents beyond eof later. */
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
+                   em->start >= i_size_read(&inode->vfs_inode))
+                       continue;
+
                if (em->start < logged_start)
                        logged_start = em->start;
                if ((em->start + em->len - 1) > logged_end)
@@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                num++;
        }
 
-       /*
-        * Add all prealloc extents beyond the inode's i_size to make sure we
-        * don't lose them after doing a fast fsync and replaying the log.
-        */
-       if (inode->flags & BTRFS_INODE_PREALLOC) {
-               struct rb_node *node;
-
-               for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
-                       em = rb_entry(node, struct extent_map, rb_node);
-                       if (em->start < i_size_read(&inode->vfs_inode))
-                               break;
-                       if (!list_empty(&em->list))
-                               continue;
-                       /* Same as above loop. */
-                       if (++num > 32768) {
-                               list_del_init(&tree->modified_extents);
-                               ret = -EFBIG;
-                               goto process;
-                       }
-                       refcount_inc(&em->refs);
-                       set_bit(EXTENT_FLAG_LOGGING, &em->flags);
-                       list_add_tail(&em->list, &extents);
-               }
-       }
-
        list_sort(NULL, &extents, extent_cmp);
        btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
        /*
@@ -4443,6 +4527,9 @@ process:
        up_write(&inode->dio_sem);
 
        btrfs_release_path(path);
+       if (!ret)
+               ret = btrfs_log_prealloc_extents(trans, inode, path);
+
        return ret;
 }
 
@@ -4827,6 +4914,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct extent_map_tree *em_tree = &inode->extent_tree;
        u64 logged_isize = 0;
        bool need_log_inode_item = true;
+       bool xattrs_logged = false;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -5128,6 +5216,7 @@ next_key:
        err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
        if (err)
                goto out_unlock;
+       xattrs_logged = true;
        if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
                btrfs_release_path(path);
                btrfs_release_path(dst_path);
@@ -5140,6 +5229,11 @@ log_extents:
        btrfs_release_path(dst_path);
        if (need_log_inode_item) {
                err = log_inode_item(trans, log, dst_path, inode);
+               if (!err && !xattrs_logged) {
+                       err = btrfs_log_all_xattrs(trans, root, inode, path,
+                                                  dst_path);
+                       btrfs_release_path(path);
+               }
                if (err)
                        goto out_unlock;
        }
index 292266f6ab9c9d8dfa18422998ec1884ae57369d..be3fc701f38948e37e5d776ee93413e0d06e85ff 100644 (file)
@@ -4052,6 +4052,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
                return 0;
        }
 
+       /*
+        * A ro->rw remount sequence should continue with the paused balance
+        * regardless of who pauses it, system or the user as of now, so set
+        * the resume flag.
+        */
+       spin_lock(&fs_info->balance_lock);
+       fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
+       spin_unlock(&fs_info->balance_lock);
+
        tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
        return PTR_ERR_OR_ZERO(tsk);
 }