Merge branch 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 16 May 2015 22:50:58 +0000 (15:50 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 16 May 2015 22:50:58 +0000 (15:50 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 May 2015 22:50:58 +0000 (15:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 16 May 2015 22:50:58 +0000 (15:50 -0700)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 0ec8e228b89f42505cc0c9c8ffef96e1a2f3f9a4..7effed6f2fa64c136be27413a2cac47e41be4c3f 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3180,8 +3180,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
         btrfs_mark_buffer_dirty(leaf);
  fail:
         btrfs_release_path(path);
-       if (ret)
-               btrfs_abort_transaction(trans, root, ret);
         return ret;
  
  }
@@ -3487,8 +3485,30 @@ again:
                                 ret = 0;
                         }
                 }
-               if (!ret)
+               if (!ret) {
                         ret = write_one_cache_group(trans, root, path, cache);
+                       /*
+                        * Our block group might still be attached to the list
+                        * of new block groups in the transaction handle of some
+                        * other task (struct btrfs_trans_handle->new_bgs). This
+                        * means its block group item isn't yet in the extent
+                        * tree. If this happens ignore the error, as we will
+                        * try again later in the critical section of the
+                        * transaction commit.
+                        */
+                       if (ret == -ENOENT) {
+                               ret = 0;
+                               spin_lock(&cur_trans->dirty_bgs_lock);
+                               if (list_empty(&cache->dirty_list)) {
+                                       list_add_tail(&cache->dirty_list,
+                                                     &cur_trans->dirty_bgs);
+                                       btrfs_get_block_group(cache);
+                               }
+                               spin_unlock(&cur_trans->dirty_bgs_lock);
+                       } else if (ret) {
+                               btrfs_abort_transaction(trans, root, ret);
+                       }
+               }
  
                 /* if its not on the io list, we need to put the block group */
                 if (should_put)
@@ -3597,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                 ret = 0;
                         }
                 }
-               if (!ret)
+               if (!ret) {
                         ret = write_one_cache_group(trans, root, path, cache);
+                       if (ret)
+                               btrfs_abort_transaction(trans, root, ret);
+               }
  
                 /* if its not on the io list, we need to put the block group */
                 if (should_put)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index 43af5a61ad25b4dbb3f4c0f2cb89160c120d1ac7..c32d226bfeccbb28f25f2f417fa9e57b14411136 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4772,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
                                start >> PAGE_CACHE_SHIFT);
         if (eb && atomic_inc_not_zero(&eb->refs)) {
                 rcu_read_unlock();
+               /*
+                * Lock our eb's refs_lock to avoid races with
+                * free_extent_buffer. When we get our eb it might be flagged
+                * with EXTENT_BUFFER_STALE and another task running
+                * free_extent_buffer might have seen that flag set,
+                * eb->refs == 2, that the buffer isn't under IO (dirty and
+                * writeback flags not set) and it's still in the tree (flag
+                * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+                * of decrementing the extent buffer's reference count twice.
+                * So here we could race and increment the eb's reference count,
+                * clear its stale flag, mark it as dirty and drop our reference
+                * before the other task finishes executing free_extent_buffer,
+                * which would later result in an attempt to free an extent
+                * buffer that is dirty.
+                */
+               if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+                       spin_lock(&eb->refs_lock);
+                       spin_unlock(&eb->refs_lock);
+               }
                 mark_extent_buffer_accessed(eb, NULL);
                 return eb;
         }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c

index 5e020d76fd07b00da1bacec4c5662dc0f1078ba7..9dbe5b548fa6a74029960de0ea1d8ebf63f835e8 100644 (file)
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3466,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
         int ret;
         struct btrfs_io_ctl io_ctl;
+       bool release_metadata = true;
  
         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                 return 0;
@@ -3473,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
         memset(&io_ctl, 0, sizeof(io_ctl));
         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
                                       trans, path, 0);
-       if (!ret)
+       if (!ret) {
+               /*
+                * At this point writepages() didn't error out, so our metadata
+                * reservation is released when the writeback finishes, at
+                * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
+                * with or without an error.
+                */
+               release_metadata = false;
                 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+       }
  
         if (ret) {
-               btrfs_delalloc_release_metadata(inode, inode->i_size);
+               if (release_metadata)
+                       btrfs_delalloc_release_metadata(inode, inode->i_size);
  #ifdef DEBUG
                 btrfs_err(root->fs_info,
                         "failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index 157cc54fc63486e485a95bf6d8d6da692ef171ca..760c4a5e096b4d5a403f7923ad4b65537a085886 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
  int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
  {
         int ret = 0;
+       int ret_wb = 0;
         u64 end;
         u64 orig_end;
         struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
         if (ret)
                 return ret;
  
-       ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
-       if (ret)
-               return ret;
+       /*
+        * If we have a writeback error don't return immediately. Wait first
+        * for any ordered extents that haven't completed yet. This is to make
+        * sure no one can dirty the same page ranges and call writepages()
+        * before the ordered extents complete - to avoid failures (-EEXIST)
+        * when adding the new ordered extents to the ordered tree.
+        */
+       ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
  
         end = orig_end;
         while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                         break;
                 end--;
         }
-       return ret;
+       return ret_wb ? ret_wb : ret;
  }
  
  /*
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 16 May 2015 22:50:58 +0000 (15:50 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 16 May 2015 22:50:58 +0000 (15:50 -0700)
fs/btrfs/extent-tree.c		patch | blob | blame | history
fs/btrfs/extent_io.c		patch | blob | blame | history
fs/btrfs/free-space-cache.c		patch | blob | blame | history
fs/btrfs/ordered-data.c		patch | blob | blame | history