Merge branch 'for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
diff --combined fs/btrfs/disk-io.c

index 0d98aee34fee8f716771e46a70cbee478ac27e2d,aa59871885da6d5623891c0e2de0599dd2727cda..295795aebe0b42330cc1147e02340eb2c59f1d7b
--- 1/fs/btrfs/disk-io.c
--- 2/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -703,7 -703,7 +703,7 @@@ static int btree_io_failed_hook(struct 
         return -EIO;    /* we fixed nothing */
   }
   
- -static void end_workqueue_bio(struct bio *bio, int err)
+ +static void end_workqueue_bio(struct bio *bio)
   {
         struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
         struct btrfs_fs_info *fs_info;
@@@ -711,7 -711,7 +711,7 @@@
         btrfs_work_func_t func;
   
         fs_info = end_io_wq->info;
- -      end_io_wq->error = err;
+ +      end_io_wq->error = bio->bi_error;
   
         if (bio->bi_rw & REQ_WRITE) {
                 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@@ -808,8 -808,7 +808,8 @@@ static void run_one_async_done(struct b
   
         /* If an error occured we just want to clean up the bio and move on */
         if (async->error) {
- -              bio_endio(async->bio, async->error);
+ +              async->bio->bi_error = async->error;
+ +              bio_endio(async->bio);
                 return;
         }
   
@@@ -909,10 -908,8 +909,10 @@@ static int __btree_submit_bio_done(stru
          * submission context.  Just jump into btrfs_map_bio
          */
         ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
- -      if (ret)
- -              bio_endio(bio, ret);
+ +      if (ret) {
+ +              bio->bi_error = ret;
+ +              bio_endio(bio);
+ +      }
         return ret;
   }
   
@@@ -963,13 -960,10 +963,13 @@@ static int btree_submit_bio_hook(struc
                                           __btree_submit_bio_done);
         }
   
- -      if (ret) {
+ +      if (ret)
+ +              goto out_w_error;
+ +      return 0;
+ +
   out_w_error:
- -              bio_endio(bio, ret);
- -      }
+ +      bio->bi_error = ret;
+ +      bio_endio(bio);
         return ret;
   }
   
@@@ -1742,15 -1736,16 +1742,15 @@@ static void end_workqueue_fn(struct btr
   {
         struct bio *bio;
         struct btrfs_end_io_wq *end_io_wq;
- -      int error;
   
         end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
         bio = end_io_wq->bio;
   
- -      error = end_io_wq->error;
+ +      bio->bi_error = end_io_wq->error;
         bio->bi_private = end_io_wq->private;
         bio->bi_end_io = end_io_wq->end_io;
         kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
- -      bio_endio(bio, error);
+ +      bio_endio(bio);
   }
   
   static int cleaner_kthread(void *arg)
@@@ -3330,8 -3325,10 +3330,8 @@@ static int write_dev_supers(struct btrf
    * endio for the write_dev_flush, this will wake anyone waiting
    * for the barrier when it is done
    */
- -static void btrfs_end_empty_barrier(struct bio *bio, int err)
+ +static void btrfs_end_empty_barrier(struct bio *bio)
   {
- -      if (err)
- -              clear_bit(BIO_UPTODATE, &bio->bi_flags);
         if (bio->bi_private)
                 complete(bio->bi_private);
         bio_put(bio);
@@@ -3359,8 -3356,8 +3359,8 @@@ static int write_dev_flush(struct btrfs
   
                 wait_for_completion(&device->flush_wait);
   
- -              if (!bio_flagged(bio, BIO_UPTODATE)) {
- -                      ret = -EIO;
+ +              if (bio->bi_error) {
+ +                      ret = bio->bi_error;
                         btrfs_dev_stat_inc_and_print(device,
                                 BTRFS_DEV_STAT_FLUSH_ERRS);
                 }
@@@ -3765,9 -3762,7 +3765,7 @@@ void close_ctree(struct btrfs_root *roo
                  * block groups queued for removal, the deletion will be
                  * skipped when we quit the cleaner thread.
                  */
-               mutex_lock(&root->fs_info->cleaner_mutex);
                 btrfs_delete_unused_bgs(root->fs_info);
-               mutex_unlock(&root->fs_info->cleaner_mutex);
   
                 ret = btrfs_commit_super(root);
                 if (ret)
diff --combined fs/btrfs/extent_io.c

index f1018cfbfefad0f7b43920bf32ece43b46228276,11aa8f743b9041b1baa1414c6f310a02d7d60a42..e2357e31609a2e8469b38c7e95b66f6dd68fcd93
--- 1/fs/btrfs/extent_io.c
--- 2/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@@ -2486,7 -2486,7 +2486,7 @@@ int end_extent_writepage(struct page *p
    * Scheduling is not allowed, so the extent state tree is expected
    * to have one and only one object corresponding to this IO.
    */
- -static void end_bio_extent_writepage(struct bio *bio, int err)
+ +static void end_bio_extent_writepage(struct bio *bio)
   {
         struct bio_vec *bvec;
         u64 start;
@@@ -2516,7 -2516,7 +2516,7 @@@
                 start = page_offset(page);
                 end = start + bvec->bv_offset + bvec->bv_len - 1;
   
- -              if (end_extent_writepage(page, err, start, end))
+ +              if (end_extent_writepage(page, bio->bi_error, start, end))
                         continue;
   
                 end_page_writeback(page);
@@@ -2548,10 -2548,10 +2548,10 @@@ endio_readpage_release_extent(struct ex
    * Scheduling is not allowed, so the extent state tree is expected
    * to have one and only one object corresponding to this IO.
    */
- -static void end_bio_extent_readpage(struct bio *bio, int err)
+ +static void end_bio_extent_readpage(struct bio *bio)
   {
         struct bio_vec *bvec;
- -      int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ +      int uptodate = !bio->bi_error;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
         struct extent_io_tree *tree;
         u64 offset = 0;
@@@ -2564,13 -2564,16 +2564,13 @@@
         int ret;
         int i;
   
- -      if (err)
- -              uptodate = 0;
- -
         bio_for_each_segment_all(bvec, bio, i) {
                 struct page *page = bvec->bv_page;
                 struct inode *inode = page->mapping->host;
   
                 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
- -                       "mirror=%u\n", (u64)bio->bi_iter.bi_sector, err,
- -                       io_bio->mirror_num);
+ +                       "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
+ +                       bio->bi_error, io_bio->mirror_num);
                 tree = &BTRFS_I(inode)->io_tree;
   
                 /* We always issue full-page reads, but if some block
@@@ -2611,7 -2614,8 +2611,7 @@@
   
                 if (tree->ops && tree->ops->readpage_io_failed_hook) {
                         ret = tree->ops->readpage_io_failed_hook(page, mirror);
- -                      if (!ret && !err &&
- -                          test_bit(BIO_UPTODATE, &bio->bi_flags))
+ +                      if (!ret && !bio->bi_error)
                                 uptodate = 1;
                 } else {
                         /*
@@@ -2627,7 -2631,10 +2627,7 @@@
                         ret = bio_readpage_error(bio, offset, page, start, end,
                                                  mirror);
                         if (ret == 0) {
- -                              uptodate =
- -                                      test_bit(BIO_UPTODATE, &bio->bi_flags);
- -                              if (err)
- -                                      uptodate = 0;
+ +                              uptodate = !bio->bi_error;
                                 offset += len;
                                 continue;
                         }
@@@ -2677,7 -2684,7 +2677,7 @@@ readpage_ok
                 endio_readpage_release_extent(tree, extent_start, extent_len,
                                               uptodate);
         if (io_bio->end_io)
- -              io_bio->end_io(io_bio, err);
+ +              io_bio->end_io(io_bio, bio->bi_error);
         bio_put(bio);
   }
   
@@@ -2798,11 -2805,14 +2798,12 @@@ static int submit_extent_page(int rw, s
                               bio_end_io_t end_io_func,
                               int mirror_num,
                               unsigned long prev_bio_flags,
-                             unsigned long bio_flags)
+                             unsigned long bio_flags,
+                             bool force_bio_submit)
   {
         int ret = 0;
         struct bio *bio;
- -      int nr;
         int contig = 0;
- -      int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
         int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
         size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
   
@@@ -2814,6 -2824,7 +2815,7 @@@
                         contig = bio_end_sector(bio) == sector;
   
                 if (prev_bio_flags != bio_flags || !contig ||
+                   force_bio_submit ||
                     merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
                     bio_add_page(bio, page, page_size, offset) < page_size) {
                         ret = submit_one_bio(rw, bio, mirror_num,
@@@ -2829,9 -2840,12 +2831,9 @@@
                         return 0;
                 }
         }
- -      if (this_compressed)
- -              nr = BIO_MAX_PAGES;
- -      else
- -              nr = bio_get_nr_vecs(bdev);
   
- -      bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+ +      bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
+ +                      GFP_NOFS | __GFP_HIGH);
         if (!bio)
                 return -ENOMEM;
   
@@@ -2910,7 -2924,8 +2912,8 @@@ static int __do_readpage(struct extent_
                          get_extent_t *get_extent,
                          struct extent_map **em_cached,
                          struct bio **bio, int mirror_num,
-                        unsigned long *bio_flags, int rw)
+                        unsigned long *bio_flags, int rw,
+                        u64 *prev_em_start)
   {
         struct inode *inode = page->mapping->host;
         u64 start = page_offset(page);
@@@ -2958,6 -2973,7 +2961,7 @@@
         }
         while (cur <= end) {
                 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+               bool force_bio_submit = false;
   
                 if (cur >= last_byte) {
                         char *userpage;
@@@ -3008,6 -3024,49 +3012,49 @@@
                 block_start = em->block_start;
                 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                         block_start = EXTENT_MAP_HOLE;
+ 
+               /*
+                * If we have a file range that points to a compressed extent
+                * and it's followed by a consecutive file range that points to
+                * the same compressed extent (possibly with a different
+                * offset and/or length, so it either points to the whole extent
+                * or only part of it), we must make sure we do not submit a
+                * single bio to populate the pages for the 2 ranges because
+                * this makes the compressed extent read zero out the pages
+                * belonging to the 2nd range. Imagine the following scenario:
+                *
+                *  File layout
+                *  [0 - 8K]                     [8K - 24K]
+                *    |                               |
+                *    |                               |
+                * points to extent X,         points to extent X,
+                * offset 4K, length of 8K     offset 0, length 16K
+                *
+                * [extent X, compressed length = 4K uncompressed length = 16K]
+                *
+                * If the bio to read the compressed extent covers both ranges,
+                * it will decompress extent X into the pages belonging to the
+                * first range and then it will stop, zeroing out the remaining
+                * pages that belong to the other range that points to extent X.
+                * So here we make sure we submit 2 bios, one for the first
+                * range and another one for the second range. Both will target
+                * the same physical extent from disk, but we can't currently
+                * make the compressed bio endio callback populate the pages
+                * for both ranges because each compressed bio is tightly
+                * coupled with a single extent map, and each range can have
+                * an extent map with a different offset value relative to the
+                * uncompressed data of our extent and different lengths. This
+                * is a corner case so we prioritize correctness over
+                * non-optimal behavior (submitting 2 bios for the same extent).
+                */
+               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
+                   prev_em_start && *prev_em_start != (u64)-1 &&
+                   *prev_em_start != em->orig_start)
+                       force_bio_submit = true;
+ 
+               if (prev_em_start)
+                       *prev_em_start = em->orig_start;
+ 
                 free_extent_map(em);
                 em = NULL;
   
@@@ -3057,7 -3116,8 +3104,8 @@@
                                          bdev, bio, pnr,
                                          end_bio_extent_readpage, mirror_num,
                                          *bio_flags,
-                                        this_bio_flag);
+                                        this_bio_flag,
+                                        force_bio_submit);
                 if (!ret) {
                         nr++;
                         *bio_flags = this_bio_flag;
@@@ -3089,6 -3149,7 +3137,7 @@@ static inline void __do_contiguous_read
         struct inode *inode;
         struct btrfs_ordered_extent *ordered;
         int index;
+       u64 prev_em_start = (u64)-1;
   
         inode = pages[0]->mapping->host;
         while (1) {
@@@ -3104,7 -3165,7 +3153,7 @@@
   
         for (index = 0; index < nr_pages; index++) {
                 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
-                             mirror_num, bio_flags, rw);
+                             mirror_num, bio_flags, rw, &prev_em_start);
                 page_cache_release(pages[index]);
         }
   }
@@@ -3172,7 -3233,7 +3221,7 @@@ static int __extent_read_full_page(stru
         }
   
         ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
-                           bio_flags, rw);
+                           bio_flags, rw, NULL);
         return ret;
   }
   
@@@ -3198,7 -3259,7 +3247,7 @@@ int extent_read_full_page_nolock(struc
         int ret;
   
         ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-                                     &bio_flags, READ);
+                           &bio_flags, READ, NULL);
         if (bio)
                 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
         return ret;
@@@ -3451,7 -3512,7 +3500,7 @@@ static noinline_for_stack int __extent_
                                                  sector, iosize, pg_offset,
                                                  bdev, &epd->bio, max_nr,
                                                  end_bio_extent_writepage,
-                                                0, 0, 0);
+                                                0, 0, 0, false);
                         if (ret)
                                 SetPageError(page);
                 }
@@@ -3697,7 -3758,7 +3746,7 @@@ static void set_btree_ioerr(struct pag
         }
   }
   
- -static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
+ +static void end_bio_extent_buffer_writepage(struct bio *bio)
   {
         struct bio_vec *bvec;
         struct extent_buffer *eb;
@@@ -3710,8 -3771,7 +3759,8 @@@
                 BUG_ON(!eb);
                 done = atomic_dec_and_test(&eb->io_pages);
   
- -              if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
+ +              if (bio->bi_error ||
+ +                  test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
                         ClearPageUptodate(page);
                         set_btree_ioerr(page);
                 }
@@@ -3754,7 -3814,7 +3803,7 @@@ static noinline_for_stack int write_one
                 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
                                          PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
                                          -1, end_bio_extent_buffer_writepage,
-                                        0, epd->bio_flags, bio_flags);
+                                        0, epd->bio_flags, bio_flags, false);
                 epd->bio_flags = bio_flags;
                 if (ret) {
                         set_btree_ioerr(p);
diff --combined fs/btrfs/inode.c

index a0fa7253a2d77b6faa2e71366c762b331cd784b2,b7e439bf5e4f79923cee1e5cc982d5d47f032330..611b66d73e80ba0e5f97b4415595d20f8ae35e1a
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -1845,10 -1845,8 +1845,10 @@@ static int __btrfs_submit_bio_done(stru
         int ret;
   
         ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
- -      if (ret)
- -              bio_endio(bio, ret);
+ +      if (ret) {
+ +              bio->bi_error = ret;
+ +              bio_endio(bio);
+ +      }
         return ret;
   }
   
@@@ -1908,10 -1906,8 +1908,10 @@@ mapit
         ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
   
   out:
- -      if (ret < 0)
- -              bio_endio(bio, ret);
+ +      if (ret < 0) {
+ +              bio->bi_error = ret;
+ +              bio_endio(bio);
+ +      }
         return ret;
   }
   
@@@ -5084,7 -5080,8 +5084,8 @@@ void btrfs_evict_inode(struct inode *in
                 goto no_delete;
         }
         /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
-       btrfs_wait_ordered_range(inode, 0, (u64)-1);
+       if (!special_file(inode->i_mode))
+               btrfs_wait_ordered_range(inode, 0, (u64)-1);
   
         btrfs_free_io_failure_record(inode, 0, (u64)-1);
   
@@@ -7408,6 -7405,10 +7409,10 @@@ static struct extent_map *create_pinned
         return em;
   }
   
+ struct btrfs_dio_data {
+       u64 outstanding_extents;
+       u64 reserve;
+ };
   
   static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
@@@ -7415,10 -7416,10 +7420,10 @@@
         struct extent_map *em;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct extent_state *cached_state = NULL;
+       struct btrfs_dio_data *dio_data = NULL;
         u64 start = iblock << inode->i_blkbits;
         u64 lockstart, lockend;
         u64 len = bh_result->b_size;
-       u64 *outstanding_extents = NULL;
         int unlock_bits = EXTENT_LOCKED;
         int ret = 0;
   
@@@ -7436,7 -7437,7 +7441,7 @@@
                  * that anything that needs to check if there's a transction doesn't get
                  * confused.
                  */
-               outstanding_extents = current->journal_info;
+               dio_data = current->journal_info;
                 current->journal_info = NULL;
         }
   
@@@ -7568,17 -7569,18 +7573,18 @@@ unlock
                  * within our reservation, otherwise we need to adjust our inode
                  * counter appropriately.
                  */
-               if (*outstanding_extents) {
-                       (*outstanding_extents)--;
+               if (dio_data->outstanding_extents) {
+                       (dio_data->outstanding_extents)--;
                 } else {
                         spin_lock(&BTRFS_I(inode)->lock);
                         BTRFS_I(inode)->outstanding_extents++;
                         spin_unlock(&BTRFS_I(inode)->lock);
                 }
   
-               current->journal_info = outstanding_extents;
                 btrfs_free_reserved_data_space(inode, len);
-               set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
+               WARN_ON(dio_data->reserve < len);
+               dio_data->reserve -= len;
+               current->journal_info = dio_data;
         }
   
         /*
@@@ -7601,8 -7603,8 +7607,8 @@@
   unlock_err:
         clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                          unlock_bits, 1, 0, &cached_state, GFP_NOFS);
-       if (outstanding_extents)
-               current->journal_info = outstanding_extents;
+       if (dio_data)
+               current->journal_info = dio_data;
         return ret;
   }
   
@@@ -7720,13 -7722,13 +7726,13 @@@ struct btrfs_retry_complete 
         int uptodate;
   };
   
- -static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
+ +static void btrfs_retry_endio_nocsum(struct bio *bio)
   {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct bio_vec *bvec;
         int i;
   
- -      if (err)
+ +      if (bio->bi_error)
                 goto end;
   
         done->uptodate = 1;
@@@ -7775,7 -7777,7 +7781,7 @@@ try_again
         return 0;
   }
   
- -static void btrfs_retry_endio(struct bio *bio, int err)
+ +static void btrfs_retry_endio(struct bio *bio)
   {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
@@@ -7784,7 -7786,7 +7790,7 @@@
         int ret;
         int i;
   
- -      if (err)
+ +      if (bio->bi_error)
                 goto end;
   
         uptodate = 1;
@@@ -7867,13 -7869,12 +7873,13 @@@ static int btrfs_subio_endio_read(struc
         }
   }
   
- -static void btrfs_endio_direct_read(struct bio *bio, int err)
+ +static void btrfs_endio_direct_read(struct bio *bio)
   {
         struct btrfs_dio_private *dip = bio->bi_private;
         struct inode *inode = dip->inode;
         struct bio *dio_bio;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ +      int err = bio->bi_error;
   
         if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
                 err = btrfs_subio_endio_read(inode, io_bio, err);
@@@ -7884,14 -7885,17 +7890,14 @@@
   
         kfree(dip);
   
- -      /* If we had a csum failure make sure to clear the uptodate flag */
- -      if (err)
- -              clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
- -      dio_end_io(dio_bio, err);
+ +      dio_end_io(dio_bio, bio->bi_error);
   
         if (io_bio->end_io)
                 io_bio->end_io(io_bio, err);
         bio_put(bio);
   }
   
- -static void btrfs_endio_direct_write(struct bio *bio, int err)
+ +static void btrfs_endio_direct_write(struct bio *bio)
   {
         struct btrfs_dio_private *dip = bio->bi_private;
         struct inode *inode = dip->inode;
@@@ -7905,8 -7909,7 +7911,8 @@@
   again:
         ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                    &ordered_offset,
- -                                                 ordered_bytes, !err);
+ +                                                 ordered_bytes,
+ +                                                 !bio->bi_error);
         if (!ret)
                 goto out_test;
   
@@@ -7929,7 -7932,10 +7935,7 @@@ out_test
   
         kfree(dip);
   
- -      /* If we had an error make sure to clear the uptodate flag */
- -      if (err)
- -              clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
- -      dio_end_io(dio_bio, err);
+ +      dio_end_io(dio_bio, bio->bi_error);
         bio_put(bio);
   }
   
@@@ -7944,10 -7950,9 +7950,10 @@@ static int __btrfs_submit_bio_start_dir
         return 0;
   }
   
- -static void btrfs_end_dio_bio(struct bio *bio, int err)
+ +static void btrfs_end_dio_bio(struct bio *bio)
   {
         struct btrfs_dio_private *dip = bio->bi_private;
+ +      int err = bio->bi_error;
   
         if (err)
                 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@@ -7976,8 -7981,8 +7982,8 @@@
         if (dip->errors) {
                 bio_io_error(dip->orig_bio);
         } else {
- -              set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
- -              bio_endio(dip->orig_bio, 0);
+ +              dip->dio_bio->bi_error = 0;
+ +              bio_endio(dip->orig_bio);
         }
   out:
         bio_put(bio);
@@@ -7986,8 -7991,9 +7992,8 @@@
   static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
                                        u64 first_sector, gfp_t gfp_flags)
   {
- -      int nr_vecs = bio_get_nr_vecs(bdev);
         struct bio *bio;
- -      bio = btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+ +      bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
         if (bio)
                 bio_associate_current(bio);
         return bio;
@@@ -8251,8 -8257,7 +8257,8 @@@ free_ordered
          * callbacks - they require an allocated dip and a clone of dio_bio.
          */
         if (io_bio && dip) {
- -              bio_endio(io_bio, ret);
+ +              io_bio->bi_error = -EIO;
+ +              bio_endio(io_bio);
                 /*
                  * The end io callbacks free our dip, do the final put on io_bio
                  * and all the cleanup and final put for dio_bio (through
@@@ -8279,7 -8284,7 +8285,7 @@@
                         unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
                               file_offset + dio_bio->bi_iter.bi_size - 1);
                 }
- -              clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ +              dio_bio->bi_error = -EIO;
                 /*
                  * Releases and cleans up our dio_bio, no need to bio_put()
                  * nor bio_endio()/bio_io_error() against dio_bio.
@@@ -8329,7 -8334,8 +8335,8 @@@ static ssize_t btrfs_direct_IO(struct k
   {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
-       u64 outstanding_extents = 0;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_dio_data dio_data = { 0 };
         size_t count = 0;
         int flags = 0;
         bool wakeup = true;
@@@ -8367,7 -8373,7 +8374,7 @@@
                 ret = btrfs_delalloc_reserve_space(inode, count);
                 if (ret)
                         goto out;
-               outstanding_extents = div64_u64(count +
+               dio_data.outstanding_extents = div64_u64(count +
                                                 BTRFS_MAX_EXTENT_SIZE - 1,
                                                 BTRFS_MAX_EXTENT_SIZE);
   
@@@ -8376,7 -8382,8 +8383,8 @@@
                  * do the accounting properly if we go over the number we
                  * originally calculated.  Abuse current->journal_info for this.
                  */
-               current->journal_info = &outstanding_extents;
+               dio_data.reserve = round_up(count, root->sectorsize);
+               current->journal_info = &dio_data;
         } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                      &BTRFS_I(inode)->runtime_flags)) {
                 inode_dio_end(inode);
@@@ -8391,16 -8398,9 +8399,9 @@@
         if (iov_iter_rw(iter) == WRITE) {
                 current->journal_info = NULL;
                 if (ret < 0 && ret != -EIOCBQUEUED) {
-                       /*
-                        * If the error comes from submitting stage,
-                        * btrfs_get_blocsk_direct() has free'd data space,
-                        * and metadata space will be handled by
-                        * finish_ordered_fn, don't do that again to make
-                        * sure bytes_may_use is correct.
-                        */
-                       if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
-                                    &BTRFS_I(inode)->runtime_flags))
-                               btrfs_delalloc_release_space(inode, count);
+                       if (dio_data.reserve)
+                               btrfs_delalloc_release_space(inode,
+                                                       dio_data.reserve);
                 } else if (ret >= 0 && (size_t)ret < count)
                         btrfs_delalloc_release_space(inode,
                                                      count - (size_t)ret);
diff --combined fs/btrfs/super.c

index 2b07b3581781b7b952a384295d8bac5fee983f07,5a186d7180d34c90e8be25ecb22f1e0298b48bd6..11d1eab9234dc818244d1c1bbecd6d25981f4890
--- 1/fs/btrfs/super.c
--- 2/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@@ -1658,9 -1658,7 +1658,7 @@@ static int btrfs_remount(struct super_b
                  * groups on disk until we're mounted read-write again
                  * unless we clean them up here.
                  */
-               mutex_lock(&root->fs_info->cleaner_mutex);
                 btrfs_delete_unused_bgs(fs_info);
-               mutex_unlock(&root->fs_info->cleaner_mutex);
   
                 btrfs_dev_replace_suspend_for_unmount(fs_info);
                 btrfs_scrub_cancel(fs_info);
@@@ -2175,7 -2173,8 +2173,7 @@@ static int btrfs_interface_init(void
   
   static void btrfs_interface_exit(void)
   {
- -      if (misc_deregister(&btrfs_misc) < 0)
- -              printk(KERN_INFO "BTRFS: misc_deregister failed for control device\n");
+ +      misc_deregister(&btrfs_misc);
   }
   
   static void btrfs_print_info(void)
diff --combined fs/btrfs/transaction.c

index 8f259b3a66b366d6e90393d4b3f15e91d23fd2b0,a2d6f7bcef6cf39ccdc0664d5c1cce6ef4d0789d..74bc3338418be39badb2eb73160c20b3e2240c74
--- 1/fs/btrfs/transaction.c
--- 2/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@@ -117,6 -117,18 +117,18 @@@ static noinline void switch_commit_root
                         btrfs_unpin_free_ino(root);
                 clear_btree_io_tree(&root->dirty_log_pages);
         }
+ 
+       /* We can free old roots now. */
+       spin_lock(&trans->dropped_roots_lock);
+       while (!list_empty(&trans->dropped_roots)) {
+               root = list_first_entry(&trans->dropped_roots,
+                                       struct btrfs_root, root_list);
+               list_del_init(&root->root_list);
+               spin_unlock(&trans->dropped_roots_lock);
+               btrfs_drop_and_free_fs_root(fs_info, root);
+               spin_lock(&trans->dropped_roots_lock);
+       }
+       spin_unlock(&trans->dropped_roots_lock);
         up_write(&fs_info->commit_root_sem);
   }
   
@@@ -255,11 -267,13 +267,13 @@@ loop
         INIT_LIST_HEAD(&cur_trans->pending_ordered);
         INIT_LIST_HEAD(&cur_trans->dirty_bgs);
         INIT_LIST_HEAD(&cur_trans->io_bgs);
+       INIT_LIST_HEAD(&cur_trans->dropped_roots);
         mutex_init(&cur_trans->cache_write_mutex);
         cur_trans->num_dirty_bgs = 0;
         spin_lock_init(&cur_trans->dirty_bgs_lock);
         INIT_LIST_HEAD(&cur_trans->deleted_bgs);
         spin_lock_init(&cur_trans->deleted_bgs_lock);
+       spin_lock_init(&cur_trans->dropped_roots_lock);
         list_add_tail(&cur_trans->list, &fs_info->trans_list);
         extent_io_tree_init(&cur_trans->dirty_pages,
                              fs_info->btree_inode->i_mapping);
@@@ -336,6 -350,24 +350,24 @@@ static int record_root_in_trans(struct 
   }
   
   
+ void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root)
+ {
+       struct btrfs_transaction *cur_trans = trans->transaction;
+ 
+       /* Add ourselves to the transaction dropped list */
+       spin_lock(&cur_trans->dropped_roots_lock);
+       list_add_tail(&root->root_list, &cur_trans->dropped_roots);
+       spin_unlock(&cur_trans->dropped_roots_lock);
+ 
+       /* Make sure we don't try to update the root at commit time */
+       spin_lock(&root->fs_info->fs_roots_radix_lock);
+       radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
+                            (unsigned long)root->root_key.objectid,
+                            BTRFS_ROOT_TRANS_TAG);
+       spin_unlock(&root->fs_info->fs_roots_radix_lock);
+ }
+ 
   int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root)
   {
@@@ -1640,7 -1672,9 +1672,7 @@@ static void do_async_commit(struct work
          * Tell lockdep about it.
          */
         if (ac->newtrans->type & __TRANS_FREEZABLE)
- -              rwsem_acquire_read(
- -                   &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
- -                   0, 1, _THIS_IP_);
+ +              __sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);
   
         current->journal_info = ac->newtrans;
   
@@@ -1679,7 -1713,9 +1711,7 @@@ int btrfs_commit_transaction_async(stru
          * async commit thread will be the one to unlock it.
          */
         if (ac->newtrans->type & __TRANS_FREEZABLE)
- -              rwsem_release(
- -                      &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
- -                      1, _THIS_IP_);
+ +              __sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);
   
         schedule_work(&ac->work);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 Sep 2015 19:08:41 +0000 (12:08 -0700)
		1	2
fs/btrfs/disk-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/extent_io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/transaction.c	patch \|	diff1 \|	diff2 \|	blob \| history