btrfs: don't pass compressed pages to btrfs_writepage_endio_finish_ordered()
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaddd7225348191690e8a754c958a430bfe1a974..2570bbbd6a13789daaea45c1d252de4793e7a7d6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -241,7 +241,7 @@ int __init extent_io_init(void)
                return -ENOMEM;
 
        if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
-                       offsetof(struct btrfs_io_bio, bio),
+                       offsetof(struct btrfs_bio, bio),
                        BIOSET_NEED_BVECS))
                goto free_buffer_cache;
 
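The front_pad argument (offsetof(struct btrfs_bio, bio)) is what lets btrfs recover its private per-bio state from a plain struct bio later on. A minimal sketch of the idiom, with illustrative field names rather than btrfs's actual layout:

    /* A private container whose struct bio is the *last* member. */
    struct my_bio {
            u8 *csum;               /* private per-bio state */
            struct bio bio;         /* must come last */
    };

    /*
     * With front_pad == offsetof(struct my_bio, bio), every bio handed
     * out by bio_alloc_bioset() lives inside a my_bio, so the container
     * is recoverable with container_of():
     */
    static inline struct my_bio *my_bio(struct bio *bio)
    {
            return container_of(bio, struct my_bio, bio);
    }
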
@@ -2282,15 +2282,15 @@ int free_io_failure(struct extent_io_tree *failure_tree,
  * currently, there can be no more than two copies of every data bit. thus,
  * exactly one rewrite is required.
  */
-int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-                     u64 length, u64 logical, struct page *page,
-                     unsigned int pg_offset, int mirror_num)
+static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+                            u64 length, u64 logical, struct page *page,
+                            unsigned int pg_offset, int mirror_num)
 {
        struct bio *bio;
        struct btrfs_device *dev;
        u64 map_length = 0;
        u64 sector;
-       struct btrfs_bio *bbio = NULL;
+       struct btrfs_io_context *bioc = NULL;
        int ret;
 
        ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
@@ -2299,12 +2299,12 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
        if (btrfs_is_zoned(fs_info))
                return btrfs_repair_one_zone(fs_info, logical);
 
-       bio = btrfs_io_bio_alloc(1);
+       bio = btrfs_bio_alloc(1);
        bio->bi_iter.bi_size = 0;
        map_length = length;
 
        /*
-        * Avoid races with device replace and make sure our bbio has devices
+        * Avoid races with device replace and make sure our bioc has devices
         * associated to its stripes that don't go away while we are doing the
         * read repair operation.
         */
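
The counter the comment refers to is taken via btrfs_bio_counter_inc_blocked() just past this hunk's context; in rough outline the bracketing pattern is (the matching dec calls are visible in the error paths of the next hunk):

    btrfs_bio_counter_inc_blocked(fs_info); /* blocks while dev-replace finishes */
    /* ... btrfs_map_block() and submission of the repair write ... */
    btrfs_bio_counter_dec(fs_info);         /* on every exit path */
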
@@ -2317,28 +2317,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
                 * stripe's dev and sector.
                 */
                ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
-                                     &map_length, &bbio, 0);
+                                     &map_length, &bioc, 0);
                if (ret) {
                        btrfs_bio_counter_dec(fs_info);
                        bio_put(bio);
                        return -EIO;
                }
-               ASSERT(bbio->mirror_num == 1);
+               ASSERT(bioc->mirror_num == 1);
        } else {
                ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
-                                     &map_length, &bbio, mirror_num);
+                                     &map_length, &bioc, mirror_num);
                if (ret) {
                        btrfs_bio_counter_dec(fs_info);
                        bio_put(bio);
                        return -EIO;
                }
-               BUG_ON(mirror_num != bbio->mirror_num);
+               BUG_ON(mirror_num != bioc->mirror_num);
        }
 
-       sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
+       sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
        bio->bi_iter.bi_sector = sector;
-       dev = bbio->stripes[bbio->mirror_num - 1].dev;
-       btrfs_put_bbio(bbio);
+       dev = bioc->stripes[bioc->mirror_num - 1].dev;
+       btrfs_put_bioc(bioc);
        if (!dev || !dev->bdev ||
            !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
                btrfs_bio_counter_dec(fs_info);
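
The >> 9 converts a byte address into the 512-byte sector units that bio->bi_iter.bi_sector expects, independent of the filesystem's sector size. For example:

    u64 physical = 1048576;             /* byte offset on the device */
    sector_t sector = physical >> 9;    /* == 2048 hardware sectors */
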
@@ -2618,10 +2618,10 @@ int btrfs_repair_one_sector(struct inode *inode,
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-       struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
+       struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
        const int icsum = bio_offset >> fs_info->sectorsize_bits;
        struct bio *repair_bio;
-       struct btrfs_io_bio *repair_io_bio;
+       struct btrfs_bio *repair_bbio;
        blk_status_t status;
 
        btrfs_debug(fs_info,
@@ -2639,24 +2639,24 @@ int btrfs_repair_one_sector(struct inode *inode,
                return -EIO;
        }
 
-       repair_bio = btrfs_io_bio_alloc(1);
-       repair_io_bio = btrfs_io_bio(repair_bio);
+       repair_bio = btrfs_bio_alloc(1);
+       repair_bbio = btrfs_bio(repair_bio);
        repair_bio->bi_opf = REQ_OP_READ;
        repair_bio->bi_end_io = failed_bio->bi_end_io;
        repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
        repair_bio->bi_private = failed_bio->bi_private;
 
-       if (failed_io_bio->csum) {
+       if (failed_bbio->csum) {
                const u32 csum_size = fs_info->csum_size;
 
-               repair_io_bio->csum = repair_io_bio->csum_inline;
-               memcpy(repair_io_bio->csum,
-                      failed_io_bio->csum + csum_size * icsum, csum_size);
+               repair_bbio->csum = repair_bbio->csum_inline;
+               memcpy(repair_bbio->csum,
+                      failed_bbio->csum + csum_size * icsum, csum_size);
        }
 
        bio_add_page(repair_bio, page, failrec->len, pgoff);
-       repair_io_bio->logical = failrec->start;
-       repair_io_bio->iter = repair_bio->bi_iter;
+       repair_bbio->logical = failrec->start;
+       repair_bbio->iter = repair_bio->bi_iter;
 
        btrfs_debug(btrfs_sb(inode->i_sb),
                    "repair read error: submitting new read to mirror %d",
@@ -2976,7 +2976,7 @@ static struct extent_buffer *find_extent_buffer_readpage(
 static void end_bio_extent_readpage(struct bio *bio)
 {
        struct bio_vec *bvec;
-       struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+       struct btrfs_bio *bbio = btrfs_bio(bio);
        struct extent_io_tree *tree, *failure_tree;
        struct processed_extent processed = { 0 };
        /*
@@ -3003,7 +3003,7 @@ static void end_bio_extent_readpage(struct bio *bio)
                btrfs_debug(fs_info,
                        "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
                        bio->bi_iter.bi_sector, bio->bi_status,
-                       io_bio->mirror_num);
+                       bbio->mirror_num);
                tree = &BTRFS_I(inode)->io_tree;
                failure_tree = &BTRFS_I(inode)->io_failure_tree;
 
@@ -3028,14 +3028,14 @@ static void end_bio_extent_readpage(struct bio *bio)
                end = start + bvec->bv_len - 1;
                len = bvec->bv_len;
 
-               mirror = io_bio->mirror_num;
+               mirror = bbio->mirror_num;
                if (likely(uptodate)) {
                        if (is_data_inode(inode)) {
-                               error_bitmap = btrfs_verify_data_csum(io_bio,
+                               error_bitmap = btrfs_verify_data_csum(bbio,
                                                bio_offset, page, start, end);
                                ret = error_bitmap;
                        } else {
-                               ret = btrfs_validate_metadata_buffer(io_bio,
+                               ret = btrfs_validate_metadata_buffer(bbio,
                                        page, start, end, mirror);
                        }
                        if (ret)
@@ -3106,7 +3106,7 @@ readpage_ok:
        }
        /* Release the last extent */
        endio_readpage_release_extent(&processed, NULL, 0, 0, false);
-       btrfs_io_bio_free_csum(io_bio);
+       btrfs_bio_free_csum(bbio);
        bio_put(bio);
 }
 
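btrfs_bio_free_csum() relies on the inline-vs-allocated checksum convention also used in the repair path above; at this point in the tree the helper is essentially (a sketch, quoted from memory):

    static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
    {
            /* csum points either at the embedded csum_inline array or
             * at a kmalloc'd buffer; only the latter must be freed. */
            if (bbio->csum != bbio->csum_inline) {
                    kfree(bbio->csum);
                    bbio->csum = NULL;
            }
    }
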
@@ -3115,53 +3115,43 @@ readpage_ok:
  * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
  * 'bio' because use of __GFP_ZERO is not supported.
  */
-static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
+static inline void btrfs_bio_init(struct btrfs_bio *bbio)
 {
-       memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
+       memset(bbio, 0, offsetof(struct btrfs_bio, bio));
 }
 
 /*
- * The following helpers allocate a bio. As it's backed by a bioset, it'll
- * never fail.  We're returning a bio right now but you can call btrfs_io_bio
- * for the appropriate container_of magic
+ * Allocate a btrfs_bio, with @nr_iovecs as maximum number of iovecs.
+ *
+ * The bio allocation is backed by bioset and does not fail.
  */
-struct bio *btrfs_bio_alloc(u64 first_byte)
+struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
 {
        struct bio *bio;
 
-       bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
-       bio->bi_iter.bi_sector = first_byte >> 9;
-       btrfs_io_bio_init(btrfs_io_bio(bio));
+       ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
+       bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
+       btrfs_bio_init(btrfs_bio(bio));
        return bio;
 }
 
 struct bio *btrfs_bio_clone(struct bio *bio)
 {
-       struct btrfs_io_bio *btrfs_bio;
+       struct btrfs_bio *bbio;
        struct bio *new;
 
        /* Bio allocation backed by a bioset does not fail */
        new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
-       btrfs_bio = btrfs_io_bio(new);
-       btrfs_io_bio_init(btrfs_bio);
-       btrfs_bio->iter = bio->bi_iter;
+       bbio = btrfs_bio(new);
+       btrfs_bio_init(bbio);
+       bbio->iter = bio->bi_iter;
        return new;
 }
 
-struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
-{
-       struct bio *bio;
-
-       /* Bio allocation backed by a bioset does not fail */
-       bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
-       btrfs_io_bio_init(btrfs_io_bio(bio));
-       return bio;
-}
-
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 {
        struct bio *bio;
-       struct btrfs_io_bio *btrfs_bio;
+       struct btrfs_bio *bbio;
 
        ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
 
@@ -3169,11 +3159,11 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
        bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
        ASSERT(bio);
 
-       btrfs_bio = btrfs_io_bio(bio);
-       btrfs_io_bio_init(btrfs_bio);
+       bbio = btrfs_bio(bio);
+       btrfs_bio_init(bbio);
 
        bio_trim(bio, offset >> 9, size >> 9);
-       btrfs_bio->iter = bio->bi_iter;
+       bbio->iter = bio->bi_iter;
        return bio;
 }
 
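Note that bio_trim() takes sector units, hence the >> 9 applied to both byte-based arguments. A hypothetical caller slicing 4K out of a clone starting 8K into the original:

    struct bio *slice = btrfs_bio_clone_partial(orig, 8192, 4096);
    /* internally: bio_trim(bio, 8192 >> 9, 4096 >> 9), i.e. sectors 16..24 */
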
@@ -3307,14 +3297,15 @@ static int alloc_new_bio(struct btrfs_inode *inode,
        struct bio *bio;
        int ret;
 
+       bio = btrfs_bio_alloc(BIO_MAX_VECS);
        /*
         * For compressed page range, its disk_bytenr is always @disk_bytenr
         * passed in, no matter if we have added any range into previous bio.
         */
        if (bio_flags & EXTENT_BIO_COMPRESSED)
-               bio = btrfs_bio_alloc(disk_bytenr);
+               bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
        else
-               bio = btrfs_bio_alloc(disk_bytenr + offset);
+               bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
        bio_ctrl->bio = bio;
        bio_ctrl->bio_flags = bio_flags;
        bio->bi_end_io = end_io_func;
@@ -3327,7 +3318,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
        if (wbc) {
                struct block_device *bdev;
 
-               bdev = fs_info->fs_devices->latest_bdev;
+               bdev = fs_info->fs_devices->latest_dev->bdev;
                bio_set_dev(bio, bdev);
                wbc_init_bio(wbc, bio);
        }
@@ -3341,7 +3332,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
                        goto error;
                }
 
-               btrfs_io_bio(bio)->device = device;
+               btrfs_bio(bio)->device = device;
        }
        return 0;
 error:
@@ -3777,10 +3768,11 @@ static void update_nr_written(struct writeback_control *wbc,
  */
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                struct page *page, struct writeback_control *wbc,
-               u64 delalloc_start, unsigned long *nr_written)
+               unsigned long *nr_written)
 {
-       u64 page_end = delalloc_start + PAGE_SIZE - 1;
+       u64 delalloc_start = page_offset(page);
+       u64 page_end = delalloc_start + PAGE_SIZE - 1;
        bool found;
        u64 delalloc_to_write = 0;
        u64 delalloc_end = 0;
        int ret;
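
The delalloc_start parameter could be dropped because it is always derivable from the page itself; page_offset() is simply (paraphrasing include/linux/pagemap.h):

    static inline loff_t page_offset(struct page *page)
    {
            return ((loff_t)page->index) << PAGE_SHIFT;
    }
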
@@ -3854,12 +3846,11 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
                                 struct page *page, u64 *start, u64 *end)
 {
        struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+       struct btrfs_subpage_info *spi = fs_info->subpage_info;
        u64 orig_start = *start;
        /* Declare as unsigned long so we can use bitmap ops */
-       unsigned long dirty_bitmap;
        unsigned long flags;
-       int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
-       int range_start_bit = nbits;
+       int range_start_bit;
        int range_end_bit;
 
        /*
@@ -3872,13 +3863,18 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
                return;
        }
 
+       range_start_bit = spi->dirty_offset +
+                         (offset_in_page(orig_start) >> fs_info->sectorsize_bits);
+
        /* We should have the page locked, but just in case */
        spin_lock_irqsave(&subpage->lock, flags);
-       dirty_bitmap = subpage->dirty_bitmap;
+       bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
+                              spi->dirty_offset + spi->bitmap_nr_bits);
        spin_unlock_irqrestore(&subpage->lock, flags);
 
-       bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
-                              BTRFS_SUBPAGE_BITMAP_SIZE);
+       range_start_bit -= spi->dirty_offset;
+       range_end_bit -= spi->dirty_offset;
+
        *start = page_offset(page) + range_start_bit * fs_info->sectorsize;
        *end = page_offset(page) + range_end_bit * fs_info->sectorsize;
 }
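
bitmap_next_set_region() advances *rs to the first set bit at or after the start hint and *re past the end of that contiguous run. A quick illustration with a single word:

    unsigned long bitmap = 0x38;    /* bits 3, 4 and 5 set */
    unsigned int rs = 0, re;

    bitmap_next_set_region(&bitmap, &rs, &re, BITS_PER_LONG);
    /* now rs == 3 and re == 6: the set region is [3, 6) */
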
@@ -4054,8 +4050,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                              struct extent_page_data *epd)
 {
        struct inode *inode = page->mapping->host;
-       u64 start = page_offset(page);
-       u64 page_end = start + PAGE_SIZE - 1;
+       const u64 page_start = page_offset(page);
+       const u64 page_end = page_start + PAGE_SIZE - 1;
        int ret;
        int nr = 0;
        size_t pg_offset;
@@ -4090,8 +4086,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        }
 
        if (!epd->extent_locked) {
-               ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
-                                        &nr_written);
+               ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written);
                if (ret == 1)
                        return 0;
                if (ret)
@@ -4141,7 +4136,7 @@ done:
         * capable of that.
         */
        if (PageError(page))
-               end_extent_writepage(page, ret, start, page_end);
+               end_extent_writepage(page, ret, page_start, page_end);
        unlock_page(page);
        ASSERT(ret <= 0);
        return ret;
@@ -4155,6 +4150,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
+       if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
+               btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
+
        clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
        smp_mb__after_atomic();
        wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
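
The clear_bit()/smp_mb__after_atomic()/wake_up_bit() sequence pairs with the waiter in wait_on_extent_buffer_writeback(), which at this point in the tree is essentially:

    void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
    {
            wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
                           TASK_UNINTERRUPTIBLE);
    }
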
@@ -4602,12 +4600,11 @@ static int submit_eb_subpage(struct page *page,
        int submitted = 0;
        u64 page_start = page_offset(page);
        int bit_start = 0;
-       const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
        int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
        int ret;
 
        /* Lock and write each dirty extent buffers in the range */
-       while (bit_start < nbits) {
+       while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
                struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
                struct extent_buffer *eb;
                unsigned long flags;
@@ -4623,7 +4620,8 @@ static int submit_eb_subpage(struct page *page,
                        break;
                }
                spin_lock_irqsave(&subpage->lock, flags);
-               if (!((1 << bit_start) & subpage->dirty_bitmap)) {
+               if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
+                             subpage->bitmaps)) {
                        spin_unlock_irqrestore(&subpage->lock, flags);
                        spin_unlock(&page->mapping->private_lock);
                        bit_start++;
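
The dirty_offset addition reflects the packed subpage bitmap layout this series moves to: a single bitmap holds all per-sector state, with each state type starting at its own fixed offset. Schematically (section order and widths illustrative):

    /* subpage->bitmaps: [uptodate...][error...][dirty...][writeback...] */
    bool dirty = test_bit(fs_info->subpage_info->dirty_offset + i,
                          subpage->bitmaps);    /* is sector i dirty? */
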
@@ -4756,8 +4754,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
                free_extent_buffer(eb);
                return ret;
        }
-       if (cache)
+       if (cache) {
+               /* Implies write in zoned mode */
+               /* Mark the last eb in a block group, before the final put */
+               if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
+                       set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
                btrfs_put_block_group(cache);
+       }
        ret = write_one_eb(eb, wbc, epd);
        free_extent_buffer(eb);
        if (ret < 0)
@@ -5120,6 +5123,9 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 int extent_writepages(struct address_space *mapping,
                      struct writeback_control *wbc)
 {
+       struct inode *inode = mapping->host;
+       const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root);
+       const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info);
        int ret = 0;
        struct extent_page_data epd = {
                .bio_ctrl = { 0 },
@@ -5127,7 +5133,15 @@ int extent_writepages(struct address_space *mapping,
                .sync_io = wbc->sync_mode == WB_SYNC_ALL,
        };
 
+       /*
+        * Allow only a single thread to do the reloc work in zoned mode to
+        * protect the write pointer updates.
+        */
+       if (data_reloc && zoned)
+               btrfs_inode_lock(inode, 0);
        ret = extent_write_cache_pages(mapping, wbc, &epd);
+       if (data_reloc && zoned)
+               btrfs_inode_unlock(inode, 0);
        ASSERT(ret <= 0);
        if (ret < 0) {
                end_write_bio(&epd, ret);
@@ -6137,13 +6151,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                 * page, but it may change in the future for 16K page size
                 * support, so we still preallocate the memory in the loop.
                 */
-               ret = btrfs_alloc_subpage(fs_info, &prealloc,
-                                         BTRFS_SUBPAGE_METADATA);
-               if (ret < 0) {
-                       unlock_page(p);
-                       put_page(p);
-                       exists = ERR_PTR(ret);
-                       goto free_eb;
+               if (fs_info->sectorsize < PAGE_SIZE) {
+                       prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
+                       if (IS_ERR(prealloc)) {
+                               ret = PTR_ERR(prealloc);
+                               unlock_page(p);
+                               put_page(p);
+                               exists = ERR_PTR(ret);
+                               goto free_eb;
+                       }
                }
 
                spin_lock(&mapping->private_lock);
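
btrfs_alloc_subpage() now returns the structure directly using the kernel's ERR_PTR convention instead of filling an out parameter. The caller-side pattern in isolation:

    struct btrfs_subpage *prealloc;

    prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
    if (IS_ERR(prealloc))
            return PTR_ERR(prealloc);   /* e.g. -ENOMEM, encoded in the pointer */
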
@@ -7167,32 +7183,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
        }
 }
 
+#define GANG_LOOKUP_SIZE       16
 static struct extent_buffer *get_next_extent_buffer(
                struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
 {
-       struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
+       struct extent_buffer *gang[GANG_LOOKUP_SIZE];
        struct extent_buffer *found = NULL;
        u64 page_start = page_offset(page);
-       int ret;
-       int i;
+       u64 cur = page_start;
 
        ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
-       ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
        lockdep_assert_held(&fs_info->buffer_lock);
 
-       ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
-                       bytenr >> fs_info->sectorsize_bits,
-                       PAGE_SIZE / fs_info->nodesize);
-       for (i = 0; i < ret; i++) {
-               /* Already beyond page end */
-               if (gang[i]->start >= page_start + PAGE_SIZE)
-                       break;
-               /* Found one */
-               if (gang[i]->start >= bytenr) {
-                       found = gang[i];
-                       break;
+       while (cur < page_start + PAGE_SIZE) {
+               int ret;
+               int i;
+
+               ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
+                               (void **)gang, cur >> fs_info->sectorsize_bits,
+                               min_t(unsigned int, GANG_LOOKUP_SIZE,
+                                     PAGE_SIZE / fs_info->nodesize));
+               if (ret == 0)
+                       goto out;
+               for (i = 0; i < ret; i++) {
+                       /* Already beyond page end */
+                       if (gang[i]->start >= page_start + PAGE_SIZE)
+                               goto out;
+                       /* Found one */
+                       if (gang[i]->start >= bytenr) {
+                               found = gang[i];
+                               goto out;
+                       }
                }
+               cur = gang[ret - 1]->start + gang[ret - 1]->len;
        }
+out:
        return found;
 }