fs/f2fs/data.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * fs/f2fs/data.c
   4  *
   5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6  *             http://www.samsung.com/
   7  */
   8 #include <linux/fs.h>
   9 #include <linux/f2fs_fs.h>
  10 #include <linux/buffer_head.h>
  11 #include <linux/sched/mm.h>
  12 #include <linux/mpage.h>
  13 #include <linux/writeback.h>
  14 #include <linux/pagevec.h>
  15 #include <linux/blkdev.h>
  16 #include <linux/bio.h>
  17 #include <linux/blk-crypto.h>
  18 #include <linux/swap.h>
  19 #include <linux/prefetch.h>
  20 #include <linux/uio.h>
  21 #include <linux/sched/signal.h>
  22 #include <linux/fiemap.h>
  23 #include <linux/iomap.h>
  24
  25 #include "f2fs.h"
  26 #include "node.h"
  27 #include "segment.h"
  28 #include "iostat.h"
  29 #include <trace/events/f2fs.h>
  30
  31 #define NUM_PREALLOC_POST_READ_CTXS     128
  32
  33 static struct kmem_cache *bio_post_read_ctx_cache;
  34 static struct kmem_cache *bio_entry_slab;
  35 static mempool_t *bio_post_read_ctx_pool;
  36 static struct bio_set f2fs_bioset;
  37
  38 #define F2FS_BIO_POOL_SIZE      NR_CURSEG_TYPE
  39
  40 int __init f2fs_init_bioset(void)
  41 {
  42         if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
  43                                         0, BIOSET_NEED_BVECS))
  44                 return -ENOMEM;
  45         return 0;
  46 }
  47
/* Tear down the file-wide f2fs_bioset that f2fs_init_bioset() set up. */
void f2fs_destroy_bioset(void)
{
        bioset_exit(&f2fs_bioset);
}
  52
  53 static bool __is_cp_guaranteed(struct page *page)
  54 {
  55         struct address_space *mapping = page->mapping;
  56         struct inode *inode;
  57         struct f2fs_sb_info *sbi;
  58
  59         if (!mapping)
  60                 return false;
  61
  62         inode = mapping->host;
  63         sbi = F2FS_I_SB(inode);
  64
  65         if (inode->i_ino == F2FS_META_INO(sbi) ||
  66                         inode->i_ino == F2FS_NODE_INO(sbi) ||
  67                         S_ISDIR(inode->i_mode))
  68                 return true;
  69
  70         if (f2fs_is_compressed_page(page))
  71                 return false;
  72         if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
  73                         page_private_gcing(page))
  74                 return true;
  75         return false;
  76 }
  77
  78 static enum count_type __read_io_type(struct page *page)
  79 {
  80         struct address_space *mapping = page_file_mapping(page);
  81
  82         if (mapping) {
  83                 struct inode *inode = mapping->host;
  84                 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  85
  86                 if (inode->i_ino == F2FS_META_INO(sbi))
  87                         return F2FS_RD_META;
  88
  89                 if (inode->i_ino == F2FS_NODE_INO(sbi))
  90                         return F2FS_RD_NODE;
  91         }
  92         return F2FS_RD_DATA;
  93 }
  94
/*
 * Postprocessing steps for read bios.
 *
 * Each step is a distinct bit when its config option is enabled, so the
 * values can be OR-ed together in bio_post_read_ctx::enabled_steps.  When the
 * option is disabled the value is 0, which makes every "steps & STEP_x" test
 * constant-false.
 */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
        STEP_DECRYPT    = 1 << 0,
#else
        STEP_DECRYPT    = 0,    /* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
        STEP_DECOMPRESS = 1 << 1,
#else
        STEP_DECOMPRESS = 0,    /* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
        STEP_VERITY     = 1 << 2,
#else
        STEP_VERITY     = 0,    /* compile out the verity-related code */
#endif
};
 113
/*
 * Per-bio state for read postprocessing.  The read completion paths in this
 * file find it through bio->bi_private and return it to
 * bio_post_read_ctx_pool when done.
 */
struct bio_post_read_ctx {
        struct bio *bio;                /* the read bio being postprocessed */
        struct f2fs_sb_info *sbi;       /* supplies post_read_wq for deferred work */
        struct work_struct work;        /* work item for the deferred steps */
        unsigned int enabled_steps;     /* mask of enum bio_post_read_step bits */
        block_t fs_blkaddr;             /* block address of the bio's first page */
};
 121
 122 static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
 123 {
 124         struct bio_vec *bv;
 125         struct bvec_iter_all iter_all;
 126
 127         /*
 128          * Update and unlock the bio's pagecache pages, and put the
 129          * decompression context for any compressed pages.
 130          */
 131         bio_for_each_segment_all(bv, bio, iter_all) {
 132                 struct page *page = bv->bv_page;
 133
 134                 if (f2fs_is_compressed_page(page)) {
 135                         if (bio->bi_status)
 136                                 f2fs_end_read_compressed_page(page, true, 0,
 137                                                         in_task);
 138                         f2fs_put_page_dic(page, in_task);
 139                         continue;
 140                 }
 141
 142                 /* PG_error was set if verity failed. */
 143                 if (bio->bi_status || PageError(page)) {
 144                         ClearPageUptodate(page);
 145                         /* will re-read again later */
 146                         ClearPageError(page);
 147                 } else {
 148                         SetPageUptodate(page);
 149                 }
 150                 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 151                 unlock_page(page);
 152         }
 153
 154         if (bio->bi_private)
 155                 mempool_free(bio->bi_private, bio_post_read_ctx_pool);
 156         bio_put(bio);
 157 }
 158
 159 static void f2fs_verify_bio(struct work_struct *work)
 160 {
 161         struct bio_post_read_ctx *ctx =
 162                 container_of(work, struct bio_post_read_ctx, work);
 163         struct bio *bio = ctx->bio;
 164         bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
 165
 166         /*
 167          * fsverity_verify_bio() may call readahead() again, and while verity
 168          * will be disabled for this, decryption and/or decompression may still
 169          * be needed, resulting in another bio_post_read_ctx being allocated.
 170          * So to prevent deadlocks we need to release the current ctx to the
 171          * mempool first.  This assumes that verity is the last post-read step.
 172          */
 173         mempool_free(ctx, bio_post_read_ctx_pool);
 174         bio->bi_private = NULL;
 175
 176         /*
 177          * Verify the bio's pages with fs-verity.  Exclude compressed pages,
 178          * as those were handled separately by f2fs_end_read_compressed_page().
 179          */
 180         if (may_have_compressed_pages) {
 181                 struct bio_vec *bv;
 182                 struct bvec_iter_all iter_all;
 183
 184                 bio_for_each_segment_all(bv, bio, iter_all) {
 185                         struct page *page = bv->bv_page;
 186
 187                         if (!f2fs_is_compressed_page(page) &&
 188                             !fsverity_verify_page(page))
 189                                 SetPageError(page);
 190                 }
 191         } else {
 192                 fsverity_verify_bio(bio);
 193         }
 194
 195         f2fs_finish_read_bio(bio, true);
 196 }
 197
 198 /*
 199  * If the bio's data needs to be verified with fs-verity, then enqueue the
 200  * verity work for the bio.  Otherwise finish the bio now.
 201  *
 202  * Note that to avoid deadlocks, the verity work can't be done on the
 203  * decryption/decompression workqueue.  This is because verifying the data pages
 204  * can involve reading verity metadata pages from the file, and these verity
 205  * metadata pages may be encrypted and/or compressed.
 206  */
 207 static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
 208 {
 209         struct bio_post_read_ctx *ctx = bio->bi_private;
 210
 211         if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
 212                 INIT_WORK(&ctx->work, f2fs_verify_bio);
 213                 fsverity_enqueue_verify_work(&ctx->work);
 214         } else {
 215                 f2fs_finish_read_bio(bio, in_task);
 216         }
 217 }
 218
 219 /*
 220  * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 221  * remaining page was read by @ctx->bio.
 222  *
 223  * Note that a bio may span clusters (even a mix of compressed and uncompressed
 224  * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
 225  * that the bio includes at least one compressed page.  The actual decompression
 226  * is done on a per-cluster basis, not a per-bio basis.
 227  */
 228 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
 229                 bool in_task)
 230 {
 231         struct bio_vec *bv;
 232         struct bvec_iter_all iter_all;
 233         bool all_compressed = true;
 234         block_t blkaddr = ctx->fs_blkaddr;
 235
 236         bio_for_each_segment_all(bv, ctx->bio, iter_all) {
 237                 struct page *page = bv->bv_page;
 238
 239                 if (f2fs_is_compressed_page(page))
 240                         f2fs_end_read_compressed_page(page, false, blkaddr,
 241                                                       in_task);
 242                 else
 243                         all_compressed = false;
 244
 245                 blkaddr++;
 246         }
 247
 248         /*
 249          * Optimization: if all the bio's pages are compressed, then scheduling
 250          * the per-bio verity work is unnecessary, as verity will be fully
 251          * handled at the compression cluster level.
 252          */
 253         if (all_compressed)
 254                 ctx->enabled_steps &= ~STEP_VERITY;
 255 }
 256
 257 static void f2fs_post_read_work(struct work_struct *work)
 258 {
 259         struct bio_post_read_ctx *ctx =
 260                 container_of(work, struct bio_post_read_ctx, work);
 261         struct bio *bio = ctx->bio;
 262
 263         if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
 264                 f2fs_finish_read_bio(bio, true);
 265                 return;
 266         }
 267
 268         if (ctx->enabled_steps & STEP_DECOMPRESS)
 269                 f2fs_handle_step_decompress(ctx, true);
 270
 271         f2fs_verify_and_finish_bio(bio, true);
 272 }
 273
 274 static void f2fs_read_end_io(struct bio *bio)
 275 {
 276         struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
 277         struct bio_post_read_ctx *ctx;
 278         bool intask = in_task();
 279
 280         iostat_update_and_unbind_ctx(bio, 0);
 281         ctx = bio->bi_private;
 282
 283         if (time_to_inject(sbi, FAULT_READ_IO)) {
 284                 f2fs_show_injection_info(sbi, FAULT_READ_IO);
 285                 bio->bi_status = BLK_STS_IOERR;
 286         }
 287
 288         if (bio->bi_status) {
 289                 f2fs_finish_read_bio(bio, intask);
 290                 return;
 291         }
 292
 293         if (ctx) {
 294                 unsigned int enabled_steps = ctx->enabled_steps &
 295                                         (STEP_DECRYPT | STEP_DECOMPRESS);
 296
 297                 /*
 298                  * If we have only decompression step between decompression and
 299                  * decrypt, we don't need post processing for this.
 300                  */
 301                 if (enabled_steps == STEP_DECOMPRESS &&
 302                                 !f2fs_low_mem_mode(sbi)) {
 303                         f2fs_handle_step_decompress(ctx, intask);
 304                 } else if (enabled_steps) {
 305                         INIT_WORK(&ctx->work, f2fs_post_read_work);
 306                         queue_work(ctx->sbi->post_read_wq, &ctx->work);
 307                         return;
 308                 }
 309         }
 310
 311         f2fs_verify_and_finish_bio(bio, intask);
 312 }
 313
/*
 * Completion handler installed on write bios by __bio_alloc().
 *
 * bio->bi_private is read as the f2fs_sb_info once
 * iostat_update_and_unbind_ctx() has run.  Every page in the bio is then
 * finished: dummy padding pages go back to sbi->write_io_dummy, compressed
 * pages are handed to f2fs_compress_write_end_io(), and all other pages have
 * their writeback counter dropped and their writeback ended.  Afterwards a
 * sleeper on sbi->cp_wait is woken if no F2FS_WB_CP_DATA pages remain, and the
 * bio reference is dropped.
 */
static void f2fs_write_end_io(struct bio *bio)
{
        struct f2fs_sb_info *sbi;
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        iostat_update_and_unbind_ctx(bio, 1);
        sbi = bio->bi_private;

        /* fault injection: pretend the write failed */
        if (time_to_inject(sbi, FAULT_WRITE_IO)) {
                f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
                bio->bi_status = BLK_STS_IOERR;
        }

        bio_for_each_segment_all(bvec, bio, iter_all) {
                struct page *page = bvec->bv_page;
                /* sampled before the bounce page is swapped out below */
                enum count_type type = WB_DATA_TYPE(page);

                /* padding page added by __submit_bio(): release it */
                if (page_private_dummy(page)) {
                        clear_page_private_dummy(page);
                        unlock_page(page);
                        mempool_free(page, sbi->write_io_dummy);

                        if (unlikely(bio->bi_status))
                                f2fs_stop_checkpoint(sbi, true,
                                                STOP_CP_REASON_WRITE_FAIL);
                        continue;
                }

                /* may replace @page; it is passed by address */
                fscrypt_finalize_bounce_page(&page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
                if (f2fs_is_compressed_page(page)) {
                        f2fs_compress_write_end_io(bio, page);
                        continue;
                }
#endif

                /* record the failure; stop checkpointing for CP data */
                if (unlikely(bio->bi_status)) {
                        mapping_set_error(page->mapping, -EIO);
                        if (type == F2FS_WB_CP_DATA)
                                f2fs_stop_checkpoint(sbi, true,
                                                STOP_CP_REASON_WRITE_FAIL);
                }

                /* a node page's index must equal its nid */
                f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
                                        page->index != nid_of_node(page));

                dec_page_count(sbi, type);
                if (f2fs_in_warm_node_list(sbi, page))
                        f2fs_del_fsync_node_entry(sbi, page);
                clear_page_private_gcing(page);
                end_page_writeback(page);
        }
        if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
                                wq_has_sleeper(&sbi->cp_wait))
                wake_up(&sbi->cp_wait);

        bio_put(bio);
}
 374
 375 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 376                 block_t blk_addr, sector_t *sector)
 377 {
 378         struct block_device *bdev = sbi->sb->s_bdev;
 379         int i;
 380
 381         if (f2fs_is_multi_device(sbi)) {
 382                 for (i = 0; i < sbi->s_ndevs; i++) {
 383                         if (FDEV(i).start_blk <= blk_addr &&
 384                             FDEV(i).end_blk >= blk_addr) {
 385                                 blk_addr -= FDEV(i).start_blk;
 386                                 bdev = FDEV(i).bdev;
 387                                 break;
 388                         }
 389                 }
 390         }
 391
 392         if (sector)
 393                 *sector = SECTOR_FROM_BLOCK(blk_addr);
 394         return bdev;
 395 }
 396
 397 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 398 {
 399         int i;
 400
 401         if (!f2fs_is_multi_device(sbi))
 402                 return 0;
 403
 404         for (i = 0; i < sbi->s_ndevs; i++)
 405                 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
 406                         return i;
 407         return 0;
 408 }
 409
 410 static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
 411 {
 412         unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
 413         unsigned int fua_flag, meta_flag, io_flag;
 414         blk_opf_t op_flags = 0;
 415
 416         if (fio->op != REQ_OP_WRITE)
 417                 return 0;
 418         if (fio->type == DATA)
 419                 io_flag = fio->sbi->data_io_flag;
 420         else if (fio->type == NODE)
 421                 io_flag = fio->sbi->node_io_flag;
 422         else
 423                 return 0;
 424
 425         fua_flag = io_flag & temp_mask;
 426         meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
 427
 428         /*
 429          * data/node io flag bits per temp:
 430          *      REQ_META     |      REQ_FUA      |
 431          *    5 |    4 |   3 |    2 |    1 |   0 |
 432          * Cold | Warm | Hot | Cold | Warm | Hot |
 433          */
 434         if ((1 << fio->temp) & meta_flag)
 435                 op_flags |= REQ_META;
 436         if ((1 << fio->temp) & fua_flag)
 437                 op_flags |= REQ_FUA;
 438         return op_flags;
 439 }
 440
 441 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 442 {
 443         struct f2fs_sb_info *sbi = fio->sbi;
 444         struct block_device *bdev;
 445         sector_t sector;
 446         struct bio *bio;
 447
 448         bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
 449         bio = bio_alloc_bioset(bdev, npages,
 450                                 fio->op | fio->op_flags | f2fs_io_flags(fio),
 451                                 GFP_NOIO, &f2fs_bioset);
 452         bio->bi_iter.bi_sector = sector;
 453         if (is_read_io(fio->op)) {
 454                 bio->bi_end_io = f2fs_read_end_io;
 455                 bio->bi_private = NULL;
 456         } else {
 457                 bio->bi_end_io = f2fs_write_end_io;
 458                 bio->bi_private = sbi;
 459         }
 460         iostat_alloc_and_bind_ctx(sbi, bio, NULL);
 461
 462         if (fio->io_wbc)
 463                 wbc_init_bio(fio->io_wbc, bio);
 464
 465         return bio;
 466 }
 467
 468 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
 469                                   pgoff_t first_idx,
 470                                   const struct f2fs_io_info *fio,
 471                                   gfp_t gfp_mask)
 472 {
 473         /*
 474          * The f2fs garbage collector sets ->encrypted_page when it wants to
 475          * read/write raw data without encryption.
 476          */
 477         if (!fio || !fio->encrypted_page)
 478                 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
 479 }
 480
 481 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 482                                      pgoff_t next_idx,
 483                                      const struct f2fs_io_info *fio)
 484 {
 485         /*
 486          * The f2fs garbage collector sets ->encrypted_page when it wants to
 487          * read/write raw data without encryption.
 488          */
 489         if (fio && fio->encrypted_page)
 490                 return !bio_has_crypt_ctx(bio);
 491
 492         return fscrypt_mergeable_bio(bio, inode, next_idx);
 493 }
 494
/*
 * Hand @bio to the block layer, emitting the read or write tracepoint and
 * updating the iostat submit context first.
 *
 * For DATA and NODE writes on a filesystem where F2FS_IO_ALIGNED() is true,
 * the bio is first padded with zeroed dummy pages until its block count is a
 * multiple of F2FS_IO_SIZE().  Reads, other page types, and bios whose block
 * count is already a multiple skip the padding.
 */
static inline void __submit_bio(struct f2fs_sb_info *sbi,
                                struct bio *bio, enum page_type type)
{
        if (!is_read_io(bio_op(bio))) {
                unsigned int start;

                if (type != DATA && type != NODE)
                        goto submit_io;

                /* in LFS mode, finish the task's current plug if it has one */
                if (f2fs_lfs_mode(sbi) && current->plug)
                        blk_finish_plug(current->plug);

                if (!F2FS_IO_ALIGNED(sbi))
                        goto submit_io;

                /* number of blocks past the last F2FS_IO_SIZE boundary */
                start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
                start %= F2FS_IO_SIZE(sbi);

                if (start == 0)
                        goto submit_io;

                /* fill dummy pages */
                for (; start < F2FS_IO_SIZE(sbi); start++) {
                        struct page *page =
                                mempool_alloc(sbi->write_io_dummy,
                                              GFP_NOIO | __GFP_NOFAIL);
                        f2fs_bug_on(sbi, !page);

                        lock_page(page);

                        zero_user_segment(page, 0, PAGE_SIZE);
                        /* marked so f2fs_write_end_io() can recognise it */
                        set_page_private_dummy(page);

                        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
                                f2fs_bug_on(sbi, 1);
                }
                /*
                 * In the NODE case, we lose next block address chain. So, we
                 * need to do checkpoint in f2fs_sync_file.
                 */
                if (type == NODE)
                        set_sbi_flag(sbi, SBI_NEED_CP);
        }
submit_io:
        if (is_read_io(bio_op(bio)))
                trace_f2fs_submit_read_bio(sbi->sb, type, bio);
        else
                trace_f2fs_submit_write_bio(sbi->sb, type, bio);

        iostat_update_submit_ctx(bio, type);
        submit_bio(bio);
}
 547
/* Non-static entry point that forwards straight to __submit_bio(). */
void f2fs_submit_bio(struct f2fs_sb_info *sbi,
                                struct bio *bio, enum page_type type)
{
        __submit_bio(sbi, bio, type);
}
 553
 554 static void __submit_merged_bio(struct f2fs_bio_info *io)
 555 {
 556         struct f2fs_io_info *fio = &io->fio;
 557
 558         if (!io->bio)
 559                 return;
 560
 561         if (is_read_io(fio->op))
 562                 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 563         else
 564                 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
 565
 566         __submit_bio(io->sbi, io->bio, fio->type);
 567         io->bio = NULL;
 568 }
 569
 570 static bool __has_merged_page(struct bio *bio, struct inode *inode,
 571                                                 struct page *page, nid_t ino)
 572 {
 573         struct bio_vec *bvec;
 574         struct bvec_iter_all iter_all;
 575
 576         if (!bio)
 577                 return false;
 578
 579         if (!inode && !page && !ino)
 580                 return true;
 581
 582         bio_for_each_segment_all(bvec, bio, iter_all) {
 583                 struct page *target = bvec->bv_page;
 584
 585                 if (fscrypt_is_bounce_page(target)) {
 586                         target = fscrypt_pagecache_page(target);
 587                         if (IS_ERR(target))
 588                                 continue;
 589                 }
 590                 if (f2fs_is_compressed_page(target)) {
 591                         target = f2fs_compress_control_page(target);
 592                         if (IS_ERR(target))
 593                                 continue;
 594                 }
 595
 596                 if (inode && inode == target->mapping->host)
 597                         return true;
 598                 if (page && page == target)
 599                         return true;
 600                 if (ino && ino == ino_of_node(target))
 601                         return true;
 602         }
 603
 604         return false;
 605 }
 606
/*
 * Allocate and initialise sbi->write_io[]: one array of f2fs_bio_info per
 * page type, with a single entry for META and NR_TEMP_TYPE entries for the
 * other types.
 *
 * Returns 0 on success or -ENOMEM when an allocation fails.  Arrays allocated
 * before the failing one are not freed here.
 * NOTE(review): presumably the caller releases them on error - confirm.
 */
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
{
        int i;

        for (i = 0; i < NR_PAGE_TYPE; i++) {
                /* META uses a single slot; other types one per temperature */
                int n = (i == META) ? 1 : NR_TEMP_TYPE;
                int j;

                sbi->write_io[i] = f2fs_kmalloc(sbi,
                                array_size(n, sizeof(struct f2fs_bio_info)),
                                GFP_KERNEL);
                if (!sbi->write_io[i])
                        return -ENOMEM;

                for (j = HOT; j < n; j++) {
                        init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
                        sbi->write_io[i][j].sbi = sbi;
                        sbi->write_io[i][j].bio = NULL;
                        spin_lock_init(&sbi->write_io[i][j].io_lock);
                        INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
                        INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
                        init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
                }
        }

        return 0;
}
 634
 635 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 636                                 enum page_type type, enum temp_type temp)
 637 {
 638         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 639         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 640
 641         f2fs_down_write(&io->io_rwsem);
 642
 643         /* change META to META_FLUSH in the checkpoint procedure */
 644         if (type >= META_FLUSH) {
 645                 io->fio.type = META_FLUSH;
 646                 io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
 647                 if (!test_opt(sbi, NOBARRIER))
 648                         io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
 649         }
 650         __submit_merged_bio(io);
 651         f2fs_up_write(&io->io_rwsem);
 652 }
 653
 654 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 655                                 struct inode *inode, struct page *page,
 656                                 nid_t ino, enum page_type type, bool force)
 657 {
 658         enum temp_type temp;
 659         bool ret = true;
 660
 661         for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
 662                 if (!force)     {
 663                         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 664                         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 665
 666                         f2fs_down_read(&io->io_rwsem);
 667                         ret = __has_merged_page(io->bio, inode, page, ino);
 668                         f2fs_up_read(&io->io_rwsem);
 669                 }
 670                 if (ret)
 671                         __f2fs_submit_merged_write(sbi, type, temp);
 672
 673                 /* TODO: use HOT temp only for meta pages now. */
 674                 if (type >= META)
 675                         break;
 676         }
 677 }
 678
/*
 * Unconditionally submit the merged write bios for @type: no inode, page or
 * ino filter is given and force is set.
 */
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
        __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
 683
/*
 * Submit the merged write bios for @type only where the pending bio matches
 * @inode, @page or @ino (force is not set).
 */
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
                                struct inode *inode, struct page *page,
                                nid_t ino, enum page_type type)
{
        __submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
 690
/* Submit all merged write bios, in the order DATA, NODE, META. */
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
        f2fs_submit_merged_write(sbi, DATA);
        f2fs_submit_merged_write(sbi, NODE);
        f2fs_submit_merged_write(sbi, META);
}
 697
 698 /*
 699  * Fill the locked page with data located in the block address.
 700  * A caller needs to unlock the page on failure.
 701  */
 702 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 703 {
 704         struct bio *bio;
 705         struct page *page = fio->encrypted_page ?
 706                         fio->encrypted_page : fio->page;
 707
 708         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 709                         fio->is_por ? META_POR : (__is_meta_io(fio) ?
 710                         META_GENERIC : DATA_GENERIC_ENHANCE))) {
 711                 f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
 712                 return -EFSCORRUPTED;
 713         }
 714
 715         trace_f2fs_submit_page_bio(page, fio);
 716
 717         /* Allocate a new bio */
 718         bio = __bio_alloc(fio, 1);
 719
 720         f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 721                                fio->page->index, fio, GFP_NOIO);
 722
 723         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 724                 bio_put(bio);
 725                 return -EFAULT;
 726         }
 727
 728         if (fio->io_wbc && !is_read_io(fio->op))
 729                 wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
 730
 731         inc_page_count(fio->sbi, is_read_io(fio->op) ?
 732                         __read_io_type(page) : WB_DATA_TYPE(fio->page));
 733
 734         __submit_bio(fio->sbi, bio, fio->type);
 735         return 0;
 736 }
 737
 738 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 739                                 block_t last_blkaddr, block_t cur_blkaddr)
 740 {
 741         if (unlikely(sbi->max_io_bytes &&
 742                         bio->bi_iter.bi_size >= sbi->max_io_bytes))
 743                 return false;
 744         if (last_blkaddr + 1 != cur_blkaddr)
 745                 return false;
 746         return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
 747 }
 748
 749 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
 750                                                 struct f2fs_io_info *fio)
 751 {
 752         if (io->fio.op != fio->op)
 753                 return false;
 754         return io->fio.op_flags == fio->op_flags;
 755 }
 756
 757 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 758                                         struct f2fs_bio_info *io,
 759                                         struct f2fs_io_info *fio,
 760                                         block_t last_blkaddr,
 761                                         block_t cur_blkaddr)
 762 {
 763         if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
 764                 unsigned int filled_blocks =
 765                                 F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
 766                 unsigned int io_size = F2FS_IO_SIZE(sbi);
 767                 unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
 768
 769                 /* IOs in bio is aligned and left space of vectors is not enough */
 770                 if (!(filled_blocks % io_size) && left_vecs < io_size)
 771                         return false;
 772         }
 773         if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
 774                 return false;
 775         return io_type_is_mergeable(io, fio);
 776 }
 777
 778 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
 779                                 struct page *page, enum temp_type temp)
 780 {
 781         struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 782         struct bio_entry *be;
 783
 784         be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
 785         be->bio = bio;
 786         bio_get(bio);
 787
 788         if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
 789                 f2fs_bug_on(sbi, 1);
 790
 791         f2fs_down_write(&io->bio_list_lock);
 792         list_add_tail(&be->list, &io->bio_list);
 793         f2fs_up_write(&io->bio_list_lock);
 794 }
 795
/*
 * Unlink @be from the list it is on and free it back to bio_entry_slab.
 * No lock is taken here; the call site in add_ipu_page() holds bio_list_lock
 * for write.
 */
static void del_bio_entry(struct bio_entry *be)
{
        list_del(&be->list);
        kmem_cache_free(bio_entry_slab, be);
}
 801
/*
 * Try to append @page to the bio *@bio that is tracked on one of the DATA
 * bio_lists.
 *
 * The bio_list of each DATA temperature is searched, under bio_list_lock held
 * for write, for the entry whose bio is *@bio.  If the page can be merged, 0
 * is returned and *@bio is left as is.  Otherwise the entry is removed, the
 * bio is submitted, and -EAGAIN is returned.  -EAGAIN is also returned when
 * no entry tracks *@bio.  On any non-zero return a reference on *@bio is
 * dropped and *@bio is set to NULL.
 */
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
                                                        struct page *page)
{
        struct f2fs_sb_info *sbi = fio->sbi;
        enum temp_type temp;
        bool found = false;
        int ret = -EAGAIN;

        /* stop scanning temperatures as soon as the bio has been found */
        for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
                struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
                struct list_head *head = &io->bio_list;
                struct bio_entry *be;

                f2fs_down_write(&io->bio_list_lock);
                list_for_each_entry(be, head, list) {
                        if (be->bio != *bio)
                                continue;

                        found = true;

                        /* the new block must be able to follow *fio->last_block */
                        f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
                                                            *fio->last_block,
                                                            fio->new_blkaddr));
                        if (f2fs_crypt_mergeable_bio(*bio,
                                        fio->page->mapping->host,
                                        fio->page->index, fio) &&
                            bio_add_page(*bio, page, PAGE_SIZE, 0) ==
                                        PAGE_SIZE) {
                                ret = 0;
                                break;
                        }

                        /* page can't be merged into bio; submit the bio */
                        del_bio_entry(be);
                        __submit_bio(sbi, *bio, DATA);
                        break;
                }
                f2fs_up_write(&io->bio_list_lock);
        }

        /* not merged: drop a reference and clear the caller's pointer */
        if (ret) {
                bio_put(*bio);
                *bio = NULL;
        }

        return ret;
}
 849
 850 void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
 851                                         struct bio **bio, struct page *page)
 852 {
 853         enum temp_type temp;
 854         bool found = false;
 855         struct bio *target = bio ? *bio : NULL;
 856
 857         for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
 858                 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 859                 struct list_head *head = &io->bio_list;
 860                 struct bio_entry *be;
 861
 862                 if (list_empty(head))
 863                         continue;
 864
 865                 f2fs_down_read(&io->bio_list_lock);
 866                 list_for_each_entry(be, head, list) {
 867                         if (target)
 868                                 found = (target == be->bio);
 869                         else
 870                                 found = __has_merged_page(be->bio, NULL,
 871                                                                 page, 0);
 872                         if (found)
 873                                 break;
 874                 }
 875                 f2fs_up_read(&io->bio_list_lock);
 876
 877                 if (!found)
 878                         continue;
 879
 880                 found = false;
 881
 882                 f2fs_down_write(&io->bio_list_lock);
 883                 list_for_each_entry(be, head, list) {
 884                         if (target)
 885                                 found = (target == be->bio);
 886                         else
 887                                 found = __has_merged_page(be->bio, NULL,
 888                                                                 page, 0);
 889                         if (found) {
 890                                 target = be->bio;
 891                                 del_bio_entry(be);
 892                                 break;
 893                         }
 894                 }
 895                 f2fs_up_write(&io->bio_list_lock);
 896         }
 897
 898         if (found)
 899                 __submit_bio(sbi, target, DATA);
 900         if (bio && *bio) {
 901                 bio_put(*bio);
 902                 *bio = NULL;
 903         }
 904 }
 905
 906 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
 907 {
 908         struct bio *bio = *fio->bio;
 909         struct page *page = fio->encrypted_page ?
 910                         fio->encrypted_page : fio->page;
 911
 912         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 913                         __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
 914                 f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
 915                 return -EFSCORRUPTED;
 916         }
 917
 918         trace_f2fs_submit_page_bio(page, fio);
 919
 920         if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
 921                                                 fio->new_blkaddr))
 922                 f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
 923 alloc_new:
 924         if (!bio) {
 925                 bio = __bio_alloc(fio, BIO_MAX_VECS);
 926                 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 927                                        fio->page->index, fio, GFP_NOIO);
 928
 929                 add_bio_entry(fio->sbi, bio, page, fio->temp);
 930         } else {
 931                 if (add_ipu_page(fio, &bio, page))
 932                         goto alloc_new;
 933         }
 934
 935         if (fio->io_wbc)
 936                 wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
 937
 938         inc_page_count(fio->sbi, WB_DATA_TYPE(page));
 939
 940         *fio->last_block = fio->new_blkaddr;
 941         *fio->bio = bio;
 942
 943         return 0;
 944 }
 945
 946 void f2fs_submit_page_write(struct f2fs_io_info *fio)
 947 {
 948         struct f2fs_sb_info *sbi = fio->sbi;
 949         enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
 950         struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
 951         struct page *bio_page;
 952
 953         f2fs_bug_on(sbi, is_read_io(fio->op));
 954
 955         f2fs_down_write(&io->io_rwsem);
 956 next:
 957         if (fio->in_list) {
 958                 spin_lock(&io->io_lock);
 959                 if (list_empty(&io->io_list)) {
 960                         spin_unlock(&io->io_lock);
 961                         goto out;
 962                 }
 963                 fio = list_first_entry(&io->io_list,
 964                                                 struct f2fs_io_info, list);
 965                 list_del(&fio->list);
 966                 spin_unlock(&io->io_lock);
 967         }
 968
 969         verify_fio_blkaddr(fio);
 970
 971         if (fio->encrypted_page)
 972                 bio_page = fio->encrypted_page;
 973         else if (fio->compressed_page)
 974                 bio_page = fio->compressed_page;
 975         else
 976                 bio_page = fio->page;
 977
 978         /* set submitted = true as a return value */
 979         fio->submitted = true;
 980
 981         inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 982
 983         if (io->bio &&
 984             (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
 985                               fio->new_blkaddr) ||
 986              !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
 987                                        bio_page->index, fio)))
 988                 __submit_merged_bio(io);
 989 alloc_new:
 990         if (io->bio == NULL) {
 991                 if (F2FS_IO_ALIGNED(sbi) &&
 992                                 (fio->type == DATA || fio->type == NODE) &&
 993                                 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
 994                         dec_page_count(sbi, WB_DATA_TYPE(bio_page));
 995                         fio->retry = true;
 996                         goto skip;
 997                 }
 998                 io->bio = __bio_alloc(fio, BIO_MAX_VECS);
 999                 f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
1000                                        bio_page->index, fio, GFP_NOIO);
1001                 io->fio = *fio;
1002         }
1003
1004         if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
1005                 __submit_merged_bio(io);
1006                 goto alloc_new;
1007         }
1008
1009         if (fio->io_wbc)
1010                 wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
1011
1012         io->last_block_in_bio = fio->new_blkaddr;
1013
1014         trace_f2fs_submit_page_write(fio->page, fio);
1015 skip:
1016         if (fio->in_list)
1017                 goto next;
1018 out:
1019         if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1020                                 !f2fs_is_checkpoint_ready(sbi))
1021                 __submit_merged_bio(io);
1022         f2fs_up_write(&io->io_rwsem);
1023 }
1024
1025 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1026                                       unsigned nr_pages, blk_opf_t op_flag,
1027                                       pgoff_t first_idx, bool for_write)
1028 {
1029         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1030         struct bio *bio;
1031         struct bio_post_read_ctx *ctx = NULL;
1032         unsigned int post_read_steps = 0;
1033         sector_t sector;
1034         struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
1035
1036         bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1037                                REQ_OP_READ | op_flag,
1038                                for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1039         if (!bio)
1040                 return ERR_PTR(-ENOMEM);
1041         bio->bi_iter.bi_sector = sector;
1042         f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1043         bio->bi_end_io = f2fs_read_end_io;
1044
1045         if (fscrypt_inode_uses_fs_layer_crypto(inode))
1046                 post_read_steps |= STEP_DECRYPT;
1047
1048         if (f2fs_need_verity(inode, first_idx))
1049                 post_read_steps |= STEP_VERITY;
1050
1051         /*
1052          * STEP_DECOMPRESS is handled specially, since a compressed file might
1053          * contain both compressed and uncompressed clusters.  We'll allocate a
1054          * bio_post_read_ctx if the file is compressed, but the caller is
1055          * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1056          */
1057
1058         if (post_read_steps || f2fs_compressed_file(inode)) {
1059                 /* Due to the mempool, this never fails. */
1060                 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1061                 ctx->bio = bio;
1062                 ctx->sbi = sbi;
1063                 ctx->enabled_steps = post_read_steps;
1064                 ctx->fs_blkaddr = blkaddr;
1065                 bio->bi_private = ctx;
1066         }
1067         iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1068
1069         return bio;
1070 }
1071
1072 /* This can handle encryption stuffs */
1073 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1074                                  block_t blkaddr, blk_opf_t op_flags,
1075                                  bool for_write)
1076 {
1077         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1078         struct bio *bio;
1079
1080         bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1081                                         page->index, for_write);
1082         if (IS_ERR(bio))
1083                 return PTR_ERR(bio);
1084
1085         /* wait for GCed page writeback via META_MAPPING */
1086         f2fs_wait_on_block_writeback(inode, blkaddr);
1087
1088         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1089                 bio_put(bio);
1090                 return -EFAULT;
1091         }
1092         ClearPageError(page);
1093         inc_page_count(sbi, F2FS_RD_DATA);
1094         f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
1095         __submit_bio(sbi, bio, DATA);
1096         return 0;
1097 }
1098
1099 static void __set_data_blkaddr(struct dnode_of_data *dn)
1100 {
1101         struct f2fs_node *rn = F2FS_NODE(dn->node_page);
1102         __le32 *addr_array;
1103         int base = 0;
1104
1105         if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
1106                 base = get_extra_isize(dn->inode);
1107
1108         /* Get physical address of data block */
1109         addr_array = blkaddr_in_node(rn);
1110         addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1111 }
1112
1113 /*
1114  * Lock ordering for the change of data block address:
1115  * ->data_page
1116  *  ->node_page
1117  *    update block addresses in the node page
1118  */
1119 void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
1120 {
1121         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1122         __set_data_blkaddr(dn);
1123         if (set_page_dirty(dn->node_page))
1124                 dn->node_changed = true;
1125 }
1126
1127 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1128 {
1129         dn->data_blkaddr = blkaddr;
1130         f2fs_set_data_blkaddr(dn);
1131         f2fs_update_extent_cache(dn);
1132 }
1133
1134 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
1135 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1136 {
1137         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1138         int err;
1139
1140         if (!count)
1141                 return 0;
1142
1143         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1144                 return -EPERM;
1145         if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1146                 return err;
1147
1148         trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1149                                                 dn->ofs_in_node, count);
1150
1151         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1152
1153         for (; count > 0; dn->ofs_in_node++) {
1154                 block_t blkaddr = f2fs_data_blkaddr(dn);
1155
1156                 if (blkaddr == NULL_ADDR) {
1157                         dn->data_blkaddr = NEW_ADDR;
1158                         __set_data_blkaddr(dn);
1159                         count--;
1160                 }
1161         }
1162
1163         if (set_page_dirty(dn->node_page))
1164                 dn->node_changed = true;
1165         return 0;
1166 }
1167
1168 /* Should keep dn->ofs_in_node unchanged */
1169 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1170 {
1171         unsigned int ofs_in_node = dn->ofs_in_node;
1172         int ret;
1173
1174         ret = f2fs_reserve_new_blocks(dn, 1);
1175         dn->ofs_in_node = ofs_in_node;
1176         return ret;
1177 }
1178
1179 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1180 {
1181         bool need_put = dn->inode_page ? false : true;
1182         int err;
1183
1184         err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1185         if (err)
1186                 return err;
1187
1188         if (dn->data_blkaddr == NULL_ADDR)
1189                 err = f2fs_reserve_new_block(dn);
1190         if (err || need_put)
1191                 f2fs_put_dnode(dn);
1192         return err;
1193 }
1194
1195 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
1196 {
1197         struct extent_info ei = {0, };
1198         struct inode *inode = dn->inode;
1199
1200         if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1201                 dn->data_blkaddr = ei.blk + index - ei.fofs;
1202                 return 0;
1203         }
1204
1205         return f2fs_reserve_block(dn, index);
1206 }
1207
1208 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1209                                      blk_opf_t op_flags, bool for_write)
1210 {
1211         struct address_space *mapping = inode->i_mapping;
1212         struct dnode_of_data dn;
1213         struct page *page;
1214         struct extent_info ei = {0, };
1215         int err;
1216
1217         page = f2fs_grab_cache_page(mapping, index, for_write);
1218         if (!page)
1219                 return ERR_PTR(-ENOMEM);
1220
1221         if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1222                 dn.data_blkaddr = ei.blk + index - ei.fofs;
1223                 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1224                                                 DATA_GENERIC_ENHANCE_READ)) {
1225                         err = -EFSCORRUPTED;
1226                         f2fs_handle_error(F2FS_I_SB(inode),
1227                                                 ERROR_INVALID_BLKADDR);
1228                         goto put_err;
1229                 }
1230                 goto got_it;
1231         }
1232
1233         set_new_dnode(&dn, inode, NULL, NULL, 0);
1234         err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1235         if (err)
1236                 goto put_err;
1237         f2fs_put_dnode(&dn);
1238
1239         if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1240                 err = -ENOENT;
1241                 goto put_err;
1242         }
1243         if (dn.data_blkaddr != NEW_ADDR &&
1244                         !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1245                                                 dn.data_blkaddr,
1246                                                 DATA_GENERIC_ENHANCE)) {
1247                 err = -EFSCORRUPTED;
1248                 f2fs_handle_error(F2FS_I_SB(inode),
1249                                         ERROR_INVALID_BLKADDR);
1250                 goto put_err;
1251         }
1252 got_it:
1253         if (PageUptodate(page)) {
1254                 unlock_page(page);
1255                 return page;
1256         }
1257
1258         /*
1259          * A new dentry page is allocated but not able to be written, since its
1260          * new inode page couldn't be allocated due to -ENOSPC.
1261          * In such the case, its blkaddr can be remained as NEW_ADDR.
1262          * see, f2fs_add_link -> f2fs_get_new_data_page ->
1263          * f2fs_init_inode_metadata.
1264          */
1265         if (dn.data_blkaddr == NEW_ADDR) {
1266                 zero_user_segment(page, 0, PAGE_SIZE);
1267                 if (!PageUptodate(page))
1268                         SetPageUptodate(page);
1269                 unlock_page(page);
1270                 return page;
1271         }
1272
1273         err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
1274                                                 op_flags, for_write);
1275         if (err)
1276                 goto put_err;
1277         return page;
1278
1279 put_err:
1280         f2fs_put_page(page, 1);
1281         return ERR_PTR(err);
1282 }
1283
1284 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
1285 {
1286         struct address_space *mapping = inode->i_mapping;
1287         struct page *page;
1288
1289         page = find_get_page(mapping, index);
1290         if (page && PageUptodate(page))
1291                 return page;
1292         f2fs_put_page(page, 0);
1293
1294         page = f2fs_get_read_data_page(inode, index, 0, false);
1295         if (IS_ERR(page))
1296                 return page;
1297
1298         if (PageUptodate(page))
1299                 return page;
1300
1301         wait_on_page_locked(page);
1302         if (unlikely(!PageUptodate(page))) {
1303                 f2fs_put_page(page, 0);
1304                 return ERR_PTR(-EIO);
1305         }
1306         return page;
1307 }
1308
1309 /*
1310  * If it tries to access a hole, return an error.
1311  * Because, the callers, functions in dir.c and GC, should be able to know
1312  * whether this page exists or not.
1313  */
1314 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1315                                                         bool for_write)
1316 {
1317         struct address_space *mapping = inode->i_mapping;
1318         struct page *page;
1319 repeat:
1320         page = f2fs_get_read_data_page(inode, index, 0, for_write);
1321         if (IS_ERR(page))
1322                 return page;
1323
1324         /* wait for read completion */
1325         lock_page(page);
1326         if (unlikely(page->mapping != mapping)) {
1327                 f2fs_put_page(page, 1);
1328                 goto repeat;
1329         }
1330         if (unlikely(!PageUptodate(page))) {
1331                 f2fs_put_page(page, 1);
1332                 return ERR_PTR(-EIO);
1333         }
1334         return page;
1335 }
1336
1337 /*
1338  * Caller ensures that this data page is never allocated.
1339  * A new zero-filled data page is allocated in the page cache.
1340  *
1341  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1342  * f2fs_unlock_op().
1343  * Note that, ipage is set only by make_empty_dir, and if any error occur,
1344  * ipage should be released by this function.
1345  */
1346 struct page *f2fs_get_new_data_page(struct inode *inode,
1347                 struct page *ipage, pgoff_t index, bool new_i_size)
1348 {
1349         struct address_space *mapping = inode->i_mapping;
1350         struct page *page;
1351         struct dnode_of_data dn;
1352         int err;
1353
1354         page = f2fs_grab_cache_page(mapping, index, true);
1355         if (!page) {
1356                 /*
1357                  * before exiting, we should make sure ipage will be released
1358                  * if any error occur.
1359                  */
1360                 f2fs_put_page(ipage, 1);
1361                 return ERR_PTR(-ENOMEM);
1362         }
1363
1364         set_new_dnode(&dn, inode, ipage, NULL, 0);
1365         err = f2fs_reserve_block(&dn, index);
1366         if (err) {
1367                 f2fs_put_page(page, 1);
1368                 return ERR_PTR(err);
1369         }
1370         if (!ipage)
1371                 f2fs_put_dnode(&dn);
1372
1373         if (PageUptodate(page))
1374                 goto got_it;
1375
1376         if (dn.data_blkaddr == NEW_ADDR) {
1377                 zero_user_segment(page, 0, PAGE_SIZE);
1378                 if (!PageUptodate(page))
1379                         SetPageUptodate(page);
1380         } else {
1381                 f2fs_put_page(page, 1);
1382
1383                 /* if ipage exists, blkaddr should be NEW_ADDR */
1384                 f2fs_bug_on(F2FS_I_SB(inode), ipage);
1385                 page = f2fs_get_lock_data_page(inode, index, true);
1386                 if (IS_ERR(page))
1387                         return page;
1388         }
1389 got_it:
1390         if (new_i_size && i_size_read(inode) <
1391                                 ((loff_t)(index + 1) << PAGE_SHIFT))
1392                 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1393         return page;
1394 }
1395
1396 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1397 {
1398         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1399         struct f2fs_summary sum;
1400         struct node_info ni;
1401         block_t old_blkaddr;
1402         blkcnt_t count = 1;
1403         int err;
1404
1405         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1406                 return -EPERM;
1407
1408         err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1409         if (err)
1410                 return err;
1411
1412         dn->data_blkaddr = f2fs_data_blkaddr(dn);
1413         if (dn->data_blkaddr != NULL_ADDR)
1414                 goto alloc;
1415
1416         if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1417                 return err;
1418
1419 alloc:
1420         set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1421         old_blkaddr = dn->data_blkaddr;
1422         f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
1423                                 &sum, seg_type, NULL);
1424         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
1425                 invalidate_mapping_pages(META_MAPPING(sbi),
1426                                         old_blkaddr, old_blkaddr);
1427                 f2fs_invalidate_compress_page(sbi, old_blkaddr);
1428         }
1429         f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1430         return 0;
1431 }
1432
1433 void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
1434 {
1435         if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1436                 if (lock)
1437                         f2fs_down_read(&sbi->node_change);
1438                 else
1439                         f2fs_up_read(&sbi->node_change);
1440         } else {
1441                 if (lock)
1442                         f2fs_lock_op(sbi);
1443                 else
1444                         f2fs_unlock_op(sbi);
1445         }
1446 }
1447
1448 /*
1449  * f2fs_map_blocks() tries to find or build mapping relationship which
1450  * maps continuous logical blocks to physical blocks, and return such
1451  * info via f2fs_map_blocks structure.
1452  */
1453 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
1454                                                 int create, int flag)
1455 {
1456         unsigned int maxblocks = map->m_len;
1457         struct dnode_of_data dn;
1458         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1459         int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1460         pgoff_t pgofs, end_offset, end;
1461         int err = 0, ofs = 1;
1462         unsigned int ofs_in_node, last_ofs_in_node;
1463         blkcnt_t prealloc;
1464         struct extent_info ei = {0, };
1465         block_t blkaddr;
1466         unsigned int start_pgofs;
1467         int bidx = 0;
1468
1469         if (!maxblocks)
1470                 return 0;
1471
1472         map->m_bdev = inode->i_sb->s_bdev;
1473         map->m_multidev_dio =
1474                 f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1475
1476         map->m_len = 0;
1477         map->m_flags = 0;
1478
1479         /* it only supports block size == page size */
1480         pgofs = (pgoff_t)map->m_lblk;
1481         end = pgofs + maxblocks;
1482
1483         if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1484                 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1485                                                         map->m_may_create)
1486                         goto next_dnode;
1487
1488                 map->m_pblk = ei.blk + pgofs - ei.fofs;
1489                 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
1490                 map->m_flags = F2FS_MAP_MAPPED;
1491                 if (map->m_next_extent)
1492                         *map->m_next_extent = pgofs + map->m_len;
1493
1494                 /* for hardware encryption, but to avoid potential issue in future */
1495                 if (flag == F2FS_GET_BLOCK_DIO)
1496                         f2fs_wait_on_block_writeback_range(inode,
1497                                                 map->m_pblk, map->m_len);
1498
1499                 if (map->m_multidev_dio) {
1500                         block_t blk_addr = map->m_pblk;
1501
1502                         bidx = f2fs_target_device_index(sbi, map->m_pblk);
1503
1504                         map->m_bdev = FDEV(bidx).bdev;
1505                         map->m_pblk -= FDEV(bidx).start_blk;
1506                         map->m_len = min(map->m_len,
1507                                 FDEV(bidx).end_blk + 1 - map->m_pblk);
1508
1509                         if (map->m_may_create)
1510                                 f2fs_update_device_state(sbi, inode->i_ino,
1511                                                         blk_addr, map->m_len);
1512                 }
1513                 goto out;
1514         }
1515
1516 next_dnode:
1517         if (map->m_may_create)
1518                 f2fs_do_map_lock(sbi, flag, true);
1519
1520         /* When reading holes, we need its node page */
1521         set_new_dnode(&dn, inode, NULL, NULL, 0);
1522         err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1523         if (err) {
1524                 if (flag == F2FS_GET_BLOCK_BMAP)
1525                         map->m_pblk = 0;
1526
1527                 if (err == -ENOENT) {
1528                         /*
1529                          * There is one exceptional case that read_node_page()
1530                          * may return -ENOENT due to filesystem has been
1531                          * shutdown or cp_error, so force to convert error
1532                          * number to EIO for such case.
1533                          */
1534                         if (map->m_may_create &&
1535                                 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1536                                 f2fs_cp_error(sbi))) {
1537                                 err = -EIO;
1538                                 goto unlock_out;
1539                         }
1540
1541                         err = 0;
1542                         if (map->m_next_pgofs)
1543                                 *map->m_next_pgofs =
1544                                         f2fs_get_next_page_offset(&dn, pgofs);
1545                         if (map->m_next_extent)
1546                                 *map->m_next_extent =
1547                                         f2fs_get_next_page_offset(&dn, pgofs);
1548                 }
1549                 goto unlock_out;
1550         }
1551
1552         start_pgofs = pgofs;
1553         prealloc = 0;
1554         last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1555         end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1556
1557 next_block:
1558         blkaddr = f2fs_data_blkaddr(&dn);
1559
1560         if (__is_valid_data_blkaddr(blkaddr) &&
1561                 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1562                 err = -EFSCORRUPTED;
1563                 f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
1564                 goto sync_out;
1565         }
1566
1567         if (__is_valid_data_blkaddr(blkaddr)) {
1568                 /* use out-place-update for driect IO under LFS mode */
1569                 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1570                                                         map->m_may_create) {
1571                         err = __allocate_data_block(&dn, map->m_seg_type);
1572                         if (err)
1573                                 goto sync_out;
1574                         blkaddr = dn.data_blkaddr;
1575                         set_inode_flag(inode, FI_APPEND_WRITE);
1576                 }
1577         } else {
1578                 if (create) {
1579                         if (unlikely(f2fs_cp_error(sbi))) {
1580                                 err = -EIO;
1581                                 goto sync_out;
1582                         }
1583                         if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1584                                 if (blkaddr == NULL_ADDR) {
1585                                         prealloc++;
1586                                         last_ofs_in_node = dn.ofs_in_node;
1587                                 }
1588                         } else {
1589                                 WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
1590                                         flag != F2FS_GET_BLOCK_DIO);
1591                                 err = __allocate_data_block(&dn,
1592                                                         map->m_seg_type);
1593                                 if (!err) {
1594                                         if (flag == F2FS_GET_BLOCK_PRE_DIO)
1595                                                 file_need_truncate(inode);
1596                                         set_inode_flag(inode, FI_APPEND_WRITE);
1597                                 }
1598                         }
1599                         if (err)
1600                                 goto sync_out;
1601                         map->m_flags |= F2FS_MAP_NEW;
1602                         blkaddr = dn.data_blkaddr;
1603                 } else {
1604                         if (f2fs_compressed_file(inode) &&
1605                                         f2fs_sanity_check_cluster(&dn) &&
1606                                         (flag != F2FS_GET_BLOCK_FIEMAP ||
1607                                         IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
1608                                 err = -EFSCORRUPTED;
1609                                 f2fs_handle_error(sbi,
1610                                                 ERROR_CORRUPTED_CLUSTER);
1611                                 goto sync_out;
1612                         }
1613                         if (flag == F2FS_GET_BLOCK_BMAP) {
1614                                 map->m_pblk = 0;
1615                                 goto sync_out;
1616                         }
1617                         if (flag == F2FS_GET_BLOCK_PRECACHE)
1618                                 goto sync_out;
1619                         if (flag == F2FS_GET_BLOCK_FIEMAP &&
1620                                                 blkaddr == NULL_ADDR) {
1621                                 if (map->m_next_pgofs)
1622                                         *map->m_next_pgofs = pgofs + 1;
1623                                 goto sync_out;
1624                         }
1625                         if (flag != F2FS_GET_BLOCK_FIEMAP) {
1626                                 /* for defragment case */
1627                                 if (map->m_next_pgofs)
1628                                         *map->m_next_pgofs = pgofs + 1;
1629                                 goto sync_out;
1630                         }
1631                 }
1632         }
1633
1634         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1635                 goto skip;
1636
1637         if (map->m_multidev_dio)
1638                 bidx = f2fs_target_device_index(sbi, blkaddr);
1639
1640         if (map->m_len == 0) {
1641                 /* preallocated unwritten block should be mapped for fiemap. */
1642                 if (blkaddr == NEW_ADDR)
1643                         map->m_flags |= F2FS_MAP_UNWRITTEN;
1644                 map->m_flags |= F2FS_MAP_MAPPED;
1645
1646                 map->m_pblk = blkaddr;
1647                 map->m_len = 1;
1648
1649                 if (map->m_multidev_dio)
1650                         map->m_bdev = FDEV(bidx).bdev;
1651         } else if ((map->m_pblk != NEW_ADDR &&
1652                         blkaddr == (map->m_pblk + ofs)) ||
1653                         (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1654                         flag == F2FS_GET_BLOCK_PRE_DIO) {
1655                 if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1656                         goto sync_out;
1657                 ofs++;
1658                 map->m_len++;
1659         } else {
1660                 goto sync_out;
1661         }
1662
1663 skip:
1664         dn.ofs_in_node++;
1665         pgofs++;
1666
1667         /* preallocate blocks in batch for one dnode page */
1668         if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1669                         (pgofs == end || dn.ofs_in_node == end_offset)) {
1670
1671                 dn.ofs_in_node = ofs_in_node;
1672                 err = f2fs_reserve_new_blocks(&dn, prealloc);
1673                 if (err)
1674                         goto sync_out;
1675
1676                 map->m_len += dn.ofs_in_node - ofs_in_node;
1677                 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1678                         err = -ENOSPC;
1679                         goto sync_out;
1680                 }
1681                 dn.ofs_in_node = end_offset;
1682         }
1683
1684         if (pgofs >= end)
1685                 goto sync_out;
1686         else if (dn.ofs_in_node < end_offset)
1687                 goto next_block;
1688
1689         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1690                 if (map->m_flags & F2FS_MAP_MAPPED) {
1691                         unsigned int ofs = start_pgofs - map->m_lblk;
1692
1693                         f2fs_update_extent_cache_range(&dn,
1694                                 start_pgofs, map->m_pblk + ofs,
1695                                 map->m_len - ofs);
1696                 }
1697         }
1698
1699         f2fs_put_dnode(&dn);
1700
1701         if (map->m_may_create) {
1702                 f2fs_do_map_lock(sbi, flag, false);
1703                 f2fs_balance_fs(sbi, dn.node_changed);
1704         }
1705         goto next_dnode;
1706
1707 sync_out:
1708
1709         if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1710                 /*
1711                  * for hardware encryption, but to avoid potential issue
1712                  * in future
1713                  */
1714                 f2fs_wait_on_block_writeback_range(inode,
1715                                                 map->m_pblk, map->m_len);
1716
1717                 if (map->m_multidev_dio) {
1718                         block_t blk_addr = map->m_pblk;
1719
1720                         bidx = f2fs_target_device_index(sbi, map->m_pblk);
1721
1722                         map->m_bdev = FDEV(bidx).bdev;
1723                         map->m_pblk -= FDEV(bidx).start_blk;
1724
1725                         if (map->m_may_create)
1726                                 f2fs_update_device_state(sbi, inode->i_ino,
1727                                                         blk_addr, map->m_len);
1728
1729                         f2fs_bug_on(sbi, blk_addr + map->m_len >
1730                                                 FDEV(bidx).end_blk + 1);
1731                 }
1732         }
1733
1734         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1735                 if (map->m_flags & F2FS_MAP_MAPPED) {
1736                         unsigned int ofs = start_pgofs - map->m_lblk;
1737
1738                         f2fs_update_extent_cache_range(&dn,
1739                                 start_pgofs, map->m_pblk + ofs,
1740                                 map->m_len - ofs);
1741                 }
1742                 if (map->m_next_extent)
1743                         *map->m_next_extent = pgofs + 1;
1744         }
1745         f2fs_put_dnode(&dn);
1746 unlock_out:
1747         if (map->m_may_create) {
1748                 f2fs_do_map_lock(sbi, flag, false);
1749                 f2fs_balance_fs(sbi, dn.node_changed);
1750         }
1751 out:
1752         trace_f2fs_map_blocks(inode, map, create, flag, err);
1753         return err;
1754 }
1755
1756 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1757 {
1758         struct f2fs_map_blocks map;
1759         block_t last_lblk;
1760         int err;
1761
1762         if (pos + len > i_size_read(inode))
1763                 return false;
1764
1765         map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1766         map.m_next_pgofs = NULL;
1767         map.m_next_extent = NULL;
1768         map.m_seg_type = NO_CHECK_TYPE;
1769         map.m_may_create = false;
1770         last_lblk = F2FS_BLK_ALIGN(pos + len);
1771
1772         while (map.m_lblk < last_lblk) {
1773                 map.m_len = last_lblk - map.m_lblk;
1774                 err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
1775                 if (err || map.m_len == 0)
1776                         return false;
1777                 map.m_lblk += map.m_len;
1778         }
1779         return true;
1780 }
1781
1782 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1783 {
1784         return (bytes >> inode->i_blkbits);
1785 }
1786
1787 static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1788 {
1789         return (blks << inode->i_blkbits);
1790 }
1791
1792 static int f2fs_xattr_fiemap(struct inode *inode,
1793                                 struct fiemap_extent_info *fieinfo)
1794 {
1795         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1796         struct page *page;
1797         struct node_info ni;
1798         __u64 phys = 0, len;
1799         __u32 flags;
1800         nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1801         int err = 0;
1802
1803         if (f2fs_has_inline_xattr(inode)) {
1804                 int offset;
1805
1806                 page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1807                                                 inode->i_ino, false);
1808                 if (!page)
1809                         return -ENOMEM;
1810
1811                 err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1812                 if (err) {
1813                         f2fs_put_page(page, 1);
1814                         return err;
1815                 }
1816
1817                 phys = blks_to_bytes(inode, ni.blk_addr);
1818                 offset = offsetof(struct f2fs_inode, i_addr) +
1819                                         sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1820                                         get_inline_xattr_addrs(inode));
1821
1822                 phys += offset;
1823                 len = inline_xattr_size(inode);
1824
1825                 f2fs_put_page(page, 1);
1826
1827                 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1828
1829                 if (!xnid)
1830                         flags |= FIEMAP_EXTENT_LAST;
1831
1832                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1833                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1834                 if (err)
1835                         return err;
1836         }
1837
1838         if (xnid) {
1839                 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1840                 if (!page)
1841                         return -ENOMEM;
1842
1843                 err = f2fs_get_node_info(sbi, xnid, &ni, false);
1844                 if (err) {
1845                         f2fs_put_page(page, 1);
1846                         return err;
1847                 }
1848
1849                 phys = blks_to_bytes(inode, ni.blk_addr);
1850                 len = inode->i_sb->s_blocksize;
1851
1852                 f2fs_put_page(page, 1);
1853
1854                 flags = FIEMAP_EXTENT_LAST;
1855         }
1856
1857         if (phys) {
1858                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1859                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1860         }
1861
1862         return (err < 0 ? err : 0);
1863 }
1864
1865 static loff_t max_inode_blocks(struct inode *inode)
1866 {
1867         loff_t result = ADDRS_PER_INODE(inode);
1868         loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1869
1870         /* two direct node blocks */
1871         result += (leaf_count * 2);
1872
1873         /* two indirect node blocks */
1874         leaf_count *= NIDS_PER_BLOCK;
1875         result += (leaf_count * 2);
1876
1877         /* one double indirect node block */
1878         leaf_count *= NIDS_PER_BLOCK;
1879         result += leaf_count;
1880
1881         return result;
1882 }
1883
/*
 * f2fs_fiemap - report the extent mapping of a file range
 * @inode:   file being queried
 * @fieinfo: fiemap request/result descriptor filled through
 *           fiemap_fill_next_extent()
 * @start:   first byte of the range
 * @len:     length of the range in bytes (may be clamped below)
 *
 * The body is a loop built from gotos (next / skip_fill / prep_next).
 * Each pass maps a run of blocks with f2fs_map_blocks(); the extent found
 * in one pass is kept in logical/phys/size/flags and only handed to
 * fiemap_fill_next_extent() on a later pass (when size != 0), which allows
 * blocks of a compressed cluster to be appended to it first.
 *
 * Returns 0 on success or a negative errno.  A return value of 1 from the
 * helpers is converted to 0 before returning.
 */
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                u64 start, u64 len)
{
        struct f2fs_map_blocks map;
        sector_t start_blk, last_blk;
        pgoff_t next_pgofs;
        /* pending extent: recorded in one pass, emitted in a later one */
        u64 logical = 0, phys = 0, size = 0;
        u32 flags = 0;
        int ret = 0;
        /* compr_cluster: set once a COMPRESS_ADDR block has been seen */
        bool compr_cluster = false, compr_appended;
        unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
        /* blocks of the current compressed cluster accounted for so far */
        unsigned int count_in_cluster = 0;
        loff_t maxbytes;

        if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
                ret = f2fs_precache_extents(inode);
                if (ret)
                        return ret;
        }

        ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
        if (ret)
                return ret;

        inode_lock(inode);

        /* reject a start beyond the maximum file size, clamp len to it */
        maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
        if (start > maxbytes) {
                ret = -EFBIG;
                goto out;
        }

        if (len > maxbytes || (maxbytes - len) < start)
                len = maxbytes - start;

        /* xattr request: handled entirely by f2fs_xattr_fiemap() */
        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
                ret = f2fs_xattr_fiemap(inode, fieinfo);
                goto out;
        }

        /* inline data/dentry: -EAGAIN means fall through to the block walk */
        if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
                ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
                if (ret != -EAGAIN)
                        goto out;
        }

        /* always look at one block at least */
        if (bytes_to_blks(inode, len) == 0)
                len = blks_to_bytes(inode, 1);

        start_blk = bytes_to_blks(inode, start);
        last_blk = bytes_to_blks(inode, start + len - 1);

next:
        memset(&map, 0, sizeof(map));
        map.m_lblk = start_blk;
        map.m_len = bytes_to_blks(inode, len);
        map.m_next_pgofs = &next_pgofs;
        map.m_seg_type = NO_CHECK_TYPE;

        /*
         * Inside a compressed cluster: skip the block that held
         * COMPRESS_ADDR and look only at the rest of the cluster.
         */
        if (compr_cluster) {
                map.m_lblk += 1;
                map.m_len = cluster_size - count_in_cluster;
        }

        ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
        if (ret)
                goto out;

        /* HOLE */
        if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
                /* continue from the offset f2fs_map_blocks() suggested */
                start_blk = next_pgofs;

                if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
                                                max_inode_blocks(inode)))
                        goto prep_next;

                /* nothing left within the inode's addressable range */
                flags |= FIEMAP_EXTENT_LAST;
        }

        compr_appended = false;
        /* In a case of compressed cluster, append this to the last extent */
        if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
                        !(map.m_flags & F2FS_MAP_FLAGS))) {
                compr_appended = true;
                goto skip_fill;
        }

        /* emit the extent recorded by an earlier pass */
        if (size) {
                flags |= FIEMAP_EXTENT_MERGED;
                if (IS_ENCRYPTED(inode))
                        flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

                ret = fiemap_fill_next_extent(fieinfo, logical,
                                phys, size, flags);
                trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
                if (ret)
                        goto out;
                size = 0;
        }

        /* requested range fully covered */
        if (start_blk > last_blk)
                goto out;

skip_fill:
        if (map.m_pblk == COMPRESS_ADDR) {
                /* start of a compressed cluster; this block counts as one */
                compr_cluster = true;
                count_in_cluster = 1;
        } else if (compr_appended) {
                /* grow the pending extent by the remainder of the cluster */
                unsigned int appended_blks = cluster_size -
                                                count_in_cluster + 1;
                size += blks_to_bytes(inode, appended_blks);
                start_blk += appended_blks;
                compr_cluster = false;
        } else {
                /* record a new pending extent from this mapping */
                logical = blks_to_bytes(inode, start_blk);
                phys = __is_valid_data_blkaddr(map.m_pblk) ?
                        blks_to_bytes(inode, map.m_pblk) : 0;
                size = blks_to_bytes(inode, map.m_len);
                flags = 0;

                if (compr_cluster) {
                        flags = FIEMAP_EXTENT_ENCODED;
                        count_in_cluster += map.m_len;
                        if (count_in_cluster == cluster_size) {
                                /* cluster complete: add one more block to size */
                                compr_cluster = false;
                                size += blks_to_bytes(inode, 1);
                        }
                } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
                        flags = FIEMAP_EXTENT_UNWRITTEN;
                }

                start_blk += bytes_to_blks(inode, size);
        }

prep_next:
        /* give up the CPU if needed and stop on a fatal signal */
        cond_resched();
        if (fatal_signal_pending(current))
                ret = -EINTR;
        else
                goto next;
out:
        /*
         * A value of 1 is not an error for the caller; presumably it is
         * fiemap_fill_next_extent() signalling that no further extents are
         * wanted -- confirm against the fiemap API.
         */
        if (ret == 1)
                ret = 0;

        inode_unlock(inode);
        return ret;
}
2031
2032 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2033 {
2034         if (IS_ENABLED(CONFIG_FS_VERITY) &&
2035             (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
2036                 return inode->i_sb->s_maxbytes;
2037
2038         return i_size_read(inode);
2039 }
2040
/*
 * f2fs_read_single_page - queue one page of a file for reading
 * @inode:             file the page belongs to
 * @page:              page to fill
 * @nr_pages:          bounds the mapping request (block_in_file + nr_pages)
 *                     and is passed on to f2fs_grab_read_bio()
 * @map:               mapping result kept across calls so that a previous
 *                     lookup can be reused
 * @bio_ret:           in/out: bio being built; may be submitted and replaced
 * @last_block_in_bio: in/out: block address of the last page added to the bio
 * @is_readahead:      selects REQ_RAHEAD when a new bio is allocated
 *
 * A page at or beyond the read limit, or one with no mapped block, is
 * zero-filled, marked uptodate and unlocked here.  Otherwise the page is
 * added to the bio and left locked.
 *
 * Returns 0 on success or a negative errno.  The error paths in this
 * function do not unlock @page.
 */
static int f2fs_read_single_page(struct inode *inode, struct page *page,
                                        unsigned nr_pages,
                                        struct f2fs_map_blocks *map,
                                        struct bio **bio_ret,
                                        sector_t *last_block_in_bio,
                                        bool is_readahead)
{
        struct bio *bio = *bio_ret;
        const unsigned blocksize = blks_to_bytes(inode, 1);
        sector_t block_in_file;
        sector_t last_block;
        sector_t last_block_in_file;
        sector_t block_nr;
        int ret = 0;

        block_in_file = (sector_t)page_index(page);
        last_block = block_in_file + nr_pages;
        /* read limit rounded up to whole blocks */
        last_block_in_file = bytes_to_blks(inode,
                        f2fs_readpage_limit(inode) + blocksize - 1);
        if (last_block > last_block_in_file)
                last_block = last_block_in_file;

        /* just zeroing out page which is beyond EOF */
        if (block_in_file >= last_block)
                goto zero_out;
        /*
         * Map blocks using the previous result first.
         */
        if ((map->m_flags & F2FS_MAP_MAPPED) &&
                        block_in_file > map->m_lblk &&
                        block_in_file < (map->m_lblk + map->m_len))
                goto got_it;

        /*
         * Then do more f2fs_map_blocks() calls until we are
         * done with this page.
         */
        map->m_lblk = block_in_file;
        map->m_len = last_block - block_in_file;

        ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
        if (ret)
                goto out;
got_it:
        if ((map->m_flags & F2FS_MAP_MAPPED)) {
                /* physical block = extent start + offset inside the extent */
                block_nr = map->m_pblk + block_in_file - map->m_lblk;
                SetPageMappedToDisk(page);

                if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
                                                DATA_GENERIC_ENHANCE_READ)) {
                        ret = -EFSCORRUPTED;
                        f2fs_handle_error(F2FS_I_SB(inode),
                                                ERROR_INVALID_BLKADDR);
                        goto out;
                }
        } else {
                /* also entered directly via goto for pages beyond the limit */
zero_out:
                zero_user_segment(page, 0, PAGE_SIZE);
                if (f2fs_need_verity(inode, page->index) &&
                    !fsverity_verify_page(page)) {
                        ret = -EIO;
                        goto out;
                }
                if (!PageUptodate(page))
                        SetPageUptodate(page);
                unlock_page(page);
                goto out;
        }

        /*
         * This page will go to BIO.  Do we need to send this
         * BIO off first?
         */
        if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
                                       *last_block_in_bio, block_nr) ||
                    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
                /* also reached via goto when bio_add_page() could not add */
submit_and_realloc:
                __submit_bio(F2FS_I_SB(inode), bio, DATA);
                bio = NULL;
        }
        if (bio == NULL) {
                bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
                                is_readahead ? REQ_RAHEAD : 0, page->index,
                                false);
                if (IS_ERR(bio)) {
                        ret = PTR_ERR(bio);
                        /* hand NULL, not the error pointer, back to the caller */
                        bio = NULL;
                        goto out;
                }
        }

        /*
         * If the page is under writeback, we need to wait for
         * its completion to see the correct decrypted data.
         */
        f2fs_wait_on_block_writeback(inode, block_nr);

        if (bio_add_page(bio, page, blocksize, 0) < blocksize)
                goto submit_and_realloc;

        inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
        f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
                                                        F2FS_BLKSIZE);
        ClearPageError(page);
        *last_block_in_bio = block_nr;
        goto out;
out:
        /* always report the current bio (possibly NULL) to the caller */
        *bio_ret = bio;
        return ret;
}
2151
#ifdef CONFIG_F2FS_FS_COMPRESSION
/*
 * f2fs_read_multi_pages - queue reads for the on-disk blocks of one cluster
 * @cc:                compress context holding the cluster's raw pages
 *                     (cc->rpages); must not be empty on entry
 * @bio_ret:           in/out: bio being built; may be submitted and replaced
 * @nr_pages:          passed on to f2fs_grab_read_bio()
 * @last_block_in_bio: in/out: block address of the last page added to the bio
 * @is_readahead:      selects REQ_RAHEAD when a new bio is allocated
 * @for_write:         when true, pages dropped from the cluster in the first
 *                     loop also get a put_page(); forwarded to
 *                     f2fs_grab_read_bio() as well
 *
 * Pages of the cluster that lie beyond the read limit are zero-filled and
 * marked uptodate; those pages and any already-uptodate ones are unlocked
 * and removed from @cc.  For what remains, the block addresses following
 * the cluster's first slot are taken either from the extent cache or from
 * the dnode, a decompress context is allocated, and each compressed page
 * is added to the bio with STEP_DECOMPRESS enabled.
 *
 * Returns 0 on success or a negative errno.  On the out/out_put_dnode
 * error paths every page still in cc->rpages is unlocked with its
 * uptodate and error flags cleared.
 */
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
                                unsigned nr_pages, sector_t *last_block_in_bio,
                                bool is_readahead, bool for_write)
{
        struct dnode_of_data dn;
        struct inode *inode = cc->inode;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct bio *bio = *bio_ret;
        unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
        sector_t last_block_in_file;
        const unsigned blocksize = blks_to_bytes(inode, 1);
        struct decompress_io_ctx *dic = NULL;
        struct extent_info ei = {0, };
        /* true: addresses come from dn; false: from the extent cache (ei) */
        bool from_dnode = true;
        int i;
        int ret = 0;

        f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));

        last_block_in_file = bytes_to_blks(inode,
                        f2fs_readpage_limit(inode) + blocksize - 1);

        /* get rid of pages beyond EOF */
        for (i = 0; i < cc->cluster_size; i++) {
                struct page *page = cc->rpages[i];

                if (!page)
                        continue;
                if ((sector_t)page->index >= last_block_in_file) {
                        zero_user_segment(page, 0, PAGE_SIZE);
                        if (!PageUptodate(page))
                                SetPageUptodate(page);
                } else if (!PageUptodate(page)) {
                        /* still needs to be read: keep it in the cluster */
                        continue;
                }
                unlock_page(page);
                if (for_write)
                        put_page(page);
                cc->rpages[i] = NULL;
                cc->nr_rpages--;
        }

        /* we are done since all pages are beyond EOF */
        if (f2fs_cluster_is_empty(cc))
                goto out;

        if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
                from_dnode = false;

        /* dn stays uninitialized on this path; guard every use of it */
        if (!from_dnode)
                goto skip_reading_dnode;

        set_new_dnode(&dn, inode, NULL, NULL, 0);
        ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
        if (ret)
                goto out;

        f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);

skip_reading_dnode:
        /* count the valid block addresses that follow the first slot */
        for (i = 1; i < cc->cluster_size; i++) {
                block_t blkaddr;

                blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
                                        dn.ofs_in_node + i) :
                                        ei.blk + i - 1;

                if (!__is_valid_data_blkaddr(blkaddr))
                        break;

                if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
                        ret = -EFAULT;
                        goto out_put_dnode;
                }
                cc->nr_cpages++;

                if (!from_dnode && i >= ei.c_len)
                        break;
        }

        /* nothing to decompress */
        if (cc->nr_cpages == 0) {
                ret = 0;
                goto out_put_dnode;
        }

        dic = f2fs_alloc_dic(cc);
        if (IS_ERR(dic)) {
                ret = PTR_ERR(dic);
                goto out_put_dnode;
        }

        for (i = 0; i < cc->nr_cpages; i++) {
                struct page *page = dic->cpages[i];
                block_t blkaddr;
                struct bio_post_read_ctx *ctx;

                blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
                                        dn.ofs_in_node + i + 1) :
                                        ei.blk + i;

                f2fs_wait_on_block_writeback(inode, blkaddr);

                /* page obtained without queuing a read: no bio needed */
                if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
                        if (atomic_dec_and_test(&dic->remaining_pages))
                                f2fs_decompress_cluster(dic, true);
                        continue;
                }

                if (bio && (!page_is_mergeable(sbi, bio,
                                        *last_block_in_bio, blkaddr) ||
                    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
                        __submit_bio(sbi, bio, DATA);
                        bio = NULL;
                }

                if (!bio) {
                        bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
                                        is_readahead ? REQ_RAHEAD : 0,
                                        page->index, for_write);
                        if (IS_ERR(bio)) {
                                ret = PTR_ERR(bio);
                                f2fs_decompress_end_io(dic, ret, true);
                                /*
                                 * dn was only set up when the addresses
                                 * came from the dnode; releasing it on the
                                 * extent-cache path would touch
                                 * uninitialized stack data.
                                 */
                                if (from_dnode)
                                        f2fs_put_dnode(&dn);
                                *bio_ret = NULL;
                                return ret;
                        }
                }

                if (bio_add_page(bio, page, blocksize, 0) < blocksize)
                        goto submit_and_realloc;

                ctx = get_post_read_ctx(bio);
                ctx->enabled_steps |= STEP_DECOMPRESS;
                refcount_inc(&dic->refcnt);

                inc_page_count(sbi, F2FS_RD_DATA);
                f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
                ClearPageError(page);
                *last_block_in_bio = blkaddr;
        }

        if (from_dnode)
                f2fs_put_dnode(&dn);

        *bio_ret = bio;
        return 0;

out_put_dnode:
        if (from_dnode)
                f2fs_put_dnode(&dn);
out:
        /* release every raw page that is still part of the cluster */
        for (i = 0; i < cc->cluster_size; i++) {
                if (cc->rpages[i]) {
                        ClearPageUptodate(cc->rpages[i]);
                        ClearPageError(cc->rpages[i]);
                        unlock_page(cc->rpages[i]);
                }
        }
        *bio_ret = bio;
        return ret;
}
#endif
2317
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Read either the pages of a readahead request (@rac != NULL, @page is
 * ignored) or the single @page (@rac == NULL).
 *
 * With compression support built in and a compressed file, pages are
 * collected into a compress context and read cluster by cluster through
 * f2fs_read_multi_pages(); pages of clusters reported as not compressed,
 * and all pages of other files, go through f2fs_read_single_page().
 *
 * Returns the value left in ret by the last step executed; an error from
 * an earlier page can be overwritten by a later iteration.
 */
static int f2fs_mpage_readpages(struct inode *inode,
                struct readahead_control *rac, struct page *page)
{
        struct bio *bio = NULL;
        sector_t last_block_in_bio = 0;
        struct f2fs_map_blocks map;
#ifdef CONFIG_F2FS_FS_COMPRESSION
        struct compress_ctx cc = {
                .inode = inode,
                .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
                .cluster_size = F2FS_I(inode)->i_cluster_size,
                .cluster_idx = NULL_CLUSTER,
                .rpages = NULL,
                .cpages = NULL,
                .nr_rpages = 0,
                .nr_cpages = 0,
        };
        /* index of the last cluster found to be not compressed, if any */
        pgoff_t nc_cluster_idx = NULL_CLUSTER;
#endif
        unsigned nr_pages = rac ? readahead_count(rac) : 1;
        unsigned max_nr_pages = nr_pages;
        int ret = 0;

        /* empty mapping; f2fs_read_single_page() reuses it across pages */
        map.m_pblk = 0;
        map.m_lblk = 0;
        map.m_len = 0;
        map.m_flags = 0;
        map.m_next_pgofs = NULL;
        map.m_next_extent = NULL;
        map.m_seg_type = NO_CHECK_TYPE;
        map.m_may_create = false;

        for (; nr_pages; nr_pages--) {
                if (rac) {
                        page = readahead_page(rac);
                        prefetchw(&page->flags);
                }

#ifdef CONFIG_F2FS_FS_COMPRESSION
                if (f2fs_compressed_file(inode)) {
                        /* there are remaining compressed pages, submit them */
                        if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
                                ret = f2fs_read_multi_pages(&cc, &bio,
                                                        max_nr_pages,
                                                        &last_block_in_bio,
                                                        rac != NULL, false);
                                f2fs_destroy_compress_ctx(&cc, false);
                                if (ret)
                                        goto set_error_page;
                        }
                        if (cc.cluster_idx == NULL_CLUSTER) {
                                /* same cluster as the last non-compressed one */
                                if (nc_cluster_idx ==
                                        page->index >> cc.log_cluster_size) {
                                        goto read_single_page;
                                }

                                ret = f2fs_is_compressed_cluster(inode, page->index);
                                if (ret < 0)
                                        goto set_error_page;
                                else if (!ret) {
                                        /* remember it to skip the check next time */
                                        nc_cluster_idx =
                                                page->index >> cc.log_cluster_size;
                                        goto read_single_page;
                                }

                                nc_cluster_idx = NULL_CLUSTER;
                        }
                        ret = f2fs_init_compress_ctx(&cc);
                        if (ret)
                                goto set_error_page;

                        f2fs_compress_ctx_add_page(&cc, page);

                        goto next_page;
                }
read_single_page:
#endif

                ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
                                        &bio, &last_block_in_bio, rac);
                if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
#endif
                        /* failed page: flag the error, zero it and unlock */
                        SetPageError(page);
                        zero_user_segment(page, 0, PAGE_SIZE);
                        unlock_page(page);
                }
#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
#endif
                /* drop the page reference only on the readahead path */
                if (rac)
                        put_page(page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
                if (f2fs_compressed_file(inode)) {
                        /* last page */
                        if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
                                ret = f2fs_read_multi_pages(&cc, &bio,
                                                        max_nr_pages,
                                                        &last_block_in_bio,
                                                        rac != NULL, false);
                                f2fs_destroy_compress_ctx(&cc, false);
                        }
                }
#endif
        }
        /* submit whatever is still queued */
        if (bio)
                __submit_bio(F2FS_I_SB(inode), bio, DATA);
        return ret;
}
2433
2434 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2435 {
2436         struct page *page = &folio->page;
2437         struct inode *inode = page_file_mapping(page)->host;
2438         int ret = -EAGAIN;
2439
2440         trace_f2fs_readpage(page, DATA);
2441
2442         if (!f2fs_is_compress_backend_ready(inode)) {
2443                 unlock_page(page);
2444                 return -EOPNOTSUPP;
2445         }
2446
2447         /* If the file has inline data, try to read it directly */
2448         if (f2fs_has_inline_data(inode))
2449                 ret = f2fs_read_inline_data(inode, page);
2450         if (ret == -EAGAIN)
2451                 ret = f2fs_mpage_readpages(inode, NULL, page);
2452         return ret;
2453 }
2454
2455 static void f2fs_readahead(struct readahead_control *rac)
2456 {
2457         struct inode *inode = rac->mapping->host;
2458
2459         trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2460
2461         if (!f2fs_is_compress_backend_ready(inode))
2462                 return;
2463
2464         /* If the file has inline data, skip readahead */
2465         if (f2fs_has_inline_data(inode))
2466                 return;
2467
2468         f2fs_mpage_readpages(inode, rac, NULL);
2469 }
2470
2471 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2472 {
2473         struct inode *inode = fio->page->mapping->host;
2474         struct page *mpage, *page;
2475         gfp_t gfp_flags = GFP_NOFS;
2476
2477         if (!f2fs_encrypted_file(inode))
2478                 return 0;
2479
2480         page = fio->compressed_page ? fio->compressed_page : fio->page;
2481
2482         /* wait for GCed page writeback via META_MAPPING */
2483         f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2484
2485         if (fscrypt_inode_uses_inline_crypto(inode))
2486                 return 0;
2487
2488 retry_encrypt:
2489         fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2490                                         PAGE_SIZE, 0, gfp_flags);
2491         if (IS_ERR(fio->encrypted_page)) {
2492                 /* flush pending IOs and wait for a while in the ENOMEM case */
2493                 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2494                         f2fs_flush_merged_writes(fio->sbi);
2495                         memalloc_retry_wait(GFP_NOFS);
2496                         gfp_flags |= __GFP_NOFAIL;
2497                         goto retry_encrypt;
2498                 }
2499                 return PTR_ERR(fio->encrypted_page);
2500         }
2501
2502         mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2503         if (mpage) {
2504                 if (PageUptodate(mpage))
2505                         memcpy(page_address(mpage),
2506                                 page_address(fio->encrypted_page), PAGE_SIZE);
2507                 f2fs_put_page(mpage, 1);
2508         }
2509         return 0;
2510 }
2511
2512 static inline bool check_inplace_update_policy(struct inode *inode,
2513                                 struct f2fs_io_info *fio)
2514 {
2515         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2516         unsigned int policy = SM_I(sbi)->ipu_policy;
2517
2518         if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
2519                         is_inode_flag_set(inode, FI_OPU_WRITE))
2520                 return false;
2521         if (policy & (0x1 << F2FS_IPU_FORCE))
2522                 return true;
2523         if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
2524                 return true;
2525         if (policy & (0x1 << F2FS_IPU_UTIL) &&
2526                         utilization(sbi) > SM_I(sbi)->min_ipu_util)
2527                 return true;
2528         if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
2529                         utilization(sbi) > SM_I(sbi)->min_ipu_util)
2530                 return true;
2531
2532         /*
2533          * IPU for rewrite async pages
2534          */
2535         if (policy & (0x1 << F2FS_IPU_ASYNC) &&
2536                         fio && fio->op == REQ_OP_WRITE &&
2537                         !(fio->op_flags & REQ_SYNC) &&
2538                         !IS_ENCRYPTED(inode))
2539                 return true;
2540
2541         /* this is only set during fdatasync */
2542         if (policy & (0x1 << F2FS_IPU_FSYNC) &&
2543                         is_inode_flag_set(inode, FI_NEED_IPU))
2544                 return true;
2545
2546         if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2547                         !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2548                 return true;
2549
2550         return false;
2551 }
2552
2553 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2554 {
2555         /* swap file is migrating in aligned write mode */
2556         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2557                 return false;
2558
2559         if (f2fs_is_pinned_file(inode))
2560                 return true;
2561
2562         /* if this is cold file, we should overwrite to avoid fragmentation */
2563         if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
2564                 return true;
2565
2566         return check_inplace_update_policy(inode, fio);
2567 }
2568
2569 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2570 {
2571         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2572
2573         /* The below cases were checked when setting it. */
2574         if (f2fs_is_pinned_file(inode))
2575                 return false;
2576         if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2577                 return true;
2578         if (f2fs_lfs_mode(sbi))
2579                 return true;
2580         if (S_ISDIR(inode->i_mode))
2581                 return true;
2582         if (IS_NOQUOTA(inode))
2583                 return true;
2584         if (f2fs_is_atomic_file(inode))
2585                 return true;
2586
2587         /* swap file is migrating in aligned write mode */
2588         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2589                 return true;
2590
2591         if (is_inode_flag_set(inode, FI_OPU_WRITE))
2592                 return true;
2593
2594         if (fio) {
2595                 if (page_private_gcing(fio->page))
2596                         return true;
2597                 if (page_private_dummy(fio->page))
2598                         return true;
2599                 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2600                         f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2601                         return true;
2602         }
2603         return false;
2604 }
2605
2606 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2607 {
2608         struct inode *inode = fio->page->mapping->host;
2609
2610         if (f2fs_should_update_outplace(inode, fio))
2611                 return false;
2612
2613         return f2fs_should_update_inplace(inode, fio);
2614 }
2615
/*
 * Write the data page described by @fio, either in place (IPU) over its
 * current block or out of place (OPU).
 *
 * @fio->need_lock controls f2fs_lock_op handling inside this function:
 *   LOCK_REQ   - the op lock is try-locked before the dnode lookup and
 *                released before returning;
 *   LOCK_RETRY - no lock is taken up front; it is try-locked only when the
 *                OPU path is reached, and need_lock is then switched to
 *                LOCK_REQ so the exit path releases it;
 *   LOCK_DONE  - this function neither locks nor unlocks.
 *
 * Return: 0 on success (this includes the case where the block was already
 * truncated and nothing is written), -EAGAIN when f2fs_trylock_op() fails,
 * -EFSCORRUPTED when the old block address fails validation, or the error
 * reported by f2fs_get_dnode_of_data(), f2fs_get_node_info(),
 * f2fs_encrypt_one_page() or f2fs_inplace_write_data().
 */
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
        struct page *page = fio->page;
        struct inode *inode = page->mapping->host;
        struct dnode_of_data dn;
        struct extent_info ei = {0, };
        struct node_info ni;
        bool ipu_force = false;
        int err = 0;

        /* Use COW inode to make dnode_of_data for atomic write */
        if (f2fs_is_atomic_file(inode))
                set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
        else
                set_new_dnode(&dn, inode, NULL, NULL, 0);

        /*
         * Fast path: IPU is wanted and the extent cache already knows the
         * block address, so the dnode lookup (and the op lock) is skipped.
         */
        if (need_inplace_update(fio) &&
                        f2fs_lookup_extent_cache(inode, page->index, &ei)) {
                fio->old_blkaddr = ei.blk + page->index - ei.fofs;

                if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
                                                DATA_GENERIC_ENHANCE)) {
                        f2fs_handle_error(fio->sbi,
                                                ERROR_INVALID_BLKADDR);
                        return -EFSCORRUPTED;
                }

                ipu_force = true;
                fio->need_lock = LOCK_DONE;
                goto got_it;
        }

        /* Only trylock here: avoids a deadlock between page->lock and f2fs_lock_op */
        if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
                return -EAGAIN;

        err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
        if (err)
                goto out;

        fio->old_blkaddr = dn.data_blkaddr;

        /* This page is already truncated */
        if (fio->old_blkaddr == NULL_ADDR) {
                ClearPageUptodate(page);
                clear_page_private_gcing(page);
                /* err is still 0 here, so the caller sees success */
                goto out_writepage;
        }
got_it:
        if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
                !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
                                                DATA_GENERIC_ENHANCE)) {
                err = -EFSCORRUPTED;
                f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
                goto out_writepage;
        }

        /*
         * If current allocation needs SSR,
         * it had better in-place writes for updated data.
         */
        if (ipu_force ||
                (__is_valid_data_blkaddr(fio->old_blkaddr) &&
                                        need_inplace_update(fio))) {
                err = f2fs_encrypt_one_page(fio);
                if (err)
                        goto out_writepage;

                set_page_writeback(page);
                ClearPageError(page);
                /* dnode and op lock are released before the in-place write */
                f2fs_put_dnode(&dn);
                if (fio->need_lock == LOCK_REQ)
                        f2fs_unlock_op(fio->sbi);
                err = f2fs_inplace_write_data(fio);
                if (err) {
                        /* undo: drop the bounce page and end writeback */
                        if (fscrypt_inode_uses_fs_layer_crypto(inode))
                                fscrypt_finalize_bounce_page(&fio->encrypted_page);
                        if (PageWriteback(page))
                                end_page_writeback(page);
                } else {
                        set_inode_flag(inode, FI_UPDATE_WRITE);
                }
                trace_f2fs_do_write_data_page(fio->page, IPU);
                return err;
        }

        /* OPU path: a LOCK_RETRY caller gets the op lock only now */
        if (fio->need_lock == LOCK_RETRY) {
                if (!f2fs_trylock_op(fio->sbi)) {
                        err = -EAGAIN;
                        goto out_writepage;
                }
                /* from here on the exit path must unlock */
                fio->need_lock = LOCK_REQ;
        }

        err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
        if (err)
                goto out_writepage;

        fio->version = ni.version;

        err = f2fs_encrypt_one_page(fio);
        if (err)
                goto out_writepage;

        set_page_writeback(page);
        ClearPageError(page);

        if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
                f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);

        /* LFS mode write path */
        f2fs_outplace_write_data(&dn, fio);
        trace_f2fs_do_write_data_page(page, OPU);
        set_inode_flag(inode, FI_APPEND_WRITE);
        if (page->index == 0)
                set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
        f2fs_put_dnode(&dn);
out:
        if (fio->need_lock == LOCK_REQ)
                f2fs_unlock_op(fio->sbi);
        return err;
}
2739
/*
 * Write one dirty data page.
 *
 * @page:          page to write
 * @submitted:     optional out parameter; when non-NULL on the success path
 *                 it is set to 1 if fio.submitted was set, else 0.  It is
 *                 not written for reclaim writeback or after a checkpoint
 *                 error.
 * @bio:           stored in the io request; also passed to
 *                 f2fs_submit_merged_ipu_write() after a checkpoint error
 * @last_block:    stored in the io request
 * @wbc:           writeback control of the caller
 * @io_type:       iostat type recorded in the io request
 * @compr_blocks:  stored in the io request; a non-zero value also skips the
 *                 end-of-file check below
 * @allow_balance: when false, f2fs_balance_fs() is not called
 *
 * Return: 0 when the page was handled (written, or skipped as not needed)
 * and unlocked; AOP_WRITEPAGE_ACTIVATE when the page was redirtied and is
 * returned still locked; otherwise a negative error with the page redirtied
 * and unlocked.
 */
int f2fs_write_single_data_page(struct page *page, int *submitted,
                                struct bio **bio,
                                sector_t *last_block,
                                struct writeback_control *wbc,
                                enum iostat_type io_type,
                                int compr_blocks,
                                bool allow_balance)
{
        struct inode *inode = page->mapping->host;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        loff_t i_size = i_size_read(inode);
        /* index of the page that contains i_size */
        const pgoff_t end_index = ((unsigned long long)i_size)
                                                        >> PAGE_SHIFT;
        /* file offset just past this page */
        loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
        unsigned offset = 0;
        bool need_balance_fs = false;
        int err = 0;
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .ino = inode->i_ino,
                .type = DATA,
                .op = REQ_OP_WRITE,
                .op_flags = wbc_to_write_flags(wbc),
                .old_blkaddr = NULL_ADDR,
                .page = page,
                .encrypted_page = NULL,
                .submitted = false,
                .compr_blocks = compr_blocks,
                .need_lock = LOCK_RETRY,
                .post_read = f2fs_post_read_required(inode),
                .io_type = io_type,
                .io_wbc = wbc,
                .bio = bio,
                .last_block = last_block,
        };

        trace_f2fs_writepage(page, DATA);

        /* we should bypass data pages to let the kworker jobs proceed */
        if (unlikely(f2fs_cp_error(sbi))) {
                mapping_set_error(page->mapping, -EIO);
                /*
                 * don't drop any dirty dentry pages, to keep the latest
                 * directory structure.
                 */
                if (S_ISDIR(inode->i_mode))
                        goto redirty_out;
                goto out;
        }

        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto redirty_out;

        /* pages fully inside i_size need no tail handling */
        if (page->index < end_index ||
                        f2fs_verity_in_progress(inode) ||
                        compr_blocks)
                goto write;

        /*
         * If the offset is out-of-range of file size,
         * this page does not have to be written to disk.
         */
        offset = i_size & (PAGE_SIZE - 1);
        if ((page->index >= end_index + 1) || !offset)
                goto out;

        /* page straddles i_size: zero the part beyond it */
        zero_user_segment(page, offset, PAGE_SIZE);
write:
        if (f2fs_is_drop_cache(inode))
                goto out;

        /* Dentry/quota blocks are controlled by checkpoint */
        if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
                /*
                 * We need to wait for node_write to avoid block allocation during
                 * checkpoint. This can only happen to quota writes which can cause
                 * the below discard race condition.
                 */
                if (IS_NOQUOTA(inode))
                        f2fs_down_read(&sbi->node_write);

                fio.need_lock = LOCK_DONE;
                err = f2fs_do_write_data_page(&fio);

                if (IS_NOQUOTA(inode))
                        f2fs_up_read(&sbi->node_write);

                goto done;
        }

        if (!wbc->for_reclaim)
                need_balance_fs = true;
        else if (has_not_enough_free_secs(sbi, 0, 0))
                goto redirty_out;
        else
                set_inode_flag(inode, FI_HOT_DATA);

        /* -EAGAIN means "inline path not taken, use the regular write" */
        err = -EAGAIN;
        if (f2fs_has_inline_data(inode)) {
                err = f2fs_write_inline_data(inode, page);
                if (!err)
                        goto out;
        }

        if (err == -EAGAIN) {
                /* first try with LOCK_RETRY; on -EAGAIN retry with LOCK_REQ */
                err = f2fs_do_write_data_page(&fio);
                if (err == -EAGAIN) {
                        fio.need_lock = LOCK_REQ;
                        err = f2fs_do_write_data_page(&fio);
                }
        }

        if (err) {
                file_set_keep_isize(inode);
        } else {
                /* last_disk_size only ever grows */
                spin_lock(&F2FS_I(inode)->i_size_lock);
                if (F2FS_I(inode)->last_disk_size < psize)
                        F2FS_I(inode)->last_disk_size = psize;
                spin_unlock(&F2FS_I(inode)->i_size_lock);
        }

done:
        /* -ENOENT is not redirtied; it falls through to the out path */
        if (err && err != -ENOENT)
                goto redirty_out;

out:
        inode_dec_dirty_pages(inode);
        if (err) {
                ClearPageUptodate(page);
                clear_page_private_gcing(page);
        }

        if (wbc->for_reclaim) {
                f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
                clear_inode_flag(inode, FI_HOT_DATA);
                f2fs_remove_dirty_inode(inode);
                /* do not report a submit count to the caller */
                submitted = NULL;
        }
        unlock_page(page);
        if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
                        !F2FS_I(inode)->wb_task && allow_balance)
                f2fs_balance_fs(sbi, need_balance_fs);

        if (unlikely(f2fs_cp_error(sbi))) {
                f2fs_submit_merged_write(sbi, DATA);
                f2fs_submit_merged_ipu_write(sbi, bio, NULL);
                submitted = NULL;
        }

        if (submitted)
                *submitted = fio.submitted ? 1 : 0;

        return 0;

redirty_out:
        redirty_page_for_writepage(wbc, page);
        /*
         * pageout() in MM translates EAGAIN, so calls handle_write_error()
         * -> mapping_set_error() -> set_bit(AS_EIO, ...).
         * file_write_and_wait_range() will see EIO error, which is critical
         * to return value of fsync() followed by atomic_write failure to user.
         */
        if (!err || wbc->for_reclaim)
                return AOP_WRITEPAGE_ACTIVATE; /* page is left locked */
        unlock_page(page);
        return err;
}
2907
2908 static int f2fs_write_data_page(struct page *page,
2909                                         struct writeback_control *wbc)
2910 {
2911 #ifdef CONFIG_F2FS_FS_COMPRESSION
2912         struct inode *inode = page->mapping->host;
2913
2914         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2915                 goto out;
2916
2917         if (f2fs_compressed_file(inode)) {
2918                 if (f2fs_is_compressed_cluster(inode, page->index)) {
2919                         redirty_page_for_writepage(wbc, page);
2920                         return AOP_WRITEPAGE_ACTIVATE;
2921                 }
2922         }
2923 out:
2924 #endif
2925
2926         return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2927                                                 wbc, FS_DATA_IO, 0, true);
2928 }
2929
/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
 * warm/hot data page.
 *
 * Walks the tagged pages of @mapping inside the range described by @wbc and
 * writes each one with f2fs_write_single_data_page().  For compressed files
 * (CONFIG_F2FS_FS_COMPRESSION) pages are instead collected into a compress
 * context and written per cluster through f2fs_write_multi_pages().
 *
 * Return: 0, or the first error that stopped the walk.
 * AOP_WRITEPAGE_ACTIVATE and -EAGAIN from the per-page write are absorbed
 * here and not returned.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
                                        struct writeback_control *wbc,
                                        enum iostat_type io_type)
{
        int ret = 0;
        int done = 0, retry = 0;
        struct page *pages[F2FS_ONSTACK_PAGES];
        struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
        struct bio *bio = NULL;
        sector_t last_block;
#ifdef CONFIG_F2FS_FS_COMPRESSION
        struct inode *inode = mapping->host;
        struct compress_ctx cc = {
                .inode = inode,
                .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
                .cluster_size = F2FS_I(inode)->i_cluster_size,
                .cluster_idx = NULL_CLUSTER,
                .rpages = NULL,
                .nr_rpages = 0,
                .cpages = NULL,
                .valid_nr_cpages = 0,
                .rbuf = NULL,
                .cbuf = NULL,
                .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
                .private = NULL,
        };
#endif
        int nr_pages;
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        pgoff_t done_index;
        int range_whole = 0;
        xa_mark_t tag;
        int nwritten = 0;
        int submitted = 0;
        int i;

        /* few dirty pages (<= min_hot_blocks): flag the inode FI_HOT_DATA */
        if (get_dirty_pages(mapping->host) <=
                                SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
                set_inode_flag(mapping->host, FI_HOT_DATA);
        else
                clear_inode_flag(mapping->host, FI_HOT_DATA);

        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_SHIFT;
                end = wbc->range_end >> PAGE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = 1;
        }
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag = PAGECACHE_TAG_TOWRITE;
        else
                tag = PAGECACHE_TAG_DIRTY;
retry:
        retry = 0;
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);
        done_index = index;
        while (!done && !retry && (index <= end)) {
                nr_pages = find_get_pages_range_tag(mapping, &index, end,
                                tag, F2FS_ONSTACK_PAGES, pages);
                if (nr_pages == 0)
                        break;

                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pages[i];
                        bool need_readd;
readd:
                        /*
                         * Re-entered from "next" for the same page after the
                         * previous cluster was flushed successfully.
                         */
                        need_readd = false;
#ifdef CONFIG_F2FS_FS_COMPRESSION
                        if (f2fs_compressed_file(inode)) {
                                void *fsdata = NULL;
                                struct page *pagep;
                                int ret2;

                                ret = f2fs_init_compress_ctx(&cc);
                                if (ret) {
                                        done = 1;
                                        break;
                                }

                                /*
                                 * This page cannot join the cluster being
                                 * collected: write that cluster out first and,
                                 * if that worked, come back for this page.
                                 */
                                if (!f2fs_cluster_can_merge_page(&cc,
                                                                page->index)) {
                                        ret = f2fs_write_multi_pages(&cc,
                                                &submitted, wbc, io_type);
                                        if (!ret)
                                                need_readd = true;
                                        goto result;
                                }

                                if (unlikely(f2fs_cp_error(sbi)))
                                        goto lock_page;

                                if (!f2fs_cluster_is_empty(&cc))
                                        goto lock_page;

                                if (f2fs_all_cluster_page_ready(&cc,
                                        pages, i, nr_pages, true))
                                        goto lock_page;

                                ret2 = f2fs_prepare_compress_overwrite(
                                                        inode, &pagep,
                                                        page->index, &fsdata);
                                if (ret2 < 0) {
                                        ret = ret2;
                                        done = 1;
                                        break;
                                } else if (ret2 &&
                                        (!f2fs_compress_write_end(inode,
                                                fsdata, page->index, 1) ||
                                         !f2fs_all_cluster_page_ready(&cc,
                                                pages, i, nr_pages, false))) {
                                        /* restart the whole walk, see below */
                                        retry = 1;
                                        break;
                                }
                        }
#endif
                        /* give a priority to WB_SYNC threads */
                        if (atomic_read(&sbi->wb_sync_req[DATA]) &&
                                        wbc->sync_mode == WB_SYNC_NONE) {
                                done = 1;
                                break;
                        }
#ifdef CONFIG_F2FS_FS_COMPRESSION
lock_page:
#endif
                        done_index = page->index;
retry_write:
                        lock_page(page);

                        /* page no longer belongs to this mapping */
                        if (unlikely(page->mapping != mapping)) {
continue_unlock:
                                unlock_page(page);
                                continue;
                        }

                        if (!PageDirty(page)) {
                                /* someone wrote it for us */
                                goto continue_unlock;
                        }

                        /* under writeback: wait unless this is WB_SYNC_NONE */
                        if (PageWriteback(page)) {
                                if (wbc->sync_mode != WB_SYNC_NONE)
                                        f2fs_wait_on_page_writeback(page,
                                                        DATA, true, true);
                                else
                                        goto continue_unlock;
                        }

                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;

#ifdef CONFIG_F2FS_FS_COMPRESSION
                        if (f2fs_compressed_file(inode)) {
                                /* take an extra reference and collect the page */
                                get_page(page);
                                f2fs_compress_ctx_add_page(&cc, page);
                                continue;
                        }
#endif
                        ret = f2fs_write_single_data_page(page, &submitted,
                                        &bio, &last_block, wbc, io_type,
                                        0, true);
                        /* on AOP_WRITEPAGE_ACTIVATE the page is still locked */
                        if (ret == AOP_WRITEPAGE_ACTIVATE)
                                unlock_page(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
result:
#endif
                        nwritten += submitted;
                        wbc->nr_to_write -= submitted;

                        if (unlikely(ret)) {
                                /*
                                 * keep nr_to_write, since vfs uses this to
                                 * get # of written pages.
                                 */
                                if (ret == AOP_WRITEPAGE_ACTIVATE) {
                                        ret = 0;
                                        goto next;
                                } else if (ret == -EAGAIN) {
                                        ret = 0;
                                        /* WB_SYNC_ALL: wait and retry this page */
                                        if (wbc->sync_mode == WB_SYNC_ALL) {
                                                f2fs_io_schedule_timeout(
                                                        DEFAULT_IO_TIMEOUT);
                                                goto retry_write;
                                        }
                                        goto next;
                                }
                                /* any other error ends the walk after this page */
                                done_index = page->index + 1;
                                done = 1;
                                break;
                        }

                        if (wbc->nr_to_write <= 0 &&
                                        wbc->sync_mode == WB_SYNC_NONE) {
                                done = 1;
                                break;
                        }
next:
                        if (need_readd)
                                goto readd;
                }
                release_pages(pages, nr_pages);
                cond_resched();
        }
#ifdef CONFIG_F2FS_FS_COMPRESSION
        /* flush remained pages in compress cluster */
        if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
                ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
                nwritten += submitted;
                wbc->nr_to_write -= submitted;
                if (ret) {
                        done = 1;
                        retry = 0;
                }
        }
        if (f2fs_compressed_file(inode))
                f2fs_destroy_compress_ctx(&cc, false);
#endif
        /* a requested retry rescans the whole mapping from index 0 */
        if (retry) {
                index = 0;
                end = -1;
                goto retry;
        }
        if (wbc->range_cyclic && !done)
                done_index = 0;
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = done_index;

        if (nwritten)
                f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
                                                                NULL, 0, DATA);
        /* submit cached bio of IPU write */
        if (bio)
                f2fs_submit_merged_ipu_write(sbi, &bio, NULL);

        return ret;
}
3175
3176 static inline bool __should_serialize_io(struct inode *inode,
3177                                         struct writeback_control *wbc)
3178 {
3179         /* to avoid deadlock in path of data flush */
3180         if (F2FS_I(inode)->wb_task)
3181                 return false;
3182
3183         if (!S_ISREG(inode->i_mode))
3184                 return false;
3185         if (IS_NOQUOTA(inode))
3186                 return false;
3187
3188         if (f2fs_need_compress_data(inode))
3189                 return true;
3190         if (wbc->sync_mode != WB_SYNC_ALL)
3191                 return true;
3192         if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3193                 return true;
3194         return false;
3195 }
3196
/*
 * Write back the dirty data pages of @mapping, accounting the I/O as
 * @io_type.
 *
 * Writeback is skipped, and the inode's dirty page count is added to
 * wbc->pages_skipped, when:
 *   - SBI_POR_DOING is set on the superblock;
 *   - the inode is a directory or quota file, the request is WB_SYNC_NONE,
 *     the inode has fewer dirty pages than nr_pages_to_skip(sbi, DATA) and
 *     f2fs_available_free_memory(sbi, DIRTY_DENTS) is true;
 *   - FI_SKIP_WRITES is set on the inode;
 *   - the request is not WB_SYNC_ALL while wb_sync_req[DATA] is non-zero.
 * Otherwise f2fs_write_cache_pages() runs inside a block plug, holding
 * sbi->writepages when __should_serialize_io() asks for it.
 *
 * Returns 0 on the early-exit and skip paths, otherwise the value returned
 * by f2fs_write_cache_pages().
 */
static int __f2fs_write_data_pages(struct address_space *mapping,
                                                struct writeback_control *wbc,
                                                enum iostat_type io_type)
{
        struct inode *inode = mapping->host;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct blk_plug plug;
        int ret;
        bool locked = false;

        /* deal with chardevs and other special file */
        if (!mapping->a_ops->writepage)
                return 0;

        /* skip writing if there is no dirty page in this inode */
        if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
                return 0;

        /* during POR, we don't need to trigger writepage at all. */
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto skip_write;

        /* defer small background flushes of directory and quota inodes */
        if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
                        wbc->sync_mode == WB_SYNC_NONE &&
                        get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
                        f2fs_available_free_memory(sbi, DIRTY_DENTS))
                goto skip_write;

        /* skip writing in file defragment preparing stage */
        if (is_inode_flag_set(inode, FI_SKIP_WRITES))
                goto skip_write;

        trace_f2fs_writepages(mapping->host, wbc, DATA);

        /* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
        if (wbc->sync_mode == WB_SYNC_ALL)
                atomic_inc(&sbi->wb_sync_req[DATA]);
        else if (atomic_read(&sbi->wb_sync_req[DATA])) {
                /* to avoid potential deadlock */
                if (current->plug)
                        blk_finish_plug(current->plug);
                goto skip_write;
        }

        /* remember whether the mutex was taken so the unlock is symmetric */
        if (__should_serialize_io(inode, wbc)) {
                mutex_lock(&sbi->writepages);
                locked = true;
        }

        blk_start_plug(&plug);
        ret = f2fs_write_cache_pages(mapping, wbc, io_type);
        blk_finish_plug(&plug);

        if (locked)
                mutex_unlock(&sbi->writepages);

        /* pairs with the atomic_inc() done for WB_SYNC_ALL above */
        if (wbc->sync_mode == WB_SYNC_ALL)
                atomic_dec(&sbi->wb_sync_req[DATA]);
        /*
         * if some pages were truncated, we cannot guarantee its mapping->host
         * to detect pending bios.
         */

        f2fs_remove_dirty_inode(inode);
        return ret;

skip_write:
        /* report every dirty page of the inode as skipped */
        wbc->pages_skipped += get_dirty_pages(inode);
        trace_f2fs_writepages(mapping->host, wbc, DATA);
        return 0;
}
3268
3269 static int f2fs_write_data_pages(struct address_space *mapping,
3270                             struct writeback_control *wbc)
3271 {
3272         struct inode *inode = mapping->host;
3273
3274         return __f2fs_write_data_pages(mapping, wbc,
3275                         F2FS_I(inode)->cp_task == current ?
3276                         FS_CP_DATA_IO : FS_DATA_IO);
3277 }
3278
3279 void f2fs_write_failed(struct inode *inode, loff_t to)
3280 {
3281         loff_t i_size = i_size_read(inode);
3282
3283         if (IS_NOQUOTA(inode))
3284                 return;
3285
3286         /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3287         if (to > i_size && !f2fs_verity_in_progress(inode)) {
3288                 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3289                 filemap_invalidate_lock(inode->i_mapping);
3290
3291                 truncate_pagecache(inode, i_size);
3292                 f2fs_truncate_blocks(inode, i_size, true);
3293
3294                 filemap_invalidate_unlock(inode->i_mapping);
3295                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3296         }
3297 }
3298
/*
 * Resolve, allocating when required, the block address behind @page ahead
 * of a buffered write of @len bytes at @pos.
 *
 * Unless a path jumps to "out" or "unlock_out", *@blk_addr receives
 * dn.data_blkaddr and *@node_changed receives dn.node_changed.  Both are
 * left untouched on the early return taken for a full-page write to an inode
 * with FI_PREALLOCATED_ALL set.
 *
 * The map lock is taken up front for inline-data inodes and for writes whose
 * page starts at or beyond i_size; otherwise it is taken lazily, only when
 * the lockless lookup finds no block (see the "restart" path).
 *
 * Returns 0 on success, or the error reported by the failing helper.
 */
static int prepare_write_begin(struct f2fs_sb_info *sbi,
                        struct page *page, loff_t pos, unsigned len,
                        block_t *blk_addr, bool *node_changed)
{
        struct inode *inode = page->mapping->host;
        pgoff_t index = page->index;
        struct dnode_of_data dn;
        struct page *ipage;
        bool locked = false;
        struct extent_info ei = {0, };
        int err = 0;
        int flag;

        /*
         * If a whole page is being written and we already preallocated all the
         * blocks, then there is no need to get a block address now.
         */
        if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
                return 0;

        /* f2fs_lock_op avoids race between write CP and convert_inline_page */
        if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
                flag = F2FS_GET_BLOCK_DEFAULT;
        else
                flag = F2FS_GET_BLOCK_PRE_AIO;

        /* inline data or a write past EOF: take the map lock right away */
        if (f2fs_has_inline_data(inode) ||
                        (pos & PAGE_MASK) >= i_size_read(inode)) {
                f2fs_do_map_lock(sbi, flag, true);
                locked = true;
        }

restart:
        /* check inline_data */
        ipage = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(ipage)) {
                err = PTR_ERR(ipage);
                goto unlock_out;
        }

        set_new_dnode(&dn, inode, ipage, ipage, 0);

        if (f2fs_has_inline_data(inode)) {
                if (pos + len <= MAX_INLINE_DATA(inode)) {
                        /* the write still fits inline: fill @page from the inode page */
                        f2fs_do_read_inline_data(page, ipage);
                        set_inode_flag(inode, FI_DATA_EXIST);
                        if (inode->i_nlink)
                                set_page_private_inline(ipage);
                } else {
                        /* the write outgrows the inline area: convert first */
                        err = f2fs_convert_inline_page(&dn, page);
                        if (err)
                                goto out;
                        if (dn.data_blkaddr == NULL_ADDR)
                                err = f2fs_get_block(&dn, index);
                }
        } else if (locked) {
                err = f2fs_get_block(&dn, index);
        } else {
                /* lockless lookup: extent cache first, then the dnode */
                if (f2fs_lookup_extent_cache(inode, index, &ei)) {
                        dn.data_blkaddr = ei.blk + index - ei.fofs;
                } else {
                        /* hole case */
                        err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
                        if (err || dn.data_blkaddr == NULL_ADDR) {
                                /*
                                 * Nothing mapped: drop the dnode, take the
                                 * map lock and redo the lookup so the locked
                                 * branch above can allocate the block.
                                 */
                                f2fs_put_dnode(&dn);
                                f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
                                                                true);
                                WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
                                locked = true;
                                goto restart;
                        }
                }
        }

        /* convert_inline_page can make node_changed */
        *blk_addr = dn.data_blkaddr;
        *node_changed = dn.node_changed;
out:
        f2fs_put_dnode(&dn);
unlock_out:
        if (locked)
                f2fs_do_map_lock(sbi, flag, false);
        return err;
}
3383
3384 static int __find_data_block(struct inode *inode, pgoff_t index,
3385                                 block_t *blk_addr)
3386 {
3387         struct dnode_of_data dn;
3388         struct page *ipage;
3389         struct extent_info ei = {0, };
3390         int err = 0;
3391
3392         ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
3393         if (IS_ERR(ipage))
3394                 return PTR_ERR(ipage);
3395
3396         set_new_dnode(&dn, inode, ipage, ipage, 0);
3397
3398         if (f2fs_lookup_extent_cache(inode, index, &ei)) {
3399                 dn.data_blkaddr = ei.blk + index - ei.fofs;
3400         } else {
3401                 /* hole case */
3402                 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3403                 if (err) {
3404                         dn.data_blkaddr = NULL_ADDR;
3405                         err = 0;
3406                 }
3407         }
3408         *blk_addr = dn.data_blkaddr;
3409         f2fs_put_dnode(&dn);
3410         return err;
3411 }
3412
3413 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3414                                 block_t *blk_addr, bool *node_changed)
3415 {
3416         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3417         struct dnode_of_data dn;
3418         struct page *ipage;
3419         int err = 0;
3420
3421         f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
3422
3423         ipage = f2fs_get_node_page(sbi, inode->i_ino);
3424         if (IS_ERR(ipage)) {
3425                 err = PTR_ERR(ipage);
3426                 goto unlock_out;
3427         }
3428         set_new_dnode(&dn, inode, ipage, ipage, 0);
3429
3430         err = f2fs_get_block(&dn, index);
3431
3432         *blk_addr = dn.data_blkaddr;
3433         *node_changed = dn.node_changed;
3434         f2fs_put_dnode(&dn);
3435
3436 unlock_out:
3437         f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
3438         return err;
3439 }
3440
3441 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
3442                         struct page *page, loff_t pos, unsigned int len,
3443                         block_t *blk_addr, bool *node_changed)
3444 {
3445         struct inode *inode = page->mapping->host;
3446         struct inode *cow_inode = F2FS_I(inode)->cow_inode;
3447         pgoff_t index = page->index;
3448         int err = 0;
3449         block_t ori_blk_addr = NULL_ADDR;
3450
3451         /* If pos is beyond the end of file, reserve a new block in COW inode */
3452         if ((pos & PAGE_MASK) >= i_size_read(inode))
3453                 goto reserve_block;
3454
3455         /* Look for the block in COW inode first */
3456         err = __find_data_block(cow_inode, index, blk_addr);
3457         if (err)
3458                 return err;
3459         else if (*blk_addr != NULL_ADDR)
3460                 return 0;
3461
3462         /* Look for the block in the original inode */
3463         err = __find_data_block(inode, index, &ori_blk_addr);
3464         if (err)
3465                 return err;
3466
3467 reserve_block:
3468         /* Finally, we should reserve a new block in COW inode for the update */
3469         err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
3470         if (err)
3471                 return err;
3472         inc_atomic_write_cnt(inode);
3473
3474         if (ori_blk_addr != NULL_ADDR)
3475                 *blk_addr = ori_blk_addr;
3476         return 0;
3477 }
3478
/*
 * ->write_begin handler installed in f2fs_dblock_aops.
 *
 * Gets and locks the page cache page covering @pos, resolves its block
 * address (through the atomic-write or the regular helper), waits for
 * writeback on the page and, for a partial write to a page that is not
 * uptodate, either zeroes the relevant part of the page or reads it in.
 *
 * On success returns 0 with the locked page stored in *@pagep.  On failure
 * the page (if any) is unlocked and released, f2fs_write_failed() is called
 * with pos + len, and the error is returned.
 */
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
                loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
        struct inode *inode = mapping->host;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *page = NULL;
        pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
        bool need_balance = false;
        block_t blkaddr = NULL_ADDR;
        int err = 0;

        trace_f2fs_write_begin(inode, pos, len);

        if (!f2fs_is_checkpoint_ready(sbi)) {
                err = -ENOSPC;
                goto fail;
        }

        /*
         * We should check this at this moment to avoid deadlock on inode page
         * and #0 page. The locking rule for inline_data conversion should be:
         * lock_page(page #0) -> lock_page(inode_page)
         */
        if (index != 0) {
                err = f2fs_convert_inline_inode(inode);
                if (err)
                        goto fail;
        }

#ifdef CONFIG_F2FS_FS_COMPRESSION
        if (f2fs_compressed_file(inode)) {
                int ret;

                *fsdata = NULL;

                /* full-page, non-atomic writes take the regular path */
                if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
                        goto repeat;

                /*
                 * A positive return means the helper has fully prepared the
                 * write; presumably it also set *pagep - confirm against
                 * f2fs_prepare_compress_overwrite().
                 */
                ret = f2fs_prepare_compress_overwrite(inode, pagep,
                                                        index, fsdata);
                if (ret < 0) {
                        err = ret;
                        goto fail;
                } else if (ret) {
                        return 0;
                }
        }
#endif

repeat:
        /*
         * Do not use grab_cache_page_write_begin() to avoid deadlock due to
         * wait_for_stable_page. Will wait that below with our IO control.
         */
        page = f2fs_pagecache_get_page(mapping, index,
                                FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
        if (!page) {
                err = -ENOMEM;
                goto fail;
        }

        /* TODO: cluster can be compressed due to race with .writepage */

        *pagep = page;

        if (f2fs_is_atomic_file(inode))
                err = prepare_atomic_write_begin(sbi, page, pos, len,
                                        &blkaddr, &need_balance);
        else
                err = prepare_write_begin(sbi, page, pos, len,
                                        &blkaddr, &need_balance);
        if (err)
                goto fail;

        /*
         * Balancing runs with the page unlocked, so the page may have been
         * truncated meanwhile; recheck its mapping after relocking.
         */
        if (need_balance && !IS_NOQUOTA(inode) &&
                        has_not_enough_free_secs(sbi, 0, 0)) {
                unlock_page(page);
                f2fs_balance_fs(sbi, true);
                lock_page(page);
                if (page->mapping != mapping) {
                        /* The page got truncated from under us */
                        f2fs_put_page(page, 1);
                        goto repeat;
                }
        }

        f2fs_wait_on_page_writeback(page, DATA, false, true);

        /* a full-page write or an uptodate page needs no preparation */
        if (len == PAGE_SIZE || PageUptodate(page))
                return 0;

        /*
         * Page-aligned write reaching i_size or beyond: nothing behind the
         * written range has to be preserved, so zero the rest of the page.
         */
        if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
            !f2fs_verity_in_progress(inode)) {
                zero_user_segment(page, len, PAGE_SIZE);
                return 0;
        }

        if (blkaddr == NEW_ADDR) {
                /* NEW_ADDR: nothing is read from disk, start from zeroes */
                zero_user_segment(page, 0, PAGE_SIZE);
                SetPageUptodate(page);
        } else {
                if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
                                DATA_GENERIC_ENHANCE_READ)) {
                        err = -EFSCORRUPTED;
                        f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                        goto fail;
                }
                err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
                if (err)
                        goto fail;

                /* relock after the read and make sure the page is still ours */
                lock_page(page);
                if (unlikely(page->mapping != mapping)) {
                        f2fs_put_page(page, 1);
                        goto repeat;
                }
                if (unlikely(!PageUptodate(page))) {
                        err = -EIO;
                        goto fail;
                }
        }
        return 0;

fail:
        /* page may still be NULL here when failing before the page lookup */
        f2fs_put_page(page, 1);
        f2fs_write_failed(inode, pos + len);
        return err;
}
3607
3608 static int f2fs_write_end(struct file *file,
3609                         struct address_space *mapping,
3610                         loff_t pos, unsigned len, unsigned copied,
3611                         struct page *page, void *fsdata)
3612 {
3613         struct inode *inode = page->mapping->host;
3614
3615         trace_f2fs_write_end(inode, pos, len, copied);
3616
3617         /*
3618          * This should be come from len == PAGE_SIZE, and we expect copied
3619          * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
3620          * let generic_perform_write() try to copy data again through copied=0.
3621          */
3622         if (!PageUptodate(page)) {
3623                 if (unlikely(copied != len))
3624                         copied = 0;
3625                 else
3626                         SetPageUptodate(page);
3627         }
3628
3629 #ifdef CONFIG_F2FS_FS_COMPRESSION
3630         /* overwrite compressed file */
3631         if (f2fs_compressed_file(inode) && fsdata) {
3632                 f2fs_compress_write_end(inode, fsdata, page->index, copied);
3633                 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3634
3635                 if (pos + copied > i_size_read(inode) &&
3636                                 !f2fs_verity_in_progress(inode))
3637                         f2fs_i_size_write(inode, pos + copied);
3638                 return copied;
3639         }
3640 #endif
3641
3642         if (!copied)
3643                 goto unlock_out;
3644
3645         set_page_dirty(page);
3646
3647         if (pos + copied > i_size_read(inode) &&
3648             !f2fs_verity_in_progress(inode)) {
3649                 f2fs_i_size_write(inode, pos + copied);
3650                 if (f2fs_is_atomic_file(inode))
3651                         f2fs_i_size_write(F2FS_I(inode)->cow_inode,
3652                                         pos + copied);
3653         }
3654 unlock_out:
3655         f2fs_put_page(page, 1);
3656         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3657         return copied;
3658 }
3659
3660 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
3661 {
3662         struct inode *inode = folio->mapping->host;
3663         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3664
3665         if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3666                                 (offset || length != folio_size(folio)))
3667                 return;
3668
3669         if (folio_test_dirty(folio)) {
3670                 if (inode->i_ino == F2FS_META_INO(sbi)) {
3671                         dec_page_count(sbi, F2FS_DIRTY_META);
3672                 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3673                         dec_page_count(sbi, F2FS_DIRTY_NODES);
3674                 } else {
3675                         inode_dec_dirty_pages(inode);
3676                         f2fs_remove_dirty_inode(inode);
3677                 }
3678         }
3679
3680         clear_page_private_gcing(&folio->page);
3681
3682         if (test_opt(sbi, COMPRESS_CACHE) &&
3683                         inode->i_ino == F2FS_COMPRESS_INO(sbi))
3684                 clear_page_private_data(&folio->page);
3685
3686         folio_detach_private(folio);
3687 }
3688
3689 bool f2fs_release_folio(struct folio *folio, gfp_t wait)
3690 {
3691         struct f2fs_sb_info *sbi;
3692
3693         /* If this is dirty folio, keep private data */
3694         if (folio_test_dirty(folio))
3695                 return false;
3696
3697         sbi = F2FS_M_SB(folio->mapping);
3698         if (test_opt(sbi, COMPRESS_CACHE)) {
3699                 struct inode *inode = folio->mapping->host;
3700
3701                 if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3702                         clear_page_private_data(&folio->page);
3703         }
3704
3705         clear_page_private_gcing(&folio->page);
3706
3707         folio_detach_private(folio);
3708         return true;
3709 }
3710
3711 static bool f2fs_dirty_data_folio(struct address_space *mapping,
3712                 struct folio *folio)
3713 {
3714         struct inode *inode = mapping->host;
3715
3716         trace_f2fs_set_page_dirty(&folio->page, DATA);
3717
3718         if (!folio_test_uptodate(folio))
3719                 folio_mark_uptodate(folio);
3720         BUG_ON(folio_test_swapcache(folio));
3721
3722         if (filemap_dirty_folio(mapping, folio)) {
3723                 f2fs_update_dirty_folio(inode, folio);
3724                 return true;
3725         }
3726         return false;
3727 }
3728
3729
3730 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3731 {
3732 #ifdef CONFIG_F2FS_FS_COMPRESSION
3733         struct dnode_of_data dn;
3734         sector_t start_idx, blknr = 0;
3735         int ret;
3736
3737         start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3738
3739         set_new_dnode(&dn, inode, NULL, NULL, 0);
3740         ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3741         if (ret)
3742                 return 0;
3743
3744         if (dn.data_blkaddr != COMPRESS_ADDR) {
3745                 dn.ofs_in_node += block - start_idx;
3746                 blknr = f2fs_data_blkaddr(&dn);
3747                 if (!__is_valid_data_blkaddr(blknr))
3748                         blknr = 0;
3749         }
3750
3751         f2fs_put_dnode(&dn);
3752         return blknr;
3753 #else
3754         return 0;
3755 #endif
3756 }
3757
3758
3759 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3760 {
3761         struct inode *inode = mapping->host;
3762         sector_t blknr = 0;
3763
3764         if (f2fs_has_inline_data(inode))
3765                 goto out;
3766
3767         /* make sure allocating whole blocks */
3768         if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3769                 filemap_write_and_wait(mapping);
3770
3771         /* Block number less than F2FS MAX BLOCKS */
3772         if (unlikely(block >= max_file_blocks(inode)))
3773                 goto out;
3774
3775         if (f2fs_compressed_file(inode)) {
3776                 blknr = f2fs_bmap_compress(inode, block);
3777         } else {
3778                 struct f2fs_map_blocks map;
3779
3780                 memset(&map, 0, sizeof(map));
3781                 map.m_lblk = block;
3782                 map.m_len = 1;
3783                 map.m_next_pgofs = NULL;
3784                 map.m_seg_type = NO_CHECK_TYPE;
3785
3786                 if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
3787                         blknr = map.m_pblk;
3788         }
3789 out:
3790         trace_f2fs_bmap(inode, block, blknr);
3791         return blknr;
3792 }
3793
3794 #ifdef CONFIG_SWAP
/*
 * Rewrite a section-sized range of @inode's pages into freshly allocated
 * CURSEG_COLD_DATA_PINNED sections.
 *
 * @start_blk is used as a file page index (the caller in this file passes a
 * logical block number).  Sections start_blk / blk_per_sec up to, but not
 * including, that index plus blkcnt / blk_per_sec are processed; any
 * remainder of @blkcnt below one section is ignored by the integer division.
 *
 * For each section every page is read, locked and dirtied while
 * FI_SKIP_WRITES is set, then flushed with filemap_fdatawrite().  The whole
 * operation holds i_gc_rwsem[WRITE] and the mapping's invalidate lock, with
 * FI_ALIGNED_WRITE and FI_OPU_WRITE set on the inode.
 *
 * Returns 0 on success, the PTR_ERR() of a page that could not be obtained,
 * or the error from filemap_fdatawrite().
 */
static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
                                                        unsigned int blkcnt)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        unsigned int blkofs;
        unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
        unsigned int secidx = start_blk / blk_per_sec;
        unsigned int end_sec = secidx + blkcnt / blk_per_sec;
        int ret = 0;

        f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        filemap_invalidate_lock(inode->i_mapping);

        set_inode_flag(inode, FI_ALIGNED_WRITE);
        set_inode_flag(inode, FI_OPU_WRITE);

        for (; secidx < end_sec; secidx++) {
                /* pin_sem is held from section allocation until the flush */
                f2fs_down_write(&sbi->pin_sem);

                f2fs_lock_op(sbi);
                f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
                f2fs_unlock_op(sbi);

                /* hold back writeback until the whole section is dirty */
                set_inode_flag(inode, FI_SKIP_WRITES);

                for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
                        struct page *page;
                        unsigned int blkidx = secidx * blk_per_sec + blkofs;

                        page = f2fs_get_lock_data_page(inode, blkidx, true);
                        if (IS_ERR(page)) {
                                /* "done" does not release pin_sem: drop it here */
                                f2fs_up_write(&sbi->pin_sem);
                                ret = PTR_ERR(page);
                                goto done;
                        }

                        set_page_dirty(page);
                        f2fs_put_page(page, 1);
                }

                clear_inode_flag(inode, FI_SKIP_WRITES);

                ret = filemap_fdatawrite(inode->i_mapping);

                f2fs_up_write(&sbi->pin_sem);

                if (ret)
                        break;
        }

done:
        /* FI_SKIP_WRITES may still be set when bailing out of the page loop */
        clear_inode_flag(inode, FI_SKIP_WRITES);
        clear_inode_flag(inode, FI_OPU_WRITE);
        clear_inode_flag(inode, FI_ALIGNED_WRITE);

        filemap_invalidate_unlock(inode->i_mapping);
        f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

        return ret;
}
3855
3856 static int check_swap_activate(struct swap_info_struct *sis,
3857                                 struct file *swap_file, sector_t *span)
3858 {
3859         struct address_space *mapping = swap_file->f_mapping;
3860         struct inode *inode = mapping->host;
3861         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3862         sector_t cur_lblock;
3863         sector_t last_lblock;
3864         sector_t pblock;
3865         sector_t lowest_pblock = -1;
3866         sector_t highest_pblock = 0;
3867         int nr_extents = 0;
3868         unsigned long nr_pblocks;
3869         unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
3870         unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
3871         unsigned int not_aligned = 0;
3872         int ret = 0;
3873
3874         /*
3875          * Map all the blocks into the extent list.  This code doesn't try
3876          * to be very smart.
3877          */
3878         cur_lblock = 0;
3879         last_lblock = bytes_to_blks(inode, i_size_read(inode));
3880
3881         while (cur_lblock < last_lblock && cur_lblock < sis->max) {
3882                 struct f2fs_map_blocks map;
3883 retry:
3884                 cond_resched();
3885
3886                 memset(&map, 0, sizeof(map));
3887                 map.m_lblk = cur_lblock;
3888                 map.m_len = last_lblock - cur_lblock;
3889                 map.m_next_pgofs = NULL;
3890                 map.m_next_extent = NULL;
3891                 map.m_seg_type = NO_CHECK_TYPE;
3892                 map.m_may_create = false;
3893
3894                 ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
3895                 if (ret)
3896                         goto out;
3897
3898                 /* hole */
3899                 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
3900                         f2fs_err(sbi, "Swapfile has holes");
3901                         ret = -EINVAL;
3902                         goto out;
3903                 }
3904
3905                 pblock = map.m_pblk;
3906                 nr_pblocks = map.m_len;
3907
3908                 if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
3909                                 nr_pblocks & sec_blks_mask) {
3910                         not_aligned++;
3911
3912                         nr_pblocks = roundup(nr_pblocks, blks_per_sec);
3913                         if (cur_lblock + nr_pblocks > sis->max)
3914                                 nr_pblocks -= blks_per_sec;
3915
3916                         if (!nr_pblocks) {
3917                                 /* this extent is last one */
3918                                 nr_pblocks = map.m_len;
3919                                 f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
3920                                 goto next;
3921                         }
3922
3923                         ret = f2fs_migrate_blocks(inode, cur_lblock,
3924                                                         nr_pblocks);
3925                         if (ret)
3926                                 goto out;
3927                         goto retry;
3928                 }
3929 next:
3930                 if (cur_lblock + nr_pblocks >= sis->max)
3931                         nr_pblocks = sis->max - cur_lblock;
3932
3933                 if (cur_lblock) {       /* exclude the header page */
3934                         if (pblock < lowest_pblock)
3935                                 lowest_pblock = pblock;
3936                         if (pblock + nr_pblocks - 1 > highest_pblock)
3937                                 highest_pblock = pblock + nr_pblocks - 1;
3938                 }
3939
3940                 /*
3941                  * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
3942                  */
3943                 ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
3944                 if (ret < 0)
3945                         goto out;
3946                 nr_extents += ret;
3947                 cur_lblock += nr_pblocks;
3948         }
3949         ret = nr_extents;
3950         *span = 1 + highest_pblock - lowest_pblock;
3951         if (cur_lblock == 0)
3952                 cur_lblock = 1; /* force Empty message */
3953         sis->max = cur_lblock;
3954         sis->pages = cur_lblock - 1;
3955         sis->highest_bit = cur_lblock - 1;
3956 out:
3957         if (not_aligned)
3958                 f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
3959                           not_aligned, blks_per_sec * F2FS_BLKSIZE);
3960         return ret;
3961 }
3962
3963 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
3964                                 sector_t *span)
3965 {
3966         struct inode *inode = file_inode(file);
3967         int ret;
3968
3969         if (!S_ISREG(inode->i_mode))
3970                 return -EINVAL;
3971
3972         if (f2fs_readonly(F2FS_I_SB(inode)->sb))
3973                 return -EROFS;
3974
3975         if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
3976                 f2fs_err(F2FS_I_SB(inode),
3977                         "Swapfile not supported in LFS mode");
3978                 return -EINVAL;
3979         }
3980
3981         ret = f2fs_convert_inline_inode(inode);
3982         if (ret)
3983                 return ret;
3984
3985         if (!f2fs_disable_compressed_file(inode))
3986                 return -EINVAL;
3987
3988         f2fs_precache_extents(inode);
3989
3990         ret = check_swap_activate(sis, file, span);
3991         if (ret < 0)
3992                 return ret;
3993
3994         stat_inc_swapfile_inode(inode);
3995         set_inode_flag(inode, FI_PIN_FILE);
3996         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3997         return ret;
3998 }
3999
/*
 * ->swap_deactivate handler (CONFIG_SWAP build): reverses the bookkeeping
 * done by f2fs_swap_activate() by dropping the swapfile inode statistic and
 * clearing FI_PIN_FILE.
 */
static void f2fs_swap_deactivate(struct file *file)
{
        struct inode *inode = file_inode(file);

        stat_dec_swapfile_inode(inode);
        clear_inode_flag(inode, FI_PIN_FILE);
}
4007 #else
/* Stub used when CONFIG_SWAP is not set: swapfiles cannot be activated. */
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
                                sector_t *span)
{
        return -EOPNOTSUPP;
}
4013
/* Stub used when CONFIG_SWAP is not set: there is nothing to undo. */
static void f2fs_swap_deactivate(struct file *file)
{
}
4017 #endif
4018
/*
 * Address space operations for f2fs data mappings.  Reads, writeback,
 * buffered write begin/end, folio dirty/invalidate/release, bmap and swapfile
 * (de)activation use f2fs handlers; folio migration uses the generic
 * filemap_migrate_folio() and ->direct_IO is the no-op helper.
 */
const struct address_space_operations f2fs_dblock_aops = {
        .read_folio     = f2fs_read_data_folio,
        .readahead      = f2fs_readahead,
        .writepage      = f2fs_write_data_page,
        .writepages     = f2fs_write_data_pages,
        .write_begin    = f2fs_write_begin,
        .write_end      = f2fs_write_end,
        .dirty_folio    = f2fs_dirty_data_folio,
        .migrate_folio  = filemap_migrate_folio,
        .invalidate_folio = f2fs_invalidate_folio,
        .release_folio  = f2fs_release_folio,
        .direct_IO      = noop_direct_IO,
        .bmap           = f2fs_bmap,
        .swap_activate  = f2fs_swap_activate,
        .swap_deactivate = f2fs_swap_deactivate,
};
4035
4036 void f2fs_clear_page_cache_dirty_tag(struct page *page)
4037 {
4038         struct address_space *mapping = page_mapping(page);
4039         unsigned long flags;
4040
4041         xa_lock_irqsave(&mapping->i_pages, flags);
4042         __xa_clear_mark(&mapping->i_pages, page_index(page),
4043                                                 PAGECACHE_TAG_DIRTY);
4044         xa_unlock_irqrestore(&mapping->i_pages, flags);
4045 }
4046
4047 int __init f2fs_init_post_read_processing(void)
4048 {
4049         bio_post_read_ctx_cache =
4050                 kmem_cache_create("f2fs_bio_post_read_ctx",
4051                                   sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4052         if (!bio_post_read_ctx_cache)
4053                 goto fail;
4054         bio_post_read_ctx_pool =
4055                 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4056                                          bio_post_read_ctx_cache);
4057         if (!bio_post_read_ctx_pool)
4058                 goto fail_free_cache;
4059         return 0;
4060
4061 fail_free_cache:
4062         kmem_cache_destroy(bio_post_read_ctx_cache);
4063 fail:
4064         return -ENOMEM;
4065 }
4066
4067 void f2fs_destroy_post_read_processing(void)
4068 {
4069         mempool_destroy(bio_post_read_ctx_pool);
4070         kmem_cache_destroy(bio_post_read_ctx_cache);
4071 }
4072
4073 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4074 {
4075         if (!f2fs_sb_has_encrypt(sbi) &&
4076                 !f2fs_sb_has_verity(sbi) &&
4077                 !f2fs_sb_has_compression(sbi))
4078                 return 0;
4079
4080         sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4081                                                  WQ_UNBOUND | WQ_HIGHPRI,
4082                                                  num_online_cpus());
4083         if (!sbi->post_read_wq)
4084                 return -ENOMEM;
4085         return 0;
4086 }
4087
4088 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4089 {
4090         if (sbi->post_read_wq)
4091                 destroy_workqueue(sbi->post_read_wq);
4092 }
4093
4094 int __init f2fs_init_bio_entry_cache(void)
4095 {
4096         bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4097                         sizeof(struct bio_entry));
4098         if (!bio_entry_slab)
4099                 return -ENOMEM;
4100         return 0;
4101 }
4102
4103 void f2fs_destroy_bio_entry_cache(void)
4104 {
4105         kmem_cache_destroy(bio_entry_slab);
4106 }
4107
4108 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
4109                             unsigned int flags, struct iomap *iomap,
4110                             struct iomap *srcmap)
4111 {
4112         struct f2fs_map_blocks map = {};
4113         pgoff_t next_pgofs = 0;
4114         int err;
4115
4116         map.m_lblk = bytes_to_blks(inode, offset);
4117         map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
4118         map.m_next_pgofs = &next_pgofs;
4119         map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
4120         if (flags & IOMAP_WRITE)
4121                 map.m_may_create = true;
4122
4123         err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
4124                               F2FS_GET_BLOCK_DIO);
4125         if (err)
4126                 return err;
4127
4128         iomap->offset = blks_to_bytes(inode, map.m_lblk);
4129
4130         /*
4131          * When inline encryption is enabled, sometimes I/O to an encrypted file
4132          * has to be broken up to guarantee DUN contiguity.  Handle this by
4133          * limiting the length of the mapping returned.
4134          */
4135         map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
4136
4137         if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
4138                 iomap->length = blks_to_bytes(inode, map.m_len);
4139                 if (map.m_flags & F2FS_MAP_MAPPED) {
4140                         iomap->type = IOMAP_MAPPED;
4141                         iomap->flags |= IOMAP_F_MERGED;
4142                 } else {
4143                         iomap->type = IOMAP_UNWRITTEN;
4144                 }
4145                 if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
4146                         return -EINVAL;
4147
4148                 iomap->bdev = map.m_bdev;
4149                 iomap->addr = blks_to_bytes(inode, map.m_pblk);
4150         } else {
4151                 iomap->length = blks_to_bytes(inode, next_pgofs) -
4152                                 iomap->offset;
4153                 iomap->type = IOMAP_HOLE;
4154                 iomap->addr = IOMAP_NULL_ADDR;
4155         }
4156
4157         if (map.m_flags & F2FS_MAP_NEW)
4158                 iomap->flags |= IOMAP_F_NEW;
4159         if ((inode->i_state & I_DIRTY_DATASYNC) ||
4160             offset + length > i_size_read(inode))
4161                 iomap->flags |= IOMAP_F_DIRTY;
4162
4163         return 0;
4164 }
4165
4166 const struct iomap_ops f2fs_iomap_ops = {
4167         .iomap_begin    = f2fs_iomap_begin,
4168 };