Merge tag 'f2fs-for-4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Oct 2018 16:39:36 +0000 (17:39 +0100)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 24 Oct 2018 16:39:36 +0000 (17:39 +0100)
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've 1) added a superblock checksum feature, 2)
  implemented a new mount option to disable/enable checkpointing, which
  provides atomic updates of the entire filesystem, 3) refactored quota
  operations to improve their consistency with checkpoint, and 4) fixed
  subtle IO hang conditions and the roll-forward recovery flow so that
  it resurrects any fsync'ed inode metadata.

  Enhancements:
   - add a checksum to keep superblock contents safer
   - add checkpoint=disable/enable to support A/B update of entire filesystem
   - use plug for readahead IO in readdir
   - add more IO counts to avoid block layer hacks

  Bug fixes:
   - prevent data corruption issue for hardware encryption
   - fix IO hang issues when GC is heavily triggered
   - add missing up_read in __write_node_page
   - recover inode metadata during roll-forward recovery flow
   - fix null pointer dereference issue in wrongly configured discard map

  There are some more sanity checks and minor bug fixes as well"

* tag 'f2fs-for-4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (62 commits)
  f2fs: fix to keep project quota consistent
  f2fs: guarantee journalled quota data by checkpoint
  f2fs: cleanup dirty pages if recover failed
  f2fs: fix data corruption issue with hardware encryption
  f2fs: fix to recover inode->i_flags of inode block during POR
  f2fs: spread f2fs_set_inode_flags()
  f2fs: fix to spread clear_cold_data()
  Revert "f2fs: fix to clear PG_checked flag in set_page_dirty()"
  f2fs: account read IOs and use IO counts for is_idle
  f2fs: fix to account IO correctly for cgroup writeback
  f2fs: fix to account IO correctly
  f2fs: remove request_list check in is_idle()
  f2fs: allow to mount, if quota is failed
  f2fs: update REQ_TIME in f2fs_cross_rename()
  f2fs: do not update REQ_TIME in case of error conditions
  f2fs: remove unneeded disable_nat_bits()
  f2fs: remove unused sbi->trigger_ssr_threshold
  f2fs: shrink sbi->sb_lock coverage in set_file_temperature()
  f2fs: use rb_*_cached friends
  f2fs: fix to recover cold bit of inode block during POR
  ...

30 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/filesystems/f2fs.txt
fs/f2fs/acl.c
fs/f2fs/acl.h
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/gc.h
fs/f2fs/hash.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/shrinker.c
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/trace.c
fs/f2fs/trace.h
fs/f2fs/xattr.c
fs/f2fs/xattr.h
include/linux/f2fs_fs.h

index 94a24aedcdb237cfe07fb9f4fe0c33e953a49bd5..3ac41774ad3cf5730e76d5c5c3595ef1efa8a267 100644 (file)
@@ -121,7 +121,22 @@ What:              /sys/fs/f2fs/<disk>/idle_interval
 Date:          January 2016
 Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
-                Controls the idle timing.
+                Controls the idle timing for all paths other than
+                discard and gc path.
+
+What:          /sys/fs/f2fs/<disk>/discard_idle_interval
+Date:          September 2018
+Contact:       "Chao Yu" <yuchao0@huawei.com>
+Contact:       "Sahitya Tummala" <stummala@codeaurora.org>
+Description:
+                Controls the idle timing for discard path.
+
+What:          /sys/fs/f2fs/<disk>/gc_idle_interval
+Date:          September 2018
+Contact:       "Chao Yu" <yuchao0@huawei.com>
+Contact:       "Sahitya Tummala" <stummala@codeaurora.org>
+Description:
+                Controls the idle timing for gc path.
 
 What:          /sys/fs/f2fs/<disk>/iostat_enable
 Date:          August 2017
index e5edd29687b50a74f8946ca215f639bb71b434fa..e46c2147ddf8e02083622c0976fb63da061c85ee 100644 (file)
@@ -172,9 +172,10 @@ fault_type=%d          Support configuring fault injection type, should be
                        FAULT_DIR_DEPTH         0x000000100
                        FAULT_EVICT_INODE       0x000000200
                        FAULT_TRUNCATE          0x000000400
-                       FAULT_IO                        0x000000800
+                       FAULT_READ_IO           0x000000800
                        FAULT_CHECKPOINT                0x000001000
                        FAULT_DISCARD           0x000002000
+                       FAULT_WRITE_IO          0x000004000
 mode=%s                Control block allocation mode which supports "adaptive"
                        and "lfs". In "lfs" mode, there should be no random
                        writes towards main area.
@@ -211,6 +212,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
index 111824199a886c11f3a543463aeba0385a8a50ac..fa707cdd4120def0948f124fac75f86537d3e3b6 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/acl.c
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext2/acl.c
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/f2fs_fs.h>
 #include "f2fs.h"
@@ -53,6 +50,9 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
        struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
        const char *end = value + size;
 
+       if (size < sizeof(struct f2fs_acl_header))
+               return ERR_PTR(-EINVAL);
+
        if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
                return ERR_PTR(-EINVAL);
 
@@ -394,12 +394,16 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
                error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
                                       ipage);
                posix_acl_release(default_acl);
+       } else {
+               inode->i_default_acl = NULL;
        }
        if (acl) {
                if (!error)
                        error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
                                               ipage);
                posix_acl_release(acl);
+       } else {
+               inode->i_acl = NULL;
        }
 
        return error;
index 2c685185c24db6823f11536fdc6d79155e449123..b96823c59b15a4c62f3bd00558cbc04cd9c96791 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/acl.h
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext2/acl.h
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_ACL_H__
 #define __F2FS_ACL_H__
index e8b6b89bddb865e488c4213dcbf09fa79e4d83cb..9c28ea439e0bbc5073f8314d840c75353b784903 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/checkpoint.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/bio.h>
@@ -122,11 +119,8 @@ retry:
                if (PTR_ERR(page) == -EIO &&
                                ++count <= DEFAULT_RETRY_IO_COUNT)
                        goto retry;
-
                f2fs_stop_checkpoint(sbi, false);
-               f2fs_bug_on(sbi, 1);
        }
-
        return page;
 }
 
@@ -282,8 +276,7 @@ static int __f2fs_write_meta_page(struct page *page,
        dec_page_count(sbi, F2FS_DIRTY_META);
 
        if (wbc->for_reclaim)
-               f2fs_submit_merged_write_cond(sbi, page->mapping->host,
-                                               0, page->index, META);
+               f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META);
 
        unlock_page(page);
 
@@ -696,6 +689,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
        /* clear Orphan Flag */
        clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
 out:
+       set_sbi_flag(sbi, SBI_IS_RECOVERED);
+
 #ifdef CONFIG_QUOTA
        /* Turn quotas off */
        if (quota_enabled)
@@ -1084,6 +1079,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
        ckpt->next_free_nid = cpu_to_le32(last_nid);
 }
 
+static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+{
+       if (!is_journalled_quota(sbi))
+               return false;
+       if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+               return false;
+       if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+               return false;
+       if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
+               return true;
+       if (get_pages(sbi, F2FS_DIRTY_QDATA))
+               return true;
+       return false;
+}
+
 /*
  * Freeze all the FS-operations for checkpoint.
  */
@@ -1095,12 +1105,36 @@ static int block_operations(struct f2fs_sb_info *sbi)
                .for_reclaim = 0,
        };
        struct blk_plug plug;
-       int err = 0;
+       int err = 0, cnt = 0;
 
        blk_start_plug(&plug);
 
-retry_flush_dents:
+retry_flush_quotas:
+       if (__need_flush_quota(sbi)) {
+               int locked;
+
+               if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
+                       set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+                       f2fs_lock_all(sbi);
+                       goto retry_flush_dents;
+               }
+               clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+               /* only failed during mount/umount/freeze/quotactl */
+               locked = down_read_trylock(&sbi->sb->s_umount);
+               f2fs_quota_sync(sbi->sb, -1);
+               if (locked)
+                       up_read(&sbi->sb->s_umount);
+       }
+
        f2fs_lock_all(sbi);
+       if (__need_flush_quota(sbi)) {
+               f2fs_unlock_all(sbi);
+               cond_resched();
+               goto retry_flush_quotas;
+       }
+
+retry_flush_dents:
        /* write all the dirty dentry pages */
        if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
                f2fs_unlock_all(sbi);
@@ -1108,7 +1142,7 @@ retry_flush_dents:
                if (err)
                        goto out;
                cond_resched();
-               goto retry_flush_dents;
+               goto retry_flush_quotas;
        }
 
        /*
@@ -1117,6 +1151,12 @@ retry_flush_dents:
         */
        down_write(&sbi->node_change);
 
+       if (__need_flush_quota(sbi)) {
+               up_write(&sbi->node_change);
+               f2fs_unlock_all(sbi);
+               goto retry_flush_quotas;
+       }
+
        if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
                up_write(&sbi->node_change);
                f2fs_unlock_all(sbi);
@@ -1124,7 +1164,7 @@ retry_flush_dents:
                if (err)
                        goto out;
                cond_resched();
-               goto retry_flush_dents;
+               goto retry_flush_quotas;
        }
 
 retry_flush_nodes:
@@ -1215,6 +1255,19 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
                __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+       if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+               __set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+       else
+               __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
+       if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+               __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+       else
+               __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
+       if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+               __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
        /* set this flag to activate crc|cp_ver for recovery */
        __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
        __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1422,6 +1475,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        clear_sbi_flag(sbi, SBI_IS_DIRTY);
        clear_sbi_flag(sbi, SBI_NEED_CP);
+       clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+       sbi->unusable_block_count = 0;
        __set_cp_next_pack(sbi);
 
        /*
@@ -1446,6 +1501,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        unsigned long long ckpt_ver;
        int err = 0;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               if (cpc->reason != CP_PAUSE)
+                       return 0;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                               "Start checkpoint disabled!");
+       }
        mutex_lock(&sbi->cp_mutex);
 
        if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
@@ -1497,7 +1558,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
 
        /* write cached NAT/SIT entries to NAT/SIT area */
-       f2fs_flush_nat_entries(sbi, cpc);
+       err = f2fs_flush_nat_entries(sbi, cpc);
+       if (err)
+               goto stop;
+
        f2fs_flush_sit_entries(sbi, cpc);
 
        /* unlock all the fs_lock[] in do_checkpoint() */
@@ -1506,7 +1570,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                f2fs_release_discard_addrs(sbi);
        else
                f2fs_clear_prefree_segments(sbi, cpc);
-
+stop:
        unblock_operations(sbi);
        stat_inc_cp_count(sbi->stat_info);
 
index 382c1ef9a9e4d6e9df05e39031ce0f5958e51663..106f116466bf1937fb104213beb1eb0b0e6f1247 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/data.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -49,12 +46,29 @@ static bool __is_cp_guaranteed(struct page *page)
                        inode->i_ino ==  F2FS_NODE_INO(sbi) ||
                        S_ISDIR(inode->i_mode) ||
                        (S_ISREG(inode->i_mode) &&
-                       is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
+                       (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
                        is_cold_data(page))
                return true;
        return false;
 }
 
+static enum count_type __read_io_type(struct page *page)
+{
+       struct address_space *mapping = page->mapping;
+
+       if (mapping) {
+               struct inode *inode = mapping->host;
+               struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+               if (inode->i_ino == F2FS_META_INO(sbi))
+                       return F2FS_RD_META;
+
+               if (inode->i_ino == F2FS_NODE_INO(sbi))
+                       return F2FS_RD_NODE;
+       }
+       return F2FS_RD_DATA;
+}
+
 /* postprocessing steps for read bios */
 enum bio_post_read_step {
        STEP_INITIAL = 0,
@@ -80,10 +94,12 @@ static void __read_end_io(struct bio *bio)
                /* PG_error was set if any post_read step failed */
                if (bio->bi_status || PageError(page)) {
                        ClearPageUptodate(page);
-                       SetPageError(page);
+                       /* will re-read again later */
+                       ClearPageError(page);
                } else {
                        SetPageUptodate(page);
                }
+               dec_page_count(F2FS_P_SB(page), __read_io_type(page));
                unlock_page(page);
        }
        if (bio->bi_private)
@@ -126,8 +142,9 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
 
 static void f2fs_read_end_io(struct bio *bio)
 {
-       if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
-               f2fs_show_injection_info(FAULT_IO);
+       if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
+                                               FAULT_READ_IO)) {
+               f2fs_show_injection_info(FAULT_READ_IO);
                bio->bi_status = BLK_STS_IOERR;
        }
 
@@ -148,6 +165,11 @@ static void f2fs_write_end_io(struct bio *bio)
        struct bio_vec *bvec;
        int i;
 
+       if (time_to_inject(sbi, FAULT_WRITE_IO)) {
+               f2fs_show_injection_info(FAULT_WRITE_IO);
+               bio->bi_status = BLK_STS_IOERR;
+       }
+
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
                enum count_type type = WB_DATA_TYPE(page);
@@ -319,8 +341,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
        io->bio = NULL;
 }
 
-static bool __has_merged_page(struct f2fs_bio_info *io,
-                               struct inode *inode, nid_t ino, pgoff_t idx)
+static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+                                               struct page *page, nid_t ino)
 {
        struct bio_vec *bvec;
        struct page *target;
@@ -329,7 +351,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io,
        if (!io->bio)
                return false;
 
-       if (!inode && !ino)
+       if (!inode && !page && !ino)
                return true;
 
        bio_for_each_segment_all(bvec, io->bio, i) {
@@ -339,11 +361,10 @@ static bool __has_merged_page(struct f2fs_bio_info *io,
                else
                        target = fscrypt_control_page(bvec->bv_page);
 
-               if (idx != target->index)
-                       continue;
-
                if (inode && inode == target->mapping->host)
                        return true;
+               if (page && page == target)
+                       return true;
                if (ino && ino == ino_of_node(target))
                        return true;
        }
@@ -352,7 +373,8 @@ static bool __has_merged_page(struct f2fs_bio_info *io,
 }
 
 static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
-                               nid_t ino, pgoff_t idx, enum page_type type)
+                                               struct page *page, nid_t ino,
+                                               enum page_type type)
 {
        enum page_type btype = PAGE_TYPE_OF_BIO(type);
        enum temp_type temp;
@@ -363,7 +385,7 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
                io = sbi->write_io[btype] + temp;
 
                down_read(&io->io_rwsem);
-               ret = __has_merged_page(io, inode, ino, idx);
+               ret = __has_merged_page(io, inode, page, ino);
                up_read(&io->io_rwsem);
 
                /* TODO: use HOT temp only for meta pages now. */
@@ -394,12 +416,12 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 }
 
 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
-                               struct inode *inode, nid_t ino, pgoff_t idx,
-                               enum page_type type, bool force)
+                               struct inode *inode, struct page *page,
+                               nid_t ino, enum page_type type, bool force)
 {
        enum temp_type temp;
 
-       if (!force && !has_merged_page(sbi, inode, ino, idx, type))
+       if (!force && !has_merged_page(sbi, inode, page, ino, type))
                return;
 
        for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
@@ -418,10 +440,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 }
 
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
-                               struct inode *inode, nid_t ino, pgoff_t idx,
-                               enum page_type type)
+                               struct inode *inode, struct page *page,
+                               nid_t ino, enum page_type type)
 {
-       __submit_merged_write_cond(sbi, inode, ino, idx, type, false);
+       __submit_merged_write_cond(sbi, inode, page, ino, type, false);
 }
 
 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -456,12 +478,16 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
                bio_put(bio);
                return -EFAULT;
        }
+
+       if (fio->io_wbc && !is_read_io(fio->op))
+               wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+
        bio_set_op_attrs(bio, fio->op, fio->op_flags);
 
-       __submit_bio(fio->sbi, bio, fio->type);
+       inc_page_count(fio->sbi, is_read_io(fio->op) ?
+                       __read_io_type(page): WB_DATA_TYPE(fio->page));
 
-       if (!is_read_io(fio->op))
-               inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
+       __submit_bio(fio->sbi, bio, fio->type);
        return 0;
 }
 
@@ -533,6 +559,9 @@ skip:
        if (fio->in_list)
                goto next;
 out:
+       if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+                               f2fs_is_checkpoint_ready(sbi))
+               __submit_merged_bio(io);
        up_write(&io->io_rwsem);
 }
 
@@ -565,9 +594,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
                ctx->bio = bio;
                ctx->enabled_steps = post_read_steps;
                bio->bi_private = ctx;
-
-               /* wait the page to be moved by cleaning */
-               f2fs_wait_on_block_writeback(sbi, blkaddr);
        }
 
        return bio;
@@ -582,10 +608,15 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
        if (IS_ERR(bio))
                return PTR_ERR(bio);
 
+       /* wait for GCed page writeback via META_MAPPING */
+       f2fs_wait_on_block_writeback(inode, blkaddr);
+
        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
                bio_put(bio);
                return -EFAULT;
        }
+       ClearPageError(page);
+       inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
        __submit_bio(F2FS_I_SB(inode), bio, DATA);
        return 0;
 }
@@ -876,7 +907,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
        struct f2fs_summary sum;
        struct node_info ni;
        block_t old_blkaddr;
-       pgoff_t fofs;
        blkcnt_t count = 1;
        int err;
 
@@ -889,7 +919,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 
        dn->data_blkaddr = datablock_addr(dn->inode,
                                dn->node_page, dn->ofs_in_node);
-       if (dn->data_blkaddr == NEW_ADDR)
+       if (dn->data_blkaddr != NULL_ADDR)
                goto alloc;
 
        if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
@@ -905,12 +935,10 @@ alloc:
                                        old_blkaddr, old_blkaddr);
        f2fs_set_data_blkaddr(dn);
 
-       /* update i_size */
-       fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
-                                                       dn->ofs_in_node;
-       if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
-               f2fs_i_size_write(dn->inode,
-                               ((loff_t)(fofs + 1) << PAGE_SHIFT));
+       /*
+        * i_size will be updated by direct_IO. Otherwise, we'll get stale
+        * data from unwritten block via dio_read.
+        */
        return 0;
 }
 
@@ -945,7 +973,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 
        if (direct_io) {
                map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
-               flag = f2fs_force_buffered_io(inode, WRITE) ?
+               flag = f2fs_force_buffered_io(inode, iocb, from) ?
                                        F2FS_GET_BLOCK_PRE_AIO :
                                        F2FS_GET_BLOCK_PRE_DIO;
                goto map_blocks;
@@ -970,7 +998,7 @@ map_blocks:
        return err;
 }
 
-static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
                if (lock)
@@ -1025,6 +1053,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                map->m_flags = F2FS_MAP_MAPPED;
                if (map->m_next_extent)
                        *map->m_next_extent = pgofs + map->m_len;
+
+               /* for hardware encryption, but to avoid potential issue in future */
+               if (flag == F2FS_GET_BLOCK_DIO)
+                       f2fs_wait_on_block_writeback_range(inode,
+                                               map->m_pblk, map->m_len);
                goto out;
        }
 
@@ -1064,7 +1097,15 @@ next_block:
                goto sync_out;
        }
 
-       if (!is_valid_data_blkaddr(sbi, blkaddr)) {
+       if (is_valid_data_blkaddr(sbi, blkaddr)) {
+               /* use out-place-update for driect IO under LFS mode */
+               if (test_opt(sbi, LFS) && create &&
+                               flag == F2FS_GET_BLOCK_DIO) {
+                       err = __allocate_data_block(&dn, map->m_seg_type);
+                       if (!err)
+                               set_inode_flag(inode, FI_APPEND_WRITE);
+               }
+       } else {
                if (create) {
                        if (unlikely(f2fs_cp_error(sbi))) {
                                err = -EIO;
@@ -1076,6 +1117,8 @@ next_block:
                                        last_ofs_in_node = dn.ofs_in_node;
                                }
                        } else {
+                               WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
+                                       flag != F2FS_GET_BLOCK_DIO);
                                err = __allocate_data_block(&dn,
                                                        map->m_seg_type);
                                if (!err)
@@ -1173,6 +1216,12 @@ skip:
        goto next_dnode;
 
 sync_out:
+
+       /* for hardware encryption, but to avoid potential issue in future */
+       if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+               f2fs_wait_on_block_writeback_range(inode,
+                                               map->m_pblk, map->m_len);
+
        if (flag == F2FS_GET_BLOCK_PRECACHE) {
                if (map->m_flags & F2FS_MAP_MAPPED) {
                        unsigned int ofs = start_pgofs - map->m_lblk;
@@ -1255,7 +1304,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
 {
        return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_DEFAULT, NULL,
+                                               F2FS_GET_BLOCK_DIO, NULL,
                                                f2fs_rw_hint_to_seg_type(
                                                        inode->i_write_hint));
 }
@@ -1558,9 +1607,17 @@ submit_and_realloc:
                        }
                }
 
+               /*
+                * If the page is under writeback, we need to wait for
+                * its completion to see the correct decrypted data.
+                */
+               f2fs_wait_on_block_writeback(inode, block_nr);
+
                if (bio_add_page(bio, page, blocksize, 0) < blocksize)
                        goto submit_and_realloc;
 
+               inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+               ClearPageError(page);
                last_block_in_bio = block_nr;
                goto next_page;
 set_error_page:
@@ -1625,7 +1682,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
                return 0;
 
        /* wait for GCed page writeback via META_MAPPING */
-       f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
+       f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
 
 retry_encrypt:
        fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
@@ -1682,6 +1739,10 @@ static inline bool check_inplace_update_policy(struct inode *inode,
                        is_inode_flag_set(inode, FI_NEED_IPU))
                return true;
 
+       if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                       !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+               return true;
+
        return false;
 }
 
@@ -1705,6 +1766,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
                return true;
        if (S_ISDIR(inode->i_mode))
                return true;
+       if (IS_NOQUOTA(inode))
+               return true;
        if (f2fs_is_atomic_file(inode))
                return true;
        if (fio) {
@@ -1712,6 +1775,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
                        return true;
                if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
                        return true;
+               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                       f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+                       return true;
        }
        return false;
 }
@@ -1763,6 +1829,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
        /* This page is already truncated */
        if (fio->old_blkaddr == NULL_ADDR) {
                ClearPageUptodate(page);
+               clear_cold_data(page);
                goto out_writepage;
        }
 got_it:
@@ -1938,18 +2005,20 @@ done:
 
 out:
        inode_dec_dirty_pages(inode);
-       if (err)
+       if (err) {
                ClearPageUptodate(page);
+               clear_cold_data(page);
+       }
 
        if (wbc->for_reclaim) {
-               f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
+               f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
                clear_inode_flag(inode, FI_HOT_DATA);
                f2fs_remove_dirty_inode(inode);
                submitted = NULL;
        }
 
        unlock_page(page);
-       if (!S_ISDIR(inode->i_mode))
+       if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
                f2fs_balance_fs(sbi, need_balance_fs);
 
        if (unlikely(f2fs_cp_error(sbi))) {
@@ -2000,10 +2069,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        pgoff_t done_index;
-       pgoff_t last_idx = ULONG_MAX;
        int cycled;
        int range_whole = 0;
        int tag;
+       int nwritten = 0;
 
        pagevec_init(&pvec);
 
@@ -2106,7 +2175,7 @@ continue_unlock:
                                done = 1;
                                break;
                        } else if (submitted) {
-                               last_idx = page->index;
+                               nwritten++;
                        }
 
                        if (--wbc->nr_to_write <= 0 &&
@@ -2128,9 +2197,9 @@ continue_unlock:
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = done_index;
 
-       if (last_idx != ULONG_MAX)
+       if (nwritten)
                f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
-                                               0, last_idx, DATA);
+                                                               NULL, 0, DATA);
 
        return ret;
 }
@@ -2140,6 +2209,8 @@ static inline bool __should_serialize_io(struct inode *inode,
 {
        if (!S_ISREG(inode->i_mode))
                return false;
+       if (IS_NOQUOTA(inode))
+               return false;
        if (wbc->sync_mode != WB_SYNC_ALL)
                return true;
        if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2169,7 +2240,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto skip_write;
 
-       if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+       if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
+                       wbc->sync_mode == WB_SYNC_NONE &&
                        get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
                        f2fs_available_free_memory(sbi, DIRTY_DENTS))
                goto skip_write;
@@ -2234,7 +2306,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
                down_write(&F2FS_I(inode)->i_mmap_sem);
 
                truncate_pagecache(inode, i_size);
-               f2fs_truncate_blocks(inode, i_size, true);
+               f2fs_truncate_blocks(inode, i_size, true, true);
 
                up_write(&F2FS_I(inode)->i_mmap_sem);
                up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2332,6 +2404,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
        trace_f2fs_write_begin(inode, pos, len, flags);
 
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               goto fail;
+
        if ((f2fs_is_atomic_file(inode) &&
                        !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
                        is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
@@ -2369,7 +2445,8 @@ repeat:
        if (err)
                goto fail;
 
-       if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
+       if (need_balance && !IS_NOQUOTA(inode) &&
+                       has_not_enough_free_secs(sbi, 0, 0)) {
                unlock_page(page);
                f2fs_balance_fs(sbi, true);
                lock_page(page);
@@ -2382,10 +2459,6 @@ repeat:
 
        f2fs_wait_on_page_writeback(page, DATA, false);
 
-       /* wait for GCed page writeback via META_MAPPING */
-       if (f2fs_post_read_required(inode))
-               f2fs_wait_on_block_writeback(sbi, blkaddr);
-
        if (len == PAGE_SIZE || PageUptodate(page))
                return 0;
 
@@ -2480,36 +2553,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = mapping->host;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
        size_t count = iov_iter_count(iter);
        loff_t offset = iocb->ki_pos;
        int rw = iov_iter_rw(iter);
        int err;
        enum rw_hint hint = iocb->ki_hint;
        int whint_mode = F2FS_OPTION(sbi).whint_mode;
+       bool do_opu;
 
        err = check_direct_IO(inode, iter, offset);
        if (err)
                return err < 0 ? err : 0;
 
-       if (f2fs_force_buffered_io(inode, rw))
+       if (f2fs_force_buffered_io(inode, iocb, iter))
                return 0;
 
+       do_opu = allow_outplace_dio(inode, iocb, iter);
+
        trace_f2fs_direct_IO_enter(inode, offset, count, rw);
 
        if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
                iocb->ki_hint = WRITE_LIFE_NOT_SET;
 
-       if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
-               if (iocb->ki_flags & IOCB_NOWAIT) {
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
                        iocb->ki_hint = hint;
                        err = -EAGAIN;
                        goto out;
                }
-               down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+               if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+                       up_read(&fi->i_gc_rwsem[rw]);
+                       iocb->ki_hint = hint;
+                       err = -EAGAIN;
+                       goto out;
+               }
+       } else {
+               down_read(&fi->i_gc_rwsem[rw]);
+               if (do_opu)
+                       down_read(&fi->i_gc_rwsem[READ]);
        }
 
        err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
-       up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
+
+       if (do_opu)
+               up_read(&fi->i_gc_rwsem[READ]);
+
+       up_read(&fi->i_gc_rwsem[rw]);
 
        if (rw == WRITE) {
                if (whint_mode == WHINT_MODE_OFF)
@@ -2517,7 +2607,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                if (err > 0) {
                        f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
                                                                        err);
-                       set_inode_flag(inode, FI_UPDATE_WRITE);
+                       if (!do_opu)
+                               set_inode_flag(inode, FI_UPDATE_WRITE);
                } else if (err < 0) {
                        f2fs_write_failed(mapping, offset + count);
                }
@@ -2550,6 +2641,8 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
                }
        }
 
+       clear_cold_data(page);
+
        /* This is atomic written page, keep Private */
        if (IS_ATOMIC_WRITTEN_PAGE(page))
                return f2fs_drop_inmem_page(inode, page);
@@ -2568,6 +2661,7 @@ int f2fs_release_page(struct page *page, gfp_t wait)
        if (IS_ATOMIC_WRITTEN_PAGE(page))
                return 0;
 
+       clear_cold_data(page);
        set_page_private(page, 0);
        ClearPagePrivate(page);
        return 1;
@@ -2583,10 +2677,6 @@ static int f2fs_set_data_page_dirty(struct page *page)
        if (!PageUptodate(page))
                SetPageUptodate(page);
 
-       /* don't remain PG_checked flag which was set during GC */
-       if (is_cold_data(page))
-               clear_cold_data(page);
-
        if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
                if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
                        f2fs_register_inmem_page(inode, page);
index 214a968962a1d2b89791417adebabda0592e8653..139b4d5c83d5d0b0f7434b38bbb6105755eaffcb 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs debugging statistics
  *
@@ -5,10 +6,6 @@
  *             http://www.samsung.com/
  * Copyright (c) 2012 Linux Foundation
  * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
@@ -58,6 +55,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
        si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
        si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+       si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA);
+       si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE);
+       si->nr_rd_meta = get_pages(sbi, F2FS_RD_META);
        if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
                si->nr_flushed =
                        atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
@@ -104,6 +104,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->avail_nids = NM_I(sbi)->available_nids;
        si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
        si->bg_gc = sbi->bg_gc;
+       si->io_skip_bggc = sbi->io_skip_bggc;
+       si->other_skip_bggc = sbi->other_skip_bggc;
        si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
        si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC];
        si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
@@ -121,6 +123,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
                si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
        }
 
+       for (i = META_CP; i < META_MAX; i++)
+               si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
+
        for (i = 0; i < 2; i++) {
                si->segment_count[i] = sbi->segment_count[i];
                si->block_count[i] = sbi->block_count[i];
@@ -190,8 +195,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
        si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
        si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
        si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
-       if (f2fs_discard_en(sbi))
-               si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
+       si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
        si->base_mem += SIT_VBLOCK_MAP_SIZE;
        if (sbi->segs_per_sec > 1)
                si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
@@ -271,7 +275,8 @@ static int stat_show(struct seq_file *s, void *v)
                seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
                        si->sbi->sb->s_bdev, i++,
                        f2fs_readonly(si->sbi->sb) ? "RO": "RW",
-                       f2fs_cp_error(si->sbi) ? "Error": "Good");
+                       is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+                       "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
                seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
                           si->sit_area_segs, si->nat_area_segs);
                seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -333,6 +338,13 @@ static int stat_show(struct seq_file *s, void *v)
                           si->prefree_count, si->free_segs, si->free_secs);
                seq_printf(s, "CP calls: %d (BG: %d)\n",
                                si->cp_count, si->bg_cp_count);
+               seq_printf(s, "  - cp blocks : %u\n", si->meta_count[META_CP]);
+               seq_printf(s, "  - sit blocks : %u\n",
+                               si->meta_count[META_SIT]);
+               seq_printf(s, "  - nat blocks : %u\n",
+                               si->meta_count[META_NAT]);
+               seq_printf(s, "  - ssa blocks : %u\n",
+                               si->meta_count[META_SSA]);
                seq_printf(s, "GC calls: %d (BG: %d)\n",
                           si->call_count, si->bg_gc);
                seq_printf(s, "  - data segments : %d (%d)\n",
@@ -349,6 +361,8 @@ static int stat_show(struct seq_file *s, void *v)
                                si->skipped_atomic_files[BG_GC] +
                                si->skipped_atomic_files[FG_GC],
                                si->skipped_atomic_files[BG_GC]);
+               seq_printf(s, "BG skip : IO: %u, Other: %u\n",
+                               si->io_skip_bggc, si->other_skip_bggc);
                seq_puts(s, "\nExtent Cache:\n");
                seq_printf(s, "  - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
                                si->hit_largest, si->hit_cached,
@@ -360,7 +374,9 @@ static int stat_show(struct seq_file *s, void *v)
                seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
                                si->ext_tree, si->zombie_tree, si->ext_node);
                seq_puts(s, "\nBalancing F2FS Async:\n");
-               seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
+               /* read IO page counters (F2FS_RD_DATA/NODE/META) */
+               seq_printf(s, "  - IO_R (Data: %4d, Node: %4d, Meta: %4d)\n",
+                          si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
+               seq_printf(s, "  - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
                        "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
                           si->nr_wb_cp_data, si->nr_wb_data,
                           si->nr_flushing, si->nr_flushed,
@@ -445,6 +461,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 {
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        struct f2fs_stat_info *si;
+       int i;
 
        si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
        if (!si)
@@ -470,6 +487,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
        atomic_set(&sbi->inline_inode, 0);
        atomic_set(&sbi->inline_dir, 0);
        atomic_set(&sbi->inplace_count, 0);
+       for (i = META_CP; i < META_MAX; i++)
+               atomic_set(&sbi->meta_count[i], 0);
 
        atomic_set(&sbi->aw_cnt, 0);
        atomic_set(&sbi->vw_cnt, 0);
index ecc3a4e2be96d8a25fd6814a9d09476b30095eaa..2ef84b4590ead0367e6d518a865e75838a41f373 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/dir.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -658,9 +655,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
        f2fs_put_page(page, 1);
 
        clear_inode_flag(inode, FI_NEW_INODE);
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 fail:
        up_write(&F2FS_I(inode)->i_sem);
-       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
        return err;
 }
 
@@ -733,6 +730,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
                clear_page_dirty_for_io(page);
                ClearPagePrivate(page);
                ClearPageUptodate(page);
+               clear_cold_data(page);
                inode_dec_dirty_pages(dir);
                f2fs_remove_dirty_inode(dir);
        }
@@ -784,9 +782,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
        struct f2fs_dir_entry *de = NULL;
        struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
        struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
+       struct blk_plug plug;
+       bool readdir_ra = sbi->readdir_ra == 1;
+       int err = 0;
 
        bit_pos = ((unsigned long)ctx->pos % d->max);
 
+       if (readdir_ra)
+               blk_start_plug(&plug);
+
        while (bit_pos < d->max) {
                bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
                if (bit_pos >= d->max)
@@ -806,29 +810,33 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 
                if (f2fs_encrypted_inode(d->inode)) {
                        int save_len = fstr->len;
-                       int err;
 
                        err = fscrypt_fname_disk_to_usr(d->inode,
                                                (u32)de->hash_code, 0,
                                                &de_name, fstr);
                        if (err)
-                               return err;
+                               goto out;
 
                        de_name = *fstr;
                        fstr->len = save_len;
                }
 
                if (!dir_emit(ctx, de_name.name, de_name.len,
-                                       le32_to_cpu(de->ino), d_type))
-                       return 1;
+                                       le32_to_cpu(de->ino), d_type)) {
+                       err = 1;
+                       goto out;
+               }
 
-               if (sbi->readdir_ra == 1)
+               if (readdir_ra)
                        f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
 
                bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
                ctx->pos = start_pos + bit_pos;
        }
-       return 0;
+out:
+       if (readdir_ra)
+               blk_finish_plug(&plug);
+       return err;
 }
 
 static int f2fs_readdir(struct file *file, struct dir_context *ctx)
index 231b77ef5a53bffef1e9ce8ab5cfe98a99b6c8c3..1cb0fcc67d2df66d891c0d33f66d1c49cb16a3ca 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs extent cache support
  *
@@ -5,10 +6,6 @@
  * Copyright (c) 2015 Samsung Electronics
  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  *          Chao Yu <chao2.yu@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
@@ -30,10 +27,10 @@ static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
        return NULL;
 }
 
-static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
+static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
                                                        unsigned int ofs)
 {
-       struct rb_node *node = root->rb_node;
+       struct rb_node *node = root->rb_root.rb_node;
        struct rb_entry *re;
 
        while (node) {
@@ -49,7 +46,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
        return NULL;
 }
 
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
                                struct rb_entry *cached_re, unsigned int ofs)
 {
        struct rb_entry *re;
@@ -62,22 +59,25 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
 }
 
 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
-                               struct rb_root *root, struct rb_node **parent,
-                               unsigned int ofs)
+                               struct rb_root_cached *root,
+                               struct rb_node **parent,
+                               unsigned int ofs, bool *leftmost)
 {
-       struct rb_node **p = &root->rb_node;
+       struct rb_node **p = &root->rb_root.rb_node;
        struct rb_entry *re;
 
        while (*p) {
                *parent = *p;
                re = rb_entry(*parent, struct rb_entry, rb_node);
 
-               if (ofs < re->ofs)
+               if (ofs < re->ofs) {
                        p = &(*p)->rb_left;
-               else if (ofs >= re->ofs + re->len)
+               } else if (ofs >= re->ofs + re->len) {
                        p = &(*p)->rb_right;
-               else
+                       *leftmost = false;
+               } else {
                        f2fs_bug_on(sbi, 1);
+               }
        }
 
        return p;
@@ -92,16 +92,16 @@ struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
  * in order to simpfy the insertion after.
  * tree must stay unchanged between lookup and insertion.
  */
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
                                struct rb_entry *cached_re,
                                unsigned int ofs,
                                struct rb_entry **prev_entry,
                                struct rb_entry **next_entry,
                                struct rb_node ***insert_p,
                                struct rb_node **insert_parent,
-                               bool force)
+                               bool force, bool *leftmost)
 {
-       struct rb_node **pnode = &root->rb_node;
+       struct rb_node **pnode = &root->rb_root.rb_node;
        struct rb_node *parent = NULL, *tmp_node;
        struct rb_entry *re = cached_re;
 
@@ -110,7 +110,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
        *prev_entry = NULL;
        *next_entry = NULL;
 
-       if (RB_EMPTY_ROOT(root))
+       if (RB_EMPTY_ROOT(&root->rb_root))
                return NULL;
 
        if (re) {
@@ -118,16 +118,22 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
                        goto lookup_neighbors;
        }
 
+       if (leftmost)
+               *leftmost = true;
+
        while (*pnode) {
                parent = *pnode;
                re = rb_entry(*pnode, struct rb_entry, rb_node);
 
-               if (ofs < re->ofs)
+               if (ofs < re->ofs) {
                        pnode = &(*pnode)->rb_left;
-               else if (ofs >= re->ofs + re->len)
+               } else if (ofs >= re->ofs + re->len) {
                        pnode = &(*pnode)->rb_right;
-               else
+                       if (leftmost)
+                               *leftmost = false;
+               } else {
                        goto lookup_neighbors;
+               }
        }
 
        *insert_p = pnode;
@@ -160,10 +166,10 @@ lookup_neighbors:
 }
 
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
-                                               struct rb_root *root)
+                                               struct rb_root_cached *root)
 {
 #ifdef CONFIG_F2FS_CHECK_FS
-       struct rb_node *cur = rb_first(root), *next;
+       struct rb_node *cur = rb_first_cached(root), *next;
        struct rb_entry *cur_re, *next_re;
 
        if (!cur)
@@ -196,7 +202,8 @@ static struct kmem_cache *extent_node_slab;
 
 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
                                struct extent_tree *et, struct extent_info *ei,
-                               struct rb_node *parent, struct rb_node **p)
+                               struct rb_node *parent, struct rb_node **p,
+                               bool leftmost)
 {
        struct extent_node *en;
 
@@ -209,7 +216,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
        en->et = et;
 
        rb_link_node(&en->rb_node, parent, p);
-       rb_insert_color(&en->rb_node, &et->root);
+       rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
        atomic_inc(&et->node_cnt);
        atomic_inc(&sbi->total_ext_node);
        return en;
@@ -218,7 +225,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 static void __detach_extent_node(struct f2fs_sb_info *sbi,
                                struct extent_tree *et, struct extent_node *en)
 {
-       rb_erase(&en->rb_node, &et->root);
+       rb_erase_cached(&en->rb_node, &et->root);
        atomic_dec(&et->node_cnt);
        atomic_dec(&sbi->total_ext_node);
 
@@ -257,7 +264,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
                f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
                memset(et, 0, sizeof(struct extent_tree));
                et->ino = ino;
-               et->root = RB_ROOT;
+               et->root = RB_ROOT_CACHED;
                et->cached_en = NULL;
                rwlock_init(&et->lock);
                INIT_LIST_HEAD(&et->list);
@@ -278,10 +285,10 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
 static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
                                struct extent_tree *et, struct extent_info *ei)
 {
-       struct rb_node **p = &et->root.rb_node;
+       struct rb_node **p = &et->root.rb_root.rb_node;
        struct extent_node *en;
 
-       en = __attach_extent_node(sbi, et, ei, NULL, p);
+       en = __attach_extent_node(sbi, et, ei, NULL, p, true);
        if (!en)
                return NULL;
 
@@ -297,7 +304,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
        struct extent_node *en;
        unsigned int count = atomic_read(&et->node_cnt);
 
-       node = rb_first(&et->root);
+       node = rb_first_cached(&et->root);
        while (node) {
                next = rb_next(node);
                en = rb_entry(node, struct extent_node, rb_node);
@@ -308,14 +315,13 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
        return count - atomic_read(&et->node_cnt);
 }
 
-static void __drop_largest_extent(struct inode *inode,
+static void __drop_largest_extent(struct extent_tree *et,
                                        pgoff_t fofs, unsigned int len)
 {
-       struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
-
-       if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
-               largest->len = 0;
-               f2fs_mark_inode_dirty_sync(inode, true);
+       if (fofs < et->largest.fofs + et->largest.len &&
+                       fofs + len > et->largest.fofs) { /* [fofs, fofs + len) overlaps et->largest */
+               et->largest.len = 0;    /* invalidate the cached largest extent */
+               et->largest_updated = true;     /* only record the change; the inode is no longer dirtied here */
        }
 }
 
@@ -416,12 +422,11 @@ out:
        return ret;
 }
 
-static struct extent_node *__try_merge_extent_node(struct inode *inode,
+static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
                                struct extent_tree *et, struct extent_info *ei,
                                struct extent_node *prev_ex,
                                struct extent_node *next_ex)
 {
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct extent_node *en = NULL;
 
        if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
@@ -443,7 +448,7 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
        if (!en)
                return NULL;
 
-       __try_update_largest_extent(inode, et, en);
+       __try_update_largest_extent(et, en);
 
        spin_lock(&sbi->extent_lock);
        if (!list_empty(&en->list)) {
@@ -454,12 +459,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
        return en;
 }
 
-static struct extent_node *__insert_extent_tree(struct inode *inode,
+static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
                                struct extent_tree *et, struct extent_info *ei,
                                struct rb_node **insert_p,
-                               struct rb_node *insert_parent)
+                               struct rb_node *insert_parent,
+                               bool leftmost)
 {
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct extent_node *en = NULL;
@@ -470,13 +475,16 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
                goto do_insert;
        }
 
-       p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
+       leftmost = true;
+
+       p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
+                                               ei->fofs, &leftmost);
 do_insert:
-       en = __attach_extent_node(sbi, et, ei, parent, p);
+       en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
        if (!en)
                return NULL;
 
-       __try_update_largest_extent(inode, et, en);
+       __try_update_largest_extent(et, en);
 
        /* update in global extent list */
        spin_lock(&sbi->extent_lock);
@@ -497,6 +505,8 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
        unsigned int end = fofs + len;
        unsigned int pos = (unsigned int)fofs;
+       bool updated = false;
+       bool leftmost = false;  /* lookup may return without setting it */
 
        if (!et)
                return;
@@ -517,14 +527,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
         * drop largest extent before lookup, in case it's already
         * been shrunk from extent tree
         */
-       __drop_largest_extent(inode, fofs, len);
+       __drop_largest_extent(et, fofs, len);
 
        /* 1. lookup first extent node in range [fofs, fofs + len - 1] */
        en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
                                        (struct rb_entry *)et->cached_en, fofs,
                                        (struct rb_entry **)&prev_en,
                                        (struct rb_entry **)&next_en,
-                                       &insert_p, &insert_parent, false);
+                                       &insert_p, &insert_parent, false,
+                                       &leftmost);
        if (!en)
                en = next_en;
 
@@ -550,8 +561,8 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
                                set_extent_info(&ei, end,
                                                end - dei.fofs + dei.blk,
                                                org_end - end);
-                               en1 = __insert_extent_tree(inode, et, &ei,
-                                                       NULL, NULL);
+                               en1 = __insert_extent_tree(sbi, et, &ei,
+                                                       NULL, NULL, true);
                                next_en = en1;
                        } else {
                                en->ei.fofs = end;
@@ -570,7 +581,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
                }
 
                if (parts)
-                       __try_update_largest_extent(inode, et, en);
+                       __try_update_largest_extent(et, en);
                else
                        __release_extent_node(sbi, et, en);
 
@@ -590,15 +601,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
        if (blkaddr) {
 
                set_extent_info(&ei, fofs, blkaddr, len);
-               if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en))
-                       __insert_extent_tree(inode, et, &ei,
-                                               insert_p, insert_parent);
+               if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+                       __insert_extent_tree(sbi, et, &ei,
+                                       insert_p, insert_parent, leftmost);
 
                /* give up extent_cache, if split and small updates happen */
                if (dei.len >= 1 &&
                                prev.len < F2FS_MIN_EXTENT_LEN &&
                                et->largest.len < F2FS_MIN_EXTENT_LEN) {
-                       __drop_largest_extent(inode, 0, UINT_MAX);
+                       et->largest.len = 0;
+                       et->largest_updated = true;
                        set_inode_flag(inode, FI_NO_EXTENT);
                }
        }
@@ -606,7 +618,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
        if (is_inode_flag_set(inode, FI_NO_EXTENT))
                __free_extent_tree(sbi, et);
 
+       if (et->largest_updated) {
+               et->largest_updated = false;
+               updated = true;
+       }
+
        write_unlock(&et->lock);
+
+       if (updated)
+               f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -705,6 +725,7 @@ void f2fs_drop_extent_tree(struct inode *inode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct extent_tree *et = F2FS_I(inode)->extent_tree;
+       bool updated = false;
 
        if (!f2fs_may_extent_tree(inode))
                return;
@@ -713,8 +734,13 @@ void f2fs_drop_extent_tree(struct inode *inode)
 
        write_lock(&et->lock);
        __free_extent_tree(sbi, et);
-       __drop_largest_extent(inode, 0, UINT_MAX);
+       if (et->largest.len) {
+               et->largest.len = 0;
+               updated = true;
+       }
        write_unlock(&et->lock);
+       if (updated)
+               f2fs_mark_inode_dirty_sync(inode, true);
 }
 
 void f2fs_destroy_extent_tree(struct inode *inode)
index abf925664d9c5b9bf50f155824d34c9c86ce750f..56204a8f8a12f14a5c6bd8f9f9f53e3c58260787 100644 (file)
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/f2fs.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _LINUX_F2FS_H
 #define _LINUX_F2FS_H
 
+#include <linux/uio.h>
 #include <linux/types.h>
 #include <linux/page-flags.h>
 #include <linux/buffer_head.h>
@@ -53,9 +51,10 @@ enum {
        FAULT_DIR_DEPTH,
        FAULT_EVICT_INODE,
        FAULT_TRUNCATE,
-       FAULT_IO,
+       FAULT_READ_IO,
        FAULT_CHECKPOINT,
        FAULT_DISCARD,
+       FAULT_WRITE_IO,
        FAULT_MAX,
 };
 
@@ -100,6 +99,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_QUOTA               0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT                0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT  0x02000000
 
 #define F2FS_OPTION(sbi)       ((sbi)->mount_opt)
 #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -150,6 +150,7 @@ struct f2fs_mount_info {
 #define F2FS_FEATURE_INODE_CRTIME      0x0100
 #define F2FS_FEATURE_LOST_FOUND                0x0200
 #define F2FS_FEATURE_VERITY            0x0400  /* reserved */
+#define F2FS_FEATURE_SB_CHKSUM         0x0800
 
 #define F2FS_HAS_FEATURE(sb, mask)                                     \
        ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -178,6 +179,7 @@ enum {
 #define        CP_RECOVERY     0x00000008
 #define        CP_DISCARD      0x00000010
 #define CP_TRIMMED     0x00000020
+#define CP_PAUSE       0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)                BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST                8       /* issue 8 discards per round */
@@ -187,6 +189,7 @@ enum {
 #define DEF_DISCARD_URGENT_UTIL                80      /* do more discard over 80% */
 #define DEF_CP_INTERVAL                        60      /* 60 secs */
 #define DEF_IDLE_INTERVAL              5       /* 5 secs */
+#define DEF_DISABLE_INTERVAL           5       /* 5 secs */
 
 struct cp_control {
        int reason;
@@ -203,6 +206,7 @@ enum {
        META_NAT,
        META_SIT,
        META_SSA,
+       META_MAX,
        META_POR,
        DATA_GENERIC,
        META_GENERIC,
@@ -324,7 +328,7 @@ struct discard_cmd_control {
        atomic_t issued_discard;                /* # of issued discard */
        atomic_t issing_discard;                /* # of issing discard */
        atomic_t discard_cmd_cnt;               /* # of cached cmd count */
-       struct rb_root root;                    /* root of discard rb-tree */
+       struct rb_root_cached root;             /* root of discard rb-tree */
        bool rbtree_check;                      /* config for consistence check */
 };
 
@@ -527,6 +531,9 @@ enum {
 
 #define DEFAULT_RETRY_IO_COUNT 8       /* maximum retry read IO count */
 
+/* maximum retry quota flush count */
+#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT                8
+
 #define F2FS_LINK_MAX  0xffffffff      /* maximum link count per file */
 
 #define MAX_DIR_RA_PAGES       4       /* maximum ra pages of dir */
@@ -566,12 +573,13 @@ struct extent_node {
 
 struct extent_tree {
        nid_t ino;                      /* inode number */
-       struct rb_root root;            /* root of extent info rb-tree */
+       struct rb_root_cached root;     /* root of extent info rb-tree */
        struct extent_node *cached_en;  /* recently accessed extent node */
        struct extent_info largest;     /* largested extent info */
        struct list_head list;          /* to be used by sbi->zombie_list */
        rwlock_t lock;                  /* protect extent info rb-tree */
        atomic_t node_cnt;              /* # of extent node in rb-tree*/
+       bool largest_updated;           /* largest extent updated */
 };
 
 /*
@@ -600,6 +608,7 @@ enum {
        F2FS_GET_BLOCK_DEFAULT,
        F2FS_GET_BLOCK_FIEMAP,
        F2FS_GET_BLOCK_BMAP,
+       F2FS_GET_BLOCK_DIO,
        F2FS_GET_BLOCK_PRE_DIO,
        F2FS_GET_BLOCK_PRE_AIO,
        F2FS_GET_BLOCK_PRECACHE,
@@ -754,12 +763,12 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
 }
 
 extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
-static inline void __try_update_largest_extent(struct inode *inode,
-                       struct extent_tree *et, struct extent_node *en)
+static inline void __try_update_largest_extent(struct extent_tree *et,
+                                               struct extent_node *en)
 {
        if (en->ei.len > et->largest.len) {
                et->largest = en->ei;
-               f2fs_mark_inode_dirty_sync(inode, true);
+               et->largest_updated = true;
        }
 }
 
@@ -944,6 +953,9 @@ enum count_type {
        F2FS_DIRTY_IMETA,
        F2FS_WB_CP_DATA,
        F2FS_WB_DATA,
+       F2FS_RD_DATA,
+       F2FS_RD_NODE,
+       F2FS_RD_META,
        NR_COUNT_TYPE,
 };
 
@@ -1088,11 +1100,19 @@ enum {
        SBI_NEED_SB_WRITE,                      /* need to recover superblock */
        SBI_NEED_CP,                            /* need to checkpoint */
        SBI_IS_SHUTDOWN,                        /* shutdown by ioctl */
+       SBI_IS_RECOVERED,                       /* recovered orphan/data */
+       SBI_CP_DISABLED,                        /* CP was disabled last mount */
+       SBI_QUOTA_NEED_FLUSH,                   /* need to flush quota info in CP */
+       SBI_QUOTA_SKIP_FLUSH,                   /* skip flushing quota in current CP */
+       SBI_QUOTA_NEED_REPAIR,                  /* quota file may be corrupted */
 };
 
 enum {
        CP_TIME,
        REQ_TIME,
+       DISCARD_TIME,
+       GC_TIME,
+       DISABLE_TIME,
        MAX_TIME,
 };
 
@@ -1209,7 +1229,6 @@ struct f2fs_sb_info {
        unsigned int total_valid_node_count;    /* valid node block count */
        loff_t max_file_blocks;                 /* max block index of file */
        int dir_level;                          /* directory level */
-       unsigned int trigger_ssr_threshold;     /* threshold to trigger ssr */
        int readdir_ra;                         /* readahead inode in readdir */
 
        block_t user_block_count;               /* # of user blocks */
@@ -1219,6 +1238,9 @@ struct f2fs_sb_info {
        block_t reserved_blocks;                /* configurable reserved blocks */
        block_t current_reserved_blocks;        /* current reserved blocks */
 
+       /* Additional tracking for no checkpoint mode */
+       block_t unusable_block_count;           /* # of blocks saved by last cp */
+
        unsigned int nquota_files;              /* # of quota sysfile */
 
        u32 s_next_generation;                  /* for NFS support */
@@ -1257,6 +1279,7 @@ struct f2fs_sb_info {
         */
 #ifdef CONFIG_F2FS_STAT_FS
        struct f2fs_stat_info *stat_info;       /* FS status information */
+       atomic_t meta_count[META_MAX];          /* # of meta blocks */
        unsigned int segment_count[2];          /* # of allocated segments */
        unsigned int block_count[2];            /* # of allocated blocks */
        atomic_t inplace_count;         /* # of inplace update */
@@ -1272,6 +1295,8 @@ struct f2fs_sb_info {
        atomic_t max_aw_cnt;                    /* max # of atomic writes */
        atomic_t max_vw_cnt;                    /* max # of volatile writes */
        int bg_gc;                              /* background gc calls */
+       unsigned int io_skip_bggc;              /* skip background gc for in-flight IO */
+       unsigned int other_skip_bggc;           /* skip background gc for other reasons */
        unsigned int ndirty_inode[NR_INODE_TYPE];       /* # of dirty inodes */
 #endif
        spinlock_t stat_lock;                   /* lock for stat operations */
@@ -1306,9 +1331,9 @@ struct f2fs_sb_info {
 };
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
-#define f2fs_show_injection_info(type)                         \
-       printk("%sF2FS-fs : inject %s in %s of %pF\n",          \
-               KERN_INFO, f2fs_fault_name[type],               \
+#define f2fs_show_injection_info(type)                                 \
+       printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n",      \
+               KERN_INFO, f2fs_fault_name[type],                       \
                __func__, __builtin_return_address(0))
 static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
 {
@@ -1344,7 +1369,15 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
 
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
 {
-       sbi->last_time[type] = jiffies;
+       unsigned long now = jiffies;
+
+       sbi->last_time[type] = now;
+
+       /* DISCARD_TIME and GC_TIME are based on REQ_TIME */
+       if (type == REQ_TIME) {
+               sbi->last_time[DISCARD_TIME] = now;
+               sbi->last_time[GC_TIME] = now;
+       }
 }
 
 static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
@@ -1354,16 +1387,18 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
        return time_after(jiffies, sbi->last_time[type] + interval);
 }
 
-static inline bool is_idle(struct f2fs_sb_info *sbi)
+static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
+                                               int type)
 {
-       struct block_device *bdev = sbi->sb->s_bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
-       struct request_list *rl = &q->root_rl;
+       unsigned long interval = sbi->interval_time[type] * HZ;
+       unsigned int wait_ms = 0;
+       long delta;
 
-       if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
-               return false;
+       delta = (sbi->last_time[type] + interval) - jiffies;
+       if (delta > 0)
+               wait_ms = jiffies_to_msecs(delta);
 
-       return f2fs_time_over(sbi, REQ_TIME);
+       return wait_ms;
 }
 
 /*
@@ -1704,7 +1739,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
        if (!__allow_reserved_blocks(sbi, inode, true))
                avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               avail_user_block_count -= sbi->unusable_block_count;
        if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
                diff = sbi->total_valid_block_count - avail_user_block_count;
                if (diff > *count)
@@ -1755,7 +1791,9 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
        atomic_inc(&sbi->nr_pages[count_type]);
 
        if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
-               count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
+               count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA ||
+               count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE ||
+               count_type == F2FS_RD_META)
                return;
 
        set_sbi_flag(sbi, SBI_IS_DIRTY);
@@ -1891,12 +1929,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 {
        block_t valid_block_count;
        unsigned int valid_node_count;
-       bool quota = inode && !is_inode;
+       int err;
 
-       if (quota) {
-               int ret = dquot_reserve_block(inode, 1);
-               if (ret)
-                       return ret;
+       if (is_inode) {
+               if (inode) {
+                       err = dquot_alloc_inode(inode);
+                       if (err)
+                               return err;
+               }
+       } else {
+               err = dquot_reserve_block(inode, 1);
+               if (err)
+                       return err;
        }
 
        if (time_to_inject(sbi, FAULT_BLOCK)) {
@@ -1911,6 +1955,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
        if (!__allow_reserved_blocks(sbi, inode, false))
                valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               valid_block_count += sbi->unusable_block_count;
 
        if (unlikely(valid_block_count > sbi->user_block_count)) {
                spin_unlock(&sbi->stat_lock);
@@ -1938,8 +1984,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
        return 0;
 
 enospc:
-       if (quota)
+       if (is_inode) {
+               if (inode)
+                       dquot_free_inode(inode);
+       } else {
                dquot_release_reservation_block(inode, 1);
+       }
        return -ENOSPC;
 }
 
@@ -1960,7 +2010,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
 
        spin_unlock(&sbi->stat_lock);
 
-       if (!is_inode)
+       if (is_inode)
+               dquot_free_inode(inode);
+       else
                f2fs_i_blocks_write(inode, 1, false, true);
 }
 
@@ -2090,6 +2142,15 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
        return bio_alloc(GFP_KERNEL, npages);
 }
 
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+{
+       if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
+               get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
+               get_pages(sbi, F2FS_WB_CP_DATA))
+               return false;
+       return f2fs_time_over(sbi, type);
+}
+
 static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
                                unsigned long index, void *item)
 {
@@ -2739,7 +2800,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
  */
 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
+                                                       bool buf_write);
 int f2fs_truncate(struct inode *inode);
 int f2fs_getattr(const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int flags);
@@ -2749,6 +2811,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count);
 int f2fs_precache_extents(struct inode *inode);
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid);
 int f2fs_pin_file_control(struct inode *inode, bool inc);
 
 /*
@@ -2827,6 +2890,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
 int f2fs_inode_dirtied(struct inode *inode, bool sync);
 void f2fs_inode_synced(struct inode *inode);
 int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+int f2fs_quota_sync(struct super_block *sb, int type);
 void f2fs_quota_off_umount(struct super_block *sb);
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
 int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -2869,7 +2933,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
 void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
 struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
 struct page *f2fs_get_node_page_ra(struct page *parent, int start);
-void f2fs_move_node_page(struct page *node_page, int gc_type);
+int f2fs_move_node_page(struct page *node_page, int gc_type);
 int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
                        struct writeback_control *wbc, bool atomic,
                        unsigned int *seq_id);
@@ -2886,7 +2950,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
 int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
 int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
                        unsigned int segno, struct f2fs_summary_block *sum);
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
 void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
 int __init f2fs_create_node_manager_caches(void);
@@ -2914,6 +2978,8 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
                                        struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
@@ -2942,7 +3008,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                        struct f2fs_io_info *fio, bool add_list);
 void f2fs_wait_on_page_writeback(struct page *page,
                        enum page_type type, bool ordered);
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr);
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+                                                               block_t len);
 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
@@ -3002,8 +3070,8 @@ int f2fs_init_post_read_processing(void);
 void f2fs_destroy_post_read_processing(void);
 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
-                               struct inode *inode, nid_t ino, pgoff_t idx,
-                               enum page_type type);
+                               struct inode *inode, struct page *page,
+                               nid_t ino, enum page_type type);
 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
 int f2fs_submit_page_bio(struct f2fs_io_info *fio);
 void f2fs_submit_page_write(struct f2fs_io_info *fio);
@@ -3025,6 +3093,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
 struct page *f2fs_get_new_data_page(struct inode *inode,
                        struct page *ipage, pgoff_t index, bool new_i_size);
 int f2fs_do_write_data_page(struct f2fs_io_info *fio);
+void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                        int create, int flag);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -3077,6 +3146,8 @@ struct f2fs_stat_info {
        int free_nids, avail_nids, alloc_nids;
        int total_count, utilization;
        int bg_gc, nr_wb_cp_data, nr_wb_data;
+       int nr_rd_data, nr_rd_node, nr_rd_meta;
+       unsigned int io_skip_bggc, other_skip_bggc;
        int nr_flushing, nr_flushed, flush_list_empty;
        int nr_discarding, nr_discarded;
        int nr_discard_cmd;
@@ -3098,6 +3169,7 @@ struct f2fs_stat_info {
        int cursec[NR_CURSEG_TYPE];
        int curzone[NR_CURSEG_TYPE];
 
+       unsigned int meta_count[META_MAX];
        unsigned int segment_count[2];
        unsigned int block_count[2];
        unsigned int inplace_count;
@@ -3113,6 +3185,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
 #define stat_inc_bg_cp_count(si)       ((si)->bg_cp_count++)
 #define stat_inc_call_count(si)                ((si)->call_count++)
 #define stat_inc_bggc_count(sbi)       ((sbi)->bg_gc++)
+#define stat_io_skip_bggc_count(sbi)   ((sbi)->io_skip_bggc++)
+#define stat_other_skip_bggc_count(sbi)        ((sbi)->other_skip_bggc++)
 #define stat_inc_dirty_inode(sbi, type)        ((sbi)->ndirty_inode[type]++)
 #define stat_dec_dirty_inode(sbi, type)        ((sbi)->ndirty_inode[type]--)
 #define stat_inc_total_hit(sbi)                (atomic64_inc(&(sbi)->total_hit_ext))
@@ -3149,6 +3223,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
                if (f2fs_has_inline_dentry(inode))                      \
                        (atomic_dec(&F2FS_I_SB(inode)->inline_dir));    \
        } while (0)
+#define stat_inc_meta_count(sbi, blkaddr)                              \
+       do {                                                            \
+               if ((blkaddr) < SIT_I(sbi)->sit_base_addr)              \
+                       atomic_inc(&(sbi)->meta_count[META_CP]);        \
+               else if ((blkaddr) < NM_I(sbi)->nat_blkaddr)            \
+                       atomic_inc(&(sbi)->meta_count[META_SIT]);       \
+               else if ((blkaddr) < SM_I(sbi)->ssa_blkaddr)            \
+                       atomic_inc(&(sbi)->meta_count[META_NAT]);       \
+               else if ((blkaddr) < SM_I(sbi)->main_blkaddr)           \
+                       atomic_inc(&(sbi)->meta_count[META_SSA]);       \
+       } while (0)
 #define stat_inc_seg_type(sbi, curseg)                                 \
                ((sbi)->segment_count[(curseg)->alloc_type]++)
 #define stat_inc_block_count(sbi, curseg)                              \
@@ -3218,6 +3303,8 @@ void f2fs_destroy_root_stats(void);
 #define stat_inc_bg_cp_count(si)                       do { } while (0)
 #define stat_inc_call_count(si)                                do { } while (0)
 #define stat_inc_bggc_count(si)                                do { } while (0)
+#define stat_io_skip_bggc_count(sbi)                   do { } while (0)
+#define stat_other_skip_bggc_count(sbi)                        do { } while (0)
 #define stat_inc_dirty_inode(sbi, type)                        do { } while (0)
 #define stat_dec_dirty_inode(sbi, type)                        do { } while (0)
 #define stat_inc_total_hit(sb)                         do { } while (0)
@@ -3236,6 +3323,7 @@ void f2fs_destroy_root_stats(void);
 #define stat_inc_volatile_write(inode)                 do { } while (0)
 #define stat_dec_volatile_write(inode)                 do { } while (0)
 #define stat_update_max_volatile_write(inode)          do { } while (0)
+#define stat_inc_meta_count(sbi, blkaddr)              do { } while (0)
 #define stat_inc_seg_type(sbi, curseg)                 do { } while (0)
 #define stat_inc_block_count(sbi, curseg)              do { } while (0)
 #define stat_inc_inplace_blocks(sbi)                   do { } while (0)
@@ -3305,18 +3393,19 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
 /*
  * extent_cache.c
  */
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root *root,
+struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
                                struct rb_entry *cached_re, unsigned int ofs);
 struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
-                               struct rb_root *root, struct rb_node **parent,
-                               unsigned int ofs);
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root *root,
+                               struct rb_root_cached *root,
+                               struct rb_node **parent,
+                               unsigned int ofs, bool *leftmost);
+struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
                struct rb_entry *cached_re, unsigned int ofs,
                struct rb_entry **prev_entry, struct rb_entry **next_entry,
                struct rb_node ***insert_p, struct rb_node **insert_parent,
-               bool force);
+               bool force, bool *leftmost);
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
-                                               struct rb_root *root);
+                                               struct rb_root_cached *root);
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
 bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
 void f2fs_drop_extent_tree(struct inode *inode);
@@ -3356,7 +3445,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
 {
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
        file_set_encrypt(inode);
-       inode->i_flags |= S_ENCRYPTED;
+       f2fs_set_inode_flags(inode);
 #endif
 }
 
@@ -3384,6 +3473,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR);
 F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
 F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
 F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
+F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline int get_blkz_type(struct f2fs_sb_info *sbi,
@@ -3399,11 +3489,20 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi,
 }
 #endif
 
-static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
+static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
 {
-       struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
+       return f2fs_sb_has_blkzoned(sbi->sb);
+}
 
-       return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb);
+static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
+{
+       return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev));
+}
+
+static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi)
+{
+       return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) ||
+                                       f2fs_hw_should_discard(sbi);
 }
 
 static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
@@ -3432,11 +3531,50 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
 #endif
 }
 
-static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
+static inline int block_unaligned_IO(struct inode *inode,
+                               struct kiocb *iocb, struct iov_iter *iter)
 {
-       return (f2fs_post_read_required(inode) ||
-                       (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
-                       F2FS_I_SB(inode)->s_ndevs);
+       unsigned int i_blkbits = READ_ONCE(inode->i_blkbits);
+       unsigned int blocksize_mask = (1 << i_blkbits) - 1;
+       loff_t offset = iocb->ki_pos;
+       unsigned long align = offset | iov_iter_alignment(iter);
+
+       return align & blocksize_mask;
+}
+
+static inline int allow_outplace_dio(struct inode *inode,
+                               struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       int rw = iov_iter_rw(iter);
+
+       return (test_opt(sbi, LFS) && (rw == WRITE) &&
+                               !block_unaligned_IO(inode, iocb, iter));
+}
+
+static inline bool f2fs_force_buffered_io(struct inode *inode,
+                               struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       int rw = iov_iter_rw(iter);
+
+       if (f2fs_post_read_required(inode))
+               return true;
+       if (sbi->s_ndevs)
+               return true;
+       /*
+        * for blkzoned device, fall back from direct IO to buffered IO, so
+        * all IOs can be serialized by log-structured write.
+        */
+       if (f2fs_sb_has_blkzoned(sbi->sb))
+               return true;
+       if (test_opt(sbi, LFS) && (rw == WRITE) &&
+                               block_unaligned_IO(inode, iocb, iter))
+               return true;
+       if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+               return true;
+
+       return false;
+}
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -3447,3 +3585,21 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
 #endif
 
+/*
+ * Return true when the superblock has the quota_ino feature or any of the
+ * USRQUOTA/GRPQUOTA/PRJQUOTA entries of the mount options' s_qf_names[] is
+ * set; always false when CONFIG_QUOTA is not enabled.
+ */
+static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_QUOTA
+       if (f2fs_sb_has_quota_ino(sbi->sb))
+               return true;
+       if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+               F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+               F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+               return true;
+#endif
+       return false;
+}
+
 #endif
index 5474aaa274b91d52c8259d31cfc072e5f8674d54..88b124677189b9d4b242142dd6d297bade855ab1 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/file.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -50,7 +47,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
        struct page *page = vmf->page;
        struct inode *inode = file_inode(vmf->vma->vm_file);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct dnode_of_data dn;
+       struct dnode_of_data dn = { .node_changed = false };
        int err;
 
        if (unlikely(f2fs_cp_error(sbi))) {
@@ -62,19 +59,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 
        f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
 
-       /* block allocation */
-       f2fs_lock_op(sbi);
-       set_new_dnode(&dn, inode, NULL, NULL, 0);
-       err = f2fs_reserve_block(&dn, page->index);
-       if (err) {
-               f2fs_unlock_op(sbi);
-               goto out;
-       }
-       f2fs_put_dnode(&dn);
-       f2fs_unlock_op(sbi);
-
-       f2fs_balance_fs(sbi, dn.node_changed);
-
        file_update_time(vmf->vma->vm_file);
        down_read(&F2FS_I(inode)->i_mmap_sem);
        lock_page(page);
@@ -86,11 +70,28 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
                goto out_sem;
        }
 
+       /* block allocation */
+       __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+       set_new_dnode(&dn, inode, NULL, NULL, 0);
+       err = f2fs_get_block(&dn, page->index);
+       f2fs_put_dnode(&dn);
+       __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+       if (err) {
+               unlock_page(page);
+               goto out_sem;
+       }
+
+       /* fill the page */
+       f2fs_wait_on_page_writeback(page, DATA, false);
+
+       /* wait for GCed page writeback via META_MAPPING */
+       f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
+
        /*
         * check to see if the page is mapped already (no holes)
         */
        if (PageMappedToDisk(page))
-               goto mapped;
+               goto out_sem;
 
        /* page is wholly or partially inside EOF */
        if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
@@ -105,21 +106,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
                SetPageUptodate(page);
 
        f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
+       f2fs_update_time(sbi, REQ_TIME);
 
        trace_f2fs_vm_page_mkwrite(page, DATA);
-mapped:
-       /* fill the page */
-       f2fs_wait_on_page_writeback(page, DATA, false);
-
-       /* wait for GCed page writeback via META_MAPPING */
-       if (f2fs_post_read_required(inode))
-               f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr);
-
 out_sem:
        up_read(&F2FS_I(inode)->i_mmap_sem);
-out:
+
+       f2fs_balance_fs(sbi, dn.node_changed);
+
        sb_end_pagefault(inode->i_sb);
-       f2fs_update_time(sbi, REQ_TIME);
 err:
        return block_page_mkwrite_return(err);
 }
@@ -215,7 +210,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
        };
        unsigned int seq_id = 0;
 
-       if (unlikely(f2fs_readonly(inode->i_sb)))
+       if (unlikely(f2fs_readonly(inode->i_sb) ||
+                               is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                return 0;
 
        trace_f2fs_sync_file_enter(inode);
@@ -590,7 +586,8 @@ truncate_out:
        return 0;
 }
 
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
+                                                       bool buf_write)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
@@ -598,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
        int count = 0, err = 0;
        struct page *ipage;
        bool truncate_page = false;
+       int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
 
        trace_f2fs_truncate_blocks_enter(inode, from);
 
@@ -607,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
                goto free_partial;
 
        if (lock)
-               f2fs_lock_op(sbi);
+               __do_map_lock(sbi, flag, true);
 
        ipage = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(ipage)) {
@@ -645,7 +643,7 @@ free_next:
        err = f2fs_truncate_inode_blocks(inode, free_from);
 out:
        if (lock)
-               f2fs_unlock_op(sbi);
+               __do_map_lock(sbi, flag, false);
 free_partial:
        /* lastly zero out the first data page */
        if (!err)
@@ -680,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
                        return err;
        }
 
-       err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+       err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
        if (err)
                return err;
 
@@ -789,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                !uid_eq(attr->ia_uid, inode->i_uid)) ||
                (attr->ia_valid & ATTR_GID &&
                !gid_eq(attr->ia_gid, inode->i_gid))) {
+               f2fs_lock_op(F2FS_I_SB(inode));
                err = dquot_transfer(inode, attr);
-               if (err)
+               if (err) {
+                       set_sbi_flag(F2FS_I_SB(inode),
+                                       SBI_QUOTA_NEED_REPAIR);
+                       f2fs_unlock_op(F2FS_I_SB(inode));
                        return err;
+               }
+               /*
+                * update uid/gid under lock_op(), so that dquot and inode can
+                * be updated atomically.
+                */
+               if (attr->ia_valid & ATTR_UID)
+                       inode->i_uid = attr->ia_uid;
+               if (attr->ia_valid & ATTR_GID)
+                       inode->i_gid = attr->ia_gid;
+               f2fs_mark_inode_dirty_sync(inode, true);
+               f2fs_unlock_op(F2FS_I_SB(inode));
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
@@ -1246,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        new_size = i_size_read(inode) - len;
        truncate_pagecache(inode, new_size);
 
-       ret = f2fs_truncate_blocks(inode, new_size, true);
+       ret = f2fs_truncate_blocks(inode, new_size, true, false);
        up_write(&F2FS_I(inode)->i_mmap_sem);
        if (!ret)
                f2fs_i_size_write(inode, new_size);
@@ -1431,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        f2fs_balance_fs(sbi, true);
 
        down_write(&F2FS_I(inode)->i_mmap_sem);
-       ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+       ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
        up_write(&F2FS_I(inode)->i_mmap_sem);
        if (ret)
                return ret;
@@ -1978,7 +1991,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!blk_queue_discard(q))
+       if (!f2fs_hw_support_discard(F2FS_SB(sb)))
                return -EOPNOTSUPP;
 
        if (copy_from_user(&range, (struct fstrim_range __user *)arg,
@@ -2162,6 +2175,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
        if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               f2fs_msg(sbi->sb, KERN_INFO,
+                       "Skipping Checkpoint. Checkpoints currently disabled.");
+               return -EINVAL;
+       }
+
        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
@@ -2533,6 +2552,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
        if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return -EINVAL;
+
        if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
                                                        sizeof(range)))
                return -EFAULT;
@@ -2591,13 +2613,29 @@ static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
 }
 
 #ifdef CONFIG_QUOTA
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
+{
+       struct dquot *transfer_to[MAXQUOTAS] = {};
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct super_block *sb = sbi->sb;
+       int err = 0;    /* returned unchanged if dqget() fails below */
+       /* Look up @kprojid's dquot and pass it to __dquot_transfer(). */
+       transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+       if (!IS_ERR(transfer_to[PRJQUOTA])) {   /* NOTE(review): error dropped */
+               err = __dquot_transfer(inode, transfer_to);
+               if (err)        /* record the failure on the superblock info */
+                       set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+               dqput(transfer_to[PRJQUOTA]);   /* drop the dqget() reference */
+       }
+       return err;     /* 0, or the __dquot_transfer() error */
+}
+
 static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct super_block *sb = sbi->sb;
-       struct dquot *transfer_to[MAXQUOTAS] = {};
        struct page *ipage;
        kprojid_t kprojid;
        int err;
@@ -2617,53 +2655,45 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
        if (projid_eq(kprojid, F2FS_I(inode)->i_projid))
                return 0;
 
-       err = mnt_want_write_file(filp);
-       if (err)
-               return err;
-
        err = -EPERM;
-       inode_lock(inode);
-
        /* Is it quota file? Do not allow user to mess with it */
        if (IS_NOQUOTA(inode))
-               goto out_unlock;
+               return err;
 
        ipage = f2fs_get_node_page(sbi, inode->i_ino);
-       if (IS_ERR(ipage)) {
-               err = PTR_ERR(ipage);
-               goto out_unlock;
-       }
+       if (IS_ERR(ipage))
+               return PTR_ERR(ipage);
 
        if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize,
                                                                i_projid)) {
                err = -EOVERFLOW;
                f2fs_put_page(ipage, 1);
-               goto out_unlock;
+               return err;
        }
        f2fs_put_page(ipage, 1);
 
        err = dquot_initialize(inode);
        if (err)
-               goto out_unlock;
+               return err;
 
-       transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
-       if (!IS_ERR(transfer_to[PRJQUOTA])) {
-               err = __dquot_transfer(inode, transfer_to);
-               dqput(transfer_to[PRJQUOTA]);
-               if (err)
-                       goto out_dirty;
-       }
+       f2fs_lock_op(sbi);
+       err = f2fs_transfer_project_quota(inode, kprojid);
+       if (err)
+               goto out_unlock;
 
        F2FS_I(inode)->i_projid = kprojid;
        inode->i_ctime = current_time(inode);
-out_dirty:
        f2fs_mark_inode_dirty_sync(inode, true);
 out_unlock:
-       inode_unlock(inode);
-       mnt_drop_write_file(filp);
+       f2fs_unlock_op(sbi);
        return err;
 }
 #else
+int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
+{
+       return 0;       /* #else-side stub: no transfer is done, reports success */
+}
+
 static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
 {
        if (projid != F2FS_DEF_PROJID)
@@ -2736,6 +2766,30 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
        return 0;
 }
 
+static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
+{
+       /*
+        * Project quota ID and PROJINHERIT state may only be changed from the
+        * init user namespace. From any other namespace the request passes
+        * only if @fa leaves both as they are on @inode; otherwise -EINVAL.
+        */
+       if (current_user_ns() == &init_user_ns)
+               return 0;
+
+       if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid)
+               return -EINVAL; /* would change the project ID */
+
+       if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) {
+               if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
+                       return -EINVAL; /* would clear PROJINHERIT */
+       } else {
+               if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
+                       return -EINVAL; /* would set PROJINHERIT */
+       }
+
+       return 0;
+}
+
 static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
@@ -2763,19 +2817,20 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
                return err;
 
        inode_lock(inode);
+       err = f2fs_ioctl_check_project(inode, &fa);
+       if (err)
+               goto out;
        flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
                                (flags & F2FS_FL_XFLAG_VISIBLE);
        err = __f2fs_ioc_setflags(inode, flags);
-       inode_unlock(inode);
-       mnt_drop_write_file(filp);
        if (err)
-               return err;
+               goto out;
 
        err = f2fs_ioc_setproject(filp, fa.fsx_projid);
-       if (err)
-               return err;
-
-       return 0;
+out:
+       inode_unlock(inode);
+       mnt_drop_write_file(filp);
+       return err;
 }
 
 int f2fs_pin_file_control(struct inode *inode, bool inc)
@@ -2992,7 +3047,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                                if (!f2fs_overwrite_io(inode, iocb->ki_pos,
                                                iov_iter_count(from)) ||
                                        f2fs_has_inline_data(inode) ||
-                                       f2fs_force_buffered_io(inode, WRITE)) {
+                                       f2fs_force_buffered_io(inode,
+                                                       iocb, from)) {
                                                clear_inode_flag(inode,
                                                                FI_NO_PREALLOC);
                                                inode_unlock(inode);
index 5c8d004222372990c1445ff62172ea7d16759d74..a07241fb85370f16df2c171141a8d73995b28376 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/gc.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/module.h>
@@ -43,13 +40,16 @@ static int gc_thread_func(void *data)
                if (gc_th->gc_wake)
                        gc_th->gc_wake = 0;
 
-               if (try_to_freeze())
+               if (try_to_freeze()) {
+                       stat_other_skip_bggc_count(sbi);
                        continue;
+               }
                if (kthread_should_stop())
                        break;
 
                if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
                        increase_sleep_time(gc_th, &wait_ms);
+                       stat_other_skip_bggc_count(sbi);
                        continue;
                }
 
@@ -58,8 +58,10 @@ static int gc_thread_func(void *data)
                        f2fs_stop_checkpoint(sbi, false);
                }
 
-               if (!sb_start_write_trylock(sbi->sb))
+               if (!sb_start_write_trylock(sbi->sb)) {
+                       stat_other_skip_bggc_count(sbi);
                        continue;
+               }
 
                /*
                 * [GC triggering condition]
@@ -80,12 +82,15 @@ static int gc_thread_func(void *data)
                        goto do_gc;
                }
 
-               if (!mutex_trylock(&sbi->gc_mutex))
+               if (!mutex_trylock(&sbi->gc_mutex)) {
+                       stat_other_skip_bggc_count(sbi);
                        goto next;
+               }
 
-               if (!is_idle(sbi)) {
+               if (!is_idle(sbi, GC_TIME)) {
                        increase_sleep_time(gc_th, &wait_ms);
                        mutex_unlock(&sbi->gc_mutex);
+                       stat_io_skip_bggc_count(sbi);
                        goto next;
                }
 
@@ -365,6 +370,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 
                if (sec_usage_check(sbi, secno))
                        goto next;
+               /* Don't touch checkpointed data */
+               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                                       get_ckpt_valid_blocks(sbi, segno)))
+                       goto next;
                if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
                        goto next;
 
@@ -464,7 +473,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
  * On validity, copy that node with cold status, otherwise (invalid node)
  * ignore that.
  */
-static void gc_node_segment(struct f2fs_sb_info *sbi,
+static int gc_node_segment(struct f2fs_sb_info *sbi,
                struct f2fs_summary *sum, unsigned int segno, int gc_type)
 {
        struct f2fs_summary *entry;
@@ -472,6 +481,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
        int off;
        int phase = 0;
        bool fggc = (gc_type == FG_GC);
+       int submitted = 0;
 
        start_addr = START_BLOCK(sbi, segno);
 
@@ -485,10 +495,11 @@ next_step:
                nid_t nid = le32_to_cpu(entry->nid);
                struct page *node_page;
                struct node_info ni;
+               int err;
 
                /* stop BG_GC if there is not enough free sections. */
                if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
-                       return;
+                       return submitted;
 
                if (check_valid_map(sbi, segno, off) == 0)
                        continue;
@@ -525,7 +536,9 @@ next_step:
                        continue;
                }
 
-               f2fs_move_node_page(node_page, gc_type);
+               err = f2fs_move_node_page(node_page, gc_type);
+               if (!err && gc_type == FG_GC)
+                       submitted++;
                stat_inc_node_blk_count(sbi, 1, gc_type);
        }
 
@@ -534,6 +547,7 @@ next_step:
 
        if (fggc)
                atomic_dec(&sbi->wb_sync_req[NODE]);
+       return submitted;
 }
 
 /*
@@ -669,7 +683,7 @@ put_page:
  * Move data block via META_MAPPING while keeping locked data page.
  * This can be used to move blocks, aka LBAs, directly on disk.
  */
-static void move_data_block(struct inode *inode, block_t bidx,
+static int move_data_block(struct inode *inode, block_t bidx,
                                int gc_type, unsigned int segno, int off)
 {
        struct f2fs_io_info fio = {
@@ -688,25 +702,29 @@ static void move_data_block(struct inode *inode, block_t bidx,
        struct node_info ni;
        struct page *page, *mpage;
        block_t newaddr;
-       int err;
+       int err = 0;
        bool lfs_mode = test_opt(fio.sbi, LFS);
 
        /* do not read out */
        page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
        if (!page)
-               return;
+               return -ENOMEM;
 
-       if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+       if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+               err = -ENOENT;
                goto out;
+       }
 
        if (f2fs_is_atomic_file(inode)) {
                F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
                F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+               err = -EAGAIN;
                goto out;
        }
 
        if (f2fs_is_pinned_file(inode)) {
                f2fs_pin_file_control(inode, true);
+               err = -EAGAIN;
                goto out;
        }
 
@@ -717,6 +735,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
 
        if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
                ClearPageUptodate(page);
+               err = -ENOENT;
                goto put_out;
        }
 
@@ -799,6 +818,7 @@ write_page:
        fio.new_blkaddr = newaddr;
        f2fs_submit_page_write(&fio);
        if (fio.retry) {
+               err = -EAGAIN;
                if (PageWriteback(fio.encrypted_page))
                        end_page_writeback(fio.encrypted_page);
                goto put_page_out;
@@ -822,34 +842,42 @@ put_out:
        f2fs_put_dnode(&dn);
 out:
        f2fs_put_page(page, 1);
+       return err;
 }
 
-static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
+static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
                                                        unsigned int segno, int off)
 {
        struct page *page;
+       int err = 0;
 
        page = f2fs_get_lock_data_page(inode, bidx, true);
        if (IS_ERR(page))
-               return;
+               return PTR_ERR(page);
 
-       if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+       if (!check_valid_map(F2FS_I_SB(inode), segno, off)) {
+               err = -ENOENT;
                goto out;
+       }
 
        if (f2fs_is_atomic_file(inode)) {
                F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
                F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
+               err = -EAGAIN;
                goto out;
        }
        if (f2fs_is_pinned_file(inode)) {
                if (gc_type == FG_GC)
                        f2fs_pin_file_control(inode, true);
+               err = -EAGAIN;
                goto out;
        }
 
        if (gc_type == BG_GC) {
-               if (PageWriteback(page))
+               if (PageWriteback(page)) {
+                       err = -EAGAIN;
                        goto out;
+               }
                set_page_dirty(page);
                set_cold_data(page);
        } else {
@@ -867,7 +895,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
                        .io_type = FS_GC_DATA_IO,
                };
                bool is_dirty = PageDirty(page);
-               int err;
 
 retry:
                set_page_dirty(page);
@@ -892,6 +919,7 @@ retry:
        }
 out:
        f2fs_put_page(page, 1);
+       return err;
 }
 
 /*
@@ -901,7 +929,7 @@ out:
  * If the parent node is not valid or the data block address is different,
  * the victim data block is ignored.
  */
-static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
 {
        struct super_block *sb = sbi->sb;
@@ -909,6 +937,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        block_t start_addr;
        int off;
        int phase = 0;
+       int submitted = 0;
 
        start_addr = START_BLOCK(sbi, segno);
 
@@ -925,7 +954,7 @@ next_step:
 
                /* stop BG_GC if there is not enough free sections. */
                if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
-                       return;
+                       return submitted;
 
                if (check_valid_map(sbi, segno, off) == 0)
                        continue;
@@ -997,6 +1026,7 @@ next_step:
                if (inode) {
                        struct f2fs_inode_info *fi = F2FS_I(inode);
                        bool locked = false;
+                       int err;
 
                        if (S_ISREG(inode->i_mode)) {
                                if (!down_write_trylock(&fi->i_gc_rwsem[READ]))
@@ -1016,12 +1046,16 @@ next_step:
                        start_bidx = f2fs_start_bidx_of_node(nofs, inode)
                                                                + ofs_in_node;
                        if (f2fs_post_read_required(inode))
-                               move_data_block(inode, start_bidx, gc_type,
-                                                               segno, off);
+                               err = move_data_block(inode, start_bidx,
+                                                       gc_type, segno, off);
                        else
-                               move_data_page(inode, start_bidx, gc_type,
+                               err = move_data_page(inode, start_bidx, gc_type,
                                                                segno, off);
 
+                       if (!err && (gc_type == FG_GC ||
+                                       f2fs_post_read_required(inode)))
+                               submitted++;
+
                        if (locked) {
                                up_write(&fi->i_gc_rwsem[WRITE]);
                                up_write(&fi->i_gc_rwsem[READ]);
@@ -1033,6 +1067,8 @@ next_step:
 
        if (++phase < 5)
                goto next_step;
+
+       return submitted;
 }
 
 static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -1060,6 +1096,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
        int seg_freed = 0;
        unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
                                                SUM_TYPE_DATA : SUM_TYPE_NODE;
+       int submitted = 0;
 
        /* readahead multi ssa blocks those have contiguous address */
        if (sbi->segs_per_sec > 1)
@@ -1069,6 +1106,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
        /* reference all summary page */
        while (segno < end_segno) {
                sum_page = f2fs_get_sum_page(sbi, segno++);
+               if (IS_ERR(sum_page)) {
+                       int err = PTR_ERR(sum_page);
+
+                       end_segno = segno - 1;
+                       for (segno = start_segno; segno < end_segno; segno++) {
+                               sum_page = find_get_page(META_MAPPING(sbi),
+                                               GET_SUM_BLOCK(sbi, segno));
+                               f2fs_put_page(sum_page, 0);
+                               f2fs_put_page(sum_page, 0);
+                       }
+                       return err;
+               }
                unlock_page(sum_page);
        }
 
@@ -1103,10 +1152,11 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
                 *                                  - lock_page(sum_page)
                 */
                if (type == SUM_TYPE_NODE)
-                       gc_node_segment(sbi, sum->entries, segno, gc_type);
-               else
-                       gc_data_segment(sbi, sum->entries, gc_list, segno,
+                       submitted += gc_node_segment(sbi, sum->entries, segno,
                                                                gc_type);
+               else
+                       submitted += gc_data_segment(sbi, sum->entries, gc_list,
+                                                       segno, gc_type);
 
                stat_inc_seg_count(sbi, type, gc_type);
 
@@ -1117,7 +1167,7 @@ next:
                f2fs_put_page(sum_page, 0);
        }
 
-       if (gc_type == FG_GC)
+       if (submitted)
                f2fs_submit_merged_write(sbi,
                                (type == SUM_TYPE_NODE) ? NODE : DATA);
 
@@ -1172,7 +1222,8 @@ gc_more:
                 * threshold, we can make them free by checkpoint. Then, we
                 * secure free segments which doesn't need fggc any more.
                 */
-               if (prefree_segments(sbi)) {
+               if (prefree_segments(sbi) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                        ret = f2fs_write_checkpoint(sbi, &cpc);
                        if (ret)
                                goto stop;
@@ -1224,7 +1275,7 @@ gc_more:
                        segno = NULL_SEGNO;
                        goto gc_more;
                }
-               if (gc_type == FG_GC)
+               if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
                        ret = f2fs_write_checkpoint(sbi, &cpc);
        }
 stop:
@@ -1244,7 +1295,7 @@ stop:
 
        put_gc_inode(&gc_list);
 
-       if (sync)
+       if (sync && !ret)
                ret = sec_freed ? 0 : -EAGAIN;
        return ret;
 }
index c8619e408009068c63ff37d35eecc84797eb8130..bbac9d3787bd35e4bbdfb1a2ea65853148d1e6a2 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/gc.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define GC_THREAD_MIN_WB_PAGES         1       /*
                                                 * a threshold to determine
index eb2e031ea887bed43229f63e9923986bc14f3955..cc82f142f811f63065aa9299a2bdc772d79660aa 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/hash.c
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext3/hash.c
  *
  * Copyright (C) 2002 by Theodore Ts'o
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/types.h>
 #include <linux/fs.h>
index 115dc219344b15be987905121b38961b8fdcd630..cb31a719b04889b922aae25a230e6f08940d233c 100644 (file)
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/inline.c
  * Copyright (c) 2013, Intel Corporation
  * Authors: Huajun Li <huajun.li@intel.com>
  *          Haicheng Li <haicheng.li@intel.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
@@ -300,7 +298,7 @@ process_inline:
                clear_inode_flag(inode, FI_INLINE_DATA);
                f2fs_put_page(ipage, 1);
        } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
-               if (f2fs_truncate_blocks(inode, 0, false))
+               if (f2fs_truncate_blocks(inode, 0, false, false))
                        return false;
                goto process_inline;
        }
@@ -472,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
        return 0;
 punch_dentry_pages:
        truncate_inode_pages(&dir->i_data, 0);
-       f2fs_truncate_blocks(dir, 0, false);
+       f2fs_truncate_blocks(dir, 0, false, false);
        f2fs_remove_dirty_inode(dir);
        return err;
 }
index 959df2249875c484d6b7a51369fd97466e9002a8..91ceee0ed4c40fcaacb1d57cd6f4cf8edc77a946 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/inode.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -368,6 +365,12 @@ static int do_read_inode(struct inode *inode)
        if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
                __recover_inline_status(inode, node_page);
 
+       /* try to recover cold bit for non-dir inode */
+       if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) {
+               set_cold_node(node_page, false);
+               set_page_dirty(node_page);
+       }
+
        /* get rdev by using inline_info */
        __get_inode_rdev(inode, ri);
 
@@ -610,6 +613,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
        if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
                return 0;
 
+       if (f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
+
        /*
         * We need to balance fs here to prevent from producing dirty node pages
         * during the urgent cleaning time when runing out of free sections.
@@ -648,7 +654,11 @@ void f2fs_evict_inode(struct inode *inode)
        if (inode->i_nlink || is_bad_inode(inode))
                goto no_delete;
 
-       dquot_initialize(inode);
+       err = dquot_initialize(inode);
+       if (err) {
+               err = 0;
+               set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+       }
 
        f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
        f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
@@ -680,9 +690,10 @@ retry:
                goto retry;
        }
 
-       if (err)
+       if (err) {
                f2fs_update_inode_page(inode);
-       dquot_free_inode(inode);
+               set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+       }
        sb_end_intwrite(inode->i_sb);
 no_delete:
        dquot_drop(inode);
@@ -691,7 +702,8 @@ no_delete:
        stat_dec_inline_dir(inode);
        stat_dec_inline_inode(inode);
 
-       if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+       if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
        else
                f2fs_inode_synced(inode);
index 1f67e389169f5388a4883a1f89398b7f43cabaa0..99299ede7429997c0fe0f2bb825de62ee94c0f9b 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/namei.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -19,6 +16,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 #include "xattr.h"
 #include "acl.h"
 #include <trace/events/f2fs.h>
@@ -74,10 +72,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
        if (err)
                goto fail_drop;
 
-       err = dquot_alloc_inode(inode);
-       if (err)
-               goto fail_drop;
-
        set_inode_flag(inode, FI_NEW_INODE);
 
        /* If the directory encrypted, then we should encrypt the inode. */
@@ -124,6 +118,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
        if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
                set_inode_flag(inode, FI_PROJ_INHERIT);
 
+       f2fs_set_inode_flags(inode);
+
        trace_f2fs_new_inode(inode, 0);
        return inode;
 
@@ -184,16 +180,19 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
        hot_count = sbi->raw_super->hot_ext_count;
 
        for (i = 0; i < cold_count + hot_count; i++) {
-               if (!is_extension_exist(name, extlist[i]))
-                       continue;
-               if (i < cold_count)
-                       file_set_cold(inode);
-               else
-                       file_set_hot(inode);
-               break;
+               if (is_extension_exist(name, extlist[i]))
+                       break;
        }
 
        up_read(&sbi->sb_lock);
+
+       if (i == cold_count + hot_count)
+               return;
+
+       if (i < cold_count)
+               file_set_cold(inode);
+       else
+               file_set_hot(inode);
 }
 
 int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
@@ -272,6 +271,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = dquot_initialize(dir);
        if (err)
@@ -318,6 +320,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = fscrypt_prepare_link(old_dentry, dir, dentry);
        if (err)
@@ -564,6 +569,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
                                      &disk_link);
@@ -693,6 +701,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = dquot_initialize(dir);
        if (err)
@@ -823,10 +834,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct f2fs_dir_entry *old_entry;
        struct f2fs_dir_entry *new_entry;
        bool is_old_inline = f2fs_has_inline_dentry(old_dir);
-       int err = -ENOENT;
+       int err;
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        (!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -847,6 +861,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        goto out;
        }
 
+       err = -ENOENT;
        old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
        if (!old_entry) {
                if (IS_ERR(old_page))
@@ -983,6 +998,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
                f2fs_sync_fs(sbi->sb, 1);
+
+       f2fs_update_time(sbi, REQ_TIME);
        return 0;
 
 put_out_dir:
@@ -1012,10 +1029,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
        struct f2fs_dir_entry *old_entry, *new_entry;
        int old_nlink = 0, new_nlink = 0;
-       int err = -ENOENT;
+       int err;
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        !projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1033,6 +1053,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (err)
                goto out;
 
+       err = -ENOENT;
        old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
        if (!old_entry) {
                if (IS_ERR(old_page))
@@ -1136,6 +1157,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
                f2fs_sync_fs(sbi->sb, 1);
+
+       f2fs_update_time(sbi, REQ_TIME);
        return 0;
 out_new_dir:
        if (new_dir_entry) {
index dd2e45a661aacdb123b63c92f84e717f17fa59c1..2b34206486d8f2faacc7ae9a310eb360a4af0986 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/node.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -129,6 +126,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
 
        /* get current nat block page with lock */
        src_page = get_current_nat_page(sbi, nid);
+       if (IS_ERR(src_page))
+               return src_page;
        dst_page = f2fs_grab_meta_page(sbi, dst_off);
        f2fs_bug_on(sbi, PageDirty(src_page));
 
@@ -1542,8 +1541,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
        }
 
        if (__is_valid_data_blkaddr(ni.blk_addr) &&
-               !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
+               !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+               up_read(&sbi->node_write);
                goto redirty_out;
+       }
 
        if (atomic && !test_opt(sbi, NOBARRIER))
                fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
@@ -1564,8 +1565,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
        up_read(&sbi->node_write);
 
        if (wbc->for_reclaim) {
-               f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
-                                               page->index, NODE);
+               f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
                submitted = NULL;
        }
 
@@ -1587,8 +1587,10 @@ redirty_out:
        return AOP_WRITEPAGE_ACTIVATE;
 }
 
-void f2fs_move_node_page(struct page *node_page, int gc_type)
+int f2fs_move_node_page(struct page *node_page, int gc_type)
 {
+       int err = 0;
+
        if (gc_type == FG_GC) {
                struct writeback_control wbc = {
                        .sync_mode = WB_SYNC_ALL,
@@ -1600,12 +1602,16 @@ void f2fs_move_node_page(struct page *node_page, int gc_type)
                f2fs_wait_on_page_writeback(node_page, NODE, true);
 
                f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
-               if (!clear_page_dirty_for_io(node_page))
+               if (!clear_page_dirty_for_io(node_page)) {
+                       err = -EAGAIN;
                        goto out_page;
+               }
 
                if (__write_node_page(node_page, false, NULL,
-                                       &wbc, false, FS_GC_NODE_IO, NULL))
+                                       &wbc, false, FS_GC_NODE_IO, NULL)) {
+                       err = -EAGAIN;
                        unlock_page(node_page);
+               }
                goto release_page;
        } else {
                /* set page dirty and write it */
@@ -1616,6 +1622,7 @@ out_page:
        unlock_page(node_page);
 release_page:
        f2fs_put_page(node_page, 0);
+       return err;
 }
 
 static int f2fs_write_node_page(struct page *page,
@@ -1630,13 +1637,13 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
                        unsigned int *seq_id)
 {
        pgoff_t index;
-       pgoff_t last_idx = ULONG_MAX;
        struct pagevec pvec;
        int ret = 0;
        struct page *last_page = NULL;
        bool marked = false;
        nid_t ino = inode->i_ino;
        int nr_pages;
+       int nwritten = 0;
 
        if (atomic) {
                last_page = last_fsync_dnode(sbi, ino);
@@ -1714,7 +1721,7 @@ continue_unlock:
                                f2fs_put_page(last_page, 0);
                                break;
                        } else if (submitted) {
-                               last_idx = page->index;
+                               nwritten++;
                        }
 
                        if (page == last_page) {
@@ -1740,8 +1747,8 @@ continue_unlock:
                goto retry;
        }
 out:
-       if (last_idx != ULONG_MAX)
-               f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
+       if (nwritten)
+               f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
        return ret ? -EIO: 0;
 }
 
@@ -2268,15 +2275,19 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
                                                nm_i->nat_block_bitmap)) {
                        struct page *page = get_current_nat_page(sbi, nid);
 
-                       ret = scan_nat_page(sbi, page, nid);
-                       f2fs_put_page(page, 1);
+                       if (IS_ERR(page)) {
+                               ret = PTR_ERR(page);
+                       } else {
+                               ret = scan_nat_page(sbi, page, nid);
+                               f2fs_put_page(page, 1);
+                       }
 
                        if (ret) {
                                up_read(&nm_i->nat_tree_lock);
                                f2fs_bug_on(sbi, !mount);
                                f2fs_msg(sbi->sb, KERN_ERR,
                                        "NAT is corrupt, run fsck to fix it");
-                               return -EINVAL;
+                               return ret;
                        }
                }
 
@@ -2353,8 +2364,9 @@ retry:
        spin_unlock(&nm_i->nid_list_lock);
 
        /* Let's scan nat pages and its caches to get free nids */
-       f2fs_build_free_nids(sbi, true, false);
-       goto retry;
+       if (!f2fs_build_free_nids(sbi, true, false))
+               goto retry;
+       return false;
 }
 
 /*
@@ -2537,7 +2549,7 @@ retry:
        if (!PageUptodate(ipage))
                SetPageUptodate(ipage);
        fill_node_footer(ipage, ino, ino, 0, true);
-       set_cold_node(page, false);
+       set_cold_node(ipage, false);
 
        src = F2FS_INODE(page);
        dst = F2FS_INODE(ipage);
@@ -2560,6 +2572,13 @@ retry:
                        F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
                                                                i_projid))
                        dst->i_projid = src->i_projid;
+
+               if (f2fs_sb_has_inode_crtime(sbi->sb) &&
+                       F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
+                                                       i_crtime_nsec)) {
+                       dst->i_crtime = src->i_crtime;
+                       dst->i_crtime_nsec = src->i_crtime_nsec;
+               }
        }
 
        new_ni = old_ni;
@@ -2703,7 +2722,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
                __clear_bit_le(nat_index, nm_i->full_nat_bits);
 }
 
-static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
+static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                struct nat_entry_set *set, struct cp_control *cpc)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2727,6 +2746,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                down_write(&curseg->journal_rwsem);
        } else {
                page = get_next_nat_page(sbi, start_nid);
+               if (IS_ERR(page))
+                       return PTR_ERR(page);
+
                nat_blk = page_address(page);
                f2fs_bug_on(sbi, !nat_blk);
        }
@@ -2772,12 +2794,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
                kmem_cache_free(nat_entry_set_slab, set);
        }
+       return 0;
 }
 
 /*
  * This function is called during the checkpointing process.
  */
-void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -2787,6 +2810,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        unsigned int found;
        nid_t set_idx = 0;
        LIST_HEAD(sets);
+       int err = 0;
 
        /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
        if (enabled_nat_bits(sbi, cpc)) {
@@ -2796,7 +2820,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        }
 
        if (!nm_i->dirty_nat_cnt)
-               return;
+               return 0;
 
        down_write(&nm_i->nat_tree_lock);
 
@@ -2819,11 +2843,16 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        }
 
        /* flush dirty nats in nat entry set */
-       list_for_each_entry_safe(set, tmp, &sets, set_list)
-               __flush_nat_entry_set(sbi, set, cpc);
+       list_for_each_entry_safe(set, tmp, &sets, set_list) {
+               err = __flush_nat_entry_set(sbi, set, cpc);
+               if (err)
+                       break;
+       }
 
        up_write(&nm_i->nat_tree_lock);
        /* Allow dirty nats by node block allocation in write_begin */
+
+       return err;
 }
 
 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
@@ -2850,10 +2879,8 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
                struct page *page;
 
                page = f2fs_get_meta_page(sbi, nat_bits_addr++);
-               if (IS_ERR(page)) {
-                       disable_nat_bits(sbi, true);
+               if (IS_ERR(page))
                        return PTR_ERR(page);
-               }
 
                memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
                                        page_address(page), F2FS_BLKSIZE);
index 0f4db7a6125473b24e3a0135c080757f12b0b41d..1c73d879a9bc94c32f83ffb34eaf54f91eb2c7d3 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/node.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 /* start node id of a node block dedicated to the given node id */
 #define        START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
index 95511ed11a22650018ba2ffc9593dbf7c0d063db..1dfb17f9f9ff76f7631bd125e4211eb78ee05069 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/recovery.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -99,8 +96,12 @@ err_out:
        return ERR_PTR(err);
 }
 
-static void del_fsync_inode(struct fsync_inode_entry *entry)
+static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
 {
+       if (drop) {
+               /* inode should not be recovered, drop it */
+               f2fs_inode_synced(entry->inode);
+       }
        iput(entry->inode);
        list_del(&entry->list);
        kmem_cache_free(fsync_entry_slab, entry);
@@ -194,6 +195,33 @@ out:
        return err;
 }
 
+static int recover_quota_data(struct inode *inode, struct page *page)
+{
+       struct f2fs_inode *raw = F2FS_INODE(page);
+       struct iattr attr;
+       uid_t i_uid = le32_to_cpu(raw->i_uid);
+       gid_t i_gid = le32_to_cpu(raw->i_gid);
+       int err;
+
+       memset(&attr, 0, sizeof(attr));         /* ia_valid starts at 0 */
+
+       attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
+       attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+
+       if (!uid_eq(attr.ia_uid, inode->i_uid)) /* page's uid differs */
+               attr.ia_valid |= ATTR_UID;
+       if (!gid_eq(attr.ia_gid, inode->i_gid)) /* page's gid differs */
+               attr.ia_valid |= ATTR_GID;
+
+       if (!attr.ia_valid)     /* uid and gid match: nothing to transfer */
+               return 0;
+
+       err = dquot_transfer(inode, &attr);
+       if (err)                /* flag quota for repair, still return err */
+               set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
+       return err;
+}
+
 static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
 {
        if (ri->i_inline & F2FS_PIN_FILE)
@@ -206,12 +234,41 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
                clear_inode_flag(inode, FI_DATA_EXIST);
 }
 
-static void recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct page *page)
 {
        struct f2fs_inode *raw = F2FS_INODE(page);
        char *name;
+       int err;
 
        inode->i_mode = le16_to_cpu(raw->i_mode);
+
+       err = recover_quota_data(inode, page);
+       if (err)
+               return err;
+
+       i_uid_write(inode, le32_to_cpu(raw->i_uid));
+       i_gid_write(inode, le32_to_cpu(raw->i_gid));
+
+       if (raw->i_inline & F2FS_EXTRA_ATTR) {
+               if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
+                       F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
+                                                               i_projid)) {
+                       projid_t i_projid;
+                       kprojid_t kprojid;
+
+                       i_projid = (projid_t)le32_to_cpu(raw->i_projid);
+                       kprojid = make_kprojid(&init_user_ns, i_projid);
+
+                       if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) {
+                               err = f2fs_transfer_project_quota(inode,
+                                                               kprojid);
+                               if (err)
+                                       return err;
+                               F2FS_I(inode)->i_projid = kprojid;
+                       }
+               }
+       }
+
        f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
        inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
        inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
@@ -221,9 +278,15 @@ static void recover_inode(struct inode *inode, struct page *page)
        inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
 
        F2FS_I(inode)->i_advise = raw->i_advise;
+       F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
+       f2fs_set_inode_flags(inode);
+       F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
+                               le16_to_cpu(raw->i_gc_failures);
 
        recover_inline_flags(inode, raw);
 
+       f2fs_mark_inode_dirty_sync(inode, true);
+
        if (file_enc_name(inode))
                name = "<encrypted>";
        else
@@ -232,6 +295,7 @@ static void recover_inode(struct inode *inode, struct page *page)
        f2fs_msg(inode->i_sb, KERN_NOTICE,
                "recover_inode: ino = %x, name = %s, inline = %x",
                        ino_of_node(page), name, raw->i_inline);
+       return 0;
 }
 
 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
@@ -320,12 +384,12 @@ next:
        return err;
 }
 
-static void destroy_fsync_dnodes(struct list_head *head)
+static void destroy_fsync_dnodes(struct list_head *head, int drop)
 {
        struct fsync_inode_entry *entry, *tmp;
 
        list_for_each_entry_safe(entry, tmp, head, list)
-               del_fsync_inode(entry);
+               del_fsync_inode(entry, drop);
 }
 
 static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@@ -358,6 +422,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
        }
 
        sum_page = f2fs_get_sum_page(sbi, segno);
+       if (IS_ERR(sum_page))
+               return PTR_ERR(sum_page);
        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
        sum = sum_node->entries[blkoff];
        f2fs_put_page(sum_page, 1);
@@ -560,7 +626,7 @@ out:
 }
 
 static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
-                                               struct list_head *dir_list)
+               struct list_head *tmp_inode_list, struct list_head *dir_list)
 {
        struct curseg_info *curseg;
        struct page *page = NULL;
@@ -598,8 +664,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
                 * In this case, we can lose the latest inode(x).
                 * So, call recover_inode for the inode update.
                 */
-               if (IS_INODE(page))
-                       recover_inode(entry->inode, page);
+               if (IS_INODE(page)) {
+                       err = recover_inode(entry->inode, page);
+                       if (err)
+                               break;
+               }
                if (entry->last_dentry == blkaddr) {
                        err = recover_dentry(entry->inode, page, dir_list);
                        if (err) {
@@ -614,7 +683,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
                }
 
                if (entry->blkaddr == blkaddr)
-                       del_fsync_inode(entry);
+                       list_move_tail(&entry->list, tmp_inode_list);
 next:
                /* check next segment */
                blkaddr = next_blkaddr_of_node(page);
@@ -627,7 +696,7 @@ next:
 
 int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
 {
-       struct list_head inode_list;
+       struct list_head inode_list, tmp_inode_list;
        struct list_head dir_list;
        int err;
        int ret = 0;
@@ -658,6 +727,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        }
 
        INIT_LIST_HEAD(&inode_list);
+       INIT_LIST_HEAD(&tmp_inode_list);
        INIT_LIST_HEAD(&dir_list);
 
        /* prevent checkpoint */
@@ -676,11 +746,16 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        need_writecp = true;
 
        /* step #2: recover data */
-       err = recover_data(sbi, &inode_list, &dir_list);
+       err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
        if (!err)
                f2fs_bug_on(sbi, !list_empty(&inode_list));
+       else {
+               /* restore s_flags to let iput() trash data */
+               sbi->sb->s_flags = s_flags;
+       }
 skip:
-       destroy_fsync_dnodes(&inode_list);
+       destroy_fsync_dnodes(&inode_list, err);
+       destroy_fsync_dnodes(&tmp_inode_list, err);
 
        /* truncate meta pages to be used by the recovery */
        truncate_inode_pages_range(META_MAPPING(sbi),
@@ -689,19 +764,23 @@ skip:
        if (err) {
                truncate_inode_pages_final(NODE_MAPPING(sbi));
                truncate_inode_pages_final(META_MAPPING(sbi));
+       } else {
+               clear_sbi_flag(sbi, SBI_POR_DOING);
        }
-
-       clear_sbi_flag(sbi, SBI_POR_DOING);
        mutex_unlock(&sbi->cp_mutex);
 
        /* let's drop all the directory inodes for clean checkpoint */
-       destroy_fsync_dnodes(&dir_list);
+       destroy_fsync_dnodes(&dir_list, err);
 
-       if (!err && need_writecp) {
-               struct cp_control cpc = {
-                       .reason = CP_RECOVERY,
-               };
-               err = f2fs_write_checkpoint(sbi, &cpc);
+       if (need_writecp) {
+               set_sbi_flag(sbi, SBI_IS_RECOVERED);
+
+               if (!err) {
+                       struct cp_control cpc = {
+                               .reason = CP_RECOVERY,
+                       };
+                       err = f2fs_write_checkpoint(sbi, &cpc);
+               }
        }
 
        kmem_cache_destroy(fsync_entry_slab);
index 30779aaa9dbae57c56077cc6948c969b4d9d937e..6edcf8391dd3da6922a5845680b0b93c4b480359 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/segment.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
@@ -179,6 +176,8 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
                return false;
        if (sbi->gc_mode == GC_URGENT)
                return true;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return true;
 
        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
                        SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -267,8 +266,10 @@ retry:
                }
 next:
                /* we don't need to invalidate this in the sccessful status */
-               if (drop || recover)
+               if (drop || recover) {
                        ClearPageUptodate(page);
+                       clear_cold_data(page);
+               }
                set_page_private(page, 0);
                ClearPagePrivate(page);
                f2fs_put_page(page, 1);
@@ -374,7 +375,7 @@ static int __f2fs_commit_inmem_pages(struct inode *inode)
                .io_type = FS_DATA_IO,
        };
        struct list_head revoke_list;
-       pgoff_t last_idx = ULONG_MAX;
+       bool submit_bio = false;
        int err = 0;
 
        INIT_LIST_HEAD(&revoke_list);
@@ -409,14 +410,14 @@ retry:
                        }
                        /* record old blkaddr for revoking */
                        cur->old_addr = fio.old_blkaddr;
-                       last_idx = page->index;
+                       submit_bio = true;
                }
                unlock_page(page);
                list_move_tail(&cur->list, &revoke_list);
        }
 
-       if (last_idx != ULONG_MAX)
-               f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
+       if (submit_bio)
+               f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
 
        if (err) {
                /*
@@ -483,6 +484,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
        if (need && excess_cached_nats(sbi))
                f2fs_balance_fs_bg(sbi);
 
+       if (f2fs_is_checkpoint_ready(sbi))
+               return;
+
        /*
         * We should do GC or end up with checkpoint, if there are so many dirty
         * dir/node pages without enough free segments.
@@ -511,7 +515,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
        else
                f2fs_build_free_nids(sbi, false, false);
 
-       if (!is_idle(sbi) &&
+       if (!is_idle(sbi, REQ_TIME) &&
                (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
                return;
 
@@ -799,7 +803,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-       unsigned short valid_blocks;
+       unsigned short valid_blocks, ckpt_valid_blocks;
 
        if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
                return;
@@ -807,8 +811,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
        mutex_lock(&dirty_i->seglist_lock);
 
        valid_blocks = get_valid_blocks(sbi, segno, false);
+       ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-       if (valid_blocks == 0) {
+       if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+                               ckpt_valid_blocks == sbi->blocks_per_seg)) {
                __locate_dirty_segment(sbi, segno, PRE);
                __remove_dirty_segment(sbi, segno, DIRTY);
        } else if (valid_blocks < sbi->blocks_per_seg) {
@@ -821,6 +827,66 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
        mutex_unlock(&dirty_i->seglist_lock);
 }
 
+/* Move DIRTY segments with no valid blocks to PRE; takes seglist_lock. */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       unsigned int segno;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               if (get_valid_blocks(sbi, segno, false)) /* not empty */
+                       continue;
+               if (IS_CURSEG(sbi, segno))      /* skip current segments */
+                       continue;
+               __locate_dirty_segment(sbi, segno, PRE);
+               __remove_dirty_segment(sbi, segno, DIRTY);
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+}
+
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+       block_t holes[2] = {0, 0};      /* non-valid blocks, per DATA/NODE */
+       struct seg_entry *se;
+       unsigned int segno;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               se = get_seg_entry(sbi, segno);
+               if (IS_NODESEG(se->type))       /* tally per segment type */
+                       holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+               else
+                       holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+
+       if (holes[DATA] > ovp || holes[NODE] > ovp) /* holes exceed ovp */
+               return -EAGAIN;
+       return 0;
+}
+
+/* SBI_CP_DISABLED only: pick a DIRTY segment with no valid or ckpt blocks */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       unsigned int segno = 0;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               if (get_valid_blocks(sbi, segno, false))
+                       continue;
+               if (get_ckpt_valid_blocks(sbi, segno))
+                       continue;
+               mutex_unlock(&dirty_i->seglist_lock);
+               return segno;           /* lock already dropped above */
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+       return NULL_SEGNO;      /* no such segment */
+}
+
 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t lstart,
                block_t start, block_t len)
@@ -856,7 +922,8 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len,
-                               struct rb_node *parent, struct rb_node **p)
+                               struct rb_node *parent, struct rb_node **p,
+                               bool leftmost)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *dc;
@@ -864,7 +931,7 @@ static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
        dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
 
        rb_link_node(&dc->rb_node, parent, p);
-       rb_insert_color(&dc->rb_node, &dcc->root);
+       rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
 
        return dc;
 }
@@ -876,7 +943,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
                atomic_sub(dc->issuing, &dcc->issing_discard);
 
        list_del(&dc->list);
-       rb_erase(&dc->rb_node, &dcc->root);
+       rb_erase_cached(&dc->rb_node, &dcc->root);
        dcc->undiscard_blks -= dc->len;
 
        kmem_cache_free(discard_cmd_slab, dc);
@@ -905,9 +972,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
                dc->error = 0;
 
        if (dc->error)
-               f2fs_msg(sbi->sb, KERN_INFO,
-                       "Issue discard(%u, %u, %u) failed, ret: %d",
-                       dc->lstart, dc->start, dc->len, dc->error);
+               printk_ratelimited(
+                       "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
+                       KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
        __detach_discard_cmd(dcc, dc);
 }
 
@@ -1113,6 +1180,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct discard_cmd *dc = NULL;
+       bool leftmost = true;
 
        if (insert_p && insert_parent) {
                parent = insert_parent;
@@ -1120,9 +1188,11 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
                goto do_insert;
        }
 
-       p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+       p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
+                                                       lstart, &leftmost);
 do_insert:
-       dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
+       dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
+                                                               p, leftmost);
        if (!dc)
                return NULL;
 
@@ -1190,7 +1260,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
                                        NULL, lstart,
                                        (struct rb_entry **)&prev_dc,
                                        (struct rb_entry **)&next_dc,
-                                       &insert_p, &insert_parent, true);
+                                       &insert_p, &insert_parent, true, NULL);
        if (dc)
                prev_dc = dc;
 
@@ -1298,7 +1368,7 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
                                        NULL, pos,
                                        (struct rb_entry **)&prev_dc,
                                        (struct rb_entry **)&next_dc,
-                                       &insert_p, &insert_parent, true);
+                                       &insert_p, &insert_parent, true, NULL);
        if (!dc)
                dc = next_dc;
 
@@ -1311,7 +1381,7 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
                if (dc->state != D_PREP)
                        goto next;
 
-               if (dpolicy->io_aware && !is_idle(sbi)) {
+               if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
                        io_interrupted = true;
                        break;
                }
@@ -1371,7 +1441,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
                        f2fs_bug_on(sbi, dc->state != D_PREP);
 
                        if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
-                                                               !is_idle(sbi)) {
+                                               !is_idle(sbi, DISCARD_TIME)) {
                                io_interrupted = true;
                                break;
                        }
@@ -1600,7 +1670,9 @@ static int issue_discard_thread(void *data)
                        __wait_all_discard_cmd(sbi, &dpolicy);
                        wait_ms = dpolicy.min_interval;
                } else if (issued == -1){
-                       wait_ms = dpolicy.mid_interval;
+                       wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
+                       if (!wait_ms)
+                               wait_ms = dpolicy.mid_interval;
                } else {
                        wait_ms = dpolicy.max_interval;
                }
@@ -1725,11 +1797,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
        struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
        int i;
 
-       if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi))
+       if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
                return false;
 
        if (!force) {
-               if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
+               if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
                        SM_I(sbi)->dcc_info->nr_discards >=
                                SM_I(sbi)->dcc_info->max_discards)
                        return false;
@@ -1835,7 +1907,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
                                dirty_i->nr_dirty[PRE]--;
                }
 
-               if (!test_opt(sbi, DISCARD))
+               if (!f2fs_realtime_discard_enable(sbi))
                        continue;
 
                if (force && start >= cpc->trim_start &&
@@ -1928,7 +2000,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
        dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
        dcc->undiscard_blks = 0;
        dcc->next_pos = 0;
-       dcc->root = RB_ROOT;
+       dcc->root = RB_ROOT_CACHED;
        dcc->rbtree_check = false;
 
        init_waitqueue_head(&dcc->discard_wait_queue);
@@ -2025,12 +2097,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                        del = 0;
                }
 
-               if (f2fs_discard_en(sbi) &&
-                       !f2fs_test_and_set_bit(offset, se->discard_map))
+               if (!f2fs_test_and_set_bit(offset, se->discard_map))
                        sbi->discard_blks--;
 
                /* don't overwrite by SSR to keep node chain */
-               if (IS_NODESEG(se->type)) {
+               if (IS_NODESEG(se->type) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                        if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
                                se->ckpt_valid_blocks++;
                }
@@ -2052,10 +2124,18 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                        f2fs_bug_on(sbi, 1);
                        se->valid_blocks++;
                        del = 0;
+               } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+                       /*
+                        * If checkpoints are off, we must not reuse data that
+                        * was used in the previous checkpoint. If it was used
+                        * before, we must track that to know how much space we
+                        * really have.
+                        */
+                       if (f2fs_test_bit(offset, se->ckpt_valid_map))
+                               sbi->unusable_block_count++;
                }
 
-               if (f2fs_discard_en(sbi) &&
-                       f2fs_test_and_clear_bit(offset, se->discard_map))
+               if (f2fs_test_and_clear_bit(offset, se->discard_map))
                        sbi->discard_blks++;
        }
        if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -2335,6 +2415,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
        if (sbi->segs_per_sec != 1)
                return CURSEG_I(sbi, type)->segno;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return 0;
+
        if (test_opt(sbi, NOHEAP) &&
                (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
                return 0;
@@ -2432,6 +2515,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
        __next_free_blkoff(sbi, curseg, 0);
 
        sum_page = f2fs_get_sum_page(sbi, new_segno);
+       f2fs_bug_on(sbi, IS_ERR(sum_page));
        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
        memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
        f2fs_put_page(sum_page, 1);
@@ -2478,6 +2562,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
                        return 1;
                }
        }
+
+       /* find valid_blocks=0 in dirty list */
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               segno = get_free_segment(sbi);
+               if (segno != NULL_SEGNO) {
+                       curseg->next_segno = segno;
+                       return 1;
+               }
+       }
        return 0;
 }
 
@@ -2495,7 +2588,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
        else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
                                        type == CURSEG_WARM_NODE)
                new_curseg(sbi, type, false);
-       else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+       else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+                       likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                new_curseg(sbi, type, false);
        else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
                change_curseg(sbi, type);
@@ -2570,7 +2664,7 @@ next:
                                        NULL, start,
                                        (struct rb_entry **)&prev_dc,
                                        (struct rb_entry **)&next_dc,
-                                       &insert_p, &insert_parent, true);
+                                       &insert_p, &insert_parent, true, NULL);
        if (!dc)
                dc = next_dc;
 
@@ -2671,7 +2765,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
         * discard option. User configuration looks like using runtime discard
         * or periodic fstrim instead of it.
         */
-       if (test_opt(sbi, DISCARD))
+       if (f2fs_realtime_discard_enable(sbi))
                goto out;
 
        start_block = START_BLOCK(sbi, start_segno);
@@ -3020,6 +3114,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
        ClearPageError(page);
        f2fs_submit_page_write(&fio);
 
+       stat_inc_meta_count(sbi, page->index);
        f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
 }
 
@@ -3182,8 +3277,7 @@ void f2fs_wait_on_page_writeback(struct page *page,
        if (PageWriteback(page)) {
                struct f2fs_sb_info *sbi = F2FS_P_SB(page);
 
-               f2fs_submit_merged_write_cond(sbi, page->mapping->host,
-                                               0, page->index, type);
+               f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
                if (ordered)
                        wait_on_page_writeback(page);
                else
@@ -3191,10 +3285,14 @@ void f2fs_wait_on_page_writeback(struct page *page,
        }
 }
 
-void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
+void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
 {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *cpage;
 
+       if (!f2fs_post_read_required(inode))
+               return;
+
        if (!is_valid_data_blkaddr(sbi, blkaddr))
                return;
 
@@ -3205,6 +3303,15 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
        }
 }
 
+/*
+ * Call f2fs_wait_on_block_writeback() for each of the @len consecutive block
+ * addresses starting at @blkaddr, in ascending order.  Nothing is done when
+ * @len is zero.
+ */
+void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
+                                                               block_t len)
+{
+       while (len--)
+               f2fs_wait_on_block_writeback(inode, blkaddr++);
+}
+
 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
 {
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -3762,13 +3869,11 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
                        return -ENOMEM;
 #endif
 
-               if (f2fs_discard_en(sbi)) {
-                       sit_i->sentries[start].discard_map
-                               = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
-                                                               GFP_KERNEL);
-                       if (!sit_i->sentries[start].discard_map)
-                               return -ENOMEM;
-               }
+               sit_i->sentries[start].discard_map
+                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
+                                                       GFP_KERNEL);
+               if (!sit_i->sentries[start].discard_map)
+                       return -ENOMEM;
        }
 
        sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -3904,6 +4009,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 
                        se = &sit_i->sentries[start];
                        page = get_current_sit_page(sbi, start);
+                       if (IS_ERR(page))
+                               return PTR_ERR(page);
                        sit_blk = (struct f2fs_sit_block *)page_address(page);
                        sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
                        f2fs_put_page(page, 1);
@@ -3916,18 +4023,16 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
                                total_node_blocks += se->valid_blocks;
 
                        /* build discard map only one time */
-                       if (f2fs_discard_en(sbi)) {
-                               if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
-                                       memset(se->discard_map, 0xff,
-                                               SIT_VBLOCK_MAP_SIZE);
-                               } else {
-                                       memcpy(se->discard_map,
-                                               se->cur_valid_map,
-                                               SIT_VBLOCK_MAP_SIZE);
-                                       sbi->discard_blks +=
-                                               sbi->blocks_per_seg -
-                                               se->valid_blocks;
-                               }
+                       if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+                               memset(se->discard_map, 0xff,
+                                       SIT_VBLOCK_MAP_SIZE);
+                       } else {
+                               memcpy(se->discard_map,
+                                       se->cur_valid_map,
+                                       SIT_VBLOCK_MAP_SIZE);
+                               sbi->discard_blks +=
+                                       sbi->blocks_per_seg -
+                                       se->valid_blocks;
                        }
 
                        if (sbi->segs_per_sec > 1)
@@ -3965,16 +4070,13 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
                if (IS_NODESEG(se->type))
                        total_node_blocks += se->valid_blocks;
 
-               if (f2fs_discard_en(sbi)) {
-                       if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
-                               memset(se->discard_map, 0xff,
-                                                       SIT_VBLOCK_MAP_SIZE);
-                       } else {
-                               memcpy(se->discard_map, se->cur_valid_map,
-                                                       SIT_VBLOCK_MAP_SIZE);
-                               sbi->discard_blks += old_valid_blocks;
-                               sbi->discard_blks -= se->valid_blocks;
-                       }
+               if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+                       memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+               } else {
+                       memcpy(se->discard_map, se->cur_valid_map,
+                                               SIT_VBLOCK_MAP_SIZE);
+                       sbi->discard_blks += old_valid_blocks;
+                       sbi->discard_blks -= se->valid_blocks;
                }
 
                if (sbi->segs_per_sec > 1) {
index b3d9e317ff0c142b111488911edeb222dfebdbcb..ab3465faddf1336a7f8f7892bd6685195641bd37 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/segment.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
@@ -342,6 +339,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
                return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+/* Return the ckpt_valid_blocks counter of the segment entry for @segno. */
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+                               unsigned int segno)
+{
+       struct seg_entry *se = get_seg_entry(sbi, segno);
+
+       return se->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
                                        struct f2fs_sit_entry *rs)
 {
@@ -579,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
                reserved_sections(sbi) + needed);
 }
 
+/*
+ * Return -ENOSPC when SBI_CP_DISABLED is set and
+ * has_not_enough_free_secs(sbi, 0, 0) reports a shortage; return 0 in every
+ * other case.  The free-section check is only evaluated while the flag is
+ * set.
+ */
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)) &&
+                       unlikely(has_not_enough_free_secs(sbi, 0, 0)))
+               return -ENOSPC;
+
+       return 0;
+}
+
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
 {
        return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
index 36cfd816c160827f6f65c358e1465d016eee1dca..9e13db994fdf4dcfb3955d3bc1fd9f4dbdb7a121 100644 (file)
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs shrinker support
  *   the basic infra was copied from fs/ubifs/shrinker.c
  *
  * Copyright (c) 2015 Motorola Mobility
  * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
index 896b885f504e782f2bc37a4aba4da5e9870f2b20..af58b2cc21b81ecafc73c550f5a8228dfaafe19a 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/super.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/init.h>
@@ -53,9 +50,10 @@ char *f2fs_fault_name[FAULT_MAX] = {
        [FAULT_DIR_DEPTH]       = "too big dir depth",
        [FAULT_EVICT_INODE]     = "evict_inode fail",
        [FAULT_TRUNCATE]        = "truncate fail",
-       [FAULT_IO]              = "IO error",
+       [FAULT_READ_IO]         = "read IO error",
        [FAULT_CHECKPOINT]      = "checkpoint error",
        [FAULT_DISCARD]         = "discard error",
+       [FAULT_WRITE_IO]        = "write IO error",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -138,6 +136,7 @@ enum {
        Opt_alloc,
        Opt_fsync,
        Opt_test_dummy_encryption,
+       Opt_checkpoint,
        Opt_err,
 };
 
@@ -196,6 +195,7 @@ static match_table_t f2fs_tokens = {
        {Opt_alloc, "alloc_mode=%s"},
        {Opt_fsync, "fsync_mode=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
+       {Opt_checkpoint, "checkpoint=%s"},
        {Opt_err, NULL},
 };
 
@@ -207,7 +207,7 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
-       printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+       printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
        va_end(args);
 }
 
@@ -360,7 +360,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
 static int parse_options(struct super_block *sb, char *options)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
-       struct request_queue *q;
        substring_t args[MAX_OPT_ARGS];
        char *p, *name;
        int arg = 0;
@@ -415,14 +414,7 @@ static int parse_options(struct super_block *sb, char *options)
                                return -EINVAL;
                        break;
                case Opt_discard:
-                       q = bdev_get_queue(sb->s_bdev);
-                       if (blk_queue_discard(q)) {
-                               set_opt(sbi, DISCARD);
-                       } else if (!f2fs_sb_has_blkzoned(sb)) {
-                               f2fs_msg(sb, KERN_WARNING,
-                                       "mounting with \"discard\" option, but "
-                                       "the device does not support discard");
-                       }
+                       set_opt(sbi, DISCARD);
                        break;
                case Opt_nodiscard:
                        if (f2fs_sb_has_blkzoned(sb)) {
@@ -602,28 +594,31 @@ static int parse_options(struct super_block *sb, char *options)
                        }
                        F2FS_OPTION(sbi).write_io_size_bits = arg;
                        break;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
                case Opt_fault_injection:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
                        f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
                        set_opt(sbi, FAULT_INJECTION);
-#else
-                       f2fs_msg(sb, KERN_INFO,
-                               "FAULT_INJECTION was not selected");
-#endif
                        break;
+
                case Opt_fault_type:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
                        f2fs_build_fault_attr(sbi, 0, arg);
                        set_opt(sbi, FAULT_INJECTION);
+                       break;
 #else
+               case Opt_fault_injection:
                        f2fs_msg(sb, KERN_INFO,
-                               "FAULT_INJECTION was not selected");
-#endif
+                               "fault_injection options not supported");
+                       break;
+
+               case Opt_fault_type:
+                       f2fs_msg(sb, KERN_INFO,
+                               "fault_type options not supported");
                        break;
+#endif
                case Opt_lazytime:
                        sb->s_flags |= SB_LAZYTIME;
                        break;
@@ -776,6 +771,23 @@ static int parse_options(struct super_block *sb, char *options)
                                        "Test dummy encryption mount option ignored");
 #endif
                        break;
+               case Opt_checkpoint:
+                       name = match_strdup(&args[0]);
+                       if (!name)
+                               return -ENOMEM;
+
+                       if (strlen(name) == 6 &&
+                                       !strncmp(name, "enable", 6)) {
+                               clear_opt(sbi, DISABLE_CHECKPOINT);
+                       } else if (strlen(name) == 7 &&
+                                       !strncmp(name, "disable", 7)) {
+                               set_opt(sbi, DISABLE_CHECKPOINT);
+                       } else {
+                               kfree(name);
+                               return -EINVAL;
+                       }
+                       kfree(name);
+                       break;
                default:
                        f2fs_msg(sb, KERN_ERR,
                                "Unrecognized mount option \"%s\" or missing value",
@@ -834,6 +846,12 @@ static int parse_options(struct super_block *sb, char *options)
                }
        }
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+               f2fs_msg(sb, KERN_ERR,
+                               "LFS not compatible with checkpoint=disable\n");
+               return -EINVAL;
+       }
+
        /* Not pass down write hints if the number of active logs is lesser
         * than NR_CURSEG_TYPE.
         */
@@ -1021,8 +1039,8 @@ static void f2fs_put_super(struct super_block *sb)
         * But, the previous checkpoint was not done by umount, it needs to do
         * clean checkpoint again.
         */
-       if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-                       !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+       if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+                       !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
                struct cp_control cpc = {
                        .reason = CP_UMOUNT,
                };
@@ -1032,7 +1050,8 @@ static void f2fs_put_super(struct super_block *sb)
        /* be sure to wait for any on-going discard commands */
        dropped = f2fs_wait_discard_bios(sbi);
 
-       if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
+       if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
+                                       !sbi->discard_blks && !dropped) {
                struct cp_control cpc = {
                        .reason = CP_UMOUNT | CP_TRIMMED,
                };
@@ -1093,6 +1112,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
        if (unlikely(f2fs_cp_error(sbi)))
                return 0;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return 0;
 
        trace_f2fs_sync_fs(sb, sync);
 
@@ -1192,6 +1213,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_blocks = total_count - start_count;
        buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
                                                sbi->current_reserved_blocks;
+       if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+               buf->f_bfree = 0;
+       else
+               buf->f_bfree -= sbi->unusable_block_count;
+
        if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
                buf->f_bavail = buf->f_bfree -
                                F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1336,7 +1362,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
                                from_kgid_munged(&init_user_ns,
                                        F2FS_OPTION(sbi).s_resgid));
        if (F2FS_IO_SIZE_BITS(sbi))
-               seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
+               seq_printf(seq, ",io_bits=%u",
+                               F2FS_OPTION(sbi).write_io_size_bits);
 #ifdef CONFIG_F2FS_FAULT_INJECTION
        if (test_opt(sbi, FAULT_INJECTION)) {
                seq_printf(seq, ",fault_injection=%u",
@@ -1370,6 +1397,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
        else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
                seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT))
+               seq_puts(seq, ",checkpoint=disable");
+
        if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
                seq_printf(seq, ",fsync_mode=%s", "posix");
        else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1397,10 +1427,10 @@ static void default_options(struct f2fs_sb_info *sbi)
        set_opt(sbi, INLINE_DENTRY);
        set_opt(sbi, EXTENT_CACHE);
        set_opt(sbi, NOHEAP);
+       clear_opt(sbi, DISABLE_CHECKPOINT);
        sbi->sb->s_flags |= SB_LAZYTIME;
        set_opt(sbi, FLUSH_MERGE);
-       if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
-               set_opt(sbi, DISCARD);
+       set_opt(sbi, DISCARD);
        if (f2fs_sb_has_blkzoned(sbi->sb))
                set_opt_mode(sbi, F2FS_MOUNT_LFS);
        else
@@ -1419,6 +1449,57 @@ static void default_options(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+/*
+ * Switch the filesystem into checkpoint-disabled mode.
+ *
+ * Steps, in order:
+ *  1. set SB_ACTIVE on the superblock;
+ *  2. under gc_mutex, restart the DISABLE_TIME window and call f2fs_gc()
+ *     until it returns -ENODATA, fails with an error other than -EAGAIN,
+ *     or the window is over;
+ *  3. sync the filesystem;
+ *  4. give up with -EAGAIN if f2fs_disable_cp_again() returns non-zero;
+ *  5. under gc_mutex, set SBI_CP_DISABLED, write a CP_PAUSE checkpoint and
+ *     reset unusable_block_count to 0.
+ *
+ * Returns 0 on success, the error from f2fs_gc() or sync_filesystem(), or
+ * -EAGAIN as described in step 4.
+ */
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+       /*
+        * Designated initializer: every cp_control field other than ->reason
+        * is zeroed instead of being left as indeterminate stack contents
+        * when the structure is handed to f2fs_write_checkpoint().
+        */
+       struct cp_control cpc = {
+               .reason = CP_PAUSE,
+       };
+       int err;
+
+       sbi->sb->s_flags |= SB_ACTIVE;
+
+       /*
+        * NOTE(review): gc_mutex is taken once here while f2fs_gc() is called
+        * in a loop - confirm f2fs_gc() returns with the mutex still held.
+        */
+       mutex_lock(&sbi->gc_mutex);
+       f2fs_update_time(sbi, DISABLE_TIME);
+
+       while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+               err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+               if (err == -ENODATA)
+                       break;
+               if (err && err != -EAGAIN) {
+                       mutex_unlock(&sbi->gc_mutex);
+                       return err;
+               }
+       }
+       mutex_unlock(&sbi->gc_mutex);
+
+       err = sync_filesystem(sbi->sb);
+       if (err)
+               return err;
+
+       if (f2fs_disable_cp_again(sbi))
+               return -EAGAIN;
+
+       mutex_lock(&sbi->gc_mutex);
+       set_sbi_flag(sbi, SBI_CP_DISABLED);
+       /*
+        * NOTE(review): any failure reported by f2fs_write_checkpoint() is
+        * not examined here - confirm whether it should be propagated.
+        */
+       f2fs_write_checkpoint(sbi, &cpc);
+
+       sbi->unusable_block_count = 0;
+       mutex_unlock(&sbi->gc_mutex);
+       return 0;
+}
+
+/*
+ * Leave checkpoint-disabled mode.
+ *
+ * Under gc_mutex: run f2fs_dirty_to_prefree(), clear SBI_CP_DISABLED and
+ * set SBI_IS_DIRTY.  After dropping the mutex, call f2fs_sync_fs() with
+ * sync=1.  Returns nothing; the result of f2fs_sync_fs() is not examined.
+ */
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+       mutex_lock(&sbi->gc_mutex);
+       f2fs_dirty_to_prefree(sbi);
+
+       clear_sbi_flag(sbi, SBI_CP_DISABLED);
+       set_sbi_flag(sbi, SBI_IS_DIRTY);
+       mutex_unlock(&sbi->gc_mutex);
+
+       /*
+        * f2fs_sync_fs() returns early while SBI_CP_DISABLED is set, so the
+        * flag has to be cleared before this call.
+        */
+       f2fs_sync_fs(sbi->sb, 1);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1428,6 +1509,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        bool need_restart_gc = false;
        bool need_stop_gc = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+       bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+       bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
        int i, j;
 #endif
@@ -1472,6 +1555,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        err = parse_options(sb, data);
        if (err)
                goto restore_opts;
+       checkpoint_changed =
+                       disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
        /*
         * Previous and new state of filesystem is RO,
@@ -1485,7 +1570,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                err = dquot_suspend(sb, -1);
                if (err < 0)
                        goto restore_opts;
-       } else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) {
+       } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
                /* dquot_resume needs RW */
                sb->s_flags &= ~SB_RDONLY;
                if (sb_any_quota_suspended(sb)) {
@@ -1505,6 +1590,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
+       if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+               err = -EINVAL;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                       "disabling checkpoint not compatible with read-only");
+               goto restore_opts;
+       }
+
        /*
         * We stop the GC thread if FS is mounted as RO
         * or if background_gc = off is passed in mount
@@ -1533,6 +1625,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                clear_sbi_flag(sbi, SBI_IS_CLOSE);
        }
 
+       if (checkpoint_changed) {
+               if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+                       err = f2fs_disable_checkpoint(sbi);
+                       if (err)
+                               goto restore_gc;
+               } else {
+                       f2fs_enable_checkpoint(sbi);
+               }
+       }
+
        /*
         * We stop issue flush thread if FS is mounted as RO
         * or if flush_merge is not passed in mount option.
@@ -1556,6 +1658,7 @@ skip:
                (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
 
        limit_reserve_root(sbi);
+       *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
        return 0;
 restore_gc:
        if (need_restart_gc) {
@@ -1608,6 +1711,7 @@ repeat:
                                congestion_wait(BLK_RW_ASYNC, HZ/50);
                                goto repeat;
                        }
+                       set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
                        return PTR_ERR(page);
                }
 
@@ -1619,6 +1723,7 @@ repeat:
                }
                if (unlikely(!PageUptodate(page))) {
                        f2fs_put_page(page, 1);
+                       set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
                        return -EIO;
                }
 
@@ -1660,6 +1765,7 @@ retry:
                                congestion_wait(BLK_RW_ASYNC, HZ/50);
                                goto retry;
                        }
+                       set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
                        break;
                }
 
@@ -1696,6 +1802,12 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
 
 static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
 {
+       if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
+               f2fs_msg(sbi->sb, KERN_ERR,
+                       "quota sysfile may be corrupted, skip loading it");
+               return 0;
+       }
+
        return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
                                        F2FS_OPTION(sbi).s_jquota_fmt, type);
 }
@@ -1766,7 +1878,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
                test_opt(F2FS_SB(sb), PRJQUOTA),
        };
 
-       sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+       if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
+               f2fs_msg(sb, KERN_ERR,
+                       "quota file may be corrupted, skip loading it");
+               return 0;
+       }
+
+       sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+
        for (type = 0; type < MAXQUOTAS; type++) {
                qf_inum = f2fs_qf_ino(sb, type);
                if (qf_inum) {
@@ -1780,6 +1899,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
                                        "fsck to fix.", type, err);
                                for (type--; type >= 0; type--)
                                        dquot_quota_off(sb, type);
+                               set_sbi_flag(F2FS_SB(sb),
+                                               SBI_QUOTA_NEED_REPAIR);
                                return err;
                        }
                }
@@ -1787,35 +1908,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
        return 0;
 }
 
-static int f2fs_quota_sync(struct super_block *sb, int type)
+int f2fs_quota_sync(struct super_block *sb, int type)
 {
+       struct f2fs_sb_info *sbi = F2FS_SB(sb);
        struct quota_info *dqopt = sb_dqopt(sb);
        int cnt;
        int ret;
 
        ret = dquot_writeback_dquots(sb, type);
        if (ret)
-               return ret;
+               goto out;
 
        /*
         * Now when everything is written we can discard the pagecache so
         * that userspace sees the changes.
         */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+               struct address_space *mapping;
+
                if (type != -1 && cnt != type)
                        continue;
                if (!sb_has_quota_active(sb, cnt))
                        continue;
 
-               ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+               mapping = dqopt->files[cnt]->i_mapping;
+
+               ret = filemap_fdatawrite(mapping);
+               if (ret)
+                       goto out;
+
+               /* journalled quota: skip the wait and pagecache drop below */
+               if (is_journalled_quota(sbi))
+                       continue;
+
+               ret = filemap_fdatawait(mapping);
                if (ret)
-                       return ret;
+                       set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 
                inode_lock(dqopt->files[cnt]);
                truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
                inode_unlock(dqopt->files[cnt]);
        }
-       return 0;
+out:
+       if (ret)
+               set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+       return ret;
 }
 
 static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
@@ -1836,8 +1973,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
 
        inode_lock(inode);
        F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL;
-       inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
-                                       S_NOATIME | S_IMMUTABLE);
+       f2fs_set_inode_flags(inode);
        inode_unlock(inode);
        f2fs_mark_inode_dirty_sync(inode, false);
 
@@ -1852,7 +1988,9 @@ static int f2fs_quota_off(struct super_block *sb, int type)
        if (!inode || !igrab(inode))
                return dquot_quota_off(sb, type);
 
-       f2fs_quota_sync(sb, type);
+       err = f2fs_quota_sync(sb, type);
+       if (err)
+               goto out_put;
 
        err = dquot_quota_off(sb, type);
        if (err || f2fs_sb_has_quota_ino(sb))
@@ -1860,7 +1998,7 @@ static int f2fs_quota_off(struct super_block *sb, int type)
 
        inode_lock(inode);
        F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL);
-       inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+       f2fs_set_inode_flags(inode);
        inode_unlock(inode);
        f2fs_mark_inode_dirty_sync(inode, false);
 out_put:
@@ -1871,9 +2009,88 @@ out_put:
 void f2fs_quota_off_umount(struct super_block *sb)
 {
        int type;
+       int err;
+
+       for (type = 0; type < MAXQUOTAS; type++) {
+               err = f2fs_quota_off(sb, type);
+               if (err) {
+                       int ret = dquot_quota_off(sb, type);
+
+                       f2fs_msg(sb, KERN_ERR,
+                               "Fail to turn off disk quota "
+                               "(type: %d, err: %d, ret:%d), Please "
+                               "run fsck to fix it.", type, err, ret);
+                       set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+               }
+       }
+}
+
+static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
+{
+       struct quota_info *dqopt = sb_dqopt(sb);
+       int type;
+
+       for (type = 0; type < MAXQUOTAS; type++) {
+               if (!dqopt->files[type])
+                       continue;
+               f2fs_inode_synced(dqopt->files[type]);
+       }
+}
+
+static int f2fs_dquot_commit(struct dquot *dquot)
+{
+       int ret;
+
+       ret = dquot_commit(dquot);
+       if (ret < 0)
+               set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+       return ret;
+}
+
+static int f2fs_dquot_acquire(struct dquot *dquot)
+{
+       int ret;
+
+       ret = dquot_acquire(dquot);
+       if (ret < 0)
+               set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+
+       return ret;
+}
+
+static int f2fs_dquot_release(struct dquot *dquot)
+{
+       int ret;
+
+       ret = dquot_release(dquot);
+       if (ret < 0)
+               set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+       return ret;
+}
+
+static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
+{
+       struct super_block *sb = dquot->dq_sb;
+       struct f2fs_sb_info *sbi = F2FS_SB(sb);
+       int ret;
+
+       ret = dquot_mark_dquot_dirty(dquot);
+
+       /* journalled quota: raise SBI_QUOTA_NEED_FLUSH, whatever ret is */
+       if (is_journalled_quota(sbi))
+               set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+       return ret;
+}
 
-       for (type = 0; type < MAXQUOTAS; type++)
-               f2fs_quota_off(sb, type);
+static int f2fs_dquot_commit_info(struct super_block *sb, int type)
+{
+       int ret;
+
+       ret = dquot_commit_info(sb, type);
+       if (ret < 0)
+               set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+       return ret;
 }
 
 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
@@ -1884,11 +2101,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
 
 static const struct dquot_operations f2fs_quota_operations = {
        .get_reserved_space = f2fs_get_reserved_space,
-       .write_dquot    = dquot_commit,
-       .acquire_dquot  = dquot_acquire,
-       .release_dquot  = dquot_release,
-       .mark_dirty     = dquot_mark_dquot_dirty,
-       .write_info     = dquot_commit_info,
+       .write_dquot    = f2fs_dquot_commit,
+       .acquire_dquot  = f2fs_dquot_acquire,
+       .release_dquot  = f2fs_dquot_release,
+       .mark_dirty     = f2fs_dquot_mark_dquot_dirty,
+       .write_info     = f2fs_dquot_commit_info,
        .alloc_dquot    = dquot_alloc,
        .destroy_dquot  = dquot_destroy,
        .get_projid     = f2fs_get_projid,
@@ -1906,6 +2123,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
        .get_nextdqblk  = dquot_get_next_dqblk,
 };
 #else
+int f2fs_quota_sync(struct super_block *sb, int type)
+{
+       return 0;
+}
+
 void f2fs_quota_off_umount(struct super_block *sb)
 {
 }
@@ -2170,6 +2392,26 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
                                        (bh->b_data + F2FS_SUPER_OFFSET);
        struct super_block *sb = sbi->sb;
        unsigned int blocksize;
+       size_t crc_offset = 0;
+       __u32 crc = 0;
+
+       /* Check checksum_offset and crc in superblock */
+       if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) {
+               crc_offset = le32_to_cpu(raw_super->checksum_offset);
+               if (crc_offset !=
+                       offsetof(struct f2fs_super_block, crc)) {
+                       f2fs_msg(sb, KERN_INFO,
+                               "Invalid SB checksum offset: %zu",
+                               crc_offset);
+                       return 1;
+               }
+               crc = le32_to_cpu(raw_super->crc);
+               if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
+                       f2fs_msg(sb, KERN_INFO,
+                               "Invalid SB checksum value: %u", crc);
+                       return 1;
+               }
+       }
 
        if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
                f2fs_msg(sb, KERN_INFO,
@@ -2320,7 +2562,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
        unsigned int segment_count_main;
        unsigned int cp_pack_start_sum, cp_payload;
        block_t user_block_count;
-       int i;
+       int i, j;
 
        total = le32_to_cpu(raw_super->segment_count);
        fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -2361,11 +2603,43 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
                if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
                        le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
                        return 1;
+               for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) {
+                       if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
+                               le32_to_cpu(ckpt->cur_node_segno[j])) {
+                               f2fs_msg(sbi->sb, KERN_ERR,
+                                       "Node segment (%u, %u) has the same "
+                                       "segno: %u", i, j,
+                                       le32_to_cpu(ckpt->cur_node_segno[i]));
+                               return 1;
+                       }
+               }
        }
        for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
                if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
                        le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
                        return 1;
+               for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) {
+                       if (le32_to_cpu(ckpt->cur_data_segno[i]) ==
+                               le32_to_cpu(ckpt->cur_data_segno[j])) {
+                               f2fs_msg(sbi->sb, KERN_ERR,
+                                       "Data segment (%u, %u) has the same "
+                                       "segno: %u", i, j,
+                                       le32_to_cpu(ckpt->cur_data_segno[i]));
+                               return 1;
+                       }
+               }
+       }
+       /* each node log is compared with every data log (j restarts at 0) */
+       for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+               for (j = 0; j < NR_CURSEG_DATA_TYPE; j++) {
+                       if (le32_to_cpu(ckpt->cur_node_segno[i]) !=
+                               le32_to_cpu(ckpt->cur_data_segno[j]))
+                               continue;
+                       f2fs_msg(sbi->sb, KERN_ERR, "Node segment (%u) and "
+                               "Data segment (%u) has the same segno: %u",
+                               i, j, le32_to_cpu(ckpt->cur_node_segno[i]));
+                       return 1;
+               }
+       }
 
        sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
@@ -2423,6 +2697,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->dir_level = DEF_DIR_LEVEL;
        sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
        sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
+       sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
+       sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
+       sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
        clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
        for (i = 0; i < NR_COUNT_TYPE; i++)
@@ -2453,8 +2730,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
        if (err)
                return err;
 
-       return percpu_counter_init(&sbi->total_valid_inode_count, 0,
+       err = percpu_counter_init(&sbi->total_valid_inode_count, 0,
                                                                GFP_KERNEL);
+       if (err)
+               percpu_counter_destroy(&sbi->alloc_valid_block_count);
+
+       return err;
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -2589,6 +2870,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 {
        struct buffer_head *bh;
+       __u32 crc = 0;
        int err;
 
        if ((recover && f2fs_readonly(sbi->sb)) ||
@@ -2597,6 +2879,13 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
                return -EROFS;
        }
 
+       /* we should update superblock crc here */
+       if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) {
+               crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
+                               offsetof(struct f2fs_super_block, crc));
+               F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
+       }
+
        /* write back-up superblock first */
        bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
        if (!bh)
@@ -2866,7 +3155,7 @@ try_onemore:
                                     GFP_KERNEL);
                if (!sbi->write_io[i]) {
                        err = -ENOMEM;
-                       goto free_options;
+                       goto free_bio_info;
                }
 
                for (j = HOT; j < n; j++) {
@@ -2909,6 +3198,9 @@ try_onemore:
                goto free_meta_inode;
        }
 
+       if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
+               set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
        /* Initialize device list */
        err = f2fs_scan_devices(sbi);
        if (err) {
@@ -3007,11 +3299,9 @@ try_onemore:
        /* Enable quota usage during mount */
        if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
                err = f2fs_enable_quotas(sb);
-               if (err) {
+               if (err)
                        f2fs_msg(sb, KERN_ERR,
                                "Cannot turn on quotas: error %d", err);
-                       goto free_sysfs;
-               }
        }
 #endif
        /* if there are nt orphan nodes free them */
@@ -3019,6 +3309,9 @@ try_onemore:
        if (err)
                goto free_meta;
 
+       if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+               goto skip_recovery;
+
        /* recover fsynced data */
        if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
                /*
@@ -3058,6 +3351,14 @@ skip_recovery:
        /* f2fs_recover_fsync_data() cleared this already */
        clear_sbi_flag(sbi, SBI_POR_DOING);
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+               err = f2fs_disable_checkpoint(sbi);
+               if (err)
+                       goto free_meta;
+       } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+               f2fs_enable_checkpoint(sbi);
+       }
+
        /*
         * If filesystem is not mounted as read-only then
         * do start the gc_thread.
@@ -3090,10 +3391,10 @@ skip_recovery:
 
 free_meta:
 #ifdef CONFIG_QUOTA
+       f2fs_truncate_quota_inode_pages(sb);
        if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb))
                f2fs_quota_off_umount(sbi->sb);
 #endif
-       f2fs_sync_inode_meta(sbi);
        /*
         * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes()
         * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
@@ -3101,9 +3402,6 @@ free_meta:
         * falls into an infinite loop in f2fs_sync_meta_pages().
         */
        truncate_inode_pages_final(META_MAPPING(sbi));
-#ifdef CONFIG_QUOTA
-free_sysfs:
-#endif
        f2fs_unregister_sysfs(sbi);
 free_root_inode:
        dput(sb->s_root);
@@ -3175,6 +3473,9 @@ static void kill_f2fs_super(struct super_block *sb)
                        };
                        f2fs_write_checkpoint(sbi, &cpc);
                }
+
+               if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb))
+                       sb->s_flags &= ~SB_RDONLY;
        }
        kill_block_super(sb);
 }
index 81c0e5337443dee31c5339be2a305f16e91c9ddf..b777cbdd796baad3a62e284956ac1a3d5cc5b145 100644 (file)
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs sysfs interface
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
  * Copyright (c) 2017 Chao Yu <chao@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/compiler.h>
 #include <linux/proc_fs.h>
@@ -120,6 +117,9 @@ static ssize_t features_show(struct f2fs_attr *a,
        if (f2fs_sb_has_lost_found(sb))
                len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
                                len ? ", " : "", "lost_found");
+       if (f2fs_sb_has_sb_chksum(sb))
+               len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+                               len ? ", " : "", "sb_checksum");
        len += snprintf(buf + len, PAGE_SIZE - len, "\n");
        return len;
 }
@@ -337,6 +337,7 @@ enum feat_id {
        FEAT_QUOTA_INO,
        FEAT_INODE_CRTIME,
        FEAT_LOST_FOUND,
+       FEAT_SB_CHECKSUM,
 };
 
 static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -353,6 +354,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
        case FEAT_QUOTA_INO:
        case FEAT_INODE_CRTIME:
        case FEAT_LOST_FOUND:
+       case FEAT_SB_CHECKSUM:
                return snprintf(buf, PAGE_SIZE, "supported\n");
        }
        return 0;
@@ -407,6 +409,9 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
+                                       interval_time[DISCARD_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
@@ -434,6 +439,7 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
 F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
 F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
 F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
+F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -460,6 +466,8 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(dirty_nats_ratio),
        ATTR_LIST(cp_interval),
        ATTR_LIST(idle_interval),
+       ATTR_LIST(discard_idle_interval),
+       ATTR_LIST(gc_idle_interval),
        ATTR_LIST(iostat_enable),
        ATTR_LIST(readdir_ra),
        ATTR_LIST(gc_pin_file_thresh),
@@ -491,6 +499,7 @@ static struct attribute *f2fs_feat_attrs[] = {
        ATTR_LIST(quota_ino),
        ATTR_LIST(inode_crtime),
        ATTR_LIST(lost_found),
+       ATTR_LIST(sb_checksum),
        NULL,
 };
 
index a1fcd00bbb2bd6e1b73750ed76d9b57c25e25705..ce2a5eb210b6681f5b5f317314ca9335eb7c9d9b 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs IO tracer
  *
  * Copyright (c) 2014 Motorola Mobility
  * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
index 67db24ac1e85aa6d6ded7b4bda21d63935d4e233..e8075fc5b2284e9a00564d4d416c4115ef1df566 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs IO tracer
  *
  * Copyright (c) 2014 Motorola Mobility
  * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_TRACE_H__
 #define __F2FS_TRACE_H__
index 77a010e625f503b7ec54ca0daef6b8d5d5d4cfaf..7261245c208dc5f0ebd229460b2cfadf74119d7d 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/xattr.c
  *
  *  suggestion of Luka Renko <luka.renko@hermes.si>.
  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
  *  Red Hat Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/rwsem.h>
 #include <linux/f2fs_fs.h>
index dbcd1d16e66982e07233e66fe400e95597a6c2df..67db134da0f5f6da01ff505d956e60069114767d 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/xattr.h
  *
@@ -9,10 +10,6 @@
  * On-disk format of extended attributes for the ext2 filesystem.
  *
  * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_XATTR_H__
 #define __F2FS_XATTR_H__
index f70f8ac9c4f442a239f32ca948c92cfa32d2e09e..d7711048ef93b81827f2d593c08afeb5611fc6f0 100644 (file)
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * include/linux/f2fs_fs.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _LINUX_F2FS_FS_H
 #define _LINUX_F2FS_FS_H
@@ -112,12 +109,15 @@ struct f2fs_super_block {
        struct f2fs_device devs[MAX_DEVICES];   /* device list */
        __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */
        __u8 hot_ext_count;             /* # of hot file extension */
-       __u8 reserved[314];             /* valid reserved region */
+       __u8 reserved[310];             /* valid reserved region */
+       __le32 crc;                     /* checksum of superblock */
 } __packed;
 
 /*
  * For checkpoint
  */
+#define CP_DISABLED_FLAG               0x00001000
+#define CP_QUOTA_NEED_FSCK_FLAG                0x00000800
 #define CP_LARGE_NAT_BITMAP_FLAG       0x00000400
 #define CP_NOCRC_RECOVERY_FLAG 0x00000200
 #define CP_TRIMMED_FLAG                0x00000100