Merge tag 'f2fs-for-5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Aug 2020 01:33:22 +0000 (18:33 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Aug 2020 01:33:22 +0000 (18:33 -0700)
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've added two small interfaces: (a) GC_URGENT_LOW
  mode for performance and (b) F2FS_IOC_SEC_TRIM_FILE ioctl for
  security.

  The new GC mode allows Android to run some lower priority GCs in
  background, while the new ioctl discards user information without race
  conditions when the account is removed.

  In addition, some patches were merged to address latency-related
  issues. We've also fixed some compression-related bugs as well as
  edge-case race conditions.

  Enhancements:
   - add GC_URGENT_LOW mode in gc_urgent
   - introduce F2FS_IOC_SEC_TRIM_FILE ioctl
   - bypass racy readahead to improve read latencies
   - shrink node_write lock coverage to avoid long latency

  Bug fixes:
   - fix missing compression flag control, i_size, and mount option
   - fix deadlock between quota writes and checkpoint
   - remove inode eviction path in synchronous path to avoid deadlock
   - fix to wait GCed compressed page writeback
   - fix a kernel panic in f2fs_is_compressed_page
   - check page dirty status before writeback
   - wait page writeback before update in node page write flow
   - fix a race condition between f2fs_write_end_io and f2fs_del_fsync_node_entry

  We've added some minor sanity checks and refactored trivial code
  blocks for better readability and debugging information"

* tag 'f2fs-for-5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (52 commits)
  f2fs: prepare a waiter before entering io_schedule
  f2fs: update_sit_entry: Make the judgment condition of f2fs_bug_on more intuitive
  f2fs: replace test_and_set/clear_bit() with set/clear_bit()
  f2fs: make file immutable even if releasing zero compression block
  f2fs: compress: disable compression mount option if compression is off
  f2fs: compress: add sanity check during compressed cluster read
  f2fs: use macro instead of f2fs verity version
  f2fs: fix deadlock between quota writes and checkpoint
  f2fs: correct comment of f2fs_exist_written_data
  f2fs: compress: delay temp page allocation
  f2fs: compress: fix to update isize when overwriting compressed file
  f2fs: space related cleanup
  f2fs: fix use-after-free issue
  f2fs: Change the type of f2fs_flush_inline_data() to void
  f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl
  f2fs: should avoid inode eviction in synchronous path
  f2fs: segment.h: delete a duplicated word
  f2fs: compress: fix to avoid memory leak on cc->cpages
  f2fs: use generic names for generic ioctls
  f2fs: don't keep meta inode pages used for compressed block migration
  ...

23 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/filesystems/f2fs.rst
fs/f2fs/checkpoint.c
fs/f2fs/compress.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/verity.c
fs/f2fs/xattr.c
include/trace/events/f2fs.h

index 4bb93a06d8abc52bb3cc6e5bc38005c202294d10..7f730c4c8df225ed07335a529678cd05b9248f99 100644 (file)
@@ -229,7 +229,9 @@ Date:               August 2017
 Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:   Do background GC agressively when set. When gc_urgent = 1,
                background thread starts to do GC by given gc_urgent_sleep_time
-               interval. It is set to 0 by default.
+               interval. When gc_urgent = 2, F2FS will lower the bar of
+               checking idle in order to process outstanding discard commands
+               and GC a little bit aggressively. It is set to 0 by default.
 
 What:          /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
 Date:          August 2017
index a11d329542f968199dc3a4f4876d84d092f42537..ec8d99703ecb8642f2fb279186f7ea3bdcde20ac 100644 (file)
@@ -258,6 +258,8 @@ compress_extension=%s        Support adding specified extension, so that f2fs can enab
                         on compression extension list and enable compression on
                         these file by default rather than to enable it via ioctl.
                         For other files, we can still enable compression via ioctl.
+                        Note that, there is one reserved special extension '*', it
+                        can be set to enable compression for all files.
 inlinecrypt             When possible, encrypt/decrypt the contents of encrypted
                         files using the blk-crypto framework rather than
                         filesystem-layer encryption. This allows the use of
@@ -743,8 +745,8 @@ Compression implementation
 
 - In order to eliminate write amplification during overwrite, F2FS only
   support compression on write-once file, data can be compressed only when
-  all logical blocks in file are valid and cluster compress ratio is lower
-  than specified threshold.
+  all logical blocks in cluster contain valid data and compress ratio of
+  cluster data is lower than specified threshold.
 
 - To enable compression on regular inode, there are three ways:
 
index 23606493025165f11851de8b44ea905dfcab3cab..ff807e14c8911ca56175ab95699af99f7ce75387 100644 (file)
@@ -523,7 +523,7 @@ void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
        __remove_ino_entry(sbi, ino, type);
 }
 
-/* mode should be APPEND_INO or UPDATE_INO */
+/* mode should be APPEND_INO, UPDATE_INO or TRANS_DIR_INO */
 bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
 {
        struct inode_management *im = &sbi->im[mode];
@@ -1258,8 +1258,6 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
        DEFINE_WAIT(wait);
 
        for (;;) {
-               prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
-
                if (!get_pages(sbi, type))
                        break;
 
@@ -1269,6 +1267,10 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
                if (type == F2FS_DIRTY_META)
                        f2fs_sync_meta_pages(sbi, META, LONG_MAX,
                                                        FS_CP_META_IO);
+               else if (type == F2FS_WB_CP_DATA)
+                       f2fs_submit_merged_write(sbi, DATA);
+
+               prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
                io_schedule_timeout(DEFAULT_IO_TIMEOUT);
        }
        finish_wait(&sbi->cp_wait, &wait);
@@ -1415,7 +1417,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                                curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
        }
 
-       /* 2 cp  + n data seg summary + orphan inode blocks */
+       /* 2 cp + n data seg summary + orphan inode blocks */
        data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false);
        spin_lock_irqsave(&sbi->cp_lock, flags);
        if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
@@ -1515,9 +1517,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        /*
         * invalidate intermediate page cache borrowed from meta inode which are
-        * used for migration of encrypted or verity inode's blocks.
+        * used for migration of encrypted, verity or compressed inode's blocks.
         */
-       if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi))
+       if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
+               f2fs_sb_has_compression(sbi))
                invalidate_mapping_pages(META_MAPPING(sbi),
                                MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
 
index 29e50fbe7eca01edd1e47c401059a847347edc61..1dfb126a0cb20c0218a739a54a68eedcc13599f4 100644 (file)
@@ -49,6 +49,13 @@ bool f2fs_is_compressed_page(struct page *page)
                return false;
        if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page))
                return false;
+       /*
+        * page->private may be set with pid.
+        * pid_max is enough to check if it is traced.
+        */
+       if (IS_IO_TRACED_PAGE(page))
+               return false;
+
        f2fs_bug_on(F2FS_M_SB(page->mapping),
                *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC);
        return true;
@@ -506,7 +513,7 @@ bool f2fs_is_compress_backend_ready(struct inode *inode)
        return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
 }
 
-static mempool_t *compress_page_pool = NULL;
+static mempool_t *compress_page_pool;
 static int num_compress_pages = 512;
 module_param(num_compress_pages, uint, 0444);
 MODULE_PARM_DESC(num_compress_pages,
@@ -663,6 +670,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
        const struct f2fs_compress_ops *cops =
                        f2fs_cops[fi->i_compress_algorithm];
        int ret;
+       int i;
 
        dec_page_count(sbi, F2FS_RD_DATA);
 
@@ -681,6 +689,26 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
                goto out_free_dic;
        }
 
+       dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
+                                       dic->cluster_size, GFP_NOFS);
+       if (!dic->tpages) {
+               ret = -ENOMEM;
+               goto out_free_dic;
+       }
+
+       for (i = 0; i < dic->cluster_size; i++) {
+               if (dic->rpages[i]) {
+                       dic->tpages[i] = dic->rpages[i];
+                       continue;
+               }
+
+               dic->tpages[i] = f2fs_compress_alloc_page();
+               if (!dic->tpages[i]) {
+                       ret = -ENOMEM;
+                       goto out_free_dic;
+               }
+       }
+
        if (cops->init_decompress_ctx) {
                ret = cops->init_decompress_ctx(dic);
                if (ret)
@@ -821,7 +849,7 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc)
 }
 
 /* return # of valid blocks in compressed cluster */
-static int f2fs_cluster_blocks(struct compress_ctx *cc, bool compr)
+static int f2fs_cluster_blocks(struct compress_ctx *cc)
 {
        return __f2fs_cluster_blocks(cc, false);
 }
@@ -835,7 +863,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index)
                .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size,
        };
 
-       return f2fs_cluster_blocks(&cc, false);
+       return f2fs_cluster_blocks(&cc);
 }
 
 static bool cluster_may_compress(struct compress_ctx *cc)
@@ -886,7 +914,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
        bool prealloc;
 
 retry:
-       ret = f2fs_cluster_blocks(cc, false);
+       ret = f2fs_cluster_blocks(cc);
        if (ret <= 0)
                return ret;
 
@@ -949,7 +977,7 @@ retry:
        }
 
        if (prealloc) {
-               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
 
                set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
 
@@ -964,7 +992,7 @@ retry:
                                break;
                }
 
-               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
        }
 
        if (likely(!ret)) {
@@ -1096,8 +1124,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
        loff_t psize;
        int i, err;
 
-       if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi))
+       if (IS_NOQUOTA(inode)) {
+               /*
+                * We need to wait for node_write to avoid block allocation during
+                * checkpoint. This can only happen to quota writes which can cause
+                * the below discard race condition.
+                */
+               down_read(&sbi->node_write);
+       } else if (!f2fs_trylock_op(sbi)) {
                return -EAGAIN;
+       }
 
        set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
 
@@ -1137,6 +1173,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
                f2fs_set_compressed_page(cc->cpages[i], inode,
                                        cc->rpages[i + 1]->index, cic);
                fio.compressed_page = cc->cpages[i];
+
+               fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page,
+                                               dn.ofs_in_node + i + 1);
+
+               /* wait for GCed page writeback via META_MAPPING */
+               f2fs_wait_on_block_writeback(inode, fio.old_blkaddr);
+
                if (fio.encrypted) {
                        fio.page = cc->rpages[i + 1];
                        err = f2fs_encrypt_one_page(&fio);
@@ -1203,7 +1246,9 @@ unlock_continue:
                set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
 
        f2fs_put_dnode(&dn);
-       if (!IS_NOQUOTA(inode))
+       if (IS_NOQUOTA(inode))
+               up_read(&sbi->node_write);
+       else
                f2fs_unlock_op(sbi);
 
        spin_lock(&fi->i_size_lock);
@@ -1230,7 +1275,9 @@ out_put_cic:
 out_put_dnode:
        f2fs_put_dnode(&dn);
 out_unlock_op:
-       if (!IS_NOQUOTA(inode))
+       if (IS_NOQUOTA(inode))
+               up_read(&sbi->node_write);
+       else
                f2fs_unlock_op(sbi);
        return -EAGAIN;
 }
@@ -1310,6 +1357,12 @@ retry_write:
                                congestion_wait(BLK_RW_ASYNC,
                                                DEFAULT_IO_TIMEOUT);
                                lock_page(cc->rpages[i]);
+
+                               if (!PageDirty(cc->rpages[i])) {
+                                       unlock_page(cc->rpages[i]);
+                                       continue;
+                               }
+
                                clear_page_dirty_for_io(cc->rpages[i]);
                                goto retry_write;
                        }
@@ -1353,6 +1406,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
                err = f2fs_write_compressed_pages(cc, submitted,
                                                        wbc, io_type);
                cops->destroy_compress_ctx(cc);
+               kfree(cc->cpages);
+               cc->cpages = NULL;
                if (!err)
                        return 0;
                f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN);
@@ -1415,22 +1470,6 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
                dic->cpages[i] = page;
        }
 
-       dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
-                                       dic->cluster_size, GFP_NOFS);
-       if (!dic->tpages)
-               goto out_free;
-
-       for (i = 0; i < dic->cluster_size; i++) {
-               if (cc->rpages[i]) {
-                       dic->tpages[i] = cc->rpages[i];
-                       continue;
-               }
-
-               dic->tpages[i] = f2fs_compress_alloc_page();
-               if (!dic->tpages[i])
-                       goto out_free;
-       }
-
        return dic;
 
 out_free:
index 5f527073143e1e2d604d9861400fb1d14473cdcb..ed2bca0fce9224bba95b8f1ab315e68038f29882 100644 (file)
@@ -87,7 +87,7 @@ static bool __is_cp_guaranteed(struct page *page)
        sbi = F2FS_I_SB(inode);
 
        if (inode->i_ino == F2FS_META_INO(sbi) ||
-                       inode->i_ino ==  F2FS_NODE_INO(sbi) ||
+                       inode->i_ino == F2FS_NODE_INO(sbi) ||
                        S_ISDIR(inode->i_mode) ||
                        (S_ISREG(inode->i_mode) &&
                        (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
@@ -1073,12 +1073,13 @@ static void f2fs_release_read_bio(struct bio *bio)
 
 /* This can handle encryption stuffs */
 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
-                                               block_t blkaddr, bool for_write)
+                                block_t blkaddr, int op_flags, bool for_write)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct bio *bio;
 
-       bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write);
+       bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
+                                       page->index, for_write);
        if (IS_ERR(bio))
                return PTR_ERR(bio);
 
@@ -1193,7 +1194,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 
 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 {
-       struct extent_info ei  = {0,0,0};
+       struct extent_info ei = {0, 0, 0};
        struct inode *inode = dn->inode;
 
        if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -1265,7 +1266,8 @@ got_it:
                return page;
        }
 
-       err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write);
+       err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
+                                               op_flags, for_write);
        if (err)
                goto put_err;
        return page;
@@ -1414,7 +1416,7 @@ alloc:
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
        old_blkaddr = dn->data_blkaddr;
        f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
-                                       &sum, seg_type, NULL, false);
+                                       &sum, seg_type, NULL);
        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
                invalidate_mapping_pages(META_MAPPING(sbi),
                                        old_blkaddr, old_blkaddr);
@@ -1474,7 +1476,7 @@ map_blocks:
        return err;
 }
 
-void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
 {
        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
                if (lock)
@@ -1539,7 +1541,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 
 next_dnode:
        if (map->m_may_create)
-               __do_map_lock(sbi, flag, true);
+               f2fs_do_map_lock(sbi, flag, true);
 
        /* When reading holes, we need its node page */
        set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1688,7 +1690,7 @@ skip:
        f2fs_put_dnode(&dn);
 
        if (map->m_may_create) {
-               __do_map_lock(sbi, flag, false);
+               f2fs_do_map_lock(sbi, flag, false);
                f2fs_balance_fs(sbi, dn.node_changed);
        }
        goto next_dnode;
@@ -1714,7 +1716,7 @@ sync_out:
        f2fs_put_dnode(&dn);
 unlock_out:
        if (map->m_may_create) {
-               __do_map_lock(sbi, flag, false);
+               f2fs_do_map_lock(sbi, flag, false);
                f2fs_balance_fs(sbi, dn.node_changed);
        }
 out:
@@ -1861,6 +1863,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
                        flags |= FIEMAP_EXTENT_LAST;
 
                err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+               trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
                if (err || err == 1)
                        return err;
        }
@@ -1884,8 +1887,10 @@ static int f2fs_xattr_fiemap(struct inode *inode,
                flags = FIEMAP_EXTENT_LAST;
        }
 
-       if (phys)
+       if (phys) {
                err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+               trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
+       }
 
        return (err < 0 ? err : 0);
 }
@@ -1979,6 +1984,7 @@ next:
 
                ret = fiemap_fill_next_extent(fieinfo, logical,
                                phys, size, flags);
+               trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
                if (ret)
                        goto out;
                size = 0;
@@ -2213,9 +2219,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
        if (ret)
                goto out;
 
-       /* cluster was overwritten as normal cluster */
-       if (dn.data_blkaddr != COMPRESS_ADDR)
-               goto out;
+       f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
 
        for (i = 1; i < cc->cluster_size; i++) {
                block_t blkaddr;
@@ -2342,6 +2346,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
        unsigned nr_pages = rac ? readahead_count(rac) : 1;
        unsigned max_nr_pages = nr_pages;
        int ret = 0;
+       bool drop_ra = false;
 
        map.m_pblk = 0;
        map.m_lblk = 0;
@@ -2352,10 +2357,26 @@ static int f2fs_mpage_readpages(struct inode *inode,
        map.m_seg_type = NO_CHECK_TYPE;
        map.m_may_create = false;
 
+       /*
+        * Two readahead threads for same address range can cause race condition
+        * which fragments sequential read IOs. So let's avoid each other.
+        */
+       if (rac && readahead_count(rac)) {
+               if (READ_ONCE(F2FS_I(inode)->ra_offset) == readahead_index(rac))
+                       drop_ra = true;
+               else
+                       WRITE_ONCE(F2FS_I(inode)->ra_offset,
+                                               readahead_index(rac));
+       }
+
        for (; nr_pages; nr_pages--) {
                if (rac) {
                        page = readahead_page(rac);
                        prefetchw(&page->flags);
+                       if (drop_ra) {
+                               f2fs_put_page(page, 1);
+                               continue;
+                       }
                }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -2418,6 +2439,9 @@ next_page:
        }
        if (bio)
                __submit_bio(F2FS_I_SB(inode), bio, DATA);
+
+       if (rac && readahead_count(rac) && !drop_ra)
+               WRITE_ONCE(F2FS_I(inode)->ra_offset, -1);
        return ret;
 }
 
@@ -2772,8 +2796,20 @@ write:
 
        /* Dentry/quota blocks are controlled by checkpoint */
        if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
+               /*
+                * We need to wait for node_write to avoid block allocation during
+                * checkpoint. This can only happen to quota writes which can cause
+                * the below discard race condition.
+                */
+               if (IS_NOQUOTA(inode))
+                       down_read(&sbi->node_write);
+
                fio.need_lock = LOCK_DONE;
                err = f2fs_do_write_data_page(&fio);
+
+               if (IS_NOQUOTA(inode))
+                       up_read(&sbi->node_write);
+
                goto done;
        }
 
@@ -3268,7 +3304,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
 
        if (f2fs_has_inline_data(inode) ||
                        (pos & PAGE_MASK) >= i_size_read(inode)) {
-               __do_map_lock(sbi, flag, true);
+               f2fs_do_map_lock(sbi, flag, true);
                locked = true;
        }
 
@@ -3305,7 +3341,7 @@ restart:
                        err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
                        if (err || dn.data_blkaddr == NULL_ADDR) {
                                f2fs_put_dnode(&dn);
-                               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
+                               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
                                                                true);
                                WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
                                locked = true;
@@ -3321,7 +3357,7 @@ out:
        f2fs_put_dnode(&dn);
 unlock_out:
        if (locked)
-               __do_map_lock(sbi, flag, false);
+               f2fs_do_map_lock(sbi, flag, false);
        return err;
 }
 
@@ -3433,7 +3469,7 @@ repeat:
                        err = -EFSCORRUPTED;
                        goto fail;
                }
-               err = f2fs_submit_page_read(inode, page, blkaddr, true);
+               err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
                if (err)
                        goto fail;
 
@@ -3483,6 +3519,10 @@ static int f2fs_write_end(struct file *file,
        if (f2fs_compressed_file(inode) && fsdata) {
                f2fs_compress_write_end(inode, fsdata, page->index, copied);
                f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+
+               if (pos + copied > i_size_read(inode) &&
+                               !f2fs_verity_in_progress(inode))
+                       f2fs_i_size_write(inode, pos + copied);
                return copied;
        }
 #endif
@@ -3742,10 +3782,9 @@ static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
        }
 
        f2fs_put_dnode(&dn);
-
        return blknr;
 #else
-       return -EOPNOTSUPP;
+       return 0;
 #endif
 }
 
@@ -3753,18 +3792,26 @@ static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 {
        struct inode *inode = mapping->host;
+       struct buffer_head tmp = {
+               .b_size = i_blocksize(inode),
+       };
+       sector_t blknr = 0;
 
        if (f2fs_has_inline_data(inode))
-               return 0;
+               goto out;
 
        /* make sure allocating whole blocks */
        if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                filemap_write_and_wait(mapping);
 
        if (f2fs_compressed_file(inode))
-               return f2fs_bmap_compress(inode, block);
+               blknr = f2fs_bmap_compress(inode, block);
 
-       return generic_block_bmap(mapping, block, get_data_block_bmap);
+       if (!get_data_block_bmap(inode, block, &tmp, 0))
+               blknr = tmp.b_blocknr;
+out:
+       trace_f2fs_bmap(inode, block, blknr);
+       return blknr;
 }
 
 #ifdef CONFIG_MIGRATION
index 0dbcb0f9c0195a66194e05224155d421bae62a50..4276c0f79beb9479d55f892678e7d825b3d90b4f 100644 (file)
@@ -174,6 +174,26 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        for (i = META_CP; i < META_MAX; i++)
                si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
 
+       for (i = 0; i < NO_CHECK_TYPE; i++) {
+               si->dirty_seg[i] = 0;
+               si->full_seg[i] = 0;
+               si->valid_blks[i] = 0;
+       }
+
+       for (i = 0; i < MAIN_SEGS(sbi); i++) {
+               int blks = get_seg_entry(sbi, i)->valid_blocks;
+               int type = get_seg_entry(sbi, i)->type;
+
+               if (!blks)
+                       continue;
+
+               if (blks == sbi->blocks_per_seg)
+                       si->full_seg[type]++;
+               else
+                       si->dirty_seg[type]++;
+               si->valid_blks[type] += blks;
+       }
+
        for (i = 0; i < 2; i++) {
                si->segment_count[i] = sbi->segment_count[i];
                si->block_count[i] = sbi->block_count[i];
@@ -329,30 +349,50 @@ static int stat_show(struct seq_file *s, void *v)
                seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
                           si->main_area_segs, si->main_area_sections,
                           si->main_area_zones);
-               seq_printf(s, "  - COLD  data: %d, %d, %d\n",
+               seq_printf(s, "    TYPE         %8s %8s %8s %10s %10s %10s\n",
+                          "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk");
+               seq_printf(s, "  - COLD   data: %8d %8d %8d %10u %10u %10u\n",
                           si->curseg[CURSEG_COLD_DATA],
                           si->cursec[CURSEG_COLD_DATA],
-                          si->curzone[CURSEG_COLD_DATA]);
-               seq_printf(s, "  - WARM  data: %d, %d, %d\n",
+                          si->curzone[CURSEG_COLD_DATA],
+                          si->dirty_seg[CURSEG_COLD_DATA],
+                          si->full_seg[CURSEG_COLD_DATA],
+                          si->valid_blks[CURSEG_COLD_DATA]);
+               seq_printf(s, "  - WARM   data: %8d %8d %8d %10u %10u %10u\n",
                           si->curseg[CURSEG_WARM_DATA],
                           si->cursec[CURSEG_WARM_DATA],
-                          si->curzone[CURSEG_WARM_DATA]);
-               seq_printf(s, "  - HOT   data: %d, %d, %d\n",
+                          si->curzone[CURSEG_WARM_DATA],
+                          si->dirty_seg[CURSEG_WARM_DATA],
+                          si->full_seg[CURSEG_WARM_DATA],
+                          si->valid_blks[CURSEG_WARM_DATA]);
+               seq_printf(s, "  - HOT    data: %8d %8d %8d %10u %10u %10u\n",
                           si->curseg[CURSEG_HOT_DATA],
                           si->cursec[CURSEG_HOT_DATA],
-                          si->curzone[CURSEG_HOT_DATA]);
-               seq_printf(s, "  - Dir   dnode: %d, %d, %d\n",
+                          si->curzone[CURSEG_HOT_DATA],
+                          si->dirty_seg[CURSEG_HOT_DATA],
+                          si->full_seg[CURSEG_HOT_DATA],
+                          si->valid_blks[CURSEG_HOT_DATA]);
+               seq_printf(s, "  - Dir   dnode: %8d %8d %8d %10u %10u %10u\n",
                           si->curseg[CURSEG_HOT_NODE],
                           si->cursec[CURSEG_HOT_NODE],
-                          si->curzone[CURSEG_HOT_NODE]);
-               seq_printf(s, "  - File   dnode: %d, %d, %d\n",
+                          si->curzone[CURSEG_HOT_NODE],
+                          si->dirty_seg[CURSEG_HOT_NODE],
+                          si->full_seg[CURSEG_HOT_NODE],
+                          si->valid_blks[CURSEG_HOT_NODE]);
+               seq_printf(s, "  - File  dnode: %8d %8d %8d %10u %10u %10u\n",
                           si->curseg[CURSEG_WARM_NODE],
                           si->cursec[CURSEG_WARM_NODE],
-                          si->curzone[CURSEG_WARM_NODE]);
-               seq_printf(s, "  - Indir nodes: %d, %d, %d\n",
+                          si->curzone[CURSEG_WARM_NODE],
+                          si->dirty_seg[CURSEG_WARM_NODE],
+                          si->full_seg[CURSEG_WARM_NODE],
+                          si->valid_blks[CURSEG_WARM_NODE]);
+               seq_printf(s, "  - Indir nodes: %8d %8d %8d %10u %10u %10u\n",
                           si->curseg[CURSEG_COLD_NODE],
                           si->cursec[CURSEG_COLD_NODE],
-                          si->curzone[CURSEG_COLD_NODE]);
+                          si->curzone[CURSEG_COLD_NODE],
+                          si->dirty_seg[CURSEG_COLD_NODE],
+                          si->full_seg[CURSEG_COLD_NODE],
+                          si->valid_blks[CURSEG_COLD_NODE]);
                seq_printf(s, "\n  - Valid: %d\n  - Dirty: %d\n",
                           si->main_area_segs - si->dirty_count -
                           si->prefree_count - si->free_segs,
index d35976785e8c583870060967ecae520c670afb24..069f498af1e38f642c944e13af59e8792c1955eb 100644 (file)
@@ -779,7 +779,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
                return err;
 
        /*
-        * An immature stakable filesystem shows a race condition between lookup
+        * An immature stackable filesystem shows a race condition between lookup
         * and create. If we have same task when doing lookup and create, it's
         * definitely fine as expected by VFS normally. Otherwise, let's just
         * verify on-disk dentry one more time, which guarantees filesystem
index e60078460ad17975a2bdb39c46614e263ebdb067..686c68b98610b50c0d8bc9b264bb693793b92992 100644 (file)
@@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et,
 }
 
 /* return true, if inode page is changed */
-static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
        struct extent_tree *et;
        struct extent_node *en;
        struct extent_info ei;
@@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
        if (!f2fs_may_extent_tree(inode)) {
                /* drop largest extent */
                if (i_ext && i_ext->len) {
+                       f2fs_wait_on_page_writeback(ipage, NODE, true, true);
                        i_ext->len = 0;
-                       return true;
+                       set_page_dirty(ipage);
+                       return;
                }
-               return false;
+               return;
        }
 
        et = __grab_extent_tree(inode);
 
        if (!i_ext || !i_ext->len)
-               return false;
+               return;
 
        get_extent_info(&ei, i_ext);
 
@@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e
        }
 out:
        write_unlock(&et->lock);
-       return false;
 }
 
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
 {
-       bool ret =  __f2fs_init_extent_tree(inode, i_ext);
+       __f2fs_init_extent_tree(inode, ipage);
 
        if (!F2FS_I(inode)->extent_tree)
                set_inode_flag(inode, FI_NO_EXTENT);
-
-       return ret;
 }
 
 static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
index b35a50f4953c585a1c517b1f54bdd696156d2ded..16322ea5b46305b2e1010cec827f0aa19b4ffb73 100644 (file)
@@ -402,12 +402,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 }
 
 /*
- * ioctl commands
+ * f2fs-specific ioctl commands
  */
-#define F2FS_IOC_GETFLAGS              FS_IOC_GETFLAGS
-#define F2FS_IOC_SETFLAGS              FS_IOC_SETFLAGS
-#define F2FS_IOC_GETVERSION            FS_IOC_GETVERSION
-
 #define F2FS_IOCTL_MAGIC               0xf5
 #define F2FS_IOC_START_ATOMIC_WRITE    _IO(F2FS_IOCTL_MAGIC, 1)
 #define F2FS_IOC_COMMIT_ATOMIC_WRITE   _IO(F2FS_IOCTL_MAGIC, 2)
@@ -434,13 +430,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
                                        _IOR(F2FS_IOCTL_MAGIC, 18, __u64)
 #define F2FS_IOC_RESERVE_COMPRESS_BLOCKS                               \
                                        _IOR(F2FS_IOCTL_MAGIC, 19, __u64)
-
-#define F2FS_IOC_GET_VOLUME_NAME       FS_IOC_GETFSLABEL
-#define F2FS_IOC_SET_VOLUME_NAME       FS_IOC_SETFSLABEL
-
-#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
-#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
-#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
+#define F2FS_IOC_SEC_TRIM_FILE         _IOW(F2FS_IOCTL_MAGIC, 20,      \
+                                               struct f2fs_sectrim_range)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
@@ -453,17 +444,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 #define F2FS_GOING_DOWN_METAFLUSH      0x3     /* going down with meta flush */
 #define F2FS_GOING_DOWN_NEED_FSCK      0x4     /* going down to trigger fsck */
 
-#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
- * ioctl commands in 32 bit emulation
+ * Flags used by F2FS_IOC_SEC_TRIM_FILE
  */
-#define F2FS_IOC32_GETFLAGS            FS_IOC32_GETFLAGS
-#define F2FS_IOC32_SETFLAGS            FS_IOC32_SETFLAGS
-#define F2FS_IOC32_GETVERSION          FS_IOC32_GETVERSION
-#endif
-
-#define F2FS_IOC_FSGETXATTR            FS_IOC_FSGETXATTR
-#define F2FS_IOC_FSSETXATTR            FS_IOC_FSSETXATTR
+#define F2FS_TRIM_FILE_DISCARD         0x1     /* send discard command */
+#define F2FS_TRIM_FILE_ZEROOUT         0x2     /* zero out */
+#define F2FS_TRIM_FILE_MASK            0x3
 
 struct f2fs_gc_range {
        u32 sync;
@@ -488,6 +474,12 @@ struct f2fs_flush_device {
        u32 segments;           /* # of segments to flush */
 };
 
+struct f2fs_sectrim_range {
+       u64 start;
+       u64 len;
+       u64 flags;
+};
+
 /* for inline stuff */
 #define DEF_INLINE_RESERVED_SIZE       1
 static inline int get_extra_isize(struct inode *inode);
@@ -794,6 +786,7 @@ struct f2fs_inode_info {
        struct list_head inmem_pages;   /* inmemory pages managed by f2fs */
        struct task_struct *inmem_task; /* store inmemory task */
        struct mutex inmem_lock;        /* lock for inmemory pages */
+       pgoff_t ra_offset;              /* ongoing readahead offset */
        struct extent_tree *extent_tree;        /* cached extent_tree entry */
 
        /* avoid racing between foreground op and gc */
@@ -1267,7 +1260,8 @@ enum {
        GC_NORMAL,
        GC_IDLE_CB,
        GC_IDLE_GREEDY,
-       GC_URGENT,
+       GC_URGENT_HIGH,
+       GC_URGENT_LOW,
 };
 
 enum {
@@ -1313,6 +1307,14 @@ enum fsync_mode {
 #define IS_DUMMY_WRITTEN_PAGE(page)                    \
                (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
 
+#ifdef CONFIG_F2FS_IO_TRACE
+#define IS_IO_TRACED_PAGE(page)                        \
+               (page_private(page) > 0 &&              \
+                page_private(page) < (unsigned long)PID_MAX_LIMIT)
+#else
+#define IS_IO_TRACED_PAGE(page) (0)
+#endif
+
 #ifdef CONFIG_FS_ENCRYPTION
 #define DUMMY_ENCRYPTION_ENABLED(sbi) \
        (unlikely(F2FS_OPTION(sbi).dummy_enc_ctx.ctx != NULL))
@@ -1438,7 +1440,7 @@ struct f2fs_sb_info {
        unsigned long last_time[MAX_TIME];      /* to store time in jiffies */
        long interval_time[MAX_TIME];           /* to store thresholds */
 
-       struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
+       struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
 
        spinlock_t fsync_node_lock;             /* for node entry lock */
        struct list_head fsync_node_list;       /* node list head */
@@ -1516,8 +1518,9 @@ struct f2fs_sb_info {
        unsigned int cur_victim_sec;            /* current victim section num */
        unsigned int gc_mode;                   /* current GC state */
        unsigned int next_victim_seg[2];        /* next segment in victim section */
+
        /* for skip statistic */
-       unsigned int atomic_files;              /* # of opened atomic file */
+       unsigned int atomic_files;              /* # of opened atomic file */
        unsigned long long skipped_atomic_files[2];     /* FG_GC and BG_GC */
        unsigned long long skipped_gc_rwsem;            /* FG_GC only */
 
@@ -2456,7 +2459,7 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
 
 static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
 {
-       if (sbi->gc_mode == GC_URGENT)
+       if (sbi->gc_mode == GC_URGENT_HIGH)
                return true;
 
        if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
@@ -2474,6 +2477,10 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
                        atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
                return false;
 
+       if (sbi->gc_mode == GC_URGENT_LOW &&
+                       (type == DISCARD_TIME || type == GC_TIME))
+               return true;
+
        return f2fs_time_over(sbi, type);
 }
 
@@ -2649,7 +2656,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
 
 static inline void set_inode_flag(struct inode *inode, int flag)
 {
-       test_and_set_bit(flag, F2FS_I(inode)->flags);
+       set_bit(flag, F2FS_I(inode)->flags);
        __mark_inode_dirty_flag(inode, flag, true);
 }
 
@@ -2660,7 +2667,7 @@ static inline int is_inode_flag_set(struct inode *inode, int flag)
 
 static inline void clear_inode_flag(struct inode *inode, int flag)
 {
-       test_and_clear_bit(flag, F2FS_I(inode)->flags);
+       clear_bit(flag, F2FS_I(inode)->flags);
        __mark_inode_dirty_flag(inode, flag, false);
 }
 
@@ -3275,7 +3282,7 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid);
 struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
 struct page *f2fs_get_node_page_ra(struct page *parent, int start);
 int f2fs_move_node_page(struct page *node_page, int gc_type);
-int f2fs_flush_inline_data(struct f2fs_sb_info *sbi);
+void f2fs_flush_inline_data(struct f2fs_sb_info *sbi);
 int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
                        struct writeback_control *wbc, bool atomic,
                        unsigned int *seq_id);
@@ -3287,7 +3294,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
 void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
 void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
 int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
-void f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
+int f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
 int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
 int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
 int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
@@ -3325,9 +3332,10 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
-void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
                                        unsigned int start, unsigned int end);
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type);
+void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
+void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
                                        struct cp_control *cpc);
@@ -3350,7 +3358,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                        block_t old_blkaddr, block_t *new_blkaddr,
                        struct f2fs_summary *sum, int type,
-                       struct f2fs_io_info *fio, bool add_list);
+                       struct f2fs_io_info *fio);
 void f2fs_wait_on_page_writeback(struct page *page,
                        enum page_type type, bool ordered, bool locked);
 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3448,7 +3456,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
 struct page *f2fs_get_new_data_page(struct inode *inode,
                        struct page *ipage, pgoff_t index, bool new_i_size);
 int f2fs_do_write_data_page(struct f2fs_io_info *fio);
-void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
+void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock);
 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                        int create, int flag);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -3536,6 +3544,9 @@ struct f2fs_stat_info {
        int curseg[NR_CURSEG_TYPE];
        int cursec[NR_CURSEG_TYPE];
        int curzone[NR_CURSEG_TYPE];
+       unsigned int dirty_seg[NR_CURSEG_TYPE];
+       unsigned int full_seg[NR_CURSEG_TYPE];
+       unsigned int valid_blks[NR_CURSEG_TYPE];
 
        unsigned int meta_count[META_MAX];
        unsigned int segment_count[2];
@@ -3750,7 +3761,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
 int f2fs_convert_inline_inode(struct inode *inode);
 int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry);
 int f2fs_write_inline_data(struct inode *inode, struct page *page);
-bool f2fs_recover_inline_data(struct inode *inode, struct page *npage);
+int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
                                        const struct f2fs_filename *fname,
                                        struct page **res_page);
@@ -3795,7 +3806,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
                                                struct rb_root_cached *root);
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
+void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
 void f2fs_drop_extent_tree(struct inode *inode);
 unsigned int f2fs_destroy_extent_node(struct inode *inode);
 void f2fs_destroy_extent_tree(struct inode *inode);
index 3268f8dd59bbaf7f203d04cf755bef42994eb92a..8a422400e824d71ac3a47e473ab38b83fc002e05 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/uuid.h>
 #include <linux/file.h>
 #include <linux/nls.h>
+#include <linux/sched/signal.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -105,11 +106,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 
        if (need_alloc) {
                /* block allocation */
-               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
+               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
                set_new_dnode(&dn, inode, NULL, NULL, 0);
                err = f2fs_get_block(&dn, page->index);
                f2fs_put_dnode(&dn);
-               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
+               f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
        }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -1373,8 +1374,6 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        truncate_pagecache(inode, offset);
 
        new_size = i_size_read(inode) - len;
-       truncate_pagecache(inode, new_size);
-
        ret = f2fs_truncate_blocks(inode, new_size, true);
        up_write(&F2FS_I(inode)->i_mmap_sem);
        if (!ret)
@@ -1660,7 +1659,7 @@ next_alloc:
                map.m_seg_type = CURSEG_COLD_DATA_PINNED;
 
                f2fs_lock_op(sbi);
-               f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA);
+               f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
                f2fs_unlock_op(sbi);
 
                err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
@@ -2527,6 +2526,11 @@ do_more:
        }
 
        ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
+       if (ret) {
+               if (ret == -EBUSY)
+                       ret = -EAGAIN;
+               goto out;
+       }
        range.start += BLKS_PER_SEC(sbi);
        if (range.start <= end)
                goto do_more;
@@ -3359,7 +3363,7 @@ static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
        return fsverity_ioctl_measure(filp, (void __user *)arg);
 }
 
-static int f2fs_get_volume_name(struct file *filp, unsigned long arg)
+static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -3385,7 +3389,7 @@ static int f2fs_get_volume_name(struct file *filp, unsigned long arg)
        return err;
 }
 
-static int f2fs_set_volume_name(struct file *filp, unsigned long arg)
+static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -3531,14 +3535,14 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
        if (ret)
                goto out;
 
-       if (!F2FS_I(inode)->i_compr_blocks)
-               goto out;
-
        F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL;
        f2fs_set_inode_flags(inode);
        inode->i_ctime = current_time(inode);
        f2fs_mark_inode_dirty_sync(inode, true);
 
+       if (!F2FS_I(inode)->i_compr_blocks)
+               goto out;
+
        down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        down_write(&F2FS_I(inode)->i_mmap_sem);
 
@@ -3756,6 +3760,193 @@ out:
        return ret;
 }
 
+static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
+               pgoff_t off, block_t block, block_t len, u32 flags)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+       sector_t sector = SECTOR_FROM_BLOCK(block);
+       sector_t nr_sects = SECTOR_FROM_BLOCK(len);
+       int ret = 0;
+
+       if (!q)
+               return -ENXIO;
+
+       if (flags & F2FS_TRIM_FILE_DISCARD)
+               ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
+                                               blk_queue_secure_erase(q) ?
+                                               BLKDEV_DISCARD_SECURE : 0);
+
+       if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
+               if (IS_ENCRYPTED(inode))
+                       ret = fscrypt_zeroout_range(inode, off, block, len);
+               else
+                       ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
+                                       GFP_NOFS, 0);
+       }
+
+       return ret;
+}
+
+static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
+{
+       struct inode *inode = file_inode(filp);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct address_space *mapping = inode->i_mapping;
+       struct block_device *prev_bdev = NULL;
+       struct f2fs_sectrim_range range;
+       pgoff_t index, pg_end, prev_index = 0;
+       block_t prev_block = 0, len = 0;
+       loff_t end_addr;
+       bool to_end = false;
+       int ret = 0;
+
+       if (!(filp->f_mode & FMODE_WRITE))
+               return -EBADF;
+
+       if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
+                               sizeof(range)))
+               return -EFAULT;
+
+       if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
+                       !S_ISREG(inode->i_mode))
+               return -EINVAL;
+
+       if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
+                       !f2fs_hw_support_discard(sbi)) ||
+                       ((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
+                        IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
+               return -EOPNOTSUPP;
+
+       file_start_write(filp);
+       inode_lock(inode);
+
+       if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
+                       range.start >= inode->i_size) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       if (range.len == 0)
+               goto err;
+
+       if (inode->i_size - range.start > range.len) {
+               end_addr = range.start + range.len;
+       } else {
+               end_addr = range.len == (u64)-1 ?
+                       sbi->sb->s_maxbytes : inode->i_size;
+               to_end = true;
+       }
+
+       if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
+                       (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
+               ret = -EINVAL;
+               goto err;
+       }
+
+       index = F2FS_BYTES_TO_BLK(range.start);
+       pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
+
+       ret = f2fs_convert_inline_inode(inode);
+       if (ret)
+               goto err;
+
+       down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+       down_write(&F2FS_I(inode)->i_mmap_sem);
+
+       ret = filemap_write_and_wait_range(mapping, range.start,
+                       to_end ? LLONG_MAX : end_addr - 1);
+       if (ret)
+               goto out;
+
+       truncate_inode_pages_range(mapping, range.start,
+                       to_end ? -1 : end_addr - 1);
+
+       while (index < pg_end) {
+               struct dnode_of_data dn;
+               pgoff_t end_offset, count;
+               int i;
+
+               set_new_dnode(&dn, inode, NULL, NULL, 0);
+               ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+               if (ret) {
+                       if (ret == -ENOENT) {
+                               index = f2fs_get_next_page_offset(&dn, index);
+                               continue;
+                       }
+                       goto out;
+               }
+
+               end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+               count = min(end_offset - dn.ofs_in_node, pg_end - index);
+               for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
+                       struct block_device *cur_bdev;
+                       block_t blkaddr = f2fs_data_blkaddr(&dn);
+
+                       if (!__is_valid_data_blkaddr(blkaddr))
+                               continue;
+
+                       if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+                                               DATA_GENERIC_ENHANCE)) {
+                               ret = -EFSCORRUPTED;
+                               f2fs_put_dnode(&dn);
+                               goto out;
+                       }
+
+                       cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
+                       if (f2fs_is_multi_device(sbi)) {
+                               int di = f2fs_target_device_index(sbi, blkaddr);
+
+                               blkaddr -= FDEV(di).start_blk;
+                       }
+
+                       if (len) {
+                               if (prev_bdev == cur_bdev &&
+                                               index == prev_index + len &&
+                                               blkaddr == prev_block + len) {
+                                       len++;
+                               } else {
+                                       ret = f2fs_secure_erase(prev_bdev,
+                                               inode, prev_index, prev_block,
+                                               len, range.flags);
+                                       if (ret) {
+                                               f2fs_put_dnode(&dn);
+                                               goto out;
+                                       }
+
+                                       len = 0;
+                               }
+                       }
+
+                       if (!len) {
+                               prev_bdev = cur_bdev;
+                               prev_index = index;
+                               prev_block = blkaddr;
+                               len = 1;
+                       }
+               }
+
+               f2fs_put_dnode(&dn);
+
+               if (fatal_signal_pending(current)) {
+                       ret = -EINTR;
+                       goto out;
+               }
+               cond_resched();
+       }
+
+       if (len)
+               ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
+                               prev_block, len, range.flags);
+out:
+       up_write(&F2FS_I(inode)->i_mmap_sem);
+       up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+err:
+       inode_unlock(inode);
+       file_end_write(filp);
+
+       return ret;
+}
+
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3764,11 +3955,11 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return -ENOSPC;
 
        switch (cmd) {
-       case F2FS_IOC_GETFLAGS:
+       case FS_IOC_GETFLAGS:
                return f2fs_ioc_getflags(filp, arg);
-       case F2FS_IOC_SETFLAGS:
+       case FS_IOC_SETFLAGS:
                return f2fs_ioc_setflags(filp, arg);
-       case F2FS_IOC_GETVERSION:
+       case FS_IOC_GETVERSION:
                return f2fs_ioc_getversion(filp, arg);
        case F2FS_IOC_START_ATOMIC_WRITE:
                return f2fs_ioc_start_atomic_write(filp);
@@ -3784,11 +3975,11 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return f2fs_ioc_shutdown(filp, arg);
        case FITRIM:
                return f2fs_ioc_fitrim(filp, arg);
-       case F2FS_IOC_SET_ENCRYPTION_POLICY:
+       case FS_IOC_SET_ENCRYPTION_POLICY:
                return f2fs_ioc_set_encryption_policy(filp, arg);
-       case F2FS_IOC_GET_ENCRYPTION_POLICY:
+       case FS_IOC_GET_ENCRYPTION_POLICY:
                return f2fs_ioc_get_encryption_policy(filp, arg);
-       case F2FS_IOC_GET_ENCRYPTION_PWSALT:
+       case FS_IOC_GET_ENCRYPTION_PWSALT:
                return f2fs_ioc_get_encryption_pwsalt(filp, arg);
        case FS_IOC_GET_ENCRYPTION_POLICY_EX:
                return f2fs_ioc_get_encryption_policy_ex(filp, arg);
@@ -3816,9 +4007,9 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return f2fs_ioc_flush_device(filp, arg);
        case F2FS_IOC_GET_FEATURES:
                return f2fs_ioc_get_features(filp, arg);
-       case F2FS_IOC_FSGETXATTR:
+       case FS_IOC_FSGETXATTR:
                return f2fs_ioc_fsgetxattr(filp, arg);
-       case F2FS_IOC_FSSETXATTR:
+       case FS_IOC_FSSETXATTR:
                return f2fs_ioc_fssetxattr(filp, arg);
        case F2FS_IOC_GET_PIN_FILE:
                return f2fs_ioc_get_pin_file(filp, arg);
@@ -3832,16 +4023,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return f2fs_ioc_enable_verity(filp, arg);
        case FS_IOC_MEASURE_VERITY:
                return f2fs_ioc_measure_verity(filp, arg);
-       case F2FS_IOC_GET_VOLUME_NAME:
-               return f2fs_get_volume_name(filp, arg);
-       case F2FS_IOC_SET_VOLUME_NAME:
-               return f2fs_set_volume_name(filp, arg);
+       case FS_IOC_GETFSLABEL:
+               return f2fs_ioc_getfslabel(filp, arg);
+       case FS_IOC_SETFSLABEL:
+               return f2fs_ioc_setfslabel(filp, arg);
        case F2FS_IOC_GET_COMPRESS_BLOCKS:
                return f2fs_get_compress_blocks(filp, arg);
        case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
                return f2fs_release_compress_blocks(filp, arg);
        case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
                return f2fs_reserve_compress_blocks(filp, arg);
+       case F2FS_IOC_SEC_TRIM_FILE:
+               return f2fs_sec_trim_file(filp, arg);
        default:
                return -ENOTTY;
        }
@@ -3966,14 +4159,14 @@ out:
 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        switch (cmd) {
-       case F2FS_IOC32_GETFLAGS:
-               cmd = F2FS_IOC_GETFLAGS;
+       case FS_IOC32_GETFLAGS:
+               cmd = FS_IOC_GETFLAGS;
                break;
-       case F2FS_IOC32_SETFLAGS:
-               cmd = F2FS_IOC_SETFLAGS;
+       case FS_IOC32_SETFLAGS:
+               cmd = FS_IOC_SETFLAGS;
                break;
-       case F2FS_IOC32_GETVERSION:
-               cmd = F2FS_IOC_GETVERSION;
+       case FS_IOC32_GETVERSION:
+               cmd = FS_IOC_GETVERSION;
                break;
        case F2FS_IOC_START_ATOMIC_WRITE:
        case F2FS_IOC_COMMIT_ATOMIC_WRITE:
@@ -3982,9 +4175,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case F2FS_IOC_ABORT_VOLATILE_WRITE:
        case F2FS_IOC_SHUTDOWN:
        case FITRIM:
-       case F2FS_IOC_SET_ENCRYPTION_POLICY:
-       case F2FS_IOC_GET_ENCRYPTION_PWSALT:
-       case F2FS_IOC_GET_ENCRYPTION_POLICY:
+       case FS_IOC_SET_ENCRYPTION_POLICY:
+       case FS_IOC_GET_ENCRYPTION_PWSALT:
+       case FS_IOC_GET_ENCRYPTION_POLICY:
        case FS_IOC_GET_ENCRYPTION_POLICY_EX:
        case FS_IOC_ADD_ENCRYPTION_KEY:
        case FS_IOC_REMOVE_ENCRYPTION_KEY:
@@ -3998,19 +4191,20 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case F2FS_IOC_MOVE_RANGE:
        case F2FS_IOC_FLUSH_DEVICE:
        case F2FS_IOC_GET_FEATURES:
-       case F2FS_IOC_FSGETXATTR:
-       case F2FS_IOC_FSSETXATTR:
+       case FS_IOC_FSGETXATTR:
+       case FS_IOC_FSSETXATTR:
        case F2FS_IOC_GET_PIN_FILE:
        case F2FS_IOC_SET_PIN_FILE:
        case F2FS_IOC_PRECACHE_EXTENTS:
        case F2FS_IOC_RESIZE_FS:
        case FS_IOC_ENABLE_VERITY:
        case FS_IOC_MEASURE_VERITY:
-       case F2FS_IOC_GET_VOLUME_NAME:
-       case F2FS_IOC_SET_VOLUME_NAME:
+       case FS_IOC_GETFSLABEL:
+       case FS_IOC_SETFSLABEL:
        case F2FS_IOC_GET_COMPRESS_BLOCKS:
        case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
        case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
+       case F2FS_IOC_SEC_TRIM_FILE:
                break;
        default:
                return -ENOIOCTLCMD;
index 5b95d5a146eb6cde844c7b3b61c44a3b97382b79..11b4adde9baf0252361b47dbcaf06a0520ad1d70 100644 (file)
@@ -21,6 +21,9 @@
 #include "gc.h"
 #include <trace/events/f2fs.h>
 
+static unsigned int count_bits(const unsigned long *addr,
+                               unsigned int offset, unsigned int len);
+
 static int gc_thread_func(void *data)
 {
        struct f2fs_sb_info *sbi = data;
@@ -79,7 +82,7 @@ static int gc_thread_func(void *data)
                 * invalidated soon after by user update or deletion.
                 * So, I'd like to wait some time to collect dirty segments.
                 */
-               if (sbi->gc_mode == GC_URGENT) {
+               if (sbi->gc_mode == GC_URGENT_HIGH) {
                        wait_ms = gc_th->urgent_sleep_time;
                        down_write(&sbi->gc_lock);
                        goto do_gc;
@@ -173,7 +176,7 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
                gc_mode = GC_CB;
                break;
        case GC_IDLE_GREEDY:
-       case GC_URGENT:
+       case GC_URGENT_HIGH:
                gc_mode = GC_GREEDY;
                break;
        }
@@ -187,14 +190,20 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 
        if (p->alloc_mode == SSR) {
                p->gc_mode = GC_GREEDY;
-               p->dirty_segmap = dirty_i->dirty_segmap[type];
+               p->dirty_bitmap = dirty_i->dirty_segmap[type];
                p->max_search = dirty_i->nr_dirty[type];
                p->ofs_unit = 1;
        } else {
                p->gc_mode = select_gc_type(sbi, gc_type);
-               p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
-               p->max_search = dirty_i->nr_dirty[DIRTY];
                p->ofs_unit = sbi->segs_per_sec;
+               if (__is_large_section(sbi)) {
+                       p->dirty_bitmap = dirty_i->dirty_secmap;
+                       p->max_search = count_bits(p->dirty_bitmap,
+                                               0, MAIN_SECS(sbi));
+               } else {
+                       p->dirty_bitmap = dirty_i->dirty_segmap[DIRTY];
+                       p->max_search = dirty_i->nr_dirty[DIRTY];
+               }
        }
 
        /*
@@ -202,7 +211,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
         * foreground GC and urgent GC cases.
         */
        if (gc_type != FG_GC &&
-                       (sbi->gc_mode != GC_URGENT) &&
+                       (sbi->gc_mode != GC_URGENT_HIGH) &&
                        p->max_search > sbi->max_victim_search)
                p->max_search = sbi->max_victim_search;
 
@@ -321,6 +330,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
        unsigned int secno, last_victim;
        unsigned int last_segment;
        unsigned int nsearched = 0;
+       int ret = 0;
 
        mutex_lock(&dirty_i->seglist_lock);
        last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
@@ -332,12 +342,19 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
        p.min_cost = get_max_cost(sbi, &p);
 
        if (*result != NULL_SEGNO) {
-               if (get_valid_blocks(sbi, *result, false) &&
-                       !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
+               if (!get_valid_blocks(sbi, *result, false)) {
+                       ret = -ENODATA;
+                       goto out;
+               }
+
+               if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
+                       ret = -EBUSY;
+               else
                        p.min_segno = *result;
                goto out;
        }
 
+       ret = -ENODATA;
        if (p.max_search == 0)
                goto out;
 
@@ -365,10 +382,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
        }
 
        while (1) {
-               unsigned long cost;
-               unsigned int segno;
-
-               segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
+               unsigned long cost, *dirty_bitmap;
+               unsigned int unit_no, segno;
+
+               dirty_bitmap = p.dirty_bitmap;
+               unit_no = find_next_bit(dirty_bitmap,
+                               last_segment / p.ofs_unit,
+                               p.offset / p.ofs_unit);
+               segno = unit_no * p.ofs_unit;
                if (segno >= last_segment) {
                        if (sm->last_victim[p.gc_mode]) {
                                last_segment =
@@ -381,14 +402,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                }
 
                p.offset = segno + p.ofs_unit;
-               if (p.ofs_unit > 1) {
-                       p.offset -= segno % p.ofs_unit;
-                       nsearched += count_bits(p.dirty_segmap,
-                                               p.offset - p.ofs_unit,
-                                               p.ofs_unit);
-               } else {
-                       nsearched++;
-               }
+               nsearched++;
 
 #ifdef CONFIG_F2FS_CHECK_FS
                /*
@@ -421,9 +435,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 next:
                if (nsearched >= p.max_search) {
                        if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
-                               sm->last_victim[p.gc_mode] = last_victim + 1;
+                               sm->last_victim[p.gc_mode] =
+                                       last_victim + p.ofs_unit;
                        else
-                               sm->last_victim[p.gc_mode] = segno + 1;
+                               sm->last_victim[p.gc_mode] = segno + p.ofs_unit;
                        sm->last_victim[p.gc_mode] %=
                                (MAIN_SECS(sbi) * sbi->segs_per_sec);
                        break;
@@ -440,6 +455,7 @@ got_result:
                        else
                                set_bit(secno, dirty_i->victim_secmap);
                }
+               ret = 0;
 
        }
 out:
@@ -449,7 +465,7 @@ out:
                                prefree_segments(sbi), free_segments(sbi));
        mutex_unlock(&dirty_i->seglist_lock);
 
-       return (p.min_segno == NULL_SEGNO) ? 0 : 1;
+       return ret;
 }
 
 static const struct victim_selection default_v_ops = {
@@ -833,8 +849,10 @@ static int move_data_block(struct inode *inode, block_t bidx,
 
        mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
                                        fio.old_blkaddr, false);
-       if (!mpage)
+       if (!mpage) {
+               err = -ENOMEM;
                goto up_out;
+       }
 
        fio.encrypted_page = mpage;
 
@@ -859,7 +877,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
        }
 
        f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
-                                       &sum, CURSEG_COLD_DATA, NULL, false);
+                                       &sum, CURSEG_COLD_DATA, NULL);
 
        fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
                                newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -1333,10 +1351,9 @@ gc_more:
                ret = -EINVAL;
                goto stop;
        }
-       if (!__get_victim(sbi, &segno, gc_type)) {
-               ret = -ENODATA;
+       ret = __get_victim(sbi, &segno, gc_type);
+       if (ret)
                goto stop;
-       }
 
        seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
        if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
@@ -1434,7 +1451,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
 
        /* Move out cursegs from the target range */
        for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
-               allocate_segment_for_resize(sbi, type, start, end);
+               f2fs_allocate_segment_for_resize(sbi, type, start, end);
 
        /* do GC to move out valid blocks in the range */
        for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
index dbade310dc792e4eec714a95764f5a3a987b007f..102df444f623c0643a0be15a3eb6971a79e69935 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include <trace/events/f2fs.h>
 
 bool f2fs_may_inline_data(struct inode *inode)
 {
@@ -253,7 +254,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
        return 0;
 }
 
-bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
+int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode *ri = NULL;
@@ -275,7 +276,8 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
                        ri && (ri->i_inline & F2FS_INLINE_DATA)) {
 process_inline:
                ipage = f2fs_get_node_page(sbi, inode->i_ino);
-               f2fs_bug_on(sbi, IS_ERR(ipage));
+               if (IS_ERR(ipage))
+                       return PTR_ERR(ipage);
 
                f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
@@ -288,21 +290,25 @@ process_inline:
 
                set_page_dirty(ipage);
                f2fs_put_page(ipage, 1);
-               return true;
+               return 1;
        }
 
        if (f2fs_has_inline_data(inode)) {
                ipage = f2fs_get_node_page(sbi, inode->i_ino);
-               f2fs_bug_on(sbi, IS_ERR(ipage));
+               if (IS_ERR(ipage))
+                       return PTR_ERR(ipage);
                f2fs_truncate_inline_inode(inode, ipage, 0);
                clear_inode_flag(inode, FI_INLINE_DATA);
                f2fs_put_page(ipage, 1);
        } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
-               if (f2fs_truncate_blocks(inode, 0, false))
-                       return false;
+               int ret;
+
+               ret = f2fs_truncate_blocks(inode, 0, false);
+               if (ret)
+                       return ret;
                goto process_inline;
        }
-       return false;
+       return 0;
 }
 
 struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
@@ -776,6 +782,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
        byteaddr += (char *)inline_data_addr(inode, ipage) -
                                        (char *)F2FS_INODE(ipage);
        err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
+       trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err);
 out:
        f2fs_put_page(ipage, 1);
        return err;
index 44582a4db513eda7d1bf24923605be0672505318..66969ae852b978a9a8435d11d4ba5e8a88218c50 100644 (file)
@@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode)
        fi->i_pino = le32_to_cpu(ri->i_pino);
        fi->i_dir_level = ri->i_dir_level;
 
-       if (f2fs_init_extent_tree(inode, &ri->i_ext))
-               set_page_dirty(node_page);
+       f2fs_init_extent_tree(inode, node_page);
 
        get_inline_info(inode, ri);
 
@@ -402,6 +401,7 @@ static int do_read_inode(struct inode *inode)
 
        /* try to recover cold bit for non-dir inode */
        if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) {
+               f2fs_wait_on_page_writeback(node_page, NODE, true, true);
                set_cold_node(node_page, false);
                set_page_dirty(node_page);
        }
index e94e02c6580ac20d10fd65acae749db863533291..84e4bbc1a64de0bf4f84c12c65b9467d7b42d378 100644 (file)
@@ -569,15 +569,17 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 
        trace_f2fs_unlink_enter(dir, dentry);
 
-       if (unlikely(f2fs_cp_error(sbi)))
-               return -EIO;
+       if (unlikely(f2fs_cp_error(sbi))) {
+               err = -EIO;
+               goto fail;
+       }
 
        err = dquot_initialize(dir);
        if (err)
-               return err;
+               goto fail;
        err = dquot_initialize(inode);
        if (err)
-               return err;
+               goto fail;
 
        de = f2fs_find_entry(dir, &dentry->d_name, &page);
        if (!de) {
@@ -600,7 +602,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
        /* VFS negative dentries are incompatible with Encoding and
         * Case-insensitiveness. Eventually we'll want avoid
         * invalidating the dentries here, alongside with returning the
-        * negative dentries at f2fs_lookup(), when it is  better
+        * negative dentries at f2fs_lookup(), when it is better
         * supported by the VFS for the CI case.
         */
        if (IS_CASEFOLDED(dir))
@@ -1285,7 +1287,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
 }
 
 const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
-       .get_link       = f2fs_encrypted_get_link,
+       .get_link       = f2fs_encrypted_get_link,
        .getattr        = f2fs_getattr,
        .setattr        = f2fs_setattr,
        .listxattr      = f2fs_listxattr,
@@ -1311,7 +1313,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
 };
 
 const struct inode_operations f2fs_symlink_inode_operations = {
-       .get_link       = f2fs_get_link,
+       .get_link       = f2fs_get_link,
        .getattr        = f2fs_getattr,
        .setattr        = f2fs_setattr,
        .listxattr      = f2fs_listxattr,
@@ -1319,7 +1321,7 @@ const struct inode_operations f2fs_symlink_inode_operations = {
 
 const struct inode_operations f2fs_special_inode_operations = {
        .getattr        = f2fs_getattr,
-       .setattr        = f2fs_setattr,
+       .setattr        = f2fs_setattr,
        .get_acl        = f2fs_get_acl,
        .set_acl        = f2fs_set_acl,
        .listxattr      = f2fs_listxattr,
index 03e24df1c84f5c458f8284fd32c1f9f795093c16..9bbaa2614679f84d41062601bdbf0428b8fca21a 100644 (file)
@@ -1041,8 +1041,10 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
        trace_f2fs_truncate_inode_blocks_enter(inode, from);
 
        level = get_node_path(inode, from, offset, noffset);
-       if (level < 0)
+       if (level < 0) {
+               trace_f2fs_truncate_inode_blocks_exit(inode, level);
                return level;
+       }
 
        page = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(page)) {
@@ -1726,7 +1728,7 @@ continue_unlock:
                                        set_dentry_mark(page,
                                                f2fs_need_dentry_mark(sbi, ino));
                                }
-                               /*  may be written by other thread */
+                               /* may be written by other thread */
                                if (!PageDirty(page))
                                        set_page_dirty(page);
                        }
@@ -1814,12 +1816,11 @@ static bool flush_dirty_inode(struct page *page)
        return true;
 }
 
-int f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
+void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
 {
        pgoff_t index = 0;
        struct pagevec pvec;
        int nr_pages;
-       int ret = 0;
 
        pagevec_init(&pvec);
 
@@ -1858,7 +1859,6 @@ continue_unlock:
                pagevec_release(&pvec);
                cond_resched();
        }
-       return ret;
 }
 
 int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
@@ -1924,8 +1924,12 @@ continue_unlock:
                                goto continue_unlock;
                        }
 
-                       /* flush inline_data, if it's async context. */
-                       if (do_balance && is_inline_node(page)) {
+                       /* flush inline_data/inode, if it's async context. */
+                       if (!do_balance)
+                               goto write_node;
+
+                       /* flush inline_data */
+                       if (is_inline_node(page)) {
                                clear_inline_node(page);
                                unlock_page(page);
                                flush_inline_data(sbi, ino_of_node(page));
@@ -1938,7 +1942,7 @@ continue_unlock:
                                if (flush_dirty_inode(page))
                                        goto lock_node;
                        }
-
+write_node:
                        f2fs_wait_on_page_writeback(page, NODE, true, true);
 
                        if (!clear_page_dirty_for_io(page))
@@ -2097,7 +2101,7 @@ const struct address_space_operations f2fs_node_aops = {
        .invalidatepage = f2fs_invalidate_page,
        .releasepage    = f2fs_release_page,
 #ifdef CONFIG_MIGRATION
-       .migratepage    = f2fs_migrate_page,
+       .migratepage    = f2fs_migrate_page,
 #endif
 };
 
@@ -2108,7 +2112,7 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
 }
 
 static int __insert_free_nid(struct f2fs_sb_info *sbi,
-                       struct free_nid *i, enum nid_state state)
+                               struct free_nid *i)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
 
@@ -2116,10 +2120,8 @@ static int __insert_free_nid(struct f2fs_sb_info *sbi,
        if (err)
                return err;
 
-       f2fs_bug_on(sbi, state != i->state);
-       nm_i->nid_cnt[state]++;
-       if (state == FREE_NID)
-               list_add_tail(&i->list, &nm_i->free_nid_list);
+       nm_i->nid_cnt[FREE_NID]++;
+       list_add_tail(&i->list, &nm_i->free_nid_list);
        return 0;
 }
 
@@ -2241,7 +2243,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
                }
        }
        ret = true;
-       err = __insert_free_nid(sbi, i, FREE_NID);
+       err = __insert_free_nid(sbi, i);
 err_out:
        if (update) {
                update_free_nid_bitmap(sbi, nid, ret, build);
@@ -2572,7 +2574,7 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
        return nr - nr_shrink;
 }
 
-void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
+int f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
 {
        void *src_addr, *dst_addr;
        size_t inline_size;
@@ -2580,7 +2582,8 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
        struct f2fs_inode *ri;
 
        ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
-       f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
+       if (IS_ERR(ipage))
+               return PTR_ERR(ipage);
 
        ri = F2FS_INODE(page);
        if (ri->i_inline & F2FS_INLINE_XATTR) {
@@ -2599,6 +2602,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
 update_inode:
        f2fs_update_inode(inode, ipage);
        f2fs_put_page(ipage, 1);
+       return 0;
 }
 
 int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
index ae5310f02e7ff1b7fa20e8b1819449aff560e870..4f12ade6410a133c8479c0e215a468da28a6b3f2 100644 (file)
@@ -544,7 +544,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 
        /* step 1: recover xattr */
        if (IS_INODE(page)) {
-               f2fs_recover_inline_xattr(inode, page);
+               err = f2fs_recover_inline_xattr(inode, page);
+               if (err)
+                       goto out;
        } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
                err = f2fs_recover_xattr_data(inode, page);
                if (!err)
@@ -553,8 +555,12 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
        }
 
        /* step 2: recover inline data */
-       if (f2fs_recover_inline_data(inode, page))
+       err = f2fs_recover_inline_data(inode, page);
+       if (err) {
+               if (err == 1)
+                       err = 0;
                goto out;
+       }
 
        /* step 3: recover data indices */
        start = f2fs_start_bidx_of_node(ofs_of_node(page), inode);
@@ -742,7 +748,7 @@ next:
                f2fs_put_page(page, 1);
        }
        if (!err)
-               f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE);
+               f2fs_allocate_new_segments(sbi);
        return err;
 }
 
index 196f315035118eb2b767c3eded3c728078f7267a..a65d357f89a9fad7ec74d7716aea268e702c3f4a 100644 (file)
@@ -174,7 +174,7 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 
        if (f2fs_lfs_mode(sbi))
                return false;
-       if (sbi->gc_mode == GC_URGENT)
+       if (sbi->gc_mode == GC_URGENT_HIGH)
                return true;
        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                return true;
@@ -796,6 +796,18 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                }
                if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]++;
+
+               if (__is_large_section(sbi)) {
+                       unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+                       unsigned short valid_blocks =
+                               get_valid_blocks(sbi, segno, true);
+
+                       f2fs_bug_on(sbi, unlikely(!valid_blocks ||
+                                       valid_blocks == BLKS_PER_SEC(sbi)));
+
+                       if (!IS_CURSEC(sbi, secno))
+                               set_bit(secno, dirty_i->dirty_secmap);
+               }
        }
 }
 
@@ -803,6 +815,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       unsigned short valid_blocks;
 
        if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
                dirty_i->nr_dirty[dirty_type]--;
@@ -814,13 +827,26 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]--;
 
-               if (get_valid_blocks(sbi, segno, true) == 0) {
+               valid_blocks = get_valid_blocks(sbi, segno, true);
+               if (valid_blocks == 0) {
                        clear_bit(GET_SEC_FROM_SEG(sbi, segno),
                                                dirty_i->victim_secmap);
 #ifdef CONFIG_F2FS_CHECK_FS
                        clear_bit(segno, SIT_I(sbi)->invalid_segmap);
 #endif
                }
+               if (__is_large_section(sbi)) {
+                       unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+
+                       if (!valid_blocks ||
+                                       valid_blocks == BLKS_PER_SEC(sbi)) {
+                               clear_bit(secno, dirty_i->dirty_secmap);
+                               return;
+                       }
+
+                       if (!IS_CURSEC(sbi, secno))
+                               set_bit(secno, dirty_i->dirty_secmap);
+               }
        }
 }
 
@@ -1733,7 +1759,7 @@ static int issue_discard_thread(void *data)
                        continue;
                }
 
-               if (sbi->gc_mode == GC_URGENT)
+               if (sbi->gc_mode == GC_URGENT_HIGH)
                        __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
 
                sb_start_intwrite(sbi->sb);
@@ -2140,7 +2166,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
        new_vblocks = se->valid_blocks + del;
        offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
 
-       f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
+       f2fs_bug_on(sbi, (new_vblocks < 0 ||
                                (new_vblocks > sbi->blocks_per_seg)));
 
        se->valid_blocks = new_vblocks;
@@ -2605,7 +2631,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
        bool reversed = false;
 
        /* f2fs_need_SSR() already forces to do this */
-       if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
+       if (!v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
                curseg->next_segno = segno;
                return 1;
        }
@@ -2632,7 +2658,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
        for (; cnt-- > 0; reversed ? i-- : i++) {
                if (i == type)
                        continue;
-               if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
+               if (!v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
                        curseg->next_segno = segno;
                        return 1;
                }
@@ -2674,7 +2700,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
        stat_inc_seg_type(sbi, curseg);
 }
 
-void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
                                        unsigned int start, unsigned int end)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2707,28 +2733,35 @@ unlock:
        up_read(&SM_I(sbi)->curseg_lock);
 }
 
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
 {
-       struct curseg_info *curseg;
+       struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int old_segno;
-       int i;
 
-       down_write(&SIT_I(sbi)->sentry_lock);
+       if (!curseg->next_blkoff &&
+               !get_valid_blocks(sbi, curseg->segno, false) &&
+               !get_ckpt_valid_blocks(sbi, curseg->segno))
+               return;
 
-       for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
-               if (type != NO_CHECK_TYPE && i != type)
-                       continue;
+       old_segno = curseg->segno;
+       SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
+       locate_dirty_segment(sbi, old_segno);
+}
 
-               curseg = CURSEG_I(sbi, i);
-               if (type == NO_CHECK_TYPE || curseg->next_blkoff ||
-                               get_valid_blocks(sbi, curseg->segno, false) ||
-                               get_ckpt_valid_blocks(sbi, curseg->segno)) {
-                       old_segno = curseg->segno;
-                       SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
-                       locate_dirty_segment(sbi, old_segno);
-               }
-       }
+void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type)
+{
+       down_write(&SIT_I(sbi)->sentry_lock);
+       __allocate_new_segment(sbi, type);
+       up_write(&SIT_I(sbi)->sentry_lock);
+}
 
+void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
+{
+       int i;
+
+       down_write(&SIT_I(sbi)->sentry_lock);
+       for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
+               __allocate_new_segment(sbi, i);
        up_write(&SIT_I(sbi)->sentry_lock);
 }
 
@@ -3089,7 +3122,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                block_t old_blkaddr, block_t *new_blkaddr,
                struct f2fs_summary *sum, int type,
-               struct f2fs_io_info *fio, bool add_list)
+               struct f2fs_io_info *fio)
 {
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -3107,14 +3140,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                type = CURSEG_COLD_DATA;
        }
 
-       /*
-        * We need to wait for node_write to avoid block allocation during
-        * checkpoint. This can only happen to quota writes which can cause
-        * the below discard race condition.
-        */
-       if (IS_DATASEG(type))
-               down_write(&sbi->node_write);
-
        down_read(&SM_I(sbi)->curseg_lock);
 
        mutex_lock(&curseg->curseg_mutex);
@@ -3165,7 +3190,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        if (F2FS_IO_ALIGNED(sbi))
                fio->retry = false;
 
-       if (add_list) {
+       if (fio) {
                struct f2fs_bio_info *io;
 
                INIT_LIST_HEAD(&fio->list);
@@ -3180,9 +3205,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
        up_read(&SM_I(sbi)->curseg_lock);
 
-       if (IS_DATASEG(type))
-               up_write(&sbi->node_write);
-
        if (put_pin_sem)
                up_read(&sbi->pin_sem);
 }
@@ -3217,7 +3239,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
                down_read(&fio->sbi->io_order_lock);
 reallocate:
        f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-                       &fio->new_blkaddr, sum, type, fio, true);
+                       &fio->new_blkaddr, sum, type, fio);
        if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
                invalidate_mapping_pages(META_MAPPING(fio->sbi),
                                        fio->old_blkaddr, fio->old_blkaddr);
@@ -4293,8 +4315,9 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        struct free_segmap_info *free_i = FREE_I(sbi);
-       unsigned int segno = 0, offset = 0;
+       unsigned int segno = 0, offset = 0, secno;
        unsigned short valid_blocks;
+       unsigned short blks_per_sec = BLKS_PER_SEC(sbi);
 
        while (1) {
                /* find dirty segment based on free segmap */
@@ -4313,6 +4336,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
                __locate_dirty_segment(sbi, segno, DIRTY);
                mutex_unlock(&dirty_i->seglist_lock);
        }
+
+       if (!__is_large_section(sbi))
+               return;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for (segno = 0; segno < MAIN_SECS(sbi); segno += blks_per_sec) {
+               valid_blocks = get_valid_blocks(sbi, segno, true);
+               secno = GET_SEC_FROM_SEG(sbi, segno);
+
+               if (!valid_blocks || valid_blocks == blks_per_sec)
+                       continue;
+               if (IS_CURSEC(sbi, secno))
+                       continue;
+               set_bit(secno, dirty_i->dirty_secmap);
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
 }
 
 static int init_victim_secmap(struct f2fs_sb_info *sbi)
@@ -4349,6 +4388,14 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
                        return -ENOMEM;
        }
 
+       if (__is_large_section(sbi)) {
+               bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
+               dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
+                                               bitmap_size, GFP_KERNEL);
+               if (!dirty_i->dirty_secmap)
+                       return -ENOMEM;
+       }
+
        init_dirty_segmap(sbi);
        return init_victim_secmap(sbi);
 }
@@ -4775,6 +4822,12 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
        for (i = 0; i < NR_DIRTY_TYPE; i++)
                discard_dirty_segmap(sbi, i);
 
+       if (__is_large_section(sbi)) {
+               mutex_lock(&dirty_i->seglist_lock);
+               kvfree(dirty_i->dirty_secmap);
+               mutex_unlock(&dirty_i->seglist_lock);
+       }
+
        destroy_victim_secmap(sbi);
        SM_I(sbi)->dirty_info = NULL;
        kvfree(dirty_i);
index cba16cca518958d987eb8e414fbcb181f942c6a2..752b177073b29b0b2b7dd0f43c11ee7a3af99e20 100644 (file)
@@ -166,8 +166,11 @@ enum {
 struct victim_sel_policy {
        int alloc_mode;                 /* LFS or SSR */
        int gc_mode;                    /* GC_CB or GC_GREEDY */
-       unsigned long *dirty_segmap;    /* dirty segment bitmap */
-       unsigned int max_search;        /* maximum # of segments to search */
+       unsigned long *dirty_bitmap;    /* dirty segment/section bitmap */
+       unsigned int max_search;        /*
+                                        * maximum # of segments/sections
+                                        * to search
+                                        */
        unsigned int offset;            /* last scanned bitmap offset */
        unsigned int ofs_unit;          /* bitmap search unit */
        unsigned int min_cost;          /* minimum cost */
@@ -184,7 +187,7 @@ struct seg_entry {
        unsigned char *cur_valid_map_mir;       /* mirror of current valid bitmap */
 #endif
        /*
-        * # of valid blocks and the validity bitmap stored in the the last
+        * # of valid blocks and the validity bitmap stored in the last
         * checkpoint pack. This information is used by the SSR mode.
         */
        unsigned char *ckpt_valid_map;  /* validity bitmap of blocks last cp */
@@ -266,6 +269,7 @@ enum dirty_type {
 struct dirty_seglist_info {
        const struct victim_selection *v_ops;   /* victim selction operation */
        unsigned long *dirty_segmap[NR_DIRTY_TYPE];
+       unsigned long *dirty_secmap;
        struct mutex seglist_lock;              /* lock for segment bitmaps */
        int nr_dirty[NR_DIRTY_TYPE];            /* # of dirty segments */
        unsigned long *victim_secmap;           /* background GC victims */
index 23c49c313fb6820e68e80e1aa2946ec78f34c733..dfa072fa80815af4dfe7683c461bb68878d0fd5a 100644 (file)
@@ -350,7 +350,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
        set_opt(sbi, QUOTA);
        return 0;
 errout:
-       kvfree(qname);
+       kfree(qname);
        return ret;
 }
 
@@ -362,7 +362,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
                f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
                return -EINVAL;
        }
-       kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
+       kfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
        F2FS_OPTION(sbi).s_qf_names[qtype] = NULL;
        return 0;
 }
@@ -462,9 +462,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        substring_t args[MAX_OPT_ARGS];
+#ifdef CONFIG_F2FS_FS_COMPRESSION
        unsigned char (*ext)[F2FS_EXTENSION_LEN];
+       int ext_cnt;
+#endif
        char *p, *name;
-       int arg = 0, ext_cnt;
+       int arg = 0;
        kuid_t uid;
        kgid_t gid;
        int ret;
@@ -496,10 +499,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        } else if (!strcmp(name, "sync")) {
                                F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
                        } else {
-                               kvfree(name);
+                               kfree(name);
                                return -EINVAL;
                        }
-                       kvfree(name);
+                       kfree(name);
                        break;
                case Opt_disable_roll_forward:
                        set_opt(sbi, DISABLE_ROLL_FORWARD);
@@ -656,17 +659,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        if (!strcmp(name, "adaptive")) {
                                if (f2fs_sb_has_blkzoned(sbi)) {
                                        f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
-                                       kvfree(name);
+                                       kfree(name);
                                        return -EINVAL;
                                }
                                F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
                        } else if (!strcmp(name, "lfs")) {
                                F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
                        } else {
-                               kvfree(name);
+                               kfree(name);
                                return -EINVAL;
                        }
-                       kvfree(name);
+                       kfree(name);
                        break;
                case Opt_io_size_bits:
                        if (args->from && match_int(args, &arg))
@@ -792,10 +795,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        } else if (!strcmp(name, "fs-based")) {
                                F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
                        } else {
-                               kvfree(name);
+                               kfree(name);
                                return -EINVAL;
                        }
-                       kvfree(name);
+                       kfree(name);
                        break;
                case Opt_alloc:
                        name = match_strdup(&args[0]);
@@ -807,10 +810,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        } else if (!strcmp(name, "reuse")) {
                                F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
                        } else {
-                               kvfree(name);
+                               kfree(name);
                                return -EINVAL;
                        }
-                       kvfree(name);
+                       kfree(name);
                        break;
                case Opt_fsync:
                        name = match_strdup(&args[0]);
@@ -824,10 +827,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                                F2FS_OPTION(sbi).fsync_mode =
                                                        FSYNC_MODE_NOBARRIER;
                        } else {
-                               kvfree(name);
+                               kfree(name);
                                return -EINVAL;
                        }
-                       kvfree(name);
+                       kfree(name);
                        break;
                case Opt_test_dummy_encryption:
                        ret = f2fs_set_test_dummy_encryption(sb, p, &args[0],
@@ -862,6 +865,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                case Opt_checkpoint_enable:
                        clear_opt(sbi, DISABLE_CHECKPOINT);
                        break;
+#ifdef CONFIG_F2FS_FS_COMPRESSION
                case Opt_compress_algorithm:
                        if (!f2fs_sb_has_compression(sbi)) {
                                f2fs_err(sbi, "Compression feature if off");
@@ -927,6 +931,13 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        F2FS_OPTION(sbi).compress_ext_cnt++;
                        kfree(name);
                        break;
+#else
+               case Opt_compress_algorithm:
+               case Opt_compress_log_size:
+               case Opt_compress_extension:
+                       f2fs_info(sbi, "compression options not supported");
+                       break;
+#endif
                default:
                        f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
                                 p);
@@ -1024,6 +1035,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
        /* Will be used by directory only */
        fi->i_dir_level = F2FS_SB(sb)->dir_level;
 
+       fi->ra_offset = -1;
+
        return &fi->vfs_inode;
 }
 
@@ -1182,6 +1195,9 @@ static void f2fs_put_super(struct super_block *sb)
        int i;
        bool dropped;
 
+       /* unregister procfs/sysfs entries in advance to avoid race case */
+       f2fs_unregister_sysfs(sbi);
+
        f2fs_quota_off_umount(sb);
 
        /* prevent remaining shrinker jobs */
@@ -1247,19 +1263,17 @@ static void f2fs_put_super(struct super_block *sb)
 
        kvfree(sbi->ckpt);
 
-       f2fs_unregister_sysfs(sbi);
-
        sb->s_fs_info = NULL;
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
-       kvfree(sbi->raw_super);
+       kfree(sbi->raw_super);
 
        destroy_device_list(sbi);
        f2fs_destroy_xattr_caches(sbi);
        mempool_destroy(sbi->write_io_dummy);
 #ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
-               kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
+               kfree(F2FS_OPTION(sbi).s_qf_names[i]);
 #endif
        fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx);
        destroy_percpu_info(sbi);
@@ -1268,7 +1282,7 @@ static void f2fs_put_super(struct super_block *sb)
 #ifdef CONFIG_UNICODE
        utf8_unload(sbi->s_encoding);
 #endif
-       kvfree(sbi);
+       kfree(sbi);
 }
 
 int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1617,7 +1631,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
        else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
                seq_printf(seq, ",fsync_mode=%s", "nobarrier");
 
+#ifdef CONFIG_F2FS_FS_COMPRESSION
        f2fs_show_compress_options(seq, sbi->sb);
+#endif
        return 0;
 }
 
@@ -1768,7 +1784,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                                GFP_KERNEL);
                        if (!org_mount_opt.s_qf_names[i]) {
                                for (j = 0; j < i; j++)
-                                       kvfree(org_mount_opt.s_qf_names[j]);
+                                       kfree(org_mount_opt.s_qf_names[j]);
                                return -ENOMEM;
                        }
                } else {
@@ -1893,7 +1909,7 @@ skip:
 #ifdef CONFIG_QUOTA
        /* Release old quota file names */
        for (i = 0; i < MAXQUOTAS; i++)
-               kvfree(org_mount_opt.s_qf_names[i]);
+               kfree(org_mount_opt.s_qf_names[i]);
 #endif
        /* Update the POSIXACL Flag */
        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -1914,7 +1930,7 @@ restore_opts:
 #ifdef CONFIG_QUOTA
        F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt;
        for (i = 0; i < MAXQUOTAS; i++) {
-               kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
+               kfree(F2FS_OPTION(sbi).s_qf_names[i]);
                F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i];
        }
 #endif
@@ -3172,7 +3188,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
 
        /* No valid superblock */
        if (!*raw_super)
-               kvfree(super);
+               kfree(super);
        else
                err = 0;
 
@@ -3846,16 +3862,16 @@ free_bio_info:
 free_options:
 #ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
-               kvfree(F2FS_OPTION(sbi).s_qf_names[i]);
+               kfree(F2FS_OPTION(sbi).s_qf_names[i]);
 #endif
        fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx);
        kvfree(options);
 free_sb_buf:
-       kvfree(raw_super);
+       kfree(raw_super);
 free_sbi:
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
-       kvfree(sbi);
+       kfree(sbi);
 
        /* give only one another chance */
        if (retry_cnt > 0 && skip_recovery) {
index e877c59b9fdb4eed2ba28a9dbf2114452304da69..88ed9969cc86258972de0484d3ab193aebc1b6fb 100644 (file)
@@ -27,7 +27,7 @@ enum {
        NM_INFO,        /* struct f2fs_nm_info */
        F2FS_SBI,       /* struct f2fs_sb_info */
 #ifdef CONFIG_F2FS_STAT_FS
-       STAT_INFO,      /* struct f2fs_stat_info */
+       STAT_INFO,      /* struct f2fs_stat_info */
 #endif
 #ifdef CONFIG_F2FS_FAULT_INJECTION
        FAULT_INFO_RATE,        /* struct f2fs_fault_info */
@@ -223,6 +223,13 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a,
 }
 #endif
 
+static ssize_t main_blkaddr_show(struct f2fs_attr *a,
+                               struct f2fs_sb_info *sbi, char *buf)
+{      /* Formats MAIN_BLKADDR(sbi) into buf as unsigned decimal plus newline. */
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
+                       (unsigned long long)MAIN_BLKADDR(sbi));
+}
+
 static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
                        struct f2fs_sb_info *sbi, char *buf)
 {
@@ -350,16 +357,20 @@ out:
                return -EINVAL;
 
        if (!strcmp(a->attr.name, "gc_urgent")) {
-               if (t >= 1) {
-                       sbi->gc_mode = GC_URGENT;
+               if (t == 0) {
+                       sbi->gc_mode = GC_NORMAL;
+               } else if (t == 1) {
+                       sbi->gc_mode = GC_URGENT_HIGH;
                        if (sbi->gc_thread) {
                                sbi->gc_thread->gc_wake = 1;
                                wake_up_interruptible_all(
                                        &sbi->gc_thread->gc_wait_queue_head);
                                wake_up_discard_thread(sbi, true);
                        }
+               } else if (t == 2) {
+                       sbi->gc_mode = GC_URGENT_LOW;
                } else {
-                       sbi->gc_mode = GC_NORMAL;
+                       return -EINVAL;
                }
                return count;
        }
@@ -522,7 +533,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
@@ -565,6 +575,7 @@ F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
 F2FS_GENERAL_RO_ATTR(unusable);
 F2FS_GENERAL_RO_ATTR(encoding);
 F2FS_GENERAL_RO_ATTR(mounted_time_sec);
+F2FS_GENERAL_RO_ATTR(main_blkaddr);
 #ifdef CONFIG_F2FS_STAT_FS
 F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
 F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
@@ -706,7 +717,7 @@ static struct kobj_type f2fs_ktype = {
 };
 
 static struct kset f2fs_kset = {
-       .kobj   = {.ktype = &f2fs_ktype},
+       .kobj   = {.ktype = &f2fs_ktype},
 };
 
 static struct kobj_type f2fs_feat_ktype = {
index 865c9fb774fbeb5209824bd68fb46d1544dfb703..9eb0dba851e8858ae6138ab7ce16cd898b416c3e 100644 (file)
@@ -29,6 +29,8 @@
 #include "f2fs.h"
 #include "xattr.h"
 
+#define F2FS_VERIFY_VER        (1)     /* value stored in / required of fsverity_descriptor_location.version */
+
 static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode)
 {
        return round_up(inode->i_size, 65536);
@@ -152,7 +154,7 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc,
        struct inode *inode = file_inode(filp);
        u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
        struct fsverity_descriptor_location dloc = {
-               .version = cpu_to_le32(1),
+               .version = cpu_to_le32(F2FS_VERIFY_VER),
                .size = cpu_to_le32(desc_size),
                .pos = cpu_to_le64(desc_pos),
        };
@@ -199,7 +201,7 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
                            F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL);
        if (res < 0 && res != -ERANGE)
                return res;
-       if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) {
+       if (res != sizeof(dloc) || dloc.version != cpu_to_le32(F2FS_VERIFY_VER)) {
                f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format");
                return -EINVAL;
        }
index 4f6582ef7ee331b7b62c3c0a1c77e4c732f0d1a7..1b0736ce0918b52110ffe00c39903f6fe75de6cd 100644 (file)
@@ -175,8 +175,8 @@ const struct xattr_handler f2fs_xattr_trusted_handler = {
 const struct xattr_handler f2fs_xattr_advise_handler = {
        .name   = F2FS_SYSTEM_ADVISE_NAME,
        .flags  = F2FS_XATTR_INDEX_ADVISE,
-       .get    = f2fs_xattr_advise_get,
-       .set    = f2fs_xattr_advise_set,
+       .get    = f2fs_xattr_advise_get,
+       .set    = f2fs_xattr_advise_set,
 };
 
 const struct xattr_handler f2fs_xattr_security_handler = {
index 8639ab962a713e9c7f185c70d943dd0a276e860e..8a1c1311acac4795a2e73b3e14c9e22e3d79a477 100644 (file)
@@ -1891,6 +1891,69 @@ TRACE_EVENT(f2fs_iostat,
                __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
 );
 
+TRACE_EVENT(f2fs_bmap,
+       /* Logs dev/ino and one lblock/pblock pair; blocks print as unsigned. */
+       TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock),
+
+       TP_ARGS(inode, lblock, pblock),
+
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(ino_t, ino)
+               __field(sector_t, lblock)
+               __field(sector_t, pblock)
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ino            = inode->i_ino;
+               __entry->lblock         = lblock;
+               __entry->pblock         = pblock;
+       ),
+
+       TP_printk("dev = (%d,%d), ino = %lu, lblock:%llu, pblock:%llu",
+               show_dev_ino(__entry),
+               (unsigned long long)__entry->lblock,
+               (unsigned long long)__entry->pblock)
+);
+
+TRACE_EVENT(f2fs_fiemap,
+       /* Logs dev/ino, lblock/pblock (unsigned), len, flags and ret. */
+       TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock,
+               unsigned long long len, unsigned int flags, int ret),
+
+       TP_ARGS(inode, lblock, pblock, len, flags, ret),
+
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(ino_t, ino)
+               __field(sector_t, lblock)
+               __field(sector_t, pblock)
+               __field(unsigned long long, len)
+               __field(unsigned int, flags)
+               __field(int, ret)
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ino            = inode->i_ino;
+               __entry->lblock         = lblock;
+               __entry->pblock         = pblock;
+               __entry->len            = len;
+               __entry->flags          = flags;
+               __entry->ret            = ret;
+       ),
+
+       TP_printk("dev = (%d,%d), ino = %lu, lblock:%llu, pblock:%llu, "
+               "len:%llu, flags:%u, ret:%d",
+               show_dev_ino(__entry),
+               (unsigned long long)__entry->lblock,
+               (unsigned long long)__entry->pblock,
+               __entry->len,
+               __entry->flags,
+               __entry->ret)
+);
+
 #endif /* _TRACE_F2FS_H */
 
  /* This part must be outside protection */