f2fs: clean up commit_inmem_pages()
[linux-2.6-block.git] / fs / f2fs / segment.c
index 5854cc4e1d67c01045f41b99403aaaefbfc27894..1dc7a3d207864f70dbc571366148465c85eaa751 100644 (file)
@@ -328,7 +328,7 @@ void drop_inmem_page(struct inode *inode, struct page *page)
                        break;
        }
 
-       f2fs_bug_on(sbi, !cur || cur->page != page);
+       f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
        list_del(&cur->list);
        mutex_unlock(&fi->inmem_lock);
 
@@ -343,8 +343,7 @@ void drop_inmem_page(struct inode *inode, struct page *page)
        trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
 }
 
-static int __commit_inmem_pages(struct inode *inode,
-                                       struct list_head *revoke_list)
+static int __commit_inmem_pages(struct inode *inode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -357,9 +356,12 @@ static int __commit_inmem_pages(struct inode *inode,
                .op_flags = REQ_SYNC | REQ_PRIO,
                .io_type = FS_DATA_IO,
        };
+       struct list_head revoke_list;
        pgoff_t last_idx = ULONG_MAX;
        int err = 0;
 
+       INIT_LIST_HEAD(&revoke_list);
+
        list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
                struct page *page = cur->page;
 
@@ -393,14 +395,28 @@ retry:
                        last_idx = page->index;
                }
                unlock_page(page);
-               list_move_tail(&cur->list, revoke_list);
+               list_move_tail(&cur->list, &revoke_list);
        }
 
        if (last_idx != ULONG_MAX)
                f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
 
-       if (!err)
-               __revoke_inmem_pages(inode, revoke_list, false, false);
+       if (err) {
+               /*
+                * Try to revoke all committed pages. Revoking can itself fail
+                * due to no memory or another reason; if that happens, EAGAIN
+                * is returned, which means the transaction is no longer
+                * consistent, and the caller should use its journal to do the
+                * recovery or rewrite & commit the last transaction. For any
+                * other error number, revoking was done by the filesystem.
+                */
+               err = __revoke_inmem_pages(inode, &revoke_list, false, true);
+
+               /* drop all uncommitted pages */
+               __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+       } else {
+               __revoke_inmem_pages(inode, &revoke_list, false, false);
+       }
 
        return err;
 }
@@ -409,34 +425,16 @@ int commit_inmem_pages(struct inode *inode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
-       struct list_head revoke_list;
        int err;
 
-       INIT_LIST_HEAD(&revoke_list);
        f2fs_balance_fs(sbi, true);
        f2fs_lock_op(sbi);
 
        set_inode_flag(inode, FI_ATOMIC_COMMIT);
 
        mutex_lock(&fi->inmem_lock);
-       err = __commit_inmem_pages(inode, &revoke_list);
-       if (err) {
-               int ret;
-               /*
-                * try to revoke all committed pages, but still we could fail
-                * due to no memory or other reason, if that happened, EAGAIN
-                * will be returned, which means in such case, transaction is
-                * already not integrity, caller should use journal to do the
-                * recovery or rewrite & commit last transaction. For other
-                * error number, revoking was done by filesystem itself.
-                */
-               ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
-               if (ret)
-                       err = ret;
+       err = __commit_inmem_pages(inode);
 
-               /* drop all uncommitted pages */
-               __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
-       }
        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (!list_empty(&fi->inmem_ilist))
                list_del_init(&fi->inmem_ilist);
@@ -915,6 +913,40 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
 #endif
 }
 
+static void __init_discard_policy(struct f2fs_sb_info *sbi,
+                               struct discard_policy *dpolicy,
+                               int discard_type, unsigned int granularity)
+{
+       /* common policy */
+       dpolicy->type = discard_type;
+       dpolicy->sync = true;
+       dpolicy->granularity = granularity;
+
+       dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+       dpolicy->io_aware_gran = MAX_PLIST_NUM;
+
+       if (discard_type == DPOLICY_BG) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->io_aware = true;
+               dpolicy->sync = false;
+               if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
+                       dpolicy->granularity = 1;
+                       dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               }
+       } else if (discard_type == DPOLICY_FORCE) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->io_aware = false;
+       } else if (discard_type == DPOLICY_FSTRIM) {
+               dpolicy->io_aware = false;
+       } else if (discard_type == DPOLICY_UMOUNT) {
+               dpolicy->max_requests = UINT_MAX;
+               dpolicy->io_aware = false;
+       }
+}
+
+
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy,
@@ -929,6 +961,9 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
        if (dc->state != D_PREP)
                return;
 
+       if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+               return;
+
        trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
 
        dc->error = __blkdev_issue_discard(dc->bdev,
@@ -1130,68 +1165,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
        return 0;
 }
 
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
-                                       struct discard_policy *dpolicy,
-                                       unsigned int start, unsigned int end)
-{
-       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
-       struct rb_node **insert_p = NULL, *insert_parent = NULL;
-       struct discard_cmd *dc;
-       struct blk_plug plug;
-       int issued;
-
-next:
-       issued = 0;
-
-       mutex_lock(&dcc->cmd_lock);
-       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
-
-       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
-                                       NULL, start,
-                                       (struct rb_entry **)&prev_dc,
-                                       (struct rb_entry **)&next_dc,
-                                       &insert_p, &insert_parent, true);
-       if (!dc)
-               dc = next_dc;
-
-       blk_start_plug(&plug);
-
-       while (dc && dc->lstart <= end) {
-               struct rb_node *node;
-
-               if (dc->len < dpolicy->granularity)
-                       goto skip;
-
-               if (dc->state != D_PREP) {
-                       list_move_tail(&dc->list, &dcc->fstrim_list);
-                       goto skip;
-               }
-
-               __submit_discard_cmd(sbi, dpolicy, dc);
-
-               if (++issued >= dpolicy->max_requests) {
-                       start = dc->lstart + dc->len;
-
-                       blk_finish_plug(&plug);
-                       mutex_unlock(&dcc->cmd_lock);
-
-                       schedule();
-
-                       goto next;
-               }
-skip:
-               node = rb_next(&dc->rb_node);
-               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
-
-               if (fatal_signal_pending(current))
-                       break;
-       }
-
-       blk_finish_plug(&plug);
-       mutex_unlock(&dcc->cmd_lock);
-}
-
 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy)
 {
@@ -1332,7 +1305,18 @@ next:
 static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy)
 {
-       __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+       struct discard_policy dp;
+
+       if (dpolicy) {
+               __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+               return;
+       }
+
+       /* no policy given: wait under FSTRIM policy, then UMOUNT policy */
+       __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+       __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1377,11 +1361,13 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
        struct discard_policy dpolicy;
        bool dropped;
 
-       init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
+       __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
+                                       dcc->discard_granularity);
        __issue_discard_cmd(sbi, &dpolicy);
        dropped = __drop_discard_cmd(sbi);
-       __wait_all_discard_cmd(sbi, &dpolicy);
 
+       /* just to make sure there are no pending discard commands */
+       __wait_all_discard_cmd(sbi, NULL);
        return dropped;
 }
 
@@ -1397,7 +1383,7 @@ static int issue_discard_thread(void *data)
        set_freezable();
 
        do {
-               init_discard_policy(&dpolicy, DPOLICY_BG,
+               __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
                                        dcc->discard_granularity);
 
                wait_event_interruptible_timeout(*q,
@@ -1410,12 +1396,16 @@ static int issue_discard_thread(void *data)
                        continue;
                if (kthread_should_stop())
                        return 0;
+               if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+                       wait_ms = dpolicy.max_interval;
+                       continue;
+               }
 
                if (dcc->discard_wake)
                        dcc->discard_wake = 0;
 
                if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
-                       init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
+                       __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
 
                sb_start_intwrite(sbi->sb);
 
@@ -1708,32 +1698,6 @@ skip:
        wake_up_discard_thread(sbi, false);
 }
 
-void init_discard_policy(struct discard_policy *dpolicy,
-                               int discard_type, unsigned int granularity)
-{
-       /* common policy */
-       dpolicy->type = discard_type;
-       dpolicy->sync = true;
-       dpolicy->granularity = granularity;
-
-       dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-       dpolicy->io_aware_gran = MAX_PLIST_NUM;
-
-       if (discard_type == DPOLICY_BG) {
-               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-               dpolicy->io_aware = true;
-       } else if (discard_type == DPOLICY_FORCE) {
-               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-               dpolicy->io_aware = false;
-       } else if (discard_type == DPOLICY_FSTRIM) {
-               dpolicy->io_aware = false;
-       } else if (discard_type == DPOLICY_UMOUNT) {
-               dpolicy->io_aware = false;
-       }
-}
-
 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 {
        dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -2020,6 +1984,7 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
        struct f2fs_summary_block *dst;
 
        dst = (struct f2fs_summary_block *)page_address(page);
+       memset(dst, 0, PAGE_SIZE);
 
        mutex_lock(&curseg->curseg_mutex);
 
@@ -2373,11 +2338,72 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        return has_candidate;
 }
 
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+                                       struct discard_policy *dpolicy,
+                                       unsigned int start, unsigned int end)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+       struct rb_node **insert_p = NULL, *insert_parent = NULL;
+       struct discard_cmd *dc;
+       struct blk_plug plug;
+       int issued;
+
+next:
+       issued = 0;
+
+       mutex_lock(&dcc->cmd_lock);
+       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+                                       NULL, start,
+                                       (struct rb_entry **)&prev_dc,
+                                       (struct rb_entry **)&next_dc,
+                                       &insert_p, &insert_parent, true);
+       if (!dc)
+               dc = next_dc;
+
+       blk_start_plug(&plug);
+
+       while (dc && dc->lstart <= end) {
+               struct rb_node *node;
+
+               if (dc->len < dpolicy->granularity)
+                       goto skip;
+
+               if (dc->state != D_PREP) {
+                       list_move_tail(&dc->list, &dcc->fstrim_list);
+                       goto skip;
+               }
+
+               __submit_discard_cmd(sbi, dpolicy, dc);
+
+               if (++issued >= dpolicy->max_requests) {
+                       start = dc->lstart + dc->len;
+
+                       blk_finish_plug(&plug);
+                       mutex_unlock(&dcc->cmd_lock);
+                       __wait_all_discard_cmd(sbi, NULL);
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto next;
+               }
+skip:
+               node = rb_next(&dc->rb_node);
+               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+               if (fatal_signal_pending(current))
+                       break;
+       }
+
+       blk_finish_plug(&plug);
+       mutex_unlock(&dcc->cmd_lock);
+}
+
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 {
        __u64 start = F2FS_BYTES_TO_BLK(range->start);
        __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
-       unsigned int start_segno, end_segno, cur_segno;
+       unsigned int start_segno, end_segno;
        block_t start_block, end_block;
        struct cp_control cpc;
        struct discard_policy dpolicy;
@@ -2388,12 +2414,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
                return -EINVAL;
 
        if (end <= MAIN_BLKADDR(sbi))
-               goto out;
+               return -EINVAL;
 
        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
                f2fs_msg(sbi->sb, KERN_WARNING,
                        "Found FS corruption, run fsck to fix.");
-               goto out;
+               return -EIO;
        }
 
        /* start/end segment number in main_area */
@@ -2403,40 +2429,36 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 
        cpc.reason = CP_DISCARD;
        cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
+       cpc.trim_start = start_segno;
+       cpc.trim_end = end_segno;
 
-       /* do checkpoint to issue discard commands safely */
-       for (cur_segno = start_segno; cur_segno <= end_segno;
-                                       cur_segno = cpc.trim_end + 1) {
-               cpc.trim_start = cur_segno;
-
-               if (sbi->discard_blks == 0)
-                       break;
-               else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
-                       cpc.trim_end = end_segno;
-               else
-                       cpc.trim_end = min_t(unsigned int,
-                               rounddown(cur_segno +
-                               BATCHED_TRIM_SEGMENTS(sbi),
-                               sbi->segs_per_sec) - 1, end_segno);
-
-               mutex_lock(&sbi->gc_mutex);
-               err = write_checkpoint(sbi, &cpc);
-               mutex_unlock(&sbi->gc_mutex);
-               if (err)
-                       break;
+       if (sbi->discard_blks == 0)
+               goto out;
 
-               schedule();
-       }
+       mutex_lock(&sbi->gc_mutex);
+       err = write_checkpoint(sbi, &cpc);
+       mutex_unlock(&sbi->gc_mutex);
+       if (err)
+               goto out;
 
        start_block = START_BLOCK(sbi, start_segno);
-       end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
+       end_block = START_BLOCK(sbi, end_segno + 1);
 
-       init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+       __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
        __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
-       trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+
+       /*
+        * Discard candidates have been queued, but we don't need to wait for
+        * all of them when the runtime discard option is set, since they'll
+        * be issued in idle time anyway. Only wait, and report the trimmed
+        * length, when the user relies on periodic fstrim instead.
+        */
+       if (!test_opt(sbi, DISCARD)) {
+               trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
                                        start_block, end_block);
+               range->len = F2FS_BLK_TO_BYTES(trimmed);
+       }
 out:
-       range->len = F2FS_BLK_TO_BYTES(trimmed);
        return err;
 }
 
@@ -2757,6 +2779,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
                fio.op_flags &= ~REQ_META;
 
        set_page_writeback(page);
+       ClearPageError(page);
        f2fs_submit_page_write(&fio);
 
        f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
@@ -3116,6 +3139,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
 
        page = grab_meta_page(sbi, blkaddr++);
        kaddr = (unsigned char *)page_address(page);
+       memset(kaddr, 0, PAGE_SIZE);
 
        /* Step 1: write nat cache */
        seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -3140,6 +3164,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
                        if (!page) {
                                page = grab_meta_page(sbi, blkaddr++);
                                kaddr = (unsigned char *)page_address(page);
+                               memset(kaddr, 0, PAGE_SIZE);
                                written_size = 0;
                        }
                        summary = (struct f2fs_summary *)(kaddr + written_size);
@@ -3380,6 +3405,11 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                        int offset, sit_offset;
 
                        se = get_seg_entry(sbi, segno);
+#ifdef CONFIG_F2FS_CHECK_FS
+                       if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
+                                               SIT_VBLOCK_MAP_SIZE))
+                               f2fs_bug_on(sbi, 1);
+#endif
 
                        /* add discard candidates */
                        if (!(cpc->reason & CP_DISCARD)) {
@@ -3395,10 +3425,14 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                                                        cpu_to_le32(segno);
                                seg_info_to_raw_sit(se,
                                        &sit_in_journal(journal, offset));
+                               check_block_count(sbi, segno,
+                                       &sit_in_journal(journal, offset));
                        } else {
                                sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
                                seg_info_to_raw_sit(se,
                                                &raw_sit->entries[sit_offset]);
+                               check_block_count(sbi, segno,
+                                               &raw_sit->entries[sit_offset]);
                        }
 
                        __clear_bit(segno, bitmap);
@@ -3822,8 +3856,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
        sm_info->min_ssr_sections = reserved_sections(sbi);
 
-       sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
-
        INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
        init_rwsem(&sm_info->curseg_lock);