7c1a000d 1// SPDX-License-Identifier: GPL-2.0
0a8165d7 2/*
e05df3b1
JK
3 * fs/f2fs/node.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
e05df3b1
JK
7 */
8#include <linux/fs.h>
9#include <linux/f2fs_fs.h>
10#include <linux/mpage.h>
4034247a 11#include <linux/sched/mm.h>
e05df3b1
JK
12#include <linux/blkdev.h>
13#include <linux/pagevec.h>
14#include <linux/swap.h>
15
16#include "f2fs.h"
17#include "node.h"
18#include "segment.h"
87905682 19#include "xattr.h"
52118743 20#include "iostat.h"
51dd6249 21#include <trace/events/f2fs.h>
e05df3b1 22
d1e1ff97 23#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock)
f978f5a0 24
e05df3b1
JK
25static struct kmem_cache *nat_entry_slab;
26static struct kmem_cache *free_nid_slab;
aec71382 27static struct kmem_cache *nat_entry_set_slab;
50fa53ec 28static struct kmem_cache *fsync_node_entry_slab;
e05df3b1 29
a4f843bd
JK
30/*
31 * Check whether the given nid is within node id range.
32 */
4d57b86d 33int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
a4f843bd
JK
34{
35 if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
36 set_sbi_flag(sbi, SBI_NEED_FSCK);
dcbb4c10
JP
37 f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
38 __func__, nid);
95fa90c9 39 f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
10f966bb 40 return -EFSCORRUPTED;
a4f843bd
JK
41 }
42 return 0;
43}
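
/*
 * Usage sketch (illustrative, not taken from this file): callers validate a
 * nid read from disk before using it, e.g.
 *
 *	if (f2fs_check_nid_range(sbi, nid))
 *		return;
 *
 * A zero return means nid lies in [F2FS_ROOT_INO(sbi), max_nid); otherwise
 * -EFSCORRUPTED is returned and the SBI_NEED_FSCK flag has been set.
 */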
44
4d57b86d 45bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
cdfc41c1 46{
6fb03f3a 47 struct f2fs_nm_info *nm_i = NM_I(sbi);
d6d2b491 48 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
cdfc41c1 49 struct sysinfo val;
e5e7ea3c 50 unsigned long avail_ram;
cdfc41c1 51 unsigned long mem_size = 0;
6fb03f3a 52 bool res = false;
cdfc41c1 53
d6d2b491
ST
54 if (!nm_i)
55 return true;
56
cdfc41c1 57 si_meminfo(&val);
e5e7ea3c
JK
58
59 /* only uses low memory */
60 avail_ram = val.totalram - val.totalhigh;
61
429511cd 62 /*
71644dff 63 * give 25%, 25%, 50%, 50%, 25%, 25% of memory to each component, respectively
429511cd 64 */
6fb03f3a 65 if (type == FREE_NIDS) {
9a4ffdf5 66 mem_size = (nm_i->nid_cnt[FREE_NID] *
b8559dc2 67 sizeof(struct free_nid)) >> PAGE_SHIFT;
e5e7ea3c 68 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
6fb03f3a 69 } else if (type == NAT_ENTRIES) {
a95ba66a
JK
70 mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
71 sizeof(struct nat_entry)) >> PAGE_SHIFT;
e5e7ea3c 72 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
e589c2c4
JK
73 if (excess_cached_nats(sbi))
74 res = false;
a1257023
JK
75 } else if (type == DIRTY_DENTS) {
76 if (sbi->sb->s_bdi->wb.dirty_exceeded)
77 return false;
78 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
79 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
e5e7ea3c
JK
80 } else if (type == INO_ENTRIES) {
81 int i;
82
39d787be 83 for (i = 0; i < MAX_INO_ENTRY; i++)
8f73cbb7
KM
84 mem_size += sbi->im[i].ino_num *
85 sizeof(struct ino_entry);
86 mem_size >>= PAGE_SHIFT;
e5e7ea3c 87 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
71644dff
JK
88 } else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) {
89 enum extent_type etype = type == READ_EXTENT_CACHE ?
90 EX_READ : EX_BLOCK_AGE;
91 struct extent_tree_info *eti = &sbi->extent_tree[etype];
e7547dac
JK
92
93 mem_size = (atomic_read(&eti->total_ext_tree) *
7441ccef 94 sizeof(struct extent_tree) +
e7547dac 95 atomic_read(&eti->total_ext_node) *
09cbfeaf 96 sizeof(struct extent_node)) >> PAGE_SHIFT;
71644dff 97 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
d6d2b491
ST
98 } else if (type == DISCARD_CACHE) {
99 mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
100 sizeof(struct discard_cmd)) >> PAGE_SHIFT;
101 res = mem_size < (avail_ram * nm_i->ram_thresh / 100);
6ce19aff
CY
102 } else if (type == COMPRESS_PAGE) {
103#ifdef CONFIG_F2FS_FS_COMPRESSION
104 unsigned long free_ram = val.freeram;
105
106 /*
 107 * if free memory is lower than the watermark or the cached page count
 108 * exceeds the threshold, deny caching compressed pages.
109 */
110 res = (free_ram > avail_ram * sbi->compress_watermark / 100) &&
111 (COMPRESS_MAPPING(sbi)->nrpages <
112 free_ram * sbi->compress_percent / 100);
113#else
114 res = false;
115#endif
1e84371f 116 } else {
1663cae4
JK
117 if (!sbi->sb->s_bdi->wb.dirty_exceeded)
118 return true;
6fb03f3a
JK
119 }
120 return res;
cdfc41c1
JK
121}
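
/*
 * Worked example (illustrative) for the thresholds above: with avail_ram of
 * 1,000,000 low-memory pages and ram_thresh set to 10, the base budget
 * avail_ram * ram_thresh / 100 is 100,000 pages; the ">> 2" branches
 * (FREE_NIDS, NAT_ENTRIES, extent caches) may use 25% of that (25,000 pages)
 * and the ">> 1" branches (DIRTY_DENTS, INO_ENTRIES) 50% (50,000 pages),
 * while DISCARD_CACHE may use the full budget.
 */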
122
e05df3b1
JK
123static void clear_node_page_dirty(struct page *page)
124{
e05df3b1 125 if (PageDirty(page)) {
fd3a11af 126 f2fs_clear_page_cache_dirty_tag(page_folio(page));
e05df3b1 127 clear_page_dirty_for_io(page);
aec2f729 128 dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
e05df3b1
JK
129 }
130 ClearPageUptodate(page);
131}
132
133static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
134{
3acc4522 135 return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid));
e05df3b1
JK
136}
137
138static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
139{
140 struct page *src_page;
141 struct page *dst_page;
e05df3b1
JK
142 pgoff_t dst_off;
143 void *src_addr;
144 void *dst_addr;
145 struct f2fs_nm_info *nm_i = NM_I(sbi);
146
80551d17 147 dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
e05df3b1
JK
148
149 /* get current nat block page with lock */
80551d17 150 src_page = get_current_nat_page(sbi, nid);
edc55aaf
JK
151 if (IS_ERR(src_page))
152 return src_page;
4d57b86d 153 dst_page = f2fs_grab_meta_page(sbi, dst_off);
9850cf4a 154 f2fs_bug_on(sbi, PageDirty(src_page));
e05df3b1
JK
155
156 src_addr = page_address(src_page);
157 dst_addr = page_address(dst_page);
09cbfeaf 158 memcpy(dst_addr, src_addr, PAGE_SIZE);
e05df3b1
JK
159 set_page_dirty(dst_page);
160 f2fs_put_page(src_page, 1);
161
162 set_to_next_nat(nm_i, nid);
163
164 return dst_page;
165}
166
32410577
CY
167static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi,
168 nid_t nid, bool no_fail)
12f9ef37
YH
169{
170 struct nat_entry *new;
171
32410577
CY
172 new = f2fs_kmem_cache_alloc(nat_entry_slab,
173 GFP_F2FS_ZERO, no_fail, sbi);
12f9ef37
YH
174 if (new) {
175 nat_set_nid(new, nid);
176 nat_reset_flag(new);
177 }
178 return new;
179}
180
181static void __free_nat_entry(struct nat_entry *e)
182{
183 kmem_cache_free(nat_entry_slab, e);
184}
185
186/* must be locked by nat_tree_lock */
187static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
188 struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
189{
190 if (no_fail)
191 f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
192 else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
193 return NULL;
194
195 if (raw_ne)
196 node_info_from_raw_nat(&ne->ni, raw_ne);
22969158
CY
197
198 spin_lock(&nm_i->nat_list_lock);
12f9ef37 199 list_add_tail(&ne->list, &nm_i->nat_entries);
22969158
CY
200 spin_unlock(&nm_i->nat_list_lock);
201
a95ba66a
JK
202 nm_i->nat_cnt[TOTAL_NAT]++;
203 nm_i->nat_cnt[RECLAIMABLE_NAT]++;
12f9ef37
YH
204 return ne;
205}
206
e05df3b1
JK
207static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
208{
22969158
CY
209 struct nat_entry *ne;
210
211 ne = radix_tree_lookup(&nm_i->nat_root, n);
212
 213 /* for a recently accessed nat entry, move it to the tail of the LRU list */
214 if (ne && !get_nat_flag(ne, IS_DIRTY)) {
215 spin_lock(&nm_i->nat_list_lock);
216 if (!list_empty(&ne->list))
217 list_move_tail(&ne->list, &nm_i->nat_entries);
218 spin_unlock(&nm_i->nat_list_lock);
219 }
220
221 return ne;
e05df3b1
JK
222}
223
224static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
225 nid_t start, unsigned int nr, struct nat_entry **ep)
226{
227 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
228}
229
230static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
231{
e05df3b1 232 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
a95ba66a
JK
233 nm_i->nat_cnt[TOTAL_NAT]--;
234 nm_i->nat_cnt[RECLAIMABLE_NAT]--;
12f9ef37 235 __free_nat_entry(e);
e05df3b1
JK
236}
237
780de47c
CY
238static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
239 struct nat_entry *ne)
309cc2b6
JK
240{
241 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
242 struct nat_entry_set *head;
243
309cc2b6
JK
244 head = radix_tree_lookup(&nm_i->nat_set_root, set);
245 if (!head) {
32410577
CY
246 head = f2fs_kmem_cache_alloc(nat_entry_set_slab,
247 GFP_NOFS, true, NULL);
309cc2b6
JK
248
249 INIT_LIST_HEAD(&head->entry_list);
250 INIT_LIST_HEAD(&head->set_list);
251 head->set = set;
252 head->entry_cnt = 0;
9be32d72 253 f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
309cc2b6 254 }
780de47c
CY
255 return head;
256}
257
258static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
259 struct nat_entry *ne)
260{
261 struct nat_entry_set *head;
262 bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR;
263
264 if (!new_ne)
265 head = __grab_nat_entry_set(nm_i, ne);
266
267 /*
268 * update entry_cnt in below condition:
269 * 1. update NEW_ADDR to valid block address;
270 * 2. update old block address to new one;
271 */
272 if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) ||
273 !get_nat_flag(ne, IS_DIRTY)))
274 head->entry_cnt++;
275
276 set_nat_flag(ne, IS_PREALLOC, new_ne);
febeca6d
CY
277
278 if (get_nat_flag(ne, IS_DIRTY))
279 goto refresh_list;
280
a95ba66a
JK
281 nm_i->nat_cnt[DIRTY_NAT]++;
282 nm_i->nat_cnt[RECLAIMABLE_NAT]--;
309cc2b6 283 set_nat_flag(ne, IS_DIRTY, true);
febeca6d 284refresh_list:
22969158 285 spin_lock(&nm_i->nat_list_lock);
780de47c 286 if (new_ne)
febeca6d
CY
287 list_del_init(&ne->list);
288 else
289 list_move_tail(&ne->list, &head->entry_list);
22969158 290 spin_unlock(&nm_i->nat_list_lock);
309cc2b6
JK
291}
292
293static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
0b28b71e 294 struct nat_entry_set *set, struct nat_entry *ne)
309cc2b6 295{
22969158 296 spin_lock(&nm_i->nat_list_lock);
0b28b71e 297 list_move_tail(&ne->list, &nm_i->nat_entries);
22969158
CY
298 spin_unlock(&nm_i->nat_list_lock);
299
0b28b71e
KM
300 set_nat_flag(ne, IS_DIRTY, false);
301 set->entry_cnt--;
a95ba66a
JK
302 nm_i->nat_cnt[DIRTY_NAT]--;
303 nm_i->nat_cnt[RECLAIMABLE_NAT]++;
309cc2b6
JK
304}
305
306static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
307 nid_t start, unsigned int nr, struct nat_entry_set **ep)
308{
309 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
310 start, nr);
311}
312
50fa53ec
CY
313bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
314{
315 return NODE_MAPPING(sbi) == page->mapping &&
316 IS_DNODE(page) && is_cold_node(page);
317}
318
319void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
320{
321 spin_lock_init(&sbi->fsync_node_lock);
322 INIT_LIST_HEAD(&sbi->fsync_node_list);
323 sbi->fsync_seg_id = 0;
324 sbi->fsync_node_num = 0;
325}
326
327static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
328 struct page *page)
329{
330 struct fsync_node_entry *fn;
331 unsigned long flags;
332 unsigned int seq_id;
333
32410577
CY
334 fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab,
335 GFP_NOFS, true, NULL);
50fa53ec
CY
336
337 get_page(page);
338 fn->page = page;
339 INIT_LIST_HEAD(&fn->list);
340
341 spin_lock_irqsave(&sbi->fsync_node_lock, flags);
342 list_add_tail(&fn->list, &sbi->fsync_node_list);
343 fn->seq_id = sbi->fsync_seg_id++;
344 seq_id = fn->seq_id;
345 sbi->fsync_node_num++;
346 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
347
348 return seq_id;
349}
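
/*
 * Note (descriptive): each page added here is pinned with get_page() and
 * queued on sbi->fsync_node_list, and the returned seq_id comes from the
 * monotonically increasing sbi->fsync_seg_id, so an fsync can later wait
 * for exactly the node writes it issued.  f2fs_del_fsync_node_entry()
 * below unpins the page and drops the entry again.
 */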
350
351void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
352{
353 struct fsync_node_entry *fn;
354 unsigned long flags;
355
356 spin_lock_irqsave(&sbi->fsync_node_lock, flags);
357 list_for_each_entry(fn, &sbi->fsync_node_list, list) {
358 if (fn->page == page) {
359 list_del(&fn->list);
360 sbi->fsync_node_num--;
361 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
362 kmem_cache_free(fsync_node_entry_slab, fn);
363 put_page(page);
364 return;
365 }
366 }
367 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
368 f2fs_bug_on(sbi, 1);
369}
370
371void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
372{
373 unsigned long flags;
374
375 spin_lock_irqsave(&sbi->fsync_node_lock, flags);
376 sbi->fsync_seg_id = 0;
377 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
378}
379
4d57b86d 380int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
e05df3b1
JK
381{
382 struct f2fs_nm_info *nm_i = NM_I(sbi);
383 struct nat_entry *e;
2dcf51ab 384 bool need = false;
e05df3b1 385
e4544b63 386 f2fs_down_read(&nm_i->nat_tree_lock);
e05df3b1 387 e = __lookup_nat_cache(nm_i, nid);
2dcf51ab
JK
388 if (e) {
389 if (!get_nat_flag(e, IS_CHECKPOINTED) &&
390 !get_nat_flag(e, HAS_FSYNCED_INODE))
391 need = true;
392 }
e4544b63 393 f2fs_up_read(&nm_i->nat_tree_lock);
2dcf51ab 394 return need;
e05df3b1
JK
395}
396
4d57b86d 397bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
479f40c4
JK
398{
399 struct f2fs_nm_info *nm_i = NM_I(sbi);
400 struct nat_entry *e;
2dcf51ab 401 bool is_cp = true;
479f40c4 402
e4544b63 403 f2fs_down_read(&nm_i->nat_tree_lock);
2dcf51ab
JK
404 e = __lookup_nat_cache(nm_i, nid);
405 if (e && !get_nat_flag(e, IS_CHECKPOINTED))
406 is_cp = false;
e4544b63 407 f2fs_up_read(&nm_i->nat_tree_lock);
2dcf51ab 408 return is_cp;
479f40c4
JK
409}
410
4d57b86d 411bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
b6fe5873
JK
412{
413 struct f2fs_nm_info *nm_i = NM_I(sbi);
414 struct nat_entry *e;
88bd02c9 415 bool need_update = true;
b6fe5873 416
e4544b63 417 f2fs_down_read(&nm_i->nat_tree_lock);
88bd02c9
JK
418 e = __lookup_nat_cache(nm_i, ino);
419 if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
420 (get_nat_flag(e, IS_CHECKPOINTED) ||
421 get_nat_flag(e, HAS_FSYNCED_INODE)))
422 need_update = false;
e4544b63 423 f2fs_up_read(&nm_i->nat_tree_lock);
88bd02c9 424 return need_update;
b6fe5873
JK
425}
426
12f9ef37 427/* must be locked by nat_tree_lock */
1515aef0 428static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
e05df3b1
JK
429 struct f2fs_nat_entry *ne)
430{
1515aef0 431 struct f2fs_nm_info *nm_i = NM_I(sbi);
12f9ef37 432 struct nat_entry *new, *e;
9be32d72 433
0df035c7 434 /* Let's mitigate lock contention of nat_tree_lock during checkpoint */
e4544b63 435 if (f2fs_rwsem_is_locked(&sbi->cp_global_sem))
0df035c7
JK
436 return;
437
32410577 438 new = __alloc_nat_entry(sbi, nid, false);
12f9ef37
YH
439 if (!new)
440 return;
441
e4544b63 442 f2fs_down_write(&nm_i->nat_tree_lock);
e05df3b1 443 e = __lookup_nat_cache(nm_i, nid);
12f9ef37
YH
444 if (!e)
445 e = __init_nat_entry(nm_i, new, ne, false);
446 else
0c0b471e
EB
447 f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
448 nat_get_blkaddr(e) !=
449 le32_to_cpu(ne->block_addr) ||
1515aef0 450 nat_get_version(e) != ne->version);
e4544b63 451 f2fs_up_write(&nm_i->nat_tree_lock);
12f9ef37
YH
452 if (e != new)
453 __free_nat_entry(new);
e05df3b1
JK
454}
455
456static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
479f40c4 457 block_t new_blkaddr, bool fsync_done)
e05df3b1
JK
458{
459 struct f2fs_nm_info *nm_i = NM_I(sbi);
460 struct nat_entry *e;
32410577 461 struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
9be32d72 462
e4544b63 463 f2fs_down_write(&nm_i->nat_tree_lock);
e05df3b1
JK
464 e = __lookup_nat_cache(nm_i, ni->nid);
465 if (!e) {
12f9ef37 466 e = __init_nat_entry(nm_i, new, NULL, true);
5c27f4ee 467 copy_node_info(&e->ni, ni);
9850cf4a 468 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
e05df3b1
JK
469 } else if (new_blkaddr == NEW_ADDR) {
470 /*
471 * when nid is reallocated,
 472 * the previous nat entry may remain in the nat cache.
473 * So, reinitialize it with new information.
474 */
5c27f4ee 475 copy_node_info(&e->ni, ni);
9850cf4a 476 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
e05df3b1 477 }
12f9ef37
YH
478 /* let's free early to reduce memory consumption */
479 if (e != new)
480 __free_nat_entry(new);
e05df3b1 481
e05df3b1 482 /* sanity check */
9850cf4a
JK
483 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
484 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
e05df3b1 485 new_blkaddr == NULL_ADDR);
9850cf4a 486 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
e05df3b1 487 new_blkaddr == NEW_ADDR);
93770ab7 488 f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) &&
e05df3b1
JK
489 new_blkaddr == NEW_ADDR);
490
e1c42045 491 /* increment version no as node is removed */
e05df3b1
JK
492 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
493 unsigned char version = nat_get_version(e);
5f029c04 494
e05df3b1
JK
495 nat_set_version(e, inc_node_version(version));
496 }
497
498 /* change address */
499 nat_set_blkaddr(e, new_blkaddr);
93770ab7 500 if (!__is_valid_data_blkaddr(new_blkaddr))
88bd02c9 501 set_nat_flag(e, IS_CHECKPOINTED, false);
e05df3b1 502 __set_nat_cache_dirty(nm_i, e);
479f40c4
JK
503
504 /* update fsync_mark if its inode nat entry is still alive */
d5b692b7
CY
505 if (ni->nid != ni->ino)
506 e = __lookup_nat_cache(nm_i, ni->ino);
88bd02c9
JK
507 if (e) {
508 if (fsync_done && ni->nid == ni->ino)
509 set_nat_flag(e, HAS_FSYNCED_INODE, true);
510 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
511 }
e4544b63 512 f2fs_up_write(&nm_i->nat_tree_lock);
e05df3b1
JK
513}
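
/*
 * Summary (descriptive, inferred from the checks above): a nat entry goes
 * from NULL_ADDR to NEW_ADDR when a node is preallocated, from NEW_ADDR to a
 * real block address when the page is written out, and back to NULL_ADDR
 * when the node is truncated (which also increments the node version).
 * Every change marks the entry dirty via __set_nat_cache_dirty() so it is
 * flushed to the NAT area at the next checkpoint.
 */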
514
4d57b86d 515int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
e05df3b1
JK
516{
517 struct f2fs_nm_info *nm_i = NM_I(sbi);
1b38dc8e 518 int nr = nr_shrink;
e05df3b1 519
e4544b63 520 if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock))
b873b798 521 return 0;
e05df3b1 522
22969158
CY
523 spin_lock(&nm_i->nat_list_lock);
524 while (nr_shrink) {
e05df3b1 525 struct nat_entry *ne;
22969158
CY
526
527 if (list_empty(&nm_i->nat_entries))
528 break;
529
e05df3b1
JK
530 ne = list_first_entry(&nm_i->nat_entries,
531 struct nat_entry, list);
22969158
CY
532 list_del(&ne->list);
533 spin_unlock(&nm_i->nat_list_lock);
534
e05df3b1
JK
535 __del_from_nat_cache(nm_i, ne);
536 nr_shrink--;
22969158
CY
537
538 spin_lock(&nm_i->nat_list_lock);
e05df3b1 539 }
22969158
CY
540 spin_unlock(&nm_i->nat_list_lock);
541
e4544b63 542 f2fs_up_write(&nm_i->nat_tree_lock);
1b38dc8e 543 return nr - nr_shrink;
e05df3b1
JK
544}
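
/*
 * Usage sketch (illustrative): a memory-pressure caller such as the f2fs
 * shrinker asks for a number of clean nat entries to drop and gets back how
 * many were actually freed, e.g.
 *
 *	freed = f2fs_try_to_free_nats(sbi, nr_to_scan);
 *
 * A return of 0 can also mean nat_tree_lock was contended, since only a
 * trylock is attempted.
 */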
545
7735730d 546int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
a9419b63 547 struct node_info *ni, bool checkpoint_context)
e05df3b1
JK
548{
549 struct f2fs_nm_info *nm_i = NM_I(sbi);
550 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
b7ad7512 551 struct f2fs_journal *journal = curseg->journal;
e05df3b1
JK
552 nid_t start_nid = START_NID(nid);
553 struct f2fs_nat_block *nat_blk;
554 struct page *page = NULL;
555 struct f2fs_nat_entry ne;
556 struct nat_entry *e;
66a82d1f 557 pgoff_t index;
93770ab7 558 block_t blkaddr;
e05df3b1
JK
559 int i;
560
561 ni->nid = nid;
2eeb0dce 562retry:
e05df3b1 563 /* Check nat cache */
e4544b63 564 f2fs_down_read(&nm_i->nat_tree_lock);
e05df3b1
JK
565 e = __lookup_nat_cache(nm_i, nid);
566 if (e) {
567 ni->ino = nat_get_ino(e);
568 ni->blk_addr = nat_get_blkaddr(e);
569 ni->version = nat_get_version(e);
e4544b63 570 f2fs_up_read(&nm_i->nat_tree_lock);
7735730d 571 return 0;
1515aef0 572 }
e05df3b1 573
2eeb0dce
JK
574 /*
575 * Check current segment summary by trying to grab journal_rwsem first.
 576 * This sem is on the critical path of the checkpoint, which requires the
 577 * above nat_tree_lock. Therefore, if we fail to grab it here, we should
 578 * retry without bothering the checkpoint.
579 */
e4544b63 580 if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
2eeb0dce 581 down_read(&curseg->journal_rwsem);
e4544b63 582 } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) ||
a9419b63 583 !down_read_trylock(&curseg->journal_rwsem)) {
e4544b63 584 f2fs_up_read(&nm_i->nat_tree_lock);
2eeb0dce
JK
585 goto retry;
586 }
3547ea96 587
4d57b86d 588 i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
e05df3b1 589 if (i >= 0) {
dfc08a12 590 ne = nat_in_journal(journal, i);
e05df3b1
JK
591 node_info_from_raw_nat(ni, &ne);
592 }
544b53da 593 up_read(&curseg->journal_rwsem);
66a82d1f 594 if (i >= 0) {
e4544b63 595 f2fs_up_read(&nm_i->nat_tree_lock);
e05df3b1 596 goto cache;
66a82d1f 597 }
e05df3b1
JK
598
599 /* Fill node_info from nat page */
66a82d1f 600 index = current_nat_addr(sbi, nid);
e4544b63 601 f2fs_up_read(&nm_i->nat_tree_lock);
66a82d1f 602
4d57b86d 603 page = f2fs_get_meta_page(sbi, index);
7735730d
CY
604 if (IS_ERR(page))
605 return PTR_ERR(page);
606
e05df3b1
JK
607 nat_blk = (struct f2fs_nat_block *)page_address(page);
608 ne = nat_blk->entries[nid - start_nid];
609 node_info_from_raw_nat(ni, &ne);
610 f2fs_put_page(page, 1);
611cache:
93770ab7
CY
612 blkaddr = le32_to_cpu(ne.block_addr);
613 if (__is_valid_data_blkaddr(blkaddr) &&
614 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
615 return -EFAULT;
616
e05df3b1 617 /* cache nat entry */
1515aef0 618 cache_nat_entry(sbi, nid, &ne);
7735730d 619 return 0;
e05df3b1
JK
620}
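
/*
 * Usage sketch (illustrative): callers resolve a nid to its on-disk location
 * before reading or rewriting the node page, e.g.
 *
 *	struct node_info ni;
 *	block_t blkaddr;
 *	int err;
 *
 *	err = f2fs_get_node_info(sbi, nid, &ni, false);
 *	if (err)
 *		return err;
 *	blkaddr = ni.blk_addr;
 *
 * The lookup order above is: the in-memory nat cache, then the hot-data
 * curseg journal, then the NAT block read via f2fs_get_meta_page().
 */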
621
79344efb
JK
622/*
623 * readahead MAX_RA_NODE number of node pages.
624 */
4d57b86d 625static void f2fs_ra_node_pages(struct page *parent, int start, int n)
79344efb
JK
626{
627 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
628 struct blk_plug plug;
629 int i, end;
630 nid_t nid;
631
632 blk_start_plug(&plug);
633
634 /* Then, try readahead for siblings of the desired node */
635 end = start + n;
d7e9a903 636 end = min(end, (int)NIDS_PER_BLOCK);
79344efb
JK
637 for (i = start; i < end; i++) {
638 nid = get_nid(parent, i, false);
4d57b86d 639 f2fs_ra_node_page(sbi, nid);
79344efb
JK
640 }
641
642 blk_finish_plug(&plug);
643}
644
4d57b86d 645pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
3cf45747
CY
646{
647 const long direct_index = ADDRS_PER_INODE(dn->inode);
d02a6e61
CY
648 const long direct_blks = ADDRS_PER_BLOCK(dn->inode);
649 const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK;
650 unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode);
3cf45747
CY
651 int cur_level = dn->cur_level;
652 int max_level = dn->max_level;
653 pgoff_t base = 0;
654
655 if (!dn->max_level)
656 return pgofs + 1;
657
658 while (max_level-- > cur_level)
659 skipped_unit *= NIDS_PER_BLOCK;
660
661 switch (dn->max_level) {
662 case 3:
663 base += 2 * indirect_blks;
df561f66 664 fallthrough;
3cf45747
CY
665 case 2:
666 base += 2 * direct_blks;
df561f66 667 fallthrough;
3cf45747
CY
668 case 1:
669 base += direct_index;
670 break;
671 default:
672 f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
673 }
674
675 return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
676}
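
/*
 * Worked example (illustrative): if a lookup stopped at cur_level inside a
 * double-indirect tree (max_level == 3), skipped_unit starts at
 * ADDRS_PER_BLOCK and is multiplied by NIDS_PER_BLOCK once for each level
 * between cur_level and max_level, and base is the file offset where the
 * double-indirect area begins (direct_index + 2 * direct_blks +
 * 2 * indirect_blks).  The return value is therefore the first file offset
 * served by the next subtree at the level where the lookup stopped.
 */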
677
0a8165d7 678/*
e05df3b1
JK
679 * The maximum depth is four.
680 * Offset[0] will have raw inode offset.
681 */
81ca7350 682static int get_node_path(struct inode *inode, long block,
de93653f 683 int offset[4], unsigned int noffset[4])
e05df3b1 684{
81ca7350 685 const long direct_index = ADDRS_PER_INODE(inode);
d02a6e61 686 const long direct_blks = ADDRS_PER_BLOCK(inode);
e05df3b1 687 const long dptrs_per_blk = NIDS_PER_BLOCK;
d02a6e61 688 const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK;
e05df3b1
JK
689 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
690 int n = 0;
691 int level = 0;
692
693 noffset[0] = 0;
694
695 if (block < direct_index) {
25c0a6e5 696 offset[n] = block;
e05df3b1
JK
697 goto got;
698 }
699 block -= direct_index;
700 if (block < direct_blks) {
701 offset[n++] = NODE_DIR1_BLOCK;
702 noffset[n] = 1;
25c0a6e5 703 offset[n] = block;
e05df3b1
JK
704 level = 1;
705 goto got;
706 }
707 block -= direct_blks;
708 if (block < direct_blks) {
709 offset[n++] = NODE_DIR2_BLOCK;
710 noffset[n] = 2;
25c0a6e5 711 offset[n] = block;
e05df3b1
JK
712 level = 1;
713 goto got;
714 }
715 block -= direct_blks;
716 if (block < indirect_blks) {
717 offset[n++] = NODE_IND1_BLOCK;
718 noffset[n] = 3;
719 offset[n++] = block / direct_blks;
720 noffset[n] = 4 + offset[n - 1];
25c0a6e5 721 offset[n] = block % direct_blks;
e05df3b1
JK
722 level = 2;
723 goto got;
724 }
725 block -= indirect_blks;
726 if (block < indirect_blks) {
727 offset[n++] = NODE_IND2_BLOCK;
728 noffset[n] = 4 + dptrs_per_blk;
729 offset[n++] = block / direct_blks;
730 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
25c0a6e5 731 offset[n] = block % direct_blks;
e05df3b1
JK
732 level = 2;
733 goto got;
734 }
735 block -= indirect_blks;
736 if (block < dindirect_blks) {
737 offset[n++] = NODE_DIND_BLOCK;
738 noffset[n] = 5 + (dptrs_per_blk * 2);
739 offset[n++] = block / indirect_blks;
740 noffset[n] = 6 + (dptrs_per_blk * 2) +
741 offset[n - 1] * (dptrs_per_blk + 1);
742 offset[n++] = (block / direct_blks) % dptrs_per_blk;
743 noffset[n] = 7 + (dptrs_per_blk * 2) +
744 offset[n - 2] * (dptrs_per_blk + 1) +
745 offset[n - 1];
25c0a6e5 746 offset[n] = block % direct_blks;
e05df3b1
JK
747 level = 3;
748 goto got;
749 } else {
adb6dc19 750 return -E2BIG;
e05df3b1
JK
751 }
752got:
753 return level;
754}
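
/*
 * Worked example (illustrative): file block offsets are consumed in order by
 * the inode's direct pointers (direct_index entries), two direct node blocks
 * (direct_blks each), two single-indirect areas (indirect_blks each) and one
 * double-indirect area.  So for block == direct_index, i.e. exactly
 * ADDRS_PER_INODE(inode) blocks into the file, the result is level 1 with
 * offset[0] = NODE_DIR1_BLOCK and offset[1] = 0.
 */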
755
756/*
757 * Caller should call f2fs_put_dnode(dn).
4f4124d0 758 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
7a88ddb5 759 * f2fs_unlock_op() only if mode is set with ALLOC_NODE.
e05df3b1 760 */
4d57b86d 761int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
e05df3b1 762{
4081363f 763 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
e05df3b1 764 struct page *npage[4];
f1a3b98e 765 struct page *parent = NULL;
e05df3b1
JK
766 int offset[4];
767 unsigned int noffset[4];
768 nid_t nids[4];
3cf45747 769 int level, i = 0;
e05df3b1
JK
770 int err = 0;
771
81ca7350 772 level = get_node_path(dn->inode, index, offset, noffset);
adb6dc19
JK
773 if (level < 0)
774 return level;
e05df3b1
JK
775
776 nids[0] = dn->inode->i_ino;
1646cfac 777 npage[0] = dn->inode_page;
e05df3b1 778
1646cfac 779 if (!npage[0]) {
4d57b86d 780 npage[0] = f2fs_get_node_page(sbi, nids[0]);
1646cfac
JK
781 if (IS_ERR(npage[0]))
782 return PTR_ERR(npage[0]);
783 }
f1a3b98e
JK
784
785 /* if inline_data is set, should not report any block indices */
786 if (f2fs_has_inline_data(dn->inode) && index) {
76629165 787 err = -ENOENT;
f1a3b98e
JK
788 f2fs_put_page(npage[0], 1);
789 goto release_out;
790 }
791
e05df3b1 792 parent = npage[0];
52c2db3f
CL
793 if (level != 0)
794 nids[1] = get_nid(parent, offset[0], true);
e05df3b1
JK
795 dn->inode_page = npage[0];
796 dn->inode_page_locked = true;
797
798 /* get indirect or direct nodes */
799 for (i = 1; i <= level; i++) {
800 bool done = false;
801
266e97a8 802 if (!nids[i] && mode == ALLOC_NODE) {
e05df3b1 803 /* alloc new node */
4d57b86d 804 if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
e05df3b1
JK
805 err = -ENOSPC;
806 goto release_pages;
807 }
808
809 dn->nid = nids[i];
4d57b86d 810 npage[i] = f2fs_new_node_page(dn, noffset[i]);
e05df3b1 811 if (IS_ERR(npage[i])) {
4d57b86d 812 f2fs_alloc_nid_failed(sbi, nids[i]);
e05df3b1
JK
813 err = PTR_ERR(npage[i]);
814 goto release_pages;
815 }
816
817 set_nid(parent, offset[i - 1], nids[i], i == 1);
4d57b86d 818 f2fs_alloc_nid_done(sbi, nids[i]);
e05df3b1 819 done = true;
266e97a8 820 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
4d57b86d 821 npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]);
e05df3b1
JK
822 if (IS_ERR(npage[i])) {
823 err = PTR_ERR(npage[i]);
824 goto release_pages;
825 }
826 done = true;
827 }
828 if (i == 1) {
829 dn->inode_page_locked = false;
830 unlock_page(parent);
831 } else {
832 f2fs_put_page(parent, 1);
833 }
834
835 if (!done) {
4d57b86d 836 npage[i] = f2fs_get_node_page(sbi, nids[i]);
e05df3b1
JK
837 if (IS_ERR(npage[i])) {
838 err = PTR_ERR(npage[i]);
839 f2fs_put_page(npage[0], 0);
840 goto release_out;
841 }
842 }
843 if (i < level) {
844 parent = npage[i];
845 nids[i + 1] = get_nid(parent, offset[i], false);
846 }
847 }
848 dn->nid = nids[level];
849 dn->ofs_in_node = offset[level];
850 dn->node_page = npage[level];
a2ced1ce 851 dn->data_blkaddr = f2fs_data_blkaddr(dn);
94afd6d6
CY
852
853 if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
854 f2fs_sb_has_readonly(sbi)) {
4b99ecd3
CY
855 unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
856 unsigned int ofs_in_node = dn->ofs_in_node;
857 pgoff_t fofs = index;
858 unsigned int c_len;
94afd6d6
CY
859 block_t blkaddr;
860
4b99ecd3
CY
861 /* should align fofs and ofs_in_node to cluster_size */
862 if (fofs % cluster_size) {
863 fofs = round_down(fofs, cluster_size);
864 ofs_in_node = round_down(ofs_in_node, cluster_size);
865 }
866
867 c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node);
94afd6d6
CY
868 if (!c_len)
869 goto out;
870
4b99ecd3 871 blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node);
94afd6d6
CY
872 if (blkaddr == COMPRESS_ADDR)
873 blkaddr = data_blkaddr(dn->inode, dn->node_page,
4b99ecd3 874 ofs_in_node + 1);
94afd6d6 875
e7547dac 876 f2fs_update_read_extent_tree_range_compressed(dn->inode,
4b99ecd3 877 fofs, blkaddr, cluster_size, c_len);
94afd6d6
CY
878 }
879out:
e05df3b1
JK
880 return 0;
881
882release_pages:
883 f2fs_put_page(parent, 1);
884 if (i > 1)
885 f2fs_put_page(npage[0], 0);
886release_out:
887 dn->inode_page = NULL;
888 dn->node_page = NULL;
3cf45747
CY
889 if (err == -ENOENT) {
890 dn->cur_level = i;
891 dn->max_level = level;
0a2aa8fb 892 dn->ofs_in_node = offset[level];
3cf45747 893 }
e05df3b1
JK
894 return err;
895}
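
/*
 * Usage sketch (illustrative): the typical caller pattern is
 *
 *	struct dnode_of_data dn;
 *	block_t blkaddr;
 *	int err;
 *
 *	set_new_dnode(&dn, inode, NULL, NULL, 0);
 *	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
 *	if (err)
 *		return err;
 *	blkaddr = dn.data_blkaddr;
 *	f2fs_put_dnode(&dn);
 *
 * With ALLOC_NODE, the caller must hold f2fs_lock_op(), as noted in the
 * comment above this function.
 */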
896
7735730d 897static int truncate_node(struct dnode_of_data *dn)
e05df3b1 898{
4081363f 899 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
e05df3b1 900 struct node_info ni;
7735730d 901 int err;
0ea295dd 902 pgoff_t index;
e05df3b1 903
a9419b63 904 err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
7735730d
CY
905 if (err)
906 return err;
e05df3b1 907
e05df3b1 908 /* Deallocate node address */
4d57b86d 909 f2fs_invalidate_blocks(sbi, ni.blk_addr);
000519f2 910 dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
479f40c4 911 set_node_addr(sbi, &ni, NULL_ADDR, false);
e05df3b1
JK
912
913 if (dn->nid == dn->inode->i_ino) {
4d57b86d 914 f2fs_remove_orphan_inode(sbi, dn->nid);
e05df3b1 915 dec_valid_inode_count(sbi);
0f18b462 916 f2fs_inode_synced(dn->inode);
e05df3b1 917 }
000519f2 918
e05df3b1 919 clear_node_page_dirty(dn->node_page);
caf0047e 920 set_sbi_flag(sbi, SBI_IS_DIRTY);
e05df3b1 921
5697e94d 922 index = page_folio(dn->node_page)->index;
e05df3b1 923 f2fs_put_page(dn->node_page, 1);
bf39c00a
JK
924
925 invalidate_mapping_pages(NODE_MAPPING(sbi),
0ea295dd 926 index, index);
bf39c00a 927
e05df3b1 928 dn->node_page = NULL;
51dd6249 929 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
7735730d
CY
930
931 return 0;
e05df3b1
JK
932}
933
934static int truncate_dnode(struct dnode_of_data *dn)
935{
a6ec8378 936 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
e05df3b1 937 struct page *page;
7735730d 938 int err;
e05df3b1
JK
939
940 if (dn->nid == 0)
941 return 1;
942
943 /* get direct node */
a6ec8378 944 page = f2fs_get_node_page(sbi, dn->nid);
45586c70 945 if (PTR_ERR(page) == -ENOENT)
e05df3b1
JK
946 return 1;
947 else if (IS_ERR(page))
948 return PTR_ERR(page);
949
a6ec8378
CY
950 if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) {
951 f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
952 dn->inode->i_ino, dn->nid, ino_of_node(page));
953 set_sbi_flag(sbi, SBI_NEED_FSCK);
954 f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
955 f2fs_put_page(page, 1);
956 return -EFSCORRUPTED;
957 }
958
e05df3b1
JK
959 /* Make dnode_of_data for parameter */
960 dn->node_page = page;
961 dn->ofs_in_node = 0;
a6ec8378 962 f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
7735730d 963 err = truncate_node(dn);
0135c482
CY
964 if (err) {
965 f2fs_put_page(page, 1);
7735730d 966 return err;
0135c482 967 }
7735730d 968
e05df3b1
JK
969 return 1;
970}
971
972static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
973 int ofs, int depth)
974{
e05df3b1
JK
975 struct dnode_of_data rdn = *dn;
976 struct page *page;
977 struct f2fs_node *rn;
978 nid_t child_nid;
979 unsigned int child_nofs;
980 int freed = 0;
981 int i, ret;
982
983 if (dn->nid == 0)
984 return NIDS_PER_BLOCK + 1;
985
51dd6249
NJ
986 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
987
4d57b86d 988 page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
51dd6249
NJ
989 if (IS_ERR(page)) {
990 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
e05df3b1 991 return PTR_ERR(page);
51dd6249 992 }
e05df3b1 993
4d57b86d 994 f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK);
79344efb 995
45590710 996 rn = F2FS_NODE(page);
e05df3b1
JK
997 if (depth < 3) {
998 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
999 child_nid = le32_to_cpu(rn->in.nid[i]);
1000 if (child_nid == 0)
1001 continue;
1002 rdn.nid = child_nid;
1003 ret = truncate_dnode(&rdn);
1004 if (ret < 0)
1005 goto out_err;
12719ae1
JK
1006 if (set_nid(page, i, 0, false))
1007 dn->node_changed = true;
e05df3b1
JK
1008 }
1009 } else {
1010 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
1011 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
1012 child_nid = le32_to_cpu(rn->in.nid[i]);
1013 if (child_nid == 0) {
1014 child_nofs += NIDS_PER_BLOCK + 1;
1015 continue;
1016 }
1017 rdn.nid = child_nid;
1018 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
1019 if (ret == (NIDS_PER_BLOCK + 1)) {
12719ae1
JK
1020 if (set_nid(page, i, 0, false))
1021 dn->node_changed = true;
e05df3b1
JK
1022 child_nofs += ret;
1023 } else if (ret < 0 && ret != -ENOENT) {
1024 goto out_err;
1025 }
1026 }
1027 freed = child_nofs;
1028 }
1029
1030 if (!ofs) {
1031 /* remove current indirect node */
1032 dn->node_page = page;
7735730d
CY
1033 ret = truncate_node(dn);
1034 if (ret)
1035 goto out_err;
e05df3b1
JK
1036 freed++;
1037 } else {
1038 f2fs_put_page(page, 1);
1039 }
51dd6249 1040 trace_f2fs_truncate_nodes_exit(dn->inode, freed);
e05df3b1
JK
1041 return freed;
1042
1043out_err:
1044 f2fs_put_page(page, 1);
51dd6249 1045 trace_f2fs_truncate_nodes_exit(dn->inode, ret);
e05df3b1
JK
1046 return ret;
1047}
1048
1049static int truncate_partial_nodes(struct dnode_of_data *dn,
1050 struct f2fs_inode *ri, int *offset, int depth)
1051{
e05df3b1
JK
1052 struct page *pages[2];
1053 nid_t nid[3];
1054 nid_t child_nid;
1055 int err = 0;
1056 int i;
1057 int idx = depth - 2;
1058
1059 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
1060 if (!nid[0])
1061 return 0;
1062
1063 /* get indirect nodes in the path */
a225dca3 1064 for (i = 0; i < idx + 1; i++) {
e1c42045 1065 /* reference count will be increased */
4d57b86d 1066 pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]);
e05df3b1 1067 if (IS_ERR(pages[i])) {
e05df3b1 1068 err = PTR_ERR(pages[i]);
a225dca3 1069 idx = i - 1;
e05df3b1
JK
1070 goto fail;
1071 }
1072 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
1073 }
1074
4d57b86d 1075 f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
79344efb 1076
e05df3b1 1077 /* free direct nodes linked to a partial indirect node */
a225dca3 1078 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
e05df3b1
JK
1079 child_nid = get_nid(pages[idx], i, false);
1080 if (!child_nid)
1081 continue;
1082 dn->nid = child_nid;
1083 err = truncate_dnode(dn);
1084 if (err < 0)
1085 goto fail;
12719ae1
JK
1086 if (set_nid(pages[idx], i, 0, false))
1087 dn->node_changed = true;
e05df3b1
JK
1088 }
1089
a225dca3 1090 if (offset[idx + 1] == 0) {
e05df3b1
JK
1091 dn->node_page = pages[idx];
1092 dn->nid = nid[idx];
7735730d
CY
1093 err = truncate_node(dn);
1094 if (err)
1095 goto fail;
e05df3b1
JK
1096 } else {
1097 f2fs_put_page(pages[idx], 1);
1098 }
1099 offset[idx]++;
a225dca3 1100 offset[idx + 1] = 0;
1101 idx--;
e05df3b1 1102fail:
a225dca3 1103 for (i = idx; i >= 0; i--)
e05df3b1 1104 f2fs_put_page(pages[i], 1);
51dd6249
NJ
1105
1106 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
1107
e05df3b1
JK
1108 return err;
1109}
1110
0a8165d7 1111/*
e05df3b1
JK
1112 * All the block addresses of data and nodes should be nullified.
1113 */
4d57b86d 1114int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
e05df3b1 1115{
4081363f 1116 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
e05df3b1
JK
1117 int err = 0, cont = 1;
1118 int level, offset[4], noffset[4];
7dd690c8 1119 unsigned int nofs = 0;
58bfaf44 1120 struct f2fs_inode *ri;
e05df3b1
JK
1121 struct dnode_of_data dn;
1122 struct page *page;
1123
51dd6249
NJ
1124 trace_f2fs_truncate_inode_blocks_enter(inode, from);
1125
81ca7350 1126 level = get_node_path(inode, from, offset, noffset);
9039d835
YF
1127 if (level < 0) {
1128 trace_f2fs_truncate_inode_blocks_exit(inode, level);
adb6dc19 1129 return level;
9039d835 1130 }
ff373558 1131
4d57b86d 1132 page = f2fs_get_node_page(sbi, inode->i_ino);
51dd6249
NJ
1133 if (IS_ERR(page)) {
1134 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
e05df3b1 1135 return PTR_ERR(page);
51dd6249 1136 }
e05df3b1
JK
1137
1138 set_new_dnode(&dn, inode, page, NULL, 0);
1139 unlock_page(page);
1140
58bfaf44 1141 ri = F2FS_INODE(page);
e05df3b1
JK
1142 switch (level) {
1143 case 0:
1144 case 1:
1145 nofs = noffset[1];
1146 break;
1147 case 2:
1148 nofs = noffset[1];
1149 if (!offset[level - 1])
1150 goto skip_partial;
58bfaf44 1151 err = truncate_partial_nodes(&dn, ri, offset, level);
e05df3b1
JK
1152 if (err < 0 && err != -ENOENT)
1153 goto fail;
1154 nofs += 1 + NIDS_PER_BLOCK;
1155 break;
1156 case 3:
1157 nofs = 5 + 2 * NIDS_PER_BLOCK;
1158 if (!offset[level - 1])
1159 goto skip_partial;
58bfaf44 1160 err = truncate_partial_nodes(&dn, ri, offset, level);
e05df3b1
JK
1161 if (err < 0 && err != -ENOENT)
1162 goto fail;
1163 break;
1164 default:
1165 BUG();
1166 }
1167
1168skip_partial:
1169 while (cont) {
58bfaf44 1170 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
e05df3b1
JK
1171 switch (offset[0]) {
1172 case NODE_DIR1_BLOCK:
1173 case NODE_DIR2_BLOCK:
1174 err = truncate_dnode(&dn);
1175 break;
1176
1177 case NODE_IND1_BLOCK:
1178 case NODE_IND2_BLOCK:
1179 err = truncate_nodes(&dn, nofs, offset[1], 2);
1180 break;
1181
1182 case NODE_DIND_BLOCK:
1183 err = truncate_nodes(&dn, nofs, offset[1], 3);
1184 cont = 0;
1185 break;
1186
1187 default:
1188 BUG();
1189 }
92c556ed
CY
1190 if (err == -ENOENT) {
1191 set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
1192 f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
1193 f2fs_err_ratelimited(sbi,
1194 "truncate node fail, ino:%lu, nid:%u, "
1195 "offset[0]:%d, offset[1]:%d, nofs:%d",
1196 inode->i_ino, dn.nid, offset[0],
1197 offset[1], nofs);
1198 err = 0;
1199 }
1200 if (err < 0)
e05df3b1
JK
1201 goto fail;
1202 if (offset[1] == 0 &&
58bfaf44 1203 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
e05df3b1 1204 lock_page(page);
ff373558 1205 BUG_ON(page->mapping != NODE_MAPPING(sbi));
bae0ee7a 1206 f2fs_wait_on_page_writeback(page, NODE, true, true);
58bfaf44 1207 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
e05df3b1
JK
1208 set_page_dirty(page);
1209 unlock_page(page);
1210 }
1211 offset[1] = 0;
1212 offset[0]++;
1213 nofs += err;
1214 }
1215fail:
1216 f2fs_put_page(page, 0);
51dd6249 1217 trace_f2fs_truncate_inode_blocks_exit(inode, err);
e05df3b1
JK
1218 return err > 0 ? 0 : err;
1219}
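
/*
 * Note (descriptive): truncation above runs in two phases.  For level 2 or 3
 * trees, truncate_partial_nodes() first trims the partially covered subtree
 * that contains "from"; the skip_partial loop then walks the remaining i_nid
 * slots (NODE_DIR1_BLOCK .. NODE_DIND_BLOCK), fully freeing each subtree and
 * clearing the corresponding pointer in the on-disk inode.
 */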
1220
9c77f754 1221/* caller must lock inode page */
4d57b86d 1222int f2fs_truncate_xattr_node(struct inode *inode)
4f16fb0f 1223{
4081363f 1224 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4f16fb0f
JK
1225 nid_t nid = F2FS_I(inode)->i_xattr_nid;
1226 struct dnode_of_data dn;
1227 struct page *npage;
7735730d 1228 int err;
4f16fb0f
JK
1229
1230 if (!nid)
1231 return 0;
1232
4d57b86d 1233 npage = f2fs_get_node_page(sbi, nid);
4f16fb0f
JK
1234 if (IS_ERR(npage))
1235 return PTR_ERR(npage);
1236
7735730d
CY
1237 set_new_dnode(&dn, inode, NULL, npage, nid);
1238 err = truncate_node(&dn);
1239 if (err) {
1240 f2fs_put_page(npage, 1);
1241 return err;
1242 }
1243
205b9822 1244 f2fs_i_xnid_write(inode, 0);
65985d93 1245
4f16fb0f
JK
1246 return 0;
1247}
1248
39936837 1249/*
4f4124d0
CY
1250 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
1251 * f2fs_unlock_op().
39936837 1252 */
4d57b86d 1253int f2fs_remove_inode_page(struct inode *inode)
e05df3b1 1254{
e05df3b1 1255 struct dnode_of_data dn;
13ec7297 1256 int err;
e05df3b1 1257
c2e69583 1258 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
4d57b86d 1259 err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
13ec7297
CY
1260 if (err)
1261 return err;
e05df3b1 1262
4d57b86d 1263 err = f2fs_truncate_xattr_node(inode);
13ec7297 1264 if (err) {
c2e69583 1265 f2fs_put_dnode(&dn);
13ec7297 1266 return err;
e05df3b1 1267 }
c2e69583
JK
1268
1269 /* remove potential inline_data blocks */
1270 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1271 S_ISLNK(inode->i_mode))
4d57b86d 1272 f2fs_truncate_data_blocks_range(&dn, 1);
c2e69583 1273
e1c42045 1274 /* 0 is possible, after f2fs_new_inode() has failed */
8d714f8a
JK
1275 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
1276 f2fs_put_dnode(&dn);
1277 return -EIO;
1278 }
8b6810f8
CY
1279
1280 if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
097a7686
CY
1281 f2fs_warn(F2FS_I_SB(inode),
1282 "f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu",
1283 inode->i_ino, (unsigned long long)inode->i_blocks);
8b6810f8
CY
1284 set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
1285 }
c2e69583
JK
1286
1287 /* will put inode & node pages */
7735730d
CY
1288 err = truncate_node(&dn);
1289 if (err) {
1290 f2fs_put_dnode(&dn);
1291 return err;
1292 }
13ec7297 1293 return 0;
e05df3b1
JK
1294}
1295
4d57b86d 1296struct page *f2fs_new_inode_page(struct inode *inode)
e05df3b1 1297{
e05df3b1
JK
1298 struct dnode_of_data dn;
1299
1300 /* allocate inode page for new inode */
1301 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
44a83ff6
JK
1302
1303 /* caller should f2fs_put_page(page, 1); */
4d57b86d 1304 return f2fs_new_node_page(&dn, 0);
e05df3b1
JK
1305}
1306
4d57b86d 1307struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
e05df3b1 1308{
4081363f 1309 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
25cc5d3b 1310 struct node_info new_ni;
e05df3b1
JK
1311 struct page *page;
1312 int err;
1313
91942321 1314 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
e05df3b1
JK
1315 return ERR_PTR(-EPERM);
1316
300e129c 1317 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
e05df3b1
JK
1318 if (!page)
1319 return ERR_PTR(-ENOMEM);
1320
0abd675e 1321 if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
9c02740c 1322 goto fail;
0abd675e 1323
25cc5d3b 1324#ifdef CONFIG_F2FS_CHECK_FS
a9419b63 1325 err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
7735730d
CY
1326 if (err) {
1327 dec_valid_node_count(sbi, dn->inode, !ofs);
1328 goto fail;
1329 }
141170b7
CY
1330 if (unlikely(new_ni.blk_addr != NULL_ADDR)) {
1331 err = -EFSCORRUPTED;
0fa4e57c 1332 dec_valid_node_count(sbi, dn->inode, !ofs);
141170b7 1333 set_sbi_flag(sbi, SBI_NEED_FSCK);
95fa90c9 1334 f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
141170b7
CY
1335 goto fail;
1336 }
25cc5d3b
JK
1337#endif
1338 new_ni.nid = dn->nid;
e05df3b1 1339 new_ni.ino = dn->inode->i_ino;
25cc5d3b
JK
1340 new_ni.blk_addr = NULL_ADDR;
1341 new_ni.flag = 0;
1342 new_ni.version = 0;
479f40c4 1343 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
9c02740c 1344
bae0ee7a 1345 f2fs_wait_on_page_writeback(page, NODE, true, true);
9c02740c 1346 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
c5667575 1347 set_cold_node(page, S_ISDIR(dn->inode->i_mode));
237c0790
JK
1348 if (!PageUptodate(page))
1349 SetPageUptodate(page);
12719ae1
JK
1350 if (set_page_dirty(page))
1351 dn->node_changed = true;
e05df3b1 1352
4bc8e9bc 1353 if (f2fs_has_xattr_block(ofs))
205b9822 1354 f2fs_i_xnid_write(dn->inode, dn->nid);
479bd73a 1355
e05df3b1
JK
1356 if (ofs == 0)
1357 inc_valid_inode_count(sbi);
e05df3b1 1358 return page;
e05df3b1 1359fail:
71e9fec5 1360 clear_node_page_dirty(page);
e05df3b1
JK
1361 f2fs_put_page(page, 1);
1362 return ERR_PTR(err);
1363}
1364
56ae674c
JK
1365/*
 1366 * Caller should do the following after getting the return value:
1367 * 0: f2fs_put_page(page, 0)
86531d6b 1368 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
56ae674c 1369 */
7649c873 1370static int read_node_page(struct page *page, blk_opf_t op_flags)
e05df3b1 1371{
2eaa98e5 1372 struct folio *folio = page_folio(page);
4081363f 1373 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
e05df3b1 1374 struct node_info ni;
cf04e8eb 1375 struct f2fs_io_info fio = {
05ca3632 1376 .sbi = sbi,
cf04e8eb 1377 .type = NODE,
04d328de
MC
1378 .op = REQ_OP_READ,
1379 .op_flags = op_flags,
05ca3632 1380 .page = page,
4375a336 1381 .encrypted_page = NULL,
cf04e8eb 1382 };
7735730d 1383 int err;
e05df3b1 1384
2eaa98e5 1385 if (folio_test_uptodate(folio)) {
b42b179b 1386 if (!f2fs_inode_chksum_verify(sbi, page)) {
2eaa98e5 1387 folio_clear_uptodate(folio);
10f966bb 1388 return -EFSBADCRC;
b42b179b 1389 }
3bdad3c7 1390 return LOCKED_PAGE;
54c55c4e 1391 }
3bdad3c7 1392
2eaa98e5 1393 err = f2fs_get_node_info(sbi, folio->index, &ni, false);
7735730d
CY
1394 if (err)
1395 return err;
e05df3b1 1396
b7ec2061 1397 /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
e6ecb142 1398 if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) {
2eaa98e5 1399 folio_clear_uptodate(folio);
e05df3b1 1400 return -ENOENT;
393ff91f
JK
1401 }
1402
7a9d7548 1403 fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
8b83ac81
CY
1404
1405 err = f2fs_submit_page_bio(&fio);
1406
1407 if (!err)
34a23525 1408 f2fs_update_iostat(sbi, NULL, FS_NODE_READ_IO, F2FS_BLKSIZE);
8b83ac81
CY
1409
1410 return err;
e05df3b1
JK
1411}
1412
0a8165d7 1413/*
e05df3b1
JK
1414 * Readahead a node page
1415 */
4d57b86d 1416void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
e05df3b1 1417{
e05df3b1 1418 struct page *apage;
56ae674c 1419 int err;
e05df3b1 1420
e8458725
CY
1421 if (!nid)
1422 return;
4d57b86d 1423 if (f2fs_check_nid_range(sbi, nid))
a4f843bd 1424 return;
e8458725 1425
5ec2d99d 1426 apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid);
999270de 1427 if (apage)
393ff91f 1428 return;
e05df3b1 1429
300e129c 1430 apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
e05df3b1
JK
1431 if (!apage)
1432 return;
1433
70246286 1434 err = read_node_page(apage, REQ_RAHEAD);
86531d6b 1435 f2fs_put_page(apage, err ? 1 : 0);
e05df3b1
JK
1436}
1437
17a0ee55 1438static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
0e022ea8 1439 struct page *parent, int start)
e05df3b1 1440{
e05df3b1 1441 struct page *page;
0e022ea8 1442 int err;
e05df3b1 1443
e05df3b1
JK
1444 if (!nid)
1445 return ERR_PTR(-ENOENT);
4d57b86d 1446 if (f2fs_check_nid_range(sbi, nid))
a4f843bd 1447 return ERR_PTR(-EINVAL);
afcb7ca0 1448repeat:
300e129c 1449 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
e05df3b1
JK
1450 if (!page)
1451 return ERR_PTR(-ENOMEM);
1452
70fd7614 1453 err = read_node_page(page, 0);
86531d6b 1454 if (err < 0) {
a7b8618a 1455 goto out_put_err;
86531d6b 1456 } else if (err == LOCKED_PAGE) {
1f258ec1 1457 err = 0;
56ae674c 1458 goto page_hit;
86531d6b 1459 }
e05df3b1 1460
0e022ea8 1461 if (parent)
4d57b86d 1462 f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE);
c718379b 1463
e05df3b1 1464 lock_page(page);
0e022ea8 1465
4ef51a8f 1466 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
afcb7ca0
JK
1467 f2fs_put_page(page, 1);
1468 goto repeat;
1469 }
1563ac75 1470
1f258ec1
CY
1471 if (unlikely(!PageUptodate(page))) {
1472 err = -EIO;
1563ac75 1473 goto out_err;
1f258ec1 1474 }
704956ec
CY
1475
1476 if (!f2fs_inode_chksum_verify(sbi, page)) {
10f966bb 1477 err = -EFSBADCRC;
704956ec
CY
1478 goto out_err;
1479 }
e0f56cb4 1480page_hit:
a7b8618a
JK
1481 if (likely(nid == nid_of_node(page)))
1482 return page;
1483
1484 f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
dcbb4c10
JP
1485 nid, nid_of_node(page), ino_of_node(page),
1486 ofs_of_node(page), cpver_of_node(page),
1487 next_blkaddr_of_node(page));
a7b8618a 1488 set_sbi_flag(sbi, SBI_NEED_FSCK);
9b4c8dd9
ZN
1489 f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
1490 err = -EFSCORRUPTED;
0c9df7fb 1491out_err:
a7b8618a
JK
1492 ClearPageUptodate(page);
1493out_put_err:
82c7863e
JK
1494 /* ENOENT comes from read_node_page which is not an error. */
1495 if (err != -ENOENT)
763a0dc7 1496 f2fs_handle_page_eio(sbi, page_folio(page), NODE);
a7b8618a
JK
1497 f2fs_put_page(page, 1);
1498 return ERR_PTR(err);
e05df3b1
JK
1499}
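
/*
 * Usage sketch (illustrative): most callers go through the two wrappers
 * below, e.g.
 *
 *	page = f2fs_get_node_page(sbi, nid);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	...
 *	f2fs_put_page(page, 1);
 *
 * The page comes back locked and verified (uptodate, inode checksum and
 * footer nid checked); f2fs_get_node_page_ra() additionally reads ahead
 * sibling nodes of the parent.
 */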
1500
4d57b86d 1501struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
0e022ea8
CY
1502{
1503 return __get_node_page(sbi, nid, NULL, 0);
1504}
1505
4d57b86d 1506struct page *f2fs_get_node_page_ra(struct page *parent, int start)
0e022ea8
CY
1507{
1508 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1509 nid_t nid = get_nid(parent, start, false);
1510
1511 return __get_node_page(sbi, nid, parent, start);
1512}
1513
2049d4fc
JK
1514static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1515{
1516 struct inode *inode;
1517 struct page *page;
0f3311a8 1518 int ret;
2049d4fc
JK
1519
1520 /* should flush inline_data before evict_inode */
1521 inode = ilookup(sbi->sb, ino);
1522 if (!inode)
1523 return;
1524
01eccef7
CY
1525 page = f2fs_pagecache_get_page(inode->i_mapping, 0,
1526 FGP_LOCK|FGP_NOWAIT, 0);
2049d4fc
JK
1527 if (!page)
1528 goto iput_out;
1529
1530 if (!PageUptodate(page))
1531 goto page_out;
1532
1533 if (!PageDirty(page))
1534 goto page_out;
1535
1536 if (!clear_page_dirty_for_io(page))
1537 goto page_out;
1538
b0846621 1539 ret = f2fs_write_inline_data(inode, page_folio(page));
0f3311a8 1540 inode_dec_dirty_pages(inode);
4d57b86d 1541 f2fs_remove_dirty_inode(inode);
0f3311a8 1542 if (ret)
2049d4fc
JK
1543 set_page_dirty(page);
1544page_out:
4a6de50d 1545 f2fs_put_page(page, 1);
2049d4fc
JK
1546iput_out:
1547 iput(inode);
1548}
1549
608514de
JK
1550static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
1551{
028a63a6 1552 pgoff_t index;
4f4a4f0f 1553 struct folio_batch fbatch;
608514de 1554 struct page *last_page = NULL;
4f4a4f0f 1555 int nr_folios;
608514de 1556
4f4a4f0f 1557 folio_batch_init(&fbatch);
608514de 1558 index = 0;
028a63a6 1559
4f4a4f0f
VMO
1560 while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
1561 (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1562 &fbatch))) {
028a63a6 1563 int i;
608514de 1564
4f4a4f0f
VMO
1565 for (i = 0; i < nr_folios; i++) {
1566 struct page *page = &fbatch.folios[i]->page;
608514de
JK
1567
1568 if (unlikely(f2fs_cp_error(sbi))) {
1569 f2fs_put_page(last_page, 0);
4f4a4f0f 1570 folio_batch_release(&fbatch);
608514de
JK
1571 return ERR_PTR(-EIO);
1572 }
1573
1574 if (!IS_DNODE(page) || !is_cold_node(page))
1575 continue;
1576 if (ino_of_node(page) != ino)
1577 continue;
1578
1579 lock_page(page);
1580
1581 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1582continue_unlock:
1583 unlock_page(page);
1584 continue;
1585 }
1586 if (ino_of_node(page) != ino)
1587 goto continue_unlock;
1588
1589 if (!PageDirty(page)) {
1590 /* someone wrote it for us */
1591 goto continue_unlock;
1592 }
1593
1594 if (last_page)
1595 f2fs_put_page(last_page, 0);
1596
1597 get_page(page);
1598 last_page = page;
1599 unlock_page(page);
1600 }
4f4a4f0f 1601 folio_batch_release(&fbatch);
608514de
JK
1602 cond_resched();
1603 }
1604 return last_page;
1605}
1606
d68f735b 1607static int __write_node_page(struct page *page, bool atomic, bool *submitted,
b0af6d49 1608 struct writeback_control *wbc, bool do_balance,
50fa53ec 1609 enum iostat_type io_type, unsigned int *seq_id)
faa24895
JK
1610{
1611 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
4deccfbd 1612 struct folio *folio = page_folio(page);
faa24895
JK
1613 nid_t nid;
1614 struct node_info ni;
1615 struct f2fs_io_info fio = {
1616 .sbi = sbi,
39d787be 1617 .ino = ino_of_node(page),
faa24895
JK
1618 .type = NODE,
1619 .op = REQ_OP_WRITE,
1620 .op_flags = wbc_to_write_flags(wbc),
1621 .page = page,
1622 .encrypted_page = NULL,
2eae077e 1623 .submitted = 0,
b0af6d49 1624 .io_type = io_type,
578c6478 1625 .io_wbc = wbc,
faa24895 1626 };
50fa53ec 1627 unsigned int seq;
faa24895 1628
4deccfbd 1629 trace_f2fs_writepage(folio, NODE);
faa24895 1630
6d7c865c 1631 if (unlikely(f2fs_cp_error(sbi))) {
b62e71be
CY
1632 /* keep node pages in remount-ro mode */
1633 if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
1634 goto redirty_out;
4deccfbd 1635 folio_clear_uptodate(folio);
28607bf3 1636 dec_page_count(sbi, F2FS_DIRTY_NODES);
4deccfbd 1637 folio_unlock(folio);
28607bf3 1638 return 0;
6d7c865c 1639 }
db198ae0 1640
faa24895
JK
1641 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1642 goto redirty_out;
faa24895 1643
100c0655
JK
1644 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
1645 wbc->sync_mode == WB_SYNC_NONE &&
fd8c8caf
CY
1646 IS_DNODE(page) && is_cold_node(page))
1647 goto redirty_out;
1648
faa24895
JK
1649 /* get old block addr of this node page */
1650 nid = nid_of_node(page);
4deccfbd 1651 f2fs_bug_on(sbi, folio->index != nid);
faa24895 1652
a9419b63 1653 if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
7735730d
CY
1654 goto redirty_out;
1655
faa24895 1656 if (wbc->for_reclaim) {
e4544b63 1657 if (!f2fs_down_read_trylock(&sbi->node_write))
faa24895
JK
1658 goto redirty_out;
1659 } else {
e4544b63 1660 f2fs_down_read(&sbi->node_write);
faa24895
JK
1661 }
1662
faa24895
JK
1663 /* This page is already truncated */
1664 if (unlikely(ni.blk_addr == NULL_ADDR)) {
4deccfbd 1665 folio_clear_uptodate(folio);
faa24895 1666 dec_page_count(sbi, F2FS_DIRTY_NODES);
e4544b63 1667 f2fs_up_read(&sbi->node_write);
4deccfbd 1668 folio_unlock(folio);
faa24895
JK
1669 return 0;
1670 }
1671
c9b60788 1672 if (__is_valid_data_blkaddr(ni.blk_addr) &&
93770ab7
CY
1673 !f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
1674 DATA_GENERIC_ENHANCE)) {
e4544b63 1675 f2fs_up_read(&sbi->node_write);
c9b60788 1676 goto redirty_out;
89d13c38 1677 }
c9b60788 1678
b722ff8a 1679 if (atomic && !test_opt(sbi, NOBARRIER))
e7c75ab0
JK
1680 fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
1681
dc5a9412 1682 /* should add to global list before clearing PAGECACHE status */
50fa53ec
CY
1683 if (f2fs_in_warm_node_list(sbi, page)) {
1684 seq = f2fs_add_fsync_node_entry(sbi, page);
1685 if (seq_id)
1686 *seq_id = seq;
1687 }
1688
4deccfbd 1689 folio_start_writeback(folio);
dc5a9412 1690
faa24895 1691 fio.old_blkaddr = ni.blk_addr;
4d57b86d 1692 f2fs_do_write_node_page(nid, &fio);
faa24895
JK
1693 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
1694 dec_page_count(sbi, F2FS_DIRTY_NODES);
e4544b63 1695 f2fs_up_read(&sbi->node_write);
faa24895 1696
d68f735b 1697 if (wbc->for_reclaim) {
bab475c5 1698 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
d68f735b
JK
1699 submitted = NULL;
1700 }
faa24895 1701
4deccfbd 1702 folio_unlock(folio);
faa24895 1703
d68f735b 1704 if (unlikely(f2fs_cp_error(sbi))) {
b9109b0e 1705 f2fs_submit_merged_write(sbi, NODE);
d68f735b
JK
1706 submitted = NULL;
1707 }
1708 if (submitted)
1709 *submitted = fio.submitted;
faa24895 1710
401db79f
YS
1711 if (do_balance)
1712 f2fs_balance_fs(sbi, false);
faa24895
JK
1713 return 0;
1714
1715redirty_out:
4deccfbd 1716 folio_redirty_for_writepage(wbc, folio);
faa24895
JK
1717 return AOP_WRITEPAGE_ACTIVATE;
1718}
1719
48018b4c 1720int f2fs_move_node_page(struct page *node_page, int gc_type)
f15194fc 1721{
48018b4c
CY
1722 int err = 0;
1723
f15194fc
YH
1724 if (gc_type == FG_GC) {
1725 struct writeback_control wbc = {
1726 .sync_mode = WB_SYNC_ALL,
1727 .nr_to_write = 1,
1728 .for_reclaim = 0,
1729 };
1730
bae0ee7a 1731 f2fs_wait_on_page_writeback(node_page, NODE, true, true);
8d64d365
CY
1732
1733 set_page_dirty(node_page);
1734
48018b4c
CY
1735 if (!clear_page_dirty_for_io(node_page)) {
1736 err = -EAGAIN;
f15194fc 1737 goto out_page;
48018b4c 1738 }
f15194fc
YH
1739
1740 if (__write_node_page(node_page, false, NULL,
48018b4c
CY
1741 &wbc, false, FS_GC_NODE_IO, NULL)) {
1742 err = -EAGAIN;
f15194fc 1743 unlock_page(node_page);
48018b4c 1744 }
f15194fc
YH
1745 goto release_page;
1746 } else {
1747 /* set page dirty and write it */
16778aea 1748 if (!folio_test_writeback(page_folio(node_page)))
f15194fc
YH
1749 set_page_dirty(node_page);
1750 }
1751out_page:
1752 unlock_page(node_page);
1753release_page:
1754 f2fs_put_page(node_page, 0);
48018b4c 1755 return err;
f15194fc
YH
1756}
1757
faa24895
JK
1758static int f2fs_write_node_page(struct page *page,
1759 struct writeback_control *wbc)
1760{
50fa53ec
CY
1761 return __write_node_page(page, false, NULL, wbc, false,
1762 FS_NODE_IO, NULL);
faa24895
JK
1763}
1764
4d57b86d 1765int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
50fa53ec
CY
1766 struct writeback_control *wbc, bool atomic,
1767 unsigned int *seq_id)
e05df3b1 1768{
028a63a6 1769 pgoff_t index;
e6e46e1e 1770 struct folio_batch fbatch;
c267ec15 1771 int ret = 0;
1772 struct page *last_page = NULL;
1773 bool marked = false;
26de9b11 1774 nid_t ino = inode->i_ino;
e6e46e1e 1775 int nr_folios;
bab475c5 1776 int nwritten = 0;
52681375 1777
1778 if (atomic) {
1779 last_page = last_fsync_dnode(sbi, ino);
1780 if (IS_ERR_OR_NULL(last_page))
1781 return PTR_ERR_OR_ZERO(last_page);
1782 }
1783retry:
e6e46e1e 1784 folio_batch_init(&fbatch);
52681375 1785 index = 0;
028a63a6 1786
1787 while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
1788 (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1789 &fbatch))) {
028a63a6 1790 int i;
52681375 1791
1792 for (i = 0; i < nr_folios; i++) {
1793 struct page *page = &fbatch.folios[i]->page;
d68f735b 1794 bool submitted = false;
1795
1796 if (unlikely(f2fs_cp_error(sbi))) {
608514de 1797 f2fs_put_page(last_page, 0);
e6e46e1e 1798 folio_batch_release(&fbatch);
1799 ret = -EIO;
1800 goto out;
1801 }
1802
1803 if (!IS_DNODE(page) || !is_cold_node(page))
1804 continue;
1805 if (ino_of_node(page) != ino)
1806 continue;
1807
1808 lock_page(page);
1809
1810 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1811continue_unlock:
1812 unlock_page(page);
1813 continue;
1814 }
1815 if (ino_of_node(page) != ino)
1816 goto continue_unlock;
1817
608514de 1818 if (!PageDirty(page) && page != last_page) {
1819 /* someone wrote it for us */
1820 goto continue_unlock;
1821 }
1822
bae0ee7a 1823 f2fs_wait_on_page_writeback(page, NODE, true, true);
52681375 1824
1825 set_fsync_mark(page, 0);
1826 set_dentry_mark(page, 0);
1827
1828 if (!atomic || page == last_page) {
1829 set_fsync_mark(page, 1);
47c8ebcc 1830 percpu_counter_inc(&sbi->rf_node_block_count);
1831 if (IS_INODE(page)) {
1832 if (is_inode_flag_set(inode,
1833 FI_DIRTY_INODE))
4d57b86d 1834 f2fs_update_inode(inode, page);
608514de 1835 set_dentry_mark(page,
4d57b86d 1836 f2fs_need_dentry_mark(sbi, ino));
26de9b11 1837 }
a87aff1d 1838 /* may be written by another thread */
1839 if (!PageDirty(page))
1840 set_page_dirty(page);
1841 }
1842
1843 if (!clear_page_dirty_for_io(page))
1844 goto continue_unlock;
52681375 1845
e7c75ab0 1846 ret = __write_node_page(page, atomic &&
d68f735b 1847 page == last_page,
b0af6d49 1848 &submitted, wbc, true,
50fa53ec 1849 FS_NODE_IO, seq_id);
c267ec15 1850 if (ret) {
52681375 1851 unlock_page(page);
1852 f2fs_put_page(last_page, 0);
1853 break;
d68f735b 1854 } else if (submitted) {
bab475c5 1855 nwritten++;
608514de 1856 }
3f5f4959 1857
1858 if (page == last_page) {
1859 f2fs_put_page(page, 0);
1860 marked = true;
52681375 1861 break;
c267ec15 1862 }
52681375 1863 }
e6e46e1e 1864 folio_batch_release(&fbatch);
1865 cond_resched();
1866
608514de 1867 if (ret || marked)
1868 break;
1869 }
608514de 1870 if (!ret && atomic && !marked) {
dcbb4c10 1871 f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx",
5697e94d 1872 ino, page_folio(last_page)->index);
608514de 1873 lock_page(last_page);
bae0ee7a 1874 f2fs_wait_on_page_writeback(last_page, NODE, true, true);
1875 set_page_dirty(last_page);
1876 unlock_page(last_page);
1877 goto retry;
1878 }
9de69279 1879out:
1880 if (nwritten)
1881 f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
5f029c04 1882 return ret ? -EIO : 0;
1883}
1884
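/* find_inode_nowait() callback: grab and return the inode only if it matches @ino and still has dirty inode metadata to flush */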
1885static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
1886{
1887 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1888 bool clean;
1889
1890 if (inode->i_ino != ino)
1891 return 0;
1892
1893 if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
1894 return 0;
1895
1896 spin_lock(&sbi->inode_lock[DIRTY_META]);
1897 clean = list_empty(&F2FS_I(inode)->gdirty_list);
1898 spin_unlock(&sbi->inode_lock[DIRTY_META]);
1899
1900 if (clean)
1901 return 0;
1902
1903 inode = igrab(inode);
1904 if (!inode)
1905 return 0;
1906 return 1;
1907}
1908
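/* Look up the dirty in-memory inode for this node page and copy it into the page; returns true if the inode was flushed into the page */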
1909static bool flush_dirty_inode(struct page *page)
1910{
1911 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1912 struct inode *inode;
1913 nid_t ino = ino_of_node(page);
1914
1915 inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL);
1916 if (!inode)
1917 return false;
1918
1919 f2fs_update_inode(inode, page);
1920 unlock_page(page);
1921
1922 iput(inode);
1923 return true;
1924}
1925
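/* Scan dirty node pages and flush inline data for any inode pages that carry it */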
68e79baf 1926void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
1927{
1928 pgoff_t index = 0;
1929 struct folio_batch fbatch;
1930 int nr_folios;
34c061ad 1931
a40a4ad1 1932 folio_batch_init(&fbatch);
34c061ad 1933
1934 while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
1935 (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1936 &fbatch))) {
1937 int i;
1938
1939 for (i = 0; i < nr_folios; i++) {
1940 struct page *page = &fbatch.folios[i]->page;
34c061ad 1941
8e9c1a34 1942 if (!IS_INODE(page))
1943 continue;
1944
1945 lock_page(page);
1946
1947 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1948continue_unlock:
1949 unlock_page(page);
1950 continue;
1951 }
1952
1953 if (!PageDirty(page)) {
1954 /* someone wrote it for us */
1955 goto continue_unlock;
1956 }
1957
1958 /* flush inline_data, if it's async context. */
1959 if (page_private_inline(page)) {
1960 clear_page_private_inline(page);
1961 unlock_page(page);
1962 flush_inline_data(sbi, ino_of_node(page));
1963 continue;
1964 }
1965 unlock_page(page);
1966 }
a40a4ad1 1967 folio_batch_release(&fbatch);
1968 cond_resched();
1969 }
1970}
1971
1972int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
1973 struct writeback_control *wbc,
b0af6d49 1974 bool do_balance, enum iostat_type io_type)
52681375 1975{
028a63a6 1976 pgoff_t index;
7525486a 1977 struct folio_batch fbatch;
52681375 1978 int step = 0;
12bb0a8f 1979 int nwritten = 0;
3f5f4959 1980 int ret = 0;
7525486a 1981 int nr_folios, done = 0;
e05df3b1 1982
7525486a 1983 folio_batch_init(&fbatch);
1984
1985next_step:
1986 index = 0;
028a63a6 1987
1988 while (!done && (nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi),
1989 &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1990 &fbatch))) {
028a63a6 1991 int i;
e05df3b1 1992
1993 for (i = 0; i < nr_folios; i++) {
1994 struct page *page = &fbatch.folios[i]->page;
d68f735b 1995 bool submitted = false;
e05df3b1 1996
1997 /* give a priority to WB_SYNC threads */
1998 if (atomic_read(&sbi->wb_sync_req[NODE]) &&
1999 wbc->sync_mode == WB_SYNC_NONE) {
2000 done = 1;
2001 break;
2002 }
2003
2004 /*
2005 * flushing sequence with step:
2006 * 0. indirect nodes
2007 * 1. dentry dnodes
2008 * 2. file dnodes
2009 */
2010 if (step == 0 && IS_DNODE(page))
2011 continue;
2012 if (step == 1 && (!IS_DNODE(page) ||
2013 is_cold_node(page)))
2014 continue;
2015 if (step == 2 && (!IS_DNODE(page) ||
2016 !is_cold_node(page)))
2017 continue;
9a4cbc9e 2018lock_node:
2019 if (wbc->sync_mode == WB_SYNC_ALL)
2020 lock_page(page);
2021 else if (!trylock_page(page))
2022 continue;
2023
4ef51a8f 2024 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
2025continue_unlock:
2026 unlock_page(page);
2027 continue;
2028 }
2029
2030 if (!PageDirty(page)) {
2031 /* someone wrote it for us */
2032 goto continue_unlock;
2033 }
2034
 2035 /* flush inline_data/inode, if we are in async context. */
2036 if (!do_balance)
2037 goto write_node;
2038
2039 /* flush inline_data */
2040 if (page_private_inline(page)) {
2041 clear_page_private_inline(page);
2042 unlock_page(page);
2043 flush_inline_data(sbi, ino_of_node(page));
9a4cbc9e 2044 goto lock_node;
2045 }
2046
2047 /* flush dirty inode */
2048 if (IS_INODE(page) && flush_dirty_inode(page))
2049 goto lock_node;
b0f3b87f 2050write_node:
bae0ee7a 2051 f2fs_wait_on_page_writeback(page, NODE, true, true);
fa3d2bdf 2052
2053 if (!clear_page_dirty_for_io(page))
2054 goto continue_unlock;
2055
2056 set_fsync_mark(page, 0);
2057 set_dentry_mark(page, 0);
52746519 2058
401db79f 2059 ret = __write_node_page(page, false, &submitted,
50fa53ec 2060 wbc, do_balance, io_type, NULL);
d68f735b 2061 if (ret)
52746519 2062 unlock_page(page);
d68f735b 2063 else if (submitted)
3f5f4959 2064 nwritten++;
2065
2066 if (--wbc->nr_to_write == 0)
2067 break;
2068 }
7525486a 2069 folio_batch_release(&fbatch);
2070 cond_resched();
2071
2072 if (wbc->nr_to_write == 0) {
2073 step = 2;
2074 break;
2075 }
2076 }
2077
2078 if (step < 2) {
2079 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2080 wbc->sync_mode == WB_SYNC_NONE && step == 1)
fd8c8caf 2081 goto out;
2082 step++;
2083 goto next_step;
2084 }
fd8c8caf 2085out:
3f5f4959 2086 if (nwritten)
b9109b0e 2087 f2fs_submit_merged_write(sbi, NODE);
2088
2089 if (unlikely(f2fs_cp_error(sbi)))
2090 return -EIO;
3f5f4959 2091 return ret;
2092}
2093
2094int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
2095 unsigned int seq_id)
cfe58f9d 2096{
2097 struct fsync_node_entry *fn;
2098 struct page *page;
2099 struct list_head *head = &sbi->fsync_node_list;
2100 unsigned long flags;
2101 unsigned int cur_seq_id = 0;
cfe58f9d 2102
2103 while (seq_id && cur_seq_id < seq_id) {
2104 spin_lock_irqsave(&sbi->fsync_node_lock, flags);
2105 if (list_empty(head)) {
2106 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
2107 break;
2108 }
2109 fn = list_first_entry(head, struct fsync_node_entry, list);
2110 if (fn->seq_id > seq_id) {
2111 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
2112 break;
2113 }
2114 cur_seq_id = fn->seq_id;
2115 page = fn->page;
2116 get_page(page);
2117 spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
4ef51a8f 2118
bae0ee7a 2119 f2fs_wait_on_page_writeback(page, NODE, true, false);
cfe58f9d 2120
50fa53ec 2121 put_page(page);
2122 }
2123
08c3eab5 2124 return filemap_check_errors(NODE_MAPPING(sbi));
2125}
2126
2127static int f2fs_write_node_pages(struct address_space *mapping,
2128 struct writeback_control *wbc)
2129{
4081363f 2130 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
9dfa1baf 2131 struct blk_plug plug;
50c8cdb3 2132 long diff;
e05df3b1 2133
2134 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2135 goto skip_write;
2136
4660f9c0 2137 /* balancing f2fs's metadata in background */
7bcd0cfa 2138 f2fs_balance_fs_bg(sbi, true);
e05df3b1 2139
a7fdffbd 2140 /* collect a number of dirty node pages and write together */
2141 if (wbc->sync_mode != WB_SYNC_ALL &&
2142 get_pages(sbi, F2FS_DIRTY_NODES) <
2143 nr_pages_to_skip(sbi, NODE))
d3baf95d 2144 goto skip_write;
a7fdffbd 2145
2146 if (wbc->sync_mode == WB_SYNC_ALL)
2147 atomic_inc(&sbi->wb_sync_req[NODE]);
2148 else if (atomic_read(&sbi->wb_sync_req[NODE])) {
2149 /* to avoid potential deadlock */
2150 if (current->plug)
2151 blk_finish_plug(current->plug);
c29fd0c0 2152 goto skip_write;
34415099 2153 }
c29fd0c0 2154
2155 trace_f2fs_writepages(mapping->host, wbc, NODE);
2156
50c8cdb3 2157 diff = nr_pages_to_write(sbi, NODE, wbc);
9dfa1baf 2158 blk_start_plug(&plug);
4d57b86d 2159 f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO);
9dfa1baf 2160 blk_finish_plug(&plug);
50c8cdb3 2161 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
2162
2163 if (wbc->sync_mode == WB_SYNC_ALL)
2164 atomic_dec(&sbi->wb_sync_req[NODE]);
e05df3b1 2165 return 0;
2166
2167skip_write:
2168 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
d31c7c3f 2169 trace_f2fs_writepages(mapping->host, wbc, NODE);
d3baf95d 2170 return 0;
2171}
2172
2173static bool f2fs_dirty_node_folio(struct address_space *mapping,
2174 struct folio *folio)
e05df3b1 2175{
92f750d8 2176 trace_f2fs_set_page_dirty(folio, NODE);
26c6b887 2177
2178 if (!folio_test_uptodate(folio))
2179 folio_mark_uptodate(folio);
54c55c4e 2180#ifdef CONFIG_F2FS_CHECK_FS
cbc975b1 2181 if (IS_INODE(&folio->page))
29c87793 2182 f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page);
54c55c4e 2183#endif
9b7eadd9 2184 if (filemap_dirty_folio(mapping, folio)) {
29c87793 2185 inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
2186 set_page_private_reference(&folio->page);
2187 return true;
e05df3b1 2188 }
cbc975b1 2189 return false;
2190}
2191
0a8165d7 2192/*
2193 * Structure of the f2fs node operations
2194 */
2195const struct address_space_operations f2fs_node_aops = {
2196 .writepage = f2fs_write_node_page,
2197 .writepages = f2fs_write_node_pages,
cbc975b1 2198 .dirty_folio = f2fs_dirty_node_folio,
91503996 2199 .invalidate_folio = f2fs_invalidate_folio,
c26cd045 2200 .release_folio = f2fs_release_folio,
1d5b9bd6 2201 .migrate_folio = filemap_migrate_folio,
2202};
2203
8a7ed66a
JK
2204static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
2205 nid_t n)
e05df3b1 2206{
8a7ed66a 2207 return radix_tree_lookup(&nm_i->free_nid_root, n);
e05df3b1
JK
2208}
2209
9a4ffdf5 2210static int __insert_free_nid(struct f2fs_sb_info *sbi,
b815bdc7 2211 struct free_nid *i)
e05df3b1 2212{
b8559dc2 2213 struct f2fs_nm_info *nm_i = NM_I(sbi);
a0761f63 2214 int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
5f029c04 2215
a0761f63
FL
2216 if (err)
2217 return err;
eb0aa4b8 2218
b815bdc7
LS
2219 nm_i->nid_cnt[FREE_NID]++;
2220 list_add_tail(&i->list, &nm_i->free_nid_list);
eb0aa4b8 2221 return 0;
b8559dc2
CY
2222}
2223
9a4ffdf5 2224static void __remove_free_nid(struct f2fs_sb_info *sbi,
a0761f63 2225 struct free_nid *i, enum nid_state state)
b8559dc2
CY
2226{
2227 struct f2fs_nm_info *nm_i = NM_I(sbi);
2228
9a4ffdf5
CY
2229 f2fs_bug_on(sbi, state != i->state);
2230 nm_i->nid_cnt[state]--;
2231 if (state == FREE_NID)
2232 list_del(&i->list);
a0761f63
FL
2233 radix_tree_delete(&nm_i->free_nid_root, i->nid);
2234}
2235
2236static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
2237 enum nid_state org_state, enum nid_state dst_state)
b8559dc2
CY
2238{
2239 struct f2fs_nm_info *nm_i = NM_I(sbi);
2240
a0761f63
FL
2241 f2fs_bug_on(sbi, org_state != i->state);
2242 i->state = dst_state;
2243 nm_i->nid_cnt[org_state]--;
2244 nm_i->nid_cnt[dst_state]++;
2245
2246 switch (dst_state) {
2247 case PREALLOC_NID:
2248 list_del(&i->list);
2249 break;
2250 case FREE_NID:
2251 list_add_tail(&i->list, &nm_i->free_nid_list);
2252 break;
2253 default:
2254 BUG_ON(1);
2255 }
e05df3b1
JK
2256}
2257
94c821fb
CY
2258bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
2259{
2260 struct f2fs_nm_info *nm_i = NM_I(sbi);
2261 unsigned int i;
2262 bool ret = true;
2263
e4544b63 2264 f2fs_down_read(&nm_i->nat_tree_lock);
94c821fb
CY
2265 for (i = 0; i < nm_i->nat_blocks; i++) {
2266 if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
2267 ret = false;
2268 break;
2269 }
2270 }
e4544b63 2271 f2fs_up_read(&nm_i->nat_tree_lock);
94c821fb
CY
2272
2273 return ret;
2274}
2275
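/* Track one nid in the per-NAT-block free nid bitmap, keeping free_nid_count in sync */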
2276static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
2277 bool set, bool build)
2278{
2279 struct f2fs_nm_info *nm_i = NM_I(sbi);
2280 unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
2281 unsigned int nid_ofs = nid - START_NID(nid);
2282
2283 if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
2284 return;
2285
2286 if (set) {
2287 if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
2288 return;
2289 __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
2290 nm_i->free_nid_count[nat_ofs]++;
2291 } else {
2292 if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
2293 return;
2294 __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
2295 if (!build)
2296 nm_i->free_nid_count[nat_ofs]--;
2297 }
2298}
2299
4ac91242 2300/* return whether the nid is recognized as free */
2301static bool add_free_nid(struct f2fs_sb_info *sbi,
2302 nid_t nid, bool build, bool update)
e05df3b1 2303{
6fb03f3a 2304 struct f2fs_nm_info *nm_i = NM_I(sbi);
30a61ddf 2305 struct free_nid *i, *e;
59bbd474 2306 struct nat_entry *ne;
30a61ddf
CY
2307 int err = -EINVAL;
2308 bool ret = false;
9198aceb
JK
2309
2310 /* 0 nid should not be used */
cfb271d4 2311 if (unlikely(nid == 0))
4ac91242 2312 return false;
59bbd474 2313
626bcf2b
CY
2314 if (unlikely(f2fs_check_nid_range(sbi, nid)))
2315 return false;
2316
32410577 2317 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS, true, NULL);
e05df3b1 2318 i->nid = nid;
9a4ffdf5 2319 i->state = FREE_NID;
e05df3b1 2320
5921aaa1 2321 radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
769ec6e5 2322
b8559dc2 2323 spin_lock(&nm_i->nid_list_lock);
30a61ddf
CY
2324
2325 if (build) {
2326 /*
2327 * Thread A Thread B
2328 * - f2fs_create
2329 * - f2fs_new_inode
4d57b86d 2330 * - f2fs_alloc_nid
9a4ffdf5 2331 * - __insert_nid_to_list(PREALLOC_NID)
30a61ddf 2332 * - f2fs_balance_fs_bg
4d57b86d
CY
2333 * - f2fs_build_free_nids
2334 * - __f2fs_build_free_nids
30a61ddf
CY
2335 * - scan_nat_page
2336 * - add_free_nid
2337 * - __lookup_nat_cache
2338 * - f2fs_add_link
4d57b86d
CY
2339 * - f2fs_init_inode_metadata
2340 * - f2fs_new_inode_page
2341 * - f2fs_new_node_page
30a61ddf 2342 * - set_node_addr
4d57b86d 2343 * - f2fs_alloc_nid_done
9a4ffdf5
CY
2344 * - __remove_nid_from_list(PREALLOC_NID)
2345 * - __insert_nid_to_list(FREE_NID)
30a61ddf
CY
2346 */
2347 ne = __lookup_nat_cache(nm_i, nid);
2348 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
2349 nat_get_blkaddr(ne) != NULL_ADDR))
2350 goto err_out;
2351
2352 e = __lookup_free_nid_list(nm_i, nid);
2353 if (e) {
9a4ffdf5 2354 if (e->state == FREE_NID)
30a61ddf
CY
2355 ret = true;
2356 goto err_out;
2357 }
2358 }
2359 ret = true;
b815bdc7 2360 err = __insert_free_nid(sbi, i);
30a61ddf 2361err_out:
5921aaa1
L
2362 if (update) {
2363 update_free_nid_bitmap(sbi, nid, ret, build);
2364 if (!build)
2365 nm_i->available_nids++;
2366 }
eb0aa4b8
JK
2367 spin_unlock(&nm_i->nid_list_lock);
2368 radix_tree_preload_end();
5921aaa1 2369
30a61ddf 2370 if (err)
e05df3b1 2371 kmem_cache_free(free_nid_slab, i);
30a61ddf 2372 return ret;
e05df3b1
JK
2373}
2374
b8559dc2 2375static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
e05df3b1 2376{
b8559dc2 2377 struct f2fs_nm_info *nm_i = NM_I(sbi);
e05df3b1 2378 struct free_nid *i;
cf0ee0f0
CY
2379 bool need_free = false;
2380
b8559dc2 2381 spin_lock(&nm_i->nid_list_lock);
8a7ed66a 2382 i = __lookup_free_nid_list(nm_i, nid);
9a4ffdf5 2383 if (i && i->state == FREE_NID) {
a0761f63 2384 __remove_free_nid(sbi, i, FREE_NID);
cf0ee0f0 2385 need_free = true;
e05df3b1 2386 }
b8559dc2 2387 spin_unlock(&nm_i->nid_list_lock);
cf0ee0f0
CY
2388
2389 if (need_free)
2390 kmem_cache_free(free_nid_slab, i);
e05df3b1
JK
2391}
2392
e2374015 2393static int scan_nat_page(struct f2fs_sb_info *sbi,
e05df3b1
JK
2394 struct page *nat_page, nid_t start_nid)
2395{
6fb03f3a 2396 struct f2fs_nm_info *nm_i = NM_I(sbi);
e05df3b1
JK
2397 struct f2fs_nat_block *nat_blk = page_address(nat_page);
2398 block_t blk_addr;
4ac91242 2399 unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
e05df3b1
JK
2400 int i;
2401
23380b85 2402 __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
4ac91242 2403
e05df3b1
JK
2404 i = start_nid % NAT_ENTRY_PER_BLOCK;
2405
2406 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
cfb271d4 2407 if (unlikely(start_nid >= nm_i->max_nid))
04431c44 2408 break;
23d38844
HL
2409
2410 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
e2374015
CY
2411
2412 if (blk_addr == NEW_ADDR)
a5e80e18 2413 return -EFSCORRUPTED;
e2374015 2414
5921aaa1
L
2415 if (blk_addr == NULL_ADDR) {
2416 add_free_nid(sbi, start_nid, true, true);
2417 } else {
2418 spin_lock(&NM_I(sbi)->nid_list_lock);
2419 update_free_nid_bitmap(sbi, start_nid, false, true);
2420 spin_unlock(&NM_I(sbi)->nid_list_lock);
2421 }
4ac91242 2422 }
e2374015
CY
2423
2424 return 0;
4ac91242
CY
2425}
2426
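/* Pick up free and in-use nids recorded in the NAT journal of the hot data curseg */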
2fbaa25f 2427static void scan_curseg_cache(struct f2fs_sb_info *sbi)
4ac91242 2428{
2429 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2430 struct f2fs_journal *journal = curseg->journal;
2431 int i;
2432
2433 down_read(&curseg->journal_rwsem);
2434 for (i = 0; i < nats_in_cursum(journal); i++) {
2435 block_t addr;
2436 nid_t nid;
2437
2438 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
2439 nid = le32_to_cpu(nid_in_journal(journal, i));
2440 if (addr == NULL_ADDR)
5921aaa1 2441 add_free_nid(sbi, nid, true, false);
2442 else
2443 remove_free_nid(sbi, nid);
2444 }
2445 up_read(&curseg->journal_rwsem);
2446}
2447
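/* Harvest free nids from the in-memory free nid bitmap instead of reading NAT pages */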
2448static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
2449{
2450 struct f2fs_nm_info *nm_i = NM_I(sbi);
4ac91242 2451 unsigned int i, idx;
97456574 2452 nid_t nid;
4ac91242 2453
e4544b63 2454 f2fs_down_read(&nm_i->nat_tree_lock);
2455
2456 for (i = 0; i < nm_i->nat_blocks; i++) {
2457 if (!test_bit_le(i, nm_i->nat_block_bitmap))
2458 continue;
586d1492
CY
2459 if (!nm_i->free_nid_count[i])
2460 continue;
4ac91242 2461 for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
97456574
FL
2462 idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
2463 NAT_ENTRY_PER_BLOCK, idx);
2464 if (idx >= NAT_ENTRY_PER_BLOCK)
2465 break;
4ac91242
CY
2466
2467 nid = i * NAT_ENTRY_PER_BLOCK + idx;
5921aaa1 2468 add_free_nid(sbi, nid, true, false);
4ac91242 2469
9a4ffdf5 2470 if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
4ac91242
CY
2471 goto out;
2472 }
2473 }
2474out:
2fbaa25f 2475 scan_curseg_cache(sbi);
4ac91242 2476
e4544b63 2477 f2fs_up_read(&nm_i->nat_tree_lock);
e05df3b1
JK
2478}
2479
e2374015 2480static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
4d57b86d 2481 bool sync, bool mount)
e05df3b1 2482{
e05df3b1 2483 struct f2fs_nm_info *nm_i = NM_I(sbi);
e2374015 2484 int i = 0, ret;
55008d84 2485 nid_t nid = nm_i->next_scan_nid;
e05df3b1 2486
e9cdd307
YH
2487 if (unlikely(nid >= nm_i->max_nid))
2488 nid = 0;
2489
e2cab031
ST
2490 if (unlikely(nid % NAT_ENTRY_PER_BLOCK))
2491 nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK;
2492
55008d84 2493 /* Enough entries */
9a4ffdf5 2494 if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
e2374015 2495 return 0;
e05df3b1 2496
4d57b86d 2497 if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
e2374015 2498 return 0;
e05df3b1 2499
4ac91242
CY
2500 if (!mount) {
2501 /* try to find free nids in free_nid_bitmap */
2502 scan_free_nid_bits(sbi);
2503
74986213 2504 if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
e2374015 2505 return 0;
22ad0b6a
JK
2506 }
2507
55008d84 2508 /* readahead nat pages to be scanned */
4d57b86d 2509 f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
26879fb1 2510 META_NAT, true);
e05df3b1 2511
e4544b63 2512 f2fs_down_read(&nm_i->nat_tree_lock);
a5131193 2513
e05df3b1 2514 while (1) {
66e83361
YH
2515 if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
2516 nm_i->nat_block_bitmap)) {
2517 struct page *page = get_current_nat_page(sbi, nid);
e05df3b1 2518
edc55aaf
JK
2519 if (IS_ERR(page)) {
2520 ret = PTR_ERR(page);
2521 } else {
2522 ret = scan_nat_page(sbi, page, nid);
2523 f2fs_put_page(page, 1);
2524 }
e2374015
CY
2525
2526 if (ret) {
e4544b63 2527 f2fs_up_read(&nm_i->nat_tree_lock);
a5e80e18
ZN
2528
2529 if (ret == -EFSCORRUPTED) {
2530 f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
2531 set_sbi_flag(sbi, SBI_NEED_FSCK);
2532 f2fs_handle_error(sbi,
2533 ERROR_INCONSISTENT_NAT);
2534 }
2535
edc55aaf 2536 return ret;
e2374015 2537 }
66e83361 2538 }
e05df3b1
JK
2539
2540 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
cfb271d4 2541 if (unlikely(nid >= nm_i->max_nid))
e05df3b1 2542 nid = 0;
55008d84 2543
a6d494b6 2544 if (++i >= FREE_NID_PAGES)
e05df3b1
JK
2545 break;
2546 }
2547
55008d84
JK
2548 /* go to the next free nat pages to find free nids abundantly */
2549 nm_i->next_scan_nid = nid;
e05df3b1
JK
2550
2551 /* find free nids from current sum_pages */
2fbaa25f 2552 scan_curseg_cache(sbi);
dfc08a12 2553
e4544b63 2554 f2fs_up_read(&nm_i->nat_tree_lock);
2db2388f 2555
4d57b86d 2556 f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
ea1a29a0 2557 nm_i->ra_nid_pages, META_NAT, false);
e2374015
CY
2558
2559 return 0;
e05df3b1
JK
2560}
2561
e2374015 2562int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
2411cf5b 2563{
e2374015
CY
2564 int ret;
2565
2411cf5b 2566 mutex_lock(&NM_I(sbi)->build_lock);
e2374015 2567 ret = __f2fs_build_free_nids(sbi, sync, mount);
2411cf5b 2568 mutex_unlock(&NM_I(sbi)->build_lock);
e2374015
CY
2569
2570 return ret;
2411cf5b
CY
2571}
2572
2573/*
 2574 * If this function returns success, the caller can obtain a new nid
 2575 * from the second parameter of this function.
 2576 * The returned nid can be used as an ino as well as a nid when an inode is created.
2577 */
4d57b86d 2578bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
2579{
2580 struct f2fs_nm_info *nm_i = NM_I(sbi);
2581 struct free_nid *i = NULL;
e05df3b1 2582retry:
c40e15a9 2583 if (time_to_inject(sbi, FAULT_ALLOC_NID))
cb78942b 2584 return false;
7fa750a1 2585
b8559dc2 2586 spin_lock(&nm_i->nid_list_lock);
e05df3b1 2587
04d47e67
CY
2588 if (unlikely(nm_i->available_nids == 0)) {
2589 spin_unlock(&nm_i->nid_list_lock);
2590 return false;
2591 }
e05df3b1 2592
4d57b86d
CY
2593 /* We should not use stale free nids created by f2fs_build_free_nids */
2594 if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) {
9a4ffdf5
CY
2595 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
2596 i = list_first_entry(&nm_i->free_nid_list,
b8559dc2 2597 struct free_nid, list);
55008d84 2598 *nid = i->nid;
b8559dc2 2599
a0761f63 2600 __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
04d47e67 2601 nm_i->available_nids--;
4ac91242 2602
346fe752 2603 update_free_nid_bitmap(sbi, *nid, false, false);
4ac91242 2604
b8559dc2 2605 spin_unlock(&nm_i->nid_list_lock);
55008d84
JK
2606 return true;
2607 }
b8559dc2 2608 spin_unlock(&nm_i->nid_list_lock);
55008d84
JK
2609
2610 /* Let's scan nat pages and its caches to get free nids */
f84262b0
JK
2611 if (!f2fs_build_free_nids(sbi, true, false))
2612 goto retry;
2613 return false;
e05df3b1
JK
2614}
2615
0a8165d7 2616/*
4d57b86d 2617 * f2fs_alloc_nid() should be called prior to this function.
e05df3b1 2618 */
4d57b86d 2619void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
e05df3b1
JK
2620{
2621 struct f2fs_nm_info *nm_i = NM_I(sbi);
2622 struct free_nid *i;
2623
b8559dc2 2624 spin_lock(&nm_i->nid_list_lock);
8a7ed66a 2625 i = __lookup_free_nid_list(nm_i, nid);
b8559dc2 2626 f2fs_bug_on(sbi, !i);
a0761f63 2627 __remove_free_nid(sbi, i, PREALLOC_NID);
b8559dc2 2628 spin_unlock(&nm_i->nid_list_lock);
cf0ee0f0
CY
2629
2630 kmem_cache_free(free_nid_slab, i);
e05df3b1
JK
2631}
2632
0a8165d7 2633/*
4d57b86d 2634 * f2fs_alloc_nid() should be called prior to this function.
e05df3b1 2635 */
4d57b86d 2636void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
e05df3b1 2637{
49952fa1
JK
2638 struct f2fs_nm_info *nm_i = NM_I(sbi);
2639 struct free_nid *i;
cf0ee0f0 2640 bool need_free = false;
49952fa1 2641
65985d93
JK
2642 if (!nid)
2643 return;
2644
b8559dc2 2645 spin_lock(&nm_i->nid_list_lock);
8a7ed66a 2646 i = __lookup_free_nid_list(nm_i, nid);
b8559dc2
CY
2647 f2fs_bug_on(sbi, !i);
2648
4d57b86d 2649 if (!f2fs_available_free_memory(sbi, FREE_NIDS)) {
a0761f63 2650 __remove_free_nid(sbi, i, PREALLOC_NID);
cf0ee0f0 2651 need_free = true;
95630cba 2652 } else {
a0761f63 2653 __move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
95630cba 2654 }
04d47e67
CY
2655
2656 nm_i->available_nids++;
2657
346fe752 2658 update_free_nid_bitmap(sbi, nid, true, false);
4ac91242 2659
b8559dc2 2660 spin_unlock(&nm_i->nid_list_lock);
cf0ee0f0
CY
2661
2662 if (need_free)
2663 kmem_cache_free(free_nid_slab, i);
e05df3b1
JK
2664}
2665
4d57b86d 2666int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
31696580
CY
2667{
2668 struct f2fs_nm_info *nm_i = NM_I(sbi);
31696580
CY
2669 int nr = nr_shrink;
2670
9a4ffdf5 2671 if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
ad4edb83
JK
2672 return 0;
2673
31696580
CY
2674 if (!mutex_trylock(&nm_i->build_lock))
2675 return 0;
2676
042be373
CY
2677 while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) {
2678 struct free_nid *i, *next;
2679 unsigned int batch = SHRINK_NID_BATCH_SIZE;
b8559dc2 2680
042be373
CY
2681 spin_lock(&nm_i->nid_list_lock);
2682 list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
2683 if (!nr_shrink || !batch ||
2684 nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
2685 break;
2686 __remove_free_nid(sbi, i, FREE_NID);
2687 kmem_cache_free(free_nid_slab, i);
2688 nr_shrink--;
2689 batch--;
2690 }
2691 spin_unlock(&nm_i->nid_list_lock);
31696580 2692 }
042be373 2693
31696580
CY
2694 mutex_unlock(&nm_i->build_lock);
2695
2696 return nr - nr_shrink;
2697}
2698
9627a7b3 2699int f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
28cdce04 2700{
28cdce04
CY
2701 void *src_addr, *dst_addr;
2702 size_t inline_size;
2703 struct page *ipage;
2704 struct f2fs_inode *ri;
2705
4d57b86d 2706 ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
9627a7b3
CY
2707 if (IS_ERR(ipage))
2708 return PTR_ERR(ipage);
28cdce04 2709
e3b4d43f 2710 ri = F2FS_INODE(page);
1eca05aa 2711 if (ri->i_inline & F2FS_INLINE_XATTR) {
96dd0251
CY
2712 if (!f2fs_has_inline_xattr(inode)) {
2713 set_inode_flag(inode, FI_INLINE_XATTR);
2714 stat_inc_inline_xattr(inode);
2715 }
1eca05aa 2716 } else {
96dd0251
CY
2717 if (f2fs_has_inline_xattr(inode)) {
2718 stat_dec_inline_xattr(inode);
2719 clear_inode_flag(inode, FI_INLINE_XATTR);
2720 }
e3b4d43f
JK
2721 goto update_inode;
2722 }
2723
6afc662e
CY
2724 dst_addr = inline_xattr_addr(inode, ipage);
2725 src_addr = inline_xattr_addr(inode, page);
28cdce04
CY
2726 inline_size = inline_xattr_size(inode);
2727
bae0ee7a 2728 f2fs_wait_on_page_writeback(ipage, NODE, true, true);
28cdce04 2729 memcpy(dst_addr, src_addr, inline_size);
e3b4d43f 2730update_inode:
4d57b86d 2731 f2fs_update_inode(inode, ipage);
28cdce04 2732 f2fs_put_page(ipage, 1);
9627a7b3 2733 return 0;
28cdce04
CY
2734}
2735
4d57b86d 2736int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
abb2366c 2737{
4081363f 2738 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
abb2366c 2739 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
87905682
YH
2740 nid_t new_xnid;
2741 struct dnode_of_data dn;
abb2366c 2742 struct node_info ni;
d260081c 2743 struct page *xpage;
7735730d 2744 int err;
abb2366c 2745
abb2366c
JK
2746 if (!prev_xnid)
2747 goto recover_xnid;
2748
d260081c 2749 /* 1: invalidate the previous xattr nid */
a9419b63 2750 err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
7735730d
CY
2751 if (err)
2752 return err;
2753
4d57b86d 2754 f2fs_invalidate_blocks(sbi, ni.blk_addr);
000519f2 2755 dec_valid_node_count(sbi, inode, false);
479f40c4 2756 set_node_addr(sbi, &ni, NULL_ADDR, false);
abb2366c
JK
2757
2758recover_xnid:
d260081c 2759 /* 2: update xattr nid in inode */
4d57b86d 2760 if (!f2fs_alloc_nid(sbi, &new_xnid))
87905682
YH
2761 return -ENOSPC;
2762
2763 set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
4d57b86d 2764 xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
87905682 2765 if (IS_ERR(xpage)) {
4d57b86d 2766 f2fs_alloc_nid_failed(sbi, new_xnid);
87905682
YH
2767 return PTR_ERR(xpage);
2768 }
2769
4d57b86d
CY
2770 f2fs_alloc_nid_done(sbi, new_xnid);
2771 f2fs_update_inode_page(inode);
d260081c
CY
2772
2773 /* 3: update and set xattr node page dirty */
86d7d57a 2774 if (page) {
50a472bb
JK
2775 memcpy(F2FS_NODE(xpage), F2FS_NODE(page),
2776 VALID_XATTR_BLOCK_SIZE);
86d7d57a
ZN
2777 set_page_dirty(xpage);
2778 }
d260081c 2779 f2fs_put_page(xpage, 1);
abb2366c 2780
d260081c 2781 return 0;
abb2366c
JK
2782}
2783
4d57b86d 2784int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
e05df3b1 2785{
58bfaf44 2786 struct f2fs_inode *src, *dst;
e05df3b1
JK
2787 nid_t ino = ino_of_node(page);
2788 struct node_info old_ni, new_ni;
2789 struct page *ipage;
7735730d 2790 int err;
e05df3b1 2791
a9419b63 2792 err = f2fs_get_node_info(sbi, ino, &old_ni, false);
7735730d
CY
2793 if (err)
2794 return err;
e8271fa3
JK
2795
2796 if (unlikely(old_ni.blk_addr != NULL_ADDR))
2797 return -EINVAL;
e8ea9b3d 2798retry:
300e129c 2799 ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
e8ea9b3d 2800 if (!ipage) {
4034247a 2801 memalloc_retry_wait(GFP_NOFS);
e8ea9b3d
JK
2802 goto retry;
2803 }
e05df3b1 2804
e1c42045 2805 /* Should not use this inode from free nid list */
b8559dc2 2806 remove_free_nid(sbi, ino);
e05df3b1 2807
237c0790
JK
2808 if (!PageUptodate(ipage))
2809 SetPageUptodate(ipage);
e05df3b1 2810 fill_node_footer(ipage, ino, ino, 0, true);
ef2a0071 2811 set_cold_node(ipage, false);
e05df3b1 2812
58bfaf44
JK
2813 src = F2FS_INODE(page);
2814 dst = F2FS_INODE(ipage);
e05df3b1 2815
36218b81 2816 memcpy(dst, src, offsetof(struct f2fs_inode, i_ext));
58bfaf44
JK
2817 dst->i_size = 0;
2818 dst->i_blocks = cpu_to_le64(1);
2819 dst->i_links = cpu_to_le32(1);
2820 dst->i_xattr_nid = 0;
7a2af766 2821 dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
5c57132e 2822 if (dst->i_inline & F2FS_EXTRA_ATTR) {
7a2af766 2823 dst->i_extra_isize = src->i_extra_isize;
6afc662e 2824
7beb01f7 2825 if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
6afc662e
CY
2826 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2827 i_inline_xattr_size))
2828 dst->i_inline_xattr_size = src->i_inline_xattr_size;
2829
7beb01f7 2830 if (f2fs_sb_has_project_quota(sbi) &&
5c57132e
CY
2831 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2832 i_projid))
2833 dst->i_projid = src->i_projid;
5cd1f387 2834
7beb01f7 2835 if (f2fs_sb_has_inode_crtime(sbi) &&
5cd1f387
CY
2836 F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2837 i_crtime_nsec)) {
2838 dst->i_crtime = src->i_crtime;
2839 dst->i_crtime_nsec = src->i_crtime_nsec;
2840 }
5c57132e 2841 }
e05df3b1
JK
2842
2843 new_ni = old_ni;
2844 new_ni.ino = ino;
2845
0abd675e 2846 if (unlikely(inc_valid_node_count(sbi, NULL, true)))
65e5cd0a 2847 WARN_ON(1);
479f40c4 2848 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
e05df3b1 2849 inc_valid_inode_count(sbi);
617deb8c 2850 set_page_dirty(ipage);
e05df3b1
JK
2851 f2fs_put_page(ipage, 1);
2852 return 0;
2853}
2854
7735730d 2855int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
e05df3b1
JK
2856 unsigned int segno, struct f2fs_summary_block *sum)
2857{
2858 struct f2fs_node *rn;
2859 struct f2fs_summary *sum_entry;
e05df3b1 2860 block_t addr;
9ecf4b80 2861 int i, idx, last_offset, nrpages;
e05df3b1
JK
2862
2863 /* scan the node segment */
a60108f7 2864 last_offset = BLKS_PER_SEG(sbi);
e05df3b1
JK
2865 addr = START_BLOCK(sbi, segno);
2866 sum_entry = &sum->entries[0];
2867
9ecf4b80 2868 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
5f7136db 2869 nrpages = bio_max_segs(last_offset - i);
393ff91f 2870
e1c42045 2871 /* readahead node pages */
4d57b86d 2872 f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
e05df3b1 2873
9ecf4b80 2874 for (idx = addr; idx < addr + nrpages; idx++) {
4d57b86d 2875 struct page *page = f2fs_get_tmp_page(sbi, idx);
9af0ff1c 2876
7735730d
CY
2877 if (IS_ERR(page))
2878 return PTR_ERR(page);
2879
9ecf4b80
CY
2880 rn = F2FS_NODE(page);
2881 sum_entry->nid = rn->footer.nid;
2882 sum_entry->version = 0;
2883 sum_entry->ofs_in_node = 0;
2884 sum_entry++;
2885 f2fs_put_page(page, 1);
9af0ff1c 2886 }
bac4eef6 2887
9ecf4b80 2888 invalidate_mapping_pages(META_MAPPING(sbi), addr,
bac4eef6 2889 addr + nrpages);
e05df3b1 2890 }
7735730d 2891 return 0;
e05df3b1
JK
2892}
2893
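/* Drain the NAT journal in the hot data curseg into the dirty NAT cache */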
aec71382 2894static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2895{
2896 struct f2fs_nm_info *nm_i = NM_I(sbi);
2897 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
b7ad7512 2898 struct f2fs_journal *journal = curseg->journal;
2899 int i;
2900
b7ad7512 2901 down_write(&curseg->journal_rwsem);
dfc08a12 2902 for (i = 0; i < nats_in_cursum(journal); i++) {
2903 struct nat_entry *ne;
2904 struct f2fs_nat_entry raw_ne;
dfc08a12 2905 nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
e05df3b1 2906
2907 if (f2fs_check_nid_range(sbi, nid))
2908 continue;
2909
dfc08a12 2910 raw_ne = nat_in_journal(journal, i);
9be32d72 2911
e05df3b1 2912 ne = __lookup_nat_cache(nm_i, nid);
e05df3b1 2913 if (!ne) {
32410577 2914 ne = __alloc_nat_entry(sbi, nid, true);
12f9ef37 2915 __init_nat_entry(nm_i, ne, &raw_ne, true);
e05df3b1 2916 }
2917
2918 /*
2919 * if a free nat in journal has not been used after last
2920 * checkpoint, we should remove it from available nids,
2921 * since later we will add it again.
2922 */
2923 if (!get_nat_flag(ne, IS_DIRTY) &&
2924 le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
2925 spin_lock(&nm_i->nid_list_lock);
2926 nm_i->available_nids--;
2927 spin_unlock(&nm_i->nid_list_lock);
2928 }
2929
e05df3b1 2930 __set_nat_cache_dirty(nm_i, ne);
e05df3b1 2931 }
dfc08a12 2932 update_nats_in_cursum(journal, -i);
b7ad7512 2933 up_write(&curseg->journal_rwsem);
2934}
2935
309cc2b6
JK
2936static void __adjust_nat_entry_set(struct nat_entry_set *nes,
2937 struct list_head *head, int max)
e05df3b1 2938{
309cc2b6 2939 struct nat_entry_set *cur;
e05df3b1 2940
309cc2b6
JK
2941 if (nes->entry_cnt >= max)
2942 goto add_out;
e05df3b1 2943
309cc2b6
JK
2944 list_for_each_entry(cur, head, set_list) {
2945 if (cur->entry_cnt >= nes->entry_cnt) {
2946 list_add(&nes->set_list, cur->set_list.prev);
2947 return;
2948 }
aec71382 2949 }
309cc2b6
JK
2950add_out:
2951 list_add_tail(&nes->set_list, head);
2952}
e05df3b1 2953
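/* Mark a NAT block as fully empty, fully valid, or neither in the nat_bits bitmaps */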
2954static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs,
2955 unsigned int valid)
2956{
2957 if (valid == 0) {
2958 __set_bit_le(nat_ofs, nm_i->empty_nat_bits);
2959 __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
2960 return;
2961 }
2962
2963 __clear_bit_le(nat_ofs, nm_i->empty_nat_bits);
2964 if (valid == NAT_ENTRY_PER_BLOCK)
2965 __set_bit_le(nat_ofs, nm_i->full_nat_bits);
2966 else
2967 __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
2968}
2969
2970static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
22ad0b6a
JK
2971 struct page *page)
2972{
2973 struct f2fs_nm_info *nm_i = NM_I(sbi);
2974 unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
2975 struct f2fs_nat_block *nat_blk = page_address(page);
2976 int valid = 0;
37a0ab2a 2977 int i = 0;
22ad0b6a 2978
94c821fb 2979 if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
22ad0b6a
JK
2980 return;
2981
37a0ab2a
FL
2982 if (nat_index == 0) {
2983 valid = 1;
2984 i = 1;
2985 }
2986 for (; i < NAT_ENTRY_PER_BLOCK; i++) {
36af5f40 2987 if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
22ad0b6a
JK
2988 valid++;
2989 }
94c821fb
CY
2990
2991 __update_nat_bits(nm_i, nat_index, valid);
2992}
2993
2994void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
2995{
2996 struct f2fs_nm_info *nm_i = NM_I(sbi);
2997 unsigned int nat_ofs;
2998
e4544b63 2999 f2fs_down_read(&nm_i->nat_tree_lock);
94c821fb
CY
3000
3001 for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
3002 unsigned int valid = 0, nid_ofs = 0;
3003
3004 /* handle nid zero due to it should never be used */
3005 if (unlikely(nat_ofs == 0)) {
3006 valid = 1;
3007 nid_ofs = 1;
3008 }
3009
3010 for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) {
3011 if (!test_bit_le(nid_ofs,
3012 nm_i->free_nid_bitmap[nat_ofs]))
3013 valid++;
3014 }
3015
3016 __update_nat_bits(nm_i, nat_ofs, valid);
22ad0b6a
JK
3017 }
3018
e4544b63 3019 f2fs_up_read(&nm_i->nat_tree_lock);
22ad0b6a
JK
3020}
3021
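/* Flush one dirty NAT entry set, either into the curseg journal or into its NAT block */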
edc55aaf 3022static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
22ad0b6a 3023 struct nat_entry_set *set, struct cp_control *cpc)
3024{
3025 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
b7ad7512 3026 struct f2fs_journal *journal = curseg->journal;
309cc2b6
JK
3027 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
3028 bool to_journal = true;
3029 struct f2fs_nat_block *nat_blk;
3030 struct nat_entry *ne, *cur;
3031 struct page *page = NULL;
e05df3b1 3032
aec71382
CY
3033 /*
3034 * there are two steps to flush nat entries:
3035 * #1, flush nat entries to journal in current hot data summary block.
3036 * #2, flush nat entries to nat page.
3037 */
94c821fb 3038 if ((cpc->reason & CP_UMOUNT) ||
22ad0b6a 3039 !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
309cc2b6
JK
3040 to_journal = false;
3041
3042 if (to_journal) {
b7ad7512 3043 down_write(&curseg->journal_rwsem);
309cc2b6
JK
3044 } else {
3045 page = get_next_nat_page(sbi, start_nid);
edc55aaf
JK
3046 if (IS_ERR(page))
3047 return PTR_ERR(page);
3048
309cc2b6
JK
3049 nat_blk = page_address(page);
3050 f2fs_bug_on(sbi, !nat_blk);
3051 }
aec71382 3052
309cc2b6
JK
3053 /* flush dirty nats in nat entry set */
3054 list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
3055 struct f2fs_nat_entry *raw_ne;
3056 nid_t nid = nat_get_nid(ne);
3057 int offset;
3058
febeca6d 3059 f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
aec71382
CY
3060
3061 if (to_journal) {
4d57b86d 3062 offset = f2fs_lookup_journal_in_cursum(journal,
309cc2b6
JK
3063 NAT_JOURNAL, nid, 1);
3064 f2fs_bug_on(sbi, offset < 0);
dfc08a12
CY
3065 raw_ne = &nat_in_journal(journal, offset);
3066 nid_in_journal(journal, offset) = cpu_to_le32(nid);
aec71382 3067 } else {
309cc2b6 3068 raw_ne = &nat_blk->entries[nid - start_nid];
e05df3b1 3069 }
309cc2b6 3070 raw_nat_from_node_info(raw_ne, &ne->ni);
309cc2b6 3071 nat_reset_flag(ne);
0b28b71e 3072 __clear_nat_cache_dirty(NM_I(sbi), set, ne);
04d47e67 3073 if (nat_get_blkaddr(ne) == NULL_ADDR) {
5921aaa1 3074 add_free_nid(sbi, nid, false, true);
4ac91242
CY
3075 } else {
3076 spin_lock(&NM_I(sbi)->nid_list_lock);
346fe752 3077 update_free_nid_bitmap(sbi, nid, false, false);
04d47e67
CY
3078 spin_unlock(&NM_I(sbi)->nid_list_lock);
3079 }
309cc2b6 3080 }
e05df3b1 3081
22ad0b6a 3082 if (to_journal) {
b7ad7512 3083 up_write(&curseg->journal_rwsem);
22ad0b6a 3084 } else {
94c821fb 3085 update_nat_bits(sbi, start_nid, page);
309cc2b6 3086 f2fs_put_page(page, 1);
22ad0b6a 3087 }
aec71382 3088
59c9081b
YH
3089 /* Allow dirty nats by node block allocation in write_begin */
3090 if (!set->entry_cnt) {
3091 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
3092 kmem_cache_free(nat_entry_set_slab, set);
3093 }
edc55aaf 3094 return 0;
309cc2b6 3095}
aec71382 3096
309cc2b6
JK
3097/*
3098 * This function is called during the checkpointing process.
3099 */
edc55aaf 3100int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
309cc2b6
JK
3101{
3102 struct f2fs_nm_info *nm_i = NM_I(sbi);
3103 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
b7ad7512 3104 struct f2fs_journal *journal = curseg->journal;
c31e4961 3105 struct nat_entry_set *setvec[NAT_VEC_SIZE];
309cc2b6
JK
3106 struct nat_entry_set *set, *tmp;
3107 unsigned int found;
3108 nid_t set_idx = 0;
3109 LIST_HEAD(sets);
edc55aaf 3110 int err = 0;
309cc2b6 3111
a95ba66a
JK
3112 /*
3113 * during unmount, let's flush nat_bits before checking
3114 * nat_cnt[DIRTY_NAT].
3115 */
94c821fb 3116 if (cpc->reason & CP_UMOUNT) {
e4544b63 3117 f2fs_down_write(&nm_i->nat_tree_lock);
7f2ecdd8 3118 remove_nats_in_journal(sbi);
e4544b63 3119 f2fs_up_write(&nm_i->nat_tree_lock);
7f2ecdd8
JK
3120 }
3121
a95ba66a 3122 if (!nm_i->nat_cnt[DIRTY_NAT])
edc55aaf 3123 return 0;
a5131193 3124
e4544b63 3125 f2fs_down_write(&nm_i->nat_tree_lock);
a5131193 3126
3127 /*
 3128 * if there is not enough space in the journal to store dirty nat
3129 * entries, remove all entries from journal and merge them
3130 * into nat entry set.
3131 */
94c821fb 3132 if (cpc->reason & CP_UMOUNT ||
a95ba66a
JK
3133 !__has_cursum_space(journal,
3134 nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
309cc2b6
JK
3135 remove_nats_in_journal(sbi);
3136
309cc2b6 3137 while ((found = __gang_lookup_nat_set(nm_i,
c31e4961 3138 set_idx, NAT_VEC_SIZE, setvec))) {
309cc2b6 3139 unsigned idx;
5f029c04 3140
309cc2b6
JK
3141 set_idx = setvec[found - 1]->set + 1;
3142 for (idx = 0; idx < found; idx++)
3143 __adjust_nat_entry_set(setvec[idx], &sets,
dfc08a12 3144 MAX_NAT_JENTRIES(journal));
e05df3b1 3145 }
aec71382 3146
309cc2b6 3147 /* flush dirty nats in nat entry set */
edc55aaf
JK
3148 list_for_each_entry_safe(set, tmp, &sets, set_list) {
3149 err = __flush_nat_entry_set(sbi, set, cpc);
3150 if (err)
3151 break;
3152 }
309cc2b6 3153
e4544b63 3154 f2fs_up_write(&nm_i->nat_tree_lock);
59c9081b 3155 /* Allow dirty nats by node block allocation in write_begin */
edc55aaf
JK
3156
3157 return err;
e05df3b1
JK
3158}
3159
22ad0b6a
JK
3160static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
3161{
3162 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3163 struct f2fs_nm_info *nm_i = NM_I(sbi);
3164 unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
3165 unsigned int i;
3166 __u64 cp_ver = cur_cp_version(ckpt);
22ad0b6a
JK
3167 block_t nat_bits_addr;
3168
df033caf 3169 nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
0b6d4ca0 3170 nm_i->nat_bits = f2fs_kvzalloc(sbi,
8fb9f319 3171 F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL);
22ad0b6a
JK
3172 if (!nm_i->nat_bits)
3173 return -ENOMEM;
3174
94c821fb
CY
3175 nm_i->full_nat_bits = nm_i->nat_bits + 8;
3176 nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
3177
3178 if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
3179 return 0;
3180
a60108f7 3181 nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) -
22ad0b6a
JK
3182 nm_i->nat_bits_blocks;
3183 for (i = 0; i < nm_i->nat_bits_blocks; i++) {
7735730d
CY
3184 struct page *page;
3185
3186 page = f2fs_get_meta_page(sbi, nat_bits_addr++);
3b30eb19 3187 if (IS_ERR(page))
7735730d 3188 return PTR_ERR(page);
22ad0b6a 3189
8fb9f319 3190 memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i),
22ad0b6a
JK
3191 page_address(page), F2FS_BLKSIZE);
3192 f2fs_put_page(page, 1);
3193 }
3194
ced2c7ea 3195 cp_ver |= (cur_cp_crc(ckpt) << 32);
22ad0b6a 3196 if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
94c821fb
CY
3197 clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
3198 f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)",
3199 cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits));
22ad0b6a
JK
3200 return 0;
3201 }
3202
dcbb4c10 3203 f2fs_notice(sbi, "Found nat_bits in checkpoint");
22ad0b6a
JK
3204 return 0;
3205}
3206
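/* Seed the free nid bitmap from the empty/full nat_bits loaded from the checkpoint */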
bd80a4b9 3207static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
3208{
3209 struct f2fs_nm_info *nm_i = NM_I(sbi);
3210 unsigned int i = 0;
3211 nid_t nid, last_nid;
3212
94c821fb 3213 if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
7041d5d2
CY
3214 return;
3215
3216 for (i = 0; i < nm_i->nat_blocks; i++) {
3217 i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
3218 if (i >= nm_i->nat_blocks)
3219 break;
3220
3221 __set_bit_le(i, nm_i->nat_block_bitmap);
3222
3223 nid = i * NAT_ENTRY_PER_BLOCK;
f6986ede 3224 last_nid = nid + NAT_ENTRY_PER_BLOCK;
7041d5d2 3225
346fe752 3226 spin_lock(&NM_I(sbi)->nid_list_lock);
7041d5d2 3227 for (; nid < last_nid; nid++)
346fe752
CY
3228 update_free_nid_bitmap(sbi, nid, true, true);
3229 spin_unlock(&NM_I(sbi)->nid_list_lock);
7041d5d2
CY
3230 }
3231
3232 for (i = 0; i < nm_i->nat_blocks; i++) {
3233 i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
3234 if (i >= nm_i->nat_blocks)
3235 break;
3236
3237 __set_bit_le(i, nm_i->nat_block_bitmap);
3238 }
3239}
3240
e05df3b1
JK
3241static int init_node_manager(struct f2fs_sb_info *sbi)
3242{
3243 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
3244 struct f2fs_nm_info *nm_i = NM_I(sbi);
3245 unsigned char *version_bitmap;
22ad0b6a
JK
3246 unsigned int nat_segs;
3247 int err;
e05df3b1
JK
3248
3249 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
3250
 3251 /* segment_count_nat includes pair segments, so divide by 2. */
3252 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
22ad0b6a
JK
3253 nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
3254 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
7ee0eeab 3255
b63da15e 3256 /* not used nids: 0, node, meta, (and root counted as valid node) */
04d47e67 3257 nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
27cae0bc 3258 F2FS_RESERVED_NODE_NUM;
9a4ffdf5
CY
3259 nm_i->nid_cnt[FREE_NID] = 0;
3260 nm_i->nid_cnt[PREALLOC_NID] = 0;
cdfc41c1 3261 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
ea1a29a0 3262 nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
2304cb0c 3263 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
47c8ebcc 3264 nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS;
e05df3b1 3265
8a7ed66a 3266 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
9a4ffdf5 3267 INIT_LIST_HEAD(&nm_i->free_nid_list);
769ec6e5
JK
3268 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
3269 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
e05df3b1 3270 INIT_LIST_HEAD(&nm_i->nat_entries);
22969158 3271 spin_lock_init(&nm_i->nat_list_lock);
e05df3b1
JK
3272
3273 mutex_init(&nm_i->build_lock);
b8559dc2 3274 spin_lock_init(&nm_i->nid_list_lock);
e4544b63 3275 init_f2fs_rwsem(&nm_i->nat_tree_lock);
e05df3b1 3276
e05df3b1 3277 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
79b5793b 3278 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
e05df3b1 3279 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
79b5793b
AG
3280 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
3281 GFP_KERNEL);
3282 if (!nm_i->nat_bitmap)
3283 return -ENOMEM;
599a09b2 3284
22ad0b6a
JK
3285 err = __get_nat_bitmaps(sbi);
3286 if (err)
3287 return err;
3288
599a09b2
CY
3289#ifdef CONFIG_F2FS_CHECK_FS
3290 nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
3291 GFP_KERNEL);
3292 if (!nm_i->nat_bitmap_mir)
3293 return -ENOMEM;
3294#endif
3295
e05df3b1
JK
3296 return 0;
3297}
3298
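/* Allocate the per-NAT-block free nid bitmaps, the NAT block bitmap and the free nid counters */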
9f7e4a2c 3299static int init_free_nid_cache(struct f2fs_sb_info *sbi)
3300{
3301 struct f2fs_nm_info *nm_i = NM_I(sbi);
bb1105e4 3302 int i;
4ac91242 3303
026f0507 3304 nm_i->free_nid_bitmap =
0b6d4ca0
EB
3305 f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *),
3306 nm_i->nat_blocks),
3307 GFP_KERNEL);
4ac91242
CY
3308 if (!nm_i->free_nid_bitmap)
3309 return -ENOMEM;
3310
bb1105e4
JK
3311 for (i = 0; i < nm_i->nat_blocks; i++) {
3312 nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
e15d54d5 3313 f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
68c43a23 3314 if (!nm_i->free_nid_bitmap[i])
bb1105e4
JK
3315 return -ENOMEM;
3316 }
3317
628b3d14 3318 nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
4ac91242
CY
3319 GFP_KERNEL);
3320 if (!nm_i->nat_block_bitmap)
3321 return -ENOMEM;
586d1492 3322
9d2a789c
KC
3323 nm_i->free_nid_count =
3324 f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short),
3325 nm_i->nat_blocks),
3326 GFP_KERNEL);
586d1492
CY
3327 if (!nm_i->free_nid_count)
3328 return -ENOMEM;
4ac91242
CY
3329 return 0;
3330}
3331
4d57b86d 3332int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
e05df3b1
JK
3333{
3334 int err;
3335
acbf054d
CY
3336 sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info),
3337 GFP_KERNEL);
e05df3b1
JK
3338 if (!sbi->nm_info)
3339 return -ENOMEM;
3340
3341 err = init_node_manager(sbi);
3342 if (err)
3343 return err;
3344
4ac91242
CY
3345 err = init_free_nid_cache(sbi);
3346 if (err)
3347 return err;
3348
7041d5d2
CY
3349 /* load free nid status from nat_bits table */
3350 load_free_nid_bitmap(sbi);
3351
e2374015 3352 return f2fs_build_free_nids(sbi, true, true);
e05df3b1
JK
3353}
3354
4d57b86d 3355void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
e05df3b1
JK
3356{
3357 struct f2fs_nm_info *nm_i = NM_I(sbi);
3358 struct free_nid *i, *next_i;
c31e4961
CY
3359 void *vec[NAT_VEC_SIZE];
3360 struct nat_entry **natvec = (struct nat_entry **)vec;
3361 struct nat_entry_set **setvec = (struct nat_entry_set **)vec;
e05df3b1
JK
3362 nid_t nid = 0;
3363 unsigned int found;
3364
3365 if (!nm_i)
3366 return;
3367
3368 /* destroy free nid list */
b8559dc2 3369 spin_lock(&nm_i->nid_list_lock);
9a4ffdf5 3370 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
a0761f63 3371 __remove_free_nid(sbi, i, FREE_NID);
b8559dc2 3372 spin_unlock(&nm_i->nid_list_lock);
cf0ee0f0 3373 kmem_cache_free(free_nid_slab, i);
b8559dc2 3374 spin_lock(&nm_i->nid_list_lock);
e05df3b1 3375 }
9a4ffdf5
CY
3376 f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
3377 f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
3378 f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
b8559dc2 3379 spin_unlock(&nm_i->nid_list_lock);
e05df3b1
JK
3380
3381 /* destroy nat cache */
e4544b63 3382 f2fs_down_write(&nm_i->nat_tree_lock);
e05df3b1 3383 while ((found = __gang_lookup_nat_cache(nm_i,
c31e4961 3384 nid, NAT_VEC_SIZE, natvec))) {
e05df3b1 3385 unsigned idx;
7aed0d45 3386
b6ce391e 3387 nid = nat_get_nid(natvec[found - 1]) + 1;
22969158
CY
3388 for (idx = 0; idx < found; idx++) {
3389 spin_lock(&nm_i->nat_list_lock);
3390 list_del(&natvec[idx]->list);
3391 spin_unlock(&nm_i->nat_list_lock);
3392
b6ce391e 3393 __del_from_nat_cache(nm_i, natvec[idx]);
22969158 3394 }
e05df3b1 3395 }
a95ba66a 3396 f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);
7aed0d45
JK
3397
3398 /* destroy nat set cache */
3399 nid = 0;
c31e4961 3400 memset(vec, 0, sizeof(void *) * NAT_VEC_SIZE);
7aed0d45 3401 while ((found = __gang_lookup_nat_set(nm_i,
c31e4961 3402 nid, NAT_VEC_SIZE, setvec))) {
7aed0d45
JK
3403 unsigned idx;
3404
3405 nid = setvec[found - 1]->set + 1;
3406 for (idx = 0; idx < found; idx++) {
 3407 /* entry_cnt may be non-zero if a checkpoint error occurred */
3408 f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
3409 radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
3410 kmem_cache_free(nat_entry_set_slab, setvec[idx]);
3411 }
3412 }
e4544b63 3413 f2fs_up_write(&nm_i->nat_tree_lock);
e05df3b1 3414
4ac91242 3415 kvfree(nm_i->nat_block_bitmap);
bb1105e4
JK
3416 if (nm_i->free_nid_bitmap) {
3417 int i;
3418
3419 for (i = 0; i < nm_i->nat_blocks; i++)
3420 kvfree(nm_i->free_nid_bitmap[i]);
5222595d 3421 kvfree(nm_i->free_nid_bitmap);
bb1105e4 3422 }
586d1492 3423 kvfree(nm_i->free_nid_count);
4ac91242 3424
5222595d
JK
3425 kvfree(nm_i->nat_bitmap);
3426 kvfree(nm_i->nat_bits);
599a09b2 3427#ifdef CONFIG_F2FS_CHECK_FS
5222595d 3428 kvfree(nm_i->nat_bitmap_mir);
599a09b2 3429#endif
e05df3b1 3430 sbi->nm_info = NULL;
c8eb7024 3431 kfree(nm_i);
e05df3b1
JK
3432}
3433
4d57b86d 3434int __init f2fs_create_node_manager_caches(void)
e05df3b1 3435{
98510003 3436 nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry",
e8512d2e 3437 sizeof(struct nat_entry));
e05df3b1 3438 if (!nat_entry_slab)
aec71382 3439 goto fail;
e05df3b1 3440
98510003 3441 free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid",
e8512d2e 3442 sizeof(struct free_nid));
aec71382 3443 if (!free_nid_slab)
ce3e6d25 3444 goto destroy_nat_entry;
aec71382 3445
98510003 3446 nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set",
aec71382
CY
3447 sizeof(struct nat_entry_set));
3448 if (!nat_entry_set_slab)
ce3e6d25 3449 goto destroy_free_nid;
50fa53ec 3450
98510003 3451 fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry",
50fa53ec
CY
3452 sizeof(struct fsync_node_entry));
3453 if (!fsync_node_entry_slab)
3454 goto destroy_nat_entry_set;
e05df3b1 3455 return 0;
aec71382 3456
50fa53ec
CY
3457destroy_nat_entry_set:
3458 kmem_cache_destroy(nat_entry_set_slab);
ce3e6d25 3459destroy_free_nid:
aec71382 3460 kmem_cache_destroy(free_nid_slab);
ce3e6d25 3461destroy_nat_entry:
aec71382
CY
3462 kmem_cache_destroy(nat_entry_slab);
3463fail:
3464 return -ENOMEM;
e05df3b1
JK
3465}
3466
4d57b86d 3467void f2fs_destroy_node_manager_caches(void)
e05df3b1 3468{
50fa53ec 3469 kmem_cache_destroy(fsync_node_entry_slab);
aec71382 3470 kmem_cache_destroy(nat_entry_set_slab);
e05df3b1
JK
3471 kmem_cache_destroy(free_nid_slab);
3472 kmem_cache_destroy(nat_entry_slab);
3473}