f2fs: introduce FITRIM in f2fs_ioctl
[linux-2.6-block.git] / fs / f2fs / node.c
CommitLineData
0a8165d7 1/*
e05df3b1
JK
2 * fs/f2fs/node.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/mpage.h>
14#include <linux/backing-dev.h>
15#include <linux/blkdev.h>
16#include <linux/pagevec.h>
17#include <linux/swap.h>
18
19#include "f2fs.h"
20#include "node.h"
21#include "segment.h"
51dd6249 22#include <trace/events/f2fs.h>
e05df3b1 23
f978f5a0
GZ
/*
 * Evaluates to the result of mutex_is_locked() on the build_lock of the
 * node manager passed as @nmi.
 *
 * The argument is now actually used (and parenthesized); previously the
 * macro ignored it and silently relied on a variable named nm_i being in
 * scope at every call site.
 */
#define on_build_free_nids(nmi) mutex_is_locked(&(nmi)->build_lock)
25
e05df3b1
JK
/* slab cache that struct nat_entry objects are allocated from and freed to */
static struct kmem_cache *nat_entry_slab;
/* NOTE(review): presumably backs struct free_nid; its users are not visible here */
static struct kmem_cache *free_nid_slab;
/* NOTE(review): presumably backs NAT entry sets; its users are not visible here */
static struct kmem_cache *nat_entry_set_slab;
e05df3b1 29
6fb03f3a 30bool available_free_memory(struct f2fs_sb_info *sbi, int type)
cdfc41c1 31{
6fb03f3a 32 struct f2fs_nm_info *nm_i = NM_I(sbi);
cdfc41c1
JK
33 struct sysinfo val;
34 unsigned long mem_size = 0;
6fb03f3a 35 bool res = false;
cdfc41c1
JK
36
37 si_meminfo(&val);
6fb03f3a
JK
38 /* give 25%, 25%, 50% memory for each components respectively */
39 if (type == FREE_NIDS) {
40 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
41 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
42 } else if (type == NAT_ENTRIES) {
43 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
44 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
45 } else if (type == DIRTY_DENTS) {
2743f865
JK
46 if (sbi->sb->s_bdi->dirty_exceeded)
47 return false;
6fb03f3a
JK
48 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
49 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
50 }
51 return res;
cdfc41c1
JK
52}
53
e05df3b1
JK
54static void clear_node_page_dirty(struct page *page)
55{
56 struct address_space *mapping = page->mapping;
e05df3b1
JK
57 unsigned int long flags;
58
59 if (PageDirty(page)) {
60 spin_lock_irqsave(&mapping->tree_lock, flags);
61 radix_tree_tag_clear(&mapping->page_tree,
62 page_index(page),
63 PAGECACHE_TAG_DIRTY);
64 spin_unlock_irqrestore(&mapping->tree_lock, flags);
65
66 clear_page_dirty_for_io(page);
4081363f 67 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
e05df3b1
JK
68 }
69 ClearPageUptodate(page);
70}
71
72static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
73{
74 pgoff_t index = current_nat_addr(sbi, nid);
75 return get_meta_page(sbi, index);
76}
77
78static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
79{
80 struct page *src_page;
81 struct page *dst_page;
82 pgoff_t src_off;
83 pgoff_t dst_off;
84 void *src_addr;
85 void *dst_addr;
86 struct f2fs_nm_info *nm_i = NM_I(sbi);
87
88 src_off = current_nat_addr(sbi, nid);
89 dst_off = next_nat_addr(sbi, src_off);
90
91 /* get current nat block page with lock */
92 src_page = get_meta_page(sbi, src_off);
e05df3b1 93 dst_page = grab_meta_page(sbi, dst_off);
9850cf4a 94 f2fs_bug_on(sbi, PageDirty(src_page));
e05df3b1
JK
95
96 src_addr = page_address(src_page);
97 dst_addr = page_address(dst_page);
98 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
99 set_page_dirty(dst_page);
100 f2fs_put_page(src_page, 1);
101
102 set_to_next_nat(nm_i, nid);
103
104 return dst_page;
105}
106
e05df3b1
JK
107static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
108{
109 return radix_tree_lookup(&nm_i->nat_root, n);
110}
111
112static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
113 nid_t start, unsigned int nr, struct nat_entry **ep)
114{
115 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
116}
117
118static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
119{
120 list_del(&e->list);
121 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
122 nm_i->nat_cnt--;
123 kmem_cache_free(nat_entry_slab, e);
124}
125
88bd02c9 126bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
e05df3b1
JK
127{
128 struct f2fs_nm_info *nm_i = NM_I(sbi);
129 struct nat_entry *e;
88bd02c9 130 bool is_cp = true;
e05df3b1
JK
131
132 read_lock(&nm_i->nat_tree_lock);
133 e = __lookup_nat_cache(nm_i, nid);
7ef35e3b 134 if (e && !get_nat_flag(e, IS_CHECKPOINTED))
88bd02c9 135 is_cp = false;
e05df3b1
JK
136 read_unlock(&nm_i->nat_tree_lock);
137 return is_cp;
138}
139
88bd02c9 140bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
479f40c4
JK
141{
142 struct f2fs_nm_info *nm_i = NM_I(sbi);
143 struct nat_entry *e;
88bd02c9 144 bool fsynced = false;
479f40c4
JK
145
146 read_lock(&nm_i->nat_tree_lock);
88bd02c9
JK
147 e = __lookup_nat_cache(nm_i, ino);
148 if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
149 fsynced = true;
479f40c4 150 read_unlock(&nm_i->nat_tree_lock);
88bd02c9 151 return fsynced;
479f40c4
JK
152}
153
88bd02c9 154bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
b6fe5873
JK
155{
156 struct f2fs_nm_info *nm_i = NM_I(sbi);
157 struct nat_entry *e;
88bd02c9 158 bool need_update = true;
b6fe5873 159
88bd02c9
JK
160 read_lock(&nm_i->nat_tree_lock);
161 e = __lookup_nat_cache(nm_i, ino);
162 if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
163 (get_nat_flag(e, IS_CHECKPOINTED) ||
164 get_nat_flag(e, HAS_FSYNCED_INODE)))
165 need_update = false;
166 read_unlock(&nm_i->nat_tree_lock);
167 return need_update;
b6fe5873
JK
168}
169
e05df3b1
JK
170static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
171{
172 struct nat_entry *new;
173
174 new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
175 if (!new)
176 return NULL;
177 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
178 kmem_cache_free(nat_entry_slab, new);
179 return NULL;
180 }
181 memset(new, 0, sizeof(struct nat_entry));
182 nat_set_nid(new, nid);
88bd02c9 183 nat_reset_flag(new);
e05df3b1
JK
184 list_add_tail(&new->list, &nm_i->nat_entries);
185 nm_i->nat_cnt++;
186 return new;
187}
188
189static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
190 struct f2fs_nat_entry *ne)
191{
192 struct nat_entry *e;
193retry:
194 write_lock(&nm_i->nat_tree_lock);
195 e = __lookup_nat_cache(nm_i, nid);
196 if (!e) {
197 e = grab_nat_entry(nm_i, nid);
198 if (!e) {
199 write_unlock(&nm_i->nat_tree_lock);
200 goto retry;
201 }
94dac22e 202 node_info_from_raw_nat(&e->ni, ne);
e05df3b1
JK
203 }
204 write_unlock(&nm_i->nat_tree_lock);
205}
206
207static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
479f40c4 208 block_t new_blkaddr, bool fsync_done)
e05df3b1
JK
209{
210 struct f2fs_nm_info *nm_i = NM_I(sbi);
211 struct nat_entry *e;
212retry:
213 write_lock(&nm_i->nat_tree_lock);
214 e = __lookup_nat_cache(nm_i, ni->nid);
215 if (!e) {
216 e = grab_nat_entry(nm_i, ni->nid);
217 if (!e) {
218 write_unlock(&nm_i->nat_tree_lock);
219 goto retry;
220 }
221 e->ni = *ni;
9850cf4a 222 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
e05df3b1
JK
223 } else if (new_blkaddr == NEW_ADDR) {
224 /*
225 * when nid is reallocated,
226 * previous nat entry can be remained in nat cache.
227 * So, reinitialize it with new information.
228 */
229 e->ni = *ni;
9850cf4a 230 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
e05df3b1
JK
231 }
232
e05df3b1 233 /* sanity check */
9850cf4a
JK
234 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
235 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
e05df3b1 236 new_blkaddr == NULL_ADDR);
9850cf4a 237 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
e05df3b1 238 new_blkaddr == NEW_ADDR);
9850cf4a 239 f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
e05df3b1
JK
240 nat_get_blkaddr(e) != NULL_ADDR &&
241 new_blkaddr == NEW_ADDR);
242
e1c42045 243 /* increment version no as node is removed */
e05df3b1
JK
244 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
245 unsigned char version = nat_get_version(e);
246 nat_set_version(e, inc_node_version(version));
247 }
248
249 /* change address */
250 nat_set_blkaddr(e, new_blkaddr);
88bd02c9
JK
251 if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
252 set_nat_flag(e, IS_CHECKPOINTED, false);
e05df3b1 253 __set_nat_cache_dirty(nm_i, e);
479f40c4
JK
254
255 /* update fsync_mark if its inode nat entry is still alive */
256 e = __lookup_nat_cache(nm_i, ni->ino);
88bd02c9
JK
257 if (e) {
258 if (fsync_done && ni->nid == ni->ino)
259 set_nat_flag(e, HAS_FSYNCED_INODE, true);
260 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
261 }
e05df3b1
JK
262 write_unlock(&nm_i->nat_tree_lock);
263}
264
4660f9c0 265int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
e05df3b1
JK
266{
267 struct f2fs_nm_info *nm_i = NM_I(sbi);
268
6fb03f3a 269 if (available_free_memory(sbi, NAT_ENTRIES))
e05df3b1
JK
270 return 0;
271
272 write_lock(&nm_i->nat_tree_lock);
273 while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
274 struct nat_entry *ne;
275 ne = list_first_entry(&nm_i->nat_entries,
276 struct nat_entry, list);
277 __del_from_nat_cache(nm_i, ne);
278 nr_shrink--;
279 }
280 write_unlock(&nm_i->nat_tree_lock);
281 return nr_shrink;
282}
283
0a8165d7 284/*
e1c42045 285 * This function always returns success
e05df3b1
JK
286 */
287void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
288{
289 struct f2fs_nm_info *nm_i = NM_I(sbi);
290 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
291 struct f2fs_summary_block *sum = curseg->sum_blk;
292 nid_t start_nid = START_NID(nid);
293 struct f2fs_nat_block *nat_blk;
294 struct page *page = NULL;
295 struct f2fs_nat_entry ne;
296 struct nat_entry *e;
297 int i;
298
be4124f8 299 memset(&ne, 0, sizeof(struct f2fs_nat_entry));
e05df3b1
JK
300 ni->nid = nid;
301
302 /* Check nat cache */
303 read_lock(&nm_i->nat_tree_lock);
304 e = __lookup_nat_cache(nm_i, nid);
305 if (e) {
306 ni->ino = nat_get_ino(e);
307 ni->blk_addr = nat_get_blkaddr(e);
308 ni->version = nat_get_version(e);
309 }
310 read_unlock(&nm_i->nat_tree_lock);
311 if (e)
312 return;
313
314 /* Check current segment summary */
315 mutex_lock(&curseg->curseg_mutex);
316 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
317 if (i >= 0) {
318 ne = nat_in_journal(sum, i);
319 node_info_from_raw_nat(ni, &ne);
320 }
321 mutex_unlock(&curseg->curseg_mutex);
322 if (i >= 0)
323 goto cache;
324
325 /* Fill node_info from nat page */
326 page = get_current_nat_page(sbi, start_nid);
327 nat_blk = (struct f2fs_nat_block *)page_address(page);
328 ne = nat_blk->entries[nid - start_nid];
329 node_info_from_raw_nat(ni, &ne);
330 f2fs_put_page(page, 1);
331cache:
332 /* cache nat entry */
333 cache_nat_entry(NM_I(sbi), nid, &ne);
334}
335
0a8165d7 336/*
e05df3b1
JK
337 * The maximum depth is four.
338 * Offset[0] will have raw inode offset.
339 */
de93653f
JK
340static int get_node_path(struct f2fs_inode_info *fi, long block,
341 int offset[4], unsigned int noffset[4])
e05df3b1 342{
de93653f 343 const long direct_index = ADDRS_PER_INODE(fi);
e05df3b1
JK
344 const long direct_blks = ADDRS_PER_BLOCK;
345 const long dptrs_per_blk = NIDS_PER_BLOCK;
346 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
347 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
348 int n = 0;
349 int level = 0;
350
351 noffset[0] = 0;
352
353 if (block < direct_index) {
25c0a6e5 354 offset[n] = block;
e05df3b1
JK
355 goto got;
356 }
357 block -= direct_index;
358 if (block < direct_blks) {
359 offset[n++] = NODE_DIR1_BLOCK;
360 noffset[n] = 1;
25c0a6e5 361 offset[n] = block;
e05df3b1
JK
362 level = 1;
363 goto got;
364 }
365 block -= direct_blks;
366 if (block < direct_blks) {
367 offset[n++] = NODE_DIR2_BLOCK;
368 noffset[n] = 2;
25c0a6e5 369 offset[n] = block;
e05df3b1
JK
370 level = 1;
371 goto got;
372 }
373 block -= direct_blks;
374 if (block < indirect_blks) {
375 offset[n++] = NODE_IND1_BLOCK;
376 noffset[n] = 3;
377 offset[n++] = block / direct_blks;
378 noffset[n] = 4 + offset[n - 1];
25c0a6e5 379 offset[n] = block % direct_blks;
e05df3b1
JK
380 level = 2;
381 goto got;
382 }
383 block -= indirect_blks;
384 if (block < indirect_blks) {
385 offset[n++] = NODE_IND2_BLOCK;
386 noffset[n] = 4 + dptrs_per_blk;
387 offset[n++] = block / direct_blks;
388 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
25c0a6e5 389 offset[n] = block % direct_blks;
e05df3b1
JK
390 level = 2;
391 goto got;
392 }
393 block -= indirect_blks;
394 if (block < dindirect_blks) {
395 offset[n++] = NODE_DIND_BLOCK;
396 noffset[n] = 5 + (dptrs_per_blk * 2);
397 offset[n++] = block / indirect_blks;
398 noffset[n] = 6 + (dptrs_per_blk * 2) +
399 offset[n - 1] * (dptrs_per_blk + 1);
400 offset[n++] = (block / direct_blks) % dptrs_per_blk;
401 noffset[n] = 7 + (dptrs_per_blk * 2) +
402 offset[n - 2] * (dptrs_per_blk + 1) +
403 offset[n - 1];
25c0a6e5 404 offset[n] = block % direct_blks;
e05df3b1
JK
405 level = 3;
406 goto got;
407 } else {
408 BUG();
409 }
410got:
411 return level;
412}
413
414/*
415 * Caller should call f2fs_put_dnode(dn).
4f4124d0
CY
416 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
417 * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
39936837 418 * In the case of RDONLY_NODE, we don't need to care about mutex.
e05df3b1 419 */
266e97a8 420int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
e05df3b1 421{
4081363f 422 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
e05df3b1
JK
423 struct page *npage[4];
424 struct page *parent;
425 int offset[4];
426 unsigned int noffset[4];
427 nid_t nids[4];
428 int level, i;
429 int err = 0;
430
de93653f 431 level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
e05df3b1
JK
432
433 nids[0] = dn->inode->i_ino;
1646cfac 434 npage[0] = dn->inode_page;
e05df3b1 435
1646cfac
JK
436 if (!npage[0]) {
437 npage[0] = get_node_page(sbi, nids[0]);
438 if (IS_ERR(npage[0]))
439 return PTR_ERR(npage[0]);
440 }
e05df3b1 441 parent = npage[0];
52c2db3f
CL
442 if (level != 0)
443 nids[1] = get_nid(parent, offset[0], true);
e05df3b1
JK
444 dn->inode_page = npage[0];
445 dn->inode_page_locked = true;
446
447 /* get indirect or direct nodes */
448 for (i = 1; i <= level; i++) {
449 bool done = false;
450
266e97a8 451 if (!nids[i] && mode == ALLOC_NODE) {
e05df3b1
JK
452 /* alloc new node */
453 if (!alloc_nid(sbi, &(nids[i]))) {
e05df3b1
JK
454 err = -ENOSPC;
455 goto release_pages;
456 }
457
458 dn->nid = nids[i];
8ae8f162 459 npage[i] = new_node_page(dn, noffset[i], NULL);
e05df3b1
JK
460 if (IS_ERR(npage[i])) {
461 alloc_nid_failed(sbi, nids[i]);
e05df3b1
JK
462 err = PTR_ERR(npage[i]);
463 goto release_pages;
464 }
465
466 set_nid(parent, offset[i - 1], nids[i], i == 1);
467 alloc_nid_done(sbi, nids[i]);
e05df3b1 468 done = true;
266e97a8 469 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
e05df3b1
JK
470 npage[i] = get_node_page_ra(parent, offset[i - 1]);
471 if (IS_ERR(npage[i])) {
472 err = PTR_ERR(npage[i]);
473 goto release_pages;
474 }
475 done = true;
476 }
477 if (i == 1) {
478 dn->inode_page_locked = false;
479 unlock_page(parent);
480 } else {
481 f2fs_put_page(parent, 1);
482 }
483
484 if (!done) {
485 npage[i] = get_node_page(sbi, nids[i]);
486 if (IS_ERR(npage[i])) {
487 err = PTR_ERR(npage[i]);
488 f2fs_put_page(npage[0], 0);
489 goto release_out;
490 }
491 }
492 if (i < level) {
493 parent = npage[i];
494 nids[i + 1] = get_nid(parent, offset[i], false);
495 }
496 }
497 dn->nid = nids[level];
498 dn->ofs_in_node = offset[level];
499 dn->node_page = npage[level];
500 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
501 return 0;
502
503release_pages:
504 f2fs_put_page(parent, 1);
505 if (i > 1)
506 f2fs_put_page(npage[0], 0);
507release_out:
508 dn->inode_page = NULL;
509 dn->node_page = NULL;
510 return err;
511}
512
513static void truncate_node(struct dnode_of_data *dn)
514{
4081363f 515 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
e05df3b1
JK
516 struct node_info ni;
517
518 get_node_info(sbi, dn->nid, &ni);
71e9fec5 519 if (dn->inode->i_blocks == 0) {
9850cf4a 520 f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
71e9fec5
JK
521 goto invalidate;
522 }
9850cf4a 523 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
e05df3b1 524
e05df3b1 525 /* Deallocate node address */
71e9fec5 526 invalidate_blocks(sbi, ni.blk_addr);
ef86d709 527 dec_valid_node_count(sbi, dn->inode);
479f40c4 528 set_node_addr(sbi, &ni, NULL_ADDR, false);
e05df3b1
JK
529
530 if (dn->nid == dn->inode->i_ino) {
531 remove_orphan_inode(sbi, dn->nid);
532 dec_valid_inode_count(sbi);
533 } else {
534 sync_inode_page(dn);
535 }
71e9fec5 536invalidate:
e05df3b1
JK
537 clear_node_page_dirty(dn->node_page);
538 F2FS_SET_SB_DIRT(sbi);
539
540 f2fs_put_page(dn->node_page, 1);
bf39c00a
JK
541
542 invalidate_mapping_pages(NODE_MAPPING(sbi),
543 dn->node_page->index, dn->node_page->index);
544
e05df3b1 545 dn->node_page = NULL;
51dd6249 546 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
e05df3b1
JK
547}
548
549static int truncate_dnode(struct dnode_of_data *dn)
550{
e05df3b1
JK
551 struct page *page;
552
553 if (dn->nid == 0)
554 return 1;
555
556 /* get direct node */
4081363f 557 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
e05df3b1
JK
558 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
559 return 1;
560 else if (IS_ERR(page))
561 return PTR_ERR(page);
562
563 /* Make dnode_of_data for parameter */
564 dn->node_page = page;
565 dn->ofs_in_node = 0;
566 truncate_data_blocks(dn);
567 truncate_node(dn);
568 return 1;
569}
570
571static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
572 int ofs, int depth)
573{
e05df3b1
JK
574 struct dnode_of_data rdn = *dn;
575 struct page *page;
576 struct f2fs_node *rn;
577 nid_t child_nid;
578 unsigned int child_nofs;
579 int freed = 0;
580 int i, ret;
581
582 if (dn->nid == 0)
583 return NIDS_PER_BLOCK + 1;
584
51dd6249
NJ
585 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
586
4081363f 587 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
51dd6249
NJ
588 if (IS_ERR(page)) {
589 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
e05df3b1 590 return PTR_ERR(page);
51dd6249 591 }
e05df3b1 592
45590710 593 rn = F2FS_NODE(page);
e05df3b1
JK
594 if (depth < 3) {
595 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
596 child_nid = le32_to_cpu(rn->in.nid[i]);
597 if (child_nid == 0)
598 continue;
599 rdn.nid = child_nid;
600 ret = truncate_dnode(&rdn);
601 if (ret < 0)
602 goto out_err;
603 set_nid(page, i, 0, false);
604 }
605 } else {
606 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
607 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
608 child_nid = le32_to_cpu(rn->in.nid[i]);
609 if (child_nid == 0) {
610 child_nofs += NIDS_PER_BLOCK + 1;
611 continue;
612 }
613 rdn.nid = child_nid;
614 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
615 if (ret == (NIDS_PER_BLOCK + 1)) {
616 set_nid(page, i, 0, false);
617 child_nofs += ret;
618 } else if (ret < 0 && ret != -ENOENT) {
619 goto out_err;
620 }
621 }
622 freed = child_nofs;
623 }
624
625 if (!ofs) {
626 /* remove current indirect node */
627 dn->node_page = page;
628 truncate_node(dn);
629 freed++;
630 } else {
631 f2fs_put_page(page, 1);
632 }
51dd6249 633 trace_f2fs_truncate_nodes_exit(dn->inode, freed);
e05df3b1
JK
634 return freed;
635
636out_err:
637 f2fs_put_page(page, 1);
51dd6249 638 trace_f2fs_truncate_nodes_exit(dn->inode, ret);
e05df3b1
JK
639 return ret;
640}
641
642static int truncate_partial_nodes(struct dnode_of_data *dn,
643 struct f2fs_inode *ri, int *offset, int depth)
644{
e05df3b1
JK
645 struct page *pages[2];
646 nid_t nid[3];
647 nid_t child_nid;
648 int err = 0;
649 int i;
650 int idx = depth - 2;
651
652 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
653 if (!nid[0])
654 return 0;
655
656 /* get indirect nodes in the path */
a225dca3 657 for (i = 0; i < idx + 1; i++) {
e1c42045 658 /* reference count'll be increased */
4081363f 659 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
e05df3b1 660 if (IS_ERR(pages[i])) {
e05df3b1 661 err = PTR_ERR(pages[i]);
a225dca3 662 idx = i - 1;
e05df3b1
JK
663 goto fail;
664 }
665 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
666 }
667
668 /* free direct nodes linked to a partial indirect node */
a225dca3 669 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
e05df3b1
JK
670 child_nid = get_nid(pages[idx], i, false);
671 if (!child_nid)
672 continue;
673 dn->nid = child_nid;
674 err = truncate_dnode(dn);
675 if (err < 0)
676 goto fail;
677 set_nid(pages[idx], i, 0, false);
678 }
679
a225dca3 680 if (offset[idx + 1] == 0) {
e05df3b1
JK
681 dn->node_page = pages[idx];
682 dn->nid = nid[idx];
683 truncate_node(dn);
684 } else {
685 f2fs_put_page(pages[idx], 1);
686 }
687 offset[idx]++;
a225dca3 688 offset[idx + 1] = 0;
689 idx--;
e05df3b1 690fail:
a225dca3 691 for (i = idx; i >= 0; i--)
e05df3b1 692 f2fs_put_page(pages[i], 1);
51dd6249
NJ
693
694 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
695
e05df3b1
JK
696 return err;
697}
698
0a8165d7 699/*
e05df3b1
JK
700 * All the block addresses of data and nodes should be nullified.
701 */
702int truncate_inode_blocks(struct inode *inode, pgoff_t from)
703{
4081363f 704 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
e05df3b1
JK
705 int err = 0, cont = 1;
706 int level, offset[4], noffset[4];
7dd690c8 707 unsigned int nofs = 0;
58bfaf44 708 struct f2fs_inode *ri;
e05df3b1
JK
709 struct dnode_of_data dn;
710 struct page *page;
711
51dd6249
NJ
712 trace_f2fs_truncate_inode_blocks_enter(inode, from);
713
de93653f 714 level = get_node_path(F2FS_I(inode), from, offset, noffset);
afcb7ca0 715restart:
e05df3b1 716 page = get_node_page(sbi, inode->i_ino);
51dd6249
NJ
717 if (IS_ERR(page)) {
718 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
e05df3b1 719 return PTR_ERR(page);
51dd6249 720 }
e05df3b1
JK
721
722 set_new_dnode(&dn, inode, page, NULL, 0);
723 unlock_page(page);
724
58bfaf44 725 ri = F2FS_INODE(page);
e05df3b1
JK
726 switch (level) {
727 case 0:
728 case 1:
729 nofs = noffset[1];
730 break;
731 case 2:
732 nofs = noffset[1];
733 if (!offset[level - 1])
734 goto skip_partial;
58bfaf44 735 err = truncate_partial_nodes(&dn, ri, offset, level);
e05df3b1
JK
736 if (err < 0 && err != -ENOENT)
737 goto fail;
738 nofs += 1 + NIDS_PER_BLOCK;
739 break;
740 case 3:
741 nofs = 5 + 2 * NIDS_PER_BLOCK;
742 if (!offset[level - 1])
743 goto skip_partial;
58bfaf44 744 err = truncate_partial_nodes(&dn, ri, offset, level);
e05df3b1
JK
745 if (err < 0 && err != -ENOENT)
746 goto fail;
747 break;
748 default:
749 BUG();
750 }
751
752skip_partial:
753 while (cont) {
58bfaf44 754 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
e05df3b1
JK
755 switch (offset[0]) {
756 case NODE_DIR1_BLOCK:
757 case NODE_DIR2_BLOCK:
758 err = truncate_dnode(&dn);
759 break;
760
761 case NODE_IND1_BLOCK:
762 case NODE_IND2_BLOCK:
763 err = truncate_nodes(&dn, nofs, offset[1], 2);
764 break;
765
766 case NODE_DIND_BLOCK:
767 err = truncate_nodes(&dn, nofs, offset[1], 3);
768 cont = 0;
769 break;
770
771 default:
772 BUG();
773 }
774 if (err < 0 && err != -ENOENT)
775 goto fail;
776 if (offset[1] == 0 &&
58bfaf44 777 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
e05df3b1 778 lock_page(page);
4ef51a8f 779 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
afcb7ca0
JK
780 f2fs_put_page(page, 1);
781 goto restart;
782 }
3cb5ad15 783 f2fs_wait_on_page_writeback(page, NODE);
58bfaf44 784 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
e05df3b1
JK
785 set_page_dirty(page);
786 unlock_page(page);
787 }
788 offset[1] = 0;
789 offset[0]++;
790 nofs += err;
791 }
792fail:
793 f2fs_put_page(page, 0);
51dd6249 794 trace_f2fs_truncate_inode_blocks_exit(inode, err);
e05df3b1
JK
795 return err > 0 ? 0 : err;
796}
797
4f16fb0f
JK
798int truncate_xattr_node(struct inode *inode, struct page *page)
799{
4081363f 800 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4f16fb0f
JK
801 nid_t nid = F2FS_I(inode)->i_xattr_nid;
802 struct dnode_of_data dn;
803 struct page *npage;
804
805 if (!nid)
806 return 0;
807
808 npage = get_node_page(sbi, nid);
809 if (IS_ERR(npage))
810 return PTR_ERR(npage);
811
812 F2FS_I(inode)->i_xattr_nid = 0;
65985d93
JK
813
814 /* need to do checkpoint during fsync */
815 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
816
4f16fb0f
JK
817 set_new_dnode(&dn, inode, page, npage, nid);
818
819 if (page)
01d2d1aa 820 dn.inode_page_locked = true;
4f16fb0f
JK
821 truncate_node(&dn);
822 return 0;
823}
824
39936837 825/*
4f4124d0
CY
826 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
827 * f2fs_unlock_op().
39936837 828 */
58e674d6 829void remove_inode_page(struct inode *inode)
e05df3b1 830{
e05df3b1
JK
831 struct dnode_of_data dn;
832
c2e69583
JK
833 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
834 if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
58e674d6 835 return;
e05df3b1 836
c2e69583
JK
837 if (truncate_xattr_node(inode, dn.inode_page)) {
838 f2fs_put_dnode(&dn);
58e674d6 839 return;
e05df3b1 840 }
c2e69583
JK
841
842 /* remove potential inline_data blocks */
843 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
844 S_ISLNK(inode->i_mode))
845 truncate_data_blocks_range(&dn, 1);
846
e1c42045 847 /* 0 is possible, after f2fs_new_inode() has failed */
9850cf4a
JK
848 f2fs_bug_on(F2FS_I_SB(inode),
849 inode->i_blocks != 0 && inode->i_blocks != 1);
c2e69583
JK
850
851 /* will put inode & node pages */
71e9fec5 852 truncate_node(&dn);
e05df3b1
JK
853}
854
a014e037 855struct page *new_inode_page(struct inode *inode)
e05df3b1 856{
e05df3b1
JK
857 struct dnode_of_data dn;
858
859 /* allocate inode page for new inode */
860 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
44a83ff6
JK
861
862 /* caller should f2fs_put_page(page, 1); */
8ae8f162 863 return new_node_page(&dn, 0, NULL);
e05df3b1
JK
864}
865
8ae8f162
JK
866struct page *new_node_page(struct dnode_of_data *dn,
867 unsigned int ofs, struct page *ipage)
e05df3b1 868{
4081363f 869 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
e05df3b1
JK
870 struct node_info old_ni, new_ni;
871 struct page *page;
872 int err;
873
6bacf52f 874 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
e05df3b1
JK
875 return ERR_PTR(-EPERM);
876
54b591df 877 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
e05df3b1
JK
878 if (!page)
879 return ERR_PTR(-ENOMEM);
880
6bacf52f 881 if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
9c02740c
JK
882 err = -ENOSPC;
883 goto fail;
884 }
e05df3b1 885
9c02740c 886 get_node_info(sbi, dn->nid, &old_ni);
e05df3b1
JK
887
888 /* Reinitialize old_ni with new node page */
9850cf4a 889 f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
e05df3b1
JK
890 new_ni = old_ni;
891 new_ni.ino = dn->inode->i_ino;
479f40c4 892 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
9c02740c 893
54b591df 894 f2fs_wait_on_page_writeback(page, NODE);
9c02740c 895 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
398b1ac5 896 set_cold_node(dn->inode, page);
9c02740c
JK
897 SetPageUptodate(page);
898 set_page_dirty(page);
e05df3b1 899
4bc8e9bc 900 if (f2fs_has_xattr_block(ofs))
479bd73a
JK
901 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
902
e05df3b1 903 dn->node_page = page;
8ae8f162
JK
904 if (ipage)
905 update_inode(dn->inode, ipage);
906 else
907 sync_inode_page(dn);
e05df3b1
JK
908 if (ofs == 0)
909 inc_valid_inode_count(sbi);
910
911 return page;
912
913fail:
71e9fec5 914 clear_node_page_dirty(page);
e05df3b1
JK
915 f2fs_put_page(page, 1);
916 return ERR_PTR(err);
917}
918
56ae674c
JK
919/*
920 * Caller should do after getting the following values.
921 * 0: f2fs_put_page(page, 0)
922 * LOCKED_PAGE: f2fs_put_page(page, 1)
923 * error: nothing
924 */
93dfe2ac 925static int read_node_page(struct page *page, int rw)
e05df3b1 926{
4081363f 927 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
e05df3b1
JK
928 struct node_info ni;
929
930 get_node_info(sbi, page->index, &ni);
931
6bacf52f 932 if (unlikely(ni.blk_addr == NULL_ADDR)) {
393ff91f 933 f2fs_put_page(page, 1);
e05df3b1 934 return -ENOENT;
393ff91f
JK
935 }
936
56ae674c
JK
937 if (PageUptodate(page))
938 return LOCKED_PAGE;
393ff91f 939
93dfe2ac 940 return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
e05df3b1
JK
941}
942
0a8165d7 943/*
e05df3b1
JK
944 * Readahead a node page
945 */
946void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
947{
e05df3b1 948 struct page *apage;
56ae674c 949 int err;
e05df3b1 950
4ef51a8f 951 apage = find_get_page(NODE_MAPPING(sbi), nid);
393ff91f
JK
952 if (apage && PageUptodate(apage)) {
953 f2fs_put_page(apage, 0);
954 return;
955 }
e05df3b1
JK
956 f2fs_put_page(apage, 0);
957
4ef51a8f 958 apage = grab_cache_page(NODE_MAPPING(sbi), nid);
e05df3b1
JK
959 if (!apage)
960 return;
961
56ae674c
JK
962 err = read_node_page(apage, READA);
963 if (err == 0)
393ff91f 964 f2fs_put_page(apage, 0);
56ae674c
JK
965 else if (err == LOCKED_PAGE)
966 f2fs_put_page(apage, 1);
e05df3b1
JK
967}
968
969struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
970{
56ae674c
JK
971 struct page *page;
972 int err;
afcb7ca0 973repeat:
54b591df 974 page = grab_cache_page(NODE_MAPPING(sbi), nid);
e05df3b1
JK
975 if (!page)
976 return ERR_PTR(-ENOMEM);
977
978 err = read_node_page(page, READ_SYNC);
56ae674c 979 if (err < 0)
e05df3b1 980 return ERR_PTR(err);
56ae674c
JK
981 else if (err == LOCKED_PAGE)
982 goto got_it;
e05df3b1 983
393ff91f 984 lock_page(page);
3bb5e2c8 985 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
393ff91f
JK
986 f2fs_put_page(page, 1);
987 return ERR_PTR(-EIO);
988 }
4ef51a8f 989 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
afcb7ca0
JK
990 f2fs_put_page(page, 1);
991 goto repeat;
992 }
56ae674c 993got_it:
e05df3b1
JK
994 return page;
995}
996
0a8165d7 997/*
e05df3b1
JK
998 * Return a locked page for the desired node page.
999 * And, readahead MAX_RA_NODE number of node pages.
1000 */
1001struct page *get_node_page_ra(struct page *parent, int start)
1002{
4081363f 1003 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
c718379b 1004 struct blk_plug plug;
e05df3b1 1005 struct page *page;
56ae674c
JK
1006 int err, i, end;
1007 nid_t nid;
e05df3b1
JK
1008
1009 /* First, try getting the desired direct node. */
1010 nid = get_nid(parent, start, false);
1011 if (!nid)
1012 return ERR_PTR(-ENOENT);
afcb7ca0 1013repeat:
4ef51a8f 1014 page = grab_cache_page(NODE_MAPPING(sbi), nid);
e05df3b1
JK
1015 if (!page)
1016 return ERR_PTR(-ENOMEM);
1017
66d36a29 1018 err = read_node_page(page, READ_SYNC);
56ae674c 1019 if (err < 0)
e05df3b1 1020 return ERR_PTR(err);
56ae674c
JK
1021 else if (err == LOCKED_PAGE)
1022 goto page_hit;
e05df3b1 1023
c718379b
JK
1024 blk_start_plug(&plug);
1025
e05df3b1
JK
1026 /* Then, try readahead for siblings of the desired node */
1027 end = start + MAX_RA_NODE;
1028 end = min(end, NIDS_PER_BLOCK);
1029 for (i = start + 1; i < end; i++) {
1030 nid = get_nid(parent, i, false);
1031 if (!nid)
1032 continue;
1033 ra_node_page(sbi, nid);
1034 }
1035
c718379b
JK
1036 blk_finish_plug(&plug);
1037
e05df3b1 1038 lock_page(page);
4ef51a8f 1039 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
afcb7ca0
JK
1040 f2fs_put_page(page, 1);
1041 goto repeat;
1042 }
e0f56cb4 1043page_hit:
6bacf52f 1044 if (unlikely(!PageUptodate(page))) {
e05df3b1
JK
1045 f2fs_put_page(page, 1);
1046 return ERR_PTR(-EIO);
1047 }
e05df3b1
JK
1048 return page;
1049}
1050
1051void sync_inode_page(struct dnode_of_data *dn)
1052{
1053 if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
1054 update_inode(dn->inode, dn->node_page);
1055 } else if (dn->inode_page) {
1056 if (!dn->inode_page_locked)
1057 lock_page(dn->inode_page);
1058 update_inode(dn->inode, dn->inode_page);
1059 if (!dn->inode_page_locked)
1060 unlock_page(dn->inode_page);
1061 } else {
39936837 1062 update_inode_page(dn->inode);
e05df3b1
JK
1063 }
1064}
1065
1066int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
1067 struct writeback_control *wbc)
1068{
e05df3b1
JK
1069 pgoff_t index, end;
1070 struct pagevec pvec;
1071 int step = ino ? 2 : 0;
1072 int nwritten = 0, wrote = 0;
1073
1074 pagevec_init(&pvec, 0);
1075
1076next_step:
1077 index = 0;
1078 end = LONG_MAX;
1079
1080 while (index <= end) {
1081 int i, nr_pages;
4ef51a8f 1082 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
e05df3b1
JK
1083 PAGECACHE_TAG_DIRTY,
1084 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1085 if (nr_pages == 0)
1086 break;
1087
1088 for (i = 0; i < nr_pages; i++) {
1089 struct page *page = pvec.pages[i];
1090
1091 /*
1092 * flushing sequence with step:
1093 * 0. indirect nodes
1094 * 1. dentry dnodes
1095 * 2. file dnodes
1096 */
1097 if (step == 0 && IS_DNODE(page))
1098 continue;
1099 if (step == 1 && (!IS_DNODE(page) ||
1100 is_cold_node(page)))
1101 continue;
1102 if (step == 2 && (!IS_DNODE(page) ||
1103 !is_cold_node(page)))
1104 continue;
1105
1106 /*
1107 * If an fsync mode,
1108 * we should not skip writing node pages.
1109 */
1110 if (ino && ino_of_node(page) == ino)
1111 lock_page(page);
1112 else if (!trylock_page(page))
1113 continue;
1114
4ef51a8f 1115 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
e05df3b1
JK
1116continue_unlock:
1117 unlock_page(page);
1118 continue;
1119 }
1120 if (ino && ino_of_node(page) != ino)
1121 goto continue_unlock;
1122
1123 if (!PageDirty(page)) {
1124 /* someone wrote it for us */
1125 goto continue_unlock;
1126 }
1127
1128 if (!clear_page_dirty_for_io(page))
1129 goto continue_unlock;
1130
1131 /* called by fsync() */
1132 if (ino && IS_DNODE(page)) {
e05df3b1 1133 set_fsync_mark(page, 1);
88bd02c9
JK
1134 if (IS_INODE(page)) {
1135 if (!is_checkpointed_node(sbi, ino) &&
1136 !has_fsynced_inode(sbi, ino))
1137 set_dentry_mark(page, 1);
1138 else
1139 set_dentry_mark(page, 0);
1140 }
e05df3b1
JK
1141 nwritten++;
1142 } else {
1143 set_fsync_mark(page, 0);
1144 set_dentry_mark(page, 0);
1145 }
52746519
JK
1146
1147 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
1148 unlock_page(page);
1149 else
1150 wrote++;
e05df3b1
JK
1151
1152 if (--wbc->nr_to_write == 0)
1153 break;
1154 }
1155 pagevec_release(&pvec);
1156 cond_resched();
1157
1158 if (wbc->nr_to_write == 0) {
1159 step = 2;
1160 break;
1161 }
1162 }
1163
1164 if (step < 2) {
1165 step++;
1166 goto next_step;
1167 }
1168
1169 if (wrote)
458e6197 1170 f2fs_submit_merged_bio(sbi, NODE, WRITE);
e05df3b1
JK
1171 return nwritten;
1172}
1173
cfe58f9d
JK
1174int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1175{
cfe58f9d
JK
1176 pgoff_t index = 0, end = LONG_MAX;
1177 struct pagevec pvec;
cfe58f9d
JK
1178 int ret2 = 0, ret = 0;
1179
1180 pagevec_init(&pvec, 0);
4ef51a8f
JK
1181
1182 while (index <= end) {
1183 int i, nr_pages;
1184 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1185 PAGECACHE_TAG_WRITEBACK,
1186 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1187 if (nr_pages == 0)
1188 break;
cfe58f9d
JK
1189
1190 for (i = 0; i < nr_pages; i++) {
1191 struct page *page = pvec.pages[i];
1192
1193 /* until radix tree lookup accepts end_index */
cfb271d4 1194 if (unlikely(page->index > end))
cfe58f9d
JK
1195 continue;
1196
4bf08ff6 1197 if (ino && ino_of_node(page) == ino) {
3cb5ad15 1198 f2fs_wait_on_page_writeback(page, NODE);
4bf08ff6
CY
1199 if (TestClearPageError(page))
1200 ret = -EIO;
1201 }
cfe58f9d
JK
1202 }
1203 pagevec_release(&pvec);
1204 cond_resched();
1205 }
1206
4ef51a8f 1207 if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
cfe58f9d 1208 ret2 = -ENOSPC;
4ef51a8f 1209 if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
cfe58f9d
JK
1210 ret2 = -EIO;
1211 if (!ret)
1212 ret = ret2;
1213 return ret;
1214}
1215
e05df3b1
JK
1216static int f2fs_write_node_page(struct page *page,
1217 struct writeback_control *wbc)
1218{
4081363f 1219 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
e05df3b1 1220 nid_t nid;
e05df3b1
JK
1221 block_t new_addr;
1222 struct node_info ni;
fb5566da
JK
1223 struct f2fs_io_info fio = {
1224 .type = NODE,
6c311ec6 1225 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
fb5566da 1226 };
e05df3b1 1227
ecda0de3
CY
1228 trace_f2fs_writepage(page, NODE);
1229
cfb271d4 1230 if (unlikely(sbi->por_doing))
87a9bd26 1231 goto redirty_out;
cf779cab
JK
1232 if (unlikely(f2fs_cp_error(sbi)))
1233 goto redirty_out;
87a9bd26 1234
3cb5ad15 1235 f2fs_wait_on_page_writeback(page, NODE);
e05df3b1 1236
e05df3b1
JK
1237 /* get old block addr of this node page */
1238 nid = nid_of_node(page);
9850cf4a 1239 f2fs_bug_on(sbi, page->index != nid);
e05df3b1
JK
1240
1241 get_node_info(sbi, nid, &ni);
1242
1243 /* This page is already truncated */
6bacf52f 1244 if (unlikely(ni.blk_addr == NULL_ADDR)) {
39936837
JK
1245 dec_page_count(sbi, F2FS_DIRTY_NODES);
1246 unlock_page(page);
1247 return 0;
1248 }
e05df3b1 1249
87a9bd26
JK
1250 if (wbc->for_reclaim)
1251 goto redirty_out;
08d8058b 1252
b3582c68 1253 down_read(&sbi->node_write);
e05df3b1 1254 set_page_writeback(page);
fb5566da 1255 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
479f40c4 1256 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
e05df3b1 1257 dec_page_count(sbi, F2FS_DIRTY_NODES);
b3582c68 1258 up_read(&sbi->node_write);
e05df3b1
JK
1259 unlock_page(page);
1260 return 0;
87a9bd26
JK
1261
1262redirty_out:
76f60268 1263 redirty_page_for_writepage(wbc, page);
87a9bd26 1264 return AOP_WRITEPAGE_ACTIVATE;
e05df3b1
JK
1265}
1266
1267static int f2fs_write_node_pages(struct address_space *mapping,
1268 struct writeback_control *wbc)
1269{
4081363f 1270 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
50c8cdb3 1271 long diff;
e05df3b1 1272
e5748434
CY
1273 trace_f2fs_writepages(mapping->host, wbc, NODE);
1274
4660f9c0
JK
1275 /* balancing f2fs's metadata in background */
1276 f2fs_balance_fs_bg(sbi);
e05df3b1 1277
a7fdffbd 1278 /* collect a number of dirty node pages and write together */
87d6f890 1279 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
d3baf95d 1280 goto skip_write;
a7fdffbd 1281
50c8cdb3 1282 diff = nr_pages_to_write(sbi, NODE, wbc);
fb5566da 1283 wbc->sync_mode = WB_SYNC_NONE;
e05df3b1 1284 sync_node_pages(sbi, 0, wbc);
50c8cdb3 1285 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
e05df3b1 1286 return 0;
d3baf95d
JK
1287
1288skip_write:
1289 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1290 return 0;
e05df3b1
JK
1291}
1292
1293static int f2fs_set_node_page_dirty(struct page *page)
1294{
26c6b887
JK
1295 trace_f2fs_set_page_dirty(page, NODE);
1296
e05df3b1
JK
1297 SetPageUptodate(page);
1298 if (!PageDirty(page)) {
1299 __set_page_dirty_nobuffers(page);
4081363f 1300 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
e05df3b1
JK
1301 SetPagePrivate(page);
1302 return 1;
1303 }
1304 return 0;
1305}
1306
d47992f8
LC
1307static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
1308 unsigned int length)
e05df3b1
JK
1309{
1310 struct inode *inode = page->mapping->host;
e05df3b1 1311 if (PageDirty(page))
4081363f 1312 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES);
e05df3b1
JK
1313 ClearPagePrivate(page);
1314}
1315
1316static int f2fs_release_node_page(struct page *page, gfp_t wait)
1317{
1318 ClearPagePrivate(page);
c3850aa1 1319 return 1;
e05df3b1
JK
1320}
1321
0a8165d7 1322/*
e05df3b1
JK
1323 * Structure of the f2fs node operations
1324 */
1325const struct address_space_operations f2fs_node_aops = {
1326 .writepage = f2fs_write_node_page,
1327 .writepages = f2fs_write_node_pages,
1328 .set_page_dirty = f2fs_set_node_page_dirty,
1329 .invalidatepage = f2fs_invalidate_node_page,
1330 .releasepage = f2fs_release_node_page,
1331};
1332
8a7ed66a
JK
1333static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1334 nid_t n)
e05df3b1 1335{
8a7ed66a 1336 return radix_tree_lookup(&nm_i->free_nid_root, n);
e05df3b1
JK
1337}
1338
8a7ed66a
JK
1339static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1340 struct free_nid *i)
e05df3b1
JK
1341{
1342 list_del(&i->list);
8a7ed66a 1343 radix_tree_delete(&nm_i->free_nid_root, i->nid);
e05df3b1
JK
1344}
1345
6fb03f3a 1346static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
e05df3b1 1347{
6fb03f3a 1348 struct f2fs_nm_info *nm_i = NM_I(sbi);
e05df3b1 1349 struct free_nid *i;
59bbd474
JK
1350 struct nat_entry *ne;
1351 bool allocated = false;
e05df3b1 1352
6fb03f3a 1353 if (!available_free_memory(sbi, FREE_NIDS))
23d38844 1354 return -1;
9198aceb
JK
1355
1356 /* 0 nid should not be used */
cfb271d4 1357 if (unlikely(nid == 0))
9198aceb 1358 return 0;
59bbd474 1359
7bd59381
GZ
1360 if (build) {
1361 /* do not add allocated nids */
1362 read_lock(&nm_i->nat_tree_lock);
1363 ne = __lookup_nat_cache(nm_i, nid);
8a7ed66a 1364 if (ne &&
7ef35e3b
JK
1365 (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1366 nat_get_blkaddr(ne) != NULL_ADDR))
7bd59381
GZ
1367 allocated = true;
1368 read_unlock(&nm_i->nat_tree_lock);
1369 if (allocated)
1370 return 0;
e05df3b1 1371 }
7bd59381
GZ
1372
1373 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
e05df3b1
JK
1374 i->nid = nid;
1375 i->state = NID_NEW;
1376
1377 spin_lock(&nm_i->free_nid_list_lock);
8a7ed66a 1378 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
e05df3b1
JK
1379 spin_unlock(&nm_i->free_nid_list_lock);
1380 kmem_cache_free(free_nid_slab, i);
1381 return 0;
1382 }
1383 list_add_tail(&i->list, &nm_i->free_nid_list);
1384 nm_i->fcnt++;
1385 spin_unlock(&nm_i->free_nid_list_lock);
1386 return 1;
1387}
1388
1389static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1390{
1391 struct free_nid *i;
cf0ee0f0
CY
1392 bool need_free = false;
1393
e05df3b1 1394 spin_lock(&nm_i->free_nid_list_lock);
8a7ed66a 1395 i = __lookup_free_nid_list(nm_i, nid);
e05df3b1 1396 if (i && i->state == NID_NEW) {
8a7ed66a 1397 __del_from_free_nid_list(nm_i, i);
e05df3b1 1398 nm_i->fcnt--;
cf0ee0f0 1399 need_free = true;
e05df3b1
JK
1400 }
1401 spin_unlock(&nm_i->free_nid_list_lock);
cf0ee0f0
CY
1402
1403 if (need_free)
1404 kmem_cache_free(free_nid_slab, i);
e05df3b1
JK
1405}
1406
6fb03f3a 1407static void scan_nat_page(struct f2fs_sb_info *sbi,
e05df3b1
JK
1408 struct page *nat_page, nid_t start_nid)
1409{
6fb03f3a 1410 struct f2fs_nm_info *nm_i = NM_I(sbi);
e05df3b1
JK
1411 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1412 block_t blk_addr;
e05df3b1
JK
1413 int i;
1414
e05df3b1
JK
1415 i = start_nid % NAT_ENTRY_PER_BLOCK;
1416
1417 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
23d38844 1418
cfb271d4 1419 if (unlikely(start_nid >= nm_i->max_nid))
04431c44 1420 break;
23d38844
HL
1421
1422 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
9850cf4a 1423 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
23d38844 1424 if (blk_addr == NULL_ADDR) {
6fb03f3a 1425 if (add_free_nid(sbi, start_nid, true) < 0)
23d38844
HL
1426 break;
1427 }
e05df3b1 1428 }
e05df3b1
JK
1429}
1430
1431static void build_free_nids(struct f2fs_sb_info *sbi)
1432{
e05df3b1
JK
1433 struct f2fs_nm_info *nm_i = NM_I(sbi);
1434 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1435 struct f2fs_summary_block *sum = curseg->sum_blk;
8760952d 1436 int i = 0;
55008d84 1437 nid_t nid = nm_i->next_scan_nid;
e05df3b1 1438
55008d84
JK
1439 /* Enough entries */
1440 if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
1441 return;
e05df3b1 1442
55008d84 1443 /* readahead nat pages to be scanned */
662befda 1444 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
e05df3b1
JK
1445
1446 while (1) {
1447 struct page *page = get_current_nat_page(sbi, nid);
1448
6fb03f3a 1449 scan_nat_page(sbi, page, nid);
e05df3b1
JK
1450 f2fs_put_page(page, 1);
1451
1452 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
cfb271d4 1453 if (unlikely(nid >= nm_i->max_nid))
e05df3b1 1454 nid = 0;
55008d84
JK
1455
1456 if (i++ == FREE_NID_PAGES)
e05df3b1
JK
1457 break;
1458 }
1459
55008d84
JK
1460 /* go to the next free nat pages to find free nids abundantly */
1461 nm_i->next_scan_nid = nid;
e05df3b1
JK
1462
1463 /* find free nids from current sum_pages */
1464 mutex_lock(&curseg->curseg_mutex);
1465 for (i = 0; i < nats_in_cursum(sum); i++) {
1466 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1467 nid = le32_to_cpu(nid_in_journal(sum, i));
1468 if (addr == NULL_ADDR)
6fb03f3a 1469 add_free_nid(sbi, nid, true);
e05df3b1
JK
1470 else
1471 remove_free_nid(nm_i, nid);
1472 }
1473 mutex_unlock(&curseg->curseg_mutex);
e05df3b1
JK
1474}
1475
1476/*
1477 * If this function returns success, caller can obtain a new nid
1478 * from second parameter of this function.
1479 * The returned nid could be used ino as well as nid when inode is created.
1480 */
1481bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1482{
1483 struct f2fs_nm_info *nm_i = NM_I(sbi);
1484 struct free_nid *i = NULL;
e05df3b1 1485retry:
7ee0eeab 1486 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
55008d84 1487 return false;
e05df3b1 1488
e05df3b1 1489 spin_lock(&nm_i->free_nid_list_lock);
e05df3b1 1490
55008d84 1491 /* We should not use stale free nids created by build_free_nids */
f978f5a0 1492 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
9850cf4a 1493 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
2d7b822a 1494 list_for_each_entry(i, &nm_i->free_nid_list, list)
55008d84
JK
1495 if (i->state == NID_NEW)
1496 break;
e05df3b1 1497
9850cf4a 1498 f2fs_bug_on(sbi, i->state != NID_NEW);
55008d84
JK
1499 *nid = i->nid;
1500 i->state = NID_ALLOC;
1501 nm_i->fcnt--;
1502 spin_unlock(&nm_i->free_nid_list_lock);
1503 return true;
1504 }
e05df3b1 1505 spin_unlock(&nm_i->free_nid_list_lock);
55008d84
JK
1506
1507 /* Let's scan nat pages and its caches to get free nids */
1508 mutex_lock(&nm_i->build_lock);
55008d84 1509 build_free_nids(sbi);
55008d84
JK
1510 mutex_unlock(&nm_i->build_lock);
1511 goto retry;
e05df3b1
JK
1512}
1513
0a8165d7 1514/*
e05df3b1
JK
1515 * alloc_nid() should be called prior to this function.
1516 */
1517void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1518{
1519 struct f2fs_nm_info *nm_i = NM_I(sbi);
1520 struct free_nid *i;
1521
1522 spin_lock(&nm_i->free_nid_list_lock);
8a7ed66a 1523 i = __lookup_free_nid_list(nm_i, nid);
9850cf4a 1524 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
8a7ed66a 1525 __del_from_free_nid_list(nm_i, i);
e05df3b1 1526 spin_unlock(&nm_i->free_nid_list_lock);
cf0ee0f0
CY
1527
1528 kmem_cache_free(free_nid_slab, i);
e05df3b1
JK
1529}
1530
0a8165d7 1531/*
e05df3b1
JK
1532 * alloc_nid() should be called prior to this function.
1533 */
1534void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1535{
49952fa1
JK
1536 struct f2fs_nm_info *nm_i = NM_I(sbi);
1537 struct free_nid *i;
cf0ee0f0 1538 bool need_free = false;
49952fa1 1539
65985d93
JK
1540 if (!nid)
1541 return;
1542
49952fa1 1543 spin_lock(&nm_i->free_nid_list_lock);
8a7ed66a 1544 i = __lookup_free_nid_list(nm_i, nid);
9850cf4a 1545 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
6fb03f3a 1546 if (!available_free_memory(sbi, FREE_NIDS)) {
8a7ed66a 1547 __del_from_free_nid_list(nm_i, i);
cf0ee0f0 1548 need_free = true;
95630cba
HL
1549 } else {
1550 i->state = NID_NEW;
1551 nm_i->fcnt++;
1552 }
49952fa1 1553 spin_unlock(&nm_i->free_nid_list_lock);
cf0ee0f0
CY
1554
1555 if (need_free)
1556 kmem_cache_free(free_nid_slab, i);
e05df3b1
JK
1557}
1558
70cfed88 1559void recover_inline_xattr(struct inode *inode, struct page *page)
28cdce04 1560{
28cdce04
CY
1561 void *src_addr, *dst_addr;
1562 size_t inline_size;
1563 struct page *ipage;
1564 struct f2fs_inode *ri;
1565
4081363f 1566 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
9850cf4a 1567 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
28cdce04 1568
e3b4d43f
JK
1569 ri = F2FS_INODE(page);
1570 if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
1571 clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
1572 goto update_inode;
1573 }
1574
28cdce04
CY
1575 dst_addr = inline_xattr_addr(ipage);
1576 src_addr = inline_xattr_addr(page);
1577 inline_size = inline_xattr_size(inode);
1578
54b591df 1579 f2fs_wait_on_page_writeback(ipage, NODE);
28cdce04 1580 memcpy(dst_addr, src_addr, inline_size);
e3b4d43f 1581update_inode:
28cdce04
CY
1582 update_inode(inode, ipage);
1583 f2fs_put_page(ipage, 1);
1584}
1585
1c35a90e 1586void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
abb2366c 1587{
4081363f 1588 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
abb2366c
JK
1589 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1590 nid_t new_xnid = nid_of_node(page);
1591 struct node_info ni;
1592
abb2366c
JK
1593 /* 1: invalidate the previous xattr nid */
1594 if (!prev_xnid)
1595 goto recover_xnid;
1596
1597 /* Deallocate node address */
1598 get_node_info(sbi, prev_xnid, &ni);
9850cf4a 1599 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
abb2366c
JK
1600 invalidate_blocks(sbi, ni.blk_addr);
1601 dec_valid_node_count(sbi, inode);
479f40c4 1602 set_node_addr(sbi, &ni, NULL_ADDR, false);
abb2366c
JK
1603
1604recover_xnid:
1605 /* 2: allocate new xattr nid */
1606 if (unlikely(!inc_valid_node_count(sbi, inode)))
9850cf4a 1607 f2fs_bug_on(sbi, 1);
abb2366c
JK
1608
1609 remove_free_nid(NM_I(sbi), new_xnid);
1610 get_node_info(sbi, new_xnid, &ni);
1611 ni.ino = inode->i_ino;
479f40c4 1612 set_node_addr(sbi, &ni, NEW_ADDR, false);
abb2366c
JK
1613 F2FS_I(inode)->i_xattr_nid = new_xnid;
1614
1615 /* 3: update xattr blkaddr */
1616 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
479f40c4 1617 set_node_addr(sbi, &ni, blkaddr, false);
abb2366c
JK
1618
1619 update_inode_page(inode);
abb2366c
JK
1620}
1621
e05df3b1
JK
1622int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1623{
58bfaf44 1624 struct f2fs_inode *src, *dst;
e05df3b1
JK
1625 nid_t ino = ino_of_node(page);
1626 struct node_info old_ni, new_ni;
1627 struct page *ipage;
1628
e8271fa3
JK
1629 get_node_info(sbi, ino, &old_ni);
1630
1631 if (unlikely(old_ni.blk_addr != NULL_ADDR))
1632 return -EINVAL;
1633
4ef51a8f 1634 ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
e05df3b1
JK
1635 if (!ipage)
1636 return -ENOMEM;
1637
e1c42045 1638 /* Should not use this inode from free nid list */
e05df3b1
JK
1639 remove_free_nid(NM_I(sbi), ino);
1640
e05df3b1
JK
1641 SetPageUptodate(ipage);
1642 fill_node_footer(ipage, ino, ino, 0, true);
1643
58bfaf44
JK
1644 src = F2FS_INODE(page);
1645 dst = F2FS_INODE(ipage);
e05df3b1 1646
58bfaf44
JK
1647 memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
1648 dst->i_size = 0;
1649 dst->i_blocks = cpu_to_le64(1);
1650 dst->i_links = cpu_to_le32(1);
1651 dst->i_xattr_nid = 0;
617deb8c 1652 dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
e05df3b1
JK
1653
1654 new_ni = old_ni;
1655 new_ni.ino = ino;
1656
cfb271d4 1657 if (unlikely(!inc_valid_node_count(sbi, NULL)))
65e5cd0a 1658 WARN_ON(1);
479f40c4 1659 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
e05df3b1 1660 inc_valid_inode_count(sbi);
617deb8c 1661 set_page_dirty(ipage);
e05df3b1
JK
1662 f2fs_put_page(ipage, 1);
1663 return 0;
1664}
1665
9af0ff1c
CY
1666/*
1667 * ra_sum_pages() merge contiguous pages into one bio and submit.
e1c42045 1668 * these pre-read pages are allocated in bd_inode's mapping tree.
9af0ff1c 1669 */
bac4eef6 1670static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
9af0ff1c
CY
1671 int start, int nrpages)
1672{
bac4eef6
CY
1673 struct inode *inode = sbi->sb->s_bdev->bd_inode;
1674 struct address_space *mapping = inode->i_mapping;
1675 int i, page_idx = start;
458e6197
JK
1676 struct f2fs_io_info fio = {
1677 .type = META,
7e8f2308 1678 .rw = READ_SYNC | REQ_META | REQ_PRIO
458e6197 1679 };
9af0ff1c 1680
bac4eef6
CY
1681 for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
1682 /* alloc page in bd_inode for reading node summary info */
1683 pages[i] = grab_cache_page(mapping, page_idx);
1684 if (!pages[i])
d653788a 1685 break;
bac4eef6 1686 f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
9af0ff1c
CY
1687 }
1688
458e6197 1689 f2fs_submit_merged_bio(sbi, META, READ);
bac4eef6 1690 return i;
9af0ff1c
CY
1691}
1692
e05df3b1
JK
1693int restore_node_summary(struct f2fs_sb_info *sbi,
1694 unsigned int segno, struct f2fs_summary_block *sum)
1695{
1696 struct f2fs_node *rn;
1697 struct f2fs_summary *sum_entry;
bac4eef6 1698 struct inode *inode = sbi->sb->s_bdev->bd_inode;
e05df3b1 1699 block_t addr;
90a893c7 1700 int bio_blocks = MAX_BIO_BLOCKS(sbi);
bac4eef6
CY
1701 struct page *pages[bio_blocks];
1702 int i, idx, last_offset, nrpages, err = 0;
e05df3b1
JK
1703
1704 /* scan the node segment */
1705 last_offset = sbi->blocks_per_seg;
1706 addr = START_BLOCK(sbi, segno);
1707 sum_entry = &sum->entries[0];
1708
d653788a 1709 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
9af0ff1c 1710 nrpages = min(last_offset - i, bio_blocks);
393ff91f 1711
e1c42045 1712 /* readahead node pages */
bac4eef6 1713 nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
d653788a
GZ
1714 if (!nrpages)
1715 return -ENOMEM;
e05df3b1 1716
bac4eef6 1717 for (idx = 0; idx < nrpages; idx++) {
d653788a
GZ
1718 if (err)
1719 goto skip;
9af0ff1c 1720
bac4eef6
CY
1721 lock_page(pages[idx]);
1722 if (unlikely(!PageUptodate(pages[idx]))) {
6bacf52f
JK
1723 err = -EIO;
1724 } else {
bac4eef6 1725 rn = F2FS_NODE(pages[idx]);
9af0ff1c
CY
1726 sum_entry->nid = rn->footer.nid;
1727 sum_entry->version = 0;
1728 sum_entry->ofs_in_node = 0;
1729 sum_entry++;
9af0ff1c 1730 }
bac4eef6 1731 unlock_page(pages[idx]);
d653788a 1732skip:
bac4eef6 1733 page_cache_release(pages[idx]);
9af0ff1c 1734 }
bac4eef6
CY
1735
1736 invalidate_mapping_pages(inode->i_mapping, addr,
1737 addr + nrpages);
e05df3b1 1738 }
9af0ff1c 1739 return err;
e05df3b1
JK
1740}
1741
aec71382
CY
1742static struct nat_entry_set *grab_nat_entry_set(void)
1743{
1744 struct nat_entry_set *nes =
1745 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1746
1747 nes->entry_cnt = 0;
1748 INIT_LIST_HEAD(&nes->set_list);
1749 INIT_LIST_HEAD(&nes->entry_list);
1750 return nes;
1751}
1752
1753static void release_nat_entry_set(struct nat_entry_set *nes,
1754 struct f2fs_nm_info *nm_i)
1755{
aec71382
CY
1756 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1757 list_del(&nes->set_list);
1758 kmem_cache_free(nat_entry_set_slab, nes);
1759}
1760
1761static void adjust_nat_entry_set(struct nat_entry_set *nes,
1762 struct list_head *head)
1763{
1764 struct nat_entry_set *next = nes;
1765
1766 if (list_is_last(&nes->set_list, head))
1767 return;
1768
1769 list_for_each_entry_continue(next, head, set_list)
1770 if (nes->entry_cnt <= next->entry_cnt)
1771 break;
1772
1773 list_move_tail(&nes->set_list, &next->set_list);
1774}
1775
1776static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1777{
1778 struct nat_entry_set *nes;
1779 nid_t start_nid = START_NID(ne->ni.nid);
1780
1781 list_for_each_entry(nes, head, set_list) {
1782 if (nes->start_nid == start_nid) {
1783 list_move_tail(&ne->list, &nes->entry_list);
1784 nes->entry_cnt++;
1785 adjust_nat_entry_set(nes, head);
1786 return;
1787 }
1788 }
1789
1790 nes = grab_nat_entry_set();
1791
1792 nes->start_nid = start_nid;
1793 list_move_tail(&ne->list, &nes->entry_list);
1794 nes->entry_cnt++;
1795 list_add(&nes->set_list, head);
1796}
1797
1798static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1799{
1800 struct f2fs_nm_info *nm_i = NM_I(sbi);
1801 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1802 struct list_head *set_list = &nm_i->nat_entry_set;
1803 struct nat_entry *ne, *tmp;
1804
1805 write_lock(&nm_i->nat_tree_lock);
1806 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1807 if (nat_get_blkaddr(ne) == NEW_ADDR)
1808 continue;
1809 add_nat_entry(ne, set_list);
1810 nm_i->dirty_nat_cnt++;
1811 }
1812 write_unlock(&nm_i->nat_tree_lock);
1813}
1814
aec71382 1815static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
e05df3b1
JK
1816{
1817 struct f2fs_nm_info *nm_i = NM_I(sbi);
1818 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1819 struct f2fs_summary_block *sum = curseg->sum_blk;
1820 int i;
1821
1822 mutex_lock(&curseg->curseg_mutex);
e05df3b1
JK
1823 for (i = 0; i < nats_in_cursum(sum); i++) {
1824 struct nat_entry *ne;
1825 struct f2fs_nat_entry raw_ne;
1826 nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
1827
1828 raw_ne = nat_in_journal(sum, i);
1829retry:
1830 write_lock(&nm_i->nat_tree_lock);
1831 ne = __lookup_nat_cache(nm_i, nid);
aec71382
CY
1832 if (ne)
1833 goto found;
1834
e05df3b1
JK
1835 ne = grab_nat_entry(nm_i, nid);
1836 if (!ne) {
1837 write_unlock(&nm_i->nat_tree_lock);
1838 goto retry;
1839 }
94dac22e 1840 node_info_from_raw_nat(&ne->ni, &raw_ne);
aec71382 1841found:
e05df3b1
JK
1842 __set_nat_cache_dirty(nm_i, ne);
1843 write_unlock(&nm_i->nat_tree_lock);
1844 }
1845 update_nats_in_cursum(sum, -i);
1846 mutex_unlock(&curseg->curseg_mutex);
e05df3b1
JK
1847}
1848
0a8165d7 1849/*
e05df3b1
JK
1850 * This function is called during the checkpointing process.
1851 */
1852void flush_nat_entries(struct f2fs_sb_info *sbi)
1853{
1854 struct f2fs_nm_info *nm_i = NM_I(sbi);
1855 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1856 struct f2fs_summary_block *sum = curseg->sum_blk;
aec71382
CY
1857 struct nat_entry_set *nes, *tmp;
1858 struct list_head *head = &nm_i->nat_entry_set;
1859 bool to_journal = true;
e05df3b1 1860
aec71382
CY
1861 /* merge nat entries of dirty list to nat entry set temporarily */
1862 merge_nats_in_set(sbi);
e05df3b1 1863
aec71382
CY
1864 /*
1865 * if there are no enough space in journal to store dirty nat
1866 * entries, remove all entries from journal and merge them
1867 * into nat entry set.
1868 */
184a5cd2 1869 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) {
aec71382 1870 remove_nats_in_journal(sbi);
2d7b822a 1871
aec71382
CY
1872 /*
1873 * merge nat entries of dirty list to nat entry set temporarily
1874 */
1875 merge_nats_in_set(sbi);
1876 }
e05df3b1 1877
aec71382
CY
1878 if (!nm_i->dirty_nat_cnt)
1879 return;
e05df3b1 1880
aec71382
CY
1881 /*
1882 * there are two steps to flush nat entries:
1883 * #1, flush nat entries to journal in current hot data summary block.
1884 * #2, flush nat entries to nat page.
1885 */
1886 list_for_each_entry_safe(nes, tmp, head, set_list) {
1887 struct f2fs_nat_block *nat_blk;
1888 struct nat_entry *ne, *cur;
1889 struct page *page;
1890 nid_t start_nid = nes->start_nid;
1891
184a5cd2
CY
1892 if (to_journal &&
1893 !__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL))
aec71382
CY
1894 to_journal = false;
1895
1896 if (to_journal) {
1897 mutex_lock(&curseg->curseg_mutex);
1898 } else {
e05df3b1
JK
1899 page = get_next_nat_page(sbi, start_nid);
1900 nat_blk = page_address(page);
9850cf4a 1901 f2fs_bug_on(sbi, !nat_blk);
e05df3b1
JK
1902 }
1903
aec71382
CY
1904 /* flush dirty nats in nat entry set */
1905 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
1906 struct f2fs_nat_entry *raw_ne;
1907 nid_t nid = nat_get_nid(ne);
1908 int offset;
1909
1910 if (to_journal) {
1911 offset = lookup_journal_in_cursum(sum,
1912 NAT_JOURNAL, nid, 1);
9850cf4a 1913 f2fs_bug_on(sbi, offset < 0);
aec71382
CY
1914 raw_ne = &nat_in_journal(sum, offset);
1915 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1916 } else {
1917 raw_ne = &nat_blk->entries[nid - start_nid];
1918 }
1919 raw_nat_from_node_info(raw_ne, &ne->ni);
e05df3b1 1920
aec71382 1921 if (nat_get_blkaddr(ne) == NULL_ADDR &&
6fb03f3a 1922 add_free_nid(sbi, nid, false) <= 0) {
aec71382
CY
1923 write_lock(&nm_i->nat_tree_lock);
1924 __del_from_nat_cache(nm_i, ne);
1925 write_unlock(&nm_i->nat_tree_lock);
1926 } else {
1927 write_lock(&nm_i->nat_tree_lock);
88bd02c9 1928 nat_reset_flag(ne);
aec71382
CY
1929 __clear_nat_cache_dirty(nm_i, ne);
1930 write_unlock(&nm_i->nat_tree_lock);
1931 }
e05df3b1 1932 }
aec71382
CY
1933
1934 if (to_journal)
1935 mutex_unlock(&curseg->curseg_mutex);
1936 else
1937 f2fs_put_page(page, 1);
1938
9850cf4a 1939 f2fs_bug_on(sbi, !list_empty(&nes->entry_list));
aec71382 1940 release_nat_entry_set(nes, nm_i);
e05df3b1 1941 }
aec71382 1942
9850cf4a
JK
1943 f2fs_bug_on(sbi, !list_empty(head));
1944 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
e05df3b1
JK
1945}
1946
1947static int init_node_manager(struct f2fs_sb_info *sbi)
1948{
1949 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
1950 struct f2fs_nm_info *nm_i = NM_I(sbi);
1951 unsigned char *version_bitmap;
1952 unsigned int nat_segs, nat_blocks;
1953
1954 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
1955
1956 /* segment_count_nat includes pair segment so divide to 2. */
1957 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1958 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
b63da15e 1959
7ee0eeab
JK
1960 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1961
b63da15e 1962 /* not used nids: 0, node, meta, (and root counted as valid node) */
c200b1aa 1963 nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
e05df3b1
JK
1964 nm_i->fcnt = 0;
1965 nm_i->nat_cnt = 0;
cdfc41c1 1966 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
e05df3b1 1967
8a7ed66a 1968 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
e05df3b1
JK
1969 INIT_LIST_HEAD(&nm_i->free_nid_list);
1970 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1971 INIT_LIST_HEAD(&nm_i->nat_entries);
1972 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
aec71382 1973 INIT_LIST_HEAD(&nm_i->nat_entry_set);
e05df3b1
JK
1974
1975 mutex_init(&nm_i->build_lock);
1976 spin_lock_init(&nm_i->free_nid_list_lock);
1977 rwlock_init(&nm_i->nat_tree_lock);
1978
e05df3b1 1979 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
79b5793b 1980 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
e05df3b1
JK
1981 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1982 if (!version_bitmap)
1983 return -EFAULT;
1984
79b5793b
AG
1985 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
1986 GFP_KERNEL);
1987 if (!nm_i->nat_bitmap)
1988 return -ENOMEM;
e05df3b1
JK
1989 return 0;
1990}
1991
1992int build_node_manager(struct f2fs_sb_info *sbi)
1993{
1994 int err;
1995
1996 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
1997 if (!sbi->nm_info)
1998 return -ENOMEM;
1999
2000 err = init_node_manager(sbi);
2001 if (err)
2002 return err;
2003
2004 build_free_nids(sbi);
2005 return 0;
2006}
2007
2008void destroy_node_manager(struct f2fs_sb_info *sbi)
2009{
2010 struct f2fs_nm_info *nm_i = NM_I(sbi);
2011 struct free_nid *i, *next_i;
2012 struct nat_entry *natvec[NATVEC_SIZE];
2013 nid_t nid = 0;
2014 unsigned int found;
2015
2016 if (!nm_i)
2017 return;
2018
2019 /* destroy free nid list */
2020 spin_lock(&nm_i->free_nid_list_lock);
2021 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
9850cf4a 2022 f2fs_bug_on(sbi, i->state == NID_ALLOC);
8a7ed66a 2023 __del_from_free_nid_list(nm_i, i);
e05df3b1 2024 nm_i->fcnt--;
cf0ee0f0
CY
2025 spin_unlock(&nm_i->free_nid_list_lock);
2026 kmem_cache_free(free_nid_slab, i);
2027 spin_lock(&nm_i->free_nid_list_lock);
e05df3b1 2028 }
9850cf4a 2029 f2fs_bug_on(sbi, nm_i->fcnt);
e05df3b1
JK
2030 spin_unlock(&nm_i->free_nid_list_lock);
2031
2032 /* destroy nat cache */
2033 write_lock(&nm_i->nat_tree_lock);
2034 while ((found = __gang_lookup_nat_cache(nm_i,
2035 nid, NATVEC_SIZE, natvec))) {
2036 unsigned idx;
b6ce391e
GZ
2037 nid = nat_get_nid(natvec[found - 1]) + 1;
2038 for (idx = 0; idx < found; idx++)
2039 __del_from_nat_cache(nm_i, natvec[idx]);
e05df3b1 2040 }
9850cf4a 2041 f2fs_bug_on(sbi, nm_i->nat_cnt);
e05df3b1
JK
2042 write_unlock(&nm_i->nat_tree_lock);
2043
2044 kfree(nm_i->nat_bitmap);
2045 sbi->nm_info = NULL;
2046 kfree(nm_i);
2047}
2048
6e6093a8 2049int __init create_node_manager_caches(void)
e05df3b1
JK
2050{
2051 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
e8512d2e 2052 sizeof(struct nat_entry));
e05df3b1 2053 if (!nat_entry_slab)
aec71382 2054 goto fail;
e05df3b1
JK
2055
2056 free_nid_slab = f2fs_kmem_cache_create("free_nid",
e8512d2e 2057 sizeof(struct free_nid));
aec71382
CY
2058 if (!free_nid_slab)
2059 goto destory_nat_entry;
2060
2061 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2062 sizeof(struct nat_entry_set));
2063 if (!nat_entry_set_slab)
2064 goto destory_free_nid;
e05df3b1 2065 return 0;
aec71382
CY
2066
2067destory_free_nid:
2068 kmem_cache_destroy(free_nid_slab);
2069destory_nat_entry:
2070 kmem_cache_destroy(nat_entry_slab);
2071fail:
2072 return -ENOMEM;
e05df3b1
JK
2073}
2074
2075void destroy_node_manager_caches(void)
2076{
aec71382 2077 kmem_cache_destroy(nat_entry_set_slab);
e05df3b1
JK
2078 kmem_cache_destroy(free_nid_slab);
2079 kmem_cache_destroy(nat_entry_slab);
2080}