f2fs: Don't overwrite all types of node to keep node chain
[linux-block.git] / fs / f2fs / data.c
CommitLineData
0a8165d7 1/*
eb47b800
JK
2 * fs/f2fs/data.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h>
14#include <linux/mpage.h>
15#include <linux/writeback.h>
16#include <linux/backing-dev.h>
8f46dcae 17#include <linux/pagevec.h>
eb47b800
JK
18#include <linux/blkdev.h>
19#include <linux/bio.h>
690e4a3e 20#include <linux/prefetch.h>
e2e40f2c 21#include <linux/uio.h>
fe76b796
JK
22#include <linux/mm.h>
23#include <linux/memcontrol.h>
f1e88660 24#include <linux/cleancache.h>
174cd4b1 25#include <linux/sched/signal.h>
eb47b800
JK
26
27#include "f2fs.h"
28#include "node.h"
29#include "segment.h"
db9f7c1a 30#include "trace.h"
848753aa 31#include <trace/events/f2fs.h>
eb47b800 32
36951b38
CY
33static bool __is_cp_guaranteed(struct page *page)
34{
35 struct address_space *mapping = page->mapping;
36 struct inode *inode;
37 struct f2fs_sb_info *sbi;
38
39 if (!mapping)
40 return false;
41
42 inode = mapping->host;
43 sbi = F2FS_I_SB(inode);
44
45 if (inode->i_ino == F2FS_META_INO(sbi) ||
46 inode->i_ino == F2FS_NODE_INO(sbi) ||
47 S_ISDIR(inode->i_mode) ||
48 is_cold_data(page))
49 return true;
50 return false;
51}
52
4246a0b6 53static void f2fs_read_end_io(struct bio *bio)
93dfe2ac 54{
f568849e
LT
55 struct bio_vec *bvec;
56 int i;
93dfe2ac 57
8b038c70 58#ifdef CONFIG_F2FS_FAULT_INJECTION
263663cd 59 if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
55523519 60 f2fs_show_injection_info(FAULT_IO);
4e4cbee9 61 bio->bi_status = BLK_STS_IOERR;
55523519 62 }
8b038c70
CY
63#endif
64
4375a336 65 if (f2fs_bio_encrypted(bio)) {
4e4cbee9 66 if (bio->bi_status) {
0b81d077 67 fscrypt_release_ctx(bio->bi_private);
4375a336 68 } else {
0b81d077 69 fscrypt_decrypt_bio_pages(bio->bi_private, bio);
4375a336
JK
70 return;
71 }
72 }
73
12377024
CY
74 bio_for_each_segment_all(bvec, bio, i) {
75 struct page *page = bvec->bv_page;
f1e88660 76
4e4cbee9 77 if (!bio->bi_status) {
237c0790
JK
78 if (!PageUptodate(page))
79 SetPageUptodate(page);
f1e88660
JK
80 } else {
81 ClearPageUptodate(page);
82 SetPageError(page);
83 }
84 unlock_page(page);
85 }
f1e88660
JK
86 bio_put(bio);
87}
88
4246a0b6 89static void f2fs_write_end_io(struct bio *bio)
93dfe2ac 90{
1b1f559f 91 struct f2fs_sb_info *sbi = bio->bi_private;
f568849e
LT
92 struct bio_vec *bvec;
93 int i;
93dfe2ac 94
f568849e 95 bio_for_each_segment_all(bvec, bio, i) {
93dfe2ac 96 struct page *page = bvec->bv_page;
36951b38 97 enum count_type type = WB_DATA_TYPE(page);
93dfe2ac 98
0a595eba
JK
99 if (IS_DUMMY_WRITTEN_PAGE(page)) {
100 set_page_private(page, (unsigned long)NULL);
101 ClearPagePrivate(page);
102 unlock_page(page);
103 mempool_free(page, sbi->write_io_dummy);
104
4e4cbee9 105 if (unlikely(bio->bi_status))
0a595eba
JK
106 f2fs_stop_checkpoint(sbi, true);
107 continue;
108 }
109
0b81d077 110 fscrypt_pullback_bio_page(&page, true);
4375a336 111
4e4cbee9 112 if (unlikely(bio->bi_status)) {
5114a97a 113 mapping_set_error(page->mapping, -EIO);
b1ca321d
JK
114 if (type == F2FS_WB_CP_DATA)
115 f2fs_stop_checkpoint(sbi, true);
93dfe2ac 116 }
7dff55d2
YH
117
118 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
119 page->index != nid_of_node(page));
120
36951b38
CY
121 dec_page_count(sbi, type);
122 clear_cold_data(page);
93dfe2ac 123 end_page_writeback(page);
f568849e 124 }
36951b38 125 if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
f5730184 126 wq_has_sleeper(&sbi->cp_wait))
93dfe2ac
JK
127 wake_up(&sbi->cp_wait);
128
129 bio_put(bio);
130}
131
3c62be17
JK
132/*
133 * Return true, if pre_bio's bdev is same as its target device.
134 */
135struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
136 block_t blk_addr, struct bio *bio)
137{
138 struct block_device *bdev = sbi->sb->s_bdev;
139 int i;
140
141 for (i = 0; i < sbi->s_ndevs; i++) {
142 if (FDEV(i).start_blk <= blk_addr &&
143 FDEV(i).end_blk >= blk_addr) {
144 blk_addr -= FDEV(i).start_blk;
145 bdev = FDEV(i).bdev;
146 break;
147 }
148 }
149 if (bio) {
74d46992 150 bio_set_dev(bio, bdev);
3c62be17
JK
151 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
152 }
153 return bdev;
154}
155
156int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
157{
158 int i;
159
160 for (i = 0; i < sbi->s_ndevs; i++)
161 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
162 return i;
163 return 0;
164}
165
166static bool __same_bdev(struct f2fs_sb_info *sbi,
167 block_t blk_addr, struct bio *bio)
168{
74d46992
CH
169 struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
170 return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
3c62be17
JK
171}
172
940a6d34
GZ
173/*
174 * Low-level block read/write IO operations.
175 */
176static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
578c6478 177 struct writeback_control *wbc,
0cdd3195
HL
178 int npages, bool is_read,
179 enum page_type type, enum temp_type temp)
940a6d34
GZ
180{
181 struct bio *bio;
182
d62fe971 183 bio = f2fs_bio_alloc(sbi, npages, true);
940a6d34 184
3c62be17 185 f2fs_target_device(sbi, blk_addr, bio);
0cdd3195
HL
186 if (is_read) {
187 bio->bi_end_io = f2fs_read_end_io;
188 bio->bi_private = NULL;
189 } else {
190 bio->bi_end_io = f2fs_write_end_io;
191 bio->bi_private = sbi;
192 bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp);
193 }
578c6478
YY
194 if (wbc)
195 wbc_init_bio(wbc, bio);
940a6d34
GZ
196
197 return bio;
198}
199
4fc29c1a
LT
200static inline void __submit_bio(struct f2fs_sb_info *sbi,
201 struct bio *bio, enum page_type type)
f5730184 202{
4fc29c1a 203 if (!is_read_io(bio_op(bio))) {
0a595eba
JK
204 unsigned int start;
205
0a595eba
JK
206 if (type != DATA && type != NODE)
207 goto submit_io;
208
ccd31cb2 209 if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
3bb09a0e
TY
210 blk_finish_plug(current->plug);
211
0a595eba
JK
212 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
213 start %= F2FS_IO_SIZE(sbi);
214
215 if (start == 0)
216 goto submit_io;
217
218 /* fill dummy pages */
219 for (; start < F2FS_IO_SIZE(sbi); start++) {
220 struct page *page =
221 mempool_alloc(sbi->write_io_dummy,
222 GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
223 f2fs_bug_on(sbi, !page);
224
225 SetPagePrivate(page);
226 set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
227 lock_page(page);
228 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
229 f2fs_bug_on(sbi, 1);
230 }
231 /*
232 * In the NODE case, we lose next block address chain. So, we
233 * need to do checkpoint in f2fs_sync_file.
234 */
235 if (type == NODE)
236 set_sbi_flag(sbi, SBI_NEED_CP);
19a5f5e2 237 }
0a595eba 238submit_io:
554b5125
JK
239 if (is_read_io(bio_op(bio)))
240 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
241 else
242 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
4e49ea4a 243 submit_bio(bio);
f5730184
JK
244}
245
458e6197 246static void __submit_merged_bio(struct f2fs_bio_info *io)
93dfe2ac 247{
458e6197 248 struct f2fs_io_info *fio = &io->fio;
93dfe2ac
JK
249
250 if (!io->bio)
251 return;
252
554b5125
JK
253 bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
254
04d328de 255 if (is_read_io(fio->op))
554b5125 256 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
6a8f8ca5 257 else
554b5125 258 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
04d328de 259
4fc29c1a 260 __submit_bio(io->sbi, io->bio, fio->type);
93dfe2ac
JK
261 io->bio = NULL;
262}
263
942fd319
JK
264static bool __has_merged_page(struct f2fs_bio_info *io,
265 struct inode *inode, nid_t ino, pgoff_t idx)
0fd785eb 266{
0fd785eb
CY
267 struct bio_vec *bvec;
268 struct page *target;
269 int i;
270
0c3a5797 271 if (!io->bio)
0fd785eb 272 return false;
0c3a5797 273
942fd319 274 if (!inode && !ino)
0c3a5797 275 return true;
0fd785eb
CY
276
277 bio_for_each_segment_all(bvec, io->bio, i) {
278
0b81d077 279 if (bvec->bv_page->mapping)
0fd785eb 280 target = bvec->bv_page;
0b81d077
JK
281 else
282 target = fscrypt_control_page(bvec->bv_page);
0fd785eb 283
942fd319
JK
284 if (idx != target->index)
285 continue;
286
0c3a5797
CY
287 if (inode && inode == target->mapping->host)
288 return true;
0c3a5797 289 if (ino && ino == ino_of_node(target))
0fd785eb 290 return true;
0fd785eb
CY
291 }
292
0fd785eb
CY
293 return false;
294}
295
0c3a5797 296static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
942fd319 297 nid_t ino, pgoff_t idx, enum page_type type)
0c3a5797
CY
298{
299 enum page_type btype = PAGE_TYPE_OF_BIO(type);
a912b54d
JK
300 enum temp_type temp;
301 struct f2fs_bio_info *io;
302 bool ret = false;
0c3a5797 303
a912b54d
JK
304 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
305 io = sbi->write_io[btype] + temp;
306
307 down_read(&io->io_rwsem);
308 ret = __has_merged_page(io, inode, ino, idx);
309 up_read(&io->io_rwsem);
0c3a5797 310
a912b54d
JK
311 /* TODO: use HOT temp only for meta pages now. */
312 if (ret || btype == META)
313 break;
314 }
0c3a5797
CY
315 return ret;
316}
317
b9109b0e 318static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
a912b54d 319 enum page_type type, enum temp_type temp)
93dfe2ac
JK
320{
321 enum page_type btype = PAGE_TYPE_OF_BIO(type);
a912b54d 322 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
93dfe2ac 323
df0f8dc0 324 down_write(&io->io_rwsem);
458e6197
JK
325
326 /* change META to META_FLUSH in the checkpoint procedure */
327 if (type >= META_FLUSH) {
328 io->fio.type = META_FLUSH;
04d328de 329 io->fio.op = REQ_OP_WRITE;
3adc5fcb 330 io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
70fd7614 331 if (!test_opt(sbi, NOBARRIER))
7f54f51f 332 io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
458e6197
JK
333 }
334 __submit_merged_bio(io);
df0f8dc0 335 up_write(&io->io_rwsem);
93dfe2ac
JK
336}
337
a912b54d
JK
338static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
339 struct inode *inode, nid_t ino, pgoff_t idx,
340 enum page_type type, bool force)
0c3a5797 341{
a912b54d
JK
342 enum temp_type temp;
343
344 if (!force && !has_merged_page(sbi, inode, ino, idx, type))
345 return;
346
347 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
348
349 __f2fs_submit_merged_write(sbi, type, temp);
350
351 /* TODO: use HOT temp only for meta pages now. */
352 if (type >= META)
353 break;
354 }
0c3a5797
CY
355}
356
b9109b0e 357void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
0c3a5797 358{
a912b54d 359 __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
0c3a5797
CY
360}
361
b9109b0e 362void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
942fd319 363 struct inode *inode, nid_t ino, pgoff_t idx,
b9109b0e 364 enum page_type type)
0c3a5797 365{
a912b54d 366 __submit_merged_write_cond(sbi, inode, ino, idx, type, false);
0c3a5797
CY
367}
368
b9109b0e 369void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
406657dd 370{
b9109b0e
JK
371 f2fs_submit_merged_write(sbi, DATA);
372 f2fs_submit_merged_write(sbi, NODE);
373 f2fs_submit_merged_write(sbi, META);
406657dd
CY
374}
375
93dfe2ac
JK
376/*
377 * Fill the locked page with data located in the block address.
771a9a71 378 * A caller needs to unlock the page on failure.
93dfe2ac 379 */
05ca3632 380int f2fs_submit_page_bio(struct f2fs_io_info *fio)
93dfe2ac 381{
93dfe2ac 382 struct bio *bio;
0b81d077
JK
383 struct page *page = fio->encrypted_page ?
384 fio->encrypted_page : fio->page;
93dfe2ac 385
2ace38e0 386 trace_f2fs_submit_page_bio(page, fio);
05ca3632 387 f2fs_trace_ios(fio, 0);
93dfe2ac
JK
388
389 /* Allocate a new bio */
578c6478 390 bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
0cdd3195 391 1, is_read_io(fio->op), fio->type, fio->temp);
93dfe2ac 392
09cbfeaf 393 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
93dfe2ac 394 bio_put(bio);
93dfe2ac
JK
395 return -EFAULT;
396 }
04d328de 397 bio_set_op_attrs(bio, fio->op, fio->op_flags);
93dfe2ac 398
4fc29c1a 399 __submit_bio(fio->sbi, bio, fio->type);
d1b3e72d
JK
400
401 if (!is_read_io(fio->op))
402 inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
93dfe2ac
JK
403 return 0;
404}
405
b9109b0e 406int f2fs_submit_page_write(struct f2fs_io_info *fio)
93dfe2ac 407{
05ca3632 408 struct f2fs_sb_info *sbi = fio->sbi;
458e6197 409 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
a912b54d 410 struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
4375a336 411 struct page *bio_page;
0a595eba 412 int err = 0;
93dfe2ac 413
b9109b0e 414 f2fs_bug_on(sbi, is_read_io(fio->op));
93dfe2ac 415
fb830fc5
CY
416 down_write(&io->io_rwsem);
417next:
418 if (fio->in_list) {
419 spin_lock(&io->io_lock);
420 if (list_empty(&io->io_list)) {
421 spin_unlock(&io->io_lock);
422 goto out_fail;
423 }
424 fio = list_first_entry(&io->io_list,
425 struct f2fs_io_info, list);
426 list_del(&fio->list);
427 spin_unlock(&io->io_lock);
428 }
93dfe2ac 429
7a9d7548
CY
430 if (fio->old_blkaddr != NEW_ADDR)
431 verify_block_addr(sbi, fio->old_blkaddr);
432 verify_block_addr(sbi, fio->new_blkaddr);
93dfe2ac 433
36951b38
CY
434 bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
435
ebf7c522
TM
436 /* set submitted = true as a return value */
437 fio->submitted = true;
d68f735b 438
b9109b0e 439 inc_page_count(sbi, WB_DATA_TYPE(bio_page));
93dfe2ac 440
7a9d7548 441 if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
3c62be17
JK
442 (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
443 !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
458e6197 444 __submit_merged_bio(io);
93dfe2ac
JK
445alloc_new:
446 if (io->bio == NULL) {
0a595eba
JK
447 if ((fio->type == DATA || fio->type == NODE) &&
448 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
449 err = -EAGAIN;
b9109b0e 450 dec_page_count(sbi, WB_DATA_TYPE(bio_page));
0a595eba
JK
451 goto out_fail;
452 }
578c6478 453 io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
0cdd3195
HL
454 BIO_MAX_PAGES, false,
455 fio->type, fio->temp);
458e6197 456 io->fio = *fio;
93dfe2ac
JK
457 }
458
a912b54d 459 if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
458e6197 460 __submit_merged_bio(io);
93dfe2ac
JK
461 goto alloc_new;
462 }
463
578c6478
YY
464 if (fio->io_wbc)
465 wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
466
7a9d7548 467 io->last_block_in_bio = fio->new_blkaddr;
05ca3632 468 f2fs_trace_ios(fio, 0);
fb830fc5
CY
469
470 trace_f2fs_submit_page_write(fio->page, fio);
471
472 if (fio->in_list)
473 goto next;
0a595eba 474out_fail:
df0f8dc0 475 up_write(&io->io_rwsem);
0a595eba 476 return err;
93dfe2ac
JK
477}
478
13ba41e3
JK
479static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
480 unsigned nr_pages)
481{
482 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
483 struct fscrypt_ctx *ctx = NULL;
484 struct bio *bio;
485
486 if (f2fs_encrypted_file(inode)) {
487 ctx = fscrypt_get_ctx(inode, GFP_NOFS);
488 if (IS_ERR(ctx))
489 return ERR_CAST(ctx);
490
491 /* wait the page to be moved by cleaning */
492 f2fs_wait_on_block_writeback(sbi, blkaddr);
493 }
494
d62fe971 495 bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
13ba41e3
JK
496 if (!bio) {
497 if (ctx)
498 fscrypt_release_ctx(ctx);
499 return ERR_PTR(-ENOMEM);
500 }
501 f2fs_target_device(sbi, blkaddr, bio);
502 bio->bi_end_io = f2fs_read_end_io;
503 bio->bi_private = ctx;
504 bio_set_op_attrs(bio, REQ_OP_READ, 0);
505
506 return bio;
507}
508
509/* This can handle encryption stuffs */
510static int f2fs_submit_page_read(struct inode *inode, struct page *page,
511 block_t blkaddr)
512{
513 struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
514
515 if (IS_ERR(bio))
516 return PTR_ERR(bio);
517
518 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
519 bio_put(bio);
520 return -EFAULT;
521 }
522 __submit_bio(F2FS_I_SB(inode), bio, DATA);
523 return 0;
524}
525
46008c6d
CY
526static void __set_data_blkaddr(struct dnode_of_data *dn)
527{
528 struct f2fs_node *rn = F2FS_NODE(dn->node_page);
529 __le32 *addr_array;
7a2af766
CY
530 int base = 0;
531
532 if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
533 base = get_extra_isize(dn->inode);
46008c6d
CY
534
535 /* Get physical address of data block */
536 addr_array = blkaddr_in_node(rn);
7a2af766 537 addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
46008c6d
CY
538}
539
0a8165d7 540/*
eb47b800
JK
541 * Lock ordering for the change of data block address:
542 * ->data_page
543 * ->node_page
544 * update block addresses in the node page
545 */
216a620a 546void set_data_blkaddr(struct dnode_of_data *dn)
eb47b800 547{
46008c6d
CY
548 f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
549 __set_data_blkaddr(dn);
550 if (set_page_dirty(dn->node_page))
12719ae1 551 dn->node_changed = true;
eb47b800
JK
552}
553
f28b3434
CY
554void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
555{
556 dn->data_blkaddr = blkaddr;
557 set_data_blkaddr(dn);
558 f2fs_update_extent_cache(dn);
559}
560
46008c6d
CY
561/* dn->ofs_in_node will be returned with up-to-date last block pointer */
562int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
eb47b800 563{
4081363f 564 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
0abd675e 565 int err;
eb47b800 566
46008c6d
CY
567 if (!count)
568 return 0;
569
91942321 570 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
eb47b800 571 return -EPERM;
0abd675e
CY
572 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
573 return err;
eb47b800 574
46008c6d
CY
575 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
576 dn->ofs_in_node, count);
577
578 f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
579
580 for (; count > 0; dn->ofs_in_node++) {
7a2af766
CY
581 block_t blkaddr = datablock_addr(dn->inode,
582 dn->node_page, dn->ofs_in_node);
46008c6d
CY
583 if (blkaddr == NULL_ADDR) {
584 dn->data_blkaddr = NEW_ADDR;
585 __set_data_blkaddr(dn);
586 count--;
587 }
588 }
589
590 if (set_page_dirty(dn->node_page))
591 dn->node_changed = true;
eb47b800
JK
592 return 0;
593}
594
46008c6d
CY
595/* Should keep dn->ofs_in_node unchanged */
596int reserve_new_block(struct dnode_of_data *dn)
597{
598 unsigned int ofs_in_node = dn->ofs_in_node;
599 int ret;
600
601 ret = reserve_new_blocks(dn, 1);
602 dn->ofs_in_node = ofs_in_node;
603 return ret;
604}
605
b600965c
HL
606int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
607{
608 bool need_put = dn->inode_page ? false : true;
609 int err;
610
611 err = get_dnode_of_data(dn, index, ALLOC_NODE);
612 if (err)
613 return err;
a8865372 614
b600965c
HL
615 if (dn->data_blkaddr == NULL_ADDR)
616 err = reserve_new_block(dn);
a8865372 617 if (err || need_put)
b600965c
HL
618 f2fs_put_dnode(dn);
619 return err;
620}
621
759af1c9 622int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
eb47b800 623{
e15882b6 624 struct extent_info ei = {0,0,0};
759af1c9 625 struct inode *inode = dn->inode;
028a41e8 626
759af1c9
FL
627 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
628 dn->data_blkaddr = ei.blk + index - ei.fofs;
629 return 0;
429511cd 630 }
028a41e8 631
759af1c9 632 return f2fs_reserve_block(dn, index);
eb47b800
JK
633}
634
a56c7c6f 635struct page *get_read_data_page(struct inode *inode, pgoff_t index,
04d328de 636 int op_flags, bool for_write)
eb47b800 637{
eb47b800
JK
638 struct address_space *mapping = inode->i_mapping;
639 struct dnode_of_data dn;
640 struct page *page;
e15882b6 641 struct extent_info ei = {0,0,0};
eb47b800 642 int err;
4375a336 643
a56c7c6f 644 page = f2fs_grab_cache_page(mapping, index, for_write);
650495de
JK
645 if (!page)
646 return ERR_PTR(-ENOMEM);
647
cb3bc9ee
CY
648 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
649 dn.data_blkaddr = ei.blk + index - ei.fofs;
650 goto got_it;
651 }
652
eb47b800 653 set_new_dnode(&dn, inode, NULL, NULL, 0);
266e97a8 654 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
86531d6b
JK
655 if (err)
656 goto put_err;
eb47b800
JK
657 f2fs_put_dnode(&dn);
658
6bacf52f 659 if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
86531d6b
JK
660 err = -ENOENT;
661 goto put_err;
650495de 662 }
cb3bc9ee 663got_it:
43f3eae1
JK
664 if (PageUptodate(page)) {
665 unlock_page(page);
eb47b800 666 return page;
43f3eae1 667 }
eb47b800 668
d59ff4df
JK
669 /*
670 * A new dentry page is allocated but not able to be written, since its
671 * new inode page couldn't be allocated due to -ENOSPC.
672 * In such the case, its blkaddr can be remained as NEW_ADDR.
673 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
674 */
675 if (dn.data_blkaddr == NEW_ADDR) {
09cbfeaf 676 zero_user_segment(page, 0, PAGE_SIZE);
237c0790
JK
677 if (!PageUptodate(page))
678 SetPageUptodate(page);
43f3eae1 679 unlock_page(page);
d59ff4df
JK
680 return page;
681 }
eb47b800 682
13ba41e3 683 err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
393ff91f 684 if (err)
86531d6b 685 goto put_err;
43f3eae1 686 return page;
86531d6b
JK
687
688put_err:
689 f2fs_put_page(page, 1);
690 return ERR_PTR(err);
43f3eae1
JK
691}
692
693struct page *find_data_page(struct inode *inode, pgoff_t index)
694{
695 struct address_space *mapping = inode->i_mapping;
696 struct page *page;
697
698 page = find_get_page(mapping, index);
699 if (page && PageUptodate(page))
700 return page;
701 f2fs_put_page(page, 0);
702
70fd7614 703 page = get_read_data_page(inode, index, 0, false);
43f3eae1
JK
704 if (IS_ERR(page))
705 return page;
706
707 if (PageUptodate(page))
708 return page;
709
710 wait_on_page_locked(page);
711 if (unlikely(!PageUptodate(page))) {
712 f2fs_put_page(page, 0);
713 return ERR_PTR(-EIO);
714 }
715 return page;
716}
717
718/*
719 * If it tries to access a hole, return an error.
720 * Because, the callers, functions in dir.c and GC, should be able to know
721 * whether this page exists or not.
722 */
a56c7c6f
JK
723struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
724 bool for_write)
43f3eae1
JK
725{
726 struct address_space *mapping = inode->i_mapping;
727 struct page *page;
728repeat:
70fd7614 729 page = get_read_data_page(inode, index, 0, for_write);
43f3eae1
JK
730 if (IS_ERR(page))
731 return page;
393ff91f 732
43f3eae1 733 /* wait for read completion */
393ff91f 734 lock_page(page);
6bacf52f 735 if (unlikely(page->mapping != mapping)) {
afcb7ca0
JK
736 f2fs_put_page(page, 1);
737 goto repeat;
eb47b800 738 }
1563ac75
CY
739 if (unlikely(!PageUptodate(page))) {
740 f2fs_put_page(page, 1);
741 return ERR_PTR(-EIO);
742 }
eb47b800
JK
743 return page;
744}
745
0a8165d7 746/*
eb47b800
JK
747 * Caller ensures that this data page is never allocated.
748 * A new zero-filled data page is allocated in the page cache.
39936837 749 *
4f4124d0
CY
750 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
751 * f2fs_unlock_op().
470f00e9
CY
752 * Note that, ipage is set only by make_empty_dir, and if any error occur,
753 * ipage should be released by this function.
eb47b800 754 */
64aa7ed9 755struct page *get_new_data_page(struct inode *inode,
a8865372 756 struct page *ipage, pgoff_t index, bool new_i_size)
eb47b800 757{
eb47b800
JK
758 struct address_space *mapping = inode->i_mapping;
759 struct page *page;
760 struct dnode_of_data dn;
761 int err;
7612118a 762
a56c7c6f 763 page = f2fs_grab_cache_page(mapping, index, true);
470f00e9
CY
764 if (!page) {
765 /*
766 * before exiting, we should make sure ipage will be released
767 * if any error occur.
768 */
769 f2fs_put_page(ipage, 1);
01f28610 770 return ERR_PTR(-ENOMEM);
470f00e9 771 }
eb47b800 772
a8865372 773 set_new_dnode(&dn, inode, ipage, NULL, 0);
b600965c 774 err = f2fs_reserve_block(&dn, index);
01f28610
JK
775 if (err) {
776 f2fs_put_page(page, 1);
eb47b800 777 return ERR_PTR(err);
a8865372 778 }
01f28610
JK
779 if (!ipage)
780 f2fs_put_dnode(&dn);
eb47b800
JK
781
782 if (PageUptodate(page))
01f28610 783 goto got_it;
eb47b800
JK
784
785 if (dn.data_blkaddr == NEW_ADDR) {
09cbfeaf 786 zero_user_segment(page, 0, PAGE_SIZE);
237c0790
JK
787 if (!PageUptodate(page))
788 SetPageUptodate(page);
eb47b800 789 } else {
4375a336 790 f2fs_put_page(page, 1);
a8865372 791
7612118a
JK
792 /* if ipage exists, blkaddr should be NEW_ADDR */
793 f2fs_bug_on(F2FS_I_SB(inode), ipage);
794 page = get_lock_data_page(inode, index, true);
4375a336 795 if (IS_ERR(page))
7612118a 796 return page;
eb47b800 797 }
01f28610 798got_it:
9edcdabf 799 if (new_i_size && i_size_read(inode) <
ee6d182f 800 ((loff_t)(index + 1) << PAGE_SHIFT))
fc9581c8 801 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
eb47b800
JK
802 return page;
803}
804
d5097be5 805static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
bfad7c2d 806{
4081363f 807 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
bfad7c2d 808 struct f2fs_summary sum;
bfad7c2d 809 struct node_info ni;
976e4c50 810 pgoff_t fofs;
46008c6d 811 blkcnt_t count = 1;
0abd675e 812 int err;
bfad7c2d 813
91942321 814 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
bfad7c2d 815 return -EPERM;
df6136ef 816
7a2af766
CY
817 dn->data_blkaddr = datablock_addr(dn->inode,
818 dn->node_page, dn->ofs_in_node);
df6136ef
CY
819 if (dn->data_blkaddr == NEW_ADDR)
820 goto alloc;
821
0abd675e
CY
822 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
823 return err;
bfad7c2d 824
df6136ef 825alloc:
bfad7c2d
JK
826 get_node_info(sbi, dn->nid, &ni);
827 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
828
df6136ef 829 allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
d5097be5 830 &sum, seg_type, NULL, false);
216a620a 831 set_data_blkaddr(dn);
bfad7c2d 832
976e4c50 833 /* update i_size */
81ca7350 834 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
976e4c50 835 dn->ofs_in_node;
09cbfeaf 836 if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
fc9581c8 837 f2fs_i_size_write(dn->inode,
09cbfeaf 838 ((loff_t)(fofs + 1) << PAGE_SHIFT));
bfad7c2d
JK
839 return 0;
840}
841
c040ff9d
JK
842static inline bool __force_buffered_io(struct inode *inode, int rw)
843{
1958593e 844 return (f2fs_encrypted_file(inode) ||
c040ff9d
JK
845 (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
846 F2FS_I_SB(inode)->s_ndevs);
847}
848
a7de6086 849int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
59b802e5 850{
b439b103 851 struct inode *inode = file_inode(iocb->ki_filp);
5b8db7fa 852 struct f2fs_map_blocks map;
d6d478a1 853 int flag;
a7de6086 854 int err = 0;
d6d478a1 855 bool direct_io = iocb->ki_flags & IOCB_DIRECT;
59b802e5 856
71ad682c 857 /* convert inline data for Direct I/O*/
d6d478a1 858 if (direct_io) {
71ad682c
WG
859 err = f2fs_convert_inline_inode(inode);
860 if (err)
861 return err;
862 }
863
dc91de78
JK
864 if (is_inode_flag_set(inode, FI_NO_PREALLOC))
865 return 0;
866
0080c507 867 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
dfd02e4d
CY
868 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
869 if (map.m_len > map.m_lblk)
870 map.m_len -= map.m_lblk;
871 else
872 map.m_len = 0;
873
da85985c 874 map.m_next_pgofs = NULL;
c4020b2d 875 map.m_next_extent = NULL;
d5097be5 876 map.m_seg_type = NO_CHECK_TYPE;
2a340760 877
d6d478a1 878 if (direct_io) {
d5097be5 879 map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint);
d6d478a1
CY
880 flag = __force_buffered_io(inode, WRITE) ?
881 F2FS_GET_BLOCK_PRE_AIO :
882 F2FS_GET_BLOCK_PRE_DIO;
883 goto map_blocks;
d5097be5 884 }
f2470371 885 if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
a7de6086
JK
886 err = f2fs_convert_inline_inode(inode);
887 if (err)
888 return err;
b439b103 889 }
d6d478a1 890 if (f2fs_has_inline_data(inode))
25006645 891 return err;
d6d478a1
CY
892
893 flag = F2FS_GET_BLOCK_PRE_AIO;
894
895map_blocks:
896 err = f2fs_map_blocks(inode, &map, 1, flag);
897 if (map.m_len > 0 && err == -ENOSPC) {
898 if (!direct_io)
899 set_inode_flag(inode, FI_NO_PREALLOC);
900 err = 0;
25006645 901 }
a7de6086 902 return err;
59b802e5
JK
903}
904
59c9081b
YH
905static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
906{
907 if (flag == F2FS_GET_BLOCK_PRE_AIO) {
908 if (lock)
909 down_read(&sbi->node_change);
910 else
911 up_read(&sbi->node_change);
912 } else {
913 if (lock)
914 f2fs_lock_op(sbi);
915 else
916 f2fs_unlock_op(sbi);
917 }
918}
919
0a8165d7 920/*
003a3e1d
JK
921 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
922 * f2fs_map_blocks structure.
4f4124d0
CY
923 * If original data blocks are allocated, then give them to blockdev.
924 * Otherwise,
925 * a. preallocate requested block addresses
926 * b. do not use extent cache for better performance
927 * c. give the block addresses to blockdev
eb47b800 928 */
d323d005 929int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
e2b4e2bc 930 int create, int flag)
eb47b800 931{
003a3e1d 932 unsigned int maxblocks = map->m_len;
eb47b800 933 struct dnode_of_data dn;
f9811703 934 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
ac6f1999 935 int mode = create ? ALLOC_NODE : LOOKUP_NODE;
46008c6d 936 pgoff_t pgofs, end_offset, end;
bfad7c2d 937 int err = 0, ofs = 1;
46008c6d
CY
938 unsigned int ofs_in_node, last_ofs_in_node;
939 blkcnt_t prealloc;
e15882b6 940 struct extent_info ei = {0,0,0};
7df3a431 941 block_t blkaddr;
c4020b2d 942 unsigned int start_pgofs;
eb47b800 943
dfd02e4d
CY
944 if (!maxblocks)
945 return 0;
946
003a3e1d
JK
947 map->m_len = 0;
948 map->m_flags = 0;
949
950 /* it only supports block size == page size */
951 pgofs = (pgoff_t)map->m_lblk;
46008c6d 952 end = pgofs + maxblocks;
eb47b800 953
24b84912 954 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
003a3e1d
JK
955 map->m_pblk = ei.blk + pgofs - ei.fofs;
956 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
957 map->m_flags = F2FS_MAP_MAPPED;
c4020b2d
CY
958 if (map->m_next_extent)
959 *map->m_next_extent = pgofs + map->m_len;
bfad7c2d 960 goto out;
a2e7d1bf 961 }
bfad7c2d 962
4fe71e88 963next_dnode:
59b802e5 964 if (create)
59c9081b 965 __do_map_lock(sbi, flag, true);
eb47b800
JK
966
967 /* When reading holes, we need its node page */
968 set_new_dnode(&dn, inode, NULL, NULL, 0);
bfad7c2d 969 err = get_dnode_of_data(&dn, pgofs, mode);
1ec79083 970 if (err) {
43473f96
CY
971 if (flag == F2FS_GET_BLOCK_BMAP)
972 map->m_pblk = 0;
da85985c 973 if (err == -ENOENT) {
bfad7c2d 974 err = 0;
da85985c
CY
975 if (map->m_next_pgofs)
976 *map->m_next_pgofs =
977 get_next_page_offset(&dn, pgofs);
c4020b2d
CY
978 if (map->m_next_extent)
979 *map->m_next_extent =
980 get_next_page_offset(&dn, pgofs);
da85985c 981 }
bfad7c2d 982 goto unlock_out;
848753aa 983 }
973163fc 984
c4020b2d 985 start_pgofs = pgofs;
46008c6d 986 prealloc = 0;
230436b3 987 last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
81ca7350 988 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
4fe71e88
CY
989
990next_block:
7a2af766 991 blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
4fe71e88
CY
992
993 if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
973163fc 994 if (create) {
f9811703
CY
995 if (unlikely(f2fs_cp_error(sbi))) {
996 err = -EIO;
4fe71e88 997 goto sync_out;
f9811703 998 }
24b84912 999 if (flag == F2FS_GET_BLOCK_PRE_AIO) {
46008c6d
CY
1000 if (blkaddr == NULL_ADDR) {
1001 prealloc++;
1002 last_ofs_in_node = dn.ofs_in_node;
1003 }
24b84912 1004 } else {
d5097be5
HL
1005 err = __allocate_data_block(&dn,
1006 map->m_seg_type);
6f2d8ed6 1007 if (!err)
91942321 1008 set_inode_flag(inode, FI_APPEND_WRITE);
24b84912 1009 }
973163fc 1010 if (err)
4fe71e88 1011 goto sync_out;
3f2be043 1012 map->m_flags |= F2FS_MAP_NEW;
4fe71e88 1013 blkaddr = dn.data_blkaddr;
973163fc 1014 } else {
43473f96
CY
1015 if (flag == F2FS_GET_BLOCK_BMAP) {
1016 map->m_pblk = 0;
1017 goto sync_out;
1018 }
c4020b2d
CY
1019 if (flag == F2FS_GET_BLOCK_PRECACHE)
1020 goto sync_out;
da85985c
CY
1021 if (flag == F2FS_GET_BLOCK_FIEMAP &&
1022 blkaddr == NULL_ADDR) {
1023 if (map->m_next_pgofs)
1024 *map->m_next_pgofs = pgofs + 1;
4c2ac6a8 1025 goto sync_out;
da85985c 1026 }
f3d98e74
CY
1027 if (flag != F2FS_GET_BLOCK_FIEMAP) {
1028 /* for defragment case */
1029 if (map->m_next_pgofs)
1030 *map->m_next_pgofs = pgofs + 1;
4fe71e88 1031 goto sync_out;
f3d98e74 1032 }
e2b4e2bc 1033 }
e2b4e2bc 1034 }
eb47b800 1035
46008c6d
CY
1036 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1037 goto skip;
1038
4fe71e88
CY
1039 if (map->m_len == 0) {
1040 /* preallocated unwritten block should be mapped for fiemap. */
1041 if (blkaddr == NEW_ADDR)
1042 map->m_flags |= F2FS_MAP_UNWRITTEN;
1043 map->m_flags |= F2FS_MAP_MAPPED;
1044
1045 map->m_pblk = blkaddr;
1046 map->m_len = 1;
1047 } else if ((map->m_pblk != NEW_ADDR &&
1048 blkaddr == (map->m_pblk + ofs)) ||
b439b103 1049 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
46008c6d 1050 flag == F2FS_GET_BLOCK_PRE_DIO) {
4fe71e88
CY
1051 ofs++;
1052 map->m_len++;
1053 } else {
1054 goto sync_out;
1055 }
bfad7c2d 1056
46008c6d 1057skip:
bfad7c2d
JK
1058 dn.ofs_in_node++;
1059 pgofs++;
1060
46008c6d
CY
1061 /* preallocate blocks in batch for one dnode page */
1062 if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1063 (pgofs == end || dn.ofs_in_node == end_offset)) {
7df3a431 1064
46008c6d
CY
1065 dn.ofs_in_node = ofs_in_node;
1066 err = reserve_new_blocks(&dn, prealloc);
1067 if (err)
1068 goto sync_out;
bfad7c2d 1069
46008c6d
CY
1070 map->m_len += dn.ofs_in_node - ofs_in_node;
1071 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1072 err = -ENOSPC;
1073 goto sync_out;
3104af35 1074 }
46008c6d
CY
1075 dn.ofs_in_node = end_offset;
1076 }
1077
1078 if (pgofs >= end)
1079 goto sync_out;
1080 else if (dn.ofs_in_node < end_offset)
1081 goto next_block;
1082
c4020b2d
CY
1083 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1084 if (map->m_flags & F2FS_MAP_MAPPED) {
1085 unsigned int ofs = start_pgofs - map->m_lblk;
1086
1087 f2fs_update_extent_cache_range(&dn,
1088 start_pgofs, map->m_pblk + ofs,
1089 map->m_len - ofs);
1090 }
1091 }
1092
46008c6d
CY
1093 f2fs_put_dnode(&dn);
1094
1095 if (create) {
59c9081b 1096 __do_map_lock(sbi, flag, false);
6f2d8ed6 1097 f2fs_balance_fs(sbi, dn.node_changed);
eb47b800 1098 }
46008c6d 1099 goto next_dnode;
7df3a431 1100
bfad7c2d 1101sync_out:
c4020b2d
CY
1102 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1103 if (map->m_flags & F2FS_MAP_MAPPED) {
1104 unsigned int ofs = start_pgofs - map->m_lblk;
1105
1106 f2fs_update_extent_cache_range(&dn,
1107 start_pgofs, map->m_pblk + ofs,
1108 map->m_len - ofs);
1109 }
1110 if (map->m_next_extent)
1111 *map->m_next_extent = pgofs + 1;
1112 }
eb47b800 1113 f2fs_put_dnode(&dn);
bfad7c2d 1114unlock_out:
2a340760 1115 if (create) {
59c9081b 1116 __do_map_lock(sbi, flag, false);
6f2d8ed6 1117 f2fs_balance_fs(sbi, dn.node_changed);
2a340760 1118 }
bfad7c2d 1119out:
003a3e1d 1120 trace_f2fs_map_blocks(inode, map, err);
bfad7c2d 1121 return err;
eb47b800
JK
1122}
1123
003a3e1d 1124static int __get_data_block(struct inode *inode, sector_t iblock,
da85985c 1125 struct buffer_head *bh, int create, int flag,
d5097be5 1126 pgoff_t *next_pgofs, int seg_type)
003a3e1d
JK
1127{
1128 struct f2fs_map_blocks map;
a7de6086 1129 int err;
003a3e1d
JK
1130
1131 map.m_lblk = iblock;
1132 map.m_len = bh->b_size >> inode->i_blkbits;
da85985c 1133 map.m_next_pgofs = next_pgofs;
c4020b2d 1134 map.m_next_extent = NULL;
d5097be5 1135 map.m_seg_type = seg_type;
003a3e1d 1136
a7de6086
JK
1137 err = f2fs_map_blocks(inode, &map, create, flag);
1138 if (!err) {
003a3e1d
JK
1139 map_bh(bh, inode->i_sb, map.m_pblk);
1140 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
b86e3307 1141 bh->b_size = (u64)map.m_len << inode->i_blkbits;
003a3e1d 1142 }
a7de6086 1143 return err;
003a3e1d
JK
1144}
1145
ccfb3000 1146static int get_data_block(struct inode *inode, sector_t iblock,
da85985c
CY
1147 struct buffer_head *bh_result, int create, int flag,
1148 pgoff_t *next_pgofs)
e2b4e2bc 1149{
da85985c 1150 return __get_data_block(inode, iblock, bh_result, create,
d5097be5
HL
1151 flag, next_pgofs,
1152 NO_CHECK_TYPE);
e2b4e2bc
CY
1153}
1154
1155static int get_data_block_dio(struct inode *inode, sector_t iblock,
ccfb3000
JK
1156 struct buffer_head *bh_result, int create)
1157{
e2b4e2bc 1158 return __get_data_block(inode, iblock, bh_result, create,
d5097be5
HL
1159 F2FS_GET_BLOCK_DEFAULT, NULL,
1160 rw_hint_to_seg_type(
1161 inode->i_write_hint));
ccfb3000
JK
1162}
1163
e2b4e2bc 1164static int get_data_block_bmap(struct inode *inode, sector_t iblock,
ccfb3000
JK
1165 struct buffer_head *bh_result, int create)
1166{
179448bf 1167 /* Block number less than F2FS MAX BLOCKS */
e0afc4d6 1168 if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
179448bf
YH
1169 return -EFBIG;
1170
e2b4e2bc 1171 return __get_data_block(inode, iblock, bh_result, create,
d5097be5
HL
1172 F2FS_GET_BLOCK_BMAP, NULL,
1173 NO_CHECK_TYPE);
ccfb3000
JK
1174}
1175
7f63eb77
JK
1176static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
1177{
1178 return (offset >> inode->i_blkbits);
1179}
1180
1181static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
1182{
1183 return (blk << inode->i_blkbits);
1184}
1185
442a9dbd
CY
1186static int f2fs_xattr_fiemap(struct inode *inode,
1187 struct fiemap_extent_info *fieinfo)
1188{
1189 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1190 struct page *page;
1191 struct node_info ni;
1192 __u64 phys = 0, len;
1193 __u32 flags;
1194 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1195 int err = 0;
1196
1197 if (f2fs_has_inline_xattr(inode)) {
1198 int offset;
1199
1200 page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1201 inode->i_ino, false);
1202 if (!page)
1203 return -ENOMEM;
1204
1205 get_node_info(sbi, inode->i_ino, &ni);
1206
1207 phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1208 offset = offsetof(struct f2fs_inode, i_addr) +
1209 sizeof(__le32) * (DEF_ADDRS_PER_INODE -
b323fd28 1210 get_inline_xattr_addrs(inode));
442a9dbd
CY
1211
1212 phys += offset;
1213 len = inline_xattr_size(inode);
1214
1215 f2fs_put_page(page, 1);
1216
1217 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1218
1219 if (!xnid)
1220 flags |= FIEMAP_EXTENT_LAST;
1221
1222 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1223 if (err || err == 1)
1224 return err;
1225 }
1226
1227 if (xnid) {
1228 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1229 if (!page)
1230 return -ENOMEM;
1231
1232 get_node_info(sbi, xnid, &ni);
1233
1234 phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1235 len = inode->i_sb->s_blocksize;
1236
1237 f2fs_put_page(page, 1);
1238
1239 flags = FIEMAP_EXTENT_LAST;
1240 }
1241
1242 if (phys)
1243 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1244
1245 return (err < 0 ? err : 0);
1246}
1247
9ab70134
JK
1248int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1249 u64 start, u64 len)
1250{
7f63eb77
JK
1251 struct buffer_head map_bh;
1252 sector_t start_blk, last_blk;
da85985c 1253 pgoff_t next_pgofs;
7f63eb77
JK
1254 u64 logical = 0, phys = 0, size = 0;
1255 u32 flags = 0;
7f63eb77
JK
1256 int ret = 0;
1257
c4020b2d
CY
1258 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1259 ret = f2fs_precache_extents(inode);
1260 if (ret)
1261 return ret;
1262 }
1263
442a9dbd 1264 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
7f63eb77
JK
1265 if (ret)
1266 return ret;
1267
f1b43d4c
CY
1268 inode_lock(inode);
1269
442a9dbd
CY
1270 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1271 ret = f2fs_xattr_fiemap(inode, fieinfo);
1272 goto out;
1273 }
1274
67f8cf3c
JK
1275 if (f2fs_has_inline_data(inode)) {
1276 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
1277 if (ret != -EAGAIN)
f1b43d4c 1278 goto out;
67f8cf3c
JK
1279 }
1280
7f63eb77
JK
1281 if (logical_to_blk(inode, len) == 0)
1282 len = blk_to_logical(inode, 1);
1283
1284 start_blk = logical_to_blk(inode, start);
1285 last_blk = logical_to_blk(inode, start + len - 1);
9a950d52 1286
7f63eb77
JK
1287next:
1288 memset(&map_bh, 0, sizeof(struct buffer_head));
1289 map_bh.b_size = len;
1290
e2b4e2bc 1291 ret = get_data_block(inode, start_blk, &map_bh, 0,
da85985c 1292 F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
7f63eb77
JK
1293 if (ret)
1294 goto out;
1295
1296 /* HOLE */
1297 if (!buffer_mapped(&map_bh)) {
da85985c 1298 start_blk = next_pgofs;
58736fa6
CY
1299
1300 if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
1301 F2FS_I_SB(inode)->max_file_blocks))
9a950d52 1302 goto prep_next;
58736fa6 1303
9a950d52
FL
1304 flags |= FIEMAP_EXTENT_LAST;
1305 }
7f63eb77 1306
da5af127
CY
1307 if (size) {
1308 if (f2fs_encrypted_inode(inode))
1309 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
1310
9a950d52
FL
1311 ret = fiemap_fill_next_extent(fieinfo, logical,
1312 phys, size, flags);
da5af127 1313 }
7f63eb77 1314
9a950d52
FL
1315 if (start_blk > last_blk || ret)
1316 goto out;
7f63eb77 1317
9a950d52
FL
1318 logical = blk_to_logical(inode, start_blk);
1319 phys = blk_to_logical(inode, map_bh.b_blocknr);
1320 size = map_bh.b_size;
1321 flags = 0;
1322 if (buffer_unwritten(&map_bh))
1323 flags = FIEMAP_EXTENT_UNWRITTEN;
7f63eb77 1324
9a950d52 1325 start_blk += logical_to_blk(inode, size);
7f63eb77 1326
9a950d52 1327prep_next:
7f63eb77
JK
1328 cond_resched();
1329 if (fatal_signal_pending(current))
1330 ret = -EINTR;
1331 else
1332 goto next;
1333out:
1334 if (ret == 1)
1335 ret = 0;
1336
5955102c 1337 inode_unlock(inode);
7f63eb77 1338 return ret;
9ab70134
JK
1339}
1340
f1e88660
JK
1341/*
1342 * This function was originally taken from fs/mpage.c, and customized for f2fs.
1343 * Major change was from block_size == page_size in f2fs by default.
1344 */
1345static int f2fs_mpage_readpages(struct address_space *mapping,
1346 struct list_head *pages, struct page *page,
1347 unsigned nr_pages)
1348{
1349 struct bio *bio = NULL;
f1e88660
JK
1350 sector_t last_block_in_bio = 0;
1351 struct inode *inode = mapping->host;
1352 const unsigned blkbits = inode->i_blkbits;
1353 const unsigned blocksize = 1 << blkbits;
1354 sector_t block_in_file;
1355 sector_t last_block;
1356 sector_t last_block_in_file;
1357 sector_t block_nr;
f1e88660
JK
1358 struct f2fs_map_blocks map;
1359
1360 map.m_pblk = 0;
1361 map.m_lblk = 0;
1362 map.m_len = 0;
1363 map.m_flags = 0;
da85985c 1364 map.m_next_pgofs = NULL;
c4020b2d 1365 map.m_next_extent = NULL;
d5097be5 1366 map.m_seg_type = NO_CHECK_TYPE;
f1e88660 1367
736c0a74 1368 for (; nr_pages; nr_pages--) {
f1e88660 1369 if (pages) {
939afa94 1370 page = list_last_entry(pages, struct page, lru);
a83d50bc
KM
1371
1372 prefetchw(&page->flags);
f1e88660
JK
1373 list_del(&page->lru);
1374 if (add_to_page_cache_lru(page, mapping,
8a5c743e
MH
1375 page->index,
1376 readahead_gfp_mask(mapping)))
f1e88660
JK
1377 goto next_page;
1378 }
1379
1380 block_in_file = (sector_t)page->index;
1381 last_block = block_in_file + nr_pages;
1382 last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
1383 blkbits;
1384 if (last_block > last_block_in_file)
1385 last_block = last_block_in_file;
1386
1387 /*
1388 * Map blocks using the previous result first.
1389 */
1390 if ((map.m_flags & F2FS_MAP_MAPPED) &&
1391 block_in_file > map.m_lblk &&
1392 block_in_file < (map.m_lblk + map.m_len))
1393 goto got_it;
1394
1395 /*
1396 * Then do more f2fs_map_blocks() calls until we are
1397 * done with this page.
1398 */
1399 map.m_flags = 0;
1400
1401 if (block_in_file < last_block) {
1402 map.m_lblk = block_in_file;
1403 map.m_len = last_block - block_in_file;
1404
46c9e141 1405 if (f2fs_map_blocks(inode, &map, 0,
f2220c7f 1406 F2FS_GET_BLOCK_DEFAULT))
f1e88660
JK
1407 goto set_error_page;
1408 }
1409got_it:
1410 if ((map.m_flags & F2FS_MAP_MAPPED)) {
1411 block_nr = map.m_pblk + block_in_file - map.m_lblk;
1412 SetPageMappedToDisk(page);
1413
1414 if (!PageUptodate(page) && !cleancache_get_page(page)) {
1415 SetPageUptodate(page);
1416 goto confused;
1417 }
1418 } else {
09cbfeaf 1419 zero_user_segment(page, 0, PAGE_SIZE);
237c0790
JK
1420 if (!PageUptodate(page))
1421 SetPageUptodate(page);
f1e88660
JK
1422 unlock_page(page);
1423 goto next_page;
1424 }
1425
1426 /*
1427 * This page will go to BIO. Do we need to send this
1428 * BIO off first?
1429 */
3c62be17
JK
1430 if (bio && (last_block_in_bio != block_nr - 1 ||
1431 !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
f1e88660 1432submit_and_realloc:
4fc29c1a 1433 __submit_bio(F2FS_I_SB(inode), bio, DATA);
f1e88660
JK
1434 bio = NULL;
1435 }
1436 if (bio == NULL) {
13ba41e3 1437 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
1d353eb7
JK
1438 if (IS_ERR(bio)) {
1439 bio = NULL;
f1e88660 1440 goto set_error_page;
4375a336 1441 }
f1e88660
JK
1442 }
1443
1444 if (bio_add_page(bio, page, blocksize, 0) < blocksize)
1445 goto submit_and_realloc;
1446
1447 last_block_in_bio = block_nr;
1448 goto next_page;
1449set_error_page:
1450 SetPageError(page);
09cbfeaf 1451 zero_user_segment(page, 0, PAGE_SIZE);
f1e88660
JK
1452 unlock_page(page);
1453 goto next_page;
1454confused:
1455 if (bio) {
4fc29c1a 1456 __submit_bio(F2FS_I_SB(inode), bio, DATA);
f1e88660
JK
1457 bio = NULL;
1458 }
1459 unlock_page(page);
1460next_page:
1461 if (pages)
09cbfeaf 1462 put_page(page);
f1e88660
JK
1463 }
1464 BUG_ON(pages && !list_empty(pages));
1465 if (bio)
4fc29c1a 1466 __submit_bio(F2FS_I_SB(inode), bio, DATA);
f1e88660
JK
1467 return 0;
1468}
1469
eb47b800
JK
1470static int f2fs_read_data_page(struct file *file, struct page *page)
1471{
9ffe0fb5 1472 struct inode *inode = page->mapping->host;
b3d208f9 1473 int ret = -EAGAIN;
9ffe0fb5 1474
c20e89cd
CY
1475 trace_f2fs_readpage(page, DATA);
1476
e1c42045 1477 /* If the file has inline data, try to read it directly */
9ffe0fb5
HL
1478 if (f2fs_has_inline_data(inode))
1479 ret = f2fs_read_inline_data(inode, page);
b3d208f9 1480 if (ret == -EAGAIN)
f1e88660 1481 ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
9ffe0fb5 1482 return ret;
eb47b800
JK
1483}
1484
1485static int f2fs_read_data_pages(struct file *file,
1486 struct address_space *mapping,
1487 struct list_head *pages, unsigned nr_pages)
1488{
71cb4aff 1489 struct inode *inode = mapping->host;
939afa94 1490 struct page *page = list_last_entry(pages, struct page, lru);
b8c29400
CY
1491
1492 trace_f2fs_readpages(inode, page, nr_pages);
9ffe0fb5
HL
1493
1494 /* If the file has inline data, skip readpages */
1495 if (f2fs_has_inline_data(inode))
1496 return 0;
1497
f1e88660 1498 return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
eb47b800
JK
1499}
1500
7eab0c0d
HP
1501static int encrypt_one_page(struct f2fs_io_info *fio)
1502{
1503 struct inode *inode = fio->page->mapping->host;
1504 gfp_t gfp_flags = GFP_NOFS;
1505
1958593e 1506 if (!f2fs_encrypted_file(inode))
7eab0c0d
HP
1507 return 0;
1508
1509 /* wait for GCed encrypted page writeback */
d4c759ee 1510 f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
7eab0c0d
HP
1511
1512retry_encrypt:
1513 fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
1514 PAGE_SIZE, 0, fio->page->index, gfp_flags);
1515 if (!IS_ERR(fio->encrypted_page))
1516 return 0;
1517
1518 /* flush pending IOs and wait for a while in the ENOMEM case */
1519 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
b9109b0e 1520 f2fs_flush_merged_writes(fio->sbi);
7eab0c0d
HP
1521 congestion_wait(BLK_RW_ASYNC, HZ/50);
1522 gfp_flags |= __GFP_NOFAIL;
1523 goto retry_encrypt;
1524 }
1525 return PTR_ERR(fio->encrypted_page);
1526}
1527
bb9e3bb8
CY
1528static inline bool check_inplace_update_policy(struct inode *inode,
1529 struct f2fs_io_info *fio)
7eab0c0d 1530{
bb9e3bb8
CY
1531 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1532 unsigned int policy = SM_I(sbi)->ipu_policy;
7eab0c0d 1533
bb9e3bb8
CY
1534 if (policy & (0x1 << F2FS_IPU_FORCE))
1535 return true;
1536 if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
1537 return true;
1538 if (policy & (0x1 << F2FS_IPU_UTIL) &&
1539 utilization(sbi) > SM_I(sbi)->min_ipu_util)
1540 return true;
1541 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
1542 utilization(sbi) > SM_I(sbi)->min_ipu_util)
1543 return true;
1544
1545 /*
1546 * IPU for rewrite async pages
1547 */
1548 if (policy & (0x1 << F2FS_IPU_ASYNC) &&
1549 fio && fio->op == REQ_OP_WRITE &&
1550 !(fio->op_flags & REQ_SYNC) &&
1551 !f2fs_encrypted_inode(inode))
1552 return true;
1553
1554 /* this is only set during fdatasync */
1555 if (policy & (0x1 << F2FS_IPU_FSYNC) &&
1556 is_inode_flag_set(inode, FI_NEED_IPU))
1557 return true;
1558
1559 return false;
1560}
1561
1562bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
1563{
1ad71a27
JK
1564 if (f2fs_is_pinned_file(inode))
1565 return true;
bb9e3bb8
CY
1566
1567 /* if this is cold file, we should overwrite to avoid fragmentation */
1568 if (file_is_cold(inode))
1569 return true;
1570
1571 return check_inplace_update_policy(inode, fio);
1572}
1573
1574bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
1575{
1576 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1577
1578 if (test_opt(sbi, LFS))
1579 return true;
1580 if (S_ISDIR(inode->i_mode))
1581 return true;
1582 if (f2fs_is_atomic_file(inode))
1583 return true;
1584 if (fio) {
1585 if (is_cold_data(fio->page))
1586 return true;
1587 if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
1588 return true;
1589 }
1590 return false;
1591}
1592
7eab0c0d
HP
1593static inline bool need_inplace_update(struct f2fs_io_info *fio)
1594{
1595 struct inode *inode = fio->page->mapping->host;
1596
bb9e3bb8 1597 if (should_update_outplace(inode, fio))
7eab0c0d
HP
1598 return false;
1599
bb9e3bb8 1600 return should_update_inplace(inode, fio);
7eab0c0d
HP
1601}
1602
a817737e
JK
1603static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
1604{
1605 if (fio->old_blkaddr == NEW_ADDR)
1606 return false;
1607 if (fio->old_blkaddr == NULL_ADDR)
1608 return false;
1609 return true;
1610}
1611
05ca3632 1612int do_write_data_page(struct f2fs_io_info *fio)
eb47b800 1613{
05ca3632 1614 struct page *page = fio->page;
eb47b800 1615 struct inode *inode = page->mapping->host;
eb47b800 1616 struct dnode_of_data dn;
e959c8f5
HP
1617 struct extent_info ei = {0,0,0};
1618 bool ipu_force = false;
eb47b800
JK
1619 int err = 0;
1620
1621 set_new_dnode(&dn, inode, NULL, NULL, 0);
e959c8f5
HP
1622 if (need_inplace_update(fio) &&
1623 f2fs_lookup_extent_cache(inode, page->index, &ei)) {
1624 fio->old_blkaddr = ei.blk + page->index - ei.fofs;
a817737e
JK
1625
1626 if (valid_ipu_blkaddr(fio)) {
e959c8f5 1627 ipu_force = true;
cc15620b 1628 fio->need_lock = LOCK_DONE;
e959c8f5
HP
1629 goto got_it;
1630 }
1631 }
279d6df2 1632
d29460e5
JK
1633 /* Deadlock due to between page->lock and f2fs_lock_op */
1634 if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
1635 return -EAGAIN;
279d6df2 1636
266e97a8 1637 err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
eb47b800 1638 if (err)
279d6df2 1639 goto out;
eb47b800 1640
28bc106b 1641 fio->old_blkaddr = dn.data_blkaddr;
eb47b800
JK
1642
1643 /* This page is already truncated */
7a9d7548 1644 if (fio->old_blkaddr == NULL_ADDR) {
2bca1e23 1645 ClearPageUptodate(page);
eb47b800 1646 goto out_writepage;
2bca1e23 1647 }
e959c8f5 1648got_it:
eb47b800
JK
1649 /*
1650 * If current allocation needs SSR,
1651 * it had better in-place writes for updated data.
1652 */
a817737e 1653 if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
cc15620b
JK
1654 err = encrypt_one_page(fio);
1655 if (err)
1656 goto out_writepage;
1657
1658 set_page_writeback(page);
279d6df2 1659 f2fs_put_dnode(&dn);
cc15620b 1660 if (fio->need_lock == LOCK_REQ)
279d6df2 1661 f2fs_unlock_op(fio->sbi);
d1b3e72d 1662 err = rewrite_data_page(fio);
7eab0c0d 1663 trace_f2fs_do_write_data_page(fio->page, IPU);
91942321 1664 set_inode_flag(inode, FI_UPDATE_WRITE);
279d6df2 1665 return err;
eb47b800 1666 }
279d6df2 1667
cc15620b
JK
1668 if (fio->need_lock == LOCK_RETRY) {
1669 if (!f2fs_trylock_op(fio->sbi)) {
1670 err = -EAGAIN;
1671 goto out_writepage;
1672 }
1673 fio->need_lock = LOCK_REQ;
1674 }
1675
1676 err = encrypt_one_page(fio);
1677 if (err)
1678 goto out_writepage;
1679
1680 set_page_writeback(page);
1681
279d6df2
HP
1682 /* LFS mode write path */
1683 write_data_page(&dn, fio);
1684 trace_f2fs_do_write_data_page(page, OPU);
1685 set_inode_flag(inode, FI_APPEND_WRITE);
1686 if (page->index == 0)
1687 set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
eb47b800
JK
1688out_writepage:
1689 f2fs_put_dnode(&dn);
279d6df2 1690out:
cc15620b 1691 if (fio->need_lock == LOCK_REQ)
279d6df2 1692 f2fs_unlock_op(fio->sbi);
eb47b800
JK
1693 return err;
1694}
1695
d68f735b 1696static int __write_data_page(struct page *page, bool *submitted,
b0af6d49
CY
1697 struct writeback_control *wbc,
1698 enum iostat_type io_type)
eb47b800
JK
1699{
1700 struct inode *inode = page->mapping->host;
4081363f 1701 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
eb47b800
JK
1702 loff_t i_size = i_size_read(inode);
1703 const pgoff_t end_index = ((unsigned long long) i_size)
09cbfeaf 1704 >> PAGE_SHIFT;
26de9b11 1705 loff_t psize = (page->index + 1) << PAGE_SHIFT;
9ffe0fb5 1706 unsigned offset = 0;
39936837 1707 bool need_balance_fs = false;
eb47b800 1708 int err = 0;
458e6197 1709 struct f2fs_io_info fio = {
05ca3632 1710 .sbi = sbi,
39d787be 1711 .ino = inode->i_ino,
458e6197 1712 .type = DATA,
04d328de 1713 .op = REQ_OP_WRITE,
7637241e 1714 .op_flags = wbc_to_write_flags(wbc),
e959c8f5 1715 .old_blkaddr = NULL_ADDR,
05ca3632 1716 .page = page,
4375a336 1717 .encrypted_page = NULL,
d68f735b 1718 .submitted = false,
cc15620b 1719 .need_lock = LOCK_RETRY,
b0af6d49 1720 .io_type = io_type,
578c6478 1721 .io_wbc = wbc,
458e6197 1722 };
eb47b800 1723
ecda0de3
CY
1724 trace_f2fs_writepage(page, DATA);
1725
db198ae0
CY
1726 /* we should bypass data pages to proceed the kworkder jobs */
1727 if (unlikely(f2fs_cp_error(sbi))) {
1728 mapping_set_error(page->mapping, -EIO);
1729 goto out;
1730 }
1731
0771fcc7
CY
1732 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1733 goto redirty_out;
1734
eb47b800 1735 if (page->index < end_index)
39936837 1736 goto write;
eb47b800
JK
1737
1738 /*
1739 * If the offset is out-of-range of file size,
1740 * this page does not have to be written to disk.
1741 */
09cbfeaf 1742 offset = i_size & (PAGE_SIZE - 1);
76f60268 1743 if ((page->index >= end_index + 1) || !offset)
39936837 1744 goto out;
eb47b800 1745
09cbfeaf 1746 zero_user_segment(page, offset, PAGE_SIZE);
39936837 1747write:
1e84371f
JK
1748 if (f2fs_is_drop_cache(inode))
1749 goto out;
e6e5f561
JK
1750 /* we should not write 0'th page having journal header */
1751 if (f2fs_is_volatile_file(inode) && (!page->index ||
1752 (!wbc->for_reclaim &&
1753 available_free_memory(sbi, BASE_CHECK))))
1e84371f 1754 goto redirty_out;
eb47b800 1755
39936837 1756 /* Dentry blocks are controlled by checkpoint */
eb47b800 1757 if (S_ISDIR(inode->i_mode)) {
cc15620b 1758 fio.need_lock = LOCK_DONE;
05ca3632 1759 err = do_write_data_page(&fio);
8618b881
JK
1760 goto done;
1761 }
9ffe0fb5 1762
8618b881 1763 if (!wbc->for_reclaim)
39936837 1764 need_balance_fs = true;
7f3037a5 1765 else if (has_not_enough_free_secs(sbi, 0, 0))
39936837 1766 goto redirty_out;
ef095d19
JK
1767 else
1768 set_inode_flag(inode, FI_HOT_DATA);
eb47b800 1769
b3d208f9 1770 err = -EAGAIN;
dd7b2333 1771 if (f2fs_has_inline_data(inode)) {
b3d208f9 1772 err = f2fs_write_inline_data(inode, page);
dd7b2333
YH
1773 if (!err)
1774 goto out;
1775 }
279d6df2 1776
cc15620b 1777 if (err == -EAGAIN) {
05ca3632 1778 err = do_write_data_page(&fio);
cc15620b
JK
1779 if (err == -EAGAIN) {
1780 fio.need_lock = LOCK_REQ;
1781 err = do_write_data_page(&fio);
1782 }
1783 }
a0d00fad 1784
eb449797
CY
1785 if (err) {
1786 file_set_keep_isize(inode);
1787 } else {
1788 down_write(&F2FS_I(inode)->i_sem);
1789 if (F2FS_I(inode)->last_disk_size < psize)
1790 F2FS_I(inode)->last_disk_size = psize;
1791 up_write(&F2FS_I(inode)->i_sem);
1792 }
279d6df2 1793
8618b881
JK
1794done:
1795 if (err && err != -ENOENT)
1796 goto redirty_out;
eb47b800 1797
39936837 1798out:
a7ffdbe2 1799 inode_dec_dirty_pages(inode);
2bca1e23
JK
1800 if (err)
1801 ClearPageUptodate(page);
0c3a5797
CY
1802
1803 if (wbc->for_reclaim) {
b9109b0e 1804 f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
ef095d19 1805 clear_inode_flag(inode, FI_HOT_DATA);
0c3a5797 1806 remove_dirty_inode(inode);
d68f735b 1807 submitted = NULL;
0c3a5797
CY
1808 }
1809
eb47b800 1810 unlock_page(page);
a7881893
JK
1811 if (!S_ISDIR(inode->i_mode))
1812 f2fs_balance_fs(sbi, need_balance_fs);
0c3a5797 1813
d68f735b 1814 if (unlikely(f2fs_cp_error(sbi))) {
b9109b0e 1815 f2fs_submit_merged_write(sbi, DATA);
d68f735b
JK
1816 submitted = NULL;
1817 }
1818
1819 if (submitted)
1820 *submitted = fio.submitted;
0c3a5797 1821
eb47b800
JK
1822 return 0;
1823
eb47b800 1824redirty_out:
76f60268 1825 redirty_page_for_writepage(wbc, page);
0002b61b
CY
1826 if (!err)
1827 return AOP_WRITEPAGE_ACTIVATE;
b230e6ca
JK
1828 unlock_page(page);
1829 return err;
fa9150a8
NJ
1830}
1831
f566bae8
JK
1832static int f2fs_write_data_page(struct page *page,
1833 struct writeback_control *wbc)
1834{
b0af6d49 1835 return __write_data_page(page, NULL, wbc, FS_DATA_IO);
f566bae8
JK
1836}
1837
8f46dcae
CY
1838/*
1839 * This function was copied from write_cche_pages from mm/page-writeback.c.
1840 * The major change is making write step of cold data page separately from
1841 * warm/hot data page.
1842 */
1843static int f2fs_write_cache_pages(struct address_space *mapping,
b0af6d49
CY
1844 struct writeback_control *wbc,
1845 enum iostat_type io_type)
8f46dcae
CY
1846{
1847 int ret = 0;
1848 int done = 0;
1849 struct pagevec pvec;
1850 int nr_pages;
1851 pgoff_t uninitialized_var(writeback_index);
1852 pgoff_t index;
1853 pgoff_t end; /* Inclusive */
1854 pgoff_t done_index;
942fd319 1855 pgoff_t last_idx = ULONG_MAX;
8f46dcae
CY
1856 int cycled;
1857 int range_whole = 0;
1858 int tag;
8f46dcae 1859
86679820 1860 pagevec_init(&pvec);
46ae957f 1861
ef095d19
JK
1862 if (get_dirty_pages(mapping->host) <=
1863 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
1864 set_inode_flag(mapping->host, FI_HOT_DATA);
1865 else
1866 clear_inode_flag(mapping->host, FI_HOT_DATA);
1867
8f46dcae
CY
1868 if (wbc->range_cyclic) {
1869 writeback_index = mapping->writeback_index; /* prev offset */
1870 index = writeback_index;
1871 if (index == 0)
1872 cycled = 1;
1873 else
1874 cycled = 0;
1875 end = -1;
1876 } else {
09cbfeaf
KS
1877 index = wbc->range_start >> PAGE_SHIFT;
1878 end = wbc->range_end >> PAGE_SHIFT;
8f46dcae
CY
1879 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1880 range_whole = 1;
1881 cycled = 1; /* ignore range_cyclic tests */
1882 }
1883 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
1884 tag = PAGECACHE_TAG_TOWRITE;
1885 else
1886 tag = PAGECACHE_TAG_DIRTY;
1887retry:
1888 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
1889 tag_pages_for_writeback(mapping, index, end);
1890 done_index = index;
1891 while (!done && (index <= end)) {
1892 int i;
1893
69c4f35d 1894 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
67fd707f 1895 tag);
8f46dcae
CY
1896 if (nr_pages == 0)
1897 break;
1898
1899 for (i = 0; i < nr_pages; i++) {
1900 struct page *page = pvec.pages[i];
d68f735b 1901 bool submitted = false;
8f46dcae 1902
8f46dcae 1903 done_index = page->index;
d29460e5 1904retry_write:
8f46dcae
CY
1905 lock_page(page);
1906
1907 if (unlikely(page->mapping != mapping)) {
1908continue_unlock:
1909 unlock_page(page);
1910 continue;
1911 }
1912
1913 if (!PageDirty(page)) {
1914 /* someone wrote it for us */
1915 goto continue_unlock;
1916 }
1917
8f46dcae
CY
1918 if (PageWriteback(page)) {
1919 if (wbc->sync_mode != WB_SYNC_NONE)
fec1d657
JK
1920 f2fs_wait_on_page_writeback(page,
1921 DATA, true);
8f46dcae
CY
1922 else
1923 goto continue_unlock;
1924 }
1925
1926 BUG_ON(PageWriteback(page));
1927 if (!clear_page_dirty_for_io(page))
1928 goto continue_unlock;
1929
b0af6d49 1930 ret = __write_data_page(page, &submitted, wbc, io_type);
8f46dcae 1931 if (unlikely(ret)) {
0002b61b
CY
1932 /*
1933 * keep nr_to_write, since vfs uses this to
1934 * get # of written pages.
1935 */
1936 if (ret == AOP_WRITEPAGE_ACTIVATE) {
1937 unlock_page(page);
1938 ret = 0;
1939 continue;
d29460e5
JK
1940 } else if (ret == -EAGAIN) {
1941 ret = 0;
1942 if (wbc->sync_mode == WB_SYNC_ALL) {
1943 cond_resched();
1944 congestion_wait(BLK_RW_ASYNC,
1945 HZ/50);
1946 goto retry_write;
1947 }
1948 continue;
0002b61b 1949 }
b230e6ca
JK
1950 done_index = page->index + 1;
1951 done = 1;
1952 break;
d68f735b 1953 } else if (submitted) {
942fd319 1954 last_idx = page->index;
8f46dcae
CY
1955 }
1956
687de7f1
JK
1957 /* give a priority to WB_SYNC threads */
1958 if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
1959 --wbc->nr_to_write <= 0) &&
1960 wbc->sync_mode == WB_SYNC_NONE) {
8f46dcae
CY
1961 done = 1;
1962 break;
1963 }
1964 }
1965 pagevec_release(&pvec);
1966 cond_resched();
1967 }
1968
8f46dcae
CY
1969 if (!cycled && !done) {
1970 cycled = 1;
1971 index = 0;
1972 end = writeback_index - 1;
1973 goto retry;
1974 }
1975 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1976 mapping->writeback_index = done_index;
1977
942fd319 1978 if (last_idx != ULONG_MAX)
b9109b0e
JK
1979 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
1980 0, last_idx, DATA);
6ca56ca4 1981
8f46dcae
CY
1982 return ret;
1983}
1984
b0af6d49
CY
1985int __f2fs_write_data_pages(struct address_space *mapping,
1986 struct writeback_control *wbc,
1987 enum iostat_type io_type)
eb47b800
JK
1988{
1989 struct inode *inode = mapping->host;
4081363f 1990 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
9dfa1baf 1991 struct blk_plug plug;
eb47b800 1992 int ret;
eb47b800 1993
cfb185a1 1994 /* deal with chardevs and other special file */
1995 if (!mapping->a_ops->writepage)
1996 return 0;
1997
6a290544
CY
1998 /* skip writing if there is no dirty page in this inode */
1999 if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
2000 return 0;
2001
0771fcc7
CY
2002 /* during POR, we don't need to trigger writepage at all. */
2003 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2004 goto skip_write;
2005
a1257023
JK
2006 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
2007 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
2008 available_free_memory(sbi, DIRTY_DENTS))
2009 goto skip_write;
2010
d323d005 2011 /* skip writing during file defragment */
91942321 2012 if (is_inode_flag_set(inode, FI_DO_DEFRAG))
d323d005
CY
2013 goto skip_write;
2014
d31c7c3f
YH
2015 trace_f2fs_writepages(mapping->host, wbc, DATA);
2016
687de7f1
JK
2017 /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
2018 if (wbc->sync_mode == WB_SYNC_ALL)
2019 atomic_inc(&sbi->wb_sync_req);
2020 else if (atomic_read(&sbi->wb_sync_req))
2021 goto skip_write;
2022
9dfa1baf 2023 blk_start_plug(&plug);
b0af6d49 2024 ret = f2fs_write_cache_pages(mapping, wbc, io_type);
9dfa1baf 2025 blk_finish_plug(&plug);
687de7f1
JK
2026
2027 if (wbc->sync_mode == WB_SYNC_ALL)
2028 atomic_dec(&sbi->wb_sync_req);
28ea6162
JK
2029 /*
2030 * if some pages were truncated, we cannot guarantee its mapping->host
2031 * to detect pending bios.
2032 */
458e6197 2033
c227f912 2034 remove_dirty_inode(inode);
eb47b800 2035 return ret;
d3baf95d
JK
2036
2037skip_write:
a7ffdbe2 2038 wbc->pages_skipped += get_dirty_pages(inode);
d31c7c3f 2039 trace_f2fs_writepages(mapping->host, wbc, DATA);
d3baf95d 2040 return 0;
eb47b800
JK
2041}
2042
b0af6d49
CY
2043static int f2fs_write_data_pages(struct address_space *mapping,
2044 struct writeback_control *wbc)
2045{
2046 struct inode *inode = mapping->host;
2047
2048 return __f2fs_write_data_pages(mapping, wbc,
2049 F2FS_I(inode)->cp_task == current ?
2050 FS_CP_DATA_IO : FS_DATA_IO);
2051}
2052
3aab8f82
CY
2053static void f2fs_write_failed(struct address_space *mapping, loff_t to)
2054{
2055 struct inode *inode = mapping->host;
819d9153 2056 loff_t i_size = i_size_read(inode);
3aab8f82 2057
819d9153 2058 if (to > i_size) {
5a3a2d83 2059 down_write(&F2FS_I(inode)->i_mmap_sem);
819d9153
JK
2060 truncate_pagecache(inode, i_size);
2061 truncate_blocks(inode, i_size, true);
5a3a2d83 2062 up_write(&F2FS_I(inode)->i_mmap_sem);
3aab8f82
CY
2063 }
2064}
2065
2aadac08
JK
2066static int prepare_write_begin(struct f2fs_sb_info *sbi,
2067 struct page *page, loff_t pos, unsigned len,
2068 block_t *blk_addr, bool *node_changed)
2069{
2070 struct inode *inode = page->mapping->host;
2071 pgoff_t index = page->index;
2072 struct dnode_of_data dn;
2073 struct page *ipage;
b4d07a3e 2074 bool locked = false;
e15882b6 2075 struct extent_info ei = {0,0,0};
2aadac08
JK
2076 int err = 0;
2077
24b84912
JK
2078 /*
2079 * we already allocated all the blocks, so we don't need to get
2080 * the block addresses when there is no need to fill the page.
2081 */
dc91de78
JK
2082 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
2083 !is_inode_flag_set(inode, FI_NO_PREALLOC))
24b84912
JK
2084 return 0;
2085
b4d07a3e 2086 if (f2fs_has_inline_data(inode) ||
09cbfeaf 2087 (pos & PAGE_MASK) >= i_size_read(inode)) {
59c9081b 2088 __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
b4d07a3e
JK
2089 locked = true;
2090 }
2091restart:
2aadac08
JK
2092 /* check inline_data */
2093 ipage = get_node_page(sbi, inode->i_ino);
2094 if (IS_ERR(ipage)) {
2095 err = PTR_ERR(ipage);
2096 goto unlock_out;
2097 }
2098
2099 set_new_dnode(&dn, inode, ipage, ipage, 0);
2100
2101 if (f2fs_has_inline_data(inode)) {
f2470371 2102 if (pos + len <= MAX_INLINE_DATA(inode)) {
2aadac08 2103 read_inline_data(page, ipage);
91942321 2104 set_inode_flag(inode, FI_DATA_EXIST);
ab47036d
CY
2105 if (inode->i_nlink)
2106 set_inline_node(ipage);
2aadac08
JK
2107 } else {
2108 err = f2fs_convert_inline_page(&dn, page);
2109 if (err)
b4d07a3e
JK
2110 goto out;
2111 if (dn.data_blkaddr == NULL_ADDR)
2112 err = f2fs_get_block(&dn, index);
2113 }
2114 } else if (locked) {
2115 err = f2fs_get_block(&dn, index);
2116 } else {
2117 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
2118 dn.data_blkaddr = ei.blk + index - ei.fofs;
2119 } else {
b4d07a3e
JK
2120 /* hole case */
2121 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
4da7bf5a 2122 if (err || dn.data_blkaddr == NULL_ADDR) {
b4d07a3e 2123 f2fs_put_dnode(&dn);
59c9081b
YH
2124 __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
2125 true);
b4d07a3e
JK
2126 locked = true;
2127 goto restart;
2128 }
2aadac08
JK
2129 }
2130 }
b4d07a3e 2131
2aadac08
JK
2132 /* convert_inline_page can make node_changed */
2133 *blk_addr = dn.data_blkaddr;
2134 *node_changed = dn.node_changed;
b4d07a3e 2135out:
2aadac08
JK
2136 f2fs_put_dnode(&dn);
2137unlock_out:
b4d07a3e 2138 if (locked)
59c9081b 2139 __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
2aadac08
JK
2140 return err;
2141}
2142
eb47b800
JK
2143static int f2fs_write_begin(struct file *file, struct address_space *mapping,
2144 loff_t pos, unsigned len, unsigned flags,
2145 struct page **pagep, void **fsdata)
2146{
2147 struct inode *inode = mapping->host;
4081363f 2148 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
86531d6b 2149 struct page *page = NULL;
09cbfeaf 2150 pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
a2e2e76b 2151 bool need_balance = false, drop_atomic = false;
2aadac08 2152 block_t blkaddr = NULL_ADDR;
eb47b800
JK
2153 int err = 0;
2154
62aed044
CY
2155 trace_f2fs_write_begin(inode, pos, len, flags);
2156
57864ae5
JK
2157 if (f2fs_is_atomic_file(inode) &&
2158 !available_free_memory(sbi, INMEM_PAGES)) {
2159 err = -ENOMEM;
a2e2e76b 2160 drop_atomic = true;
57864ae5
JK
2161 goto fail;
2162 }
2163
5f727395
JK
2164 /*
2165 * We should check this at this moment to avoid deadlock on inode page
2166 * and #0 page. The locking rule for inline_data conversion should be:
2167 * lock_page(page #0) -> lock_page(inode_page)
2168 */
2169 if (index != 0) {
2170 err = f2fs_convert_inline_inode(inode);
2171 if (err)
2172 goto fail;
2173 }
afcb7ca0 2174repeat:
86d54795
JK
2175 /*
2176 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
2177 * wait_for_stable_page. Will wait that below with our IO control.
2178 */
01eccef7 2179 page = f2fs_pagecache_get_page(mapping, index,
86d54795 2180 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3aab8f82
CY
2181 if (!page) {
2182 err = -ENOMEM;
2183 goto fail;
2184 }
d5f66990 2185
eb47b800
JK
2186 *pagep = page;
2187
2aadac08
JK
2188 err = prepare_write_begin(sbi, page, pos, len,
2189 &blkaddr, &need_balance);
9ba69cf9 2190 if (err)
2aadac08 2191 goto fail;
9ba69cf9 2192
7f3037a5 2193 if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
2a340760 2194 unlock_page(page);
2c4db1a6 2195 f2fs_balance_fs(sbi, true);
2a340760
JK
2196 lock_page(page);
2197 if (page->mapping != mapping) {
2198 /* The page got truncated from under us */
2199 f2fs_put_page(page, 1);
2200 goto repeat;
2201 }
2202 }
2203
fec1d657 2204 f2fs_wait_on_page_writeback(page, DATA, false);
b3d208f9 2205
08b39fbd 2206 /* wait for GCed encrypted page writeback */
1958593e 2207 if (f2fs_encrypted_file(inode))
d4c759ee 2208 f2fs_wait_on_block_writeback(sbi, blkaddr);
08b39fbd 2209
649d7df2
JK
2210 if (len == PAGE_SIZE || PageUptodate(page))
2211 return 0;
eb47b800 2212
746e2403
YH
2213 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
2214 zero_user_segment(page, len, PAGE_SIZE);
2215 return 0;
2216 }
2217
2aadac08 2218 if (blkaddr == NEW_ADDR) {
09cbfeaf 2219 zero_user_segment(page, 0, PAGE_SIZE);
649d7df2 2220 SetPageUptodate(page);
eb47b800 2221 } else {
13ba41e3
JK
2222 err = f2fs_submit_page_read(inode, page, blkaddr);
2223 if (err)
78682f79 2224 goto fail;
d54c795b 2225
393ff91f 2226 lock_page(page);
6bacf52f 2227 if (unlikely(page->mapping != mapping)) {
afcb7ca0
JK
2228 f2fs_put_page(page, 1);
2229 goto repeat;
eb47b800 2230 }
1563ac75
CY
2231 if (unlikely(!PageUptodate(page))) {
2232 err = -EIO;
2233 goto fail;
4375a336 2234 }
eb47b800 2235 }
eb47b800 2236 return 0;
9ba69cf9 2237
3aab8f82 2238fail:
86531d6b 2239 f2fs_put_page(page, 1);
3aab8f82 2240 f2fs_write_failed(mapping, pos + len);
a2e2e76b 2241 if (drop_atomic)
57864ae5 2242 drop_inmem_pages_all(sbi);
3aab8f82 2243 return err;
eb47b800
JK
2244}
2245
a1dd3c13
JK
2246static int f2fs_write_end(struct file *file,
2247 struct address_space *mapping,
2248 loff_t pos, unsigned len, unsigned copied,
2249 struct page *page, void *fsdata)
2250{
2251 struct inode *inode = page->mapping->host;
2252
dfb2bf38
CY
2253 trace_f2fs_write_end(inode, pos, len, copied);
2254
649d7df2
JK
2255 /*
2256 * This should be come from len == PAGE_SIZE, and we expect copied
2257 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
2258 * let generic_perform_write() try to copy data again through copied=0.
2259 */
2260 if (!PageUptodate(page)) {
746e2403 2261 if (unlikely(copied != len))
649d7df2
JK
2262 copied = 0;
2263 else
2264 SetPageUptodate(page);
2265 }
2266 if (!copied)
2267 goto unlock_out;
2268
34ba94ba 2269 set_page_dirty(page);
a1dd3c13 2270
fc9581c8
JK
2271 if (pos + copied > i_size_read(inode))
2272 f2fs_i_size_write(inode, pos + copied);
649d7df2 2273unlock_out:
3024c9a1 2274 f2fs_put_page(page, 1);
d0239e1b 2275 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
a1dd3c13
JK
2276 return copied;
2277}
2278
6f673763
OS
2279static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
2280 loff_t offset)
944fcfc1
JK
2281{
2282 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
944fcfc1 2283
944fcfc1
JK
2284 if (offset & blocksize_mask)
2285 return -EINVAL;
2286
5b46f25d
AV
2287 if (iov_iter_alignment(iter) & blocksize_mask)
2288 return -EINVAL;
2289
944fcfc1
JK
2290 return 0;
2291}
2292
c8b8e32d 2293static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
eb47b800 2294{
b439b103 2295 struct address_space *mapping = iocb->ki_filp->f_mapping;
3aab8f82 2296 struct inode *inode = mapping->host;
0cdd3195 2297 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3aab8f82 2298 size_t count = iov_iter_count(iter);
c8b8e32d 2299 loff_t offset = iocb->ki_pos;
82e0a5aa 2300 int rw = iov_iter_rw(iter);
3aab8f82 2301 int err;
0cdd3195
HL
2302 enum rw_hint hint = iocb->ki_hint;
2303 int whint_mode = sbi->whint_mode;
944fcfc1 2304
b439b103 2305 err = check_direct_IO(inode, iter, offset);
b9d777b8
JK
2306 if (err)
2307 return err;
9ffe0fb5 2308
c040ff9d 2309 if (__force_buffered_io(inode, rw))
36abef4e 2310 return 0;
fcc85a4d 2311
5302fb00 2312 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
70407fad 2313
0cdd3195
HL
2314 if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
2315 iocb->ki_hint = WRITE_LIFE_NOT_SET;
2316
82e0a5aa 2317 down_read(&F2FS_I(inode)->dio_rwsem[rw]);
c8b8e32d 2318 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
82e0a5aa
CY
2319 up_read(&F2FS_I(inode)->dio_rwsem[rw]);
2320
2321 if (rw == WRITE) {
0cdd3195
HL
2322 if (whint_mode == WHINT_MODE_OFF)
2323 iocb->ki_hint = hint;
b0af6d49
CY
2324 if (err > 0) {
2325 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
2326 err);
91942321 2327 set_inode_flag(inode, FI_UPDATE_WRITE);
b0af6d49 2328 } else if (err < 0) {
6bfc4919 2329 f2fs_write_failed(mapping, offset + count);
b0af6d49 2330 }
6bfc4919 2331 }
70407fad 2332
5302fb00 2333 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
70407fad 2334
3aab8f82 2335 return err;
eb47b800
JK
2336}
2337
487261f3
CY
2338void f2fs_invalidate_page(struct page *page, unsigned int offset,
2339 unsigned int length)
eb47b800
JK
2340{
2341 struct inode *inode = page->mapping->host;
487261f3 2342 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
a7ffdbe2 2343
487261f3 2344 if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
09cbfeaf 2345 (offset % PAGE_SIZE || length != PAGE_SIZE))
a7ffdbe2
JK
2346 return;
2347
487261f3 2348 if (PageDirty(page)) {
933439c8 2349 if (inode->i_ino == F2FS_META_INO(sbi)) {
487261f3 2350 dec_page_count(sbi, F2FS_DIRTY_META);
933439c8 2351 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
487261f3 2352 dec_page_count(sbi, F2FS_DIRTY_NODES);
933439c8 2353 } else {
487261f3 2354 inode_dec_dirty_pages(inode);
933439c8
CY
2355 remove_dirty_inode(inode);
2356 }
487261f3 2357 }
decd36b6
CY
2358
2359 /* This is atomic written page, keep Private */
2360 if (IS_ATOMIC_WRITTEN_PAGE(page))
8c242db9 2361 return drop_inmem_page(inode, page);
decd36b6 2362
23dc974e 2363 set_page_private(page, 0);
eb47b800
JK
2364 ClearPagePrivate(page);
2365}
2366
487261f3 2367int f2fs_release_page(struct page *page, gfp_t wait)
eb47b800 2368{
f68daeeb
JK
2369 /* If this is dirty page, keep PagePrivate */
2370 if (PageDirty(page))
2371 return 0;
2372
decd36b6
CY
2373 /* This is atomic written page, keep Private */
2374 if (IS_ATOMIC_WRITTEN_PAGE(page))
2375 return 0;
2376
23dc974e 2377 set_page_private(page, 0);
eb47b800 2378 ClearPagePrivate(page);
c3850aa1 2379 return 1;
eb47b800
JK
2380}
2381
fe76b796
JK
2382/*
2383 * This was copied from __set_page_dirty_buffers which gives higher performance
2384 * in very high speed storages. (e.g., pmem)
2385 */
2386void f2fs_set_page_dirty_nobuffers(struct page *page)
2387{
2388 struct address_space *mapping = page->mapping;
2389 unsigned long flags;
2390
2391 if (unlikely(!mapping))
2392 return;
2393
2394 spin_lock(&mapping->private_lock);
2395 lock_page_memcg(page);
2396 SetPageDirty(page);
2397 spin_unlock(&mapping->private_lock);
2398
2399 spin_lock_irqsave(&mapping->tree_lock, flags);
2400 WARN_ON_ONCE(!PageUptodate(page));
2401 account_page_dirtied(page, mapping);
2402 radix_tree_tag_set(&mapping->page_tree,
2403 page_index(page), PAGECACHE_TAG_DIRTY);
2404 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2405 unlock_page_memcg(page);
2406
2407 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2408 return;
2409}
2410
eb47b800
JK
2411static int f2fs_set_data_page_dirty(struct page *page)
2412{
2413 struct address_space *mapping = page->mapping;
2414 struct inode *inode = mapping->host;
2415
26c6b887
JK
2416 trace_f2fs_set_page_dirty(page, DATA);
2417
237c0790
JK
2418 if (!PageUptodate(page))
2419 SetPageUptodate(page);
34ba94ba 2420
5fe45743 2421 if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
decd36b6
CY
2422 if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
2423 register_inmem_page(inode, page);
2424 return 1;
2425 }
2426 /*
2427 * Previously, this page has been registered, we just
2428 * return here.
2429 */
2430 return 0;
34ba94ba
JK
2431 }
2432
eb47b800 2433 if (!PageDirty(page)) {
fe76b796 2434 f2fs_set_page_dirty_nobuffers(page);
a7ffdbe2 2435 update_dirty_page(inode, page);
eb47b800
JK
2436 return 1;
2437 }
2438 return 0;
2439}
2440
c01e54b7
JK
2441static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
2442{
454ae7e5
CY
2443 struct inode *inode = mapping->host;
2444
1d373a0e
JK
2445 if (f2fs_has_inline_data(inode))
2446 return 0;
2447
2448 /* make sure allocating whole blocks */
2449 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2450 filemap_write_and_wait(mapping);
2451
e2b4e2bc 2452 return generic_block_bmap(mapping, block, get_data_block_bmap);
429511cd
CY
2453}
2454
5b7a487c
WG
#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

/*
 * ->migratepage handler: move @page's mapping, private state and contents
 * over to @newpage.
 *
 * An atomic-written page is only migrated in MIGRATE_SYNC mode (-EBUSY
 * otherwise) and only if inmem_lock can be taken without blocking (-EAGAIN
 * otherwise); its entry in the inode's inmem_pages list is repointed to
 * @newpage. Returns MIGRATEPAGE_SUCCESS on success, or the result of
 * migrate_page_move_mapping() when that step fails.
 */
int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);
	int extra_count;
	int ret;

	BUG_ON(PageWriteback(page));

	/* an atomic written page can be migrated safely under inmem_lock */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/*
	 * Moving the mapping expects an extra reference when PagePrivate is
	 * set, but F2FS breaks that rule in order to maintain dirty page
	 * counts when truncating pages. Adjust 'extra_count' to compensate.
	 */
	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
	ret = migrate_page_move_mapping(mapping, newpage,
				page, NULL, mode, extra_count);
	if (ret != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return ret;
	}

	if (atomic_written) {
		struct inmem_pages *entry;

		/* repoint the in-memory page record at the new page */
		list_for_each_entry(entry, &fi->inmem_pages, list) {
			if (entry->page == page) {
				entry->page = newpage;
				break;
			}
		}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	/* carry the private flag and value over to the new page */
	if (PagePrivate(page))
		SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));

	if (mode == MIGRATE_SYNC_NO_COPY)
		migrate_page_states(newpage, page);
	else
		migrate_page_copy(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif
2513
eb47b800
JK
2514const struct address_space_operations f2fs_dblock_aops = {
2515 .readpage = f2fs_read_data_page,
2516 .readpages = f2fs_read_data_pages,
2517 .writepage = f2fs_write_data_page,
2518 .writepages = f2fs_write_data_pages,
2519 .write_begin = f2fs_write_begin,
a1dd3c13 2520 .write_end = f2fs_write_end,
eb47b800 2521 .set_page_dirty = f2fs_set_data_page_dirty,
487261f3
CY
2522 .invalidatepage = f2fs_invalidate_page,
2523 .releasepage = f2fs_release_page,
eb47b800 2524 .direct_IO = f2fs_direct_IO,
c01e54b7 2525 .bmap = f2fs_bmap,
5b7a487c
WG
2526#ifdef CONFIG_MIGRATION
2527 .migratepage = f2fs_migrate_page,
2528#endif
eb47b800 2529};