Line | Data |
---|---|
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (C) 2010 Red Hat, Inc. | |
4 | * Copyright (c) 2016-2018 Christoph Hellwig. | |
5 | */ | |
6 | #include <linux/module.h> | |
7 | #include <linux/compiler.h> | |
8 | #include <linux/fs.h> | |
9 | #include <linux/iomap.h> | |
10 | #include <linux/pagemap.h> | |
11 | #include <linux/uio.h> | |
12 | #include <linux/buffer_head.h> | |
13 | #include <linux/dax.h> | |
14 | #include <linux/writeback.h> | |
15 | #include <linux/swap.h> | |
16 | #include <linux/bio.h> | |
17 | #include <linux/sched/signal.h> | |
18 | #include <linux/migrate.h> | |
19 | ||
20 | #include "../internal.h" | |
21 | ||
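
For orientation, the per-page state managed throughout this file is the `struct iomap_page` declared in include/linux/iomap.h, accessed through `to_iomap_page()`. The sketch below shows roughly what those declarations look like at this point in the tree; the authoritative copy lives in that header.

```c
/*
 * Sketch of the declarations from include/linux/iomap.h that this file
 * relies on (shown for reference only): one iomap_page per page when the
 * block size is smaller than the page size, tracking per-block uptodate
 * state and outstanding read/write I/O counts.
 */
struct iomap_page {
	atomic_t		read_count;
	atomic_t		write_count;
	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
};

static inline struct iomap_page *to_iomap_page(struct page *page)
{
	if (page_has_private(page))
		return (struct iomap_page *)page_private(page);
	return NULL;
}
```
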
22 | static struct iomap_page * | |
23 | iomap_page_create(struct inode *inode, struct page *page) | |
24 | { | |
25 | struct iomap_page *iop = to_iomap_page(page); | |
26 | ||
27 | if (iop || i_blocksize(inode) == PAGE_SIZE) | |
28 | return iop; | |
29 | ||
30 | iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL); | |
31 | atomic_set(&iop->read_count, 0); | |
32 | atomic_set(&iop->write_count, 0); | |
33 | bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); | |
34 | ||
35 | /* | |
36 | * migrate_page_move_mapping() assumes that pages with private data have | |
37 | * their count elevated by 1. | |
38 | */ | |
39 | get_page(page); | |
40 | set_page_private(page, (unsigned long)iop); | |
41 | SetPagePrivate(page); | |
42 | return iop; | |
43 | } | |
44 | ||
45 | static void | |
46 | iomap_page_release(struct page *page) | |
47 | { | |
48 | struct iomap_page *iop = to_iomap_page(page); | |
49 | ||
50 | if (!iop) | |
51 | return; | |
52 | WARN_ON_ONCE(atomic_read(&iop->read_count)); | |
53 | WARN_ON_ONCE(atomic_read(&iop->write_count)); | |
54 | ClearPagePrivate(page); | |
55 | set_page_private(page, 0); | |
56 | put_page(page); | |
57 | kfree(iop); | |
58 | } | |
59 | ||
60 | /* | |
61 | * Calculate the range inside the page that we actually need to read. | |
62 | */ | |
63 | static void | |
64 | iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, | |
65 | loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp) | |
66 | { | |
67 | loff_t orig_pos = *pos; | |
68 | loff_t isize = i_size_read(inode); | |
69 | unsigned block_bits = inode->i_blkbits; | |
70 | unsigned block_size = (1 << block_bits); | |
71 | unsigned poff = offset_in_page(*pos); | |
72 | unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length); | |
73 | unsigned first = poff >> block_bits; | |
74 | unsigned last = (poff + plen - 1) >> block_bits; | |
75 | ||
76 | /* | |
77 | * If the block size is smaller than the page size we need to check the | |
78 | * per-block uptodate status and adjust the offset and length if needed | |
79 | * to avoid reading in already uptodate ranges. | |
80 | */ | |
81 | if (iop) { | |
82 | unsigned int i; | |
83 | ||
84 | /* move forward for each leading block marked uptodate */ | |
85 | for (i = first; i <= last; i++) { | |
86 | if (!test_bit(i, iop->uptodate)) | |
87 | break; | |
88 | *pos += block_size; | |
89 | poff += block_size; | |
90 | plen -= block_size; | |
91 | first++; | |
92 | } | |
93 | ||
94 | /* truncate len if we find any trailing uptodate block(s) */ | |
95 | for ( ; i <= last; i++) { | |
96 | if (test_bit(i, iop->uptodate)) { | |
97 | plen -= (last - i + 1) * block_size; | |
98 | last = i - 1; | |
99 | break; | |
100 | } | |
101 | } | |
102 | } | |
103 | ||
104 | /* | |
105 | * If the extent spans the block that contains the i_size we need to | |
106 | * handle both halves separately so that we properly zero data in the | |
107 | * page cache for blocks that are entirely outside of i_size. | |
108 | */ | |
109 | if (orig_pos <= isize && orig_pos + length > isize) { | |
110 | unsigned end = offset_in_page(isize - 1) >> block_bits; | |
111 | ||
112 | if (first <= end && last > end) | |
113 | plen -= (last - end) * block_size; | |
114 | } | |
115 | ||
116 | *offp = poff; | |
117 | *lenp = plen; | |
118 | } | |
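
For illustration (assuming 1k blocks in a 4k page; the numbers are not taken from the code): if a read covers the whole page and the iop bitmap shows blocks 0, 1 and 3 already uptodate, the first loop advances *pos and poff past the two leading uptodate blocks (by 2048) and shrinks plen to 2048, and the second loop then trims the trailing uptodate block, leaving poff = 2048 and plen = 1024, so only block 2 is actually read.
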
119 | ||
120 | static void | |
121 | iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len) | |
122 | { | |
123 | struct iomap_page *iop = to_iomap_page(page); | |
124 | struct inode *inode = page->mapping->host; | |
125 | unsigned first = off >> inode->i_blkbits; | |
126 | unsigned last = (off + len - 1) >> inode->i_blkbits; | |
127 | unsigned int i; | |
128 | bool uptodate = true; | |
129 | ||
130 | if (iop) { | |
131 | for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) { | |
132 | if (i >= first && i <= last) | |
133 | set_bit(i, iop->uptodate); | |
134 | else if (!test_bit(i, iop->uptodate)) | |
135 | uptodate = false; | |
136 | } | |
137 | } | |
138 | ||
139 | if (uptodate && !PageError(page)) | |
140 | SetPageUptodate(page); | |
141 | } | |
142 | ||
143 | static void | |
144 | iomap_read_finish(struct iomap_page *iop, struct page *page) | |
145 | { | |
146 | if (!iop || atomic_dec_and_test(&iop->read_count)) | |
147 | unlock_page(page); | |
148 | } | |
149 | ||
150 | static void | |
151 | iomap_read_page_end_io(struct bio_vec *bvec, int error) | |
152 | { | |
153 | struct page *page = bvec->bv_page; | |
154 | struct iomap_page *iop = to_iomap_page(page); | |
155 | ||
156 | if (unlikely(error)) { | |
157 | ClearPageUptodate(page); | |
158 | SetPageError(page); | |
159 | } else { | |
160 | iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len); | |
161 | } | |
162 | ||
163 | iomap_read_finish(iop, page); | |
164 | } | |
165 | ||
166 | static void | |
167 | iomap_read_end_io(struct bio *bio) | |
168 | { | |
169 | int error = blk_status_to_errno(bio->bi_status); | |
170 | struct bio_vec *bvec; | |
171 | struct bvec_iter_all iter_all; | |
172 | ||
173 | bio_for_each_segment_all(bvec, bio, iter_all) | |
174 | iomap_read_page_end_io(bvec, error); | |
175 | bio_put(bio); | |
176 | } | |
177 | ||
178 | struct iomap_readpage_ctx { | |
179 | struct page *cur_page; | |
180 | bool cur_page_in_bio; | |
181 | bool is_readahead; | |
182 | struct bio *bio; | |
183 | struct list_head *pages; | |
184 | }; | |
185 | ||
186 | static void | |
187 | iomap_read_inline_data(struct inode *inode, struct page *page, | |
188 | struct iomap *iomap) | |
189 | { | |
190 | size_t size = i_size_read(inode); | |
191 | void *addr; | |
192 | ||
193 | if (PageUptodate(page)) | |
194 | return; | |
195 | ||
196 | BUG_ON(page->index); | |
197 | BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data)); | |
198 | ||
199 | addr = kmap_atomic(page); | |
200 | memcpy(addr, iomap->inline_data, size); | |
201 | memset(addr + size, 0, PAGE_SIZE - size); | |
202 | kunmap_atomic(addr); | |
203 | SetPageUptodate(page); | |
204 | } | |
205 | ||
206 | static loff_t | |
207 | iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, | |
208 | struct iomap *iomap) | |
209 | { | |
210 | struct iomap_readpage_ctx *ctx = data; | |
211 | struct page *page = ctx->cur_page; | |
212 | struct iomap_page *iop = iomap_page_create(inode, page); | |
213 | bool same_page = false, is_contig = false; | |
214 | loff_t orig_pos = pos; | |
215 | unsigned poff, plen; | |
216 | sector_t sector; | |
217 | ||
218 | if (iomap->type == IOMAP_INLINE) { | |
219 | WARN_ON_ONCE(pos); | |
220 | iomap_read_inline_data(inode, page, iomap); | |
221 | return PAGE_SIZE; | |
222 | } | |
223 | ||
224 | /* zero post-eof blocks as the page may be mapped */ | |
225 | iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen); | |
226 | if (plen == 0) | |
227 | goto done; | |
228 | ||
229 | if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) { | |
230 | zero_user(page, poff, plen); | |
231 | iomap_set_range_uptodate(page, poff, plen); | |
232 | goto done; | |
233 | } | |
234 | ||
235 | ctx->cur_page_in_bio = true; | |
236 | ||
237 | /* | |
238 | * Try to merge into a previous segment if we can. | |
239 | */ | |
240 | sector = iomap_sector(iomap, pos); | |
241 | if (ctx->bio && bio_end_sector(ctx->bio) == sector) | |
242 | is_contig = true; | |
243 | ||
244 | if (is_contig && | |
245 | __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) { | |
246 | if (!same_page && iop) | |
247 | atomic_inc(&iop->read_count); | |
248 | goto done; | |
249 | } | |
250 | ||
251 | /* | |
252 | * If we start a new segment we need to increase the read count, and we | |
253 | * need to do so before submitting any previous full bio to make sure | |
254 | * that we don't prematurely unlock the page. | |
255 | */ | |
256 | if (iop) | |
257 | atomic_inc(&iop->read_count); | |
258 | ||
259 | if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) { | |
260 | gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); | |
261 | int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; | |
262 | ||
263 | if (ctx->bio) | |
264 | submit_bio(ctx->bio); | |
265 | ||
266 | if (ctx->is_readahead) /* same as readahead_gfp_mask */ | |
267 | gfp |= __GFP_NORETRY | __GFP_NOWARN; | |
268 | ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs)); | |
269 | ctx->bio->bi_opf = REQ_OP_READ; | |
270 | if (ctx->is_readahead) | |
271 | ctx->bio->bi_opf |= REQ_RAHEAD; | |
272 | ctx->bio->bi_iter.bi_sector = sector; | |
273 | bio_set_dev(ctx->bio, iomap->bdev); | |
274 | ctx->bio->bi_end_io = iomap_read_end_io; | |
275 | } | |
276 | ||
277 | bio_add_page(ctx->bio, page, plen, poff); | |
278 | done: | |
279 | /* | |
280 | * Move the caller beyond our range so that it keeps making progress. | |
281 | * For that we have to include any leading non-uptodate ranges, but | |
282 | * we can skip trailing ones as they will be handled in the next | |
283 | * iteration. | |
284 | */ | |
285 | return pos - orig_pos + plen; | |
286 | } | |
287 | ||
288 | int | |
289 | iomap_readpage(struct page *page, const struct iomap_ops *ops) | |
290 | { | |
291 | struct iomap_readpage_ctx ctx = { .cur_page = page }; | |
292 | struct inode *inode = page->mapping->host; | |
293 | unsigned poff; | |
294 | loff_t ret; | |
295 | ||
296 | for (poff = 0; poff < PAGE_SIZE; poff += ret) { | |
297 | ret = iomap_apply(inode, page_offset(page) + poff, | |
298 | PAGE_SIZE - poff, 0, ops, &ctx, | |
299 | iomap_readpage_actor); | |
300 | if (ret <= 0) { | |
301 | WARN_ON_ONCE(ret == 0); | |
302 | SetPageError(page); | |
303 | break; | |
304 | } | |
305 | } | |
306 | ||
307 | if (ctx.bio) { | |
308 | submit_bio(ctx.bio); | |
309 | WARN_ON_ONCE(!ctx.cur_page_in_bio); | |
310 | } else { | |
311 | WARN_ON_ONCE(ctx.cur_page_in_bio); | |
312 | unlock_page(page); | |
313 | } | |
314 | ||
315 | /* | |
316 | * Just like mpage_readpages and block_read_full_page we always | |
317 | * return 0 and just mark the page as PageError on errors. This | |
318 | * should be cleaned up all through the stack eventually. | |
319 | */ | |
320 | return 0; | |
321 | } | |
322 | EXPORT_SYMBOL_GPL(iomap_readpage); | |
323 | ||
324 | static struct page * | |
325 | iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos, | |
326 | loff_t length, loff_t *done) | |
327 | { | |
328 | while (!list_empty(pages)) { | |
329 | struct page *page = lru_to_page(pages); | |
330 | ||
331 | if (page_offset(page) >= (u64)pos + length) | |
332 | break; | |
333 | ||
334 | list_del(&page->lru); | |
335 | if (!add_to_page_cache_lru(page, inode->i_mapping, page->index, | |
336 | GFP_NOFS)) | |
337 | return page; | |
338 | ||
339 | /* | |
340 | * If we already have a page in the page cache at index we are | |
341 | * done. Upper layers don't care if it is uptodate after the | |
342 | * readpages call itself as every page gets checked again once | |
343 | * actually needed. | |
344 | */ | |
345 | *done += PAGE_SIZE; | |
346 | put_page(page); | |
347 | } | |
348 | ||
349 | return NULL; | |
350 | } | |
351 | ||
352 | static loff_t | |
353 | iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, | |
354 | void *data, struct iomap *iomap) | |
355 | { | |
356 | struct iomap_readpage_ctx *ctx = data; | |
357 | loff_t done, ret; | |
358 | ||
359 | for (done = 0; done < length; done += ret) { | |
360 | if (ctx->cur_page && offset_in_page(pos + done) == 0) { | |
361 | if (!ctx->cur_page_in_bio) | |
362 | unlock_page(ctx->cur_page); | |
363 | put_page(ctx->cur_page); | |
364 | ctx->cur_page = NULL; | |
365 | } | |
366 | if (!ctx->cur_page) { | |
367 | ctx->cur_page = iomap_next_page(inode, ctx->pages, | |
368 | pos, length, &done); | |
369 | if (!ctx->cur_page) | |
370 | break; | |
371 | ctx->cur_page_in_bio = false; | |
372 | } | |
373 | ret = iomap_readpage_actor(inode, pos + done, length - done, | |
374 | ctx, iomap); | |
375 | } | |
376 | ||
377 | return done; | |
378 | } | |
379 | ||
380 | int | |
381 | iomap_readpages(struct address_space *mapping, struct list_head *pages, | |
382 | unsigned nr_pages, const struct iomap_ops *ops) | |
383 | { | |
384 | struct iomap_readpage_ctx ctx = { | |
385 | .pages = pages, | |
386 | .is_readahead = true, | |
387 | }; | |
388 | loff_t pos = page_offset(list_entry(pages->prev, struct page, lru)); | |
389 | loff_t last = page_offset(list_entry(pages->next, struct page, lru)); | |
390 | loff_t length = last - pos + PAGE_SIZE, ret = 0; | |
391 | ||
392 | while (length > 0) { | |
393 | ret = iomap_apply(mapping->host, pos, length, 0, ops, | |
394 | &ctx, iomap_readpages_actor); | |
395 | if (ret <= 0) { | |
396 | WARN_ON_ONCE(ret == 0); | |
397 | goto done; | |
398 | } | |
399 | pos += ret; | |
400 | length -= ret; | |
401 | } | |
402 | ret = 0; | |
403 | done: | |
404 | if (ctx.bio) | |
405 | submit_bio(ctx.bio); | |
406 | if (ctx.cur_page) { | |
407 | if (!ctx.cur_page_in_bio) | |
408 | unlock_page(ctx.cur_page); | |
409 | put_page(ctx.cur_page); | |
410 | } | |
411 | ||
412 | /* | |
413 | * Check that we didn't lose a page due to the arcane calling | |
414 | * conventions. | |
415 | */ | |
416 | WARN_ON_ONCE(!ret && !list_empty(ctx.pages)); | |
417 | return ret; | |
418 | } | |
419 | EXPORT_SYMBOL_GPL(iomap_readpages); | |
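
As a usage sketch (not part of this file): a filesystem typically hooks these read entry points into its address_space_operations through thin wrappers that supply its own iomap_ops. `example_read_iomap_ops` below is an assumed, filesystem-provided mapping callback table, not something defined here.

```c
/*
 * Hypothetical ->readpage/->readpages wrappers, loosely modeled on how a
 * filesystem wires the iomap read path into its address_space_operations.
 * example_read_iomap_ops is an assumed filesystem-provided ops table.
 */
static int example_vm_readpage(struct file *unused, struct page *page)
{
	return iomap_readpage(page, &example_read_iomap_ops);
}

static int example_vm_readpages(struct file *unused, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return iomap_readpages(mapping, pages, nr_pages, &example_read_iomap_ops);
}
```
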
420 | ||
421 | /* | |
422 | * iomap_is_partially_uptodate checks whether blocks within a page are | |
423 | * uptodate or not. | |
424 | * | |
425 | * Returns true if all blocks which correspond to a file portion | |
426 | * we want to read within the page are uptodate. | |
427 | */ | |
428 | int | |
429 | iomap_is_partially_uptodate(struct page *page, unsigned long from, | |
430 | unsigned long count) | |
431 | { | |
432 | struct iomap_page *iop = to_iomap_page(page); | |
433 | struct inode *inode = page->mapping->host; | |
434 | unsigned len, first, last; | |
435 | unsigned i; | |
436 | ||
437 | /* Limit range to one page */ | |
438 | len = min_t(unsigned, PAGE_SIZE - from, count); | |
439 | ||
440 | /* First and last blocks in range within page */ | |
441 | first = from >> inode->i_blkbits; | |
442 | last = (from + len - 1) >> inode->i_blkbits; | |
443 | ||
444 | if (iop) { | |
445 | for (i = first; i <= last; i++) | |
446 | if (!test_bit(i, iop->uptodate)) | |
447 | return 0; | |
448 | return 1; | |
449 | } | |
450 | ||
451 | return 0; | |
452 | } | |
453 | EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); | |
454 | ||
455 | int | |
456 | iomap_releasepage(struct page *page, gfp_t gfp_mask) | |
457 | { | |
458 | /* | |
459 | * mm accommodates an old ext3 case where clean pages might not have had | |
460 | * the dirty bit cleared. Thus, it can send actual dirty pages to | |
461 | * ->releasepage() via shrink_active_list(); skip those here. | |
462 | */ | |
463 | if (PageDirty(page) || PageWriteback(page)) | |
464 | return 0; | |
465 | iomap_page_release(page); | |
466 | return 1; | |
467 | } | |
468 | EXPORT_SYMBOL_GPL(iomap_releasepage); | |
469 | ||
470 | void | |
471 | iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) | |
472 | { | |
473 | /* | |
474 | * If we are invalidating the entire page, clear the dirty state from it | |
475 | * and release it to avoid unnecessary buildup of the LRU. | |
476 | */ | |
477 | if (offset == 0 && len == PAGE_SIZE) { | |
478 | WARN_ON_ONCE(PageWriteback(page)); | |
479 | cancel_dirty_page(page); | |
480 | iomap_page_release(page); | |
481 | } | |
482 | } | |
483 | EXPORT_SYMBOL_GPL(iomap_invalidatepage); | |
484 | ||
485 | #ifdef CONFIG_MIGRATION | |
486 | int | |
487 | iomap_migrate_page(struct address_space *mapping, struct page *newpage, | |
488 | struct page *page, enum migrate_mode mode) | |
489 | { | |
490 | int ret; | |
491 | ||
492 | ret = migrate_page_move_mapping(mapping, newpage, page, 0); | |
493 | if (ret != MIGRATEPAGE_SUCCESS) |
494 | return ret; | |
495 | ||
496 | if (page_has_private(page)) { | |
497 | ClearPagePrivate(page); | |
498 | get_page(newpage); | |
499 | set_page_private(newpage, page_private(page)); | |
500 | set_page_private(page, 0); | |
501 | put_page(page); | |
502 | SetPagePrivate(newpage); | |
503 | } | |
504 | ||
505 | if (mode != MIGRATE_SYNC_NO_COPY) | |
506 | migrate_page_copy(newpage, page); | |
507 | else | |
508 | migrate_page_states(newpage, page); | |
509 | return MIGRATEPAGE_SUCCESS; | |
510 | } | |
511 | EXPORT_SYMBOL_GPL(iomap_migrate_page); | |
512 | #endif /* CONFIG_MIGRATION */ | |
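
A sketch of how the page-cache helpers exported above might be wired into a filesystem's address_space_operations (the example_vm_* read wrappers are the assumed wrappers from the earlier sketch; the write-side methods are omitted):

```c
/*
 * Hypothetical address_space_operations wiring for the exported helpers.
 * example_vm_readpage/example_vm_readpages are assumed filesystem wrappers;
 * the remaining methods come straight from this file.
 */
static const struct address_space_operations example_aops = {
	.readpage		= example_vm_readpage,
	.readpages		= example_vm_readpages,
	.releasepage		= iomap_releasepage,
	.invalidatepage		= iomap_invalidatepage,
	.is_partially_uptodate	= iomap_is_partially_uptodate,
	.set_page_dirty		= iomap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage		= iomap_migrate_page,
#endif
};
```
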
513 | ||
514 | static void | |
515 | iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) | |
516 | { | |
517 | loff_t i_size = i_size_read(inode); | |
518 | ||
519 | /* | |
520 | * Only truncate newly allocated pages beyond EOF, even if the | |
521 | * write started inside the existing inode size. | |
522 | */ | |
523 | if (pos + len > i_size) | |
524 | truncate_pagecache_range(inode, max(pos, i_size), pos + len); | |
525 | } | |
526 | ||
527 | static int | |
528 | iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page, | |
529 | unsigned poff, unsigned plen, unsigned from, unsigned to, | |
530 | struct iomap *iomap) | |
531 | { | |
532 | struct bio_vec bvec; | |
533 | struct bio bio; | |
534 | ||
535 | if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) { | |
536 | zero_user_segments(page, poff, from, to, poff + plen); | |
537 | iomap_set_range_uptodate(page, poff, plen); | |
538 | return 0; | |
539 | } | |
540 | ||
541 | bio_init(&bio, &bvec, 1); | |
542 | bio.bi_opf = REQ_OP_READ; | |
543 | bio.bi_iter.bi_sector = iomap_sector(iomap, block_start); | |
544 | bio_set_dev(&bio, iomap->bdev); | |
545 | __bio_add_page(&bio, page, plen, poff); | |
546 | return submit_bio_wait(&bio); | |
547 | } | |
548 | ||
549 | static int | |
550 | __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, | |
551 | struct page *page, struct iomap *iomap) | |
552 | { | |
553 | struct iomap_page *iop = iomap_page_create(inode, page); | |
554 | loff_t block_size = i_blocksize(inode); | |
555 | loff_t block_start = pos & ~(block_size - 1); | |
556 | loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1); | |
557 | unsigned from = offset_in_page(pos), to = from + len, poff, plen; | |
558 | int status = 0; | |
559 | ||
560 | if (PageUptodate(page)) | |
561 | return 0; | |
562 | ||
563 | do { | |
564 | iomap_adjust_read_range(inode, iop, &block_start, | |
565 | block_end - block_start, &poff, &plen); | |
566 | if (plen == 0) | |
567 | break; | |
568 | ||
569 | if ((from > poff && from < poff + plen) || | |
570 | (to > poff && to < poff + plen)) { | |
571 | status = iomap_read_page_sync(inode, block_start, page, | |
572 | poff, plen, from, to, iomap); | |
573 | if (status) | |
574 | break; | |
575 | } | |
576 | ||
577 | } while ((block_start += plen) < block_end); | |
578 | ||
579 | return status; | |
580 | } | |
581 | ||
582 | static int | |
583 | iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, | |
584 | struct page **pagep, struct iomap *iomap) | |
585 | { | |
586 | const struct iomap_page_ops *page_ops = iomap->page_ops; | |
587 | pgoff_t index = pos >> PAGE_SHIFT; | |
588 | struct page *page; | |
589 | int status = 0; | |
590 | ||
591 | BUG_ON(pos + len > iomap->offset + iomap->length); | |
592 | ||
593 | if (fatal_signal_pending(current)) | |
594 | return -EINTR; | |
595 | ||
596 | if (page_ops && page_ops->page_prepare) { | |
597 | status = page_ops->page_prepare(inode, pos, len, iomap); | |
598 | if (status) | |
599 | return status; | |
600 | } | |
601 | ||
602 | page = grab_cache_page_write_begin(inode->i_mapping, index, flags); | |
603 | if (!page) { | |
604 | status = -ENOMEM; | |
605 | goto out_no_page; | |
606 | } | |
607 | ||
608 | if (iomap->type == IOMAP_INLINE) | |
609 | iomap_read_inline_data(inode, page, iomap); | |
610 | else if (iomap->flags & IOMAP_F_BUFFER_HEAD) | |
611 | status = __block_write_begin_int(page, pos, len, NULL, iomap); | |
612 | else | |
613 | status = __iomap_write_begin(inode, pos, len, page, iomap); | |
614 | ||
615 | if (unlikely(status)) | |
616 | goto out_unlock; | |
617 | ||
618 | *pagep = page; | |
619 | return 0; | |
620 | ||
621 | out_unlock: | |
622 | unlock_page(page); | |
623 | put_page(page); | |
624 | iomap_write_failed(inode, pos, len); | |
625 | ||
626 | out_no_page: | |
627 | if (page_ops && page_ops->page_done) | |
628 | page_ops->page_done(inode, pos, 0, NULL, iomap); | |
629 | return status; | |
630 | } | |
631 | ||
632 | int | |
633 | iomap_set_page_dirty(struct page *page) | |
634 | { | |
635 | struct address_space *mapping = page_mapping(page); | |
636 | int newly_dirty; | |
637 | ||
638 | if (unlikely(!mapping)) | |
639 | return !TestSetPageDirty(page); | |
640 | ||
641 | /* | |
642 | * Lock out page->mem_cgroup migration to keep PageDirty | |
643 | * synchronized with per-memcg dirty page counters. | |
644 | */ | |
645 | lock_page_memcg(page); | |
646 | newly_dirty = !TestSetPageDirty(page); | |
647 | if (newly_dirty) | |
648 | __set_page_dirty(page, mapping, 0); | |
649 | unlock_page_memcg(page); | |
650 | ||
651 | if (newly_dirty) | |
652 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | |
653 | return newly_dirty; | |
654 | } | |
655 | EXPORT_SYMBOL_GPL(iomap_set_page_dirty); | |
656 | ||
657 | static int | |
658 | __iomap_write_end(struct inode *inode, loff_t pos, unsigned len, | |
659 | unsigned copied, struct page *page, struct iomap *iomap) | |
660 | { | |
661 | flush_dcache_page(page); | |
662 | ||
663 | /* | |
664 | * The blocks that were entirely written will now be uptodate, so we | |
665 | * don't have to worry about a readpage reading them and overwriting a | |
666 | * partial write. However if we have encountered a short write and only | |
667 | * partially written into a block, it will not be marked uptodate, so a | |
668 | * readpage might come in and destroy our partial write. | |
669 | * | |
670 | * Do the simplest thing, and just treat any short write to a non | |
671 | * uptodate page as a zero-length write, and force the caller to redo | |
672 | * the whole thing. | |
673 | */ | |
674 | if (unlikely(copied < len && !PageUptodate(page))) | |
675 | return 0; | |
676 | iomap_set_range_uptodate(page, offset_in_page(pos), len); | |
677 | iomap_set_page_dirty(page); | |
678 | return copied; | |
679 | } | |
680 | ||
681 | static int | |
682 | iomap_write_end_inline(struct inode *inode, struct page *page, | |
683 | struct iomap *iomap, loff_t pos, unsigned copied) | |
684 | { | |
685 | void *addr; | |
686 | ||
687 | WARN_ON_ONCE(!PageUptodate(page)); | |
688 | BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data)); | |
689 | ||
690 | addr = kmap_atomic(page); | |
691 | memcpy(iomap->inline_data + pos, addr + pos, copied); | |
692 | kunmap_atomic(addr); | |
693 | ||
694 | mark_inode_dirty(inode); | |
695 | return copied; | |
696 | } | |
697 | ||
698 | static int | |
699 | iomap_write_end(struct inode *inode, loff_t pos, unsigned len, | |
700 | unsigned copied, struct page *page, struct iomap *iomap) | |
701 | { | |
702 | const struct iomap_page_ops *page_ops = iomap->page_ops; | |
703 | loff_t old_size = inode->i_size; | |
704 | int ret; | |
705 | ||
706 | if (iomap->type == IOMAP_INLINE) { | |
707 | ret = iomap_write_end_inline(inode, page, iomap, pos, copied); | |
708 | } else if (iomap->flags & IOMAP_F_BUFFER_HEAD) { | |
709 | ret = block_write_end(NULL, inode->i_mapping, pos, len, copied, | |
710 | page, NULL); | |
711 | } else { | |
712 | ret = __iomap_write_end(inode, pos, len, copied, page, iomap); | |
713 | } | |
714 | ||
715 | /* | |
716 | * Update the in-memory inode size after copying the data into the page | |
717 | * cache. It's up to the file system to write the updated size to disk, | |
718 | * preferably after I/O completion so that no stale data is exposed. | |
719 | */ | |
720 | if (pos + ret > old_size) { | |
721 | i_size_write(inode, pos + ret); | |
722 | iomap->flags |= IOMAP_F_SIZE_CHANGED; | |
723 | } | |
724 | unlock_page(page); | |
725 | ||
726 | if (old_size < pos) | |
727 | pagecache_isize_extended(inode, old_size, pos); | |
728 | if (page_ops && page_ops->page_done) | |
729 | page_ops->page_done(inode, pos, ret, page, iomap); | |
730 | put_page(page); | |
731 | ||
732 | if (ret < len) | |
733 | iomap_write_failed(inode, pos, len); | |
734 | return ret; | |
735 | } | |
736 | ||
737 | static loff_t | |
738 | iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, | |
739 | struct iomap *iomap) | |
740 | { | |
741 | struct iov_iter *i = data; | |
742 | long status = 0; | |
743 | ssize_t written = 0; | |
744 | unsigned int flags = AOP_FLAG_NOFS; | |
745 | ||
746 | do { | |
747 | struct page *page; | |
748 | unsigned long offset; /* Offset into pagecache page */ | |
749 | unsigned long bytes; /* Bytes to write to page */ | |
750 | size_t copied; /* Bytes copied from user */ | |
751 | ||
752 | offset = offset_in_page(pos); | |
753 | bytes = min_t(unsigned long, PAGE_SIZE - offset, | |
754 | iov_iter_count(i)); | |
755 | again: | |
756 | if (bytes > length) | |
757 | bytes = length; | |
758 | ||
759 | /* | |
760 | * Bring in the user page that we will copy from _first_. | |
761 | * Otherwise there's a nasty deadlock on copying from the | |
762 | * same page as we're writing to, without it being marked | |
763 | * up-to-date. | |
764 | * | |
765 | * Not only is this an optimisation, but it is also required | |
766 | * to check that the address is actually valid, when atomic | |
767 | * usercopies are used, below. | |
768 | */ | |
769 | if (unlikely(iov_iter_fault_in_readable(i, bytes))) { | |
770 | status = -EFAULT; | |
771 | break; | |
772 | } | |
773 | ||
774 | status = iomap_write_begin(inode, pos, bytes, flags, &page, | |
775 | iomap); | |
776 | if (unlikely(status)) | |
777 | break; | |
778 | ||
779 | if (mapping_writably_mapped(inode->i_mapping)) | |
780 | flush_dcache_page(page); | |
781 | ||
782 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); | |
783 | ||
784 | flush_dcache_page(page); | |
785 | ||
786 | status = iomap_write_end(inode, pos, bytes, copied, page, | |
787 | iomap); | |
788 | if (unlikely(status < 0)) | |
789 | break; | |
790 | copied = status; | |
791 | ||
792 | cond_resched(); | |
793 | ||
794 | iov_iter_advance(i, copied); | |
795 | if (unlikely(copied == 0)) { | |
796 | /* | |
797 | * If we were unable to copy any data at all, we must | |
798 | * fall back to a single segment length write. | |
799 | * | |
800 | * If we didn't fallback here, we could livelock | |
801 | * because not all segments in the iov can be copied at | |
802 | * once without a pagefault. | |
803 | */ | |
804 | bytes = min_t(unsigned long, PAGE_SIZE - offset, | |
805 | iov_iter_single_seg_count(i)); | |
806 | goto again; | |
807 | } | |
808 | pos += copied; | |
809 | written += copied; | |
810 | length -= copied; | |
811 | ||
812 | balance_dirty_pages_ratelimited(inode->i_mapping); | |
813 | } while (iov_iter_count(i) && length); | |
814 | ||
815 | return written ? written : status; | |
816 | } | |
817 | ||
818 | ssize_t | |
819 | iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter, | |
820 | const struct iomap_ops *ops) | |
821 | { | |
822 | struct inode *inode = iocb->ki_filp->f_mapping->host; | |
823 | loff_t pos = iocb->ki_pos, ret = 0, written = 0; | |
824 | ||
825 | while (iov_iter_count(iter)) { | |
826 | ret = iomap_apply(inode, pos, iov_iter_count(iter), | |
827 | IOMAP_WRITE, ops, iter, iomap_write_actor); | |
828 | if (ret <= 0) | |
829 | break; | |
830 | pos += ret; | |
831 | written += ret; | |
832 | } | |
833 | ||
834 | return written ? written : ret; | |
835 | } | |
836 | EXPORT_SYMBOL_GPL(iomap_file_buffered_write); | |
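
A minimal caller sketch, assuming a filesystem-provided `example_iomap_ops`: note that iomap_file_buffered_write() advances a local position but does not update iocb->ki_pos, so the caller is expected to do that itself.

```c
/*
 * Hypothetical ->write_iter path built on iomap_file_buffered_write().
 * example_iomap_ops is an assumed filesystem-provided ops table; real
 * filesystems typically add their own locking and remove-privs handling.
 */
static ssize_t example_file_buffered_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = iomap_file_buffered_write(iocb, from, &example_iomap_ops);
	inode_unlock(inode);

	if (ret > 0) {
		/* iomap_file_buffered_write() does not advance ki_pos itself. */
		iocb->ki_pos += ret;
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}
```
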
837 | ||
838 | static struct page * | |
839 | __iomap_read_page(struct inode *inode, loff_t offset) | |
840 | { | |
841 | struct address_space *mapping = inode->i_mapping; | |
842 | struct page *page; | |
843 | ||
844 | page = read_mapping_page(mapping, offset >> PAGE_SHIFT, NULL); | |
845 | if (IS_ERR(page)) | |
846 | return page; | |
847 | if (!PageUptodate(page)) { | |
848 | put_page(page); | |
849 | return ERR_PTR(-EIO); | |
850 | } | |
851 | return page; | |
852 | } | |
853 | ||
854 | static loff_t | |
855 | iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, | |
856 | struct iomap *iomap) | |
857 | { | |
858 | long status = 0; | |
859 | ssize_t written = 0; | |
860 | ||
861 | do { | |
862 | struct page *page, *rpage; | |
863 | unsigned long offset; /* Offset into pagecache page */ | |
864 | unsigned long bytes; /* Bytes to write to page */ | |
865 | ||
866 | offset = offset_in_page(pos); | |
867 | bytes = min_t(loff_t, PAGE_SIZE - offset, length); | |
868 | ||
869 | rpage = __iomap_read_page(inode, pos); | |
870 | if (IS_ERR(rpage)) | |
871 | return PTR_ERR(rpage); | |
872 | ||
873 | status = iomap_write_begin(inode, pos, bytes, | |
874 | AOP_FLAG_NOFS, &page, iomap); | |
875 | put_page(rpage); | |
876 | if (unlikely(status)) | |
877 | return status; | |
878 | ||
879 | WARN_ON_ONCE(!PageUptodate(page)); | |
880 | ||
881 | status = iomap_write_end(inode, pos, bytes, bytes, page, iomap); | |
882 | if (unlikely(status <= 0)) { | |
883 | if (WARN_ON_ONCE(status == 0)) | |
884 | return -EIO; | |
885 | return status; | |
886 | } | |
887 | ||
888 | cond_resched(); | |
889 | ||
890 | pos += status; | |
891 | written += status; | |
892 | length -= status; | |
893 | ||
894 | balance_dirty_pages_ratelimited(inode->i_mapping); | |
895 | } while (length); | |
896 | ||
897 | return written; | |
898 | } | |
899 | ||
900 | int | |
901 | iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, | |
902 | const struct iomap_ops *ops) | |
903 | { | |
904 | loff_t ret; | |
905 | ||
906 | while (len) { | |
907 | ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL, | |
908 | iomap_dirty_actor); | |
909 | if (ret <= 0) | |
910 | return ret; | |
911 | pos += ret; | |
912 | len -= ret; | |
913 | } | |
914 | ||
915 | return 0; | |
916 | } | |
917 | EXPORT_SYMBOL_GPL(iomap_file_dirty); | |
918 | ||
919 | static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, | |
920 | unsigned bytes, struct iomap *iomap) | |
921 | { | |
922 | struct page *page; | |
923 | int status; | |
924 | ||
925 | status = iomap_write_begin(inode, pos, bytes, AOP_FLAG_NOFS, &page, | |
926 | iomap); | |
927 | if (status) | |
928 | return status; | |
929 | ||
930 | zero_user(page, offset, bytes); | |
931 | mark_page_accessed(page); | |
932 | ||
933 | return iomap_write_end(inode, pos, bytes, bytes, page, iomap); | |
934 | } | |
935 | ||
936 | static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, | |
937 | struct iomap *iomap) | |
938 | { | |
939 | return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, | |
940 | iomap_sector(iomap, pos & PAGE_MASK), offset, bytes); | |
941 | } | |
942 | ||
943 | static loff_t | |
944 | iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, | |
945 | void *data, struct iomap *iomap) | |
946 | { | |
947 | bool *did_zero = data; | |
948 | loff_t written = 0; | |
949 | int status; | |
950 | ||
951 | /* already zeroed? we're done. */ | |
952 | if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) | |
953 | return count; | |
954 | ||
955 | do { | |
956 | unsigned offset, bytes; | |
957 | ||
958 | offset = offset_in_page(pos); | |
959 | bytes = min_t(loff_t, PAGE_SIZE - offset, count); | |
960 | ||
961 | if (IS_DAX(inode)) | |
962 | status = iomap_dax_zero(pos, offset, bytes, iomap); | |
963 | else | |
964 | status = iomap_zero(inode, pos, offset, bytes, iomap); | |
965 | if (status < 0) | |
966 | return status; | |
967 | ||
968 | pos += bytes; | |
969 | count -= bytes; | |
970 | written += bytes; | |
971 | if (did_zero) | |
972 | *did_zero = true; | |
973 | } while (count > 0); | |
974 | ||
975 | return written; | |
976 | } | |
977 | ||
978 | int | |
979 | iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, | |
980 | const struct iomap_ops *ops) | |
981 | { | |
982 | loff_t ret; | |
983 | ||
984 | while (len > 0) { | |
985 | ret = iomap_apply(inode, pos, len, IOMAP_ZERO, | |
986 | ops, did_zero, iomap_zero_range_actor); | |
987 | if (ret <= 0) | |
988 | return ret; | |
989 | ||
990 | pos += ret; | |
991 | len -= ret; | |
992 | } | |
993 | ||
994 | return 0; | |
995 | } | |
996 | EXPORT_SYMBOL_GPL(iomap_zero_range); | |
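
A usage sketch under the same assumptions: zeroing the range between an old and a new EOF when extending a file, so that stale block contents beyond the old size never become visible.

```c
/*
 * Hypothetical extend-time helper: zero from the old EOF up to the new one.
 * example_iomap_ops is an assumed filesystem-provided ops table.
 */
static int example_zero_eof(struct inode *inode, loff_t oldsize, loff_t newsize)
{
	bool did_zero = false;

	if (newsize <= oldsize)
		return 0;
	return iomap_zero_range(inode, oldsize, newsize - oldsize, &did_zero,
			&example_iomap_ops);
}
```
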
997 | ||
998 | int | |
999 | iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, | |
1000 | const struct iomap_ops *ops) | |
1001 | { | |
1002 | unsigned int blocksize = i_blocksize(inode); | |
1003 | unsigned int off = pos & (blocksize - 1); | |
1004 | ||
1005 | /* Block boundary? Nothing to do */ | |
1006 | if (!off) | |
1007 | return 0; | |
1008 | return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops); | |
1009 | } | |
1010 | EXPORT_SYMBOL_GPL(iomap_truncate_page); | |
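
And the shrinking-truncate counterpart, again only as a hedged sketch: zero the tail of the new last block before the size change takes effect; the actual block/extent removal would follow in the filesystem.

```c
/*
 * Hypothetical shrinking-truncate helper.  example_iomap_ops is an assumed
 * filesystem-provided ops table.
 */
static int example_truncate_down(struct inode *inode, loff_t newsize)
{
	bool did_zero = false;
	int error;

	error = iomap_truncate_page(inode, newsize, &did_zero, &example_iomap_ops);
	if (error)
		return error;

	truncate_setsize(inode, newsize);
	return 0;
}
```
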
1011 | ||
1012 | static loff_t | |
1013 | iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, | |
1014 | void *data, struct iomap *iomap) | |
1015 | { | |
1016 | struct page *page = data; | |
1017 | int ret; | |
1018 | ||
1019 | if (iomap->flags & IOMAP_F_BUFFER_HEAD) { | |
1020 | ret = __block_write_begin_int(page, pos, length, NULL, iomap); | |
1021 | if (ret) | |
1022 | return ret; | |
1023 | block_commit_write(page, 0, length); | |
1024 | } else { | |
1025 | WARN_ON_ONCE(!PageUptodate(page)); | |
1026 | iomap_page_create(inode, page); | |
1027 | set_page_dirty(page); | |
1028 | } | |
1029 | ||
1030 | return length; | |
1031 | } | |
1032 | ||
1033 | vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) | |
1034 | { | |
1035 | struct page *page = vmf->page; | |
1036 | struct inode *inode = file_inode(vmf->vma->vm_file); | |
1037 | unsigned long length; | |
1038 | loff_t offset, size; | |
1039 | ssize_t ret; | |
1040 | ||
1041 | lock_page(page); | |
1042 | size = i_size_read(inode); | |
1043 | if ((page->mapping != inode->i_mapping) || | |
1044 | (page_offset(page) > size)) { | |
1045 | /* We overload EFAULT to mean page got truncated */ | |
1046 | ret = -EFAULT; | |
1047 | goto out_unlock; | |
1048 | } | |
1049 | ||
1050 | /* page is wholly or partially inside EOF */ | |
1051 | if (((page->index + 1) << PAGE_SHIFT) > size) | |
1052 | length = offset_in_page(size); | |
1053 | else | |
1054 | length = PAGE_SIZE; | |
1055 | ||
1056 | offset = page_offset(page); | |
1057 | while (length > 0) { | |
1058 | ret = iomap_apply(inode, offset, length, | |
1059 | IOMAP_WRITE | IOMAP_FAULT, ops, page, | |
1060 | iomap_page_mkwrite_actor); | |
1061 | if (unlikely(ret <= 0)) | |
1062 | goto out_unlock; | |
1063 | offset += ret; | |
1064 | length -= ret; | |
1065 | } | |
1066 | ||
1067 | wait_for_stable_page(page); | |
1068 | return VM_FAULT_LOCKED; | |
1069 | out_unlock: | |
1070 | unlock_page(page); | |
1071 | return block_page_mkwrite_return(ret); | |
1072 | } | |
1073 | EXPORT_SYMBOL_GPL(iomap_page_mkwrite); |
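
Finally, a sketch of a ->page_mkwrite handler built on the helper above (`example_iomap_ops` assumed; freeze protection and the timestamp update follow the usual pattern for write faults):

```c
/*
 * Hypothetical vm_operations_struct ->page_mkwrite handler wired to
 * iomap_page_mkwrite().  example_iomap_ops is an assumed ops table.
 */
static vm_fault_t example_filemap_page_mkwrite(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret;

	sb_start_pagefault(inode->i_sb);
	file_update_time(vmf->vma->vm_file);
	ret = iomap_page_mkwrite(vmf, &example_iomap_ops);
	sb_end_pagefault(inode->i_sb);

	return ret;
}
```
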