Commit | Line | Data |
---|---|---|
dbbfca9f KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #ifndef NO_BCACHEFS_FS | |
3 | ||
4 | #include "bcachefs.h" | |
5 | #include "alloc_foreground.h" | |
6 | #include "fs.h" | |
7 | #include "fs-io.h" | |
8 | #include "fs-io-direct.h" | |
9 | #include "fs-io-pagecache.h" | |
10 | #include "io.h" | |
11 | ||
12 | #include <linux/kthread.h> | |
13 | #include <linux/pagemap.h> | |
14 | #include <linux/task_io_accounting_ops.h> | |
15 | ||
16 | /* O_DIRECT reads */ | |
17 | ||
18 | struct dio_read { | |
19 | struct closure cl; | |
20 | struct kiocb *req; | |
21 | long ret; | |
22 | bool should_dirty; | |
23 | struct bch_read_bio rbio; | |
24 | }; | |
25 | ||
26 | static void bio_check_or_release(struct bio *bio, bool check_dirty) | |
27 | { | |
28 | if (check_dirty) { | |
29 | bio_check_pages_dirty(bio); | |
30 | } else { | |
31 | bio_release_pages(bio, false); | |
32 | bio_put(bio); | |
33 | } | |
34 | } | |
35 | ||
36 | static void bch2_dio_read_complete(struct closure *cl) | |
37 | { | |
38 | struct dio_read *dio = container_of(cl, struct dio_read, cl); | |
39 | ||
40 | dio->req->ki_complete(dio->req, dio->ret); | |
41 | bio_check_or_release(&dio->rbio.bio, dio->should_dirty); | |
42 | } | |
43 | ||
44 | static void bch2_direct_IO_read_endio(struct bio *bio) | |
45 | { | |
46 | struct dio_read *dio = bio->bi_private; | |
47 | ||
48 | if (bio->bi_status) | |
49 | dio->ret = blk_status_to_errno(bio->bi_status); | |
50 | ||
51 | closure_put(&dio->cl); | |
52 | } | |
53 | ||
54 | static void bch2_direct_IO_read_split_endio(struct bio *bio) | |
55 | { | |
56 | struct dio_read *dio = bio->bi_private; | |
57 | bool should_dirty = dio->should_dirty; | |
58 | ||
59 | bch2_direct_IO_read_endio(bio); | |
60 | bio_check_or_release(bio, should_dirty); | |
61 | } | |
62 | ||
63 | static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) | |
64 | { | |
65 | struct file *file = req->ki_filp; | |
66 | struct bch_inode_info *inode = file_bch_inode(file); | |
67 | struct bch_fs *c = inode->v.i_sb->s_fs_info; | |
68 | struct bch_io_opts opts; | |
69 | struct dio_read *dio; | |
70 | struct bio *bio; | |
71 | loff_t offset = req->ki_pos; | |
72 | bool sync = is_sync_kiocb(req); | |
73 | size_t shorten; | |
74 | ssize_t ret; | |
75 | ||
76 | bch2_inode_opts_get(&opts, c, &inode->ei_inode); | |
77 | ||
78 | if ((offset|iter->count) & (block_bytes(c) - 1)) | |
79 | return -EINVAL; | |
80 | ||
81 | ret = min_t(loff_t, iter->count, | |
82 | max_t(loff_t, 0, i_size_read(&inode->v) - offset)); | |
83 | ||
84 | if (!ret) | |
85 | return ret; | |
86 | ||
87 | shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); | |
88 | iter->count -= shorten; | |
89 | ||
90 | bio = bio_alloc_bioset(NULL, | |
91 | bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), | |
92 | REQ_OP_READ, | |
93 | GFP_KERNEL, | |
94 | &c->dio_read_bioset); | |
95 | ||
96 | bio->bi_end_io = bch2_direct_IO_read_endio; | |
97 | ||
98 | dio = container_of(bio, struct dio_read, rbio.bio); | |
99 | closure_init(&dio->cl, NULL); | |
100 | ||
101 | /* | |
102 | * this is a _really_ horrible hack just to avoid an atomic sub at the | |
103 | * end: | |
104 | */ | |
105 | if (!sync) { | |
106 | set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); | |
107 | atomic_set(&dio->cl.remaining, | |
108 | CLOSURE_REMAINING_INITIALIZER - | |
109 | CLOSURE_RUNNING + | |
110 | CLOSURE_DESTRUCTOR); | |
111 | } else { | |
112 | atomic_set(&dio->cl.remaining, | |
113 | CLOSURE_REMAINING_INITIALIZER + 1); | |
114 | } | |
115 | ||
116 | dio->req = req; | |
117 | dio->ret = ret; | |
118 | /* | |
119 | * This is one of the sketchier things I've encountered: we have to skip | |
120 | * the dirtying of requests that are internal from the kernel (i.e. from | |
121 | * loopback), because we'll deadlock on page_lock. | |
122 | */ | |
123 | dio->should_dirty = iter_is_iovec(iter); | |
124 | ||
125 | goto start; | |
126 | while (iter->count) { | |
127 | bio = bio_alloc_bioset(NULL, | |
128 | bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), | |
129 | REQ_OP_READ, | |
130 | GFP_KERNEL, | |
131 | &c->bio_read); | |
132 | bio->bi_end_io = bch2_direct_IO_read_split_endio; | |
133 | start: | |
134 | bio->bi_opf = REQ_OP_READ|REQ_SYNC; | |
135 | bio->bi_iter.bi_sector = offset >> 9; | |
136 | bio->bi_private = dio; | |
137 | ||
138 | ret = bio_iov_iter_get_pages(bio, iter); | |
139 | if (ret < 0) { | |
140 | /* XXX: fault inject this path */ | |
141 | bio->bi_status = BLK_STS_RESOURCE; | |
142 | bio_endio(bio); | |
143 | break; | |
144 | } | |
145 | ||
146 | offset += bio->bi_iter.bi_size; | |
147 | ||
148 | if (dio->should_dirty) | |
149 | bio_set_pages_dirty(bio); | |
150 | ||
151 | if (iter->count) | |
152 | closure_get(&dio->cl); | |
153 | ||
154 | bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); | |
155 | } | |
156 | ||
157 | iter->count += shorten; | |
158 | ||
159 | if (sync) { | |
160 | closure_sync(&dio->cl); | |
161 | closure_debug_destroy(&dio->cl); | |
162 | ret = dio->ret; | |
163 | bio_check_or_release(&dio->rbio.bio, dio->should_dirty); | |
164 | return ret; | |
165 | } else { | |
166 | return -EIOCBQUEUED; | |
167 | } | |
168 | } | |
169 | ||
170 | ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) | |
171 | { | |
172 | struct file *file = iocb->ki_filp; | |
173 | struct bch_inode_info *inode = file_bch_inode(file); | |
174 | struct address_space *mapping = file->f_mapping; | |
175 | size_t count = iov_iter_count(iter); | |
176 | ssize_t ret; | |
177 | ||
178 | if (!count) | |
179 | return 0; /* skip atime */ | |
180 | ||
181 | if (iocb->ki_flags & IOCB_DIRECT) { | |
182 | struct blk_plug plug; | |
183 | ||
184 | if (unlikely(mapping->nrpages)) { | |
185 | ret = filemap_write_and_wait_range(mapping, | |
186 | iocb->ki_pos, | |
187 | iocb->ki_pos + count - 1); | |
188 | if (ret < 0) | |
189 | goto out; | |
190 | } | |
191 | ||
192 | file_accessed(file); | |
193 | ||
194 | blk_start_plug(&plug); | |
195 | ret = bch2_direct_IO_read(iocb, iter); | |
196 | blk_finish_plug(&plug); | |
197 | ||
198 | if (ret >= 0) | |
199 | iocb->ki_pos += ret; | |
200 | } else { | |
201 | bch2_pagecache_add_get(inode); | |
202 | ret = generic_file_read_iter(iocb, iter); | |
203 | bch2_pagecache_add_put(inode); | |
204 | } | |
205 | out: | |
206 | return bch2_err_class(ret); | |
207 | } | |
208 | ||
209 | /* O_DIRECT writes */ | |
210 | ||
211 | struct dio_write { | |
212 | struct kiocb *req; | |
213 | struct address_space *mapping; | |
214 | struct bch_inode_info *inode; | |
215 | struct mm_struct *mm; | |
216 | unsigned loop:1, | |
217 | extending:1, | |
218 | sync:1, | |
219 | flush:1, | |
220 | free_iov:1; | |
221 | struct quota_res quota_res; | |
222 | u64 written; | |
223 | ||
224 | struct iov_iter iter; | |
225 | struct iovec inline_vecs[2]; | |
226 | ||
227 | /* must be last: */ | |
228 | struct bch_write_op op; | |
229 | }; | |
230 | ||
231 | static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, | |
232 | u64 offset, u64 size, | |
233 | unsigned nr_replicas, bool compressed) | |
234 | { | |
235 | struct btree_trans trans; | |
236 | struct btree_iter iter; | |
237 | struct bkey_s_c k; | |
238 | u64 end = offset + size; | |
239 | u32 snapshot; | |
240 | bool ret = true; | |
241 | int err; | |
242 | ||
243 | bch2_trans_init(&trans, c, 0, 0); | |
244 | retry: | |
245 | bch2_trans_begin(&trans); | |
246 | ||
247 | err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); | |
248 | if (err) | |
249 | goto err; | |
250 | ||
251 | for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, | |
252 | SPOS(inum.inum, offset, snapshot), | |
253 | BTREE_ITER_SLOTS, k, err) { | |
254 | if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) | |
255 | break; | |
256 | ||
257 | if (k.k->p.snapshot != snapshot || | |
258 | nr_replicas > bch2_bkey_replicas(c, k) || | |
259 | (!compressed && bch2_bkey_sectors_compressed(k))) { | |
260 | ret = false; | |
261 | break; | |
262 | } | |
263 | } | |
264 | ||
265 | offset = iter.pos.offset; | |
266 | bch2_trans_iter_exit(&trans, &iter); | |
267 | err: | |
268 | if (bch2_err_matches(err, BCH_ERR_transaction_restart)) | |
269 | goto retry; | |
270 | bch2_trans_exit(&trans); | |
271 | ||
272 | return err ? false : ret; | |
273 | } | |
274 | ||
275 | static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) | |
276 | { | |
277 | struct bch_fs *c = dio->op.c; | |
278 | struct bch_inode_info *inode = dio->inode; | |
279 | struct bio *bio = &dio->op.wbio.bio; | |
280 | ||
281 | return bch2_check_range_allocated(c, inode_inum(inode), | |
282 | dio->op.pos.offset, bio_sectors(bio), | |
283 | dio->op.opts.data_replicas, | |
284 | dio->op.opts.compression != 0); | |
285 | } | |
286 | ||
287 | static void bch2_dio_write_loop_async(struct bch_write_op *); | |
288 | static __always_inline long bch2_dio_write_done(struct dio_write *dio); | |
289 | ||
290 | /* | |
291 | * We're going to return -EIOCBQUEUED, but we haven't finished consuming the | |
292 | * iov_iter yet, so we need to stash a copy of the iovec: it might be on the | |
293 | * caller's stack, we're not guaranteed that it will live for the duration of | |
294 | * the IO: | |
295 | */ | |
296 | static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) | |
297 | { | |
298 | struct iovec *iov = dio->inline_vecs; | |
299 | ||
300 | /* | |
301 | * iov_iter has a single embedded iovec - nothing to do: | |
302 | */ | |
303 | if (iter_is_ubuf(&dio->iter)) | |
304 | return 0; | |
305 | ||
306 | /* | |
307 | * We don't currently handle non-iovec iov_iters here - return an error, | |
308 | * and we'll fall back to doing the IO synchronously: | |
309 | */ | |
310 | if (!iter_is_iovec(&dio->iter)) | |
311 | return -1; | |
312 | ||
313 | if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { | |
314 | iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), | |
315 | GFP_KERNEL); | |
316 | if (unlikely(!iov)) | |
317 | return -ENOMEM; | |
318 | ||
319 | dio->free_iov = true; | |
320 | } | |
321 | ||
322 | memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); | |
323 | dio->iter.__iov = iov; | |
324 | return 0; | |
325 | } | |
326 | ||
327 | static void bch2_dio_write_flush_done(struct closure *cl) | |
328 | { | |
329 | struct dio_write *dio = container_of(cl, struct dio_write, op.cl); | |
330 | struct bch_fs *c = dio->op.c; | |
331 | ||
332 | closure_debug_destroy(cl); | |
333 | ||
334 | dio->op.error = bch2_journal_error(&c->journal); | |
335 | ||
336 | bch2_dio_write_done(dio); | |
337 | } | |
338 | ||
339 | static noinline void bch2_dio_write_flush(struct dio_write *dio) | |
340 | { | |
341 | struct bch_fs *c = dio->op.c; | |
342 | struct bch_inode_unpacked inode; | |
343 | int ret; | |
344 | ||
345 | dio->flush = 0; | |
346 | ||
347 | closure_init(&dio->op.cl, NULL); | |
348 | ||
349 | if (!dio->op.error) { | |
350 | ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); | |
351 | if (ret) { | |
352 | dio->op.error = ret; | |
353 | } else { | |
354 | bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, | |
355 | &dio->op.cl); | |
356 | bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); | |
357 | } | |
358 | } | |
359 | ||
360 | if (dio->sync) { | |
361 | closure_sync(&dio->op.cl); | |
362 | closure_debug_destroy(&dio->op.cl); | |
363 | } else { | |
364 | continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); | |
365 | } | |
366 | } | |
367 | ||
368 | static __always_inline long bch2_dio_write_done(struct dio_write *dio) | |
369 | { | |
370 | struct kiocb *req = dio->req; | |
371 | struct bch_inode_info *inode = dio->inode; | |
372 | bool sync = dio->sync; | |
373 | long ret; | |
374 | ||
375 | if (unlikely(dio->flush)) { | |
376 | bch2_dio_write_flush(dio); | |
377 | if (!sync) | |
378 | return -EIOCBQUEUED; | |
379 | } | |
380 | ||
381 | bch2_pagecache_block_put(inode); | |
382 | ||
383 | if (dio->free_iov) | |
384 | kfree(dio->iter.__iov); | |
385 | ||
386 | ret = dio->op.error ?: ((long) dio->written << 9); | |
387 | bio_put(&dio->op.wbio.bio); | |
388 | ||
389 | /* inode->i_dio_count is our ref on inode and thus bch_fs */ | |
390 | inode_dio_end(&inode->v); | |
391 | ||
392 | if (ret < 0) | |
393 | ret = bch2_err_class(ret); | |
394 | ||
395 | if (!sync) { | |
396 | req->ki_complete(req, ret); | |
397 | ret = -EIOCBQUEUED; | |
398 | } | |
399 | return ret; | |
400 | } | |
401 | ||
402 | static __always_inline void bch2_dio_write_end(struct dio_write *dio) | |
403 | { | |
404 | struct bch_fs *c = dio->op.c; | |
405 | struct kiocb *req = dio->req; | |
406 | struct bch_inode_info *inode = dio->inode; | |
407 | struct bio *bio = &dio->op.wbio.bio; | |
408 | ||
409 | req->ki_pos += (u64) dio->op.written << 9; | |
410 | dio->written += dio->op.written; | |
411 | ||
412 | if (dio->extending) { | |
413 | spin_lock(&inode->v.i_lock); | |
414 | if (req->ki_pos > inode->v.i_size) | |
415 | i_size_write(&inode->v, req->ki_pos); | |
416 | spin_unlock(&inode->v.i_lock); | |
417 | } | |
418 | ||
419 | if (dio->op.i_sectors_delta || dio->quota_res.sectors) { | |
420 | mutex_lock(&inode->ei_quota_lock); | |
421 | __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); | |
422 | __bch2_quota_reservation_put(c, inode, &dio->quota_res); | |
423 | mutex_unlock(&inode->ei_quota_lock); | |
424 | } | |
425 | ||
426 | bio_release_pages(bio, false); | |
427 | ||
428 | if (unlikely(dio->op.error)) | |
429 | set_bit(EI_INODE_ERROR, &inode->ei_flags); | |
430 | } | |
431 | ||
432 | static __always_inline long bch2_dio_write_loop(struct dio_write *dio) | |
433 | { | |
434 | struct bch_fs *c = dio->op.c; | |
435 | struct kiocb *req = dio->req; | |
436 | struct address_space *mapping = dio->mapping; | |
437 | struct bch_inode_info *inode = dio->inode; | |
438 | struct bch_io_opts opts; | |
439 | struct bio *bio = &dio->op.wbio.bio; | |
440 | unsigned unaligned, iter_count; | |
441 | bool sync = dio->sync, dropped_locks; | |
442 | long ret; | |
443 | ||
444 | bch2_inode_opts_get(&opts, c, &inode->ei_inode); | |
445 | ||
446 | while (1) { | |
447 | iter_count = dio->iter.count; | |
448 | ||
449 | EBUG_ON(current->faults_disabled_mapping); | |
450 | current->faults_disabled_mapping = mapping; | |
451 | ||
452 | ret = bio_iov_iter_get_pages(bio, &dio->iter); | |
453 | ||
454 | dropped_locks = fdm_dropped_locks(); | |
455 | ||
456 | current->faults_disabled_mapping = NULL; | |
457 | ||
458 | /* | |
459 | * If the fault handler returned an error but also signalled | |
460 | * that it dropped & retook ei_pagecache_lock, we just need to | |
461 | * re-shoot down the page cache and retry: | |
462 | */ | |
463 | if (dropped_locks && ret) | |
464 | ret = 0; | |
465 | ||
466 | if (unlikely(ret < 0)) | |
467 | goto err; | |
468 | ||
469 | if (unlikely(dropped_locks)) { | |
470 | ret = bch2_write_invalidate_inode_pages_range(mapping, | |
471 | req->ki_pos, | |
472 | req->ki_pos + iter_count - 1); | |
473 | if (unlikely(ret)) | |
474 | goto err; | |
475 | ||
476 | if (!bio->bi_iter.bi_size) | |
477 | continue; | |
478 | } | |
479 | ||
480 | unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); | |
481 | bio->bi_iter.bi_size -= unaligned; | |
482 | iov_iter_revert(&dio->iter, unaligned); | |
483 | ||
484 | if (!bio->bi_iter.bi_size) { | |
485 | /* | |
486 | * bio_iov_iter_get_pages was only able to get < | |
487 | * blocksize worth of pages: | |
488 | */ | |
489 | ret = -EFAULT; | |
490 | goto err; | |
491 | } | |
492 | ||
493 | bch2_write_op_init(&dio->op, c, opts); | |
494 | dio->op.end_io = sync | |
495 | ? NULL | |
496 | : bch2_dio_write_loop_async; | |
497 | dio->op.target = dio->op.opts.foreground_target; | |
498 | dio->op.write_point = writepoint_hashed((unsigned long) current); | |
499 | dio->op.nr_replicas = dio->op.opts.data_replicas; | |
500 | dio->op.subvol = inode->ei_subvol; | |
501 | dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); | |
502 | dio->op.devs_need_flush = &inode->ei_devs_need_flush; | |
503 | ||
504 | if (sync) | |
505 | dio->op.flags |= BCH_WRITE_SYNC; | |
506 | dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; | |
507 | ||
508 | ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, | |
509 | bio_sectors(bio), true); | |
510 | if (unlikely(ret)) | |
511 | goto err; | |
512 | ||
513 | ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), | |
514 | dio->op.opts.data_replicas, 0); | |
515 | if (unlikely(ret) && | |
516 | !bch2_dio_write_check_allocated(dio)) | |
517 | goto err; | |
518 | ||
519 | task_io_account_write(bio->bi_iter.bi_size); | |
520 | ||
521 | if (unlikely(dio->iter.count) && | |
522 | !dio->sync && | |
523 | !dio->loop && | |
524 | bch2_dio_write_copy_iov(dio)) | |
525 | dio->sync = sync = true; | |
526 | ||
527 | dio->loop = true; | |
528 | closure_call(&dio->op.cl, bch2_write, NULL, NULL); | |
529 | ||
530 | if (!sync) | |
531 | return -EIOCBQUEUED; | |
532 | ||
533 | bch2_dio_write_end(dio); | |
534 | ||
535 | if (likely(!dio->iter.count) || dio->op.error) | |
536 | break; | |
537 | ||
538 | bio_reset(bio, NULL, REQ_OP_WRITE); | |
539 | } | |
540 | out: | |
541 | return bch2_dio_write_done(dio); | |
542 | err: | |
543 | dio->op.error = ret; | |
544 | ||
545 | bio_release_pages(bio, false); | |
546 | ||
547 | bch2_quota_reservation_put(c, inode, &dio->quota_res); | |
548 | goto out; | |
549 | } | |
550 | ||
551 | static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) | |
552 | { | |
553 | struct mm_struct *mm = dio->mm; | |
554 | ||
555 | bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); | |
556 | ||
557 | if (mm) | |
558 | kthread_use_mm(mm); | |
559 | bch2_dio_write_loop(dio); | |
560 | if (mm) | |
561 | kthread_unuse_mm(mm); | |
562 | } | |
563 | ||
564 | static void bch2_dio_write_loop_async(struct bch_write_op *op) | |
565 | { | |
566 | struct dio_write *dio = container_of(op, struct dio_write, op); | |
567 | ||
568 | bch2_dio_write_end(dio); | |
569 | ||
570 | if (likely(!dio->iter.count) || dio->op.error) | |
571 | bch2_dio_write_done(dio); | |
572 | else | |
573 | bch2_dio_write_continue(dio); | |
574 | } | |
575 | ||
576 | ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) | |
577 | { | |
578 | struct file *file = req->ki_filp; | |
579 | struct address_space *mapping = file->f_mapping; | |
580 | struct bch_inode_info *inode = file_bch_inode(file); | |
581 | struct bch_fs *c = inode->v.i_sb->s_fs_info; | |
582 | struct dio_write *dio; | |
583 | struct bio *bio; | |
584 | bool locked = true, extending; | |
585 | ssize_t ret; | |
586 | ||
587 | prefetch(&c->opts); | |
588 | prefetch((void *) &c->opts + 64); | |
589 | prefetch(&inode->ei_inode); | |
590 | prefetch((void *) &inode->ei_inode + 64); | |
591 | ||
592 | inode_lock(&inode->v); | |
593 | ||
594 | ret = generic_write_checks(req, iter); | |
595 | if (unlikely(ret <= 0)) | |
596 | goto err; | |
597 | ||
598 | ret = file_remove_privs(file); | |
599 | if (unlikely(ret)) | |
600 | goto err; | |
601 | ||
602 | ret = file_update_time(file); | |
603 | if (unlikely(ret)) | |
604 | goto err; | |
605 | ||
606 | if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) | |
607 | goto err; | |
608 | ||
609 | inode_dio_begin(&inode->v); | |
610 | bch2_pagecache_block_get(inode); | |
611 | ||
612 | extending = req->ki_pos + iter->count > inode->v.i_size; | |
613 | if (!extending) { | |
614 | inode_unlock(&inode->v); | |
615 | locked = false; | |
616 | } | |
617 | ||
618 | bio = bio_alloc_bioset(NULL, | |
619 | bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), | |
620 | REQ_OP_WRITE, | |
621 | GFP_KERNEL, | |
622 | &c->dio_write_bioset); | |
623 | dio = container_of(bio, struct dio_write, op.wbio.bio); | |
624 | dio->req = req; | |
625 | dio->mapping = mapping; | |
626 | dio->inode = inode; | |
627 | dio->mm = current->mm; | |
628 | dio->loop = false; | |
629 | dio->extending = extending; | |
630 | dio->sync = is_sync_kiocb(req) || extending; | |
631 | dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; | |
632 | dio->free_iov = false; | |
633 | dio->quota_res.sectors = 0; | |
634 | dio->written = 0; | |
635 | dio->iter = *iter; | |
636 | dio->op.c = c; | |
637 | ||
638 | if (unlikely(mapping->nrpages)) { | |
639 | ret = bch2_write_invalidate_inode_pages_range(mapping, | |
640 | req->ki_pos, | |
641 | req->ki_pos + iter->count - 1); | |
642 | if (unlikely(ret)) | |
643 | goto err_put_bio; | |
644 | } | |
645 | ||
646 | ret = bch2_dio_write_loop(dio); | |
647 | err: | |
648 | if (locked) | |
649 | inode_unlock(&inode->v); | |
650 | return ret; | |
651 | err_put_bio: | |
652 | bch2_pagecache_block_put(inode); | |
653 | bio_put(bio); | |
654 | inode_dio_end(&inode->v); | |
655 | goto err; | |
656 | } | |
657 | ||
658 | void bch2_fs_fs_io_direct_exit(struct bch_fs *c) | |
659 | { | |
660 | bioset_exit(&c->dio_write_bioset); | |
661 | bioset_exit(&c->dio_read_bioset); | |
662 | } | |
663 | ||
664 | int bch2_fs_fs_io_direct_init(struct bch_fs *c) | |
665 | { | |
666 | if (bioset_init(&c->dio_read_bioset, | |
667 | 4, offsetof(struct dio_read, rbio.bio), | |
668 | BIOSET_NEED_BVECS)) | |
669 | return -BCH_ERR_ENOMEM_dio_read_bioset_init; | |
670 | ||
671 | if (bioset_init(&c->dio_write_bioset, | |
672 | 4, offsetof(struct dio_write, op.wbio.bio), | |
673 | BIOSET_NEED_BVECS)) | |
674 | return -BCH_ERR_ENOMEM_dio_write_bioset_init; | |
675 | ||
676 | return 0; | |
677 | } | |
678 | ||
679 | #endif /* NO_BCACHEFS_FS */ |