Commit | Line | Data |
---|---|---|
dbbfca9f KO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #ifndef NO_BCACHEFS_FS | |
3 | ||
4 | #include "bcachefs.h" | |
5 | #include "alloc_foreground.h" | |
6 | #include "fs.h" | |
7 | #include "fs-io.h" | |
8 | #include "fs-io-direct.h" | |
9 | #include "fs-io-pagecache.h" | |
1809b8cb KO |
10 | #include "io_read.h" |
11 | #include "io_write.h" | |
dbbfca9f KO |
12 | |
13 | #include <linux/kthread.h> | |
14 | #include <linux/pagemap.h> | |
7bba0dc6 | 15 | #include <linux/prefetch.h> |
dbbfca9f KO |
16 | #include <linux/task_io_accounting_ops.h> |
17 | ||
18 | /* O_DIRECT reads */ | |
19 | ||
20 | struct dio_read { | |
21 | struct closure cl; | |
22 | struct kiocb *req; | |
23 | long ret; | |
24 | bool should_dirty; | |
25 | struct bch_read_bio rbio; | |
26 | }; | |
27 | ||
28 | static void bio_check_or_release(struct bio *bio, bool check_dirty) | |
29 | { | |
30 | if (check_dirty) { | |
31 | bio_check_pages_dirty(bio); | |
32 | } else { | |
33 | bio_release_pages(bio, false); | |
34 | bio_put(bio); | |
35 | } | |
36 | } | |
37 | ||
38 | static void bch2_dio_read_complete(struct closure *cl) | |
39 | { | |
40 | struct dio_read *dio = container_of(cl, struct dio_read, cl); | |
41 | ||
42 | dio->req->ki_complete(dio->req, dio->ret); | |
43 | bio_check_or_release(&dio->rbio.bio, dio->should_dirty); | |
44 | } | |
45 | ||
46 | static void bch2_direct_IO_read_endio(struct bio *bio) | |
47 | { | |
48 | struct dio_read *dio = bio->bi_private; | |
49 | ||
50 | if (bio->bi_status) | |
51 | dio->ret = blk_status_to_errno(bio->bi_status); | |
52 | ||
53 | closure_put(&dio->cl); | |
54 | } | |
55 | ||
56 | static void bch2_direct_IO_read_split_endio(struct bio *bio) | |
57 | { | |
58 | struct dio_read *dio = bio->bi_private; | |
59 | bool should_dirty = dio->should_dirty; | |
60 | ||
61 | bch2_direct_IO_read_endio(bio); | |
62 | bio_check_or_release(bio, should_dirty); | |
63 | } | |
64 | ||
/*
 * bch2_direct_IO_read - issue an O_DIRECT read for @req into @iter.
 *
 * Returns -EINVAL when the file position or the length is not a multiple of
 * the filesystem block size, and 0 when nothing lies between the position and
 * i_size.  For a synchronous kiocb the function waits for all bios and
 * returns dio->ret: the byte count clamped to i_size, or a negative errno
 * recorded by the endio handler.  For an asynchronous kiocb it returns
 * -EIOCBQUEUED and the request is finished by bch2_dio_read_complete().
 *
 * The first bio comes from c->dio_read_bioset and embeds the struct dio_read;
 * any further bios come from c->bio_read and use the split endio handler.
 */
65 | static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) | |
66 | { | |
67 | struct file *file = req->ki_filp; | |
68 | struct bch_inode_info *inode = file_bch_inode(file); | |
69 | struct bch_fs *c = inode->v.i_sb->s_fs_info; | |
70 | struct bch_io_opts opts; | |
71 | struct dio_read *dio; | |
72 | struct bio *bio; | |
73 | loff_t offset = req->ki_pos; | |
74 | bool sync = is_sync_kiocb(req); | |
75 | size_t shorten; | |
76 | ssize_t ret; | |
77 | ||
78 | bch2_inode_opts_get(&opts, c, &inode->ei_inode); | |
79 | ||
/* Both the file position and the length must be block aligned. */
80 | if ((offset|iter->count) & (block_bytes(c) - 1)) | |
81 | return -EINVAL; | |
82 | ||
/* Clamp the number of bytes reported to what lies before i_size. */
83 | ret = min_t(loff_t, iter->count, | |
84 | max_t(loff_t, 0, i_size_read(&inode->v) - offset)); | |
85 | ||
86 | if (!ret) | |
87 | return ret; | |
88 | ||
/*
 * Temporarily trim the iterator to the clamped length rounded up to a whole
 * block; the trimmed amount is added back once all bios are submitted.
 */
89 | shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); | |
90 | iter->count -= shorten; | |
91 | ||
92 | bio = bio_alloc_bioset(NULL, | |
93 | bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), | |
94 | REQ_OP_READ, | |
95 | GFP_KERNEL, | |
96 | &c->dio_read_bioset); | |
97 | ||
98 | bio->bi_end_io = bch2_direct_IO_read_endio; | |
99 | ||
/* The dio lives in the front padding of this first bio. */
100 | dio = container_of(bio, struct dio_read, rbio.bio); | |
101 | closure_init(&dio->cl, NULL); | |
102 | ||
103 | /* | |
104 | * this is a _really_ horrible hack just to avoid an atomic sub at the | |
105 | * end: | |
106 | */ | |
/*
 * The closure's remaining count is overwritten by hand for both modes.  The
 * async case also installs bch2_dio_read_complete as the closure function;
 * the sync case is waited on with closure_sync() at the end of this function.
 */
107 | if (!sync) { | |
108 | set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); | |
109 | atomic_set(&dio->cl.remaining, | |
110 | CLOSURE_REMAINING_INITIALIZER - | |
111 | CLOSURE_RUNNING + | |
112 | CLOSURE_DESTRUCTOR); | |
113 | } else { | |
114 | atomic_set(&dio->cl.remaining, | |
115 | CLOSURE_REMAINING_INITIALIZER + 1); | |
ee526b88 | 116 | dio->cl.closure_get_happened = true; |
dbbfca9f KO |
117 | } |
118 | ||
119 | dio->req = req; | |
120 | dio->ret = ret; | |
121 | /* | |
122 | * This is one of the sketchier things I've encountered: we have to skip | |
123 | * the dirtying of requests that are internal from the kernel (i.e. from | |
124 | * loopback), because we'll deadlock on page_lock. | |
125 | */ | |
126 | dio->should_dirty = iter_is_iovec(iter); | |
127 | ||
/*
 * The first bio is already allocated, so jump into the loop body past the
 * allocation; later iterations allocate a fresh bio from c->bio_read.
 */
128 | goto start; | |
129 | while (iter->count) { | |
130 | bio = bio_alloc_bioset(NULL, | |
131 | bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), | |
132 | REQ_OP_READ, | |
133 | GFP_KERNEL, | |
134 | &c->bio_read); | |
135 | bio->bi_end_io = bch2_direct_IO_read_split_endio; | |
136 | start: | |
137 | bio->bi_opf = REQ_OP_READ|REQ_SYNC; | |
138 | bio->bi_iter.bi_sector = offset >> 9; | |
139 | bio->bi_private = dio; | |
140 | ||
/* Note: ret is reused here; the value reported to the caller is dio->ret. */
141 | ret = bio_iov_iter_get_pages(bio, iter); | |
142 | if (ret < 0) { | |
143 | /* XXX: fault inject this path */ | |
144 | bio->bi_status = BLK_STS_RESOURCE; | |
145 | bio_endio(bio); | |
146 | break; | |
147 | } | |
148 | ||
149 | offset += bio->bi_iter.bi_size; | |
150 | ||
151 | if (dio->should_dirty) | |
152 | bio_set_pages_dirty(bio); | |
153 | ||
/* Another bio will follow: take the closure reference it will drop. */
154 | if (iter->count) | |
155 | closure_get(&dio->cl); | |
156 | ||
157 | bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); | |
158 | } | |
159 | ||
/* Undo the temporary trim applied before the loop. */
160 | iter->count += shorten; | |
161 | ||
/*
 * Sync: wait for every bio, pick up the final result and dispose of the
 * first bio here.  Async: completion happens in bch2_dio_read_complete().
 */
162 | if (sync) { | |
163 | closure_sync(&dio->cl); | |
164 | closure_debug_destroy(&dio->cl); | |
165 | ret = dio->ret; | |
166 | bio_check_or_release(&dio->rbio.bio, dio->should_dirty); | |
167 | return ret; | |
168 | } else { | |
169 | return -EIOCBQUEUED; | |
170 | } | |
171 | } | |
172 | ||
173 | ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) | |
174 | { | |
175 | struct file *file = iocb->ki_filp; | |
176 | struct bch_inode_info *inode = file_bch_inode(file); | |
177 | struct address_space *mapping = file->f_mapping; | |
178 | size_t count = iov_iter_count(iter); | |
179 | ssize_t ret; | |
180 | ||
181 | if (!count) | |
182 | return 0; /* skip atime */ | |
183 | ||
184 | if (iocb->ki_flags & IOCB_DIRECT) { | |
185 | struct blk_plug plug; | |
186 | ||
187 | if (unlikely(mapping->nrpages)) { | |
188 | ret = filemap_write_and_wait_range(mapping, | |
189 | iocb->ki_pos, | |
190 | iocb->ki_pos + count - 1); | |
191 | if (ret < 0) | |
192 | goto out; | |
193 | } | |
194 | ||
195 | file_accessed(file); | |
196 | ||
197 | blk_start_plug(&plug); | |
198 | ret = bch2_direct_IO_read(iocb, iter); | |
199 | blk_finish_plug(&plug); | |
200 | ||
201 | if (ret >= 0) | |
202 | iocb->ki_pos += ret; | |
203 | } else { | |
204 | bch2_pagecache_add_get(inode); | |
205 | ret = generic_file_read_iter(iocb, iter); | |
206 | bch2_pagecache_add_put(inode); | |
207 | } | |
208 | out: | |
209 | return bch2_err_class(ret); | |
210 | } | |
211 | ||
212 | /* O_DIRECT writes */ | |
213 | ||
214 | struct dio_write { | |
215 | struct kiocb *req; | |
216 | struct address_space *mapping; | |
217 | struct bch_inode_info *inode; | |
218 | struct mm_struct *mm; | |
219 | unsigned loop:1, | |
220 | extending:1, | |
221 | sync:1, | |
222 | flush:1, | |
223 | free_iov:1; | |
224 | struct quota_res quota_res; | |
225 | u64 written; | |
226 | ||
227 | struct iov_iter iter; | |
228 | struct iovec inline_vecs[2]; | |
229 | ||
230 | /* must be last: */ | |
231 | struct bch_write_op op; | |
232 | }; | |
233 | ||
234 | static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, | |
235 | u64 offset, u64 size, | |
236 | unsigned nr_replicas, bool compressed) | |
237 | { | |
6bd68ec2 | 238 | struct btree_trans *trans = bch2_trans_get(c); |
dbbfca9f KO |
239 | struct btree_iter iter; |
240 | struct bkey_s_c k; | |
241 | u64 end = offset + size; | |
242 | u32 snapshot; | |
243 | bool ret = true; | |
244 | int err; | |
dbbfca9f | 245 | retry: |
6bd68ec2 | 246 | bch2_trans_begin(trans); |
dbbfca9f | 247 | |
6bd68ec2 | 248 | err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); |
dbbfca9f KO |
249 | if (err) |
250 | goto err; | |
251 | ||
6bd68ec2 | 252 | for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, |
dbbfca9f KO |
253 | SPOS(inum.inum, offset, snapshot), |
254 | BTREE_ITER_SLOTS, k, err) { | |
255 | if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) | |
256 | break; | |
257 | ||
258 | if (k.k->p.snapshot != snapshot || | |
259 | nr_replicas > bch2_bkey_replicas(c, k) || | |
260 | (!compressed && bch2_bkey_sectors_compressed(k))) { | |
261 | ret = false; | |
262 | break; | |
263 | } | |
264 | } | |
265 | ||
266 | offset = iter.pos.offset; | |
6bd68ec2 | 267 | bch2_trans_iter_exit(trans, &iter); |
dbbfca9f KO |
268 | err: |
269 | if (bch2_err_matches(err, BCH_ERR_transaction_restart)) | |
270 | goto retry; | |
6bd68ec2 | 271 | bch2_trans_put(trans); |
dbbfca9f KO |
272 | |
273 | return err ? false : ret; | |
274 | } | |
275 | ||
276 | static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) | |
277 | { | |
278 | struct bch_fs *c = dio->op.c; | |
279 | struct bch_inode_info *inode = dio->inode; | |
280 | struct bio *bio = &dio->op.wbio.bio; | |
281 | ||
282 | return bch2_check_range_allocated(c, inode_inum(inode), | |
283 | dio->op.pos.offset, bio_sectors(bio), | |
284 | dio->op.opts.data_replicas, | |
285 | dio->op.opts.compression != 0); | |
286 | } | |
287 | ||
288 | static void bch2_dio_write_loop_async(struct bch_write_op *); | |
289 | static __always_inline long bch2_dio_write_done(struct dio_write *dio); | |
290 | ||
291 | /* | |
292 | * We're going to return -EIOCBQUEUED, but we haven't finished consuming the | |
293 | * iov_iter yet, so we need to stash a copy of the iovec: it might be on the | |
294 | * caller's stack, we're not guaranteed that it will live for the duration of | |
295 | * the IO: | |
296 | */ | |
297 | static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) | |
298 | { | |
299 | struct iovec *iov = dio->inline_vecs; | |
300 | ||
301 | /* | |
302 | * iov_iter has a single embedded iovec - nothing to do: | |
303 | */ | |
304 | if (iter_is_ubuf(&dio->iter)) | |
305 | return 0; | |
306 | ||
307 | /* | |
308 | * We don't currently handle non-iovec iov_iters here - return an error, | |
309 | * and we'll fall back to doing the IO synchronously: | |
310 | */ | |
311 | if (!iter_is_iovec(&dio->iter)) | |
312 | return -1; | |
313 | ||
314 | if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { | |
315 | iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), | |
316 | GFP_KERNEL); | |
317 | if (unlikely(!iov)) | |
318 | return -ENOMEM; | |
319 | ||
320 | dio->free_iov = true; | |
321 | } | |
322 | ||
323 | memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); | |
324 | dio->iter.__iov = iov; | |
325 | return 0; | |
326 | } | |
327 | ||
328 | static void bch2_dio_write_flush_done(struct closure *cl) | |
329 | { | |
330 | struct dio_write *dio = container_of(cl, struct dio_write, op.cl); | |
331 | struct bch_fs *c = dio->op.c; | |
332 | ||
333 | closure_debug_destroy(cl); | |
334 | ||
335 | dio->op.error = bch2_journal_error(&c->journal); | |
336 | ||
337 | bch2_dio_write_done(dio); | |
338 | } | |
339 | ||
340 | static noinline void bch2_dio_write_flush(struct dio_write *dio) | |
341 | { | |
342 | struct bch_fs *c = dio->op.c; | |
343 | struct bch_inode_unpacked inode; | |
344 | int ret; | |
345 | ||
346 | dio->flush = 0; | |
347 | ||
348 | closure_init(&dio->op.cl, NULL); | |
349 | ||
350 | if (!dio->op.error) { | |
351 | ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); | |
352 | if (ret) { | |
353 | dio->op.error = ret; | |
354 | } else { | |
355 | bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, | |
356 | &dio->op.cl); | |
357 | bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); | |
358 | } | |
359 | } | |
360 | ||
361 | if (dio->sync) { | |
362 | closure_sync(&dio->op.cl); | |
363 | closure_debug_destroy(&dio->op.cl); | |
364 | } else { | |
365 | continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); | |
366 | } | |
367 | } | |
368 | ||
369 | static __always_inline long bch2_dio_write_done(struct dio_write *dio) | |
370 | { | |
371 | struct kiocb *req = dio->req; | |
372 | struct bch_inode_info *inode = dio->inode; | |
373 | bool sync = dio->sync; | |
374 | long ret; | |
375 | ||
376 | if (unlikely(dio->flush)) { | |
377 | bch2_dio_write_flush(dio); | |
378 | if (!sync) | |
379 | return -EIOCBQUEUED; | |
380 | } | |
381 | ||
382 | bch2_pagecache_block_put(inode); | |
383 | ||
384 | if (dio->free_iov) | |
385 | kfree(dio->iter.__iov); | |
386 | ||
387 | ret = dio->op.error ?: ((long) dio->written << 9); | |
388 | bio_put(&dio->op.wbio.bio); | |
389 | ||
390 | /* inode->i_dio_count is our ref on inode and thus bch_fs */ | |
391 | inode_dio_end(&inode->v); | |
392 | ||
393 | if (ret < 0) | |
394 | ret = bch2_err_class(ret); | |
395 | ||
396 | if (!sync) { | |
397 | req->ki_complete(req, ret); | |
398 | ret = -EIOCBQUEUED; | |
399 | } | |
400 | return ret; | |
401 | } | |
402 | ||
403 | static __always_inline void bch2_dio_write_end(struct dio_write *dio) | |
404 | { | |
405 | struct bch_fs *c = dio->op.c; | |
406 | struct kiocb *req = dio->req; | |
407 | struct bch_inode_info *inode = dio->inode; | |
408 | struct bio *bio = &dio->op.wbio.bio; | |
409 | ||
410 | req->ki_pos += (u64) dio->op.written << 9; | |
411 | dio->written += dio->op.written; | |
412 | ||
413 | if (dio->extending) { | |
414 | spin_lock(&inode->v.i_lock); | |
415 | if (req->ki_pos > inode->v.i_size) | |
416 | i_size_write(&inode->v, req->ki_pos); | |
417 | spin_unlock(&inode->v.i_lock); | |
418 | } | |
419 | ||
420 | if (dio->op.i_sectors_delta || dio->quota_res.sectors) { | |
421 | mutex_lock(&inode->ei_quota_lock); | |
422 | __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); | |
423 | __bch2_quota_reservation_put(c, inode, &dio->quota_res); | |
424 | mutex_unlock(&inode->ei_quota_lock); | |
425 | } | |
426 | ||
427 | bio_release_pages(bio, false); | |
428 | ||
429 | if (unlikely(dio->op.error)) | |
430 | set_bit(EI_INODE_ERROR, &inode->ei_flags); | |
431 | } | |
432 | ||
/*
 * bch2_dio_write_loop - main loop of an O_DIRECT write.
 *
 * Each iteration pins user pages into the op's bio, trims the bio down to a
 * whole number of blocks, initialises the write op, takes quota and disk
 * reservations and submits the write with closure_call(bch2_write).
 *
 * Asynchronous requests return -EIOCBQUEUED right after the first submission.
 * Synchronous requests call bch2_dio_write_end() after each write and loop
 * until the iterator is drained or the op reports an error.  Both the normal
 * exit and the error path finish through bch2_dio_write_done(), whose return
 * value is returned here.
 */
433 | static __always_inline long bch2_dio_write_loop(struct dio_write *dio) | |
434 | { | |
435 | struct bch_fs *c = dio->op.c; | |
436 | struct kiocb *req = dio->req; | |
437 | struct address_space *mapping = dio->mapping; | |
438 | struct bch_inode_info *inode = dio->inode; | |
439 | struct bch_io_opts opts; | |
440 | struct bio *bio = &dio->op.wbio.bio; | |
441 | unsigned unaligned, iter_count; | |
442 | bool sync = dio->sync, dropped_locks; | |
443 | long ret; | |
444 | ||
445 | bch2_inode_opts_get(&opts, c, &inode->ei_inode); | |
446 | ||
447 | while (1) { | |
/* Remembered so the invalidation below covers the pre-pin range. */
448 | iter_count = dio->iter.count; | |
449 | ||
/*
 * current->faults_disabled_mapping is set only for the duration of the page
 * pinning call; fdm_dropped_locks() is sampled right after it.
 */
450 | EBUG_ON(current->faults_disabled_mapping); | |
451 | current->faults_disabled_mapping = mapping; | |
452 | ||
453 | ret = bio_iov_iter_get_pages(bio, &dio->iter); | |
454 | ||
455 | dropped_locks = fdm_dropped_locks(); | |
456 | ||
457 | current->faults_disabled_mapping = NULL; | |
458 | ||
459 | /* | |
460 | * If the fault handler returned an error but also signalled | |
461 | * that it dropped & retook ei_pagecache_lock, we just need to | |
462 | * re-shoot down the page cache and retry: | |
463 | */ | |
464 | if (dropped_locks && ret) | |
465 | ret = 0; | |
466 | ||
467 | if (unlikely(ret < 0)) | |
468 | goto err; | |
469 | ||
470 | if (unlikely(dropped_locks)) { | |
471 | ret = bch2_write_invalidate_inode_pages_range(mapping, | |
472 | req->ki_pos, | |
473 | req->ki_pos + iter_count - 1); | |
474 | if (unlikely(ret)) | |
475 | goto err; | |
476 | ||
/* Nothing was pinned on this pass: retry from the top. */
477 | if (!bio->bi_iter.bi_size) | |
478 | continue; | |
479 | } | |
480 | ||
/* Trim the bio to a block multiple and hand the tail back to the iterator. */
481 | unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); | |
482 | bio->bi_iter.bi_size -= unaligned; | |
483 | iov_iter_revert(&dio->iter, unaligned); | |
484 | ||
485 | if (!bio->bi_iter.bi_size) { | |
486 | /* | |
487 | * bio_iov_iter_get_pages was only able to get < | |
488 | * blocksize worth of pages: | |
489 | */ | |
490 | ret = -EFAULT; | |
491 | goto err; | |
492 | } | |
493 | ||
494 | bch2_write_op_init(&dio->op, c, opts); | |
495 | dio->op.end_io = sync | |
496 | ? NULL | |
497 | : bch2_dio_write_loop_async; | |
498 | dio->op.target = dio->op.opts.foreground_target; | |
499 | dio->op.write_point = writepoint_hashed((unsigned long) current); | |
500 | dio->op.nr_replicas = dio->op.opts.data_replicas; | |
501 | dio->op.subvol = inode->ei_subvol; | |
502 | dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); | |
503 | dio->op.devs_need_flush = &inode->ei_devs_need_flush; | |
504 | ||
505 | if (sync) | |
506 | dio->op.flags |= BCH_WRITE_SYNC; | |
507 | dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; | |
508 | ||
509 | ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, | |
510 | bio_sectors(bio), true); | |
511 | if (unlikely(ret)) | |
512 | goto err; | |
513 | ||
/*
 * A failed disk reservation is fatal only when the target range is not
 * already allocated suitably; otherwise the write proceeds without one.
 */
514 | ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), | |
515 | dio->op.opts.data_replicas, 0); | |
516 | if (unlikely(ret) && | |
517 | !bch2_dio_write_check_allocated(dio)) | |
518 | goto err; | |
519 | ||
520 | task_io_account_write(bio->bi_iter.bi_size); | |
521 | ||
/*
 * First submission of an async request with data still left in the iterator:
 * take a private copy of the iovec, and switch to synchronous operation if
 * that fails.
 * NOTE(review): op.end_io and BCH_WRITE_SYNC above were chosen from the
 * earlier value of sync - confirm the fallback is handled as intended.
 */
522 | if (unlikely(dio->iter.count) && | |
523 | !dio->sync && | |
524 | !dio->loop && | |
525 | bch2_dio_write_copy_iov(dio)) | |
526 | dio->sync = sync = true; | |
527 | ||
528 | dio->loop = true; | |
529 | closure_call(&dio->op.cl, bch2_write, NULL, NULL); | |
530 | ||
/* Async: the rest happens from bch2_dio_write_loop_async(). */
531 | if (!sync) | |
532 | return -EIOCBQUEUED; | |
533 | ||
534 | bch2_dio_write_end(dio); | |
535 | ||
536 | if (likely(!dio->iter.count) || dio->op.error) | |
537 | break; | |
538 | ||
539 | bio_reset(bio, NULL, REQ_OP_WRITE); | |
540 | } | |
541 | out: | |
542 | return bch2_dio_write_done(dio); | |
/* Error path: record the error, unpin pages, drop the quota reservation. */
543 | err: | |
544 | dio->op.error = ret; | |
545 | ||
546 | bio_release_pages(bio, false); | |
547 | ||
548 | bch2_quota_reservation_put(c, inode, &dio->quota_res); | |
549 | goto out; | |
550 | } | |
551 | ||
552 | static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) | |
553 | { | |
554 | struct mm_struct *mm = dio->mm; | |
555 | ||
556 | bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); | |
557 | ||
558 | if (mm) | |
559 | kthread_use_mm(mm); | |
560 | bch2_dio_write_loop(dio); | |
561 | if (mm) | |
562 | kthread_unuse_mm(mm); | |
563 | } | |
564 | ||
565 | static void bch2_dio_write_loop_async(struct bch_write_op *op) | |
566 | { | |
567 | struct dio_write *dio = container_of(op, struct dio_write, op); | |
568 | ||
569 | bch2_dio_write_end(dio); | |
570 | ||
571 | if (likely(!dio->iter.count) || dio->op.error) | |
572 | bch2_dio_write_done(dio); | |
573 | else | |
574 | bch2_dio_write_continue(dio); | |
575 | } | |
576 | ||
577 | ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) | |
578 | { | |
579 | struct file *file = req->ki_filp; | |
580 | struct address_space *mapping = file->f_mapping; | |
581 | struct bch_inode_info *inode = file_bch_inode(file); | |
582 | struct bch_fs *c = inode->v.i_sb->s_fs_info; | |
583 | struct dio_write *dio; | |
584 | struct bio *bio; | |
585 | bool locked = true, extending; | |
586 | ssize_t ret; | |
587 | ||
588 | prefetch(&c->opts); | |
589 | prefetch((void *) &c->opts + 64); | |
590 | prefetch(&inode->ei_inode); | |
591 | prefetch((void *) &inode->ei_inode + 64); | |
592 | ||
593 | inode_lock(&inode->v); | |
594 | ||
595 | ret = generic_write_checks(req, iter); | |
596 | if (unlikely(ret <= 0)) | |
597 | goto err; | |
598 | ||
599 | ret = file_remove_privs(file); | |
600 | if (unlikely(ret)) | |
601 | goto err; | |
602 | ||
603 | ret = file_update_time(file); | |
604 | if (unlikely(ret)) | |
605 | goto err; | |
606 | ||
607 | if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) | |
608 | goto err; | |
609 | ||
610 | inode_dio_begin(&inode->v); | |
611 | bch2_pagecache_block_get(inode); | |
612 | ||
613 | extending = req->ki_pos + iter->count > inode->v.i_size; | |
614 | if (!extending) { | |
615 | inode_unlock(&inode->v); | |
616 | locked = false; | |
617 | } | |
618 | ||
619 | bio = bio_alloc_bioset(NULL, | |
620 | bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), | |
621 | REQ_OP_WRITE, | |
622 | GFP_KERNEL, | |
623 | &c->dio_write_bioset); | |
624 | dio = container_of(bio, struct dio_write, op.wbio.bio); | |
625 | dio->req = req; | |
626 | dio->mapping = mapping; | |
627 | dio->inode = inode; | |
628 | dio->mm = current->mm; | |
629 | dio->loop = false; | |
630 | dio->extending = extending; | |
631 | dio->sync = is_sync_kiocb(req) || extending; | |
632 | dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; | |
633 | dio->free_iov = false; | |
634 | dio->quota_res.sectors = 0; | |
635 | dio->written = 0; | |
636 | dio->iter = *iter; | |
637 | dio->op.c = c; | |
638 | ||
639 | if (unlikely(mapping->nrpages)) { | |
640 | ret = bch2_write_invalidate_inode_pages_range(mapping, | |
641 | req->ki_pos, | |
642 | req->ki_pos + iter->count - 1); | |
643 | if (unlikely(ret)) | |
644 | goto err_put_bio; | |
645 | } | |
646 | ||
647 | ret = bch2_dio_write_loop(dio); | |
648 | err: | |
649 | if (locked) | |
650 | inode_unlock(&inode->v); | |
651 | return ret; | |
652 | err_put_bio: | |
653 | bch2_pagecache_block_put(inode); | |
654 | bio_put(bio); | |
655 | inode_dio_end(&inode->v); | |
656 | goto err; | |
657 | } | |
658 | ||
659 | void bch2_fs_fs_io_direct_exit(struct bch_fs *c) | |
660 | { | |
661 | bioset_exit(&c->dio_write_bioset); | |
662 | bioset_exit(&c->dio_read_bioset); | |
663 | } | |
664 | ||
665 | int bch2_fs_fs_io_direct_init(struct bch_fs *c) | |
666 | { | |
667 | if (bioset_init(&c->dio_read_bioset, | |
668 | 4, offsetof(struct dio_read, rbio.bio), | |
669 | BIOSET_NEED_BVECS)) | |
670 | return -BCH_ERR_ENOMEM_dio_read_bioset_init; | |
671 | ||
672 | if (bioset_init(&c->dio_write_bioset, | |
673 | 4, offsetof(struct dio_write, op.wbio.bio), | |
674 | BIOSET_NEED_BVECS)) | |
675 | return -BCH_ERR_ENOMEM_dio_write_bioset_init; | |
676 | ||
677 | return 0; | |
678 | } | |
679 | ||
680 | #endif /* NO_BCACHEFS_FS */ |