Merge tag 'clk-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux
[linux-block.git] / fs / cachefiles / io.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* kiocb-using read/write
3  *
4  * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/mount.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/falloc.h>
13 #include <linux/sched/mm.h>
14 #include <trace/events/fscache.h>
15 #include "internal.h"
16
17 struct cachefiles_kiocb {
18         struct kiocb            iocb;
19         refcount_t              ki_refcnt;
20         loff_t                  start;
21         union {
22                 size_t          skipped;
23                 size_t          len;
24         };
25         struct cachefiles_object *object;
26         netfs_io_terminated_t   term_func;
27         void                    *term_func_priv;
28         bool                    was_async;
29         unsigned int            inval_counter;  /* Copy of cookie->inval_counter */
30         u64                     b_writing;
31 };
32
33 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
34 {
35         if (refcount_dec_and_test(&ki->ki_refcnt)) {
36                 cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
37                 fput(ki->iocb.ki_filp);
38                 kfree(ki);
39         }
40 }
41
42 /*
43  * Handle completion of a read from the cache.
44  */
45 static void cachefiles_read_complete(struct kiocb *iocb, long ret)
46 {
47         struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
48         struct inode *inode = file_inode(ki->iocb.ki_filp);
49
50         _enter("%ld", ret);
51
52         if (ret < 0)
53                 trace_cachefiles_io_error(ki->object, inode, ret,
54                                           cachefiles_trace_read_error);
55
56         if (ki->term_func) {
57                 if (ret >= 0) {
58                         if (ki->object->cookie->inval_counter == ki->inval_counter)
59                                 ki->skipped += ret;
60                         else
61                                 ret = -ESTALE;
62                 }
63
64                 ki->term_func(ki->term_func_priv, ret, ki->was_async);
65         }
66
67         cachefiles_put_kiocb(ki);
68 }
69
70 /*
71  * Initiate a read from the cache.
72  */
73 static int cachefiles_read(struct netfs_cache_resources *cres,
74                            loff_t start_pos,
75                            struct iov_iter *iter,
76                            enum netfs_read_from_hole read_hole,
77                            netfs_io_terminated_t term_func,
78                            void *term_func_priv)
79 {
80         struct cachefiles_object *object;
81         struct cachefiles_kiocb *ki;
82         struct file *file;
83         unsigned int old_nofs;
84         ssize_t ret = -ENOBUFS;
85         size_t len = iov_iter_count(iter), skipped = 0;
86
87         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
88                 goto presubmission_error;
89
90         fscache_count_read();
91         object = cachefiles_cres_object(cres);
92         file = cachefiles_cres_file(cres);
93
94         _enter("%pD,%li,%llx,%zx/%llx",
95                file, file_inode(file)->i_ino, start_pos, len,
96                i_size_read(file_inode(file)));
97
98         /* If the caller asked us to seek for data before doing the read, then
99          * we should do that now.  If we find a gap, we fill it with zeros.
100          */
101         if (read_hole != NETFS_READ_HOLE_IGNORE) {
102                 loff_t off = start_pos, off2;
103
104                 off2 = cachefiles_inject_read_error();
105                 if (off2 == 0)
106                         off2 = vfs_llseek(file, off, SEEK_DATA);
107                 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
108                         skipped = 0;
109                         ret = off2;
110                         goto presubmission_error;
111                 }
112
113                 if (off2 == -ENXIO || off2 >= start_pos + len) {
114                         /* The region is beyond the EOF or there's no more data
115                          * in the region, so clear the rest of the buffer and
116                          * return success.
117                          */
118                         ret = -ENODATA;
119                         if (read_hole == NETFS_READ_HOLE_FAIL)
120                                 goto presubmission_error;
121
122                         iov_iter_zero(len, iter);
123                         skipped = len;
124                         ret = 0;
125                         goto presubmission_error;
126                 }
127
128                 skipped = off2 - off;
129                 iov_iter_zero(skipped, iter);
130         }
131
132         ret = -ENOMEM;
133         ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
134         if (!ki)
135                 goto presubmission_error;
136
137         refcount_set(&ki->ki_refcnt, 2);
138         ki->iocb.ki_filp        = file;
139         ki->iocb.ki_pos         = start_pos + skipped;
140         ki->iocb.ki_flags       = IOCB_DIRECT;
141         ki->iocb.ki_hint        = ki_hint_validate(file_write_hint(file));
142         ki->iocb.ki_ioprio      = get_current_ioprio();
143         ki->skipped             = skipped;
144         ki->object              = object;
145         ki->inval_counter       = cres->inval_counter;
146         ki->term_func           = term_func;
147         ki->term_func_priv      = term_func_priv;
148         ki->was_async           = true;
149
150         if (ki->term_func)
151                 ki->iocb.ki_complete = cachefiles_read_complete;
152
153         get_file(ki->iocb.ki_filp);
154         cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
155
156         trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
157         old_nofs = memalloc_nofs_save();
158         ret = cachefiles_inject_read_error();
159         if (ret == 0)
160                 ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
161         memalloc_nofs_restore(old_nofs);
162         switch (ret) {
163         case -EIOCBQUEUED:
164                 goto in_progress;
165
166         case -ERESTARTSYS:
167         case -ERESTARTNOINTR:
168         case -ERESTARTNOHAND:
169         case -ERESTART_RESTARTBLOCK:
170                 /* There's no easy way to restart the syscall since other AIO's
171                  * may be already running. Just fail this IO with EINTR.
172                  */
173                 ret = -EINTR;
174                 fallthrough;
175         default:
176                 ki->was_async = false;
177                 cachefiles_read_complete(&ki->iocb, ret);
178                 if (ret > 0)
179                         ret = 0;
180                 break;
181         }
182
183 in_progress:
184         cachefiles_put_kiocb(ki);
185         _leave(" = %zd", ret);
186         return ret;
187
188 presubmission_error:
189         if (term_func)
190                 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
191         return ret;
192 }
193
194 /*
195  * Handle completion of a write to the cache.
196  */
197 static void cachefiles_write_complete(struct kiocb *iocb, long ret)
198 {
199         struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
200         struct cachefiles_object *object = ki->object;
201         struct inode *inode = file_inode(ki->iocb.ki_filp);
202
203         _enter("%ld", ret);
204
205         /* Tell lockdep we inherited freeze protection from submission thread */
206         __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
207         __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
208
209         if (ret < 0)
210                 trace_cachefiles_io_error(object, inode, ret,
211                                           cachefiles_trace_write_error);
212
213         atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
214         set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
215         if (ki->term_func)
216                 ki->term_func(ki->term_func_priv, ret, ki->was_async);
217         cachefiles_put_kiocb(ki);
218 }
219
220 /*
221  * Initiate a write to the cache.
222  */
223 static int cachefiles_write(struct netfs_cache_resources *cres,
224                             loff_t start_pos,
225                             struct iov_iter *iter,
226                             netfs_io_terminated_t term_func,
227                             void *term_func_priv)
228 {
229         struct cachefiles_object *object;
230         struct cachefiles_cache *cache;
231         struct cachefiles_kiocb *ki;
232         struct inode *inode;
233         struct file *file;
234         unsigned int old_nofs;
235         ssize_t ret = -ENOBUFS;
236         size_t len = iov_iter_count(iter);
237
238         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
239                 goto presubmission_error;
240         fscache_count_write();
241         object = cachefiles_cres_object(cres);
242         cache = object->volume->cache;
243         file = cachefiles_cres_file(cres);
244
245         _enter("%pD,%li,%llx,%zx/%llx",
246                file, file_inode(file)->i_ino, start_pos, len,
247                i_size_read(file_inode(file)));
248
249         ret = -ENOMEM;
250         ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
251         if (!ki)
252                 goto presubmission_error;
253
254         refcount_set(&ki->ki_refcnt, 2);
255         ki->iocb.ki_filp        = file;
256         ki->iocb.ki_pos         = start_pos;
257         ki->iocb.ki_flags       = IOCB_DIRECT | IOCB_WRITE;
258         ki->iocb.ki_hint        = ki_hint_validate(file_write_hint(file));
259         ki->iocb.ki_ioprio      = get_current_ioprio();
260         ki->object              = object;
261         ki->inval_counter       = cres->inval_counter;
262         ki->start               = start_pos;
263         ki->len                 = len;
264         ki->term_func           = term_func;
265         ki->term_func_priv      = term_func_priv;
266         ki->was_async           = true;
267         ki->b_writing           = (len + (1 << cache->bshift)) >> cache->bshift;
268
269         if (ki->term_func)
270                 ki->iocb.ki_complete = cachefiles_write_complete;
271         atomic_long_add(ki->b_writing, &cache->b_writing);
272
273         /* Open-code file_start_write here to grab freeze protection, which
274          * will be released by another thread in aio_complete_rw().  Fool
275          * lockdep by telling it the lock got released so that it doesn't
276          * complain about the held lock when we return to userspace.
277          */
278         inode = file_inode(file);
279         __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
280         __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
281
282         get_file(ki->iocb.ki_filp);
283         cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
284
285         trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
286         old_nofs = memalloc_nofs_save();
287         ret = cachefiles_inject_write_error();
288         if (ret == 0)
289                 ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
290         memalloc_nofs_restore(old_nofs);
291         switch (ret) {
292         case -EIOCBQUEUED:
293                 goto in_progress;
294
295         case -ERESTARTSYS:
296         case -ERESTARTNOINTR:
297         case -ERESTARTNOHAND:
298         case -ERESTART_RESTARTBLOCK:
299                 /* There's no easy way to restart the syscall since other AIO's
300                  * may be already running. Just fail this IO with EINTR.
301                  */
302                 ret = -EINTR;
303                 fallthrough;
304         default:
305                 ki->was_async = false;
306                 cachefiles_write_complete(&ki->iocb, ret);
307                 if (ret > 0)
308                         ret = 0;
309                 break;
310         }
311
312 in_progress:
313         cachefiles_put_kiocb(ki);
314         _leave(" = %zd", ret);
315         return ret;
316
317 presubmission_error:
318         if (term_func)
319                 term_func(term_func_priv, ret, false);
320         return ret;
321 }
322
323 /*
324  * Prepare a read operation, shortening it to a cached/uncached
325  * boundary as appropriate.
326  */
327 static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq,
328                                                       loff_t i_size)
329 {
330         enum cachefiles_prepare_read_trace why;
331         struct netfs_read_request *rreq = subreq->rreq;
332         struct netfs_cache_resources *cres = &rreq->cache_resources;
333         struct cachefiles_object *object;
334         struct cachefiles_cache *cache;
335         struct fscache_cookie *cookie = fscache_cres_cookie(cres);
336         const struct cred *saved_cred;
337         struct file *file = cachefiles_cres_file(cres);
338         enum netfs_read_source ret = NETFS_DOWNLOAD_FROM_SERVER;
339         loff_t off, to;
340         ino_t ino = file ? file_inode(file)->i_ino : 0;
341
342         _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
343
344         if (subreq->start >= i_size) {
345                 ret = NETFS_FILL_WITH_ZEROES;
346                 why = cachefiles_trace_read_after_eof;
347                 goto out_no_object;
348         }
349
350         if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
351                 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
352                 why = cachefiles_trace_read_no_data;
353                 goto out_no_object;
354         }
355
356         /* The object and the file may be being created in the background. */
357         if (!file) {
358                 why = cachefiles_trace_read_no_file;
359                 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
360                         goto out_no_object;
361                 file = cachefiles_cres_file(cres);
362                 if (!file)
363                         goto out_no_object;
364                 ino = file_inode(file)->i_ino;
365         }
366
367         object = cachefiles_cres_object(cres);
368         cache = object->volume->cache;
369         cachefiles_begin_secure(cache, &saved_cred);
370
371         off = cachefiles_inject_read_error();
372         if (off == 0)
373                 off = vfs_llseek(file, subreq->start, SEEK_DATA);
374         if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
375                 if (off == (loff_t)-ENXIO) {
376                         why = cachefiles_trace_read_seek_nxio;
377                         goto download_and_store;
378                 }
379                 trace_cachefiles_io_error(object, file_inode(file), off,
380                                           cachefiles_trace_seek_error);
381                 why = cachefiles_trace_read_seek_error;
382                 goto out;
383         }
384
385         if (off >= subreq->start + subreq->len) {
386                 why = cachefiles_trace_read_found_hole;
387                 goto download_and_store;
388         }
389
390         if (off > subreq->start) {
391                 off = round_up(off, cache->bsize);
392                 subreq->len = off - subreq->start;
393                 why = cachefiles_trace_read_found_part;
394                 goto download_and_store;
395         }
396
397         to = cachefiles_inject_read_error();
398         if (to == 0)
399                 to = vfs_llseek(file, subreq->start, SEEK_HOLE);
400         if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
401                 trace_cachefiles_io_error(object, file_inode(file), to,
402                                           cachefiles_trace_seek_error);
403                 why = cachefiles_trace_read_seek_error;
404                 goto out;
405         }
406
407         if (to < subreq->start + subreq->len) {
408                 if (subreq->start + subreq->len >= i_size)
409                         to = round_up(to, cache->bsize);
410                 else
411                         to = round_down(to, cache->bsize);
412                 subreq->len = to - subreq->start;
413         }
414
415         why = cachefiles_trace_read_have_data;
416         ret = NETFS_READ_FROM_CACHE;
417         goto out;
418
419 download_and_store:
420         __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
421 out:
422         cachefiles_end_secure(cache, saved_cred);
423 out_no_object:
424         trace_cachefiles_prep_read(subreq, ret, why, ino);
425         return ret;
426 }
427
428 /*
429  * Prepare for a write to occur.
430  */
431 static int __cachefiles_prepare_write(struct netfs_cache_resources *cres,
432                                       loff_t *_start, size_t *_len, loff_t i_size,
433                                       bool no_space_allocated_yet)
434 {
435         struct cachefiles_object *object = cachefiles_cres_object(cres);
436         struct cachefiles_cache *cache = object->volume->cache;
437         struct file *file = cachefiles_cres_file(cres);
438         loff_t start = *_start, pos;
439         size_t len = *_len, down;
440         int ret;
441
442         /* Round to DIO size */
443         down = start - round_down(start, PAGE_SIZE);
444         *_start = start - down;
445         *_len = round_up(down + len, PAGE_SIZE);
446
447         /* We need to work out whether there's sufficient disk space to perform
448          * the write - but we can skip that check if we have space already
449          * allocated.
450          */
451         if (no_space_allocated_yet)
452                 goto check_space;
453
454         pos = cachefiles_inject_read_error();
455         if (pos == 0)
456                 pos = vfs_llseek(file, *_start, SEEK_DATA);
457         if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
458                 if (pos == -ENXIO)
459                         goto check_space; /* Unallocated tail */
460                 trace_cachefiles_io_error(object, file_inode(file), pos,
461                                           cachefiles_trace_seek_error);
462                 return pos;
463         }
464         if ((u64)pos >= (u64)*_start + *_len)
465                 goto check_space; /* Unallocated region */
466
467         /* We have a block that's at least partially filled - if we're low on
468          * space, we need to see if it's fully allocated.  If it's not, we may
469          * want to cull it.
470          */
471         if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
472                                  cachefiles_has_space_check) == 0)
473                 return 0; /* Enough space to simply overwrite the whole block */
474
475         pos = cachefiles_inject_read_error();
476         if (pos == 0)
477                 pos = vfs_llseek(file, *_start, SEEK_HOLE);
478         if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
479                 trace_cachefiles_io_error(object, file_inode(file), pos,
480                                           cachefiles_trace_seek_error);
481                 return pos;
482         }
483         if ((u64)pos >= (u64)*_start + *_len)
484                 return 0; /* Fully allocated */
485
486         /* Partially allocated, but insufficient space: cull. */
487         fscache_count_no_write_space();
488         ret = cachefiles_inject_remove_error();
489         if (ret == 0)
490                 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
491                                     *_start, *_len);
492         if (ret < 0) {
493                 trace_cachefiles_io_error(object, file_inode(file), ret,
494                                           cachefiles_trace_fallocate_error);
495                 cachefiles_io_error_obj(object,
496                                         "CacheFiles: fallocate failed (%d)\n", ret);
497                 ret = -EIO;
498         }
499
500         return ret;
501
502 check_space:
503         return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
504                                     cachefiles_has_space_for_write);
505 }
506
507 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
508                                     loff_t *_start, size_t *_len, loff_t i_size,
509                                     bool no_space_allocated_yet)
510 {
511         struct cachefiles_object *object = cachefiles_cres_object(cres);
512         struct cachefiles_cache *cache = object->volume->cache;
513         const struct cred *saved_cred;
514         int ret;
515
516         if (!cachefiles_cres_file(cres)) {
517                 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
518                         return -ENOBUFS;
519                 if (!cachefiles_cres_file(cres))
520                         return -ENOBUFS;
521         }
522
523         cachefiles_begin_secure(cache, &saved_cred);
524         ret = __cachefiles_prepare_write(cres, _start, _len, i_size,
525                                          no_space_allocated_yet);
526         cachefiles_end_secure(cache, saved_cred);
527         return ret;
528 }
529
530 /*
531  * Clean up an operation.
532  */
533 static void cachefiles_end_operation(struct netfs_cache_resources *cres)
534 {
535         struct file *file = cachefiles_cres_file(cres);
536
537         if (file)
538                 fput(file);
539         fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
540 }
541
542 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
543         .end_operation          = cachefiles_end_operation,
544         .read                   = cachefiles_read,
545         .write                  = cachefiles_write,
546         .prepare_read           = cachefiles_prepare_read,
547         .prepare_write          = cachefiles_prepare_write,
548 };
549
550 /*
551  * Open the cache file when beginning a cache operation.
552  */
553 bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
554                                 enum fscache_want_state want_state)
555 {
556         struct cachefiles_object *object = cachefiles_cres_object(cres);
557
558         if (!cachefiles_cres_file(cres)) {
559                 cres->ops = &cachefiles_netfs_cache_ops;
560                 if (object->file) {
561                         spin_lock(&object->lock);
562                         if (!cres->cache_priv2 && object->file)
563                                 cres->cache_priv2 = get_file(object->file);
564                         spin_unlock(&object->lock);
565                 }
566         }
567
568         if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
569                 pr_err("failed to get cres->file\n");
570                 return false;
571         }
572
573         return true;
574 }