fs/backing-file.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common helpers for stackable filesystems and backing files.
 *
 * Forked from fs/overlayfs/file.c.
 *
 * Copyright (C) 2017 Red Hat, Inc.
 * Copyright (C) 2023 CTERA Networks.
 */

#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>

#include "internal.h"

/**
 * backing_file_open - open a backing file for kernel internal use
 * @user_path:  path that the user requested to open
 * @flags:      open flags
 * @real_path:  path of the backing file
 * @cred:       credentials for open
 *
 * Open a backing file for a stackable filesystem (e.g., overlayfs).
 * @user_path may be on the stackable filesystem and @real_path on the
 * underlying filesystem.  In this case, we want to be able to return the
 * @user_path of the stackable filesystem. This is done by embedding the
 * returned file into a container structure that also stores the stacked
 * file's path, which can be retrieved using backing_file_user_path().
 */
struct file *backing_file_open(const struct path *user_path, int flags,
                               const struct path *real_path,
                               const struct cred *cred)
{
        struct file *f;
        int error;

        f = alloc_empty_backing_file(flags, cred);
        if (IS_ERR(f))
                return f;

        path_get(user_path);
        *backing_file_user_path(f) = *user_path;
        error = vfs_open(real_path, f);
        if (error) {
                fput(f);
                f = ERR_PTR(error);
        }

        return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);
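
/*
 * A minimal usage sketch for backing_file_open() (names hypothetical):
 * a stacking filesystem that has resolved the real path backing an
 * open request could do roughly:
 *
 *      realfile = backing_file_open(&file->f_path, file->f_flags,
 *                                   &realpath, stacked_creds(inode->i_sb));
 *
 * stacked_creds() stands in for wherever the filesystem keeps the
 * credentials it wants I/O to the underlying filesystem charged to
 * (overlayfs, for instance, uses the mounter's credentials).
 */

/**
 * backing_tmpfile_open - open an unnamed temporary backing file
 * @user_path:  path on the stackable filesystem to report to the user
 * @flags:      open flags
 * @real_parentpath: path of the backing directory for the tmpfile
 * @mode:       mode of the new tmpfile
 * @cred:       credentials for open
 *
 * Like backing_file_open(), but the backing file is an unnamed
 * O_TMPFILE-style file created in @real_parentpath on the underlying
 * filesystem.
 */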
struct file *backing_tmpfile_open(const struct path *user_path, int flags,
                                  const struct path *real_parentpath,
                                  umode_t mode, const struct cred *cred)
{
        struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
        struct file *f;
        int error;

        f = alloc_empty_backing_file(flags, cred);
        if (IS_ERR(f))
                return f;

        path_get(user_path);
        *backing_file_user_path(f) = *user_path;
        error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
        if (error) {
                fput(f);
                f = ERR_PTR(error);
        }
        return f;
}
EXPORT_SYMBOL_GPL(backing_tmpfile_open);
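
/*
 * Per-request state for async reads/writes to a backing file.  The
 * embedded kiocb is the one submitted to the backing filesystem; it is
 * cloned from the originally submitted iocb, whose ki_pos is updated
 * and whose completion is called when the backing I/O finishes.
 */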
struct backing_aio {
        struct kiocb iocb;
        refcount_t ref;
        struct kiocb *orig_iocb;
        /* used for aio completion */
        void (*end_write)(struct file *);
        struct work_struct work;
        long res;
};

static struct kmem_cache *backing_aio_cachep;

#define BACKING_IOCB_MASK \
        (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
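
/*
 * The IOCB_* flags in BACKING_IOCB_MASK are defined to have the same
 * values as their RWF_* counterparts (see include/linux/fs.h), which is
 * what makes the cast below valid.
 */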
static rwf_t iocb_to_rw_flags(int flags)
{
        return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}

static void backing_aio_put(struct backing_aio *aio)
{
        if (refcount_dec_and_test(&aio->ref)) {
                fput(aio->iocb.ki_filp);
                kmem_cache_free(backing_aio_cachep, aio);
        }
}

static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
        struct kiocb *iocb = &aio->iocb;
        struct kiocb *orig_iocb = aio->orig_iocb;

        if (aio->end_write)
                aio->end_write(orig_iocb->ki_filp);

        orig_iocb->ki_pos = iocb->ki_pos;
        backing_aio_put(aio);
}

static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
        struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
        struct kiocb *orig_iocb = aio->orig_iocb;

        if (iocb->ki_flags & IOCB_WRITE)
                kiocb_end_write(iocb);

        backing_aio_cleanup(aio, res);
        orig_iocb->ki_complete(orig_iocb, res);
}

static void backing_aio_complete_work(struct work_struct *work)
{
        struct backing_aio *aio = container_of(work, struct backing_aio, work);

        backing_aio_rw_complete(&aio->iocb, aio->res);
}

static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
        struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);

        /*
         * Punt to a work queue to serialize updates of mtime/size.
         */
        aio->res = res;
        INIT_WORK(&aio->work, backing_aio_complete_work);
        queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
                   &aio->work);
}

static int backing_aio_init_wq(struct kiocb *iocb)
{
        struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;

        if (sb->s_dio_done_wq)
                return 0;

        return sb_init_dio_done_wq(sb);
}
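
/*
 * backing_file_read_iter - read from a backing file
 *
 * The read is performed on the backing @file with @ctx->cred
 * credentials.  For async iocbs, a backing_aio is cloned from @iocb so
 * that ki_pos can be propagated back to the original iocb on
 * completion.
 *
 * A sketch of a caller's ->read_iter() (names hypothetical, in the
 * style of fs/overlayfs/file.c):
 *
 *      struct backing_file_ctx ctx = {
 *              .cred = stacked_creds(file_inode(file)->i_sb),
 *              .user_file = file,
 *              .accessed = stacked_file_accessed,
 *      };
 *
 *      return backing_file_read_iter(realfile, iter, iocb, iocb->ki_flags,
 *                                    &ctx);
 */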
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
                               struct kiocb *iocb, int flags,
                               struct backing_file_ctx *ctx)
{
        struct backing_aio *aio = NULL;
        const struct cred *old_cred;
        ssize_t ret;

        if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
                return -EIO;

        if (!iov_iter_count(iter))
                return 0;

        if (iocb->ki_flags & IOCB_DIRECT &&
            !(file->f_mode & FMODE_CAN_ODIRECT))
                return -EINVAL;

        old_cred = override_creds(ctx->cred);
        if (is_sync_kiocb(iocb)) {
                rwf_t rwf = iocb_to_rw_flags(flags);

                ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
        } else {
                ret = -ENOMEM;
                aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
                if (!aio)
                        goto out;

                aio->orig_iocb = iocb;
                kiocb_clone(&aio->iocb, iocb, get_file(file));
                aio->iocb.ki_complete = backing_aio_rw_complete;
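                /*
                 * Two references: one dropped by backing_aio_put() right
                 * after submission, the other by backing_aio_cleanup(),
                 * either below on a synchronous return or from the
                 * completion callback.
                 */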
                refcount_set(&aio->ref, 2);
                ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
                backing_aio_put(aio);
                if (ret != -EIOCBQUEUED)
                        backing_aio_cleanup(aio, ret);
        }
out:
        revert_creds(old_cred);

        if (ctx->accessed)
                ctx->accessed(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);
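
/*
 * backing_file_write_iter - write to a backing file
 *
 * Privileges are stripped from @ctx->user_file before the write, which
 * is performed on the backing @file with @ctx->cred credentials.
 * @ctx->end_write() is called on @ctx->user_file after a synchronous
 * write, or from aio completion for an asynchronous one.
 */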
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
                                struct kiocb *iocb, int flags,
                                struct backing_file_ctx *ctx)
{
        const struct cred *old_cred;
        ssize_t ret;

        if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
                return -EIO;

        if (!iov_iter_count(iter))
                return 0;

        ret = file_remove_privs(ctx->user_file);
        if (ret)
                return ret;

        if (iocb->ki_flags & IOCB_DIRECT &&
            !(file->f_mode & FMODE_CAN_ODIRECT))
                return -EINVAL;

        /*
         * Stacked filesystems don't support deferred completions; don't
         * copy this property from the issuer in case it is set.
         */
        flags &= ~IOCB_DIO_CALLER_COMP;

        old_cred = override_creds(ctx->cred);
        if (is_sync_kiocb(iocb)) {
                rwf_t rwf = iocb_to_rw_flags(flags);

                ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
                if (ctx->end_write)
                        ctx->end_write(ctx->user_file);
        } else {
                struct backing_aio *aio;

                ret = backing_aio_init_wq(iocb);
                if (ret)
                        goto out;

                ret = -ENOMEM;
                aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
                if (!aio)
                        goto out;

                aio->orig_iocb = iocb;
                aio->end_write = ctx->end_write;
                kiocb_clone(&aio->iocb, iocb, get_file(file));
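                /*
                 * Override the flags copied by kiocb_clone() with the
                 * caller-provided flags, from which IOCB_DIO_CALLER_COMP
                 * was cleared above.
                 */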
                aio->iocb.ki_flags = flags;
                aio->iocb.ki_complete = backing_aio_queue_completion;
                refcount_set(&aio->ref, 2);
                ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
                backing_aio_put(aio);
                if (ret != -EIOCBQUEUED)
                        backing_aio_cleanup(aio, ret);
        }
out:
        revert_creds(old_cred);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);
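
/*
 * backing_file_splice_read - splice data from a backing file to a pipe
 *
 * As in backing_file_read_iter(), the read from the backing @in file is
 * performed with @ctx->cred credentials and @ctx->accessed() is called
 * on @ctx->user_file afterwards.
 */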
ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags,
                                 struct backing_file_ctx *ctx)
{
        const struct cred *old_cred;
        ssize_t ret;

        if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
                return -EIO;

        old_cred = override_creds(ctx->cred);
        ret = vfs_splice_read(in, ppos, pipe, len, flags);
        revert_creds(old_cred);

        if (ctx->accessed)
                ctx->accessed(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);
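
/*
 * backing_file_splice_write - splice data from a pipe to a backing file
 *
 * As in backing_file_write_iter(), privileges are stripped from
 * @ctx->user_file first, and the write to the backing @out file runs
 * with @ctx->cred credentials inside a file_start_write() section.
 */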
ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
                                  struct file *out, loff_t *ppos, size_t len,
                                  unsigned int flags,
                                  struct backing_file_ctx *ctx)
{
        const struct cred *old_cred;
        ssize_t ret;

        if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
                return -EIO;

        ret = file_remove_privs(ctx->user_file);
        if (ret)
                return ret;

        old_cred = override_creds(ctx->cred);
        file_start_write(out);
        ret = iter_file_splice_write(pipe, out, ppos, len, flags);
        file_end_write(out);
        revert_creds(old_cred);

        if (ctx->end_write)
                ctx->end_write(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);
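
/*
 * backing_file_mmap - mmap a backing file
 *
 * The vma, which was set up for @ctx->user_file, is switched over to
 * the backing @file with vma_set_file() before calling the backing
 * filesystem's ->mmap() with @ctx->cred credentials.
 */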
int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
                      struct backing_file_ctx *ctx)
{
        const struct cred *old_cred;
        int ret;

        if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
            WARN_ON_ONCE(ctx->user_file != vma->vm_file))
                return -EIO;

        if (!file->f_op->mmap)
                return -ENODEV;

        vma_set_file(vma, file);

        old_cred = override_creds(ctx->cred);
        ret = call_mmap(vma->vm_file, vma);
        revert_creds(old_cred);

        if (ctx->accessed)
                ctx->accessed(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_mmap);

static int __init backing_aio_init(void)
{
        backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
        if (!backing_aio_cachep)
                return -ENOMEM;

        return 0;
}
fs_initcall(backing_aio_init);