Merge tag 'nfsd-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
[linux-block.git] / fs / overlayfs / file.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 Red Hat, Inc.
4  */
5
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/security.h>
13 #include <linux/fs.h>
14 #include <linux/backing-file.h>
15 #include "overlayfs.h"
16
/*
 * One-character tag describing @realinode relative to the overlay @inode,
 * used in the open debug message:
 *   'l' - @realinode is not the overlay inode's upper inode
 *   'u' - it is the upper inode and ovl_has_upperdata() is true
 *   'm' - it is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
26
27 static struct file *ovl_open_realfile(const struct file *file,
28                                       const struct path *realpath)
29 {
30         struct inode *realinode = d_inode(realpath->dentry);
31         struct inode *inode = file_inode(file);
32         struct mnt_idmap *real_idmap;
33         struct file *realfile;
34         const struct cred *old_cred;
35         int flags = file->f_flags | OVL_OPEN_FLAGS;
36         int acc_mode = ACC_MODE(flags);
37         int err;
38
39         if (flags & O_APPEND)
40                 acc_mode |= MAY_APPEND;
41
42         old_cred = ovl_override_creds(inode->i_sb);
43         real_idmap = mnt_idmap(realpath->mnt);
44         err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
45         if (err) {
46                 realfile = ERR_PTR(err);
47         } else {
48                 if (!inode_owner_or_capable(real_idmap, realinode))
49                         flags &= ~O_NOATIME;
50
51                 realfile = backing_file_open(&file->f_path, flags, realpath,
52                                              current_cred());
53         }
54         revert_creds(old_cred);
55
56         pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
57                  file, file, ovl_whatisit(inode, realinode), file->f_flags,
58                  realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
59
60         return realfile;
61 }
62
63 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
64
65 static int ovl_change_flags(struct file *file, unsigned int flags)
66 {
67         struct inode *inode = file_inode(file);
68         int err;
69
70         flags &= OVL_SETFL_MASK;
71
72         if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
73                 return -EPERM;
74
75         if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
76                 return -EINVAL;
77
78         if (file->f_op->check_flags) {
79                 err = file->f_op->check_flags(flags);
80                 if (err)
81                         return err;
82         }
83
84         spin_lock(&file->f_lock);
85         file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
86         file->f_iocb_flags = iocb_flags(file);
87         spin_unlock(&file->f_lock);
88
89         return 0;
90 }
91
92 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
93                                bool allow_meta)
94 {
95         struct dentry *dentry = file_dentry(file);
96         struct path realpath;
97         int err;
98
99         real->flags = 0;
100         real->file = file->private_data;
101
102         if (allow_meta) {
103                 ovl_path_real(dentry, &realpath);
104         } else {
105                 /* lazy lookup and verify of lowerdata */
106                 err = ovl_verify_lowerdata(dentry);
107                 if (err)
108                         return err;
109
110                 ovl_path_realdata(dentry, &realpath);
111         }
112         if (!realpath.dentry)
113                 return -EIO;
114
115         /* Has it been copied up since we'd opened it? */
116         if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
117                 real->flags = FDPUT_FPUT;
118                 real->file = ovl_open_realfile(file, &realpath);
119
120                 return PTR_ERR_OR_ZERO(real->file);
121         }
122
123         /* Did the flags change since open? */
124         if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
125                 return ovl_change_flags(real->file, file->f_flags);
126
127         return 0;
128 }
129
130 static int ovl_real_fdget(const struct file *file, struct fd *real)
131 {
132         if (d_is_dir(file_dentry(file))) {
133                 real->flags = 0;
134                 real->file = ovl_dir_real_file(file, false);
135
136                 return PTR_ERR_OR_ZERO(real->file);
137         }
138
139         return ovl_real_fdget_meta(file, real, false);
140 }
141
142 static int ovl_open(struct inode *inode, struct file *file)
143 {
144         struct dentry *dentry = file_dentry(file);
145         struct file *realfile;
146         struct path realpath;
147         int err;
148
149         /* lazy lookup and verify lowerdata */
150         err = ovl_verify_lowerdata(dentry);
151         if (err)
152                 return err;
153
154         err = ovl_maybe_copy_up(dentry, file->f_flags);
155         if (err)
156                 return err;
157
158         /* No longer need these flags, so don't pass them on to underlying fs */
159         file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
160
161         ovl_path_realdata(dentry, &realpath);
162         if (!realpath.dentry)
163                 return -EIO;
164
165         realfile = ovl_open_realfile(file, &realpath);
166         if (IS_ERR(realfile))
167                 return PTR_ERR(realfile);
168
169         file->private_data = realfile;
170
171         return 0;
172 }
173
174 static int ovl_release(struct inode *inode, struct file *file)
175 {
176         fput(file->private_data);
177
178         return 0;
179 }
180
181 static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
182 {
183         struct inode *inode = file_inode(file);
184         struct fd real;
185         const struct cred *old_cred;
186         loff_t ret;
187
188         /*
189          * The two special cases below do not need to involve real fs,
190          * so we can optimizing concurrent callers.
191          */
192         if (offset == 0) {
193                 if (whence == SEEK_CUR)
194                         return file->f_pos;
195
196                 if (whence == SEEK_SET)
197                         return vfs_setpos(file, 0, 0);
198         }
199
200         ret = ovl_real_fdget(file, &real);
201         if (ret)
202                 return ret;
203
204         /*
205          * Overlay file f_pos is the master copy that is preserved
206          * through copy up and modified on read/write, but only real
207          * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
208          * limitations that are more strict than ->s_maxbytes for specific
209          * files, so we use the real file to perform seeks.
210          */
211         ovl_inode_lock(inode);
212         real.file->f_pos = file->f_pos;
213
214         old_cred = ovl_override_creds(inode->i_sb);
215         ret = vfs_llseek(real.file, offset, whence);
216         revert_creds(old_cred);
217
218         file->f_pos = real.file->f_pos;
219         ovl_inode_unlock(inode);
220
221         fdput(real);
222
223         return ret;
224 }
225
/* Called after a modification of @file: refresh size/mtime via ovl_copyattr(). */
static void ovl_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	ovl_copyattr(inode);
}
231
232 static void ovl_file_accessed(struct file *file)
233 {
234         struct inode *inode, *upperinode;
235         struct timespec64 ctime, uctime;
236         struct timespec64 mtime, umtime;
237
238         if (file->f_flags & O_NOATIME)
239                 return;
240
241         inode = file_inode(file);
242         upperinode = ovl_inode_upper(inode);
243
244         if (!upperinode)
245                 return;
246
247         ctime = inode_get_ctime(inode);
248         uctime = inode_get_ctime(upperinode);
249         mtime = inode_get_mtime(inode);
250         umtime = inode_get_mtime(upperinode);
251         if ((!timespec64_equal(&mtime, &umtime)) ||
252              !timespec64_equal(&ctime, &uctime)) {
253                 inode_set_mtime_to_ts(inode, inode_get_mtime(upperinode));
254                 inode_set_ctime_to_ts(inode, uctime);
255         }
256
257         touch_atime(&file->f_path);
258 }
259
260 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
261 {
262         struct file *file = iocb->ki_filp;
263         struct fd real;
264         ssize_t ret;
265         struct backing_file_ctx ctx = {
266                 .cred = ovl_creds(file_inode(file)->i_sb),
267                 .user_file = file,
268                 .accessed = ovl_file_accessed,
269         };
270
271         if (!iov_iter_count(iter))
272                 return 0;
273
274         ret = ovl_real_fdget(file, &real);
275         if (ret)
276                 return ret;
277
278         ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
279                                      &ctx);
280         fdput(real);
281
282         return ret;
283 }
284
285 static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
286 {
287         struct file *file = iocb->ki_filp;
288         struct inode *inode = file_inode(file);
289         struct fd real;
290         ssize_t ret;
291         int ifl = iocb->ki_flags;
292         struct backing_file_ctx ctx = {
293                 .cred = ovl_creds(inode->i_sb),
294                 .user_file = file,
295                 .end_write = ovl_file_modified,
296         };
297
298         if (!iov_iter_count(iter))
299                 return 0;
300
301         inode_lock(inode);
302         /* Update mode */
303         ovl_copyattr(inode);
304
305         ret = ovl_real_fdget(file, &real);
306         if (ret)
307                 goto out_unlock;
308
309         if (!ovl_should_sync(OVL_FS(inode->i_sb)))
310                 ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
311
312         /*
313          * Overlayfs doesn't support deferred completions, don't copy
314          * this property in case it is set by the issuer.
315          */
316         ifl &= ~IOCB_DIO_CALLER_COMP;
317         ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
318         fdput(real);
319
320 out_unlock:
321         inode_unlock(inode);
322
323         return ret;
324 }
325
326 static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
327                                struct pipe_inode_info *pipe, size_t len,
328                                unsigned int flags)
329 {
330         struct fd real;
331         ssize_t ret;
332         struct backing_file_ctx ctx = {
333                 .cred = ovl_creds(file_inode(in)->i_sb),
334                 .user_file = in,
335                 .accessed = ovl_file_accessed,
336         };
337
338         ret = ovl_real_fdget(in, &real);
339         if (ret)
340                 return ret;
341
342         ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
343         fdput(real);
344
345         return ret;
346 }
347
348 /*
349  * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
350  * due to lock order inversion between pipe->mutex in iter_file_splice_write()
351  * and file_start_write(real.file) in ovl_write_iter().
352  *
353  * So do everything ovl_write_iter() does and call iter_file_splice_write() on
354  * the real file.
355  */
356 static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
357                                 loff_t *ppos, size_t len, unsigned int flags)
358 {
359         struct fd real;
360         struct inode *inode = file_inode(out);
361         ssize_t ret;
362         struct backing_file_ctx ctx = {
363                 .cred = ovl_creds(inode->i_sb),
364                 .user_file = out,
365                 .end_write = ovl_file_modified,
366         };
367
368         inode_lock(inode);
369         /* Update mode */
370         ovl_copyattr(inode);
371
372         ret = ovl_real_fdget(out, &real);
373         if (ret)
374                 goto out_unlock;
375
376         ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
377         fdput(real);
378
379 out_unlock:
380         inode_unlock(inode);
381
382         return ret;
383 }
384
385 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
386 {
387         struct fd real;
388         const struct cred *old_cred;
389         int ret;
390
391         ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
392         if (ret <= 0)
393                 return ret;
394
395         ret = ovl_real_fdget_meta(file, &real, !datasync);
396         if (ret)
397                 return ret;
398
399         /* Don't sync lower file for fear of receiving EROFS error */
400         if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
401                 old_cred = ovl_override_creds(file_inode(file)->i_sb);
402                 ret = vfs_fsync_range(real.file, start, end, datasync);
403                 revert_creds(old_cred);
404         }
405
406         fdput(real);
407
408         return ret;
409 }
410
411 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
412 {
413         struct file *realfile = file->private_data;
414         struct backing_file_ctx ctx = {
415                 .cred = ovl_creds(file_inode(file)->i_sb),
416                 .user_file = file,
417                 .accessed = ovl_file_accessed,
418         };
419
420         return backing_file_mmap(realfile, vma, &ctx);
421 }
422
423 static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
424 {
425         struct inode *inode = file_inode(file);
426         struct fd real;
427         const struct cred *old_cred;
428         int ret;
429
430         inode_lock(inode);
431         /* Update mode */
432         ovl_copyattr(inode);
433         ret = file_remove_privs(file);
434         if (ret)
435                 goto out_unlock;
436
437         ret = ovl_real_fdget(file, &real);
438         if (ret)
439                 goto out_unlock;
440
441         old_cred = ovl_override_creds(file_inode(file)->i_sb);
442         ret = vfs_fallocate(real.file, mode, offset, len);
443         revert_creds(old_cred);
444
445         /* Update size */
446         ovl_file_modified(file);
447
448         fdput(real);
449
450 out_unlock:
451         inode_unlock(inode);
452
453         return ret;
454 }
455
456 static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
457 {
458         struct fd real;
459         const struct cred *old_cred;
460         int ret;
461
462         ret = ovl_real_fdget(file, &real);
463         if (ret)
464                 return ret;
465
466         old_cred = ovl_override_creds(file_inode(file)->i_sb);
467         ret = vfs_fadvise(real.file, offset, len, advice);
468         revert_creds(old_cred);
469
470         fdput(real);
471
472         return ret;
473 }
474
/* Selects which VFS helper ovl_copyfile() invokes on the real files. */
enum ovl_copyop {
	OVL_COPY = 0,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
480
481 static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
482                             struct file *file_out, loff_t pos_out,
483                             loff_t len, unsigned int flags, enum ovl_copyop op)
484 {
485         struct inode *inode_out = file_inode(file_out);
486         struct fd real_in, real_out;
487         const struct cred *old_cred;
488         loff_t ret;
489
490         inode_lock(inode_out);
491         if (op != OVL_DEDUPE) {
492                 /* Update mode */
493                 ovl_copyattr(inode_out);
494                 ret = file_remove_privs(file_out);
495                 if (ret)
496                         goto out_unlock;
497         }
498
499         ret = ovl_real_fdget(file_out, &real_out);
500         if (ret)
501                 goto out_unlock;
502
503         ret = ovl_real_fdget(file_in, &real_in);
504         if (ret) {
505                 fdput(real_out);
506                 goto out_unlock;
507         }
508
509         old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
510         switch (op) {
511         case OVL_COPY:
512                 ret = vfs_copy_file_range(real_in.file, pos_in,
513                                           real_out.file, pos_out, len, flags);
514                 break;
515
516         case OVL_CLONE:
517                 ret = vfs_clone_file_range(real_in.file, pos_in,
518                                            real_out.file, pos_out, len, flags);
519                 break;
520
521         case OVL_DEDUPE:
522                 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
523                                                 real_out.file, pos_out, len,
524                                                 flags);
525                 break;
526         }
527         revert_creds(old_cred);
528
529         /* Update size */
530         ovl_file_modified(file_out);
531
532         fdput(real_in);
533         fdput(real_out);
534
535 out_unlock:
536         inode_unlock(inode_out);
537
538         return ret;
539 }
540
541 static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
542                                    struct file *file_out, loff_t pos_out,
543                                    size_t len, unsigned int flags)
544 {
545         return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
546                             OVL_COPY);
547 }
548
549 static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
550                                    struct file *file_out, loff_t pos_out,
551                                    loff_t len, unsigned int remap_flags)
552 {
553         enum ovl_copyop op;
554
555         if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
556                 return -EINVAL;
557
558         if (remap_flags & REMAP_FILE_DEDUP)
559                 op = OVL_DEDUPE;
560         else
561                 op = OVL_CLONE;
562
563         /*
564          * Don't copy up because of a dedupe request, this wouldn't make sense
565          * most of the time (data would be duplicated instead of deduplicated).
566          */
567         if (op == OVL_DEDUPE &&
568             (!ovl_inode_upper(file_inode(file_in)) ||
569              !ovl_inode_upper(file_inode(file_out))))
570                 return -EPERM;
571
572         return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
573                             remap_flags, op);
574 }
575
576 static int ovl_flush(struct file *file, fl_owner_t id)
577 {
578         struct fd real;
579         const struct cred *old_cred;
580         int err;
581
582         err = ovl_real_fdget(file, &real);
583         if (err)
584                 return err;
585
586         if (real.file->f_op->flush) {
587                 old_cred = ovl_override_creds(file_inode(file)->i_sb);
588                 err = real.file->f_op->flush(real.file, id);
589                 revert_creds(old_cred);
590         }
591         fdput(real);
592
593         return err;
594 }
595
596 const struct file_operations ovl_file_operations = {
597         .open           = ovl_open,
598         .release        = ovl_release,
599         .llseek         = ovl_llseek,
600         .read_iter      = ovl_read_iter,
601         .write_iter     = ovl_write_iter,
602         .fsync          = ovl_fsync,
603         .mmap           = ovl_mmap,
604         .fallocate      = ovl_fallocate,
605         .fadvise        = ovl_fadvise,
606         .flush          = ovl_flush,
607         .splice_read    = ovl_splice_read,
608         .splice_write   = ovl_splice_write,
609
610         .copy_file_range        = ovl_copy_file_range,
611         .remap_file_range       = ovl_remap_file_range,
612 };