ovl: decide if revalidate needed on a per-dentry basis
[linux-2.6-block.git] / fs / overlayfs / super.c
CommitLineData
d2912cb1 1// SPDX-License-Identifier: GPL-2.0-only
e9be9d5e
MS
2/*
3 *
4 * Copyright (C) 2011 Novell Inc.
e9be9d5e
MS
5 */
6
5b825c3a 7#include <uapi/linux/magic.h>
e9be9d5e
MS
8#include <linux/fs.h>
9#include <linux/namei.h>
10#include <linux/xattr.h>
e9be9d5e 11#include <linux/mount.h>
e9be9d5e
MS
12#include <linux/parser.h>
13#include <linux/module.h>
cc259639 14#include <linux/statfs.h>
f45827e8 15#include <linux/seq_file.h>
d837a49b 16#include <linux/posix_acl_xattr.h>
e487d889 17#include <linux/exportfs.h>
e9be9d5e
MS
18#include "overlayfs.h"
19
20MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
21MODULE_DESCRIPTION("Overlay filesystem");
22MODULE_LICENSE("GPL");
23
e9be9d5e
MS
24
25struct ovl_dir_cache;
26
a78d9f0d
MS
27#define OVL_MAX_STACK 500
28
688ea0e5
MS
29static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
30module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
253e7483 31MODULE_PARM_DESC(redirect_dir,
688ea0e5 32 "Default to on or off for the redirect_dir feature");
e9be9d5e 33
438c84c2
MS
34static bool ovl_redirect_always_follow =
35 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
36module_param_named(redirect_always_follow, ovl_redirect_always_follow,
37 bool, 0644);
253e7483 38MODULE_PARM_DESC(redirect_always_follow,
438c84c2
MS
39 "Follow redirects even if redirect_dir feature is turned off");
40
02bcd157
AG
41static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
42module_param_named(index, ovl_index_def, bool, 0644);
253e7483 43MODULE_PARM_DESC(index,
02bcd157
AG
44 "Default to on or off for the inodes index feature");
45
f168f109
AG
46static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
47module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
253e7483 48MODULE_PARM_DESC(nfs_export,
f168f109
AG
49 "Default to on or off for the NFS export feature");
50
795939a9
AG
51static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
52module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
253e7483 53MODULE_PARM_DESC(xino_auto,
795939a9
AG
54 "Auto enable xino feature");
55
4155c10a
MS
56static void ovl_entry_stack_free(struct ovl_entry *oe)
57{
58 unsigned int i;
59
60 for (i = 0; i < oe->numlower; i++)
61 dput(oe->lowerstack[i].dentry);
62}
63
d5791044
VG
64static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
65module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
253e7483 66MODULE_PARM_DESC(metacopy,
d5791044
VG
67 "Default to on or off for the metadata only copy up feature");
68
e9be9d5e
MS
69static void ovl_dentry_release(struct dentry *dentry)
70{
71 struct ovl_entry *oe = dentry->d_fsdata;
72
73 if (oe) {
4155c10a 74 ovl_entry_stack_free(oe);
e9be9d5e
MS
75 kfree_rcu(oe, rcu);
76 }
77}
78
2d902671 79static struct dentry *ovl_d_real(struct dentry *dentry,
fb16043b 80 const struct inode *inode)
d101a125
MS
81{
82 struct dentry *real;
83
e8c985ba
MS
84 /* It's an overlay file */
85 if (inode && d_inode(dentry) == inode)
86 return dentry;
87
ca4c8a3a 88 if (!d_is_reg(dentry)) {
d101a125
MS
89 if (!inode || inode == d_inode(dentry))
90 return dentry;
91 goto bug;
92 }
93
94 real = ovl_dentry_upper(dentry);
2c3d7358 95 if (real && (inode == d_inode(real)))
d101a125
MS
96 return real;
97
2c3d7358
VG
98 if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
99 return real;
100
101 real = ovl_dentry_lowerdata(dentry);
d101a125
MS
102 if (!real)
103 goto bug;
104
c4fcfc16 105 /* Handle recursion */
fb16043b 106 real = d_real(real, inode);
c4fcfc16 107
d101a125
MS
108 if (!inode || inode == d_inode(real))
109 return real;
d101a125 110bug:
656189d2 111 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
d101a125
MS
112 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
113 return dentry;
114}
115
3bb7df92 116static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
7c03b5d4 117{
7c03b5d4
MS
118 int ret = 1;
119
3bb7df92
MS
120 if (weak) {
121 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
122 ret = d->d_op->d_weak_revalidate(d, flags);
123 } else if (d->d_flags & DCACHE_OP_REVALIDATE) {
124 ret = d->d_op->d_revalidate(d, flags);
125 if (!ret) {
126 if (!(flags & LOOKUP_RCU))
127 d_invalidate(d);
128 ret = -ESTALE;
7c03b5d4
MS
129 }
130 }
3bb7df92 131 return ret;
7c03b5d4
MS
132}
133
3bb7df92
MS
134static int ovl_dentry_revalidate_common(struct dentry *dentry,
135 unsigned int flags, bool weak)
7c03b5d4
MS
136{
137 struct ovl_entry *oe = dentry->d_fsdata;
138 unsigned int i;
139 int ret = 1;
140
3bb7df92
MS
141 for (i = 0; ret > 0 && i < oe->numlower; i++) {
142 ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
143 weak);
7c03b5d4
MS
144 }
145 return ret;
146}
147
3bb7df92
MS
148static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
149{
150 return ovl_dentry_revalidate_common(dentry, flags, false);
151}
152
153static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
154{
155 return ovl_dentry_revalidate_common(dentry, flags, true);
156}
157
e9be9d5e
MS
158static const struct dentry_operations ovl_dentry_operations = {
159 .d_release = ovl_dentry_release,
d101a125 160 .d_real = ovl_d_real,
7c03b5d4
MS
161 .d_revalidate = ovl_dentry_revalidate,
162 .d_weak_revalidate = ovl_dentry_weak_revalidate,
163};
164
13cf199d
AG
165static struct kmem_cache *ovl_inode_cachep;
166
167static struct inode *ovl_alloc_inode(struct super_block *sb)
168{
169 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
170
b3885bd6
HN
171 if (!oi)
172 return NULL;
173
04a01ac7 174 oi->cache = NULL;
cf31c463 175 oi->redirect = NULL;
04a01ac7 176 oi->version = 0;
13c72075 177 oi->flags = 0;
09d8b586 178 oi->__upperdentry = NULL;
25b7713a 179 oi->lower = NULL;
2664bd08 180 oi->lowerdata = NULL;
a015dafc 181 mutex_init(&oi->lock);
25b7713a 182
13cf199d
AG
183 return &oi->vfs_inode;
184}
185
0b269ded 186static void ovl_free_inode(struct inode *inode)
13cf199d 187{
0b269ded 188 struct ovl_inode *oi = OVL_I(inode);
13cf199d 189
0b269ded
AV
190 kfree(oi->redirect);
191 mutex_destroy(&oi->lock);
192 kmem_cache_free(ovl_inode_cachep, oi);
13cf199d
AG
193}
194
195static void ovl_destroy_inode(struct inode *inode)
196{
09d8b586
MS
197 struct ovl_inode *oi = OVL_I(inode);
198
199 dput(oi->__upperdentry);
31747eda 200 iput(oi->lower);
2664bd08
VG
201 if (S_ISDIR(inode->i_mode))
202 ovl_dir_cache_free(inode);
203 else
204 iput(oi->lowerdata);
13cf199d
AG
205}
206
ad204488 207static void ovl_free_fs(struct ovl_fs *ofs)
e9be9d5e 208{
dd662667 209 unsigned i;
e9be9d5e 210
0be0bfd2 211 iput(ofs->workbasedir_trap);
146d62e5
AG
212 iput(ofs->indexdir_trap);
213 iput(ofs->workdir_trap);
214 iput(ofs->upperdir_trap);
ad204488
MS
215 dput(ofs->indexdir);
216 dput(ofs->workdir);
217 if (ofs->workdir_locked)
218 ovl_inuse_unlock(ofs->workbasedir);
219 dput(ofs->workbasedir);
220 if (ofs->upperdir_locked)
221 ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
222 mntput(ofs->upper_mnt);
94375f9d
AG
223 for (i = 1; i < ofs->numlayer; i++) {
224 iput(ofs->layers[i].trap);
225 mntput(ofs->layers[i].mnt);
146d62e5 226 }
94375f9d 227 kfree(ofs->layers);
b7bf9908 228 for (i = 0; i < ofs->numfs; i++)
07f1e596
AG
229 free_anon_bdev(ofs->fs[i].pseudo_dev);
230 kfree(ofs->fs);
ad204488
MS
231
232 kfree(ofs->config.lowerdir);
233 kfree(ofs->config.upperdir);
234 kfree(ofs->config.workdir);
438c84c2 235 kfree(ofs->config.redirect_mode);
ad204488
MS
236 if (ofs->creator_cred)
237 put_cred(ofs->creator_cred);
238 kfree(ofs);
e9be9d5e
MS
239}
240
a9075cdb
MS
241static void ovl_put_super(struct super_block *sb)
242{
243 struct ovl_fs *ofs = sb->s_fs_info;
244
245 ovl_free_fs(ofs);
246}
247
e8d4bfe3 248/* Sync real dirty inodes in upper filesystem (if it exists) */
e593b2bf
AG
249static int ovl_sync_fs(struct super_block *sb, int wait)
250{
ad204488 251 struct ovl_fs *ofs = sb->s_fs_info;
e593b2bf
AG
252 struct super_block *upper_sb;
253 int ret;
254
ad204488 255 if (!ofs->upper_mnt)
e593b2bf 256 return 0;
e8d4bfe3
CX
257
258 /*
259 * If this is a sync(2) call or an emergency sync, all the super blocks
260 * will be iterated, including upper_sb, so no need to do anything.
261 *
262 * If this is a syncfs(2) call, then we do need to call
263 * sync_filesystem() on upper_sb, but enough if we do it when being
264 * called with wait == 1.
265 */
266 if (!wait)
e593b2bf
AG
267 return 0;
268
e8d4bfe3
CX
269 upper_sb = ofs->upper_mnt->mnt_sb;
270
e593b2bf 271 down_read(&upper_sb->s_umount);
e8d4bfe3 272 ret = sync_filesystem(upper_sb);
e593b2bf 273 up_read(&upper_sb->s_umount);
e8d4bfe3 274
e593b2bf
AG
275 return ret;
276}
277
cc259639
AW
278/**
279 * ovl_statfs
280 * @sb: The overlayfs super block
281 * @buf: The struct kstatfs to fill in with stats
282 *
283 * Get the filesystem statistics. As writes always target the upper layer
4ebc5818 284 * filesystem pass the statfs to the upper filesystem (if it exists)
cc259639
AW
285 */
286static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
287{
288 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
289 struct dentry *root_dentry = dentry->d_sb->s_root;
290 struct path path;
291 int err;
292
4ebc5818 293 ovl_path_real(root_dentry, &path);
cc259639
AW
294
295 err = vfs_statfs(&path, buf);
296 if (!err) {
6b2d5fe4 297 buf->f_namelen = ofs->namelen;
cc259639
AW
298 buf->f_type = OVERLAYFS_SUPER_MAGIC;
299 }
300
301 return err;
302}
303
02bcd157 304/* Will this overlay be forced to mount/remount ro? */
ad204488 305static bool ovl_force_readonly(struct ovl_fs *ofs)
02bcd157 306{
ad204488 307 return (!ofs->upper_mnt || !ofs->workdir);
02bcd157
AG
308}
309
438c84c2
MS
310static const char *ovl_redirect_mode_def(void)
311{
312 return ovl_redirect_dir_def ? "on" : "off";
313}
314
795939a9
AG
315enum {
316 OVL_XINO_OFF,
317 OVL_XINO_AUTO,
318 OVL_XINO_ON,
319};
320
321static const char * const ovl_xino_str[] = {
322 "off",
323 "auto",
324 "on",
325};
326
327static inline int ovl_xino_def(void)
328{
329 return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
330}
331
f45827e8
EZ
332/**
333 * ovl_show_options
334 *
335 * Prints the mount options for a given superblock.
336 * Returns zero; does not fail.
337 */
338static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
339{
340 struct super_block *sb = dentry->d_sb;
ad204488 341 struct ovl_fs *ofs = sb->s_fs_info;
f45827e8 342
ad204488
MS
343 seq_show_option(m, "lowerdir", ofs->config.lowerdir);
344 if (ofs->config.upperdir) {
345 seq_show_option(m, "upperdir", ofs->config.upperdir);
346 seq_show_option(m, "workdir", ofs->config.workdir);
53a08cb9 347 }
ad204488 348 if (ofs->config.default_permissions)
8d3095f4 349 seq_puts(m, ",default_permissions");
438c84c2
MS
350 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
351 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
ad204488 352 if (ofs->config.index != ovl_index_def)
438c84c2 353 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
f168f109
AG
354 if (ofs->config.nfs_export != ovl_nfs_export_def)
355 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
356 "on" : "off");
0f831ec8 357 if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
795939a9 358 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
d5791044
VG
359 if (ofs->config.metacopy != ovl_metacopy_def)
360 seq_printf(m, ",metacopy=%s",
361 ofs->config.metacopy ? "on" : "off");
f45827e8
EZ
362 return 0;
363}
364
3cdf6fe9
SL
365static int ovl_remount(struct super_block *sb, int *flags, char *data)
366{
ad204488 367 struct ovl_fs *ofs = sb->s_fs_info;
3cdf6fe9 368
1751e8a6 369 if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
3cdf6fe9
SL
370 return -EROFS;
371
372 return 0;
373}
374
e9be9d5e 375static const struct super_operations ovl_super_operations = {
13cf199d 376 .alloc_inode = ovl_alloc_inode,
0b269ded 377 .free_inode = ovl_free_inode,
13cf199d
AG
378 .destroy_inode = ovl_destroy_inode,
379 .drop_inode = generic_delete_inode,
e9be9d5e 380 .put_super = ovl_put_super,
e593b2bf 381 .sync_fs = ovl_sync_fs,
cc259639 382 .statfs = ovl_statfs,
f45827e8 383 .show_options = ovl_show_options,
3cdf6fe9 384 .remount_fs = ovl_remount,
e9be9d5e
MS
385};
386
387enum {
388 OPT_LOWERDIR,
389 OPT_UPPERDIR,
390 OPT_WORKDIR,
8d3095f4 391 OPT_DEFAULT_PERMISSIONS,
438c84c2 392 OPT_REDIRECT_DIR,
02bcd157
AG
393 OPT_INDEX_ON,
394 OPT_INDEX_OFF,
f168f109
AG
395 OPT_NFS_EXPORT_ON,
396 OPT_NFS_EXPORT_OFF,
795939a9
AG
397 OPT_XINO_ON,
398 OPT_XINO_OFF,
399 OPT_XINO_AUTO,
d5791044
VG
400 OPT_METACOPY_ON,
401 OPT_METACOPY_OFF,
e9be9d5e
MS
402 OPT_ERR,
403};
404
405static const match_table_t ovl_tokens = {
406 {OPT_LOWERDIR, "lowerdir=%s"},
407 {OPT_UPPERDIR, "upperdir=%s"},
408 {OPT_WORKDIR, "workdir=%s"},
8d3095f4 409 {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
438c84c2 410 {OPT_REDIRECT_DIR, "redirect_dir=%s"},
02bcd157
AG
411 {OPT_INDEX_ON, "index=on"},
412 {OPT_INDEX_OFF, "index=off"},
f168f109
AG
413 {OPT_NFS_EXPORT_ON, "nfs_export=on"},
414 {OPT_NFS_EXPORT_OFF, "nfs_export=off"},
795939a9
AG
415 {OPT_XINO_ON, "xino=on"},
416 {OPT_XINO_OFF, "xino=off"},
417 {OPT_XINO_AUTO, "xino=auto"},
d5791044
VG
418 {OPT_METACOPY_ON, "metacopy=on"},
419 {OPT_METACOPY_OFF, "metacopy=off"},
e9be9d5e
MS
420 {OPT_ERR, NULL}
421};
422
91c77947
MS
/*
 * Cut the next comma-separated option off the front of *@s.
 *
 * A backslash protects the character after it, so "\," does not end the
 * option (the backslash itself is left in place).  The terminating comma is
 * overwritten with NUL and *@s is advanced past it; when the input is used
 * up *@s becomes NULL.  Returns the start of the option, or NULL when *@s
 * was already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Step over an escaped character; stop on a trailing '\' */
		if (*cur == '\\' && !*++cur)
			break;
		cur++;
	}

	*s = NULL;
	return start;
}
445
438c84c2
MS
446static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
447{
448 if (strcmp(mode, "on") == 0) {
449 config->redirect_dir = true;
450 /*
451 * Does not make sense to have redirect creation without
452 * redirect following.
453 */
454 config->redirect_follow = true;
455 } else if (strcmp(mode, "follow") == 0) {
456 config->redirect_follow = true;
457 } else if (strcmp(mode, "off") == 0) {
458 if (ovl_redirect_always_follow)
459 config->redirect_follow = true;
460 } else if (strcmp(mode, "nofollow") != 0) {
1bd0a3ae 461 pr_err("bad mount option \"redirect_dir=%s\"\n",
438c84c2
MS
462 mode);
463 return -EINVAL;
464 }
465
466 return 0;
467}
468
e9be9d5e
MS
469static int ovl_parse_opt(char *opt, struct ovl_config *config)
470{
471 char *p;
d5791044 472 int err;
d47748e5 473 bool metacopy_opt = false, redirect_opt = false;
e9be9d5e 474
438c84c2
MS
475 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
476 if (!config->redirect_mode)
477 return -ENOMEM;
478
91c77947 479 while ((p = ovl_next_opt(&opt)) != NULL) {
e9be9d5e
MS
480 int token;
481 substring_t args[MAX_OPT_ARGS];
482
483 if (!*p)
484 continue;
485
486 token = match_token(p, ovl_tokens, args);
487 switch (token) {
488 case OPT_UPPERDIR:
489 kfree(config->upperdir);
490 config->upperdir = match_strdup(&args[0]);
491 if (!config->upperdir)
492 return -ENOMEM;
493 break;
494
495 case OPT_LOWERDIR:
496 kfree(config->lowerdir);
497 config->lowerdir = match_strdup(&args[0]);
498 if (!config->lowerdir)
499 return -ENOMEM;
500 break;
501
502 case OPT_WORKDIR:
503 kfree(config->workdir);
504 config->workdir = match_strdup(&args[0]);
505 if (!config->workdir)
506 return -ENOMEM;
507 break;
508
8d3095f4
MS
509 case OPT_DEFAULT_PERMISSIONS:
510 config->default_permissions = true;
511 break;
512
438c84c2
MS
513 case OPT_REDIRECT_DIR:
514 kfree(config->redirect_mode);
515 config->redirect_mode = match_strdup(&args[0]);
516 if (!config->redirect_mode)
517 return -ENOMEM;
d47748e5 518 redirect_opt = true;
a6c60655
MS
519 break;
520
02bcd157
AG
521 case OPT_INDEX_ON:
522 config->index = true;
523 break;
524
525 case OPT_INDEX_OFF:
526 config->index = false;
527 break;
528
f168f109
AG
529 case OPT_NFS_EXPORT_ON:
530 config->nfs_export = true;
531 break;
532
533 case OPT_NFS_EXPORT_OFF:
534 config->nfs_export = false;
535 break;
536
795939a9
AG
537 case OPT_XINO_ON:
538 config->xino = OVL_XINO_ON;
539 break;
540
541 case OPT_XINO_OFF:
542 config->xino = OVL_XINO_OFF;
543 break;
544
545 case OPT_XINO_AUTO:
546 config->xino = OVL_XINO_AUTO;
547 break;
548
d5791044
VG
549 case OPT_METACOPY_ON:
550 config->metacopy = true;
d47748e5 551 metacopy_opt = true;
d5791044
VG
552 break;
553
554 case OPT_METACOPY_OFF:
555 config->metacopy = false;
556 break;
557
e9be9d5e 558 default:
1bd0a3ae 559 pr_err("unrecognized mount option \"%s\" or missing value\n",
560 p);
e9be9d5e
MS
561 return -EINVAL;
562 }
563 }
71cbad7e 564
565 /* Workdir is useless in non-upper mount */
566 if (!config->upperdir && config->workdir) {
1bd0a3ae 567 pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
71cbad7e 568 config->workdir);
569 kfree(config->workdir);
570 config->workdir = NULL;
571 }
572
d5791044
VG
573 err = ovl_parse_redirect_mode(config, config->redirect_mode);
574 if (err)
575 return err;
576
d47748e5
MS
577 /*
578 * This is to make the logic below simpler. It doesn't make any other
579 * difference, since config->redirect_dir is only used for upper.
580 */
581 if (!config->upperdir && config->redirect_follow)
582 config->redirect_dir = true;
583
584 /* Resolve metacopy -> redirect_dir dependency */
585 if (config->metacopy && !config->redirect_dir) {
586 if (metacopy_opt && redirect_opt) {
1bd0a3ae 587 pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
d47748e5
MS
588 config->redirect_mode);
589 return -EINVAL;
590 }
591 if (redirect_opt) {
592 /*
593 * There was an explicit redirect_dir=... that resulted
594 * in this conflict.
595 */
1bd0a3ae 596 pr_info("disabling metacopy due to redirect_dir=%s\n",
d47748e5
MS
597 config->redirect_mode);
598 config->metacopy = false;
599 } else {
600 /* Automatically enable redirect otherwise. */
601 config->redirect_follow = config->redirect_dir = true;
602 }
d5791044
VG
603 }
604
605 return 0;
e9be9d5e
MS
606}
607
608#define OVL_WORKDIR_NAME "work"
02bcd157 609#define OVL_INDEXDIR_NAME "index"
e9be9d5e 610
ad204488 611static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
6b8aa129 612 const char *name, bool persist)
e9be9d5e 613{
ad204488
MS
614 struct inode *dir = ofs->workbasedir->d_inode;
615 struct vfsmount *mnt = ofs->upper_mnt;
e9be9d5e
MS
616 struct dentry *work;
617 int err;
618 bool retried = false;
6b8aa129 619 bool locked = false;
e9be9d5e 620
5955102c 621 inode_lock_nested(dir, I_MUTEX_PARENT);
6b8aa129
AG
622 locked = true;
623
e9be9d5e 624retry:
ad204488 625 work = lookup_one_len(name, ofs->workbasedir, strlen(name));
e9be9d5e
MS
626
627 if (!IS_ERR(work)) {
c11b9fdd
MS
628 struct iattr attr = {
629 .ia_valid = ATTR_MODE,
32a3d848 630 .ia_mode = S_IFDIR | 0,
c11b9fdd 631 };
e9be9d5e
MS
632
633 if (work->d_inode) {
634 err = -EEXIST;
635 if (retried)
636 goto out_dput;
637
6b8aa129
AG
638 if (persist)
639 goto out_unlock;
640
e9be9d5e 641 retried = true;
eea2fb48 642 ovl_workdir_cleanup(dir, mnt, work, 0);
e9be9d5e
MS
643 dput(work);
644 goto retry;
645 }
646
95a1c815
MS
647 work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
648 err = PTR_ERR(work);
649 if (IS_ERR(work))
650 goto out_err;
c11b9fdd 651
cb348edb
MS
652 /*
653 * Try to remove POSIX ACL xattrs from workdir. We are good if:
654 *
655 * a) success (there was a POSIX ACL xattr and was removed)
656 * b) -ENODATA (there was no POSIX ACL xattr)
657 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
658 *
659 * There are various other error values that could effectively
660 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
661 * if the xattr name is too long), but the set of filesystems
662 * allowed as upper are limited to "normal" ones, where checking
663 * for the above two errors is sufficient.
664 */
c11b9fdd 665 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
e1ff3dd1 666 if (err && err != -ENODATA && err != -EOPNOTSUPP)
c11b9fdd
MS
667 goto out_dput;
668
669 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
e1ff3dd1 670 if (err && err != -ENODATA && err != -EOPNOTSUPP)
c11b9fdd
MS
671 goto out_dput;
672
673 /* Clear any inherited mode bits */
674 inode_lock(work->d_inode);
675 err = notify_change(work, &attr, NULL);
676 inode_unlock(work->d_inode);
677 if (err)
678 goto out_dput;
6b8aa129
AG
679 } else {
680 err = PTR_ERR(work);
681 goto out_err;
e9be9d5e
MS
682 }
683out_unlock:
6b8aa129
AG
684 if (locked)
685 inode_unlock(dir);
e9be9d5e
MS
686
687 return work;
688
689out_dput:
690 dput(work);
6b8aa129 691out_err:
1bd0a3ae 692 pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
ad204488 693 ofs->config.workdir, name, -err);
6b8aa129 694 work = NULL;
e9be9d5e
MS
695 goto out_unlock;
696}
697
91c77947
MS
/*
 * Remove backslash escapes from @s in place: every '\' is dropped and the
 * character after it is kept literally.  A trailing lone '\' is dropped.
 */
static void ovl_unescape(char *s)
{
	char *dst = s;

	do {
		if (*s == '\\')
			s++;
		*dst++ = *s;
	} while (*s++);
}
710
ab508822
MS
711static int ovl_mount_dir_noesc(const char *name, struct path *path)
712{
a78d9f0d 713 int err = -EINVAL;
ab508822 714
a78d9f0d 715 if (!*name) {
1bd0a3ae 716 pr_err("empty lowerdir\n");
a78d9f0d
MS
717 goto out;
718 }
ab508822
MS
719 err = kern_path(name, LOOKUP_FOLLOW, path);
720 if (err) {
1bd0a3ae 721 pr_err("failed to resolve '%s': %i\n", name, err);
ab508822
MS
722 goto out;
723 }
724 err = -EINVAL;
7c03b5d4 725 if (ovl_dentry_weird(path->dentry)) {
1bd0a3ae 726 pr_err("filesystem on '%s' not supported\n", name);
ab508822
MS
727 goto out_put;
728 }
2b8c30e9 729 if (!d_is_dir(path->dentry)) {
1bd0a3ae 730 pr_err("'%s' not a directory\n", name);
ab508822
MS
731 goto out_put;
732 }
733 return 0;
734
735out_put:
8aafcb59 736 path_put_init(path);
ab508822
MS
737out:
738 return err;
739}
740
741static int ovl_mount_dir(const char *name, struct path *path)
742{
743 int err = -ENOMEM;
744 char *tmp = kstrdup(name, GFP_KERNEL);
745
746 if (tmp) {
747 ovl_unescape(tmp);
748 err = ovl_mount_dir_noesc(tmp, path);
7c03b5d4 749
7925dad8
MS
750 if (!err && (ovl_dentry_remote(path->dentry) ||
751 path->dentry->d_flags & DCACHE_OP_REAL)) {
752 pr_err("filesystem on '%s' not supported as upperdir\n",
753 tmp);
754 path_put_init(path);
755 err = -EINVAL;
756 }
ab508822
MS
757 kfree(tmp);
758 }
759 return err;
760}
761
6b2d5fe4
MS
762static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
763 const char *name)
ab508822 764{
ab508822 765 struct kstatfs statfs;
6b2d5fe4
MS
766 int err = vfs_statfs(path, &statfs);
767
768 if (err)
1bd0a3ae 769 pr_err("statfs failed on '%s'\n", name);
6b2d5fe4
MS
770 else
771 ofs->namelen = max(ofs->namelen, statfs.f_namelen);
772
773 return err;
774}
775
776static int ovl_lower_dir(const char *name, struct path *path,
f4288844 777 struct ovl_fs *ofs, int *stack_depth)
6b2d5fe4 778{
e487d889 779 int fh_type;
6b2d5fe4 780 int err;
ab508822 781
a78d9f0d 782 err = ovl_mount_dir_noesc(name, path);
ab508822
MS
783 if (err)
784 goto out;
785
6b2d5fe4
MS
786 err = ovl_check_namelen(path, ofs, name);
787 if (err)
ab508822 788 goto out_put;
6b2d5fe4 789
ab508822
MS
790 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
791
02bcd157 792 /*
f168f109
AG
793 * The inodes index feature and NFS export need to encode and decode
794 * file handles, so they require that all layers support them.
02bcd157 795 */
e487d889 796 fh_type = ovl_can_decode_fh(path->dentry->d_sb);
f168f109 797 if ((ofs->config.nfs_export ||
e487d889 798 (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
02bcd157 799 ofs->config.index = false;
f168f109 800 ofs->config.nfs_export = false;
1bd0a3ae 801 pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
f168f109 802 name);
02bcd157
AG
803 }
804
e487d889
AG
805 /* Check if lower fs has 32bit inode numbers */
806 if (fh_type != FILEID_INO32_GEN)
0f831ec8 807 ofs->xino_mode = -1;
e487d889 808
ab508822
MS
809 return 0;
810
811out_put:
8aafcb59 812 path_put_init(path);
ab508822
MS
813out:
814 return err;
815}
816
e9be9d5e
MS
817/* Workdir should not be subdir of upperdir and vice versa */
818static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
819{
820 bool ok = false;
821
822 if (workdir != upperdir) {
823 ok = (lock_rename(workdir, upperdir) == NULL);
824 unlock_rename(workdir, upperdir);
825 }
826 return ok;
827}
828
a78d9f0d
MS
/*
 * Split a ':'-separated lowerdir list in place.
 *
 * Every unescaped ':' becomes a NUL terminator and backslash escapes are
 * removed as the string is compacted, so @str ends up holding the returned
 * number of consecutive NUL-terminated names.  The count is always at
 * least 1.
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *src = str;
	char *dst = str;

	for (;;) {
		char c = *src;

		if (c == ':') {
			*dst = '\0';
			count++;
		} else {
			/* An escape yields the following character verbatim */
			if (c == '\\')
				c = *++src;
			*dst = c;
			if (!c)
				break;
		}
		src++;
		dst++;
	}

	return count;
}
848
0eb45fc3
AG
849static int __maybe_unused
850ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
851 struct dentry *dentry, struct inode *inode,
852 const char *name, void *buffer, size_t size)
853{
1d88f183 854 return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
0eb45fc3
AG
855}
856
0c97be22
AG
857static int __maybe_unused
858ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
859 struct dentry *dentry, struct inode *inode,
860 const char *name, const void *value,
861 size_t size, int flags)
d837a49b
MS
862{
863 struct dentry *workdir = ovl_workdir(dentry);
09d8b586 864 struct inode *realinode = ovl_inode_real(inode);
d837a49b
MS
865 struct posix_acl *acl = NULL;
866 int err;
867
868 /* Check that everything is OK before copy-up */
869 if (value) {
870 acl = posix_acl_from_xattr(&init_user_ns, value, size);
871 if (IS_ERR(acl))
872 return PTR_ERR(acl);
873 }
874 err = -EOPNOTSUPP;
875 if (!IS_POSIXACL(d_inode(workdir)))
876 goto out_acl_release;
877 if (!realinode->i_op->set_acl)
878 goto out_acl_release;
879 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
880 err = acl ? -EACCES : 0;
881 goto out_acl_release;
882 }
883 err = -EPERM;
884 if (!inode_owner_or_capable(inode))
885 goto out_acl_release;
886
887 posix_acl_release(acl);
888
fd3220d3
MS
889 /*
890 * Check if sgid bit needs to be cleared (actual setacl operation will
891 * be done with mounter's capabilities and so that won't do it for us).
892 */
893 if (unlikely(inode->i_mode & S_ISGID) &&
894 handler->flags == ACL_TYPE_ACCESS &&
895 !in_group_p(inode->i_gid) &&
896 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
897 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
898
899 err = ovl_setattr(dentry, &iattr);
900 if (err)
901 return err;
902 }
903
1d88f183 904 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
ce31513a 905 if (!err)
09d8b586 906 ovl_copyattr(ovl_inode_real(inode), inode);
ce31513a
MS
907
908 return err;
d837a49b
MS
909
910out_acl_release:
911 posix_acl_release(acl);
912 return err;
913}
914
0eb45fc3
AG
915static int ovl_own_xattr_get(const struct xattr_handler *handler,
916 struct dentry *dentry, struct inode *inode,
917 const char *name, void *buffer, size_t size)
918{
48fab5d7 919 return -EOPNOTSUPP;
0eb45fc3
AG
920}
921
d837a49b
MS
922static int ovl_own_xattr_set(const struct xattr_handler *handler,
923 struct dentry *dentry, struct inode *inode,
924 const char *name, const void *value,
925 size_t size, int flags)
926{
48fab5d7 927 return -EOPNOTSUPP;
d837a49b
MS
928}
929
0eb45fc3
AG
930static int ovl_other_xattr_get(const struct xattr_handler *handler,
931 struct dentry *dentry, struct inode *inode,
932 const char *name, void *buffer, size_t size)
933{
1d88f183 934 return ovl_xattr_get(dentry, inode, name, buffer, size);
0eb45fc3
AG
935}
936
0e585ccc
AG
937static int ovl_other_xattr_set(const struct xattr_handler *handler,
938 struct dentry *dentry, struct inode *inode,
939 const char *name, const void *value,
940 size_t size, int flags)
941{
1d88f183 942 return ovl_xattr_set(dentry, inode, name, value, size, flags);
0e585ccc
AG
943}
944
0c97be22
AG
945static const struct xattr_handler __maybe_unused
946ovl_posix_acl_access_xattr_handler = {
d837a49b
MS
947 .name = XATTR_NAME_POSIX_ACL_ACCESS,
948 .flags = ACL_TYPE_ACCESS,
0eb45fc3 949 .get = ovl_posix_acl_xattr_get,
d837a49b
MS
950 .set = ovl_posix_acl_xattr_set,
951};
952
0c97be22
AG
953static const struct xattr_handler __maybe_unused
954ovl_posix_acl_default_xattr_handler = {
d837a49b
MS
955 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
956 .flags = ACL_TYPE_DEFAULT,
0eb45fc3 957 .get = ovl_posix_acl_xattr_get,
d837a49b
MS
958 .set = ovl_posix_acl_xattr_set,
959};
960
961static const struct xattr_handler ovl_own_xattr_handler = {
962 .prefix = OVL_XATTR_PREFIX,
0eb45fc3 963 .get = ovl_own_xattr_get,
d837a49b
MS
964 .set = ovl_own_xattr_set,
965};
966
967static const struct xattr_handler ovl_other_xattr_handler = {
968 .prefix = "", /* catch all */
0eb45fc3 969 .get = ovl_other_xattr_get,
d837a49b
MS
970 .set = ovl_other_xattr_set,
971};
972
973static const struct xattr_handler *ovl_xattr_handlers[] = {
0c97be22 974#ifdef CONFIG_FS_POSIX_ACL
d837a49b
MS
975 &ovl_posix_acl_access_xattr_handler,
976 &ovl_posix_acl_default_xattr_handler,
0c97be22 977#endif
d837a49b
MS
978 &ovl_own_xattr_handler,
979 &ovl_other_xattr_handler,
980 NULL
981};
982
146d62e5
AG
983static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
984 struct inode **ptrap, const char *name)
985{
986 struct inode *trap;
987 int err;
988
989 trap = ovl_get_trap_inode(sb, dir);
1dac6f5b
AB
990 err = PTR_ERR_OR_ZERO(trap);
991 if (err) {
146d62e5 992 if (err == -ELOOP)
1bd0a3ae 993 pr_err("conflicting %s path\n", name);
146d62e5
AG
994 return err;
995 }
996
997 *ptrap = trap;
998 return 0;
999}
1000
0be0bfd2
AG
1001/*
1002 * Determine how we treat concurrent use of upperdir/workdir based on the
1003 * index feature. This is papering over mount leaks of container runtimes,
1004 * for example, an old overlay mount is leaked and now its upperdir is
1005 * attempted to be used as a lower layer in a new overlay mount.
1006 */
1007static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
1008{
1009 if (ofs->config.index) {
1bd0a3ae 1010 pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
0be0bfd2
AG
1011 name);
1012 return -EBUSY;
1013 } else {
1bd0a3ae 1014 pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
0be0bfd2
AG
1015 name);
1016 return 0;
1017 }
1018}
1019
146d62e5
AG
1020static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
1021 struct path *upperpath)
6ee8acf0 1022{
5064975e 1023 struct vfsmount *upper_mnt;
6ee8acf0
MS
1024 int err;
1025
ad204488 1026 err = ovl_mount_dir(ofs->config.upperdir, upperpath);
6ee8acf0
MS
1027 if (err)
1028 goto out;
1029
1030 /* Upper fs should not be r/o */
1031 if (sb_rdonly(upperpath->mnt->mnt_sb)) {
1bd0a3ae 1032 pr_err("upper fs is r/o, try multi-lower layers mount\n");
6ee8acf0
MS
1033 err = -EINVAL;
1034 goto out;
1035 }
1036
ad204488 1037 err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
6ee8acf0
MS
1038 if (err)
1039 goto out;
1040
146d62e5
AG
1041 err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
1042 "upperdir");
1043 if (err)
1044 goto out;
1045
5064975e
MS
1046 upper_mnt = clone_private_mount(upperpath);
1047 err = PTR_ERR(upper_mnt);
1048 if (IS_ERR(upper_mnt)) {
1bd0a3ae 1049 pr_err("failed to clone upperpath\n");
5064975e
MS
1050 goto out;
1051 }
1052
1053 /* Don't inherit atime flags */
1054 upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
ad204488 1055 ofs->upper_mnt = upper_mnt;
8c25741a 1056
8c25741a
MS
1057 if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
1058 ofs->upperdir_locked = true;
8c25741a 1059 } else {
0be0bfd2
AG
1060 err = ovl_report_in_use(ofs, "upperdir");
1061 if (err)
1062 goto out;
8c25741a
MS
1063 }
1064
6ee8acf0
MS
1065 err = 0;
1066out:
1067 return err;
1068}
1069
146d62e5
AG
1070static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
1071 struct path *workpath)
8ed61dc3 1072{
2ba9d57e 1073 struct vfsmount *mnt = ofs->upper_mnt;
8ed61dc3 1074 struct dentry *temp;
e487d889 1075 int fh_type;
8ed61dc3
MS
1076 int err;
1077
2ba9d57e
AG
1078 err = mnt_want_write(mnt);
1079 if (err)
1080 return err;
1081
ad204488
MS
1082 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
1083 if (!ofs->workdir)
2ba9d57e 1084 goto out;
8ed61dc3 1085
146d62e5
AG
1086 err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
1087 if (err)
1088 goto out;
1089
8ed61dc3
MS
1090 /*
1091 * Upper should support d_type, else whiteouts are visible. Given
1092 * workdir and upper are on same fs, we can do iterate_dir() on
1093 * workdir. This check requires successful creation of workdir in
1094 * previous step.
1095 */
1096 err = ovl_check_d_type_supported(workpath);
1097 if (err < 0)
2ba9d57e 1098 goto out;
8ed61dc3
MS
1099
1100 /*
1101 * We allowed this configuration and don't want to break users over
1102 * kernel upgrade. So warn instead of erroring out.
1103 */
1104 if (!err)
1bd0a3ae 1105 pr_warn("upper fs needs to support d_type.\n");
8ed61dc3
MS
1106
1107 /* Check if upper/work fs supports O_TMPFILE */
ad204488
MS
1108 temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
1109 ofs->tmpfile = !IS_ERR(temp);
1110 if (ofs->tmpfile)
8ed61dc3
MS
1111 dput(temp);
1112 else
1bd0a3ae 1113 pr_warn("upper fs does not support tmpfile.\n");
8ed61dc3
MS
1114
1115 /*
1116 * Check if upper/work fs supports trusted.overlay.* xattr
1117 */
ad204488 1118 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
8ed61dc3 1119 if (err) {
ad204488 1120 ofs->noxattr = true;
a683737b 1121 ofs->config.index = false;
d5791044 1122 ofs->config.metacopy = false;
1bd0a3ae 1123 pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
2ba9d57e 1124 err = 0;
8ed61dc3 1125 } else {
ad204488 1126 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
8ed61dc3
MS
1127 }
1128
1129 /* Check if upper/work fs supports file handles */
e487d889
AG
1130 fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1131 if (ofs->config.index && !fh_type) {
ad204488 1132 ofs->config.index = false;
1bd0a3ae 1133 pr_warn("upper fs does not support file handles, falling back to index=off.\n");
8ed61dc3
MS
1134 }
1135
e487d889
AG
1136 /* Check if upper fs has 32bit inode numbers */
1137 if (fh_type != FILEID_INO32_GEN)
0f831ec8 1138 ofs->xino_mode = -1;
e487d889 1139
f168f109
AG
1140 /* NFS export of r/w mount depends on index */
1141 if (ofs->config.nfs_export && !ofs->config.index) {
1bd0a3ae 1142 pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
f168f109
AG
1143 ofs->config.nfs_export = false;
1144 }
2ba9d57e
AG
1145out:
1146 mnt_drop_write(mnt);
1147 return err;
8ed61dc3
MS
1148}
1149
146d62e5
AG
1150static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
1151 struct path *upperpath)
520d7c86
MS
1152{
1153 int err;
bca44b52 1154 struct path workpath = { };
520d7c86 1155
ad204488 1156 err = ovl_mount_dir(ofs->config.workdir, &workpath);
520d7c86
MS
1157 if (err)
1158 goto out;
1159
1160 err = -EINVAL;
bca44b52 1161 if (upperpath->mnt != workpath.mnt) {
1bd0a3ae 1162 pr_err("workdir and upperdir must reside under the same mount\n");
520d7c86
MS
1163 goto out;
1164 }
bca44b52 1165 if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1bd0a3ae 1166 pr_err("workdir and upperdir must be separate subtrees\n");
520d7c86
MS
1167 goto out;
1168 }
1169
8c25741a
MS
1170 ofs->workbasedir = dget(workpath.dentry);
1171
8c25741a 1172 if (ovl_inuse_trylock(ofs->workbasedir)) {
ad204488 1173 ofs->workdir_locked = true;
520d7c86 1174 } else {
0be0bfd2
AG
1175 err = ovl_report_in_use(ofs, "workdir");
1176 if (err)
1177 goto out;
520d7c86
MS
1178 }
1179
0be0bfd2
AG
1180 err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
1181 "workdir");
1182 if (err)
1183 goto out;
1184
146d62e5 1185 err = ovl_make_workdir(sb, ofs, &workpath);
bca44b52 1186
520d7c86 1187out:
bca44b52
MS
1188 path_put(&workpath);
1189
520d7c86
MS
1190 return err;
1191}
1192
146d62e5
AG
1193static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
1194 struct ovl_entry *oe, struct path *upperpath)
f7e3a7d9 1195{
2ba9d57e 1196 struct vfsmount *mnt = ofs->upper_mnt;
f7e3a7d9
MS
1197 int err;
1198
2ba9d57e
AG
1199 err = mnt_want_write(mnt);
1200 if (err)
1201 return err;
1202
f7e3a7d9 1203 /* Verify lower root is upper root origin */
d9768076 1204 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
05122443 1205 true);
f7e3a7d9 1206 if (err) {
1bd0a3ae 1207 pr_err("failed to verify upper root origin\n");
f7e3a7d9
MS
1208 goto out;
1209 }
1210
ad204488
MS
1211 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1212 if (ofs->indexdir) {
146d62e5
AG
1213 err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
1214 "indexdir");
1215 if (err)
1216 goto out;
1217
ad1d615c
AG
1218 /*
1219 * Verify upper root is exclusively associated with index dir.
1220 * Older kernels stored upper fh in "trusted.overlay.origin"
1221 * xattr. If that xattr exists, verify that it is a match to
1222 * upper dir file handle. In any case, verify or set xattr
1223 * "trusted.overlay.upper" to indicate that index may have
1224 * directory entries.
1225 */
1226 if (ovl_check_origin_xattr(ofs->indexdir)) {
1227 err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
1228 upperpath->dentry, true, false);
1229 if (err)
1bd0a3ae 1230 pr_err("failed to verify index dir 'origin' xattr\n");
ad1d615c
AG
1231 }
1232 err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
f7e3a7d9 1233 if (err)
1bd0a3ae 1234 pr_err("failed to verify index dir 'upper' xattr\n");
f7e3a7d9
MS
1235
1236 /* Cleanup bad/stale/orphan index entries */
1237 if (!err)
1eff1a1d 1238 err = ovl_indexdir_cleanup(ofs);
f7e3a7d9 1239 }
ad204488 1240 if (err || !ofs->indexdir)
1bd0a3ae 1241 pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
f7e3a7d9
MS
1242
1243out:
2ba9d57e 1244 mnt_drop_write(mnt);
f7e3a7d9
MS
1245 return err;
1246}
1247
9df085f3
AG
1248static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
1249{
1250 unsigned int i;
1251
7e63c87f 1252 if (!ofs->config.nfs_export && !ofs->upper_mnt)
9df085f3
AG
1253 return true;
1254
1b81dddd 1255 for (i = 0; i < ofs->numfs; i++) {
9df085f3
AG
1256 /*
1257 * We use uuid to associate an overlay lower file handle with a
1258 * lower layer, so we can accept lower fs with null uuid as long
1259 * as all lower layers with null uuid are on the same fs.
7e63c87f
AG
1260 * if we detect multiple lower fs with the same uuid, we
1261 * disable lower file handle decoding on all of them.
9df085f3 1262 */
1b81dddd
AG
1263 if (ofs->fs[i].is_lower &&
1264 uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
07f1e596 1265 ofs->fs[i].bad_uuid = true;
9df085f3 1266 return false;
7e63c87f 1267 }
9df085f3
AG
1268 }
1269 return true;
1270}
1271
5148626b 1272/* Get a unique fsid for the layer */
9df085f3 1273static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
5148626b 1274{
9df085f3 1275 struct super_block *sb = path->mnt->mnt_sb;
5148626b
AG
1276 unsigned int i;
1277 dev_t dev;
1278 int err;
7e63c87f 1279 bool bad_uuid = false;
5148626b 1280
07f1e596
AG
1281 for (i = 0; i < ofs->numfs; i++) {
1282 if (ofs->fs[i].sb == sb)
1283 return i;
5148626b
AG
1284 }
1285
9df085f3 1286 if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
7e63c87f
AG
1287 bad_uuid = true;
1288 if (ofs->config.index || ofs->config.nfs_export) {
1289 ofs->config.index = false;
1290 ofs->config.nfs_export = false;
1bd0a3ae 1291 pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
7e63c87f
AG
1292 uuid_is_null(&sb->s_uuid) ? "null" :
1293 "conflicting",
1294 path->dentry);
1295 }
9df085f3
AG
1296 }
1297
5148626b
AG
1298 err = get_anon_bdev(&dev);
1299 if (err) {
1bd0a3ae 1300 pr_err("failed to get anonymous bdev for lowerpath\n");
5148626b
AG
1301 return err;
1302 }
1303
07f1e596
AG
1304 ofs->fs[ofs->numfs].sb = sb;
1305 ofs->fs[ofs->numfs].pseudo_dev = dev;
1306 ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
5148626b 1307
07f1e596 1308 return ofs->numfs++;
5148626b
AG
1309}
1310
94375f9d
AG
1311static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
1312 struct path *stack, unsigned int numlower)
520d7c86
MS
1313{
1314 int err;
1315 unsigned int i;
13464165 1316 struct ovl_layer *layers;
520d7c86
MS
1317
1318 err = -ENOMEM;
13464165
MS
1319 layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
1320 if (!layers)
520d7c86 1321 goto out;
13464165 1322 ofs->layers = layers;
5148626b 1323
07f1e596
AG
1324 ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
1325 if (ofs->fs == NULL)
5148626b
AG
1326 goto out;
1327
07f1e596
AG
1328 /* idx/fsid 0 are reserved for upper fs even with lower only overlay */
1329 ofs->numfs++;
1330
13464165
MS
1331 layers[0].mnt = ofs->upper_mnt;
1332 layers[0].idx = 0;
1333 layers[0].fsid = 0;
94375f9d
AG
1334 ofs->numlayer = 1;
1335
07f1e596 1336 /*
b7bf9908
AG
1337 * All lower layers that share the same fs as upper layer, use the same
1338 * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower
1339 * only overlay to simplify ovl_fs_free().
1b81dddd 1340 * is_lower will be set if upper fs is shared with a lower layer.
07f1e596 1341 */
b7bf9908
AG
1342 err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
1343 if (err) {
1344 pr_err("failed to get anonymous bdev for upper fs\n");
1345 goto out;
1346 }
1347
07f1e596
AG
1348 if (ofs->upper_mnt) {
1349 ofs->fs[0].sb = ofs->upper_mnt->mnt_sb;
1b81dddd 1350 ofs->fs[0].is_lower = false;
07f1e596
AG
1351 }
1352
520d7c86
MS
1353 for (i = 0; i < numlower; i++) {
1354 struct vfsmount *mnt;
146d62e5 1355 struct inode *trap;
5148626b 1356 int fsid;
520d7c86 1357
9df085f3 1358 err = fsid = ovl_get_fsid(ofs, &stack[i]);
5148626b 1359 if (err < 0)
520d7c86 1360 goto out;
520d7c86 1361
146d62e5
AG
1362 err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
1363 if (err)
1364 goto out;
1365
0be0bfd2
AG
1366 if (ovl_is_inuse(stack[i].dentry)) {
1367 err = ovl_report_in_use(ofs, "lowerdir");
1368 if (err)
1369 goto out;
1370 }
1371
520d7c86
MS
1372 mnt = clone_private_mount(&stack[i]);
1373 err = PTR_ERR(mnt);
1374 if (IS_ERR(mnt)) {
1bd0a3ae 1375 pr_err("failed to clone lowerpath\n");
146d62e5 1376 iput(trap);
520d7c86
MS
1377 goto out;
1378 }
5148626b 1379
520d7c86
MS
1380 /*
1381 * Make lower layers R/O. That way fchmod/fchown on lower file
1382 * will fail instead of modifying lower fs.
1383 */
1384 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1385
13464165
MS
1386 layers[ofs->numlayer].trap = trap;
1387 layers[ofs->numlayer].mnt = mnt;
1388 layers[ofs->numlayer].idx = ofs->numlayer;
1389 layers[ofs->numlayer].fsid = fsid;
1390 layers[ofs->numlayer].fs = &ofs->fs[fsid];
94375f9d 1391 ofs->numlayer++;
1b81dddd 1392 ofs->fs[fsid].is_lower = true;
520d7c86 1393 }
e487d889 1394
795939a9
AG
1395 /*
1396 * When all layers on same fs, overlay can use real inode numbers.
1397 * With mount option "xino=on", mounter declares that there are enough
1398 * free high bits in underlying fs to hold the unique fsid.
1399 * If overlayfs does encounter underlying inodes using the high xino
1400 * bits reserved for fsid, it emits a warning and uses the original
1401 * inode number.
1402 */
07f1e596 1403 if (ofs->numfs - !ofs->upper_mnt == 1) {
0f831ec8
AG
1404 if (ofs->config.xino == OVL_XINO_ON)
1405 pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
1406 ofs->xino_mode = 0;
53afcd31
AG
1407 } else if (ofs->config.xino == OVL_XINO_OFF) {
1408 ofs->xino_mode = -1;
0f831ec8 1409 } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) {
795939a9 1410 /*
07f1e596
AG
1411 * This is a roundup of number of bits needed for encoding
1412 * fsid, where fsid 0 is reserved for upper fs even with
1413 * lower only overlay.
795939a9
AG
1414 */
1415 BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
07f1e596 1416 ofs->xino_mode = ilog2(ofs->numfs - 1) + 1;
795939a9
AG
1417 }
1418
0f831ec8 1419 if (ofs->xino_mode > 0) {
1bd0a3ae 1420 pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
0f831ec8 1421 ofs->xino_mode);
795939a9 1422 }
e487d889 1423
520d7c86
MS
1424 err = 0;
1425out:
1426 return err;
1427}
1428
4155c10a 1429static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
ad204488 1430 struct ovl_fs *ofs)
53dbb0b4
MS
1431{
1432 int err;
1433 char *lowertmp, *lower;
4155c10a
MS
1434 struct path *stack = NULL;
1435 unsigned int stacklen, numlower = 0, i;
4155c10a 1436 struct ovl_entry *oe;
53dbb0b4
MS
1437
1438 err = -ENOMEM;
ad204488 1439 lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
53dbb0b4 1440 if (!lowertmp)
4155c10a 1441 goto out_err;
53dbb0b4
MS
1442
1443 err = -EINVAL;
1444 stacklen = ovl_split_lowerdirs(lowertmp);
1445 if (stacklen > OVL_MAX_STACK) {
1bd0a3ae 1446 pr_err("too many lower directories, limit is %d\n",
53dbb0b4 1447 OVL_MAX_STACK);
4155c10a 1448 goto out_err;
ad204488 1449 } else if (!ofs->config.upperdir && stacklen == 1) {
1bd0a3ae 1450 pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
4155c10a 1451 goto out_err;
f168f109
AG
1452 } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
1453 ofs->config.redirect_follow) {
1bd0a3ae 1454 pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
f168f109 1455 ofs->config.nfs_export = false;
53dbb0b4
MS
1456 }
1457
1458 err = -ENOMEM;
1459 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
1460 if (!stack)
4155c10a 1461 goto out_err;
53dbb0b4
MS
1462
1463 err = -EINVAL;
1464 lower = lowertmp;
1465 for (numlower = 0; numlower < stacklen; numlower++) {
ad204488 1466 err = ovl_lower_dir(lower, &stack[numlower], ofs,
f4288844 1467 &sb->s_stack_depth);
53dbb0b4 1468 if (err)
4155c10a 1469 goto out_err;
53dbb0b4
MS
1470
1471 lower = strchr(lower, '\0') + 1;
1472 }
1473
1474 err = -EINVAL;
1475 sb->s_stack_depth++;
1476 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1bd0a3ae 1477 pr_err("maximum fs stacking depth exceeded\n");
4155c10a 1478 goto out_err;
53dbb0b4
MS
1479 }
1480
94375f9d 1481 err = ovl_get_layers(sb, ofs, stack, numlower);
4155c10a
MS
1482 if (err)
1483 goto out_err;
1484
1485 err = -ENOMEM;
1486 oe = ovl_alloc_entry(numlower);
1487 if (!oe)
1488 goto out_err;
1489
1490 for (i = 0; i < numlower; i++) {
1491 oe->lowerstack[i].dentry = dget(stack[i].dentry);
94375f9d 1492 oe->lowerstack[i].layer = &ofs->layers[i+1];
4155c10a 1493 }
53dbb0b4 1494
53dbb0b4 1495out:
53dbb0b4
MS
1496 for (i = 0; i < numlower; i++)
1497 path_put(&stack[i]);
1498 kfree(stack);
4155c10a
MS
1499 kfree(lowertmp);
1500
1501 return oe;
1502
1503out_err:
1504 oe = ERR_PTR(err);
53dbb0b4
MS
1505 goto out;
1506}
1507
146d62e5
AG
1508/*
1509 * Check if this layer root is a descendant of:
1510 * - another layer of this overlayfs instance
1511 * - upper/work dir of any overlayfs instance
146d62e5 1512 */
0be0bfd2
AG
1513static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
1514 struct dentry *dentry, const char *name)
146d62e5 1515{
9179c21d 1516 struct dentry *next = dentry, *parent;
146d62e5
AG
1517 int err = 0;
1518
9179c21d 1519 if (!dentry)
146d62e5
AG
1520 return 0;
1521
9179c21d
MS
1522 parent = dget_parent(next);
1523
1524 /* Walk back ancestors to root (inclusive) looking for traps */
1525 while (!err && parent != next) {
0be0bfd2 1526 if (ovl_lookup_trap_inode(sb, parent)) {
146d62e5 1527 err = -ELOOP;
1bd0a3ae 1528 pr_err("overlapping %s path\n", name);
0be0bfd2
AG
1529 } else if (ovl_is_inuse(parent)) {
1530 err = ovl_report_in_use(ofs, name);
146d62e5 1531 }
146d62e5 1532 next = parent;
9179c21d
MS
1533 parent = dget_parent(next);
1534 dput(next);
146d62e5
AG
1535 }
1536
9179c21d 1537 dput(parent);
146d62e5
AG
1538
1539 return err;
1540}
1541
1542/*
1543 * Check if any of the layers or work dirs overlap.
1544 */
1545static int ovl_check_overlapping_layers(struct super_block *sb,
1546 struct ovl_fs *ofs)
1547{
1548 int i, err;
1549
1550 if (ofs->upper_mnt) {
0be0bfd2
AG
1551 err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
1552 "upperdir");
146d62e5
AG
1553 if (err)
1554 return err;
1555
1556 /*
1557 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
1558 * this instance and covers overlapping work and index dirs,
1559 * unless work or index dir have been moved since created inside
1560 * workbasedir. In that case, we already have their traps in
1561 * inode cache and we will catch that case on lookup.
1562 */
0be0bfd2 1563 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir");
146d62e5
AG
1564 if (err)
1565 return err;
1566 }
1567
94375f9d 1568 for (i = 1; i < ofs->numlayer; i++) {
0be0bfd2 1569 err = ovl_check_layer(sb, ofs,
94375f9d 1570 ofs->layers[i].mnt->mnt_root,
146d62e5
AG
1571 "lowerdir");
1572 if (err)
1573 return err;
1574 }
1575
1576 return 0;
1577}
1578
2effc5c2
AG
1579static struct dentry *ovl_get_root(struct super_block *sb,
1580 struct dentry *upperdentry,
1581 struct ovl_entry *oe)
1582{
1583 struct dentry *root;
62c832ed
AG
1584 struct ovl_path *lowerpath = &oe->lowerstack[0];
1585 unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
1586 int fsid = lowerpath->layer->fsid;
1587 struct ovl_inode_params oip = {
1588 .upperdentry = upperdentry,
1589 .lowerpath = lowerpath,
1590 };
2effc5c2
AG
1591
1592 root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1593 if (!root)
1594 return NULL;
1595
1596 root->d_fsdata = oe;
1597
1598 if (upperdentry) {
62c832ed
AG
1599 /* Root inode uses upper st_ino/i_ino */
1600 ino = d_inode(upperdentry)->i_ino;
1601 fsid = 0;
2effc5c2
AG
1602 ovl_dentry_set_upper_alias(root);
1603 if (ovl_is_impuredir(upperdentry))
1604 ovl_set_flag(OVL_IMPURE, d_inode(root));
1605 }
1606
1607 /* Root is always merge -> can have whiteouts */
1608 ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
1609 ovl_dentry_set_flag(OVL_E_CONNECTED, root);
1610 ovl_set_upperdata(d_inode(root));
62c832ed 1611 ovl_inode_init(d_inode(root), &oip, ino, fsid);
f4288844 1612 ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
2effc5c2
AG
1613
1614 return root;
1615}
1616
e9be9d5e
MS
1617static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1618{
33006cdf 1619 struct path upperpath = { };
e9be9d5e 1620 struct dentry *root_dentry;
4155c10a 1621 struct ovl_entry *oe;
ad204488 1622 struct ovl_fs *ofs;
51f8f3c4 1623 struct cred *cred;
e9be9d5e
MS
1624 int err;
1625
f4288844
MS
1626 sb->s_d_op = &ovl_dentry_operations;
1627
f45827e8 1628 err = -ENOMEM;
ad204488
MS
1629 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1630 if (!ofs)
e9be9d5e
MS
1631 goto out;
1632
ad204488 1633 ofs->creator_cred = cred = prepare_creds();
c6fe6254
MS
1634 if (!cred)
1635 goto out_err;
1636
ad204488 1637 ofs->config.index = ovl_index_def;
f168f109 1638 ofs->config.nfs_export = ovl_nfs_export_def;
795939a9 1639 ofs->config.xino = ovl_xino_def();
d5791044 1640 ofs->config.metacopy = ovl_metacopy_def;
ad204488 1641 err = ovl_parse_opt((char *) data, &ofs->config);
f45827e8 1642 if (err)
a9075cdb 1643 goto out_err;
f45827e8 1644
e9be9d5e 1645 err = -EINVAL;
ad204488 1646 if (!ofs->config.lowerdir) {
07f2af7b 1647 if (!silent)
1bd0a3ae 1648 pr_err("missing 'lowerdir'\n");
a9075cdb 1649 goto out_err;
e9be9d5e
MS
1650 }
1651
53a08cb9 1652 sb->s_stack_depth = 0;
cf9a6784 1653 sb->s_maxbytes = MAX_LFS_FILESIZE;
e487d889 1654 /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
53afcd31 1655 if (ofs->config.xino != OVL_XINO_OFF) {
0f831ec8 1656 ofs->xino_mode = BITS_PER_LONG - 32;
53afcd31
AG
1657 if (!ofs->xino_mode) {
1658 pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
1659 ofs->config.xino = OVL_XINO_OFF;
1660 }
1661 }
795939a9 1662
146d62e5
AG
1663 /* alloc/destroy_inode needed for setting up traps in inode cache */
1664 sb->s_op = &ovl_super_operations;
1665
ad204488
MS
1666 if (ofs->config.upperdir) {
1667 if (!ofs->config.workdir) {
1bd0a3ae 1668 pr_err("missing 'workdir'\n");
a9075cdb 1669 goto out_err;
53a08cb9 1670 }
e9be9d5e 1671
146d62e5 1672 err = ovl_get_upper(sb, ofs, &upperpath);
53a08cb9 1673 if (err)
a9075cdb 1674 goto out_err;
2cac0c00 1675
146d62e5 1676 err = ovl_get_workdir(sb, ofs, &upperpath);
8ed61dc3 1677 if (err)
a9075cdb 1678 goto out_err;
c6fe6254 1679
ad204488 1680 if (!ofs->workdir)
1751e8a6 1681 sb->s_flags |= SB_RDONLY;
6e88256e 1682
ad204488
MS
1683 sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
1684 sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
c6fe6254 1685
e9be9d5e 1686 }
ad204488 1687 oe = ovl_get_lowerstack(sb, ofs);
4155c10a
MS
1688 err = PTR_ERR(oe);
1689 if (IS_ERR(oe))
a9075cdb 1690 goto out_err;
e9be9d5e 1691
71cbad7e 1692 /* If the upper fs is nonexistent, we mark overlayfs r/o too */
ad204488 1693 if (!ofs->upper_mnt)
1751e8a6 1694 sb->s_flags |= SB_RDONLY;
e9be9d5e 1695
ad204488 1696 if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
146d62e5 1697 err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
54fb347e 1698 if (err)
4155c10a 1699 goto out_free_oe;
6e88256e 1700
972d0093
AG
1701 /* Force r/o mount with no index dir */
1702 if (!ofs->indexdir) {
1703 dput(ofs->workdir);
1704 ofs->workdir = NULL;
1751e8a6 1705 sb->s_flags |= SB_RDONLY;
972d0093
AG
1706 }
1707
02bcd157
AG
1708 }
1709
146d62e5
AG
1710 err = ovl_check_overlapping_layers(sb, ofs);
1711 if (err)
1712 goto out_free_oe;
1713
972d0093 1714 /* Show index=off in /proc/mounts for forced r/o mount */
f168f109 1715 if (!ofs->indexdir) {
ad204488 1716 ofs->config.index = false;
f168f109 1717 if (ofs->upper_mnt && ofs->config.nfs_export) {
1bd0a3ae 1718 pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
f168f109
AG
1719 ofs->config.nfs_export = false;
1720 }
1721 }
02bcd157 1722
d5791044 1723 if (ofs->config.metacopy && ofs->config.nfs_export) {
1bd0a3ae 1724 pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
d5791044
VG
1725 ofs->config.nfs_export = false;
1726 }
1727
8383f174
AG
1728 if (ofs->config.nfs_export)
1729 sb->s_export_op = &ovl_export_operations;
1730
51f8f3c4
KK
1731 /* Never override disk quota limits or use reserved space */
1732 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
1733
655042cc 1734 sb->s_magic = OVERLAYFS_SUPER_MAGIC;
655042cc 1735 sb->s_xattr = ovl_xattr_handlers;
ad204488 1736 sb->s_fs_info = ofs;
de2a4a50 1737 sb->s_flags |= SB_POSIXACL;
655042cc 1738
c6fe6254 1739 err = -ENOMEM;
2effc5c2 1740 root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
e9be9d5e 1741 if (!root_dentry)
4155c10a 1742 goto out_free_oe;
e9be9d5e
MS
1743
1744 mntput(upperpath.mnt);
ed06e069 1745
e9be9d5e 1746 sb->s_root = root_dentry;
e9be9d5e
MS
1747
1748 return 0;
1749
4155c10a
MS
1750out_free_oe:
1751 ovl_entry_stack_free(oe);
b9343632 1752 kfree(oe);
4155c10a 1753out_err:
e9be9d5e 1754 path_put(&upperpath);
ad204488 1755 ovl_free_fs(ofs);
e9be9d5e
MS
1756out:
1757 return err;
1758}
1759
1760static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
1761 const char *dev_name, void *raw_data)
1762{
1763 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
1764}
1765
1766static struct file_system_type ovl_fs_type = {
1767 .owner = THIS_MODULE,
ef94b186 1768 .name = "overlay",
e9be9d5e
MS
1769 .mount = ovl_mount,
1770 .kill_sb = kill_anon_super,
1771};
ef94b186 1772MODULE_ALIAS_FS("overlay");
e9be9d5e 1773
13cf199d
AG
1774static void ovl_inode_init_once(void *foo)
1775{
1776 struct ovl_inode *oi = foo;
1777
1778 inode_init_once(&oi->vfs_inode);
1779}
1780
e9be9d5e
MS
1781static int __init ovl_init(void)
1782{
13cf199d
AG
1783 int err;
1784
1785 ovl_inode_cachep = kmem_cache_create("ovl_inode",
1786 sizeof(struct ovl_inode), 0,
1787 (SLAB_RECLAIM_ACCOUNT|
1788 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
1789 ovl_inode_init_once);
1790 if (ovl_inode_cachep == NULL)
1791 return -ENOMEM;
1792
2406a307
JX
1793 err = ovl_aio_request_cache_init();
1794 if (!err) {
1795 err = register_filesystem(&ovl_fs_type);
1796 if (!err)
1797 return 0;
1798
1799 ovl_aio_request_cache_destroy();
1800 }
1801 kmem_cache_destroy(ovl_inode_cachep);
13cf199d
AG
1802
1803 return err;
e9be9d5e
MS
1804}
1805
1806static void __exit ovl_exit(void)
1807{
1808 unregister_filesystem(&ovl_fs_type);
13cf199d
AG
1809
1810 /*
1811 * Make sure all delayed rcu free inodes are flushed before we
1812 * destroy cache.
1813 */
1814 rcu_barrier();
1815 kmem_cache_destroy(ovl_inode_cachep);
2406a307 1816 ovl_aio_request_cache_destroy();
e9be9d5e
MS
1817}
1818
1819module_init(ovl_init);
1820module_exit(ovl_exit);