fs: support mapped mounts of mapped filesystems

author Christian Brauner <christian.brauner@ubuntu.com>

Fri, 3 Dec 2021 11:17:07 +0000 (12:17 +0100)

committer Christian Brauner <christian.brauner@ubuntu.com>

Sun, 5 Dec 2021 09:28:57 +0000 (10:28 +0100)
author Christian Brauner <christian.brauner@ubuntu.com>
Fri, 3 Dec 2021 11:17:07 +0000 (12:17 +0100)
committer Christian Brauner <christian.brauner@ubuntu.com>
Sun, 5 Dec 2021 09:28:57 +0000 (10:28 +0100)
diff --git a/fs/namespace.c b/fs/namespace.c

index 4994b816a74c642bb10a729d86ce2fdb0f1c6238..08266a35c0c192b21561fddb552f35fc0a339fe2 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -31,6 +31,7 @@
  #include <uapi/linux/mount.h>
  #include <linux/fs_context.h>
  #include <linux/shmem_fs.h>
+#include <linux/mnt_idmapping.h>
  
  #include "pnode.h"
  #include "internal.h"
@@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mnt)
         struct user_namespace *mnt_userns;
  
         mnt_userns = mnt_user_ns(&mnt->mnt);
-       if (mnt_userns != &init_user_ns)
+       if (!initial_idmapping(mnt_userns))
                 put_user_ns(mnt_userns);
         kfree_const(mnt->mnt_devname);
  #ifdef CONFIG_SMP
@@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struct mount *p)
  struct vfsmount *vfs_create_mount(struct fs_context *fc)
  {
         struct mount *mnt;
+       struct user_namespace *fs_userns;
  
         if (!fc->root)
                 return ERR_PTR(-EINVAL);
@@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
         mnt->mnt_mountpoint     = mnt->mnt.mnt_root;
         mnt->mnt_parent         = mnt;
  
+       fs_userns = mnt->mnt.mnt_sb->s_user_ns;
+       if (!initial_idmapping(fs_userns))
+               mnt->mnt.mnt_userns = get_user_ns(fs_userns);
+
         lock_mount_hash();
         list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
         unlock_mount_hash();
@@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
  
         atomic_inc(&sb->s_active);
         mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
-       if (mnt->mnt.mnt_userns != &init_user_ns)
+       if (!initial_idmapping(mnt->mnt.mnt_userns))
                 mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
         mnt->mnt.mnt_sb = sb;
         mnt->mnt.mnt_root = dget(root);
@@ -3927,10 +3933,18 @@ static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
  static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
  {
         struct vfsmount *m = &mnt->mnt;
+       struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
  
         if (!kattr->mnt_userns)
                 return 0;
  
+       /*
+        * Creating an idmapped mount with the filesystem-wide idmapping
+        * doesn't make sense so block that. We don't allow mushy semantics.
+        */
+       if (kattr->mnt_userns == fs_userns)
+               return -EINVAL;
+
         /*
          * Once a mount has been idmapped we don't allow it to change its
          * mapping. It makes things simpler and callers can just create
@@ -3943,12 +3957,8 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
         if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
                 return -EINVAL;
  
-       /* Don't yet support filesystem mountable in user namespaces. */
-       if (m->mnt_sb->s_user_ns != &init_user_ns)
-               return -EINVAL;
-
         /* We're not controlling the superblock. */
-       if (!capable(CAP_SYS_ADMIN))
+       if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         /* Mount has already been visible in the filesystem hierarchy. */
@@ -4002,14 +4012,27 @@ out:
  
  static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
  {
-       struct user_namespace *mnt_userns;
+       struct user_namespace *mnt_userns, *old_mnt_userns;
  
         if (!kattr->mnt_userns)
                 return;
  
+       /*
+        * We're the only ones able to change the mount's idmapping. So
+        * mnt->mnt.mnt_userns is stable and we can retrieve it directly.
+        */
+       old_mnt_userns = mnt->mnt.mnt_userns;
+
         mnt_userns = get_user_ns(kattr->mnt_userns);
         /* Pairs with smp_load_acquire() in mnt_user_ns(). */
         smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
+
+       /*
+        * If this is an idmapped filesystem drop the reference taken on
+        * its idmapping in vfs_create_mount() or clone_mnt() before.
+        */
+       if (!initial_idmapping(old_mnt_userns))
+               put_user_ns(old_mnt_userns);
  }
  
  static void mount_setattr_commit(struct mount_kattr *kattr,
@@ -4133,13 +4156,15 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
         }
  
         /*
-        * The init_user_ns is used to indicate that a vfsmount is not idmapped.
-        * This is simpler than just having to treat NULL as unmapped. Users
-        * wanting to idmap a mount to init_user_ns can just use a namespace
-        * with an identity mapping.
+        * The initial idmapping cannot be used to create an idmapped
+        * mount. We use the initial idmapping as an indicator of a mount
+        * that is not idmapped. It can simply be passed into helpers that
+        * are aware of idmapped mounts as a convenient shortcut. A user
+        * can just create a dedicated identity mapping to achieve the same
+        * result.
          */
         mnt_userns = container_of(ns, struct user_namespace, ns);
-       if (mnt_userns == &init_user_ns) {
+       if (initial_idmapping(mnt_userns)) {
                 err = -EPERM;
                 goto out_fput;
         }
diff --git a/fs/open.c b/fs/open.c

index 40a00e71865bab39c8f11f750c3705d84620c82d..9ff2f621b760ba2ac69d7f7b14cab2952aff3848 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -641,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
  
  int chown_common(const struct path *path, uid_t user, gid_t group)
  {
-       struct user_namespace *mnt_userns;
+       struct user_namespace *mnt_userns, *fs_userns;
         struct inode *inode = path->dentry->d_inode;
         struct inode *delegated_inode = NULL;
         int error;
@@ -653,8 +653,9 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
         gid = make_kgid(current_user_ns(), group);
  
         mnt_userns = mnt_user_ns(path->mnt);
-       uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid);
-       gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid);
+       fs_userns = i_user_ns(inode);
+       uid = mapped_kuid_user(mnt_userns, fs_userns, uid);
+       gid = mapped_kgid_user(mnt_userns, fs_userns, gid);
  
  retry_deleg:
         newattrs.ia_valid =  ATTR_CTIME;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c

index 4b5fb9a9b90fa1e9c991f9655f330766da8de311..80acb6885cf90b6fce2aa5b64d7b6c3b10d6687a 100644 (file)
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -376,8 +376,8 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
                                  break;
                          case ACL_USER:
                                 uid = mapped_kuid_fs(mnt_userns,
-                                                     &init_user_ns,
-                                                     pa->e_uid);
+                                                    i_user_ns(inode),
+                                                    pa->e_uid);
                                 if (uid_eq(uid, current_fsuid()))
                                          goto mask;
                                 break;
@@ -391,8 +391,8 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
                                 break;
                          case ACL_GROUP:
                                 gid = mapped_kgid_fs(mnt_userns,
-                                                     &init_user_ns,
-                                                     pa->e_gid);
+                                                    i_user_ns(inode),
+                                                    pa->e_gid);
                                 if (in_group_p(gid)) {
                                         found = 1;
                                         if ((pa->e_perm & want) == want)
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 3d6d514943ab991b16072d5071493303b9102b52..493b87e3616b8db1554cb873a036a27dffcdc463 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1641,7 +1641,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
  static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
                                     const struct inode *inode)
  {
-       return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid);
+       return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
  }
  
  /**
@@ -1655,7 +1655,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
  static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
                                     const struct inode *inode)
  {
-       return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid);
+       return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
  }
  
  /**
@@ -1669,7 +1669,7 @@ static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
  static inline void inode_fsuid_set(struct inode *inode,
                                    struct user_namespace *mnt_userns)
  {
-       inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns);
+       inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
  }
  
  /**
@@ -1683,7 +1683,7 @@ static inline void inode_fsuid_set(struct inode *inode,
  static inline void inode_fsgid_set(struct inode *inode,
                                    struct user_namespace *mnt_userns)
  {
-       inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns);
+       inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
  }
  
  /**
@@ -1704,10 +1704,10 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
         kuid_t kuid;
         kgid_t kgid;
  
-       kuid = mapped_fsuid(mnt_userns, &init_user_ns);
+       kuid = mapped_fsuid(mnt_userns, fs_userns);
         if (!uid_valid(kuid))
                 return false;
-       kgid = mapped_fsgid(mnt_userns, &init_user_ns);
+       kgid = mapped_fsgid(mnt_userns, fs_userns);
         if (!gid_valid(kgid))
                 return false;
         return kuid_has_mapping(fs_userns, kuid) &&
@@ -2653,13 +2653,14 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file)
   * is_idmapped_mnt - check whether a mount is mapped
   * @mnt: the mount to check
   *
- * If @mnt has an idmapping attached to it @mnt is mapped.
+ * If @mnt has an idmapping attached to it that differs from the
+ * filesystem's idmapping then @mnt is mapped.
   *
   * Return: true if mount is mapped, false if not.
   */
  static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
  {
-       return mnt_user_ns(mnt) != &init_user_ns;
+       return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
  }
  
  extern long vfs_truncate(const struct path *, loff_t);
diff --git a/security/commoncap.c b/security/commoncap.c

index d288a62e299967f71066a75a4fa547ce7b4a0088..5fc8986c3c77cd1dccfada0b8b35597de5f666aa 100644 (file)
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -419,7 +419,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns,
         kroot = make_kuid(fs_ns, root);
  
         /* If this is an idmapped mount shift the kuid. */
-       kroot = mapped_kuid_fs(mnt_userns, &init_user_ns, kroot);
+       kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot);
  
         /* If the root kuid maps to a valid uid in current ns, then return
          * this as a nscap. */
@@ -556,13 +556,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
                 return -EINVAL;
         if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
                 return -EPERM;
-       if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns))
+       if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
                 if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
                         /* user is privileged, just write the v2 */
                         return size;
  
-       rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns,
-                                  &init_user_ns);
+       rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns);
         if (!uid_valid(rootid))
                 return -EINVAL;
  
@@ -703,7 +702,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
         /* Limit the caps to the mounter of the filesystem
          * or the more limited uid specified in the xattr.
          */
-       rootkuid = mapped_kuid_fs(mnt_userns, &init_user_ns, rootkuid);
+       rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid);
         if (!rootid_owns_currentns(rootkuid))
                 return -ENODATA;
author	Christian Brauner <christian.brauner@ubuntu.com>
	Fri, 3 Dec 2021 11:17:07 +0000 (12:17 +0100)
committer	Christian Brauner <christian.brauner@ubuntu.com>
	Sun, 5 Dec 2021 09:28:57 +0000 (10:28 +0100)
fs/namespace.c		patch \| blob \| blame \| history
fs/open.c		patch \| blob \| blame \| history
fs/posix_acl.c		patch \| blob \| blame \| history
include/linux/fs.h		patch \| blob \| blame \| history
security/commoncap.c		patch \| blob \| blame \| history