Merge tag 'watchdog-for-linus-v4.10' of git://git.kernel.org/pub/scm/linux/kernel...

[linux-2.6-block.git] / fs / namespace.c
diff --git a/fs/namespace.c b/fs/namespace.c

index 7bb2cda3bfef50b27f9bb8b3b478cc7aeb3d1049..b5b1259e064f8d9661110ba1f1f73d1a0ff19d51 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
  #include "pnode.h"
  #include "internal.h"
  
+/* Limit on mounts per mount namespace; count_mounts() fails above it */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
  static unsigned int m_hash_mask __read_mostly;
  static unsigned int m_hash_shift __read_mostly;
  static unsigned int mp_hash_mask __read_mostly;
@@ -93,10 +96,6 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
         return &mountpoint_hashtable[tmp & mp_hash_mask];
  }
  
-/*
- * allocation is serialized by namespace_sem, but we need the spinlock to
- * serialize with freeing.
- */
  static int mnt_alloc_id(struct mount *mnt)
  {
         int res;
@@ -675,7 +674,7 @@ out:
   *
   * lookup_mnt takes a reference to the found vfsmount.
   */
-struct vfsmount *lookup_mnt(struct path *path)
+struct vfsmount *lookup_mnt(const struct path *path)
  {
         struct mount *child_mnt;
         struct vfsmount *m;
@@ -899,6 +898,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
  
         list_splice(&head, n->list.prev);
  
+       n->mounts += n->pending_mounts;
+       n->pending_mounts = 0;
+
         attach_shadowed(mnt, parent, shadows);
         touch_mnt_namespace(n);
  }
@@ -1028,6 +1030,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                 if (IS_MNT_SLAVE(old))
                         list_add(&mnt->mnt_slave, &old->mnt_slave);
                 mnt->mnt_master = old->mnt_master;
+       } else {
+               CLEAR_MNT_SHARED(mnt);
         }
         if (flag & CL_MAKE_SHARED)
                 set_mnt_shared(mnt);
@@ -1153,7 +1157,36 @@ struct vfsmount *mntget(struct vfsmount *mnt)
  }
  EXPORT_SYMBOL(mntget);
  
-struct vfsmount *mnt_clone_internal(struct path *path)
+/* path_is_mountpoint() - Check if path is a mount in the current
+ *                          namespace.
+ *
+ *  d_mountpoint() can only reliably establish that a dentry is not
+ *  mounted on in any namespace; that common case returns false at once.
+ *  It is unaware that a mount on the dentry may belong to a different
+ *  namespace, so otherwise the whole path is checked (not the dentry
+ *  alone) by __path_is_mountpoint(), under rcu_read_lock() and retried
+ *  for as long as read_seqretry() reports that mount_lock has changed.
+ */
+bool path_is_mountpoint(const struct path *path)
+{
+       unsigned seq;
+       bool res;
+
+       if (!d_mountpoint(path->dentry))
+               return false;
+
+       rcu_read_lock();
+       do {
+               seq = read_seqbegin(&mount_lock);
+               res = __path_is_mountpoint(path);
+       } while (read_seqretry(&mount_lock, seq));
+       rcu_read_unlock();
+
+       return res;
+}
+EXPORT_SYMBOL(path_is_mountpoint);
+
+struct vfsmount *mnt_clone_internal(const struct path *path)
  {
         struct mount *p;
         p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
@@ -1419,11 +1452,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
                 propagate_umount(&tmp_list);
  
         while (!list_empty(&tmp_list)) {
+               struct mnt_namespace *ns;
                 bool disconnect;
                 p = list_first_entry(&tmp_list, struct mount, mnt_list);
                 list_del_init(&p->mnt_expire);
                 list_del_init(&p->mnt_list);
-               __touch_mnt_namespace(p->mnt_ns);
+               ns = p->mnt_ns;
+               if (ns) {
+                       ns->mounts--;
+                       __touch_mnt_namespace(ns);
+               }
                 p->mnt_ns = NULL;
                 if (how & UMOUNT_SYNC)
                         p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1747,7 +1785,7 @@ out:
  
  /* Caller should check returned pointer for errors */
  
-struct vfsmount *collect_mounts(struct path *path)
+struct vfsmount *collect_mounts(const struct path *path)
  {
         struct mount *tree;
         namespace_lock();
@@ -1780,7 +1818,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
   *
   * Release with mntput().
   */
-struct vfsmount *clone_private_mount(struct path *path)
+struct vfsmount *clone_private_mount(const struct path *path)
  {
         struct mount *old_mnt = real_mount(path->mnt);
         struct mount *new_mnt;
@@ -1788,9 +1826,7 @@ struct vfsmount *clone_private_mount(struct path *path)
         if (IS_MNT_UNBINDABLE(old_mnt))
                 return ERR_PTR(-EINVAL);
  
-       down_read(&namespace_sem);
         new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
-       up_read(&namespace_sem);
         if (IS_ERR(new_mnt))
                 return ERR_CAST(new_mnt);
  
@@ -1840,6 +1876,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
         return 0;
  }
  
+/* Count the tree at @mnt into ns->pending_mounts, or fail with -ENOSPC. */
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+       unsigned int limit = READ_ONCE(sysctl_mount_max);
+       unsigned int nr = 0, cur, queued, total;
+       struct mount *m;
+
+       for (m = mnt; m; m = next_mnt(m, mnt))
+               nr++;
+
+       cur = ns->mounts;
+       queued = ns->pending_mounts;
+       total = cur + queued;
+       /* total below either addend means the unsigned sum wrapped */
+       if (total < cur || total < queued ||
+           total > limit || nr > limit - total)
+               return -ENOSPC;
+
+       ns->pending_mounts = queued + nr;
+       return 0;
+}
+
  /*
   *  @source_mnt : mount tree to be attached
   *  @nd         : place the mount tree @source_mnt is attached
@@ -1909,10 +1967,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                         struct path *parent_path)
  {
         HLIST_HEAD(tree_list);
+       struct mnt_namespace *ns = dest_mnt->mnt_ns;
         struct mount *child, *p;
         struct hlist_node *n;
         int err;
  
+       /* Is there space to add these mounts to the mount namespace? */
+       if (!parent_path) {
+               err = count_mounts(ns, source_mnt);
+               if (err)
+                       goto out;
+       }
+
         if (IS_MNT_SHARED(dest_mnt)) {
                 err = invent_group_ids(source_mnt, true);
                 if (err)
@@ -1949,11 +2015,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
   out_cleanup_ids:
         while (!hlist_empty(&tree_list)) {
                 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+               child->mnt_parent->mnt_ns->pending_mounts = 0;
                 umount_tree(child, UMOUNT_SYNC);
         }
         unlock_mount_hash();
         cleanup_group_ids(source_mnt, NULL);
   out:
+       ns->pending_mounts = 0;
         return err;
  }
  
@@ -2700,7 +2768,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
  
         flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
                    MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
-                  MS_STRICTATIME);
+                  MS_STRICTATIME | MS_NOREMOTELOCK);
  
         if (flags & MS_REMOUNT)
                 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
@@ -2719,9 +2787,20 @@ dput_out:
         return retval;
  }
  
+static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
+{
+       return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
+}
+
+static void dec_mnt_namespaces(struct ucounts *ucounts)
+{
+       dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
+}
+
  static void free_mnt_ns(struct mnt_namespace *ns)
  {
         ns_free_inum(&ns->ns);
+       dec_mnt_namespaces(ns->ucounts);
         put_user_ns(ns->user_ns);
         kfree(ns);
  }
@@ -2738,14 +2817,22 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
  static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
  {
         struct mnt_namespace *new_ns;
+       struct ucounts *ucounts;
         int ret;
  
+       ucounts = inc_mnt_namespaces(user_ns);
+       if (!ucounts)
+               return ERR_PTR(-ENOSPC);
+
         new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
-       if (!new_ns)
+       if (!new_ns) {
+               dec_mnt_namespaces(ucounts);
                 return ERR_PTR(-ENOMEM);
+       }
         ret = ns_alloc_inum(&new_ns->ns);
         if (ret) {
                 kfree(new_ns);
+               dec_mnt_namespaces(ucounts);
                 return ERR_PTR(ret);
         }
         new_ns->ns.ops = &mntns_operations;
@@ -2756,9 +2843,13 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
         init_waitqueue_head(&new_ns->poll);
         new_ns->event = 0;
         new_ns->user_ns = get_user_ns(user_ns);
+       new_ns->ucounts = ucounts;
+       new_ns->mounts = 0;
+       new_ns->pending_mounts = 0;
         return new_ns;
  }
  
+__latent_entropy
  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
                 struct user_namespace *user_ns, struct fs_struct *new_fs)
  {
@@ -2805,6 +2896,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
         q = new;
         while (p) {
                 q->mnt_ns = new_ns;
+               new_ns->mounts++;
                 if (new_fs) {
                         if (&p->mnt == new_fs->root.mnt) {
                                 new_fs->root.mnt = mntget(&q->mnt);
@@ -2843,6 +2935,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
                 struct mount *mnt = real_mount(m);
                 mnt->mnt_ns = new_ns;
                 new_ns->root = mnt;
+               new_ns->mounts++;
                 list_add(&mnt->mnt_list, &new_ns->list);
         } else {
                 mntput(m);
@@ -2929,7 +3022,7 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
         return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
  }
  
-bool path_is_under(struct path *path1, struct path *path2)
+bool path_is_under(const struct path *path1, const struct path *path2)
  {
         bool res;
         read_seqlock_excl(&mount_lock);
@@ -3348,10 +3441,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
         return 0;
  }
  
+static struct user_namespace *mntns_owner(struct ns_common *ns)
+{
+       return to_mnt_ns(ns)->user_ns;
+}
+
  const struct proc_ns_operations mntns_operations = {
         .name           = "mnt",
         .type           = CLONE_NEWNS,
         .get            = mntns_get,
         .put            = mntns_put,
         .install        = mntns_install,
+       .owner          = mntns_owner,
  };