mounts: keep list of mounts in an rbtree

[linux-2.6-block.git] / fs / namespace.c
diff --git a/fs/namespace.c b/fs/namespace.c

index 0bcba81402b56e9765939443331059dd135c8bb3..bbe94096e26216116e918759702818163949a16a 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -734,21 +734,6 @@ struct vfsmount *lookup_mnt(const struct path *path)
         return m;
  }
  
-static inline void lock_ns_list(struct mnt_namespace *ns)
-{
-       spin_lock(&ns->ns_lock);
-}
-
-static inline void unlock_ns_list(struct mnt_namespace *ns)
-{
-       spin_unlock(&ns->ns_lock);
-}
-
-static inline bool mnt_is_cursor(struct mount *mnt)
-{
-       return mnt->mnt.mnt_flags & MNT_CURSOR;
-}
-
  /*
   * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
   *                         current mount namespace.
@@ -767,19 +752,15 @@ static inline bool mnt_is_cursor(struct mount *mnt)
  bool __is_local_mountpoint(struct dentry *dentry)
  {
         struct mnt_namespace *ns = current->nsproxy->mnt_ns;
-       struct mount *mnt;
+       struct mount *mnt, *n;
         bool is_covered = false;
  
         down_read(&namespace_sem);
-       lock_ns_list(ns);
-       list_for_each_entry(mnt, &ns->list, mnt_list) {
-               if (mnt_is_cursor(mnt))
-                       continue;
+       rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) {
                 is_covered = (mnt->mnt_mountpoint == dentry);
                 if (is_covered)
                         break;
         }
-       unlock_ns_list(ns);
         up_read(&namespace_sem);
  
         return is_covered;
@@ -1026,6 +1007,30 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
         mnt_add_count(old_parent, -1);
  }
  
+static inline struct mount *node_to_mount(struct rb_node *node)
+{
+       return rb_entry(node, struct mount, mnt_node);
+}
+
+static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
+{
+       struct rb_node **link = &ns->mounts.rb_node;
+       struct rb_node *parent = NULL;
+
+       WARN_ON(mnt->mnt.mnt_flags & MNT_ONRB);
+       mnt->mnt_ns = ns;
+       while (*link) {
+               parent = *link;
+               if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+       rb_link_node(&mnt->mnt_node, parent, link);
+       rb_insert_color(&mnt->mnt_node, &ns->mounts);
+       mnt->mnt.mnt_flags |= MNT_ONRB;
+}
+
  /*
   * vfsmount lock must be held for write
   */
@@ -1039,12 +1044,13 @@ static void commit_tree(struct mount *mnt)
         BUG_ON(parent == mnt);
  
         list_add_tail(&head, &mnt->mnt_list);
-       list_for_each_entry(m, &head, mnt_list)
-               m->mnt_ns = n;
+       while (!list_empty(&head)) {
+               m = list_first_entry(&head, typeof(*m), mnt_list);
+               list_del(&m->mnt_list);
  
-       list_splice(&head, n->list.prev);
-
-       n->mounts += n->pending_mounts;
+               mnt_add_to_ns(n, m);
+       }
+       n->nr_mounts += n->pending_mounts;
         n->pending_mounts = 0;
  
         __attach_mnt(mnt, parent);
@@ -1192,7 +1198,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
         }
  
         mnt->mnt.mnt_flags = old->mnt.mnt_flags;
-       mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
+       mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL|MNT_ONRB);
  
         atomic_inc(&sb->s_active);
         mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
@@ -1417,65 +1423,57 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
         return &p->mnt;
  }
  
-#ifdef CONFIG_PROC_FS
-static struct mount *mnt_list_next(struct mnt_namespace *ns,
-                                  struct list_head *p)
+/*
+ * Returns the mount which either has the specified mnt_id, or has the next
+ * smallest id afer the specified one.
+ */
+static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id)
  {
-       struct mount *mnt, *ret = NULL;
+       struct rb_node *node = ns->mounts.rb_node;
+       struct mount *ret = NULL;
  
-       lock_ns_list(ns);
-       list_for_each_continue(p, &ns->list) {
-               mnt = list_entry(p, typeof(*mnt), mnt_list);
-               if (!mnt_is_cursor(mnt)) {
-                       ret = mnt;
-                       break;
+       while (node) {
+               struct mount *m = node_to_mount(node);
+
+               if (mnt_id <= m->mnt_id_unique) {
+                       ret = node_to_mount(node);
+                       if (mnt_id == m->mnt_id_unique)
+                               break;
+                       node = node->rb_left;
+               } else {
+                       node = node->rb_right;
                 }
         }
-       unlock_ns_list(ns);
-
         return ret;
  }
  
+#ifdef CONFIG_PROC_FS
+
  /* iterator; we want it to have access to namespace_sem, thus here... */
  static void *m_start(struct seq_file *m, loff_t *pos)
  {
         struct proc_mounts *p = m->private;
-       struct list_head *prev;
  
         down_read(&namespace_sem);
-       if (!*pos) {
-               prev = &p->ns->list;
-       } else {
-               prev = &p->cursor.mnt_list;
  
-               /* Read after we'd reached the end? */
-               if (list_empty(prev))
-                       return NULL;
-       }
-
-       return mnt_list_next(p->ns, prev);
+       return mnt_find_id_at(p->ns, *pos);
  }
  
  static void *m_next(struct seq_file *m, void *v, loff_t *pos)
  {
-       struct proc_mounts *p = m->private;
-       struct mount *mnt = v;
+       struct mount *next = NULL, *mnt = v;
+       struct rb_node *node = rb_next(&mnt->mnt_node);
  
         ++*pos;
-       return mnt_list_next(p->ns, &mnt->mnt_list);
+       if (node) {
+               next = node_to_mount(node);
+               *pos = next->mnt_id_unique;
+       }
+       return next;
  }
  
  static void m_stop(struct seq_file *m, void *v)
  {
-       struct proc_mounts *p = m->private;
-       struct mount *mnt = v;
-
-       lock_ns_list(p->ns);
-       if (mnt)
-               list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
-       else
-               list_del_init(&p->cursor.mnt_list);
-       unlock_ns_list(p->ns);
         up_read(&namespace_sem);
  }
  
@@ -1493,14 +1491,6 @@ const struct seq_operations mounts_op = {
         .show   = m_show,
  };
  
-void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
-{
-       down_read(&namespace_sem);
-       lock_ns_list(ns);
-       list_del(&cursor->mnt_list);
-       unlock_ns_list(ns);
-       up_read(&namespace_sem);
-}
  #endif  /* CONFIG_PROC_FS */
  
  /**
@@ -1642,7 +1632,10 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
         /* Gather the mounts to umount */
         for (p = mnt; p; p = next_mnt(p, mnt)) {
                 p->mnt.mnt_flags |= MNT_UMOUNT;
-               list_move(&p->mnt_list, &tmp_list);
+               if (p->mnt.mnt_flags & MNT_ONRB)
+                       move_from_ns(p, &tmp_list);
+               else
+                       list_move(&p->mnt_list, &tmp_list);
         }
  
         /* Hide the mounts from mnt_mounts */
@@ -1662,7 +1655,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
                 list_del_init(&p->mnt_list);
                 ns = p->mnt_ns;
                 if (ns) {
-                       ns->mounts--;
+                       ns->nr_mounts--;
                         __touch_mnt_namespace(ns);
                 }
                 p->mnt_ns = NULL;
@@ -1788,14 +1781,16 @@ static int do_umount(struct mount *mnt, int flags)
  
         event++;
         if (flags & MNT_DETACH) {
-               if (!list_empty(&mnt->mnt_list))
+               if (mnt->mnt.mnt_flags & MNT_ONRB ||
+                   !list_empty(&mnt->mnt_list))
                         umount_tree(mnt, UMOUNT_PROPAGATE);
                 retval = 0;
         } else {
                 shrink_submounts(mnt);
                 retval = -EBUSY;
                 if (!propagate_mount_busy(mnt, 2)) {
-                       if (!list_empty(&mnt->mnt_list))
+                       if (mnt->mnt.mnt_flags & MNT_ONRB ||
+                           !list_empty(&mnt->mnt_list))
                                 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
                         retval = 0;
                 }
@@ -2213,9 +2208,9 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
         unsigned int mounts = 0;
         struct mount *p;
  
-       if (ns->mounts >= max)
+       if (ns->nr_mounts >= max)
                 return -ENOSPC;
-       max -= ns->mounts;
+       max -= ns->nr_mounts;
         if (ns->pending_mounts >= max)
                 return -ENOSPC;
         max -= ns->pending_mounts;
@@ -2359,8 +2354,12 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                 touch_mnt_namespace(source_mnt->mnt_ns);
         } else {
                 if (source_mnt->mnt_ns) {
+                       LIST_HEAD(head);
+
                         /* move from anon - the caller will destroy */
-                       list_del_init(&source_mnt->mnt_ns->list);
+                       for (p = source_mnt; p; p = next_mnt(p, source_mnt))
+                               move_from_ns(p, &head);
+                       list_del_init(&head);
                 }
                 if (beneath)
                         mnt_set_mountpoint_beneath(source_mnt, top_mnt, smp);
@@ -2671,11 +2670,10 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
  
         lock_mount_hash();
         for (p = mnt; p; p = next_mnt(p, mnt)) {
-               p->mnt_ns = ns;
-               ns->mounts++;
+               mnt_add_to_ns(ns, p);
+               ns->nr_mounts++;
         }
         ns->root = mnt;
-       list_add_tail(&ns->list, &mnt->mnt_list);
         mntget(&mnt->mnt);
         unlock_mount_hash();
         namespace_unlock();
@@ -3738,9 +3736,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
         if (!anon)
                 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
         refcount_set(&new_ns->ns.count, 1);
-       INIT_LIST_HEAD(&new_ns->list);
+       new_ns->mounts = RB_ROOT;
         init_waitqueue_head(&new_ns->poll);
-       spin_lock_init(&new_ns->ns_lock);
         new_ns->user_ns = get_user_ns(user_ns);
         new_ns->ucounts = ucounts;
         return new_ns;
@@ -3787,7 +3784,6 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
                 unlock_mount_hash();
         }
         new_ns->root = new;
-       list_add_tail(&new_ns->list, &new->mnt_list);
  
         /*
          * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -3797,8 +3793,8 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
         p = old;
         q = new;
         while (p) {
-               q->mnt_ns = new_ns;
-               new_ns->mounts++;
+               mnt_add_to_ns(new_ns, q);
+               new_ns->nr_mounts++;
                 if (new_fs) {
                         if (&p->mnt == new_fs->root.mnt) {
                                 new_fs->root.mnt = mntget(&q->mnt);
@@ -3840,10 +3836,9 @@ struct dentry *mount_subtree(struct vfsmount *m, const char *name)
                 mntput(m);
                 return ERR_CAST(ns);
         }
-       mnt->mnt_ns = ns;
         ns->root = mnt;
-       ns->mounts++;
-       list_add(&mnt->mnt_list, &ns->list);
+       ns->nr_mounts++;
+       mnt_add_to_ns(ns, mnt);
  
         err = vfs_path_lookup(m->mnt_root, m,
                         name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
@@ -4021,10 +4016,9 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
                 goto err_path;
         }
         mnt = real_mount(newmount.mnt);
-       mnt->mnt_ns = ns;
         ns->root = mnt;
-       ns->mounts = 1;
-       list_add(&mnt->mnt_list, &ns->list);
+       ns->nr_mounts = 1;
+       mnt_add_to_ns(ns, mnt);
         mntget(newmount.mnt);
  
         /* Attach to an apparent O_PATH fd with a note that we need to unmount
@@ -4695,10 +4689,9 @@ static void __init init_mount_tree(void)
         if (IS_ERR(ns))
                 panic("Can't allocate initial namespace");
         m = real_mount(mnt);
-       m->mnt_ns = ns;
         ns->root = m;
-       ns->mounts = 1;
-       list_add(&m->mnt_list, &ns->list);
+       ns->nr_mounts = 1;
+       mnt_add_to_ns(ns, m);
         init_task.nsproxy->mnt_ns = ns;
         get_mnt_ns(ns);
  
@@ -4825,18 +4818,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
                                 int *new_mnt_flags)
  {
         int new_flags = *new_mnt_flags;
-       struct mount *mnt;
+       struct mount *mnt, *n;
         bool visible = false;
  
         down_read(&namespace_sem);
-       lock_ns_list(ns);
-       list_for_each_entry(mnt, &ns->list, mnt_list) {
+       rbtree_postorder_for_each_entry_safe(mnt, n, &ns->mounts, mnt_node) {
                 struct mount *child;
                 int mnt_flags;
  
-               if (mnt_is_cursor(mnt))
-                       continue;
-
                 if (mnt->mnt.mnt_sb->s_type != sb->s_type)
                         continue;
  
@@ -4884,7 +4873,6 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
         next:   ;
         }
  found:
-       unlock_ns_list(ns);
         up_read(&namespace_sem);
         return visible;
  }