cgroup: add cgroup_root_mutex

author Tejun Heo <tj@kernel.org>

Tue, 13 Dec 2011 02:12:21 +0000 (18:12 -0800)

committer Tejun Heo <tj@kernel.org>

Tue, 13 Dec 2011 02:12:21 +0000 (18:12 -0800)
author Tejun Heo <tj@kernel.org>
Tue, 13 Dec 2011 02:12:21 +0000 (18:12 -0800)
committer Tejun Heo <tj@kernel.org>
Tue, 13 Dec 2011 02:12:21 +0000 (18:12 -0800)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index d9d5648f3cdcc4bcde3a7a67afe845aa89dc2a1f..6545fd61b10db9a0ae26fd7b9443e3e0316f4666 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,7 +63,24 @@
  
  #include <linux/atomic.h>
  
+/*
+ * cgroup_mutex is the master lock.  Any modification to cgroup or its
+ * hierarchy must be performed while holding it.
+ *
+ * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
+ * cgroupfs_root of any cgroup hierarchy - subsys list, flags,
+ * release_agent_path and so on.  Modifying requires both cgroup_mutex and
+ * cgroup_root_mutex.  Readers can acquire either of the two.  This is to
+ * break the following locking order cycle.
+ *
+ *  A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
+ *  B. namespace_sem -> cgroup_mutex
+ *
+ * B happens only through cgroup_show_options(); having it take
+ * cgroup_root_mutex instead of cgroup_mutex breaks the cycle.
+ */
  static DEFINE_MUTEX(cgroup_mutex);
+static DEFINE_MUTEX(cgroup_root_mutex);
  
  /*
   * Generate an array of cgroup subsystem pointers. At boot time, this is
@@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
         int i;
  
         BUG_ON(!mutex_is_locked(&cgroup_mutex));
+       BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
  
         removed_bits = root->actual_subsys_bits & ~final_bits;
         added_bits = final_bits & ~root->actual_subsys_bits;
@@ -1043,7 +1061,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
         struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
         struct cgroup_subsys *ss;
  
-       mutex_lock(&cgroup_mutex);
+       mutex_lock(&cgroup_root_mutex);
         for_each_subsys(root, ss)
                 seq_printf(seq, ",%s", ss->name);
         if (test_bit(ROOT_NOPREFIX, &root->flags))
@@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
                 seq_puts(seq, ",clone_children");
         if (strlen(root->name))
                 seq_printf(seq, ",name=%s", root->name);
-       mutex_unlock(&cgroup_mutex);
+       mutex_unlock(&cgroup_root_mutex);
         return 0;
  }
  
@@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
  
         mutex_lock(&cgrp->dentry->d_inode->i_mutex);
         mutex_lock(&cgroup_mutex);
+       mutex_lock(&cgroup_root_mutex);
  
         /* See what subsystems are wanted */
         ret = parse_cgroupfs_options(data, &opts);
@@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
   out_unlock:
         kfree(opts.release_agent);
         kfree(opts.name);
+       mutex_unlock(&cgroup_root_mutex);
         mutex_unlock(&cgroup_mutex);
         mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
         return ret;
@@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
         int ret = 0;
         struct super_block *sb;
         struct cgroupfs_root *new_root;
+       struct inode *inode;
  
         /* First find the desired set of subsystems */
         mutex_lock(&cgroup_mutex);
@@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 /* We used the new root structure, so this is a new hierarchy */
                 struct list_head tmp_cg_links;
                 struct cgroup *root_cgrp = &root->top_cgroup;
-               struct inode *inode;
                 struct cgroupfs_root *existing_root;
                 const struct cred *cred;
                 int i;
@@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
  
                 mutex_lock(&inode->i_mutex);
                 mutex_lock(&cgroup_mutex);
+               mutex_lock(&cgroup_root_mutex);
  
-               if (strlen(root->name)) {
-                       /* Check for name clashes with existing mounts */
-                       for_each_active_root(existing_root) {
-                               if (!strcmp(existing_root->name, root->name)) {
-                                       ret = -EBUSY;
-                                       mutex_unlock(&cgroup_mutex);
-                                       mutex_unlock(&inode->i_mutex);
-                                       goto drop_new_super;
-                               }
-                       }
-               }
+               /* Check for name clashes with existing mounts */
+               ret = -EBUSY;
+               if (strlen(root->name))
+                       for_each_active_root(existing_root)
+                               if (!strcmp(existing_root->name, root->name))
+                                       goto unlock_drop;
  
                 /*
                  * We're accessing css_set_count without locking
@@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                  * have some link structures left over
                  */
                 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
-               if (ret) {
-                       mutex_unlock(&cgroup_mutex);
-                       mutex_unlock(&inode->i_mutex);
-                       goto drop_new_super;
-               }
+               if (ret)
+                       goto unlock_drop;
  
                 ret = rebind_subsystems(root, root->subsys_bits);
                 if (ret == -EBUSY) {
-                       mutex_unlock(&cgroup_mutex);
-                       mutex_unlock(&inode->i_mutex);
                         free_cg_links(&tmp_cg_links);
-                       goto drop_new_super;
+                       goto unlock_drop;
                 }
                 /*
                  * There must be no failure case after here, since rebinding
@@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 cred = override_creds(&init_cred);
                 cgroup_populate_dir(root_cgrp);
                 revert_creds(cred);
+               mutex_unlock(&cgroup_root_mutex);
                 mutex_unlock(&cgroup_mutex);
                 mutex_unlock(&inode->i_mutex);
         } else {
@@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
         kfree(opts.name);
         return dget(sb->s_root);
  
+ unlock_drop:
+       mutex_unlock(&cgroup_root_mutex);
+       mutex_unlock(&cgroup_mutex);
+       mutex_unlock(&inode->i_mutex);
   drop_new_super:
         deactivate_locked_super(sb);
   drop_modules:
@@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
         BUG_ON(!list_empty(&cgrp->sibling));
  
         mutex_lock(&cgroup_mutex);
+       mutex_lock(&cgroup_root_mutex);
  
         /* Rebind all subsystems back to the default hierarchy */
         ret = rebind_subsystems(root, 0);
@@ -1664,6 +1681,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
                 root_count--;
         }
  
+       mutex_unlock(&cgroup_root_mutex);
         mutex_unlock(&cgroup_mutex);
  
         kill_litter_super(sb);
@@ -2311,7 +2329,9 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
                 return -EINVAL;
         if (!cgroup_lock_live_group(cgrp))
                 return -ENODEV;
+       mutex_lock(&cgroup_root_mutex);
         strcpy(cgrp->root->release_agent_path, buffer);
+       mutex_unlock(&cgroup_root_mutex);
         cgroup_unlock();
         return 0;
  }
author	Tejun Heo <tj@kernel.org>
	Tue, 13 Dec 2011 02:12:21 +0000 (18:12 -0800)
committer	Tejun Heo <tj@kernel.org>
	Tue, 13 Dec 2011 02:12:21 +0000 (18:12 -0800)