Merge branch 'kbuild' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild
[linux-2.6-block.git] / kernel / cgroup.c
index 0fd5227958fed86165c994efbf5c13a831f5f7ad..9ef9fc8a774b08820a0dea814ecc4456dd83376c 100644 (file)
@@ -178,12 +178,13 @@ static DEFINE_IDR(cgroup_hierarchy_idr);
  */
 static u64 css_serial_nr_next = 1;
 
-/* This flag indicates whether tasks in the fork and exit paths should
- * check for fork/exit handlers to call. This avoids us having to do
- * extra work in the fork/exit path if none of the subsystems need to
- * be called.
+/*
+ * These bitmask flags record which subsystems have fork/exit callbacks, so
+ * that the fork/exit paths can iterate only those subsystems instead of
+ * probing every subsystem for a callback on each task fork/exit.
  */
-static int need_forkexit_callback __read_mostly;
+static unsigned long have_fork_callback __read_mostly;
+static unsigned long have_exit_callback __read_mostly;
 
 static struct cftype cgroup_dfl_base_files[];
 static struct cftype cgroup_legacy_base_files[];
@@ -412,6 +413,24 @@ static int notify_on_release(const struct cgroup *cgrp)
        for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT &&                \
             (((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
 
+/**
+ * for_each_subsys_which - filter for_each_subsys with a bitmask
+ * @ss: the iteration cursor
+ * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
+ * @ss_maskp: a pointer to the bitmask
+ *
+ * The block will only run for cases where the ssid-th bit (1 << ssid) of
+ * @ss_maskp is set.
+ */
+#define for_each_subsys_which(ss, ssid, ss_maskp)                      \
+       if (!CGROUP_SUBSYS_COUNT) /* to avoid spurious gcc warning */   \
+               (ssid) = 0;                                             \
+       else                                                            \
+               for_each_set_bit(ssid, ss_maskp, CGROUP_SUBSYS_COUNT)   \
+                       if (((ss) = cgroup_subsys[ssid]) && false)      \
+                               break;                                  \
+                       else
+
 /* iterate across the hierarchies */
 #define for_each_root(root)                                            \
        list_for_each_entry((root), &cgroup_roots, root_list)
@@ -1087,9 +1106,8 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
        while (true) {
                unsigned long new_ss_mask = cur_ss_mask;
 
-               for_each_subsys(ss, ssid)
-                       if (cur_ss_mask & (1 << ssid))
-                               new_ss_mask |= ss->depends_on;
+               for_each_subsys_which(ss, ssid, &cur_ss_mask)
+                       new_ss_mask |= ss->depends_on;
 
                /*
                 * Mask out subsystems which aren't available.  This can
@@ -1227,10 +1245,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 
        lockdep_assert_held(&cgroup_mutex);
 
-       for_each_subsys(ss, ssid) {
-               if (!(ss_mask & (1 << ssid)))
-                       continue;
-
+       for_each_subsys_which(ss, ssid, &ss_mask) {
                /* if @ss has non-root csses attached to it, can't move */
                if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)))
                        return -EBUSY;
@@ -1267,18 +1282,14 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
         * Nothing can fail from this point on.  Remove files for the
         * removed subsystems and rebind each subsystem.
         */
-       for_each_subsys(ss, ssid)
-               if (ss_mask & (1 << ssid))
-                       cgroup_clear_dir(&ss->root->cgrp, 1 << ssid);
+       for_each_subsys_which(ss, ssid, &ss_mask)
+               cgroup_clear_dir(&ss->root->cgrp, 1 << ssid);
 
-       for_each_subsys(ss, ssid) {
+       for_each_subsys_which(ss, ssid, &ss_mask) {
                struct cgroup_root *src_root;
                struct cgroup_subsys_state *css;
                struct css_set *cset;
 
-               if (!(ss_mask & (1 << ssid)))
-                       continue;
-
                src_root = ss->root;
                css = cgroup_css(&src_root->cgrp, ss);
 
@@ -2381,6 +2392,47 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
        return ret;
 }
 
+static int cgroup_procs_write_permission(struct task_struct *task,
+                                        struct cgroup *dst_cgrp,
+                                        struct kernfs_open_file *of)
+{
+       const struct cred *cred = current_cred();
+       const struct cred *tcred = get_task_cred(task);
+       int ret = 0;
+
+       /*
+        * even if we're attaching all tasks in the thread group, we only
+        * need to check permissions on one of them.
+        */
+       if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+           !uid_eq(cred->euid, tcred->uid) &&
+           !uid_eq(cred->euid, tcred->suid))
+               ret = -EACCES;
+
+       if (!ret && cgroup_on_dfl(dst_cgrp)) {
+               struct super_block *sb = of->file->f_path.dentry->d_sb;
+               struct cgroup *cgrp;
+               struct inode *inode;
+
+               down_read(&css_set_rwsem);
+               cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+               up_read(&css_set_rwsem);
+
+               while (!cgroup_is_descendant(dst_cgrp, cgrp))
+                       cgrp = cgroup_parent(cgrp);
+
+               ret = -ENOMEM;
+               inode = kernfs_get_inode(sb, cgrp->procs_kn);
+               if (inode) {
+                       ret = inode_permission(inode, MAY_WRITE);
+                       iput(inode);
+               }
+       }
+
+       put_cred(tcred);
+       return ret;
+}
+
 /*
  * Find the task_struct of the task to attach by vpid and pass it along to the
  * function to attach either it or all tasks in its threadgroup. Will lock
@@ -2390,7 +2442,6 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
                                    size_t nbytes, loff_t off, bool threadgroup)
 {
        struct task_struct *tsk;
-       const struct cred *cred = current_cred(), *tcred;
        struct cgroup *cgrp;
        pid_t pid;
        int ret;
@@ -2410,19 +2461,9 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
                        ret = -ESRCH;
                        goto out_unlock_rcu;
                }
-               /*
-                * even if we're attaching all tasks in the thread group, we
-                * only need to check permissions on one of them.
-                */
-               tcred = __task_cred(tsk);
-               if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
-                   !uid_eq(cred->euid, tcred->uid) &&
-                   !uid_eq(cred->euid, tcred->suid)) {
-                       ret = -EACCES;
-                       goto out_unlock_rcu;
-               }
-       } else
+       } else {
                tsk = current;
+       }
 
        if (threadgroup)
                tsk = tsk->group_leader;
@@ -2440,7 +2481,9 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
        get_task_struct(tsk);
        rcu_read_unlock();
 
-       ret = cgroup_attach_task(cgrp, tsk, threadgroup);
+       ret = cgroup_procs_write_permission(tsk, cgrp, of);
+       if (!ret)
+               ret = cgroup_attach_task(cgrp, tsk, threadgroup);
 
        put_task_struct(tsk);
        goto out_unlock_threadgroup;
@@ -2537,13 +2580,11 @@ static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask)
        bool printed = false;
        int ssid;
 
-       for_each_subsys(ss, ssid) {
-               if (ss_mask & (1 << ssid)) {
-                       if (printed)
-                               seq_putc(seq, ' ');
-                       seq_printf(seq, "%s", ss->name);
-                       printed = true;
-               }
+       for_each_subsys_which(ss, ssid, &ss_mask) {
+               if (printed)
+                       seq_putc(seq, ' ');
+               seq_printf(seq, "%s", ss->name);
+               printed = true;
        }
        if (printed)
                seq_putc(seq, '\n');
@@ -2685,11 +2726,12 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
         */
        buf = strstrip(buf);
        while ((tok = strsep(&buf, " "))) {
+               unsigned long tmp_ss_mask = ~cgrp_dfl_root_inhibit_ss_mask;
+
                if (tok[0] == '\0')
                        continue;
-               for_each_subsys(ss, ssid) {
-                       if (ss->disabled || strcmp(tok + 1, ss->name) ||
-                           ((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask))
+               for_each_subsys_which(ss, ssid, &tmp_ss_mask) {
+                       if (ss->disabled || strcmp(tok + 1, ss->name))
                                continue;
 
                        if (*tok == '+') {
@@ -2776,10 +2818,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
         * still around.  In such cases, wait till it's gone using
         * offline_waitq.
         */
-       for_each_subsys(ss, ssid) {
-               if (!(css_enable & (1 << ssid)))
-                       continue;
-
+       for_each_subsys_which(ss, ssid, &css_enable) {
                cgroup_for_each_live_child(child, cgrp) {
                        DEFINE_WAIT(wait);
 
@@ -3070,7 +3109,9 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
                return ret;
        }
 
-       if (cft->seq_show == cgroup_populated_show)
+       if (cft->write == cgroup_procs_write)
+               cgrp->procs_kn = kn;
+       else if (cft->seq_show == cgroup_populated_show)
                cgrp->populated_kn = kn;
        return 0;
 }
@@ -4914,7 +4955,8 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
         * init_css_set is in the subsystem's root cgroup. */
        init_css_set.subsys[ss->id] = css;
 
-       need_forkexit_callback |= ss->fork || ss->exit;
+       have_fork_callback |= (bool)ss->fork << ss->id;
+       have_exit_callback |= (bool)ss->exit << ss->id;
 
        /* At system boot, before all subsystems have been
         * registered, no tasks have been forked, so we don't
@@ -5225,11 +5267,8 @@ void cgroup_post_fork(struct task_struct *child)
         * css_set; otherwise, @child might change state between ->fork()
         * and addition to css_set.
         */
-       if (need_forkexit_callback) {
-               for_each_subsys(ss, i)
-                       if (ss->fork)
-                               ss->fork(child);
-       }
+       for_each_subsys_which(ss, i, &have_fork_callback)
+               ss->fork(child);
 }
 
 /**
@@ -5273,16 +5312,12 @@ void cgroup_exit(struct task_struct *tsk)
        cset = task_css_set(tsk);
        RCU_INIT_POINTER(tsk->cgroups, &init_css_set);
 
-       if (need_forkexit_callback) {
-               /* see cgroup_post_fork() for details */
-               for_each_subsys(ss, i) {
-                       if (ss->exit) {
-                               struct cgroup_subsys_state *old_css = cset->subsys[i];
-                               struct cgroup_subsys_state *css = task_css(tsk, i);
+       /* see cgroup_post_fork() for details */
+       for_each_subsys_which(ss, i, &have_exit_callback) {
+               struct cgroup_subsys_state *old_css = cset->subsys[i];
+               struct cgroup_subsys_state *css = task_css(tsk, i);
 
-                               ss->exit(css, old_css, tsk);
-                       }
-               }
+               ss->exit(css, old_css, tsk);
        }
 
        if (put_cset)