cgroup: Include dying leaders with live threads in PROCS iterations
author Tejun Heo <tj@kernel.org>
Fri, 31 May 2019 17:38:58 +0000 (10:38 -0700)
committer Tejun Heo <tj@kernel.org>
Fri, 31 May 2019 17:38:58 +0000 (10:38 -0700)
CSS_TASK_ITER_PROCS currently iterates live group leaders; however,
this means that a process with a dying leader and live threads will be
skipped.  IOW, cgroup.procs might be empty while cgroup.threads isn't,
which is confusing to say the least.

Fix it by making cset track dying tasks and include dying leaders with
live threads in PROCS iteration.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Topi Miettinen <toiwoton@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
include/linux/cgroup-defs.h
include/linux/cgroup.h
kernel/cgroup/cgroup.c

index 77258d276f9350c54b1aca6c713a39d826a9715d..1615b9c17e0272441906ddb0d68de09c7794f7f2 100644 (file)
@@ -216,6 +216,7 @@ struct css_set {
         */
        struct list_head tasks;
        struct list_head mg_tasks;
+       struct list_head dying_tasks;
 
        /* all css_task_iters currently walking this cset */
        struct list_head task_iters;
index 05ed2a209e7411eb8d66797a9240483d2f37a06b..0297f930a56e1b08a46141a1d535b7d214183aca 100644 (file)
@@ -60,6 +60,7 @@ struct css_task_iter {
        struct list_head                *task_pos;
        struct list_head                *tasks_head;
        struct list_head                *mg_tasks_head;
+       struct list_head                *dying_tasks_head;
 
        struct css_set                  *cur_cset;
        struct css_set                  *cur_dcset;
index 035aee466bbfe565c6220e5b6aa828381b1fa805..a7df319c2e9a6f0cafd3d9095f9d26ef84400528 100644 (file)
@@ -739,6 +739,7 @@ struct css_set init_css_set = {
        .dom_cset               = &init_css_set,
        .tasks                  = LIST_HEAD_INIT(init_css_set.tasks),
        .mg_tasks               = LIST_HEAD_INIT(init_css_set.mg_tasks),
+       .dying_tasks            = LIST_HEAD_INIT(init_css_set.dying_tasks),
        .task_iters             = LIST_HEAD_INIT(init_css_set.task_iters),
        .threaded_csets         = LIST_HEAD_INIT(init_css_set.threaded_csets),
        .cgrp_links             = LIST_HEAD_INIT(init_css_set.cgrp_links),
@@ -1213,6 +1214,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
        cset->dom_cset = cset;
        INIT_LIST_HEAD(&cset->tasks);
        INIT_LIST_HEAD(&cset->mg_tasks);
+       INIT_LIST_HEAD(&cset->dying_tasks);
        INIT_LIST_HEAD(&cset->task_iters);
        INIT_LIST_HEAD(&cset->threaded_csets);
        INIT_HLIST_NODE(&cset->hlist);
@@ -4399,15 +4401,18 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
                        it->task_pos = NULL;
                        return;
                }
-       } while (!css_set_populated(cset));
+       } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
 
        if (!list_empty(&cset->tasks))
                it->task_pos = cset->tasks.next;
-       else
+       else if (!list_empty(&cset->mg_tasks))
                it->task_pos = cset->mg_tasks.next;
+       else
+               it->task_pos = cset->dying_tasks.next;
 
        it->tasks_head = &cset->tasks;
        it->mg_tasks_head = &cset->mg_tasks;
+       it->dying_tasks_head = &cset->dying_tasks;
 
        /*
         * We don't keep css_sets locked across iteration steps and thus
@@ -4446,6 +4451,8 @@ static void css_task_iter_skip(struct css_task_iter *it,
 
 static void css_task_iter_advance(struct css_task_iter *it)
 {
+       struct task_struct *task;
+
        lockdep_assert_held(&css_set_lock);
 repeat:
        if (it->task_pos) {
@@ -4462,17 +4469,32 @@ repeat:
                if (it->task_pos == it->tasks_head)
                        it->task_pos = it->mg_tasks_head->next;
                if (it->task_pos == it->mg_tasks_head)
+                       it->task_pos = it->dying_tasks_head->next;
+               if (it->task_pos == it->dying_tasks_head)
                        css_task_iter_advance_css_set(it);
        } else {
                /* called from start, proceed to the first cset */
                css_task_iter_advance_css_set(it);
        }
 
-       /* if PROCS, skip over tasks which aren't group leaders */
-       if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
-           !thread_group_leader(list_entry(it->task_pos, struct task_struct,
-                                           cg_list)))
-               goto repeat;
+       if (!it->task_pos)
+               return;
+
+       task = list_entry(it->task_pos, struct task_struct, cg_list);
+
+       if (it->flags & CSS_TASK_ITER_PROCS) {
+               /* if PROCS, skip over tasks which aren't group leaders */
+               if (!thread_group_leader(task))
+                       goto repeat;
+
+               /* and dying leaders w/o live member threads */
+               if (!atomic_read(&task->signal->live))
+                       goto repeat;
+       } else {
+               /* skip all dying ones */
+               if (task->flags & PF_EXITING)
+                       goto repeat;
+       }
 }
 
 /**
@@ -6009,6 +6031,7 @@ void cgroup_exit(struct task_struct *tsk)
        if (!list_empty(&tsk->cg_list)) {
                spin_lock_irq(&css_set_lock);
                css_set_move_task(tsk, cset, NULL, false);
+               list_add_tail(&tsk->cg_list, &cset->dying_tasks);
                cset->nr_tasks--;
 
                WARN_ON_ONCE(cgroup_task_frozen(tsk));
@@ -6034,6 +6057,13 @@ void cgroup_release(struct task_struct *task)
        do_each_subsys_mask(ss, ssid, have_release_callback) {
                ss->release(task);
        } while_each_subsys_mask();
+
+       if (use_task_css_set_links) {
+               spin_lock_irq(&css_set_lock);
+               css_set_skip_task_iters(task_css_set(task), task);
+               list_del_init(&task->cg_list);
+               spin_unlock_irq(&css_set_lock);
+       }
 }
 
 void cgroup_free(struct task_struct *task)