Merge tag 'efi-efivars-removal-for-v5.20' of git://git.kernel.org/pub/scm/linux/kerne...
[linux-2.6-block.git] / kernel / cgroup / cgroup.c
index 13c8e91d7862020d0081cca68d10dc71e2aefe52..ffaccd6373f1e786e9c90f8230b16f11ef4b8535 100644 (file)
@@ -279,8 +279,6 @@ bool cgroup_ssid_enabled(int ssid)
  *
  * - When mounting an existing superblock, mount options should match.
  *
- * - Remount is disallowed.
- *
  * - rename(2) is disallowed.
  *
  * - "tasks" is removed.  Everything should be at process granularity.  Use
@@ -1309,6 +1307,20 @@ struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
        return root_cgrp->root;
 }
 
+void cgroup_favor_dynmods(struct cgroup_root *root, bool favor)
+{
+       bool favoring = root->flags & CGRP_ROOT_FAVOR_DYNMODS;
+
+       /* see the comment above CGRP_ROOT_FAVOR_DYNMODS definition */
+       if (favor && !favoring) {
+               rcu_sync_enter(&cgroup_threadgroup_rwsem.rss);
+               root->flags |= CGRP_ROOT_FAVOR_DYNMODS;
+       } else if (!favor && favoring) {
+               rcu_sync_exit(&cgroup_threadgroup_rwsem.rss);
+               root->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
+       }
+}
+
 static int cgroup_init_root_id(struct cgroup_root *root)
 {
        int id;
@@ -1369,6 +1381,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
                cgroup_root_count--;
        }
 
+       cgroup_favor_dynmods(root, false);
        cgroup_exit_root_id(root);
 
        mutex_unlock(&cgroup_mutex);
@@ -1378,6 +1391,31 @@ static void cgroup_destroy_root(struct cgroup_root *root)
        cgroup_free_root(root);
 }
 
+static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
+                                           struct cgroup_root *root)
+{
+       struct cgroup *res_cgroup = NULL;
+
+       if (cset == &init_css_set) {
+               res_cgroup = &root->cgrp;
+       } else if (root == &cgrp_dfl_root) {
+               res_cgroup = cset->dfl_cgrp;
+       } else {
+               struct cgrp_cset_link *link;
+
+               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+                       struct cgroup *c = link->cgrp;
+
+                       if (c->root == root) {
+                               res_cgroup = c;
+                               break;
+                       }
+               }
+       }
+
+       return res_cgroup;
+}
+
 /*
  * look up cgroup associated with current task's cgroup namespace on the
  * specified hierarchy
@@ -1393,22 +1431,8 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
        rcu_read_lock();
 
        cset = current->nsproxy->cgroup_ns->root_cset;
-       if (cset == &init_css_set) {
-               res = &root->cgrp;
-       } else if (root == &cgrp_dfl_root) {
-               res = cset->dfl_cgrp;
-       } else {
-               struct cgrp_cset_link *link;
-
-               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-                       struct cgroup *c = link->cgrp;
+       res = __cset_cgroup_from_root(cset, root);
 
-                       if (c->root == root) {
-                               res = c;
-                               break;
-                       }
-               }
-       }
        rcu_read_unlock();
 
        BUG_ON(!res);
@@ -1424,22 +1448,7 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
        lockdep_assert_held(&cgroup_mutex);
        lockdep_assert_held(&css_set_lock);
 
-       if (cset == &init_css_set) {
-               res = &root->cgrp;
-       } else if (root == &cgrp_dfl_root) {
-               res = cset->dfl_cgrp;
-       } else {
-               struct cgrp_cset_link *link;
-
-               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
-                       struct cgroup *c = link->cgrp;
-
-                       if (c->root == root) {
-                               res = c;
-                               break;
-                       }
-               }
-       }
+       res = __cset_cgroup_from_root(cset, root);
 
        BUG_ON(!res);
        return res;
@@ -1866,6 +1875,7 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 
 enum cgroup2_param {
        Opt_nsdelegate,
+       Opt_favordynmods,
        Opt_memory_localevents,
        Opt_memory_recursiveprot,
        nr__cgroup2_params
@@ -1873,6 +1883,7 @@ enum cgroup2_param {
 
 static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
        fsparam_flag("nsdelegate",              Opt_nsdelegate),
+       fsparam_flag("favordynmods",            Opt_favordynmods),
        fsparam_flag("memory_localevents",      Opt_memory_localevents),
        fsparam_flag("memory_recursiveprot",    Opt_memory_recursiveprot),
        {}
@@ -1892,6 +1903,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
        case Opt_nsdelegate:
                ctx->flags |= CGRP_ROOT_NS_DELEGATE;
                return 0;
+       case Opt_favordynmods:
+               ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
+               return 0;
        case Opt_memory_localevents:
                ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
                return 0;
@@ -1910,6 +1924,9 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
                else
                        cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
 
+               cgroup_favor_dynmods(&cgrp_dfl_root,
+                                    root_flags & CGRP_ROOT_FAVOR_DYNMODS);
+
                if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
                        cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
                else
@@ -1926,6 +1943,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
 {
        if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
                seq_puts(seq, ",nsdelegate");
+       if (cgrp_dfl_root.flags & CGRP_ROOT_FAVOR_DYNMODS)
+               seq_puts(seq, ",favordynmods");
        if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
                seq_puts(seq, ",memory_localevents");
        if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
@@ -1976,7 +1995,8 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
        cgrp->root = root;
        init_cgroup_housekeeping(cgrp);
 
-       root->flags = ctx->flags;
+       /* DYNMODS must be modified through cgroup_favor_dynmods() */
+       root->flags = ctx->flags & ~CGRP_ROOT_FAVOR_DYNMODS;
        if (ctx->release_agent)
                strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
        if (ctx->name)
@@ -2198,6 +2218,10 @@ static int cgroup_init_fs_context(struct fs_context *fc)
        put_user_ns(fc->user_ns);
        fc->user_ns = get_user_ns(ctx->ns->user_ns);
        fc->global = true;
+
+#ifdef CONFIG_CGROUP_FAVOR_DYNMODS
+       ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
+#endif
        return 0;
 }
 
@@ -2572,10 +2596,6 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
        if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
                return -EOPNOTSUPP;
 
-       /* mixables don't care */
-       if (cgroup_is_mixable(dst_cgrp))
-               return 0;
-
        /*
         * If @dst_cgrp is already or can become a thread root or is
         * threaded, it doesn't matter.
@@ -2949,22 +2969,40 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
        struct cgroup_subsys_state *d_css;
        struct cgroup *dsct;
        struct css_set *src_cset;
+       bool has_tasks;
        int ret;
 
        lockdep_assert_held(&cgroup_mutex);
 
-       percpu_down_write(&cgroup_threadgroup_rwsem);
-
        /* look up all csses currently attached to @cgrp's subtree */
        spin_lock_irq(&css_set_lock);
        cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
                struct cgrp_cset_link *link;
 
+               /*
+                * Since cgroup_update_dfl_csses() is only called by
+                * cgroup_apply_control(), the csses associated with the
+                * given cgrp will not be affected by changes made to
+                * its subtree_control file. We can skip them.
+                */
+               if (dsct == cgrp)
+                       continue;
+
                list_for_each_entry(link, &dsct->cset_links, cset_link)
                        cgroup_migrate_add_src(link->cset, dsct, &mgctx);
        }
        spin_unlock_irq(&css_set_lock);
 
+       /*
+        * We need to write-lock threadgroup_rwsem while migrating tasks.
+        * However, if there are no source csets for @cgrp, changing its
+        * controllers will not produce any task migrations and the
+        * write-locking can be skipped safely.
+        */
+       has_tasks = !list_empty(&mgctx.preloaded_src_csets);
+       if (has_tasks)
+               percpu_down_write(&cgroup_threadgroup_rwsem);
+
        /* NULL dst indicates self on default hierarchy */
        ret = cgroup_migrate_prepare_dst(&mgctx);
        if (ret)
@@ -2984,7 +3022,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
        ret = cgroup_migrate_execute(&mgctx);
 out_finish:
        cgroup_migrate_finish(&mgctx);
-       percpu_up_write(&cgroup_threadgroup_rwsem);
+       if (has_tasks)
+               percpu_up_write(&cgroup_threadgroup_rwsem);
        return ret;
 }
 
@@ -3618,21 +3657,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
 {
        struct cgroup *cgrp = seq_css(seq)->cgroup;
-       struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+       struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
 
        return psi_show(seq, psi, PSI_IO);
 }
 static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
 {
        struct cgroup *cgrp = seq_css(seq)->cgroup;
-       struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+       struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
 
        return psi_show(seq, psi, PSI_MEM);
 }
 static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 {
        struct cgroup *cgrp = seq_css(seq)->cgroup;
-       struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+       struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
 
        return psi_show(seq, psi, PSI_CPU);
 }
@@ -3658,7 +3697,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
                return -EBUSY;
        }
 
-       psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+       psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
        new = psi_trigger_create(psi, buf, nbytes, res);
        if (IS_ERR(new)) {
                cgroup_put(cgrp);
@@ -5851,12 +5890,6 @@ int __init cgroup_init(void)
 
        cgroup_rstat_boot();
 
-       /*
-        * The latency of the synchronize_rcu() is too high for cgroups,
-        * avoid it at the cost of forcing all readers into the slow path.
-        */
-       rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
-
        get_user_ns(init_cgroup_ns.user_ns);
 
        mutex_lock(&cgroup_mutex);
@@ -6768,6 +6801,7 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
 {
        return snprintf(buf, PAGE_SIZE,
                        "nsdelegate\n"
+                       "favordynmods\n"
                        "memory_localevents\n"
                        "memory_recursiveprot\n");
 }