cgroup/cpuset: Eliminate unnecessary sched domains rebuilds in hotplug
author Waiman Long <longman@redhat.com>
Mon, 5 Aug 2024 01:30:17 +0000 (21:30 -0400)
committer Tejun Heo <tj@kernel.org>
Mon, 5 Aug 2024 20:54:25 +0000 (10:54 -1000)
It was found that some hotplug operations may cause multiple
rebuild_sched_domains_locked() calls. Some of those intermediate calls
may use cpuset states not in the final correct form leading to incorrect
sched domain setting.

Fix this problem by using the existing force_rebuild flag to inhibit
immediate rebuild_sched_domains_locked() calls if set and only doing
one final call at the end. Also rename the force_rebuild flag to
force_sd_rebuild to make its meaning more clear.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cpuset.c

index 97d02612b3a66b4a9cc385debfe8ca5d709073c1..4bd9e50bcc8eefd72b2b817415b442a86a421df7 100644 (file)
@@ -232,6 +232,13 @@ static cpumask_var_t       isolated_cpus;
 /* List of remote partition root children */
 static struct list_head remote_children;
 
+/*
+ * A flag to force sched domain rebuild at the end of an operation while
+ * inhibiting it in the intermediate stages when set. Currently it is only
+ * set in hotplug code.
+ */
+static bool force_sd_rebuild;
+
 /*
  * Partition root states:
  *
@@ -1475,7 +1482,7 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
                        clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
        }
 
-       if (rebuild_domains)
+       if (rebuild_domains && !force_sd_rebuild)
                rebuild_sched_domains_locked();
 }
 
@@ -1833,7 +1840,7 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
                        remote_partition_disable(child, tmp);
                        disable_cnt++;
                }
-       if (disable_cnt)
+       if (disable_cnt && !force_sd_rebuild)
                rebuild_sched_domains_locked();
 }
 
@@ -2442,7 +2449,8 @@ get_css:
        }
        rcu_read_unlock();
 
-       if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD))
+       if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) &&
+           !force_sd_rebuild)
                rebuild_sched_domains_locked();
 }
 
@@ -3104,7 +3112,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
        cs->flags = trialcs->flags;
        spin_unlock_irq(&callback_lock);
 
-       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
+       if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed &&
+           !force_sd_rebuild)
                rebuild_sched_domains_locked();
 
        if (spread_flag_changed)
@@ -4501,11 +4510,9 @@ hotplug_update_tasks(struct cpuset *cs,
                update_tasks_nodemask(cs);
 }
 
-static bool force_rebuild;
-
 void cpuset_force_rebuild(void)
 {
-       force_rebuild = true;
+       force_sd_rebuild = true;
 }
 
 /**
@@ -4653,15 +4660,9 @@ static void cpuset_handle_hotplug(void)
                       !cpumask_empty(subpartitions_cpus);
        mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
 
-       /*
-        * In the rare case that hotplug removes all the cpus in
-        * subpartitions_cpus, we assumed that cpus are updated.
-        */
-       if (!cpus_updated && !cpumask_empty(subpartitions_cpus))
-               cpus_updated = true;
-
        /* For v1, synchronize cpus_allowed to cpu_active_mask */
        if (cpus_updated) {
+               cpuset_force_rebuild();
                spin_lock_irq(&callback_lock);
                if (!on_dfl)
                        cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
@@ -4717,8 +4718,8 @@ static void cpuset_handle_hotplug(void)
        }
 
        /* rebuild sched domains if cpus_allowed has changed */
-       if (cpus_updated || force_rebuild) {
-               force_rebuild = false;
+       if (force_sd_rebuild) {
+               force_sd_rebuild = false;
                rebuild_sched_domains_cpuslocked();
        }