lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly

author Ming Lei <ming.lei@redhat.com>

Mon, 20 Nov 2023 08:35:59 +0000 (16:35 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Thu, 7 Dec 2023 00:12:46 +0000 (16:12 -0800)
author Ming Lei <ming.lei@redhat.com>
Mon, 20 Nov 2023 08:35:59 +0000 (16:35 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 7 Dec 2023 00:12:46 +0000 (16:12 -0800)
diff --git a/lib/group_cpus.c b/lib/group_cpus.c

index aa3f6815bb124053e06678fc17c6b6613375e2b9..ee272c4cefcc13907ce9f211f479615d2e3c9154 100644 (file)
--- a/lib/group_cpus.c
+++ b/lib/group_cpus.c
@@ -366,13 +366,25 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
         if (!masks)
                 goto fail_node_to_cpumask;
  
-       /* Stabilize the cpumasks */
-       cpus_read_lock();
         build_node_to_cpumask(node_to_cpumask);
  
+       /*
+        * Make a local cache of 'cpu_present_mask', so that both stages of
+        * the spread observe a consistent 'cpu_present_mask' without
+        * holding the cpu hotplug lock, which reduces the deadlock risk
+        * with cpu hotplug code.
+        *
+        * CPU hotplug may happen while 'cpu_present_mask' is being read,
+        * and we can live with that: it only affects whether the hotplugged
+        * CPU is handled in the 1st or the 2nd stage, and either way is
+        * correct from the API user's viewpoint, since the 2-stage spread
+        * is sort of an optimization.
+        */
+       cpumask_copy(npresmsk, data_race(cpu_present_mask));
+
         /* grouping present CPUs first */
         ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
-                                 cpu_present_mask, nmsk, masks);
+                                 npresmsk, nmsk, masks);
         if (ret < 0)
                 goto fail_build_affinity;
         nr_present = ret;
@@ -387,15 +399,13 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps)
                 curgrp = 0;
         else
                 curgrp = nr_present;
-       cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+       cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk);
         ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
                                   npresmsk, nmsk, masks);
         if (ret >= 0)
                 nr_others = ret;
  
   fail_build_affinity:
-       cpus_read_unlock();
-
         if (ret >= 0)
                 WARN_ON(nr_present + nr_others < numgrps);
author	Ming Lei <ming.lei@redhat.com>
	Mon, 20 Nov 2023 08:35:59 +0000 (16:35 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Thu, 7 Dec 2023 00:12:46 +0000 (16:12 -0800)