x86/cpu/topology: Rework possible CPU management
authorThomas Gleixner <tglx@linutronix.de>
Tue, 13 Feb 2024 21:05:53 +0000 (22:05 +0100)
committerThomas Gleixner <tglx@linutronix.de>
Thu, 15 Feb 2024 21:07:43 +0000 (22:07 +0100)
Managing possible CPUs is an unreadable and incomprehensible maze. Aside from
that it's backwards because it applies command line limits after
registering all APICs.

Rewrite it so that it:

  - Applies the command line limits upfront so that only the allowed number
    of APIC IDs can be registered.

  - Applies any late restrictions in an understandable way

  - Uses simple min_t() calculations which are trivial to follow.

  - Provides a separate function for resetting to UP mode late in the
    bringup process.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Link: https://lore.kernel.org/r/20240213210252.290098853@linutronix.de
arch/x86/include/asm/apic.h
arch/x86/include/asm/cpu.h
arch/x86/include/asm/topology.h
arch/x86/kernel/cpu/topology.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c

index 28e9aa46d1ed89ada73acc32659ad7355494fb31..94ce0f7c9d3a26cd2b766a60042a0b941b3fe0d2 100644 (file)
@@ -175,6 +175,9 @@ extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present);
 extern void topology_register_boot_apic(u32 apic_id);
 extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id);
 extern void topology_hotunplug_apic(unsigned int cpu);
+extern void topology_apply_cmdline_limits_early(void);
+extern void topology_init_possible_cpus(void);
+extern void topology_reset_possible_cpus_up(void);
 
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
@@ -190,6 +193,8 @@ static inline void apic_intr_mode_init(void) { }
 static inline void lapic_assign_system_vectors(void) { }
 static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
 static inline bool apic_needs_pit(void) { return true; }
+/* No-op stubs for builds without CONFIG_X86_LOCAL_APIC */
+static inline void topology_apply_cmdline_limits_early(void) { }
+static inline void topology_init_possible_cpus(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
index f8f9a9b7939587b2b8f6e00794e94ea69e6a338a..aa30fd8cad7f5285534c9e4b24e2846d1429f480 100644 (file)
@@ -9,18 +9,10 @@
 #include <linux/percpu.h>
 #include <asm/ibt.h>
 
-#ifdef CONFIG_SMP
-
-extern void prefill_possible_map(void);
-
-#else /* CONFIG_SMP */
-
-static inline void prefill_possible_map(void) {}
-
+#ifndef CONFIG_SMP
 #define cpu_physical_id(cpu)                   boot_cpu_physical_apicid
 #define cpu_acpi_id(cpu)                       0
 #define safe_smp_processor_id()                        0
-
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_HOTPLUG_CPU
index cb6bafd329f1f292eedbc40221ccbd45802f6fdf..3e11a5a3b8304c97a9364c7ef8f71e30106d1039 100644 (file)
@@ -191,6 +191,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 {
        return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
 }
+
 #else /* CONFIG_SMP */
 #define topology_max_packages()                        (1)
 static inline int
index 80625f4194de5fff2c4aa72e4fc4d1c07384df89..fc47f5216f9af24e0e5db49487c20fe1a779460d 100644 (file)
@@ -5,6 +5,7 @@
 #include <xen/xen.h>
 
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/mpspec.h>
 #include <asm/smp.h>
 
@@ -85,73 +86,6 @@ early_initcall(smp_init_primary_thread_mask);
 static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
 #endif
 
-static int __initdata setup_possible_cpus = -1;
-
-/*
- * cpu_possible_mask should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and don't expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_mask on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with possible_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- * We do this because additional CPUs waste a lot of memory.
- * -AK
- */
-__init void prefill_possible_map(void)
-{
-       unsigned int num_processors = topo_info.nr_assigned_cpus;
-       unsigned int disabled_cpus = topo_info.nr_disabled_cpus;
-       int i, possible;
-
-       i = setup_max_cpus ?: 1;
-       if (setup_possible_cpus == -1) {
-               possible = topo_info.nr_assigned_cpus;
-#ifdef CONFIG_HOTPLUG_CPU
-               if (setup_max_cpus)
-                       possible += num_processors;
-#else
-               if (possible > i)
-                       possible = i;
-#endif
-       } else
-               possible = setup_possible_cpus;
-
-       total_cpus = max_t(int, possible, num_processors + disabled_cpus);
-
-       /* nr_cpu_ids could be reduced via nr_cpus= */
-       if (possible > nr_cpu_ids) {
-               pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
-                       possible, nr_cpu_ids);
-               possible = nr_cpu_ids;
-       }
-
-#ifdef CONFIG_HOTPLUG_CPU
-       if (!setup_max_cpus)
-#endif
-       if (possible > i) {
-               pr_warn("%d Processors exceeds max_cpus limit of %u\n",
-                       possible, setup_max_cpus);
-               possible = i;
-       }
-
-       set_nr_cpu_ids(possible);
-
-       pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
-               possible, max_t(int, possible - num_processors, 0));
-
-       reset_cpu_possible_mask();
-
-       for (i = 0; i < possible; i++)
-               set_cpu_possible(i, true);
-}
-
 static int topo_lookup_cpuid(u32 apic_id)
 {
        int i;
@@ -293,12 +227,114 @@ void topology_hotunplug_apic(unsigned int cpu)
 }
 #endif
 
-static int __init _setup_possible_cpus(char *str)
+#ifdef CONFIG_SMP
+static unsigned int max_possible_cpus __initdata = NR_CPUS;
+
+/**
+ * topology_apply_cmdline_limits_early - Apply topology command line limits early
+ *
+ * Ensure that command line limits are in effect before firmware parsing
+ * takes place.
+ *
+ * The limit starts out as nr_cpu_ids, drops to a single CPU when SMP, the
+ * IO/APIC or the local APIC is disabled, and is then capped by the
+ * 'possible_cpus=' value. nr_cpu_ids is updated only when the resulting
+ * limit is lower than its current value.
+ */
+void __init topology_apply_cmdline_limits_early(void)
 {
-       get_option(&str, &setup_possible_cpus);
+       unsigned int possible = nr_cpu_ids;
+
+       /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
+       if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+               possible = 1;
+
+       /* 'possible_cpus=N' */
+       possible = min_t(unsigned int, max_possible_cpus, possible);
+
+       /* Only ever lower nr_cpu_ids here, never raise it */
+       if (possible < nr_cpu_ids) {
+               pr_info("Limiting to %u possible CPUs\n", possible);
+               set_nr_cpu_ids(possible);
+       }
+}
+
+/*
+ * Decide whether the system has to be limited to a single CPU.
+ *
+ * Returns true when no SMP configuration was found or the IO/APIC is
+ * disabled. Otherwise the result follows apic_is_disabled, except on
+ * XEN PV where the local APIC state is not taken into account.
+ */
+static __init bool restrict_to_up(void)
+{
+       if (!smp_found_config)
+               return true;
+
+       if (ioapic_is_disabled)
+               return true;
+
+       /*
+        * XEN PV does not advertise the local APIC properly, but it provides
+        * a fake topology for it so that the infrastructure works. The APIC
+        * based restriction is therefore only evaluated for everything else.
+        */
+       return !xen_pv_domain() && apic_is_disabled;
+}
+
+/**
+ * topology_init_possible_cpus - Size and populate the possible and present CPU masks
+ *
+ * Computes the number of allowed CPUs from the assigned and disabled CPU
+ * counts in topo_info, limited by nr_cpu_ids. When restrict_to_up() returns
+ * true the number of allowed CPUs stays at 1. The adjusted counts are
+ * written back to topo_info, and total_cpus and nr_cpu_ids are set to the
+ * allowed number.
+ *
+ * Every CPU number below the allowed count is marked possible. A CPU number
+ * with a valid APIC ID is additionally marked present according to the
+ * state of that APIC ID in phys_cpu_present_map.
+ */
+void __init topology_init_possible_cpus(void)
+{
+       unsigned int assigned = topo_info.nr_assigned_cpus;
+       unsigned int disabled = topo_info.nr_disabled_cpus;
+       unsigned int total = assigned + disabled;
+       unsigned int cpu, allowed = 1;
+
+       if (!restrict_to_up()) {
+               /* More CPUs assigned than nr_cpu_ids permits: warn and clamp */
+               if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
+                       disabled += assigned - nr_cpu_ids;
+                       assigned = nr_cpu_ids;
+               }
+               allowed = min_t(unsigned int, total, nr_cpu_ids);
+       }
+
+       if (total > allowed)
+               pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);
+
+       /* Recompute both counts so that assigned + disabled == allowed */
+       assigned = min_t(unsigned int, allowed, assigned);
+       disabled = allowed - assigned;
+
+       topo_info.nr_assigned_cpus = assigned;
+       topo_info.nr_disabled_cpus = disabled;
+
+       total_cpus = allowed;
+       set_nr_cpu_ids(allowed);
+
+       pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
+       if (topo_info.nr_rejected_cpus)
+               pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
+
+       /* Reset both masks to CPU 0 before filling them in below */
+       init_cpu_present(cpumask_of(0));
+       init_cpu_possible(cpumask_of(0));
+
+       for (cpu = 0; cpu < allowed; cpu++) {
+               u32 apicid = cpuid_to_apicid[cpu];
+
+               set_cpu_possible(cpu, true);
+
+               /* No valid APIC ID for this CPU number: possible only */
+               if (apicid == BAD_APICID)
+                       continue;
+
+               set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
+       }
+}
+
+/*
+ * Fall back to UP after the CPU masks have already been sized, for the
+ * case that APIC/IOAPIC setup failed late in the bringup process.
+ */
+void __init topology_reset_possible_cpus_up(void)
+{
+       /* Shrink the present and possible masks to CPU 0 */
+       init_cpu_present(cpumask_of(0));
+       init_cpu_possible(cpumask_of(0));
+
+       /* Clear the APIC ID map; re-add only a valid boot CPU APIC ID */
+       bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
+       if (topo_info.boot_cpu_apic_id == BAD_APICID)
+               return;
+
+       set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
+}
+
+/*
+ * Early parameter handler for 'possible_cpus=N'.
+ *
+ * Stores N in max_possible_cpus when N is a positive number. Anything else
+ * (missing value, non-numeric text, zero, negative numbers) is ignored and
+ * the previous limit stays in effect. Always returns 0.
+ */
+static int __init setup_possible_cpus(char *str)
+{
+       int limit;
+
+       /*
+        * get_option() writes through an int pointer, so parse into a local
+        * of that type instead of aliasing the unsigned limit. A value of
+        * zero must not be accepted because it would be handed to
+        * set_nr_cpu_ids() later on.
+        */
+       if (get_option(&str, &limit) && limit > 0)
+               max_possible_cpus = limit;
+       else
+               pr_warn("Ignoring invalid possible_cpus= value\n");
+
        return 0;
 }
-early_param("possible_cpus", _setup_possible_cpus);
+early_param("possible_cpus", setup_possible_cpus);
+#endif
 
 static int __init apic_set_disabled_cpu_apicid(char *arg)
 {
index b1e52ac164b1e45f78264ffc076fd3e3477ddd96..4e320d4d389847c2b8054039458f7b23cfecbaec 100644 (file)
@@ -1131,6 +1131,8 @@ void __init setup_arch(char **cmdline_p)
 
        early_quirks();
 
+       topology_apply_cmdline_limits_early();
+
        /*
         * Parse SMP configuration. Try ACPI first and then the platform
         * specific parser.
@@ -1138,13 +1140,10 @@ void __init setup_arch(char **cmdline_p)
        acpi_boot_init();
        x86_init.mpparse.parse_smp_cfg();
 
-       /*
-        * Systems w/o ACPI and mptables might not have it mapped the local
-        * APIC yet, but prefill_possible_map() might need to access it.
-        */
+       /* Last opportunity to detect and map the local APIC */
        init_apic_mappings();
 
-       prefill_possible_map();
+       topology_init_possible_cpus();
 
        init_cpu_to_node();
        init_gi_nodes();
index d850faca946d177649547768604b96240428a6a9..7f85f174690fb8db4b2e7ab5b348188429b99cb6 100644 (file)
@@ -1147,11 +1147,7 @@ static __init void disable_smp(void)
        pr_info("SMP disabled\n");
 
        disable_ioapic_support();
-
-       init_cpu_present(cpumask_of(0));
-       init_cpu_possible(cpumask_of(0));
-
-       reset_phys_cpu_present_map(smp_found_config ? boot_cpu_physical_apicid : 0);
+       topology_reset_possible_cpus_up();
 
        cpumask_set_cpu(0, topology_sibling_cpumask(0));
        cpumask_set_cpu(0, topology_core_cpumask(0));