Merge tag 'x86-cleanups-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 12 Mar 2024 02:37:56 +0000 (19:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 12 Mar 2024 02:37:56 +0000 (19:37 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Mar 2024 02:37:56 +0000 (19:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 12 Mar 2024 02:37:56 +0000 (19:37 -0700)
diff --combined arch/x86/include/asm/msr.h

index c284ff9ebe67687e52fa8f3d8609ce9a8a90c699,4621e083af7bd5d4b131c33a6f158eec5cf2f027..d642037f9ed5d81d5af89986e19bf8c33c74c6c8
--- 1/arch/x86/include/asm/msr.h
--- 2/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@@ -12,11 -12,13 +12,13 @@@
   #include <uapi/asm/msr.h>
   #include <asm/shared/msr.h>
   
+ #include <linux/percpu.h>
+ 
   struct msr_info {
-       u32 msr_no;
-       struct msr reg;
-       struct msr *msrs;
-       int err;
+       u32                     msr_no;
+       struct msr              reg;
+       struct msr __percpu     *msrs;
+       int                     err;
   };
   
   struct msr_regs_info {
@@@ -97,19 -99,6 +99,19 @@@ static __always_inline void __wrmsr(uns
                      : : "c" (msr), "a"(low), "d" (high) : "memory");
   }
   
+ +/*
+ + * WRMSRNS behaves exactly like WRMSR with the only difference being
+ + * that it is not a serializing instruction by default.
+ + */
+ +static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
+ +{
+ +      /* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
+ +      asm volatile("1: .byte 0x0f,0x01,0xc6\n"
+ +                   "2:\n"
+ +                   _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ +                   : : "c" (msr), "a"(low), "d" (high));
+ +}
+ +
   #define native_rdmsr(msr, val1, val2)                 \
   do {                                                  \
         u64 __val = __rdmsr((msr));                     \
@@@ -310,11 -299,6 +312,11 @@@ do {                                                     
   
   #endif        /* !CONFIG_PARAVIRT_XXL */
   
+ +static __always_inline void wrmsrns(u32 msr, u64 val)
+ +{
+ +      __wrmsrns(msr, val, val >> 32);
+ +}
+ +
   /*
    * 64-bit version of wrmsr_safe():
    */
@@@ -323,8 -307,8 +325,8 @@@ static inline int wrmsrl_safe(u32 msr, 
         return wrmsr_safe(msr, (u32)val,  (u32)(val >> 32));
   }
   
- struct msr *msrs_alloc(void);
- void msrs_free(struct msr *msrs);
+ struct msr __percpu *msrs_alloc(void);
+ void msrs_free(struct msr __percpu *msrs);
   int msr_set_bit(u32 msr, u8 bit);
   int msr_clear_bit(u32 msr, u8 bit);
   
@@@ -333,8 -317,8 +335,8 @@@ int rdmsr_on_cpu(unsigned int cpu, u32 
   int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
   int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
   int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
- void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
- void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
+ void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
+ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
   int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
   int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
   int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
@@@ -363,14 -347,14 +365,14 @@@ static inline int wrmsrl_on_cpu(unsigne
         return 0;
   }
   static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
-                               struct msr *msrs)
+                               struct msr __percpu *msrs)
   {
-       rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+       rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h));
   }
   static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
-                               struct msr *msrs)
+                               struct msr __percpu *msrs)
   {
-       wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+       wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h));
   }
   static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
                                     u32 *l, u32 *h)
diff --combined arch/x86/include/asm/processor.h

index 89cf39dbd3062025b84203b2a777d9655f65d75e,e2262aca65616991389a7683af82ba6cbd593965..8830133f220ca0da5767ae78cdcea15534fad8a7
--- 1/arch/x86/include/asm/processor.h
--- 2/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@@ -20,7 -20,6 +20,6 @@@ struct vm86
   #include <asm/page.h>
   #include <asm/pgtable_types.h>
   #include <asm/percpu.h>
- #include <asm/msr.h>
   #include <asm/desc_defs.h>
   #include <asm/nops.h>
   #include <asm/special_insns.h>
@@@ -100,9 -99,6 +99,9 @@@ struct cpuinfo_topology 
         u32                     logical_pkg_id;
         u32                     logical_die_id;
   
+ +      // AMD Node ID and Nodes per Package info
+ +      u32                     amd_node_id;
+ +
         // Cache level topology IDs
         u32                     llc_id;
         u32                     l2c_id;
@@@ -122,6 -118,8 +121,6 @@@ struct cpuinfo_x86 
   #endif
         __u8                    x86_virt_bits;
         __u8                    x86_phys_bits;
- -      /* CPUID returned core id bits: */
- -      __u8                    x86_coreid_bits;
         /* Max extended CPUID function supported: */
         __u32                   extended_cpuid_level;
         /* Maximum supported CPUID level, -1=no CPUID: */
@@@ -149,6 -147,8 +148,6 @@@
         unsigned long           loops_per_jiffy;
         /* protected processor identification number */
         u64                     ppin;
- -      /* cpuid returned max cores value: */
- -      u16                     x86_max_cores;
         u16                     x86_clflush_size;
         /* number of cores as seen by the OS: */
         u16                     booted_cores;
@@@ -185,13 -185,8 +184,8 @@@ extern struct cpuinfo_x86 new_cpu_data
   extern __u32                  cpu_caps_cleared[NCAPINTS + NBUGINTS];
   extern __u32                  cpu_caps_set[NCAPINTS + NBUGINTS];
   
- #ifdef CONFIG_SMP
   DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
   #define cpu_data(cpu)         per_cpu(cpu_info, cpu)
- #else
- #define cpu_info              boot_cpu_data
- #define cpu_data(cpu)         boot_cpu_data
- #endif
   
   extern const struct seq_operations cpuinfo_op;
   
@@@ -575,28 -570,6 +569,6 @@@ extern void cpu_init(void)
   extern void cpu_init_exception_handling(void);
   extern void cr4_init(void);
   
- static inline unsigned long get_debugctlmsr(void)
- {
-       unsigned long debugctlmsr = 0;
- 
- #ifndef CONFIG_X86_DEBUGCTLMSR
-       if (boot_cpu_data.x86 < 6)
-               return 0;
- #endif
-       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
- 
-       return debugctlmsr;
- }
- 
- static inline void update_debugctlmsr(unsigned long debugctlmsr)
- {
- #ifndef CONFIG_X86_DEBUGCTLMSR
-       if (boot_cpu_data.x86 < 6)
-               return;
- #endif
-       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
- }
- 
   extern void set_task_blockstep(struct task_struct *task, bool on);
   
   /* Boot loader type from the setup header: */
@@@ -663,10 -636,8 +635,10 @@@ static __always_inline void prefetchw(c
   #else
   extern unsigned long __end_init_task[];
   
- -#define INIT_THREAD {                                                     \
- -      .sp     = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
+ +#define INIT_THREAD {                                                 \
+ +      .sp     = (unsigned long)&__end_init_task -                     \
+ +                TOP_OF_KERNEL_STACK_PADDING -                         \
+ +                sizeof(struct pt_regs),                               \
   }
   
   extern unsigned long KSTK_ESP(struct task_struct *task);
@@@ -705,10 -676,12 +677,10 @@@ static inline u32 per_cpu_l2c_id(unsign
   }
   
   #ifdef CONFIG_CPU_SUP_AMD
- -extern u32 amd_get_nodes_per_socket(void);
   extern u32 amd_get_highest_perf(void);
   extern void amd_clear_divider(void);
   extern void amd_check_microcode(void);
   #else
- -static inline u32 amd_get_nodes_per_socket(void)      { return 0; }
   static inline u32 amd_get_highest_perf(void)          { return 0; }
   static inline void amd_clear_divider(void)            { }
   static inline void amd_check_microcode(void)          { }
diff --combined arch/x86/include/asm/smp.h

index 54d6d71e0ecae0f953ae21c408ec8023bb9601a7,31edeab5ee88fa03c84c208c119824127f33fd0a..a35936b512fee639db9930fa64b543687edbf0b8
--- 1/arch/x86/include/asm/smp.h
--- 2/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@@ -8,6 -8,9 +8,6 @@@
   #include <asm/current.h>
   #include <asm/thread_info.h>
   
- -extern int smp_num_siblings;
- -extern unsigned int num_processors;
- -
   DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
   DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
   DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
@@@ -56,11 -59,6 +56,6 @@@ static inline void stop_other_cpus(void
         smp_ops.stop_other_cpus(1);
   }
   
- static inline void smp_prepare_boot_cpu(void)
- {
-       smp_ops.smp_prepare_boot_cpu();
- }
- 
   static inline void smp_prepare_cpus(unsigned int max_cpus)
   {
         smp_ops.smp_prepare_cpus(max_cpus);
@@@ -107,6 -105,7 +102,6 @@@ void cpu_disable_common(void)
   void native_smp_prepare_boot_cpu(void);
   void smp_prepare_cpus_common(void);
   void native_smp_prepare_cpus(unsigned int max_cpus);
- -void calculate_max_logical_packages(void);
   void native_smp_cpus_done(unsigned int max_cpus);
   int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
   int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
@@@ -170,6 -169,8 +165,6 @@@ static inline struct cpumask *cpu_llc_s
   }
   #endif /* CONFIG_SMP */
   
- -extern unsigned disabled_cpus;
- -
   #ifdef CONFIG_DEBUG_NMI_SELFTEST
   extern void nmi_selftest(void);
   #else
diff --combined arch/x86/kernel/cpu/common.c

index e5d7dcaea2093c0d06f2700f93c34f9b398dac98,6057a9ecac9345a3b49e4373b19c0c26fb803825..6756025b35cae1d9f6cd5a44bf0fde2d96d70f9d
--- 1/arch/x86/kernel/cpu/common.c
--- 2/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@@ -61,7 -61,6 +61,7 @@@
   #include <asm/microcode.h>
   #include <asm/intel-family.h>
   #include <asm/cpu_device_id.h>
+ +#include <asm/fred.h>
   #include <asm/uv/uv.h>
   #include <asm/ia32.h>
   #include <asm/set_memory.h>
@@@ -71,23 -70,14 +71,26 @@@
   
   #include "cpu.h"
   
+ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+ EXPORT_PER_CPU_SYMBOL(cpu_info);
+ 
   u32 elf_hwcap2 __read_mostly;
   
   /* Number of siblings per CPU package */
- -int smp_num_siblings = 1;
- -EXPORT_SYMBOL(smp_num_siblings);
+ +unsigned int __max_threads_per_core __ro_after_init = 1;
+ +EXPORT_SYMBOL(__max_threads_per_core);
+ +
+ +unsigned int __max_dies_per_package __ro_after_init = 1;
+ +EXPORT_SYMBOL(__max_dies_per_package);
+ +
+ +unsigned int __max_logical_packages __ro_after_init = 1;
+ +EXPORT_SYMBOL(__max_logical_packages);
+ +
+ +unsigned int __num_cores_per_package __ro_after_init = 1;
+ +EXPORT_SYMBOL(__num_cores_per_package);
+ +
+ +unsigned int __num_threads_per_package __ro_after_init = 1;
+ +EXPORT_SYMBOL(__num_threads_per_package);
   
   static struct ppin_info {
         int     feature;
@@@ -395,8 -385,9 +398,8 @@@ out
   }
   
   /* These bits should not change their value after CPU init is finished. */
- -static const unsigned long cr4_pinned_mask =
- -      X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
- -      X86_CR4_FSGSBASE | X86_CR4_CET;
+ +static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
+ +                                           X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED;
   static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
   static unsigned long cr4_pinned_bits __ro_after_init;
   
@@@ -802,6 -793,19 +805,6 @@@ static void get_model_name(struct cpuin
         *(s + 1) = '\0';
   }
   
- -void detect_num_cpu_cores(struct cpuinfo_x86 *c)
- -{
- -      unsigned int eax, ebx, ecx, edx;
- -
- -      c->x86_max_cores = 1;
- -      if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
- -              return;
- -
- -      cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- -      if (eax & 0x1f)
- -              c->x86_max_cores = (eax >> 26) + 1;
- -}
- -
   void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
   {
         unsigned int n, dummy, ebx, ecx, edx, l2size;
@@@ -863,6 -867,51 +866,6 @@@ static void cpu_detect_tlb(struct cpuin
                 tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
   }
   
- -int detect_ht_early(struct cpuinfo_x86 *c)
- -{
- -#ifdef CONFIG_SMP
- -      u32 eax, ebx, ecx, edx;
- -
- -      if (!cpu_has(c, X86_FEATURE_HT))
- -              return -1;
- -
- -      if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
- -              return -1;
- -
- -      if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
- -              return -1;
- -
- -      cpuid(1, &eax, &ebx, &ecx, &edx);
- -
- -      smp_num_siblings = (ebx & 0xff0000) >> 16;
- -      if (smp_num_siblings == 1)
- -              pr_info_once("CPU0: Hyper-Threading is disabled\n");
- -#endif
- -      return 0;
- -}
- -
- -void detect_ht(struct cpuinfo_x86 *c)
- -{
- -#ifdef CONFIG_SMP
- -      int index_msb, core_bits;
- -
- -      if (detect_ht_early(c) < 0)
- -              return;
- -
- -      index_msb = get_count_order(smp_num_siblings);
- -      c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb);
- -
- -      smp_num_siblings = smp_num_siblings / c->x86_max_cores;
- -
- -      index_msb = get_count_order(smp_num_siblings);
- -
- -      core_bits = get_count_order(c->x86_max_cores);
- -
- -      c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) &
- -              ((1 << core_bits) - 1);
- -#endif
- -}
- -
   static void get_cpu_vendor(struct cpuinfo_x86 *c)
   {
         char *v = c->x86_vendor_id;
@@@ -1309,13 -1358,8 +1312,13 @@@ static void __init cpu_set_bug_bits(str
         /*
          * AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
          * flag and protect from vendor-specific bugs via the whitelist.
+ +       *
+ +       * Don't use AutoIBRS when SNP is enabled because it degrades host
+ +       * userspace indirect branch performance.
          */
- -      if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
+ +      if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ +          (cpu_has(c, X86_FEATURE_AUTOIBRS) &&
+ +           !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
                 setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
                 if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
                     !(ia32_cap & ARCH_CAP_PBRSB_NO))
@@@ -1551,8 -1595,6 +1554,8 @@@ static void __init early_identify_cpu(s
                 get_cpu_address_sizes(c);
                 cpu_parse_early_param();
   
+ +              cpu_init_topology(c);
+ +
                 if (this_cpu->c_early_init)
                         this_cpu->c_early_init(c);
   
@@@ -1564,7 -1606,6 +1567,7 @@@
         } else {
                 setup_clear_cpu_cap(X86_FEATURE_CPUID);
                 get_cpu_address_sizes(c);
+ +              cpu_init_topology(c);
         }
   
         setup_force_cpu_cap(X86_FEATURE_ALWAYS);
@@@ -1710,6 -1751,18 +1713,6 @@@ static void generic_identify(struct cpu
   
         get_cpu_address_sizes(c);
   
- -      if (c->cpuid_level >= 0x00000001) {
- -              c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
- -#ifdef CONFIG_X86_32
- -# ifdef CONFIG_SMP
- -              c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
- -# else
- -              c->topo.apicid = c->topo.initial_apicid;
- -# endif
- -#endif
- -              c->topo.pkg_id = c->topo.initial_apicid;
- -      }
- -
         get_model_name(c); /* Default name */
   
         /*
@@@ -1730,6 -1783,29 +1733,6 @@@
   #endif
   }
   
- -/*
- - * Validate that ACPI/mptables have the same information about the
- - * effective APIC id and update the package map.
- - */
- -static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
- -{
- -#ifdef CONFIG_SMP
- -      unsigned int cpu = smp_processor_id();
- -      u32 apicid;
- -
- -      apicid = apic->cpu_present_to_apicid(cpu);
- -
- -      if (apicid != c->topo.apicid) {
- -              pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
- -                     cpu, apicid, c->topo.initial_apicid);
- -      }
- -      BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu));
- -      BUG_ON(topology_update_die_map(c->topo.die_id, cpu));
- -#else
- -      c->topo.logical_pkg_id = 0;
- -#endif
- -}
- -
   /*
    * This does the hard work of actually picking apart the CPU stuff...
    */
@@@ -1743,6 -1819,11 +1746,6 @@@ static void identify_cpu(struct cpuinfo
         c->x86_model = c->x86_stepping = 0;     /* So far unknown... */
         c->x86_vendor_id[0] = '\0'; /* Unset */
         c->x86_model_id[0] = '\0';  /* Unset */
- -      c->x86_max_cores = 1;
- -      c->x86_coreid_bits = 0;
- -      c->topo.cu_id = 0xff;
- -      c->topo.llc_id = BAD_APICID;
- -      c->topo.l2c_id = BAD_APICID;
   #ifdef CONFIG_X86_64
         c->x86_clflush_size = 64;
         c->x86_phys_bits = 36;
@@@ -1761,14 -1842,17 +1764,14 @@@
   
         generic_identify(c);
   
+ +      cpu_parse_topology(c);
+ +
         if (this_cpu->c_identify)
                 this_cpu->c_identify(c);
   
         /* Clear/Set all flags overridden by options, after probe */
         apply_forced_caps(c);
   
- -#ifdef CONFIG_X86_64
- -      c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
- -#endif
- -
- -
         /*
          * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
          * Hygon will clear it in ->c_init() below.
@@@ -1822,6 -1906,10 +1825,6 @@@
                                 c->x86, c->x86_model);
         }
   
- -#ifdef CONFIG_X86_64
- -      detect_ht(c);
- -#endif
- -
         x86_init_rdrand(c);
         setup_pku(c);
         setup_cet(c);
@@@ -1913,6 -2001,7 +1916,6 @@@ void identify_secondary_cpu(struct cpui
   #ifdef CONFIG_X86_32
         enable_sep_cpu();
   #endif
- -      validate_apic_and_package_id(c);
         x86_spec_ctrl_setup_ap();
         update_srbds_msr();
         if (boot_cpu_has_bug(X86_BUG_GDS))
@@@ -1981,8 -2070,10 +1984,8 @@@ static void wrmsrl_cstar(unsigned long 
                 wrmsrl(MSR_CSTAR, val);
   }
   
- -/* May not be marked __init: used by software suspend */
- -void syscall_init(void)
+ +static inline void idt_syscall_init(void)
   {
- -      wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
         wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
   
         if (ia32_enabled()) {
@@@ -2016,23 -2107,6 +2019,23 @@@
                X86_EFLAGS_AC|X86_EFLAGS_ID);
   }
   
+ +/* May not be marked __init: used by software suspend */
+ +void syscall_init(void)
+ +{
+ +      /* The default user and kernel segments */
+ +      wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+ +
+ +      /*
+ +       * Except the IA32_STAR MSR, there is NO need to setup SYSCALL and
+ +       * SYSENTER MSRs for FRED, because FRED uses the ring 3 FRED
+ +       * entrypoint for SYSCALL and SYSENTER, and ERETU is the only legit
+ +       * instruction to return to ring 3 (both sysexit and sysret cause
+ +       * #UD when FRED is enabled).
+ +       */
+ +      if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ +              idt_syscall_init();
+ +}
+ +
   #else /* CONFIG_X86_64 */
   
   #ifdef CONFIG_STACKPROTECTOR
@@@ -2136,9 -2210,8 +2139,9 @@@ void cpu_init_exception_handling(void
         /* paranoid_entry() gets the CPU number from the GDT */
         setup_getcpu(cpu);
   
- -      /* IST vectors need TSS to be set up. */
- -      tss_setup_ist(tss);
+ +      /* For IDT mode, IST vectors need to be set in TSS. */
+ +      if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ +              tss_setup_ist(tss);
         tss_setup_io_bitmap(tss);
         set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
   
@@@ -2147,10 -2220,8 +2150,10 @@@
         /* GHCB needs to be setup to handle #VC. */
         setup_ghcb();
   
- -      /* Finally load the IDT */
- -      load_current_idt();
+ +      if (cpu_feature_enabled(X86_FEATURE_FRED))
+ +              cpu_init_fred_exceptions();
+ +      else
+ +              load_current_idt();
   }
   
   /*
@@@ -2279,7 -2350,7 +2282,7 @@@ void __init arch_cpu_finalize_init(void
          * identify_boot_cpu() initialized SMT support information, let the
          * core code know.
          */
- -      cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings);
+ +      cpu_smt_set_num_threads(__max_threads_per_core, __max_threads_per_core);
   
         if (!IS_ENABLED(CONFIG_SMP)) {
                 pr_info("CPU: ");
diff --combined arch/x86/kernel/nmi.c

index 56e7a9e2737a017410e03dbca5b616d8925b7ae9,3de0772c65e381f6c66b8370ddb9d84aa086c7f5..9a5b372c706fccb15aea2375a5424461a201d716
--- 1/arch/x86/kernel/nmi.c
--- 2/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@@ -35,7 -35,6 +35,7 @@@
   #include <asm/nospec-branch.h>
   #include <asm/microcode.h>
   #include <asm/sev.h>
+ +#include <asm/fred.h>
   
   #define CREATE_TRACE_POINTS
   #include <trace/events/nmi.h>
@@@ -304,13 -303,13 +304,13 @@@ unknown_nmi_error(unsigned char reason
   
         __this_cpu_add(nmi_stats.unknown, 1);
   
- -      pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- -               reason, smp_processor_id());
+ +      pr_emerg_ratelimited("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ +                           reason, smp_processor_id());
   
         if (unknown_nmi_panic || panic_on_unrecovered_nmi)
                 nmi_panic(regs, "NMI: Not continuing");
   
- -      pr_emerg("Dazed and confused, but trying to continue\n");
+ +      pr_emerg_ratelimited("Dazed and confused, but trying to continue\n");
   }
   NOKPROBE_SYMBOL(unknown_nmi_error);
   
@@@ -503,7 -502,7 +503,7 @@@ DEFINE_IDTENTRY_RAW(exc_nmi
         if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
                 raw_atomic_long_inc(&nsp->idt_calls);
   
-       if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) {
+       if (arch_cpu_is_offline(smp_processor_id())) {
                 if (microcode_nmi_handler_enabled())
                         microcode_offline_nmi_handler();
                 return;
@@@ -637,7 -636,7 +637,7 @@@ void nmi_backtrace_stall_check(const st
                         msgp = nmi_check_stall_msg[idx];
                         if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
                                 modp = ", but OK because ignore_nmis was set";
- -                      if (nmi_seq & ~0x1)
+ +                      if (nmi_seq & 0x1)
                                 msghp = " (CPU currently in NMI handler function)";
                         else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
                                 msghp = " (CPU exited one NMI handler function)";
@@@ -649,47 -648,6 +649,47 @@@
   
   #endif
   
+ +#ifdef CONFIG_X86_FRED
+ +/*
+ + * With FRED, CR2/DR6 are pushed to the #PF/#DB stack frame during FRED
+ + * event delivery, i.e., there is no problem of transient states.
+ + * And NMI unblocking only happens when the stack frame indicates
+ + * that it should happen.
+ + *
+ + * Thus, the NMI entry stub for FRED is really straightforward and
+ + * as simple as most exception handlers. As such, #DB is allowed
+ + * during NMI handling.
+ + */
+ +DEFINE_FREDENTRY_NMI(exc_nmi)
+ +{
+ +      irqentry_state_t irq_state;
+ +
+ +      if (arch_cpu_is_offline(smp_processor_id())) {
+ +              if (microcode_nmi_handler_enabled())
+ +                      microcode_offline_nmi_handler();
+ +              return;
+ +      }
+ +
+ +      /*
+ +       * Save CR2 for eventual restore to cover the case where the NMI
+ +       * hits the VMENTER/VMEXIT region where guest CR2 is live. This
+ +       * prevents guest state corruption in case that the NMI handler
+ +       * takes a page fault.
+ +       */
+ +      this_cpu_write(nmi_cr2, read_cr2());
+ +
+ +      irq_state = irqentry_nmi_enter(regs);
+ +
+ +      inc_irq_stat(__nmi_count);
+ +      default_do_nmi(regs);
+ +
+ +      irqentry_nmi_exit(regs, irq_state);
+ +
+ +      if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
+ +              write_cr2(this_cpu_read(nmi_cr2));
+ +}
+ +#endif
+ +
   void stop_nmi(void)
   {
         ignore_nmis++;
diff --combined arch/x86/kernel/setup.c

index 4e320d4d389847c2b8054039458f7b23cfecbaec,8f669d3be44589e0a1f4ec40c145e8c6b9484309..46d5a8c520ad4aa165b72036d43c62135e83f037
--- 1/arch/x86/kernel/setup.c
--- 2/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@@ -970,8 -970,10 +970,8 @@@ void __init setup_arch(char **cmdline_p
         high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
   #endif
   
- -      /*
- -       * Find and reserve possible boot-time SMP configuration:
- -       */
- -      find_smp_config();
+ +      /* Find and reserve MPTABLE area */
+ +      x86_init.mpparse.find_mptable();
   
         early_alloc_pgt_buf();
   
@@@ -1088,9 -1090,7 +1088,9 @@@
   
         early_platform_quirks();
   
+ +      /* Some platforms need the APIC registered for NUMA configuration */
         early_acpi_boot_init();
+ +      x86_init.mpparse.early_parse_smp_cfg();
   
         x86_flattree_get_config();
   
@@@ -1131,19 -1131,24 +1131,19 @@@
   
         early_quirks();
   
- -      /*
- -       * Read APIC and some other early information from ACPI tables.
- -       */
- -      acpi_boot_init();
- -      x86_dtb_init();
+ +      topology_apply_cmdline_limits_early();
   
         /*
- -       * get boot-time SMP configuration:
+ +       * Parse SMP configuration. Try ACPI first and then the platform
+ +       * specific parser.
          */
- -      get_smp_config();
+ +      acpi_boot_init();
+ +      x86_init.mpparse.parse_smp_cfg();
   
- -      /*
- -       * Systems w/o ACPI and mptables might not have it mapped the local
- -       * APIC yet, but prefill_possible_map() might need to access it.
- -       */
+ +      /* Last opportunity to detect and map the local APIC */
         init_apic_mappings();
   
- -      prefill_possible_map();
+ +      topology_init_possible_cpus();
   
         init_cpu_to_node();
         init_gi_nodes();
@@@ -1206,6 -1211,16 +1206,16 @@@ void __init i386_reserve_resources(void
   
   #endif /* CONFIG_X86_32 */
   
+ #ifndef CONFIG_SMP
+ void __init smp_prepare_boot_cpu(void)
+ {
+       struct cpuinfo_x86 *c = &cpu_data(0);
+ 
+       *c = boot_cpu_data;
+       c->initialized = true;
+ }
+ #endif
+ 
   static struct notifier_block kernel_offset_notifier = {
         .notifier_call = dump_kernel_offset
   };
diff --combined arch/x86/kernel/smpboot.c

index 9c1e1219c28f9e2315cb1d89ff4efbfbe66ec068,37ea8c872e4a60772cad824ebd917d16bd90e3bf..fe355c89f6c112a33d17966d8821b1ae20608055
--- 1/arch/x86/kernel/smpboot.c
--- 2/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@@ -101,10 -101,6 +101,6 @@@ EXPORT_PER_CPU_SYMBOL(cpu_core_map)
   DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
   EXPORT_PER_CPU_SYMBOL(cpu_die_map);
   
- /* Per CPU bogomips and other parameters */
- DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
- EXPORT_PER_CPU_SYMBOL(cpu_info);
- 
   /* CPUs which are the primary SMT threads */
   struct cpumask __cpu_primary_thread_mask __read_mostly;
   
@@@ -125,6 -121,25 +121,6 @@@ struct mwait_cpu_dead 
    */
   static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
   
- -/* Logical package management. */
- -struct logical_maps {
- -      u32     phys_pkg_id;
- -      u32     phys_die_id;
- -      u32     logical_pkg_id;
- -      u32     logical_die_id;
- -};
- -
- -/* Temporary workaround until the full topology mechanics is in place */
- -static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = {
- -      .phys_pkg_id    = U32_MAX,
- -      .phys_die_id    = U32_MAX,
- -};
- -
- -unsigned int __max_logical_packages __read_mostly;
- -EXPORT_SYMBOL(__max_logical_packages);
- -static unsigned int logical_packages __read_mostly;
- -static unsigned int logical_die __read_mostly;
- -
   /* Maximum number of SMT threads on any online core */
   int __read_mostly __max_smt_threads = 1;
   
@@@ -317,11 -332,103 +313,11 @@@ static void notrace start_secondary(voi
         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
   }
   
- -/**
- - * topology_phys_to_logical_pkg - Map a physical package id to a logical
- - * @phys_pkg: The physical package id to map
- - *
- - * Returns logical package id or -1 if not found
- - */
- -int topology_phys_to_logical_pkg(unsigned int phys_pkg)
- -{
- -      int cpu;
- -
- -      for_each_possible_cpu(cpu) {
- -              if (per_cpu(logical_maps.phys_pkg_id, cpu) == phys_pkg)
- -                      return per_cpu(logical_maps.logical_pkg_id, cpu);
- -      }
- -      return -1;
- -}
- -EXPORT_SYMBOL(topology_phys_to_logical_pkg);
- -
- -/**
- - * topology_phys_to_logical_die - Map a physical die id to logical
- - * @die_id:   The physical die id to map
- - * @cur_cpu:  The CPU for which the mapping is done
- - *
- - * Returns logical die id or -1 if not found
- - */
- -static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
- -{
- -      int cpu, proc_id = cpu_data(cur_cpu).topo.pkg_id;
- -
- -      for_each_possible_cpu(cpu) {
- -              if (per_cpu(logical_maps.phys_pkg_id, cpu) == proc_id &&
- -                  per_cpu(logical_maps.phys_die_id, cpu) == die_id)
- -                      return per_cpu(logical_maps.logical_die_id, cpu);
- -      }
- -      return -1;
- -}
- -
- -/**
- - * topology_update_package_map - Update the physical to logical package map
- - * @pkg:      The physical package id as retrieved via CPUID
- - * @cpu:      The cpu for which this is updated
- - */
- -int topology_update_package_map(unsigned int pkg, unsigned int cpu)
- -{
- -      int new;
- -
- -      /* Already available somewhere? */
- -      new = topology_phys_to_logical_pkg(pkg);
- -      if (new >= 0)
- -              goto found;
- -
- -      new = logical_packages++;
- -      if (new != pkg) {
- -              pr_info("CPU %u Converting physical %u to logical package %u\n",
- -                      cpu, pkg, new);
- -      }
- -found:
- -      per_cpu(logical_maps.phys_pkg_id, cpu) = pkg;
- -      per_cpu(logical_maps.logical_pkg_id, cpu) = new;
- -      cpu_data(cpu).topo.logical_pkg_id = new;
- -      return 0;
- -}
- -/**
- - * topology_update_die_map - Update the physical to logical die map
- - * @die:      The die id as retrieved via CPUID
- - * @cpu:      The cpu for which this is updated
- - */
- -int topology_update_die_map(unsigned int die, unsigned int cpu)
- -{
- -      int new;
- -
- -      /* Already available somewhere? */
- -      new = topology_phys_to_logical_die(die, cpu);
- -      if (new >= 0)
- -              goto found;
- -
- -      new = logical_die++;
- -      if (new != die) {
- -              pr_info("CPU %u Converting physical %u to logical die %u\n",
- -                      cpu, die, new);
- -      }
- -found:
- -      per_cpu(logical_maps.phys_die_id, cpu) = die;
- -      per_cpu(logical_maps.logical_die_id, cpu) = new;
- -      cpu_data(cpu).topo.logical_die_id = new;
- -      return 0;
- -}
- -
   static void __init smp_store_boot_cpu_info(void)
   {
- -      int id = 0; /* CPU 0 */
- -      struct cpuinfo_x86 *c = &cpu_data(id);
+ +      struct cpuinfo_x86 *c = &cpu_data(0);
   
         *c = boot_cpu_data;
- -      c->cpu_index = id;
- -      topology_update_package_map(c->topo.pkg_id, id);
- -      topology_update_die_map(c->topo.die_id, id);
         c->initialized = true;
   }
   
@@@ -377,7 -484,6 +373,7 @@@ static bool match_smt(struct cpuinfo_x8
   
                 if (c->topo.pkg_id == o->topo.pkg_id &&
                     c->topo.die_id == o->topo.die_id &&
+ +                  c->topo.amd_node_id == o->topo.amd_node_id &&
                     per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) {
                         if (c->topo.core_id == o->topo.core_id)
                                 return topology_sane(c, o, "smt");
@@@ -399,13 -505,10 +395,13 @@@
   
   static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
   {
- -      if (c->topo.pkg_id == o->topo.pkg_id &&
- -          c->topo.die_id == o->topo.die_id)
- -              return true;
- -      return false;
+ +      if (c->topo.pkg_id != o->topo.pkg_id || c->topo.die_id != o->topo.die_id)
+ +              return false;
+ +
+ +      if (cpu_feature_enabled(X86_FEATURE_TOPOEXT) && topology_amd_nodes_per_pkg() > 1)
+ +              return c->topo.amd_node_id == o->topo.amd_node_id;
+ +
+ +      return true;
   }
   
   static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
@@@ -563,8 -666,8 +559,8 @@@ static void __init build_sched_topology
   
   void set_cpu_sibling_map(int cpu)
   {
- -      bool has_smt = smp_num_siblings > 1;
- -      bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
+ +      bool has_smt = __max_threads_per_core > 1;
+ +      bool has_mp = has_smt || topology_num_cores_per_package() > 1;
         struct cpuinfo_x86 *c = &cpu_data(cpu);
         struct cpuinfo_x86 *o;
         int i, threads;
@@@ -961,13 -1064,9 +957,13 @@@ int native_kick_ap(unsigned int cpu, st
   
         pr_debug("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
   
- -      if (apicid == BAD_APICID || !physid_isset(apicid, phys_cpu_present_map) ||
- -          !apic_id_valid(apicid)) {
- -              pr_err("%s: bad cpu %d\n", __func__, cpu);
+ +      if (apicid == BAD_APICID || !apic_id_valid(apicid)) {
+ +              pr_err("CPU %u has invalid APIC ID %x. Aborting bringup\n", cpu, apicid);
+ +              return -EINVAL;
+ +      }
+ +
+ +      if (!test_bit(apicid, phys_cpu_present_map)) {
+ +              pr_err("CPU %u APIC ID %x is not present. Aborting bringup\n", cpu, apicid);
                 return -EINVAL;
         }
   
@@@ -1036,8 -1135,14 +1032,8 @@@ static __init void disable_smp(void
         pr_info("SMP disabled\n");
   
         disable_ioapic_support();
+ +      topology_reset_possible_cpus_up();
   
- -      init_cpu_present(cpumask_of(0));
- -      init_cpu_possible(cpumask_of(0));
- -
- -      if (smp_found_config)
- -              physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
- -      else
- -              physid_set_mask_of_physid(0, &phys_cpu_present_map);
         cpumask_set_cpu(0, topology_sibling_cpumask(0));
         cpumask_set_cpu(0, topology_core_cpumask(0));
         cpumask_set_cpu(0, topology_die_cpumask(0));
@@@ -1078,6 -1183,11 +1074,11 @@@ void __init smp_prepare_cpus_common(voi
         set_cpu_sibling_map(0);
   }
   
+ void __init smp_prepare_boot_cpu(void)
+ {
+       smp_ops.smp_prepare_boot_cpu();
+ }
+ 
   #ifdef CONFIG_X86_64
   /* Establish whether parallel bringup can be supported. */
   bool __init arch_cpuhp_init_parallel_bringup(void)
@@@ -1156,16 -1266,102 +1157,16 @@@ void __init native_smp_prepare_boot_cpu
         native_pv_lock_init();
   }
   
- -void __init calculate_max_logical_packages(void)
- -{
- -      int ncpus;
- -
- -      /*
- -       * Today neither Intel nor AMD support heterogeneous systems so
- -       * extrapolate the boot cpu's data to all packages.
- -       */
- -      ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
- -      __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- -      pr_info("Max logical packages: %u\n", __max_logical_packages);
- -}
- -
   void __init native_smp_cpus_done(unsigned int max_cpus)
   {
         pr_debug("Boot done\n");
   
- -      calculate_max_logical_packages();
         build_sched_topology();
         nmi_selftest();
         impress_friends();
         cache_aps_init();
   }
   
- -static int __initdata setup_possible_cpus = -1;
- -static int __init _setup_possible_cpus(char *str)
- -{
- -      get_option(&str, &setup_possible_cpus);
- -      return 0;
- -}
- -early_param("possible_cpus", _setup_possible_cpus);
- -
- -
- -/*
- - * cpu_possible_mask should be static, it cannot change as cpu's
- - * are onlined, or offlined. The reason is per-cpu data-structures
- - * are allocated by some modules at init time, and don't expect to
- - * do this dynamically on cpu arrival/departure.
- - * cpu_present_mask on the other hand can change dynamically.
- - * In case when cpu_hotplug is not compiled, then we resort to current
- - * behaviour, which is cpu_possible == cpu_present.
- - * - Ashok Raj
- - *
- - * Three ways to find out the number of additional hotplug CPUs:
- - * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- - * - The user can overwrite it with possible_cpus=NUM
- - * - Otherwise don't reserve additional CPUs.
- - * We do this because additional CPUs waste a lot of memory.
- - * -AK
- - */
- -__init void prefill_possible_map(void)
- -{
- -      int i, possible;
- -
- -      i = setup_max_cpus ?: 1;
- -      if (setup_possible_cpus == -1) {
- -              possible = num_processors;
- -#ifdef CONFIG_HOTPLUG_CPU
- -              if (setup_max_cpus)
- -                      possible += disabled_cpus;
- -#else
- -              if (possible > i)
- -                      possible = i;
- -#endif
- -      } else
- -              possible = setup_possible_cpus;
- -
- -      total_cpus = max_t(int, possible, num_processors + disabled_cpus);
- -
- -      /* nr_cpu_ids could be reduced via nr_cpus= */
- -      if (possible > nr_cpu_ids) {
- -              pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
- -                      possible, nr_cpu_ids);
- -              possible = nr_cpu_ids;
- -      }
- -
- -#ifdef CONFIG_HOTPLUG_CPU
- -      if (!setup_max_cpus)
- -#endif
- -      if (possible > i) {
- -              pr_warn("%d Processors exceeds max_cpus limit of %u\n",
- -                      possible, setup_max_cpus);
- -              possible = i;
- -      }
- -
- -      set_nr_cpu_ids(possible);
- -
- -      pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
- -              possible, max_t(int, possible - num_processors, 0));
- -
- -      reset_cpu_possible_mask();
- -
- -      for (i = 0; i < possible; i++)
- -              set_cpu_possible(i, true);
- -}
- -
   /* correctly size the local cpu masks */
   void __init setup_cpu_local_masks(void)
   {
diff --combined arch/x86/kvm/mmu/mmu.c

index 0544700ca50b8458ad97020bde53ec24432a21c2,3c89d3ebaa3a9d3966a4d5c001beb7090b87da8d..f920f649f4b70bd836f2fe73fe60b5969c2a13a6
--- 1/arch/x86/kvm/mmu/mmu.c
--- 2/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@@ -53,12 -53,11 +53,11 @@@
   #include <asm/cmpxchg.h>
   #include <asm/io.h>
   #include <asm/set_memory.h>
+ #include <asm/spec-ctrl.h>
   #include <asm/vmx.h>
   
   #include "trace.h"
   
- extern bool itlb_multihit_kvm_mitigation;
- 
   static bool nx_hugepage_mitigation_hard_disabled;
   
   int __read_mostly nx_huge_pages = -1;
@@@ -4405,31 -4404,6 +4404,31 @@@ static int kvm_faultin_pfn(struct kvm_v
         fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
         smp_rmb();
   
+ +      /*
+ +       * Check for a relevant mmu_notifier invalidation event before getting
+ +       * the pfn from the primary MMU, and before acquiring mmu_lock.
+ +       *
+ +       * For mmu_lock, if there is an in-progress invalidation and the kernel
+ +       * allows preemption, the invalidation task may drop mmu_lock and yield
+ +       * in response to mmu_lock being contended, which is *very* counter-
+ +       * productive as this vCPU can't actually make forward progress until
+ +       * the invalidation completes.
+ +       *
+ +       * Retrying now can also avoid unnecessary lock contention in the primary
+ +       * MMU, as the primary MMU doesn't necessarily hold a single lock for
+ +       * the duration of the invalidation, i.e. faulting in a conflicting pfn
+ +       * can cause the invalidation to take longer by holding locks that are
+ +       * needed to complete the invalidation.
+ +       *
+ +       * Do the pre-check even for non-preemptible kernels, i.e. even if KVM
+ +       * will never yield mmu_lock in response to contention, as this vCPU is
+ +       * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
+ +       * to detect retry guarantees the worst case latency for the vCPU.
+ +       */
+ +      if (fault->slot &&
+ +          mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
+ +              return RET_PF_RETRY;
+ +
         ret = __kvm_faultin_pfn(vcpu, fault);
         if (ret != RET_PF_CONTINUE)
                 return ret;
@@@ -4440,18 -4414,6 +4439,18 @@@
         if (unlikely(!fault->slot))
                 return kvm_handle_noslot_fault(vcpu, fault, access);
   
+ +      /*
+ +       * Check again for a relevant mmu_notifier invalidation event purely to
+ +       * avoid contending mmu_lock.  Most invalidations will be detected by
+ +       * the previous check, but checking is extremely cheap relative to the
+ +       * overall cost of failing to detect the invalidation until after
+ +       * mmu_lock is acquired.
+ +       */
+ +      if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) {
+ +              kvm_release_pfn_clean(fault->pfn);
+ +              return RET_PF_RETRY;
+ +      }
+ +
         return RET_PF_CONTINUE;
   }
   
@@@ -4479,11 -4441,6 +4478,11 @@@ static bool is_page_fault_stale(struct 
         if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
                 return true;
   
+ +      /*
+ +       * Check for a relevant mmu_notifier invalidation event one last time
+ +       * now that mmu_lock is held, as the "unsafe" checks performed without
+ +       * holding mmu_lock can get false negatives.
+ +       */
         return fault->slot &&
                mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
   }
diff --combined include/linux/smp.h

index 7398ce99786cfbdf60e2a9516abed58abe954b9f,b84592950149e20409151e3fd02eec692bee8842..fcd61dfe2af331a3a4448da9adb5c507e370a8cc
--- 1/include/linux/smp.h
--- 2/include/linux/smp.h
+++ b/include/linux/smp.h
@@@ -105,6 -105,12 +105,12 @@@ static inline void on_each_cpu_cond(smp
         on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
   }
   
+ /*
+  * Architecture specific boot CPU setup.  Defined as empty weak function in
+  * init/main.c. Architectures can override it.
+  */
+ void smp_prepare_boot_cpu(void);
+ 
   #ifdef CONFIG_SMP
   
   #include <linux/preempt.h>
@@@ -171,12 -177,6 +177,6 @@@ void generic_smp_call_function_single_i
   #define generic_smp_call_function_interrupt \
         generic_smp_call_function_single_interrupt
   
- /*
-  * Mark the boot cpu "online" so that it can call console drivers in
-  * printk() and can access its per-cpu storage.
-  */
- void smp_prepare_boot_cpu(void);
- 
   extern unsigned int setup_max_cpus;
   extern void __init setup_nr_cpu_ids(void);
   extern void __init smp_init(void);
@@@ -203,7 -203,6 +203,6 @@@ static inline void up_smp_call_function
                         (up_smp_call_function(func, info))
   
   static inline void smp_send_reschedule(int cpu) { }
- #define smp_prepare_boot_cpu()                        do {} while (0)
   #define smp_call_function_many(mask, func, info, wait) \
                         (up_smp_call_function(func, info))
   static inline void call_function_init(void) { }
@@@ -218,8 -217,6 +217,8 @@@ smp_call_function_any(const struct cpum
   static inline void kick_all_cpus_sync(void) {  }
   static inline void wake_up_all_idle_cpus(void) {  }
   
+ +#define setup_max_cpus 0
+ +
   #ifdef CONFIG_UP_LATE_INIT
   extern void __init up_late_init(void);
   static inline void smp_init(void) { up_late_init(); }
@@@ -263,7 -260,7 +262,7 @@@ static inline int get_boot_cpu_id(void
    * regular asm read for the stable.
    */
   #ifndef __smp_processor_id
- -#define __smp_processor_id(x) raw_smp_processor_id(x)
+ +#define __smp_processor_id() raw_smp_processor_id()
   #endif
   
   #ifdef CONFIG_DEBUG_PREEMPT
diff --combined init/main.c

index c87280454f55d941d36805a738e66b08117d75ce,d60bc4b23dd47024bd66c8b04f1217bd374b2c80..7dce08198b13352357a1262138c71460da1cb584
--- 1/init/main.c
--- 2/init/main.c
+++ b/init/main.c
@@@ -99,7 -99,6 +99,7 @@@
   #include <linux/init_syscalls.h>
   #include <linux/stackdepot.h>
   #include <linux/randomize_kstack.h>
+ +#include <linux/pidfs.h>
   #include <net/net_namespace.h>
   
   #include <asm/io.h>
@@@ -604,6 -603,7 +604,6 @@@ static int __init rdinit_setup(char *st
   __setup("rdinit=", rdinit_setup);
   
   #ifndef CONFIG_SMP
- -static const unsigned int setup_max_cpus = NR_CPUS;
   static inline void setup_nr_cpu_ids(void) { }
   static inline void smp_prepare_cpus(unsigned int maxcpus) { }
   #endif
@@@ -776,6 -776,10 +776,10 @@@ void __init __weak smp_setup_processor_
   {
   }
   
+ void __init __weak smp_prepare_boot_cpu(void)
+ {
+ }
+ 
   # if THREAD_SIZE >= PAGE_SIZE
   void __init __weak thread_stack_cache_init(void)
   {
@@@ -1059,7 -1063,6 +1063,7 @@@ void start_kernel(void
         seq_file_init();
         proc_root_init();
         nsfs_init();
+ +      pidfs_init();
         cpuset_init();
         cgroup_init();
         taskstats_init_early();
@@@ -1546,7 -1549,6 +1550,7 @@@ static noinline void __init kernel_init
         sched_init_smp();
   
         workqueue_init_topology();
+ +      async_init();
         padata_init();
         page_alloc_init_late();
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 12 Mar 2024 02:37:56 +0000 (19:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 12 Mar 2024 02:37:56 +0000 (19:37 -0700)
		1	2
arch/x86/include/asm/msr.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/smp.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/nmi.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/setup.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/smpboot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/mmu/mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/smp.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/main.c	patch \|	diff1 \|	diff2 \|	blob \| history