#include <uapi/asm/msr.h>
#include <asm/shared/msr.h>
+ #include <linux/percpu.h>
+
struct msr_info {
- u32 msr_no;
- struct msr reg;
- struct msr *msrs;
- int err;
+ u32 msr_no;
+ struct msr reg;
+ struct msr __percpu *msrs; /* per-CPU pointer; same type that msrs_alloc() returns */
+ int err;
};
struct msr_regs_info {
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
+/*
+ * WRMSRNS behaves exactly like WRMSR with the only difference being
+ * that it is not a serializing instruction by default.
+ *
+ * @msr is passed in ECX, @low in EAX and @high in EDX. An exception
+ * table entry of type EX_TYPE_WRMSR is registered for the instruction
+ * at label 1, with label 2 as its fixup target.
+ *
+ * NOTE(review): unlike the WRMSR asm right above, this asm statement
+ * has no "memory" clobber - presumably intentional given the
+ * non-serializing semantics; confirm.
+ */
+static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
+{
+ /* Raw opcode bytes for WRMSRNS; the mnemonic is supported in binutils >= 2.40. */
+ asm volatile("1: .byte 0x0f,0x01,0xc6\n"
+ "2:\n"
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a"(low), "d" (high));
+}
+
#define native_rdmsr(msr, val1, val2) \
do { \
u64 __val = __rdmsr((msr)); \
#endif /* !CONFIG_PARAVIRT_XXL */
+static __always_inline void wrmsrns(u32 msr, u64 val)
+{
+ __wrmsrns(msr, val, val >> 32); /* 64-bit @val split: low 32 bits (u32 truncation), then high 32 bits */
+}
+
/*
* 64-bit version of wrmsr_safe():
*/
return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
}
- struct msr *msrs_alloc(void);
- void msrs_free(struct msr *msrs);
+ struct msr __percpu *msrs_alloc(void);
+ void msrs_free(struct msr __percpu *msrs);
int msr_set_bit(u32 msr, u8 bit);
int msr_clear_bit(u32 msr, u8 bit);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
- void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
- void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
+ void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
+ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr __percpu *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
return 0;
}
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+ rdmsr_on_cpu(0, msr_no, raw_cpu_ptr(&msrs->l), raw_cpu_ptr(&msrs->h)); /* @m is ignored: read on CPU 0 into this CPU's per-CPU l/h slot */
}
static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
- struct msr *msrs)
+ struct msr __percpu *msrs)
{
- wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+ wrmsr_on_cpu(0, msr_no, raw_cpu_read(msrs->l), raw_cpu_read(msrs->h)); /* @m is ignored: write this CPU's per-CPU l/h values on CPU 0 */
}
static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
u32 *l, u32 *h)
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
- #include <asm/msr.h>
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/special_insns.h>
u32 logical_pkg_id;
u32 logical_die_id;
+ // AMD Node ID and Nodes per Package info
+ u32 amd_node_id;
+
// Cache level topology IDs
u32 llc_id;
u32 l2c_id;
#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
- /* CPUID returned core id bits: */
- __u8 x86_coreid_bits;
/* Max extended CPUID function supported: */
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
unsigned long loops_per_jiffy;
/* protected processor identification number */
u64 ppin;
- /* cpuid returned max cores value: */
- u16 x86_max_cores;
u16 x86_clflush_size;
/* number of cores as seen by the OS: */
u16 booted_cores;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
- #ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
- #else
- #define cpu_info boot_cpu_data
- #define cpu_data(cpu) boot_cpu_data
- #endif
extern const struct seq_operations cpuinfo_op;
extern void cpu_init_exception_handling(void);
extern void cr4_init(void);
- static inline unsigned long get_debugctlmsr(void)
- {
- unsigned long debugctlmsr = 0;
-
- #ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return 0;
- #endif
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-
- return debugctlmsr;
- }
-
- static inline void update_debugctlmsr(unsigned long debugctlmsr)
- {
- #ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return;
- #endif
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
- }
-
extern void set_task_blockstep(struct task_struct *task, bool on);
/* Boot loader type from the setup header: */
#else
extern unsigned long __end_init_task[];
-#define INIT_THREAD { \
- .sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
+#define INIT_THREAD { \
+ .sp = (unsigned long)&__end_init_task - \
+ TOP_OF_KERNEL_STACK_PADDING - \
+ sizeof(struct pt_regs), \
}
extern unsigned long KSTK_ESP(struct task_struct *task);
}
#ifdef CONFIG_CPU_SUP_AMD
-extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
extern void amd_clear_divider(void);
extern void amd_check_microcode(void);
#else
-static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
#include <asm/current.h>
#include <asm/thread_info.h>
-extern int smp_num_siblings;
-extern unsigned int num_processors;
-
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
smp_ops.stop_other_cpus(1);
}
- static inline void smp_prepare_boot_cpu(void)
- {
- smp_ops.smp_prepare_boot_cpu();
- }
-
static inline void smp_prepare_cpus(unsigned int max_cpus)
{
smp_ops.smp_prepare_cpus(max_cpus);
void native_smp_prepare_boot_cpu(void);
void smp_prepare_cpus_common(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
-void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
}
#endif /* CONFIG_SMP */
-extern unsigned disabled_cpus;
-
#ifdef CONFIG_DEBUG_NMI_SELFTEST
extern void nmi_selftest(void);
#else
#include <asm/microcode.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
+#include <asm/fred.h>
#include <asm/uv/uv.h>
#include <asm/ia32.h>
#include <asm/set_memory.h>
#include "cpu.h"
+ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+ EXPORT_PER_CPU_SYMBOL(cpu_info);
+
u32 elf_hwcap2 __read_mostly;
/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
+unsigned int __max_threads_per_core __ro_after_init = 1;
+EXPORT_SYMBOL(__max_threads_per_core);
+
+unsigned int __max_dies_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__max_dies_per_package);
+
+unsigned int __max_logical_packages __ro_after_init = 1;
+EXPORT_SYMBOL(__max_logical_packages);
+
+unsigned int __num_cores_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__num_cores_per_package);
+
+unsigned int __num_threads_per_package __ro_after_init = 1;
+EXPORT_SYMBOL(__num_threads_per_package);
static struct ppin_info {
int feature;
}
/* These bits should not change their value after CPU init is finished. */
-static const unsigned long cr4_pinned_mask =
- X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
- X86_CR4_FSGSBASE | X86_CR4_CET;
+static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
+ X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
*(s + 1) = '\0';
}
-void detect_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, ebx, ecx, edx;
-
- c->x86_max_cores = 1;
- if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
- return;
-
- cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- if (eax & 0x1f)
- c->x86_max_cores = (eax >> 26) + 1;
-}
-
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
}
-int detect_ht_early(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- u32 eax, ebx, ecx, edx;
-
- if (!cpu_has(c, X86_FEATURE_HT))
- return -1;
-
- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
- return -1;
-
- if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
- return -1;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
-
- smp_num_siblings = (ebx & 0xff0000) >> 16;
- if (smp_num_siblings == 1)
- pr_info_once("CPU0: Hyper-Threading is disabled\n");
-#endif
- return 0;
-}
-
-void detect_ht(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- int index_msb, core_bits;
-
- if (detect_ht_early(c) < 0)
- return;
-
- index_msb = get_count_order(smp_num_siblings);
- c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb);
-
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
- index_msb = get_count_order(smp_num_siblings);
-
- core_bits = get_count_order(c->x86_max_cores);
-
- c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) &
- ((1 << core_bits) - 1);
-#endif
-}
-
static void get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
/*
* AMD's AutoIBRS is equivalent to Intel's eIBRS - use the Intel feature
* flag and protect from vendor-specific bugs via the whitelist.
+ *
+ * Don't use AutoIBRS when SNP is enabled because it degrades host
+ * userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) || cpu_has(c, X86_FEATURE_AUTOIBRS)) {
+ if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ (cpu_has(c, X86_FEATURE_AUTOIBRS) &&
+ !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
!(ia32_cap & ARCH_CAP_PBRSB_NO))
get_cpu_address_sizes(c);
cpu_parse_early_param();
+ cpu_init_topology(c);
+
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
} else {
setup_clear_cpu_cap(X86_FEATURE_CPUID);
get_cpu_address_sizes(c);
+ cpu_init_topology(c);
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
get_cpu_address_sizes(c);
- if (c->cpuid_level >= 0x00000001) {
- c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
-#ifdef CONFIG_X86_32
-# ifdef CONFIG_SMP
- c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
-# else
- c->topo.apicid = c->topo.initial_apicid;
-# endif
-#endif
- c->topo.pkg_id = c->topo.initial_apicid;
- }
-
get_model_name(c); /* Default name */
/*
#endif
}
-/*
- * Validate that ACPI/mptables have the same information about the
- * effective APIC id and update the package map.
- */
-static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_SMP
- unsigned int cpu = smp_processor_id();
- u32 apicid;
-
- apicid = apic->cpu_present_to_apicid(cpu);
-
- if (apicid != c->topo.apicid) {
- pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
- cpu, apicid, c->topo.initial_apicid);
- }
- BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu));
- BUG_ON(topology_update_die_map(c->topo.die_id, cpu));
-#else
- c->topo.logical_pkg_id = 0;
-#endif
-}
-
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
c->x86_model = c->x86_stepping = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_max_cores = 1;
- c->x86_coreid_bits = 0;
- c->topo.cu_id = 0xff;
- c->topo.llc_id = BAD_APICID;
- c->topo.l2c_id = BAD_APICID;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
generic_identify(c);
+ cpu_parse_topology(c);
+
if (this_cpu->c_identify)
this_cpu->c_identify(c);
/* Clear/Set all flags overridden by options, after probe */
apply_forced_caps(c);
-#ifdef CONFIG_X86_64
- c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0);
-#endif
-
-
/*
* Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
* Hygon will clear it in ->c_init() below.
c->x86, c->x86_model);
}
-#ifdef CONFIG_X86_64
- detect_ht(c);
-#endif
-
x86_init_rdrand(c);
setup_pku(c);
setup_cet(c);
#ifdef CONFIG_X86_32
enable_sep_cpu();
#endif
- validate_apic_and_package_id(c);
x86_spec_ctrl_setup_ap();
update_srbds_msr();
if (boot_cpu_has_bug(X86_BUG_GDS))
wrmsrl(MSR_CSTAR, val);
}
-/* May not be marked __init: used by software suspend */
-void syscall_init(void)
+static inline void idt_syscall_init(void)
{
- wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
if (ia32_enabled()) {
X86_EFLAGS_AC|X86_EFLAGS_ID);
}
+/*
+ * Set up the syscall entry MSRs: MSR_STAR is always written, while the
+ * remaining setup done by idt_syscall_init() is skipped when FRED is
+ * enabled.
+ *
+ * May not be marked __init: used by software suspend
+ */
+void syscall_init(void)
+{
+ /* The default user and kernel segments */
+ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
+
+ /*
+ * Except for the IA32_STAR MSR, there is NO need to set up the SYSCALL
+ * and SYSENTER MSRs for FRED, because FRED uses the ring 3 FRED
+ * entrypoint for SYSCALL and SYSENTER, and ERETU is the only legit
+ * instruction to return to ring 3 (both sysexit and sysret cause
+ * #UD when FRED is enabled).
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ idt_syscall_init();
+}
+
#else /* CONFIG_X86_64 */
#ifdef CONFIG_STACKPROTECTOR
/* paranoid_entry() gets the CPU number from the GDT */
setup_getcpu(cpu);
- /* IST vectors need TSS to be set up. */
- tss_setup_ist(tss);
+ /* For IDT mode, IST vectors need to be set in TSS. */
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ tss_setup_ist(tss);
tss_setup_io_bitmap(tss);
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
- /* Finally load the IDT */
- load_current_idt();
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ cpu_init_fred_exceptions();
+ else
+ load_current_idt();
}
/*
* identify_boot_cpu() initialized SMT support information, let the
* core code know.
*/
- cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings);
+ cpu_smt_set_num_threads(__max_threads_per_core, __max_threads_per_core);
if (!IS_ENABLED(CONFIG_SMP)) {
pr_info("CPU: ");
#include <asm/nospec-branch.h>
#include <asm/microcode.h>
#include <asm/sev.h>
+#include <asm/fred.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
__this_cpu_add(nmi_stats.unknown, 1);
- pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
+ pr_emerg_ratelimited("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
nmi_panic(regs, "NMI: Not continuing");
- pr_emerg("Dazed and confused, but trying to continue\n");
+ pr_emerg_ratelimited("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);
if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
raw_atomic_long_inc(&nsp->idt_calls);
- if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) {
+ if (arch_cpu_is_offline(smp_processor_id())) {
if (microcode_nmi_handler_enabled())
microcode_offline_nmi_handler();
return;
msgp = nmi_check_stall_msg[idx];
if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
modp = ", but OK because ignore_nmis was set";
- if (nmi_seq & ~0x1)
+ if (nmi_seq & 0x1)
msghp = " (CPU currently in NMI handler function)";
else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
msghp = " (CPU exited one NMI handler function)";
#endif
+#ifdef CONFIG_X86_FRED
+/*
+ * With FRED, CR2/DR6 is pushed to #PF/#DB stack frame during FRED
+ * event delivery, i.e., there is no problem of transient states.
+ * And NMI unblocking only happens when the stack frame indicates
+ * that it should happen.
+ *
+ * Thus, the NMI entry stub for FRED is really straightforward and
+ * as simple as most exception handlers. As such, #DB is allowed
+ * during NMI handling.
+ */
+DEFINE_FREDENTRY_NMI(exc_nmi)
+{
+ irqentry_state_t irq_state;
+
+ if (arch_cpu_is_offline(smp_processor_id())) {
+ if (microcode_nmi_handler_enabled())
+ microcode_offline_nmi_handler();
+ return; /* an offline CPU does no further NMI processing */
+ }
+
+ /*
+ * Save CR2 for eventual restore to cover the case where the NMI
+ * hits the VMENTER/VMEXIT region where guest CR2 is live. This
+ * prevents guest state corruption in case the NMI handler
+ * takes a page fault.
+ */
+ this_cpu_write(nmi_cr2, read_cr2());
+
+ irq_state = irqentry_nmi_enter(regs);
+
+ inc_irq_stat(__nmi_count);
+ default_do_nmi(regs);
+
+ irqentry_nmi_exit(regs, irq_state);
+
+ if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
+ write_cr2(this_cpu_read(nmi_cr2)); /* CR2 changed during handling: put the saved value back */
+}
+#endif
+
void stop_nmi(void)
{
ignore_nmis++;
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
#endif
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
+ /* Find and reserve MPTABLE area */
+ x86_init.mpparse.find_mptable();
early_alloc_pgt_buf();
early_platform_quirks();
+ /* Some platforms need the APIC registered for NUMA configuration */
early_acpi_boot_init();
+ x86_init.mpparse.early_parse_smp_cfg();
x86_flattree_get_config();
early_quirks();
- /*
- * Read APIC and some other early information from ACPI tables.
- */
- acpi_boot_init();
- x86_dtb_init();
+ topology_apply_cmdline_limits_early();
/*
- * get boot-time SMP configuration:
+ * Parse SMP configuration. Try ACPI first and then the platform
+ * specific parser.
*/
- get_smp_config();
+ acpi_boot_init();
+ x86_init.mpparse.parse_smp_cfg();
- /*
- * Systems w/o ACPI and mptables might not have it mapped the local
- * APIC yet, but prefill_possible_map() might need to access it.
- */
+ /* Last opportunity to detect and map the local APIC */
init_apic_mappings();
- prefill_possible_map();
+ topology_init_possible_cpus();
init_cpu_to_node();
init_gi_nodes();
#endif /* CONFIG_X86_32 */
+ #ifndef CONFIG_SMP
+ void __init smp_prepare_boot_cpu(void)
+ {
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ *c = boot_cpu_data; /* seed CPU 0's per-CPU cpu_info from boot_cpu_data */
+ c->initialized = true;
+ }
+ #endif
+
static struct notifier_block kernel_offset_notifier = {
.notifier_call = dump_kernel_offset
};
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);
- /* Per CPU bogomips and other parameters */
- DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
- EXPORT_PER_CPU_SYMBOL(cpu_info);
-
/* CPUs which are the primary SMT threads */
struct cpumask __cpu_primary_thread_mask __read_mostly;
*/
static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
-/* Logical package management. */
-struct logical_maps {
- u32 phys_pkg_id;
- u32 phys_die_id;
- u32 logical_pkg_id;
- u32 logical_die_id;
-};
-
-/* Temporary workaround until the full topology mechanics is in place */
-static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = {
- .phys_pkg_id = U32_MAX,
- .phys_die_id = U32_MAX,
-};
-
-unsigned int __max_logical_packages __read_mostly;
-EXPORT_SYMBOL(__max_logical_packages);
-static unsigned int logical_packages __read_mostly;
-static unsigned int logical_die __read_mostly;
-
/* Maximum number of SMT threads on any online core */
int __read_mostly __max_smt_threads = 1;
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
-/**
- * topology_phys_to_logical_pkg - Map a physical package id to a logical
- * @phys_pkg: The physical package id to map
- *
- * Returns logical package id or -1 if not found
- */
-int topology_phys_to_logical_pkg(unsigned int phys_pkg)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- if (per_cpu(logical_maps.phys_pkg_id, cpu) == phys_pkg)
- return per_cpu(logical_maps.logical_pkg_id, cpu);
- }
- return -1;
-}
-EXPORT_SYMBOL(topology_phys_to_logical_pkg);
-
-/**
- * topology_phys_to_logical_die - Map a physical die id to logical
- * @die_id: The physical die id to map
- * @cur_cpu: The CPU for which the mapping is done
- *
- * Returns logical die id or -1 if not found
- */
-static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
-{
- int cpu, proc_id = cpu_data(cur_cpu).topo.pkg_id;
-
- for_each_possible_cpu(cpu) {
- if (per_cpu(logical_maps.phys_pkg_id, cpu) == proc_id &&
- per_cpu(logical_maps.phys_die_id, cpu) == die_id)
- return per_cpu(logical_maps.logical_die_id, cpu);
- }
- return -1;
-}
-
-/**
- * topology_update_package_map - Update the physical to logical package map
- * @pkg: The physical package id as retrieved via CPUID
- * @cpu: The cpu for which this is updated
- */
-int topology_update_package_map(unsigned int pkg, unsigned int cpu)
-{
- int new;
-
- /* Already available somewhere? */
- new = topology_phys_to_logical_pkg(pkg);
- if (new >= 0)
- goto found;
-
- new = logical_packages++;
- if (new != pkg) {
- pr_info("CPU %u Converting physical %u to logical package %u\n",
- cpu, pkg, new);
- }
-found:
- per_cpu(logical_maps.phys_pkg_id, cpu) = pkg;
- per_cpu(logical_maps.logical_pkg_id, cpu) = new;
- cpu_data(cpu).topo.logical_pkg_id = new;
- return 0;
-}
-/**
- * topology_update_die_map - Update the physical to logical die map
- * @die: The die id as retrieved via CPUID
- * @cpu: The cpu for which this is updated
- */
-int topology_update_die_map(unsigned int die, unsigned int cpu)
-{
- int new;
-
- /* Already available somewhere? */
- new = topology_phys_to_logical_die(die, cpu);
- if (new >= 0)
- goto found;
-
- new = logical_die++;
- if (new != die) {
- pr_info("CPU %u Converting physical %u to logical die %u\n",
- cpu, die, new);
- }
-found:
- per_cpu(logical_maps.phys_die_id, cpu) = die;
- per_cpu(logical_maps.logical_die_id, cpu) = new;
- cpu_data(cpu).topo.logical_die_id = new;
- return 0;
-}
-
static void __init smp_store_boot_cpu_info(void)
{
- int id = 0; /* CPU 0 */
- struct cpuinfo_x86 *c = &cpu_data(id);
+ struct cpuinfo_x86 *c = &cpu_data(0);
*c = boot_cpu_data;
- c->cpu_index = id;
- topology_update_package_map(c->topo.pkg_id, id);
- topology_update_die_map(c->topo.die_id, id);
c->initialized = true;
}
if (c->topo.pkg_id == o->topo.pkg_id &&
c->topo.die_id == o->topo.die_id &&
+ c->topo.amd_node_id == o->topo.amd_node_id &&
per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) {
if (c->topo.core_id == o->topo.core_id)
return topology_sane(c, o, "smt");
static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (c->topo.pkg_id == o->topo.pkg_id &&
- c->topo.die_id == o->topo.die_id)
- return true;
- return false;
+ if (c->topo.pkg_id != o->topo.pkg_id || c->topo.die_id != o->topo.die_id)
+ return false; /* package or die ID differs: no match */
+
+ if (cpu_feature_enabled(X86_FEATURE_TOPOEXT) && topology_amd_nodes_per_pkg() > 1)
+ return c->topo.amd_node_id == o->topo.amd_node_id; /* several AMD nodes per package: node ID must match too */
+
+ return true;
}
static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
void set_cpu_sibling_map(int cpu)
{
- bool has_smt = smp_num_siblings > 1;
- bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
+ bool has_smt = __max_threads_per_core > 1;
+ bool has_mp = has_smt || topology_num_cores_per_package() > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
int i, threads;
pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
- if (apicid == BAD_APICID || !physid_isset(apicid, phys_cpu_present_map) ||
- !apic_id_valid(apicid)) {
- pr_err("%s: bad cpu %d\n", __func__, cpu);
+ if (apicid == BAD_APICID || !apic_id_valid(apicid)) {
+ pr_err("CPU %u has invalid APIC ID %x. Aborting bringup\n", cpu, apicid);
+ return -EINVAL;
+ }
+
+ if (!test_bit(apicid, phys_cpu_present_map)) {
+ pr_err("CPU %u APIC ID %x is not present. Aborting bringup\n", cpu, apicid);
return -EINVAL;
}
pr_info("SMP disabled\n");
disable_ioapic_support();
+ topology_reset_possible_cpus_up();
- init_cpu_present(cpumask_of(0));
- init_cpu_possible(cpumask_of(0));
-
- if (smp_found_config)
- physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
- else
- physid_set_mask_of_physid(0, &phys_cpu_present_map);
cpumask_set_cpu(0, topology_sibling_cpumask(0));
cpumask_set_cpu(0, topology_core_cpumask(0));
cpumask_set_cpu(0, topology_die_cpumask(0));
set_cpu_sibling_map(0);
}
+ void __init smp_prepare_boot_cpu(void)
+ {
+ smp_ops.smp_prepare_boot_cpu(); /* delegate to the smp_ops callback */
+ }
+
#ifdef CONFIG_X86_64
/* Establish whether parallel bringup can be supported. */
bool __init arch_cpuhp_init_parallel_bringup(void)
native_pv_lock_init();
}
-void __init calculate_max_logical_packages(void)
-{
- int ncpus;
-
- /*
- * Today neither Intel nor AMD support heterogeneous systems so
- * extrapolate the boot cpu's data to all packages.
- */
- ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
- __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- pr_info("Max logical packages: %u\n", __max_logical_packages);
-}
-
void __init native_smp_cpus_done(unsigned int max_cpus)
{
pr_debug("Boot done\n");
- calculate_max_logical_packages();
build_sched_topology();
nmi_selftest();
impress_friends();
cache_aps_init();
}
-static int __initdata setup_possible_cpus = -1;
-static int __init _setup_possible_cpus(char *str)
-{
- get_option(&str, &setup_possible_cpus);
- return 0;
-}
-early_param("possible_cpus", _setup_possible_cpus);
-
-
-/*
- * cpu_possible_mask should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and don't expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_mask on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with possible_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- * We do this because additional CPUs waste a lot of memory.
- * -AK
- */
-__init void prefill_possible_map(void)
-{
- int i, possible;
-
- i = setup_max_cpus ?: 1;
- if (setup_possible_cpus == -1) {
- possible = num_processors;
-#ifdef CONFIG_HOTPLUG_CPU
- if (setup_max_cpus)
- possible += disabled_cpus;
-#else
- if (possible > i)
- possible = i;
-#endif
- } else
- possible = setup_possible_cpus;
-
- total_cpus = max_t(int, possible, num_processors + disabled_cpus);
-
- /* nr_cpu_ids could be reduced via nr_cpus= */
- if (possible > nr_cpu_ids) {
- pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
- possible, nr_cpu_ids);
- possible = nr_cpu_ids;
- }
-
-#ifdef CONFIG_HOTPLUG_CPU
- if (!setup_max_cpus)
-#endif
- if (possible > i) {
- pr_warn("%d Processors exceeds max_cpus limit of %u\n",
- possible, setup_max_cpus);
- possible = i;
- }
-
- set_nr_cpu_ids(possible);
-
- pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
- possible, max_t(int, possible - num_processors, 0));
-
- reset_cpu_possible_mask();
-
- for (i = 0; i < possible; i++)
- set_cpu_possible(i, true);
-}
-
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/set_memory.h>
+ #include <asm/spec-ctrl.h>
#include <asm/vmx.h>
#include "trace.h"
- extern bool itlb_multihit_kvm_mitigation;
-
static bool nx_hugepage_mitigation_hard_disabled;
int __read_mostly nx_huge_pages = -1;
fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
smp_rmb();
+ /*
+ * Check for a relevant mmu_notifier invalidation event before getting
+ * the pfn from the primary MMU, and before acquiring mmu_lock.
+ *
+ * For mmu_lock, if there is an in-progress invalidation and the kernel
+ * allows preemption, the invalidation task may drop mmu_lock and yield
+ * in response to mmu_lock being contended, which is *very* counter-
+ * productive as this vCPU can't actually make forward progress until
+ * the invalidation completes.
+ *
+ * Retrying now can also avoid unnecessary lock contention in the primary
+ * MMU, as the primary MMU doesn't necessarily hold a single lock for
+ * the duration of the invalidation, i.e. faulting in a conflicting pfn
+ * can cause the invalidation to take longer by holding locks that are
+ * needed to complete the invalidation.
+ *
+ * Do the pre-check even for non-preemptible kernels, i.e. even if KVM
+ * will never yield mmu_lock in response to contention, as this vCPU is
+ * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
+ * to detect retry guarantees the worst case latency for the vCPU.
+ */
+ if (fault->slot &&
+ mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
+ return RET_PF_RETRY;
+
ret = __kvm_faultin_pfn(vcpu, fault);
if (ret != RET_PF_CONTINUE)
return ret;
if (unlikely(!fault->slot))
return kvm_handle_noslot_fault(vcpu, fault, access);
+ /*
+ * Check again for a relevant mmu_notifier invalidation event purely to
+ * avoid contending mmu_lock. Most invalidations will be detected by
+ * the previous check, but checking is extremely cheap relative to the
+ * overall cost of failing to detect the invalidation until after
+ * mmu_lock is acquired.
+ */
+ if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) {
+ kvm_release_pfn_clean(fault->pfn);
+ return RET_PF_RETRY;
+ }
+
return RET_PF_CONTINUE;
}
if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
return true;
+ /*
+ * Check for a relevant mmu_notifier invalidation event one last time
+ * now that mmu_lock is held, as the "unsafe" checks performed without
+ * holding mmu_lock can get false negatives.
+ */
return fault->slot &&
mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
}
on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
}
+ /*
+ * Architecture specific boot CPU setup. Defined as empty weak function in
+ * init/main.c. Architectures can override it.
+ */
+ void smp_prepare_boot_cpu(void);
+
#ifdef CONFIG_SMP
#include <linux/preempt.h>
#define generic_smp_call_function_interrupt \
generic_smp_call_function_single_interrupt
- /*
- * Mark the boot cpu "online" so that it can call console drivers in
- * printk() and can access its per-cpu storage.
- */
- void smp_prepare_boot_cpu(void);
-
extern unsigned int setup_max_cpus;
extern void __init setup_nr_cpu_ids(void);
extern void __init smp_init(void);
(up_smp_call_function(func, info))
static inline void smp_send_reschedule(int cpu) { }
- #define smp_prepare_boot_cpu() do {} while (0)
#define smp_call_function_many(mask, func, info, wait) \
(up_smp_call_function(func, info))
static inline void call_function_init(void) { }
static inline void kick_all_cpus_sync(void) { }
static inline void wake_up_all_idle_cpus(void) { }
+#define setup_max_cpus 0
+
#ifdef CONFIG_UP_LATE_INIT
extern void __init up_late_init(void);
static inline void smp_init(void) { up_late_init(); }
* regular asm read for the stable.
*/
#ifndef __smp_processor_id
-#define __smp_processor_id(x) raw_smp_processor_id(x)
+#define __smp_processor_id() raw_smp_processor_id()
#endif
#ifdef CONFIG_DEBUG_PREEMPT
#include <linux/init_syscalls.h>
#include <linux/stackdepot.h>
#include <linux/randomize_kstack.h>
+#include <linux/pidfs.h>
#include <net/net_namespace.h>
#include <asm/io.h>
__setup("rdinit=", rdinit_setup);
#ifndef CONFIG_SMP
-static const unsigned int setup_max_cpus = NR_CPUS;
static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
#endif
{
}
+ void __init __weak smp_prepare_boot_cpu(void)
+ { /* Empty default; being __weak, another definition may replace it. */
+ }
+
# if THREAD_SIZE >= PAGE_SIZE
void __init __weak thread_stack_cache_init(void)
{
seq_file_init();
proc_root_init();
nsfs_init();
+ pidfs_init();
cpuset_init();
cgroup_init();
taskstats_init_early();
sched_init_smp();
workqueue_init_topology();
+ async_init();
padata_init();
page_alloc_init_late();