Linux 6.10
[linux-block.git] arch/x86/kernel/cpu/intel.c
b2441318 1// SPDX-License-Identifier: GPL-2.0
1da177e4 2#include <linux/kernel.h>
65fddcfc 3#include <linux/pgtable.h>
1da177e4
LT
4
5#include <linux/string.h>
6#include <linux/bitops.h>
7#include <linux/smp.h>
83ce4009 8#include <linux/sched.h>
e6017571 9#include <linux/sched/clock.h>
b041b525 10#include <linux/semaphore.h>
1da177e4 11#include <linux/thread_info.h>
186f4360 12#include <linux/init.h>
8bdbd962 13#include <linux/uaccess.h>
b041b525 14#include <linux/workqueue.h>
ef4ae6e4 15#include <linux/delay.h>
b041b525 16#include <linux/cpuhotplug.h>
1da177e4 17
cd4d09ec 18#include <asm/cpufeature.h>
1da177e4 19#include <asm/msr.h>
73bdb73f 20#include <asm/bugs.h>
1f442d70 21#include <asm/cpu.h>
08e237fa 22#include <asm/intel-family.h>
82ad097b 23#include <asm/microcode.h>
e16fd002
GA
24#include <asm/hwcap2.h>
25#include <asm/elf.h>
6650cdd9
PZI
26#include <asm/cpu_device_id.h>
27#include <asm/cmdline.h>
d7e94dbd 28#include <asm/traps.h>
923f3a2b 29#include <asm/resctrl.h>
0cd39f46 30#include <asm/numa.h>
9223d0dc 31#include <asm/thermal.h>
1da177e4 32
185f3b9d 33#ifdef CONFIG_X86_64
8bdbd962 34#include <linux/topology.h>
185f3b9d
YL
35#endif
36
1da177e4
LT
37#include "cpu.h"
38
39#ifdef CONFIG_X86_LOCAL_APIC
40#include <asm/mpspec.h>
41#include <asm/apic.h>
1da177e4
LT
42#endif
43
6650cdd9
PZI
44enum split_lock_detect_state {
45 sld_off = 0,
46 sld_warn,
47 sld_fatal,
ef4ae6e4 48 sld_ratelimit,
6650cdd9
PZI
49};
50
51/*
ebb1064e
FY
52 * Default to sld_off because most systems do not support split lock detection.
53 * sld_state_setup() will switch this to sld_warn on systems that support
54 * split lock/bus lock detect, unless there is a command line override.
6650cdd9 55 */
dbaba470 56static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
a6a60741 57static u64 msr_test_ctrl_cache __ro_after_init;
6650cdd9 58
009bce1d
SC
59/*
 60 * With a name like MSR_TEST_CTRL it should go without saying, but don't touch
 61 * MSR_TEST_CTRL unless the CPU is one of the whitelisted models. Writing it
62 * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
63 */
64static bool cpu_model_supports_sld __ro_after_init;
65
1e03bff3
RN
66/*
67 * Processors which have self-snooping capability can handle conflicting
 68 * memory types across CPUs by snooping their own cache. However, there
 69 * are CPU models on which conflicting memory types still lead to
 70 * unpredictable behavior, machine check errors, or hangs. Clear this
 71 * feature to prevent its use on machines with known errata.
72 */
73static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
74{
75 switch (c->x86_model) {
76 case INTEL_FAM6_CORE_YONAH:
77 case INTEL_FAM6_CORE2_MEROM:
78 case INTEL_FAM6_CORE2_MEROM_L:
79 case INTEL_FAM6_CORE2_PENRYN:
80 case INTEL_FAM6_CORE2_DUNNINGTON:
81 case INTEL_FAM6_NEHALEM:
82 case INTEL_FAM6_NEHALEM_G:
83 case INTEL_FAM6_NEHALEM_EP:
84 case INTEL_FAM6_NEHALEM_EX:
85 case INTEL_FAM6_WESTMERE:
86 case INTEL_FAM6_WESTMERE_EP:
87 case INTEL_FAM6_SANDYBRIDGE:
88 setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
89 }
90}
91
e16fd002
GA
92static bool ring3mwait_disabled __read_mostly;
93
94static int __init ring3mwait_disable(char *__unused)
95{
96 ring3mwait_disabled = true;
12441ccd 97 return 1;
e16fd002
GA
98}
99__setup("ring3mwait=disable", ring3mwait_disable);
100
101static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
102{
103 /*
 104 * The ring 3 MONITOR/MWAIT feature is not enumerated in CPUID and has
 105 * to be detected by comparing the CPU family and model.
106 */
4d8bb006 107 if (c->x86 != 6)
e16fd002 108 return;
4d8bb006
PL
109 switch (c->x86_model) {
110 case INTEL_FAM6_XEON_PHI_KNL:
111 case INTEL_FAM6_XEON_PHI_KNM:
112 break;
113 default:
114 return;
115 }
e16fd002 116
e9ea1e7f 117 if (ring3mwait_disabled)
e16fd002 118 return;
e16fd002
GA
119
120 set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
e9ea1e7f
KH
121 this_cpu_or(msr_misc_features_shadow,
122 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
e16fd002
GA
123
124 if (c == &boot_cpu_data)
125 ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
126}
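
/*
 * Usage note (illustrative): on Xeon Phi parts (Knights Landing /
 * Knights Mill) the ring 3 MONITOR/MWAIT capability is recorded in
 * X86_FEATURE_RING3MWAIT and, for the boot CPU, advertised to user
 * space through ELF_HWCAP2.  Booting with "ring3mwait=disable" makes
 * the function above bail out before setting either of those.
 */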
127
a5b29663
DW
128/*
129 * Early microcode releases for the Spectre v2 mitigation were broken.
 130 * Information taken from:
e3b3121f 131 * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
a5b29663
DW
132 * - https://kb.vmware.com/s/article/52345
133 * - Microcode revisions observed in the wild
134 * - Release note from 20180108 microcode release
135 */
136struct sku_microcode {
137 u8 model;
138 u8 stepping;
139 u32 microcode;
140};
141static const struct sku_microcode spectre_bad_microcodes[] = {
c66f78a6
PZ
142 { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 },
143 { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 },
144 { INTEL_FAM6_KABYLAKE, 0x09, 0x80 },
af239c44
PZ
145 { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 },
146 { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 },
a5b29663
DW
147 { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
148 { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
c66f78a6 149 { INTEL_FAM6_BROADWELL, 0x04, 0x28 },
5e741407 150 { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b },
5ebb34ed
PZ
151 { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 },
152 { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 },
a5b29663 153 { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
af239c44 154 { INTEL_FAM6_HASWELL_L, 0x01, 0x21 },
5e741407 155 { INTEL_FAM6_HASWELL_G, 0x01, 0x18 },
c66f78a6 156 { INTEL_FAM6_HASWELL, 0x03, 0x23 },
a5b29663
DW
157 { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
158 { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
159 { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
a5b29663
DW
160 /* Observed in the wild */
161 { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
162 { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
163};
164
165static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
166{
167 int i;
168
36268223
KRW
169 /*
 170 * We know that hypervisors lie to us about the microcode version, so
 171 * we may as well hope that they are running the correct version.
172 */
173 if (cpu_has(c, X86_FEATURE_HYPERVISOR))
174 return false;
175
1ab534e8
AK
176 if (c->x86 != 6)
177 return false;
178
a5b29663
DW
179 for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
180 if (c->x86_model == spectre_bad_microcodes[i].model &&
b399151c 181 c->x86_stepping == spectre_bad_microcodes[i].stepping)
a5b29663
DW
182 return (c->microcode <= spectre_bad_microcodes[i].microcode);
183 }
184 return false;
185}
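
/*
 * Worked example (illustrative): on a family 6 CPU whose model equals
 * INTEL_FAM6_KABYLAKE at stepping 0x0B, the table above lists 0x80 as
 * the last known-bad revision, so bad_spectre_microcode() returns true
 * for any loaded microcode revision <= 0x80 and false once a newer
 * revision is in place.  Under a hypervisor the check is skipped
 * entirely because the reported revision cannot be trusted.
 */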
186
6890cb1a
PB
187#define MSR_IA32_TME_ACTIVATE 0x982
188
189/* Helpers to access TME_ACTIVATE MSR */
190#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
191#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
192
193#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
194#define TME_ACTIVATE_POLICY_AES_XTS_128 0
195
196#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
197
198#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
199#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
200
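/*
 * Worked example (illustrative value): for a raw MSR_IA32_TME_ACTIVATE
 * value of 0x0001000600000003,
 *
 *   TME_ACTIVATE_LOCKED()      -> bit 0      = 1 (locked)
 *   TME_ACTIVATE_ENABLED()     -> bit 1      = 1 (enabled)
 *   TME_ACTIVATE_POLICY()      -> bits 7:4   = 0 (AES-XTS-128)
 *   TME_ACTIVATE_KEYID_BITS()  -> bits 35:32 = 6
 *   TME_ACTIVATE_CRYPTO_ALGS() -> bits 63:48 = 0x0001 (AES-XTS-128)
 *
 * With 6 KeyID bits, detect_tme_early() below computes
 * nr_keyids = (1 << 6) - 1 = 63 and subtracts 6 from c->x86_phys_bits.
 */
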
201/* Values for mktme_status (SW only construct) */
202#define MKTME_ENABLED 0
203#define MKTME_DISABLED 1
204#define MKTME_UNINITIALIZED 2
205static int mktme_status = MKTME_UNINITIALIZED;
206
207static void detect_tme_early(struct cpuinfo_x86 *c)
208{
209 u64 tme_activate, tme_policy, tme_crypto_algs;
210 int keyid_bits = 0, nr_keyids = 0;
211 static u64 tme_activate_cpu0 = 0;
212
213 rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
214
215 if (mktme_status != MKTME_UNINITIALIZED) {
216 if (tme_activate != tme_activate_cpu0) {
217 /* Broken BIOS? */
218 pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
219 pr_err_once("x86/tme: MKTME is not usable\n");
220 mktme_status = MKTME_DISABLED;
221
222 /* Proceed. We may need to exclude bits from x86_phys_bits. */
223 }
224 } else {
225 tme_activate_cpu0 = tme_activate;
226 }
227
228 if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
229 pr_info_once("x86/tme: not enabled by BIOS\n");
230 mktme_status = MKTME_DISABLED;
cd2236c2 231 clear_cpu_cap(c, X86_FEATURE_TME);
6890cb1a
PB
232 return;
233 }
234
235 if (mktme_status != MKTME_UNINITIALIZED)
236 goto detect_keyid_bits;
237
238 pr_info("x86/tme: enabled by BIOS\n");
239
240 tme_policy = TME_ACTIVATE_POLICY(tme_activate);
241 if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
242 pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
243
244 tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
245 if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
246 pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
247 tme_crypto_algs);
248 mktme_status = MKTME_DISABLED;
249 }
250detect_keyid_bits:
251 keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
252 nr_keyids = (1UL << keyid_bits) - 1;
253 if (nr_keyids) {
254 pr_info_once("x86/mktme: enabled by BIOS\n");
255 pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
256 } else {
257 pr_info_once("x86/mktme: disabled by BIOS\n");
258 }
259
260 if (mktme_status == MKTME_UNINITIALIZED) {
261 /* MKTME is usable */
262 mktme_status = MKTME_ENABLED;
263 }
264
265 /*
266 * KeyID bits effectively lower the number of physical address
267 * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
268 */
269 c->x86_phys_bits -= keyid_bits;
270}
271
0c2f6d04
TG
272void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
273{
274 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
275 return;
276
277 if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
278 return;
279
280 /*
281 * The BIOS can have limited CPUID to leaf 2, which breaks feature
282 * enumeration. Unlock it and update the maximum leaf info.
283 */
284 if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0)
285 c->cpuid_level = cpuid_eax(0);
286}
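
/*
 * Background note (hedged): some BIOSes have an option (often called
 * something like "Limit CPUID Maximum") that caps CPUID leaf 0 at 2 via
 * MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT.  Clearing that bit above makes
 * leaves > 2 visible again, which is why cpuid_eax(0) is re-read to
 * refresh c->cpuid_level.
 */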
287
148f9bb8 288static void early_init_intel(struct cpuinfo_x86 *c)
1da177e4 289{
161ec53c
FY
290 u64 misc_enable;
291
2b16a235
AK
292 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
293 (c->x86 == 0x6 && c->x86_model >= 0x0e))
294 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
185f3b9d 295
4167709b
BP
296 if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
297 c->microcode = intel_get_microcode_revision();
506ed6b5 298
2961298e 299 /* Now if any of them are set, check the blacklist and clear the lot */
7fcae111
DW
300 if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
301 cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
302 cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
2961298e
DW
303 cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
304 pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
7fcae111
DW
305 setup_clear_cpu_cap(X86_FEATURE_IBRS);
306 setup_clear_cpu_cap(X86_FEATURE_IBPB);
307 setup_clear_cpu_cap(X86_FEATURE_STIBP);
308 setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
7eb8956a 309 setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
7fcae111 310 setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
9f65fb29 311 setup_clear_cpu_cap(X86_FEATURE_SSBD);
52817587 312 setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
a5b29663
DW
313 }
314
7a0fc404
PA
315 /*
316 * Atom erratum AAE44/AAF40/AAG38/AAH41:
317 *
318 * A race condition between speculative fetches and invalidating
319 * a large page. This is worked around in microcode, but we
320 * need the microcode to have already been loaded... so if it is
321 * not, recommend a BIOS update and disable large pages.
322 */
b399151c 323 if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
30963c0a 324 c->microcode < 0x20e) {
1b74dde7 325 pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
30963c0a 326 clear_cpu_cap(c, X86_FEATURE_PSE);
7a0fc404
PA
327 }
328
185f3b9d
YL
329#ifdef CONFIG_X86_64
330 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
331#else
332 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
333 if (c->x86 == 15 && c->x86_cache_alignment == 64)
334 c->x86_cache_alignment = 128;
335#endif
40fb1715 336
13c6c532
JB
337 /* CPUID workaround for 0F33/0F34 CPU */
338 if (c->x86 == 0xF && c->x86_model == 0x3
b399151c 339 && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
13c6c532
JB
340 c->x86_phys_bits = 36;
341
40fb1715
VP
342 /*
343 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
83ce4009
IM
344 * with P/T states and does not stop in deep C-states.
345 *
346 * It is also reliable across cores and sockets. (but not across
347 * cabinets - we turn it off in that case explicitly.)
40fb1715
VP
348 */
349 if (c->x86_power & (1 << 8)) {
350 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
351 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
352 }
353
c54fdbb2
FT
354 /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
355 if (c->x86 == 6) {
356 switch (c->x86_model) {
bba10c5c
RT
357 case INTEL_FAM6_ATOM_SALTWELL_MID:
358 case INTEL_FAM6_ATOM_SALTWELL_TABLET:
359 case INTEL_FAM6_ATOM_SILVERMONT_MID:
0cc5359d 360 case INTEL_FAM6_ATOM_AIRMONT_NP:
c54fdbb2
FT
361 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
362 break;
363 default:
364 break;
365 }
366 }
367
75a04811
PA
368 /*
369 * There is a known erratum on Pentium III and Core Solo
370 * and Core Duo CPUs.
371 * " Page with PAT set to WC while associated MTRR is UC
372 * may consolidate to UC "
373 * Because of this erratum, it is better to stick with
374 * setting WC in MTRR rather than using PAT on these CPUs.
375 *
376 * Enable PAT WC only on P4, Core 2 or later CPUs.
377 */
378 if (c->x86 == 6 && c->x86_model < 15)
379 clear_cpu_cap(c, X86_FEATURE_PAT);
f8561296 380
161ec53c
FY
381 /*
382 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
383 * clear the fast string and enhanced fast string CPU capabilities.
384 */
385 if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
386 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
387 if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
1b74dde7 388 pr_info("Disabled fast string operations\n");
161ec53c
FY
389 setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
390 setup_clear_cpu_cap(X86_FEATURE_ERMS);
391 }
392 }
ee1b5b16
BD
393
394 /*
395 * Intel Quark Core DevMan_001.pdf section 6.4.11
396 * "The operating system also is required to invalidate (i.e., flush)
397 * the TLB when any changes are made to any of the page table entries.
398 * The operating system must reload CR3 to cause the TLB to be flushed"
399 *
c109bf95 400 * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
163b0991 401 * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
c109bf95 402 * to be modified.
ee1b5b16
BD
403 */
404 if (c->x86 == 5 && c->x86_model == 9) {
405 pr_info("Disabling PGE capability bit\n");
406 setup_clear_cpu_cap(X86_FEATURE_PGE);
407 }
1f12e32f 408
1e03bff3 409 check_memory_type_self_snoop_errata(c);
1910ad56 410
6890cb1a
PB
411 /*
412 * Adjust the number of physical bits early because it affects the
413 * valid bits of the MTRR mask registers.
414 */
415 if (cpu_has(c, X86_FEATURE_TME))
416 detect_tme_early(c);
1da177e4
LT
417}
418
923f3a2b
RC
419static void bsp_init_intel(struct cpuinfo_x86 *c)
420{
421 resctrl_cpu_detect(c);
422}
423
185f3b9d 424#ifdef CONFIG_X86_32
1da177e4
LT
425/*
426 * Early probe support logic for ppro memory erratum #50
427 *
428 * This is called before we do cpu ident work
429 */
65eb6b43 430
148f9bb8 431int ppro_with_ram_bug(void)
1da177e4
LT
432{
433 /* Uses data from early_cpu_detect now */
434 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
435 boot_cpu_data.x86 == 6 &&
436 boot_cpu_data.x86_model == 1 &&
b399151c 437 boot_cpu_data.x86_stepping < 8) {
1b74dde7 438 pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
1da177e4
LT
439 return 1;
440 }
441 return 0;
442}
65eb6b43 443
148f9bb8 444static void intel_smp_check(struct cpuinfo_x86 *c)
1f442d70 445{
1f442d70 446 /* calling is from identify_secondary_cpu() ? */
f6e9456c 447 if (!c->cpu_index)
1f442d70
YL
448 return;
449
450 /*
451 * Mask B, Pentium, but not Pentium MMX
452 */
453 if (c->x86 == 5 &&
b399151c 454 c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
1f442d70
YL
455 c->x86_model <= 3) {
456 /*
457 * Remember we have B step Pentia with bugs
458 */
 459 WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
 460 "with B stepping processors.\n");
461 }
1f442d70
YL
462}
463
69f2366c
CB
464static int forcepae;
465static int __init forcepae_setup(char *__unused)
466{
467 forcepae = 1;
468 return 1;
469}
470__setup("forcepae", forcepae_setup);
471
148f9bb8 472static void intel_workarounds(struct cpuinfo_x86 *c)
1da177e4 473{
4052704d
YL
474#ifdef CONFIG_X86_F00F_BUG
475 /*
d4e1a0af 476 * All models of Pentium and Pentium with MMX technology CPUs
8bdbd962 477 * have the F0 0F bug, which lets nonprivileged users lock up the
4eefbe79 478 * system. Announce that the fault handler will be checking for it.
d4e1a0af 479 * The Quark is also family 5, but does not have the same bug.
4052704d 480 */
e2604b49 481 clear_cpu_bug(c, X86_BUG_F00F);
fa392794 482 if (c->x86 == 5 && c->x86_model < 9) {
4052704d
YL
483 static int f00f_workaround_enabled;
484
e2604b49 485 set_cpu_bug(c, X86_BUG_F00F);
4052704d 486 if (!f00f_workaround_enabled) {
1b74dde7 487 pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
4052704d
YL
488 f00f_workaround_enabled = 1;
489 }
490 }
491#endif
492
493 /*
494 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
495 * model 3 mask 3
496 */
b399151c 497 if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
4052704d
YL
498 clear_cpu_cap(c, X86_FEATURE_SEP);
499
69f2366c
CB
500 /*
501 * PAE CPUID issue: many Pentium M report no PAE but may have a
502 * functionally usable PAE implementation.
503 * Forcefully enable PAE if kernel parameter "forcepae" is present.
504 */
505 if (forcepae) {
1b74dde7 506 pr_warn("PAE forced!\n");
69f2366c
CB
507 set_cpu_cap(c, X86_FEATURE_PAE);
508 add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
509 }
510
4052704d 511 /*
f0133acc 512 * P4 Xeon erratum 037 workaround.
4052704d
YL
513 * Hardware prefetcher may cause stale data to be loaded into the cache.
514 */
b399151c 515 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
0b131be8 516 if (msr_set_bit(MSR_IA32_MISC_ENABLE,
f0133acc 517 MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
c0a639ad 518 pr_info("CPU: C0 stepping P4 Xeon detected.\n");
f0133acc 519 pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n");
1da177e4
LT
520 }
521 }
1da177e4 522
4052704d
YL
523 /*
524 * See if we have a good local APIC by checking for buggy Pentia,
525 * i.e. all B steppings and the C2 stepping of P54C when using their
526 * integrated APIC (see 11AP erratum in "Pentium Processor
527 * Specification Update").
528 */
93984fbd 529 if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
b399151c 530 (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
9b13a93d 531 set_cpu_bug(c, X86_BUG_11AP);
185f3b9d 532
185f3b9d 533
4052704d 534#ifdef CONFIG_X86_INTEL_USERCOPY
185f3b9d 535 /*
4052704d 536 * Set up the preferred alignment for movsl bulk memory moves
185f3b9d 537 */
4052704d
YL
538 switch (c->x86) {
539 case 4: /* 486: untested */
540 break;
541 case 5: /* Old Pentia: untested */
542 break;
543 case 6: /* PII/PIII only like movsl with 8-byte alignment */
544 movsl_mask.mask = 7;
545 break;
546 case 15: /* P4 is OK down to 8-byte alignment */
547 movsl_mask.mask = 7;
548 break;
549 }
185f3b9d 550#endif
4052704d 551
1f442d70 552 intel_smp_check(c);
4052704d
YL
553}
554#else
148f9bb8 555static void intel_workarounds(struct cpuinfo_x86 *c)
4052704d
YL
556{
557}
185f3b9d
YL
558#endif
559
148f9bb8 560static void srat_detect_node(struct cpuinfo_x86 *c)
185f3b9d 561{
645a7919 562#ifdef CONFIG_NUMA
185f3b9d
YL
563 unsigned node;
564 int cpu = smp_processor_id();
185f3b9d
YL
565
566 /* Don't do the funky fallback heuristics the AMD version employs
567 for now. */
bbc9e2f4 568 node = numa_cpu_node(cpu);
50f2d7f6 569 if (node == NUMA_NO_NODE || !node_online(node)) {
d9c2d5ac
YL
570 /* reuse the value from init_cpu_to_node() */
571 node = cpu_to_node(cpu);
572 }
185f3b9d 573 numa_set_node(cpu, node);
185f3b9d
YL
574#endif
575}
576
90218ac7
KH
577static void init_cpuid_fault(struct cpuinfo_x86 *c)
578{
579 u64 msr;
580
581 if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
582 if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
583 set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
584 }
585}
586
587static void init_intel_misc_features(struct cpuinfo_x86 *c)
588{
589 u64 msr;
590
591 if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
592 return;
593
e9ea1e7f
KH
594 /* Clear all MISC features */
595 this_cpu_write(msr_misc_features_shadow, 0);
596
597 /* Check features and update capabilities and shadow control bits */
90218ac7
KH
598 init_cpuid_fault(c);
599 probe_xeon_phi_r3mwait(c);
e9ea1e7f
KH
600
601 msr = this_cpu_read(msr_misc_features_shadow);
602 wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
90218ac7
KH
603}
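
/*
 * Flow summary (based on the code above): MSR_MISC_FEATURES_ENABLES is
 * mirrored in the per-CPU msr_misc_features_shadow.  The shadow is
 * cleared first, init_cpuid_fault() and probe_xeon_phi_r3mwait() then
 * record which capabilities exist (probe_xeon_phi_r3mwait() also ORs
 * its enable bit into the shadow), and the final wrmsrl() pushes the
 * accumulated shadow value to the hardware MSR in a single write.
 */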
604
6650cdd9 605static void split_lock_init(void);
ebb1064e 606static void bus_lock_init(void);
6650cdd9 607
148f9bb8 608static void init_intel(struct cpuinfo_x86 *c)
1da177e4 609{
2b16a235
AK
610 early_init_intel(c);
611
4052704d 612 intel_workarounds(c);
1da177e4 613
807e9bc8 614 init_intel_cacheinfo(c);
aece118e 615
65eb6b43 616 if (c->cpuid_level > 9) {
0080e667
VP
617 unsigned eax = cpuid_eax(10);
618 /* Check for version and the number of counters */
619 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
d0e95ebd 620 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
0080e667 621 }
1da177e4 622
054efb64 623 if (cpu_has(c, X86_FEATURE_XMM2))
4052704d 624 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
362f924b
BP
625
626 if (boot_cpu_has(X86_FEATURE_DS)) {
807e9bc8
DW
627 unsigned int l1, l2;
628
4052704d 629 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
3f2adf00 630 if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL))
4052704d 631 set_cpu_cap(c, X86_FEATURE_BTS);
3f2adf00 632 if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
4052704d 633 set_cpu_cap(c, X86_FEATURE_PEBS);
4052704d 634 }
1da177e4 635
906bf7fd 636 if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
40e2d7f9 637 (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
9b13a93d 638 set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
e736ad54 639
08e237fa
PZ
640 if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
641 ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
642 set_cpu_bug(c, X86_BUG_MONITOR);
643
4052704d
YL
644#ifdef CONFIG_X86_64
645 if (c->x86 == 15)
646 c->x86_cache_alignment = c->x86_clflush_size * 2;
647 if (c->x86 == 6)
648 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
649#else
65eb6b43
PC
650 /*
651 * Names for the Pentium II/Celeron processors
652 * detectable only by also checking the cache size.
653 * Dixon is NOT a Celeron.
654 */
1da177e4 655 if (c->x86 == 6) {
807e9bc8 656 unsigned int l2 = c->x86_cache_size;
4052704d
YL
657 char *p = NULL;
658
1da177e4
LT
659 switch (c->x86_model) {
660 case 5:
865be7a8
OZ
661 if (l2 == 0)
662 p = "Celeron (Covington)";
663 else if (l2 == 256)
664 p = "Mobile Pentium II (Dixon)";
1da177e4 665 break;
65eb6b43 666
1da177e4
LT
667 case 6:
668 if (l2 == 128)
669 p = "Celeron (Mendocino)";
b399151c 670 else if (c->x86_stepping == 0 || c->x86_stepping == 5)
1da177e4
LT
671 p = "Celeron-A";
672 break;
65eb6b43 673
1da177e4
LT
674 case 8:
675 if (l2 == 128)
676 p = "Celeron (Coppermine)";
677 break;
678 }
1da177e4 679
4052704d
YL
680 if (p)
681 strcpy(c->x86_model_id, p);
1da177e4 682 }
1da177e4 683
185f3b9d
YL
684 if (c->x86 == 15)
685 set_cpu_cap(c, X86_FEATURE_P4);
686 if (c->x86 == 6)
687 set_cpu_cap(c, X86_FEATURE_P3);
f4166c54 688#endif
185f3b9d 689
185f3b9d 690 /* Work around errata */
2759c328 691 srat_detect_node(c);
e38e05a8 692
1db2a6e1
SC
693 init_ia32_feat_ctl(c);
694
90218ac7 695 init_intel_misc_features(c);
95c5824f 696
6650cdd9 697 split_lock_init();
ebb1064e 698 bus_lock_init();
9223d0dc
BP
699
700 intel_init_thermal(c);
42ed458a 701}
1da177e4 702
185f3b9d 703#ifdef CONFIG_X86_32
148f9bb8 704static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
1da177e4 705{
65eb6b43
PC
706 /*
707 * Intel PIII Tualatin. This comes in two flavours.
1da177e4
LT
708 * One has 256kb of cache, the other 512. We have no way
709 * to determine which, so we use a boottime override
710 * for the 512kb model, and assume 256 otherwise.
711 */
712 if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
713 size = 256;
aece118e
BD
714
715 /*
716 * Intel Quark SoC X1000 contains a 4-way set associative
717 * 16K cache with a 16 byte cache line and 256 lines per tag
718 */
719 if ((c->x86 == 5) && (c->x86_model == 9))
720 size = 16;
1da177e4
LT
721 return size;
722}
185f3b9d 723#endif
1da177e4 724
e0ba94f1
AS
725#define TLB_INST_4K 0x01
726#define TLB_INST_4M 0x02
727#define TLB_INST_2M_4M 0x03
728
729#define TLB_INST_ALL 0x05
730#define TLB_INST_1G 0x06
731
732#define TLB_DATA_4K 0x11
733#define TLB_DATA_4M 0x12
734#define TLB_DATA_2M_4M 0x13
735#define TLB_DATA_4K_4M 0x14
736
737#define TLB_DATA_1G 0x16
738
739#define TLB_DATA0_4K 0x21
740#define TLB_DATA0_4M 0x22
741#define TLB_DATA0_2M_4M 0x23
742
743#define STLB_4K 0x41
dd360393 744#define STLB_4K_2M 0x42
e0ba94f1 745
148f9bb8 746static const struct _tlb_table intel_tlb_table[] = {
e0ba94f1
AS
747 { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
748 { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
749 { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
750 { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
751 { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
752 { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
77df779d 753 { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
e0ba94f1
AS
754 { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
755 { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
756 { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
757 { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
758 { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
759 { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" },
760 { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" },
761 { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
762 { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" },
763 { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
764 { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
dd360393
KS
765 { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" },
766 { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" },
b837913f 767 { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" },
768 { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
769 { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" },
dd360393 770 { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
e0ba94f1
AS
771 { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
772 { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
773 { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
774 { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
775 { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
a927792c
YG
776 { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" },
777 { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" },
e0ba94f1
AS
778 { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
779 { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
dd360393 780 { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
77df779d 781 { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
e0ba94f1
AS
782 { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
783 { 0x00, 0, 0 }
784};
785
148f9bb8 786static void intel_tlb_lookup(const unsigned char desc)
e0ba94f1
AS
787{
788 unsigned char k;
789 if (desc == 0)
790 return;
791
792 /* look up this descriptor in the table */
77df779d
SH
793 for (k = 0; intel_tlb_table[k].descriptor != desc &&
794 intel_tlb_table[k].descriptor != 0; k++)
e0ba94f1
AS
795 ;
796
797 if (intel_tlb_table[k].tlb_type == 0)
798 return;
799
800 switch (intel_tlb_table[k].tlb_type) {
801 case STLB_4K:
802 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
803 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
804 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
805 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
806 break;
dd360393
KS
807 case STLB_4K_2M:
808 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
809 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
810 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
811 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
812 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
813 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
814 if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
815 tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
816 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
817 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
818 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
819 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
820 break;
e0ba94f1
AS
821 case TLB_INST_ALL:
822 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
823 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
824 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
825 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
826 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
827 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
828 break;
829 case TLB_INST_4K:
830 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
831 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
832 break;
833 case TLB_INST_4M:
834 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
835 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
836 break;
837 case TLB_INST_2M_4M:
838 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
839 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
840 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
841 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
842 break;
843 case TLB_DATA_4K:
844 case TLB_DATA0_4K:
845 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
846 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
847 break;
848 case TLB_DATA_4M:
849 case TLB_DATA0_4M:
850 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
851 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
852 break;
853 case TLB_DATA_2M_4M:
854 case TLB_DATA0_2M_4M:
855 if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
856 tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
857 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
858 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
859 break;
860 case TLB_DATA_4K_4M:
861 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
862 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
863 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
864 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
865 break;
dd360393
KS
866 case TLB_DATA_1G:
867 if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
868 tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
e0ba94f1
AS
869 break;
870 }
871}
872
148f9bb8 873static void intel_detect_tlb(struct cpuinfo_x86 *c)
e0ba94f1
AS
874{
875 int i, j, n;
876 unsigned int regs[4];
877 unsigned char *desc = (unsigned char *)regs;
5b556332
BP
878
879 if (c->cpuid_level < 2)
880 return;
881
e0ba94f1
AS
882 /* Number of times to iterate */
883 n = cpuid_eax(2) & 0xFF;
884
885 for (i = 0 ; i < n ; i++) {
886 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
887
888 /* If bit 31 is set, this is an unknown format */
889 for (j = 0 ; j < 3 ; j++)
890 if (regs[j] & (1 << 31))
891 regs[j] = 0;
892
893 /* Byte 0 is level count, not a descriptor */
894 for (j = 1 ; j < 16 ; j++)
895 intel_tlb_lookup(desc[j]);
896 }
897}
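
/*
 * Worked example (hypothetical register contents): if one cpuid(2)
 * iteration returned a descriptor byte of 0x63, the table above maps it
 * to TLB_DATA_1G with 4 entries, so intel_tlb_lookup() raises
 * tlb_lld_1g[ENTRIES] to at least 4.  A descriptor byte of 0xc1 (STLB,
 * 4K+2M, 1024 entries) bumps the 4K, 2M and 4M instruction and data
 * limits instead.  Byte 0 of EAX is skipped because it encodes how many
 * times the leaf must be queried, not a descriptor.
 */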
898
148f9bb8 899static const struct cpu_dev intel_cpu_dev = {
1da177e4 900 .c_vendor = "Intel",
65eb6b43 901 .c_ident = { "GenuineIntel" },
185f3b9d 902#ifdef CONFIG_X86_32
09dc68d9
JB
903 .legacy_models = {
904 { .family = 4, .model_names =
65eb6b43
PC
905 {
906 [0] = "486 DX-25/33",
907 [1] = "486 DX-50",
908 [2] = "486 SX",
909 [3] = "486 DX/2",
910 [4] = "486 SL",
911 [5] = "486 SX/2",
912 [7] = "486 DX/2-WB",
913 [8] = "486 DX/4",
1da177e4
LT
914 [9] = "486 DX/4-WB"
915 }
916 },
09dc68d9 917 { .family = 5, .model_names =
65eb6b43
PC
918 {
919 [0] = "Pentium 60/66 A-step",
920 [1] = "Pentium 60/66",
1da177e4 921 [2] = "Pentium 75 - 200",
65eb6b43 922 [3] = "OverDrive PODP5V83",
1da177e4 923 [4] = "Pentium MMX",
65eb6b43 924 [7] = "Mobile Pentium 75 - 200",
aece118e
BD
925 [8] = "Mobile Pentium MMX",
926 [9] = "Quark SoC X1000",
1da177e4
LT
927 }
928 },
09dc68d9 929 { .family = 6, .model_names =
65eb6b43 930 {
1da177e4 931 [0] = "Pentium Pro A-step",
65eb6b43
PC
932 [1] = "Pentium Pro",
933 [3] = "Pentium II (Klamath)",
934 [4] = "Pentium II (Deschutes)",
935 [5] = "Pentium II (Deschutes)",
1da177e4 936 [6] = "Mobile Pentium II",
65eb6b43
PC
937 [7] = "Pentium III (Katmai)",
938 [8] = "Pentium III (Coppermine)",
1da177e4
LT
939 [10] = "Pentium III (Cascades)",
940 [11] = "Pentium III (Tualatin)",
941 }
942 },
09dc68d9 943 { .family = 15, .model_names =
1da177e4
LT
944 {
945 [0] = "Pentium 4 (Unknown)",
946 [1] = "Pentium 4 (Willamette)",
947 [2] = "Pentium 4 (Northwood)",
948 [4] = "Pentium 4 (Foster)",
949 [5] = "Pentium 4 (Foster)",
950 }
951 },
952 },
09dc68d9 953 .legacy_cache_size = intel_size_cache,
185f3b9d 954#endif
e0ba94f1 955 .c_detect_tlb = intel_detect_tlb,
03ae5768 956 .c_early_init = early_init_intel,
923f3a2b 957 .c_bsp_init = bsp_init_intel,
1da177e4 958 .c_init = init_intel,
10a434fc 959 .c_x86_vendor = X86_VENDOR_INTEL,
1da177e4
LT
960};
961
10a434fc 962cpu_dev_register(intel_cpu_dev);
6650cdd9
PZI
963
964#undef pr_fmt
965#define pr_fmt(fmt) "x86/split lock detection: " fmt
966
967static const struct {
968 const char *option;
969 enum split_lock_detect_state state;
970} sld_options[] __initconst = {
971 { "off", sld_off },
972 { "warn", sld_warn },
973 { "fatal", sld_fatal },
ef4ae6e4 974 { "ratelimit:", sld_ratelimit },
6650cdd9
PZI
975};
976
ef4ae6e4
FY
977static struct ratelimit_state bld_ratelimit;
978
72720937 979static unsigned int sysctl_sld_mitigate = 1;
48380368 980static DEFINE_SEMAPHORE(buslock_sem, 1);
b041b525 981
72720937
GP
982#ifdef CONFIG_PROC_SYSCTL
983static struct ctl_table sld_sysctls[] = {
984 {
985 .procname = "split_lock_mitigate",
986 .data = &sysctl_sld_mitigate,
987 .maxlen = sizeof(unsigned int),
988 .mode = 0644,
989 .proc_handler = proc_douintvec_minmax,
990 .extra1 = SYSCTL_ZERO,
991 .extra2 = SYSCTL_ONE,
992 },
72720937
GP
993};
994
995static int __init sld_mitigate_sysctl_init(void)
996{
997 register_sysctl_init("kernel", sld_sysctls);
998 return 0;
999}
1000
1001late_initcall(sld_mitigate_sysctl_init);
1002#endif
1003
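/*
 * Usage example (assuming CONFIG_PROC_SYSCTL): the table above lands in
 * /proc/sys/kernel/split_lock_mitigate.  Writing 0 there, e.g.
 *
 *   # echo 0 > /proc/sys/kernel/split_lock_mitigate
 *
 * makes split_lock_warn() below skip the msleep()/semaphore "misery"
 * path and only disable split lock detection on the offending CPU for a
 * couple of jiffies.
 */
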
6650cdd9
PZI
1004static inline bool match_option(const char *arg, int arglen, const char *opt)
1005{
ef4ae6e4
FY
1006 int len = strlen(opt), ratelimit;
1007
1008 if (strncmp(arg, opt, len))
1009 return false;
1010
1011 /*
1012 * Min ratelimit is 1 bus lock/sec.
1013 * Max ratelimit is 1000 bus locks/sec.
1014 */
1015 if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
1016 ratelimit > 0 && ratelimit <= 1000) {
1017 ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
1018 ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
1019 return true;
1020 }
6650cdd9 1021
ef4ae6e4 1022 return len == arglen;
6650cdd9
PZI
1023}
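
/*
 * Example (illustrative): booting with "split_lock_detect=ratelimit:10"
 * matches the "ratelimit:" entry in sld_options[], initialises
 * bld_ratelimit to 10 bus locks per second and selects sld_ratelimit;
 * values outside 1-1000 fall through and the option is treated as
 * unrecognised, leaving the default state in place.
 */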
1024
dbaba470
XL
1025static bool split_lock_verify_msr(bool on)
1026{
1027 u64 ctrl, tmp;
1028
1029 if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
1030 return false;
1031 if (on)
1032 ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1033 else
1034 ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1035 if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
1036 return false;
1037 rdmsrl(MSR_TEST_CTRL, tmp);
1038 return ctrl == tmp;
1039}
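
/*
 * The write-then-read-back dance above is deliberately paranoid (see
 * the MSR_TEST_CTRL comment near the top of this file): it proves both
 * that the MSR exists and that the SPLIT_LOCK_DETECT bit actually
 * sticks before the feature is advertised or used.
 */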
1040
ebb1064e 1041static void __init sld_state_setup(void)
6650cdd9 1042{
dbaba470 1043 enum split_lock_detect_state state = sld_warn;
6650cdd9
PZI
1044 char arg[20];
1045 int i, ret;
1046
ebb1064e
FY
1047 if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
1048 !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
dbaba470 1049 return;
6650cdd9
PZI
1050
1051 ret = cmdline_find_option(boot_command_line, "split_lock_detect",
1052 arg, sizeof(arg));
1053 if (ret >= 0) {
1054 for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
1055 if (match_option(arg, ret, sld_options[i].option)) {
dbaba470 1056 state = sld_options[i].state;
6650cdd9
PZI
1057 break;
1058 }
1059 }
1060 }
ebb1064e
FY
1061 sld_state = state;
1062}
6650cdd9 1063
ebb1064e
FY
1064static void __init __split_lock_setup(void)
1065{
1066 if (!split_lock_verify_msr(false)) {
1067 pr_info("MSR access failed: Disabled\n");
dbaba470 1068 return;
6650cdd9 1069 }
dbaba470 1070
a6a60741
XL
1071 rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
1072
dbaba470
XL
1073 if (!split_lock_verify_msr(true)) {
1074 pr_info("MSR access failed: Disabled\n");
1075 return;
1076 }
1077
ebb1064e
FY
1078 /* Restore the MSR to its cached value. */
1079 wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
1080
dbaba470 1081 setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
6650cdd9
PZI
1082}
1083
1084/*
dbaba470
XL
1085 * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
1086 * is not implemented as one thread could undo the setting of the other
1087 * thread immediately after dropping the lock anyway.
6650cdd9 1088 */
dbaba470 1089static void sld_update_msr(bool on)
6650cdd9 1090{
a6a60741 1091 u64 test_ctrl_val = msr_test_ctrl_cache;
6650cdd9
PZI
1092
1093 if (on)
1094 test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
6650cdd9 1095
dbaba470 1096 wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
6650cdd9
PZI
1097}
1098
1099static void split_lock_init(void)
1100{
ef4ae6e4
FY
1101 /*
 1102 * In ratelimit mode, bus locks are rate limited via the #DB handler
 1103 * and #AC-based split lock detection is left disabled.
1104 */
1105 if (sld_state == sld_ratelimit) {
1106 split_lock_verify_msr(false);
1107 return;
1108 }
1109
009bce1d
SC
1110 if (cpu_model_supports_sld)
1111 split_lock_verify_msr(sld_state != sld_off);
6650cdd9
PZI
1112}
1113
72720937 1114static void __split_lock_reenable_unlock(struct work_struct *work)
b041b525
TL
1115{
1116 sld_update_msr(true);
1117 up(&buslock_sem);
1118}
1119
72720937
GP
1120static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
1121
1122static void __split_lock_reenable(struct work_struct *work)
1123{
1124 sld_update_msr(true);
1125}
1126static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
1127
b041b525
TL
1128/*
1129 * If a CPU goes offline with pending delayed work to re-enable split lock
1130 * detection then the delayed work will be executed on some other CPU. That
1131 * handles releasing the buslock_sem, but because it executes on a
1132 * different CPU probably won't re-enable split lock detection. This is a
1133 * problem on HT systems since the sibling CPU on the same core may then be
1134 * left running with split lock detection disabled.
1135 *
1136 * Unconditionally re-enable detection here.
1137 */
1138static int splitlock_cpu_offline(unsigned int cpu)
1139{
1140 sld_update_msr(true);
1141
1142 return 0;
1143}
1144
d7e94dbd 1145static void split_lock_warn(unsigned long ip)
6650cdd9 1146{
72720937 1147 struct delayed_work *work;
b041b525 1148 int cpu;
6650cdd9 1149
b041b525
TL
1150 if (!current->reported_split_lock)
1151 pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
1152 current->comm, current->pid, ip);
1153 current->reported_split_lock = 1;
1154
72720937
GP
1155 if (sysctl_sld_mitigate) {
1156 /*
 1157 * Misery factor #1:
1158 * sleep 10ms before trying to execute split lock.
1159 */
1160 if (msleep_interruptible(10) > 0)
1161 return;
1162 /*
1163 * Misery factor #2:
1164 * only allow one buslocked disabled core at a time.
1165 */
1166 if (down_interruptible(&buslock_sem) == -EINTR)
1167 return;
1168 work = &sl_reenable_unlock;
1169 } else {
1170 work = &sl_reenable;
1171 }
1172
b041b525 1173 cpu = get_cpu();
72720937 1174 schedule_delayed_work_on(cpu, work, 2);
b041b525
TL
1175
1176 /* Disable split lock detection on this CPU to make progress */
dbaba470 1177 sld_update_msr(false);
b041b525 1178 put_cpu();
d7e94dbd
TG
1179}
1180
1181bool handle_guest_split_lock(unsigned long ip)
1182{
1183 if (sld_state == sld_warn) {
1184 split_lock_warn(ip);
1185 return true;
1186 }
1187
1188 pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
1189 current->comm, current->pid,
1190 sld_state == sld_fatal ? "fatal" : "bogus", ip);
1191
1192 current->thread.error_code = 0;
1193 current->thread.trap_nr = X86_TRAP_AC;
1194 force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
1195 return false;
1196}
1197EXPORT_SYMBOL_GPL(handle_guest_split_lock);
1198
ebb1064e
FY
1199static void bus_lock_init(void)
1200{
1201 u64 val;
1202
ffa6482e 1203 if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
ebb1064e
FY
1204 return;
1205
ebb1064e 1206 rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
ffa6482e
CQ
1207
1208 if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
1209 (sld_state == sld_warn || sld_state == sld_fatal)) ||
1210 sld_state == sld_off) {
1211 /*
1212 * Warn and fatal are handled by #AC for split lock if #AC for
1213 * split lock is supported.
1214 */
1215 val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
1216 } else {
1217 val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
1218 }
1219
ebb1064e
FY
1220 wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
1221}
1222
d7e94dbd
TG
1223bool handle_user_split_lock(struct pt_regs *regs, long error_code)
1224{
1225 if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
1226 return false;
1227 split_lock_warn(regs->ip);
6650cdd9
PZI
1228 return true;
1229}
1230
ebb1064e
FY
1231void handle_bus_lock(struct pt_regs *regs)
1232{
1233 switch (sld_state) {
1234 case sld_off:
1235 break;
ef4ae6e4
FY
1236 case sld_ratelimit:
1237 /* Enforce no more than bld_ratelimit bus locks/sec. */
1238 while (!__ratelimit(&bld_ratelimit))
1239 msleep(20);
1240 /* Warn on the bus lock. */
1241 fallthrough;
ebb1064e
FY
1242 case sld_warn:
1243 pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
1244 current->comm, current->pid, regs->ip);
1245 break;
1246 case sld_fatal:
1247 force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
1248 break;
1249 }
1250}
1251
6650cdd9 1252/*
d7ce15e1
FY
1253 * CPU models that are known to have the per-core split-lock detection
1254 * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
6650cdd9
PZI
1255 */
1256static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
d7ce15e1
FY
1257 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
1258 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
1259 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
6650cdd9
PZI
1260 {}
1261};
1262
ebb1064e 1263static void __init split_lock_setup(struct cpuinfo_x86 *c)
6650cdd9 1264{
48fd5b5e
TL
1265 const struct x86_cpu_id *m;
1266 u64 ia32_core_caps;
1267
1268 if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1269 return;
6650cdd9 1270
d7ce15e1 1271 /* Check for CPUs that have support but do not enumerate it: */
48fd5b5e 1272 m = x86_match_cpu(split_lock_cpu_ids);
d7ce15e1
FY
1273 if (m)
1274 goto supported;
48fd5b5e 1275
d7ce15e1 1276 if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
48fd5b5e 1277 return;
6650cdd9 1278
d7ce15e1
FY
1279 /*
1280 * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
1281 * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
1282 * it have split lock detection.
1283 */
1284 rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
1285 if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
1286 goto supported;
1287
1288 /* CPU is not in the model list and does not have the MSR bit: */
1289 return;
1290
1291supported:
009bce1d 1292 cpu_model_supports_sld = true;
ebb1064e
FY
1293 __split_lock_setup();
1294}
1295
1296static void sld_state_show(void)
1297{
1298 if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
1299 !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
1300 return;
1301
1302 switch (sld_state) {
1303 case sld_off:
1304 pr_info("disabled\n");
1305 break;
1306 case sld_warn:
b041b525 1307 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
ebb1064e 1308 pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
b041b525
TL
1309 if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
1310 "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
1311 pr_warn("No splitlock CPU offline handler\n");
1312 } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
ebb1064e 1313 pr_info("#DB: warning on user-space bus_locks\n");
b041b525 1314 }
ebb1064e
FY
1315 break;
1316 case sld_fatal:
1317 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
1318 pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
1319 } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
1320 pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
1321 boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
1322 " from non-WB" : "");
1323 }
1324 break;
ef4ae6e4
FY
1325 case sld_ratelimit:
1326 if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1327 pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
1328 break;
ebb1064e
FY
1329 }
1330}
1331
1332void __init sld_setup(struct cpuinfo_x86 *c)
1333{
1334 split_lock_setup(c);
1335 sld_state_setup();
1336 sld_state_show();
6650cdd9 1337}
250b3c0d
RN
1338
1339#define X86_HYBRID_CPU_TYPE_ID_SHIFT 24
1340
1341/**
1342 * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU
1343 *
1344 * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in
1345 * a hybrid processor. If the processor is not hybrid, returns 0.
1346 */
1347u8 get_this_hybrid_cpu_type(void)
1348{
1349 if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
1350 return 0;
1351
1352 return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
1353}
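
/*
 * Caller sketch (hypothetical, for illustration only):
 *
 *	u8 type = get_this_hybrid_cpu_type();
 *
 *	if (type)
 *		pr_debug("hybrid CPU type: %#x\n", type);
 *
 * On a non-hybrid part the function returns 0; on hybrid parts the
 * value comes from CPUID leaf 0x1a, EAX bits 31:24.
 */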