Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
1965aae3 PA |
2 | #ifndef _ASM_X86_MCE_H |
3 | #define _ASM_X86_MCE_H | |
e2f43029 | 4 | |
af170c50 | 5 | #include <uapi/asm/mce.h> |
e2f43029 | 6 | |
f51bde6f BP |
7 | /* |
8 | * Machine Check support for x86 | |
9 | */ | |
10 | ||
11 | /* MCG_CAP register defines */ | |
12 | #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ | |
93ac5754 QZ |
13 | #define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ |
14 | #define MCG_EXT_P BIT_ULL(9) /* Extended registers available */ | |
15 | #define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ | |
f51bde6f BP |
16 | #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ |
17 | #define MCG_EXT_CNT_SHIFT 16 | |
18 | #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) | |
93ac5754 QZ |
19 | #define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */ |
20 | #define MCG_ELOG_P BIT_ULL(26) /* Extended error log supported */ | |
21 | #define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */ | |
f51bde6f BP |
22 | |
23 | /* MCG_STATUS register defines */ | |
93ac5754 QZ |
24 | #define MCG_STATUS_RIPV BIT_ULL(0) /* restart ip valid */ |
25 | #define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */ | |
26 | #define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */ | |
27 | #define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */ | |
bc12edb8 AR |
28 | |
29 | /* MCG_EXT_CTL register defines */ | |
93ac5754 | 30 | #define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */ |
f51bde6f BP |
31 | |
32 | /* MCi_STATUS register defines */ | |
93ac5754 QZ |
33 | #define MCI_STATUS_VAL BIT_ULL(63) /* valid error */ |
34 | #define MCI_STATUS_OVER BIT_ULL(62) /* previous errors lost */ | |
35 | #define MCI_STATUS_UC BIT_ULL(61) /* uncorrected error */ | |
36 | #define MCI_STATUS_EN BIT_ULL(60) /* error enabled */ | |
37 | #define MCI_STATUS_MISCV BIT_ULL(59) /* misc error reg. valid */ | |
38 | #define MCI_STATUS_ADDRV BIT_ULL(58) /* addr reg. valid */ | |
39 | #define MCI_STATUS_PCC BIT_ULL(57) /* processor context corrupt */ | |
40 | #define MCI_STATUS_S BIT_ULL(56) /* Signaled machine check */ | |
41 | #define MCI_STATUS_AR BIT_ULL(55) /* Action required */ | |
e5276b1f QZ |
42 | #define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ |
43 | #define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) | |
44 | #define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) | |
2738c69a | 45 | #define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff) |
0ca06c08 | 46 | |
e3480271 | 47 | /* AMD-specific bits */ |
93ac5754 QZ |
48 | #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ |
49 | #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ | |
50 | #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ | |
51 | #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ | |
3f4da372 | 52 | #define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */ |
be0aec23 AG |
53 | |
54 | /* | |
55 | * McaX field if set indicates a given bank supports MCA extensions: | |
56 | * - Deferred error interrupt type is specifiable by bank. | |
57 | * - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers, | |
58 | * but should not be used to determine MSR numbers. | |
59 | * - TCC bit is present in MCx_STATUS. | |
60 | */ | |
61 | #define MCI_CONFIG_MCAX 0x1 | |
62 | #define MCI_IPID_MCATYPE 0xFFFF0000 | |
63 | #define MCI_IPID_HWID 0xFFF | |
e3480271 | 64 | |
0ca06c08 TL |
65 | /* |
66 | * Note that the full MCACOD field of IA32_MCi_STATUS MSR is | |
67 | * bits 15:0. But bit 12 is the 'F' bit, defined for corrected | |
68 | * errors to indicate that errors are being filtered by hardware. | |
69 | * We should mask out bit 12 when looking for specific signatures | |
70 | * of uncorrected errors - so the F bit is deliberately skipped | |
71 | * in this #define. | |
72 | */ | |
73 | #define MCACOD 0xefff /* MCA Error Code */ | |
f51bde6f BP |
74 | |
75 | /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ | |
76 | #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ | |
0ca06c08 | 77 | #define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */ |
f51bde6f BP |
78 | #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ |
79 | #define MCACOD_DATA 0x0134 /* Data Load */ | |
80 | #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ | |
81 | ||
82 | /* MCi_MISC register defines */ | |
83 | #define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) | |
84 | #define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) | |
85 | #define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ | |
86 | #define MCI_MISC_ADDR_LINEAR 1 /* linear address */ | |
87 | #define MCI_MISC_ADDR_PHYS 2 /* physical address */ | |
88 | #define MCI_MISC_ADDR_MEM 3 /* memory address */ | |
89 | #define MCI_MISC_ADDR_GENERIC 7 /* generic */ | |
90 | ||
91 | /* CTL2 register defines */ | |
93ac5754 | 92 | #define MCI_CTL2_CMCI_EN BIT_ULL(30) |
f51bde6f BP |
93 | #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL |
94 | ||
95 | #define MCJ_CTX_MASK 3 | |
96 | #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) | |
97 | #define MCJ_CTX_RANDOM 0 /* inject context: random */ | |
98 | #define MCJ_CTX_PROCESS 0x1 /* inject context: process */ | |
99 | #define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ | |
100 | #define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ | |
101 | #define MCJ_EXCEPTION 0x8 /* raise as exception */ | |
a9093684 | 102 | #define MCJ_IRQ_BROADCAST 0x10 /* do IRQ broadcasting */ |
f51bde6f BP |
103 | |
104 | #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ | |
105 | ||
d8ecca40 | 106 | #define MCE_LOG_MIN_LEN 32U |
f51bde6f BP |
107 | #define MCE_LOG_SIGNATURE "MACHINECHECK" |
108 | ||
adc53f2e | 109 | /* AMD Scalable MCA */ |
a9750a31 YG |
110 | #define MSR_AMD64_SMCA_MC0_CTL 0xc0002000 |
111 | #define MSR_AMD64_SMCA_MC0_STATUS 0xc0002001 | |
112 | #define MSR_AMD64_SMCA_MC0_ADDR 0xc0002002 | |
8dd1e17a | 113 | #define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003 |
adc53f2e | 114 | #define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004 |
be0aec23 | 115 | #define MSR_AMD64_SMCA_MC0_IPID 0xc0002005 |
db819d60 | 116 | #define MSR_AMD64_SMCA_MC0_SYND 0xc0002006 |
34102009 YG |
117 | #define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008 |
118 | #define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009 | |
8dd1e17a | 119 | #define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a |
a9750a31 YG |
120 | #define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x)) |
121 | #define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x)) | |
122 | #define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x)) | |
8dd1e17a | 123 | #define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x)) |
adc53f2e | 124 | #define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x)) |
be0aec23 | 125 | #define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x)) |
db819d60 | 126 | #define MSR_AMD64_SMCA_MCx_SYND(x) (MSR_AMD64_SMCA_MC0_SYND + 0x10*(x)) |
34102009 YG |
127 | #define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x)) |
128 | #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) | |
8dd1e17a | 129 | #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + (y)) + (0x10*(x))) /* MC0_MISC1 + y, stepped by 0x10 per x; args parenthesized so expressions expand safely */ |
adc53f2e | 130 | |
3e0fdec8 BP |
131 | #define XEC(x, mask) (((x) >> 16) & (mask)) /* x shifted right by 16, then ANDed with the whole mask expression */ |
132 | ||
1de08dcc TL |
133 | /* mce.kflags flag bits for logging etc. */ |
134 | #define MCE_HANDLED_CEC BIT_ULL(0) | |
135 | #define MCE_HANDLED_UC BIT_ULL(1) | |
136 | #define MCE_HANDLED_EXTLOG BIT_ULL(2) | |
137 | #define MCE_HANDLED_NFIT BIT_ULL(3) | |
138 | #define MCE_HANDLED_EDAC BIT_ULL(4) | |
139 | #define MCE_HANDLED_MCELOG BIT_ULL(5) | |
278b917f YS |
140 | |
141 | /* | |
142 | * Indicates an MCE which has happened in kernel space but from | |
143 | * which the kernel can recover simply by executing fixup_exception() | |
144 | * so that an error is returned to the caller of the function that | |
145 | * hit the machine check. | |
146 | */ | |
1df73b21 | 147 | #define MCE_IN_KERNEL_RECOV BIT_ULL(6) |
1de08dcc | 148 | |
278b917f YS |
149 | /* |
150 | * Indicates an MCE that happened in kernel space while copying data | |
151 | * from user. In this case fixup_exception() gets the kernel to the | |
152 | * error exit for the copy function. Machine check handler can then | |
153 | * treat it like a fault taken in user mode. | |
154 | */ | |
155 | #define MCE_IN_KERNEL_COPYIN BIT_ULL(7) | |
156 | ||
f51bde6f BP |
157 | /* |
158 | * This structure contains all data related to the MCE log. Also | |
159 | * carries a signature to make it easier to find from external | |
160 | * debugging tools. Each entry is only valid when its finished flag | |
161 | * is set. | |
162 | */ | |
e64edfcc | 163 | struct mce_log_buffer { |
f51bde6f | 164 | char signature[12]; /* "MACHINECHECK" */ |
d8ecca40 | 165 | unsigned len; /* = elements in .mce_entry[] */ |
f51bde6f BP |
166 | unsigned next; |
167 | unsigned flags; | |
168 | unsigned recordlen; /* length of struct mce */ | |
d8ecca40 | 169 | struct mce entry[]; |
f51bde6f | 170 | }; |
d203f0b8 | 171 | |
c9c6d216 | 172 | /* Highest last */ |
9026cc82 | 173 | enum mce_notifier_prios { |
c9c6d216 TL |
174 | MCE_PRIO_LOWEST, |
175 | MCE_PRIO_MCELOG, | |
176 | MCE_PRIO_EDAC, | |
177 | MCE_PRIO_NFIT, | |
178 | MCE_PRIO_EXTLOG, | |
179 | MCE_PRIO_UC, | |
180 | MCE_PRIO_EARLY, | |
15af3659 ZL |
181 | MCE_PRIO_CEC, |
182 | MCE_PRIO_HIGHEST = MCE_PRIO_CEC | |
9026cc82 BP |
183 | }; |
184 | ||
704ae091 | 185 | struct notifier_block; |
eef4dfa0 | 186 | extern void mce_register_decode_chain(struct notifier_block *nb); |
3653ada5 | 187 | extern void mce_unregister_decode_chain(struct notifier_block *nb); |
df39a2e4 | 188 | |
9e55e44e | 189 | #include <linux/percpu.h> |
60063497 | 190 | #include <linux/atomic.h> |
9e55e44e | 191 | |
c6978369 | 192 | extern int mce_p5_enabled; |
e2f43029 | 193 | |
ec6347bb DW |
194 | #ifdef CONFIG_ARCH_HAS_COPY_MC |
195 | extern void enable_copy_mc_fragile(void); | |
196 | unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); | |
197 | #else | |
198 | static inline void enable_copy_mc_fragile(void) | |
199 | { | |
200 | } | |
201 | #endif | |
202 | ||
4a24d80b SK |
203 | struct cper_ia_proc_ctx; |
204 | ||
58995d2d | 205 | #ifdef CONFIG_X86_MCE |
a2202aa2 | 206 | int mcheck_init(void); |
5e09954a | 207 | void mcheck_cpu_init(struct cpuinfo_x86 *c); |
8838eb6c | 208 | void mcheck_cpu_clear(struct cpuinfo_x86 *c); |
4a24d80b SK |
209 | int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, |
210 | u64 lapic_id); | |
58995d2d | 211 | #else |
a2202aa2 | 212 | static inline int mcheck_init(void) { return 0; } |
5e09954a | 213 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} |
8838eb6c | 214 | static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} |
4a24d80b SK |
215 | static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, |
216 | u64 lapic_id) { return -EINVAL; } | |
58995d2d HS |
217 | #endif |
218 | ||
b5f2fa4e | 219 | void mce_setup(struct mce *m); |
e2f43029 | 220 | void mce_log(struct mce *m); |
d6126ef5 | 221 | DECLARE_PER_CPU(struct device *, mce_device); |
e2f43029 | 222 | |
a0bc32b3 AG |
223 | /* Maximum number of MCA banks per CPU. */ |
224 | #define MAX_NR_BANKS 64 | |
41fdff32 | 225 | |
e2f43029 TG |
226 | #ifdef CONFIG_X86_MCE_INTEL |
227 | void mce_intel_feature_init(struct cpuinfo_x86 *c); | |
8838eb6c | 228 | void mce_intel_feature_clear(struct cpuinfo_x86 *c); |
88ccbedd AK |
229 | void cmci_clear(void); |
230 | void cmci_reenable(void); | |
7a0c819d | 231 | void cmci_rediscover(void); |
88ccbedd | 232 | void cmci_recheck(void); |
e2f43029 TG |
233 | #else |
234 | static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } | |
8838eb6c | 235 | static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { } |
88ccbedd AK |
236 | static inline void cmci_clear(void) {} |
237 | static inline void cmci_reenable(void) {} | |
7a0c819d | 238 | static inline void cmci_rediscover(void) {} |
88ccbedd | 239 | static inline void cmci_recheck(void) {} |
e2f43029 TG |
240 | #endif |
241 | ||
38736072 | 242 | int mce_available(struct cpuinfo_x86 *c); |
2d1f4061 | 243 | bool mce_is_memory_error(struct mce *m); |
5d96c934 | 244 | bool mce_is_correctable(struct mce *m); |
e8a308e5 | 245 | int mce_usable_address(struct mce *m); |
88ccbedd | 246 | |
01ca79f1 | 247 | DECLARE_PER_CPU(unsigned, mce_exception_count); |
ca84f696 | 248 | DECLARE_PER_CPU(unsigned, mce_poll_count); |
01ca79f1 | 249 | |
ee031c31 AK |
250 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); |
251 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); | |
252 | ||
b79109c3 | 253 | enum mcp_flags { |
3f2f0680 BP |
254 | MCP_TIMESTAMP = BIT(0), /* log time stamp */ |
255 | MCP_UC = BIT(1), /* log uncorrected errors */ | |
256 | MCP_DONTLOG = BIT(2), /* only clear, don't log */ | |
3bff147b | 257 | MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */ |
b79109c3 | 258 | }; |
3f2f0680 | 259 | bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); |
b79109c3 | 260 | |
9ff36ee9 | 261 | int mce_notify_irq(void); |
e2f43029 | 262 | |
ea149b36 | 263 | DECLARE_PER_CPU(struct mce, injectm); |
66f5ddf3 | 264 | |
c3d1fb56 NR |
265 | /* Disable CMCI/polling for MCA bank claimed by firmware */ |
266 | extern void mce_disable_bank(int bank); | |
267 | ||
58995d2d HS |
268 | /* |
269 | * Exception handler | |
270 | */ | |
8cd501c1 | 271 | void do_machine_check(struct pt_regs *pt_regs); |
58995d2d HS |
272 | |
273 | /* | |
274 | * Threshold handler | |
275 | */ | |
b2762686 AK |
276 | extern void (*mce_threshold_vector)(void); |
277 | ||
24fd78a8 AG |
278 | /* Deferred error interrupt handler */ |
279 | extern void (*deferred_error_int_vector)(void); | |
280 | ||
d334a491 HY |
281 | /* |
282 | * Used by APEI to report memory error via /dev/mcelog | |
283 | */ | |
284 | ||
285 | struct cper_sec_mem_err; | |
286 | extern void apei_mce_report_mem_error(int corrected, | |
287 | struct cper_sec_mem_err *mem_err); | |
288 | ||
be0aec23 AG |
289 | /* |
290 | * Enumerate new IP types and HWID values in AMD processors which support | |
291 | * Scalable MCA. | |
292 | */ | |
293 | #ifdef CONFIG_X86_MCE_AMD | |
be0aec23 | 294 | |
5896820e YG |
295 | /* These may be used by multiple smca_hwid_mcatypes */ |
296 | enum smca_bank_types { | |
be0aec23 | 297 | SMCA_LS = 0, /* Load Store */ |
94a311ce | 298 | SMCA_LS_V2, |
be0aec23 | 299 | SMCA_IF, /* Instruction Fetch */ |
5896820e YG |
300 | SMCA_L2_CACHE, /* L2 Cache */ |
301 | SMCA_DE, /* Decoder Unit */ | |
68627a69 | 302 | SMCA_RESERVED, /* Reserved */ |
5896820e | 303 | SMCA_EX, /* Execution Unit */ |
be0aec23 | 304 | SMCA_FP, /* Floating Point */ |
5896820e YG |
305 | SMCA_L3_CACHE, /* L3 Cache */ |
306 | SMCA_CS, /* Coherent Slave */ | |
94a311ce | 307 | SMCA_CS_V2, |
5896820e YG |
308 | SMCA_PIE, /* Power, Interrupts, etc. */ |
309 | SMCA_UMC, /* Unified Memory Controller */ | |
94a311ce | 310 | SMCA_UMC_V2, |
5896820e YG |
311 | SMCA_PB, /* Parameter Block */ |
312 | SMCA_PSP, /* Platform Security Processor */ | |
94a311ce | 313 | SMCA_PSP_V2, |
5896820e | 314 | SMCA_SMU, /* System Management Unit */ |
94a311ce | 315 | SMCA_SMU_V2, |
cbfa447e | 316 | SMCA_MP5, /* Microprocessor 5 Unit */ |
5176a93a | 317 | SMCA_MPDMA, /* MPDMA Unit */ |
cbfa447e YG |
318 | SMCA_NBIO, /* Northbridge IO Unit */ |
319 | SMCA_PCIE, /* PCI Express Unit */ | |
94a311ce M |
320 | SMCA_PCIE_V2, |
321 | SMCA_XGMI_PCS, /* xGMI PCS Unit */ | |
5176a93a YG |
322 | SMCA_NBIF, /* NBIF Unit */ |
323 | SMCA_SHUB, /* System HUB Unit */ | |
324 | SMCA_SATA, /* SATA Unit */ | |
325 | SMCA_USB, /* USB Unit */ | |
326 | SMCA_GMI_PCS, /* GMI PCS Unit */ | |
94a311ce M |
327 | SMCA_XGMI_PHY, /* xGMI PHY Unit */ |
328 | SMCA_WAFL_PHY, /* WAFL PHY Unit */ | |
5176a93a | 329 | SMCA_GMI_PHY, /* GMI PHY Unit */ |
5896820e YG |
330 | N_SMCA_BANK_TYPES |
331 | }; | |
332 | ||
c09a8c40 | 333 | extern const char *smca_get_long_name(enum smca_bank_types t); |
c6708d50 | 334 | extern bool amd_mce_is_memory_error(struct mce *m); |
e71c3978 | 335 | |
4d7b02d5 SAS |
336 | extern int mce_threshold_create_device(unsigned int cpu); |
337 | extern int mce_threshold_remove_device(unsigned int cpu); | |
e71c3978 | 338 | |
9308fd40 | 339 | void mce_amd_feature_init(struct cpuinfo_x86 *c); |
91f75eb4 | 340 | enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); |
9308fd40 | 341 | #else |
e71c3978 | 342 | |
9308fd40 YG |
343 | static inline int mce_threshold_create_device(unsigned int cpu) { return 0; } /* stub for !CONFIG_X86_MCE_AMD: does nothing, returns 0 */ |
344 | static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; } /* stub for !CONFIG_X86_MCE_AMD: does nothing, returns 0 */ | |
345 | static inline bool amd_mce_is_memory_error(struct mce *m) { return false; } /* stub for !CONFIG_X86_MCE_AMD: always reports false */ | |
346 | static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } | |
be0aec23 AG |
347 | #endif |
348 | ||
9308fd40 | 349 | static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { mce_amd_feature_init(c); } /* forwards to mce_amd_feature_init(); no 'return' of a void expression */ |
1965aae3 | 350 | #endif /* _ASM_X86_MCE_H */ |