x86/cpu/amd: Provide a separate accessor for Node ID
[linux-2.6-block.git] / drivers / edac / mce_amd.c
CommitLineData
09c434b8 1// SPDX-License-Identifier: GPL-2.0-only
b70ef010 2#include <linux/module.h>
888ab8e6
BP
3#include <linux/slab.h>
4
f3c0891c
BP
5#include <asm/cpu.h>
6
47ca08a4 7#include "mce_amd.h"
b52401ce 8
86e9f9d6 9static struct amd_decoder_ops fam_ops;
888ab8e6 10
2be64bfa 11static u8 xec_mask = 0xf;
5ce88f6e 12
5c332202 13static void (*decode_dram_ecc)(int node_id, struct mce *m);
549d042d 14
b0b07a2b 15void amd_register_ecc_decoder(void (*f)(int, struct mce *))
549d042d 16{
5c332202 17 decode_dram_ecc = f;
549d042d
BP
18}
19EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
20
b0b07a2b 21void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
549d042d 22{
5c332202
YG
23 if (decode_dram_ecc) {
24 WARN_ON(decode_dram_ecc != f);
549d042d 25
5c332202 26 decode_dram_ecc = NULL;
549d042d
BP
27 }
28}
29EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
30
b52401ce
DT
/*
 * String representations for the different MCA reported error types, see
 * F3x48 or MSR0000_0411.
 */

/* transaction type */
static const char * const tt_msgs[] = {
	"INSN",
	"DATA",
	"GEN",
	"RESV",
};

/* cache level */
static const char * const ll_msgs[] = {
	"RESV",
	"L1",
	"L2",
	"L3/GEN",
};

/* memory transaction type */
static const char * const rrrr_msgs[] = {
	"GEN",
	"RD",
	"WR",
	"DRD",
	"DWR",
	"IRD",
	"PRF",
	"EV",
	"SNP",
};

/* participating processor (exported for use outside this file) */
const char * const pp_msgs[] = {
	"SRC",
	"RES",
	"OBS",
	"GEN",
};
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
static const char * const to_msgs[] = {
	"no timeout",
	"timed out",
};

/* memory or i/o */
static const char * const ii_msgs[] = {
	"MEM",
	"RESV",
	"IO",
	"GEN",
};

/* internal error type */
static const char * const uu_msgs[] = {
	"RESV",
	"RESV",
	"HWA",
	"RESV",
};
980eec8b 59
/*
 * F15h MC1 descriptions. The table is packed: f15h_mc1_mce() maps the
 * extended error code to an index (xec, xec - 2 or xec - 4), so the xec
 * values noted below are not contiguous.
 */
static const char * const f15h_mc1_mce_desc[] = {
	"UC during a demand linefill from L2",		/* xec = 0x0 */
	"Parity error during data load from IC",	/* xec = 0x1 */
	"Parity error for IC valid bit",		/* xec = 0x2 */
	"Main tag parity error",			/* xec = 0x3 */
	"Parity error in prediction queue",		/* xec = 0x4 */
	"PFB data/address parity error",		/* xec = 0x5 */
	"Parity error in the branch status reg",	/* xec = 0x6 */
	"PFB promotion address error",			/* xec = 0x7 */
	"Tag error during probe/victimization",		/* xec = 0x8 */
	"Parity error for IC probe tag valid bit",	/* xec = 0x9 */
	"PFB non-cacheable bit parity error",		/* xec = 0xa */
	"PFB valid bit parity error",			/* xec = 0xd */
	"Microcode Patch Buffer",			/* xec = 0x10 */
	"uop queue",					/* xec = 0x11 */
	"insn buffer",					/* xec = 0x12 */
	"predecode buffer",				/* xec = 0x13 */
	"fetch address FIFO",				/* xec = 0x14 */
	"dispatch uop queue"				/* xec = 0x15 */
};
80
/*
 * F15h MC2 descriptions for memory errors. f15h_mc2_mce() indexes this with
 * xec - 0x4 for xec 0x4..0xc and with xec - 0x7 for xec 0x10..0x14.
 */
static const char * const f15h_mc2_mce_desc[] = {
	"Fill ECC error on data fills",			/* xec = 0x4 */
	"Fill parity error on insn fills",		/* xec = 0x5 */
	"Prefetcher request FIFO parity error",		/* xec = 0x6 */
	"PRQ address parity error",			/* xec = 0x7 */
	"PRQ data parity error",			/* xec = 0x8 */
	"WCC Tag ECC error",				/* xec = 0x9 */
	"WCC Data ECC error",				/* xec = 0xa */
	"WCB Data parity error",			/* xec = 0xb */
	"VB Data ECC or parity error",			/* xec = 0xc */
	"L2 Tag ECC error",				/* xec = 0x10 */
	"Hard L2 Tag ECC error",			/* xec = 0x11 */
	"Multiple hits on L2 tag",			/* xec = 0x12 */
	"XAB parity error",				/* xec = 0x13 */
	"PRB address parity error"			/* xec = 0x14 */
};
97
/*
 * MC4 (northbridge) descriptions. decode_mc4_mce() indexes this directly with
 * xec for 0x0..0xe and with xec - 13 for 0x1c..0x1f.
 */
static const char * const mc4_mce_desc[] = {
	"DRAM ECC error detected on the NB",			/* xec = 0x0 */
	"CRC error detected on HT link",			/* xec = 0x1 */
	"Link-defined sync error packets detected on HT link",	/* xec = 0x2 */
	"HT Master abort",					/* xec = 0x3 */
	"HT Target abort",					/* xec = 0x4 */
	"Invalid GART PTE entry during GART table walk",	/* xec = 0x5 */
	"Unsupported atomic RMW received from an IO link",	/* xec = 0x6 */
	"Watchdog timeout due to lack of progress",		/* xec = 0x7 */
	"DRAM ECC error detected on the NB",			/* xec = 0x8 */
	"SVM DMA Exclusion Vector error",			/* xec = 0x9 */
	"HT data error detected on link",			/* xec = 0xa */
	"Protocol error (link, L3, probe filter)",		/* xec = 0xb */
	"NB internal arrays parity error",			/* xec = 0xc */
	"DRAM addr/ctl signals parity error",			/* xec = 0xd */
	"IO link transmission error",				/* xec = 0xe */
	"L3 data cache ECC error",				/* xec = 0x1c */
	"L3 cache tag error",					/* xec = 0x1d */
	"L3 LRU parity bits error",				/* xec = 0x1e */
	"ECC Error in the Probe Filter directory"		/* xec = 0x1f */
};
119
/*
 * MC5 descriptions, indexed directly by xec (0x0..0xd) in decode_mc5_mce().
 * Entries 0x0 and 0xc are printed as-is; the others get a " parity error"
 * suffix there.
 */
static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",		/* xec = 0x0 */
	"Wakeup array dest tag",		/* xec = 0x1 */
	"AG payload array",			/* xec = 0x2 */
	"EX payload array",			/* xec = 0x3 */
	"IDRF array",				/* xec = 0x4 */
	"Retire dispatch queue",		/* xec = 0x5 */
	"Mapper checkpoint array",		/* xec = 0x6 */
	"Physical register file EX0 port",	/* xec = 0x7 */
	"Physical register file EX1 port",	/* xec = 0x8 */
	"Physical register file AG0 port",	/* xec = 0x9 */
	"Physical register file AG1 port",	/* xec = 0xa */
	"Flag register file",			/* xec = 0xb */
	"DE error occurred",			/* xec = 0xc */
	"Retire status queue"			/* xec = 0xd */
};
136
bc4febe9
AG
/* MC6 descriptions, indexed directly by xec (0x0..0x5) in decode_mc6_mce(). */
static const char * const mc6_mce_desc[] = {
	"Hardware Assertion",		/* xec = 0x0 */
	"Free List",			/* xec = 0x1 */
	"Physical Register File",	/* xec = 0x2 */
	"Retire Queue",			/* xec = 0x3 */
	"Scheduler table",		/* xec = 0x4 */
	"Status Register File",		/* xec = 0x5 */
};
145
f05c41a9 146static bool f12h_mc0_mce(u16 ec, u8 xec)
51966241 147{
888ab8e6 148 bool ret = false;
51966241 149
888ab8e6 150 if (MEM_ERROR(ec)) {
62452882 151 u8 ll = LL(ec);
888ab8e6 152 ret = true;
51966241 153
888ab8e6
BP
154 if (ll == LL_L2)
155 pr_cont("during L1 linefill from L2.\n");
156 else if (ll == LL_L1)
62452882 157 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
888ab8e6
BP
158 else
159 ret = false;
160 }
161 return ret;
162}
51966241 163
f05c41a9 164static bool f10h_mc0_mce(u16 ec, u8 xec)
9be0bb10 165{
62452882 166 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
9be0bb10
BP
167 pr_cont("during data scrub.\n");
168 return true;
169 }
f05c41a9 170 return f12h_mc0_mce(ec, xec);
9be0bb10
BP
171}
172
f05c41a9 173static bool k8_mc0_mce(u16 ec, u8 xec)
888ab8e6
BP
174{
175 if (BUS_ERROR(ec)) {
176 pr_cont("during system linefill.\n");
177 return true;
178 }
51966241 179
f05c41a9 180 return f10h_mc0_mce(ec, xec);
888ab8e6
BP
181}
182
980eec8b 183static bool cat_mc0_mce(u16 ec, u8 xec)
888ab8e6 184{
62452882 185 u8 r4 = R4(ec);
888ab8e6
BP
186 bool ret = true;
187
188 if (MEM_ERROR(ec)) {
189
62452882 190 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
888ab8e6
BP
191 return false;
192
193 switch (r4) {
194 case R4_DRD:
195 case R4_DWR:
196 pr_cont("Data/Tag parity error due to %s.\n",
197 (r4 == R4_DRD ? "load/hw prf" : "store"));
198 break;
199 case R4_EVICT:
200 pr_cont("Copyback parity error on a tag miss.\n");
201 break;
202 case R4_SNOOP:
203 pr_cont("Tag parity error during snoop.\n");
204 break;
205 default:
206 ret = false;
207 }
208 } else if (BUS_ERROR(ec)) {
209
62452882 210 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
888ab8e6
BP
211 return false;
212
213 pr_cont("System read data error on a ");
214
215 switch (r4) {
216 case R4_RD:
217 pr_cont("TLB reload.\n");
218 break;
219 case R4_DWR:
220 pr_cont("store.\n");
221 break;
222 case R4_DRD:
223 pr_cont("load.\n");
224 break;
225 default:
226 ret = false;
227 }
228 } else {
229 ret = false;
230 }
231
232 return ret;
233}
234
f05c41a9 235static bool f15h_mc0_mce(u16 ec, u8 xec)
25a4f8b0
BP
236{
237 bool ret = true;
238
239 if (MEM_ERROR(ec)) {
240
241 switch (xec) {
242 case 0x0:
243 pr_cont("Data Array access error.\n");
244 break;
245
246 case 0x1:
247 pr_cont("UC error during a linefill from L2/NB.\n");
248 break;
249
250 case 0x2:
251 case 0x11:
252 pr_cont("STQ access error.\n");
253 break;
254
255 case 0x3:
256 pr_cont("SCB access error.\n");
257 break;
258
259 case 0x10:
260 pr_cont("Tag error.\n");
261 break;
262
263 case 0x12:
264 pr_cont("LDQ access error.\n");
265 break;
266
267 default:
268 ret = false;
269 }
270 } else if (BUS_ERROR(ec)) {
271
272 if (!xec)
344f0a06 273 pr_cont("System Read Data Error.\n");
25a4f8b0 274 else
344f0a06 275 pr_cont(" Internal error condition type %d.\n", xec);
eba4bfb3
AG
276 } else if (INT_ERROR(ec)) {
277 if (xec <= 0x1f)
278 pr_cont("Hardware Assert.\n");
279 else
280 ret = false;
281
25a4f8b0
BP
282 } else
283 ret = false;
284
285 return ret;
286}
287
f05c41a9 288static void decode_mc0_mce(struct mce *m)
888ab8e6 289{
62452882
BP
290 u16 ec = EC(m->status);
291 u8 xec = XEC(m->status, xec_mask);
888ab8e6 292
f05c41a9 293 pr_emerg(HW_ERR "MC0 Error: ");
888ab8e6
BP
294
295 /* TLB error signatures are the same across families */
296 if (TLB_ERROR(ec)) {
62452882 297 if (TT(ec) == TT_DATA) {
888ab8e6 298 pr_cont("%s TLB %s.\n", LL_MSG(ec),
25a4f8b0
BP
299 ((xec == 2) ? "locked miss"
300 : (xec ? "multimatch" : "parity")));
888ab8e6
BP
301 return;
302 }
86e9f9d6 303 } else if (fam_ops.mc0_mce(ec, xec))
25a4f8b0
BP
304 ;
305 else
f05c41a9 306 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
51966241
BP
307}
308
f05c41a9 309static bool k8_mc1_mce(u16 ec, u8 xec)
ab5535e7 310{
62452882 311 u8 ll = LL(ec);
dd53bce4 312 bool ret = true;
ab5535e7 313
dd53bce4
BP
314 if (!MEM_ERROR(ec))
315 return false;
ab5535e7 316
dd53bce4
BP
317 if (ll == 0x2)
318 pr_cont("during a linefill from L2.\n");
319 else if (ll == 0x1) {
62452882 320 switch (R4(ec)) {
dd53bce4
BP
321 case R4_IRD:
322 pr_cont("Parity error during data load.\n");
323 break;
ab5535e7 324
dd53bce4
BP
325 case R4_EVICT:
326 pr_cont("Copyback Parity/Victim error.\n");
327 break;
328
329 case R4_SNOOP:
330 pr_cont("Tag Snoop error.\n");
331 break;
332
333 default:
334 ret = false;
335 break;
336 }
ab5535e7 337 } else
dd53bce4 338 ret = false;
ab5535e7 339
dd53bce4
BP
340 return ret;
341}
342
980eec8b 343static bool cat_mc1_mce(u16 ec, u8 xec)
dd53bce4 344{
62452882 345 u8 r4 = R4(ec);
dd53bce4 346 bool ret = true;
ab5535e7 347
980eec8b
JS
348 if (!MEM_ERROR(ec))
349 return false;
350
351 if (TT(ec) != TT_INSTR)
352 return false;
353
354 if (r4 == R4_IRD)
355 pr_cont("Data/tag array parity error for a tag hit.\n");
356 else if (r4 == R4_SNOOP)
357 pr_cont("Tag error during snoop/victimization.\n");
358 else if (xec == 0x0)
359 pr_cont("Tag parity error from victim castout.\n");
360 else if (xec == 0x2)
361 pr_cont("Microcode patch RAM parity error.\n");
362 else
363 ret = false;
dd53bce4 364
dd53bce4
BP
365 return ret;
366}
367
f05c41a9 368static bool f15h_mc1_mce(u16 ec, u8 xec)
86039cd4
BP
369{
370 bool ret = true;
371
372 if (!MEM_ERROR(ec))
373 return false;
374
375 switch (xec) {
376 case 0x0 ... 0xa:
f05c41a9 377 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
86039cd4
BP
378 break;
379
380 case 0xd:
f05c41a9 381 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
86039cd4
BP
382 break;
383
6c1173a6 384 case 0x10:
f05c41a9 385 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
6c1173a6
BP
386 break;
387
eba4bfb3 388 case 0x11 ... 0x15:
f05c41a9 389 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
86039cd4
BP
390 break;
391
392 default:
393 ret = false;
394 }
395 return ret;
396}
397
f05c41a9 398static void decode_mc1_mce(struct mce *m)
dd53bce4 399{
62452882
BP
400 u16 ec = EC(m->status);
401 u8 xec = XEC(m->status, xec_mask);
dd53bce4 402
f05c41a9 403 pr_emerg(HW_ERR "MC1 Error: ");
dd53bce4
BP
404
405 if (TLB_ERROR(ec))
406 pr_cont("%s TLB %s.\n", LL_MSG(ec),
407 (xec ? "multimatch" : "parity error"));
408 else if (BUS_ERROR(ec)) {
525906bc 409 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
dd53bce4
BP
410
411 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
eba4bfb3
AG
412 } else if (INT_ERROR(ec)) {
413 if (xec <= 0x3f)
414 pr_cont("Hardware Assert.\n");
415 else
416 goto wrong_mc1_mce;
86e9f9d6 417 } else if (fam_ops.mc1_mce(ec, xec))
dd53bce4
BP
418 ;
419 else
eba4bfb3
AG
420 goto wrong_mc1_mce;
421
422 return;
423
424wrong_mc1_mce:
425 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
ab5535e7
BP
426}
427
4a73d3de 428static bool k8_mc2_mce(u16 ec, u8 xec)
56cad2d6 429{
4a73d3de 430 bool ret = true;
56cad2d6
BP
431
432 if (xec == 0x1)
433 pr_cont(" in the write data buffers.\n");
434 else if (xec == 0x3)
435 pr_cont(" in the victim data buffers.\n");
436 else if (xec == 0x2 && MEM_ERROR(ec))
62452882 437 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
56cad2d6
BP
438 else if (xec == 0x0) {
439 if (TLB_ERROR(ec))
50872ccd
BP
440 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
441 TT_MSG(ec));
56cad2d6
BP
442 else if (BUS_ERROR(ec))
443 pr_cont(": %s/ECC error in data read from NB: %s.\n",
62452882 444 R4_MSG(ec), PP_MSG(ec));
56cad2d6 445 else if (MEM_ERROR(ec)) {
62452882 446 u8 r4 = R4(ec);
56cad2d6 447
62452882 448 if (r4 >= 0x7)
56cad2d6 449 pr_cont(": %s error during data copyback.\n",
62452882
BP
450 R4_MSG(ec));
451 else if (r4 <= 0x1)
56cad2d6 452 pr_cont(": %s parity/ECC error during data "
62452882 453 "access from L2.\n", R4_MSG(ec));
56cad2d6 454 else
4a73d3de 455 ret = false;
56cad2d6 456 } else
4a73d3de 457 ret = false;
56cad2d6 458 } else
4a73d3de 459 ret = false;
56cad2d6 460
4a73d3de 461 return ret;
56cad2d6
BP
462}
463
4a73d3de 464static bool f15h_mc2_mce(u16 ec, u8 xec)
70fdb494 465{
4a73d3de 466 bool ret = true;
70fdb494
BP
467
468 if (TLB_ERROR(ec)) {
469 if (xec == 0x0)
470 pr_cont("Data parity TLB read error.\n");
471 else if (xec == 0x1)
472 pr_cont("Poison data provided for TLB fill.\n");
473 else
4a73d3de 474 ret = false;
70fdb494
BP
475 } else if (BUS_ERROR(ec)) {
476 if (xec > 2)
4a73d3de 477 ret = false;
70fdb494
BP
478
479 pr_cont("Error during attempted NB data read.\n");
480 } else if (MEM_ERROR(ec)) {
481 switch (xec) {
482 case 0x4 ... 0xc:
f05c41a9 483 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
70fdb494
BP
484 break;
485
486 case 0x10 ... 0x14:
f05c41a9 487 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
70fdb494
BP
488 break;
489
490 default:
4a73d3de 491 ret = false;
70fdb494 492 }
eba4bfb3
AG
493 } else if (INT_ERROR(ec)) {
494 if (xec <= 0x3f)
495 pr_cont("Hardware Assert.\n");
496 else
497 ret = false;
70fdb494
BP
498 }
499
4a73d3de
JS
500 return ret;
501}
502
980eec8b
JS
503static bool f16h_mc2_mce(u16 ec, u8 xec)
504{
505 u8 r4 = R4(ec);
506
507 if (!MEM_ERROR(ec))
508 return false;
509
510 switch (xec) {
511 case 0x04 ... 0x05:
512 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
513 break;
514
515 case 0x09 ... 0x0b:
516 case 0x0d ... 0x0f:
517 pr_cont("ECC error in L2 tag (%s).\n",
518 ((r4 == R4_GEN) ? "BankReq" :
519 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
520 break;
521
522 case 0x10 ... 0x19:
523 case 0x1b:
524 pr_cont("ECC error in L2 data array (%s).\n",
525 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
526 ((r4 == R4_GEN) ? "Attr" :
527 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
528 break;
529
530 case 0x1c ... 0x1d:
531 case 0x1f:
532 pr_cont("Parity error in L2 attribute bits (%s).\n",
533 ((r4 == R4_RD) ? "Hit" :
534 ((r4 == R4_GEN) ? "Attr" : "Fill")));
535 break;
536
537 default:
538 return false;
539 }
540
541 return true;
542}
543
4a73d3de
JS
544static void decode_mc2_mce(struct mce *m)
545{
546 u16 ec = EC(m->status);
547 u8 xec = XEC(m->status, xec_mask);
70fdb494 548
4a73d3de
JS
549 pr_emerg(HW_ERR "MC2 Error: ");
550
86e9f9d6 551 if (!fam_ops.mc2_mce(ec, xec))
4a73d3de 552 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
70fdb494
BP
553}
554
f05c41a9 555static void decode_mc3_mce(struct mce *m)
f9350efd 556{
62452882
BP
557 u16 ec = EC(m->status);
558 u8 xec = XEC(m->status, xec_mask);
ded50623 559
b18434ca 560 if (boot_cpu_data.x86 >= 0x14) {
f05c41a9 561 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
ded50623
BP
562 " please report on LKML.\n");
563 return;
564 }
f9350efd 565
f05c41a9 566 pr_emerg(HW_ERR "MC3 Error");
f9350efd
BP
567
568 if (xec == 0x0) {
62452882 569 u8 r4 = R4(ec);
f9350efd 570
ded50623 571 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
f05c41a9 572 goto wrong_mc3_mce;
f9350efd 573
62452882 574 pr_cont(" during %s.\n", R4_MSG(ec));
ded50623 575 } else
f05c41a9 576 goto wrong_mc3_mce;
ded50623 577
f9350efd
BP
578 return;
579
f05c41a9
BP
580 wrong_mc3_mce:
581 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
f9350efd
BP
582}
583
f05c41a9 584static void decode_mc4_mce(struct mce *m)
5ce88f6e 585{
f3c0891c 586 unsigned int fam = x86_family(m->cpuid);
7e3ec628 587 int node_id = topology_amd_node_id(m->extcpu);
68782673
BP
588 u16 ec = EC(m->status);
589 u8 xec = XEC(m->status, 0x1f);
590 u8 offset = 0;
5ce88f6e 591
f05c41a9 592 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
5ce88f6e 593
68782673
BP
594 switch (xec) {
595 case 0x0 ... 0xe:
5ce88f6e 596
68782673
BP
597 /* special handling for DRAM ECCs */
598 if (xec == 0x0 || xec == 0x8) {
599 /* no ECCs on F11h */
f3c0891c 600 if (fam == 0x11)
f05c41a9 601 goto wrong_mc4_mce;
5ce88f6e 602
f05c41a9 603 pr_cont("%s.\n", mc4_mce_desc[xec]);
5ce88f6e 604
5c332202
YG
605 if (decode_dram_ecc)
606 decode_dram_ecc(node_id, m);
68782673
BP
607 return;
608 }
5ce88f6e
BP
609 break;
610
611 case 0xf:
612 if (TLB_ERROR(ec))
613 pr_cont("GART Table Walk data error.\n");
614 else if (BUS_ERROR(ec))
615 pr_cont("DMA Exclusion Vector Table Walk error.\n");
616 else
f05c41a9 617 goto wrong_mc4_mce;
68782673 618 return;
5ce88f6e 619
05cd667d 620 case 0x19:
f3c0891c 621 if (fam == 0x15 || fam == 0x16)
05cd667d
BP
622 pr_cont("Compute Unit Data Error.\n");
623 else
f05c41a9 624 goto wrong_mc4_mce;
68782673 625 return;
05cd667d 626
5ce88f6e 627 case 0x1c ... 0x1f:
68782673 628 offset = 13;
5ce88f6e
BP
629 break;
630
631 default:
f05c41a9 632 goto wrong_mc4_mce;
68782673 633 }
5ce88f6e 634
f05c41a9 635 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
5ce88f6e
BP
636 return;
637
f05c41a9
BP
638 wrong_mc4_mce:
639 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
d93cc222 640}
d93cc222 641
f05c41a9 642static void decode_mc5_mce(struct mce *m)
53bd5fed 643{
f3c0891c 644 unsigned int fam = x86_family(m->cpuid);
eba4bfb3 645 u16 ec = EC(m->status);
62452882 646 u8 xec = XEC(m->status, xec_mask);
8259a7e5 647
f3c0891c 648 if (fam == 0xf || fam == 0x11)
f05c41a9 649 goto wrong_mc5_mce;
fe4ea262 650
f05c41a9 651 pr_emerg(HW_ERR "MC5 Error: ");
8259a7e5 652
eba4bfb3
AG
653 if (INT_ERROR(ec)) {
654 if (xec <= 0x1f) {
655 pr_cont("Hardware Assert.\n");
656 return;
657 } else
658 goto wrong_mc5_mce;
659 }
660
8259a7e5 661 if (xec == 0x0 || xec == 0xc)
f05c41a9 662 pr_cont("%s.\n", mc5_mce_desc[xec]);
aad19e51 663 else if (xec <= 0xd)
f05c41a9 664 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
8259a7e5 665 else
f05c41a9 666 goto wrong_mc5_mce;
8259a7e5
BP
667
668 return;
fe4ea262 669
f05c41a9
BP
670 wrong_mc5_mce:
671 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
53bd5fed
BP
672}
673
f05c41a9 674static void decode_mc6_mce(struct mce *m)
b8f85c47 675{
62452882 676 u8 xec = XEC(m->status, xec_mask);
b8f85c47 677
f05c41a9 678 pr_emerg(HW_ERR "MC6 Error: ");
b8f85c47 679
bc4febe9 680 if (xec > 0x5)
f05c41a9 681 goto wrong_mc6_mce;
b8f85c47 682
bc4febe9 683 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
b8f85c47
BP
684 return;
685
f05c41a9
BP
686 wrong_mc6_mce:
687 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
b8f85c47
BP
688}
689
ff03ff32
YG
690static const char * const smca_long_names[] = {
691 [SMCA_LS ... SMCA_LS_V2] = "Load Store Unit",
692 [SMCA_IF] = "Instruction Fetch Unit",
693 [SMCA_L2_CACHE] = "L2 Cache",
694 [SMCA_DE] = "Decode Unit",
695 [SMCA_RESERVED] = "Reserved",
696 [SMCA_EX] = "Execution Unit",
697 [SMCA_FP] = "Floating Point Unit",
698 [SMCA_L3_CACHE] = "L3 Cache",
699 [SMCA_CS ... SMCA_CS_V2] = "Coherent Slave",
700 [SMCA_PIE] = "Power, Interrupts, etc.",
701
702 /* UMC v2 is separate because both of them can exist in a single system. */
703 [SMCA_UMC] = "Unified Memory Controller",
704 [SMCA_UMC_V2] = "Unified Memory Controller v2",
705 [SMCA_PB] = "Parameter Block",
706 [SMCA_PSP ... SMCA_PSP_V2] = "Platform Security Processor",
707 [SMCA_SMU ... SMCA_SMU_V2] = "System Management Unit",
708 [SMCA_MP5] = "Microprocessor 5 Unit",
709 [SMCA_MPDMA] = "MPDMA Unit",
710 [SMCA_NBIO] = "Northbridge IO Unit",
711 [SMCA_PCIE ... SMCA_PCIE_V2] = "PCI Express Unit",
712 [SMCA_XGMI_PCS] = "Ext Global Memory Interconnect PCS Unit",
713 [SMCA_NBIF] = "NBIF Unit",
714 [SMCA_SHUB] = "System Hub Unit",
715 [SMCA_SATA] = "SATA Unit",
716 [SMCA_USB] = "USB Unit",
717 [SMCA_GMI_PCS] = "Global Memory Interconnect PCS Unit",
718 [SMCA_XGMI_PHY] = "Ext Global Memory Interconnect PHY Unit",
719 [SMCA_WAFL_PHY] = "WAFL PHY Unit",
720 [SMCA_GMI_PHY] = "Global Memory Interconnect PHY Unit",
721};
722
723static const char *smca_get_long_name(enum smca_bank_types t)
724{
725 if (t >= N_SMCA_BANK_TYPES)
726 return NULL;
727
728 return smca_long_names[t];
729}
730
be0aec23 731/* Decode errors according to Scalable MCA specification */
4ab1784b 732static void decode_smca_error(struct mce *m)
be0aec23 733{
91f75eb4 734 enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
5896820e 735 u8 xec = XEC(m->status, xec_mask);
be0aec23 736
91f75eb4 737 if (bank_type >= N_SMCA_BANK_TYPES)
be0aec23 738 return;
be0aec23 739
68627a69
YG
740 if (bank_type == SMCA_RESERVED) {
741 pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
742 return;
743 }
744
ff03ff32 745 pr_emerg(HW_ERR "%s Ext. Error Code: %d", smca_get_long_name(bank_type), xec);
be0aec23 746
c35977b0
YG
747 if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
748 xec == 0 && decode_dram_ecc)
7e3ec628 749 decode_dram_ecc(topology_amd_node_id(m->extcpu), m);
be0aec23
AG
750}
751
6337583d 752static inline void amd_decode_err_code(u16 ec)
d93cc222 753{
980eec8b
JS
754 if (INT_ERROR(ec)) {
755 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
756 return;
757 }
fa7ae8cc
BP
758
759 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
760
761 if (BUS_ERROR(ec))
762 pr_cont(", mem/io: %s", II_MSG(ec));
763 else
764 pr_cont(", tx: %s", TT_MSG(ec));
765
766 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
767 pr_cont(", mem-tx: %s", R4_MSG(ec));
768
769 if (BUS_ERROR(ec))
770 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
771 }
772
773 pr_cont("\n");
549d042d 774}
549d042d 775
d5c6770d
BP
776static const char *decode_error_status(struct mce *m)
777{
778 if (m->status & MCI_STATUS_UC) {
779 if (m->status & MCI_STATUS_PCC)
780 return "System Fatal error.";
781 if (m->mcgstatus & MCG_STATUS_RIPV)
782 return "Uncorrected, software restartable error.";
783 return "Uncorrected, software containable error.";
784 }
785
786 if (m->status & MCI_STATUS_DEFERRED)
67d7fd30 787 return "Deferred error, no action required.";
d5c6770d
BP
788
789 return "Corrected error, no action required.";
790}
791
1fbcd909
BP
792static int
793amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
549d042d 794{
fb253195 795 struct mce *m = (struct mce *)data;
f3c0891c 796 unsigned int fam = x86_family(m->cpuid);
b0b07a2b 797 int ecc;
549d042d 798
23ba710a
TL
799 if (m->kflags & MCE_HANDLED_CEC)
800 return NOTIFY_DONE;
801
fd0f5fff
BP
802 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
803
804 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
805 m->extcpu,
f3c0891c 806 fam, x86_model(m->cpuid), x86_stepping(m->cpuid),
fd0f5fff
BP
807 m->bank,
808 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
99e1dfb7
AG
809 ((m->status & MCI_STATUS_UC) ? "UE" :
810 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
fd0f5fff 811 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
a0bcd3c0
YG
812 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
813 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
fd0f5fff 814
a348ed83 815 if (boot_cpu_has(X86_FEATURE_SMCA)) {
be0aec23
AG
816 u32 low, high;
817 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
818
819 if (!rdmsr_safe(addr, &low, &high) &&
820 (low & MCI_CONFIG_MCAX))
821 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
a0bcd3c0
YG
822
823 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
be0aec23
AG
824 }
825
fd0f5fff
BP
826 /* do the two bits[14:13] together */
827 ecc = (m->status >> 45) & 0x3;
828 if (ecc)
829 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
830
a0bcd3c0
YG
831 if (fam >= 0x15) {
832 pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
833
834 /* F15h, bank4, bit 43 is part of McaStatSubCache. */
835 if (fam != 0x15 || m->bank != 4)
836 pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
837 }
838
3f4da372
YG
839 if (fam >= 0x17)
840 pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
841
fd0f5fff
BP
842 pr_cont("]: 0x%016llx\n", m->status);
843
844 if (m->status & MCI_STATUS_ADDRV)
75bf2f64 845 pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
fd0f5fff 846
bb2de0ad
SK
847 if (m->ppin)
848 pr_emerg(HW_ERR "PPIN: 0x%016llx\n", m->ppin);
849
a348ed83 850 if (boot_cpu_has(X86_FEATURE_SMCA)) {
75bf2f64
YG
851 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
852
b300e873
YG
853 if (m->status & MCI_STATUS_SYNDV)
854 pr_cont(", Syndrome: 0x%016llx", m->synd);
855
856 pr_cont("\n");
857
4ab1784b 858 decode_smca_error(m);
be0aec23 859 goto err_code;
75bf2f64 860 }
be0aec23 861
0bceab67
BP
862 if (m->tsc)
863 pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
864
86e9f9d6
BP
865 /* Doesn't matter which member to test. */
866 if (!fam_ops.mc0_mce)
fd0f5fff
BP
867 goto err_code;
868
51966241
BP
869 switch (m->bank) {
870 case 0:
f05c41a9 871 decode_mc0_mce(m);
51966241 872 break;
d93cc222 873
ab5535e7 874 case 1:
f05c41a9 875 decode_mc1_mce(m);
ab5535e7
BP
876 break;
877
56cad2d6 878 case 2:
4a73d3de 879 decode_mc2_mce(m);
56cad2d6
BP
880 break;
881
f9350efd 882 case 3:
f05c41a9 883 decode_mc3_mce(m);
f9350efd
BP
884 break;
885
51966241 886 case 4:
f05c41a9 887 decode_mc4_mce(m);
51966241
BP
888 break;
889
53bd5fed 890 case 5:
f05c41a9 891 decode_mc5_mce(m);
53bd5fed
BP
892 break;
893
b8f85c47 894 case 6:
f05c41a9 895 decode_mc6_mce(m);
b8f85c47
BP
896 break;
897
51966241
BP
898 default:
899 break;
b69b29de 900 }
51966241 901
fd0f5fff 902 err_code:
51966241 903 amd_decode_err_code(m->status & 0xffff);
fb253195 904
23ba710a
TL
905 m->kflags |= MCE_HANDLED_EDAC;
906 return NOTIFY_OK;
549d042d 907}
f436f8bb 908
fb253195
BP
909static struct notifier_block amd_mce_dec_nb = {
910 .notifier_call = amd_decode_mce,
9026cc82 911 .priority = MCE_PRIO_EDAC,
fb253195
BP
912};
913
f436f8bb
IM
914static int __init mce_amd_init(void)
915{
bad11e03
BP
916 struct cpuinfo_x86 *c = &boot_cpu_data;
917
c4a3e946
PW
918 if (c->x86_vendor != X86_VENDOR_AMD &&
919 c->x86_vendor != X86_VENDOR_HYGON)
fd0f5fff 920 return -ENODEV;
e045c291 921
767f4b62
SK
922 if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
923 return -ENODEV;
924
9f6aef86
YG
925 if (boot_cpu_has(X86_FEATURE_SMCA)) {
926 xec_mask = 0x3f;
927 goto out;
928 }
929
bad11e03 930 switch (c->x86) {
888ab8e6 931 case 0xf:
86e9f9d6
BP
932 fam_ops.mc0_mce = k8_mc0_mce;
933 fam_ops.mc1_mce = k8_mc1_mce;
934 fam_ops.mc2_mce = k8_mc2_mce;
888ab8e6
BP
935 break;
936
937 case 0x10:
86e9f9d6
BP
938 fam_ops.mc0_mce = f10h_mc0_mce;
939 fam_ops.mc1_mce = k8_mc1_mce;
940 fam_ops.mc2_mce = k8_mc2_mce;
888ab8e6
BP
941 break;
942
f0157b3a 943 case 0x11:
86e9f9d6
BP
944 fam_ops.mc0_mce = k8_mc0_mce;
945 fam_ops.mc1_mce = k8_mc1_mce;
946 fam_ops.mc2_mce = k8_mc2_mce;
f0157b3a
BP
947 break;
948
9be0bb10 949 case 0x12:
86e9f9d6
BP
950 fam_ops.mc0_mce = f12h_mc0_mce;
951 fam_ops.mc1_mce = k8_mc1_mce;
952 fam_ops.mc2_mce = k8_mc2_mce;
9be0bb10
BP
953 break;
954
888ab8e6 955 case 0x14:
86e9f9d6
BP
956 fam_ops.mc0_mce = cat_mc0_mce;
957 fam_ops.mc1_mce = cat_mc1_mce;
958 fam_ops.mc2_mce = k8_mc2_mce;
888ab8e6
BP
959 break;
960
2be64bfa 961 case 0x15:
eba4bfb3
AG
962 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
963
86e9f9d6
BP
964 fam_ops.mc0_mce = f15h_mc0_mce;
965 fam_ops.mc1_mce = f15h_mc1_mce;
966 fam_ops.mc2_mce = f15h_mc2_mce;
2be64bfa
BP
967 break;
968
980eec8b
JS
969 case 0x16:
970 xec_mask = 0x1f;
86e9f9d6
BP
971 fam_ops.mc0_mce = cat_mc0_mce;
972 fam_ops.mc1_mce = cat_mc1_mce;
973 fam_ops.mc2_mce = f16h_mc2_mce;
980eec8b
JS
974 break;
975
be0aec23 976 case 0x17:
c4a3e946 977 case 0x18:
52cff04a 978 pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
86e9f9d6 979 return -EINVAL;
be0aec23 980
888ab8e6 981 default:
ec3e82d6 982 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
86e9f9d6 983 return -EINVAL;
888ab8e6
BP
984 }
985
9f6aef86 986out:
9530d608
BP
987 pr_info("MCE: In-kernel MCE decoding enabled.\n");
988
3653ada5 989 mce_register_decode_chain(&amd_mce_dec_nb);
f436f8bb
IM
990
991 return 0;
992}
993early_initcall(mce_amd_init);
0d18b2e3
BP
994
995#ifdef MODULE
996static void __exit mce_amd_exit(void)
997{
3653ada5 998 mce_unregister_decode_chain(&amd_mce_dec_nb);
0d18b2e3
BP
999}
1000
1001MODULE_DESCRIPTION("AMD MCE decoder");
1002MODULE_ALIAS("edac-mce-amd");
1003MODULE_LICENSE("GPL");
1004module_exit(mce_amd_exit);
1005#endif