Merge remote-tracking branch 'asoc/topic/tlv320aic31xx' into asoc-next
[linux-2.6-block.git] / drivers / edac / mce_amd.c
CommitLineData
b70ef010 1#include <linux/module.h>
888ab8e6
BP
2#include <linux/slab.h>
3
47ca08a4 4#include "mce_amd.h"
b52401ce 5
888ab8e6
BP
6static struct amd_decoder_ops *fam_ops;
7
2be64bfa 8static u8 xec_mask = 0xf;
5ce88f6e 9
549d042d 10static bool report_gart_errors;
b0b07a2b 11static void (*nb_bus_decoder)(int node_id, struct mce *m);
549d042d
BP
12
13void amd_report_gart_errors(bool v)
14{
15 report_gart_errors = v;
16}
17EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18
b0b07a2b 19void amd_register_ecc_decoder(void (*f)(int, struct mce *))
549d042d
BP
20{
21 nb_bus_decoder = f;
22}
23EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24
b0b07a2b 25void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
549d042d
BP
26{
27 if (nb_bus_decoder) {
28 WARN_ON(nb_bus_decoder != f);
29
30 nb_bus_decoder = NULL;
31 }
32}
33EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34
b52401ce
DT
35/*
36 * string representation for the different MCA reported error types, see F3x48
37 * or MSR0000_0411.
38 */
6337583d
BP
39
/* transaction type */
static const char * const tt_msgs[] = {
	"INSN",
	"DATA",
	"GEN",
	"RESV",
};

/* cache level */
static const char * const ll_msgs[] = {
	"RESV",
	"L1",
	"L2",
	"L3/GEN",
};

/* memory transaction type */
static const char * const rrrr_msgs[] = {
	"GEN",
	"RD",
	"WR",
	"DRD",
	"DWR",
	"IRD",
	"PRF",
	"EV",
	"SNP",
};
50
/* participating processor; exported for use outside this file */
const char * const pp_msgs[] = {
	"SRC",
	"RES",
	"OBS",
	"GEN",
};
EXPORT_SYMBOL_GPL(pp_msgs);
b52401ce 54
/* request timeout */
static const char * const to_msgs[] = {
	"no timeout",
	"timed out",
};

/* memory or i/o */
static const char * const ii_msgs[] = {
	"MEM",
	"RESV",
	"IO",
	"GEN",
};

/* internal error type */
static const char * const uu_msgs[] = {
	"RESV",
	"RESV",
	"HWA",
	"RESV",
};
980eec8b 63
/*
 * F15h MC1 error descriptions. The table is packed: f15h_mc1_mce() maps
 * xec 0x0-0xa directly, xec 0xd to slot 11 and xec 0x10-0x15 to slots 12-17.
 */
static const char * const f15h_mc1_mce_desc[] = {
	/* xec = 0x0 ... 0xa */
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",

	/* xec = 0xd */
	"PFB valid bit parity error",

	/* xec = 0x10 */
	"Microcode Patch Buffer",

	/* xec = 0x11 ... 0x15 */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue",
};
84
/*
 * F15h MC2 error descriptions. The table is packed: f15h_mc2_mce() maps
 * xec 0x4-0xc to slots 0-8 and xec 0x10-0x14 to slots 9-13.
 */
static const char * const f15h_mc2_mce_desc[] = {
	/* xec = 0x4 ... 0xc */
	"Fill ECC error on data fills",
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",

	/* xec = 0x10 ... 0x14 */
	"L2 Tag ECC error",
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error",
};
101
/*
 * MC4 (northbridge) error descriptions. decode_mc4_mce() indexes slots 0-14
 * with xec 0x0-0xe and slots 15-18 with xec 0x1c-0x1f (xec - 13).
 */
static const char * const mc4_mce_desc[] = {
	/* xec = 0x0 ... 0xe */
	"DRAM ECC error detected on the NB",
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",

	/* xec = 0x1c ... 0x1f */
	"L3 data cache ECC error",
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory",
};
123
/* MC5 error descriptions, indexed directly by xec (0x0 ... 0xd). */
static const char * const mc5_mce_desc[] = {
	[0x0] = "CPU Watchdog timer expire",
	[0x1] = "Wakeup array dest tag",
	[0x2] = "AG payload array",
	[0x3] = "EX payload array",
	[0x4] = "IDRF array",
	[0x5] = "Retire dispatch queue",
	[0x6] = "Mapper checkpoint array",
	[0x7] = "Physical register file EX0 port",
	[0x8] = "Physical register file EX1 port",
	[0x9] = "Physical register file AG0 port",
	[0xa] = "Physical register file AG1 port",
	[0xb] = "Flag register file",
	[0xc] = "DE error occurred",
	[0xd] = "Retire status queue",
};
140
bc4febe9
AG
/* MC6 error descriptions, indexed directly by xec (0x0 ... 0x5). */
static const char * const mc6_mce_desc[] = {
	[0x0] = "Hardware Assertion",
	[0x1] = "Free List",
	[0x2] = "Physical Register File",
	[0x3] = "Retire Queue",
	[0x4] = "Scheduler table",
	[0x5] = "Status Register File",
};
149
be0aec23
AG
/* Scalable MCA error strings */

/*
 * Load-store unit error descriptions, indexed directly by xec. Slot 0x4 is
 * reserved; decode_f17h_core_errors() rejects that code before indexing.
 */
static const char * const f17h_ls_mce_desc[] = {
	"Load queue parity",
	"Store queue parity",
	"Miss address buffer payload parity",
	"L1 TLB parity",
	"",						/* reserved */
	"DC tag error type 6",
	"DC tag error type 1",
	"Internal error type 1",
	"Internal error type 2",
	"Sys Read data error thread 0",
	"Sys read data error thread 1",
	"DC tag error type 2",
	/* spelling of "consumption" corrected in this user-visible message */
	"DC data error type 1 (poison consumption)",
	"DC data error type 2",
	"DC data error type 3",
	"DC tag error type 4",
	"L2 TLB parity",
	"PDC parity error",
	"DC tag error type 3",
	"DC tag error type 5",
	"L2 fill data error",
};
174
/* Instruction fetch unit error descriptions, indexed directly by xec. */
static const char * const f17h_if_mce_desc[] = {
	[0x0] = "microtag probe port parity error",
	[0x1] = "IC microtag or full tag multi-hit error",
	[0x2] = "IC full tag parity",
	[0x3] = "IC data array parity",
	[0x4] = "Decoupling queue phys addr parity error",
	[0x5] = "L0 ITLB parity error",
	[0x6] = "L1 ITLB parity error",
	[0x7] = "L2 ITLB parity error",
	[0x8] = "BPQ snoop parity on Thread 0",
	[0x9] = "BPQ snoop parity on Thread 1",
	[0xa] = "L1 BTB multi-match error",
	[0xb] = "L2 BTB multi-match error",
};

/* L2 cache unit error descriptions, indexed directly by xec. */
static const char * const f17h_l2_mce_desc[] = {
	[0x0] = "L2M tag multi-way-hit error",
	[0x1] = "L2M tag ECC error",
	[0x2] = "L2M data ECC error",
	[0x3] = "HW assert",
};

/* Decode unit error descriptions, indexed directly by xec. */
static const char * const f17h_de_mce_desc[] = {
	[0x0] = "uop cache tag parity error",
	[0x1] = "uop cache data parity error",
	[0x2] = "Insn buffer parity error",
	[0x3] = "Insn dispatch queue parity error",
	[0x4] = "Fetch address FIFO parity",
	[0x5] = "Patch RAM data parity",
	[0x6] = "Patch RAM sequencer parity",
	[0x7] = "uop buffer parity",
};
207
/* Execution unit error descriptions, indexed directly by xec. */
static const char * const f17h_ex_mce_desc[] = {
	[0x0] = "Watchdog timeout error",
	[0x1] = "Phy register file parity",
	[0x2] = "Flag register file parity",
	[0x3] = "Immediate displacement register file parity",
	[0x4] = "Address generator payload parity",
	[0x5] = "EX payload parity",
	[0x6] = "Checkpoint queue parity",
	[0x7] = "Retire dispatch queue parity",
};

/* Floating point unit error descriptions, indexed directly by xec. */
static const char * const f17h_fp_mce_desc[] = {
	[0x0] = "Physical register file parity",
	[0x1] = "Freelist parity error",
	[0x2] = "Schedule queue parity",
	[0x3] = "NSQ parity error",
	[0x4] = "Retire queue parity",
	[0x5] = "Status register file parity",
};

/* L3 cache unit error descriptions, indexed directly by xec. */
static const char * const f17h_l3_mce_desc[] = {
	[0x0] = "Shadow tag macro ECC error",
	[0x1] = "Shadow tag macro multi-way-hit error",
	[0x2] = "L3M tag ECC error",
	[0x3] = "L3M tag multi-way-hit error",
	[0x4] = "L3M data ECC error",
	[0x5] = "XI parity, L3 fill done channel error",
	[0x6] = "L3 victim queue parity",
	[0x7] = "L3 HW assert",
};
238
/* Data Fabric CS error descriptions, indexed directly by xec. */
static const char * const f17h_cs_mce_desc[] = {
	[0x0] = "Illegal request from transport layer",
	[0x1] = "Address violation",
	[0x2] = "Security violation",
	[0x3] = "Illegal response from transport layer",
	[0x4] = "Unexpected response",
	[0x5] = "Parity error on incoming request or probe response data",
	[0x6] = "Parity error on incoming read response data",
	[0x7] = "Atomic request parity",
	[0x8] = "ECC error on probe filter access",
};

/* Data Fabric PIE error descriptions, indexed directly by xec. */
static const char * const f17h_pie_mce_desc[] = {
	[0x0] = "HW assert",
	[0x1] = "Internal PIE register security violation",
	[0x2] = "Error on GMI link",
	[0x3] = "Poison data written to internal PIE register",
};

/* UMC error descriptions, indexed directly by xec. */
static const char * const f17h_umc_mce_desc[] = {
	[0x0] = "DRAM ECC error",
	[0x1] = "Data poison error on DRAM",
	[0x2] = "SDP parity error",
	[0x3] = "Advanced peripheral bus error",
	[0x4] = "Command/address parity error",
	[0x5] = "Write data CRC error",
};
266
/* Parameter Block error descriptions, indexed directly by xec. */
static const char * const f17h_pb_mce_desc[] = {
	[0x0] = "Parameter Block RAM ECC error",
};

/* PSP error descriptions, indexed directly by xec. */
static const char * const f17h_psp_mce_desc[] = {
	[0x0] = "PSP RAM ECC or parity error",
};

/* SMU error descriptions, indexed directly by xec. */
static const char * const f17h_smu_mce_desc[] = {
	[0x0] = "SMU RAM ECC or parity error",
};
278
f05c41a9 279static bool f12h_mc0_mce(u16 ec, u8 xec)
51966241 280{
888ab8e6 281 bool ret = false;
51966241 282
888ab8e6 283 if (MEM_ERROR(ec)) {
62452882 284 u8 ll = LL(ec);
888ab8e6 285 ret = true;
51966241 286
888ab8e6
BP
287 if (ll == LL_L2)
288 pr_cont("during L1 linefill from L2.\n");
289 else if (ll == LL_L1)
62452882 290 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
888ab8e6
BP
291 else
292 ret = false;
293 }
294 return ret;
295}
51966241 296
f05c41a9 297static bool f10h_mc0_mce(u16 ec, u8 xec)
9be0bb10 298{
62452882 299 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
9be0bb10
BP
300 pr_cont("during data scrub.\n");
301 return true;
302 }
f05c41a9 303 return f12h_mc0_mce(ec, xec);
9be0bb10
BP
304}
305
f05c41a9 306static bool k8_mc0_mce(u16 ec, u8 xec)
888ab8e6
BP
307{
308 if (BUS_ERROR(ec)) {
309 pr_cont("during system linefill.\n");
310 return true;
311 }
51966241 312
f05c41a9 313 return f10h_mc0_mce(ec, xec);
888ab8e6
BP
314}
315
980eec8b 316static bool cat_mc0_mce(u16 ec, u8 xec)
888ab8e6 317{
62452882 318 u8 r4 = R4(ec);
888ab8e6
BP
319 bool ret = true;
320
321 if (MEM_ERROR(ec)) {
322
62452882 323 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
888ab8e6
BP
324 return false;
325
326 switch (r4) {
327 case R4_DRD:
328 case R4_DWR:
329 pr_cont("Data/Tag parity error due to %s.\n",
330 (r4 == R4_DRD ? "load/hw prf" : "store"));
331 break;
332 case R4_EVICT:
333 pr_cont("Copyback parity error on a tag miss.\n");
334 break;
335 case R4_SNOOP:
336 pr_cont("Tag parity error during snoop.\n");
337 break;
338 default:
339 ret = false;
340 }
341 } else if (BUS_ERROR(ec)) {
342
62452882 343 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
888ab8e6
BP
344 return false;
345
346 pr_cont("System read data error on a ");
347
348 switch (r4) {
349 case R4_RD:
350 pr_cont("TLB reload.\n");
351 break;
352 case R4_DWR:
353 pr_cont("store.\n");
354 break;
355 case R4_DRD:
356 pr_cont("load.\n");
357 break;
358 default:
359 ret = false;
360 }
361 } else {
362 ret = false;
363 }
364
365 return ret;
366}
367
f05c41a9 368static bool f15h_mc0_mce(u16 ec, u8 xec)
25a4f8b0
BP
369{
370 bool ret = true;
371
372 if (MEM_ERROR(ec)) {
373
374 switch (xec) {
375 case 0x0:
376 pr_cont("Data Array access error.\n");
377 break;
378
379 case 0x1:
380 pr_cont("UC error during a linefill from L2/NB.\n");
381 break;
382
383 case 0x2:
384 case 0x11:
385 pr_cont("STQ access error.\n");
386 break;
387
388 case 0x3:
389 pr_cont("SCB access error.\n");
390 break;
391
392 case 0x10:
393 pr_cont("Tag error.\n");
394 break;
395
396 case 0x12:
397 pr_cont("LDQ access error.\n");
398 break;
399
400 default:
401 ret = false;
402 }
403 } else if (BUS_ERROR(ec)) {
404
405 if (!xec)
344f0a06 406 pr_cont("System Read Data Error.\n");
25a4f8b0 407 else
344f0a06 408 pr_cont(" Internal error condition type %d.\n", xec);
eba4bfb3
AG
409 } else if (INT_ERROR(ec)) {
410 if (xec <= 0x1f)
411 pr_cont("Hardware Assert.\n");
412 else
413 ret = false;
414
25a4f8b0
BP
415 } else
416 ret = false;
417
418 return ret;
419}
420
f05c41a9 421static void decode_mc0_mce(struct mce *m)
888ab8e6 422{
62452882
BP
423 u16 ec = EC(m->status);
424 u8 xec = XEC(m->status, xec_mask);
888ab8e6 425
f05c41a9 426 pr_emerg(HW_ERR "MC0 Error: ");
888ab8e6
BP
427
428 /* TLB error signatures are the same across families */
429 if (TLB_ERROR(ec)) {
62452882 430 if (TT(ec) == TT_DATA) {
888ab8e6 431 pr_cont("%s TLB %s.\n", LL_MSG(ec),
25a4f8b0
BP
432 ((xec == 2) ? "locked miss"
433 : (xec ? "multimatch" : "parity")));
888ab8e6
BP
434 return;
435 }
f05c41a9 436 } else if (fam_ops->mc0_mce(ec, xec))
25a4f8b0
BP
437 ;
438 else
f05c41a9 439 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
51966241
BP
440}
441
f05c41a9 442static bool k8_mc1_mce(u16 ec, u8 xec)
ab5535e7 443{
62452882 444 u8 ll = LL(ec);
dd53bce4 445 bool ret = true;
ab5535e7 446
dd53bce4
BP
447 if (!MEM_ERROR(ec))
448 return false;
ab5535e7 449
dd53bce4
BP
450 if (ll == 0x2)
451 pr_cont("during a linefill from L2.\n");
452 else if (ll == 0x1) {
62452882 453 switch (R4(ec)) {
dd53bce4
BP
454 case R4_IRD:
455 pr_cont("Parity error during data load.\n");
456 break;
ab5535e7 457
dd53bce4
BP
458 case R4_EVICT:
459 pr_cont("Copyback Parity/Victim error.\n");
460 break;
461
462 case R4_SNOOP:
463 pr_cont("Tag Snoop error.\n");
464 break;
465
466 default:
467 ret = false;
468 break;
469 }
ab5535e7 470 } else
dd53bce4 471 ret = false;
ab5535e7 472
dd53bce4
BP
473 return ret;
474}
475
980eec8b 476static bool cat_mc1_mce(u16 ec, u8 xec)
dd53bce4 477{
62452882 478 u8 r4 = R4(ec);
dd53bce4 479 bool ret = true;
ab5535e7 480
980eec8b
JS
481 if (!MEM_ERROR(ec))
482 return false;
483
484 if (TT(ec) != TT_INSTR)
485 return false;
486
487 if (r4 == R4_IRD)
488 pr_cont("Data/tag array parity error for a tag hit.\n");
489 else if (r4 == R4_SNOOP)
490 pr_cont("Tag error during snoop/victimization.\n");
491 else if (xec == 0x0)
492 pr_cont("Tag parity error from victim castout.\n");
493 else if (xec == 0x2)
494 pr_cont("Microcode patch RAM parity error.\n");
495 else
496 ret = false;
dd53bce4 497
dd53bce4
BP
498 return ret;
499}
500
f05c41a9 501static bool f15h_mc1_mce(u16 ec, u8 xec)
86039cd4
BP
502{
503 bool ret = true;
504
505 if (!MEM_ERROR(ec))
506 return false;
507
508 switch (xec) {
509 case 0x0 ... 0xa:
f05c41a9 510 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
86039cd4
BP
511 break;
512
513 case 0xd:
f05c41a9 514 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
86039cd4
BP
515 break;
516
6c1173a6 517 case 0x10:
f05c41a9 518 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
6c1173a6
BP
519 break;
520
eba4bfb3 521 case 0x11 ... 0x15:
f05c41a9 522 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
86039cd4
BP
523 break;
524
525 default:
526 ret = false;
527 }
528 return ret;
529}
530
f05c41a9 531static void decode_mc1_mce(struct mce *m)
dd53bce4 532{
62452882
BP
533 u16 ec = EC(m->status);
534 u8 xec = XEC(m->status, xec_mask);
dd53bce4 535
f05c41a9 536 pr_emerg(HW_ERR "MC1 Error: ");
dd53bce4
BP
537
538 if (TLB_ERROR(ec))
539 pr_cont("%s TLB %s.\n", LL_MSG(ec),
540 (xec ? "multimatch" : "parity error"));
541 else if (BUS_ERROR(ec)) {
525906bc 542 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
dd53bce4
BP
543
544 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
eba4bfb3
AG
545 } else if (INT_ERROR(ec)) {
546 if (xec <= 0x3f)
547 pr_cont("Hardware Assert.\n");
548 else
549 goto wrong_mc1_mce;
f05c41a9 550 } else if (fam_ops->mc1_mce(ec, xec))
dd53bce4
BP
551 ;
552 else
eba4bfb3
AG
553 goto wrong_mc1_mce;
554
555 return;
556
557wrong_mc1_mce:
558 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
ab5535e7
BP
559}
560
4a73d3de 561static bool k8_mc2_mce(u16 ec, u8 xec)
56cad2d6 562{
4a73d3de 563 bool ret = true;
56cad2d6
BP
564
565 if (xec == 0x1)
566 pr_cont(" in the write data buffers.\n");
567 else if (xec == 0x3)
568 pr_cont(" in the victim data buffers.\n");
569 else if (xec == 0x2 && MEM_ERROR(ec))
62452882 570 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
56cad2d6
BP
571 else if (xec == 0x0) {
572 if (TLB_ERROR(ec))
50872ccd
BP
573 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
574 TT_MSG(ec));
56cad2d6
BP
575 else if (BUS_ERROR(ec))
576 pr_cont(": %s/ECC error in data read from NB: %s.\n",
62452882 577 R4_MSG(ec), PP_MSG(ec));
56cad2d6 578 else if (MEM_ERROR(ec)) {
62452882 579 u8 r4 = R4(ec);
56cad2d6 580
62452882 581 if (r4 >= 0x7)
56cad2d6 582 pr_cont(": %s error during data copyback.\n",
62452882
BP
583 R4_MSG(ec));
584 else if (r4 <= 0x1)
56cad2d6 585 pr_cont(": %s parity/ECC error during data "
62452882 586 "access from L2.\n", R4_MSG(ec));
56cad2d6 587 else
4a73d3de 588 ret = false;
56cad2d6 589 } else
4a73d3de 590 ret = false;
56cad2d6 591 } else
4a73d3de 592 ret = false;
56cad2d6 593
4a73d3de 594 return ret;
56cad2d6
BP
595}
596
4a73d3de 597static bool f15h_mc2_mce(u16 ec, u8 xec)
70fdb494 598{
4a73d3de 599 bool ret = true;
70fdb494
BP
600
601 if (TLB_ERROR(ec)) {
602 if (xec == 0x0)
603 pr_cont("Data parity TLB read error.\n");
604 else if (xec == 0x1)
605 pr_cont("Poison data provided for TLB fill.\n");
606 else
4a73d3de 607 ret = false;
70fdb494
BP
608 } else if (BUS_ERROR(ec)) {
609 if (xec > 2)
4a73d3de 610 ret = false;
70fdb494
BP
611
612 pr_cont("Error during attempted NB data read.\n");
613 } else if (MEM_ERROR(ec)) {
614 switch (xec) {
615 case 0x4 ... 0xc:
f05c41a9 616 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
70fdb494
BP
617 break;
618
619 case 0x10 ... 0x14:
f05c41a9 620 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
70fdb494
BP
621 break;
622
623 default:
4a73d3de 624 ret = false;
70fdb494 625 }
eba4bfb3
AG
626 } else if (INT_ERROR(ec)) {
627 if (xec <= 0x3f)
628 pr_cont("Hardware Assert.\n");
629 else
630 ret = false;
70fdb494
BP
631 }
632
4a73d3de
JS
633 return ret;
634}
635
980eec8b
JS
636static bool f16h_mc2_mce(u16 ec, u8 xec)
637{
638 u8 r4 = R4(ec);
639
640 if (!MEM_ERROR(ec))
641 return false;
642
643 switch (xec) {
644 case 0x04 ... 0x05:
645 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
646 break;
647
648 case 0x09 ... 0x0b:
649 case 0x0d ... 0x0f:
650 pr_cont("ECC error in L2 tag (%s).\n",
651 ((r4 == R4_GEN) ? "BankReq" :
652 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
653 break;
654
655 case 0x10 ... 0x19:
656 case 0x1b:
657 pr_cont("ECC error in L2 data array (%s).\n",
658 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
659 ((r4 == R4_GEN) ? "Attr" :
660 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
661 break;
662
663 case 0x1c ... 0x1d:
664 case 0x1f:
665 pr_cont("Parity error in L2 attribute bits (%s).\n",
666 ((r4 == R4_RD) ? "Hit" :
667 ((r4 == R4_GEN) ? "Attr" : "Fill")));
668 break;
669
670 default:
671 return false;
672 }
673
674 return true;
675}
676
4a73d3de
JS
677static void decode_mc2_mce(struct mce *m)
678{
679 u16 ec = EC(m->status);
680 u8 xec = XEC(m->status, xec_mask);
70fdb494 681
4a73d3de
JS
682 pr_emerg(HW_ERR "MC2 Error: ");
683
684 if (!fam_ops->mc2_mce(ec, xec))
685 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
70fdb494
BP
686}
687
f05c41a9 688static void decode_mc3_mce(struct mce *m)
f9350efd 689{
62452882
BP
690 u16 ec = EC(m->status);
691 u8 xec = XEC(m->status, xec_mask);
ded50623 692
b18434ca 693 if (boot_cpu_data.x86 >= 0x14) {
f05c41a9 694 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
ded50623
BP
695 " please report on LKML.\n");
696 return;
697 }
f9350efd 698
f05c41a9 699 pr_emerg(HW_ERR "MC3 Error");
f9350efd
BP
700
701 if (xec == 0x0) {
62452882 702 u8 r4 = R4(ec);
f9350efd 703
ded50623 704 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
f05c41a9 705 goto wrong_mc3_mce;
f9350efd 706
62452882 707 pr_cont(" during %s.\n", R4_MSG(ec));
ded50623 708 } else
f05c41a9 709 goto wrong_mc3_mce;
ded50623 710
f9350efd
BP
711 return;
712
f05c41a9
BP
713 wrong_mc3_mce:
714 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
f9350efd
BP
715}
716
f05c41a9 717static void decode_mc4_mce(struct mce *m)
5ce88f6e 718{
68782673
BP
719 struct cpuinfo_x86 *c = &boot_cpu_data;
720 int node_id = amd_get_nb_id(m->extcpu);
721 u16 ec = EC(m->status);
722 u8 xec = XEC(m->status, 0x1f);
723 u8 offset = 0;
5ce88f6e 724
f05c41a9 725 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
5ce88f6e 726
68782673
BP
727 switch (xec) {
728 case 0x0 ... 0xe:
5ce88f6e 729
68782673
BP
730 /* special handling for DRAM ECCs */
731 if (xec == 0x0 || xec == 0x8) {
732 /* no ECCs on F11h */
733 if (c->x86 == 0x11)
f05c41a9 734 goto wrong_mc4_mce;
5ce88f6e 735
f05c41a9 736 pr_cont("%s.\n", mc4_mce_desc[xec]);
5ce88f6e 737
68782673
BP
738 if (nb_bus_decoder)
739 nb_bus_decoder(node_id, m);
740 return;
741 }
5ce88f6e
BP
742 break;
743
744 case 0xf:
745 if (TLB_ERROR(ec))
746 pr_cont("GART Table Walk data error.\n");
747 else if (BUS_ERROR(ec))
748 pr_cont("DMA Exclusion Vector Table Walk error.\n");
749 else
f05c41a9 750 goto wrong_mc4_mce;
68782673 751 return;
5ce88f6e 752
05cd667d 753 case 0x19:
980eec8b 754 if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
05cd667d
BP
755 pr_cont("Compute Unit Data Error.\n");
756 else
f05c41a9 757 goto wrong_mc4_mce;
68782673 758 return;
05cd667d 759
5ce88f6e 760 case 0x1c ... 0x1f:
68782673 761 offset = 13;
5ce88f6e
BP
762 break;
763
764 default:
f05c41a9 765 goto wrong_mc4_mce;
68782673 766 }
5ce88f6e 767
f05c41a9 768 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
5ce88f6e
BP
769 return;
770
f05c41a9
BP
771 wrong_mc4_mce:
772 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
d93cc222 773}
d93cc222 774
f05c41a9 775static void decode_mc5_mce(struct mce *m)
53bd5fed 776{
8259a7e5 777 struct cpuinfo_x86 *c = &boot_cpu_data;
eba4bfb3 778 u16 ec = EC(m->status);
62452882 779 u8 xec = XEC(m->status, xec_mask);
8259a7e5
BP
780
781 if (c->x86 == 0xf || c->x86 == 0x11)
f05c41a9 782 goto wrong_mc5_mce;
fe4ea262 783
f05c41a9 784 pr_emerg(HW_ERR "MC5 Error: ");
8259a7e5 785
eba4bfb3
AG
786 if (INT_ERROR(ec)) {
787 if (xec <= 0x1f) {
788 pr_cont("Hardware Assert.\n");
789 return;
790 } else
791 goto wrong_mc5_mce;
792 }
793
8259a7e5 794 if (xec == 0x0 || xec == 0xc)
f05c41a9 795 pr_cont("%s.\n", mc5_mce_desc[xec]);
aad19e51 796 else if (xec <= 0xd)
f05c41a9 797 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
8259a7e5 798 else
f05c41a9 799 goto wrong_mc5_mce;
8259a7e5
BP
800
801 return;
fe4ea262 802
f05c41a9
BP
803 wrong_mc5_mce:
804 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
53bd5fed
BP
805}
806
f05c41a9 807static void decode_mc6_mce(struct mce *m)
b8f85c47 808{
62452882 809 u8 xec = XEC(m->status, xec_mask);
b8f85c47 810
f05c41a9 811 pr_emerg(HW_ERR "MC6 Error: ");
b8f85c47 812
bc4febe9 813 if (xec > 0x5)
f05c41a9 814 goto wrong_mc6_mce;
b8f85c47 815
bc4febe9 816 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
b8f85c47
BP
817 return;
818
f05c41a9
BP
819 wrong_mc6_mce:
820 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
b8f85c47
BP
821}
822
be0aec23
AG
823static void decode_f17h_core_errors(const char *ip_name, u8 xec,
824 unsigned int mca_type)
825{
826 const char * const *error_desc_array;
827 size_t len;
828
829 pr_emerg(HW_ERR "%s Error: ", ip_name);
830
831 switch (mca_type) {
832 case SMCA_LS:
833 error_desc_array = f17h_ls_mce_desc;
834 len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
835
836 if (xec == 0x4) {
837 pr_cont("Unrecognized LS MCA error code.\n");
838 return;
839 }
840 break;
841
842 case SMCA_IF:
843 error_desc_array = f17h_if_mce_desc;
844 len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
845 break;
846
847 case SMCA_L2_CACHE:
848 error_desc_array = f17h_l2_mce_desc;
849 len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
850 break;
851
852 case SMCA_DE:
853 error_desc_array = f17h_de_mce_desc;
854 len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
855 break;
856
857 case SMCA_EX:
858 error_desc_array = f17h_ex_mce_desc;
859 len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
860 break;
861
862 case SMCA_FP:
863 error_desc_array = f17h_fp_mce_desc;
864 len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
865 break;
866
867 case SMCA_L3_CACHE:
868 error_desc_array = f17h_l3_mce_desc;
869 len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
870 break;
871
872 default:
873 pr_cont("Corrupted MCA core error info.\n");
874 return;
875 }
876
877 if (xec > len) {
878 pr_cont("Unrecognized %s MCA bank error code.\n",
879 amd_core_mcablock_names[mca_type]);
880 return;
881 }
882
883 pr_cont("%s.\n", error_desc_array[xec]);
884}
885
886static void decode_df_errors(u8 xec, unsigned int mca_type)
887{
888 const char * const *error_desc_array;
889 size_t len;
890
891 pr_emerg(HW_ERR "Data Fabric Error: ");
892
893 switch (mca_type) {
894 case SMCA_CS:
895 error_desc_array = f17h_cs_mce_desc;
896 len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
897 break;
898
899 case SMCA_PIE:
900 error_desc_array = f17h_pie_mce_desc;
901 len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
902 break;
903
904 default:
905 pr_cont("Corrupted MCA Data Fabric info.\n");
906 return;
907 }
908
909 if (xec > len) {
910 pr_cont("Unrecognized %s MCA bank error code.\n",
911 amd_df_mcablock_names[mca_type]);
912 return;
913 }
914
915 pr_cont("%s.\n", error_desc_array[xec]);
916}
917
918/* Decode errors according to Scalable MCA specification */
919static void decode_smca_errors(struct mce *m)
920{
921 u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
922 unsigned int hwid, mca_type, i;
923 u8 xec = XEC(m->status, xec_mask);
924 const char * const *error_desc_array;
925 const char *ip_name;
926 u32 low, high;
927 size_t len;
928
929 if (rdmsr_safe(addr, &low, &high)) {
930 pr_emerg("Invalid IP block specified, error information is unreliable.\n");
931 return;
932 }
933
934 hwid = high & MCI_IPID_HWID;
935 mca_type = (high & MCI_IPID_MCATYPE) >> 16;
936
937 pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
938
939 /*
940 * Based on hwid and mca_type values, decode errors from respective IPs.
941 * Note: mca_type values make sense only in the context of an hwid.
942 */
943 for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
944 if (amd_hwids[i].hwid == hwid)
945 break;
946
947 switch (i) {
948 case SMCA_F17H_CORE:
949 ip_name = (mca_type == SMCA_L3_CACHE) ?
950 "L3 Cache" : "F17h Core";
951 return decode_f17h_core_errors(ip_name, xec, mca_type);
952 break;
953
954 case SMCA_DF:
955 return decode_df_errors(xec, mca_type);
956 break;
957
958 case SMCA_UMC:
959 error_desc_array = f17h_umc_mce_desc;
960 len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
961 break;
962
963 case SMCA_PB:
964 error_desc_array = f17h_pb_mce_desc;
965 len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
966 break;
967
968 case SMCA_PSP:
969 error_desc_array = f17h_psp_mce_desc;
970 len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
971 break;
972
973 case SMCA_SMU:
974 error_desc_array = f17h_smu_mce_desc;
975 len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
976 break;
977
978 default:
979 pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
980 return;
981 }
982
983 ip_name = amd_hwids[i].name;
984 pr_emerg(HW_ERR "%s Error: ", ip_name);
985
986 if (xec > len) {
987 pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
988 return;
989 }
990
991 pr_cont("%s.\n", error_desc_array[xec]);
992}
993
6337583d 994static inline void amd_decode_err_code(u16 ec)
d93cc222 995{
980eec8b
JS
996 if (INT_ERROR(ec)) {
997 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
998 return;
999 }
fa7ae8cc
BP
1000
1001 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
1002
1003 if (BUS_ERROR(ec))
1004 pr_cont(", mem/io: %s", II_MSG(ec));
1005 else
1006 pr_cont(", tx: %s", TT_MSG(ec));
1007
1008 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
1009 pr_cont(", mem-tx: %s", R4_MSG(ec));
1010
1011 if (BUS_ERROR(ec))
1012 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
1013 }
1014
1015 pr_cont("\n");
549d042d 1016}
549d042d 1017
5ce88f6e
BP
1018/*
1019 * Filter out unwanted MCE signatures here.
1020 */
1021static bool amd_filter_mce(struct mce *m)
1022{
1023 u8 xec = (m->status >> 16) & 0x1f;
1024
1025 /*
1026 * NB GART TLB error reporting is disabled by default.
1027 */
1028 if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
1029 return true;
1030
1031 return false;
1032}
1033
d5c6770d
BP
1034static const char *decode_error_status(struct mce *m)
1035{
1036 if (m->status & MCI_STATUS_UC) {
1037 if (m->status & MCI_STATUS_PCC)
1038 return "System Fatal error.";
1039 if (m->mcgstatus & MCG_STATUS_RIPV)
1040 return "Uncorrected, software restartable error.";
1041 return "Uncorrected, software containable error.";
1042 }
1043
1044 if (m->status & MCI_STATUS_DEFERRED)
1045 return "Deferred error.";
1046
1047 return "Corrected error, no action required.";
1048}
1049
9cdeb404 1050int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
549d042d 1051{
fb253195 1052 struct mce *m = (struct mce *)data;
f89f8388 1053 struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
b0b07a2b 1054 int ecc;
be0aec23 1055 u32 ebx = cpuid_ebx(0x80000007);
549d042d 1056
5ce88f6e
BP
1057 if (amd_filter_mce(m))
1058 return NOTIFY_STOP;
1059
fd0f5fff
BP
1060 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
1061
1062 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
1063 m->extcpu,
1064 c->x86, c->x86_model, c->x86_mask,
1065 m->bank,
1066 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
99e1dfb7
AG
1067 ((m->status & MCI_STATUS_UC) ? "UE" :
1068 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
fd0f5fff
BP
1069 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
1070 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
1071 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
1072
be0aec23 1073 if (c->x86 >= 0x15)
fd0f5fff
BP
1074 pr_cont("|%s|%s",
1075 ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
1076 ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
1077
be0aec23
AG
1078 if (!!(ebx & BIT(3))) {
1079 u32 low, high;
1080 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
1081
1082 if (!rdmsr_safe(addr, &low, &high) &&
1083 (low & MCI_CONFIG_MCAX))
1084 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
1085 }
1086
fd0f5fff
BP
1087 /* do the two bits[14:13] together */
1088 ecc = (m->status >> 45) & 0x3;
1089 if (ecc)
1090 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
1091
1092 pr_cont("]: 0x%016llx\n", m->status);
1093
1094 if (m->status & MCI_STATUS_ADDRV)
50872ccd 1095 pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
fd0f5fff 1096
be0aec23
AG
1097 if (!!(ebx & BIT(3))) {
1098 decode_smca_errors(m);
1099 goto err_code;
1100 }
1101
fd0f5fff
BP
1102 if (!fam_ops)
1103 goto err_code;
1104
51966241
BP
1105 switch (m->bank) {
1106 case 0:
f05c41a9 1107 decode_mc0_mce(m);
51966241 1108 break;
d93cc222 1109
ab5535e7 1110 case 1:
f05c41a9 1111 decode_mc1_mce(m);
ab5535e7
BP
1112 break;
1113
56cad2d6 1114 case 2:
4a73d3de 1115 decode_mc2_mce(m);
56cad2d6
BP
1116 break;
1117
f9350efd 1118 case 3:
f05c41a9 1119 decode_mc3_mce(m);
f9350efd
BP
1120 break;
1121
51966241 1122 case 4:
f05c41a9 1123 decode_mc4_mce(m);
51966241
BP
1124 break;
1125
53bd5fed 1126 case 5:
f05c41a9 1127 decode_mc5_mce(m);
53bd5fed
BP
1128 break;
1129
b8f85c47 1130 case 6:
f05c41a9 1131 decode_mc6_mce(m);
b8f85c47
BP
1132 break;
1133
51966241
BP
1134 default:
1135 break;
b69b29de 1136 }
51966241 1137
fd0f5fff 1138 err_code:
51966241 1139 amd_decode_err_code(m->status & 0xffff);
fb253195
BP
1140
1141 return NOTIFY_STOP;
549d042d 1142}
9cdeb404 1143EXPORT_SYMBOL_GPL(amd_decode_mce);
f436f8bb 1144
fb253195
BP
1145static struct notifier_block amd_mce_dec_nb = {
1146 .notifier_call = amd_decode_mce,
1147};
1148
f436f8bb
IM
1149static int __init mce_amd_init(void)
1150{
bad11e03 1151 struct cpuinfo_x86 *c = &boot_cpu_data;
be0aec23 1152 u32 ebx;
bad11e03
BP
1153
1154 if (c->x86_vendor != X86_VENDOR_AMD)
fd0f5fff 1155 return -ENODEV;
e045c291 1156
888ab8e6
BP
1157 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
1158 if (!fam_ops)
1159 return -ENOMEM;
1160
bad11e03 1161 switch (c->x86) {
888ab8e6 1162 case 0xf:
f05c41a9
BP
1163 fam_ops->mc0_mce = k8_mc0_mce;
1164 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1165 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1166 break;
1167
1168 case 0x10:
f05c41a9
BP
1169 fam_ops->mc0_mce = f10h_mc0_mce;
1170 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1171 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1172 break;
1173
f0157b3a 1174 case 0x11:
f05c41a9
BP
1175 fam_ops->mc0_mce = k8_mc0_mce;
1176 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1177 fam_ops->mc2_mce = k8_mc2_mce;
f0157b3a
BP
1178 break;
1179
9be0bb10 1180 case 0x12:
f05c41a9
BP
1181 fam_ops->mc0_mce = f12h_mc0_mce;
1182 fam_ops->mc1_mce = k8_mc1_mce;
4a73d3de 1183 fam_ops->mc2_mce = k8_mc2_mce;
9be0bb10
BP
1184 break;
1185
888ab8e6 1186 case 0x14:
980eec8b
JS
1187 fam_ops->mc0_mce = cat_mc0_mce;
1188 fam_ops->mc1_mce = cat_mc1_mce;
4a73d3de 1189 fam_ops->mc2_mce = k8_mc2_mce;
888ab8e6
BP
1190 break;
1191
2be64bfa 1192 case 0x15:
eba4bfb3
AG
1193 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1194
f05c41a9
BP
1195 fam_ops->mc0_mce = f15h_mc0_mce;
1196 fam_ops->mc1_mce = f15h_mc1_mce;
4a73d3de 1197 fam_ops->mc2_mce = f15h_mc2_mce;
2be64bfa
BP
1198 break;
1199
980eec8b
JS
1200 case 0x16:
1201 xec_mask = 0x1f;
1202 fam_ops->mc0_mce = cat_mc0_mce;
1203 fam_ops->mc1_mce = cat_mc1_mce;
1204 fam_ops->mc2_mce = f16h_mc2_mce;
1205 break;
1206
be0aec23
AG
1207 case 0x17:
1208 ebx = cpuid_ebx(0x80000007);
1209 xec_mask = 0x3f;
1210 if (!(ebx & BIT(3))) {
1211 printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
1212 goto err_out;
1213 }
1214 break;
1215
888ab8e6 1216 default:
ec3e82d6 1217 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
be0aec23 1218 goto err_out;
888ab8e6
BP
1219 }
1220
9530d608
BP
1221 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1222
3653ada5 1223 mce_register_decode_chain(&amd_mce_dec_nb);
f436f8bb
IM
1224
1225 return 0;
be0aec23
AG
1226
1227err_out:
1228 kfree(fam_ops);
1229 fam_ops = NULL;
1230 return -EINVAL;
f436f8bb
IM
1231}
1232early_initcall(mce_amd_init);
0d18b2e3
BP
1233
#ifdef MODULE
/*
 * Module teardown: take the decoder off the MCE decode chain, then release
 * the per-family callback table.
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
}
module_exit(mce_amd_exit);

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
#endif