/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */
#define pr_fmt(fmt) "mce: " fmt
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>
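
/*
 * Machine checks can nest (a second MCE may be taken while an earlier
 * one is still being processed), so each CPU keeps a fixed-size array
 * of events, MAX_MC_EVT deep, indexed by a per-CPU nesting counter.
 * The counters below are only updated with this_cpu ops from the
 * owning CPU, so no locking is required.
 */
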
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
                                        mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
        .func = machine_check_process_queued_event,
};

static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
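
/*
 * Two deferral mechanisms are used: ordinary queued events are drained
 * via irq_work, which runs as soon as interrupts are re-enabled, while
 * UE events additionally go through the regular workqueue, since acting
 * on them (memory_failure() below) requires process context.
 */
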
static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
{
        mce->error_type = mce_err->error_type;
        switch (mce_err->error_type) {
        case MCE_ERROR_TYPE_UE:
                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
                break;
        case MCE_ERROR_TYPE_SLB:
                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
                break;
        case MCE_ERROR_TYPE_ERAT:
                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
                break;
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
        case MCE_ERROR_TYPE_USER:
                mce->u.user_error.user_error_type = mce_err->u.user_error_type;
                break;
        case MCE_ERROR_TYPE_RA:
                mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
                break;
        case MCE_ERROR_TYPE_LINK:
                mce->u.link_error.link_error_type = mce_err->u.link_error_type;
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
        }
}
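
/*
 * The subtype fields above live in a union, so only the member that
 * matches mce->error_type is meaningful; consumers must switch on
 * error_type before reading it, as machine_check_print_event_info()
 * does below.
 */
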
/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
                    struct mce_error_info *mce_err,
                    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
        int index = __this_cpu_inc_return(mce_nest_count) - 1;
        struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

        /*
         * Return if we don't have enough space to log mce event.
         * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
         * the check below will stop buffer overrun.
         */
        if (index >= MAX_MC_EVT)
                return;

        /* Populate generic machine check info */
        mce->version = MCE_V1;
        mce->srr0 = nip;
        mce->srr1 = regs->msr;
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;
        mce->cpu = get_paca()->paca_index;

        /* Mark it recovered if we have handled it and MSR(RI=1). */
        if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

        mce->initiator = mce_err->initiator;
        mce->severity = mce_err->severity;
        mce->sync_error = mce_err->sync_error;
        mce->error_class = mce_err->error_class;

        /*
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);

        if (!addr)
                return;

        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
                mce->u.tlb_error.effective_address_provided = true;
                mce->u.tlb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
                mce->u.slb_error.effective_address_provided = true;
                mce->u.slb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
                mce->u.user_error.effective_address_provided = true;
                mce->u.user_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
                mce->u.ra_error.effective_address_provided = true;
                mce->u.ra_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
                mce->u.link_error.effective_address_provided = true;
                mce->u.link_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
                if (phys_addr != ULONG_MAX) {
                        mce->u.ue_error.physical_address_provided = true;
                        mce->u.ue_error.physical_address = phys_addr;
                        machine_check_ue_event(mce);
                }
        }
}
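
/*
 * Note that a UE event with a known physical address is queued for
 * deferred handling right away: save_mce_event() is called from the
 * early (real mode) machine check path, where memory_failure() cannot
 * be invoked, so the page poisoning is pushed out to the workqueue
 * via machine_check_ue_event().
 */
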
/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once the event has been consumed.
 *		1 <= release the slot.
 *
 * return	1 = success
 *		0 = failure.
 *
 * get_mce_event() will be called by the platform specific machine check
 * handler routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * runs.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
        int index = __this_cpu_read(mce_nest_count) - 1;
        struct machine_check_event *mc_evt;
        int ret = 0;

        /* Sanity check */
        if (index < 0)
                return ret;

        /* Check if we have MCE info to process. */
        if (index < MAX_MC_EVT) {
                mc_evt = this_cpu_ptr(&mce_event[index]);
                /* Copy the event structure and release the original */
                if (mce)
                        *mce = *mc_evt;
                if (release)
                        mc_evt->in_use = 0;
                ret = 1;
        }
        /* Decrement the count to free the slot. */
        if (release)
                __this_cpu_dec(mce_nest_count);

        return ret;
}

void release_mce_event(void)
{
        get_mce_event(NULL, true);
}
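
/*
 * Typical consumer pattern, as an illustrative sketch (not a caller in
 * this file; MCE_EVENT_DONTRELEASE is assumed to be the "keep the slot"
 * counterpart of the MCE_EVENT_RELEASE flag used below):
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		... inspect evt while the per-CPU slot stays live ...
 *		release_mce_event();
 *	}
 */
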
/*
 * Queue up the MCE UE event which can then be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
        int index;

        index = __this_cpu_inc_return(mce_ue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_ue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

        /* Queue work to process this event later. */
        schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which can then be handled later.
 */
void machine_check_queue_event(void)
{
        int index;
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;

        index = __this_cpu_inc_return(mce_queue_count) - 1;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __this_cpu_dec(mce_queue_count);
                return;
        }
        memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

        /* Queue irq work to process this event later. */
        irq_work_queue(&mce_event_process_work);
}
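
/*
 * irq_work_queue() is one of the few things that is safe to call from
 * the machine check handler itself, which is why the event is queued
 * here and only printed later: printk() is not safe in that context.
 */
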
/*
 * Process pending MCE UE events from the per-CPU queue. This work
 * function runs from process context via the workqueue, where
 * memory_failure() may safely be called.
 */
static void machine_process_ue_event(struct work_struct *work)
{
        int index;
        struct machine_check_event *evt;

        while (__this_cpu_read(mce_ue_count) > 0) {
                index = __this_cpu_read(mce_ue_count) - 1;
                evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
                /*
                 * This should probably be queued elsewhere, but
                 * oh well.
                 */
                if (evt->error_type == MCE_ERROR_TYPE_UE) {
                        if (evt->u.ue_error.physical_address_provided) {
                                unsigned long pfn;

                                pfn = evt->u.ue_error.physical_address >>
                                        PAGE_SHIFT;
                                memory_failure(pfn, 0);
                        } else
                                pr_warn("Failed to identify bad address from "
                                        "where the uncorrectable error (UE) "
                                        "was generated\n");
                }
#endif
                __this_cpu_dec(mce_ue_count);
        }
}
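
/*
 * memory_failure() takes a page frame number, hence the
 * physical_address >> PAGE_SHIFT conversion above; the second argument
 * is the flags word, 0 meaning no special handling is requested.
 */
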
/*
 * Process pending MCE events from the per-CPU event queue. This
 * function runs via irq_work, shortly after the machine check handler
 * returns and interrupts are re-enabled.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
        int index;
        struct machine_check_event *evt;

        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

        /*
         * For now just print it to console.
         * TODO: log this error event to FSP or nvram.
         */
        while (__this_cpu_read(mce_queue_count) > 0) {
                index = __this_cpu_read(mce_queue_count) - 1;
                evt = this_cpu_ptr(&mce_event_queue[index]);
                machine_check_print_event_info(evt, false, false);
                __this_cpu_dec(mce_queue_count);
        }
}

void machine_check_print_event_info(struct machine_check_event *evt,
                                    bool user_mode, bool in_guest)
{
        const char *level, *sevstr, *subtype, *err_type;
        uint64_t ea = 0, pa = 0;
        int n = 0;
        char dar_str[50];
        char pa_str[50];
        static const char *mc_ue_types[] = {
                "Indeterminate",
                "Instruction fetch",
                "Page table walk ifetch",
                "Load/Store",
                "Page table walk Load/Store",
        };
        static const char *mc_slb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_erat_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_tlb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_user_types[] = {
                "Indeterminate",
                "tlbie(l) invalid",
        };
        static const char *mc_ra_types[] = {
                "Indeterminate",
                "Instruction fetch (bad)",
                "Instruction fetch (foreign)",
                "Page table walk ifetch (bad)",
                "Page table walk ifetch (foreign)",
                "Load (bad)",
                "Store (bad)",
                "Page table walk Load/Store (bad)",
                "Page table walk Load/Store (foreign)",
                "Load/Store (foreign)",
        };
        static const char *mc_link_types[] = {
                "Indeterminate",
                "Instruction fetch (timeout)",
                "Page table walk ifetch (timeout)",
                "Load (timeout)",
                "Store (timeout)",
                "Page table walk Load/Store (timeout)",
        };
        static const char *mc_error_class[] = {
                "Unknown",
                "Hardware error",
                "Probable Hardware error (some chance of software cause)",
                "Software error",
                "Probable Software error (some chance of hardware cause)",
        };

        /* Print things out */
        if (evt->version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt->version);
                return;
        }
        switch (evt->severity) {
        case MCE_SEV_NO_ERROR:
                level = KERN_INFO;
                sevstr = "Harmless";
                break;
        case MCE_SEV_WARNING:
                level = KERN_WARNING;
                sevstr = "Warning";
                break;
        case MCE_SEV_SEVERE:
                level = KERN_ERR;
                sevstr = "Severe";
                break;
        case MCE_SEV_FATAL:
        default:
                level = KERN_ERR;
                sevstr = "Fatal";
                break;
        }

        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                err_type = "UE";
                subtype = evt->u.ue_error.ue_error_type <
                        ARRAY_SIZE(mc_ue_types) ?
                        mc_ue_types[evt->u.ue_error.ue_error_type]
                        : "Unknown";
                if (evt->u.ue_error.effective_address_provided)
                        ea = evt->u.ue_error.effective_address;
                if (evt->u.ue_error.physical_address_provided)
                        pa = evt->u.ue_error.physical_address;
                break;
        case MCE_ERROR_TYPE_SLB:
                err_type = "SLB";
                subtype = evt->u.slb_error.slb_error_type <
                        ARRAY_SIZE(mc_slb_types) ?
                        mc_slb_types[evt->u.slb_error.slb_error_type]
                        : "Unknown";
                if (evt->u.slb_error.effective_address_provided)
                        ea = evt->u.slb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_ERAT:
                err_type = "ERAT";
                subtype = evt->u.erat_error.erat_error_type <
                        ARRAY_SIZE(mc_erat_types) ?
                        mc_erat_types[evt->u.erat_error.erat_error_type]
                        : "Unknown";
                if (evt->u.erat_error.effective_address_provided)
                        ea = evt->u.erat_error.effective_address;
                break;
        case MCE_ERROR_TYPE_TLB:
                err_type = "TLB";
                subtype = evt->u.tlb_error.tlb_error_type <
                        ARRAY_SIZE(mc_tlb_types) ?
                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
                        : "Unknown";
                if (evt->u.tlb_error.effective_address_provided)
                        ea = evt->u.tlb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_USER:
                err_type = "User";
                subtype = evt->u.user_error.user_error_type <
                        ARRAY_SIZE(mc_user_types) ?
                        mc_user_types[evt->u.user_error.user_error_type]
                        : "Unknown";
                if (evt->u.user_error.effective_address_provided)
                        ea = evt->u.user_error.effective_address;
                break;
        case MCE_ERROR_TYPE_RA:
                err_type = "Real address";
                subtype = evt->u.ra_error.ra_error_type <
                        ARRAY_SIZE(mc_ra_types) ?
                        mc_ra_types[evt->u.ra_error.ra_error_type]
                        : "Unknown";
                if (evt->u.ra_error.effective_address_provided)
                        ea = evt->u.ra_error.effective_address;
                break;
        case MCE_ERROR_TYPE_LINK:
                err_type = "Link";
                subtype = evt->u.link_error.link_error_type <
                        ARRAY_SIZE(mc_link_types) ?
                        mc_link_types[evt->u.link_error.link_error_type]
                        : "Unknown";
                if (evt->u.link_error.effective_address_provided)
                        ea = evt->u.link_error.effective_address;
                break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                err_type = "Unknown";
                subtype = "";
                break;
        }

        dar_str[0] = pa_str[0] = '\0';
        if (ea && evt->srr0 != ea) {
                /* Load/Store address */
                n = sprintf(dar_str, "DAR: %016llx ", ea);
                if (pa)
                        sprintf(dar_str + n, "paddr: %016llx ", pa);
        } else if (pa) {
                sprintf(pa_str, " paddr: %016llx", pa);
        }

        printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
               level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
               err_type, subtype, dar_str,
               evt->disposition == MCE_DISPOSITION_RECOVERED ?
               "Recovered" : "Not recovered");

        if (in_guest || user_mode) {
                printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
                       level, evt->cpu, current->pid, current->comm,
                       in_guest ? "Guest " : "", evt->srr0, pa_str);
        } else {
                printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
                       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
        }

        subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
                mc_error_class[evt->error_class] : "Unknown";
        printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
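
/*
 * For illustration, a recovered SLB multihit on the host would be
 * reported along these lines (addresses are made up; this is derived
 * from the three printk formats above, not captured output):
 *
 *	MCE: CPU0: machine check (Warning) Host SLB Multihit DAR: 00007fff8a2b0000 [Recovered]
 *	MCE: CPU0: NIP: [c000000000123456] some_function+0x26/0xb0
 *	MCE: CPU0: Hardware error
 */
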
/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain srr0 and srr1.
 */
long machine_check_early(struct pt_regs *regs)
{
        long handled = 0;

        hv_nmi_check_nonrecoverable(regs);

        /*
         * See if platform is capable of handling machine check.
         */
        if (ppc_md.machine_check_early)
                handled = ppc_md.machine_check_early(regs);

        return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
        DTRIG_UNKNOWN,
        DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
        DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;
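
/*
 * The trigger in question is bit 17 of the HMER, as reflected in the
 * "bit17-*" device tree strings parsed below; which workaround that
 * bit signals depends on the CPU revision, so it is resolved once at
 * boot by init_debug_trig_function().
 */
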
static int init_debug_trig_function(void)
{
        int pvr;
        struct device_node *cpun;
        struct property *prop = NULL;
        const char *str;

        /* First look in the device tree */
        preempt_disable();
        cpun = of_get_cpu_node(smp_processor_id(), NULL);
        if (cpun) {
                of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
                                            prop, str) {
                        if (strcmp(str, "bit17-vector-ci-load") == 0)
                                hmer_debug_trig_function = DTRIG_VECTOR_CI;
                        else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
                                hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                }
                of_node_put(cpun);
        }
        preempt_enable();

        /* If we found the property, don't look at PVR */
        if (prop)
                goto out;

        pvr = mfspr(SPRN_PVR);
        /* Check for POWER9 Nimbus (scale-out) */
        if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
                /* DD2.2 and later */
                if ((pvr & 0xfff) >= 0x202)
                        hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                /* DD2.0 and DD2.1 - used for vector CI load emulation */
                else if ((pvr & 0xfff) >= 0x200)
                        hmer_debug_trig_function = DTRIG_VECTOR_CI;
        }

 out:
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                pr_debug("HMI debug trigger used for vector CI load\n");
                break;
        case DTRIG_SUSPEND_ESCAPE:
                pr_debug("HMI debug trigger used for TM suspend escape\n");
                break;
        default:
                break;
        }

        return 0;
}
__initcall(init_debug_trig_function);
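
/*
 * An illustrative device tree snippet (hypothetical node, matching the
 * strings checked above) that would select the TM suspend escape
 * function regardless of PVR:
 *
 *	cpu@0 {
 *		ibm,hmi-special-triggers = "bit17-tm-suspend-escape";
 *	};
 */
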
/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
        unsigned long hmer = mfspr(SPRN_HMER);
        long ret = 0;

        /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
        if (!((hmer & HMER_DEBUG_TRIG)
              && hmer_debug_trig_function != DTRIG_UNKNOWN))
                return -1;

        hmer &= ~HMER_DEBUG_TRIG;
        /* HMER is a write-AND register */
        mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
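
        /*
         * "Write-AND" means the value written is ANDed into the register
         * rather than replacing it: writing ~HMER_DEBUG_TRIG (all ones
         * except the trigger bit) clears only that bit and leaves any
         * other pending HMI causes intact.
         */
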
        switch (hmer_debug_trig_function) {
        case DTRIG_VECTOR_CI:
                /*
                 * Now to avoid problems with soft-disable we
                 * only do the emulation if we are coming from
                 * host user space
                 */
                if (regs && user_mode(regs))
                        ret = local_paca->hmi_p9_special_emu = 1;

                break;

        default:
                break;
        }

        /*
         * See if any other HMI causes remain to be handled
         */
        if (hmer & mfspr(SPRN_HMEER))
                return -1;

        return ret;
}
long hmi_exception_realmode(struct pt_regs *regs)
{
        int ret;

        __this_cpu_inc(irq_stat.hmi_exceptions);

        ret = hmi_handle_debugtrig(regs);
        if (ret >= 0)
                return ret;

        wait_for_subcore_guest_exit();

        if (ppc_md.hmi_exception_early)
                ppc_md.hmi_exception_early(regs);

        wait_for_tb_resync();

        return 1;
}