Merge tag 'for-linus-4.20a-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / arch / powerpc / kernel / mce.c
CommitLineData
36df96f8
MS
1/*
2 * Machine check exception handling.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2013 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22#undef DEBUG
23#define pr_fmt(fmt) "mce: " fmt
24
ccd3cd36 25#include <linux/hardirq.h>
36df96f8
MS
26#include <linux/types.h>
27#include <linux/ptrace.h>
28#include <linux/percpu.h>
29#include <linux/export.h>
30c82635 30#include <linux/irq_work.h>
ccd3cd36
ME
31
32#include <asm/machdep.h>
36df96f8
MS
33#include <asm/mce.h>
34
35static DEFINE_PER_CPU(int, mce_nest_count);
36static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
37
b5ff4211
MS
38/* Queue for delayed MCE events. */
39static DEFINE_PER_CPU(int, mce_queue_count);
40static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
41
733e4a4c
BS
42/* Queue for delayed MCE UE events. */
43static DEFINE_PER_CPU(int, mce_ue_count);
44static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
45 mce_ue_event_queue);
46
30c82635 47static void machine_check_process_queued_event(struct irq_work *work);
733e4a4c
BS
48void machine_check_ue_event(struct machine_check_event *evt);
49static void machine_process_ue_event(struct work_struct *work);
50
635218c7 51static struct irq_work mce_event_process_work = {
30c82635
MS
52 .func = machine_check_process_queued_event,
53};
54
733e4a4c
BS
55DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
56
36df96f8
MS
57static void mce_set_error_info(struct machine_check_event *mce,
58 struct mce_error_info *mce_err)
59{
60 mce->error_type = mce_err->error_type;
61 switch (mce_err->error_type) {
62 case MCE_ERROR_TYPE_UE:
63 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
64 break;
65 case MCE_ERROR_TYPE_SLB:
66 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
67 break;
68 case MCE_ERROR_TYPE_ERAT:
69 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
70 break;
71 case MCE_ERROR_TYPE_TLB:
72 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
73 break;
7b9f71f9
NP
74 case MCE_ERROR_TYPE_USER:
75 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
76 break;
77 case MCE_ERROR_TYPE_RA:
78 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
79 break;
80 case MCE_ERROR_TYPE_LINK:
81 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
82 break;
36df96f8
MS
83 case MCE_ERROR_TYPE_UNKNOWN:
84 default:
85 break;
86 }
87}
88
89/*
90 * Decode and save high level MCE information into per cpu buffer which
91 * is an array of machine_check_event structure.
92 */
93void save_mce_event(struct pt_regs *regs, long handled,
94 struct mce_error_info *mce_err,
ba41e1e1 95 uint64_t nip, uint64_t addr, uint64_t phys_addr)
36df96f8 96{
ffb2d78e 97 int index = __this_cpu_inc_return(mce_nest_count) - 1;
69111bac 98 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
36df96f8
MS
99
100 /*
101 * Return if we don't have enough space to log mce event.
102 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
103 * the check below will stop buffer overrun.
104 */
105 if (index >= MAX_MC_EVT)
106 return;
107
108 /* Populate generic machine check info */
109 mce->version = MCE_V1;
55672ecf 110 mce->srr0 = nip;
36df96f8
MS
111 mce->srr1 = regs->msr;
112 mce->gpr3 = regs->gpr[3];
113 mce->in_use = 1;
114
c74dd88e
MS
115 /* Mark it recovered if we have handled it and MSR(RI=1). */
116 if (handled && (regs->msr & MSR_RI))
36df96f8
MS
117 mce->disposition = MCE_DISPOSITION_RECOVERED;
118 else
119 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
c1bbf387
NP
120
121 mce->initiator = mce_err->initiator;
122 mce->severity = mce_err->severity;
36df96f8 123
36df96f8
MS
124 /*
125 * Populate the mce error_type and type-specific error_type.
126 */
127 mce_set_error_info(mce, mce_err);
128
129 if (!addr)
130 return;
131
132 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
133 mce->u.tlb_error.effective_address_provided = true;
134 mce->u.tlb_error.effective_address = addr;
135 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
136 mce->u.slb_error.effective_address_provided = true;
137 mce->u.slb_error.effective_address = addr;
138 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
139 mce->u.erat_error.effective_address_provided = true;
140 mce->u.erat_error.effective_address = addr;
7b9f71f9
NP
141 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
142 mce->u.user_error.effective_address_provided = true;
143 mce->u.user_error.effective_address = addr;
144 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
145 mce->u.ra_error.effective_address_provided = true;
146 mce->u.ra_error.effective_address = addr;
147 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
148 mce->u.link_error.effective_address_provided = true;
149 mce->u.link_error.effective_address = addr;
36df96f8
MS
150 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
151 mce->u.ue_error.effective_address_provided = true;
152 mce->u.ue_error.effective_address = addr;
ba41e1e1
BS
153 if (phys_addr != ULONG_MAX) {
154 mce->u.ue_error.physical_address_provided = true;
155 mce->u.ue_error.physical_address = phys_addr;
733e4a4c 156 machine_check_ue_event(mce);
ba41e1e1 157 }
36df96f8
MS
158 }
159 return;
160}
161
162/*
163 * get_mce_event:
164 * mce Pointer to machine_check_event structure to be filled.
165 * release Flag to indicate whether to free the event slot or not.
166 * 0 <= do not release the mce event. Caller will invoke
167 * release_mce_event() once event has been consumed.
168 * 1 <= release the slot.
169 *
170 * return 1 = success
171 * 0 = failure
172 *
173 * get_mce_event() will be called by platform specific machine check
174 * handle routine and in KVM.
175 * When we call get_mce_event(), we are still in interrupt context and
176 * preemption will not be scheduled until ret_from_expect() routine
177 * is called.
178 */
179int get_mce_event(struct machine_check_event *mce, bool release)
180{
69111bac 181 int index = __this_cpu_read(mce_nest_count) - 1;
36df96f8
MS
182 struct machine_check_event *mc_evt;
183 int ret = 0;
184
185 /* Sanity check */
186 if (index < 0)
187 return ret;
188
189 /* Check if we have MCE info to process. */
190 if (index < MAX_MC_EVT) {
69111bac 191 mc_evt = this_cpu_ptr(&mce_event[index]);
36df96f8
MS
192 /* Copy the event structure and release the original */
193 if (mce)
194 *mce = *mc_evt;
195 if (release)
196 mc_evt->in_use = 0;
197 ret = 1;
198 }
199 /* Decrement the count to free the slot. */
200 if (release)
69111bac 201 __this_cpu_dec(mce_nest_count);
36df96f8
MS
202
203 return ret;
204}
205
206void release_mce_event(void)
207{
208 get_mce_event(NULL, true);
209}
b5ff4211 210
733e4a4c
BS
211
212/*
213 * Queue up the MCE event which then can be handled later.
214 */
215void machine_check_ue_event(struct machine_check_event *evt)
216{
217 int index;
218
219 index = __this_cpu_inc_return(mce_ue_count) - 1;
220 /* If queue is full, just return for now. */
221 if (index >= MAX_MC_EVT) {
222 __this_cpu_dec(mce_ue_count);
223 return;
224 }
225 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
226
227 /* Queue work to process this event later. */
228 schedule_work(&mce_ue_event_work);
229}
230
b5ff4211
MS
231/*
232 * Queue up the MCE event which then can be handled later.
233 */
234void machine_check_queue_event(void)
235{
236 int index;
237 struct machine_check_event evt;
238
239 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
240 return;
241
ffb2d78e 242 index = __this_cpu_inc_return(mce_queue_count) - 1;
b5ff4211
MS
243 /* If queue is full, just return for now. */
244 if (index >= MAX_MC_EVT) {
69111bac 245 __this_cpu_dec(mce_queue_count);
b5ff4211
MS
246 return;
247 }
69111bac 248 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
30c82635
MS
249
250 /* Queue irq work to process this event later. */
251 irq_work_queue(&mce_event_process_work);
b5ff4211 252}
733e4a4c
BS
/*
 * Process pending UE (uncorrectable error) events that were queued by
 * machine_check_ue_event().  Runs in process context via schedule_work()
 * on mce_ue_event_work (the original comment claimed "syscall exit",
 * which does not match how this handler is dispatched).
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	/* Drain this CPU's UE queue from the newest entry backwards. */
	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably queued elsewhere, but
		 * oh! well
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				/* Hand the bad page frame to the HWPOISON code. */
				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		/* Entry consumed: pop it from the queue. */
		__this_cpu_dec(mce_ue_count);
	}
}
b5ff4211
MS
/*
 * Process pending MCE events from the per-cpu mce event queue.  Runs as
 * irq_work (see mce_event_process_work), queued by
 * machine_check_queue_event().
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	/* A machine check happened: the kernel state is now suspect. */
	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		/* false: report as a kernel-context event. */
		machine_check_print_event_info(evt, false);
		__this_cpu_dec(mce_queue_count);
	}
}
308
63f44d65
ME
/*
 * Pretty-print a decoded machine check event to the console.
 *
 * @evt:       the machine_check_event to report; must be MCE_V1.
 * @user_mode: true if the MCE was taken from user context - selects
 *             whether srr0 is printed with PID/comm or as a kernel
 *             symbol via %pS.
 *
 * The printk severity level is derived from evt->severity so that
 * harmless events log at KERN_INFO while fatal ones log at KERN_ERR.
 */
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode)
{
	const char *level, *sevstr, *subtype;
	/* Subtype name tables, indexed by the type-specific error code. */
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	/* Map event severity to a printk level and a human-readable tag. */
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case MCE_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (user_mode) {
		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
			evt->srr0, current->pid, current->comm);
	} else {
		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
		       (void *)evt->srr0);
	}

	printk("%s  Initiator: %s\n", level,
	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
	/*
	 * Per error type: resolve the subtype name (guarding against
	 * out-of-range codes) and print the effective/physical address
	 * when the event says one was captured.
	 */
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		printk("%s  Error type: UE [%s]\n", level, subtype);
		if (evt->u.ue_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.ue_error.effective_address);
		if (evt->u.ue_error.physical_address_provided)
			printk("%s    Physical address:  %016llx\n",
			       level, evt->u.ue_error.physical_address);
		break;
	case MCE_ERROR_TYPE_SLB:
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s  Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s  Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s  Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_USER:
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		printk("%s  Error type: User [%s]\n", level, subtype);
		if (evt->u.user_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.user_error.effective_address);
		break;
	case MCE_ERROR_TYPE_RA:
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		printk("%s  Error type: Real address [%s]\n", level, subtype);
		if (evt->u.ra_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.ra_error.effective_address);
		break;
	case MCE_ERROR_TYPE_LINK:
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		printk("%s  Error type: Link [%s]\n", level, subtype);
		if (evt->u.link_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.link_error.effective_address);
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		printk("%s  Error type: Unknown\n", level);
		break;
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
b63a0ffe 481
ccd3cd36
ME
482/*
483 * This function is called in real mode. Strictly no printk's please.
484 *
485 * regs->nip and regs->msr contains srr0 and ssr1.
486 */
487long machine_check_early(struct pt_regs *regs)
488{
489 long handled = 0;
490
a43c1590
MS
491 /*
492 * See if platform is capable of handling machine check.
493 */
494 if (ppc_md.machine_check_early)
495 handled = ppc_md.machine_check_early(regs);
ccd3cd36
ME
496 return handled;
497}
498
d075745d
PM
/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

/*
 * Boot-time setup: decide what the HMER debug trigger means on this
 * machine.  The device tree property "ibm,hmi-special-triggers" wins;
 * otherwise the meaning is inferred from the POWER9 Nimbus PVR level.
 */
static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	/* Pin the CPU so smp_processor_id() stays valid for the lookup. */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/*
	 * If we found the property, don't look at PVR.
	 * (the iterator leaves prop non-NULL when the property existed)
	 */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	/* Log (at debug level only) which interpretation was chosen. */
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);
557
/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	/* Clear the trigger bit in our local copy for the check below. */
	hmer &= ~HMER_DEBUG_TRIG;
	/*
	 * HMER is a write-AND register: writing the complement of
	 * HMER_DEBUG_TRIG clears only that bit in the hardware register.
	 */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 * (any enabled cause bit still set means the generic path
	 * must run too).
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}
603
/*
 * Real-mode HMI entry point.
 * Return values:
 *  0 or 1 as produced by hmi_handle_debugtrig() when the HMI was a
 *  known debug-trigger cause; otherwise 1 after the generic handling
 *  (subcore guest exit wait, platform early handler, TB resync).
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	/* Account this HMI in the per-cpu irq statistics. */
	__this_cpu_inc(irq_stat.hmi_exceptions);

	/* ret >= 0: the debug-trigger path fully classified this HMI. */
	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}