Commit | Line | Data |
---|---|---|
36df96f8 MS |
1 | /* |
2 | * Machine check exception handling. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | * | |
18 | * Copyright 2013 IBM Corporation | |
19 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | |
20 | */ | |
21 | ||
22 | #undef DEBUG | |
23 | #define pr_fmt(fmt) "mce: " fmt | |
24 | ||
25 | #include <linux/types.h> | |
26 | #include <linux/ptrace.h> | |
27 | #include <linux/percpu.h> | |
28 | #include <linux/export.h> | |
30c82635 | 29 | #include <linux/irq_work.h> |
36df96f8 MS |
30 | #include <asm/mce.h> |
31 | ||
32 | static DEFINE_PER_CPU(int, mce_nest_count); | |
33 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); | |
34 | ||
b5ff4211 MS |
35 | /* Queue for delayed MCE events. */ |
36 | static DEFINE_PER_CPU(int, mce_queue_count); | |
37 | static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); | |
38 | ||
30c82635 | 39 | static void machine_check_process_queued_event(struct irq_work *work); |
635218c7 | 40 | static struct irq_work mce_event_process_work = { |
30c82635 MS |
41 | .func = machine_check_process_queued_event, |
42 | }; | |
43 | ||
36df96f8 MS |
44 | static void mce_set_error_info(struct machine_check_event *mce, |
45 | struct mce_error_info *mce_err) | |
46 | { | |
47 | mce->error_type = mce_err->error_type; | |
48 | switch (mce_err->error_type) { | |
49 | case MCE_ERROR_TYPE_UE: | |
50 | mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; | |
51 | break; | |
52 | case MCE_ERROR_TYPE_SLB: | |
53 | mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; | |
54 | break; | |
55 | case MCE_ERROR_TYPE_ERAT: | |
56 | mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; | |
57 | break; | |
58 | case MCE_ERROR_TYPE_TLB: | |
59 | mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; | |
60 | break; | |
7b9f71f9 NP |
61 | case MCE_ERROR_TYPE_USER: |
62 | mce->u.user_error.user_error_type = mce_err->u.user_error_type; | |
63 | break; | |
64 | case MCE_ERROR_TYPE_RA: | |
65 | mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; | |
66 | break; | |
67 | case MCE_ERROR_TYPE_LINK: | |
68 | mce->u.link_error.link_error_type = mce_err->u.link_error_type; | |
69 | break; | |
36df96f8 MS |
70 | case MCE_ERROR_TYPE_UNKNOWN: |
71 | default: | |
72 | break; | |
73 | } | |
74 | } | |
75 | ||
76 | /* | |
77 | * Decode and save high level MCE information into per cpu buffer which | |
78 | * is an array of machine_check_event structure. | |
79 | */ | |
80 | void save_mce_event(struct pt_regs *regs, long handled, | |
81 | struct mce_error_info *mce_err, | |
55672ecf | 82 | uint64_t nip, uint64_t addr) |
36df96f8 | 83 | { |
ffb2d78e | 84 | int index = __this_cpu_inc_return(mce_nest_count) - 1; |
69111bac | 85 | struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
86 | |
87 | /* | |
88 | * Return if we don't have enough space to log mce event. | |
89 | * mce_nest_count may go beyond MAX_MC_EVT but that's ok, | |
90 | * the check below will stop buffer overrun. | |
91 | */ | |
92 | if (index >= MAX_MC_EVT) | |
93 | return; | |
94 | ||
95 | /* Populate generic machine check info */ | |
96 | mce->version = MCE_V1; | |
55672ecf | 97 | mce->srr0 = nip; |
36df96f8 MS |
98 | mce->srr1 = regs->msr; |
99 | mce->gpr3 = regs->gpr[3]; | |
100 | mce->in_use = 1; | |
101 | ||
c74dd88e MS |
102 | /* Mark it recovered if we have handled it and MSR(RI=1). */ |
103 | if (handled && (regs->msr & MSR_RI)) | |
36df96f8 MS |
104 | mce->disposition = MCE_DISPOSITION_RECOVERED; |
105 | else | |
106 | mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; | |
c1bbf387 NP |
107 | |
108 | mce->initiator = mce_err->initiator; | |
109 | mce->severity = mce_err->severity; | |
36df96f8 | 110 | |
36df96f8 MS |
111 | /* |
112 | * Populate the mce error_type and type-specific error_type. | |
113 | */ | |
114 | mce_set_error_info(mce, mce_err); | |
115 | ||
116 | if (!addr) | |
117 | return; | |
118 | ||
119 | if (mce->error_type == MCE_ERROR_TYPE_TLB) { | |
120 | mce->u.tlb_error.effective_address_provided = true; | |
121 | mce->u.tlb_error.effective_address = addr; | |
122 | } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { | |
123 | mce->u.slb_error.effective_address_provided = true; | |
124 | mce->u.slb_error.effective_address = addr; | |
125 | } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { | |
126 | mce->u.erat_error.effective_address_provided = true; | |
127 | mce->u.erat_error.effective_address = addr; | |
7b9f71f9 NP |
128 | } else if (mce->error_type == MCE_ERROR_TYPE_USER) { |
129 | mce->u.user_error.effective_address_provided = true; | |
130 | mce->u.user_error.effective_address = addr; | |
131 | } else if (mce->error_type == MCE_ERROR_TYPE_RA) { | |
132 | mce->u.ra_error.effective_address_provided = true; | |
133 | mce->u.ra_error.effective_address = addr; | |
134 | } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { | |
135 | mce->u.link_error.effective_address_provided = true; | |
136 | mce->u.link_error.effective_address = addr; | |
36df96f8 MS |
137 | } else if (mce->error_type == MCE_ERROR_TYPE_UE) { |
138 | mce->u.ue_error.effective_address_provided = true; | |
139 | mce->u.ue_error.effective_address = addr; | |
140 | } | |
141 | return; | |
142 | } | |
143 | ||
144 | /* | |
145 | * get_mce_event: | |
146 | * mce Pointer to machine_check_event structure to be filled. | |
147 | * release Flag to indicate whether to free the event slot or not. | |
148 | * 0 <= do not release the mce event. Caller will invoke | |
149 | * release_mce_event() once event has been consumed. | |
150 | * 1 <= release the slot. | |
151 | * | |
152 | * return 1 = success | |
153 | * 0 = failure | |
154 | * | |
155 | * get_mce_event() will be called by platform specific machine check | |
156 | * handle routine and in KVM. | |
157 | * When we call get_mce_event(), we are still in interrupt context and | |
158 | * preemption will not be scheduled until ret_from_expect() routine | |
159 | * is called. | |
160 | */ | |
161 | int get_mce_event(struct machine_check_event *mce, bool release) | |
162 | { | |
69111bac | 163 | int index = __this_cpu_read(mce_nest_count) - 1; |
36df96f8 MS |
164 | struct machine_check_event *mc_evt; |
165 | int ret = 0; | |
166 | ||
167 | /* Sanity check */ | |
168 | if (index < 0) | |
169 | return ret; | |
170 | ||
171 | /* Check if we have MCE info to process. */ | |
172 | if (index < MAX_MC_EVT) { | |
69111bac | 173 | mc_evt = this_cpu_ptr(&mce_event[index]); |
36df96f8 MS |
174 | /* Copy the event structure and release the original */ |
175 | if (mce) | |
176 | *mce = *mc_evt; | |
177 | if (release) | |
178 | mc_evt->in_use = 0; | |
179 | ret = 1; | |
180 | } | |
181 | /* Decrement the count to free the slot. */ | |
182 | if (release) | |
69111bac | 183 | __this_cpu_dec(mce_nest_count); |
36df96f8 MS |
184 | |
185 | return ret; | |
186 | } | |
187 | ||
/*
 * Free the most recently saved event slot on this CPU without copying
 * the event out.
 */
void release_mce_event(void)
{
	(void)get_mce_event(NULL, true);
}
b5ff4211 MS |
192 | |
193 | /* | |
194 | * Queue up the MCE event which then can be handled later. | |
195 | */ | |
196 | void machine_check_queue_event(void) | |
197 | { | |
198 | int index; | |
199 | struct machine_check_event evt; | |
200 | ||
201 | if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) | |
202 | return; | |
203 | ||
ffb2d78e | 204 | index = __this_cpu_inc_return(mce_queue_count) - 1; |
b5ff4211 MS |
205 | /* If queue is full, just return for now. */ |
206 | if (index >= MAX_MC_EVT) { | |
69111bac | 207 | __this_cpu_dec(mce_queue_count); |
b5ff4211 MS |
208 | return; |
209 | } | |
69111bac | 210 | memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); |
30c82635 MS |
211 | |
212 | /* Queue irq work to process this event later. */ | |
213 | irq_work_queue(&mce_event_process_work); | |
b5ff4211 MS |
214 | } |
215 | ||
216 | /* | |
217 | * process pending MCE event from the mce event queue. This function will be | |
218 | * called during syscall exit. | |
219 | */ | |
30c82635 | 220 | static void machine_check_process_queued_event(struct irq_work *work) |
b5ff4211 MS |
221 | { |
222 | int index; | |
223 | ||
b5ff4211 MS |
224 | /* |
225 | * For now just print it to console. | |
226 | * TODO: log this error event to FSP or nvram. | |
227 | */ | |
69111bac CL |
228 | while (__this_cpu_read(mce_queue_count) > 0) { |
229 | index = __this_cpu_read(mce_queue_count) - 1; | |
b5ff4211 | 230 | machine_check_print_event_info( |
63f44d65 | 231 | this_cpu_ptr(&mce_event_queue[index]), false); |
69111bac | 232 | __this_cpu_dec(mce_queue_count); |
b5ff4211 | 233 | } |
b5ff4211 MS |
234 | } |
235 | ||
63f44d65 ME |
236 | void machine_check_print_event_info(struct machine_check_event *evt, |
237 | bool user_mode) | |
b5ff4211 MS |
238 | { |
239 | const char *level, *sevstr, *subtype; | |
240 | static const char *mc_ue_types[] = { | |
241 | "Indeterminate", | |
242 | "Instruction fetch", | |
243 | "Page table walk ifetch", | |
244 | "Load/Store", | |
245 | "Page table walk Load/Store", | |
246 | }; | |
247 | static const char *mc_slb_types[] = { | |
248 | "Indeterminate", | |
249 | "Parity", | |
250 | "Multihit", | |
251 | }; | |
252 | static const char *mc_erat_types[] = { | |
253 | "Indeterminate", | |
254 | "Parity", | |
255 | "Multihit", | |
256 | }; | |
257 | static const char *mc_tlb_types[] = { | |
258 | "Indeterminate", | |
259 | "Parity", | |
260 | "Multihit", | |
261 | }; | |
7b9f71f9 NP |
262 | static const char *mc_user_types[] = { |
263 | "Indeterminate", | |
264 | "tlbie(l) invalid", | |
265 | }; | |
266 | static const char *mc_ra_types[] = { | |
267 | "Indeterminate", | |
268 | "Instruction fetch (bad)", | |
269 | "Page table walk ifetch (bad)", | |
270 | "Page table walk ifetch (foreign)", | |
271 | "Load (bad)", | |
272 | "Store (bad)", | |
273 | "Page table walk Load/Store (bad)", | |
274 | "Page table walk Load/Store (foreign)", | |
275 | "Load/Store (foreign)", | |
276 | }; | |
277 | static const char *mc_link_types[] = { | |
278 | "Indeterminate", | |
279 | "Instruction fetch (timeout)", | |
280 | "Page table walk ifetch (timeout)", | |
281 | "Load (timeout)", | |
282 | "Store (timeout)", | |
283 | "Page table walk Load/Store (timeout)", | |
284 | }; | |
b5ff4211 MS |
285 | |
286 | /* Print things out */ | |
287 | if (evt->version != MCE_V1) { | |
288 | pr_err("Machine Check Exception, Unknown event version %d !\n", | |
289 | evt->version); | |
290 | return; | |
291 | } | |
292 | switch (evt->severity) { | |
293 | case MCE_SEV_NO_ERROR: | |
294 | level = KERN_INFO; | |
295 | sevstr = "Harmless"; | |
296 | break; | |
297 | case MCE_SEV_WARNING: | |
298 | level = KERN_WARNING; | |
299 | sevstr = ""; | |
300 | break; | |
301 | case MCE_SEV_ERROR_SYNC: | |
302 | level = KERN_ERR; | |
303 | sevstr = "Severe"; | |
304 | break; | |
305 | case MCE_SEV_FATAL: | |
306 | default: | |
307 | level = KERN_ERR; | |
308 | sevstr = "Fatal"; | |
309 | break; | |
310 | } | |
311 | ||
312 | printk("%s%s Machine check interrupt [%s]\n", level, sevstr, | |
313 | evt->disposition == MCE_DISPOSITION_RECOVERED ? | |
fc84427b | 314 | "Recovered" : "Not recovered"); |
63f44d65 ME |
315 | |
316 | if (user_mode) { | |
317 | printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, | |
318 | evt->srr0, current->pid, current->comm); | |
319 | } else { | |
320 | printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, | |
321 | (void *)evt->srr0); | |
322 | } | |
323 | ||
b5ff4211 MS |
324 | printk("%s Initiator: %s\n", level, |
325 | evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); | |
326 | switch (evt->error_type) { | |
327 | case MCE_ERROR_TYPE_UE: | |
328 | subtype = evt->u.ue_error.ue_error_type < | |
329 | ARRAY_SIZE(mc_ue_types) ? | |
330 | mc_ue_types[evt->u.ue_error.ue_error_type] | |
331 | : "Unknown"; | |
332 | printk("%s Error type: UE [%s]\n", level, subtype); | |
333 | if (evt->u.ue_error.effective_address_provided) | |
334 | printk("%s Effective address: %016llx\n", | |
335 | level, evt->u.ue_error.effective_address); | |
336 | if (evt->u.ue_error.physical_address_provided) | |
c01e0159 | 337 | printk("%s Physical address: %016llx\n", |
b5ff4211 MS |
338 | level, evt->u.ue_error.physical_address); |
339 | break; | |
340 | case MCE_ERROR_TYPE_SLB: | |
341 | subtype = evt->u.slb_error.slb_error_type < | |
342 | ARRAY_SIZE(mc_slb_types) ? | |
343 | mc_slb_types[evt->u.slb_error.slb_error_type] | |
344 | : "Unknown"; | |
345 | printk("%s Error type: SLB [%s]\n", level, subtype); | |
346 | if (evt->u.slb_error.effective_address_provided) | |
347 | printk("%s Effective address: %016llx\n", | |
348 | level, evt->u.slb_error.effective_address); | |
349 | break; | |
350 | case MCE_ERROR_TYPE_ERAT: | |
351 | subtype = evt->u.erat_error.erat_error_type < | |
352 | ARRAY_SIZE(mc_erat_types) ? | |
353 | mc_erat_types[evt->u.erat_error.erat_error_type] | |
354 | : "Unknown"; | |
355 | printk("%s Error type: ERAT [%s]\n", level, subtype); | |
356 | if (evt->u.erat_error.effective_address_provided) | |
357 | printk("%s Effective address: %016llx\n", | |
358 | level, evt->u.erat_error.effective_address); | |
359 | break; | |
360 | case MCE_ERROR_TYPE_TLB: | |
361 | subtype = evt->u.tlb_error.tlb_error_type < | |
362 | ARRAY_SIZE(mc_tlb_types) ? | |
363 | mc_tlb_types[evt->u.tlb_error.tlb_error_type] | |
364 | : "Unknown"; | |
365 | printk("%s Error type: TLB [%s]\n", level, subtype); | |
366 | if (evt->u.tlb_error.effective_address_provided) | |
367 | printk("%s Effective address: %016llx\n", | |
368 | level, evt->u.tlb_error.effective_address); | |
369 | break; | |
7b9f71f9 NP |
370 | case MCE_ERROR_TYPE_USER: |
371 | subtype = evt->u.user_error.user_error_type < | |
372 | ARRAY_SIZE(mc_user_types) ? | |
373 | mc_user_types[evt->u.user_error.user_error_type] | |
374 | : "Unknown"; | |
375 | printk("%s Error type: User [%s]\n", level, subtype); | |
376 | if (evt->u.user_error.effective_address_provided) | |
377 | printk("%s Effective address: %016llx\n", | |
378 | level, evt->u.user_error.effective_address); | |
379 | break; | |
380 | case MCE_ERROR_TYPE_RA: | |
381 | subtype = evt->u.ra_error.ra_error_type < | |
382 | ARRAY_SIZE(mc_ra_types) ? | |
383 | mc_ra_types[evt->u.ra_error.ra_error_type] | |
384 | : "Unknown"; | |
385 | printk("%s Error type: Real address [%s]\n", level, subtype); | |
386 | if (evt->u.ra_error.effective_address_provided) | |
387 | printk("%s Effective address: %016llx\n", | |
388 | level, evt->u.ra_error.effective_address); | |
389 | break; | |
390 | case MCE_ERROR_TYPE_LINK: | |
391 | subtype = evt->u.link_error.link_error_type < | |
392 | ARRAY_SIZE(mc_link_types) ? | |
393 | mc_link_types[evt->u.link_error.link_error_type] | |
394 | : "Unknown"; | |
395 | printk("%s Error type: Link [%s]\n", level, subtype); | |
396 | if (evt->u.link_error.effective_address_provided) | |
397 | printk("%s Effective address: %016llx\n", | |
398 | level, evt->u.link_error.effective_address); | |
399 | break; | |
b5ff4211 MS |
400 | default: |
401 | case MCE_ERROR_TYPE_UNKNOWN: | |
402 | printk("%s Error type: Unknown\n", level); | |
403 | break; | |
404 | } | |
405 | } | |
b63a0ffe MS |
406 | |
407 | uint64_t get_mce_fault_addr(struct machine_check_event *evt) | |
408 | { | |
409 | switch (evt->error_type) { | |
410 | case MCE_ERROR_TYPE_UE: | |
411 | if (evt->u.ue_error.effective_address_provided) | |
412 | return evt->u.ue_error.effective_address; | |
413 | break; | |
414 | case MCE_ERROR_TYPE_SLB: | |
415 | if (evt->u.slb_error.effective_address_provided) | |
416 | return evt->u.slb_error.effective_address; | |
417 | break; | |
418 | case MCE_ERROR_TYPE_ERAT: | |
419 | if (evt->u.erat_error.effective_address_provided) | |
420 | return evt->u.erat_error.effective_address; | |
421 | break; | |
422 | case MCE_ERROR_TYPE_TLB: | |
423 | if (evt->u.tlb_error.effective_address_provided) | |
424 | return evt->u.tlb_error.effective_address; | |
425 | break; | |
7b9f71f9 NP |
426 | case MCE_ERROR_TYPE_USER: |
427 | if (evt->u.user_error.effective_address_provided) | |
428 | return evt->u.user_error.effective_address; | |
429 | break; | |
430 | case MCE_ERROR_TYPE_RA: | |
431 | if (evt->u.ra_error.effective_address_provided) | |
432 | return evt->u.ra_error.effective_address; | |
433 | break; | |
434 | case MCE_ERROR_TYPE_LINK: | |
435 | if (evt->u.link_error.effective_address_provided) | |
436 | return evt->u.link_error.effective_address; | |
437 | break; | |
b63a0ffe MS |
438 | default: |
439 | case MCE_ERROR_TYPE_UNKNOWN: | |
440 | break; | |
441 | } | |
442 | return 0; | |
443 | } | |
444 | EXPORT_SYMBOL(get_mce_fault_addr); |