Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * File: mca_drv.c | |
3 | * Purpose: Generic MCA handling layer | |
4 | * | |
5 | * Copyright (C) 2004 FUJITSU LIMITED | |
6 | * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) | |
7 | */ | |
8 | #include <linux/config.h> | |
9 | #include <linux/types.h> | |
10 | #include <linux/init.h> | |
11 | #include <linux/sched.h> | |
12 | #include <linux/interrupt.h> | |
13 | #include <linux/irq.h> | |
14 | #include <linux/kallsyms.h> | |
15 | #include <linux/smp_lock.h> | |
16 | #include <linux/bootmem.h> | |
17 | #include <linux/acpi.h> | |
18 | #include <linux/timer.h> | |
19 | #include <linux/module.h> | |
20 | #include <linux/kernel.h> | |
21 | #include <linux/smp.h> | |
22 | #include <linux/workqueue.h> | |
23 | #include <linux/mm.h> | |
24 | ||
25 | #include <asm/delay.h> | |
26 | #include <asm/machvec.h> | |
27 | #include <asm/page.h> | |
28 | #include <asm/ptrace.h> | |
29 | #include <asm/system.h> | |
30 | #include <asm/sal.h> | |
31 | #include <asm/mca.h> | |
32 | ||
33 | #include <asm/irq.h> | |
34 | #include <asm/hw_irq.h> | |
35 | ||
36 | #include "mca_drv.h" | |
37 | ||
38 | /* max size of SAL error record (default) */ | |
39 | static int sal_rec_max = 10000; | |
40 | ||
41 | /* from mca.c */ | |
42 | static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state; | |
43 | static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state; | |
44 | ||
45 | /* from mca_drv_asm.S */ | |
46 | extern void *mca_handler_bhhook(void); | |
47 | ||
48 | static DEFINE_SPINLOCK(mca_bh_lock); | |
49 | ||
/* Scope of a machine check, as reported by is_mca_global(). */
typedef enum {
	MCA_IS_LOCAL  = 0,
	MCA_IS_GLOBAL = 1
} mca_type_t;

/* Outcome of mca_page_isolate(). */
typedef enum {
	ISOLATE_NG = 0,
	ISOLATE_OK = 1
} isolate_status_t;

/* Capacity of the isolated-page table. */
#define MAX_PAGE_ISOLATE 1024

/* Pages already taken out of service, and how many slots are in use. */
static struct page *page_isolate[MAX_PAGE_ISOLATE];
static int num_page_isolate;
64 | ||
65 | /* | |
66 | * This pool keeps pointers to the section part of SAL error record | |
67 | */ | |
68 | static struct { | |
69 | slidx_list_t *buffer; /* section pointer list pool */ | |
70 | int cur_idx; /* Current index of section pointer list pool */ | |
71 | int max_idx; /* Maximum index of section pointer list pool */ | |
72 | } slidx_pool; | |
73 | ||
74 | /** | |
75 | * mca_page_isolate - isolate a poisoned page in order not to use it later | |
76 | * @paddr: poisoned memory location | |
77 | * | |
78 | * Return value: | |
79 | * ISOLATE_OK / ISOLATE_NG | |
80 | */ | |
81 | ||
82 | static isolate_status_t | |
83 | mca_page_isolate(unsigned long paddr) | |
84 | { | |
85 | int i; | |
86 | struct page *p; | |
87 | ||
88 | /* whether physical address is valid or not */ | |
89 | if ( !ia64_phys_addr_valid(paddr) ) | |
90 | return ISOLATE_NG; | |
91 | ||
92 | /* convert physical address to physical page number */ | |
93 | p = pfn_to_page(paddr>>PAGE_SHIFT); | |
94 | ||
95 | /* check whether a page number have been already registered or not */ | |
96 | for( i = 0; i < num_page_isolate; i++ ) | |
97 | if( page_isolate[i] == p ) | |
98 | return ISOLATE_OK; /* already listed */ | |
99 | ||
100 | /* limitation check */ | |
101 | if( num_page_isolate == MAX_PAGE_ISOLATE ) | |
102 | return ISOLATE_NG; | |
103 | ||
104 | /* kick pages having attribute 'SLAB' or 'Reserved' */ | |
105 | if( PageSlab(p) || PageReserved(p) ) | |
106 | return ISOLATE_NG; | |
107 | ||
108 | /* add attribute 'Reserved' and register the page */ | |
109 | SetPageReserved(p); | |
110 | page_isolate[num_page_isolate++] = p; | |
111 | ||
112 | return ISOLATE_OK; | |
113 | } | |
114 | ||
115 | /** | |
116 | * mca_hanlder_bh - Kill the process which occurred memory read error | |
117 | * @paddr: poisoned address received from MCA Handler | |
118 | */ | |
119 | ||
120 | void | |
121 | mca_handler_bh(unsigned long paddr) | |
122 | { | |
123 | printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n", | |
124 | current->pid, current->comm); | |
125 | ||
126 | spin_lock(&mca_bh_lock); | |
127 | if (mca_page_isolate(paddr) == ISOLATE_OK) { | |
128 | printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); | |
129 | } else { | |
130 | printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr); | |
131 | } | |
132 | spin_unlock(&mca_bh_lock); | |
133 | ||
134 | /* This process is about to be killed itself */ | |
b1b901c2 | 135 | do_exit(SIGKILL); |
1da177e4 LT |
136 | } |
137 | ||
138 | /** | |
139 | * mca_make_peidx - Make index of processor error section | |
140 | * @slpi: pointer to record of processor error section | |
141 | * @peidx: pointer to index of processor error section | |
142 | */ | |
143 | ||
144 | static void | |
145 | mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) | |
146 | { | |
147 | /* | |
148 | * calculate the start address of | |
149 | * "struct cpuid_info" and "sal_processor_static_info_t". | |
150 | */ | |
151 | u64 total_check_num = slpi->valid.num_cache_check | |
152 | + slpi->valid.num_tlb_check | |
153 | + slpi->valid.num_bus_check | |
154 | + slpi->valid.num_reg_file_check | |
155 | + slpi->valid.num_ms_check; | |
156 | u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num | |
157 | + sizeof(sal_log_processor_info_t); | |
158 | u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); | |
159 | ||
160 | peidx_head(peidx) = slpi; | |
161 | peidx_mid(peidx) = (struct sal_cpuid_info *) | |
162 | (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); | |
163 | peidx_bottom(peidx) = (sal_processor_static_info_t *) | |
164 | (slpi->valid.psi_static_struct ? | |
165 | ((char*)slpi + head_size + mid_size) : NULL); | |
166 | } | |
167 | ||
168 | /** | |
169 | * mca_make_slidx - Make index of SAL error record | |
170 | * @buffer: pointer to SAL error record | |
171 | * @slidx: pointer to index of SAL error record | |
172 | * | |
173 | * Return value: | |
174 | * 1 if record has platform error / 0 if not | |
175 | */ | |
176 | #define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \ | |
177 | { slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \ | |
178 | hl->hdr = ptr; \ | |
179 | list_add(&hl->list, &(sect)); \ | |
180 | slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; } | |
181 | ||
182 | static int | |
183 | mca_make_slidx(void *buffer, slidx_table_t *slidx) | |
184 | { | |
185 | int platform_err = 0; | |
186 | int record_len = ((sal_log_record_header_t*)buffer)->len; | |
187 | u32 ercd_pos; | |
188 | int sects; | |
189 | sal_log_section_hdr_t *sp; | |
190 | ||
191 | /* | |
192 | * Initialize index referring current record | |
193 | */ | |
194 | INIT_LIST_HEAD(&(slidx->proc_err)); | |
195 | INIT_LIST_HEAD(&(slidx->mem_dev_err)); | |
196 | INIT_LIST_HEAD(&(slidx->sel_dev_err)); | |
197 | INIT_LIST_HEAD(&(slidx->pci_bus_err)); | |
198 | INIT_LIST_HEAD(&(slidx->smbios_dev_err)); | |
199 | INIT_LIST_HEAD(&(slidx->pci_comp_err)); | |
200 | INIT_LIST_HEAD(&(slidx->plat_specific_err)); | |
201 | INIT_LIST_HEAD(&(slidx->host_ctlr_err)); | |
202 | INIT_LIST_HEAD(&(slidx->plat_bus_err)); | |
203 | INIT_LIST_HEAD(&(slidx->unsupported)); | |
204 | ||
205 | /* | |
206 | * Extract a Record Header | |
207 | */ | |
208 | slidx->header = buffer; | |
209 | ||
210 | /* | |
211 | * Extract each section records | |
212 | * (arranged from "int ia64_log_platform_info_print()") | |
213 | */ | |
214 | for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0; | |
215 | ercd_pos < record_len; ercd_pos += sp->len, sects++) { | |
216 | sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos); | |
217 | if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) { | |
218 | LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp); | |
219 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) { | |
220 | platform_err = 1; | |
221 | LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp); | |
222 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) { | |
223 | platform_err = 1; | |
224 | LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp); | |
225 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) { | |
226 | platform_err = 1; | |
227 | LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp); | |
228 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) { | |
229 | platform_err = 1; | |
230 | LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp); | |
231 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) { | |
232 | platform_err = 1; | |
233 | LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp); | |
234 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) { | |
235 | platform_err = 1; | |
236 | LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp); | |
237 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) { | |
238 | platform_err = 1; | |
239 | LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp); | |
240 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) { | |
241 | platform_err = 1; | |
242 | LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp); | |
243 | } else { | |
244 | LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp); | |
245 | } | |
246 | } | |
247 | slidx->n_sections = sects; | |
248 | ||
249 | return platform_err; | |
250 | } | |
251 | ||
252 | /** | |
253 | * init_record_index_pools - Initialize pool of lists for SAL record index | |
254 | * | |
255 | * Return value: | |
256 | * 0 on Success / -ENOMEM on Failure | |
257 | */ | |
258 | static int | |
259 | init_record_index_pools(void) | |
260 | { | |
261 | int i; | |
262 | int rec_max_size; /* Maximum size of SAL error records */ | |
263 | int sect_min_size; /* Minimum size of SAL error sections */ | |
264 | /* minimum size table of each section */ | |
265 | static int sal_log_sect_min_sizes[] = { | |
266 | sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t), | |
267 | sizeof(sal_log_mem_dev_err_info_t), | |
268 | sizeof(sal_log_sel_dev_err_info_t), | |
269 | sizeof(sal_log_pci_bus_err_info_t), | |
270 | sizeof(sal_log_smbios_dev_err_info_t), | |
271 | sizeof(sal_log_pci_comp_err_info_t), | |
272 | sizeof(sal_log_plat_specific_err_info_t), | |
273 | sizeof(sal_log_host_ctlr_err_info_t), | |
274 | sizeof(sal_log_plat_bus_err_info_t), | |
275 | }; | |
276 | ||
277 | /* | |
278 | * MCA handler cannot allocate new memory on flight, | |
279 | * so we preallocate enough memory to handle a SAL record. | |
280 | * | |
281 | * Initialize a handling set of slidx_pool: | |
282 | * 1. Pick up the max size of SAL error records | |
283 | * 2. Pick up the min size of SAL error sections | |
284 | * 3. Allocate the pool as enough to 2 SAL records | |
285 | * (now we can estimate the maxinum of section in a record.) | |
286 | */ | |
287 | ||
288 | /* - 1 - */ | |
289 | rec_max_size = sal_rec_max; | |
290 | ||
291 | /* - 2 - */ | |
292 | sect_min_size = sal_log_sect_min_sizes[0]; | |
293 | for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++) | |
294 | if (sect_min_size > sal_log_sect_min_sizes[i]) | |
295 | sect_min_size = sal_log_sect_min_sizes[i]; | |
296 | ||
297 | /* - 3 - */ | |
298 | slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; | |
299 | slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL); | |
300 | ||
301 | return slidx_pool.buffer ? 0 : -ENOMEM; | |
302 | } | |
303 | ||
304 | ||
305 | /***************************************************************************** | |
306 | * Recovery functions * | |
307 | *****************************************************************************/ | |
308 | ||
309 | /** | |
310 | * is_mca_global - Check whether this MCA is global or not | |
311 | * @peidx: pointer of index of processor error section | |
312 | * @pbci: pointer to pal_bus_check_info_t | |
313 | * | |
314 | * Return value: | |
315 | * MCA_IS_LOCAL / MCA_IS_GLOBAL | |
316 | */ | |
317 | ||
318 | static mca_type_t | |
319 | is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci) | |
320 | { | |
321 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | |
322 | ||
323 | /* | |
324 | * PAL can request a rendezvous, if the MCA has a global scope. | |
325 | * If "rz_always" flag is set, SAL requests MCA rendezvous | |
326 | * in spite of global MCA. | |
327 | * Therefore it is local MCA when rendezvous has not been requested. | |
328 | * Failed to rendezvous, the system must be down. | |
329 | */ | |
330 | switch (sal_to_os_handoff_state->imsto_rendez_state) { | |
331 | case -1: /* SAL rendezvous unsuccessful */ | |
332 | return MCA_IS_GLOBAL; | |
333 | case 0: /* SAL rendezvous not required */ | |
334 | return MCA_IS_LOCAL; | |
335 | case 1: /* SAL rendezvous successful int */ | |
336 | case 2: /* SAL rendezvous successful int with init */ | |
337 | default: | |
338 | break; | |
339 | } | |
340 | ||
341 | /* | |
342 | * If One or more Cache/TLB/Reg_File/Uarch_Check is here, | |
343 | * it would be a local MCA. (i.e. processor internal error) | |
344 | */ | |
345 | if (psp->tc || psp->cc || psp->rc || psp->uc) | |
346 | return MCA_IS_LOCAL; | |
347 | ||
348 | /* | |
349 | * Bus_Check structure with Bus_Check.ib (internal bus error) flag set | |
350 | * would be a global MCA. (e.g. a system bus address parity error) | |
351 | */ | |
352 | if (!pbci || pbci->ib) | |
353 | return MCA_IS_GLOBAL; | |
354 | ||
355 | /* | |
356 | * Bus_Check structure with Bus_Check.eb (external bus error) flag set | |
357 | * could be either a local MCA or a global MCA. | |
358 | * | |
359 | * Referring Bus_Check.bsi: | |
360 | * 0: Unknown/unclassified | |
361 | * 1: BERR# | |
362 | * 2: BINIT# | |
363 | * 3: Hard Fail | |
364 | * (FIXME: Are these SGI specific or generic bsi values?) | |
365 | */ | |
366 | if (pbci->eb) | |
367 | switch (pbci->bsi) { | |
368 | case 0: | |
369 | /* e.g. a load from poisoned memory */ | |
370 | return MCA_IS_LOCAL; | |
371 | case 1: | |
372 | case 2: | |
373 | case 3: | |
374 | return MCA_IS_GLOBAL; | |
375 | } | |
376 | ||
377 | return MCA_IS_GLOBAL; | |
378 | } | |
379 | ||
380 | /** | |
381 | * recover_from_read_error - Try to recover the errors which type are "read"s. | |
382 | * @slidx: pointer of index of SAL error record | |
383 | * @peidx: pointer of index of processor error section | |
384 | * @pbci: pointer of pal_bus_check_info | |
385 | * | |
386 | * Return value: | |
387 | * 1 on Success / 0 on Failure | |
388 | */ | |
389 | ||
390 | static int | |
391 | recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | |
392 | { | |
393 | sal_log_mod_error_info_t *smei; | |
394 | pal_min_state_area_t *pmsa; | |
395 | struct ia64_psr *psr1, *psr2; | |
396 | ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; | |
397 | ||
398 | /* Is target address valid? */ | |
399 | if (!pbci->tv) | |
400 | return 0; | |
401 | ||
402 | /* | |
403 | * cpu read or memory-mapped io read | |
404 | * | |
405 | * offending process affected process OS MCA do | |
406 | * kernel mode kernel mode down system | |
407 | * kernel mode user mode kill the process | |
408 | * user mode kernel mode down system (*) | |
409 | * user mode user mode kill the process | |
410 | * | |
411 | * (*) You could terminate offending user-mode process | |
412 | * if (pbci->pv && pbci->pl != 0) *and* if you sure | |
413 | * the process not have any locks of kernel. | |
414 | */ | |
415 | ||
416 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); | |
417 | ||
418 | /* | |
419 | * Check the privilege level of interrupted context. | |
420 | * If it is user-mode, then terminate affected process. | |
421 | */ | |
422 | if (psr1->cpl != 0) { | |
423 | smei = peidx_bus_check(peidx, 0); | |
424 | if (smei->valid.target_identifier) { | |
425 | /* | |
426 | * setup for resume to bottom half of MCA, | |
427 | * "mca_handler_bhhook" | |
428 | */ | |
429 | pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61)); | |
430 | /* pass to bhhook as 1st argument (gr8) */ | |
431 | pmsa->pmsa_gr[8-1] = smei->target_identifier; | |
432 | /* set interrupted return address (but no use) */ | |
433 | pmsa->pmsa_br0 = pmsa->pmsa_iip; | |
434 | /* change resume address to bottom half */ | |
435 | pmsa->pmsa_iip = mca_hdlr_bh->fp; | |
436 | pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; | |
437 | /* set cpl with kernel mode */ | |
438 | psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; | |
439 | psr2->cpl = 0; | |
440 | psr2->ri = 0; | |
b1b901c2 | 441 | psr2->i = 0; |
1da177e4 LT |
442 | |
443 | return 1; | |
444 | } | |
445 | ||
446 | } | |
447 | ||
448 | return 0; | |
449 | } | |
450 | ||
451 | /** | |
452 | * recover_from_platform_error - Recover from platform error. | |
453 | * @slidx: pointer of index of SAL error record | |
454 | * @peidx: pointer of index of processor error section | |
455 | * @pbci: pointer of pal_bus_check_info | |
456 | * | |
457 | * Return value: | |
458 | * 1 on Success / 0 on Failure | |
459 | */ | |
460 | ||
461 | static int | |
462 | recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | |
463 | { | |
464 | int status = 0; | |
465 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | |
466 | ||
467 | if (psp->bc && pbci->eb && pbci->bsi == 0) { | |
468 | switch(pbci->type) { | |
469 | case 1: /* partial read */ | |
470 | case 3: /* full line(cpu) read */ | |
471 | case 9: /* I/O space read */ | |
472 | status = recover_from_read_error(slidx, peidx, pbci); | |
473 | break; | |
474 | case 0: /* unknown */ | |
475 | case 2: /* partial write */ | |
476 | case 4: /* full line write */ | |
477 | case 5: /* implicit or explicit write-back operation */ | |
478 | case 6: /* snoop probe */ | |
479 | case 7: /* incoming or outgoing ptc.g */ | |
480 | case 8: /* write coalescing transactions */ | |
481 | case 10: /* I/O space write */ | |
482 | case 11: /* inter-processor interrupt message(IPI) */ | |
483 | case 12: /* interrupt acknowledge or external task priority cycle */ | |
484 | default: | |
485 | break; | |
486 | } | |
487 | } | |
488 | ||
489 | return status; | |
490 | } | |
491 | ||
492 | /** | |
493 | * recover_from_processor_error | |
494 | * @platform: whether there are some platform error section or not | |
495 | * @slidx: pointer of index of SAL error record | |
496 | * @peidx: pointer of index of processor error section | |
497 | * @pbci: pointer of pal_bus_check_info | |
498 | * | |
499 | * Return value: | |
500 | * 1 on Success / 0 on Failure | |
501 | */ | |
502 | /* | |
503 | * Later we try to recover when below all conditions are satisfied. | |
504 | * 1. Only one processor error section is exist. | |
505 | * 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK) | |
506 | * 3. The entry of BUS_CHECK_INFO is 1. | |
507 | * 4. "External bus error" flag is set and the others are not set. | |
508 | */ | |
509 | ||
510 | static int | |
511 | recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | |
512 | { | |
513 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | |
514 | ||
515 | /* | |
516 | * We cannot recover errors with other than bus_check. | |
517 | */ | |
518 | if (psp->cc || psp->rc || psp->uc) | |
519 | return 0; | |
520 | ||
521 | /* | |
522 | * If there is no bus error, record is weird but we need not to recover. | |
523 | */ | |
524 | if (psp->bc == 0 || pbci == NULL) | |
525 | return 1; | |
526 | ||
527 | /* | |
528 | * Sorry, we cannot handle so many. | |
529 | */ | |
530 | if (peidx_bus_check_num(peidx) > 1) | |
531 | return 0; | |
532 | /* | |
533 | * Well, here is only one bus error. | |
534 | */ | |
535 | if (pbci->ib || pbci->cc) | |
536 | return 0; | |
537 | if (pbci->eb && pbci->bsi > 0) | |
538 | return 0; | |
539 | if (psp->ci == 0) | |
540 | return 0; | |
541 | ||
542 | /* | |
543 | * This is a local MCA and estimated as recoverble external bus error. | |
544 | * (e.g. a load from poisoned memory) | |
545 | * This means "there are some platform errors". | |
546 | */ | |
547 | if (platform) | |
548 | return recover_from_platform_error(slidx, peidx, pbci); | |
549 | /* | |
550 | * On account of strange SAL error record, we cannot recover. | |
551 | */ | |
552 | return 0; | |
553 | } | |
554 | ||
555 | /** | |
556 | * mca_try_to_recover - Try to recover from MCA | |
557 | * @rec: pointer to a SAL error record | |
558 | * | |
559 | * Return value: | |
560 | * 1 on Success / 0 on Failure | |
561 | */ | |
562 | ||
563 | static int | |
564 | mca_try_to_recover(void *rec, | |
565 | ia64_mca_sal_to_os_state_t *sal_to_os_state, | |
566 | ia64_mca_os_to_sal_state_t *os_to_sal_state) | |
567 | { | |
568 | int platform_err; | |
569 | int n_proc_err; | |
570 | slidx_table_t slidx; | |
571 | peidx_table_t peidx; | |
572 | pal_bus_check_info_t pbci; | |
573 | ||
574 | /* handoff state from/to mca.c */ | |
575 | sal_to_os_handoff_state = sal_to_os_state; | |
576 | os_to_sal_handoff_state = os_to_sal_state; | |
577 | ||
578 | /* Make index of SAL error record */ | |
579 | platform_err = mca_make_slidx(rec, &slidx); | |
580 | ||
581 | /* Count processor error sections */ | |
582 | n_proc_err = slidx_count(&slidx, proc_err); | |
583 | ||
584 | /* Now, OS can recover when there is one processor error section */ | |
585 | if (n_proc_err > 1) | |
586 | return 0; | |
587 | else if (n_proc_err == 0) { | |
588 | /* Weird SAL record ... We need not to recover */ | |
589 | ||
590 | return 1; | |
591 | } | |
592 | ||
593 | /* Make index of processor error section */ | |
594 | mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx); | |
595 | ||
596 | /* Extract Processor BUS_CHECK[0] */ | |
597 | *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); | |
598 | ||
599 | /* Check whether MCA is global or not */ | |
600 | if (is_mca_global(&peidx, &pbci)) | |
601 | return 0; | |
602 | ||
603 | /* Try to recover a processor error */ | |
604 | return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci); | |
605 | } | |
606 | ||
607 | /* | |
608 | * ============================================================================= | |
609 | */ | |
610 | ||
611 | int __init mca_external_handler_init(void) | |
612 | { | |
613 | if (init_record_index_pools()) | |
614 | return -ENOMEM; | |
615 | ||
616 | /* register external mca handlers */ | |
617 | if (ia64_reg_MCA_extension(mca_try_to_recover)){ | |
618 | printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); | |
619 | kfree(slidx_pool.buffer); | |
620 | return -EFAULT; | |
621 | } | |
622 | return 0; | |
623 | } | |
624 | ||
625 | void __exit mca_external_handler_exit(void) | |
626 | { | |
627 | /* unregister external mca handlers */ | |
628 | ia64_unreg_MCA_extension(); | |
629 | kfree(slidx_pool.buffer); | |
630 | } | |
631 | ||
632 | module_init(mca_external_handler_init); | |
633 | module_exit(mca_external_handler_exit); | |
634 | ||
635 | module_param(sal_rec_max, int, 0644); | |
636 | MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); | |
637 | ||
638 | MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); | |
639 | MODULE_LICENSE("GPL"); |