Commit | Line | Data |
---|---|---|
1802d0be | 1 | // SPDX-License-Identifier: GPL-2.0-only |
d334a491 HY |
2 | /* |
3 | * APEI Generic Hardware Error Source support | |
4 | * | |
5 | * Generic Hardware Error Source provides a way to report platform | |
6 | * hardware errors (such as that from chipset). It works in so called | |
7 | * "Firmware First" mode, that is, hardware errors are reported to | |
8 | * firmware firstly, then reported to Linux by firmware. This way, | |
9 | * some non-standard hardware error registers or non-standard hardware | |
10 | * link can be checked by firmware to produce more hardware error | |
11 | * information for Linux. | |
12 | * | |
13 | * For more information about Generic Hardware Error Source, please | |
14 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | |
15 | * | |
67eb2e99 | 16 | * Copyright 2010,2011 Intel Corp. |
d334a491 | 17 | * Author: Huang Ying <ying.huang@intel.com> |
d334a491 HY |
18 | */ |
19 | ||
f9f05395 | 20 | #include <linux/arm_sdei.h> |
d334a491 | 21 | #include <linux/kernel.h> |
020bf066 | 22 | #include <linux/moduleparam.h> |
d334a491 HY |
23 | #include <linux/init.h> |
24 | #include <linux/acpi.h> | |
25 | #include <linux/io.h> | |
26 | #include <linux/interrupt.h> | |
81e88fdc | 27 | #include <linux/timer.h> |
d334a491 | 28 | #include <linux/cper.h> |
671a794c | 29 | #include <linux/cxl-event.h> |
7ad6e943 HY |
30 | #include <linux/platform_device.h> |
31 | #include <linux/mutex.h> | |
32c361f5 | 32 | #include <linux/ratelimit.h> |
81e88fdc | 33 | #include <linux/vmalloc.h> |
67eb2e99 HY |
34 | #include <linux/irq_work.h> |
35 | #include <linux/llist.h> | |
36 | #include <linux/genalloc.h> | |
a654e5ee | 37 | #include <linux/pci.h> |
b484079b | 38 | #include <linux/pfn.h> |
a654e5ee | 39 | #include <linux/aer.h> |
44a69f61 | 40 | #include <linux/nmi.h> |
e6017571 | 41 | #include <linux/sched/clock.h> |
297b64c7 TB |
42 | #include <linux/uuid.h> |
43 | #include <linux/ras.h> | |
7f17b4a1 | 44 | #include <linux/task_work.h> |
40e06415 | 45 | |
42aa5604 | 46 | #include <acpi/actbl1.h> |
40e06415 | 47 | #include <acpi/ghes.h> |
9dae3d0d | 48 | #include <acpi/apei.h> |
4f89fa28 | 49 | #include <asm/fixmap.h> |
81e88fdc | 50 | #include <asm/tlbflush.h> |
297b64c7 | 51 | #include <ras/ras_event.h> |
d334a491 HY |
52 | |
53 | #include "apei-internal.h" | |
54 | ||
55 | #define GHES_PFX "GHES: " | |
56 | ||
57 | #define GHES_ESTATUS_MAX_SIZE 65536 | |
67eb2e99 HY |
58 | #define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 |
59 | ||
60 | #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 | |
61 | ||
152cef40 HY |
62 | /* This is just an estimation for memory pool allocation */ |
63 | #define GHES_ESTATUS_CACHE_AVG_SIZE 512 | |
64 | ||
65 | #define GHES_ESTATUS_CACHES_SIZE 4 | |
66 | ||
70cb6e1d | 67 | #define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL |
152cef40 HY |
68 | /* Prevent too many caches from being allocated because of RCU */ |
69 | #define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) | |
70 | ||
71 | #define GHES_ESTATUS_CACHE_LEN(estatus_len) \ | |
72 | (sizeof(struct ghes_estatus_cache) + (estatus_len)) | |
73 | #define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ | |
0a00fd5e | 74 | ((struct acpi_hest_generic_status *) \ |
152cef40 HY |
75 | ((struct ghes_estatus_cache *)(estatus_cache) + 1)) |
76 | ||
67eb2e99 HY |
77 | #define GHES_ESTATUS_NODE_LEN(estatus_len) \ |
78 | (sizeof(struct ghes_estatus_node) + (estatus_len)) | |
88f074f4 | 79 | #define GHES_ESTATUS_FROM_NODE(estatus_node) \ |
0a00fd5e | 80 | ((struct acpi_hest_generic_status *) \ |
67eb2e99 | 81 | ((struct ghes_estatus_node *)(estatus_node) + 1)) |
d334a491 | 82 | |
9aa9cf3e SJ |
83 | #define GHES_VENDOR_ENTRY_LEN(gdata_len) \ |
84 | (sizeof(struct ghes_vendor_record_entry) + (gdata_len)) | |
85 | #define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \ | |
86 | ((struct acpi_hest_generic_data *) \ | |
87 | ((struct ghes_vendor_record_entry *)(vendor_entry) + 1)) | |
88 | ||
f9f05395 JM |
89 | /* |
90 | * NMI-like notifications vary by architecture, before the compiler can prune | |
91 | * unused static functions it needs a value for these enums. | |
92 | */ | |
93 | #ifndef CONFIG_ARM_SDE_INTERFACE | |
94 | #define FIX_APEI_GHES_SDEI_NORMAL __end_of_fixed_addresses | |
95 | #define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses | |
96 | #endif | |
97 | ||
8e40612f JH |
98 | static ATOMIC_NOTIFIER_HEAD(ghes_report_chain); |
99 | ||
42aa5604 TB |
100 | static inline bool is_hest_type_generic_v2(struct ghes *ghes) |
101 | { | |
102 | return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; | |
103 | } | |
104 | ||
a70297d2 SX |
105 | /* |
106 | * A platform may describe one error source for the handling of synchronous | |
107 | * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI | |
108 | * or External Interrupt). On x86, the HEST notifications are always | |
109 | * asynchronous, so only SEA on ARM is delivered as a synchronous | |
110 | * notification. | |
111 | */ | |
112 | static inline bool is_hest_sync_notify(struct ghes *ghes) | |
113 | { | |
114 | u8 notify_type = ghes->generic->notify.type; | |
115 | ||
116 | return notify_type == ACPI_HEST_NOTIFY_SEA; | |
117 | } | |
118 | ||
020bf066 PG |
119 | /* |
120 | * This driver isn't really modular, however for the time being, | |
121 | * continuing to use module_param is the easiest way to remain | |
122 | * compatible with existing boot arg use cases. | |
123 | */ | |
90ab5ee9 | 124 | bool ghes_disable; |
b6a95016 HY |
125 | module_param_named(disable, ghes_disable, bool, 0); |
126 | ||
9057a3f7 JH |
127 | /* |
128 | * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform | |
129 | * check. | |
130 | */ | |
131 | static bool ghes_edac_force_enable; | |
132 | module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0); | |
133 | ||
d334a491 | 134 | /* |
7bf130e4 SJ |
135 | * All error sources notified with HED (Hardware Error Device) share a |
136 | * single notifier callback, so they need to be linked and checked one | |
137 | * by one. This holds true for NMI too. | |
d334a491 | 138 | * |
81e88fdc HY |
139 | * RCU is used for these lists, so ghes_list_mutex is only used for |
140 | * list changing, not for traversing. | |
d334a491 | 141 | */ |
7bf130e4 | 142 | static LIST_HEAD(ghes_hed); |
7ad6e943 | 143 | static DEFINE_MUTEX(ghes_list_mutex); |
d334a491 | 144 | |
9057a3f7 JH |
145 | /* |
146 | * A list of GHES devices which are given to the corresponding EDAC driver | |
147 | * ghes_edac for further use. | |
148 | */ | |
149 | static LIST_HEAD(ghes_devs); | |
150 | static DEFINE_MUTEX(ghes_devs_mutex); | |
151 | ||
81e88fdc HY |
152 | /* |
153 | * Because the memory area used to transfer hardware error information | |
154 | * from BIOS to Linux can be determined only in NMI, IRQ or timer | |
155 | * handler, but general ioremap can not be used in atomic context, so | |
4f89fa28 | 156 | * the fixmap is used instead. |
520e18a5 | 157 | * |
3b880cbe | 158 | * This spinlock is used to prevent the fixmap entry from being used |
4f89fa28 | 159 | * simultaneously. |
81e88fdc | 160 | */ |
3b880cbe | 161 | static DEFINE_SPINLOCK(ghes_notify_lock_irq); |
81e88fdc | 162 | |
9aa9cf3e SJ |
163 | struct ghes_vendor_record_entry { |
164 | struct work_struct work; | |
165 | int error_severity; | |
166 | char vendor_record[]; | |
167 | }; | |
168 | ||
67eb2e99 | 169 | static struct gen_pool *ghes_estatus_pool; |
67eb2e99 | 170 | |
dd3fa54b | 171 | static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; |
152cef40 HY |
172 | static atomic_t ghes_estatus_cache_alloced; |
173 | ||
2fb5853e JZZ |
174 | static int ghes_panic_timeout __read_mostly = 30; |
175 | ||
b484079b | 176 | static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) |
81e88fdc | 177 | { |
7edda088 TB |
178 | phys_addr_t paddr; |
179 | pgprot_t prot; | |
81e88fdc | 180 | |
b484079b | 181 | paddr = PFN_PHYS(pfn); |
7edda088 | 182 | prot = arch_apei_get_mem_attribute(paddr); |
b484079b | 183 | __set_fixmap(fixmap_idx, paddr, prot); |
81e88fdc | 184 | |
b484079b | 185 | return (void __iomem *) __fix_to_virt(fixmap_idx); |
81e88fdc HY |
186 | } |
187 | ||
b484079b | 188 | static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) |
81e88fdc | 189 | { |
b484079b | 190 | int _idx = virt_to_fix((unsigned long)vaddr); |
8ece249a | 191 | |
b484079b JM |
192 | WARN_ON_ONCE(fixmap_idx != _idx); |
193 | clear_fixmap(fixmap_idx); | |
81e88fdc HY |
194 | } |
195 | ||
43d27483 | 196 | int ghes_estatus_pool_init(unsigned int num_ghes) |
67eb2e99 | 197 | { |
fb7be08f | 198 | unsigned long addr, len; |
6abc7622 | 199 | int rc; |
fb7be08f | 200 | |
67eb2e99 HY |
201 | ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); |
202 | if (!ghes_estatus_pool) | |
203 | return -ENOMEM; | |
67eb2e99 | 204 | |
fb7be08f JM |
205 | len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; |
206 | len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); | |
67eb2e99 | 207 | |
0ac234be JM |
208 | addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); |
209 | if (!addr) | |
6abc7622 | 210 | goto err_pool_alloc; |
0ac234be | 211 | |
6abc7622 LZ |
212 | rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); |
213 | if (rc) | |
214 | goto err_pool_add; | |
215 | ||
216 | return 0; | |
217 | ||
218 | err_pool_add: | |
219 | vfree((void *)addr); | |
220 | ||
221 | err_pool_alloc: | |
222 | gen_pool_destroy(ghes_estatus_pool); | |
223 | ||
224 | return -ENOMEM; | |
67eb2e99 HY |
225 | } |
226 | ||
e2abc47a SJ |
227 | /** |
228 | * ghes_estatus_pool_region_free - free previously allocated memory | |
229 | * from the ghes_estatus_pool. | |
230 | * @addr: address of memory to free. | |
231 | * @size: size of memory to free. | |
232 | * | |
233 | * Returns none. | |
234 | */ | |
235 | void ghes_estatus_pool_region_free(unsigned long addr, u32 size) | |
236 | { | |
237 | gen_pool_free(ghes_estatus_pool, addr, size); | |
238 | } | |
239 | EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free); | |
240 | ||
42aa5604 TB |
241 | static int map_gen_v2(struct ghes *ghes) |
242 | { | |
243 | return apei_map_generic_address(&ghes->generic_v2->read_ack_register); | |
244 | } | |
245 | ||
246 | static void unmap_gen_v2(struct ghes *ghes) | |
247 | { | |
248 | apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); | |
249 | } | |
250 | ||
06ddeadc JM |
251 | static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) |
252 | { | |
253 | int rc; | |
254 | u64 val = 0; | |
255 | ||
256 | rc = apei_read(&val, &gv2->read_ack_register); | |
257 | if (rc) | |
258 | return; | |
259 | ||
260 | val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; | |
261 | val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; | |
262 | ||
263 | apei_write(val, &gv2->read_ack_register); | |
264 | } | |
265 | ||
d334a491 HY |
266 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) |
267 | { | |
268 | struct ghes *ghes; | |
269 | unsigned int error_block_length; | |
270 | int rc; | |
271 | ||
272 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | |
273 | if (!ghes) | |
274 | return ERR_PTR(-ENOMEM); | |
42aa5604 | 275 | |
d334a491 | 276 | ghes->generic = generic; |
42aa5604 TB |
277 | if (is_hest_type_generic_v2(ghes)) { |
278 | rc = map_gen_v2(ghes); | |
279 | if (rc) | |
280 | goto err_free; | |
281 | } | |
282 | ||
34ddeb03 | 283 | rc = apei_map_generic_address(&generic->error_status_address); |
d334a491 | 284 | if (rc) |
42aa5604 | 285 | goto err_unmap_read_ack_addr; |
d334a491 HY |
286 | error_block_length = generic->error_block_length; |
287 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | |
933ca4e3 KW |
288 | pr_warn(FW_WARN GHES_PFX |
289 | "Error status block length is too long: %u for " | |
290 | "generic hardware error source: %d.\n", | |
291 | error_block_length, generic->header.source_id); | |
d334a491 HY |
292 | error_block_length = GHES_ESTATUS_MAX_SIZE; |
293 | } | |
294 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | |
295 | if (!ghes->estatus) { | |
296 | rc = -ENOMEM; | |
42aa5604 | 297 | goto err_unmap_status_addr; |
d334a491 HY |
298 | } |
299 | ||
300 | return ghes; | |
301 | ||
42aa5604 | 302 | err_unmap_status_addr: |
34ddeb03 | 303 | apei_unmap_generic_address(&generic->error_status_address); |
42aa5604 TB |
304 | err_unmap_read_ack_addr: |
305 | if (is_hest_type_generic_v2(ghes)) | |
306 | unmap_gen_v2(ghes); | |
d334a491 HY |
307 | err_free: |
308 | kfree(ghes); | |
309 | return ERR_PTR(rc); | |
310 | } | |
311 | ||
312 | static void ghes_fini(struct ghes *ghes) | |
313 | { | |
314 | kfree(ghes->estatus); | |
34ddeb03 | 315 | apei_unmap_generic_address(&ghes->generic->error_status_address); |
42aa5604 TB |
316 | if (is_hest_type_generic_v2(ghes)) |
317 | unmap_gen_v2(ghes); | |
d334a491 HY |
318 | } |
319 | ||
d334a491 HY |
320 | static inline int ghes_severity(int severity) |
321 | { | |
322 | switch (severity) { | |
ad4ecef2 HY |
323 | case CPER_SEV_INFORMATIONAL: |
324 | return GHES_SEV_NO; | |
325 | case CPER_SEV_CORRECTED: | |
326 | return GHES_SEV_CORRECTED; | |
327 | case CPER_SEV_RECOVERABLE: | |
328 | return GHES_SEV_RECOVERABLE; | |
329 | case CPER_SEV_FATAL: | |
330 | return GHES_SEV_PANIC; | |
d334a491 | 331 | default: |
25985edc | 332 | /* Unknown, go panic */ |
ad4ecef2 | 333 | return GHES_SEV_PANIC; |
d334a491 HY |
334 | } |
335 | } | |
336 | ||
81e88fdc | 337 | static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, |
b484079b JM |
338 | int from_phys, |
339 | enum fixed_addresses fixmap_idx) | |
d334a491 | 340 | { |
81e88fdc | 341 | void __iomem *vaddr; |
81e88fdc HY |
342 | u64 offset; |
343 | u32 trunk; | |
344 | ||
345 | while (len > 0) { | |
346 | offset = paddr - (paddr & PAGE_MASK); | |
b484079b | 347 | vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); |
81e88fdc HY |
348 | trunk = PAGE_SIZE - offset; |
349 | trunk = min(trunk, len); | |
350 | if (from_phys) | |
351 | memcpy_fromio(buffer, vaddr + offset, trunk); | |
352 | else | |
353 | memcpy_toio(vaddr + offset, buffer, trunk); | |
354 | len -= trunk; | |
355 | paddr += trunk; | |
356 | buffer += trunk; | |
b484079b | 357 | ghes_unmap(vaddr, fixmap_idx); |
81e88fdc | 358 | } |
d334a491 HY |
359 | } |
360 | ||
f2a681b9 JM |
361 | /* Check the top-level record header has an appropriate size. */ |
362 | static int __ghes_check_estatus(struct ghes *ghes, | |
363 | struct acpi_hest_generic_status *estatus) | |
364 | { | |
365 | u32 len = cper_estatus_len(estatus); | |
366 | ||
367 | if (len < sizeof(*estatus)) { | |
368 | pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); | |
369 | return -EIO; | |
370 | } | |
371 | ||
372 | if (len > ghes->generic->error_block_length) { | |
373 | pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); | |
374 | return -EIO; | |
375 | } | |
376 | ||
377 | if (cper_estatus_check_header(estatus)) { | |
378 | pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); | |
379 | return -EIO; | |
380 | } | |
381 | ||
382 | return 0; | |
383 | } | |
384 | ||
e00a6e33 JM |
385 | /* Read the CPER block, returning its address, and header in estatus. */ |
386 | static int __ghes_peek_estatus(struct ghes *ghes, | |
387 | struct acpi_hest_generic_status *estatus, | |
388 | u64 *buf_paddr, enum fixed_addresses fixmap_idx) | |
d334a491 HY |
389 | { |
390 | struct acpi_hest_generic *g = ghes->generic; | |
d334a491 HY |
391 | int rc; |
392 | ||
eeb25557 | 393 | rc = apei_read(buf_paddr, &g->error_status_address); |
d334a491 | 394 | if (rc) { |
eeb25557 | 395 | *buf_paddr = 0; |
93066e9a | 396 | pr_warn_ratelimited(FW_WARN GHES_PFX |
d334a491 HY |
397 | "Failed to read error status block address for hardware error source: %d.\n", |
398 | g->header.source_id); | |
399 | return -EIO; | |
400 | } | |
eeb25557 | 401 | if (!*buf_paddr) |
d334a491 HY |
402 | return -ENOENT; |
403 | ||
f2a7e059 JM |
404 | ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, |
405 | fixmap_idx); | |
406 | if (!estatus->block_status) { | |
eeb25557 | 407 | *buf_paddr = 0; |
d334a491 | 408 | return -ENOENT; |
eeb25557 | 409 | } |
d334a491 | 410 | |
371b8689 | 411 | return 0; |
e00a6e33 | 412 | } |
f2a681b9 | 413 | |
e00a6e33 JM |
414 | static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, |
415 | u64 buf_paddr, enum fixed_addresses fixmap_idx, | |
416 | size_t buf_len) | |
417 | { | |
418 | ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); | |
f2a681b9 | 419 | if (cper_estatus_check(estatus)) { |
93066e9a JM |
420 | pr_warn_ratelimited(FW_WARN GHES_PFX |
421 | "Failed to read error status block!\n"); | |
f2a681b9 JM |
422 | return -EIO; |
423 | } | |
eeb25557 | 424 | |
f2a681b9 | 425 | return 0; |
d334a491 HY |
426 | } |
427 | ||
e00a6e33 JM |
428 | static int ghes_read_estatus(struct ghes *ghes, |
429 | struct acpi_hest_generic_status *estatus, | |
430 | u64 *buf_paddr, enum fixed_addresses fixmap_idx) | |
431 | { | |
432 | int rc; | |
433 | ||
434 | rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); | |
435 | if (rc) | |
436 | return rc; | |
437 | ||
438 | rc = __ghes_check_estatus(ghes, estatus); | |
439 | if (rc) | |
440 | return rc; | |
441 | ||
442 | return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, | |
443 | cper_estatus_len(estatus)); | |
444 | } | |
445 | ||
f2a7e059 JM |
446 | static void ghes_clear_estatus(struct ghes *ghes, |
447 | struct acpi_hest_generic_status *estatus, | |
448 | u64 buf_paddr, enum fixed_addresses fixmap_idx) | |
d334a491 | 449 | { |
f2a7e059 | 450 | estatus->block_status = 0; |
eeb25557 JM |
451 | |
452 | if (!buf_paddr) | |
453 | return; | |
454 | ||
f2a7e059 JM |
455 | ghes_copy_tofrom_phys(estatus, buf_paddr, |
456 | sizeof(estatus->block_status), 0, | |
b484079b | 457 | fixmap_idx); |
06ddeadc JM |
458 | |
459 | /* | |
460 | * GHESv2 type HEST entries introduce support for error acknowledgment, | |
461 | * so only acknowledge the error if this support is present. | |
462 | */ | |
463 | if (is_hest_type_generic_v2(ghes)) | |
464 | ghes_ack_error(ghes->generic_v2); | |
d334a491 HY |
465 | } |
466 | ||
7f17b4a1 JM |
467 | /* |
468 | * Called as task_work before returning to user-space. | |
469 | * Ensure any queued work has been done before we return to the context that | |
470 | * triggered the notification. | |
471 | */ | |
472 | static void ghes_kick_task_work(struct callback_head *head) | |
473 | { | |
474 | struct acpi_hest_generic_status *estatus; | |
475 | struct ghes_estatus_node *estatus_node; | |
476 | u32 node_len; | |
477 | ||
478 | estatus_node = container_of(head, struct ghes_estatus_node, task_work); | |
479 | if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) | |
480 | memory_failure_queue_kick(estatus_node->task_work_cpu); | |
481 | ||
482 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
483 | node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); | |
484 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); | |
485 | } | |
486 | ||
ccb5ecdc | 487 | static bool ghes_do_memory_failure(u64 physical_addr, int flags) |
cf870c70 | 488 | { |
cf870c70 | 489 | unsigned long pfn; |
cf870c70 | 490 | |
7f17b4a1 JM |
491 | if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) |
492 | return false; | |
493 | ||
ccb5ecdc | 494 | pfn = PHYS_PFN(physical_addr); |
3ad6fd77 | 495 | if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) { |
ca104edc CG |
496 | pr_warn_ratelimited(FW_WARN GHES_PFX |
497 | "Invalid address in generic error data: %#llx\n", | |
ccb5ecdc | 498 | physical_addr); |
7f17b4a1 | 499 | return false; |
cf870c70 | 500 | } |
ca104edc | 501 | |
ccb5ecdc XT |
502 | memory_failure_queue(pfn, flags); |
503 | return true; | |
504 | } | |
505 | ||
506 | static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, | |
a70297d2 | 507 | int sev, bool sync) |
ccb5ecdc XT |
508 | { |
509 | int flags = -1; | |
510 | int sec_sev = ghes_severity(gdata->error_severity); | |
511 | struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); | |
512 | ||
513 | if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) | |
514 | return false; | |
515 | ||
ca104edc CG |
516 | /* iff following two events can be handled properly by now */ |
517 | if (sec_sev == GHES_SEV_CORRECTED && | |
518 | (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) | |
519 | flags = MF_SOFT_OFFLINE; | |
520 | if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) | |
a70297d2 | 521 | flags = sync ? MF_ACTION_REQUIRED : 0; |
ca104edc | 522 | |
ccb5ecdc XT |
523 | if (flags != -1) |
524 | return ghes_do_memory_failure(mem_err->physical_addr, flags); | |
7f17b4a1 JM |
525 | |
526 | return false; | |
cf870c70 NR |
527 | } |
528 | ||
a70297d2 SX |
529 | static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, |
530 | int sev, bool sync) | |
ccb5ecdc XT |
531 | { |
532 | struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); | |
a70297d2 | 533 | int flags = sync ? MF_ACTION_REQUIRED : 0; |
ccb5ecdc XT |
534 | bool queued = false; |
535 | int sec_sev, i; | |
536 | char *p; | |
537 | ||
538 | log_arm_hw_error(err); | |
539 | ||
540 | sec_sev = ghes_severity(gdata->error_severity); | |
541 | if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE) | |
542 | return false; | |
543 | ||
544 | p = (char *)(err + 1); | |
545 | for (i = 0; i < err->err_info_num; i++) { | |
546 | struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; | |
547 | bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR); | |
548 | bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); | |
549 | const char *error_type = "unknown error"; | |
550 | ||
551 | /* | |
552 | * The field (err_info->error_info & BIT(26)) is fixed to set to | |
553 | * 1 in some old firmware of HiSilicon Kunpeng920. We assume that | |
554 | * firmware won't mix corrected errors in an uncorrected section, | |
555 | * and don't filter out 'corrected' error here. | |
556 | */ | |
557 | if (is_cache && has_pa) { | |
a70297d2 | 558 | queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); |
ccb5ecdc XT |
559 | p += err_info->length; |
560 | continue; | |
561 | } | |
562 | ||
563 | if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs)) | |
564 | error_type = cper_proc_error_type_strs[err_info->type]; | |
565 | ||
566 | pr_warn_ratelimited(FW_WARN GHES_PFX | |
567 | "Unhandled processor error type: %s\n", | |
568 | error_type); | |
569 | p += err_info->length; | |
570 | } | |
571 | ||
572 | return queued; | |
573 | } | |
574 | ||
9852ce9a TB |
/*
 * PCIe AER errors are forwarded to the AER driver, which reports them and
 * performs recovery. GHES severities translate to AER severities as
 * follows:
 *
 * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
 *     Reported by the AER driver; no recovery is needed.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     Both are reported and recovered from by the AER driver.
 * GHES_SEV_PANIC never reaches this handling because the kernel must
 *     panic.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
	struct aer_capability_regs *aer_regs;
	unsigned int devfn;
	int aer_severity;

	/* Both the device ID and the AER info must be marked valid. */
	if (!(pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID) ||
	    !(pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO))
		return;

	devfn = PCI_DEVFN(pcie_err->device_id.device,
			  pcie_err->device_id.function);
	aer_severity = cper_severity_to_aer(gdata->error_severity);

	/*
	 * A component that firmware reset to contain the error has to be
	 * reinitialized before use, so report it as a fatal AER error.
	 */
	if (gdata->flags & CPER_SEC_RESET)
		aer_severity = AER_FATAL;

	/* Hand aer_recover_queue() a private copy taken from the pool. */
	aer_regs = (void *)gen_pool_alloc(ghes_estatus_pool,
					  sizeof(struct aer_capability_regs));
	if (!aer_regs)
		return;
	memcpy(aer_regs, pcie_err->aer_info, sizeof(struct aer_capability_regs));

	aer_recover_queue(pcie_err->device_id.segment,
			  pcie_err->device_id.bus,
			  devfn, aer_severity, aer_regs);
#endif
}
626 | ||
9aa9cf3e SJ |
627 | static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); |
628 | ||
629 | int ghes_register_vendor_record_notifier(struct notifier_block *nb) | |
630 | { | |
631 | return blocking_notifier_chain_register(&vendor_record_notify_list, nb); | |
632 | } | |
633 | EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); | |
634 | ||
635 | void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) | |
636 | { | |
637 | blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); | |
638 | } | |
639 | EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); | |
640 | ||
641 | static void ghes_vendor_record_work_func(struct work_struct *work) | |
642 | { | |
643 | struct ghes_vendor_record_entry *entry; | |
644 | struct acpi_hest_generic_data *gdata; | |
645 | u32 len; | |
646 | ||
647 | entry = container_of(work, struct ghes_vendor_record_entry, work); | |
648 | gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); | |
649 | ||
650 | blocking_notifier_call_chain(&vendor_record_notify_list, | |
651 | entry->error_severity, gdata); | |
652 | ||
653 | len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); | |
654 | gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); | |
655 | } | |
656 | ||
657 | static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, | |
658 | int sev) | |
659 | { | |
660 | struct acpi_hest_generic_data *copied_gdata; | |
661 | struct ghes_vendor_record_entry *entry; | |
662 | u32 len; | |
663 | ||
664 | len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); | |
665 | entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); | |
666 | if (!entry) | |
667 | return; | |
668 | ||
669 | copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); | |
670 | memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata)); | |
671 | entry->error_severity = sev; | |
672 | ||
673 | INIT_WORK(&entry->work, ghes_vendor_record_work_func); | |
674 | schedule_work(&entry->work); | |
675 | } | |
676 | ||
671a794c IW |
677 | /* |
678 | * Only a single callback can be registered for CXL CPER events. | |
679 | */ | |
680 | static DECLARE_RWSEM(cxl_cper_rw_sem); | |
681 | static cxl_cper_callback cper_callback; | |
682 | ||
671a794c IW |
683 | static void cxl_cper_post_event(enum cxl_event_type event_type, |
684 | struct cxl_cper_event_rec *rec) | |
685 | { | |
686 | if (rec->hdr.length <= sizeof(rec->hdr) || | |
687 | rec->hdr.length > sizeof(*rec)) { | |
688 | pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", | |
689 | rec->hdr.length); | |
690 | return; | |
691 | } | |
692 | ||
693 | if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { | |
694 | pr_err(FW_WARN "CXL CPER invalid event\n"); | |
695 | return; | |
696 | } | |
697 | ||
698 | guard(rwsem_read)(&cxl_cper_rw_sem); | |
699 | if (cper_callback) | |
700 | cper_callback(event_type, rec); | |
701 | } | |
702 | ||
703 | int cxl_cper_register_callback(cxl_cper_callback callback) | |
704 | { | |
705 | guard(rwsem_write)(&cxl_cper_rw_sem); | |
706 | if (cper_callback) | |
707 | return -EINVAL; | |
708 | cper_callback = callback; | |
709 | return 0; | |
710 | } | |
711 | EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL); | |
712 | ||
713 | int cxl_cper_unregister_callback(cxl_cper_callback callback) | |
714 | { | |
715 | guard(rwsem_write)(&cxl_cper_rw_sem); | |
716 | if (callback != cper_callback) | |
717 | return -EINVAL; | |
718 | cper_callback = NULL; | |
719 | return 0; | |
720 | } | |
721 | EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL); | |
722 | ||
7f17b4a1 | 723 | static bool ghes_do_proc(struct ghes *ghes, |
0a00fd5e | 724 | const struct acpi_hest_generic_status *estatus) |
d334a491 | 725 | { |
ba61ca4a | 726 | int sev, sec_sev; |
0a00fd5e | 727 | struct acpi_hest_generic_data *gdata; |
5b53696a | 728 | guid_t *sec_type; |
bb100b64 | 729 | const guid_t *fru_id = &guid_null; |
297b64c7 | 730 | char *fru_text = ""; |
7f17b4a1 | 731 | bool queued = false; |
a70297d2 | 732 | bool sync = is_hest_sync_notify(ghes); |
d334a491 | 733 | |
67eb2e99 HY |
734 | sev = ghes_severity(estatus->error_severity); |
735 | apei_estatus_for_each_section(estatus, gdata) { | |
5b53696a | 736 | sec_type = (guid_t *)gdata->section_type; |
ba61ca4a | 737 | sec_sev = ghes_severity(gdata->error_severity); |
297b64c7 TB |
738 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) |
739 | fru_id = (guid_t *)gdata->fru_id; | |
740 | ||
741 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) | |
742 | fru_text = gdata->fru_text; | |
743 | ||
5b53696a | 744 | if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { |
bbcc2e7b TB |
745 | struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); |
746 | ||
8e40612f | 747 | atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err); |
21480547 | 748 | |
9dae3d0d | 749 | arch_apei_report_mem_error(sev, mem_err); |
a70297d2 | 750 | queued = ghes_handle_memory_failure(gdata, sev, sync); |
ba61ca4a | 751 | } |
5b53696a | 752 | else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { |
9852ce9a | 753 | ghes_handle_aer(gdata); |
a654e5ee | 754 | } |
e9279e83 | 755 | else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { |
a70297d2 | 756 | queued = ghes_handle_arm_hw_error(gdata, sev, sync); |
671a794c IW |
757 | } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { |
758 | struct cxl_cper_event_rec *rec = | |
759 | acpi_hest_get_payload(gdata); | |
760 | ||
761 | cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); | |
762 | } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { | |
763 | struct cxl_cper_event_rec *rec = | |
764 | acpi_hest_get_payload(gdata); | |
765 | ||
766 | cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); | |
767 | } else if (guid_equal(sec_type, | |
768 | &CPER_SEC_CXL_MEM_MODULE_GUID)) { | |
769 | struct cxl_cper_event_rec *rec = | |
770 | acpi_hest_get_payload(gdata); | |
771 | ||
772 | cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); | |
e9279e83 | 773 | } else { |
297b64c7 TB |
774 | void *err = acpi_hest_get_payload(gdata); |
775 | ||
9aa9cf3e | 776 | ghes_defer_non_standard_event(gdata, sev); |
297b64c7 TB |
777 | log_non_standard_event(sec_type, fru_id, fru_text, |
778 | sec_sev, err, | |
779 | gdata->error_data_length); | |
780 | } | |
d334a491 | 781 | } |
7f17b4a1 JM |
782 | |
783 | return queued; | |
32c361f5 | 784 | } |
d334a491 | 785 | |
67eb2e99 HY |
786 | static void __ghes_print_estatus(const char *pfx, |
787 | const struct acpi_hest_generic *generic, | |
0a00fd5e | 788 | const struct acpi_hest_generic_status *estatus) |
32c361f5 | 789 | { |
5ba82ab5 HY |
790 | static atomic_t seqno; |
791 | unsigned int curr_seqno; | |
792 | char pfx_seq[64]; | |
793 | ||
32c361f5 | 794 | if (pfx == NULL) { |
67eb2e99 | 795 | if (ghes_severity(estatus->error_severity) <= |
32c361f5 | 796 | GHES_SEV_CORRECTED) |
5ba82ab5 | 797 | pfx = KERN_WARNING; |
32c361f5 | 798 | else |
5ba82ab5 | 799 | pfx = KERN_ERR; |
32c361f5 | 800 | } |
5ba82ab5 HY |
801 | curr_seqno = atomic_inc_return(&seqno); |
802 | snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); | |
5588340d | 803 | printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", |
5ba82ab5 | 804 | pfx_seq, generic->header.source_id); |
88f074f4 | 805 | cper_estatus_print(pfx_seq, estatus); |
5588340d HY |
806 | } |
807 | ||
152cef40 HY |
808 | static int ghes_print_estatus(const char *pfx, |
809 | const struct acpi_hest_generic *generic, | |
0a00fd5e | 810 | const struct acpi_hest_generic_status *estatus) |
5588340d HY |
811 | { |
812 | /* Not more than 2 messages every 5 seconds */ | |
67eb2e99 HY |
813 | static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); |
814 | static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); | |
815 | struct ratelimit_state *ratelimit; | |
5588340d | 816 | |
67eb2e99 HY |
817 | if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) |
818 | ratelimit = &ratelimit_corrected; | |
819 | else | |
820 | ratelimit = &ratelimit_uncorrected; | |
152cef40 | 821 | if (__ratelimit(ratelimit)) { |
67eb2e99 | 822 | __ghes_print_estatus(pfx, generic, estatus); |
152cef40 HY |
823 | return 1; |
824 | } | |
825 | return 0; | |
826 | } | |
827 | ||
828 | /* | |
829 | * GHES error status reporting throttle, to report more kinds of | |
830 | * errors, instead of just most frequently occurred errors. | |
831 | */ | |
0a00fd5e | 832 | static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) |
152cef40 HY |
833 | { |
834 | u32 len; | |
835 | int i, cached = 0; | |
836 | unsigned long long now; | |
837 | struct ghes_estatus_cache *cache; | |
0a00fd5e | 838 | struct acpi_hest_generic_status *cache_estatus; |
152cef40 | 839 | |
88f074f4 | 840 | len = cper_estatus_len(estatus); |
152cef40 HY |
841 | rcu_read_lock(); |
842 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
843 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
844 | if (cache == NULL) | |
845 | continue; | |
846 | if (len != cache->estatus_len) | |
847 | continue; | |
848 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
849 | if (memcmp(estatus, cache_estatus, len)) | |
850 | continue; | |
851 | atomic_inc(&cache->count); | |
852 | now = sched_clock(); | |
853 | if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) | |
854 | cached = 1; | |
855 | break; | |
856 | } | |
857 | rcu_read_unlock(); | |
858 | return cached; | |
859 | } | |
860 | ||
861 | static struct ghes_estatus_cache *ghes_estatus_cache_alloc( | |
862 | struct acpi_hest_generic *generic, | |
0a00fd5e | 863 | struct acpi_hest_generic_status *estatus) |
152cef40 HY |
864 | { |
865 | int alloced; | |
866 | u32 len, cache_len; | |
867 | struct ghes_estatus_cache *cache; | |
0a00fd5e | 868 | struct acpi_hest_generic_status *cache_estatus; |
152cef40 HY |
869 | |
870 | alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); | |
871 | if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { | |
872 | atomic_dec(&ghes_estatus_cache_alloced); | |
873 | return NULL; | |
874 | } | |
88f074f4 | 875 | len = cper_estatus_len(estatus); |
152cef40 HY |
876 | cache_len = GHES_ESTATUS_CACHE_LEN(len); |
877 | cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); | |
878 | if (!cache) { | |
879 | atomic_dec(&ghes_estatus_cache_alloced); | |
880 | return NULL; | |
881 | } | |
882 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
883 | memcpy(cache_estatus, estatus, len); | |
884 | cache->estatus_len = len; | |
885 | atomic_set(&cache->count, 0); | |
886 | cache->generic = generic; | |
887 | cache->time_in = sched_clock(); | |
888 | return cache; | |
889 | } | |
890 | ||
dd3fa54b | 891 | static void ghes_estatus_cache_rcu_free(struct rcu_head *head) |
152cef40 | 892 | { |
dd3fa54b | 893 | struct ghes_estatus_cache *cache; |
152cef40 HY |
894 | u32 len; |
895 | ||
dd3fa54b | 896 | cache = container_of(head, struct ghes_estatus_cache, rcu); |
88f074f4 | 897 | len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); |
152cef40 HY |
898 | len = GHES_ESTATUS_CACHE_LEN(len); |
899 | gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); | |
900 | atomic_dec(&ghes_estatus_cache_alloced); | |
901 | } | |
902 | ||
dd3fa54b AB |
903 | static void |
904 | ghes_estatus_cache_add(struct acpi_hest_generic *generic, | |
905 | struct acpi_hest_generic_status *estatus) | |
152cef40 | 906 | { |
152cef40 | 907 | unsigned long long now, duration, period, max_period = 0; |
dd3fa54b AB |
908 | struct ghes_estatus_cache *cache, *new_cache; |
909 | struct ghes_estatus_cache __rcu *victim; | |
910 | int i, slot = -1, count; | |
152cef40 HY |
911 | |
912 | new_cache = ghes_estatus_cache_alloc(generic, estatus); | |
dd3fa54b | 913 | if (!new_cache) |
152cef40 | 914 | return; |
dd3fa54b | 915 | |
152cef40 HY |
916 | rcu_read_lock(); |
917 | now = sched_clock(); | |
918 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
919 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
920 | if (cache == NULL) { | |
921 | slot = i; | |
152cef40 HY |
922 | break; |
923 | } | |
924 | duration = now - cache->time_in; | |
925 | if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { | |
926 | slot = i; | |
152cef40 HY |
927 | break; |
928 | } | |
929 | count = atomic_read(&cache->count); | |
70cb6e1d LB |
930 | period = duration; |
931 | do_div(period, (count + 1)); | |
152cef40 HY |
932 | if (period > max_period) { |
933 | max_period = period; | |
934 | slot = i; | |
152cef40 HY |
935 | } |
936 | } | |
152cef40 | 937 | rcu_read_unlock(); |
dd3fa54b AB |
938 | |
939 | if (slot != -1) { | |
940 | /* | |
941 | * Use release semantics to ensure that ghes_estatus_cached() | |
942 | * running on another CPU will see the updated cache fields if | |
943 | * it can see the new value of the pointer. | |
944 | */ | |
945 | victim = xchg_release(&ghes_estatus_caches[slot], | |
946 | RCU_INITIALIZER(new_cache)); | |
947 | ||
948 | /* | |
949 | * At this point, victim may point to a cached item different | |
950 | * from the one based on which we selected the slot. Instead of | |
951 | * going to the loop again to pick another slot, let's just | |
952 | * drop the other item anyway: this may cause a false cache | |
953 | * miss later on, but that won't cause any problems. | |
954 | */ | |
955 | if (victim) | |
956 | call_rcu(&unrcu_pointer(victim)->rcu, | |
957 | ghes_estatus_cache_rcu_free); | |
958 | } | |
d334a491 HY |
959 | } |
960 | ||
f2a7e059 JM |
961 | static void __ghes_panic(struct ghes *ghes, |
962 | struct acpi_hest_generic_status *estatus, | |
963 | u64 buf_paddr, enum fixed_addresses fixmap_idx) | |
2fb5853e | 964 | { |
f2a7e059 | 965 | __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); |
2fb5853e | 966 | |
f2a7e059 | 967 | ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); |
98cff8b2 | 968 | |
2fb5853e JZZ |
969 | /* reboot to log the error! */ |
970 | if (!panic_timeout) | |
971 | panic_timeout = ghes_panic_timeout; | |
972 | panic("Fatal hardware error!"); | |
973 | } | |
974 | ||
d334a491 HY |
975 | static int ghes_proc(struct ghes *ghes) |
976 | { | |
f2a7e059 | 977 | struct acpi_hest_generic_status *estatus = ghes->estatus; |
eeb25557 | 978 | u64 buf_paddr; |
d334a491 HY |
979 | int rc; |
980 | ||
f2a7e059 | 981 | rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); |
d334a491 HY |
982 | if (rc) |
983 | goto out; | |
2fb5853e | 984 | |
f2a7e059 JM |
985 | if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) |
986 | __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); | |
2fb5853e | 987 | |
f2a7e059 JM |
988 | if (!ghes_estatus_cached(estatus)) { |
989 | if (ghes_print_estatus(NULL, ghes->generic, estatus)) | |
990 | ghes_estatus_cache_add(ghes->generic, estatus); | |
152cef40 | 991 | } |
f2a7e059 | 992 | ghes_do_proc(ghes, estatus); |
42aa5604 | 993 | |
aaf2c2fb | 994 | out: |
f2a7e059 | 995 | ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); |
aaf2c2fb | 996 | |
806487a8 | 997 | return rc; |
d334a491 HY |
998 | } |
999 | ||
81e88fdc HY |
1000 | static void ghes_add_timer(struct ghes *ghes) |
1001 | { | |
1002 | struct acpi_hest_generic *g = ghes->generic; | |
1003 | unsigned long expire; | |
1004 | ||
1005 | if (!g->notify.poll_interval) { | |
933ca4e3 KW |
1006 | pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", |
1007 | g->header.source_id); | |
81e88fdc HY |
1008 | return; |
1009 | } | |
1010 | expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); | |
1011 | ghes->timer.expires = round_jiffies_relative(expire); | |
1012 | add_timer(&ghes->timer); | |
1013 | } | |
1014 | ||
d5272003 | 1015 | static void ghes_poll_func(struct timer_list *t) |
81e88fdc | 1016 | { |
d5272003 | 1017 | struct ghes *ghes = from_timer(ghes, t, timer); |
3b880cbe | 1018 | unsigned long flags; |
81e88fdc | 1019 | |
3b880cbe | 1020 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
81e88fdc | 1021 | ghes_proc(ghes); |
3b880cbe | 1022 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
81e88fdc HY |
1023 | if (!(ghes->flags & GHES_EXITING)) |
1024 | ghes_add_timer(ghes); | |
1025 | } | |
1026 | ||
1027 | static irqreturn_t ghes_irq_func(int irq, void *data) | |
1028 | { | |
1029 | struct ghes *ghes = data; | |
3b880cbe | 1030 | unsigned long flags; |
81e88fdc HY |
1031 | int rc; |
1032 | ||
3b880cbe | 1033 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
81e88fdc | 1034 | rc = ghes_proc(ghes); |
3b880cbe | 1035 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
81e88fdc HY |
1036 | if (rc) |
1037 | return IRQ_NONE; | |
1038 | ||
1039 | return IRQ_HANDLED; | |
1040 | } | |
1041 | ||
7bf130e4 SJ |
1042 | static int ghes_notify_hed(struct notifier_block *this, unsigned long event, |
1043 | void *data) | |
d334a491 HY |
1044 | { |
1045 | struct ghes *ghes; | |
3b880cbe | 1046 | unsigned long flags; |
d334a491 HY |
1047 | int ret = NOTIFY_DONE; |
1048 | ||
3b880cbe | 1049 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
d334a491 | 1050 | rcu_read_lock(); |
7bf130e4 | 1051 | list_for_each_entry_rcu(ghes, &ghes_hed, list) { |
d334a491 HY |
1052 | if (!ghes_proc(ghes)) |
1053 | ret = NOTIFY_OK; | |
1054 | } | |
1055 | rcu_read_unlock(); | |
3b880cbe | 1056 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
d334a491 HY |
1057 | |
1058 | return ret; | |
1059 | } | |
1060 | ||
/*
 * Notifier block that routes hardware error device notifications to
 * ghes_notify_hed(). It is registered when the first entry is added to
 * ghes_hed and unregistered when the list becomes empty again.
 */
static struct notifier_block ghes_notifier_hed = {
	.notifier_call = ghes_notify_hed,
};
1064 | ||
/*
 * Handlers for CPER records may not be NMI safe. For example,
 * memory_failure_queue() takes spinlocks and calls schedule_work_on().
 * In any NMI-like handler, memory from ghes_estatus_pool is used to save
 * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes
 * ghes_proc_in_irq() to run in IRQ context where each estatus in
 * ghes_estatus_llist is processed.
 *
 * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache
 * to suppress frequent messages.
 */
/* Lock-less list of estatus nodes queued by the NMI-like handlers. */
static struct llist_head ghes_estatus_llist;
/* irq_work that runs ghes_proc_in_irq() to drain ghes_estatus_llist. */
static struct irq_work ghes_proc_irq_work;
1078 | ||
67eb2e99 HY |
1079 | static void ghes_proc_in_irq(struct irq_work *irq_work) |
1080 | { | |
46d12f0b | 1081 | struct llist_node *llnode, *next; |
67eb2e99 | 1082 | struct ghes_estatus_node *estatus_node; |
152cef40 | 1083 | struct acpi_hest_generic *generic; |
0a00fd5e | 1084 | struct acpi_hest_generic_status *estatus; |
7f17b4a1 | 1085 | bool task_work_pending; |
67eb2e99 | 1086 | u32 len, node_len; |
7f17b4a1 | 1087 | int ret; |
67eb2e99 | 1088 | |
46d12f0b | 1089 | llnode = llist_del_all(&ghes_estatus_llist); |
67eb2e99 HY |
1090 | /* |
1091 | * Because the time order of estatus in list is reversed, | |
1092 | * revert it back to proper order. | |
1093 | */ | |
8d21d4c9 | 1094 | llnode = llist_reverse_order(llnode); |
67eb2e99 HY |
1095 | while (llnode) { |
1096 | next = llnode->next; | |
1097 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
1098 | llnode); | |
1099 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
88f074f4 | 1100 | len = cper_estatus_len(estatus); |
67eb2e99 | 1101 | node_len = GHES_ESTATUS_NODE_LEN(len); |
7f17b4a1 | 1102 | task_work_pending = ghes_do_proc(estatus_node->ghes, estatus); |
152cef40 HY |
1103 | if (!ghes_estatus_cached(estatus)) { |
1104 | generic = estatus_node->generic; | |
1105 | if (ghes_print_estatus(NULL, generic, estatus)) | |
1106 | ghes_estatus_cache_add(generic, estatus); | |
1107 | } | |
7f17b4a1 | 1108 | |
415fed69 | 1109 | if (task_work_pending && current->mm) { |
7f17b4a1 JM |
1110 | estatus_node->task_work.func = ghes_kick_task_work; |
1111 | estatus_node->task_work_cpu = smp_processor_id(); | |
1112 | ret = task_work_add(current, &estatus_node->task_work, | |
91989c70 | 1113 | TWA_RESUME); |
7f17b4a1 JM |
1114 | if (ret) |
1115 | estatus_node->task_work.func = NULL; | |
1116 | } | |
1117 | ||
1118 | if (!estatus_node->task_work.func) | |
1119 | gen_pool_free(ghes_estatus_pool, | |
1120 | (unsigned long)estatus_node, node_len); | |
1121 | ||
67eb2e99 HY |
1122 | llnode = next; |
1123 | } | |
1124 | } | |
1125 | ||
46d12f0b HY |
1126 | static void ghes_print_queued_estatus(void) |
1127 | { | |
1128 | struct llist_node *llnode; | |
1129 | struct ghes_estatus_node *estatus_node; | |
1130 | struct acpi_hest_generic *generic; | |
0a00fd5e | 1131 | struct acpi_hest_generic_status *estatus; |
46d12f0b HY |
1132 | |
1133 | llnode = llist_del_all(&ghes_estatus_llist); | |
1134 | /* | |
1135 | * Because the time order of estatus in list is reversed, | |
1136 | * revert it back to proper order. | |
1137 | */ | |
8d21d4c9 | 1138 | llnode = llist_reverse_order(llnode); |
46d12f0b HY |
1139 | while (llnode) { |
1140 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
1141 | llnode); | |
1142 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
46d12f0b HY |
1143 | generic = estatus_node->generic; |
1144 | ghes_print_estatus(NULL, generic, estatus); | |
1145 | llnode = llnode->next; | |
1146 | } | |
1147 | } | |
1148 | ||
d9f608dc JM |
1149 | static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, |
1150 | enum fixed_addresses fixmap_idx) | |
11568496 | 1151 | { |
d9f608dc | 1152 | struct acpi_hest_generic_status *estatus, tmp_header; |
11568496 | 1153 | struct ghes_estatus_node *estatus_node; |
d9f608dc JM |
1154 | u32 len, node_len; |
1155 | u64 buf_paddr; | |
1156 | int sev, rc; | |
11568496 | 1157 | |
f2a7e059 | 1158 | if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) |
d9f608dc | 1159 | return -EOPNOTSUPP; |
11568496 | 1160 | |
d9f608dc JM |
1161 | rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); |
1162 | if (rc) { | |
1163 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); | |
1164 | return rc; | |
1165 | } | |
f2a7e059 | 1166 | |
d9f608dc JM |
1167 | rc = __ghes_check_estatus(ghes, &tmp_header); |
1168 | if (rc) { | |
1169 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); | |
1170 | return rc; | |
1171 | } | |
11568496 | 1172 | |
d9f608dc JM |
1173 | len = cper_estatus_len(&tmp_header); |
1174 | node_len = GHES_ESTATUS_NODE_LEN(len); | |
11568496 BP |
1175 | estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); |
1176 | if (!estatus_node) | |
d9f608dc | 1177 | return -ENOMEM; |
11568496 BP |
1178 | |
1179 | estatus_node->ghes = ghes; | |
1180 | estatus_node->generic = ghes->generic; | |
7f17b4a1 | 1181 | estatus_node->task_work.func = NULL; |
11568496 | 1182 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); |
11568496 | 1183 | |
d9f608dc | 1184 | if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { |
f2a7e059 | 1185 | ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); |
d9f608dc JM |
1186 | rc = -ENOENT; |
1187 | goto no_work; | |
ee2eb3d4 | 1188 | } |
6fe9e7c2 | 1189 | |
f2a7e059 | 1190 | sev = ghes_severity(estatus->error_severity); |
ee2eb3d4 JM |
1191 | if (sev >= GHES_SEV_PANIC) { |
1192 | ghes_print_queued_estatus(); | |
f2a7e059 | 1193 | __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); |
ee2eb3d4 | 1194 | } |
6169ddf8 | 1195 | |
d9f608dc | 1196 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); |
6169ddf8 | 1197 | |
d9f608dc JM |
1198 | /* This error has been reported before, don't process it again. */ |
1199 | if (ghes_estatus_cached(estatus)) | |
1200 | goto no_work; | |
1201 | ||
1202 | llist_add(&estatus_node->llnode, &ghes_estatus_llist); | |
1203 | ||
1204 | return rc; | |
1205 | ||
1206 | no_work: | |
1207 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, | |
1208 | node_len); | |
1209 | ||
1210 | return rc; | |
ee2eb3d4 JM |
1211 | } |
1212 | ||
b484079b JM |
1213 | static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, |
1214 | enum fixed_addresses fixmap_idx) | |
ee2eb3d4 JM |
1215 | { |
1216 | int ret = -ENOENT; | |
1217 | struct ghes *ghes; | |
1218 | ||
1219 | rcu_read_lock(); | |
1220 | list_for_each_entry_rcu(ghes, rcu_list, list) { | |
b484079b | 1221 | if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) |
ee2eb3d4 | 1222 | ret = 0; |
81e88fdc | 1223 | } |
ee2eb3d4 | 1224 | rcu_read_unlock(); |
11568496 | 1225 | |
ee2eb3d4 | 1226 | if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) |
a545715d | 1227 | irq_work_queue(&ghes_proc_irq_work); |
ee2eb3d4 JM |
1228 | |
1229 | return ret; | |
1230 | } | |
9c9d0805 JM |
1231 | |
1232 | #ifdef CONFIG_ACPI_APEI_SEA | |
/*
 * Error sources notified via SEA. Walked under RCU by ghes_notify_sea();
 * modified by ghes_sea_add()/ghes_sea_remove() under ghes_list_mutex.
 */
static LIST_HEAD(ghes_sea);
1234 | ||
1235 | /* | |
1236 | * Return 0 only if one of the SEA error sources successfully reported an error | |
1237 | * record sent from the firmware. | |
1238 | */ | |
1239 | int ghes_notify_sea(void) | |
1240 | { | |
3b880cbe JM |
1241 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea); |
1242 | int rv; | |
1243 | ||
1244 | raw_spin_lock(&ghes_notify_lock_sea); | |
b972d2ea | 1245 | rv = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA); |
3b880cbe JM |
1246 | raw_spin_unlock(&ghes_notify_lock_sea); |
1247 | ||
1248 | return rv; | |
9c9d0805 JM |
1249 | } |
1250 | ||
1251 | static void ghes_sea_add(struct ghes *ghes) | |
1252 | { | |
1253 | mutex_lock(&ghes_list_mutex); | |
1254 | list_add_rcu(&ghes->list, &ghes_sea); | |
1255 | mutex_unlock(&ghes_list_mutex); | |
1256 | } | |
1257 | ||
1258 | static void ghes_sea_remove(struct ghes *ghes) | |
1259 | { | |
1260 | mutex_lock(&ghes_list_mutex); | |
1261 | list_del_rcu(&ghes->list); | |
1262 | mutex_unlock(&ghes_list_mutex); | |
1263 | synchronize_rcu(); | |
1264 | } | |
1265 | #else /* CONFIG_ACPI_APEI_SEA */ | |
/* No-op stand-ins used when CONFIG_ACPI_APEI_SEA is not set. */
static inline void ghes_sea_add(struct ghes *ghes) { }
static inline void ghes_sea_remove(struct ghes *ghes) { }
1268 | #endif /* CONFIG_ACPI_APEI_SEA */ | |
1269 | ||
1270 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI | |
/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 * having only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

/*
 * Error sources notified via NMI. Walked under RCU from ghes_notify_nmi();
 * modified by ghes_nmi_add()/ghes_nmi_remove() under ghes_list_mutex.
 */
static LIST_HEAD(ghes_nmi);
ee2eb3d4 JM |
1278 | |
1279 | static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) | |
1280 | { | |
3b880cbe | 1281 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi); |
ee2eb3d4 JM |
1282 | int ret = NMI_DONE; |
1283 | ||
1284 | if (!atomic_add_unless(&ghes_in_nmi, 1, 1)) | |
1285 | return ret; | |
1286 | ||
3b880cbe | 1287 | raw_spin_lock(&ghes_notify_lock_nmi); |
b484079b | 1288 | if (!ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI)) |
ee2eb3d4 | 1289 | ret = NMI_HANDLED; |
3b880cbe | 1290 | raw_spin_unlock(&ghes_notify_lock_nmi); |
ee2eb3d4 | 1291 | |
6fe9e7c2 | 1292 | atomic_dec(&ghes_in_nmi); |
81e88fdc HY |
1293 | return ret; |
1294 | } | |
1295 | ||
44a69f61 TN |
1296 | static void ghes_nmi_add(struct ghes *ghes) |
1297 | { | |
44a69f61 TN |
1298 | mutex_lock(&ghes_list_mutex); |
1299 | if (list_empty(&ghes_nmi)) | |
1300 | register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); | |
1301 | list_add_rcu(&ghes->list, &ghes_nmi); | |
1302 | mutex_unlock(&ghes_list_mutex); | |
1303 | } | |
1304 | ||
1305 | static void ghes_nmi_remove(struct ghes *ghes) | |
1306 | { | |
44a69f61 TN |
1307 | mutex_lock(&ghes_list_mutex); |
1308 | list_del_rcu(&ghes->list); | |
1309 | if (list_empty(&ghes_nmi)) | |
1310 | unregister_nmi_handler(NMI_LOCAL, "ghes"); | |
1311 | mutex_unlock(&ghes_list_mutex); | |
1312 | /* | |
1313 | * To synchronize with NMI handler, ghes can only be | |
1314 | * freed after NMI handler finishes. | |
1315 | */ | |
1316 | synchronize_rcu(); | |
44a69f61 | 1317 | } |
255097c8 JM |
1318 | #else /* CONFIG_HAVE_ACPI_APEI_NMI */ |
/* No-op stand-ins used when CONFIG_HAVE_ACPI_APEI_NMI is not set. */
static inline void ghes_nmi_add(struct ghes *ghes) { }
static inline void ghes_nmi_remove(struct ghes *ghes) { }
1321 | #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ | |
44a69f61 TN |
1322 | |
1323 | static void ghes_nmi_init_cxt(void) | |
1324 | { | |
1325 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); | |
1326 | } | |
44a69f61 | 1327 | |
f9f05395 JM |
1328 | static int __ghes_sdei_callback(struct ghes *ghes, |
1329 | enum fixed_addresses fixmap_idx) | |
1330 | { | |
1331 | if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { | |
1332 | irq_work_queue(&ghes_proc_irq_work); | |
1333 | ||
1334 | return 0; | |
1335 | } | |
1336 | ||
1337 | return -ENOENT; | |
1338 | } | |
1339 | ||
1340 | static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, | |
1341 | void *arg) | |
1342 | { | |
1343 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); | |
1344 | struct ghes *ghes = arg; | |
1345 | int err; | |
1346 | ||
1347 | raw_spin_lock(&ghes_notify_lock_sdei_normal); | |
1348 | err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); | |
1349 | raw_spin_unlock(&ghes_notify_lock_sdei_normal); | |
1350 | ||
1351 | return err; | |
1352 | } | |
1353 | ||
1354 | static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, | |
1355 | void *arg) | |
1356 | { | |
1357 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); | |
1358 | struct ghes *ghes = arg; | |
1359 | int err; | |
1360 | ||
1361 | raw_spin_lock(&ghes_notify_lock_sdei_critical); | |
1362 | err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); | |
1363 | raw_spin_unlock(&ghes_notify_lock_sdei_critical); | |
1364 | ||
1365 | return err; | |
1366 | } | |
1367 | ||
1368 | static int apei_sdei_register_ghes(struct ghes *ghes) | |
1369 | { | |
1370 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) | |
1371 | return -EOPNOTSUPP; | |
1372 | ||
1373 | return sdei_register_ghes(ghes, ghes_sdei_normal_callback, | |
1374 | ghes_sdei_critical_callback); | |
1375 | } | |
1376 | ||
1377 | static int apei_sdei_unregister_ghes(struct ghes *ghes) | |
1378 | { | |
1379 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) | |
1380 | return -EOPNOTSUPP; | |
1381 | ||
1382 | return sdei_unregister_ghes(ghes); | |
1383 | } | |
1384 | ||
da095fd3 | 1385 | static int ghes_probe(struct platform_device *ghes_dev) |
d334a491 HY |
1386 | { |
1387 | struct acpi_hest_generic *generic; | |
1388 | struct ghes *ghes = NULL; | |
3b880cbe | 1389 | unsigned long flags; |
44a69f61 | 1390 | |
7ad6e943 | 1391 | int rc = -EINVAL; |
d334a491 | 1392 | |
1dd6b20e | 1393 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; |
d334a491 | 1394 | if (!generic->enabled) |
7ad6e943 | 1395 | return -ENODEV; |
d334a491 | 1396 | |
81e88fdc HY |
1397 | switch (generic->notify.type) { |
1398 | case ACPI_HEST_NOTIFY_POLLED: | |
1399 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1400 | case ACPI_HEST_NOTIFY_SCI: | |
7bf130e4 SJ |
1401 | case ACPI_HEST_NOTIFY_GSIV: |
1402 | case ACPI_HEST_NOTIFY_GPIO: | |
44a69f61 | 1403 | break; |
7bf130e4 | 1404 | |
7edda088 TB |
1405 | case ACPI_HEST_NOTIFY_SEA: |
1406 | if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { | |
1407 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", | |
1408 | generic->header.source_id); | |
1409 | rc = -ENOTSUPP; | |
1410 | goto err; | |
1411 | } | |
1412 | break; | |
81e88fdc | 1413 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 TN |
1414 | if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { |
1415 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", | |
1416 | generic->header.source_id); | |
1417 | goto err; | |
1418 | } | |
81e88fdc | 1419 | break; |
f9f05395 JM |
1420 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1421 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { | |
1422 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", | |
1423 | generic->header.source_id); | |
1424 | goto err; | |
1425 | } | |
1426 | break; | |
81e88fdc | 1427 | case ACPI_HEST_NOTIFY_LOCAL: |
933ca4e3 KW |
1428 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", |
1429 | generic->header.source_id); | |
d334a491 | 1430 | goto err; |
81e88fdc | 1431 | default: |
933ca4e3 KW |
1432 | pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", |
1433 | generic->notify.type, generic->header.source_id); | |
81e88fdc | 1434 | goto err; |
d334a491 | 1435 | } |
81e88fdc HY |
1436 | |
1437 | rc = -EIO; | |
1438 | if (generic->error_block_length < | |
0a00fd5e | 1439 | sizeof(struct acpi_hest_generic_status)) { |
933ca4e3 KW |
1440 | pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", |
1441 | generic->error_block_length, generic->header.source_id); | |
d334a491 HY |
1442 | goto err; |
1443 | } | |
1444 | ghes = ghes_new(generic); | |
1445 | if (IS_ERR(ghes)) { | |
1446 | rc = PTR_ERR(ghes); | |
1447 | ghes = NULL; | |
1448 | goto err; | |
1449 | } | |
21480547 | 1450 | |
81e88fdc HY |
1451 | switch (generic->notify.type) { |
1452 | case ACPI_HEST_NOTIFY_POLLED: | |
cea79e7e | 1453 | timer_setup(&ghes->timer, ghes_poll_func, 0); |
81e88fdc HY |
1454 | ghes_add_timer(ghes); |
1455 | break; | |
1456 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1457 | /* External interrupt vector is GSI */ | |
a98d4f64 WY |
1458 | rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); |
1459 | if (rc) { | |
81e88fdc HY |
1460 | pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", |
1461 | generic->header.source_id); | |
cc7f3f13 | 1462 | goto err; |
81e88fdc | 1463 | } |
bdb9458a LH |
1464 | rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, |
1465 | "GHES IRQ", ghes); | |
a98d4f64 | 1466 | if (rc) { |
81e88fdc HY |
1467 | pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", |
1468 | generic->header.source_id); | |
cc7f3f13 | 1469 | goto err; |
81e88fdc HY |
1470 | } |
1471 | break; | |
7bf130e4 | 1472 | |
81e88fdc | 1473 | case ACPI_HEST_NOTIFY_SCI: |
7bf130e4 SJ |
1474 | case ACPI_HEST_NOTIFY_GSIV: |
1475 | case ACPI_HEST_NOTIFY_GPIO: | |
7ad6e943 | 1476 | mutex_lock(&ghes_list_mutex); |
7bf130e4 SJ |
1477 | if (list_empty(&ghes_hed)) |
1478 | register_acpi_hed_notifier(&ghes_notifier_hed); | |
1479 | list_add_rcu(&ghes->list, &ghes_hed); | |
7ad6e943 | 1480 | mutex_unlock(&ghes_list_mutex); |
81e88fdc | 1481 | break; |
7bf130e4 | 1482 | |
7edda088 TB |
1483 | case ACPI_HEST_NOTIFY_SEA: |
1484 | ghes_sea_add(ghes); | |
1485 | break; | |
81e88fdc | 1486 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 | 1487 | ghes_nmi_add(ghes); |
81e88fdc | 1488 | break; |
f9f05395 JM |
1489 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1490 | rc = apei_sdei_register_ghes(ghes); | |
1491 | if (rc) | |
1492 | goto err; | |
1493 | break; | |
81e88fdc HY |
1494 | default: |
1495 | BUG(); | |
d334a491 | 1496 | } |
cc7f3f13 | 1497 | |
7ad6e943 | 1498 | platform_set_drvdata(ghes_dev, ghes); |
d334a491 | 1499 | |
9057a3f7 JH |
1500 | ghes->dev = &ghes_dev->dev; |
1501 | ||
1502 | mutex_lock(&ghes_devs_mutex); | |
1503 | list_add_tail(&ghes->elist, &ghes_devs); | |
1504 | mutex_unlock(&ghes_devs_mutex); | |
cc7f3f13 | 1505 | |
77b246b3 | 1506 | /* Handle any pending errors right away */ |
3b880cbe | 1507 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
77b246b3 | 1508 | ghes_proc(ghes); |
3b880cbe | 1509 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
77b246b3 | 1510 | |
d334a491 | 1511 | return 0; |
cc7f3f13 | 1512 | |
d334a491 | 1513 | err: |
7ad6e943 | 1514 | if (ghes) { |
d334a491 | 1515 | ghes_fini(ghes); |
7ad6e943 HY |
1516 | kfree(ghes); |
1517 | } | |
d334a491 HY |
1518 | return rc; |
1519 | } | |
1520 | ||
b59bc2fb | 1521 | static int ghes_remove(struct platform_device *ghes_dev) |
d334a491 | 1522 | { |
f9f05395 | 1523 | int rc; |
7ad6e943 HY |
1524 | struct ghes *ghes; |
1525 | struct acpi_hest_generic *generic; | |
d334a491 | 1526 | |
7ad6e943 HY |
1527 | ghes = platform_get_drvdata(ghes_dev); |
1528 | generic = ghes->generic; | |
1529 | ||
81e88fdc | 1530 | ghes->flags |= GHES_EXITING; |
7ad6e943 | 1531 | switch (generic->notify.type) { |
81e88fdc | 1532 | case ACPI_HEST_NOTIFY_POLLED: |
292a089d | 1533 | timer_shutdown_sync(&ghes->timer); |
81e88fdc HY |
1534 | break; |
1535 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1536 | free_irq(ghes->irq, ghes); | |
1537 | break; | |
7bf130e4 | 1538 | |
7ad6e943 | 1539 | case ACPI_HEST_NOTIFY_SCI: |
7bf130e4 SJ |
1540 | case ACPI_HEST_NOTIFY_GSIV: |
1541 | case ACPI_HEST_NOTIFY_GPIO: | |
7ad6e943 HY |
1542 | mutex_lock(&ghes_list_mutex); |
1543 | list_del_rcu(&ghes->list); | |
7bf130e4 SJ |
1544 | if (list_empty(&ghes_hed)) |
1545 | unregister_acpi_hed_notifier(&ghes_notifier_hed); | |
7ad6e943 | 1546 | mutex_unlock(&ghes_list_mutex); |
7d64f82c | 1547 | synchronize_rcu(); |
7ad6e943 | 1548 | break; |
7bf130e4 | 1549 | |
7edda088 TB |
1550 | case ACPI_HEST_NOTIFY_SEA: |
1551 | ghes_sea_remove(ghes); | |
1552 | break; | |
81e88fdc | 1553 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 | 1554 | ghes_nmi_remove(ghes); |
81e88fdc | 1555 | break; |
f9f05395 JM |
1556 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1557 | rc = apei_sdei_unregister_ghes(ghes); | |
1558 | if (rc) | |
1559 | return rc; | |
1560 | break; | |
7ad6e943 HY |
1561 | default: |
1562 | BUG(); | |
1563 | break; | |
1564 | } | |
d334a491 | 1565 | |
7ad6e943 | 1566 | ghes_fini(ghes); |
21480547 | 1567 | |
9057a3f7 JH |
1568 | mutex_lock(&ghes_devs_mutex); |
1569 | list_del(&ghes->elist); | |
1570 | mutex_unlock(&ghes_devs_mutex); | |
21480547 | 1571 | |
7ad6e943 | 1572 | kfree(ghes); |
d334a491 | 1573 | |
7ad6e943 | 1574 | return 0; |
d334a491 HY |
1575 | } |
1576 | ||
/*
 * Platform driver named "GHES"; ghes_probe()/ghes_remove() set up and tear
 * down one generic hardware error source per device.
 */
static struct platform_driver ghes_platform_driver = {
	.driver = {
		.name = "GHES",
	},
	.probe = ghes_probe,
	.remove = ghes_remove,
};
1584 | ||
27e932a3 | 1585 | void __init acpi_ghes_init(void) |
d334a491 | 1586 | { |
81e88fdc HY |
1587 | int rc; |
1588 | ||
dc4e8c07 SX |
1589 | sdei_init(); |
1590 | ||
d334a491 | 1591 | if (acpi_disabled) |
dc4e8c07 | 1592 | return; |
d334a491 | 1593 | |
e931d0da PA |
1594 | switch (hest_disable) { |
1595 | case HEST_NOT_FOUND: | |
dc4e8c07 | 1596 | return; |
e931d0da | 1597 | case HEST_DISABLED: |
d334a491 | 1598 | pr_info(GHES_PFX "HEST is not enabled!\n"); |
dc4e8c07 | 1599 | return; |
e931d0da PA |
1600 | default: |
1601 | break; | |
d334a491 HY |
1602 | } |
1603 | ||
b6a95016 HY |
1604 | if (ghes_disable) { |
1605 | pr_info(GHES_PFX "GHES is not enabled!\n"); | |
dc4e8c07 | 1606 | return; |
b6a95016 HY |
1607 | } |
1608 | ||
44a69f61 | 1609 | ghes_nmi_init_cxt(); |
67eb2e99 | 1610 | |
67eb2e99 HY |
1611 | rc = platform_driver_register(&ghes_platform_driver); |
1612 | if (rc) | |
dc4e8c07 | 1613 | return; |
67eb2e99 | 1614 | |
9fb0bfe1 HY |
1615 | rc = apei_osc_setup(); |
1616 | if (rc == 0 && osc_sb_apei_support_acked) | |
1617 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); | |
1618 | else if (rc == 0 && !osc_sb_apei_support_acked) | |
1619 | pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); | |
1620 | else if (rc && osc_sb_apei_support_acked) | |
1621 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); | |
1622 | else | |
1623 | pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); | |
d334a491 | 1624 | } |
8e40612f | 1625 | |
9057a3f7 JH |
1626 | /* |
1627 | * Known x86 systems that prefer GHES error reporting: | |
1628 | */ | |
1629 | static struct acpi_platform_list plat_list[] = { | |
1630 | {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, | |
1631 | { } /* End */ | |
1632 | }; | |
1633 | ||
1634 | struct list_head *ghes_get_devices(void) | |
1635 | { | |
1636 | int idx = -1; | |
1637 | ||
1638 | if (IS_ENABLED(CONFIG_X86)) { | |
1639 | idx = acpi_match_platform_list(plat_list); | |
1640 | if (idx < 0) { | |
1641 | if (!ghes_edac_force_enable) | |
1642 | return NULL; | |
1643 | ||
1644 | pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n"); | |
1645 | } | |
9368aa18 LY |
1646 | } else if (list_empty(&ghes_devs)) { |
1647 | return NULL; | |
9057a3f7 JH |
1648 | } |
1649 | ||
1650 | return &ghes_devs; | |
1651 | } | |
1652 | EXPORT_SYMBOL_GPL(ghes_get_devices); | |
1653 | ||
8e40612f JH |
1654 | void ghes_register_report_chain(struct notifier_block *nb) |
1655 | { | |
1656 | atomic_notifier_chain_register(&ghes_report_chain, nb); | |
1657 | } | |
1658 | EXPORT_SYMBOL_GPL(ghes_register_report_chain); | |
1659 | ||
1660 | void ghes_unregister_report_chain(struct notifier_block *nb) | |
1661 | { | |
1662 | atomic_notifier_chain_unregister(&ghes_report_chain, nb); | |
1663 | } | |
1664 | EXPORT_SYMBOL_GPL(ghes_unregister_report_chain); |