Commit | Line | Data |
---|---|---|
1802d0be | 1 | // SPDX-License-Identifier: GPL-2.0-only |
d334a491 HY |
2 | /* |
3 | * APEI Generic Hardware Error Source support | |
4 | * | |
5 | * Generic Hardware Error Source provides a way to report platform | |
6 | * hardware errors (such as that from chipset). It works in so called | |
7 | * "Firmware First" mode, that is, hardware errors are reported to | |
8 | * firmware firstly, then reported to Linux by firmware. This way, | |
9 | * some non-standard hardware error registers or non-standard hardware | |
10 | * link can be checked by firmware to produce more hardware error | |
11 | * information for Linux. | |
12 | * | |
13 | * For more information about Generic Hardware Error Source, please | |
14 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | |
15 | * | |
67eb2e99 | 16 | * Copyright 2010,2011 Intel Corp. |
d334a491 | 17 | * Author: Huang Ying <ying.huang@intel.com> |
d334a491 HY |
18 | */ |
19 | ||
f9f05395 | 20 | #include <linux/arm_sdei.h> |
d334a491 | 21 | #include <linux/kernel.h> |
020bf066 | 22 | #include <linux/moduleparam.h> |
d334a491 HY |
23 | #include <linux/init.h> |
24 | #include <linux/acpi.h> | |
25 | #include <linux/io.h> | |
26 | #include <linux/interrupt.h> | |
81e88fdc | 27 | #include <linux/timer.h> |
d334a491 | 28 | #include <linux/cper.h> |
5e4a264b IW |
29 | #include <linux/cleanup.h> |
30 | #include <linux/cxl-event.h> | |
7ad6e943 HY |
31 | #include <linux/platform_device.h> |
32 | #include <linux/mutex.h> | |
32c361f5 | 33 | #include <linux/ratelimit.h> |
81e88fdc | 34 | #include <linux/vmalloc.h> |
67eb2e99 HY |
35 | #include <linux/irq_work.h> |
36 | #include <linux/llist.h> | |
37 | #include <linux/genalloc.h> | |
5e4a264b | 38 | #include <linux/kfifo.h> |
a654e5ee | 39 | #include <linux/pci.h> |
b484079b | 40 | #include <linux/pfn.h> |
a654e5ee | 41 | #include <linux/aer.h> |
44a69f61 | 42 | #include <linux/nmi.h> |
e6017571 | 43 | #include <linux/sched/clock.h> |
297b64c7 TB |
44 | #include <linux/uuid.h> |
45 | #include <linux/ras.h> | |
7f17b4a1 | 46 | #include <linux/task_work.h> |
40e06415 | 47 | |
42aa5604 | 48 | #include <acpi/actbl1.h> |
40e06415 | 49 | #include <acpi/ghes.h> |
9dae3d0d | 50 | #include <acpi/apei.h> |
4f89fa28 | 51 | #include <asm/fixmap.h> |
81e88fdc | 52 | #include <asm/tlbflush.h> |
297b64c7 | 53 | #include <ras/ras_event.h> |
d334a491 HY |
54 | |
55 | #include "apei-internal.h" | |
56 | ||
#define GHES_PFX "GHES: "

/* Largest error status buffer that ghes_new() allocates for one source. */
#define GHES_ESTATUS_MAX_SIZE 65536
/* Per-source share added to the estatus pool by ghes_estatus_pool_init(). */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536

/* Minimum allocation order handed to gen_pool_create() for the estatus pool. */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is only an estimate, used when sizing the memory pool. */
#define GHES_ESTATUS_CACHE_AVG_SIZE 512

#define GHES_ESTATUS_CACHES_SIZE 4

/* 10 seconds, expressed in nanoseconds. */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
/* Prevent too many caches from being allocated because of RCU. */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/*
 * An estatus cache, an estatus node and a vendor record entry are each a
 * fixed-size header immediately followed by their payload. The *_LEN()
 * helpers give the total allocation size for a payload of the given length,
 * and the *_FROM_*() helpers return a pointer to the payload just past the
 * header.
 */
#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
	((struct acpi_hest_generic_status *) \
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len) \
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node) \
	((struct acpi_hest_generic_status *) \
	 ((struct ghes_estatus_node *)(estatus_node) + 1))

#define GHES_VENDOR_ENTRY_LEN(gdata_len) \
	(sizeof(struct ghes_vendor_record_entry) + (gdata_len))
#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \
	((struct acpi_hest_generic_data *) \
	 ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
90 | ||
f9f05395 JM |
/*
 * NMI-like notifications differ between architectures. The compiler needs a
 * value for these enums before it can prune the unused static functions, so
 * provide placeholders when the SDEI interface is not configured.
 */
#ifndef CONFIG_ARM_SDE_INTERFACE
#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
#endif

/* Notifier chain called from ghes_do_proc() for platform memory error sections. */
static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
101 | ||
42aa5604 TB |
102 | static inline bool is_hest_type_generic_v2(struct ghes *ghes) |
103 | { | |
104 | return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; | |
105 | } | |
106 | ||
a70297d2 SX |
107 | /* |
108 | * A platform may describe one error source for the handling of synchronous | |
109 | * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI | |
110 | * or External Interrupt). On x86, the HEST notifications are always | |
111 | * asynchronous, so only SEA on ARM is delivered as a synchronous | |
112 | * notification. | |
113 | */ | |
114 | static inline bool is_hest_sync_notify(struct ghes *ghes) | |
115 | { | |
116 | u8 notify_type = ghes->generic->notify.type; | |
117 | ||
118 | return notify_type == ACPI_HEST_NOTIFY_SEA; | |
119 | } | |
120 | ||
020bf066 PG |
121 | /* |
122 | * This driver isn't really modular, however for the time being, | |
123 | * continuing to use module_param is the easiest way to remain | |
124 | * compatible with existing boot arg use cases. | |
125 | */ | |
90ab5ee9 | 126 | bool ghes_disable; |
b6a95016 HY |
127 | module_param_named(disable, ghes_disable, bool, 0); |
128 | ||
9057a3f7 JH |
129 | /* |
130 | * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform | |
131 | * check. | |
132 | */ | |
133 | static bool ghes_edac_force_enable; | |
134 | module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0); | |
135 | ||
/*
 * Every error source notified through HED (Hardware Error Device) shares one
 * notifier callback, so the sources are linked together and checked one by
 * one. The same applies to NMI.
 *
 * These lists are protected by RCU: ghes_list_mutex is taken only to modify
 * a list, never to traverse it.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * GHES devices that are handed over to the matching EDAC driver, ghes_edac,
 * for further use.
 */
static LIST_HEAD(ghes_devs);
static DEFINE_MUTEX(ghes_devs_mutex);
153 | ||
81e88fdc HY |
/*
 * The memory area through which BIOS passes hardware error information to
 * Linux can only be determined in the NMI, IRQ or timer handler. The general
 * ioremap cannot be used in atomic context, so the fixmap is used instead.
 *
 * This spinlock keeps the fixmap entry from being used by more than one
 * user at a time.
 */
static DEFINE_SPINLOCK(ghes_notify_lock_irq);
81e88fdc | 164 | |
9aa9cf3e SJ |
165 | struct ghes_vendor_record_entry { |
166 | struct work_struct work; | |
167 | int error_severity; | |
168 | char vendor_record[]; | |
169 | }; | |
170 | ||
67eb2e99 | 171 | static struct gen_pool *ghes_estatus_pool; |
67eb2e99 | 172 | |
dd3fa54b | 173 | static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; |
152cef40 HY |
174 | static atomic_t ghes_estatus_cache_alloced; |
175 | ||
2fb5853e JZZ |
176 | static int ghes_panic_timeout __read_mostly = 30; |
177 | ||
b484079b | 178 | static void __iomem *ghes_map(u64 pfn, enum fixed_addresses fixmap_idx) |
81e88fdc | 179 | { |
7edda088 TB |
180 | phys_addr_t paddr; |
181 | pgprot_t prot; | |
81e88fdc | 182 | |
b484079b | 183 | paddr = PFN_PHYS(pfn); |
7edda088 | 184 | prot = arch_apei_get_mem_attribute(paddr); |
b484079b | 185 | __set_fixmap(fixmap_idx, paddr, prot); |
81e88fdc | 186 | |
b484079b | 187 | return (void __iomem *) __fix_to_virt(fixmap_idx); |
81e88fdc HY |
188 | } |
189 | ||
b484079b | 190 | static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) |
81e88fdc | 191 | { |
b484079b | 192 | int _idx = virt_to_fix((unsigned long)vaddr); |
8ece249a | 193 | |
b484079b JM |
194 | WARN_ON_ONCE(fixmap_idx != _idx); |
195 | clear_fixmap(fixmap_idx); | |
81e88fdc HY |
196 | } |
197 | ||
43d27483 | 198 | int ghes_estatus_pool_init(unsigned int num_ghes) |
67eb2e99 | 199 | { |
fb7be08f | 200 | unsigned long addr, len; |
6abc7622 | 201 | int rc; |
fb7be08f | 202 | |
67eb2e99 HY |
203 | ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); |
204 | if (!ghes_estatus_pool) | |
205 | return -ENOMEM; | |
67eb2e99 | 206 | |
fb7be08f JM |
207 | len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; |
208 | len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); | |
67eb2e99 | 209 | |
0ac234be JM |
210 | addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); |
211 | if (!addr) | |
6abc7622 | 212 | goto err_pool_alloc; |
0ac234be | 213 | |
6abc7622 LZ |
214 | rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); |
215 | if (rc) | |
216 | goto err_pool_add; | |
217 | ||
218 | return 0; | |
219 | ||
220 | err_pool_add: | |
221 | vfree((void *)addr); | |
222 | ||
223 | err_pool_alloc: | |
224 | gen_pool_destroy(ghes_estatus_pool); | |
225 | ||
226 | return -ENOMEM; | |
67eb2e99 HY |
227 | } |
228 | ||
e2abc47a SJ |
229 | /** |
230 | * ghes_estatus_pool_region_free - free previously allocated memory | |
231 | * from the ghes_estatus_pool. | |
232 | * @addr: address of memory to free. | |
233 | * @size: size of memory to free. | |
234 | * | |
235 | * Returns none. | |
236 | */ | |
237 | void ghes_estatus_pool_region_free(unsigned long addr, u32 size) | |
238 | { | |
239 | gen_pool_free(ghes_estatus_pool, addr, size); | |
240 | } | |
241 | EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free); | |
242 | ||
42aa5604 TB |
243 | static int map_gen_v2(struct ghes *ghes) |
244 | { | |
245 | return apei_map_generic_address(&ghes->generic_v2->read_ack_register); | |
246 | } | |
247 | ||
248 | static void unmap_gen_v2(struct ghes *ghes) | |
249 | { | |
250 | apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); | |
251 | } | |
252 | ||
06ddeadc JM |
253 | static void ghes_ack_error(struct acpi_hest_generic_v2 *gv2) |
254 | { | |
255 | int rc; | |
256 | u64 val = 0; | |
257 | ||
258 | rc = apei_read(&val, &gv2->read_ack_register); | |
259 | if (rc) | |
260 | return; | |
261 | ||
262 | val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; | |
263 | val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; | |
264 | ||
265 | apei_write(val, &gv2->read_ack_register); | |
266 | } | |
267 | ||
d334a491 HY |
268 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) |
269 | { | |
270 | struct ghes *ghes; | |
271 | unsigned int error_block_length; | |
272 | int rc; | |
273 | ||
274 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | |
275 | if (!ghes) | |
276 | return ERR_PTR(-ENOMEM); | |
42aa5604 | 277 | |
d334a491 | 278 | ghes->generic = generic; |
42aa5604 TB |
279 | if (is_hest_type_generic_v2(ghes)) { |
280 | rc = map_gen_v2(ghes); | |
281 | if (rc) | |
282 | goto err_free; | |
283 | } | |
284 | ||
34ddeb03 | 285 | rc = apei_map_generic_address(&generic->error_status_address); |
d334a491 | 286 | if (rc) |
42aa5604 | 287 | goto err_unmap_read_ack_addr; |
d334a491 HY |
288 | error_block_length = generic->error_block_length; |
289 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | |
933ca4e3 KW |
290 | pr_warn(FW_WARN GHES_PFX |
291 | "Error status block length is too long: %u for " | |
292 | "generic hardware error source: %d.\n", | |
293 | error_block_length, generic->header.source_id); | |
d334a491 HY |
294 | error_block_length = GHES_ESTATUS_MAX_SIZE; |
295 | } | |
296 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | |
297 | if (!ghes->estatus) { | |
298 | rc = -ENOMEM; | |
42aa5604 | 299 | goto err_unmap_status_addr; |
d334a491 HY |
300 | } |
301 | ||
302 | return ghes; | |
303 | ||
42aa5604 | 304 | err_unmap_status_addr: |
34ddeb03 | 305 | apei_unmap_generic_address(&generic->error_status_address); |
42aa5604 TB |
306 | err_unmap_read_ack_addr: |
307 | if (is_hest_type_generic_v2(ghes)) | |
308 | unmap_gen_v2(ghes); | |
d334a491 HY |
309 | err_free: |
310 | kfree(ghes); | |
311 | return ERR_PTR(rc); | |
312 | } | |
313 | ||
314 | static void ghes_fini(struct ghes *ghes) | |
315 | { | |
316 | kfree(ghes->estatus); | |
34ddeb03 | 317 | apei_unmap_generic_address(&ghes->generic->error_status_address); |
42aa5604 TB |
318 | if (is_hest_type_generic_v2(ghes)) |
319 | unmap_gen_v2(ghes); | |
d334a491 HY |
320 | } |
321 | ||
d334a491 HY |
322 | static inline int ghes_severity(int severity) |
323 | { | |
324 | switch (severity) { | |
ad4ecef2 HY |
325 | case CPER_SEV_INFORMATIONAL: |
326 | return GHES_SEV_NO; | |
327 | case CPER_SEV_CORRECTED: | |
328 | return GHES_SEV_CORRECTED; | |
329 | case CPER_SEV_RECOVERABLE: | |
330 | return GHES_SEV_RECOVERABLE; | |
331 | case CPER_SEV_FATAL: | |
332 | return GHES_SEV_PANIC; | |
d334a491 | 333 | default: |
25985edc | 334 | /* Unknown, go panic */ |
ad4ecef2 | 335 | return GHES_SEV_PANIC; |
d334a491 HY |
336 | } |
337 | } | |
338 | ||
81e88fdc | 339 | static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, |
b484079b JM |
340 | int from_phys, |
341 | enum fixed_addresses fixmap_idx) | |
d334a491 | 342 | { |
81e88fdc | 343 | void __iomem *vaddr; |
81e88fdc HY |
344 | u64 offset; |
345 | u32 trunk; | |
346 | ||
347 | while (len > 0) { | |
348 | offset = paddr - (paddr & PAGE_MASK); | |
b484079b | 349 | vaddr = ghes_map(PHYS_PFN(paddr), fixmap_idx); |
81e88fdc HY |
350 | trunk = PAGE_SIZE - offset; |
351 | trunk = min(trunk, len); | |
352 | if (from_phys) | |
353 | memcpy_fromio(buffer, vaddr + offset, trunk); | |
354 | else | |
355 | memcpy_toio(vaddr + offset, buffer, trunk); | |
356 | len -= trunk; | |
357 | paddr += trunk; | |
358 | buffer += trunk; | |
b484079b | 359 | ghes_unmap(vaddr, fixmap_idx); |
81e88fdc | 360 | } |
d334a491 HY |
361 | } |
362 | ||
f2a681b9 JM |
363 | /* Check the top-level record header has an appropriate size. */ |
364 | static int __ghes_check_estatus(struct ghes *ghes, | |
365 | struct acpi_hest_generic_status *estatus) | |
366 | { | |
367 | u32 len = cper_estatus_len(estatus); | |
368 | ||
369 | if (len < sizeof(*estatus)) { | |
370 | pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); | |
371 | return -EIO; | |
372 | } | |
373 | ||
374 | if (len > ghes->generic->error_block_length) { | |
375 | pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); | |
376 | return -EIO; | |
377 | } | |
378 | ||
379 | if (cper_estatus_check_header(estatus)) { | |
380 | pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); | |
381 | return -EIO; | |
382 | } | |
383 | ||
384 | return 0; | |
385 | } | |
386 | ||
e00a6e33 JM |
387 | /* Read the CPER block, returning its address, and header in estatus. */ |
388 | static int __ghes_peek_estatus(struct ghes *ghes, | |
389 | struct acpi_hest_generic_status *estatus, | |
390 | u64 *buf_paddr, enum fixed_addresses fixmap_idx) | |
d334a491 HY |
391 | { |
392 | struct acpi_hest_generic *g = ghes->generic; | |
d334a491 HY |
393 | int rc; |
394 | ||
eeb25557 | 395 | rc = apei_read(buf_paddr, &g->error_status_address); |
d334a491 | 396 | if (rc) { |
eeb25557 | 397 | *buf_paddr = 0; |
93066e9a | 398 | pr_warn_ratelimited(FW_WARN GHES_PFX |
d334a491 HY |
399 | "Failed to read error status block address for hardware error source: %d.\n", |
400 | g->header.source_id); | |
401 | return -EIO; | |
402 | } | |
eeb25557 | 403 | if (!*buf_paddr) |
d334a491 HY |
404 | return -ENOENT; |
405 | ||
f2a7e059 JM |
406 | ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, |
407 | fixmap_idx); | |
408 | if (!estatus->block_status) { | |
eeb25557 | 409 | *buf_paddr = 0; |
d334a491 | 410 | return -ENOENT; |
eeb25557 | 411 | } |
d334a491 | 412 | |
371b8689 | 413 | return 0; |
e00a6e33 | 414 | } |
f2a681b9 | 415 | |
e00a6e33 JM |
416 | static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, |
417 | u64 buf_paddr, enum fixed_addresses fixmap_idx, | |
418 | size_t buf_len) | |
419 | { | |
420 | ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); | |
f2a681b9 | 421 | if (cper_estatus_check(estatus)) { |
93066e9a JM |
422 | pr_warn_ratelimited(FW_WARN GHES_PFX |
423 | "Failed to read error status block!\n"); | |
f2a681b9 JM |
424 | return -EIO; |
425 | } | |
eeb25557 | 426 | |
f2a681b9 | 427 | return 0; |
d334a491 HY |
428 | } |
429 | ||
e00a6e33 JM |
430 | static int ghes_read_estatus(struct ghes *ghes, |
431 | struct acpi_hest_generic_status *estatus, | |
432 | u64 *buf_paddr, enum fixed_addresses fixmap_idx) | |
433 | { | |
434 | int rc; | |
435 | ||
436 | rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); | |
437 | if (rc) | |
438 | return rc; | |
439 | ||
440 | rc = __ghes_check_estatus(ghes, estatus); | |
441 | if (rc) | |
442 | return rc; | |
443 | ||
444 | return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, | |
445 | cper_estatus_len(estatus)); | |
446 | } | |
447 | ||
f2a7e059 JM |
448 | static void ghes_clear_estatus(struct ghes *ghes, |
449 | struct acpi_hest_generic_status *estatus, | |
450 | u64 buf_paddr, enum fixed_addresses fixmap_idx) | |
d334a491 | 451 | { |
f2a7e059 | 452 | estatus->block_status = 0; |
eeb25557 JM |
453 | |
454 | if (!buf_paddr) | |
455 | return; | |
456 | ||
f2a7e059 JM |
457 | ghes_copy_tofrom_phys(estatus, buf_paddr, |
458 | sizeof(estatus->block_status), 0, | |
b484079b | 459 | fixmap_idx); |
06ddeadc JM |
460 | |
461 | /* | |
462 | * GHESv2 type HEST entries introduce support for error acknowledgment, | |
463 | * so only acknowledge the error if this support is present. | |
464 | */ | |
465 | if (is_hest_type_generic_v2(ghes)) | |
466 | ghes_ack_error(ghes->generic_v2); | |
d334a491 HY |
467 | } |
468 | ||
7f17b4a1 JM |
469 | /* |
470 | * Called as task_work before returning to user-space. | |
471 | * Ensure any queued work has been done before we return to the context that | |
472 | * triggered the notification. | |
473 | */ | |
474 | static void ghes_kick_task_work(struct callback_head *head) | |
475 | { | |
476 | struct acpi_hest_generic_status *estatus; | |
477 | struct ghes_estatus_node *estatus_node; | |
478 | u32 node_len; | |
479 | ||
480 | estatus_node = container_of(head, struct ghes_estatus_node, task_work); | |
481 | if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) | |
482 | memory_failure_queue_kick(estatus_node->task_work_cpu); | |
483 | ||
484 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
485 | node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); | |
486 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); | |
487 | } | |
488 | ||
ccb5ecdc | 489 | static bool ghes_do_memory_failure(u64 physical_addr, int flags) |
cf870c70 | 490 | { |
cf870c70 | 491 | unsigned long pfn; |
cf870c70 | 492 | |
7f17b4a1 JM |
493 | if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) |
494 | return false; | |
495 | ||
ccb5ecdc | 496 | pfn = PHYS_PFN(physical_addr); |
3ad6fd77 | 497 | if (!pfn_valid(pfn) && !arch_is_platform_page(physical_addr)) { |
ca104edc CG |
498 | pr_warn_ratelimited(FW_WARN GHES_PFX |
499 | "Invalid address in generic error data: %#llx\n", | |
ccb5ecdc | 500 | physical_addr); |
7f17b4a1 | 501 | return false; |
cf870c70 | 502 | } |
ca104edc | 503 | |
ccb5ecdc XT |
504 | memory_failure_queue(pfn, flags); |
505 | return true; | |
506 | } | |
507 | ||
508 | static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, | |
a70297d2 | 509 | int sev, bool sync) |
ccb5ecdc XT |
510 | { |
511 | int flags = -1; | |
512 | int sec_sev = ghes_severity(gdata->error_severity); | |
513 | struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); | |
514 | ||
515 | if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) | |
516 | return false; | |
517 | ||
ca104edc CG |
518 | /* iff following two events can be handled properly by now */ |
519 | if (sec_sev == GHES_SEV_CORRECTED && | |
520 | (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) | |
521 | flags = MF_SOFT_OFFLINE; | |
522 | if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) | |
a70297d2 | 523 | flags = sync ? MF_ACTION_REQUIRED : 0; |
ca104edc | 524 | |
ccb5ecdc XT |
525 | if (flags != -1) |
526 | return ghes_do_memory_failure(mem_err->physical_addr, flags); | |
7f17b4a1 JM |
527 | |
528 | return false; | |
cf870c70 NR |
529 | } |
530 | ||
a70297d2 SX |
531 | static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, |
532 | int sev, bool sync) | |
ccb5ecdc XT |
533 | { |
534 | struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); | |
a70297d2 | 535 | int flags = sync ? MF_ACTION_REQUIRED : 0; |
ccb5ecdc XT |
536 | bool queued = false; |
537 | int sec_sev, i; | |
538 | char *p; | |
539 | ||
540 | log_arm_hw_error(err); | |
541 | ||
542 | sec_sev = ghes_severity(gdata->error_severity); | |
543 | if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE) | |
544 | return false; | |
545 | ||
546 | p = (char *)(err + 1); | |
547 | for (i = 0; i < err->err_info_num; i++) { | |
548 | struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; | |
549 | bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR); | |
550 | bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); | |
551 | const char *error_type = "unknown error"; | |
552 | ||
553 | /* | |
554 | * The field (err_info->error_info & BIT(26)) is fixed to set to | |
555 | * 1 in some old firmware of HiSilicon Kunpeng920. We assume that | |
556 | * firmware won't mix corrected errors in an uncorrected section, | |
557 | * and don't filter out 'corrected' error here. | |
558 | */ | |
559 | if (is_cache && has_pa) { | |
a70297d2 | 560 | queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); |
ccb5ecdc XT |
561 | p += err_info->length; |
562 | continue; | |
563 | } | |
564 | ||
565 | if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs)) | |
566 | error_type = cper_proc_error_type_strs[err_info->type]; | |
567 | ||
568 | pr_warn_ratelimited(FW_WARN GHES_PFX | |
569 | "Unhandled processor error type: %s\n", | |
570 | error_type); | |
571 | p += err_info->length; | |
572 | } | |
573 | ||
574 | return queued; | |
575 | } | |
576 | ||
9852ce9a TB |
/*
 * PCIe AER errors are forwarded to the AER driver for reporting and
 * recovery. GHES severities map onto AER severities as follows:
 *
 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
 *     Reported by the AER driver; no recovery is necessary.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     Both are reported and recovered from by the AER driver.
 * GHES_SEV_PANIC never reaches this handler because the kernel must panic.
 *
 * Nothing is done unless CONFIG_ACPI_APEI_PCIEAER is enabled and the section
 * carries both a valid device ID and valid AER info.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
	unsigned int devfn;
	int aer_severity;
	u8 *aer_info;

	if (!(pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID) ||
	    !(pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO))
		return;

	devfn = PCI_DEVFN(pcie_err->device_id.device,
			  pcie_err->device_id.function);
	aer_severity = cper_severity_to_aer(gdata->error_severity);

	/*
	 * A component that firmware reset in order to contain the error has
	 * to be reinitialized before use, so treat this as a fatal AER error.
	 */
	if (gdata->flags & CPER_SEC_RESET)
		aer_severity = AER_FATAL;

	/* Copy the AER registers into pool memory before queueing them. */
	aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
					  sizeof(struct aer_capability_regs));
	if (!aer_info)
		return;
	memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));

	aer_recover_queue(pcie_err->device_id.segment,
			  pcie_err->device_id.bus,
			  devfn, aer_severity,
			  (struct aer_capability_regs *)aer_info);
#endif
}
628 | ||
9aa9cf3e SJ |
629 | static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); |
630 | ||
631 | int ghes_register_vendor_record_notifier(struct notifier_block *nb) | |
632 | { | |
633 | return blocking_notifier_chain_register(&vendor_record_notify_list, nb); | |
634 | } | |
635 | EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); | |
636 | ||
637 | void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) | |
638 | { | |
639 | blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); | |
640 | } | |
641 | EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); | |
642 | ||
643 | static void ghes_vendor_record_work_func(struct work_struct *work) | |
644 | { | |
645 | struct ghes_vendor_record_entry *entry; | |
646 | struct acpi_hest_generic_data *gdata; | |
647 | u32 len; | |
648 | ||
649 | entry = container_of(work, struct ghes_vendor_record_entry, work); | |
650 | gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); | |
651 | ||
652 | blocking_notifier_call_chain(&vendor_record_notify_list, | |
653 | entry->error_severity, gdata); | |
654 | ||
655 | len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); | |
656 | gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); | |
657 | } | |
658 | ||
659 | static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, | |
660 | int sev) | |
661 | { | |
662 | struct acpi_hest_generic_data *copied_gdata; | |
663 | struct ghes_vendor_record_entry *entry; | |
664 | u32 len; | |
665 | ||
666 | len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); | |
667 | entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); | |
668 | if (!entry) | |
669 | return; | |
670 | ||
671 | copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); | |
672 | memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata)); | |
673 | entry->error_severity = sev; | |
674 | ||
675 | INIT_WORK(&entry->work, ghes_vendor_record_work_func); | |
676 | schedule_work(&entry->work); | |
677 | } | |
678 | ||
5e4a264b IW |
679 | /* Room for 8 entries for each of the 4 event log queues */ |
680 | #define CXL_CPER_FIFO_DEPTH 32 | |
681 | DEFINE_KFIFO(cxl_cper_fifo, struct cxl_cper_work_data, CXL_CPER_FIFO_DEPTH); | |
682 | ||
683 | /* Synchronize schedule_work() with cxl_cper_work changes */ | |
684 | static DEFINE_SPINLOCK(cxl_cper_work_lock); | |
685 | struct work_struct *cxl_cper_work; | |
686 | ||
687 | static void cxl_cper_post_event(enum cxl_event_type event_type, | |
688 | struct cxl_cper_event_rec *rec) | |
689 | { | |
690 | struct cxl_cper_work_data wd; | |
691 | ||
692 | if (rec->hdr.length <= sizeof(rec->hdr) || | |
693 | rec->hdr.length > sizeof(*rec)) { | |
694 | pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", | |
695 | rec->hdr.length); | |
696 | return; | |
697 | } | |
698 | ||
699 | if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { | |
700 | pr_err(FW_WARN "CXL CPER invalid event\n"); | |
701 | return; | |
702 | } | |
703 | ||
704 | guard(spinlock_irqsave)(&cxl_cper_work_lock); | |
705 | ||
706 | if (!cxl_cper_work) | |
707 | return; | |
708 | ||
709 | wd.event_type = event_type; | |
710 | memcpy(&wd.rec, rec, sizeof(wd.rec)); | |
711 | ||
712 | if (!kfifo_put(&cxl_cper_fifo, wd)) { | |
713 | pr_err_ratelimited("CXL CPER kfifo overflow\n"); | |
714 | return; | |
715 | } | |
716 | ||
717 | schedule_work(cxl_cper_work); | |
718 | } | |
719 | ||
720 | int cxl_cper_register_work(struct work_struct *work) | |
721 | { | |
722 | if (cxl_cper_work) | |
723 | return -EINVAL; | |
724 | ||
725 | guard(spinlock)(&cxl_cper_work_lock); | |
726 | cxl_cper_work = work; | |
727 | return 0; | |
728 | } | |
729 | EXPORT_SYMBOL_NS_GPL(cxl_cper_register_work, CXL); | |
730 | ||
731 | int cxl_cper_unregister_work(struct work_struct *work) | |
732 | { | |
733 | if (cxl_cper_work != work) | |
734 | return -EINVAL; | |
735 | ||
736 | guard(spinlock)(&cxl_cper_work_lock); | |
737 | cxl_cper_work = NULL; | |
738 | return 0; | |
739 | } | |
740 | EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_work, CXL); | |
741 | ||
742 | int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) | |
743 | { | |
744 | return kfifo_get(&cxl_cper_fifo, wd); | |
745 | } | |
746 | EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, CXL); | |
747 | ||
7f17b4a1 | 748 | static bool ghes_do_proc(struct ghes *ghes, |
0a00fd5e | 749 | const struct acpi_hest_generic_status *estatus) |
d334a491 | 750 | { |
ba61ca4a | 751 | int sev, sec_sev; |
0a00fd5e | 752 | struct acpi_hest_generic_data *gdata; |
5b53696a | 753 | guid_t *sec_type; |
bb100b64 | 754 | const guid_t *fru_id = &guid_null; |
297b64c7 | 755 | char *fru_text = ""; |
7f17b4a1 | 756 | bool queued = false; |
a70297d2 | 757 | bool sync = is_hest_sync_notify(ghes); |
d334a491 | 758 | |
67eb2e99 HY |
759 | sev = ghes_severity(estatus->error_severity); |
760 | apei_estatus_for_each_section(estatus, gdata) { | |
5b53696a | 761 | sec_type = (guid_t *)gdata->section_type; |
ba61ca4a | 762 | sec_sev = ghes_severity(gdata->error_severity); |
297b64c7 TB |
763 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) |
764 | fru_id = (guid_t *)gdata->fru_id; | |
765 | ||
766 | if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) | |
767 | fru_text = gdata->fru_text; | |
768 | ||
5b53696a | 769 | if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { |
bbcc2e7b TB |
770 | struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); |
771 | ||
8e40612f | 772 | atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err); |
21480547 | 773 | |
9dae3d0d | 774 | arch_apei_report_mem_error(sev, mem_err); |
a70297d2 | 775 | queued = ghes_handle_memory_failure(gdata, sev, sync); |
ba61ca4a | 776 | } |
5b53696a | 777 | else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { |
9852ce9a | 778 | ghes_handle_aer(gdata); |
a654e5ee | 779 | } |
e9279e83 | 780 | else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { |
a70297d2 | 781 | queued = ghes_handle_arm_hw_error(gdata, sev, sync); |
5e4a264b IW |
782 | } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { |
783 | struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); | |
784 | ||
785 | cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); | |
786 | } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { | |
787 | struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); | |
788 | ||
789 | cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); | |
790 | } else if (guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) { | |
791 | struct cxl_cper_event_rec *rec = acpi_hest_get_payload(gdata); | |
792 | ||
793 | cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); | |
e9279e83 | 794 | } else { |
297b64c7 TB |
795 | void *err = acpi_hest_get_payload(gdata); |
796 | ||
9aa9cf3e | 797 | ghes_defer_non_standard_event(gdata, sev); |
297b64c7 TB |
798 | log_non_standard_event(sec_type, fru_id, fru_text, |
799 | sec_sev, err, | |
800 | gdata->error_data_length); | |
801 | } | |
d334a491 | 802 | } |
7f17b4a1 JM |
803 | |
804 | return queued; | |
32c361f5 | 805 | } |
d334a491 | 806 | |
67eb2e99 HY |
807 | static void __ghes_print_estatus(const char *pfx, |
808 | const struct acpi_hest_generic *generic, | |
0a00fd5e | 809 | const struct acpi_hest_generic_status *estatus) |
32c361f5 | 810 | { |
5ba82ab5 HY |
811 | static atomic_t seqno; |
812 | unsigned int curr_seqno; | |
813 | char pfx_seq[64]; | |
814 | ||
32c361f5 | 815 | if (pfx == NULL) { |
67eb2e99 | 816 | if (ghes_severity(estatus->error_severity) <= |
32c361f5 | 817 | GHES_SEV_CORRECTED) |
5ba82ab5 | 818 | pfx = KERN_WARNING; |
32c361f5 | 819 | else |
5ba82ab5 | 820 | pfx = KERN_ERR; |
32c361f5 | 821 | } |
5ba82ab5 HY |
822 | curr_seqno = atomic_inc_return(&seqno); |
823 | snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno); | |
5588340d | 824 | printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", |
5ba82ab5 | 825 | pfx_seq, generic->header.source_id); |
88f074f4 | 826 | cper_estatus_print(pfx_seq, estatus); |
5588340d HY |
827 | } |
828 | ||
152cef40 HY |
829 | static int ghes_print_estatus(const char *pfx, |
830 | const struct acpi_hest_generic *generic, | |
0a00fd5e | 831 | const struct acpi_hest_generic_status *estatus) |
5588340d HY |
832 | { |
833 | /* Not more than 2 messages every 5 seconds */ | |
67eb2e99 HY |
834 | static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); |
835 | static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); | |
836 | struct ratelimit_state *ratelimit; | |
5588340d | 837 | |
67eb2e99 HY |
838 | if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) |
839 | ratelimit = &ratelimit_corrected; | |
840 | else | |
841 | ratelimit = &ratelimit_uncorrected; | |
152cef40 | 842 | if (__ratelimit(ratelimit)) { |
67eb2e99 | 843 | __ghes_print_estatus(pfx, generic, estatus); |
152cef40 HY |
844 | return 1; |
845 | } | |
846 | return 0; | |
847 | } | |
848 | ||
849 | /* | |
850 | * GHES error status reporting throttle, to report more kinds of | |
851 | * errors, instead of just most frequently occurred errors. | |
852 | */ | |
0a00fd5e | 853 | static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) |
152cef40 HY |
854 | { |
855 | u32 len; | |
856 | int i, cached = 0; | |
857 | unsigned long long now; | |
858 | struct ghes_estatus_cache *cache; | |
0a00fd5e | 859 | struct acpi_hest_generic_status *cache_estatus; |
152cef40 | 860 | |
88f074f4 | 861 | len = cper_estatus_len(estatus); |
152cef40 HY |
862 | rcu_read_lock(); |
863 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
864 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
865 | if (cache == NULL) | |
866 | continue; | |
867 | if (len != cache->estatus_len) | |
868 | continue; | |
869 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
870 | if (memcmp(estatus, cache_estatus, len)) | |
871 | continue; | |
872 | atomic_inc(&cache->count); | |
873 | now = sched_clock(); | |
874 | if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) | |
875 | cached = 1; | |
876 | break; | |
877 | } | |
878 | rcu_read_unlock(); | |
879 | return cached; | |
880 | } | |
881 | ||
882 | static struct ghes_estatus_cache *ghes_estatus_cache_alloc( | |
883 | struct acpi_hest_generic *generic, | |
0a00fd5e | 884 | struct acpi_hest_generic_status *estatus) |
152cef40 HY |
885 | { |
886 | int alloced; | |
887 | u32 len, cache_len; | |
888 | struct ghes_estatus_cache *cache; | |
0a00fd5e | 889 | struct acpi_hest_generic_status *cache_estatus; |
152cef40 HY |
890 | |
891 | alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); | |
892 | if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { | |
893 | atomic_dec(&ghes_estatus_cache_alloced); | |
894 | return NULL; | |
895 | } | |
88f074f4 | 896 | len = cper_estatus_len(estatus); |
152cef40 HY |
897 | cache_len = GHES_ESTATUS_CACHE_LEN(len); |
898 | cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); | |
899 | if (!cache) { | |
900 | atomic_dec(&ghes_estatus_cache_alloced); | |
901 | return NULL; | |
902 | } | |
903 | cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); | |
904 | memcpy(cache_estatus, estatus, len); | |
905 | cache->estatus_len = len; | |
906 | atomic_set(&cache->count, 0); | |
907 | cache->generic = generic; | |
908 | cache->time_in = sched_clock(); | |
909 | return cache; | |
910 | } | |
911 | ||
dd3fa54b | 912 | static void ghes_estatus_cache_rcu_free(struct rcu_head *head) |
152cef40 | 913 | { |
dd3fa54b | 914 | struct ghes_estatus_cache *cache; |
152cef40 HY |
915 | u32 len; |
916 | ||
dd3fa54b | 917 | cache = container_of(head, struct ghes_estatus_cache, rcu); |
88f074f4 | 918 | len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); |
152cef40 HY |
919 | len = GHES_ESTATUS_CACHE_LEN(len); |
920 | gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); | |
921 | atomic_dec(&ghes_estatus_cache_alloced); | |
922 | } | |
923 | ||
dd3fa54b AB |
924 | static void |
925 | ghes_estatus_cache_add(struct acpi_hest_generic *generic, | |
926 | struct acpi_hest_generic_status *estatus) | |
152cef40 | 927 | { |
152cef40 | 928 | unsigned long long now, duration, period, max_period = 0; |
dd3fa54b AB |
929 | struct ghes_estatus_cache *cache, *new_cache; |
930 | struct ghes_estatus_cache __rcu *victim; | |
931 | int i, slot = -1, count; | |
152cef40 HY |
932 | |
933 | new_cache = ghes_estatus_cache_alloc(generic, estatus); | |
dd3fa54b | 934 | if (!new_cache) |
152cef40 | 935 | return; |
dd3fa54b | 936 | |
152cef40 HY |
937 | rcu_read_lock(); |
938 | now = sched_clock(); | |
939 | for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { | |
940 | cache = rcu_dereference(ghes_estatus_caches[i]); | |
941 | if (cache == NULL) { | |
942 | slot = i; | |
152cef40 HY |
943 | break; |
944 | } | |
945 | duration = now - cache->time_in; | |
946 | if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { | |
947 | slot = i; | |
152cef40 HY |
948 | break; |
949 | } | |
950 | count = atomic_read(&cache->count); | |
70cb6e1d LB |
951 | period = duration; |
952 | do_div(period, (count + 1)); | |
152cef40 HY |
953 | if (period > max_period) { |
954 | max_period = period; | |
955 | slot = i; | |
152cef40 HY |
956 | } |
957 | } | |
152cef40 | 958 | rcu_read_unlock(); |
dd3fa54b AB |
959 | |
960 | if (slot != -1) { | |
961 | /* | |
962 | * Use release semantics to ensure that ghes_estatus_cached() | |
963 | * running on another CPU will see the updated cache fields if | |
964 | * it can see the new value of the pointer. | |
965 | */ | |
966 | victim = xchg_release(&ghes_estatus_caches[slot], | |
967 | RCU_INITIALIZER(new_cache)); | |
968 | ||
969 | /* | |
970 | * At this point, victim may point to a cached item different | |
971 | * from the one based on which we selected the slot. Instead of | |
972 | * going to the loop again to pick another slot, let's just | |
973 | * drop the other item anyway: this may cause a false cache | |
974 | * miss later on, but that won't cause any problems. | |
975 | */ | |
976 | if (victim) | |
977 | call_rcu(&unrcu_pointer(victim)->rcu, | |
978 | ghes_estatus_cache_rcu_free); | |
979 | } | |
d334a491 HY |
980 | } |
981 | ||
f2a7e059 JM |
982 | static void __ghes_panic(struct ghes *ghes, |
983 | struct acpi_hest_generic_status *estatus, | |
984 | u64 buf_paddr, enum fixed_addresses fixmap_idx) | |
2fb5853e | 985 | { |
f2a7e059 | 986 | __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); |
2fb5853e | 987 | |
f2a7e059 | 988 | ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); |
98cff8b2 | 989 | |
2fb5853e JZZ |
990 | /* reboot to log the error! */ |
991 | if (!panic_timeout) | |
992 | panic_timeout = ghes_panic_timeout; | |
993 | panic("Fatal hardware error!"); | |
994 | } | |
995 | ||
d334a491 HY |
996 | static int ghes_proc(struct ghes *ghes) |
997 | { | |
f2a7e059 | 998 | struct acpi_hest_generic_status *estatus = ghes->estatus; |
eeb25557 | 999 | u64 buf_paddr; |
d334a491 HY |
1000 | int rc; |
1001 | ||
f2a7e059 | 1002 | rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); |
d334a491 HY |
1003 | if (rc) |
1004 | goto out; | |
2fb5853e | 1005 | |
f2a7e059 JM |
1006 | if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) |
1007 | __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); | |
2fb5853e | 1008 | |
f2a7e059 JM |
1009 | if (!ghes_estatus_cached(estatus)) { |
1010 | if (ghes_print_estatus(NULL, ghes->generic, estatus)) | |
1011 | ghes_estatus_cache_add(ghes->generic, estatus); | |
152cef40 | 1012 | } |
f2a7e059 | 1013 | ghes_do_proc(ghes, estatus); |
42aa5604 | 1014 | |
aaf2c2fb | 1015 | out: |
f2a7e059 | 1016 | ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); |
aaf2c2fb | 1017 | |
806487a8 | 1018 | return rc; |
d334a491 HY |
1019 | } |
1020 | ||
81e88fdc HY |
1021 | static void ghes_add_timer(struct ghes *ghes) |
1022 | { | |
1023 | struct acpi_hest_generic *g = ghes->generic; | |
1024 | unsigned long expire; | |
1025 | ||
1026 | if (!g->notify.poll_interval) { | |
933ca4e3 KW |
1027 | pr_warn(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", |
1028 | g->header.source_id); | |
81e88fdc HY |
1029 | return; |
1030 | } | |
1031 | expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); | |
1032 | ghes->timer.expires = round_jiffies_relative(expire); | |
1033 | add_timer(&ghes->timer); | |
1034 | } | |
1035 | ||
d5272003 | 1036 | static void ghes_poll_func(struct timer_list *t) |
81e88fdc | 1037 | { |
d5272003 | 1038 | struct ghes *ghes = from_timer(ghes, t, timer); |
3b880cbe | 1039 | unsigned long flags; |
81e88fdc | 1040 | |
3b880cbe | 1041 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
81e88fdc | 1042 | ghes_proc(ghes); |
3b880cbe | 1043 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
81e88fdc HY |
1044 | if (!(ghes->flags & GHES_EXITING)) |
1045 | ghes_add_timer(ghes); | |
1046 | } | |
1047 | ||
1048 | static irqreturn_t ghes_irq_func(int irq, void *data) | |
1049 | { | |
1050 | struct ghes *ghes = data; | |
3b880cbe | 1051 | unsigned long flags; |
81e88fdc HY |
1052 | int rc; |
1053 | ||
3b880cbe | 1054 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
81e88fdc | 1055 | rc = ghes_proc(ghes); |
3b880cbe | 1056 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
81e88fdc HY |
1057 | if (rc) |
1058 | return IRQ_NONE; | |
1059 | ||
1060 | return IRQ_HANDLED; | |
1061 | } | |
1062 | ||
7bf130e4 SJ |
1063 | static int ghes_notify_hed(struct notifier_block *this, unsigned long event, |
1064 | void *data) | |
d334a491 HY |
1065 | { |
1066 | struct ghes *ghes; | |
3b880cbe | 1067 | unsigned long flags; |
d334a491 HY |
1068 | int ret = NOTIFY_DONE; |
1069 | ||
3b880cbe | 1070 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
d334a491 | 1071 | rcu_read_lock(); |
7bf130e4 | 1072 | list_for_each_entry_rcu(ghes, &ghes_hed, list) { |
d334a491 HY |
1073 | if (!ghes_proc(ghes)) |
1074 | ret = NOTIFY_OK; | |
1075 | } | |
1076 | rcu_read_unlock(); | |
3b880cbe | 1077 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
d334a491 HY |
1078 | |
1079 | return ret; | |
1080 | } | |
1081 | ||
7bf130e4 SJ |
1082 | static struct notifier_block ghes_notifier_hed = { |
1083 | .notifier_call = ghes_notify_hed, | |
44a69f61 TN |
1084 | }; |
1085 | ||
44a69f61 | 1086 | /* |
9c9d0805 JM |
1087 | * Handlers for CPER records may not be NMI safe. For example, |
1088 | * memory_failure_queue() takes spinlocks and calls schedule_work_on(). | |
1089 | * In any NMI-like handler, memory from ghes_estatus_pool is used to save | |
1090 | * estatus, and added to the ghes_estatus_llist. irq_work_queue() causes | |
1091 | * ghes_proc_in_irq() to run in IRQ context where each estatus in | |
1092 | * ghes_estatus_llist is processed. | |
1093 | * | |
1094 | * Memory from the ghes_estatus_pool is also used with the ghes_estatus_cache | |
1095 | * to suppress frequent messages. | |
44a69f61 TN |
1096 | */ |
1097 | static struct llist_head ghes_estatus_llist; | |
1098 | static struct irq_work ghes_proc_irq_work; | |
1099 | ||
67eb2e99 HY |
1100 | static void ghes_proc_in_irq(struct irq_work *irq_work) |
1101 | { | |
46d12f0b | 1102 | struct llist_node *llnode, *next; |
67eb2e99 | 1103 | struct ghes_estatus_node *estatus_node; |
152cef40 | 1104 | struct acpi_hest_generic *generic; |
0a00fd5e | 1105 | struct acpi_hest_generic_status *estatus; |
7f17b4a1 | 1106 | bool task_work_pending; |
67eb2e99 | 1107 | u32 len, node_len; |
7f17b4a1 | 1108 | int ret; |
67eb2e99 | 1109 | |
46d12f0b | 1110 | llnode = llist_del_all(&ghes_estatus_llist); |
67eb2e99 HY |
1111 | /* |
1112 | * Because the time order of estatus in list is reversed, | |
1113 | * revert it back to proper order. | |
1114 | */ | |
8d21d4c9 | 1115 | llnode = llist_reverse_order(llnode); |
67eb2e99 HY |
1116 | while (llnode) { |
1117 | next = llnode->next; | |
1118 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
1119 | llnode); | |
1120 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
88f074f4 | 1121 | len = cper_estatus_len(estatus); |
67eb2e99 | 1122 | node_len = GHES_ESTATUS_NODE_LEN(len); |
7f17b4a1 | 1123 | task_work_pending = ghes_do_proc(estatus_node->ghes, estatus); |
152cef40 HY |
1124 | if (!ghes_estatus_cached(estatus)) { |
1125 | generic = estatus_node->generic; | |
1126 | if (ghes_print_estatus(NULL, generic, estatus)) | |
1127 | ghes_estatus_cache_add(generic, estatus); | |
1128 | } | |
7f17b4a1 | 1129 | |
415fed69 | 1130 | if (task_work_pending && current->mm) { |
7f17b4a1 JM |
1131 | estatus_node->task_work.func = ghes_kick_task_work; |
1132 | estatus_node->task_work_cpu = smp_processor_id(); | |
1133 | ret = task_work_add(current, &estatus_node->task_work, | |
91989c70 | 1134 | TWA_RESUME); |
7f17b4a1 JM |
1135 | if (ret) |
1136 | estatus_node->task_work.func = NULL; | |
1137 | } | |
1138 | ||
1139 | if (!estatus_node->task_work.func) | |
1140 | gen_pool_free(ghes_estatus_pool, | |
1141 | (unsigned long)estatus_node, node_len); | |
1142 | ||
67eb2e99 HY |
1143 | llnode = next; |
1144 | } | |
1145 | } | |
1146 | ||
46d12f0b HY |
1147 | static void ghes_print_queued_estatus(void) |
1148 | { | |
1149 | struct llist_node *llnode; | |
1150 | struct ghes_estatus_node *estatus_node; | |
1151 | struct acpi_hest_generic *generic; | |
0a00fd5e | 1152 | struct acpi_hest_generic_status *estatus; |
46d12f0b HY |
1153 | |
1154 | llnode = llist_del_all(&ghes_estatus_llist); | |
1155 | /* | |
1156 | * Because the time order of estatus in list is reversed, | |
1157 | * revert it back to proper order. | |
1158 | */ | |
8d21d4c9 | 1159 | llnode = llist_reverse_order(llnode); |
46d12f0b HY |
1160 | while (llnode) { |
1161 | estatus_node = llist_entry(llnode, struct ghes_estatus_node, | |
1162 | llnode); | |
1163 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); | |
46d12f0b HY |
1164 | generic = estatus_node->generic; |
1165 | ghes_print_estatus(NULL, generic, estatus); | |
1166 | llnode = llnode->next; | |
1167 | } | |
1168 | } | |
1169 | ||
d9f608dc JM |
1170 | static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, |
1171 | enum fixed_addresses fixmap_idx) | |
11568496 | 1172 | { |
d9f608dc | 1173 | struct acpi_hest_generic_status *estatus, tmp_header; |
11568496 | 1174 | struct ghes_estatus_node *estatus_node; |
d9f608dc JM |
1175 | u32 len, node_len; |
1176 | u64 buf_paddr; | |
1177 | int sev, rc; | |
11568496 | 1178 | |
f2a7e059 | 1179 | if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) |
d9f608dc | 1180 | return -EOPNOTSUPP; |
11568496 | 1181 | |
d9f608dc JM |
1182 | rc = __ghes_peek_estatus(ghes, &tmp_header, &buf_paddr, fixmap_idx); |
1183 | if (rc) { | |
1184 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); | |
1185 | return rc; | |
1186 | } | |
f2a7e059 | 1187 | |
d9f608dc JM |
1188 | rc = __ghes_check_estatus(ghes, &tmp_header); |
1189 | if (rc) { | |
1190 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); | |
1191 | return rc; | |
1192 | } | |
11568496 | 1193 | |
d9f608dc JM |
1194 | len = cper_estatus_len(&tmp_header); |
1195 | node_len = GHES_ESTATUS_NODE_LEN(len); | |
11568496 BP |
1196 | estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); |
1197 | if (!estatus_node) | |
d9f608dc | 1198 | return -ENOMEM; |
11568496 BP |
1199 | |
1200 | estatus_node->ghes = ghes; | |
1201 | estatus_node->generic = ghes->generic; | |
7f17b4a1 | 1202 | estatus_node->task_work.func = NULL; |
11568496 | 1203 | estatus = GHES_ESTATUS_FROM_NODE(estatus_node); |
11568496 | 1204 | |
d9f608dc | 1205 | if (__ghes_read_estatus(estatus, buf_paddr, fixmap_idx, len)) { |
f2a7e059 | 1206 | ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); |
d9f608dc JM |
1207 | rc = -ENOENT; |
1208 | goto no_work; | |
ee2eb3d4 | 1209 | } |
6fe9e7c2 | 1210 | |
f2a7e059 | 1211 | sev = ghes_severity(estatus->error_severity); |
ee2eb3d4 JM |
1212 | if (sev >= GHES_SEV_PANIC) { |
1213 | ghes_print_queued_estatus(); | |
f2a7e059 | 1214 | __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); |
ee2eb3d4 | 1215 | } |
6169ddf8 | 1216 | |
d9f608dc | 1217 | ghes_clear_estatus(ghes, &tmp_header, buf_paddr, fixmap_idx); |
6169ddf8 | 1218 | |
d9f608dc JM |
1219 | /* This error has been reported before, don't process it again. */ |
1220 | if (ghes_estatus_cached(estatus)) | |
1221 | goto no_work; | |
1222 | ||
1223 | llist_add(&estatus_node->llnode, &ghes_estatus_llist); | |
1224 | ||
1225 | return rc; | |
1226 | ||
1227 | no_work: | |
1228 | gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, | |
1229 | node_len); | |
1230 | ||
1231 | return rc; | |
ee2eb3d4 JM |
1232 | } |
1233 | ||
b484079b JM |
1234 | static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list, |
1235 | enum fixed_addresses fixmap_idx) | |
ee2eb3d4 JM |
1236 | { |
1237 | int ret = -ENOENT; | |
1238 | struct ghes *ghes; | |
1239 | ||
1240 | rcu_read_lock(); | |
1241 | list_for_each_entry_rcu(ghes, rcu_list, list) { | |
b484079b | 1242 | if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) |
ee2eb3d4 | 1243 | ret = 0; |
81e88fdc | 1244 | } |
ee2eb3d4 | 1245 | rcu_read_unlock(); |
11568496 | 1246 | |
ee2eb3d4 | 1247 | if (IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && !ret) |
a545715d | 1248 | irq_work_queue(&ghes_proc_irq_work); |
ee2eb3d4 JM |
1249 | |
1250 | return ret; | |
1251 | } | |
9c9d0805 JM |
1252 | |
#ifdef CONFIG_ACPI_APEI_SEA
/* Error sources that use SEA notification. */
static LIST_HEAD(ghes_sea);

/*
 * Spool the records of all SEA error sources, serialised by a raw
 * spinlock local to this function.
 *
 * Return 0 only if one of the SEA error sources successfully reported an
 * error record sent from the firmware.
 */
int ghes_notify_sea(void)
{
	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sea);
	int ret;

	raw_spin_lock(&ghes_notify_lock_sea);
	ret = ghes_in_nmi_spool_from_list(&ghes_sea, FIX_APEI_GHES_SEA);
	raw_spin_unlock(&ghes_notify_lock_sea);

	return ret;
}

/* Add @ghes to the SEA list under ghes_list_mutex. */
static void ghes_sea_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_add_rcu(&ghes->list, &ghes_sea);
	mutex_unlock(&ghes_list_mutex);
}

/*
 * Remove @ghes from the SEA list under ghes_list_mutex, then wait for an
 * RCU grace period before returning.
 */
static void ghes_sea_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	mutex_unlock(&ghes_list_mutex);

	synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
/* No-op stubs used when SEA notification is not configured. */
static inline void ghes_sea_add(struct ghes *ghes) { }
static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */
1290 | ||
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 * having only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

/* Error sources that use NMI notification. */
static LIST_HEAD(ghes_nmi);

/*
 * NMI handler: spool the records of all NMI error sources.
 *
 * If ghes_in_nmi is already 1, the handler returns NMI_DONE without
 * doing anything. Otherwise it returns NMI_HANDLED when at least one
 * record was queued and NMI_DONE when none was.
 */
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_nmi);
	int result = NMI_DONE;

	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
		return NMI_DONE;

	raw_spin_lock(&ghes_notify_lock_nmi);
	if (ghes_in_nmi_spool_from_list(&ghes_nmi, FIX_APEI_GHES_NMI) == 0)
		result = NMI_HANDLED;
	raw_spin_unlock(&ghes_notify_lock_nmi);

	atomic_dec(&ghes_in_nmi);

	return result;
}

/*
 * Add @ghes to the NMI list; the NMI handler is registered when the list
 * was empty beforehand.
 */
static void ghes_nmi_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	if (list_empty(&ghes_nmi))
		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
	list_add_rcu(&ghes->list, &ghes_nmi);
	mutex_unlock(&ghes_list_mutex);
}

/*
 * Remove @ghes from the NMI list; the NMI handler is unregistered when
 * the list becomes empty.
 */
static void ghes_nmi_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	if (list_empty(&ghes_nmi))
		unregister_nmi_handler(NMI_LOCAL, "ghes");
	mutex_unlock(&ghes_list_mutex);

	/*
	 * To synchronize with NMI handler, ghes can only be
	 * freed after NMI handler finishes.
	 */
	synchronize_rcu();
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
/* No-op stubs used when NMI notification is not configured. */
static inline void ghes_nmi_add(struct ghes *ghes) { }
static inline void ghes_nmi_remove(struct ghes *ghes) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
44a69f61 TN |
1343 | |
1344 | static void ghes_nmi_init_cxt(void) | |
1345 | { | |
1346 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); | |
1347 | } | |
44a69f61 | 1348 | |
f9f05395 JM |
1349 | static int __ghes_sdei_callback(struct ghes *ghes, |
1350 | enum fixed_addresses fixmap_idx) | |
1351 | { | |
1352 | if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) { | |
1353 | irq_work_queue(&ghes_proc_irq_work); | |
1354 | ||
1355 | return 0; | |
1356 | } | |
1357 | ||
1358 | return -ENOENT; | |
1359 | } | |
1360 | ||
1361 | static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs, | |
1362 | void *arg) | |
1363 | { | |
1364 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal); | |
1365 | struct ghes *ghes = arg; | |
1366 | int err; | |
1367 | ||
1368 | raw_spin_lock(&ghes_notify_lock_sdei_normal); | |
1369 | err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL); | |
1370 | raw_spin_unlock(&ghes_notify_lock_sdei_normal); | |
1371 | ||
1372 | return err; | |
1373 | } | |
1374 | ||
1375 | static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs, | |
1376 | void *arg) | |
1377 | { | |
1378 | static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical); | |
1379 | struct ghes *ghes = arg; | |
1380 | int err; | |
1381 | ||
1382 | raw_spin_lock(&ghes_notify_lock_sdei_critical); | |
1383 | err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL); | |
1384 | raw_spin_unlock(&ghes_notify_lock_sdei_critical); | |
1385 | ||
1386 | return err; | |
1387 | } | |
1388 | ||
1389 | static int apei_sdei_register_ghes(struct ghes *ghes) | |
1390 | { | |
1391 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) | |
1392 | return -EOPNOTSUPP; | |
1393 | ||
1394 | return sdei_register_ghes(ghes, ghes_sdei_normal_callback, | |
1395 | ghes_sdei_critical_callback); | |
1396 | } | |
1397 | ||
1398 | static int apei_sdei_unregister_ghes(struct ghes *ghes) | |
1399 | { | |
1400 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) | |
1401 | return -EOPNOTSUPP; | |
1402 | ||
1403 | return sdei_unregister_ghes(ghes); | |
1404 | } | |
1405 | ||
da095fd3 | 1406 | static int ghes_probe(struct platform_device *ghes_dev) |
d334a491 HY |
1407 | { |
1408 | struct acpi_hest_generic *generic; | |
1409 | struct ghes *ghes = NULL; | |
3b880cbe | 1410 | unsigned long flags; |
44a69f61 | 1411 | |
7ad6e943 | 1412 | int rc = -EINVAL; |
d334a491 | 1413 | |
1dd6b20e | 1414 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; |
d334a491 | 1415 | if (!generic->enabled) |
7ad6e943 | 1416 | return -ENODEV; |
d334a491 | 1417 | |
81e88fdc HY |
1418 | switch (generic->notify.type) { |
1419 | case ACPI_HEST_NOTIFY_POLLED: | |
1420 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1421 | case ACPI_HEST_NOTIFY_SCI: | |
7bf130e4 SJ |
1422 | case ACPI_HEST_NOTIFY_GSIV: |
1423 | case ACPI_HEST_NOTIFY_GPIO: | |
44a69f61 | 1424 | break; |
7bf130e4 | 1425 | |
7edda088 TB |
1426 | case ACPI_HEST_NOTIFY_SEA: |
1427 | if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { | |
1428 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", | |
1429 | generic->header.source_id); | |
1430 | rc = -ENOTSUPP; | |
1431 | goto err; | |
1432 | } | |
1433 | break; | |
81e88fdc | 1434 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 TN |
1435 | if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { |
1436 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", | |
1437 | generic->header.source_id); | |
1438 | goto err; | |
1439 | } | |
81e88fdc | 1440 | break; |
f9f05395 JM |
1441 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1442 | if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) { | |
1443 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n", | |
1444 | generic->header.source_id); | |
1445 | goto err; | |
1446 | } | |
1447 | break; | |
81e88fdc | 1448 | case ACPI_HEST_NOTIFY_LOCAL: |
933ca4e3 KW |
1449 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", |
1450 | generic->header.source_id); | |
d334a491 | 1451 | goto err; |
81e88fdc | 1452 | default: |
933ca4e3 KW |
1453 | pr_warn(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", |
1454 | generic->notify.type, generic->header.source_id); | |
81e88fdc | 1455 | goto err; |
d334a491 | 1456 | } |
81e88fdc HY |
1457 | |
1458 | rc = -EIO; | |
1459 | if (generic->error_block_length < | |
0a00fd5e | 1460 | sizeof(struct acpi_hest_generic_status)) { |
933ca4e3 KW |
1461 | pr_warn(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", |
1462 | generic->error_block_length, generic->header.source_id); | |
d334a491 HY |
1463 | goto err; |
1464 | } | |
1465 | ghes = ghes_new(generic); | |
1466 | if (IS_ERR(ghes)) { | |
1467 | rc = PTR_ERR(ghes); | |
1468 | ghes = NULL; | |
1469 | goto err; | |
1470 | } | |
21480547 | 1471 | |
81e88fdc HY |
1472 | switch (generic->notify.type) { |
1473 | case ACPI_HEST_NOTIFY_POLLED: | |
cea79e7e | 1474 | timer_setup(&ghes->timer, ghes_poll_func, 0); |
81e88fdc HY |
1475 | ghes_add_timer(ghes); |
1476 | break; | |
1477 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1478 | /* External interrupt vector is GSI */ | |
a98d4f64 WY |
1479 | rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq); |
1480 | if (rc) { | |
81e88fdc HY |
1481 | pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", |
1482 | generic->header.source_id); | |
cc7f3f13 | 1483 | goto err; |
81e88fdc | 1484 | } |
bdb9458a LH |
1485 | rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED, |
1486 | "GHES IRQ", ghes); | |
a98d4f64 | 1487 | if (rc) { |
81e88fdc HY |
1488 | pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", |
1489 | generic->header.source_id); | |
cc7f3f13 | 1490 | goto err; |
81e88fdc HY |
1491 | } |
1492 | break; | |
7bf130e4 | 1493 | |
81e88fdc | 1494 | case ACPI_HEST_NOTIFY_SCI: |
7bf130e4 SJ |
1495 | case ACPI_HEST_NOTIFY_GSIV: |
1496 | case ACPI_HEST_NOTIFY_GPIO: | |
7ad6e943 | 1497 | mutex_lock(&ghes_list_mutex); |
7bf130e4 SJ |
1498 | if (list_empty(&ghes_hed)) |
1499 | register_acpi_hed_notifier(&ghes_notifier_hed); | |
1500 | list_add_rcu(&ghes->list, &ghes_hed); | |
7ad6e943 | 1501 | mutex_unlock(&ghes_list_mutex); |
81e88fdc | 1502 | break; |
7bf130e4 | 1503 | |
7edda088 TB |
1504 | case ACPI_HEST_NOTIFY_SEA: |
1505 | ghes_sea_add(ghes); | |
1506 | break; | |
81e88fdc | 1507 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 | 1508 | ghes_nmi_add(ghes); |
81e88fdc | 1509 | break; |
f9f05395 JM |
1510 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1511 | rc = apei_sdei_register_ghes(ghes); | |
1512 | if (rc) | |
1513 | goto err; | |
1514 | break; | |
81e88fdc HY |
1515 | default: |
1516 | BUG(); | |
d334a491 | 1517 | } |
cc7f3f13 | 1518 | |
7ad6e943 | 1519 | platform_set_drvdata(ghes_dev, ghes); |
d334a491 | 1520 | |
9057a3f7 JH |
1521 | ghes->dev = &ghes_dev->dev; |
1522 | ||
1523 | mutex_lock(&ghes_devs_mutex); | |
1524 | list_add_tail(&ghes->elist, &ghes_devs); | |
1525 | mutex_unlock(&ghes_devs_mutex); | |
cc7f3f13 | 1526 | |
77b246b3 | 1527 | /* Handle any pending errors right away */ |
3b880cbe | 1528 | spin_lock_irqsave(&ghes_notify_lock_irq, flags); |
77b246b3 | 1529 | ghes_proc(ghes); |
3b880cbe | 1530 | spin_unlock_irqrestore(&ghes_notify_lock_irq, flags); |
77b246b3 | 1531 | |
d334a491 | 1532 | return 0; |
cc7f3f13 | 1533 | |
d334a491 | 1534 | err: |
7ad6e943 | 1535 | if (ghes) { |
d334a491 | 1536 | ghes_fini(ghes); |
7ad6e943 HY |
1537 | kfree(ghes); |
1538 | } | |
d334a491 HY |
1539 | return rc; |
1540 | } | |
1541 | ||
f2f212f3 | 1542 | static void ghes_remove(struct platform_device *ghes_dev) |
d334a491 | 1543 | { |
f9f05395 | 1544 | int rc; |
7ad6e943 HY |
1545 | struct ghes *ghes; |
1546 | struct acpi_hest_generic *generic; | |
d334a491 | 1547 | |
7ad6e943 HY |
1548 | ghes = platform_get_drvdata(ghes_dev); |
1549 | generic = ghes->generic; | |
1550 | ||
81e88fdc | 1551 | ghes->flags |= GHES_EXITING; |
7ad6e943 | 1552 | switch (generic->notify.type) { |
81e88fdc | 1553 | case ACPI_HEST_NOTIFY_POLLED: |
292a089d | 1554 | timer_shutdown_sync(&ghes->timer); |
81e88fdc HY |
1555 | break; |
1556 | case ACPI_HEST_NOTIFY_EXTERNAL: | |
1557 | free_irq(ghes->irq, ghes); | |
1558 | break; | |
7bf130e4 | 1559 | |
7ad6e943 | 1560 | case ACPI_HEST_NOTIFY_SCI: |
7bf130e4 SJ |
1561 | case ACPI_HEST_NOTIFY_GSIV: |
1562 | case ACPI_HEST_NOTIFY_GPIO: | |
7ad6e943 HY |
1563 | mutex_lock(&ghes_list_mutex); |
1564 | list_del_rcu(&ghes->list); | |
7bf130e4 SJ |
1565 | if (list_empty(&ghes_hed)) |
1566 | unregister_acpi_hed_notifier(&ghes_notifier_hed); | |
7ad6e943 | 1567 | mutex_unlock(&ghes_list_mutex); |
7d64f82c | 1568 | synchronize_rcu(); |
7ad6e943 | 1569 | break; |
7bf130e4 | 1570 | |
7edda088 TB |
1571 | case ACPI_HEST_NOTIFY_SEA: |
1572 | ghes_sea_remove(ghes); | |
1573 | break; | |
81e88fdc | 1574 | case ACPI_HEST_NOTIFY_NMI: |
44a69f61 | 1575 | ghes_nmi_remove(ghes); |
81e88fdc | 1576 | break; |
f9f05395 JM |
1577 | case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED: |
1578 | rc = apei_sdei_unregister_ghes(ghes); | |
f2f212f3 UKK |
1579 | if (rc) { |
1580 | /* | |
1581 | * Returning early results in a resource leak, but we're | |
1582 | * only here if stopping the hardware failed. | |
1583 | */ | |
1584 | dev_err(&ghes_dev->dev, "Failed to unregister ghes (%pe)\n", | |
1585 | ERR_PTR(rc)); | |
1586 | return; | |
1587 | } | |
f9f05395 | 1588 | break; |
7ad6e943 HY |
1589 | default: |
1590 | BUG(); | |
1591 | break; | |
1592 | } | |
d334a491 | 1593 | |
7ad6e943 | 1594 | ghes_fini(ghes); |
21480547 | 1595 | |
9057a3f7 JH |
1596 | mutex_lock(&ghes_devs_mutex); |
1597 | list_del(&ghes->elist); | |
1598 | mutex_unlock(&ghes_devs_mutex); | |
21480547 | 1599 | |
7ad6e943 | 1600 | kfree(ghes); |
d334a491 HY |
1601 | } |
1602 | ||
7ad6e943 HY |
1603 | static struct platform_driver ghes_platform_driver = { |
1604 | .driver = { | |
1605 | .name = "GHES", | |
7ad6e943 HY |
1606 | }, |
1607 | .probe = ghes_probe, | |
f2f212f3 | 1608 | .remove_new = ghes_remove, |
7ad6e943 HY |
1609 | }; |
1610 | ||
27e932a3 | 1611 | void __init acpi_ghes_init(void) |
d334a491 | 1612 | { |
81e88fdc HY |
1613 | int rc; |
1614 | ||
dc4e8c07 SX |
1615 | sdei_init(); |
1616 | ||
d334a491 | 1617 | if (acpi_disabled) |
dc4e8c07 | 1618 | return; |
d334a491 | 1619 | |
e931d0da PA |
1620 | switch (hest_disable) { |
1621 | case HEST_NOT_FOUND: | |
dc4e8c07 | 1622 | return; |
e931d0da | 1623 | case HEST_DISABLED: |
d334a491 | 1624 | pr_info(GHES_PFX "HEST is not enabled!\n"); |
dc4e8c07 | 1625 | return; |
e931d0da PA |
1626 | default: |
1627 | break; | |
d334a491 HY |
1628 | } |
1629 | ||
b6a95016 HY |
1630 | if (ghes_disable) { |
1631 | pr_info(GHES_PFX "GHES is not enabled!\n"); | |
dc4e8c07 | 1632 | return; |
b6a95016 HY |
1633 | } |
1634 | ||
44a69f61 | 1635 | ghes_nmi_init_cxt(); |
67eb2e99 | 1636 | |
67eb2e99 HY |
1637 | rc = platform_driver_register(&ghes_platform_driver); |
1638 | if (rc) | |
dc4e8c07 | 1639 | return; |
67eb2e99 | 1640 | |
9fb0bfe1 HY |
1641 | rc = apei_osc_setup(); |
1642 | if (rc == 0 && osc_sb_apei_support_acked) | |
1643 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); | |
1644 | else if (rc == 0 && !osc_sb_apei_support_acked) | |
1645 | pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); | |
1646 | else if (rc && osc_sb_apei_support_acked) | |
1647 | pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); | |
1648 | else | |
1649 | pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); | |
d334a491 | 1650 | } |
8e40612f | 1651 | |
9057a3f7 JH |
1652 | /* |
1653 | * Known x86 systems that prefer GHES error reporting: | |
1654 | */ | |
1655 | static struct acpi_platform_list plat_list[] = { | |
1656 | {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, | |
1657 | { } /* End */ | |
1658 | }; | |
1659 | ||
1660 | struct list_head *ghes_get_devices(void) | |
1661 | { | |
1662 | int idx = -1; | |
1663 | ||
1664 | if (IS_ENABLED(CONFIG_X86)) { | |
1665 | idx = acpi_match_platform_list(plat_list); | |
1666 | if (idx < 0) { | |
1667 | if (!ghes_edac_force_enable) | |
1668 | return NULL; | |
1669 | ||
1670 | pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n"); | |
1671 | } | |
9368aa18 LY |
1672 | } else if (list_empty(&ghes_devs)) { |
1673 | return NULL; | |
9057a3f7 JH |
1674 | } |
1675 | ||
1676 | return &ghes_devs; | |
1677 | } | |
1678 | EXPORT_SYMBOL_GPL(ghes_get_devices); | |
1679 | ||
8e40612f JH |
1680 | void ghes_register_report_chain(struct notifier_block *nb) |
1681 | { | |
1682 | atomic_notifier_chain_register(&ghes_report_chain, nb); | |
1683 | } | |
1684 | EXPORT_SYMBOL_GPL(ghes_register_report_chain); | |
1685 | ||
1686 | void ghes_unregister_report_chain(struct notifier_block *nb) | |
1687 | { | |
1688 | atomic_notifier_chain_unregister(&ghes_report_chain, nb); | |
1689 | } | |
1690 | EXPORT_SYMBOL_GPL(ghes_unregister_report_chain); |