Commit | Line | Data |
---|---|---|
41df5928 HB |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* | |
3 | * Firmware-Assisted Dump support on POWER platform (OPAL). | |
4 | * | |
5 | * Copyright 2019, Hari Bathini, IBM Corporation. | |
6 | */ | |
7 | ||
8 | #define pr_fmt(fmt) "opal fadump: " fmt | |
9 | ||
10 | #include <linux/string.h> | |
11 | #include <linux/seq_file.h> | |
7b1b3b48 | 12 | #include <linux/of.h> |
41df5928 HB |
13 | #include <linux/of_fdt.h> |
14 | #include <linux/libfdt.h> | |
742a265a | 15 | #include <linux/mm.h> |
2a1b06dd | 16 | #include <linux/crash_dump.h> |
41df5928 | 17 | |
742a265a | 18 | #include <asm/page.h> |
41df5928 HB |
19 | #include <asm/opal.h> |
20 | #include <asm/fadump-internal.h> | |
21 | ||
742a265a HB |
22 | #include "opal-fadump.h" |
23 | ||
bec53196 HB |
24 | |
25 | #ifdef CONFIG_PRESERVE_FA_DUMP | |
26 | /* | |
27 | * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel, | |
28 | * ensure crash data is preserved in hope that the subsequent memory | |
29 | * preserving kernel boot is going to process this crash data. | |
30 | */ | |
31 | void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) | |
32 | { | |
33 | const struct opal_fadump_mem_struct *opal_fdm_active; | |
34 | const __be32 *prop; | |
35 | unsigned long dn; | |
36 | u64 addr = 0; | |
37 | s64 ret; | |
38 | ||
39 | dn = of_get_flat_dt_subnode_by_name(node, "dump"); | |
40 | if (dn == -FDT_ERR_NOTFOUND) | |
41 | return; | |
42 | ||
43 | /* | |
44 | * Check if dump has been initiated on last reboot. | |
45 | */ | |
46 | prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); | |
47 | if (!prop) | |
48 | return; | |
49 | ||
50 | ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); | |
51 | if ((ret != OPAL_SUCCESS) || !addr) { | |
52 | pr_debug("Could not get Kernel metadata (%lld)\n", ret); | |
53 | return; | |
54 | } | |
55 | ||
56 | /* | |
57 | * Preserve memory only if kernel memory regions are registered | |
58 | * with f/w for MPIPL. | |
59 | */ | |
60 | addr = be64_to_cpu(addr); | |
61 | pr_debug("Kernel metadata addr: %llx\n", addr); | |
62 | opal_fdm_active = (void *)addr; | |
b74196af | 63 | if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) |
bec53196 HB |
64 | return; |
65 | ||
66 | ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); | |
67 | if ((ret != OPAL_SUCCESS) || !addr) { | |
68 | pr_err("Failed to get boot memory tag (%lld)\n", ret); | |
69 | return; | |
70 | } | |
71 | ||
72 | /* | |
73 | * Memory below this address can be used for booting a | |
74 | * capture kernel or petitboot kernel. Preserve everything | |
75 | * above this address for processing crashdump. | |
76 | */ | |
77 | fadump_conf->boot_mem_top = be64_to_cpu(addr); | |
78 | pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top); | |
79 | ||
80 | pr_info("Firmware-assisted dump is active.\n"); | |
81 | fadump_conf->dump_active = 1; | |
82 | } | |
83 | ||
84 | #else /* CONFIG_PRESERVE_FA_DUMP */ | |
/*
 * Kernel metadata of the running kernel; pointed at
 * fadump_conf->kernel_metadata when the metadata is set up.
 */
static struct opal_fadump_mem_struct *opal_fdm;

/*
 * Kernel metadata located through the OPAL_MPIPL_TAG_KERNEL tag while
 * scanning the device tree; NULL when no usable dump is present.
 */
static const struct opal_fadump_mem_struct *opal_fdm_active;

/* CPU state metadata located through the OPAL_MPIPL_TAG_CPU tag, if any. */
static const struct opal_mpipl_fadump *opal_cpu_metadata;

#ifdef CONFIG_OPAL_CORE
/* Defined outside this file; set here when the crashing CPU is known. */
extern bool kernel_initiated;
#endif

/* Needed by opal_fadump_register(), which is defined before it. */
static int opal_fadump_unregister(struct fw_dump *fadump_conf);
94 | ||
95 | static void opal_fadump_update_config(struct fw_dump *fadump_conf, | |
96 | const struct opal_fadump_mem_struct *fdm) | |
97 | { | |
b74196af | 98 | pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); |
51bba8ed | 99 | |
a20a8fa4 HB |
100 | /* |
101 | * The destination address of the first boot memory region is the | |
102 | * destination address of boot memory regions. | |
103 | */ | |
b74196af | 104 | fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); |
a20a8fa4 HB |
105 | pr_debug("Destination address of boot memory regions: %#016llx\n", |
106 | fadump_conf->boot_mem_dest_addr); | |
107 | ||
b74196af | 108 | fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); |
a20a8fa4 HB |
109 | } |
110 | ||
2a1b06dd HB |
111 | /* |
112 | * This function is called in the capture kernel to get configuration details | |
113 | * from metadata setup by the first kernel. | |
114 | */ | |
e5913db1 | 115 | static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, |
2a1b06dd HB |
116 | const struct opal_fadump_mem_struct *fdm) |
117 | { | |
7dee93a9 | 118 | unsigned long base, size, last_end, hole_size; |
2a1b06dd HB |
119 | int i; |
120 | ||
121 | if (!fadump_conf->dump_active) | |
122 | return; | |
123 | ||
7dee93a9 HB |
124 | last_end = 0; |
125 | hole_size = 0; | |
2a1b06dd HB |
126 | fadump_conf->boot_memory_size = 0; |
127 | ||
128 | pr_debug("Boot memory regions:\n"); | |
b74196af HB |
129 | for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { |
130 | base = be64_to_cpu(fdm->rgn[i].src); | |
131 | size = be64_to_cpu(fdm->rgn[i].size); | |
7dee93a9 | 132 | pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); |
2a1b06dd | 133 | |
7dee93a9 HB |
134 | fadump_conf->boot_mem_addr[i] = base; |
135 | fadump_conf->boot_mem_sz[i] = size; | |
136 | fadump_conf->boot_memory_size += size; | |
137 | hole_size += (base - last_end); | |
138 | ||
139 | last_end = base + size; | |
2a1b06dd HB |
140 | } |
141 | ||
142 | /* | |
143 | * Start address of reserve dump area (permanent reservation) for | |
144 | * re-registering FADump after dump capture. | |
145 | */ | |
b74196af | 146 | fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); |
2a1b06dd | 147 | |
6071e8f9 HB |
148 | /* |
149 | * Rarely, but it can so happen that system crashes before all | |
150 | * boot memory regions are registered for MPIPL. In such | |
151 | * cases, warn that the vmcore may not be accurate and proceed | |
152 | * anyway as that is the best bet considering free pages, cache | |
153 | * pages, user pages, etc are usually filtered out. | |
154 | * | |
155 | * Hope the memory that could not be preserved only has pages | |
156 | * that are usually filtered out while saving the vmcore. | |
157 | */ | |
b74196af | 158 | if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { |
6071e8f9 HB |
159 | pr_warn("Not all memory regions were saved!!!\n"); |
160 | pr_warn(" Unsaved memory regions:\n"); | |
b74196af HB |
161 | i = be16_to_cpu(fdm->registered_regions); |
162 | while (i < be16_to_cpu(fdm->region_cnt)) { | |
6071e8f9 | 163 | pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", |
b74196af HB |
164 | i, be64_to_cpu(fdm->rgn[i].src), |
165 | be64_to_cpu(fdm->rgn[i].size)); | |
6071e8f9 HB |
166 | i++; |
167 | } | |
168 | ||
169 | pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n"); | |
170 | pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n"); | |
171 | } | |
172 | ||
7dee93a9 | 173 | fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); |
b74196af | 174 | fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); |
2a1b06dd HB |
175 | opal_fadump_update_config(fadump_conf, fdm); |
176 | } | |
177 | ||
742a265a HB |
178 | /* Initialize kernel metadata */ |
179 | static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) | |
180 | { | |
181 | fdm->version = OPAL_FADUMP_VERSION; | |
b74196af HB |
182 | fdm->region_cnt = cpu_to_be16(0); |
183 | fdm->registered_regions = cpu_to_be16(0); | |
184 | fdm->fadumphdr_addr = cpu_to_be64(0); | |
742a265a HB |
185 | } |
186 | ||
41df5928 HB |
187 | static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) |
188 | { | |
7dee93a9 | 189 | u64 addr = fadump_conf->reserve_dump_area_start; |
b74196af | 190 | u16 reg_cnt; |
7dee93a9 | 191 | int i; |
742a265a HB |
192 | |
193 | opal_fdm = __va(fadump_conf->kernel_metadata); | |
194 | opal_fadump_init_metadata(opal_fdm); | |
195 | ||
51bba8ed | 196 | /* Boot memory regions */ |
b74196af | 197 | reg_cnt = be16_to_cpu(opal_fdm->region_cnt); |
7dee93a9 | 198 | for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { |
b74196af HB |
199 | opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); |
200 | opal_fdm->rgn[i].dest = cpu_to_be64(addr); | |
201 | opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); | |
51bba8ed | 202 | |
b74196af | 203 | reg_cnt++; |
7dee93a9 | 204 | addr += fadump_conf->boot_mem_sz[i]; |
51bba8ed | 205 | } |
b74196af | 206 | opal_fdm->region_cnt = cpu_to_be16(reg_cnt); |
742a265a HB |
207 | |
208 | /* | |
1fd02f66 | 209 | * Kernel metadata is passed to f/w and retrieved in capture kernel. |
742a265a HB |
210 | * So, use it to save fadump header address instead of calculating it. |
211 | */ | |
b74196af HB |
212 | opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + |
213 | fadump_conf->boot_memory_size); | |
742a265a | 214 | |
a20a8fa4 HB |
215 | opal_fadump_update_config(fadump_conf, opal_fdm); |
216 | ||
7dee93a9 | 217 | return addr; |
742a265a HB |
218 | } |
219 | ||
220 | static u64 opal_fadump_get_metadata_size(void) | |
221 | { | |
222 | return PAGE_ALIGN(sizeof(struct opal_fadump_mem_struct)); | |
223 | } | |
224 | ||
225 | static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf) | |
226 | { | |
227 | int err = 0; | |
228 | s64 ret; | |
229 | ||
230 | /* | |
231 | * Use the last page(s) in FADump memory reservation for | |
232 | * kernel metadata. | |
233 | */ | |
234 | fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start + | |
235 | fadump_conf->reserve_dump_area_size - | |
236 | opal_fadump_get_metadata_size()); | |
237 | pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata); | |
238 | ||
239 | /* Initialize kernel metadata before registering the address with f/w */ | |
240 | opal_fdm = __va(fadump_conf->kernel_metadata); | |
241 | opal_fadump_init_metadata(opal_fdm); | |
242 | ||
243 | /* | |
244 | * Register metadata address with f/w. Can be retrieved in | |
245 | * the capture kernel. | |
246 | */ | |
247 | ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, | |
248 | fadump_conf->kernel_metadata); | |
249 | if (ret != OPAL_SUCCESS) { | |
250 | pr_err("Failed to set kernel metadata tag!\n"); | |
251 | err = -EPERM; | |
252 | } | |
253 | ||
bec53196 HB |
254 | /* |
255 | * Register boot memory top address with f/w. Should be retrieved | |
256 | * by a kernel that intends to preserve crash'ed kernel's memory. | |
257 | */ | |
258 | ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM, | |
7dee93a9 | 259 | fadump_conf->boot_mem_top); |
bec53196 HB |
260 | if (ret != OPAL_SUCCESS) { |
261 | pr_err("Failed to set boot memory tag!\n"); | |
262 | err = -EPERM; | |
263 | } | |
264 | ||
742a265a | 265 | return err; |
41df5928 HB |
266 | } |
267 | ||
7b1b3b48 HB |
268 | static u64 opal_fadump_get_bootmem_min(void) |
269 | { | |
270 | return OPAL_FADUMP_MIN_BOOT_MEM; | |
271 | } | |
272 | ||
41df5928 HB |
273 | static int opal_fadump_register(struct fw_dump *fadump_conf) |
274 | { | |
a20a8fa4 | 275 | s64 rc = OPAL_PARAMETER; |
b74196af | 276 | u16 registered_regs; |
a20a8fa4 HB |
277 | int i, err = -EIO; |
278 | ||
b74196af HB |
279 | registered_regs = be16_to_cpu(opal_fdm->registered_regions); |
280 | for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { | |
a20a8fa4 | 281 | rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, |
b74196af HB |
282 | be64_to_cpu(opal_fdm->rgn[i].src), |
283 | be64_to_cpu(opal_fdm->rgn[i].dest), | |
284 | be64_to_cpu(opal_fdm->rgn[i].size)); | |
a20a8fa4 HB |
285 | if (rc != OPAL_SUCCESS) |
286 | break; | |
287 | ||
b74196af | 288 | registered_regs++; |
a20a8fa4 | 289 | } |
b74196af | 290 | opal_fdm->registered_regions = cpu_to_be16(registered_regs); |
a20a8fa4 HB |
291 | |
292 | switch (rc) { | |
293 | case OPAL_SUCCESS: | |
294 | pr_info("Registration is successful!\n"); | |
295 | fadump_conf->dump_registered = 1; | |
296 | err = 0; | |
297 | break; | |
298 | case OPAL_RESOURCE: | |
299 | /* If MAX regions limit in f/w is hit, warn and proceed. */ | |
300 | pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", | |
b74196af HB |
301 | (be16_to_cpu(opal_fdm->region_cnt) - |
302 | be16_to_cpu(opal_fdm->registered_regions))); | |
a20a8fa4 HB |
303 | fadump_conf->dump_registered = 1; |
304 | err = 0; | |
305 | break; | |
306 | case OPAL_PARAMETER: | |
307 | pr_err("Failed to register. Parameter Error(%lld).\n", rc); | |
308 | break; | |
309 | case OPAL_HARDWARE: | |
310 | pr_err("Support not available.\n"); | |
311 | fadump_conf->fadump_supported = 0; | |
312 | fadump_conf->fadump_enabled = 0; | |
313 | break; | |
314 | default: | |
315 | pr_err("Failed to register. Unknown Error(%lld).\n", rc); | |
316 | break; | |
317 | } | |
318 | ||
319 | /* | |
320 | * If some regions were registered before OPAL_MPIPL_ADD_RANGE | |
321 | * OPAL call failed, unregister all regions. | |
322 | */ | |
b74196af | 323 | if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) |
a20a8fa4 HB |
324 | opal_fadump_unregister(fadump_conf); |
325 | ||
326 | return err; | |
41df5928 HB |
327 | } |
328 | ||
329 | static int opal_fadump_unregister(struct fw_dump *fadump_conf) | |
330 | { | |
a20a8fa4 HB |
331 | s64 rc; |
332 | ||
333 | rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0); | |
334 | if (rc) { | |
335 | pr_err("Failed to un-register - unexpected Error(%lld).\n", rc); | |
336 | return -EIO; | |
337 | } | |
338 | ||
b74196af | 339 | opal_fdm->registered_regions = cpu_to_be16(0); |
a20a8fa4 HB |
340 | fadump_conf->dump_registered = 0; |
341 | return 0; | |
41df5928 HB |
342 | } |
343 | ||
344 | static int opal_fadump_invalidate(struct fw_dump *fadump_conf) | |
345 | { | |
a4e2e2ca HB |
346 | s64 rc; |
347 | ||
348 | rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0); | |
349 | if (rc) { | |
350 | pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc); | |
351 | return -EIO; | |
352 | } | |
353 | ||
354 | fadump_conf->dump_active = 0; | |
355 | opal_fdm_active = NULL; | |
356 | return 0; | |
41df5928 HB |
357 | } |
358 | ||
2790d01d HB |
359 | static void opal_fadump_cleanup(struct fw_dump *fadump_conf) |
360 | { | |
361 | s64 ret; | |
362 | ||
363 | ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0); | |
364 | if (ret != OPAL_SUCCESS) | |
365 | pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret); | |
366 | } | |
367 | ||
5000a17a HB |
368 | /* |
369 | * Verify if CPU state data is available. If available, do a bit of sanity | |
370 | * checking before processing this data. | |
371 | */ | |
372 | static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf) | |
373 | { | |
374 | if (!opal_cpu_metadata) | |
375 | return false; | |
376 | ||
377 | fadump_conf->cpu_state_data_version = | |
378 | be32_to_cpu(opal_cpu_metadata->cpu_data_version); | |
379 | fadump_conf->cpu_state_entry_size = | |
380 | be32_to_cpu(opal_cpu_metadata->cpu_data_size); | |
381 | fadump_conf->cpu_state_dest_vaddr = | |
382 | (u64)__va(be64_to_cpu(opal_cpu_metadata->region[0].dest)); | |
383 | fadump_conf->cpu_state_data_size = | |
384 | be64_to_cpu(opal_cpu_metadata->region[0].size); | |
385 | ||
386 | if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) { | |
387 | pr_warn("Supported CPU state data version: %u, found: %d!\n", | |
388 | HDAT_FADUMP_CPU_DATA_VER, | |
389 | fadump_conf->cpu_state_data_version); | |
390 | pr_warn("WARNING: F/W using newer CPU state data format!!\n"); | |
391 | } | |
392 | ||
393 | if ((fadump_conf->cpu_state_dest_vaddr == 0) || | |
394 | (fadump_conf->cpu_state_entry_size == 0) || | |
395 | (fadump_conf->cpu_state_entry_size > | |
396 | fadump_conf->cpu_state_data_size)) { | |
397 | pr_err("CPU state data is invalid. Ignoring!\n"); | |
398 | return false; | |
399 | } | |
400 | ||
401 | return true; | |
402 | } | |
403 | ||
2a1b06dd HB |
404 | /* |
405 | * Convert CPU state data saved at the time of crash into ELF notes. | |
406 | * | |
5000a17a HB |
407 | * While the crashing CPU's register data is saved by the kernel, CPU state |
408 | * data for all CPUs is saved by f/w. In CPU state data provided by f/w, | |
409 | * each register entry is of 16 bytes, a numerical identifier along with | |
410 | * a GPR/SPR flag in the first 8 bytes and the register value in the next | |
411 | * 8 bytes. For more details refer to F/W documentation. If this data is | |
412 | * missing or in unsupported format, append crashing CPU's register data | |
413 | * saved by the kernel in the PT_NOTE, to have something to work with in | |
414 | * the vmcore file. | |
2a1b06dd HB |
415 | */ |
416 | static int __init | |
417 | opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf, | |
418 | struct fadump_crash_info_header *fdh) | |
419 | { | |
5000a17a HB |
420 | u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize; |
421 | struct hdat_fadump_thread_hdr *thdr; | |
422 | bool is_cpu_data_valid = false; | |
2a1b06dd | 423 | u32 num_cpus = 1, *note_buf; |
5000a17a HB |
424 | struct pt_regs regs; |
425 | char *bufp; | |
426 | int rc, i; | |
427 | ||
428 | if (is_opal_fadump_cpu_data_valid(fadump_conf)) { | |
429 | size_per_thread = fadump_conf->cpu_state_entry_size; | |
430 | num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread); | |
431 | bufp = __va(fadump_conf->cpu_state_dest_vaddr); | |
432 | is_cpu_data_valid = true; | |
433 | } | |
2a1b06dd | 434 | |
2a1b06dd HB |
435 | rc = fadump_setup_cpu_notes_buf(num_cpus); |
436 | if (rc != 0) | |
437 | return rc; | |
438 | ||
439 | note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr; | |
5000a17a HB |
440 | if (!is_cpu_data_valid) |
441 | goto out; | |
442 | ||
443 | /* | |
444 | * Offset for register entries, entry size and registers count is | |
445 | * duplicated in every thread header in keeping with HDAT format. | |
446 | * Use these values from the first thread header. | |
447 | */ | |
448 | thdr = (struct hdat_fadump_thread_hdr *)bufp; | |
449 | regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) + | |
450 | be32_to_cpu(thdr->offset)); | |
451 | reg_esize = be32_to_cpu(thdr->esize); | |
452 | regs_cnt = be32_to_cpu(thdr->ecnt); | |
453 | ||
454 | pr_debug("--------CPU State Data------------\n"); | |
455 | pr_debug("NumCpus : %u\n", num_cpus); | |
456 | pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n", | |
457 | regs_offset, reg_esize, regs_cnt); | |
458 | ||
459 | for (i = 0; i < num_cpus; i++, bufp += size_per_thread) { | |
460 | thdr = (struct hdat_fadump_thread_hdr *)bufp; | |
461 | ||
462 | thread_pir = be32_to_cpu(thdr->pir); | |
463 | pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n", | |
464 | i, thread_pir, thdr->core_state); | |
465 | ||
466 | /* | |
467 | * If this is kernel initiated crash, crashing_cpu would be set | |
468 | * appropriately and register data of the crashing CPU saved by | |
469 | * crashing kernel. Add this saved register data of crashing CPU | |
470 | * to elf notes and populate the pt_regs for the remaining CPUs | |
471 | * from register state data provided by firmware. | |
472 | */ | |
473 | if (fdh->crashing_cpu == thread_pir) { | |
474 | note_buf = fadump_regs_to_elf_notes(note_buf, | |
475 | &fdh->regs); | |
476 | pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n", | |
477 | fdh->crashing_cpu, fdh->regs.gpr[1], | |
478 | fdh->regs.nip); | |
479 | continue; | |
480 | } | |
481 | ||
482 | /* | |
483 | * Register state data of MAX cores is provided by firmware, | |
484 | * but some of this cores may not be active. So, while | |
485 | * processing register state data, check core state and | |
486 | * skip threads that belong to inactive cores. | |
487 | */ | |
488 | if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE) | |
489 | continue; | |
490 | ||
491 | opal_fadump_read_regs((bufp + regs_offset), regs_cnt, | |
6f713d18 | 492 | reg_esize, true, ®s); |
5000a17a HB |
493 | note_buf = fadump_regs_to_elf_notes(note_buf, ®s); |
494 | pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n", | |
495 | thread_pir, regs.gpr[1], regs.nip); | |
496 | } | |
497 | ||
498 | out: | |
499 | /* | |
500 | * CPU state data is invalid/unsupported. Try appending crashing CPU's | |
501 | * register data, if it is saved by the kernel. | |
502 | */ | |
503 | if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) { | |
504 | if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) { | |
505 | fadump_free_cpu_notes_buf(); | |
506 | return -ENODEV; | |
507 | } | |
508 | ||
509 | pr_warn("WARNING: appending only crashing CPU's register data\n"); | |
510 | note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs)); | |
511 | } | |
512 | ||
2a1b06dd HB |
513 | final_note(note_buf); |
514 | ||
515 | pr_debug("Updating elfcore header (%llx) with cpu notes\n", | |
c6c5b14d SJ |
516 | fadump_conf->elfcorehdr_addr); |
517 | fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr); | |
2a1b06dd HB |
518 | return 0; |
519 | } | |
520 | ||
41df5928 HB |
521 | static int __init opal_fadump_process(struct fw_dump *fadump_conf) |
522 | { | |
2a1b06dd HB |
523 | struct fadump_crash_info_header *fdh; |
524 | int rc = -EINVAL; | |
525 | ||
526 | if (!opal_fdm_active || !fadump_conf->fadumphdr_addr) | |
527 | return rc; | |
528 | ||
2a1b06dd | 529 | fdh = __va(fadump_conf->fadumphdr_addr); |
2a1b06dd | 530 | |
6f713d18 HB |
531 | #ifdef CONFIG_OPAL_CORE |
532 | /* | |
533 | * If this is a kernel initiated crash, crashing_cpu would be set | |
534 | * appropriately and register data of the crashing CPU saved by | |
535 | * crashing kernel. Add this saved register data of crashing CPU | |
536 | * to elf notes and populate the pt_regs for the remaining CPUs | |
537 | * from register state data provided by firmware. | |
538 | */ | |
539 | if (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN) | |
540 | kernel_initiated = true; | |
541 | #endif | |
542 | ||
c6c5b14d | 543 | return opal_fadump_build_cpu_notes(fadump_conf, fdh); |
41df5928 HB |
544 | } |
545 | ||
546 | static void opal_fadump_region_show(struct fw_dump *fadump_conf, | |
547 | struct seq_file *m) | |
548 | { | |
2a1b06dd | 549 | const struct opal_fadump_mem_struct *fdm_ptr; |
742a265a HB |
550 | u64 dumped_bytes = 0; |
551 | int i; | |
552 | ||
2a1b06dd HB |
553 | if (fadump_conf->dump_active) |
554 | fdm_ptr = opal_fdm_active; | |
555 | else | |
556 | fdm_ptr = opal_fdm; | |
557 | ||
b74196af | 558 | for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { |
2a1b06dd HB |
559 | /* |
560 | * Only regions that are registered for MPIPL | |
561 | * would have dump data. | |
562 | */ | |
563 | if ((fadump_conf->dump_active) && | |
b74196af HB |
564 | (i < be16_to_cpu(fdm_ptr->registered_regions))) |
565 | dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); | |
2a1b06dd | 566 | |
742a265a | 567 | seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", |
b74196af HB |
568 | be64_to_cpu(fdm_ptr->rgn[i].src), |
569 | be64_to_cpu(fdm_ptr->rgn[i].dest)); | |
742a265a | 570 | seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", |
b74196af | 571 | be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); |
742a265a | 572 | } |
2a1b06dd | 573 | |
a3ceb588 | 574 | /* Dump is active. Show preserved area start address. */ |
2a1b06dd | 575 | if (fadump_conf->dump_active) { |
a3ceb588 HB |
576 | seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n", |
577 | fadump_conf->boot_mem_top); | |
2a1b06dd | 578 | } |
41df5928 HB |
579 | } |
580 | ||
581 | static void opal_fadump_trigger(struct fadump_crash_info_header *fdh, | |
582 | const char *msg) | |
583 | { | |
584 | int rc; | |
585 | ||
5000a17a HB |
586 | /* |
587 | * Unlike on pSeries platform, logical CPU number is not provided | |
588 | * with architected register state data. So, store the crashing | |
589 | * CPU's PIR instead to plug the appropriate register data for | |
590 | * crashing CPU in the vmcore file. | |
591 | */ | |
592 | fdh->crashing_cpu = (u32)mfspr(SPRN_PIR); | |
593 | ||
41df5928 HB |
594 | rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg); |
595 | if (rc == OPAL_UNSUPPORTED) { | |
596 | pr_emerg("Reboot type %d not supported.\n", | |
597 | OPAL_REBOOT_MPIPL); | |
598 | } else if (rc == OPAL_HARDWARE) | |
599 | pr_emerg("No backend support for MPIPL!\n"); | |
600 | } | |
601 | ||
78d5cc15 HB |
602 | /* FADUMP_MAX_MEM_REGS or lower */ |
603 | static int opal_fadump_max_boot_mem_rgns(void) | |
604 | { | |
605 | return FADUMP_MAX_MEM_REGS; | |
606 | } | |
607 | ||
41df5928 HB |
608 | static struct fadump_ops opal_fadump_ops = { |
609 | .fadump_init_mem_struct = opal_fadump_init_mem_struct, | |
742a265a HB |
610 | .fadump_get_metadata_size = opal_fadump_get_metadata_size, |
611 | .fadump_setup_metadata = opal_fadump_setup_metadata, | |
7b1b3b48 | 612 | .fadump_get_bootmem_min = opal_fadump_get_bootmem_min, |
41df5928 HB |
613 | .fadump_register = opal_fadump_register, |
614 | .fadump_unregister = opal_fadump_unregister, | |
615 | .fadump_invalidate = opal_fadump_invalidate, | |
2790d01d | 616 | .fadump_cleanup = opal_fadump_cleanup, |
41df5928 HB |
617 | .fadump_process = opal_fadump_process, |
618 | .fadump_region_show = opal_fadump_region_show, | |
619 | .fadump_trigger = opal_fadump_trigger, | |
78d5cc15 | 620 | .fadump_max_boot_mem_rgns = opal_fadump_max_boot_mem_rgns, |
41df5928 HB |
621 | }; |
622 | ||
623 | void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) | |
624 | { | |
2a1b06dd | 625 | const __be32 *prop; |
41df5928 | 626 | unsigned long dn; |
b74196af | 627 | __be64 be_addr; |
2a1b06dd | 628 | u64 addr = 0; |
7b1b3b48 | 629 | int i, len; |
2a1b06dd HB |
630 | s64 ret; |
631 | ||
41df5928 HB |
632 | /* |
633 | * Check if Firmware-Assisted Dump is supported. if yes, check | |
634 | * if dump has been initiated on last reboot. | |
635 | */ | |
636 | dn = of_get_flat_dt_subnode_by_name(node, "dump"); | |
637 | if (dn == -FDT_ERR_NOTFOUND) { | |
638 | pr_debug("FADump support is missing!\n"); | |
639 | return; | |
640 | } | |
641 | ||
642 | if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) { | |
643 | pr_err("Support missing for this f/w version!\n"); | |
644 | return; | |
645 | } | |
646 | ||
7b1b3b48 HB |
647 | prop = of_get_flat_dt_prop(dn, "fw-load-area", &len); |
648 | if (prop) { | |
649 | /* | |
650 | * Each f/w load area is an (address,size) pair, | |
651 | * 2 cells each, totalling 4 cells per range. | |
652 | */ | |
653 | for (i = 0; i < len / (sizeof(*prop) * 4); i++) { | |
654 | u64 base, end; | |
655 | ||
656 | base = of_read_number(prop + (i * 4) + 0, 2); | |
657 | end = base; | |
658 | end += of_read_number(prop + (i * 4) + 2, 2); | |
659 | if (end > OPAL_FADUMP_MIN_BOOT_MEM) { | |
660 | pr_err("F/W load area: 0x%llx-0x%llx\n", | |
661 | base, end); | |
662 | pr_err("F/W version not supported!\n"); | |
663 | return; | |
664 | } | |
665 | } | |
666 | } | |
667 | ||
683eab94 HB |
668 | fadump_conf->ops = &opal_fadump_ops; |
669 | fadump_conf->fadump_supported = 1; | |
670 | /* TODO: Add support to pass additional parameters */ | |
671 | fadump_conf->param_area_supported = 0; | |
2a1b06dd | 672 | |
7dee93a9 HB |
673 | /* |
674 | * Firmware supports 32-bit field for size. Align it to PAGE_SIZE | |
675 | * and request firmware to copy multiple kernel boot memory regions. | |
676 | */ | |
e96d904e | 677 | fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE); |
7dee93a9 | 678 | |
2a1b06dd HB |
679 | /* |
680 | * Check if dump has been initiated on last reboot. | |
681 | */ | |
682 | prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); | |
683 | if (!prop) | |
684 | return; | |
685 | ||
b74196af HB |
686 | ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); |
687 | if ((ret != OPAL_SUCCESS) || !be_addr) { | |
2a1b06dd HB |
688 | pr_err("Failed to get Kernel metadata (%lld)\n", ret); |
689 | return; | |
690 | } | |
691 | ||
b74196af | 692 | addr = be64_to_cpu(be_addr); |
2a1b06dd HB |
693 | pr_debug("Kernel metadata addr: %llx\n", addr); |
694 | ||
695 | opal_fdm_active = __va(addr); | |
696 | if (opal_fdm_active->version != OPAL_FADUMP_VERSION) { | |
697 | pr_warn("Supported kernel metadata version: %u, found: %d!\n", | |
698 | OPAL_FADUMP_VERSION, opal_fdm_active->version); | |
699 | pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n"); | |
700 | } | |
701 | ||
702 | /* Kernel regions not registered with f/w for MPIPL */ | |
b74196af | 703 | if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { |
2a1b06dd HB |
704 | opal_fdm_active = NULL; |
705 | return; | |
706 | } | |
707 | ||
b74196af HB |
708 | ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); |
709 | if (be_addr) { | |
710 | addr = be64_to_cpu(be_addr); | |
5000a17a HB |
711 | pr_debug("CPU metadata addr: %llx\n", addr); |
712 | opal_cpu_metadata = __va(addr); | |
713 | } | |
714 | ||
2a1b06dd HB |
715 | pr_info("Firmware-assisted dump is active.\n"); |
716 | fadump_conf->dump_active = 1; | |
717 | opal_fadump_get_config(fadump_conf, opal_fdm_active); | |
41df5928 | 718 | } |
bec53196 | 719 | #endif /* !CONFIG_PRESERVE_FA_DUMP */ |