Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
eb39c880 MS |
2 | /* |
3 | * Firmware Assisted dump: A robust mechanism to get reliable kernel crash | |
4 | * dump with assistance from firmware. This approach does not use kexec, | |
5 | * instead firmware assists in booting the kdump kernel while preserving | |
6 | * memory contents. Most of the code implementation has been adapted |
7 | * from phyp assisted dump implementation written by Linas Vepstas and | |
8 | * Manish Ahuja | |
9 | * | |
eb39c880 MS |
10 | * Copyright 2011 IBM Corporation |
11 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | |
12 | */ | |
13 | ||
14 | #undef DEBUG | |
15 | #define pr_fmt(fmt) "fadump: " fmt | |
16 | ||
17 | #include <linux/string.h> | |
18 | #include <linux/memblock.h> | |
3ccc00a7 | 19 | #include <linux/delay.h> |
3ccc00a7 | 20 | #include <linux/seq_file.h> |
2df173d9 | 21 | #include <linux/crash_dump.h> |
b500afff MS |
22 | #include <linux/kobject.h> |
23 | #include <linux/sysfs.h> | |
a5818313 | 24 | #include <linux/slab.h> |
a4e92ce8 | 25 | #include <linux/cma.h> |
45d0ba52 | 26 | #include <linux/hugetlb.h> |
dbf77fed | 27 | #include <linux/debugfs.h> |
e6f6390a CL |
28 | #include <linux/of.h> |
29 | #include <linux/of_fdt.h> | |
eb39c880 MS |
30 | |
31 | #include <asm/page.h> | |
eb39c880 | 32 | #include <asm/fadump.h> |
ca986d7f | 33 | #include <asm/fadump-internal.h> |
cad3c834 | 34 | #include <asm/setup.h> |
cbd3d5ba | 35 | #include <asm/interrupt.h> |
eb39c880 | 36 | |
ba608c4f SJ |
37 | /* |
38 | * The CPU who acquired the lock to trigger the fadump crash should | |
39 | * wait for other CPUs to enter. | |
40 | * | |
41 | * The timeout is in milliseconds. | |
42 | */ | |
43 | #define CRASH_TIMEOUT 500 | |
44 | ||
eb39c880 | 45 | static struct fw_dump fw_dump; |
3ccc00a7 | 46 | |
b2a815a5 HB |
47 | static void __init fadump_reserve_crash_area(u64 base); |
48 | ||
bec53196 | 49 | #ifndef CONFIG_PRESERVE_FA_DUMP |
5f987cae | 50 | |
2e341f56 ME |
51 | static struct kobject *fadump_kobj; |
52 | ||
5f987cae | 53 | static atomic_t cpus_in_fadump; |
3ccc00a7 | 54 | static DEFINE_MUTEX(fadump_mutex); |
5f987cae | 55 | |
02c04e37 HB |
56 | #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ |
57 | #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ | |
58 | sizeof(struct fadump_memory_range)) | |
59 | static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; | |
2e341f56 ME |
60 | static struct fadump_mrange_info |
61 | reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true }; | |
02c04e37 HB |
62 | |
63 | static void __init early_init_dt_scan_reserved_ranges(unsigned long node); | |
eb39c880 | 64 | |
a4e92ce8 | 65 | #ifdef CONFIG_CMA |
0226e552 HB |
66 | static struct cma *fadump_cma; |
67 | ||
a4e92ce8 MS |
68 | /* |
69 | * fadump_cma_init() - Initialize CMA area from a fadump reserved memory | |
70 | * | |
71 | * This function initializes CMA area from fadump reserved memory. | |
72 | * The total size of fadump reserved memory covers for boot memory size | |
73 | * + cpu data size + hpte size and metadata. | |
74 | * Initialize only the area equivalent to boot memory size for CMA use. | |
887f56a0 RD |
75 | * The remaining portion of fadump reserved memory will not be given |
76 | * to CMA and pages for those will stay reserved. boot memory size is | |
a4e92ce8 MS |
77 | * aligned per CMA requirement to satisfy cma_init_reserved_mem() call. |
78 | * But for some reason even if it fails we still have the memory reservation | |
79 | * with us and we can still continue doing fadump. | |
80 | */ | |
2e341f56 | 81 | static int __init fadump_cma_init(void) |
a4e92ce8 MS |
82 | { |
83 | unsigned long long base, size; | |
84 | int rc; | |
85 | ||
86 | if (!fw_dump.fadump_enabled) | |
87 | return 0; | |
88 | ||
89 | /* | |
90 | * Do not use CMA if user has provided fadump=nocma kernel parameter. | |
91 | * Return 1 to continue with fadump old behaviour. | |
92 | */ | |
93 | if (fw_dump.nocma) | |
94 | return 1; | |
95 | ||
96 | base = fw_dump.reserve_dump_area_start; | |
97 | size = fw_dump.boot_memory_size; | |
98 | ||
99 | if (!size) | |
100 | return 0; | |
101 | ||
102 | rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); | |
103 | if (rc) { | |
104 | pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc); | |
105 | /* | |
106 | * Though the CMA init has failed we still have memory | |
107 | * reservation with us. The reserved memory will be | |
108 | * blocked from production system usage. Hence return 1, | |
109 | * so that we can continue with fadump. | |
110 | */ | |
111 | return 1; | |
112 | } | |
113 | ||
ee97347f HB |
114 | /* |
115 | * If CMA activation fails, keep the pages reserved, instead of | |
116 | * exposing them to buddy allocator. Same as 'fadump=nocma' case. | |
117 | */ | |
118 | cma_reserve_pages_on_error(fadump_cma); | |
119 | ||
a4e92ce8 MS |
120 | /* |
121 | * So we now have successfully initialized cma area for fadump. | |
122 | */ | |
123 | pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " | |
124 | "bytes of memory reserved for firmware-assisted dump\n", | |
125 | cma_get_size(fadump_cma), | |
126 | (unsigned long)cma_get_base(fadump_cma) >> 20, | |
127 | fw_dump.reserve_dump_area_size); | |
128 | return 1; | |
129 | } | |
130 | #else | |
131 | static int __init fadump_cma_init(void) { return 1; } | |
132 | #endif /* CONFIG_CMA */ | |
133 | ||
eb39c880 | 134 | /* Scan the Firmware Assisted dump configuration details. */ |
f3512011 HB |
135 | int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, |
136 | int depth, void *data) | |
eb39c880 | 137 | { |
02c04e37 HB |
138 | if (depth == 0) { |
139 | early_init_dt_scan_reserved_ranges(node); | |
140 | return 0; | |
141 | } | |
142 | ||
41df5928 | 143 | if (depth != 1) |
eb39c880 MS |
144 | return 0; |
145 | ||
41df5928 HB |
146 | if (strcmp(uname, "rtas") == 0) { |
147 | rtas_fadump_dt_scan(&fw_dump, node); | |
148 | return 1; | |
149 | } | |
150 | ||
151 | if (strcmp(uname, "ibm,opal") == 0) { | |
152 | opal_fadump_dt_scan(&fw_dump, node); | |
153 | return 1; | |
154 | } | |
155 | ||
156 | return 0; | |
eb39c880 MS |
157 | } |
158 | ||
eae0dfcc HB |
159 | /* |
160 | * If fadump is registered, check if the memory provided | |
0db6896f | 161 | * falls within boot memory area and reserved memory area. |
eae0dfcc | 162 | */ |
becd91d9 | 163 | int is_fadump_memory_area(u64 addr, unsigned long size) |
eae0dfcc | 164 | { |
becd91d9 | 165 | u64 d_start, d_end; |
0db6896f | 166 | |
eae0dfcc HB |
167 | if (!fw_dump.dump_registered) |
168 | return 0; | |
169 | ||
becd91d9 HB |
170 | if (!size) |
171 | return 0; | |
172 | ||
173 | d_start = fw_dump.reserve_dump_area_start; | |
174 | d_end = d_start + fw_dump.reserve_dump_area_size; | |
0db6896f MS |
175 | if (((addr + size) > d_start) && (addr <= d_end)) |
176 | return 1; | |
177 | ||
7dee93a9 | 178 | return (addr <= fw_dump.boot_mem_top); |
eae0dfcc HB |
179 | } |
180 | ||
6fcd6baa NP |
181 | int should_fadump_crash(void) |
182 | { | |
183 | if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) | |
184 | return 0; | |
185 | return 1; | |
186 | } | |
187 | ||
3ccc00a7 MS |
188 | int is_fadump_active(void) |
189 | { | |
190 | return fw_dump.dump_active; | |
191 | } | |
192 | ||
a5a05b91 | 193 | /* |
961cf26a HB |
194 | * Returns true, if there are no holes in memory area between d_start to d_end, |
195 | * false otherwise. | |
a5a05b91 | 196 | */ |
961cf26a | 197 | static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) |
a5a05b91 | 198 | { |
b10d6bca | 199 | phys_addr_t reg_start, reg_end; |
961cf26a | 200 | bool ret = false; |
b10d6bca | 201 | u64 i, start, end; |
a5a05b91 | 202 | |
b10d6bca MR |
203 | for_each_mem_range(i, ®_start, ®_end) { |
204 | start = max_t(u64, d_start, reg_start); | |
205 | end = min_t(u64, d_end, reg_end); | |
961cf26a HB |
206 | if (d_start < end) { |
207 | /* Memory hole from d_start to start */ | |
208 | if (start > d_start) | |
a5a05b91 HB |
209 | break; |
210 | ||
961cf26a HB |
211 | if (end == d_end) { |
212 | ret = true; | |
a5a05b91 HB |
213 | break; |
214 | } | |
215 | ||
961cf26a | 216 | d_start = end + 1; |
a5a05b91 HB |
217 | } |
218 | } | |
219 | ||
220 | return ret; | |
221 | } | |
222 | ||
f86593be MS |
223 | /* |
224 | * Returns true, if there are no holes in reserved memory area, | |
225 | * false otherwise. | |
226 | */ | |
7f0ad11d | 227 | bool is_fadump_reserved_mem_contiguous(void) |
f86593be | 228 | { |
961cf26a | 229 | u64 d_start, d_end; |
f86593be | 230 | |
961cf26a HB |
231 | d_start = fw_dump.reserve_dump_area_start; |
232 | d_end = d_start + fw_dump.reserve_dump_area_size; | |
233 | return is_fadump_mem_area_contiguous(d_start, d_end); | |
f86593be MS |
234 | } |
235 | ||
3ccc00a7 | 236 | /* Print firmware assisted dump configurations for debugging purpose. */ |
d276960d | 237 | static void __init fadump_show_config(void) |
3ccc00a7 | 238 | { |
7dee93a9 HB |
239 | int i; |
240 | ||
3ccc00a7 MS |
241 | pr_debug("Support for firmware-assisted dump (fadump): %s\n", |
242 | (fw_dump.fadump_supported ? "present" : "no support")); | |
243 | ||
244 | if (!fw_dump.fadump_supported) | |
245 | return; | |
246 | ||
247 | pr_debug("Fadump enabled : %s\n", | |
248 | (fw_dump.fadump_enabled ? "yes" : "no")); | |
249 | pr_debug("Dump Active : %s\n", | |
250 | (fw_dump.dump_active ? "yes" : "no")); | |
251 | pr_debug("Dump section sizes:\n"); | |
252 | pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); | |
253 | pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); | |
7dee93a9 HB |
254 | pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size); |
255 | pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top); | |
256 | pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt); | |
257 | for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { | |
258 | pr_debug("[%03d] base = %llx, size = %llx\n", i, | |
259 | fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]); | |
260 | } | |
3ccc00a7 MS |
261 | } |
262 | ||
eb39c880 MS |
263 | /** |
264 | * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM | |
265 | * | |
266 | * Function to find the largest memory size we need to reserve during early | |
267 | * boot process. This will be the size of the memory that is required for a | |
268 | * kernel to boot successfully. | |
269 | * | |
270 | * This function has been taken from phyp-assisted dump feature implementation. | |
271 | * | |
272 | * returns larger of 256MB or 5% rounded down to multiples of 256MB. | |
273 | * | |
274 | * TODO: Come up with better approach to find out more accurate memory size | |
275 | * that is required for a kernel to boot successfully. | |
276 | * | |
277 | */ | |
fbced154 | 278 | static __init u64 fadump_calculate_reserve_size(void) |
eb39c880 | 279 | { |
7b1b3b48 | 280 | u64 base, size, bootmem_min; |
11550dc0 | 281 | int ret; |
eb39c880 | 282 | |
81d9eca5 HB |
283 | if (fw_dump.reserve_bootvar) |
284 | pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n"); | |
285 | ||
eb39c880 | 286 | /* |
11550dc0 | 287 | * Check if the size is specified through crashkernel= cmdline |
e7467dc6 HB |
288 | * option. If yes, then use that but ignore base as fadump reserves |
289 | * memory at a predefined offset. | |
eb39c880 | 290 | */ |
11550dc0 | 291 | ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), |
a9e1a3d8 | 292 | &size, &base, NULL, NULL); |
11550dc0 | 293 | if (ret == 0 && size > 0) { |
48a316e3 HB |
294 | unsigned long max_size; |
295 | ||
81d9eca5 HB |
296 | if (fw_dump.reserve_bootvar) |
297 | pr_info("Using 'crashkernel=' parameter for memory reservation.\n"); | |
298 | ||
11550dc0 | 299 | fw_dump.reserve_bootvar = (unsigned long)size; |
48a316e3 HB |
300 | |
301 | /* | |
302 | * Adjust if the boot memory size specified is above | |
303 | * the upper limit. | |
304 | */ | |
305 | max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO; | |
306 | if (fw_dump.reserve_bootvar > max_size) { | |
307 | fw_dump.reserve_bootvar = max_size; | |
308 | pr_info("Adjusted boot memory size to %luMB\n", | |
309 | (fw_dump.reserve_bootvar >> 20)); | |
310 | } | |
311 | ||
eb39c880 | 312 | return fw_dump.reserve_bootvar; |
81d9eca5 HB |
313 | } else if (fw_dump.reserve_bootvar) { |
314 | /* | |
315 | * 'fadump_reserve_mem=' is being used to reserve memory | |
316 | * for firmware-assisted dump. | |
317 | */ | |
318 | return fw_dump.reserve_bootvar; | |
11550dc0 | 319 | } |
eb39c880 MS |
320 | |
321 | /* divide by 20 to get 5% of value */ | |
48a316e3 | 322 | size = memblock_phys_mem_size() / 20; |
eb39c880 MS |
323 | |
324 | /* round it down in multiples of 256 */ | |
325 | size = size & ~0x0FFFFFFFUL; | |
326 | ||
327 | /* Truncate to memory_limit. We don't want to over reserve the memory.*/ | |
328 | if (memory_limit && size > memory_limit) | |
329 | size = memory_limit; | |
330 | ||
7b1b3b48 HB |
331 | bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); |
332 | return (size > bootmem_min ? size : bootmem_min); | |
eb39c880 MS |
333 | } |
334 | ||
335 | /* | |
336 | * Calculate the total memory size required to be reserved for | |
337 | * firmware-assisted dump registration. | |
338 | */ | |
d276960d | 339 | static unsigned long __init get_fadump_area_size(void) |
eb39c880 MS |
340 | { |
341 | unsigned long size = 0; | |
342 | ||
343 | size += fw_dump.cpu_state_data_size; | |
344 | size += fw_dump.hpte_region_size; | |
9cf3b3a3 HB |
345 | /* |
346 | * Account for pagesize alignment of boot memory area destination address. | |
346 | * This facilitates mmap reading of first kernel's memory. |
348 | */ | |
349 | size = PAGE_ALIGN(size); | |
eb39c880 | 350 | size += fw_dump.boot_memory_size; |
2df173d9 | 351 | size += sizeof(struct fadump_crash_info_header); |
742a265a HB |
352 | |
353 | /* This is to hold kernel metadata on platforms that support it */ | |
354 | size += (fw_dump.ops->fadump_get_metadata_size ? | |
355 | fw_dump.ops->fadump_get_metadata_size() : 0); | |
eb39c880 MS |
356 | return size; |
357 | } | |
358 | ||
7dee93a9 HB |
359 | static int __init add_boot_mem_region(unsigned long rstart, |
360 | unsigned long rsize) | |
361 | { | |
78d5cc15 | 362 | int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns(); |
7dee93a9 HB |
363 | int i = fw_dump.boot_mem_regs_cnt++; |
364 | ||
78d5cc15 HB |
365 | if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) { |
366 | fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns; | |
7dee93a9 HB |
367 | return 0; |
368 | } | |
369 | ||
370 | pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n", | |
371 | i, rstart, (rstart + rsize)); | |
372 | fw_dump.boot_mem_addr[i] = rstart; | |
373 | fw_dump.boot_mem_sz[i] = rsize; | |
374 | return 1; | |
375 | } | |
376 | ||
377 | /* | |
378 | * Firmware usually has a hard limit on the data it can copy per region. | |
379 | * Honour that by splitting a memory range into multiple regions. | |
380 | */ | |
381 | static int __init add_boot_mem_regions(unsigned long mstart, | |
382 | unsigned long msize) | |
383 | { | |
384 | unsigned long rstart, rsize, max_size; | |
385 | int ret = 1; | |
386 | ||
387 | rstart = mstart; | |
388 | max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize; | |
389 | while (msize) { | |
390 | if (msize > max_size) | |
391 | rsize = max_size; | |
392 | else | |
393 | rsize = msize; | |
394 | ||
395 | ret = add_boot_mem_region(rstart, rsize); | |
396 | if (!ret) | |
397 | break; | |
398 | ||
399 | msize -= rsize; | |
400 | rstart += rsize; | |
401 | } | |
402 | ||
403 | return ret; | |
404 | } | |
405 | ||
406 | static int __init fadump_get_boot_mem_regions(void) | |
407 | { | |
b10d6bca | 408 | unsigned long size, cur_size, hole_size, last_end; |
7dee93a9 | 409 | unsigned long mem_size = fw_dump.boot_memory_size; |
b10d6bca | 410 | phys_addr_t reg_start, reg_end; |
7dee93a9 | 411 | int ret = 1; |
b10d6bca | 412 | u64 i; |
7dee93a9 HB |
413 | |
414 | fw_dump.boot_mem_regs_cnt = 0; | |
415 | ||
416 | last_end = 0; | |
417 | hole_size = 0; | |
418 | cur_size = 0; | |
b10d6bca MR |
419 | for_each_mem_range(i, ®_start, ®_end) { |
420 | size = reg_end - reg_start; | |
421 | hole_size += (reg_start - last_end); | |
7dee93a9 HB |
422 | |
423 | if ((cur_size + size) >= mem_size) { | |
424 | size = (mem_size - cur_size); | |
b10d6bca | 425 | ret = add_boot_mem_regions(reg_start, size); |
7dee93a9 HB |
426 | break; |
427 | } | |
428 | ||
429 | mem_size -= size; | |
430 | cur_size += size; | |
b10d6bca | 431 | ret = add_boot_mem_regions(reg_start, size); |
7dee93a9 HB |
432 | if (!ret) |
433 | break; | |
434 | ||
b10d6bca | 435 | last_end = reg_end; |
7dee93a9 HB |
436 | } |
437 | fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size); | |
438 | ||
439 | return ret; | |
440 | } | |
441 | ||
140777a3 HB |
442 | /* |
443 | * Returns true, if the given range overlaps with reserved memory ranges | |
444 | * starting at idx. Also, updates idx to index of overlapping memory range | |
445 | * with the given memory range. | |
446 | * False, otherwise. | |
447 | */ | |
d276960d | 448 | static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx) |
140777a3 HB |
449 | { |
450 | bool ret = false; | |
451 | int i; | |
452 | ||
453 | for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) { | |
454 | u64 rbase = reserved_mrange_info.mem_ranges[i].base; | |
455 | u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size; | |
456 | ||
457 | if (end <= rbase) | |
458 | break; | |
459 | ||
460 | if ((end > rbase) && (base < rend)) { | |
461 | *idx = i; | |
462 | ret = true; | |
463 | break; | |
464 | } | |
465 | } | |
466 | ||
467 | return ret; | |
468 | } | |
469 | ||
470 | /* | |
471 | * Locate a suitable memory area to reserve memory for FADump. While at it, | |
472 | * lookup reserved-ranges & avoid overlap with them, as they are used by F/W. | |
473 | */ | |
474 | static u64 __init fadump_locate_reserve_mem(u64 base, u64 size) | |
475 | { | |
476 | struct fadump_memory_range *mrngs; | |
477 | phys_addr_t mstart, mend; | |
478 | int idx = 0; | |
479 | u64 i, ret = 0; | |
480 | ||
481 | mrngs = reserved_mrange_info.mem_ranges; | |
482 | for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, | |
483 | &mstart, &mend, NULL) { | |
484 | pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n", | |
485 | i, mstart, mend, base); | |
486 | ||
487 | if (mstart > base) | |
488 | base = PAGE_ALIGN(mstart); | |
489 | ||
490 | while ((mend > base) && ((mend - base) >= size)) { | |
491 | if (!overlaps_reserved_ranges(base, base+size, &idx)) { | |
492 | ret = base; | |
493 | goto out; | |
494 | } | |
495 | ||
496 | base = mrngs[idx].base + mrngs[idx].size; | |
497 | base = PAGE_ALIGN(base); | |
498 | } | |
499 | } | |
500 | ||
501 | out: | |
502 | return ret; | |
503 | } | |
504 | ||
eb39c880 MS |
505 | int __init fadump_reserve_mem(void) |
506 | { | |
140777a3 | 507 | u64 base, size, mem_boundary, bootmem_min; |
6abec12c | 508 | int ret = 1; |
eb39c880 MS |
509 | |
510 | if (!fw_dump.fadump_enabled) | |
511 | return 0; | |
512 | ||
513 | if (!fw_dump.fadump_supported) { | |
6abec12c HB |
514 | pr_info("Firmware-Assisted Dump is not supported on this hardware\n"); |
515 | goto error_out; | |
eb39c880 | 516 | } |
742a265a | 517 | |
3ccc00a7 MS |
518 | /* |
519 | * Initialize boot memory size | |
520 | * If dump is active then we have already calculated the size during | |
521 | * first kernel. | |
522 | */ | |
f3512011 | 523 | if (!fw_dump.dump_active) { |
6abec12c HB |
524 | fw_dump.boot_memory_size = |
525 | PAGE_ALIGN(fadump_calculate_reserve_size()); | |
a4e92ce8 | 526 | #ifdef CONFIG_CMA |
579ca1a2 | 527 | if (!fw_dump.nocma) { |
a4e92ce8 | 528 | fw_dump.boot_memory_size = |
140777a3 | 529 | ALIGN(fw_dump.boot_memory_size, |
e16faf26 | 530 | CMA_MIN_ALIGNMENT_BYTES); |
579ca1a2 | 531 | } |
a4e92ce8 | 532 | #endif |
7b1b3b48 HB |
533 | |
534 | bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); | |
535 | if (fw_dump.boot_memory_size < bootmem_min) { | |
536 | pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n", | |
537 | fw_dump.boot_memory_size, bootmem_min); | |
538 | goto error_out; | |
539 | } | |
7dee93a9 HB |
540 | |
541 | if (!fadump_get_boot_mem_regions()) { | |
542 | pr_err("Too many holes in boot memory area to enable fadump\n"); | |
543 | goto error_out; | |
544 | } | |
a4e92ce8 | 545 | } |
eb39c880 | 546 | |
eb39c880 | 547 | if (memory_limit) |
6abec12c | 548 | mem_boundary = memory_limit; |
eb39c880 | 549 | else |
6abec12c | 550 | mem_boundary = memblock_end_of_DRAM(); |
eb39c880 | 551 | |
7dee93a9 | 552 | base = fw_dump.boot_mem_top; |
8255da95 HB |
553 | size = get_fadump_area_size(); |
554 | fw_dump.reserve_dump_area_size = size; | |
eb39c880 | 555 | if (fw_dump.dump_active) { |
b71a693d MS |
556 | pr_info("Firmware-assisted dump is active.\n"); |
557 | ||
85975387 HB |
558 | #ifdef CONFIG_HUGETLB_PAGE |
559 | /* | |
560 | * FADump capture kernel doesn't care much about hugepages. | |
561 | * In fact, handling hugepages in capture kernel is asking for | |
562 | * trouble. So, disable HugeTLB support when fadump is active. | |
563 | */ | |
564 | hugetlb_disabled = true; | |
565 | #endif | |
eb39c880 MS |
566 | /* |
567 | * If last boot has crashed then reserve all the memory | |
b2a815a5 | 568 | * above boot memory size so that we don't touch it until |
eb39c880 | 569 | * dump is written to disk by userspace tool. This memory |
b2a815a5 | 570 | * can be released for general use by invalidating fadump. |
eb39c880 | 571 | */ |
b2a815a5 | 572 | fadump_reserve_crash_area(base); |
2df173d9 | 573 | |
f3512011 HB |
574 | pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr); |
575 | pr_debug("Reserve dump area start address: 0x%lx\n", | |
576 | fw_dump.reserve_dump_area_start); | |
8255da95 | 577 | } else { |
f6e6bedb HB |
578 | /* |
579 | * Reserve memory at an offset closer to bottom of the RAM to | |
579ca1a2 | 580 | * minimize the impact of memory hot-remove operation. |
f6e6bedb | 581 | */ |
140777a3 | 582 | base = fadump_locate_reserve_mem(base, size); |
6abec12c | 583 | |
9a2921e5 | 584 | if (!base || (base + size > mem_boundary)) { |
742a265a HB |
585 | pr_err("Failed to find memory chunk for reservation!\n"); |
586 | goto error_out; | |
587 | } | |
588 | fw_dump.reserve_dump_area_start = base; | |
589 | ||
590 | /* | |
591 | * Calculate the kernel metadata address and register it with | |
592 | * f/w if the platform supports. | |
593 | */ | |
594 | if (fw_dump.ops->fadump_setup_metadata && | |
595 | (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0)) | |
596 | goto error_out; | |
597 | ||
598 | if (memblock_reserve(base, size)) { | |
6abec12c HB |
599 | pr_err("Failed to reserve memory!\n"); |
600 | goto error_out; | |
f6e6bedb HB |
601 | } |
602 | ||
6abec12c HB |
603 | pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n", |
604 | (size >> 20), base, (memblock_phys_mem_size() >> 20)); | |
f6e6bedb | 605 | |
6abec12c | 606 | ret = fadump_cma_init(); |
a4e92ce8 | 607 | } |
6abec12c HB |
608 | |
609 | return ret; | |
610 | error_out: | |
611 | fw_dump.fadump_enabled = 0; | |
d1eb75e0 | 612 | fw_dump.reserve_dump_area_size = 0; |
6abec12c | 613 | return 0; |
eb39c880 MS |
614 | } |
615 | ||
616 | /* Look for fadump= cmdline option. */ | |
617 | static int __init early_fadump_param(char *p) | |
618 | { | |
619 | if (!p) | |
620 | return 1; | |
621 | ||
622 | if (strncmp(p, "on", 2) == 0) | |
623 | fw_dump.fadump_enabled = 1; | |
624 | else if (strncmp(p, "off", 3) == 0) | |
625 | fw_dump.fadump_enabled = 0; | |
a4e92ce8 MS |
626 | else if (strncmp(p, "nocma", 5) == 0) { |
627 | fw_dump.fadump_enabled = 1; | |
628 | fw_dump.nocma = 1; | |
629 | } | |
eb39c880 MS |
630 | |
631 | return 0; | |
632 | } | |
633 | early_param("fadump", early_fadump_param); | |
634 | ||
81d9eca5 HB |
635 | /* |
636 | * Look for fadump_reserve_mem= cmdline option | |
637 | * TODO: Remove references to 'fadump_reserve_mem=' parameter, | |
638 | * once the 'crashkernel=' parameter is fully adopted. |
639 | */ | |
640 | static int __init early_fadump_reserve_mem(char *p) | |
641 | { | |
642 | if (p) | |
643 | fw_dump.reserve_bootvar = memparse(p, &p); | |
644 | return 0; | |
645 | } | |
646 | early_param("fadump_reserve_mem", early_fadump_reserve_mem); | |
647 | ||
ebaeb5ae MS |
648 | void crash_fadump(struct pt_regs *regs, const char *str) |
649 | { | |
ba608c4f | 650 | unsigned int msecs; |
ebaeb5ae | 651 | struct fadump_crash_info_header *fdh = NULL; |
f2a5e8f0 | 652 | int old_cpu, this_cpu; |
ba608c4f SJ |
653 | /* Do not include first CPU */ |
654 | unsigned int ncpus = num_online_cpus() - 1; | |
ebaeb5ae | 655 | |
6fcd6baa | 656 | if (!should_fadump_crash()) |
ebaeb5ae MS |
657 | return; |
658 | ||
f2a5e8f0 MS |
659 | /* |
660 | * old_cpu == -1 means this is the first CPU which has come here, | |
661 | * go ahead and trigger fadump. | |
662 | * | |
0ddbbb89 | 663 | * old_cpu != -1 means some other CPU is already on its way |
f2a5e8f0 MS |
664 | * to trigger fadump, just keep looping here. |
665 | */ | |
666 | this_cpu = smp_processor_id(); | |
667 | old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); | |
668 | ||
669 | if (old_cpu != -1) { | |
ba608c4f SJ |
670 | atomic_inc(&cpus_in_fadump); |
671 | ||
f2a5e8f0 MS |
672 | /* |
673 | * We can't loop here indefinitely. Wait as long as fadump | |
674 | * is in force. If we race with fadump un-registration this | |
675 | * loop will break and then we go down to normal panic path | |
676 | * and reboot. If fadump is in force the first crashing | |
677 | * cpu will definitely trigger fadump. | |
678 | */ | |
679 | while (fw_dump.dump_registered) | |
680 | cpu_relax(); | |
681 | return; | |
682 | } | |
683 | ||
ebaeb5ae | 684 | fdh = __va(fw_dump.fadumphdr_addr); |
ebaeb5ae MS |
685 | fdh->crashing_cpu = crashing_cpu; |
686 | crash_save_vmcoreinfo(); | |
687 | ||
688 | if (regs) | |
689 | fdh->regs = *regs; | |
690 | else | |
691 | ppc_save_regs(&fdh->regs); | |
692 | ||
6584cec0 | 693 | fdh->cpu_mask = *cpu_online_mask; |
ebaeb5ae | 694 | |
ba608c4f SJ |
695 | /* |
696 | * If we came in via system reset, wait a while for the secondary | |
697 | * CPUs to enter. | |
698 | */ | |
7153d4bf | 699 | if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) { |
ba608c4f SJ |
700 | msecs = CRASH_TIMEOUT; |
701 | while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) | |
702 | mdelay(1); | |
703 | } | |
704 | ||
41a65d16 | 705 | fw_dump.ops->fadump_trigger(fdh, str); |
ebaeb5ae MS |
706 | } |
707 | ||
d276960d | 708 | u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) |
ebaeb5ae MS |
709 | { |
710 | struct elf_prstatus prstatus; | |
711 | ||
712 | memset(&prstatus, 0, sizeof(prstatus)); | |
713 | /* | |
714 | * FIXME: How do I get PID? Do I really need it? |
715 | * prstatus.pr_pid = ???? | |
716 | */ | |
9554e908 | 717 | elf_core_copy_regs(&prstatus.pr_reg, regs); |
22bd0177 HB |
718 | buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, |
719 | &prstatus, sizeof(prstatus)); | |
ebaeb5ae MS |
720 | return buf; |
721 | } | |
722 | ||
d276960d | 723 | void __init fadump_update_elfcore_header(char *bufp) |
ebaeb5ae | 724 | { |
ebaeb5ae MS |
725 | struct elf_phdr *phdr; |
726 | ||
ebaeb5ae MS |
727 | bufp += sizeof(struct elfhdr); |
728 | ||
729 | /* First note is a place holder for cpu notes info. */ | |
730 | phdr = (struct elf_phdr *)bufp; | |
731 | ||
732 | if (phdr->p_type == PT_NOTE) { | |
961cf26a | 733 | phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr); |
ebaeb5ae MS |
734 | phdr->p_offset = phdr->p_paddr; |
735 | phdr->p_filesz = fw_dump.cpu_notes_buf_size; | |
736 | phdr->p_memsz = fw_dump.cpu_notes_buf_size; | |
737 | } | |
738 | return; | |
739 | } | |
740 | ||
d276960d | 741 | static void *__init fadump_alloc_buffer(unsigned long size) |
ebaeb5ae | 742 | { |
72aa6517 | 743 | unsigned long count, i; |
ebaeb5ae | 744 | struct page *page; |
72aa6517 | 745 | void *vaddr; |
ebaeb5ae | 746 | |
72aa6517 | 747 | vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); |
ebaeb5ae MS |
748 | if (!vaddr) |
749 | return NULL; | |
750 | ||
72aa6517 | 751 | count = PAGE_ALIGN(size) / PAGE_SIZE; |
ebaeb5ae MS |
752 | page = virt_to_page(vaddr); |
753 | for (i = 0; i < count; i++) | |
72aa6517 | 754 | mark_page_reserved(page + i); |
ebaeb5ae MS |
755 | return vaddr; |
756 | } | |
757 | ||
961cf26a | 758 | static void fadump_free_buffer(unsigned long vaddr, unsigned long size) |
ebaeb5ae | 759 | { |
72aa6517 | 760 | free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL); |
ebaeb5ae MS |
761 | } |
762 | ||
d276960d | 763 | s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus) |
961cf26a HB |
764 | { |
765 | /* Allocate buffer to hold cpu crash notes. */ | |
766 | fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); | |
767 | fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); | |
768 | fw_dump.cpu_notes_buf_vaddr = | |
769 | (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size); | |
770 | if (!fw_dump.cpu_notes_buf_vaddr) { | |
771 | pr_err("Failed to allocate %ld bytes for CPU notes buffer\n", | |
772 | fw_dump.cpu_notes_buf_size); | |
773 | return -ENOMEM; | |
774 | } | |
775 | ||
776 | pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n", | |
777 | fw_dump.cpu_notes_buf_size, | |
778 | fw_dump.cpu_notes_buf_vaddr); | |
779 | return 0; | |
780 | } | |
781 | ||
7f0ad11d | 782 | void fadump_free_cpu_notes_buf(void) |
961cf26a HB |
783 | { |
784 | if (!fw_dump.cpu_notes_buf_vaddr) | |
785 | return; | |
786 | ||
787 | fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr, | |
788 | fw_dump.cpu_notes_buf_size); | |
789 | fw_dump.cpu_notes_buf_vaddr = 0; | |
790 | fw_dump.cpu_notes_buf_size = 0; | |
791 | } | |
792 | ||
e4fc48fb | 793 | static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info) |
1bd6a1c4 | 794 | { |
02c04e37 HB |
795 | if (mrange_info->is_static) { |
796 | mrange_info->mem_range_cnt = 0; | |
797 | return; | |
798 | } | |
799 | ||
e4fc48fb | 800 | kfree(mrange_info->mem_ranges); |
02c04e37 HB |
801 | memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0, |
802 | (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ)); | |
1bd6a1c4 HB |
803 | } |
804 | ||
805 | /* | |
e4fc48fb | 806 | * Allocate or reallocate mem_ranges array in incremental units |
1bd6a1c4 HB |
807 | * of PAGE_SIZE. |
808 | */ | |
e4fc48fb | 809 | static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info) |
1bd6a1c4 | 810 | { |
e4fc48fb | 811 | struct fadump_memory_range *new_array; |
1bd6a1c4 HB |
812 | u64 new_size; |
813 | ||
e4fc48fb HB |
814 | new_size = mrange_info->mem_ranges_sz + PAGE_SIZE; |
815 | pr_debug("Allocating %llu bytes of memory for %s memory ranges\n", | |
816 | new_size, mrange_info->name); | |
1bd6a1c4 | 817 | |
e4fc48fb | 818 | new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL); |
1bd6a1c4 | 819 | if (new_array == NULL) { |
e4fc48fb HB |
820 | pr_err("Insufficient memory for setting up %s memory ranges\n", |
821 | mrange_info->name); | |
822 | fadump_free_mem_ranges(mrange_info); | |
1bd6a1c4 HB |
823 | return -ENOMEM; |
824 | } | |
825 | ||
e4fc48fb HB |
826 | mrange_info->mem_ranges = new_array; |
827 | mrange_info->mem_ranges_sz = new_size; | |
828 | mrange_info->max_mem_ranges = (new_size / | |
829 | sizeof(struct fadump_memory_range)); | |
1bd6a1c4 HB |
830 | return 0; |
831 | } | |
/*
 * Append the range [base, end) to @mrange_info, folding it into the
 * previous entry when the two are contiguous (except in the boot memory
 * area, which always gets its own entries).
 *
 * Returns 0 on success (an empty range is a successful no-op), -ENOSPC
 * when a static array is full, or -ENOMEM on reallocation failure.
 */
static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
				       u64 base, u64 end)
{
	struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
	bool is_adjacent = false;
	u64 start, size;

	if (base == end)
		return 0;

	/*
	 * Fold adjacent memory ranges to bring down the memory ranges/
	 * PT_LOAD segments count.
	 */
	if (mrange_info->mem_range_cnt) {
		start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
		size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;

		/*
		 * Boot memory area needs separate PT_LOAD segment(s) as it
		 * is moved to a different location at the time of crash.
		 * So, fold only if the region is not boot memory area.
		 */
		if ((start + size) == base && start >= fw_dump.boot_mem_top)
			is_adjacent = true;
	}
	if (!is_adjacent) {
		/* resize the array on reaching the limit */
		if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
			int ret;

			/* Static arrays cannot grow; report exhaustion. */
			if (mrange_info->is_static) {
				pr_err("Reached array size limit for %s memory ranges\n",
				       mrange_info->name);
				return -ENOSPC;
			}

			ret = fadump_alloc_mem_ranges(mrange_info);
			if (ret)
				return ret;

			/* Update to the new resized array */
			mem_ranges = mrange_info->mem_ranges;
		}

		start = base;
		mem_ranges[mrange_info->mem_range_cnt].base = start;
		mrange_info->mem_range_cnt++;
	}

	/*
	 * 'start' is either the new entry's base or, when folding, the
	 * previous entry's base - so this extends the last entry to 'end'.
	 */
	mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
	pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
		 mrange_info->name, (mrange_info->mem_range_cnt - 1),
		 start, end - 1, (end - start));
	return 0;
}
888 | ||
/*
 * Initialize an ELF core (ET_CORE) file header at @bufp.
 *
 * e_phnum is left at zero; the caller appends program headers right
 * after the header (e_phoff) and increments e_phnum for each one.
 * Always returns 0.
 */
static int fadump_init_elfcore_header(char *bufp)
{
	struct elfhdr *elf;

	elf = (struct elfhdr *) bufp;
	bufp += sizeof(struct elfhdr);
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	/* Program headers follow immediately after this header. */
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;

	/* Record the kernel's ELF ABI version in e_flags (0 if unknown). */
	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
		elf->e_flags = 2;
	else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
		elf->e_flags = 1;
	else
		elf->e_flags = 0;

	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = 0;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;

	return 0;
}
924 | ||
/*
 * If the given physical address falls within the boot memory region then
 * return the relocated address that points to the dump region reserved
 * for saving initial boot memory contents.
 *
 * Addresses outside every boot memory region are returned unchanged.
 */
static inline unsigned long fadump_relocate(unsigned long paddr)
{
	unsigned long raddr, rstart, rend, rlast, hole_size;
	int i;

	hole_size = 0;
	rlast = 0;
	raddr = paddr;
	/*
	 * Boot memory regions are copied back-to-back at boot_mem_dest_addr,
	 * so accumulate the size of the holes between regions and subtract
	 * it when translating into the packed destination area.
	 */
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		rstart = fw_dump.boot_mem_addr[i];
		rend = rstart + fw_dump.boot_mem_sz[i];
		hole_size += (rstart - rlast);

		if (paddr >= rstart && paddr < rend) {
			raddr += fw_dump.boot_mem_dest_addr - hole_size;
			break;
		}

		rlast = rend;
	}

	pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
	return raddr;
}
954 | ||
c6c5b14d SJ |
955 | static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, |
956 | u64 size, unsigned long long offset) | |
2df173d9 | 957 | { |
c6c5b14d SJ |
958 | phdr->p_align = 0; |
959 | phdr->p_memsz = size; | |
960 | phdr->p_filesz = size; | |
961 | phdr->p_paddr = start; | |
962 | phdr->p_offset = offset; | |
963 | phdr->p_type = PT_LOAD; | |
964 | phdr->p_flags = PF_R|PF_W|PF_X; | |
965 | phdr->p_vaddr = (unsigned long)__va(start); | |
966 | } | |
967 | ||
/*
 * Build the elfcorehdr (ELF header + program headers) for /proc/vmcore
 * in the buffer at fw_dump.elfcorehdr_addr, using crash info from @fdh.
 *
 * Layout: ELF header, a placeholder PT_NOTE for CPU registers (filled in
 * later by platform code), a PT_NOTE for vmcoreinfo, PT_LOAD segments for
 * the relocated boot memory regions, then PT_LOAD segments for the rest
 * of memory with the fadump reserved area carved out.
 */
static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh)
{
	char *bufp;
	struct elfhdr *elf;
	struct elf_phdr *phdr;
	u64 boot_mem_dest_offset;
	unsigned long long i, ra_start, ra_end, ra_size, mstart, mend;

	bufp = (char *) fw_dump.elfcorehdr_addr;
	fadump_init_elfcore_header(bufp);
	elf = (struct elfhdr *)bufp;
	bufp += sizeof(struct elfhdr);

	/*
	 * Set up ELF PT_NOTE, a placeholder for CPU notes information.
	 * The notes info will be populated later by platform-specific code.
	 * Hence, this PT_NOTE will always be the first ELF note.
	 *
	 * NOTE: Any new ELF note addition should be placed after this note.
	 */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;
	phdr->p_offset = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = 0;
	phdr->p_memsz = 0;
	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/* setup ELF PT_NOTE for vmcoreinfo */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;
	/* Location/size of vmcoreinfo were recorded by the crashed kernel. */
	phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr;
	phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size;
	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/*
	 * Setup PT_LOAD sections. first include boot memory regions
	 * and then add rest of the memory regions.
	 */
	boot_mem_dest_offset = fw_dump.boot_mem_dest_addr;
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		phdr = (struct elf_phdr *)bufp;
		bufp += sizeof(struct elf_phdr);
		/*
		 * Boot memory contents were relocated to the destination
		 * area at crash time; point the segment offset there.
		 */
		populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i],
				     fw_dump.boot_mem_sz[i],
				     boot_mem_dest_offset);
		/* Increment number of program headers. */
		(elf->e_phnum)++;
		boot_mem_dest_offset += fw_dump.boot_mem_sz[i];
	}

	/* Memory reserved for fadump in first kernel */
	ra_start = fw_dump.reserve_dump_area_start;
	ra_size = get_fadump_area_size();
	ra_end = ra_start + ra_size;

	phdr = (struct elf_phdr *)bufp;
	for_each_mem_range(i, &mstart, &mend) {
		/* Boot memory regions already added, skip them now */
		if (mstart < fw_dump.boot_mem_top) {
			if (mend > fw_dump.boot_mem_top)
				mstart = fw_dump.boot_mem_top;
			else
				continue;
		}

		/* Handle memblock regions overlaps with fadump reserved area */
		if ((ra_start < mend) && (ra_end > mstart)) {
			if ((mstart < ra_start) && (mend > ra_end)) {
				/* Reserved area splits the region in two. */
				populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
				/* Increment number of program headers. */
				(elf->e_phnum)++;
				bufp += sizeof(struct elf_phdr);
				phdr = (struct elf_phdr *)bufp;
				populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
			} else if (mstart < ra_start) {
				populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
			} else if (ra_end < mend) {
				populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
			}
			/*
			 * NOTE(review): if a region lies entirely inside the
			 * reserved area, none of the branches above fill
			 * *phdr, yet e_phnum is still incremented below -
			 * presumably such regions cannot occur here; verify.
			 */
		} else {
			/* No overlap with fadump reserved memory region */
			populate_elf_pt_load(phdr, mstart, mend - mstart, mstart);
		}

		/* Increment number of program headers. */
		(elf->e_phnum)++;
		bufp += sizeof(struct elf_phdr);
		phdr = (struct elf_phdr *) bufp;
	}
}
1069 | ||
/*
 * Initialize the fadump crash info header at physical address @addr,
 * recording everything the capture kernel needs to build elfcorehdr.
 *
 * Returns the address immediately past the header, or 0 if @addr is 0.
 */
static unsigned long init_fadump_header(unsigned long addr)
{
	struct fadump_crash_info_header *fdh;

	if (!addr)
		return 0;

	fdh = __va(addr);
	addr += sizeof(struct fadump_crash_info_header);

	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
	fdh->version = FADUMP_HEADER_VERSION;
	/* We will set the crashing cpu id in crash_fadump() during crash. */
	fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;

	/*
	 * The physical address and size of vmcoreinfo are required in the
	 * second kernel to prepare elfcorehdr.
	 */
	fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note());
	fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE;

	/* Sizes let the capture kernel verify layout compatibility. */
	fdh->pt_regs_sz = sizeof(struct pt_regs);
	/*
	 * When LPAR is terminated by PHYP, ensure all possible CPUs'
	 * register data is processed while exporting the vmcore.
	 */
	fdh->cpu_mask = *cpu_possible_mask;
	fdh->cpu_mask_sz = sizeof(struct cpumask);

	return addr;
}
1104 | ||
/*
 * Initialize the crash info header and register the dump with firmware.
 *
 * Returns 0 on success, -ENODEV when no memory is reserved for FADump,
 * or the error from the platform registration callback.
 */
static int register_fadump(void)
{
	unsigned long addr;

	/*
	 * If no memory is reserved then we can not register for firmware-
	 * assisted dump.
	 */
	if (!fw_dump.reserve_dump_area_size)
		return -ENODEV;

	addr = fw_dump.fadumphdr_addr;

	/* Initialize fadump crash info header. */
	addr = init_fadump_header(addr);

	/* register the future kernel dump with firmware. */
	pr_debug("Registering for firmware-assisted kernel dump...\n");
	return fw_dump.ops->fadump_register(&fw_dump);
}
1125 | ||
/*
 * Tear down the current firmware registration: invalidate an active dump,
 * or unregister a pending one, then let the platform do its own cleanup.
 */
void fadump_cleanup(void)
{
	if (!fw_dump.fadump_supported)
		return;

	/* Invalidate the registration only if dump is active. */
	if (fw_dump.dump_active) {
		pr_debug("Invalidating firmware-assisted dump registration\n");
		fw_dump.ops->fadump_invalidate(&fw_dump);
	} else if (fw_dump.dump_registered) {
		/* Un-register Firmware-assisted dump if it was registered. */
		fw_dump.ops->fadump_unregister(&fw_dump);
	}

	/* Optional platform hook. */
	if (fw_dump.ops->fadump_cleanup)
		fw_dump.ops->fadump_cleanup(&fw_dump);
}
1143 | ||
/*
 * Hand the page frames [start_pfn, end_pfn) back to the page allocator,
 * yielding the CPU roughly once per second so large releases do not
 * stall other tasks.
 */
static void fadump_free_reserved_memory(unsigned long start_pfn,
					unsigned long end_pfn)
{
	unsigned long pfn;
	unsigned long time_limit = jiffies + HZ;

	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		free_reserved_page(pfn_to_page(pfn));

		/* Reschedule periodically; freeing can cover gigabytes. */
		if (time_after(jiffies, time_limit)) {
			cond_resched();
			time_limit = jiffies + HZ;
		}
	}
}
1162 | ||
/*
 * Skip memory holes and free memory that was actually reserved.
 *
 * Walks the memblock memory map and frees only the portions of
 * [start, end) that intersect real memory regions.
 */
static void fadump_release_reserved_area(u64 start, u64 end)
{
	unsigned long reg_spfn, reg_epfn;
	u64 tstart, tend, spfn, epfn;
	int i;

	spfn = PHYS_PFN(start);
	epfn = PHYS_PFN(end);

	for (i = 0; i < 1; ) { /* placeholder removed */ }
}
1189 | ||
b500afff | 1190 | /* |
dda9dbfe HB |
1191 | * Sort the mem ranges in-place and merge adjacent ranges |
1192 | * to minimize the memory ranges count. | |
b500afff | 1193 | */ |
dda9dbfe | 1194 | static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info) |
b500afff | 1195 | { |
dda9dbfe | 1196 | struct fadump_memory_range *mem_ranges; |
dda9dbfe HB |
1197 | u64 base, size; |
1198 | int i, j, idx; | |
1199 | ||
1200 | if (!reserved_mrange_info.mem_range_cnt) | |
1201 | return; | |
1202 | ||
1203 | /* Sort the memory ranges */ | |
1204 | mem_ranges = mrange_info->mem_ranges; | |
1205 | for (i = 0; i < mrange_info->mem_range_cnt; i++) { | |
1206 | idx = i; | |
1207 | for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) { | |
1208 | if (mem_ranges[idx].base > mem_ranges[j].base) | |
1209 | idx = j; | |
1210 | } | |
20776319 JC |
1211 | if (idx != i) |
1212 | swap(mem_ranges[idx], mem_ranges[i]); | |
dda9dbfe HB |
1213 | } |
1214 | ||
1215 | /* Merge adjacent reserved ranges */ | |
1216 | idx = 0; | |
1217 | for (i = 1; i < mrange_info->mem_range_cnt; i++) { | |
1218 | base = mem_ranges[i-1].base; | |
1219 | size = mem_ranges[i-1].size; | |
1220 | if (mem_ranges[i].base == (base + size)) | |
1221 | mem_ranges[idx].size += mem_ranges[i].size; | |
1222 | else { | |
1223 | idx++; | |
1224 | if (i == idx) | |
1225 | continue; | |
1226 | ||
1227 | mem_ranges[idx] = mem_ranges[i]; | |
1228 | } | |
1229 | } | |
1230 | mrange_info->mem_range_cnt = idx + 1; | |
1231 | } | |
1232 | ||
/*
 * Scan reserved-ranges to consider them while reserving/releasing
 * memory for FADump.
 *
 * Parses the flat device-tree "reserved-ranges" property of @node into
 * reserved_mrange_info. Idempotent: does nothing if ranges were already
 * collected. Failure to record every range is tolerated with a warning.
 */
static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
	const __be32 *prop;
	int len, ret = -1;
	unsigned long i;

	/* reserved-ranges already scanned */
	if (reserved_mrange_info.mem_range_cnt != 0)
		return;

	prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
	if (!prop)
		return;

	/*
	 * Each reserved range is an (address,size) pair, 2 cells each,
	 * totalling 4 cells per range.
	 */
	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
		u64 base, size;

		base = of_read_number(prop + (i * 4) + 0, 2);
		size = of_read_number(prop + (i * 4) + 2, 2);

		/* Zero-sized entries carry no information; skip them. */
		if (size) {
			ret = fadump_add_mem_range(&reserved_mrange_info,
						   base, base + size);
			if (ret < 0) {
				pr_warn("some reserved ranges are ignored!\n");
				break;
			}
		}
	}

	/* Compact reserved ranges */
	sort_and_merge_mem_ranges(&reserved_mrange_info);
}
1274 | ||
/*
 * Release the memory that was reserved during early boot to preserve the
 * crash'ed kernel's memory contents except reserved dump area (permanent
 * reservation) and reserved ranges used by F/W. The released memory will
 * be available for general use.
 */
static void fadump_release_memory(u64 begin, u64 end)
{
	u64 ra_start, ra_end, tstart;
	int i, ret;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	/*
	 * If reserved ranges array limit is hit, overwrite the last reserved
	 * memory range with reserved dump area to ensure it is excluded from
	 * the memory being released (reused for next FADump registration).
	 */
	if (reserved_mrange_info.mem_range_cnt ==
	    reserved_mrange_info.max_mem_ranges)
		reserved_mrange_info.mem_range_cnt--;

	ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
	if (ret != 0)
		return;

	/* Get the reserved ranges list in order first. */
	sort_and_merge_mem_ranges(&reserved_mrange_info);

	/*
	 * Walk [begin, end) and release every gap between consecutive
	 * reserved ranges; each iteration advances tstart past a range.
	 */
	tstart = begin;
	for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
		ra_start = reserved_mrange_info.mem_ranges[i].base;
		ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;

		if (tstart >= ra_end)
			continue;

		if (tstart < ra_start)
			fadump_release_reserved_area(tstart, ra_start);
		tstart = ra_end;
	}

	/* Tail past the last reserved range. */
	if (tstart < end)
		fadump_release_reserved_area(tstart, end);
}
1322 | ||
c6c5b14d SJ |
1323 | static void fadump_free_elfcorehdr_buf(void) |
1324 | { | |
1325 | if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) | |
1326 | return; | |
1327 | ||
1328 | /* | |
1329 | * Before freeing the memory of `elfcorehdr`, reset the global | |
1330 | * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing | |
1331 | * invalid memory. | |
1332 | */ | |
1333 | elfcorehdr_addr = ELFCORE_ADDR_ERR; | |
1334 | fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); | |
1335 | fw_dump.elfcorehdr_addr = 0; | |
1336 | fw_dump.elfcorehdr_size = 0; | |
1337 | } | |
1338 | ||
/*
 * Invalidate the active dump, release the preserved memory back to the
 * system, and re-initialize the platform structures so FADump can be
 * registered again for the next crash.
 */
static void fadump_invalidate_release_mem(void)
{
	mutex_lock(&fadump_mutex);
	/* Only meaningful while a captured dump is active. */
	if (!fw_dump.dump_active) {
		mutex_unlock(&fadump_mutex);
		return;
	}

	fadump_cleanup();
	mutex_unlock(&fadump_mutex);

	fadump_free_elfcorehdr_buf();
	/* Everything above boot memory was preserved; give it back. */
	fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
	fadump_free_cpu_notes_buf();

	/*
	 * Setup kernel metadata and initialize the kernel dump
	 * memory structure for FADump re-registration.
	 */
	if (fw_dump.ops->fadump_setup_metadata &&
	    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
		pr_warn("Failed to setup kernel metadata!\n");
	fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
1363 | ||
d418b19f SJ |
1364 | static ssize_t release_mem_store(struct kobject *kobj, |
1365 | struct kobj_attribute *attr, | |
1366 | const char *buf, size_t count) | |
b500afff | 1367 | { |
dcdc4679 MS |
1368 | int input = -1; |
1369 | ||
b500afff MS |
1370 | if (!fw_dump.dump_active) |
1371 | return -EPERM; | |
1372 | ||
dcdc4679 MS |
1373 | if (kstrtoint(buf, 0, &input)) |
1374 | return -EINVAL; | |
1375 | ||
1376 | if (input == 1) { | |
b500afff MS |
1377 | /* |
1378 | * Take away the '/proc/vmcore'. We are releasing the dump | |
1379 | * memory, hence it will not be valid anymore. | |
1380 | */ | |
2685f826 | 1381 | #ifdef CONFIG_PROC_VMCORE |
b500afff | 1382 | vmcore_cleanup(); |
2685f826 | 1383 | #endif |
b500afff MS |
1384 | fadump_invalidate_release_mem(); |
1385 | ||
1386 | } else | |
1387 | return -EINVAL; | |
1388 | return count; | |
1389 | } | |
1390 | ||
/* Release the reserved memory and disable the FADump */
static void __init unregister_fadump(void)
{
	fadump_cleanup();
	fadump_release_memory(fw_dump.reserve_dump_area_start,
			      fw_dump.reserve_dump_area_size);
	fw_dump.fadump_enabled = 0;
	/* Drops the sysfs directory created in fadump_init_files(). */
	kobject_put(fadump_kobj);
}
1400 | ||
1401 | static ssize_t enabled_show(struct kobject *kobj, | |
1402 | struct kobj_attribute *attr, | |
1403 | char *buf) | |
3ccc00a7 MS |
1404 | { |
1405 | return sprintf(buf, "%d\n", fw_dump.fadump_enabled); | |
1406 | } | |
1407 | ||
bc446c5a SJ |
1408 | /* |
1409 | * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which inidcates | |
1410 | * to usersapce that fadump re-registration is not required on memory | |
1411 | * hotplug events. | |
1412 | */ | |
1413 | static ssize_t hotplug_ready_show(struct kobject *kobj, | |
1414 | struct kobj_attribute *attr, | |
1415 | char *buf) | |
1416 | { | |
1417 | return sprintf(buf, "%d\n", 1); | |
1418 | } | |
1419 | ||
d8e73458 SJ |
1420 | static ssize_t mem_reserved_show(struct kobject *kobj, |
1421 | struct kobj_attribute *attr, | |
1422 | char *buf) | |
1423 | { | |
1424 | return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size); | |
1425 | } | |
1426 | ||
d418b19f SJ |
1427 | static ssize_t registered_show(struct kobject *kobj, |
1428 | struct kobj_attribute *attr, | |
1429 | char *buf) | |
3ccc00a7 MS |
1430 | { |
1431 | return sprintf(buf, "%d\n", fw_dump.dump_registered); | |
1432 | } | |
1433 | ||
683eab94 HB |
1434 | static ssize_t bootargs_append_show(struct kobject *kobj, |
1435 | struct kobj_attribute *attr, | |
1436 | char *buf) | |
1437 | { | |
1438 | return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area)); | |
1439 | } | |
1440 | ||
1441 | static ssize_t bootargs_append_store(struct kobject *kobj, | |
1442 | struct kobj_attribute *attr, | |
1443 | const char *buf, size_t count) | |
1444 | { | |
1445 | char *params; | |
1446 | ||
1447 | if (!fw_dump.fadump_enabled || fw_dump.dump_active) | |
1448 | return -EPERM; | |
1449 | ||
1450 | if (count >= COMMAND_LINE_SIZE) | |
1451 | return -EINVAL; | |
1452 | ||
1453 | /* | |
1454 | * Fail here instead of handling this scenario with | |
1455 | * some silly workaround in capture kernel. | |
1456 | */ | |
1457 | if (saved_command_line_len + count >= COMMAND_LINE_SIZE) { | |
1458 | pr_err("Appending parameters exceeds cmdline size!\n"); | |
1459 | return -ENOSPC; | |
1460 | } | |
1461 | ||
1462 | params = __va(fw_dump.param_area); | |
1463 | strscpy_pad(params, buf, COMMAND_LINE_SIZE); | |
1464 | /* Remove newline character at the end. */ | |
1465 | if (params[count-1] == '\n') | |
1466 | params[count-1] = '\0'; | |
1467 | ||
1468 | return count; | |
1469 | } | |
1470 | ||
/*
 * Sysfs store for fadump/registered: "0" unregisters the dump from
 * firmware, "1" (re-)registers it. Serialized by fadump_mutex.
 *
 * Returns @count on success or a negative errno.
 */
static ssize_t registered_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	int ret = 0;
	int input = -1;

	if (!fw_dump.fadump_enabled || fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	mutex_lock(&fadump_mutex);

	switch (input) {
	case 0:
		/* Already unregistered: nothing to do. */
		if (fw_dump.dump_registered == 0) {
			goto unlock_out;
		}

		/* Un-register Firmware-assisted dump */
		pr_debug("Un-register firmware-assisted dump\n");
		fw_dump.ops->fadump_unregister(&fw_dump);
		break;
	case 1:
		/* Re-registering: drop the old registration first. */
		if (fw_dump.dump_registered == 1) {
			/* Un-register Firmware-assisted dump */
			fw_dump.ops->fadump_unregister(&fw_dump);
		}
		/* Register Firmware-assisted dump */
		ret = register_fadump();
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock_out:
	mutex_unlock(&fadump_mutex);
	return ret < 0 ? ret : count;
}
1513 | ||
/*
 * Debugfs seq_file show for fadump_region: delegates the per-platform
 * region dump to the ops callback under fadump_mutex.
 */
static int fadump_region_show(struct seq_file *m, void *private)
{
	if (!fw_dump.fadump_enabled)
		return 0;

	mutex_lock(&fadump_mutex);
	fw_dump.ops->fadump_region_show(&fw_dump, m);
	mutex_unlock(&fadump_mutex);
	return 0;
}
1524 | ||
/* Sysfs attributes exposed under /sys/kernel/fadump/. */
static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
static struct kobj_attribute register_attr = __ATTR_RW(registered);
static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready);
static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append);

/*
 * Default attribute group. release_mem and bootargs_append are not
 * listed here - release_mem is created only when a dump is active (see
 * fadump_init_files()); bootargs_append is presumably added elsewhere
 * when the parameter area is available - verify against the rest of
 * the file.
 */
static struct attribute *fadump_attrs[] = {
	&enable_attr.attr,
	&register_attr.attr,
	&mem_reserved_attr.attr,
	&hotplug_ready_attr.attr,
	NULL,
};

ATTRIBUTE_GROUPS(fadump);

/* Generates fadump_region_fops for the debugfs file. */
DEFINE_SHOW_ATTRIBUTE(fadump_region);
3ccc00a7 | 1543 | |
/*
 * Create the FADump sysfs directory (/sys/kernel/fadump/), the debugfs
 * fadump_region file, and backward-compatibility symlinks at the old
 * /sys/kernel/fadump_* locations. Errors are logged but not fatal.
 */
static void __init fadump_init_files(void)
{
	int rc = 0;

	fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
	if (!fadump_kobj) {
		pr_err("failed to create fadump kobject\n");
		return;
	}

	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
			    &fadump_region_fops);

	/* release_mem only makes sense while a captured dump is active. */
	if (fw_dump.dump_active) {
		rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
		if (rc)
			pr_err("unable to create release_mem sysfs file (%d)\n",
			       rc);
	}

	rc = sysfs_create_groups(fadump_kobj, fadump_groups);
	if (rc) {
		pr_err("sysfs group creation failed (%d), unregistering FADump",
		       rc);
		unregister_fadump();
		return;
	}

	/*
	 * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to
	 * create symlink at old location to maintain backward compatibility.
	 *
	 * - fadump_enabled -> fadump/enabled
	 * - fadump_registered -> fadump/registered
	 * - fadump_release_mem -> fadump/release_mem
	 */
	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
						  "enabled", "fadump_enabled");
	if (rc) {
		pr_err("unable to create fadump_enabled symlink (%d)", rc);
		return;
	}

	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
						  "registered",
						  "fadump_registered");
	if (rc) {
		pr_err("unable to create fadump_registered symlink (%d)", rc);
		/* Keep symlinks consistent: drop the first on failure. */
		sysfs_remove_link(kernel_kobj, "fadump_enabled");
		return;
	}

	if (fw_dump.dump_active) {
		rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
							  fadump_kobj,
							  "release_mem",
							  "fadump_release_mem");
		if (rc)
			pr_err("unable to create fadump_release_mem symlink (%d)",
			       rc);
	}
	return;
}
1607 | ||
/*
 * Allocate a page-aligned buffer large enough for the elfcorehdr:
 * the ELF header plus one program header per note/region.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int __init fadump_setup_elfcorehdr_buf(void)
{
	int elf_phdr_cnt;
	unsigned long elfcorehdr_size;

	/*
	 * Program header for CPU notes comes first, followed by one for
	 * vmcoreinfo, and the remaining program headers correspond to
	 * memory regions.
	 */
	elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory);
	elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr));
	elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size);

	fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size);
	if (!fw_dump.elfcorehdr_addr) {
		pr_err("Failed to allocate %lu bytes for elfcorehdr\n",
		       elfcorehdr_size);
		return -ENOMEM;
	}
	fw_dump.elfcorehdr_size = elfcorehdr_size;
	return 0;
}
1631 | ||
1632 | /* | |
1633 | * Check if the fadump header of crashed kernel is compatible with fadump kernel. | |
1634 | * | |
1635 | * It checks the magic number, endianness, and size of non-primitive type | |
1636 | * members of fadump header to ensure safe dump collection. | |
1637 | */ | |
1638 | static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) | |
1639 | { | |
1640 | if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { | |
1641 | pr_err("Old magic number, can't process the dump.\n"); | |
1642 | return false; | |
1643 | } | |
1644 | ||
1645 | if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { | |
1646 | if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) | |
1647 | pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); | |
1648 | else | |
1649 | pr_err("Fadump header is corrupted.\n"); | |
1650 | ||
1651 | return false; | |
1652 | } | |
1653 | ||
1654 | /* | |
1655 | * Dump collection is not safe if the size of non-primitive type members | |
1656 | * of the fadump header do not match between crashed and fadump kernel. | |
1657 | */ | |
1658 | if (fdh->pt_regs_sz != sizeof(struct pt_regs) || | |
1659 | fdh->cpu_mask_sz != sizeof(struct cpumask)) { | |
1660 | pr_err("Fadump header size mismatch.\n"); | |
1661 | return false; | |
1662 | } | |
1663 | ||
1664 | return true; | |
1665 | } | |
1666 | ||
/*
 * Process an active dump in the capture kernel: validate the crash info
 * header, build the elfcorehdr, let the platform fill in CPU notes, and
 * publish the result via elfcorehdr_addr for /proc/vmcore.
 *
 * On any failure the preserved memory is released since the dump cannot
 * be exported.
 */
static void __init fadump_process(void)
{
	struct fadump_crash_info_header *fdh;

	fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr);
	if (!fdh) {
		pr_err("Crash info header is empty.\n");
		goto err_out;
	}

	/* Avoid processing the dump if fadump header isn't compatible */
	if (!is_fadump_header_compatible(fdh))
		goto err_out;

	/* Allocate buffer for elfcorehdr */
	if (fadump_setup_elfcorehdr_buf())
		goto err_out;

	fadump_populate_elfcorehdr(fdh);

	/* Let platform update the CPU notes in elfcorehdr */
	if (fw_dump.ops->fadump_process(&fw_dump) < 0)
		goto err_out;

	/*
	 * elfcorehdr is now ready to be exported.
	 *
	 * set elfcorehdr_addr so that vmcore module will export the
	 * elfcorehdr through '/proc/vmcore'.
	 */
	elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr);
	return;

err_out:
	fadump_invalidate_release_mem();
}
1703 | ||
683eab94 HB |
1704 | /* |
1705 | * Reserve memory to store additional parameters to be passed | |
1706 | * for fadump/capture kernel. | |
1707 | */ | |
1708 | static void fadump_setup_param_area(void) | |
1709 | { | |
1710 | phys_addr_t range_start, range_end; | |
1711 | ||
1712 | if (!fw_dump.param_area_supported || fw_dump.dump_active) | |
1713 | return; | |
1714 | ||
1715 | /* This memory can't be used by PFW or bootloader as it is shared across kernels */ | |
1716 | if (radix_enabled()) { | |
1717 | /* | |
1718 | * Anywhere in the upper half should be good enough as all memory | |
1719 | * is accessible in real mode. | |
1720 | */ | |
1721 | range_start = memblock_end_of_DRAM() / 2; | |
1722 | range_end = memblock_end_of_DRAM(); | |
1723 | } else { | |
1724 | /* | |
1725 | * Passing additional parameters is supported for hash MMU only | |
1726 | * if the first memory block size is 768MB or higher. | |
1727 | */ | |
1728 | if (ppc64_rma_size < 0x30000000) | |
1729 | return; | |
1730 | ||
1731 | /* | |
1732 | * 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving | |
1733 | * memory for passing additional parameters in this range to avoid | |
1734 | * being stomped on by PFW/bootloader. | |
1735 | */ | |
1736 | range_start = 0x2A000000; | |
1737 | range_end = range_start + 0x4000000; | |
1738 | } | |
1739 | ||
1740 | fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE, | |
1741 | COMMAND_LINE_SIZE, | |
1742 | range_start, | |
1743 | range_end); | |
1744 | if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) { | |
1745 | pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); | |
1746 | return; | |
1747 | } | |
1748 | ||
1749 | memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE); | |
1750 | } | |
1751 | ||
3ccc00a7 MS |
1752 | /* |
1753 | * Prepare for firmware-assisted dump. | |
1754 | */ | |
1755 | int __init setup_fadump(void) | |
1756 | { | |
565f9bc0 | 1757 | if (!fw_dump.fadump_supported) |
3ccc00a7 | 1758 | return 0; |
3ccc00a7 | 1759 | |
565f9bc0 | 1760 | fadump_init_files(); |
3ccc00a7 | 1761 | fadump_show_config(); |
565f9bc0 MS |
1762 | |
1763 | if (!fw_dump.fadump_enabled) | |
1764 | return 1; | |
1765 | ||
2df173d9 MS |
1766 | /* |
1767 | * If dump data is available then see if it is valid and prepare for | |
1768 | * saving it to the disk. | |
1769 | */ | |
b500afff | 1770 | if (fw_dump.dump_active) { |
c6c5b14d | 1771 | fadump_process(); |
b500afff | 1772 | } |
607451ce HB |
1773 | /* Initialize the kernel dump memory structure and register with f/w */ |
1774 | else if (fw_dump.reserve_dump_area_size) { | |
683eab94 | 1775 | fadump_setup_param_area(); |
41a65d16 | 1776 | fw_dump.ops->fadump_init_mem_struct(&fw_dump); |
607451ce HB |
1777 | register_fadump(); |
1778 | } | |
f3512011 | 1779 | |
06e629c2 HB |
1780 | /* |
1781 | * In case of panic, fadump is triggered via ppc_panic_event() | |
1782 | * panic notifier. Setting crash_kexec_post_notifiers to 'true' | |
1783 | * lets panic() function take crash friendly path before panic | |
1784 | * notifiers are invoked. | |
1785 | */ | |
1786 | crash_kexec_post_notifiers = true; | |
1787 | ||
3ccc00a7 MS |
1788 | return 1; |
1789 | } | |
607451ce HB |
1790 | /* |
1791 | * Use subsys_initcall_sync() here because there is dependency with | |
1fd02f66 JL |
1792 | * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization |
1793 | * is done before registering with f/w. | |
607451ce HB |
1794 | */ |
1795 | subsys_initcall_sync(setup_fadump); | |
bec53196 HB |
1796 | #else /* !CONFIG_PRESERVE_FA_DUMP */ |
1797 | ||
1798 | /* Scan the Firmware Assisted dump configuration details. */ | |
1799 | int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, | |
1800 | int depth, void *data) | |
1801 | { | |
1802 | if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0)) | |
1803 | return 0; | |
1804 | ||
1805 | opal_fadump_dt_scan(&fw_dump, node); | |
1806 | return 1; | |
1807 | } | |
1808 | ||
1809 | /* | |
1810 | * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel, | |
1811 | * preserve crash data. The subsequent memory preserving kernel boot | |
1812 | * is likely to process this crash data. | |
1813 | */ | |
1814 | int __init fadump_reserve_mem(void) | |
1815 | { | |
1816 | if (fw_dump.dump_active) { | |
1817 | /* | |
1818 | * If last boot has crashed then reserve all the memory | |
1819 | * above boot memory to preserve crash data. | |
1820 | */ | |
1821 | pr_info("Preserving crash data for processing in next boot.\n"); | |
1822 | fadump_reserve_crash_area(fw_dump.boot_mem_top); | |
1823 | } else | |
1824 | pr_debug("FADump-aware kernel..\n"); | |
1825 | ||
1826 | return 1; | |
1827 | } | |
1828 | #endif /* CONFIG_PRESERVE_FA_DUMP */ | |
b2a815a5 HB |
1829 | |
1830 | /* Preserve everything above the base address */ | |
1831 | static void __init fadump_reserve_crash_area(u64 base) | |
1832 | { | |
b10d6bca | 1833 | u64 i, mstart, mend, msize; |
b2a815a5 | 1834 | |
b10d6bca MR |
1835 | for_each_mem_range(i, &mstart, &mend) { |
1836 | msize = mend - mstart; | |
b2a815a5 HB |
1837 | |
1838 | if ((mstart + msize) < base) | |
1839 | continue; | |
1840 | ||
1841 | if (mstart < base) { | |
1842 | msize -= (base - mstart); | |
1843 | mstart = base; | |
1844 | } | |
1845 | ||
1846 | pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data", | |
1847 | (msize >> 20), mstart); | |
1848 | memblock_reserve(mstart, msize); | |
1849 | } | |
1850 | } | |
bec53196 HB |
1851 | |
1852 | unsigned long __init arch_reserved_kernel_pages(void) | |
1853 | { | |
1854 | return memblock_reserved_size() / PAGE_SIZE; | |
1855 | } |