Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
eb39c880 MS |
2 | /* |
3 | * Firmware Assisted dump: A robust mechanism to get reliable kernel crash | |
4 | * dump with assistance from firmware. This approach does not use kexec, | |
5 | * instead firmware assists in booting the kdump kernel while preserving | |
6 | * memory contents. Most of the implementation has been adapted | |
7 | * from phyp assisted dump implementation written by Linas Vepstas and | |
8 | * Manish Ahuja | |
9 | * | |
eb39c880 MS |
10 | * Copyright 2011 IBM Corporation |
11 | * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | |
12 | */ | |
13 | ||
14 | #undef DEBUG | |
15 | #define pr_fmt(fmt) "fadump: " fmt | |
16 | ||
17 | #include <linux/string.h> | |
18 | #include <linux/memblock.h> | |
3ccc00a7 | 19 | #include <linux/delay.h> |
3ccc00a7 | 20 | #include <linux/seq_file.h> |
2df173d9 | 21 | #include <linux/crash_dump.h> |
b500afff MS |
22 | #include <linux/kobject.h> |
23 | #include <linux/sysfs.h> | |
a5818313 | 24 | #include <linux/slab.h> |
a4e92ce8 | 25 | #include <linux/cma.h> |
45d0ba52 | 26 | #include <linux/hugetlb.h> |
dbf77fed | 27 | #include <linux/debugfs.h> |
e6f6390a CL |
28 | #include <linux/of.h> |
29 | #include <linux/of_fdt.h> | |
eb39c880 MS |
30 | |
31 | #include <asm/page.h> | |
eb39c880 | 32 | #include <asm/fadump.h> |
ca986d7f | 33 | #include <asm/fadump-internal.h> |
cad3c834 | 34 | #include <asm/setup.h> |
cbd3d5ba | 35 | #include <asm/interrupt.h> |
eb39c880 | 36 | |
ba608c4f SJ |
37 | /* |
38 | * The CPU who acquired the lock to trigger the fadump crash should | |
39 | * wait for other CPUs to enter. | |
40 | * | |
41 | * The timeout is in milliseconds. | |
42 | */ | |
43 | #define CRASH_TIMEOUT 500 | |
44 | ||
eb39c880 | 45 | static struct fw_dump fw_dump; |
3ccc00a7 | 46 | |
b2a815a5 HB |
47 | static void __init fadump_reserve_crash_area(u64 base); |
48 | ||
bec53196 | 49 | #ifndef CONFIG_PRESERVE_FA_DUMP |
5f987cae | 50 | |
2e341f56 ME |
51 | static struct kobject *fadump_kobj; |
52 | ||
5f987cae | 53 | static atomic_t cpus_in_fadump; |
3ccc00a7 | 54 | static DEFINE_MUTEX(fadump_mutex); |
5f987cae | 55 | |
02c04e37 HB |
56 | #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ |
57 | #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ | |
58 | sizeof(struct fadump_memory_range)) | |
59 | static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; | |
2e341f56 ME |
60 | static struct fadump_mrange_info |
61 | reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true }; | |
02c04e37 HB |
62 | |
63 | static void __init early_init_dt_scan_reserved_ranges(unsigned long node); | |
eb39c880 | 64 | |
a4e92ce8 | 65 | #ifdef CONFIG_CMA |
0226e552 HB |
66 | static struct cma *fadump_cma; |
67 | ||
a4e92ce8 MS |
68 | /* |
69 | * fadump_cma_init() - Initialize CMA area from a fadump reserved memory | |
70 | * | |
71 | * This function initializes CMA area from fadump reserved memory. | |
72 | * The total size of fadump reserved memory covers for boot memory size | |
73 | * + cpu data size + hpte size and metadata. | |
74 | * Initialize only the area equivalent to boot memory size for CMA use. | |
887f56a0 RD |
75 | * The remaining portion of fadump reserved memory will be not given |
76 | * to CMA and pages for those will stay reserved. boot memory size is | |
a4e92ce8 MS |
77 | * aligned per CMA requirement to satisy cma_init_reserved_mem() call. |
78 | * But for some reason even if it fails we still have the memory reservation | |
79 | * with us and we can still continue doing fadump. | |
80 | */ | |
2e341f56 | 81 | static int __init fadump_cma_init(void) |
a4e92ce8 MS |
82 | { |
83 | unsigned long long base, size; | |
84 | int rc; | |
85 | ||
86 | if (!fw_dump.fadump_enabled) | |
87 | return 0; | |
88 | ||
89 | /* | |
90 | * Do not use CMA if user has provided fadump=nocma kernel parameter. | |
91 | * Return 1 to continue with fadump old behaviour. | |
92 | */ | |
93 | if (fw_dump.nocma) | |
94 | return 1; | |
95 | ||
96 | base = fw_dump.reserve_dump_area_start; | |
97 | size = fw_dump.boot_memory_size; | |
98 | ||
99 | if (!size) | |
100 | return 0; | |
101 | ||
102 | rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); | |
103 | if (rc) { | |
104 | pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc); | |
105 | /* | |
106 | * Though the CMA init has failed we still have memory | |
107 | * reservation with us. The reserved memory will be | |
108 | * blocked from production system usage. Hence return 1, | |
109 | * so that we can continue with fadump. | |
110 | */ | |
111 | return 1; | |
112 | } | |
113 | ||
ee97347f HB |
114 | /* |
115 | * If CMA activation fails, keep the pages reserved, instead of | |
116 | * exposing them to buddy allocator. Same as 'fadump=nocma' case. | |
117 | */ | |
118 | cma_reserve_pages_on_error(fadump_cma); | |
119 | ||
a4e92ce8 MS |
120 | /* |
121 | * So we now have successfully initialized cma area for fadump. | |
122 | */ | |
123 | pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " | |
124 | "bytes of memory reserved for firmware-assisted dump\n", | |
125 | cma_get_size(fadump_cma), | |
126 | (unsigned long)cma_get_base(fadump_cma) >> 20, | |
127 | fw_dump.reserve_dump_area_size); | |
128 | return 1; | |
129 | } | |
130 | #else | |
131 | static int __init fadump_cma_init(void) { return 1; } | |
132 | #endif /* CONFIG_CMA */ | |
133 | ||
3416c9da HB |
134 | /* |
135 | * Additional parameters meant for capture kernel are placed in a dedicated area. | |
136 | * If this is capture kernel boot, append these parameters to bootargs. | |
137 | */ | |
138 | void __init fadump_append_bootargs(void) | |
139 | { | |
140 | char *append_args; | |
141 | size_t len; | |
142 | ||
143 | if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) | |
144 | return; | |
145 | ||
146 | if (fw_dump.param_area >= fw_dump.boot_mem_top) { | |
147 | if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { | |
148 | pr_warn("WARNING: Can't use additional parameters area!\n"); | |
149 | fw_dump.param_area = 0; | |
150 | return; | |
151 | } | |
152 | } | |
153 | ||
154 | append_args = (char *)fw_dump.param_area; | |
155 | len = strlen(boot_command_line); | |
156 | ||
157 | /* | |
158 | * Too late to fail even if cmdline size exceeds. Truncate additional parameters | |
159 | * to cmdline size and proceed anyway. | |
160 | */ | |
161 | if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1) | |
162 | pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n"); | |
163 | ||
164 | pr_debug("Cmdline: %s\n", boot_command_line); | |
165 | snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args); | |
166 | pr_info("Updated cmdline: %s\n", boot_command_line); | |
167 | } | |
168 | ||
eb39c880 | 169 | /* Scan the Firmware Assisted dump configuration details. */ |
f3512011 HB |
170 | int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, |
171 | int depth, void *data) | |
eb39c880 | 172 | { |
02c04e37 HB |
173 | if (depth == 0) { |
174 | early_init_dt_scan_reserved_ranges(node); | |
175 | return 0; | |
176 | } | |
177 | ||
41df5928 | 178 | if (depth != 1) |
eb39c880 MS |
179 | return 0; |
180 | ||
41df5928 HB |
181 | if (strcmp(uname, "rtas") == 0) { |
182 | rtas_fadump_dt_scan(&fw_dump, node); | |
183 | return 1; | |
184 | } | |
185 | ||
186 | if (strcmp(uname, "ibm,opal") == 0) { | |
187 | opal_fadump_dt_scan(&fw_dump, node); | |
188 | return 1; | |
189 | } | |
190 | ||
191 | return 0; | |
eb39c880 MS |
192 | } |
193 | ||
eae0dfcc HB |
194 | /* |
195 | * If fadump is registered, check if the memory provided | |
0db6896f | 196 | * falls within boot memory area and reserved memory area. |
eae0dfcc | 197 | */ |
becd91d9 | 198 | int is_fadump_memory_area(u64 addr, unsigned long size) |
eae0dfcc | 199 | { |
becd91d9 | 200 | u64 d_start, d_end; |
0db6896f | 201 | |
eae0dfcc HB |
202 | if (!fw_dump.dump_registered) |
203 | return 0; | |
204 | ||
becd91d9 HB |
205 | if (!size) |
206 | return 0; | |
207 | ||
208 | d_start = fw_dump.reserve_dump_area_start; | |
209 | d_end = d_start + fw_dump.reserve_dump_area_size; | |
0db6896f MS |
210 | if (((addr + size) > d_start) && (addr <= d_end)) |
211 | return 1; | |
212 | ||
7dee93a9 | 213 | return (addr <= fw_dump.boot_mem_top); |
eae0dfcc HB |
214 | } |
215 | ||
6fcd6baa NP |
216 | int should_fadump_crash(void) |
217 | { | |
218 | if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) | |
219 | return 0; | |
220 | return 1; | |
221 | } | |
222 | ||
3ccc00a7 MS |
223 | int is_fadump_active(void) |
224 | { | |
225 | return fw_dump.dump_active; | |
226 | } | |
227 | ||
a5a05b91 | 228 | /* |
961cf26a HB |
229 | * Returns true, if there are no holes in memory area between d_start to d_end, |
230 | * false otherwise. | |
a5a05b91 | 231 | */ |
961cf26a | 232 | static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) |
a5a05b91 | 233 | { |
b10d6bca | 234 | phys_addr_t reg_start, reg_end; |
961cf26a | 235 | bool ret = false; |
b10d6bca | 236 | u64 i, start, end; |
a5a05b91 | 237 | |
b10d6bca MR |
238 | for_each_mem_range(i, ®_start, ®_end) { |
239 | start = max_t(u64, d_start, reg_start); | |
240 | end = min_t(u64, d_end, reg_end); | |
961cf26a HB |
241 | if (d_start < end) { |
242 | /* Memory hole from d_start to start */ | |
243 | if (start > d_start) | |
a5a05b91 HB |
244 | break; |
245 | ||
961cf26a HB |
246 | if (end == d_end) { |
247 | ret = true; | |
a5a05b91 HB |
248 | break; |
249 | } | |
250 | ||
961cf26a | 251 | d_start = end + 1; |
a5a05b91 HB |
252 | } |
253 | } | |
254 | ||
255 | return ret; | |
256 | } | |
257 | ||
f86593be MS |
258 | /* |
259 | * Returns true, if there are no holes in reserved memory area, | |
260 | * false otherwise. | |
261 | */ | |
7f0ad11d | 262 | bool is_fadump_reserved_mem_contiguous(void) |
f86593be | 263 | { |
961cf26a | 264 | u64 d_start, d_end; |
f86593be | 265 | |
961cf26a HB |
266 | d_start = fw_dump.reserve_dump_area_start; |
267 | d_end = d_start + fw_dump.reserve_dump_area_size; | |
268 | return is_fadump_mem_area_contiguous(d_start, d_end); | |
f86593be MS |
269 | } |
270 | ||
3ccc00a7 | 271 | /* Print firmware assisted dump configurations for debugging purpose. */ |
d276960d | 272 | static void __init fadump_show_config(void) |
3ccc00a7 | 273 | { |
7dee93a9 HB |
274 | int i; |
275 | ||
3ccc00a7 MS |
276 | pr_debug("Support for firmware-assisted dump (fadump): %s\n", |
277 | (fw_dump.fadump_supported ? "present" : "no support")); | |
278 | ||
279 | if (!fw_dump.fadump_supported) | |
280 | return; | |
281 | ||
282 | pr_debug("Fadump enabled : %s\n", | |
283 | (fw_dump.fadump_enabled ? "yes" : "no")); | |
284 | pr_debug("Dump Active : %s\n", | |
285 | (fw_dump.dump_active ? "yes" : "no")); | |
286 | pr_debug("Dump section sizes:\n"); | |
287 | pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); | |
288 | pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); | |
7dee93a9 HB |
289 | pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size); |
290 | pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top); | |
291 | pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt); | |
292 | for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { | |
293 | pr_debug("[%03d] base = %llx, size = %llx\n", i, | |
294 | fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]); | |
295 | } | |
3ccc00a7 MS |
296 | } |
297 | ||
eb39c880 MS |
298 | /** |
299 | * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM | |
300 | * | |
301 | * Function to find the largest memory size we need to reserve during early | |
302 | * boot process. This will be the size of the memory that is required for a | |
303 | * kernel to boot successfully. | |
304 | * | |
305 | * This function has been taken from phyp-assisted dump feature implementation. | |
306 | * | |
307 | * returns larger of 256MB or 5% rounded down to multiples of 256MB. | |
308 | * | |
309 | * TODO: Come up with better approach to find out more accurate memory size | |
310 | * that is required for a kernel to boot successfully. | |
311 | * | |
312 | */ | |
fbced154 | 313 | static __init u64 fadump_calculate_reserve_size(void) |
eb39c880 | 314 | { |
7b1b3b48 | 315 | u64 base, size, bootmem_min; |
11550dc0 | 316 | int ret; |
eb39c880 | 317 | |
81d9eca5 HB |
318 | if (fw_dump.reserve_bootvar) |
319 | pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n"); | |
320 | ||
eb39c880 | 321 | /* |
11550dc0 | 322 | * Check if the size is specified through crashkernel= cmdline |
e7467dc6 HB |
323 | * option. If yes, then use that but ignore base as fadump reserves |
324 | * memory at a predefined offset. | |
eb39c880 | 325 | */ |
11550dc0 | 326 | ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), |
a9e1a3d8 | 327 | &size, &base, NULL, NULL); |
11550dc0 | 328 | if (ret == 0 && size > 0) { |
48a316e3 HB |
329 | unsigned long max_size; |
330 | ||
81d9eca5 HB |
331 | if (fw_dump.reserve_bootvar) |
332 | pr_info("Using 'crashkernel=' parameter for memory reservation.\n"); | |
333 | ||
11550dc0 | 334 | fw_dump.reserve_bootvar = (unsigned long)size; |
48a316e3 HB |
335 | |
336 | /* | |
337 | * Adjust if the boot memory size specified is above | |
338 | * the upper limit. | |
339 | */ | |
340 | max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO; | |
341 | if (fw_dump.reserve_bootvar > max_size) { | |
342 | fw_dump.reserve_bootvar = max_size; | |
343 | pr_info("Adjusted boot memory size to %luMB\n", | |
344 | (fw_dump.reserve_bootvar >> 20)); | |
345 | } | |
346 | ||
eb39c880 | 347 | return fw_dump.reserve_bootvar; |
81d9eca5 HB |
348 | } else if (fw_dump.reserve_bootvar) { |
349 | /* | |
350 | * 'fadump_reserve_mem=' is being used to reserve memory | |
351 | * for firmware-assisted dump. | |
352 | */ | |
353 | return fw_dump.reserve_bootvar; | |
11550dc0 | 354 | } |
eb39c880 MS |
355 | |
356 | /* divide by 20 to get 5% of value */ | |
48a316e3 | 357 | size = memblock_phys_mem_size() / 20; |
eb39c880 MS |
358 | |
359 | /* round it down in multiples of 256 */ | |
360 | size = size & ~0x0FFFFFFFUL; | |
361 | ||
362 | /* Truncate to memory_limit. We don't want to over reserve the memory.*/ | |
363 | if (memory_limit && size > memory_limit) | |
364 | size = memory_limit; | |
365 | ||
7b1b3b48 HB |
366 | bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); |
367 | return (size > bootmem_min ? size : bootmem_min); | |
eb39c880 MS |
368 | } |
369 | ||
370 | /* | |
371 | * Calculate the total memory size required to be reserved for | |
372 | * firmware-assisted dump registration. | |
373 | */ | |
d276960d | 374 | static unsigned long __init get_fadump_area_size(void) |
eb39c880 MS |
375 | { |
376 | unsigned long size = 0; | |
377 | ||
378 | size += fw_dump.cpu_state_data_size; | |
379 | size += fw_dump.hpte_region_size; | |
9cf3b3a3 HB |
380 | /* |
381 | * Account for pagesize alignment of boot memory area destination address. | |
382 | * This faciliates in mmap reading of first kernel's memory. | |
383 | */ | |
384 | size = PAGE_ALIGN(size); | |
eb39c880 | 385 | size += fw_dump.boot_memory_size; |
2df173d9 | 386 | size += sizeof(struct fadump_crash_info_header); |
742a265a HB |
387 | |
388 | /* This is to hold kernel metadata on platforms that support it */ | |
389 | size += (fw_dump.ops->fadump_get_metadata_size ? | |
390 | fw_dump.ops->fadump_get_metadata_size() : 0); | |
eb39c880 MS |
391 | return size; |
392 | } | |
393 | ||
7dee93a9 HB |
394 | static int __init add_boot_mem_region(unsigned long rstart, |
395 | unsigned long rsize) | |
396 | { | |
78d5cc15 | 397 | int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns(); |
7dee93a9 HB |
398 | int i = fw_dump.boot_mem_regs_cnt++; |
399 | ||
78d5cc15 HB |
400 | if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) { |
401 | fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns; | |
7dee93a9 HB |
402 | return 0; |
403 | } | |
404 | ||
405 | pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n", | |
406 | i, rstart, (rstart + rsize)); | |
407 | fw_dump.boot_mem_addr[i] = rstart; | |
408 | fw_dump.boot_mem_sz[i] = rsize; | |
409 | return 1; | |
410 | } | |
411 | ||
412 | /* | |
413 | * Firmware usually has a hard limit on the data it can copy per region. | |
414 | * Honour that by splitting a memory range into multiple regions. | |
415 | */ | |
416 | static int __init add_boot_mem_regions(unsigned long mstart, | |
417 | unsigned long msize) | |
418 | { | |
419 | unsigned long rstart, rsize, max_size; | |
420 | int ret = 1; | |
421 | ||
422 | rstart = mstart; | |
423 | max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize; | |
424 | while (msize) { | |
425 | if (msize > max_size) | |
426 | rsize = max_size; | |
427 | else | |
428 | rsize = msize; | |
429 | ||
430 | ret = add_boot_mem_region(rstart, rsize); | |
431 | if (!ret) | |
432 | break; | |
433 | ||
434 | msize -= rsize; | |
435 | rstart += rsize; | |
436 | } | |
437 | ||
438 | return ret; | |
439 | } | |
440 | ||
441 | static int __init fadump_get_boot_mem_regions(void) | |
442 | { | |
b10d6bca | 443 | unsigned long size, cur_size, hole_size, last_end; |
7dee93a9 | 444 | unsigned long mem_size = fw_dump.boot_memory_size; |
b10d6bca | 445 | phys_addr_t reg_start, reg_end; |
7dee93a9 | 446 | int ret = 1; |
b10d6bca | 447 | u64 i; |
7dee93a9 HB |
448 | |
449 | fw_dump.boot_mem_regs_cnt = 0; | |
450 | ||
451 | last_end = 0; | |
452 | hole_size = 0; | |
453 | cur_size = 0; | |
b10d6bca MR |
454 | for_each_mem_range(i, ®_start, ®_end) { |
455 | size = reg_end - reg_start; | |
456 | hole_size += (reg_start - last_end); | |
7dee93a9 HB |
457 | |
458 | if ((cur_size + size) >= mem_size) { | |
459 | size = (mem_size - cur_size); | |
b10d6bca | 460 | ret = add_boot_mem_regions(reg_start, size); |
7dee93a9 HB |
461 | break; |
462 | } | |
463 | ||
464 | mem_size -= size; | |
465 | cur_size += size; | |
b10d6bca | 466 | ret = add_boot_mem_regions(reg_start, size); |
7dee93a9 HB |
467 | if (!ret) |
468 | break; | |
469 | ||
b10d6bca | 470 | last_end = reg_end; |
7dee93a9 HB |
471 | } |
472 | fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size); | |
473 | ||
474 | return ret; | |
475 | } | |
476 | ||
140777a3 HB |
477 | /* |
478 | * Returns true, if the given range overlaps with reserved memory ranges | |
479 | * starting at idx. Also, updates idx to index of overlapping memory range | |
480 | * with the given memory range. | |
481 | * False, otherwise. | |
482 | */ | |
d276960d | 483 | static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx) |
140777a3 HB |
484 | { |
485 | bool ret = false; | |
486 | int i; | |
487 | ||
488 | for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) { | |
489 | u64 rbase = reserved_mrange_info.mem_ranges[i].base; | |
490 | u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size; | |
491 | ||
492 | if (end <= rbase) | |
493 | break; | |
494 | ||
495 | if ((end > rbase) && (base < rend)) { | |
496 | *idx = i; | |
497 | ret = true; | |
498 | break; | |
499 | } | |
500 | } | |
501 | ||
502 | return ret; | |
503 | } | |
504 | ||
505 | /* | |
506 | * Locate a suitable memory area to reserve memory for FADump. While at it, | |
507 | * lookup reserved-ranges & avoid overlap with them, as they are used by F/W. | |
508 | */ | |
509 | static u64 __init fadump_locate_reserve_mem(u64 base, u64 size) | |
510 | { | |
511 | struct fadump_memory_range *mrngs; | |
512 | phys_addr_t mstart, mend; | |
513 | int idx = 0; | |
514 | u64 i, ret = 0; | |
515 | ||
516 | mrngs = reserved_mrange_info.mem_ranges; | |
517 | for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, | |
518 | &mstart, &mend, NULL) { | |
519 | pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n", | |
520 | i, mstart, mend, base); | |
521 | ||
522 | if (mstart > base) | |
523 | base = PAGE_ALIGN(mstart); | |
524 | ||
525 | while ((mend > base) && ((mend - base) >= size)) { | |
526 | if (!overlaps_reserved_ranges(base, base+size, &idx)) { | |
527 | ret = base; | |
528 | goto out; | |
529 | } | |
530 | ||
531 | base = mrngs[idx].base + mrngs[idx].size; | |
532 | base = PAGE_ALIGN(base); | |
533 | } | |
534 | } | |
535 | ||
536 | out: | |
537 | return ret; | |
538 | } | |
539 | ||
eb39c880 MS |
540 | int __init fadump_reserve_mem(void) |
541 | { | |
140777a3 | 542 | u64 base, size, mem_boundary, bootmem_min; |
6abec12c | 543 | int ret = 1; |
eb39c880 MS |
544 | |
545 | if (!fw_dump.fadump_enabled) | |
546 | return 0; | |
547 | ||
548 | if (!fw_dump.fadump_supported) { | |
6abec12c HB |
549 | pr_info("Firmware-Assisted Dump is not supported on this hardware\n"); |
550 | goto error_out; | |
eb39c880 | 551 | } |
742a265a | 552 | |
3ccc00a7 MS |
553 | /* |
554 | * Initialize boot memory size | |
555 | * If dump is active then we have already calculated the size during | |
556 | * first kernel. | |
557 | */ | |
f3512011 | 558 | if (!fw_dump.dump_active) { |
6abec12c HB |
559 | fw_dump.boot_memory_size = |
560 | PAGE_ALIGN(fadump_calculate_reserve_size()); | |
a4e92ce8 | 561 | #ifdef CONFIG_CMA |
579ca1a2 | 562 | if (!fw_dump.nocma) { |
a4e92ce8 | 563 | fw_dump.boot_memory_size = |
140777a3 | 564 | ALIGN(fw_dump.boot_memory_size, |
e16faf26 | 565 | CMA_MIN_ALIGNMENT_BYTES); |
579ca1a2 | 566 | } |
a4e92ce8 | 567 | #endif |
7b1b3b48 HB |
568 | |
569 | bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); | |
570 | if (fw_dump.boot_memory_size < bootmem_min) { | |
571 | pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n", | |
572 | fw_dump.boot_memory_size, bootmem_min); | |
573 | goto error_out; | |
574 | } | |
7dee93a9 HB |
575 | |
576 | if (!fadump_get_boot_mem_regions()) { | |
577 | pr_err("Too many holes in boot memory area to enable fadump\n"); | |
578 | goto error_out; | |
579 | } | |
a4e92ce8 | 580 | } |
eb39c880 | 581 | |
eb39c880 | 582 | if (memory_limit) |
6abec12c | 583 | mem_boundary = memory_limit; |
eb39c880 | 584 | else |
6abec12c | 585 | mem_boundary = memblock_end_of_DRAM(); |
eb39c880 | 586 | |
7dee93a9 | 587 | base = fw_dump.boot_mem_top; |
8255da95 HB |
588 | size = get_fadump_area_size(); |
589 | fw_dump.reserve_dump_area_size = size; | |
eb39c880 | 590 | if (fw_dump.dump_active) { |
b71a693d MS |
591 | pr_info("Firmware-assisted dump is active.\n"); |
592 | ||
85975387 HB |
593 | #ifdef CONFIG_HUGETLB_PAGE |
594 | /* | |
595 | * FADump capture kernel doesn't care much about hugepages. | |
596 | * In fact, handling hugepages in capture kernel is asking for | |
597 | * trouble. So, disable HugeTLB support when fadump is active. | |
598 | */ | |
599 | hugetlb_disabled = true; | |
600 | #endif | |
eb39c880 MS |
601 | /* |
602 | * If last boot has crashed then reserve all the memory | |
b2a815a5 | 603 | * above boot memory size so that we don't touch it until |
eb39c880 | 604 | * dump is written to disk by userspace tool. This memory |
b2a815a5 | 605 | * can be released for general use by invalidating fadump. |
eb39c880 | 606 | */ |
b2a815a5 | 607 | fadump_reserve_crash_area(base); |
2df173d9 | 608 | |
f3512011 HB |
609 | pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr); |
610 | pr_debug("Reserve dump area start address: 0x%lx\n", | |
611 | fw_dump.reserve_dump_area_start); | |
8255da95 | 612 | } else { |
f6e6bedb HB |
613 | /* |
614 | * Reserve memory at an offset closer to bottom of the RAM to | |
579ca1a2 | 615 | * minimize the impact of memory hot-remove operation. |
f6e6bedb | 616 | */ |
140777a3 | 617 | base = fadump_locate_reserve_mem(base, size); |
6abec12c | 618 | |
9a2921e5 | 619 | if (!base || (base + size > mem_boundary)) { |
742a265a HB |
620 | pr_err("Failed to find memory chunk for reservation!\n"); |
621 | goto error_out; | |
622 | } | |
623 | fw_dump.reserve_dump_area_start = base; | |
624 | ||
625 | /* | |
626 | * Calculate the kernel metadata address and register it with | |
627 | * f/w if the platform supports. | |
628 | */ | |
629 | if (fw_dump.ops->fadump_setup_metadata && | |
630 | (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0)) | |
631 | goto error_out; | |
632 | ||
633 | if (memblock_reserve(base, size)) { | |
6abec12c HB |
634 | pr_err("Failed to reserve memory!\n"); |
635 | goto error_out; | |
f6e6bedb HB |
636 | } |
637 | ||
6abec12c HB |
638 | pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n", |
639 | (size >> 20), base, (memblock_phys_mem_size() >> 20)); | |
f6e6bedb | 640 | |
6abec12c | 641 | ret = fadump_cma_init(); |
a4e92ce8 | 642 | } |
6abec12c HB |
643 | |
644 | return ret; | |
645 | error_out: | |
646 | fw_dump.fadump_enabled = 0; | |
d1eb75e0 | 647 | fw_dump.reserve_dump_area_size = 0; |
6abec12c | 648 | return 0; |
eb39c880 MS |
649 | } |
650 | ||
651 | /* Look for fadump= cmdline option. */ | |
652 | static int __init early_fadump_param(char *p) | |
653 | { | |
654 | if (!p) | |
655 | return 1; | |
656 | ||
657 | if (strncmp(p, "on", 2) == 0) | |
658 | fw_dump.fadump_enabled = 1; | |
659 | else if (strncmp(p, "off", 3) == 0) | |
660 | fw_dump.fadump_enabled = 0; | |
a4e92ce8 MS |
661 | else if (strncmp(p, "nocma", 5) == 0) { |
662 | fw_dump.fadump_enabled = 1; | |
663 | fw_dump.nocma = 1; | |
664 | } | |
eb39c880 MS |
665 | |
666 | return 0; | |
667 | } | |
668 | early_param("fadump", early_fadump_param); | |
669 | ||
81d9eca5 HB |
670 | /* |
671 | * Look for fadump_reserve_mem= cmdline option | |
672 | * TODO: Remove references to 'fadump_reserve_mem=' parameter, | |
673 | * the sooner 'crashkernel=' parameter is accustomed to. | |
674 | */ | |
675 | static int __init early_fadump_reserve_mem(char *p) | |
676 | { | |
677 | if (p) | |
678 | fw_dump.reserve_bootvar = memparse(p, &p); | |
679 | return 0; | |
680 | } | |
681 | early_param("fadump_reserve_mem", early_fadump_reserve_mem); | |
682 | ||
ebaeb5ae MS |
683 | void crash_fadump(struct pt_regs *regs, const char *str) |
684 | { | |
ba608c4f | 685 | unsigned int msecs; |
ebaeb5ae | 686 | struct fadump_crash_info_header *fdh = NULL; |
f2a5e8f0 | 687 | int old_cpu, this_cpu; |
ba608c4f SJ |
688 | /* Do not include first CPU */ |
689 | unsigned int ncpus = num_online_cpus() - 1; | |
ebaeb5ae | 690 | |
6fcd6baa | 691 | if (!should_fadump_crash()) |
ebaeb5ae MS |
692 | return; |
693 | ||
f2a5e8f0 MS |
694 | /* |
695 | * old_cpu == -1 means this is the first CPU which has come here, | |
696 | * go ahead and trigger fadump. | |
697 | * | |
0ddbbb89 | 698 | * old_cpu != -1 means some other CPU has already on its way |
f2a5e8f0 MS |
699 | * to trigger fadump, just keep looping here. |
700 | */ | |
701 | this_cpu = smp_processor_id(); | |
702 | old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); | |
703 | ||
704 | if (old_cpu != -1) { | |
ba608c4f SJ |
705 | atomic_inc(&cpus_in_fadump); |
706 | ||
f2a5e8f0 MS |
707 | /* |
708 | * We can't loop here indefinitely. Wait as long as fadump | |
709 | * is in force. If we race with fadump un-registration this | |
710 | * loop will break and then we go down to normal panic path | |
711 | * and reboot. If fadump is in force the first crashing | |
712 | * cpu will definitely trigger fadump. | |
713 | */ | |
714 | while (fw_dump.dump_registered) | |
715 | cpu_relax(); | |
716 | return; | |
717 | } | |
718 | ||
ebaeb5ae | 719 | fdh = __va(fw_dump.fadumphdr_addr); |
ebaeb5ae MS |
720 | fdh->crashing_cpu = crashing_cpu; |
721 | crash_save_vmcoreinfo(); | |
722 | ||
723 | if (regs) | |
724 | fdh->regs = *regs; | |
725 | else | |
726 | ppc_save_regs(&fdh->regs); | |
727 | ||
6584cec0 | 728 | fdh->cpu_mask = *cpu_online_mask; |
ebaeb5ae | 729 | |
ba608c4f SJ |
730 | /* |
731 | * If we came in via system reset, wait a while for the secondary | |
732 | * CPUs to enter. | |
733 | */ | |
7153d4bf | 734 | if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) { |
ba608c4f SJ |
735 | msecs = CRASH_TIMEOUT; |
736 | while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) | |
737 | mdelay(1); | |
738 | } | |
739 | ||
41a65d16 | 740 | fw_dump.ops->fadump_trigger(fdh, str); |
ebaeb5ae MS |
741 | } |
742 | ||
d276960d | 743 | u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) |
ebaeb5ae MS |
744 | { |
745 | struct elf_prstatus prstatus; | |
746 | ||
747 | memset(&prstatus, 0, sizeof(prstatus)); | |
748 | /* | |
749 | * FIXME: How do i get PID? Do I really need it? | |
750 | * prstatus.pr_pid = ???? | |
751 | */ | |
9554e908 | 752 | elf_core_copy_regs(&prstatus.pr_reg, regs); |
22bd0177 HB |
753 | buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, |
754 | &prstatus, sizeof(prstatus)); | |
ebaeb5ae MS |
755 | return buf; |
756 | } | |
757 | ||
d276960d | 758 | void __init fadump_update_elfcore_header(char *bufp) |
ebaeb5ae | 759 | { |
ebaeb5ae MS |
760 | struct elf_phdr *phdr; |
761 | ||
ebaeb5ae MS |
762 | bufp += sizeof(struct elfhdr); |
763 | ||
764 | /* First note is a place holder for cpu notes info. */ | |
765 | phdr = (struct elf_phdr *)bufp; | |
766 | ||
767 | if (phdr->p_type == PT_NOTE) { | |
961cf26a | 768 | phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr); |
ebaeb5ae MS |
769 | phdr->p_offset = phdr->p_paddr; |
770 | phdr->p_filesz = fw_dump.cpu_notes_buf_size; | |
771 | phdr->p_memsz = fw_dump.cpu_notes_buf_size; | |
772 | } | |
773 | return; | |
774 | } | |
775 | ||
d276960d | 776 | static void *__init fadump_alloc_buffer(unsigned long size) |
ebaeb5ae | 777 | { |
72aa6517 | 778 | unsigned long count, i; |
ebaeb5ae | 779 | struct page *page; |
72aa6517 | 780 | void *vaddr; |
ebaeb5ae | 781 | |
72aa6517 | 782 | vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); |
ebaeb5ae MS |
783 | if (!vaddr) |
784 | return NULL; | |
785 | ||
72aa6517 | 786 | count = PAGE_ALIGN(size) / PAGE_SIZE; |
ebaeb5ae MS |
787 | page = virt_to_page(vaddr); |
788 | for (i = 0; i < count; i++) | |
72aa6517 | 789 | mark_page_reserved(page + i); |
ebaeb5ae MS |
790 | return vaddr; |
791 | } | |
792 | ||
/* Release the reserved pages covering [vaddr, vaddr + size). */
static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
	void *area_start = (void *)vaddr;
	void *area_end = (void *)(vaddr + size);

	free_reserved_area(area_start, area_end, -1, NULL);
}
797 | ||
d276960d | 798 | s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus) |
961cf26a HB |
799 | { |
800 | /* Allocate buffer to hold cpu crash notes. */ | |
801 | fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); | |
802 | fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); | |
803 | fw_dump.cpu_notes_buf_vaddr = | |
804 | (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size); | |
805 | if (!fw_dump.cpu_notes_buf_vaddr) { | |
806 | pr_err("Failed to allocate %ld bytes for CPU notes buffer\n", | |
807 | fw_dump.cpu_notes_buf_size); | |
808 | return -ENOMEM; | |
809 | } | |
810 | ||
811 | pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n", | |
812 | fw_dump.cpu_notes_buf_size, | |
813 | fw_dump.cpu_notes_buf_vaddr); | |
814 | return 0; | |
815 | } | |
816 | ||
7f0ad11d | 817 | void fadump_free_cpu_notes_buf(void) |
961cf26a HB |
818 | { |
819 | if (!fw_dump.cpu_notes_buf_vaddr) | |
820 | return; | |
821 | ||
822 | fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr, | |
823 | fw_dump.cpu_notes_buf_size); | |
824 | fw_dump.cpu_notes_buf_vaddr = 0; | |
825 | fw_dump.cpu_notes_buf_size = 0; | |
826 | } | |
827 | ||
e4fc48fb | 828 | static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info) |
1bd6a1c4 | 829 | { |
02c04e37 HB |
830 | if (mrange_info->is_static) { |
831 | mrange_info->mem_range_cnt = 0; | |
832 | return; | |
833 | } | |
834 | ||
e4fc48fb | 835 | kfree(mrange_info->mem_ranges); |
02c04e37 HB |
836 | memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0, |
837 | (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ)); | |
1bd6a1c4 HB |
838 | } |
839 | ||
840 | /* | |
e4fc48fb | 841 | * Allocate or reallocate mem_ranges array in incremental units |
1bd6a1c4 HB |
842 | * of PAGE_SIZE. |
843 | */ | |
e4fc48fb | 844 | static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info) |
1bd6a1c4 | 845 | { |
e4fc48fb | 846 | struct fadump_memory_range *new_array; |
1bd6a1c4 HB |
847 | u64 new_size; |
848 | ||
e4fc48fb HB |
849 | new_size = mrange_info->mem_ranges_sz + PAGE_SIZE; |
850 | pr_debug("Allocating %llu bytes of memory for %s memory ranges\n", | |
851 | new_size, mrange_info->name); | |
1bd6a1c4 | 852 | |
e4fc48fb | 853 | new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL); |
1bd6a1c4 | 854 | if (new_array == NULL) { |
e4fc48fb HB |
855 | pr_err("Insufficient memory for setting up %s memory ranges\n", |
856 | mrange_info->name); | |
857 | fadump_free_mem_ranges(mrange_info); | |
1bd6a1c4 HB |
858 | return -ENOMEM; |
859 | } | |
860 | ||
e4fc48fb HB |
861 | mrange_info->mem_ranges = new_array; |
862 | mrange_info->mem_ranges_sz = new_size; | |
863 | mrange_info->max_mem_ranges = (new_size / | |
864 | sizeof(struct fadump_memory_range)); | |
1bd6a1c4 HB |
865 | return 0; |
866 | } | |
e4fc48fb HB |
867 | static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, |
868 | u64 base, u64 end) | |
2df173d9 | 869 | { |
e4fc48fb | 870 | struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges; |
ced1bf52 | 871 | bool is_adjacent = false; |
e4fc48fb | 872 | u64 start, size; |
ced1bf52 | 873 | |
2df173d9 | 874 | if (base == end) |
1bd6a1c4 HB |
875 | return 0; |
876 | ||
ced1bf52 HB |
877 | /* |
878 | * Fold adjacent memory ranges to bring down the memory ranges/ | |
879 | * PT_LOAD segments count. | |
880 | */ | |
e4fc48fb HB |
881 | if (mrange_info->mem_range_cnt) { |
882 | start = mem_ranges[mrange_info->mem_range_cnt - 1].base; | |
883 | size = mem_ranges[mrange_info->mem_range_cnt - 1].size; | |
1bd6a1c4 | 884 | |
15eb77f8 HB |
885 | /* |
886 | * Boot memory area needs separate PT_LOAD segment(s) as it | |
887 | * is moved to a different location at the time of crash. | |
888 | * So, fold only if the region is not boot memory area. | |
889 | */ | |
890 | if ((start + size) == base && start >= fw_dump.boot_mem_top) | |
ced1bf52 HB |
891 | is_adjacent = true; |
892 | } | |
893 | if (!is_adjacent) { | |
894 | /* resize the array on reaching the limit */ | |
e4fc48fb | 895 | if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) { |
ced1bf52 HB |
896 | int ret; |
897 | ||
02c04e37 HB |
898 | if (mrange_info->is_static) { |
899 | pr_err("Reached array size limit for %s memory ranges\n", | |
900 | mrange_info->name); | |
901 | return -ENOSPC; | |
902 | } | |
903 | ||
e4fc48fb | 904 | ret = fadump_alloc_mem_ranges(mrange_info); |
ced1bf52 HB |
905 | if (ret) |
906 | return ret; | |
e4fc48fb HB |
907 | |
908 | /* Update to the new resized array */ | |
909 | mem_ranges = mrange_info->mem_ranges; | |
ced1bf52 HB |
910 | } |
911 | ||
912 | start = base; | |
e4fc48fb HB |
913 | mem_ranges[mrange_info->mem_range_cnt].base = start; |
914 | mrange_info->mem_range_cnt++; | |
1bd6a1c4 | 915 | } |
2df173d9 | 916 | |
e4fc48fb HB |
917 | mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start); |
918 | pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", | |
919 | mrange_info->name, (mrange_info->mem_range_cnt - 1), | |
920 | start, end - 1, (end - start)); | |
1bd6a1c4 | 921 | return 0; |
2df173d9 MS |
922 | } |
923 | ||
2df173d9 MS |
924 | static int fadump_init_elfcore_header(char *bufp) |
925 | { | |
926 | struct elfhdr *elf; | |
927 | ||
928 | elf = (struct elfhdr *) bufp; | |
929 | bufp += sizeof(struct elfhdr); | |
930 | memcpy(elf->e_ident, ELFMAG, SELFMAG); | |
931 | elf->e_ident[EI_CLASS] = ELF_CLASS; | |
932 | elf->e_ident[EI_DATA] = ELF_DATA; | |
933 | elf->e_ident[EI_VERSION] = EV_CURRENT; | |
934 | elf->e_ident[EI_OSABI] = ELF_OSABI; | |
935 | memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); | |
936 | elf->e_type = ET_CORE; | |
937 | elf->e_machine = ELF_ARCH; | |
938 | elf->e_version = EV_CURRENT; | |
939 | elf->e_entry = 0; | |
940 | elf->e_phoff = sizeof(struct elfhdr); | |
941 | elf->e_shoff = 0; | |
5b89492c CL |
942 | |
943 | if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) | |
944 | elf->e_flags = 2; | |
945 | else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) | |
946 | elf->e_flags = 1; | |
947 | else | |
948 | elf->e_flags = 0; | |
949 | ||
2df173d9 MS |
950 | elf->e_ehsize = sizeof(struct elfhdr); |
951 | elf->e_phentsize = sizeof(struct elf_phdr); | |
952 | elf->e_phnum = 0; | |
953 | elf->e_shentsize = 0; | |
954 | elf->e_shnum = 0; | |
955 | elf->e_shstrndx = 0; | |
956 | ||
957 | return 0; | |
958 | } | |
959 | ||
d34c5f26 MS |
960 | /* |
961 | * If the given physical address falls within the boot memory region then | |
962 | * return the relocated address that points to the dump region reserved | |
963 | * for saving initial boot memory contents. | |
964 | */ | |
965 | static inline unsigned long fadump_relocate(unsigned long paddr) | |
966 | { | |
7dee93a9 HB |
967 | unsigned long raddr, rstart, rend, rlast, hole_size; |
968 | int i; | |
969 | ||
970 | hole_size = 0; | |
971 | rlast = 0; | |
972 | raddr = paddr; | |
973 | for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { | |
974 | rstart = fw_dump.boot_mem_addr[i]; | |
975 | rend = rstart + fw_dump.boot_mem_sz[i]; | |
976 | hole_size += (rstart - rlast); | |
977 | ||
978 | if (paddr >= rstart && paddr < rend) { | |
979 | raddr += fw_dump.boot_mem_dest_addr - hole_size; | |
980 | break; | |
981 | } | |
982 | ||
983 | rlast = rend; | |
984 | } | |
985 | ||
986 | pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr); | |
987 | return raddr; | |
d34c5f26 MS |
988 | } |
989 | ||
c6c5b14d SJ |
990 | static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, |
991 | u64 size, unsigned long long offset) | |
2df173d9 | 992 | { |
c6c5b14d SJ |
993 | phdr->p_align = 0; |
994 | phdr->p_memsz = size; | |
995 | phdr->p_filesz = size; | |
996 | phdr->p_paddr = start; | |
997 | phdr->p_offset = offset; | |
998 | phdr->p_type = PT_LOAD; | |
999 | phdr->p_flags = PF_R|PF_W|PF_X; | |
1000 | phdr->p_vaddr = (unsigned long)__va(start); | |
1001 | } | |
1002 | ||
1003 | static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh) | |
1004 | { | |
1005 | char *bufp; | |
7dee93a9 | 1006 | struct elfhdr *elf; |
c6c5b14d SJ |
1007 | struct elf_phdr *phdr; |
1008 | u64 boot_mem_dest_offset; | |
1009 | unsigned long long i, ra_start, ra_end, ra_size, mstart, mend; | |
2df173d9 | 1010 | |
c6c5b14d | 1011 | bufp = (char *) fw_dump.elfcorehdr_addr; |
2df173d9 MS |
1012 | fadump_init_elfcore_header(bufp); |
1013 | elf = (struct elfhdr *)bufp; | |
1014 | bufp += sizeof(struct elfhdr); | |
1015 | ||
ebaeb5ae | 1016 | /* |
c6c5b14d SJ |
1017 | * Set up ELF PT_NOTE, a placeholder for CPU notes information. |
1018 | * The notes info will be populated later by platform-specific code. | |
1019 | * Hence, this PT_NOTE will always be the first ELF note. | |
ebaeb5ae MS |
1020 | * |
1021 | * NOTE: Any new ELF note addition should be placed after this note. | |
1022 | */ | |
1023 | phdr = (struct elf_phdr *)bufp; | |
1024 | bufp += sizeof(struct elf_phdr); | |
1025 | phdr->p_type = PT_NOTE; | |
c6c5b14d SJ |
1026 | phdr->p_flags = 0; |
1027 | phdr->p_vaddr = 0; | |
1028 | phdr->p_align = 0; | |
1029 | phdr->p_offset = 0; | |
1030 | phdr->p_paddr = 0; | |
1031 | phdr->p_filesz = 0; | |
1032 | phdr->p_memsz = 0; | |
1033 | /* Increment number of program headers. */ | |
ebaeb5ae MS |
1034 | (elf->e_phnum)++; |
1035 | ||
d34c5f26 MS |
1036 | /* setup ELF PT_NOTE for vmcoreinfo */ |
1037 | phdr = (struct elf_phdr *)bufp; | |
1038 | bufp += sizeof(struct elf_phdr); | |
1039 | phdr->p_type = PT_NOTE; | |
1040 | phdr->p_flags = 0; | |
1041 | phdr->p_vaddr = 0; | |
1042 | phdr->p_align = 0; | |
c6c5b14d SJ |
1043 | phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr; |
1044 | phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size; | |
d34c5f26 MS |
1045 | /* Increment number of program headers. */ |
1046 | (elf->e_phnum)++; | |
1047 | ||
c6c5b14d SJ |
1048 | /* |
1049 | * Setup PT_LOAD sections. first include boot memory regions | |
1050 | * and then add rest of the memory regions. | |
1051 | */ | |
1052 | boot_mem_dest_offset = fw_dump.boot_mem_dest_addr; | |
1053 | for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { | |
2df173d9 MS |
1054 | phdr = (struct elf_phdr *)bufp; |
1055 | bufp += sizeof(struct elf_phdr); | |
c6c5b14d SJ |
1056 | populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i], |
1057 | fw_dump.boot_mem_sz[i], | |
1058 | boot_mem_dest_offset); | |
1059 | /* Increment number of program headers. */ | |
1060 | (elf->e_phnum)++; | |
1061 | boot_mem_dest_offset += fw_dump.boot_mem_sz[i]; | |
1062 | } | |
1063 | ||
1064 | /* Memory reserved for fadump in first kernel */ | |
1065 | ra_start = fw_dump.reserve_dump_area_start; | |
1066 | ra_size = get_fadump_area_size(); | |
1067 | ra_end = ra_start + ra_size; | |
1068 | ||
1069 | phdr = (struct elf_phdr *)bufp; | |
1070 | for_each_mem_range(i, &mstart, &mend) { | |
1071 | /* Boot memory regions already added, skip them now */ | |
1072 | if (mstart < fw_dump.boot_mem_top) { | |
1073 | if (mend > fw_dump.boot_mem_top) | |
1074 | mstart = fw_dump.boot_mem_top; | |
1075 | else | |
1076 | continue; | |
2df173d9 MS |
1077 | } |
1078 | ||
c6c5b14d SJ |
1079 | /* Handle memblock regions overlaps with fadump reserved area */ |
1080 | if ((ra_start < mend) && (ra_end > mstart)) { | |
1081 | if ((mstart < ra_start) && (mend > ra_end)) { | |
1082 | populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); | |
1083 | /* Increment number of program headers. */ | |
1084 | (elf->e_phnum)++; | |
1085 | bufp += sizeof(struct elf_phdr); | |
1086 | phdr = (struct elf_phdr *)bufp; | |
1087 | populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); | |
1088 | } else if (mstart < ra_start) { | |
1089 | populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); | |
1090 | } else if (ra_end < mend) { | |
1091 | populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); | |
1092 | } | |
1093 | } else { | |
1094 | /* No overlap with fadump reserved memory region */ | |
1095 | populate_elf_pt_load(phdr, mstart, mend - mstart, mstart); | |
1096 | } | |
2df173d9 MS |
1097 | |
1098 | /* Increment number of program headers. */ | |
1099 | (elf->e_phnum)++; | |
c6c5b14d SJ |
1100 | bufp += sizeof(struct elf_phdr); |
1101 | phdr = (struct elf_phdr *) bufp; | |
2df173d9 | 1102 | } |
2df173d9 MS |
1103 | } |
1104 | ||
1105 | static unsigned long init_fadump_header(unsigned long addr) | |
1106 | { | |
1107 | struct fadump_crash_info_header *fdh; | |
1108 | ||
1109 | if (!addr) | |
1110 | return 0; | |
1111 | ||
2df173d9 MS |
1112 | fdh = __va(addr); |
1113 | addr += sizeof(struct fadump_crash_info_header); | |
1114 | ||
1115 | memset(fdh, 0, sizeof(struct fadump_crash_info_header)); | |
1116 | fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; | |
c6c5b14d | 1117 | fdh->version = FADUMP_HEADER_VERSION; |
ebaeb5ae | 1118 | /* We will set the crashing cpu id in crash_fadump() during crash. */ |
0226e552 | 1119 | fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; |
c6c5b14d SJ |
1120 | |
1121 | /* | |
1122 | * The physical address and size of vmcoreinfo are required in the | |
1123 | * second kernel to prepare elfcorehdr. | |
1124 | */ | |
1125 | fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note()); | |
1126 | fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE; | |
1127 | ||
1128 | ||
1129 | fdh->pt_regs_sz = sizeof(struct pt_regs); | |
6584cec0 HB |
1130 | /* |
1131 | * When LPAR is terminated by PYHP, ensure all possible CPUs' | |
1132 | * register data is processed while exporting the vmcore. | |
1133 | */ | |
1134 | fdh->cpu_mask = *cpu_possible_mask; | |
c6c5b14d | 1135 | fdh->cpu_mask_sz = sizeof(struct cpumask); |
2df173d9 MS |
1136 | |
1137 | return addr; | |
1138 | } | |
1139 | ||
98b8cd7f | 1140 | static int register_fadump(void) |
3ccc00a7 | 1141 | { |
2df173d9 | 1142 | unsigned long addr; |
2df173d9 | 1143 | |
3ccc00a7 MS |
1144 | /* |
1145 | * If no memory is reserved then we can not register for firmware- | |
1146 | * assisted dump. | |
1147 | */ | |
1148 | if (!fw_dump.reserve_dump_area_size) | |
98b8cd7f | 1149 | return -ENODEV; |
3ccc00a7 | 1150 | |
41a65d16 HB |
1151 | addr = fw_dump.fadumphdr_addr; |
1152 | ||
2df173d9 MS |
1153 | /* Initialize fadump crash info header. */ |
1154 | addr = init_fadump_header(addr); | |
2df173d9 | 1155 | |
3ccc00a7 | 1156 | /* register the future kernel dump with firmware. */ |
41a65d16 HB |
1157 | pr_debug("Registering for firmware-assisted kernel dump...\n"); |
1158 | return fw_dump.ops->fadump_register(&fw_dump); | |
3ccc00a7 MS |
1159 | } |
1160 | ||
b500afff MS |
1161 | void fadump_cleanup(void) |
1162 | { | |
2790d01d HB |
1163 | if (!fw_dump.fadump_supported) |
1164 | return; | |
1165 | ||
b500afff MS |
1166 | /* Invalidate the registration only if dump is active. */ |
1167 | if (fw_dump.dump_active) { | |
f3512011 HB |
1168 | pr_debug("Invalidating firmware-assisted dump registration\n"); |
1169 | fw_dump.ops->fadump_invalidate(&fw_dump); | |
722cde76 MS |
1170 | } else if (fw_dump.dump_registered) { |
1171 | /* Un-register Firmware-assisted dump if it was registered. */ | |
41a65d16 | 1172 | fw_dump.ops->fadump_unregister(&fw_dump); |
b500afff | 1173 | } |
2790d01d HB |
1174 | |
1175 | if (fw_dump.ops->fadump_cleanup) | |
1176 | fw_dump.ops->fadump_cleanup(&fw_dump); | |
b500afff MS |
1177 | } |
1178 | ||
68fa6478 HB |
1179 | static void fadump_free_reserved_memory(unsigned long start_pfn, |
1180 | unsigned long end_pfn) | |
1181 | { | |
1182 | unsigned long pfn; | |
1183 | unsigned long time_limit = jiffies + HZ; | |
1184 | ||
1185 | pr_info("freeing reserved memory (0x%llx - 0x%llx)\n", | |
1186 | PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); | |
1187 | ||
1188 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | |
1189 | free_reserved_page(pfn_to_page(pfn)); | |
1190 | ||
1191 | if (time_after(jiffies, time_limit)) { | |
1192 | cond_resched(); | |
1193 | time_limit = jiffies + HZ; | |
1194 | } | |
1195 | } | |
1196 | } | |
1197 | ||
1198 | /* | |
1199 | * Skip memory holes and free memory that was actually reserved. | |
1200 | */ | |
dda9dbfe | 1201 | static void fadump_release_reserved_area(u64 start, u64 end) |
68fa6478 | 1202 | { |
b10d6bca MR |
1203 | unsigned long reg_spfn, reg_epfn; |
1204 | u64 tstart, tend, spfn, epfn; | |
1205 | int i; | |
68fa6478 | 1206 | |
dda9dbfe HB |
1207 | spfn = PHYS_PFN(start); |
1208 | epfn = PHYS_PFN(end); | |
c9118e6c MR |
1209 | |
1210 | for_each_mem_pfn_range(i, MAX_NUMNODES, ®_spfn, ®_epfn, NULL) { | |
1211 | tstart = max_t(u64, spfn, reg_spfn); | |
1212 | tend = min_t(u64, epfn, reg_epfn); | |
1213 | ||
68fa6478 HB |
1214 | if (tstart < tend) { |
1215 | fadump_free_reserved_memory(tstart, tend); | |
1216 | ||
dda9dbfe | 1217 | if (tend == epfn) |
68fa6478 HB |
1218 | break; |
1219 | ||
dda9dbfe | 1220 | spfn = tend; |
68fa6478 HB |
1221 | } |
1222 | } | |
1223 | } | |
1224 | ||
b500afff | 1225 | /* |
dda9dbfe HB |
1226 | * Sort the mem ranges in-place and merge adjacent ranges |
1227 | * to minimize the memory ranges count. | |
b500afff | 1228 | */ |
dda9dbfe | 1229 | static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info) |
b500afff | 1230 | { |
dda9dbfe | 1231 | struct fadump_memory_range *mem_ranges; |
dda9dbfe HB |
1232 | u64 base, size; |
1233 | int i, j, idx; | |
1234 | ||
1235 | if (!reserved_mrange_info.mem_range_cnt) | |
1236 | return; | |
1237 | ||
1238 | /* Sort the memory ranges */ | |
1239 | mem_ranges = mrange_info->mem_ranges; | |
1240 | for (i = 0; i < mrange_info->mem_range_cnt; i++) { | |
1241 | idx = i; | |
1242 | for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) { | |
1243 | if (mem_ranges[idx].base > mem_ranges[j].base) | |
1244 | idx = j; | |
1245 | } | |
20776319 JC |
1246 | if (idx != i) |
1247 | swap(mem_ranges[idx], mem_ranges[i]); | |
dda9dbfe HB |
1248 | } |
1249 | ||
1250 | /* Merge adjacent reserved ranges */ | |
1251 | idx = 0; | |
1252 | for (i = 1; i < mrange_info->mem_range_cnt; i++) { | |
1253 | base = mem_ranges[i-1].base; | |
1254 | size = mem_ranges[i-1].size; | |
1255 | if (mem_ranges[i].base == (base + size)) | |
1256 | mem_ranges[idx].size += mem_ranges[i].size; | |
1257 | else { | |
1258 | idx++; | |
1259 | if (i == idx) | |
1260 | continue; | |
1261 | ||
1262 | mem_ranges[idx] = mem_ranges[i]; | |
1263 | } | |
1264 | } | |
1265 | mrange_info->mem_range_cnt = idx + 1; | |
1266 | } | |
1267 | ||
1268 | /* | |
1269 | * Scan reserved-ranges to consider them while reserving/releasing | |
1270 | * memory for FADump. | |
1271 | */ | |
02c04e37 | 1272 | static void __init early_init_dt_scan_reserved_ranges(unsigned long node) |
dda9dbfe | 1273 | { |
dda9dbfe HB |
1274 | const __be32 *prop; |
1275 | int len, ret = -1; | |
1276 | unsigned long i; | |
1277 | ||
02c04e37 HB |
1278 | /* reserved-ranges already scanned */ |
1279 | if (reserved_mrange_info.mem_range_cnt != 0) | |
1280 | return; | |
dda9dbfe | 1281 | |
02c04e37 | 1282 | prop = of_get_flat_dt_prop(node, "reserved-ranges", &len); |
dda9dbfe | 1283 | if (!prop) |
02c04e37 | 1284 | return; |
dda9dbfe HB |
1285 | |
1286 | /* | |
1287 | * Each reserved range is an (address,size) pair, 2 cells each, | |
1288 | * totalling 4 cells per range. | |
1289 | */ | |
1290 | for (i = 0; i < len / (sizeof(*prop) * 4); i++) { | |
1291 | u64 base, size; | |
1292 | ||
1293 | base = of_read_number(prop + (i * 4) + 0, 2); | |
1294 | size = of_read_number(prop + (i * 4) + 2, 2); | |
1295 | ||
1296 | if (size) { | |
1297 | ret = fadump_add_mem_range(&reserved_mrange_info, | |
1298 | base, base + size); | |
1299 | if (ret < 0) { | |
1300 | pr_warn("some reserved ranges are ignored!\n"); | |
1301 | break; | |
1302 | } | |
1303 | } | |
1304 | } | |
1305 | ||
02c04e37 HB |
1306 | /* Compact reserved ranges */ |
1307 | sort_and_merge_mem_ranges(&reserved_mrange_info); | |
dda9dbfe HB |
1308 | } |
1309 | ||
1310 | /* | |
1311 | * Release the memory that was reserved during early boot to preserve the | |
1312 | * crash'ed kernel's memory contents except reserved dump area (permanent | |
1313 | * reservation) and reserved ranges used by F/W. The released memory will | |
1314 | * be available for general use. | |
1315 | */ | |
1316 | static void fadump_release_memory(u64 begin, u64 end) | |
1317 | { | |
1318 | u64 ra_start, ra_end, tstart; | |
1319 | int i, ret; | |
1320 | ||
b500afff MS |
1321 | ra_start = fw_dump.reserve_dump_area_start; |
1322 | ra_end = ra_start + fw_dump.reserve_dump_area_size; | |
1323 | ||
68fa6478 | 1324 | /* |
02c04e37 HB |
1325 | * If reserved ranges array limit is hit, overwrite the last reserved |
1326 | * memory range with reserved dump area to ensure it is excluded from | |
1327 | * the memory being released (reused for next FADump registration). | |
68fa6478 | 1328 | */ |
02c04e37 HB |
1329 | if (reserved_mrange_info.mem_range_cnt == |
1330 | reserved_mrange_info.max_mem_ranges) | |
1331 | reserved_mrange_info.mem_range_cnt--; | |
dda9dbfe | 1332 | |
02c04e37 HB |
1333 | ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); |
1334 | if (ret != 0) | |
dda9dbfe | 1335 | return; |
dda9dbfe HB |
1336 | |
1337 | /* Get the reserved ranges list in order first. */ | |
1338 | sort_and_merge_mem_ranges(&reserved_mrange_info); | |
1339 | ||
1340 | /* Exclude reserved ranges and release remaining memory */ | |
1341 | tstart = begin; | |
1342 | for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) { | |
1343 | ra_start = reserved_mrange_info.mem_ranges[i].base; | |
1344 | ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size; | |
1345 | ||
1346 | if (tstart >= ra_end) | |
1347 | continue; | |
1348 | ||
1349 | if (tstart < ra_start) | |
1350 | fadump_release_reserved_area(tstart, ra_start); | |
1351 | tstart = ra_end; | |
1352 | } | |
1353 | ||
1354 | if (tstart < end) | |
1355 | fadump_release_reserved_area(tstart, end); | |
b500afff MS |
1356 | } |
1357 | ||
c6c5b14d SJ |
1358 | static void fadump_free_elfcorehdr_buf(void) |
1359 | { | |
1360 | if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) | |
1361 | return; | |
1362 | ||
1363 | /* | |
1364 | * Before freeing the memory of `elfcorehdr`, reset the global | |
1365 | * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing | |
1366 | * invalid memory. | |
1367 | */ | |
1368 | elfcorehdr_addr = ELFCORE_ADDR_ERR; | |
1369 | fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); | |
1370 | fw_dump.elfcorehdr_addr = 0; | |
1371 | fw_dump.elfcorehdr_size = 0; | |
1372 | } | |
1373 | ||
b500afff MS |
1374 | static void fadump_invalidate_release_mem(void) |
1375 | { | |
b500afff MS |
1376 | mutex_lock(&fadump_mutex); |
1377 | if (!fw_dump.dump_active) { | |
1378 | mutex_unlock(&fadump_mutex); | |
1379 | return; | |
1380 | } | |
1381 | ||
b500afff MS |
1382 | fadump_cleanup(); |
1383 | mutex_unlock(&fadump_mutex); | |
1384 | ||
c6c5b14d | 1385 | fadump_free_elfcorehdr_buf(); |
7dee93a9 | 1386 | fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM()); |
961cf26a HB |
1387 | fadump_free_cpu_notes_buf(); |
1388 | ||
a4e2e2ca HB |
1389 | /* |
1390 | * Setup kernel metadata and initialize the kernel dump | |
1391 | * memory structure for FADump re-registration. | |
1392 | */ | |
1393 | if (fw_dump.ops->fadump_setup_metadata && | |
1394 | (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0)) | |
1395 | pr_warn("Failed to setup kernel metadata!\n"); | |
41a65d16 | 1396 | fw_dump.ops->fadump_init_mem_struct(&fw_dump); |
b500afff MS |
1397 | } |
1398 | ||
d418b19f SJ |
1399 | static ssize_t release_mem_store(struct kobject *kobj, |
1400 | struct kobj_attribute *attr, | |
1401 | const char *buf, size_t count) | |
b500afff | 1402 | { |
dcdc4679 MS |
1403 | int input = -1; |
1404 | ||
b500afff MS |
1405 | if (!fw_dump.dump_active) |
1406 | return -EPERM; | |
1407 | ||
dcdc4679 MS |
1408 | if (kstrtoint(buf, 0, &input)) |
1409 | return -EINVAL; | |
1410 | ||
1411 | if (input == 1) { | |
b500afff MS |
1412 | /* |
1413 | * Take away the '/proc/vmcore'. We are releasing the dump | |
1414 | * memory, hence it will not be valid anymore. | |
1415 | */ | |
2685f826 | 1416 | #ifdef CONFIG_PROC_VMCORE |
b500afff | 1417 | vmcore_cleanup(); |
2685f826 | 1418 | #endif |
b500afff MS |
1419 | fadump_invalidate_release_mem(); |
1420 | ||
1421 | } else | |
1422 | return -EINVAL; | |
1423 | return count; | |
1424 | } | |
1425 | ||
d418b19f | 1426 | /* Release the reserved memory and disable the FADump */ |
d276960d | 1427 | static void __init unregister_fadump(void) |
d418b19f SJ |
1428 | { |
1429 | fadump_cleanup(); | |
1430 | fadump_release_memory(fw_dump.reserve_dump_area_start, | |
1431 | fw_dump.reserve_dump_area_size); | |
1432 | fw_dump.fadump_enabled = 0; | |
1433 | kobject_put(fadump_kobj); | |
1434 | } | |
1435 | ||
1436 | static ssize_t enabled_show(struct kobject *kobj, | |
1437 | struct kobj_attribute *attr, | |
1438 | char *buf) | |
3ccc00a7 MS |
1439 | { |
1440 | return sprintf(buf, "%d\n", fw_dump.fadump_enabled); | |
1441 | } | |
1442 | ||
bc446c5a SJ |
1443 | /* |
1444 | * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which inidcates | |
1445 | * to usersapce that fadump re-registration is not required on memory | |
1446 | * hotplug events. | |
1447 | */ | |
1448 | static ssize_t hotplug_ready_show(struct kobject *kobj, | |
1449 | struct kobj_attribute *attr, | |
1450 | char *buf) | |
1451 | { | |
1452 | return sprintf(buf, "%d\n", 1); | |
1453 | } | |
1454 | ||
d8e73458 SJ |
1455 | static ssize_t mem_reserved_show(struct kobject *kobj, |
1456 | struct kobj_attribute *attr, | |
1457 | char *buf) | |
1458 | { | |
1459 | return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size); | |
1460 | } | |
1461 | ||
d418b19f SJ |
1462 | static ssize_t registered_show(struct kobject *kobj, |
1463 | struct kobj_attribute *attr, | |
1464 | char *buf) | |
3ccc00a7 MS |
1465 | { |
1466 | return sprintf(buf, "%d\n", fw_dump.dump_registered); | |
1467 | } | |
1468 | ||
683eab94 HB |
1469 | static ssize_t bootargs_append_show(struct kobject *kobj, |
1470 | struct kobj_attribute *attr, | |
1471 | char *buf) | |
1472 | { | |
1473 | return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area)); | |
1474 | } | |
1475 | ||
1476 | static ssize_t bootargs_append_store(struct kobject *kobj, | |
1477 | struct kobj_attribute *attr, | |
1478 | const char *buf, size_t count) | |
1479 | { | |
1480 | char *params; | |
1481 | ||
1482 | if (!fw_dump.fadump_enabled || fw_dump.dump_active) | |
1483 | return -EPERM; | |
1484 | ||
1485 | if (count >= COMMAND_LINE_SIZE) | |
1486 | return -EINVAL; | |
1487 | ||
1488 | /* | |
1489 | * Fail here instead of handling this scenario with | |
1490 | * some silly workaround in capture kernel. | |
1491 | */ | |
1492 | if (saved_command_line_len + count >= COMMAND_LINE_SIZE) { | |
1493 | pr_err("Appending parameters exceeds cmdline size!\n"); | |
1494 | return -ENOSPC; | |
1495 | } | |
1496 | ||
1497 | params = __va(fw_dump.param_area); | |
1498 | strscpy_pad(params, buf, COMMAND_LINE_SIZE); | |
1499 | /* Remove newline character at the end. */ | |
1500 | if (params[count-1] == '\n') | |
1501 | params[count-1] = '\0'; | |
1502 | ||
1503 | return count; | |
1504 | } | |
1505 | ||
d418b19f SJ |
1506 | static ssize_t registered_store(struct kobject *kobj, |
1507 | struct kobj_attribute *attr, | |
1508 | const char *buf, size_t count) | |
3ccc00a7 MS |
1509 | { |
1510 | int ret = 0; | |
dcdc4679 | 1511 | int input = -1; |
3ccc00a7 | 1512 | |
f3512011 | 1513 | if (!fw_dump.fadump_enabled || fw_dump.dump_active) |
3ccc00a7 MS |
1514 | return -EPERM; |
1515 | ||
dcdc4679 MS |
1516 | if (kstrtoint(buf, 0, &input)) |
1517 | return -EINVAL; | |
1518 | ||
3ccc00a7 MS |
1519 | mutex_lock(&fadump_mutex); |
1520 | ||
dcdc4679 MS |
1521 | switch (input) { |
1522 | case 0: | |
3ccc00a7 | 1523 | if (fw_dump.dump_registered == 0) { |
3ccc00a7 MS |
1524 | goto unlock_out; |
1525 | } | |
f3512011 | 1526 | |
3ccc00a7 | 1527 | /* Un-register Firmware-assisted dump */ |
41a65d16 HB |
1528 | pr_debug("Un-register firmware-assisted dump\n"); |
1529 | fw_dump.ops->fadump_unregister(&fw_dump); | |
3ccc00a7 | 1530 | break; |
dcdc4679 | 1531 | case 1: |
3ccc00a7 | 1532 | if (fw_dump.dump_registered == 1) { |
0823c68b | 1533 | /* Un-register Firmware-assisted dump */ |
41a65d16 | 1534 | fw_dump.ops->fadump_unregister(&fw_dump); |
3ccc00a7 MS |
1535 | } |
1536 | /* Register Firmware-assisted dump */ | |
98b8cd7f | 1537 | ret = register_fadump(); |
3ccc00a7 MS |
1538 | break; |
1539 | default: | |
1540 | ret = -EINVAL; | |
1541 | break; | |
1542 | } | |
1543 | ||
1544 | unlock_out: | |
1545 | mutex_unlock(&fadump_mutex); | |
1546 | return ret < 0 ? ret : count; | |
1547 | } | |
1548 | ||
1549 | static int fadump_region_show(struct seq_file *m, void *private) | |
1550 | { | |
3ccc00a7 MS |
1551 | if (!fw_dump.fadump_enabled) |
1552 | return 0; | |
1553 | ||
b500afff | 1554 | mutex_lock(&fadump_mutex); |
f3512011 HB |
1555 | fw_dump.ops->fadump_region_show(&fw_dump, m); |
1556 | mutex_unlock(&fadump_mutex); | |
3ccc00a7 MS |
1557 | return 0; |
1558 | } | |
1559 | ||
d418b19f SJ |
1560 | static struct kobj_attribute release_attr = __ATTR_WO(release_mem); |
1561 | static struct kobj_attribute enable_attr = __ATTR_RO(enabled); | |
1562 | static struct kobj_attribute register_attr = __ATTR_RW(registered); | |
d8e73458 | 1563 | static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); |
bc446c5a | 1564 | static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready); |
683eab94 | 1565 | static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append); |
d418b19f SJ |
1566 | |
1567 | static struct attribute *fadump_attrs[] = { | |
1568 | &enable_attr.attr, | |
1569 | ®ister_attr.attr, | |
d8e73458 | 1570 | &mem_reserved_attr.attr, |
bc446c5a | 1571 | &hotplug_ready_attr.attr, |
d418b19f SJ |
1572 | NULL, |
1573 | }; | |
1574 | ||
1575 | ATTRIBUTE_GROUPS(fadump); | |
3ccc00a7 | 1576 | |
f6cee260 | 1577 | DEFINE_SHOW_ATTRIBUTE(fadump_region); |
3ccc00a7 | 1578 | |
d276960d | 1579 | static void __init fadump_init_files(void) |
3ccc00a7 | 1580 | { |
3ccc00a7 MS |
1581 | int rc = 0; |
1582 | ||
d418b19f SJ |
1583 | fadump_kobj = kobject_create_and_add("fadump", kernel_kobj); |
1584 | if (!fadump_kobj) { | |
1585 | pr_err("failed to create fadump kobject\n"); | |
1586 | return; | |
1587 | } | |
860286cf | 1588 | |
dbf77fed | 1589 | debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, |
860286cf | 1590 | &fadump_region_fops); |
b500afff MS |
1591 | |
1592 | if (fw_dump.dump_active) { | |
d418b19f SJ |
1593 | rc = sysfs_create_file(fadump_kobj, &release_attr.attr); |
1594 | if (rc) | |
1595 | pr_err("unable to create release_mem sysfs file (%d)\n", | |
1596 | rc); | |
1597 | } | |
1598 | ||
1599 | rc = sysfs_create_groups(fadump_kobj, fadump_groups); | |
1600 | if (rc) { | |
1601 | pr_err("sysfs group creation failed (%d), unregistering FADump", | |
1602 | rc); | |
1603 | unregister_fadump(); | |
1604 | return; | |
1605 | } | |
1606 | ||
1607 | /* | |
1608 | * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to | |
1609 | * create symlink at old location to maintain backward compatibility. | |
1610 | * | |
1611 | * - fadump_enabled -> fadump/enabled | |
1612 | * - fadump_registered -> fadump/registered | |
1613 | * - fadump_release_mem -> fadump/release_mem | |
1614 | */ | |
1615 | rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj, | |
1616 | "enabled", "fadump_enabled"); | |
1617 | if (rc) { | |
1618 | pr_err("unable to create fadump_enabled symlink (%d)", rc); | |
1619 | return; | |
1620 | } | |
1621 | ||
1622 | rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj, | |
1623 | "registered", | |
1624 | "fadump_registered"); | |
1625 | if (rc) { | |
1626 | pr_err("unable to create fadump_registered symlink (%d)", rc); | |
1627 | sysfs_remove_link(kernel_kobj, "fadump_enabled"); | |
1628 | return; | |
1629 | } | |
1630 | ||
1631 | if (fw_dump.dump_active) { | |
1632 | rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, | |
1633 | fadump_kobj, | |
1634 | "release_mem", | |
1635 | "fadump_release_mem"); | |
b500afff | 1636 | if (rc) |
d418b19f SJ |
1637 | pr_err("unable to create fadump_release_mem symlink (%d)", |
1638 | rc); | |
b500afff | 1639 | } |
3ccc00a7 MS |
1640 | return; |
1641 | } | |
1642 | ||
c6c5b14d SJ |
1643 | static int __init fadump_setup_elfcorehdr_buf(void) |
1644 | { | |
1645 | int elf_phdr_cnt; | |
1646 | unsigned long elfcorehdr_size; | |
1647 | ||
1648 | /* | |
1649 | * Program header for CPU notes comes first, followed by one for | |
1650 | * vmcoreinfo, and the remaining program headers correspond to | |
1651 | * memory regions. | |
1652 | */ | |
1653 | elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory); | |
1654 | elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr)); | |
1655 | elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size); | |
1656 | ||
1657 | fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size); | |
1658 | if (!fw_dump.elfcorehdr_addr) { | |
1659 | pr_err("Failed to allocate %lu bytes for elfcorehdr\n", | |
1660 | elfcorehdr_size); | |
1661 | return -ENOMEM; | |
1662 | } | |
1663 | fw_dump.elfcorehdr_size = elfcorehdr_size; | |
1664 | return 0; | |
1665 | } | |
1666 | ||
1667 | /* | |
1668 | * Check if the fadump header of crashed kernel is compatible with fadump kernel. | |
1669 | * | |
1670 | * It checks the magic number, endianness, and size of non-primitive type | |
1671 | * members of fadump header to ensure safe dump collection. | |
1672 | */ | |
1673 | static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) | |
1674 | { | |
1675 | if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { | |
1676 | pr_err("Old magic number, can't process the dump.\n"); | |
1677 | return false; | |
1678 | } | |
1679 | ||
1680 | if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { | |
1681 | if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) | |
1682 | pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); | |
1683 | else | |
1684 | pr_err("Fadump header is corrupted.\n"); | |
1685 | ||
1686 | return false; | |
1687 | } | |
1688 | ||
1689 | /* | |
1690 | * Dump collection is not safe if the size of non-primitive type members | |
1691 | * of the fadump header do not match between crashed and fadump kernel. | |
1692 | */ | |
1693 | if (fdh->pt_regs_sz != sizeof(struct pt_regs) || | |
1694 | fdh->cpu_mask_sz != sizeof(struct cpumask)) { | |
1695 | pr_err("Fadump header size mismatch.\n"); | |
1696 | return false; | |
1697 | } | |
1698 | ||
1699 | return true; | |
1700 | } | |
1701 | ||
1702 | static void __init fadump_process(void) | |
1703 | { | |
1704 | struct fadump_crash_info_header *fdh; | |
1705 | ||
1706 | fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr); | |
1707 | if (!fdh) { | |
1708 | pr_err("Crash info header is empty.\n"); | |
1709 | goto err_out; | |
1710 | } | |
1711 | ||
1712 | /* Avoid processing the dump if fadump header isn't compatible */ | |
1713 | if (!is_fadump_header_compatible(fdh)) | |
1714 | goto err_out; | |
1715 | ||
1716 | /* Allocate buffer for elfcorehdr */ | |
1717 | if (fadump_setup_elfcorehdr_buf()) | |
1718 | goto err_out; | |
1719 | ||
1720 | fadump_populate_elfcorehdr(fdh); | |
1721 | ||
1722 | /* Let platform update the CPU notes in elfcorehdr */ | |
1723 | if (fw_dump.ops->fadump_process(&fw_dump) < 0) | |
1724 | goto err_out; | |
1725 | ||
1726 | /* | |
1727 | * elfcorehdr is now ready to be exported. | |
1728 | * | |
1729 | * set elfcorehdr_addr so that vmcore module will export the | |
1730 | * elfcorehdr through '/proc/vmcore'. | |
1731 | */ | |
1732 | elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr); | |
1733 | return; | |
1734 | ||
1735 | err_out: | |
1736 | fadump_invalidate_release_mem(); | |
1737 | } | |
1738 | ||
683eab94 HB |
1739 | /* |
1740 | * Reserve memory to store additional parameters to be passed | |
1741 | * for fadump/capture kernel. | |
1742 | */ | |
1743 | static void fadump_setup_param_area(void) | |
1744 | { | |
1745 | phys_addr_t range_start, range_end; | |
1746 | ||
1747 | if (!fw_dump.param_area_supported || fw_dump.dump_active) | |
1748 | return; | |
1749 | ||
1750 | /* This memory can't be used by PFW or bootloader as it is shared across kernels */ | |
1751 | if (radix_enabled()) { | |
1752 | /* | |
1753 | * Anywhere in the upper half should be good enough as all memory | |
1754 | * is accessible in real mode. | |
1755 | */ | |
1756 | range_start = memblock_end_of_DRAM() / 2; | |
1757 | range_end = memblock_end_of_DRAM(); | |
1758 | } else { | |
1759 | /* | |
1760 | * Passing additional parameters is supported for hash MMU only | |
1761 | * if the first memory block size is 768MB or higher. | |
1762 | */ | |
1763 | if (ppc64_rma_size < 0x30000000) | |
1764 | return; | |
1765 | ||
1766 | /* | |
1767 | * 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving | |
1768 | * memory for passing additional parameters in this range to avoid | |
1769 | * being stomped on by PFW/bootloader. | |
1770 | */ | |
1771 | range_start = 0x2A000000; | |
1772 | range_end = range_start + 0x4000000; | |
1773 | } | |
1774 | ||
1775 | fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE, | |
1776 | COMMAND_LINE_SIZE, | |
1777 | range_start, | |
1778 | range_end); | |
1779 | if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) { | |
1780 | pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); | |
1781 | return; | |
1782 | } | |
1783 | ||
1784 | memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE); | |
1785 | } | |
1786 | ||
3ccc00a7 MS |
1787 | /* |
1788 | * Prepare for firmware-assisted dump. | |
1789 | */ | |
1790 | int __init setup_fadump(void) | |
1791 | { | |
565f9bc0 | 1792 | if (!fw_dump.fadump_supported) |
3ccc00a7 | 1793 | return 0; |
3ccc00a7 | 1794 | |
565f9bc0 | 1795 | fadump_init_files(); |
3ccc00a7 | 1796 | fadump_show_config(); |
565f9bc0 MS |
1797 | |
1798 | if (!fw_dump.fadump_enabled) | |
1799 | return 1; | |
1800 | ||
2df173d9 MS |
1801 | /* |
1802 | * If dump data is available then see if it is valid and prepare for | |
1803 | * saving it to the disk. | |
1804 | */ | |
b500afff | 1805 | if (fw_dump.dump_active) { |
c6c5b14d | 1806 | fadump_process(); |
b500afff | 1807 | } |
607451ce HB |
1808 | /* Initialize the kernel dump memory structure and register with f/w */ |
1809 | else if (fw_dump.reserve_dump_area_size) { | |
683eab94 | 1810 | fadump_setup_param_area(); |
41a65d16 | 1811 | fw_dump.ops->fadump_init_mem_struct(&fw_dump); |
607451ce HB |
1812 | register_fadump(); |
1813 | } | |
f3512011 | 1814 | |
06e629c2 HB |
1815 | /* |
1816 | * In case of panic, fadump is triggered via ppc_panic_event() | |
1817 | * panic notifier. Setting crash_kexec_post_notifiers to 'true' | |
1818 | * lets panic() function take crash friendly path before panic | |
1819 | * notifiers are invoked. | |
1820 | */ | |
1821 | crash_kexec_post_notifiers = true; | |
1822 | ||
3ccc00a7 MS |
1823 | return 1; |
1824 | } | |
607451ce HB |
1825 | /* |
1826 | * Use subsys_initcall_sync() here because there is dependency with | |
1fd02f66 JL |
1827 | * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization |
1828 | * is done before registering with f/w. | |
607451ce HB |
1829 | */ |
1830 | subsys_initcall_sync(setup_fadump); | |
bec53196 HB |
1831 | #else /* !CONFIG_PRESERVE_FA_DUMP */ |
1832 | ||
1833 | /* Scan the Firmware Assisted dump configuration details. */ | |
1834 | int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, | |
1835 | int depth, void *data) | |
1836 | { | |
1837 | if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0)) | |
1838 | return 0; | |
1839 | ||
1840 | opal_fadump_dt_scan(&fw_dump, node); | |
1841 | return 1; | |
1842 | } | |
1843 | ||
1844 | /* | |
1845 | * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel, | |
1846 | * preserve crash data. The subsequent memory preserving kernel boot | |
1847 | * is likely to process this crash data. | |
1848 | */ | |
1849 | int __init fadump_reserve_mem(void) | |
1850 | { | |
1851 | if (fw_dump.dump_active) { | |
1852 | /* | |
1853 | * If last boot has crashed then reserve all the memory | |
1854 | * above boot memory to preserve crash data. | |
1855 | */ | |
1856 | pr_info("Preserving crash data for processing in next boot.\n"); | |
1857 | fadump_reserve_crash_area(fw_dump.boot_mem_top); | |
1858 | } else | |
1859 | pr_debug("FADump-aware kernel..\n"); | |
1860 | ||
1861 | return 1; | |
1862 | } | |
1863 | #endif /* CONFIG_PRESERVE_FA_DUMP */ | |
b2a815a5 HB |
1864 | |
1865 | /* Preserve everything above the base address */ | |
1866 | static void __init fadump_reserve_crash_area(u64 base) | |
1867 | { | |
b10d6bca | 1868 | u64 i, mstart, mend, msize; |
b2a815a5 | 1869 | |
b10d6bca MR |
1870 | for_each_mem_range(i, &mstart, &mend) { |
1871 | msize = mend - mstart; | |
b2a815a5 HB |
1872 | |
1873 | if ((mstart + msize) < base) | |
1874 | continue; | |
1875 | ||
1876 | if (mstart < base) { | |
1877 | msize -= (base - mstart); | |
1878 | mstart = base; | |
1879 | } | |
1880 | ||
1881 | pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data", | |
1882 | (msize >> 20), mstart); | |
1883 | memblock_reserve(mstart, msize); | |
1884 | } | |
1885 | } | |
bec53196 HB |
1886 | |
1887 | unsigned long __init arch_reserved_kernel_pages(void) | |
1888 | { | |
1889 | return memblock_reserved_size() / PAGE_SIZE; | |
1890 | } |