powerpc/fadump: setup additional parameters for dump capture kernel
arch/powerpc/kernel/fadump.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
 * dump with assistance from firmware. This approach does not use kexec;
 * instead, firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the code has been adapted from the phyp-assisted
 * dump implementation written by Linas Vepstas and Manish Ahuja.
 *
 * Copyright 2011 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "fadump: " fmt

#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cma.h>
#include <linux/hugetlb.h>
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/of_fdt.h>

#include <asm/page.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include <asm/setup.h>
#include <asm/interrupt.h>

/*
 * The CPU that acquired the lock to trigger the fadump crash should
 * wait for other CPUs to enter.
 *
 * The timeout is in milliseconds.
 */
#define CRASH_TIMEOUT    500

static struct fw_dump fw_dump;

static void __init fadump_reserve_crash_area(u64 base);

#ifndef CONFIG_PRESERVE_FA_DUMP

static struct kobject *fadump_kobj;

static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);

#define RESERVED_RNGS_SZ    16384 /* 16K - 128 entries */
#define RESERVED_RNGS_CNT   (RESERVED_RNGS_SZ / \
                             sizeof(struct fadump_memory_range))
static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
static struct fadump_mrange_info
reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };

static void __init early_init_dt_scan_reserved_ranges(unsigned long node);

#ifdef CONFIG_CMA
static struct cma *fadump_cma;

/*
 * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
 *
 * This function initializes CMA area from fadump reserved memory.
 * The total size of fadump reserved memory covers boot memory size
 * + cpu data size + hpte size and metadata.
 * Initialize only the area equivalent to boot memory size for CMA use.
 * The remaining portion of fadump reserved memory will not be given to
 * CMA and pages for those will stay reserved. Boot memory size is
 * aligned per CMA requirement to satisfy the cma_init_reserved_mem() call.
 * Even if that call fails, we still hold the memory reservation and can
 * continue doing fadump.
 */
static int __init fadump_cma_init(void)
{
    unsigned long long base, size;
    int rc;

    if (!fw_dump.fadump_enabled)
        return 0;

    /*
     * Do not use CMA if user has provided fadump=nocma kernel parameter.
     * Return 1 to continue with fadump old behaviour.
     */
    if (fw_dump.nocma)
        return 1;

    base = fw_dump.reserve_dump_area_start;
    size = fw_dump.boot_memory_size;

    if (!size)
        return 0;

    rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
    if (rc) {
        pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
        /*
         * Though the CMA init has failed we still have memory
         * reservation with us. The reserved memory will be
         * blocked from production system usage. Hence return 1,
         * so that we can continue with fadump.
         */
        return 1;
    }

    /*
     * If CMA activation fails, keep the pages reserved, instead of
     * exposing them to buddy allocator. Same as 'fadump=nocma' case.
     */
    cma_reserve_pages_on_error(fadump_cma);

    /*
     * So we now have successfully initialized cma area for fadump.
     */
    pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
        "bytes of memory reserved for firmware-assisted dump\n",
        cma_get_size(fadump_cma),
        (unsigned long)cma_get_base(fadump_cma) >> 20,
        fw_dump.reserve_dump_area_size);
    return 1;
}
#else
static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
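
/*
 * Illustrative sketch (not part of the kernel source): how the fadump
 * reservation is split when CMA is used. Only the first boot_memory_size
 * bytes are handed to CMA; the tail (CPU state data, HPTE region,
 * metadata) stays hard-reserved. Field names mirror struct fw_dump above.
 */
#if 0
static void example_cma_split(struct fw_dump *fd)
{
    u64 cma_base = fd->reserve_dump_area_start;
    u64 cma_size = fd->boot_memory_size;            /* given to CMA */
    u64 tail_base = cma_base + cma_size;            /* stays reserved */
    u64 tail_size = fd->reserve_dump_area_size - cma_size;

    pr_info("CMA: [%#llx-%#llx), reserved tail: [%#llx-%#llx)\n",
        cma_base, cma_base + cma_size, tail_base, tail_base + tail_size);
}
#endif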

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
                                      int depth, void *data)
{
    if (depth == 0) {
        early_init_dt_scan_reserved_ranges(node);
        return 0;
    }

    if (depth != 1)
        return 0;

    if (strcmp(uname, "rtas") == 0) {
        rtas_fadump_dt_scan(&fw_dump, node);
        return 1;
    }

    if (strcmp(uname, "ibm,opal") == 0) {
        opal_fadump_dt_scan(&fw_dump, node);
        return 1;
    }

    return 0;
}

/*
 * If fadump is registered, check if the memory provided
 * falls within boot memory area and reserved memory area.
 */
int is_fadump_memory_area(u64 addr, unsigned long size)
{
    u64 d_start, d_end;

    if (!fw_dump.dump_registered)
        return 0;

    if (!size)
        return 0;

    d_start = fw_dump.reserve_dump_area_start;
    d_end = d_start + fw_dump.reserve_dump_area_size;
    if (((addr + size) > d_start) && (addr <= d_end))
        return 1;

    return (addr <= fw_dump.boot_mem_top);
}

int should_fadump_crash(void)
{
    if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
        return 0;
    return 1;
}

int is_fadump_active(void)
{
    return fw_dump.dump_active;
}

/*
 * Returns true if there are no holes in the memory area between d_start
 * and d_end, false otherwise.
 */
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
    phys_addr_t reg_start, reg_end;
    bool ret = false;
    u64 i, start, end;

    for_each_mem_range(i, &reg_start, &reg_end) {
        start = max_t(u64, d_start, reg_start);
        end = min_t(u64, d_end, reg_end);
        if (d_start < end) {
            /* Memory hole from d_start to start */
            if (start > d_start)
                break;

            if (end == d_end) {
                ret = true;
                break;
            }

            d_start = end + 1;
        }
    }

    return ret;
}
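
/*
 * Worked example (illustrative): with memblock regions [0, 1G) and [2G, 4G),
 * a check over [512M, 768M) returns true (the range sits fully inside
 * [0, 1G)), while a check over [512M, 3G) returns false because of the
 * [1G, 2G) hole.
 */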

/*
 * Returns true, if there are no holes in reserved memory area,
 * false otherwise.
 */
bool is_fadump_reserved_mem_contiguous(void)
{
    u64 d_start, d_end;

    d_start = fw_dump.reserve_dump_area_start;
    d_end = d_start + fw_dump.reserve_dump_area_size;
    return is_fadump_mem_area_contiguous(d_start, d_end);
}

/* Print firmware assisted dump configurations for debugging purpose. */
static void __init fadump_show_config(void)
{
    int i;

    pr_debug("Support for firmware-assisted dump (fadump): %s\n",
         (fw_dump.fadump_supported ? "present" : "no support"));

    if (!fw_dump.fadump_supported)
        return;

    pr_debug("Fadump enabled    : %s\n",
         (fw_dump.fadump_enabled ? "yes" : "no"));
    pr_debug("Dump Active       : %s\n",
         (fw_dump.dump_active ? "yes" : "no"));
    pr_debug("Dump section sizes:\n");
    pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
    pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
    pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
    pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
    pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
    for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
        pr_debug("[%03d] base = %llx, size = %llx\n", i,
             fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
    }
}

/**
 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
 *
 * Function to find the largest memory size we need to reserve during early
 * boot process. This will be the size of the memory that is required for a
 * kernel to boot successfully.
 *
 * This function has been taken from phyp-assisted dump feature implementation.
 *
 * Returns the larger of 256MB or 5% of system RAM, rounded down to a
 * multiple of 256MB.
 *
 * TODO: Come up with better approach to find out more accurate memory size
 * that is required for a kernel to boot successfully.
 *
 */
static __init u64 fadump_calculate_reserve_size(void)
{
    u64 base, size, bootmem_min;
    int ret;

    if (fw_dump.reserve_bootvar)
        pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");

    /*
     * Check if the size is specified through crashkernel= cmdline
     * option. If yes, then use that but ignore base as fadump reserves
     * memory at a predefined offset.
     */
    ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
                            &size, &base, NULL, NULL);
    if (ret == 0 && size > 0) {
        unsigned long max_size;

        if (fw_dump.reserve_bootvar)
            pr_info("Using 'crashkernel=' parameter for memory reservation.\n");

        fw_dump.reserve_bootvar = (unsigned long)size;

        /*
         * Adjust if the boot memory size specified is above
         * the upper limit.
         */
        max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
        if (fw_dump.reserve_bootvar > max_size) {
            fw_dump.reserve_bootvar = max_size;
            pr_info("Adjusted boot memory size to %luMB\n",
                (fw_dump.reserve_bootvar >> 20));
        }

        return fw_dump.reserve_bootvar;
    } else if (fw_dump.reserve_bootvar) {
        /*
         * 'fadump_reserve_mem=' is being used to reserve memory
         * for firmware-assisted dump.
         */
        return fw_dump.reserve_bootvar;
    }

    /* divide by 20 to get 5% of value */
    size = memblock_phys_mem_size() / 20;

    /* round it down in multiples of 256MB */
    size = size & ~0x0FFFFFFFUL;

    /* Truncate to memory_limit. We don't want to over reserve the memory.*/
    if (memory_limit && size > memory_limit)
        size = memory_limit;

    bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
    return (size > bootmem_min ? size : bootmem_min);
}
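
/*
 * Worked example (illustrative): on a 64GB system, 5% is ~3.2GB; masking
 * with ~0x0FFFFFFF rounds that down to a 256MB multiple, i.e. 3GB. A
 * minimal sketch of the same arithmetic, assuming SZ_256M from
 * <linux/sizes.h>:
 */
#if 0
static u64 example_reserve_size(u64 system_ram)
{
    u64 size = system_ram / 20;         /* 5% of RAM */

    size &= ~(u64)(SZ_256M - 1);        /* round down to 256MB multiple */
    return (size > SZ_256M) ? size : SZ_256M;
}
#endif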

/*
 * Calculate the total memory size required to be reserved for
 * firmware-assisted dump registration.
 */
static unsigned long __init get_fadump_area_size(void)
{
    unsigned long size = 0;

    size += fw_dump.cpu_state_data_size;
    size += fw_dump.hpte_region_size;
    /*
     * Account for pagesize alignment of boot memory area destination address.
     * This facilitates mmap reading of the first kernel's memory.
     */
    size = PAGE_ALIGN(size);
    size += fw_dump.boot_memory_size;
    size += sizeof(struct fadump_crash_info_header);

    /* This is to hold kernel metadata on platforms that support it */
    size += (fw_dump.ops->fadump_get_metadata_size ?
             fw_dump.ops->fadump_get_metadata_size() : 0);
    return size;
}
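
/*
 * The components that make up the reservation (sizes summed above), shown
 * in the order they are accounted; actual placement within the reserved
 * area is platform-specific:
 *
 *   [ CPU state data ][ HPTE region ][ align pad ][ boot memory copy ]
 *   [ crash info header ][ platform metadata (optional) ]
 */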

static int __init add_boot_mem_region(unsigned long rstart,
                                      unsigned long rsize)
{
    int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns();
    int i = fw_dump.boot_mem_regs_cnt++;

    if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) {
        fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns;
        return 0;
    }

    pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
         i, rstart, (rstart + rsize));
    fw_dump.boot_mem_addr[i] = rstart;
    fw_dump.boot_mem_sz[i] = rsize;
    return 1;
}

/*
 * Firmware usually has a hard limit on the data it can copy per region.
 * Honour that by splitting a memory range into multiple regions.
 */
static int __init add_boot_mem_regions(unsigned long mstart,
                                       unsigned long msize)
{
    unsigned long rstart, rsize, max_size;
    int ret = 1;

    rstart = mstart;
    max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
    while (msize) {
        if (msize > max_size)
            rsize = max_size;
        else
            rsize = msize;

        ret = add_boot_mem_region(rstart, rsize);
        if (!ret)
            break;

        msize -= rsize;
        rstart += rsize;
    }

    return ret;
}
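
/*
 * Worked example (illustrative): with max_copy_size = 1GB, a 2.5GB range
 * starting at 0 is split into three regions: [0, 1G), [1G, 2G) and
 * [2G, 2.5G). If the platform's region count limit is hit first,
 * add_boot_mem_region() fails and the split is abandoned.
 */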

static int __init fadump_get_boot_mem_regions(void)
{
    unsigned long size, cur_size, hole_size, last_end;
    unsigned long mem_size = fw_dump.boot_memory_size;
    phys_addr_t reg_start, reg_end;
    int ret = 1;
    u64 i;

    fw_dump.boot_mem_regs_cnt = 0;

    last_end = 0;
    hole_size = 0;
    cur_size = 0;
    for_each_mem_range(i, &reg_start, &reg_end) {
        size = reg_end - reg_start;
        hole_size += (reg_start - last_end);

        if ((cur_size + size) >= mem_size) {
            size = (mem_size - cur_size);
            ret = add_boot_mem_regions(reg_start, size);
            break;
        }

        mem_size -= size;
        cur_size += size;
        ret = add_boot_mem_regions(reg_start, size);
        if (!ret)
            break;

        last_end = reg_end;
    }
    fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);

    return ret;
}

/*
 * Returns true if the given range overlaps with the reserved memory
 * ranges, scanning from index *idx; on overlap, *idx is updated to the
 * index of the overlapping range. Returns false otherwise.
 */
static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
{
    bool ret = false;
    int i;

    for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
        u64 rbase = reserved_mrange_info.mem_ranges[i].base;
        u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;

        if (end <= rbase)
            break;

        if ((end > rbase) && (base < rend)) {
            *idx = i;
            ret = true;
            break;
        }
    }

    return ret;
}

/*
 * Locate a suitable memory area to reserve memory for FADump. While at it,
 * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
 */
static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
{
    struct fadump_memory_range *mrngs;
    phys_addr_t mstart, mend;
    int idx = 0;
    u64 i, ret = 0;

    mrngs = reserved_mrange_info.mem_ranges;
    for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
                            &mstart, &mend, NULL) {
        pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
             i, mstart, mend, base);

        if (mstart > base)
            base = PAGE_ALIGN(mstart);

        while ((mend > base) && ((mend - base) >= size)) {
            if (!overlaps_reserved_ranges(base, base+size, &idx)) {
                ret = base;
                goto out;
            }

            base = mrngs[idx].base + mrngs[idx].size;
            base = PAGE_ALIGN(base);
        }
    }

out:
    return ret;
}
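
/*
 * Illustrative walk: searching for a 4GB chunk when firmware has a
 * reserved range at [6G, 6G+64M) and the free memblock range is
 * [4G, 16G). The probe at [4G, 8G) overlaps the reserved range, so the
 * search resumes at PAGE_ALIGN(6G+64M) and [6G+64M, 10G+64M) succeeds.
 */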

int __init fadump_reserve_mem(void)
{
    u64 base, size, mem_boundary, bootmem_min;
    int ret = 1;

    if (!fw_dump.fadump_enabled)
        return 0;

    if (!fw_dump.fadump_supported) {
        pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
        goto error_out;
    }

    /*
     * Initialize boot memory size
     * If dump is active then we have already calculated the size during
     * first kernel.
     */
    if (!fw_dump.dump_active) {
        fw_dump.boot_memory_size =
            PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
        if (!fw_dump.nocma) {
            fw_dump.boot_memory_size =
                ALIGN(fw_dump.boot_memory_size,
                      CMA_MIN_ALIGNMENT_BYTES);
        }
#endif

        bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
        if (fw_dump.boot_memory_size < bootmem_min) {
            pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
                   fw_dump.boot_memory_size, bootmem_min);
            goto error_out;
        }

        if (!fadump_get_boot_mem_regions()) {
            pr_err("Too many holes in boot memory area to enable fadump\n");
            goto error_out;
        }
    }

    if (memory_limit)
        mem_boundary = memory_limit;
    else
        mem_boundary = memblock_end_of_DRAM();

    base = fw_dump.boot_mem_top;
    size = get_fadump_area_size();
    fw_dump.reserve_dump_area_size = size;
    if (fw_dump.dump_active) {
        pr_info("Firmware-assisted dump is active.\n");

#ifdef CONFIG_HUGETLB_PAGE
        /*
         * FADump capture kernel doesn't care much about hugepages.
         * In fact, handling hugepages in capture kernel is asking for
         * trouble. So, disable HugeTLB support when fadump is active.
         */
        hugetlb_disabled = true;
#endif
        /*
         * If last boot has crashed then reserve all the memory
         * above boot memory size so that we don't touch it until
         * dump is written to disk by userspace tool. This memory
         * can be released for general use by invalidating fadump.
         */
        fadump_reserve_crash_area(base);

        pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
        pr_debug("Reserve dump area start address: 0x%lx\n",
             fw_dump.reserve_dump_area_start);
    } else {
        /*
         * Reserve memory at an offset closer to bottom of the RAM to
         * minimize the impact of memory hot-remove operation.
         */
        base = fadump_locate_reserve_mem(base, size);

        if (!base || (base + size > mem_boundary)) {
            pr_err("Failed to find memory chunk for reservation!\n");
            goto error_out;
        }
        fw_dump.reserve_dump_area_start = base;

        /*
         * Calculate the kernel metadata address and register it with
         * f/w if the platform supports.
         */
        if (fw_dump.ops->fadump_setup_metadata &&
            (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
            goto error_out;

        if (memblock_reserve(base, size)) {
            pr_err("Failed to reserve memory!\n");
            goto error_out;
        }

        pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
            (size >> 20), base, (memblock_phys_mem_size() >> 20));

        ret = fadump_cma_init();
    }

    return ret;
error_out:
    fw_dump.fadump_enabled = 0;
    fw_dump.reserve_dump_area_size = 0;
    return 0;
}

/* Look for fadump= cmdline option. */
static int __init early_fadump_param(char *p)
{
    if (!p)
        return 1;

    if (strncmp(p, "on", 2) == 0)
        fw_dump.fadump_enabled = 1;
    else if (strncmp(p, "off", 3) == 0)
        fw_dump.fadump_enabled = 0;
    else if (strncmp(p, "nocma", 5) == 0) {
        fw_dump.fadump_enabled = 1;
        fw_dump.nocma = 1;
    }

    return 0;
}
early_param("fadump", early_fadump_param);

/*
 * Look for fadump_reserve_mem= cmdline option.
 * TODO: Remove references to the 'fadump_reserve_mem=' parameter once the
 * 'crashkernel=' parameter is fully adopted in its place.
 */
static int __init early_fadump_reserve_mem(char *p)
{
    if (p)
        fw_dump.reserve_bootvar = memparse(p, &p);
    return 0;
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);

void crash_fadump(struct pt_regs *regs, const char *str)
{
    unsigned int msecs;
    struct fadump_crash_info_header *fdh = NULL;
    int old_cpu, this_cpu;
    /* Do not include first CPU */
    unsigned int ncpus = num_online_cpus() - 1;

    if (!should_fadump_crash())
        return;

    /*
     * old_cpu == -1 means this is the first CPU which has come here,
     * go ahead and trigger fadump.
     *
     * old_cpu != -1 means some other CPU is already on its way
     * to trigger fadump, just keep looping here.
     */
    this_cpu = smp_processor_id();
    old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);

    if (old_cpu != -1) {
        atomic_inc(&cpus_in_fadump);

        /*
         * We can't loop here indefinitely. Wait as long as fadump
         * is in force. If we race with fadump un-registration this
         * loop will break and then we go down to normal panic path
         * and reboot. If fadump is in force the first crashing
         * cpu will definitely trigger fadump.
         */
        while (fw_dump.dump_registered)
            cpu_relax();
        return;
    }

    fdh = __va(fw_dump.fadumphdr_addr);
    fdh->crashing_cpu = crashing_cpu;
    crash_save_vmcoreinfo();

    if (regs)
        fdh->regs = *regs;
    else
        ppc_save_regs(&fdh->regs);

    fdh->cpu_mask = *cpu_online_mask;

    /*
     * If we came in via system reset, wait a while for the secondary
     * CPUs to enter.
     */
    if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
        msecs = CRASH_TIMEOUT;
        while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
            mdelay(1);
    }

    fw_dump.ops->fadump_trigger(fdh, str);
}
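
/*
 * The first-crasher election above relies on an atomic compare-and-swap
 * over crashing_cpu. A minimal standalone sketch of the same pattern
 * (illustrative; the example names are hypothetical):
 */
#if 0
static int example_crashing_cpu = -1;

static bool example_try_become_crasher(int this_cpu)
{
    /* cmpxchg() returns the previous value: -1 means we won the race. */
    return cmpxchg(&example_crashing_cpu, -1, this_cpu) == -1;
}
#endif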

u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
    struct elf_prstatus prstatus;

    memset(&prstatus, 0, sizeof(prstatus));
    /*
     * FIXME: How do I get the PID? Do I really need it?
     * prstatus.pr_pid = ????
     */
    elf_core_copy_regs(&prstatus.pr_reg, regs);
    buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
                          &prstatus, sizeof(prstatus));
    return buf;
}

void __init fadump_update_elfcore_header(char *bufp)
{
    struct elf_phdr *phdr;

    bufp += sizeof(struct elfhdr);

    /* First note is a place holder for cpu notes info. */
    phdr = (struct elf_phdr *)bufp;

    if (phdr->p_type == PT_NOTE) {
        phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
        phdr->p_offset = phdr->p_paddr;
        phdr->p_filesz = fw_dump.cpu_notes_buf_size;
        phdr->p_memsz = fw_dump.cpu_notes_buf_size;
    }
    return;
}

static void *__init fadump_alloc_buffer(unsigned long size)
{
    unsigned long count, i;
    struct page *page;
    void *vaddr;

    vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
    if (!vaddr)
        return NULL;

    count = PAGE_ALIGN(size) / PAGE_SIZE;
    page = virt_to_page(vaddr);
    for (i = 0; i < count; i++)
        mark_page_reserved(page + i);
    return vaddr;
}

static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
    free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}

s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
    /* Allocate buffer to hold cpu crash notes. */
    fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
    fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
    fw_dump.cpu_notes_buf_vaddr =
        (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
    if (!fw_dump.cpu_notes_buf_vaddr) {
        pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
               fw_dump.cpu_notes_buf_size);
        return -ENOMEM;
    }

    pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
         fw_dump.cpu_notes_buf_size,
         fw_dump.cpu_notes_buf_vaddr);
    return 0;
}

void fadump_free_cpu_notes_buf(void)
{
    if (!fw_dump.cpu_notes_buf_vaddr)
        return;

    fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
                       fw_dump.cpu_notes_buf_size);
    fw_dump.cpu_notes_buf_vaddr = 0;
    fw_dump.cpu_notes_buf_size = 0;
}

static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
    if (mrange_info->is_static) {
        mrange_info->mem_range_cnt = 0;
        return;
    }

    kfree(mrange_info->mem_ranges);
    memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
           (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}

/*
 * Allocate or reallocate mem_ranges array in incremental units
 * of PAGE_SIZE.
 */
static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
    struct fadump_memory_range *new_array;
    u64 new_size;

    new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
    pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
         new_size, mrange_info->name);

    new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
    if (new_array == NULL) {
        pr_err("Insufficient memory for setting up %s memory ranges\n",
               mrange_info->name);
        fadump_free_mem_ranges(mrange_info);
        return -ENOMEM;
    }

    mrange_info->mem_ranges = new_array;
    mrange_info->mem_ranges_sz = new_size;
    mrange_info->max_mem_ranges = (new_size /
                                   sizeof(struct fadump_memory_range));
    return 0;
}

static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
                                       u64 base, u64 end)
{
    struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
    bool is_adjacent = false;
    u64 start, size;

    if (base == end)
        return 0;

    /*
     * Fold adjacent memory ranges to bring down the memory ranges/
     * PT_LOAD segments count.
     */
    if (mrange_info->mem_range_cnt) {
        start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
        size = mem_ranges[mrange_info->mem_range_cnt - 1].size;

        /*
         * Boot memory area needs separate PT_LOAD segment(s) as it
         * is moved to a different location at the time of crash.
         * So, fold only if the region is not boot memory area.
         */
        if ((start + size) == base && start >= fw_dump.boot_mem_top)
            is_adjacent = true;
    }
    if (!is_adjacent) {
        /* resize the array on reaching the limit */
        if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
            int ret;

            if (mrange_info->is_static) {
                pr_err("Reached array size limit for %s memory ranges\n",
                       mrange_info->name);
                return -ENOSPC;
            }

            ret = fadump_alloc_mem_ranges(mrange_info);
            if (ret)
                return ret;

            /* Update to the new resized array */
            mem_ranges = mrange_info->mem_ranges;
        }

        start = base;
        mem_ranges[mrange_info->mem_range_cnt].base = start;
        mrange_info->mem_range_cnt++;
    }

    mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
    pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
         mrange_info->name, (mrange_info->mem_range_cnt - 1),
         start, end - 1, (end - start));
    return 0;
}
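
/*
 * Worked example (illustrative): adding [1G, 2G) and then [2G, 3G) above
 * boot_mem_top folds into a single [1G, 3G) entry, while the same pair
 * below boot_mem_top stays as two entries so the relocated boot memory
 * keeps its own PT_LOAD segment(s).
 */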

static int fadump_init_elfcore_header(char *bufp)
{
    struct elfhdr *elf;

    elf = (struct elfhdr *) bufp;
    bufp += sizeof(struct elfhdr);
    memcpy(elf->e_ident, ELFMAG, SELFMAG);
    elf->e_ident[EI_CLASS] = ELF_CLASS;
    elf->e_ident[EI_DATA] = ELF_DATA;
    elf->e_ident[EI_VERSION] = EV_CURRENT;
    elf->e_ident[EI_OSABI] = ELF_OSABI;
    memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
    elf->e_type = ET_CORE;
    elf->e_machine = ELF_ARCH;
    elf->e_version = EV_CURRENT;
    elf->e_entry = 0;
    elf->e_phoff = sizeof(struct elfhdr);
    elf->e_shoff = 0;

    if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
        elf->e_flags = 2;
    else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
        elf->e_flags = 1;
    else
        elf->e_flags = 0;

    elf->e_ehsize = sizeof(struct elfhdr);
    elf->e_phentsize = sizeof(struct elf_phdr);
    elf->e_phnum = 0;
    elf->e_shentsize = 0;
    elf->e_shnum = 0;
    elf->e_shstrndx = 0;

    return 0;
}

/*
 * If the given physical address falls within the boot memory region then
 * return the relocated address that points to the dump region reserved
 * for saving initial boot memory contents.
 */
static inline unsigned long fadump_relocate(unsigned long paddr)
{
    unsigned long raddr, rstart, rend, rlast, hole_size;
    int i;

    hole_size = 0;
    rlast = 0;
    raddr = paddr;
    for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
        rstart = fw_dump.boot_mem_addr[i];
        rend = rstart + fw_dump.boot_mem_sz[i];
        hole_size += (rstart - rlast);

        if (paddr >= rstart && paddr < rend) {
            raddr += fw_dump.boot_mem_dest_addr - hole_size;
            break;
        }

        rlast = rend;
    }

    pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
    return raddr;
}
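
/*
 * Worked example (illustrative): with boot memory regions [0, 1G) and
 * [2G, 3G) copied back-to-back to boot_mem_dest_addr D, an address in the
 * second region has a 1G hole before it, so paddr 2G+x relocates to
 * D + 1G + x (i.e. paddr + D - hole_size).
 */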

static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start,
                                        u64 size, unsigned long long offset)
{
    phdr->p_align = 0;
    phdr->p_memsz = size;
    phdr->p_filesz = size;
    phdr->p_paddr = start;
    phdr->p_offset = offset;
    phdr->p_type = PT_LOAD;
    phdr->p_flags = PF_R|PF_W|PF_X;
    phdr->p_vaddr = (unsigned long)__va(start);
}

static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh)
{
    char *bufp;
    struct elfhdr *elf;
    struct elf_phdr *phdr;
    u64 boot_mem_dest_offset;
    unsigned long long i, ra_start, ra_end, ra_size, mstart, mend;

    bufp = (char *) fw_dump.elfcorehdr_addr;
    fadump_init_elfcore_header(bufp);
    elf = (struct elfhdr *)bufp;
    bufp += sizeof(struct elfhdr);

    /*
     * Set up ELF PT_NOTE, a placeholder for CPU notes information.
     * The notes info will be populated later by platform-specific code.
     * Hence, this PT_NOTE will always be the first ELF note.
     *
     * NOTE: Any new ELF note addition should be placed after this note.
     */
    phdr = (struct elf_phdr *)bufp;
    bufp += sizeof(struct elf_phdr);
    phdr->p_type = PT_NOTE;
    phdr->p_flags = 0;
    phdr->p_vaddr = 0;
    phdr->p_align = 0;
    phdr->p_offset = 0;
    phdr->p_paddr = 0;
    phdr->p_filesz = 0;
    phdr->p_memsz = 0;
    /* Increment number of program headers. */
    (elf->e_phnum)++;

    /* setup ELF PT_NOTE for vmcoreinfo */
    phdr = (struct elf_phdr *)bufp;
    bufp += sizeof(struct elf_phdr);
    phdr->p_type = PT_NOTE;
    phdr->p_flags = 0;
    phdr->p_vaddr = 0;
    phdr->p_align = 0;
    phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr;
    phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size;
    /* Increment number of program headers. */
    (elf->e_phnum)++;

    /*
     * Setup PT_LOAD sections. first include boot memory regions
     * and then add rest of the memory regions.
     */
    boot_mem_dest_offset = fw_dump.boot_mem_dest_addr;
    for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
        phdr = (struct elf_phdr *)bufp;
        bufp += sizeof(struct elf_phdr);
        populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i],
                             fw_dump.boot_mem_sz[i],
                             boot_mem_dest_offset);
        /* Increment number of program headers. */
        (elf->e_phnum)++;
        boot_mem_dest_offset += fw_dump.boot_mem_sz[i];
    }

    /* Memory reserved for fadump in first kernel */
    ra_start = fw_dump.reserve_dump_area_start;
    ra_size = get_fadump_area_size();
    ra_end = ra_start + ra_size;

    phdr = (struct elf_phdr *)bufp;
    for_each_mem_range(i, &mstart, &mend) {
        /* Boot memory regions already added, skip them now */
        if (mstart < fw_dump.boot_mem_top) {
            if (mend > fw_dump.boot_mem_top)
                mstart = fw_dump.boot_mem_top;
            else
                continue;
        }

        /* Handle memblock regions overlaps with fadump reserved area */
        if ((ra_start < mend) && (ra_end > mstart)) {
            if ((mstart < ra_start) && (mend > ra_end)) {
                populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
                /* Increment number of program headers. */
                (elf->e_phnum)++;
                bufp += sizeof(struct elf_phdr);
                phdr = (struct elf_phdr *)bufp;
                populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
            } else if (mstart < ra_start) {
                populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
            } else if (ra_end < mend) {
                populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
            }
        } else {
            /* No overlap with fadump reserved memory region */
            populate_elf_pt_load(phdr, mstart, mend - mstart, mstart);
        }

        /* Increment number of program headers. */
        (elf->e_phnum)++;
        bufp += sizeof(struct elf_phdr);
        phdr = (struct elf_phdr *) bufp;
    }
}

static unsigned long init_fadump_header(unsigned long addr)
{
    struct fadump_crash_info_header *fdh;

    if (!addr)
        return 0;

    fdh = __va(addr);
    addr += sizeof(struct fadump_crash_info_header);

    memset(fdh, 0, sizeof(struct fadump_crash_info_header));
    fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
    fdh->version = FADUMP_HEADER_VERSION;
    /* We will set the crashing cpu id in crash_fadump() during crash. */
    fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;

    /*
     * The physical address and size of vmcoreinfo are required in the
     * second kernel to prepare elfcorehdr.
     */
    fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note());
    fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE;

    fdh->pt_regs_sz = sizeof(struct pt_regs);
    /*
     * When the LPAR is terminated by PHYP, ensure all possible CPUs'
     * register data is processed while exporting the vmcore.
     */
    fdh->cpu_mask = *cpu_possible_mask;
    fdh->cpu_mask_sz = sizeof(struct cpumask);

    return addr;
}

static int register_fadump(void)
{
    unsigned long addr;

    /*
     * If no memory is reserved then we can not register for firmware-
     * assisted dump.
     */
    if (!fw_dump.reserve_dump_area_size)
        return -ENODEV;

    addr = fw_dump.fadumphdr_addr;

    /* Initialize fadump crash info header. */
    addr = init_fadump_header(addr);

    /* register the future kernel dump with firmware. */
    pr_debug("Registering for firmware-assisted kernel dump...\n");
    return fw_dump.ops->fadump_register(&fw_dump);
}

void fadump_cleanup(void)
{
    if (!fw_dump.fadump_supported)
        return;

    /* Invalidate the registration only if dump is active. */
    if (fw_dump.dump_active) {
        pr_debug("Invalidating firmware-assisted dump registration\n");
        fw_dump.ops->fadump_invalidate(&fw_dump);
    } else if (fw_dump.dump_registered) {
        /* Un-register Firmware-assisted dump if it was registered. */
        fw_dump.ops->fadump_unregister(&fw_dump);
    }

    if (fw_dump.ops->fadump_cleanup)
        fw_dump.ops->fadump_cleanup(&fw_dump);
}

static void fadump_free_reserved_memory(unsigned long start_pfn,
                                        unsigned long end_pfn)
{
    unsigned long pfn;
    unsigned long time_limit = jiffies + HZ;

    pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
        PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));

    for (pfn = start_pfn; pfn < end_pfn; pfn++) {
        free_reserved_page(pfn_to_page(pfn));

        if (time_after(jiffies, time_limit)) {
            cond_resched();
            time_limit = jiffies + HZ;
        }
    }
}

/*
 * Skip memory holes and free memory that was actually reserved.
 */
static void fadump_release_reserved_area(u64 start, u64 end)
{
    unsigned long reg_spfn, reg_epfn;
    u64 tstart, tend, spfn, epfn;
    int i;

    spfn = PHYS_PFN(start);
    epfn = PHYS_PFN(end);

    for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
        tstart = max_t(u64, spfn, reg_spfn);
        tend = min_t(u64, epfn, reg_epfn);

        if (tstart < tend) {
            fadump_free_reserved_memory(tstart, tend);

            if (tend == epfn)
                break;

            spfn = tend;
        }
    }
}

/*
 * Sort the mem ranges in-place and merge adjacent ranges
 * to minimize the memory ranges count.
 */
static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
    struct fadump_memory_range *mem_ranges;
    u64 base, size;
    int i, j, idx;

    if (!reserved_mrange_info.mem_range_cnt)
        return;

    /* Sort the memory ranges */
    mem_ranges = mrange_info->mem_ranges;
    for (i = 0; i < mrange_info->mem_range_cnt; i++) {
        idx = i;
        for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
            if (mem_ranges[idx].base > mem_ranges[j].base)
                idx = j;
        }
        if (idx != i)
            swap(mem_ranges[idx], mem_ranges[i]);
    }

    /* Merge adjacent reserved ranges */
    idx = 0;
    for (i = 1; i < mrange_info->mem_range_cnt; i++) {
        base = mem_ranges[i-1].base;
        size = mem_ranges[i-1].size;
        if (mem_ranges[i].base == (base + size))
            mem_ranges[idx].size += mem_ranges[i].size;
        else {
            idx++;
            if (i == idx)
                continue;

            mem_ranges[idx] = mem_ranges[i];
        }
    }
    mrange_info->mem_range_cnt = idx + 1;
}
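
/*
 * Worked example (illustrative): ranges {[2G, +1G), [0, +1G), [1G, +1G)}
 * sort to {[0, +1G), [1G, +1G), [2G, +1G)} and then merge into the single
 * range [0, +3G), since each range starts where the previous one ends.
 */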

/*
 * Scan reserved-ranges to consider them while reserving/releasing
 * memory for FADump.
 */
static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
    const __be32 *prop;
    int len, ret = -1;
    unsigned long i;

    /* reserved-ranges already scanned */
    if (reserved_mrange_info.mem_range_cnt != 0)
        return;

    prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
    if (!prop)
        return;

    /*
     * Each reserved range is an (address,size) pair, 2 cells each,
     * totalling 4 cells per range.
     */
    for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
        u64 base, size;

        base = of_read_number(prop + (i * 4) + 0, 2);
        size = of_read_number(prop + (i * 4) + 2, 2);

        if (size) {
            ret = fadump_add_mem_range(&reserved_mrange_info,
                                       base, base + size);
            if (ret < 0) {
                pr_warn("some reserved ranges are ignored!\n");
                break;
            }
        }
    }

    /* Compact reserved ranges */
    sort_and_merge_mem_ranges(&reserved_mrange_info);
}
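
/*
 * Example device tree property (illustrative): two reserved ranges,
 * [0x10000000, +0x1000000) and [0x200000000, +0x10000000), encoded as
 * 64-bit (2-cell) address/size pairs:
 *
 *     reserved-ranges = <0x0 0x10000000 0x0 0x1000000
 *                        0x2 0x00000000 0x0 0x10000000>;
 */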

/*
 * Release the memory that was reserved during early boot to preserve the
 * crashed kernel's memory contents except reserved dump area (permanent
 * reservation) and reserved ranges used by F/W. The released memory will
 * be available for general use.
 */
static void fadump_release_memory(u64 begin, u64 end)
{
    u64 ra_start, ra_end, tstart;
    int i, ret;

    ra_start = fw_dump.reserve_dump_area_start;
    ra_end = ra_start + fw_dump.reserve_dump_area_size;

    /*
     * If reserved ranges array limit is hit, overwrite the last reserved
     * memory range with reserved dump area to ensure it is excluded from
     * the memory being released (reused for next FADump registration).
     */
    if (reserved_mrange_info.mem_range_cnt ==
        reserved_mrange_info.max_mem_ranges)
        reserved_mrange_info.mem_range_cnt--;

    ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
    if (ret != 0)
        return;

    /* Get the reserved ranges list in order first. */
    sort_and_merge_mem_ranges(&reserved_mrange_info);

    /* Exclude reserved ranges and release remaining memory */
    tstart = begin;
    for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
        ra_start = reserved_mrange_info.mem_ranges[i].base;
        ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;

        if (tstart >= ra_end)
            continue;

        if (tstart < ra_start)
            fadump_release_reserved_area(tstart, ra_start);
        tstart = ra_end;
    }

    if (tstart < end)
        fadump_release_reserved_area(tstart, end);
}

static void fadump_free_elfcorehdr_buf(void)
{
    if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0)
        return;

    /*
     * Before freeing the memory of `elfcorehdr`, reset the global
     * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing
     * invalid memory.
     */
    elfcorehdr_addr = ELFCORE_ADDR_ERR;
    fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size);
    fw_dump.elfcorehdr_addr = 0;
    fw_dump.elfcorehdr_size = 0;
}

static void fadump_invalidate_release_mem(void)
{
    mutex_lock(&fadump_mutex);
    if (!fw_dump.dump_active) {
        mutex_unlock(&fadump_mutex);
        return;
    }

    fadump_cleanup();
    mutex_unlock(&fadump_mutex);

    fadump_free_elfcorehdr_buf();
    fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
    fadump_free_cpu_notes_buf();

    /*
     * Setup kernel metadata and initialize the kernel dump
     * memory structure for FADump re-registration.
     */
    if (fw_dump.ops->fadump_setup_metadata &&
        (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
        pr_warn("Failed to setup kernel metadata!\n");
    fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}

static ssize_t release_mem_store(struct kobject *kobj,
                                 struct kobj_attribute *attr,
                                 const char *buf, size_t count)
{
    int input = -1;

    if (!fw_dump.dump_active)
        return -EPERM;

    if (kstrtoint(buf, 0, &input))
        return -EINVAL;

    if (input == 1) {
        /*
         * Take away the '/proc/vmcore'. We are releasing the dump
         * memory, hence it will not be valid anymore.
         */
#ifdef CONFIG_PROC_VMCORE
        vmcore_cleanup();
#endif
        fadump_invalidate_release_mem();

    } else
        return -EINVAL;
    return count;
}

/* Release the reserved memory and disable the FADump */
static void __init unregister_fadump(void)
{
    fadump_cleanup();
    fadump_release_memory(fw_dump.reserve_dump_area_start,
                          fw_dump.reserve_dump_area_size);
    fw_dump.fadump_enabled = 0;
    kobject_put(fadump_kobj);
}

static ssize_t enabled_show(struct kobject *kobj,
                            struct kobj_attribute *attr,
                            char *buf)
{
    return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}

/*
 * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which indicates
 * to userspace that fadump re-registration is not required on memory
 * hotplug events.
 */
static ssize_t hotplug_ready_show(struct kobject *kobj,
                                  struct kobj_attribute *attr,
                                  char *buf)
{
    return sprintf(buf, "%d\n", 1);
}

static ssize_t mem_reserved_show(struct kobject *kobj,
                                 struct kobj_attribute *attr,
                                 char *buf)
{
    return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
}

static ssize_t registered_show(struct kobject *kobj,
                               struct kobj_attribute *attr,
                               char *buf)
{
    return sprintf(buf, "%d\n", fw_dump.dump_registered);
}

static ssize_t bootargs_append_show(struct kobject *kobj,
                                    struct kobj_attribute *attr,
                                    char *buf)
{
    return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area));
}

static ssize_t bootargs_append_store(struct kobject *kobj,
                                     struct kobj_attribute *attr,
                                     const char *buf, size_t count)
{
    char *params;

    if (!fw_dump.fadump_enabled || fw_dump.dump_active)
        return -EPERM;

    if (count >= COMMAND_LINE_SIZE)
        return -EINVAL;

    /*
     * Fail here instead of handling this scenario with
     * some silly workaround in capture kernel.
     */
    if (saved_command_line_len + count >= COMMAND_LINE_SIZE) {
        pr_err("Appending parameters exceeds cmdline size!\n");
        return -ENOSPC;
    }

    params = __va(fw_dump.param_area);
    strscpy_pad(params, buf, COMMAND_LINE_SIZE);
    /* Remove newline character at the end. */
    if (params[count-1] == '\n')
        params[count-1] = '\0';

    return count;
}
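
/*
 * Example usage (illustrative; the extra parameter is hypothetical):
 *
 *     # echo "udev.children_max=2" > /sys/kernel/fadump/bootargs_append
 *     # cat /sys/kernel/fadump/bootargs_append
 *
 * The stored string lives in the shared param_area and is passed to the
 * capture kernel's command line when fadump boots it after a crash.
 */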

static ssize_t registered_store(struct kobject *kobj,
                                struct kobj_attribute *attr,
                                const char *buf, size_t count)
{
    int ret = 0;
    int input = -1;

    if (!fw_dump.fadump_enabled || fw_dump.dump_active)
        return -EPERM;

    if (kstrtoint(buf, 0, &input))
        return -EINVAL;

    mutex_lock(&fadump_mutex);

    switch (input) {
    case 0:
        if (fw_dump.dump_registered == 0) {
            goto unlock_out;
        }

        /* Un-register Firmware-assisted dump */
        pr_debug("Un-register firmware-assisted dump\n");
        fw_dump.ops->fadump_unregister(&fw_dump);
        break;
    case 1:
        if (fw_dump.dump_registered == 1) {
            /* Un-register Firmware-assisted dump */
            fw_dump.ops->fadump_unregister(&fw_dump);
        }
        /* Register Firmware-assisted dump */
        ret = register_fadump();
        break;
    default:
        ret = -EINVAL;
        break;
    }

unlock_out:
    mutex_unlock(&fadump_mutex);
    return ret < 0 ? ret : count;
}

static int fadump_region_show(struct seq_file *m, void *private)
{
    if (!fw_dump.fadump_enabled)
        return 0;

    mutex_lock(&fadump_mutex);
    fw_dump.ops->fadump_region_show(&fw_dump, m);
    mutex_unlock(&fadump_mutex);
    return 0;
}

static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
static struct kobj_attribute register_attr = __ATTR_RW(registered);
static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready);
static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append);

static struct attribute *fadump_attrs[] = {
    &enable_attr.attr,
    &register_attr.attr,
    &mem_reserved_attr.attr,
    &hotplug_ready_attr.attr,
    NULL,
};

ATTRIBUTE_GROUPS(fadump);

DEFINE_SHOW_ATTRIBUTE(fadump_region);

static void __init fadump_init_files(void)
{
    int rc = 0;

    fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
    if (!fadump_kobj) {
        pr_err("failed to create fadump kobject\n");
        return;
    }

    debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
                        &fadump_region_fops);

    if (fw_dump.dump_active) {
        rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
        if (rc)
            pr_err("unable to create release_mem sysfs file (%d)\n",
                   rc);
    }

    rc = sysfs_create_groups(fadump_kobj, fadump_groups);
    if (rc) {
        pr_err("sysfs group creation failed (%d), unregistering FADump",
               rc);
        unregister_fadump();
        return;
    }

    /*
     * The FADump sysfs files were moved from kernel_kobj to fadump_kobj;
     * create symlinks at the old location to maintain backward
     * compatibility:
     *
     *   - fadump_enabled -> fadump/enabled
     *   - fadump_registered -> fadump/registered
     *   - fadump_release_mem -> fadump/release_mem
     */
    rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
                                              "enabled", "fadump_enabled");
    if (rc) {
        pr_err("unable to create fadump_enabled symlink (%d)", rc);
        return;
    }

    rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
                                              "registered",
                                              "fadump_registered");
    if (rc) {
        pr_err("unable to create fadump_registered symlink (%d)", rc);
        sysfs_remove_link(kernel_kobj, "fadump_enabled");
        return;
    }

    if (fw_dump.dump_active) {
        rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
                                                  fadump_kobj,
                                                  "release_mem",
                                                  "fadump_release_mem");
        if (rc)
            pr_err("unable to create fadump_release_mem symlink (%d)",
                   rc);
    }
    return;
}

static int __init fadump_setup_elfcorehdr_buf(void)
{
    int elf_phdr_cnt;
    unsigned long elfcorehdr_size;

    /*
     * Program header for CPU notes comes first, followed by one for
     * vmcoreinfo, and the remaining program headers correspond to
     * memory regions.
     */
    elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory);
    elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr));
    elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size);

    fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size);
    if (!fw_dump.elfcorehdr_addr) {
        pr_err("Failed to allocate %lu bytes for elfcorehdr\n",
               elfcorehdr_size);
        return -ENOMEM;
    }
    fw_dump.elfcorehdr_size = elfcorehdr_size;
    return 0;
}

/*
 * Check if the fadump header of crashed kernel is compatible with fadump kernel.
 *
 * It checks the magic number, endianness, and size of non-primitive type
 * members of fadump header to ensure safe dump collection.
 */
static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh)
{
    if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) {
        pr_err("Old magic number, can't process the dump.\n");
        return false;
    }

    if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
        if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC))
            pr_err("Endianness mismatch between the crashed and fadump kernels.\n");
        else
            pr_err("Fadump header is corrupted.\n");

        return false;
    }

    /*
     * Dump collection is not safe if the size of non-primitive type members
     * of the fadump header do not match between crashed and fadump kernel.
     */
    if (fdh->pt_regs_sz != sizeof(struct pt_regs) ||
        fdh->cpu_mask_sz != sizeof(struct cpumask)) {
        pr_err("Fadump header size mismatch.\n");
        return false;
    }

    return true;
}

static void __init fadump_process(void)
{
    struct fadump_crash_info_header *fdh;

    fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr);
    if (!fdh) {
        pr_err("Crash info header is empty.\n");
        goto err_out;
    }

    /* Avoid processing the dump if fadump header isn't compatible */
    if (!is_fadump_header_compatible(fdh))
        goto err_out;

    /* Allocate buffer for elfcorehdr */
    if (fadump_setup_elfcorehdr_buf())
        goto err_out;

    fadump_populate_elfcorehdr(fdh);

    /* Let platform update the CPU notes in elfcorehdr */
    if (fw_dump.ops->fadump_process(&fw_dump) < 0)
        goto err_out;

    /*
     * elfcorehdr is now ready to be exported.
     *
     * set elfcorehdr_addr so that vmcore module will export the
     * elfcorehdr through '/proc/vmcore'.
     */
    elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr);
    return;

err_out:
    fadump_invalidate_release_mem();
}

/*
 * Reserve memory to store additional parameters to be passed
 * for fadump/capture kernel.
 */
static void fadump_setup_param_area(void)
{
    phys_addr_t range_start, range_end;

    if (!fw_dump.param_area_supported || fw_dump.dump_active)
        return;

    /* This memory can't be used by PFW or bootloader as it is shared across kernels */
    if (radix_enabled()) {
        /*
         * Anywhere in the upper half should be good enough as all memory
         * is accessible in real mode.
         */
        range_start = memblock_end_of_DRAM() / 2;
        range_end = memblock_end_of_DRAM();
    } else {
        /*
         * Passing additional parameters is supported for hash MMU only
         * if the first memory block size is 768MB or higher.
         */
        if (ppc64_rma_size < 0x30000000)
            return;

        /*
         * 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving
         * memory for passing additional parameters in this range to avoid
         * being stomped on by PFW/bootloader.
         */
        range_start = 0x2A000000;
        range_end = range_start + 0x4000000;
    }

    fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE,
                                                   COMMAND_LINE_SIZE,
                                                   range_start,
                                                   range_end);
    if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) {
        pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
        return;
    }

    memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE);
}

/*
 * Prepare for firmware-assisted dump.
 */
int __init setup_fadump(void)
{
    if (!fw_dump.fadump_supported)
        return 0;

    fadump_init_files();
    fadump_show_config();

    if (!fw_dump.fadump_enabled)
        return 1;

    /*
     * If dump data is available then see if it is valid and prepare for
     * saving it to the disk.
     */
    if (fw_dump.dump_active) {
        fadump_process();
    }
    /* Initialize the kernel dump memory structure and register with f/w */
    else if (fw_dump.reserve_dump_area_size) {
        fadump_setup_param_area();
        fw_dump.ops->fadump_init_mem_struct(&fw_dump);
        register_fadump();
    }

    /*
     * In case of panic, fadump is triggered via ppc_panic_event()
     * panic notifier. Setting crash_kexec_post_notifiers to 'true'
     * lets panic() function take crash friendly path before panic
     * notifiers are invoked.
     */
    crash_kexec_post_notifiers = true;

    return 1;
}
/*
 * Use subsys_initcall_sync() here because there is a dependency on
 * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo
 * initialization is done before registering with f/w.
 */
subsys_initcall_sync(setup_fadump);
#else /* !CONFIG_PRESERVE_FA_DUMP */

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
                                      int depth, void *data)
{
    if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
        return 0;

    opal_fadump_dt_scan(&fw_dump, node);
    return 1;
}

/*
 * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
 * preserve crash data. The subsequent memory preserving kernel boot
 * is likely to process this crash data.
 */
int __init fadump_reserve_mem(void)
{
    if (fw_dump.dump_active) {
        /*
         * If last boot has crashed then reserve all the memory
         * above boot memory to preserve crash data.
         */
        pr_info("Preserving crash data for processing in next boot.\n");
        fadump_reserve_crash_area(fw_dump.boot_mem_top);
    } else
        pr_debug("FADump-aware kernel..\n");

    return 1;
}
#endif /* CONFIG_PRESERVE_FA_DUMP */

/* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base)
{
    u64 i, mstart, mend, msize;

    for_each_mem_range(i, &mstart, &mend) {
        msize = mend - mstart;

        if ((mstart + msize) < base)
            continue;

        if (mstart < base) {
            msize -= (base - mstart);
            mstart = base;
        }

        pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
            (msize >> 20), mstart);
        memblock_reserve(mstart, msize);
    }
}

unsigned long __init arch_reserved_kernel_pages(void)
{
    return memblock_reserved_size() / PAGE_SIZE;
}