Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
d28f6df1 GL |
2 | /* |
3 | * kexec for arm64 | |
4 | * | |
5 | * Copyright (C) Linaro. | |
6 | * Copyright (C) Huawei Futurewei Technologies. | |
d28f6df1 GL |
7 | */ |
8 | ||
78fd584c AT |
9 | #include <linux/interrupt.h> |
10 | #include <linux/irq.h> | |
11 | #include <linux/kernel.h> | |
d28f6df1 | 12 | #include <linux/kexec.h> |
254a41c0 | 13 | #include <linux/page-flags.h> |
6d47c23b | 14 | #include <linux/set_memory.h> |
d28f6df1 GL |
15 | #include <linux/smp.h> |
16 | ||
17 | #include <asm/cacheflush.h> | |
18 | #include <asm/cpu_ops.h> | |
0fbeb318 | 19 | #include <asm/daifflags.h> |
20a16624 | 20 | #include <asm/memory.h> |
98d2e153 | 21 | #include <asm/mmu.h> |
d28f6df1 | 22 | #include <asm/mmu_context.h> |
98d2e153 | 23 | #include <asm/page.h> |
19a046f0 | 24 | #include <asm/sections.h> |
08eae0ef | 25 | #include <asm/trans_pgd.h> |
d28f6df1 | 26 | |
221f2c77 GL |
27 | /** |
28 | * kexec_image_info - For debugging output. | |
29 | */ | |
30 | #define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) | |
31 | static void _kexec_image_info(const char *func, int line, | |
32 | const struct kimage *kimage) | |
33 | { | |
34 | unsigned long i; | |
35 | ||
36 | pr_debug("%s:%d:\n", func, line); | |
37 | pr_debug(" kexec kimage info:\n"); | |
38 | pr_debug(" type: %d\n", kimage->type); | |
39 | pr_debug(" start: %lx\n", kimage->start); | |
40 | pr_debug(" head: %lx\n", kimage->head); | |
41 | pr_debug(" nr_segments: %lu\n", kimage->nr_segments); | |
08eae0ef | 42 | pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem); |
4c3c3123 | 43 | pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); |
08eae0ef | 44 | pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors); |
221f2c77 GL |
45 | |
46 | for (i = 0; i < kimage->nr_segments; i++) { | |
47 | pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", | |
48 | i, | |
49 | kimage->segment[i].mem, | |
50 | kimage->segment[i].mem + kimage->segment[i].memsz, | |
51 | kimage->segment[i].memsz, | |
52 | kimage->segment[i].memsz / PAGE_SIZE); | |
53 | } | |
54 | } | |
55 | ||
d28f6df1 GL |
/*
 * machine_kexec_cleanup - arch cleanup hook for a kimage.
 *
 * Nothing is done here; the definition only exists so that the build
 * does not fail.
 */
void machine_kexec_cleanup(struct kimage *kimage)
{
}
60 | ||
61 | /** | |
62 | * machine_kexec_prepare - Prepare for a kexec reboot. | |
63 | * | |
64 | * Called from the core kexec code when a kernel image is loaded. | |
65 | * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus | |
66 | * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). | |
67 | */ | |
68 | int machine_kexec_prepare(struct kimage *kimage) | |
69 | { | |
d28f6df1 GL |
70 | if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { |
71 | pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); | |
72 | return -EBUSY; | |
73 | } | |
74 | ||
75 | return 0; | |
76 | } | |
77 | ||
d28f6df1 GL |
78 | /** |
79 | * kexec_segment_flush - Helper to flush the kimage segments to PoC. | |
80 | */ | |
81 | static void kexec_segment_flush(const struct kimage *kimage) | |
82 | { | |
83 | unsigned long i; | |
84 | ||
85 | pr_debug("%s:\n", __func__); | |
86 | ||
87 | for (i = 0; i < kimage->nr_segments; i++) { | |
88 | pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", | |
89 | i, | |
90 | kimage->segment[i].mem, | |
91 | kimage->segment[i].mem + kimage->segment[i].memsz, | |
92 | kimage->segment[i].memsz, | |
93 | kimage->segment[i].memsz / PAGE_SIZE); | |
94 | ||
fade9c2c | 95 | dcache_clean_inval_poc( |
814b1860 FT |
96 | (unsigned long)phys_to_virt(kimage->segment[i].mem), |
97 | (unsigned long)phys_to_virt(kimage->segment[i].mem) + | |
98 | kimage->segment[i].memsz); | |
d28f6df1 GL |
99 | } |
100 | } | |
101 | ||
08eae0ef PT |
102 | /* Allocates pages for kexec page table */ |
103 | static void *kexec_page_alloc(void *arg) | |
104 | { | |
105 | struct kimage *kimage = (struct kimage *)arg; | |
106 | struct page *page = kimage_alloc_control_pages(kimage, 0); | |
7afccde3 | 107 | void *vaddr = NULL; |
08eae0ef PT |
108 | |
109 | if (!page) | |
110 | return NULL; | |
111 | ||
7afccde3 RW |
112 | vaddr = page_address(page); |
113 | memset(vaddr, 0, PAGE_SIZE); | |
08eae0ef | 114 | |
7afccde3 | 115 | return vaddr; |
08eae0ef PT |
116 | } |
117 | ||
0d8732e4 PT |
118 | int machine_kexec_post_load(struct kimage *kimage) |
119 | { | |
3744b528 PT |
120 | int rc; |
121 | pgd_t *trans_pgd; | |
0d8732e4 | 122 | void *reloc_code = page_to_virt(kimage->control_code_page); |
19a046f0 | 123 | long reloc_size; |
08eae0ef PT |
124 | struct trans_pgd_info info = { |
125 | .trans_alloc_page = kexec_page_alloc, | |
126 | .trans_alloc_arg = kimage, | |
127 | }; | |
0d8732e4 | 128 | |
5bb6834f PT |
129 | /* If in place, relocation is not used, only flush next kernel */ |
130 | if (kimage->head & IND_DONE) { | |
0d8732e4 | 131 | kexec_segment_flush(kimage); |
5bb6834f PT |
132 | kexec_image_info(kimage); |
133 | return 0; | |
134 | } | |
0d8732e4 | 135 | |
08eae0ef PT |
136 | kimage->arch.el2_vectors = 0; |
137 | if (is_hyp_nvhe()) { | |
3744b528 PT |
138 | rc = trans_pgd_copy_el2_vectors(&info, |
139 | &kimage->arch.el2_vectors); | |
08eae0ef PT |
140 | if (rc) |
141 | return rc; | |
142 | } | |
143 | ||
3744b528 PT |
144 | /* Create a copy of the linear map */ |
145 | trans_pgd = kexec_page_alloc(kimage); | |
146 | if (!trans_pgd) | |
147 | return -ENOMEM; | |
148 | rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END); | |
149 | if (rc) | |
150 | return rc; | |
151 | kimage->arch.ttbr1 = __pa(trans_pgd); | |
2f218324 | 152 | kimage->arch.zero_page = __pa_symbol(empty_zero_page); |
3744b528 | 153 | |
19a046f0 PT |
154 | reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; |
155 | memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); | |
0d8732e4 | 156 | kimage->arch.kern_reloc = __pa(reloc_code); |
efc2d0f2 PT |
157 | rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0, |
158 | &kimage->arch.t0sz, reloc_code); | |
159 | if (rc) | |
160 | return rc; | |
161 | kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage; | |
0d8732e4 PT |
162 | |
163 | /* Flush the reloc_code in preparation for its execution. */ | |
164 | dcache_clean_inval_poc((unsigned long)reloc_code, | |
19a046f0 | 165 | (unsigned long)reloc_code + reloc_size); |
0d8732e4 | 166 | icache_inval_pou((uintptr_t)reloc_code, |
19a046f0 | 167 | (uintptr_t)reloc_code + reloc_size); |
5bb6834f | 168 | kexec_image_info(kimage); |
0d8732e4 PT |
169 | |
170 | return 0; | |
171 | } | |
172 | ||
d28f6df1 GL |
173 | /** |
174 | * machine_kexec - Do the kexec reboot. | |
175 | * | |
176 | * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. | |
177 | */ | |
178 | void machine_kexec(struct kimage *kimage) | |
179 | { | |
78fd584c AT |
180 | bool in_kexec_crash = (kimage == kexec_crash_image); |
181 | bool stuck_cpus = cpus_are_stuck_in_kernel(); | |
d28f6df1 GL |
182 | |
183 | /* | |
184 | * New cpus may have become stuck_in_kernel after we loaded the image. | |
185 | */ | |
78fd584c AT |
186 | BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1))); |
187 | WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), | |
188 | "Some CPUs may be stale, kdump will be unreliable.\n"); | |
d28f6df1 | 189 | |
d28f6df1 GL |
190 | pr_info("Bye!\n"); |
191 | ||
0fbeb318 | 192 | local_daif_mask(); |
d28f6df1 GL |
193 | |
194 | /* | |
efc2d0f2 | 195 | * Both restart and kernel_reloc will shutdown the MMU, disable data |
5bb6834f | 196 | * caches. However, restart will start new kernel or purgatory directly, |
efc2d0f2 | 197 | * kernel_reloc contains the body of arm64_relocate_new_kernel |
4c9e7e64 AT |
198 | * In kexec case, kimage->start points to purgatory assuming that |
199 | * kernel entry and dtb address are embedded in purgatory by | |
200 | * userspace (kexec-tools). | |
201 | * In kexec_file case, the kernel starts directly without purgatory. | |
d28f6df1 | 202 | */ |
5bb6834f | 203 | if (kimage->head & IND_DONE) { |
7a2512fa | 204 | typeof(cpu_soft_restart) *restart; |
5bb6834f PT |
205 | |
206 | cpu_install_idmap(); | |
7a2512fa | 207 | restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart)); |
5bb6834f PT |
208 | restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, |
209 | 0, 0); | |
210 | } else { | |
efc2d0f2 PT |
211 | void (*kernel_reloc)(struct kimage *kimage); |
212 | ||
08eae0ef PT |
213 | if (is_hyp_nvhe()) |
214 | __hyp_set_vectors(kimage->arch.el2_vectors); | |
efc2d0f2 PT |
215 | cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz); |
216 | kernel_reloc = (void *)kimage->arch.kern_reloc; | |
217 | kernel_reloc(kimage); | |
5bb6834f | 218 | } |
d28f6df1 GL |
219 | |
220 | BUG(); /* Should never get here. */ | |
221 | } | |
222 | ||
78fd584c AT |
223 | static void machine_kexec_mask_interrupts(void) |
224 | { | |
225 | unsigned int i; | |
226 | struct irq_desc *desc; | |
227 | ||
228 | for_each_irq_desc(i, desc) { | |
229 | struct irq_chip *chip; | |
230 | int ret; | |
231 | ||
232 | chip = irq_desc_get_chip(desc); | |
233 | if (!chip) | |
234 | continue; | |
235 | ||
236 | /* | |
237 | * First try to remove the active state. If this | |
238 | * fails, try to EOI the interrupt. | |
239 | */ | |
240 | ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); | |
241 | ||
242 | if (ret && irqd_irq_inprogress(&desc->irq_data) && | |
243 | chip->irq_eoi) | |
244 | chip->irq_eoi(&desc->irq_data); | |
245 | ||
246 | if (chip->irq_mask) | |
247 | chip->irq_mask(&desc->irq_data); | |
248 | ||
249 | if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) | |
250 | chip->irq_disable(&desc->irq_data); | |
251 | } | |
252 | } | |
253 | ||
/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 *
 * With local interrupts disabled, the other CPUs are stopped, the register
 * state in @regs is saved for the current CPU and interrupts are masked.
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	int this_cpu;

	local_irq_disable();

	/* Stop every CPU except the one that crashed. */
	crash_smp_send_stop();

	/* Record state for the crashing CPU, then mask interrupts. */
	this_cpu = smp_processor_id();
	crash_save_cpu(regs, this_cpu);
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}
98d2e153 TA |
270 | |
271 | void arch_kexec_protect_crashkres(void) | |
272 | { | |
273 | int i; | |
274 | ||
98d2e153 TA |
275 | for (i = 0; i < kexec_crash_image->nr_segments; i++) |
276 | set_memory_valid( | |
277 | __phys_to_virt(kexec_crash_image->segment[i].mem), | |
278 | kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0); | |
279 | } | |
280 | ||
281 | void arch_kexec_unprotect_crashkres(void) | |
282 | { | |
283 | int i; | |
284 | ||
285 | for (i = 0; i < kexec_crash_image->nr_segments; i++) | |
286 | set_memory_valid( | |
287 | __phys_to_virt(kexec_crash_image->segment[i].mem), | |
288 | kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1); | |
289 | } | |
254a41c0 AT |
290 | |
291 | #ifdef CONFIG_HIBERNATION | |
292 | /* | |
293 | * To preserve the crash dump kernel image, the relevant memory segments | |
294 | * should be mapped again around the hibernation. | |
295 | */ | |
296 | void crash_prepare_suspend(void) | |
297 | { | |
298 | if (kexec_crash_image) | |
299 | arch_kexec_unprotect_crashkres(); | |
300 | } | |
301 | ||
302 | void crash_post_resume(void) | |
303 | { | |
304 | if (kexec_crash_image) | |
305 | arch_kexec_protect_crashkres(); | |
306 | } | |
307 | ||
308 | /* | |
309 | * crash_is_nosave | |
310 | * | |
311 | * Return true only if a page is part of reserved memory for crash dump kernel, | |
312 | * but does not hold any data of loaded kernel image. | |
313 | * | |
314 | * Note that all the pages in crash dump kernel memory have been initially | |
d9fa9d95 | 315 | * marked as Reserved as memory was allocated via memblock_reserve(). |
254a41c0 AT |
316 | * |
317 | * In hibernation, the pages which are Reserved and yet "nosave" are excluded | |
318 | * from the hibernation iamge. crash_is_nosave() does thich check for crash | |
319 | * dump kernel and will reduce the total size of hibernation image. | |
320 | */ | |
321 | ||
322 | bool crash_is_nosave(unsigned long pfn) | |
323 | { | |
324 | int i; | |
325 | phys_addr_t addr; | |
326 | ||
327 | if (!crashk_res.end) | |
328 | return false; | |
329 | ||
330 | /* in reserved memory? */ | |
331 | addr = __pfn_to_phys(pfn); | |
944a45ab CZ |
332 | if ((addr < crashk_res.start) || (crashk_res.end < addr)) { |
333 | if (!crashk_low_res.end) | |
334 | return false; | |
335 | ||
336 | if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) | |
337 | return false; | |
338 | } | |
254a41c0 AT |
339 | |
340 | if (!kexec_crash_image) | |
341 | return true; | |
342 | ||
343 | /* not part of loaded kernel image? */ | |
344 | for (i = 0; i < kexec_crash_image->nr_segments; i++) | |
345 | if (addr >= kexec_crash_image->segment[i].mem && | |
346 | addr < (kexec_crash_image->segment[i].mem + | |
347 | kexec_crash_image->segment[i].memsz)) | |
348 | return false; | |
349 | ||
350 | return true; | |
351 | } | |
352 | ||
353 | void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) | |
354 | { | |
355 | unsigned long addr; | |
356 | struct page *page; | |
357 | ||
358 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | |
359 | page = phys_to_page(addr); | |
254a41c0 AT |
360 | free_reserved_page(page); |
361 | } | |
362 | } | |
363 | #endif /* CONFIG_HIBERNATION */ |