Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
aaddd3ea ME |
2 | /* |
3 | * Copyright 2008 Michael Ellerman, IBM Corporation. | |
aaddd3ea ME |
4 | */ |
5 | ||
71f6e58e | 6 | #include <linux/kprobes.h> |
c28c15b6 CR |
7 | #include <linux/mmu_context.h> |
8 | #include <linux/random.h> | |
ae0dc736 ME |
9 | #include <linux/vmalloc.h> |
10 | #include <linux/init.h> | |
37bc3e5f | 11 | #include <linux/cpuhotplug.h> |
7c0f6ba6 | 12 | #include <linux/uaccess.h> |
b0337678 | 13 | #include <linux/jump_label.h> |
aaddd3ea | 14 | |
c28c15b6 CR |
15 | #include <asm/debug.h> |
16 | #include <asm/pgalloc.h> | |
17 | #include <asm/tlb.h> | |
37bc3e5f BS |
18 | #include <asm/tlbflush.h> |
19 | #include <asm/page.h> | |
20 | #include <asm/code-patching.h> | |
75346251 | 21 | #include <asm/inst.h> |
aaddd3ea | 22 | |
/*
 * Write @instr to @patch_addr (a writable alias), then flush the data cache
 * and invalidate the icache line so the CPU fetching from @exec_addr sees
 * the new instruction.
 *
 * Returns 0 on success, -EPERM if the store faulted (e.g. the alias was not
 * actually writable).
 */
static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
{
	if (!ppc_inst_prefixed(instr)) {
		u32 val = ppc_inst_val(instr);

		/* a faulting store branches to the "failed" label */
		__put_kernel_nofault(patch_addr, &val, u32, failed);
	} else {
		/* prefixed instructions are 8 bytes, stored as one u64 */
		u64 val = ppc_inst_as_ulong(instr);

		__put_kernel_nofault(patch_addr, &val, u64, failed);
	}

	/* dcbst/sync push the store to memory; icbi/sync/isync drop stale ifetch */
	asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
							    "r" (exec_addr));

	return 0;

failed:
	mb(); /* sync */
	return -EPERM;
}
44 | ||
/*
 * Patch an instruction directly at its final address, with no temporary
 * writable mapping — only valid while kernel text is still writable
 * (early boot) or when strict RWX is disabled.
 */
int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
{
	return __patch_instruction(addr, instr, addr);
}
49 | ||
/*
 * Per-CPU state for text patching. Depending on the patching scheme, a CPU
 * owns either a vmalloc area mapped in init_mm (text_poke path) or a bare
 * mm_struct used as a temporary address space (temp-mm path) — hence the
 * union: the two members are never used together.
 */
struct patch_context {
	union {
		struct vm_struct *area;	/* text_poke path */
		struct mm_struct *mm;	/* temp-mm path */
	};
	unsigned long addr;	/* base address of the patching page */
	pte_t *pte;		/* pre-looked-up kernel PTE (text_poke path only) */
};

static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
37bc3e5f | 60 | |
591b4b26 ME |
61 | static int map_patch_area(void *addr, unsigned long text_poke_addr); |
62 | static void unmap_patch_area(unsigned long addr); | |
63 | ||
/* Whether to patch via a temporary mm: requires SMP and the Radix MMU. */
static bool mm_patch_enabled(void)
{
	return IS_ENABLED(CONFIG_SMP) && radix_enabled();
}
68 | ||
/*
 * The following applies for Radix MMU. Hash MMU has different requirements,
 * and so is not supported.
 *
 * Changing mm requires context synchronising instructions on both sides of
 * the context switch, as well as a hwsync between the last instruction for
 * which the address of an associated storage access was translated using
 * the current context.
 *
 * switch_mm_irqs_off() performs an isync after the context switch. It is
 * the responsibility of the caller to perform the CSI and hwsync before
 * starting/stopping the temp mm.
 *
 * Returns the previously active mm, to be restored via stop_using_temp_mm().
 */
static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
{
	struct mm_struct *orig_mm = current->active_mm;

	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(orig_mm, temp_mm, current);

	/* the temp mm must stay local to this thread/CPU */
	WARN_ON(!mm_is_thread_local(temp_mm));

	/* HW breakpoints must not fire while the patching mapping is live */
	suspend_breakpoints();
	return orig_mm;
}
94 | ||
/* Switch back to the mm that was active before start_using_temp_mm(). */
static void stop_using_temp_mm(struct mm_struct *temp_mm,
			       struct mm_struct *orig_mm)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(temp_mm, orig_mm, current);
	restore_breakpoints();
}
102 | ||
37bc3e5f BS |
103 | static int text_area_cpu_up(unsigned int cpu) |
104 | { | |
105 | struct vm_struct *area; | |
591b4b26 ME |
106 | unsigned long addr; |
107 | int err; | |
37bc3e5f BS |
108 | |
109 | area = get_vm_area(PAGE_SIZE, VM_ALLOC); | |
110 | if (!area) { | |
111 | WARN_ONCE(1, "Failed to create text area for cpu %d\n", | |
112 | cpu); | |
113 | return -1; | |
114 | } | |
591b4b26 ME |
115 | |
116 | // Map/unmap the area to ensure all page tables are pre-allocated | |
117 | addr = (unsigned long)area->addr; | |
118 | err = map_patch_area(empty_zero_page, addr); | |
119 | if (err) | |
120 | return err; | |
121 | ||
122 | unmap_patch_area(addr); | |
123 | ||
2f228ee1 BG |
124 | this_cpu_write(cpu_patching_context.area, area); |
125 | this_cpu_write(cpu_patching_context.addr, addr); | |
126 | this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); | |
37bc3e5f BS |
127 | |
128 | return 0; | |
129 | } | |
130 | ||
/* CPU-offline teardown: release this CPU's patching vm area and clear state. */
static int text_area_cpu_down(unsigned int cpu)
{
	free_vm_area(this_cpu_read(cpu_patching_context.area));
	this_cpu_write(cpu_patching_context.area, NULL);
	this_cpu_write(cpu_patching_context.addr, 0);
	this_cpu_write(cpu_patching_context.pte, NULL);
	return 0;
}
139 | ||
/*
 * Drop a patching mm: free the page tables covering the one patching page,
 * then release the mm reference.
 */
static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
{
	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm);
	free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
	mmput(mm);
}
148 | ||
/*
 * CPU-online setup for the temp-mm patching scheme: allocate a bare mm and
 * pre-allocate the PTE for a randomly chosen, page-aligned patching address.
 */
static int text_area_cpu_up_mm(unsigned int cpu)
{
	struct mm_struct *mm;
	unsigned long addr;
	pte_t *pte;
	spinlock_t *ptl;

	mm = mm_alloc();
	if (WARN_ON(!mm))
		goto fail_no_mm;

	/*
	 * Choose a random page-aligned address from the interval
	 * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
	 * The lower address bound is PAGE_SIZE to avoid the zero-page.
	 */
	addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;

	/*
	 * PTE allocation uses GFP_KERNEL which means we need to
	 * pre-allocate the PTE here because we cannot do the
	 * allocation during patching when IRQs are disabled.
	 *
	 * Using get_locked_pte() to avoid open coding, the lock
	 * is unnecessary.
	 */
	pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		goto fail_no_pte;
	pte_unmap_unlock(pte, ptl);

	this_cpu_write(cpu_patching_context.mm, mm);
	this_cpu_write(cpu_patching_context.addr, addr);

	return 0;

fail_no_pte:
	put_patching_mm(mm, addr);
fail_no_mm:
	return -ENOMEM;
}
190 | ||
/* CPU-offline teardown for the temp-mm scheme: drop the mm and clear state. */
static int text_area_cpu_down_mm(unsigned int cpu)
{
	put_patching_mm(this_cpu_read(cpu_patching_context.mm),
			this_cpu_read(cpu_patching_context.addr));

	this_cpu_write(cpu_patching_context.mm, NULL);
	this_cpu_write(cpu_patching_context.addr, 0);

	return 0;
}
201 | ||
/* Flipped once per-CPU patching state exists; gates patch_instruction(). */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);

/*
 * Register the CPU hotplug callbacks that set up per-CPU patching state
 * (temp-mm or text_poke variant), then enable the static key so
 * patch_instruction() stops falling back to raw patching.
 */
void __init poking_init(void)
{
	int ret;

	/* without strict kernel RWX, raw patching works and nothing is needed */
	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
		return;

	if (mm_patch_enabled())
		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					"powerpc/text_poke_mm:online",
					text_area_cpu_up_mm,
					text_area_cpu_down_mm);
	else
		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
					"powerpc/text_poke:online",
					text_area_cpu_up,
					text_area_cpu_down);

	/* cpuhp_setup_state returns >= 0 on success */
	if (WARN_ON(ret < 0))
		return;

	static_branch_enable(&poking_init_done);
}
37bc3e5f | 228 | |
8b4bb0ad CL |
229 | static unsigned long get_patch_pfn(void *addr) |
230 | { | |
231 | if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr)) | |
232 | return vmalloc_to_pfn(addr); | |
233 | else | |
234 | return __pa_symbol(addr) >> PAGE_SHIFT; | |
235 | } | |
236 | ||
/*
 * This can be called for kernel text or a module.
 *
 * Maps the page backing @addr at @text_poke_addr with a writable kernel PTE.
 */
static int map_patch_area(void *addr, unsigned long text_poke_addr)
{
	unsigned long pfn = get_patch_pfn(addr);

	return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
}
246 | ||
/*
 * Tear down the temporary kernel mapping at @addr and flush the TLB.
 * Every page-table level is expected to be present, since map_patch_area()
 * succeeded earlier; a missing level indicates a bug, hence the WARN_ONs.
 */
static void unmap_patch_area(unsigned long addr)
{
	pte_t *ptep;
	pmd_t *pmdp;
	pud_t *pudp;
	p4d_t *p4dp;
	pgd_t *pgdp;

	/* walk down the kernel page tables to the PTE for @addr */
	pgdp = pgd_offset_k(addr);
	if (WARN_ON(pgd_none(*pgdp)))
		return;

	p4dp = p4d_offset(pgdp, addr);
	if (WARN_ON(p4d_none(*p4dp)))
		return;

	pudp = pud_offset(p4dp, addr);
	if (WARN_ON(pud_none(*pudp)))
		return;

	pmdp = pmd_offset(pudp, addr);
	if (WARN_ON(pmd_none(*pmdp)))
		return;

	ptep = pte_offset_kernel(pmdp, addr);
	if (WARN_ON(pte_none(*ptep)))
		return;

	/*
	 * In hash, pte_clear flushes the tlb, in radix, we have to
	 */
	pte_clear(&init_mm, addr, ptep);
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
281 | ||
/*
 * Patch @addr via a temporary mm: map the page backing @addr at this CPU's
 * pre-chosen patching address in the patching mm, switch to that mm, write
 * the instruction, then switch back and tear the mapping down.
 * Called with IRQs disabled.
 */
static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
{
	int err;
	u32 *patch_addr;
	unsigned long text_poke_addr;
	pte_t *pte;
	unsigned long pfn = get_patch_pfn(addr);
	struct mm_struct *patching_mm;
	struct mm_struct *orig_mm;
	spinlock_t *ptl;

	patching_mm = __this_cpu_read(cpu_patching_context.mm);
	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
	/* alias of @addr inside the patching page */
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	/* PTE was pre-allocated at hotplug time, so this cannot sleep */
	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
	if (!pte)
		return -ENOMEM;

	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);

	/* order PTE update before use, also serves as the hwsync */
	asm volatile("ptesync": : :"memory");

	/* order context switch after arbitrary prior code */
	isync();

	orig_mm = start_using_temp_mm(patching_mm);

	err = __patch_instruction(addr, instr, patch_addr);

	/* context synchronisation performed by __patch_instruction (isync or exception) */
	stop_using_temp_mm(patching_mm, orig_mm);

	pte_clear(patching_mm, text_poke_addr, pte);
	/*
	 * ptesync to order PTE update before TLB invalidation done
	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
	 */
	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);

	pte_unmap_unlock(pte, ptl);

	return err;
}
327 | ||
/*
 * Patch @addr via this CPU's dedicated vmalloc page: install a writable
 * kernel PTE aliasing the target page, write the instruction, then clear
 * the PTE and flush the TLB. Called with IRQs disabled.
 */
static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
	int err;
	u32 *patch_addr;
	unsigned long text_poke_addr;
	pte_t *pte;
	unsigned long pfn = get_patch_pfn(addr);

	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
	/* alias of @addr inside the patching page */
	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

	/* PTE was pre-looked-up at hotplug time */
	pte = __this_cpu_read(cpu_patching_context.pte);
	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
	/* See ptesync comment in radix__set_pte_at() */
	if (radix_enabled())
		asm volatile("ptesync": : :"memory");

	err = __patch_instruction(addr, instr, patch_addr);

	pte_clear(&init_mm, text_poke_addr, pte);
	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);

	return err;
}
352 | ||
/*
 * Patch the instruction at @addr to @instr, using a temporary writable
 * alias so the patch works even when kernel text is mapped read-only.
 * Returns 0 on success, negative errno on failure.
 */
int patch_instruction(u32 *addr, ppc_inst_t instr)
{
	int err;
	unsigned long flags;

	/*
	 * During early early boot patch_instruction is called
	 * when text_poke_area is not ready, but we still need
	 * to allow patching. We just do the plain old patching
	 */
	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
	    !static_branch_likely(&poking_init_done))
		return raw_patch_instruction(addr, instr);

	/* IRQs off: the per-CPU patching context must not migrate or be reused */
	local_irq_save(flags);
	if (mm_patch_enabled())
		err = __do_patch_instruction_mm(addr, instr);
	else
		err = __do_patch_instruction(addr, instr);
	local_irq_restore(flags);

	return err;
}
NOKPROBE_SYMBOL(patch_instruction);
377 | ||
69d4d6e5 | 378 | int patch_branch(u32 *addr, unsigned long target, int flags) |
e7a57273 | 379 | { |
c545b9f0 | 380 | ppc_inst_t instr; |
7c95d889 | 381 | |
d5937db1 CL |
382 | if (create_branch(&instr, addr, target, flags)) |
383 | return -ERANGE; | |
384 | ||
7c95d889 | 385 | return patch_instruction(addr, instr); |
e7a57273 ME |
386 | } |
387 | ||
51c9c084 A |
388 | /* |
389 | * Helper to check if a given instruction is a conditional branch | |
390 | * Derived from the conditional checks in analyse_instr() | |
391 | */ | |
c545b9f0 | 392 | bool is_conditional_branch(ppc_inst_t instr) |
51c9c084 | 393 | { |
8094892d | 394 | unsigned int opcode = ppc_inst_primary_opcode(instr); |
51c9c084 A |
395 | |
396 | if (opcode == 16) /* bc, bca, bcl, bcla */ | |
397 | return true; | |
398 | if (opcode == 19) { | |
777e26f0 | 399 | switch ((ppc_inst_val(instr) >> 1) & 0x3ff) { |
51c9c084 A |
400 | case 16: /* bclr, bclrl */ |
401 | case 528: /* bcctr, bcctrl */ | |
402 | case 560: /* bctar, bctarl */ | |
403 | return true; | |
404 | } | |
405 | } | |
406 | return false; | |
407 | } | |
71f6e58e | 408 | NOKPROBE_SYMBOL(is_conditional_branch); |
51c9c084 | 409 | |
c545b9f0 | 410 | int create_cond_branch(ppc_inst_t *instr, const u32 *addr, |
7c95d889 | 411 | unsigned long target, int flags) |
411781a2 | 412 | { |
411781a2 ME |
413 | long offset; |
414 | ||
415 | offset = target; | |
416 | if (! (flags & BRANCH_ABSOLUTE)) | |
417 | offset = offset - (unsigned long)addr; | |
418 | ||
419 | /* Check we can represent the target in the instruction format */ | |
4549c3ea | 420 | if (!is_offset_in_cond_branch_range(offset)) |
7c95d889 | 421 | return 1; |
411781a2 ME |
422 | |
423 | /* Mask out the flags and target, so they don't step on each other. */ | |
94afd069 | 424 | *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC)); |
411781a2 | 425 | |
7c95d889 | 426 | return 0; |
411781a2 ME |
427 | } |
428 | ||
c545b9f0 | 429 | int instr_is_relative_branch(ppc_inst_t instr) |
411781a2 | 430 | { |
777e26f0 | 431 | if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) |
411781a2 ME |
432 | return 0; |
433 | ||
434 | return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); | |
435 | } | |
436 | ||
c545b9f0 | 437 | int instr_is_relative_link_branch(ppc_inst_t instr) |
b9eab08d | 438 | { |
777e26f0 | 439 | return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); |
b9eab08d JP |
440 | } |
441 | ||
69d4d6e5 | 442 | static unsigned long branch_iform_target(const u32 *instr) |
411781a2 ME |
443 | { |
444 | signed long imm; | |
445 | ||
18c85964 | 446 | imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; |
411781a2 ME |
447 | |
448 | /* If the top bit of the immediate value is set this is negative */ | |
449 | if (imm & 0x2000000) | |
450 | imm -= 0x4000000; | |
451 | ||
18c85964 | 452 | if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) |
411781a2 ME |
453 | imm += (unsigned long)instr; |
454 | ||
455 | return (unsigned long)imm; | |
456 | } | |
457 | ||
69d4d6e5 | 458 | static unsigned long branch_bform_target(const u32 *instr) |
411781a2 ME |
459 | { |
460 | signed long imm; | |
461 | ||
18c85964 | 462 | imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; |
411781a2 ME |
463 | |
464 | /* If the top bit of the immediate value is set this is negative */ | |
465 | if (imm & 0x8000) | |
466 | imm -= 0x10000; | |
467 | ||
18c85964 | 468 | if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) |
411781a2 ME |
469 | imm += (unsigned long)instr; |
470 | ||
471 | return (unsigned long)imm; | |
472 | } | |
473 | ||
69d4d6e5 | 474 | unsigned long branch_target(const u32 *instr) |
411781a2 | 475 | { |
f8faaffa | 476 | if (instr_is_branch_iform(ppc_inst_read(instr))) |
411781a2 | 477 | return branch_iform_target(instr); |
f8faaffa | 478 | else if (instr_is_branch_bform(ppc_inst_read(instr))) |
411781a2 ME |
479 | return branch_bform_target(instr); |
480 | ||
481 | return 0; | |
482 | } | |
483 | ||
c545b9f0 | 484 | int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) |
411781a2 ME |
485 | { |
486 | unsigned long target; | |
411781a2 ME |
487 | target = branch_target(src); |
488 | ||
f8faaffa JN |
489 | if (instr_is_branch_iform(ppc_inst_read(src))) |
490 | return create_branch(instr, dest, target, | |
491 | ppc_inst_val(ppc_inst_read(src))); | |
492 | else if (instr_is_branch_bform(ppc_inst_read(src))) | |
493 | return create_cond_branch(instr, dest, target, | |
494 | ppc_inst_val(ppc_inst_read(src))); | |
411781a2 | 495 | |
7c95d889 | 496 | return 1; |
411781a2 | 497 | } |