Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
aaddd3ea ME |
2 | /* |
3 | * Copyright 2008 Michael Ellerman, IBM Corporation. | |
aaddd3ea ME |
4 | */ |
5 | ||
71f6e58e | 6 | #include <linux/kprobes.h> |
c28c15b6 CR |
7 | #include <linux/mmu_context.h> |
8 | #include <linux/random.h> | |
ae0dc736 ME |
9 | #include <linux/vmalloc.h> |
10 | #include <linux/init.h> | |
37bc3e5f | 11 | #include <linux/cpuhotplug.h> |
7c0f6ba6 | 12 | #include <linux/uaccess.h> |
b0337678 | 13 | #include <linux/jump_label.h> |
aaddd3ea | 14 | |
c28c15b6 CR |
15 | #include <asm/debug.h> |
16 | #include <asm/pgalloc.h> | |
17 | #include <asm/tlb.h> | |
37bc3e5f BS |
18 | #include <asm/tlbflush.h> |
19 | #include <asm/page.h> | |
20 | #include <asm/code-patching.h> | |
75346251 | 21 | #include <asm/inst.h> |
aaddd3ea | 22 | |
c545b9f0 | 23 | static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) |
aaddd3ea | 24 | { |
e63ceebd CL |
25 | if (!ppc_inst_prefixed(instr)) { |
26 | u32 val = ppc_inst_val(instr); | |
27 | ||
28 | __put_kernel_nofault(patch_addr, &val, u32, failed); | |
29 | } else { | |
693557eb | 30 | u64 val = ppc_inst_as_ulong(instr); |
e63ceebd CL |
31 | |
32 | __put_kernel_nofault(patch_addr, &val, u64, failed); | |
33 | } | |
37bc3e5f | 34 | |
8cf4c057 CL |
35 | asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), |
36 | "r" (exec_addr)); | |
37bc3e5f BS |
37 | |
38 | return 0; | |
e64ac41a CL |
39 | |
40 | failed: | |
bbffdd2f | 41 | return -EPERM; |
37bc3e5f BS |
42 | } |
43 | ||
c545b9f0 | 44 | int raw_patch_instruction(u32 *addr, ppc_inst_t instr) |
8cf4c057 CL |
45 | { |
46 | return __patch_instruction(addr, instr, addr); | |
47 | } | |
48 | ||
2f228ee1 BG |
49 | struct patch_context { |
50 | union { | |
51 | struct vm_struct *area; | |
52 | struct mm_struct *mm; | |
53 | }; | |
54 | unsigned long addr; | |
55 | pte_t *pte; | |
56 | }; | |
57 | ||
58 | static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); | |
37bc3e5f | 59 | |
591b4b26 ME |
/* Defined further down; needed early by text_area_cpu_up(). */
static int map_patch_area(void *addr, unsigned long text_poke_addr);
static void unmap_patch_area(unsigned long addr);
62 | ||
c28c15b6 CR |
63 | static bool mm_patch_enabled(void) |
64 | { | |
65 | return IS_ENABLED(CONFIG_SMP) && radix_enabled(); | |
66 | } | |
67 | ||
68 | /* | |
69 | * The following applies for Radix MMU. Hash MMU has different requirements, | |
70 | * and so is not supported. | |
71 | * | |
72 | * Changing mm requires context synchronising instructions on both sides of | |
73 | * the context switch, as well as a hwsync between the last instruction for | |
74 | * which the address of an associated storage access was translated using | |
75 | * the current context. | |
76 | * | |
77 | * switch_mm_irqs_off() performs an isync after the context switch. It is | |
78 | * the responsibility of the caller to perform the CSI and hwsync before | |
79 | * starting/stopping the temp mm. | |
80 | */ | |
81 | static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm) | |
82 | { | |
83 | struct mm_struct *orig_mm = current->active_mm; | |
84 | ||
85 | lockdep_assert_irqs_disabled(); | |
86 | switch_mm_irqs_off(orig_mm, temp_mm, current); | |
87 | ||
88 | WARN_ON(!mm_is_thread_local(temp_mm)); | |
89 | ||
90 | suspend_breakpoints(); | |
91 | return orig_mm; | |
92 | } | |
93 | ||
94 | static void stop_using_temp_mm(struct mm_struct *temp_mm, | |
95 | struct mm_struct *orig_mm) | |
96 | { | |
97 | lockdep_assert_irqs_disabled(); | |
98 | switch_mm_irqs_off(temp_mm, orig_mm, current); | |
99 | restore_breakpoints(); | |
100 | } | |
101 | ||
37bc3e5f BS |
102 | static int text_area_cpu_up(unsigned int cpu) |
103 | { | |
104 | struct vm_struct *area; | |
591b4b26 ME |
105 | unsigned long addr; |
106 | int err; | |
37bc3e5f BS |
107 | |
108 | area = get_vm_area(PAGE_SIZE, VM_ALLOC); | |
109 | if (!area) { | |
110 | WARN_ONCE(1, "Failed to create text area for cpu %d\n", | |
111 | cpu); | |
112 | return -1; | |
113 | } | |
591b4b26 ME |
114 | |
115 | // Map/unmap the area to ensure all page tables are pre-allocated | |
116 | addr = (unsigned long)area->addr; | |
117 | err = map_patch_area(empty_zero_page, addr); | |
118 | if (err) | |
119 | return err; | |
120 | ||
121 | unmap_patch_area(addr); | |
122 | ||
2f228ee1 BG |
123 | this_cpu_write(cpu_patching_context.area, area); |
124 | this_cpu_write(cpu_patching_context.addr, addr); | |
125 | this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); | |
37bc3e5f BS |
126 | |
127 | return 0; | |
128 | } | |
129 | ||
130 | static int text_area_cpu_down(unsigned int cpu) | |
131 | { | |
2f228ee1 BG |
132 | free_vm_area(this_cpu_read(cpu_patching_context.area)); |
133 | this_cpu_write(cpu_patching_context.area, NULL); | |
134 | this_cpu_write(cpu_patching_context.addr, 0); | |
135 | this_cpu_write(cpu_patching_context.pte, NULL); | |
37bc3e5f BS |
136 | return 0; |
137 | } | |
138 | ||
c28c15b6 CR |
139 | static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr) |
140 | { | |
141 | struct mmu_gather tlb; | |
142 | ||
143 | tlb_gather_mmu(&tlb, mm); | |
144 | free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0); | |
145 | mmput(mm); | |
146 | } | |
147 | ||
148 | static int text_area_cpu_up_mm(unsigned int cpu) | |
149 | { | |
150 | struct mm_struct *mm; | |
151 | unsigned long addr; | |
152 | pte_t *pte; | |
153 | spinlock_t *ptl; | |
154 | ||
155 | mm = mm_alloc(); | |
156 | if (WARN_ON(!mm)) | |
157 | goto fail_no_mm; | |
158 | ||
159 | /* | |
160 | * Choose a random page-aligned address from the interval | |
161 | * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE]. | |
162 | * The lower address bound is PAGE_SIZE to avoid the zero-page. | |
163 | */ | |
164 | addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT; | |
165 | ||
166 | /* | |
167 | * PTE allocation uses GFP_KERNEL which means we need to | |
168 | * pre-allocate the PTE here because we cannot do the | |
169 | * allocation during patching when IRQs are disabled. | |
170 | * | |
171 | * Using get_locked_pte() to avoid open coding, the lock | |
172 | * is unnecessary. | |
173 | */ | |
174 | pte = get_locked_pte(mm, addr, &ptl); | |
175 | if (!pte) | |
176 | goto fail_no_pte; | |
177 | pte_unmap_unlock(pte, ptl); | |
178 | ||
2f228ee1 BG |
179 | this_cpu_write(cpu_patching_context.mm, mm); |
180 | this_cpu_write(cpu_patching_context.addr, addr); | |
c28c15b6 CR |
181 | |
182 | return 0; | |
183 | ||
184 | fail_no_pte: | |
185 | put_patching_mm(mm, addr); | |
186 | fail_no_mm: | |
187 | return -ENOMEM; | |
188 | } | |
189 | ||
190 | static int text_area_cpu_down_mm(unsigned int cpu) | |
191 | { | |
2f228ee1 BG |
192 | put_patching_mm(this_cpu_read(cpu_patching_context.mm), |
193 | this_cpu_read(cpu_patching_context.addr)); | |
c28c15b6 | 194 | |
2f228ee1 BG |
195 | this_cpu_write(cpu_patching_context.mm, NULL); |
196 | this_cpu_write(cpu_patching_context.addr, 0); | |
c28c15b6 CR |
197 | |
198 | return 0; | |
199 | } | |
200 | ||
17512892 CL |
201 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); |
202 | ||
71a5b3db | 203 | void __init poking_init(void) |
37bc3e5f | 204 | { |
c28c15b6 CR |
205 | int ret; |
206 | ||
84ecfe6f CL |
207 | if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) |
208 | return; | |
209 | ||
c28c15b6 CR |
210 | if (mm_patch_enabled()) |
211 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, | |
212 | "powerpc/text_poke_mm:online", | |
213 | text_area_cpu_up_mm, | |
214 | text_area_cpu_down_mm); | |
215 | else | |
216 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, | |
217 | "powerpc/text_poke:online", | |
218 | text_area_cpu_up, | |
219 | text_area_cpu_down); | |
071c95c1 BG |
220 | |
221 | /* cpuhp_setup_state returns >= 0 on success */ | |
222 | if (WARN_ON(ret < 0)) | |
223 | return; | |
224 | ||
17512892 | 225 | static_branch_enable(&poking_init_done); |
37bc3e5f | 226 | } |
37bc3e5f | 227 | |
8b4bb0ad CL |
228 | static unsigned long get_patch_pfn(void *addr) |
229 | { | |
230 | if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr)) | |
231 | return vmalloc_to_pfn(addr); | |
232 | else | |
233 | return __pa_symbol(addr) >> PAGE_SHIFT; | |
234 | } | |
235 | ||
37bc3e5f BS |
236 | /* |
237 | * This can be called for kernel text or a module. | |
238 | */ | |
239 | static int map_patch_area(void *addr, unsigned long text_poke_addr) | |
240 | { | |
8b4bb0ad | 241 | unsigned long pfn = get_patch_pfn(addr); |
37bc3e5f | 242 | |
285672f9 | 243 | return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); |
37bc3e5f BS |
244 | } |
245 | ||
a3483c3d | 246 | static void unmap_patch_area(unsigned long addr) |
37bc3e5f BS |
247 | { |
248 | pte_t *ptep; | |
249 | pmd_t *pmdp; | |
250 | pud_t *pudp; | |
2fb47060 | 251 | p4d_t *p4dp; |
37bc3e5f BS |
252 | pgd_t *pgdp; |
253 | ||
254 | pgdp = pgd_offset_k(addr); | |
a3483c3d CL |
255 | if (WARN_ON(pgd_none(*pgdp))) |
256 | return; | |
37bc3e5f | 257 | |
2fb47060 | 258 | p4dp = p4d_offset(pgdp, addr); |
a3483c3d CL |
259 | if (WARN_ON(p4d_none(*p4dp))) |
260 | return; | |
2fb47060 MR |
261 | |
262 | pudp = pud_offset(p4dp, addr); | |
a3483c3d CL |
263 | if (WARN_ON(pud_none(*pudp))) |
264 | return; | |
37bc3e5f BS |
265 | |
266 | pmdp = pmd_offset(pudp, addr); | |
a3483c3d CL |
267 | if (WARN_ON(pmd_none(*pmdp))) |
268 | return; | |
37bc3e5f BS |
269 | |
270 | ptep = pte_offset_kernel(pmdp, addr); | |
a3483c3d CL |
271 | if (WARN_ON(pte_none(*ptep))) |
272 | return; | |
37bc3e5f | 273 | |
37bc3e5f BS |
274 | /* |
275 | * In hash, pte_clear flushes the tlb, in radix, we have to | |
276 | */ | |
277 | pte_clear(&init_mm, addr, ptep); | |
278 | flush_tlb_kernel_range(addr, addr + PAGE_SIZE); | |
aaddd3ea ME |
279 | } |
280 | ||
c28c15b6 CR |
281 | static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) |
282 | { | |
283 | int err; | |
284 | u32 *patch_addr; | |
285 | unsigned long text_poke_addr; | |
286 | pte_t *pte; | |
287 | unsigned long pfn = get_patch_pfn(addr); | |
288 | struct mm_struct *patching_mm; | |
289 | struct mm_struct *orig_mm; | |
980411a4 | 290 | spinlock_t *ptl; |
c28c15b6 | 291 | |
2f228ee1 | 292 | patching_mm = __this_cpu_read(cpu_patching_context.mm); |
2f228ee1 | 293 | text_poke_addr = __this_cpu_read(cpu_patching_context.addr); |
c28c15b6 CR |
294 | patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); |
295 | ||
980411a4 ME |
296 | pte = get_locked_pte(patching_mm, text_poke_addr, &ptl); |
297 | if (!pte) | |
298 | return -ENOMEM; | |
299 | ||
c28c15b6 CR |
300 | __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); |
301 | ||
302 | /* order PTE update before use, also serves as the hwsync */ | |
303 | asm volatile("ptesync": : :"memory"); | |
304 | ||
305 | /* order context switch after arbitrary prior code */ | |
306 | isync(); | |
307 | ||
308 | orig_mm = start_using_temp_mm(patching_mm); | |
309 | ||
310 | err = __patch_instruction(addr, instr, patch_addr); | |
311 | ||
312 | /* hwsync performed by __patch_instruction (sync) if successful */ | |
313 | if (err) | |
314 | mb(); /* sync */ | |
315 | ||
316 | /* context synchronisation performed by __patch_instruction (isync or exception) */ | |
317 | stop_using_temp_mm(patching_mm, orig_mm); | |
318 | ||
319 | pte_clear(patching_mm, text_poke_addr, pte); | |
320 | /* | |
321 | * ptesync to order PTE update before TLB invalidation done | |
322 | * by radix__local_flush_tlb_page_psize (in _tlbiel_va) | |
323 | */ | |
324 | local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); | |
325 | ||
980411a4 ME |
326 | pte_unmap_unlock(pte, ptl); |
327 | ||
c28c15b6 CR |
328 | return err; |
329 | } | |
330 | ||
6b21af74 CL |
331 | static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) |
332 | { | |
333 | int err; | |
334 | u32 *patch_addr; | |
335 | unsigned long text_poke_addr; | |
8b4bb0ad CL |
336 | pte_t *pte; |
337 | unsigned long pfn = get_patch_pfn(addr); | |
6b21af74 | 338 | |
2f228ee1 | 339 | text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; |
6b21af74 CL |
340 | patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); |
341 | ||
2f228ee1 | 342 | pte = __this_cpu_read(cpu_patching_context.pte); |
8b4bb0ad CL |
343 | __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0); |
344 | /* See ptesync comment in radix__set_pte_at() */ | |
345 | if (radix_enabled()) | |
346 | asm volatile("ptesync": : :"memory"); | |
6b21af74 CL |
347 | |
348 | err = __patch_instruction(addr, instr, patch_addr); | |
349 | ||
8b4bb0ad CL |
350 | pte_clear(&init_mm, text_poke_addr, pte); |
351 | flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE); | |
6b21af74 CL |
352 | |
353 | return err; | |
354 | } | |
355 | ||
6f3a81b6 | 356 | int patch_instruction(u32 *addr, ppc_inst_t instr) |
37bc3e5f BS |
357 | { |
358 | int err; | |
37bc3e5f | 359 | unsigned long flags; |
37bc3e5f BS |
360 | |
361 | /* | |
362 | * During early early boot patch_instruction is called | |
363 | * when text_poke_area is not ready, but we still need | |
364 | * to allow patching. We just do the plain old patching | |
37bc3e5f | 365 | */ |
84ecfe6f CL |
366 | if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) || |
367 | !static_branch_likely(&poking_init_done)) | |
8cf4c057 | 368 | return raw_patch_instruction(addr, instr); |
37bc3e5f BS |
369 | |
370 | local_irq_save(flags); | |
c28c15b6 CR |
371 | if (mm_patch_enabled()) |
372 | err = __do_patch_instruction_mm(addr, instr); | |
373 | else | |
374 | err = __do_patch_instruction(addr, instr); | |
37bc3e5f BS |
375 | local_irq_restore(flags); |
376 | ||
377 | return err; | |
378 | } | |
37bc3e5f BS |
379 | NOKPROBE_SYMBOL(patch_instruction); |
380 | ||
69d4d6e5 | 381 | int patch_branch(u32 *addr, unsigned long target, int flags) |
e7a57273 | 382 | { |
c545b9f0 | 383 | ppc_inst_t instr; |
7c95d889 | 384 | |
d5937db1 CL |
385 | if (create_branch(&instr, addr, target, flags)) |
386 | return -ERANGE; | |
387 | ||
7c95d889 | 388 | return patch_instruction(addr, instr); |
e7a57273 ME |
389 | } |
390 | ||
51c9c084 A |
391 | /* |
392 | * Helper to check if a given instruction is a conditional branch | |
393 | * Derived from the conditional checks in analyse_instr() | |
394 | */ | |
c545b9f0 | 395 | bool is_conditional_branch(ppc_inst_t instr) |
51c9c084 | 396 | { |
8094892d | 397 | unsigned int opcode = ppc_inst_primary_opcode(instr); |
51c9c084 A |
398 | |
399 | if (opcode == 16) /* bc, bca, bcl, bcla */ | |
400 | return true; | |
401 | if (opcode == 19) { | |
777e26f0 | 402 | switch ((ppc_inst_val(instr) >> 1) & 0x3ff) { |
51c9c084 A |
403 | case 16: /* bclr, bclrl */ |
404 | case 528: /* bcctr, bcctrl */ | |
405 | case 560: /* bctar, bctarl */ | |
406 | return true; | |
407 | } | |
408 | } | |
409 | return false; | |
410 | } | |
71f6e58e | 411 | NOKPROBE_SYMBOL(is_conditional_branch); |
51c9c084 | 412 | |
c545b9f0 | 413 | int create_cond_branch(ppc_inst_t *instr, const u32 *addr, |
7c95d889 | 414 | unsigned long target, int flags) |
411781a2 | 415 | { |
411781a2 ME |
416 | long offset; |
417 | ||
418 | offset = target; | |
419 | if (! (flags & BRANCH_ABSOLUTE)) | |
420 | offset = offset - (unsigned long)addr; | |
421 | ||
422 | /* Check we can represent the target in the instruction format */ | |
4549c3ea | 423 | if (!is_offset_in_cond_branch_range(offset)) |
7c95d889 | 424 | return 1; |
411781a2 ME |
425 | |
426 | /* Mask out the flags and target, so they don't step on each other. */ | |
94afd069 | 427 | *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC)); |
411781a2 | 428 | |
7c95d889 | 429 | return 0; |
411781a2 ME |
430 | } |
431 | ||
c545b9f0 | 432 | int instr_is_relative_branch(ppc_inst_t instr) |
411781a2 | 433 | { |
777e26f0 | 434 | if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) |
411781a2 ME |
435 | return 0; |
436 | ||
437 | return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); | |
438 | } | |
439 | ||
c545b9f0 | 440 | int instr_is_relative_link_branch(ppc_inst_t instr) |
b9eab08d | 441 | { |
777e26f0 | 442 | return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); |
b9eab08d JP |
443 | } |
444 | ||
69d4d6e5 | 445 | static unsigned long branch_iform_target(const u32 *instr) |
411781a2 ME |
446 | { |
447 | signed long imm; | |
448 | ||
18c85964 | 449 | imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; |
411781a2 ME |
450 | |
451 | /* If the top bit of the immediate value is set this is negative */ | |
452 | if (imm & 0x2000000) | |
453 | imm -= 0x4000000; | |
454 | ||
18c85964 | 455 | if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) |
411781a2 ME |
456 | imm += (unsigned long)instr; |
457 | ||
458 | return (unsigned long)imm; | |
459 | } | |
460 | ||
69d4d6e5 | 461 | static unsigned long branch_bform_target(const u32 *instr) |
411781a2 ME |
462 | { |
463 | signed long imm; | |
464 | ||
18c85964 | 465 | imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; |
411781a2 ME |
466 | |
467 | /* If the top bit of the immediate value is set this is negative */ | |
468 | if (imm & 0x8000) | |
469 | imm -= 0x10000; | |
470 | ||
18c85964 | 471 | if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) |
411781a2 ME |
472 | imm += (unsigned long)instr; |
473 | ||
474 | return (unsigned long)imm; | |
475 | } | |
476 | ||
69d4d6e5 | 477 | unsigned long branch_target(const u32 *instr) |
411781a2 | 478 | { |
f8faaffa | 479 | if (instr_is_branch_iform(ppc_inst_read(instr))) |
411781a2 | 480 | return branch_iform_target(instr); |
f8faaffa | 481 | else if (instr_is_branch_bform(ppc_inst_read(instr))) |
411781a2 ME |
482 | return branch_bform_target(instr); |
483 | ||
484 | return 0; | |
485 | } | |
486 | ||
c545b9f0 | 487 | int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) |
411781a2 ME |
488 | { |
489 | unsigned long target; | |
411781a2 ME |
490 | target = branch_target(src); |
491 | ||
f8faaffa JN |
492 | if (instr_is_branch_iform(ppc_inst_read(src))) |
493 | return create_branch(instr, dest, target, | |
494 | ppc_inst_val(ppc_inst_read(src))); | |
495 | else if (instr_is_branch_bform(ppc_inst_read(src))) | |
496 | return create_cond_branch(instr, dest, target, | |
497 | ppc_inst_val(ppc_inst_read(src))); | |
411781a2 | 498 | |
7c95d889 | 499 | return 1; |
411781a2 | 500 | } |