kprobes/x86: Use text_poke_bp() instead of text_poke_smp*()
arch/x86/kernel/alternative.c
#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/fixmap.h>

#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int __initdata_or_module noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, ...)				\
do {							\
	if (debug_alternative)				\
		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
} while (0)

/*
 * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
 * that correspond to that nop. Getting from one nop to the next, we
 * add to the array the offset that is equal to the sum of all sizes of
 * nops preceding the one we are after.
 *
 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
 * nice symmetry of sizes of the previous nops.
 */
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char intelnops[] =
{
	GENERIC_NOP1,
	GENERIC_NOP2,
	GENERIC_NOP3,
	GENERIC_NOP4,
	GENERIC_NOP5,
	GENERIC_NOP6,
	GENERIC_NOP7,
	GENERIC_NOP8,
	GENERIC_NOP5_ATOMIC
};
static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
{
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef K8_NOP1
static const unsigned char k8nops[] =
{
	K8_NOP1,
	K8_NOP2,
	K8_NOP3,
	K8_NOP4,
	K8_NOP5,
	K8_NOP6,
	K8_NOP7,
	K8_NOP8,
	K8_NOP5_ATOMIC
};
static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char k7nops[] =
{
	K7_NOP1,
	K7_NOP2,
	K7_NOP3,
	K7_NOP4,
	K7_NOP5,
	K7_NOP6,
	K7_NOP7,
	K7_NOP8,
	K7_NOP5_ATOMIC
};
static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef P6_NOP1
static const unsigned char p6nops[] =
{
	P6_NOP1,
	P6_NOP2,
	P6_NOP3,
	P6_NOP4,
	P6_NOP5,
	P6_NOP6,
	P6_NOP7,
	P6_NOP8,
	P6_NOP5_ATOMIC
};
static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
{
	NULL,
	p6nops,
	p6nops + 1,
	p6nops + 1 + 2,
	p6nops + 1 + 2 + 3,
	p6nops + 1 + 2 + 3 + 4,
	p6nops + 1 + 2 + 3 + 4 + 5,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

/* Initialize these to a safe default */
#ifdef CONFIG_X86_64
const unsigned char * const *ideal_nops = p6_nops;
#else
const unsigned char * const *ideal_nops = intel_nops;
#endif

void __init arch_init_ideal_nops(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		/*
		 * Due to a decoder implementation quirk, some
		 * specific Intel CPUs actually perform better with
		 * the "k8_nops" than with the SDM-recommended NOPs.
		 */
		if (boot_cpu_data.x86 == 6 &&
		    boot_cpu_data.x86_model >= 0x0f &&
		    boot_cpu_data.x86_model != 0x1c &&
		    boot_cpu_data.x86_model != 0x26 &&
		    boot_cpu_data.x86_model != 0x27 &&
		    boot_cpu_data.x86_model < 0x30) {
			ideal_nops = k8_nops;
		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
			ideal_nops = p6_nops;
		} else {
#ifdef CONFIG_X86_64
			ideal_nops = k8_nops;
#else
			ideal_nops = intel_nops;
#endif
		}
		break;
	default:
#ifdef CONFIG_X86_64
		ideal_nops = k8_nops;
#else
		if (boot_cpu_has(X86_FEATURE_K8))
			ideal_nops = k8_nops;
		else if (boot_cpu_has(X86_FEATURE_K7))
			ideal_nops = k7_nops;
		else
			ideal_nops = intel_nops;
#endif
	}
}

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, ideal_nops[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}
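
/*
 * Illustrative sketch (not part of the original file): how a patching site
 * typically uses add_nops() -- copy the replacement into a scratch buffer,
 * NOP out whatever remains of the original instruction slot, then poke the
 * whole buffer in one go.  The function and parameter names below are
 * hypothetical.
 */
#if 0
static void __init_or_module pad_and_poke_example(void *site, const u8 *repl,
						  unsigned int repl_len,
						  unsigned int site_len)
{
	u8 buf[MAX_PATCH_LEN];

	memcpy(buf, repl, repl_len);
	/* ideal_nops[n] (n <= ASM_NOP_MAX) is the preferred n-byte NOP */
	add_nops(buf + repl_len, site_len - repl_len);
	text_poke_early(site, buf, site_len);
}
#endif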

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len);

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

void __init_or_module apply_alternatives(struct alt_instr *start,
					 struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr, *replacement;
	u8 insnbuf[MAX_PATCH_LEN];

	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
	/*
	 * The scan order should be from start to end. An alternative
	 * scanned later can overwrite one scanned earlier.
	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
	 * patch code.
	 *
	 * So be careful if you want to change the scan order to any other
	 * order.
	 */
	for (a = start; a < end; a++) {
		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
		BUG_ON(a->replacementlen > a->instrlen);
		BUG_ON(a->instrlen > sizeof(insnbuf));
		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
		if (!boot_cpu_has(a->cpuid))
			continue;

		memcpy(insnbuf, replacement, a->replacementlen);

		/* 0xe8 is a relative call; fix the offset. */
		if (*insnbuf == 0xe8 && a->replacementlen == 5)
			*(s32 *)(insnbuf + 1) += replacement - instr;

		add_nops(insnbuf + a->replacementlen,
			 a->instrlen - a->replacementlen);

		text_poke_early(instr, insnbuf, a->instrlen);
	}
}
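
/*
 * Illustrative sketch (not part of the original file): where the alt_instr
 * entries walked above come from.  The alternative() family of macros emits
 * the old instruction into .text and a struct alt_instr record into the
 * .altinstructions section; apply_alternatives() then rewrites the .text
 * copy at boot if the CPU has the requested feature.  The example below is
 * a sketch of the classic 32-bit memory-barrier use, not code from this file.
 */
#if 0
static inline void barrier_example(void)
{
	/* locked add on old CPUs, MFENCE once X86_FEATURE_XMM2 is present */
	alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2);
}
#endif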

#ifdef CONFIG_SMP

static void alternatives_smp_lock(const s32 *start, const s32 *end,
				  u8 *text, u8 *text_end)
{
	const s32 *poff;

	mutex_lock(&text_mutex);
	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
	}
	mutex_unlock(&text_mutex);
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
				    u8 *text, u8 *text_end)
{
	const s32 *poff;

	mutex_lock(&text_mutex);
	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
	}
	mutex_unlock(&text_mutex);
}

struct smp_alt_module {
	/* what is this ??? */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	const s32	*locks;
	const s32	*locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static bool uniproc_patched = false;	/* protected by smp_alt */

void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
						  void *locks, void *locks_end,
						  void *text,  void *text_end)
{
	struct smp_alt_module *smp;

	mutex_lock(&smp_alt);
	if (!uniproc_patched)
		goto unlock;

	if (num_possible_cpus() == 1)
		/* Don't bother remembering, we'll never have to undo it. */
		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		/* we'll run the (safe but slow) SMP code then ... */
		goto unlock;

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
		__func__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
	alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
	mutex_unlock(&smp_alt);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	mutex_lock(&smp_alt);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		kfree(item);
		break;
	}
	mutex_unlock(&smp_alt);
}

void alternatives_enable_smp(void)
{
	struct smp_alt_module *mod;

#ifdef CONFIG_LOCKDEP
	/*
	 * Older binutils section handling bug prevented
	 * alternatives-replacement from working reliably.
	 *
	 * If this still occurs then you should see a hang
	 * or crash shortly after this line:
	 */
	pr_info("lockdep: fixing up alternatives\n");
#endif

	/* Why bother if there are no other CPUs? */
	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&smp_alt);

	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
		uniproc_patched = false;
	}
	mutex_unlock(&smp_alt);
}

/* Return 1 if the address range is reserved for smp-alternatives */
int alternatives_text_reserved(void *start, void *end)
{
	struct smp_alt_module *mod;
	const s32 *poff;
	u8 *text_start = start;
	u8 *text_end = end;

	list_for_each_entry(mod, &smp_alt_modules, next) {
		if (mod->text > text_end || mod->text_end < text_start)
			continue;
		for (poff = mod->locks; poff < mod->locks_end; poff++) {
			const u8 *ptr = (const u8 *)poff + *poff;

			if (text_start <= ptr && text_end > ptr)
				return 1;
		}
	}

	return 0;
}
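
/*
 * Illustrative sketch (not part of the original file): a typical consumer of
 * alternatives_text_reserved() is the kprobes code, which refuses to place a
 * probe on a byte that smp-alternatives may still rewrite.  A hypothetical
 * check would look like this:
 */
#if 0
static int probe_addr_check_example(void *addr)
{
	if (alternatives_text_reserved(addr, addr))
		return -EBUSY;	/* the lock prefix at addr may get patched */
	return 0;
}
#endif
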
#endif

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insnbuf[MAX_PATCH_LEN];

	if (noreplace_paravirt)
		return;

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insnbuf, p->instr, p->len);
		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
					 (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insnbuf + used, p->len - used);
		text_poke_early(p->instr, insnbuf, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

void __init alternative_instructions(void)
{
	/* The patching is not fully atomic, so try to avoid local
	   interruptions that might execute the code being patched.
	   Other CPUs are not running. */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during
	 * code patching.
	 */

	apply_alternatives(__alt_instructions, __alt_instructions_end);

#ifdef CONFIG_SMP
	/* Patch to UP if other cpus not imminent. */
	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
	}

	if (!uniproc_patched || num_possible_cpus() == 1)
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
#endif

	apply_paravirt(__parainstructions, __parainstructions_end);

	restart_nmi();
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void *__init_or_module text_poke_early(void *addr, const void *opcode,
				       size_t len)
{
	unsigned long flags;
	local_irq_save(flags);
	memcpy(addr, opcode, len);
	sync_core();
	local_irq_restore(flags);
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be
 * aligned in a way that permits an atomic write. It also makes sure we fit
 * on a single page.
 *
 * Note: Must be called under text_mutex.
 */
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;
	char *vaddr;
	struct page *pages[2];
	int i;

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	BUG_ON(!pages[0]);
	local_irq_save(flags);
	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
	if (pages[1])
		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
	clear_fixmap(FIX_TEXT_POKE0);
	if (pages[1])
		clear_fixmap(FIX_TEXT_POKE1);
	local_flush_tlb();
	sync_core();
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	for (i = 0; i < len; i++)
		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
	local_irq_restore(flags);
	return addr;
}

static void do_sync_core(void *info)
{
	sync_core();
}

static bool bp_patching_in_progress;
static void *bp_int3_handler, *bp_int3_addr;

static int int3_notify(struct notifier_block *self, unsigned long val, void *data)
{
	struct die_args *args = data;

	/* bp_patching_in_progress */
	smp_rmb();

	if (likely(!bp_patching_in_progress))
		return NOTIFY_DONE;

	/* we are not interested in non-int3 faults and ring > 0 faults */
	if (val != DIE_INT3 || !args->regs || user_mode_vm(args->regs)
			    || args->regs->ip != (unsigned long)bp_int3_addr)
		return NOTIFY_DONE;

	/* set up the specified breakpoint handler */
	args->regs->ip = (unsigned long) bp_int3_handler;

	return NOTIFY_STOP;
}
/**
 * text_poke_bp() -- update instructions on live kernel on SMP
 * @addr:	address to patch
 * @opcode:	opcode of new instruction
 * @len:	length to copy
 * @handler:	address to jump to when the temporary breakpoint is hit
 *
 * Modify multi-byte instruction by using int3 breakpoint on SMP.
 * In contrast to text_poke_smp(), we completely avoid stop_machine() here,
 * and achieve the synchronization using an int3 breakpoint.
 *
 * The way it is done:
 *	- add an int3 trap to the address that will be patched
 *	- sync cores
 *	- update all but the first byte of the patched range
 *	- sync cores
 *	- replace the first byte (int3) with the first byte of the
 *	  replacement opcode
 *	- sync cores
 *
 * Note: must be called under text_mutex.
 */
void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
	unsigned char int3 = 0xcc;

	bp_int3_handler = handler;
	bp_int3_addr = (u8 *)addr + sizeof(int3);
	bp_patching_in_progress = true;
	/*
	 * The corresponding read barrier is in the int3 notifier,
	 * making sure the in_progress flag is correctly ordered wrt.
	 * patching.
	 */
	smp_wmb();

	text_poke(addr, &int3, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);

	if (len - sizeof(int3) > 0) {
		/* patch all but the first byte */
		text_poke((char *)addr + sizeof(int3),
			  (const char *) opcode + sizeof(int3),
			  len - sizeof(int3));
		/*
		 * According to Intel, this core syncing is very likely
		 * not necessary and we'd be safe even without it. But
		 * better safe than sorry (plus there's not only Intel).
		 */
		on_each_cpu(do_sync_core, NULL, 1);
	}

	/* patch the first byte */
	text_poke(addr, opcode, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);

	bp_patching_in_progress = false;
	smp_wmb();

	return addr;
}
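
/*
 * Illustrative sketch (not part of the original file): the kind of caller the
 * changelog has in mind -- the kprobes jump optimizer replacing a 5-byte
 * instruction with a relative jump, without stop_machine().  While the int3
 * byte is armed, a CPU that hits 'addr' is diverted to 'target' by the
 * notifier above.  Names and constants here are hypothetical.
 */
#if 0
static void patch_relative_jump_example(void *addr, void *target)
{
	unsigned char insn[5];

	insn[0] = 0xe9;		/* JMP rel32 */
	*(s32 *)(insn + 1) = (s32)((long)target - ((long)addr + 5));

	mutex_lock(&text_mutex);
	text_poke_bp(addr, insn, 5, target);
	mutex_unlock(&text_mutex);
}
#endif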

/* This one needs to run before anything else handles it as a
 * regular exception. */
static struct notifier_block int3_nb = {
	.priority = 0x7fffffff,
	.notifier_call = int3_notify
};

static int __init int3_init(void)
{
	return register_die_notifier(&int3_nb);
}

arch_initcall(int3_init);
/*
 * Cross-modifying kernel text with stop_machine().
 * This code originally comes from immediate value.
 */
static atomic_t stop_machine_first;
static int wrote_text;

struct text_poke_params {
	struct text_poke_param *params;
	int nparams;
};

static int __kprobes stop_machine_text_poke(void *data)
{
	struct text_poke_params *tpp = data;
	struct text_poke_param *p;
	int i;

	if (atomic_xchg(&stop_machine_first, 0)) {
		for (i = 0; i < tpp->nparams; i++) {
			p = &tpp->params[i];
			text_poke(p->addr, p->opcode, p->len);
		}
		smp_wmb();	/* Make sure other cpus see that this has run */
		wrote_text = 1;
	} else {
		while (!wrote_text)
			cpu_relax();
		smp_mb();	/* Load wrote_text before following execution */
	}

	for (i = 0; i < tpp->nparams; i++) {
		p = &tpp->params[i];
		flush_icache_range((unsigned long)p->addr,
				   (unsigned long)p->addr + p->len);
	}
	/*
	 * Intel Architecture Software Developer's Manual section 7.1.3 specifies
	 * that a core serializing instruction such as "cpuid" should be
	 * executed on _each_ core before the new instruction is made visible.
	 */
	sync_core();
	return 0;
}

/**
 * text_poke_smp - Update instructions on a live kernel on SMP
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
 * a user to poke/set multi-byte text on SMP. Only code that is never run
 * from NMI or MCE handlers should be modified this way, since stop_machine()
 * does _not_ protect code against NMI and MCE.
 *
 * Note: Must be called under get_online_cpus() and text_mutex.
 */
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
	struct text_poke_params tpp;
	struct text_poke_param p;

	p.addr = addr;
	p.opcode = opcode;
	p.len = len;
	tpp.params = &p;
	tpp.nparams = 1;
	atomic_set(&stop_machine_first, 1);
	wrote_text = 0;
	/* Use __stop_machine() because the caller already got online_cpus. */
	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
	return addr;
}

/**
 * text_poke_smp_batch - Update instructions on a live kernel on SMP
 * @params: an array of text_poke parameters
 * @n: the number of elements in params.
 *
 * Modify multi-byte instructions by using stop_machine() on SMP. Since
 * stop_machine() is a heavy task, it is better to aggregate text_poke
 * requests and do them all in one shot if possible.
 *
 * Note: Must be called under get_online_cpus() and text_mutex.
 */
void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
{
	struct text_poke_params tpp = {.params = params, .nparams = n};

	atomic_set(&stop_machine_first, 1);
	wrote_text = 0;
	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
}
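
/*
 * Illustrative sketch (not part of the original file): batching several pokes
 * into a single stop_machine() invocation, the pattern the kprobes jump
 * optimizer relied on before the int3-based text_poke_bp() above.  Names are
 * hypothetical.
 */
#if 0
static void batch_poke_example(struct text_poke_param *params, int nr)
{
	get_online_cpus();
	mutex_lock(&text_mutex);
	text_poke_smp_batch(params, nr);	/* one stop_machine() for all nr sites */
	mutex_unlock(&text_mutex);
	put_online_cpus();
}
#endif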