x86/fixup_irq: Use cpu_online_mask instead of cpu_all_mask
[linux-2.6-block.git] / arch / x86 / kernel / alternative.c
CommitLineData
c767a54b
JP
1#define pr_fmt(fmt) "SMP alternatives: " fmt
2
9a0b5817 3#include <linux/module.h>
f6a57033 4#include <linux/sched.h>
2f1dafe5 5#include <linux/mutex.h>
9a0b5817 6#include <linux/list.h>
8b5a10fc 7#include <linux/stringify.h>
19d36ccd
AK
8#include <linux/kprobes.h>
9#include <linux/mm.h>
10#include <linux/vmalloc.h>
3945dab4 11#include <linux/memory.h>
3d55cc8a 12#include <linux/stop_machine.h>
5a0e3ad6 13#include <linux/slab.h>
9a0b5817
GH
14#include <asm/alternative.h>
15#include <asm/sections.h>
19d36ccd 16#include <asm/pgtable.h>
8f4e956b
AK
17#include <asm/mce.h>
18#include <asm/nmi.h>
e587cadd 19#include <asm/cacheflush.h>
78ff7fae 20#include <asm/tlbflush.h>
e587cadd 21#include <asm/io.h>
78ff7fae 22#include <asm/fixmap.h>
9a0b5817 23
ab144f5e
AK
/* Size in bytes of the on-stack buffers used to assemble a single patch. */
#define MAX_PATCH_LEN (255 - 1)
25
09488165
JB
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Non-zero when SMP lock-prefix patching is done only once: images are then
 * patched immediately instead of being recorded for later SMP/UP switching.
 */
static int smp_alt_once;

/* Handler for the "smp-alt-boot" boot option: request one-time patching. */
static int __init bootonly(char *str)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
/* Without CPU hotplug the one-time mode is unconditionally selected. */
#define smp_alt_once 1
#endif
38
8b5a10fc 39static int __initdata_or_module debug_alternative;
b7fb4af0 40
d167a518
GH
41static int __init debug_alt(char *str)
42{
43 debug_alternative = 1;
44 return 1;
45}
d167a518
GH
46__setup("debug-alternative", debug_alt);
47
09488165
JB
48static int noreplace_smp;
49
b7fb4af0
JF
50static int __init setup_noreplace_smp(char *str)
51{
52 noreplace_smp = 1;
53 return 1;
54}
55__setup("noreplace-smp", setup_noreplace_smp);
56
#ifdef CONFIG_PARAVIRT
/* Non-zero makes apply_paravirt() return without patching anything. */
static int __initdata_or_module noreplace_paravirt;

/* Handler for the "noreplace-paravirt" boot option. */
static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
b7fb4af0 67
c767a54b
JP
/* printk() at KERN_DEBUG level, emitted only when debug_alternative is set. */
#define DPRINTK(fmt, ...)					\
do {								\
	if (debug_alternative)					\
		printk(KERN_DEBUG fmt, ##__VA_ARGS__);		\
} while (0)
d167a518 73
dc326fca
PA
/*
 * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
 * that correspond to that nop. Getting from one nop to the next, we
 * add to the array the offset that is equal to the sum of all sizes of
 * nops preceding the one we are after.
 *
 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
 * nice symmetry of sizes of the previous nops.
 */
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
/* All generic nops, shortest first, packed back to back. */
static const unsigned char intelnops[] =
{
	GENERIC_NOP1,
	GENERIC_NOP2,
	GENERIC_NOP3,
	GENERIC_NOP4,
	GENERIC_NOP5,
	GENERIC_NOP6,
	GENERIC_NOP7,
	GENERIC_NOP8,
	GENERIC_NOP5_ATOMIC
};

/* Slot N points at the N-byte nop; the final slot is the atomic 5-byte nop. */
static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
{
	[0] = NULL,
	[1] = intelnops,
	[2] = intelnops + 1,
	[3] = intelnops + 1 + 2,
	[4] = intelnops + 1 + 2 + 3,
	[5] = intelnops + 1 + 2 + 3 + 4,
	[6] = intelnops + 1 + 2 + 3 + 4 + 5,
	[7] = intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	[9] = intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif
110
#ifdef K8_NOP1
/* All K8 nops, shortest first, packed back to back. */
static const unsigned char k8nops[] =
{
	K8_NOP1,
	K8_NOP2,
	K8_NOP3,
	K8_NOP4,
	K8_NOP5,
	K8_NOP6,
	K8_NOP7,
	K8_NOP8,
	K8_NOP5_ATOMIC
};

/* Slot N points at the N-byte nop; the final slot is the atomic 5-byte nop. */
static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
{
	[0] = NULL,
	[1] = k8nops,
	[2] = k8nops + 1,
	[3] = k8nops + 1 + 2,
	[4] = k8nops + 1 + 2 + 3,
	[5] = k8nops + 1 + 2 + 3 + 4,
	[6] = k8nops + 1 + 2 + 3 + 4 + 5,
	[7] = k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	[9] = k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif
138
#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
/* All K7 nops, shortest first, packed back to back. */
static const unsigned char k7nops[] =
{
	K7_NOP1,
	K7_NOP2,
	K7_NOP3,
	K7_NOP4,
	K7_NOP5,
	K7_NOP6,
	K7_NOP7,
	K7_NOP8,
	K7_NOP5_ATOMIC
};

/* Slot N points at the N-byte nop; the final slot is the atomic 5-byte nop. */
static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
{
	[0] = NULL,
	[1] = k7nops,
	[2] = k7nops + 1,
	[3] = k7nops + 1 + 2,
	[4] = k7nops + 1 + 2 + 3,
	[5] = k7nops + 1 + 2 + 3 + 4,
	[6] = k7nops + 1 + 2 + 3 + 4 + 5,
	[7] = k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	[9] = k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif
166
#ifdef P6_NOP1
/* All P6 nops, shortest first, packed back to back. */
static const unsigned char __initconst_or_module p6nops[] =
{
	P6_NOP1,
	P6_NOP2,
	P6_NOP3,
	P6_NOP4,
	P6_NOP5,
	P6_NOP6,
	P6_NOP7,
	P6_NOP8,
	P6_NOP5_ATOMIC
};

/* Slot N points at the N-byte nop; the final slot is the atomic 5-byte nop. */
static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
{
	[0] = NULL,
	[1] = p6nops,
	[2] = p6nops + 1,
	[3] = p6nops + 1 + 2,
	[4] = p6nops + 1 + 2 + 3,
	[5] = p6nops + 1 + 2 + 3 + 4,
	[6] = p6nops + 1 + 2 + 3 + 4 + 5,
	[7] = p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	[9] = p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif
194
dc326fca 195/* Initialize these to a safe default */
d167a518 196#ifdef CONFIG_X86_64
dc326fca
PA
197const unsigned char * const *ideal_nops = p6_nops;
198#else
199const unsigned char * const *ideal_nops = intel_nops;
200#endif
d167a518 201
dc326fca 202void __init arch_init_ideal_nops(void)
d167a518 203{
dc326fca
PA
204 switch (boot_cpu_data.x86_vendor) {
205 case X86_VENDOR_INTEL:
d8d9766c
PA
206 /*
207 * Due to a decoder implementation quirk, some
208 * specific Intel CPUs actually perform better with
209 * the "k8_nops" than with the SDM-recommended NOPs.
210 */
211 if (boot_cpu_data.x86 == 6 &&
212 boot_cpu_data.x86_model >= 0x0f &&
213 boot_cpu_data.x86_model != 0x1c &&
214 boot_cpu_data.x86_model != 0x26 &&
215 boot_cpu_data.x86_model != 0x27 &&
216 boot_cpu_data.x86_model < 0x30) {
217 ideal_nops = k8_nops;
218 } else if (boot_cpu_has(X86_FEATURE_NOPL)) {
dc326fca
PA
219 ideal_nops = p6_nops;
220 } else {
221#ifdef CONFIG_X86_64
222 ideal_nops = k8_nops;
223#else
224 ideal_nops = intel_nops;
225#endif
226 }
d6250a3f 227 break;
dc326fca
PA
228 default:
229#ifdef CONFIG_X86_64
230 ideal_nops = k8_nops;
231#else
232 if (boot_cpu_has(X86_FEATURE_K8))
233 ideal_nops = k8_nops;
234 else if (boot_cpu_has(X86_FEATURE_K7))
235 ideal_nops = k7_nops;
236 else
237 ideal_nops = intel_nops;
238#endif
239 }
9a0b5817
GH
240}
241
ab144f5e 242/* Use this to add nops to a buffer, then text_poke the whole buffer. */
8b5a10fc 243static void __init_or_module add_nops(void *insns, unsigned int len)
139ec7c4 244{
139ec7c4
RR
245 while (len > 0) {
246 unsigned int noplen = len;
247 if (noplen > ASM_NOP_MAX)
248 noplen = ASM_NOP_MAX;
dc326fca 249 memcpy(insns, ideal_nops[noplen], noplen);
139ec7c4
RR
250 insns += noplen;
251 len -= noplen;
252 }
253}
254
d167a518 255extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
5967ed87 256extern s32 __smp_locks[], __smp_locks_end[];
fa6f2cc7 257void *text_poke_early(void *addr, const void *opcode, size_t len);
d167a518 258
9a0b5817
GH
259/* Replace instructions with better alternatives for this CPU type.
260 This runs before SMP is initialized to avoid SMP problems with
0d2eb44f 261 self modifying code. This implies that asymmetric systems where
9a0b5817
GH
262 APs have less capabilities than the boot processor are not handled.
263 Tough. Make sure you disable such features by hand. */
264
8b5a10fc
JB
265void __init_or_module apply_alternatives(struct alt_instr *start,
266 struct alt_instr *end)
9a0b5817 267{
9a0b5817 268 struct alt_instr *a;
59e97e4d 269 u8 *instr, *replacement;
1b1d9258 270 u8 insnbuf[MAX_PATCH_LEN];
9a0b5817 271
77bf90ed 272 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
50973133
FY
273 /*
274 * The scan order should be from start to end. A later scanned
275 * alternative code can overwrite a previous scanned alternative code.
276 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
277 * patch code.
278 *
279 * So be careful if you want to change the scan order to any other
280 * order.
281 */
9a0b5817 282 for (a = start; a < end; a++) {
59e97e4d
AL
283 instr = (u8 *)&a->instr_offset + a->instr_offset;
284 replacement = (u8 *)&a->repl_offset + a->repl_offset;
9a0b5817 285 BUG_ON(a->replacementlen > a->instrlen);
ab144f5e 286 BUG_ON(a->instrlen > sizeof(insnbuf));
3b770a21 287 BUG_ON(a->cpuid >= NCAPINTS*32);
9a0b5817
GH
288 if (!boot_cpu_has(a->cpuid))
289 continue;
59e97e4d
AL
290
291 memcpy(insnbuf, replacement, a->replacementlen);
292
293 /* 0xe8 is a relative jump; fix the offset. */
294 if (*insnbuf == 0xe8 && a->replacementlen == 5)
295 *(s32 *)(insnbuf + 1) += replacement - instr;
296
297 add_nops(insnbuf + a->replacementlen,
298 a->instrlen - a->replacementlen);
299
e587cadd 300 text_poke_early(instr, insnbuf, a->instrlen);
9a0b5817
GH
301 }
302}
303
8ec4d41f
GH
304#ifdef CONFIG_SMP
305
5967ed87
JB
306static void alternatives_smp_lock(const s32 *start, const s32 *end,
307 u8 *text, u8 *text_end)
9a0b5817 308{
5967ed87 309 const s32 *poff;
9a0b5817 310
3945dab4 311 mutex_lock(&text_mutex);
5967ed87
JB
312 for (poff = start; poff < end; poff++) {
313 u8 *ptr = (u8 *)poff + *poff;
314
315 if (!*poff || ptr < text || ptr >= text_end)
9a0b5817 316 continue;
f88f07e0 317 /* turn DS segment override prefix into lock prefix */
d9c5841e
PA
318 if (*ptr == 0x3e)
319 text_poke(ptr, ((unsigned char []){0xf0}), 1);
9a0b5817 320 };
3945dab4 321 mutex_unlock(&text_mutex);
9a0b5817
GH
322}
323
5967ed87
JB
324static void alternatives_smp_unlock(const s32 *start, const s32 *end,
325 u8 *text, u8 *text_end)
9a0b5817 326{
5967ed87 327 const s32 *poff;
9a0b5817 328
b7fb4af0
JF
329 if (noreplace_smp)
330 return;
331
3945dab4 332 mutex_lock(&text_mutex);
5967ed87
JB
333 for (poff = start; poff < end; poff++) {
334 u8 *ptr = (u8 *)poff + *poff;
335
336 if (!*poff || ptr < text || ptr >= text_end)
9a0b5817 337 continue;
f88f07e0 338 /* turn lock prefix into DS segment override prefix */
d9c5841e
PA
339 if (*ptr == 0xf0)
340 text_poke(ptr, ((unsigned char []){0x3E}), 1);
9a0b5817 341 };
3945dab4 342 mutex_unlock(&text_mutex);
9a0b5817
GH
343}
344
345struct smp_alt_module {
346 /* what is this ??? */
347 struct module *mod;
348 char *name;
349
350 /* ptrs to lock prefixes */
5967ed87
JB
351 const s32 *locks;
352 const s32 *locks_end;
9a0b5817
GH
353
354 /* .text segment, needed to avoid patching init code ;) */
355 u8 *text;
356 u8 *text_end;
357
358 struct list_head next;
359};
360static LIST_HEAD(smp_alt_modules);
2f1dafe5 361static DEFINE_MUTEX(smp_alt);
ca74a6f8 362static int smp_mode = 1; /* protected by smp_alt */
9a0b5817 363
8b5a10fc
JB
364void __init_or_module alternatives_smp_module_add(struct module *mod,
365 char *name,
366 void *locks, void *locks_end,
367 void *text, void *text_end)
9a0b5817
GH
368{
369 struct smp_alt_module *smp;
9a0b5817 370
b7fb4af0
JF
371 if (noreplace_smp)
372 return;
373
9a0b5817
GH
374 if (smp_alt_once) {
375 if (boot_cpu_has(X86_FEATURE_UP))
376 alternatives_smp_unlock(locks, locks_end,
377 text, text_end);
378 return;
379 }
380
381 smp = kzalloc(sizeof(*smp), GFP_KERNEL);
382 if (NULL == smp)
383 return; /* we'll run the (safe but slow) SMP code then ... */
384
385 smp->mod = mod;
386 smp->name = name;
387 smp->locks = locks;
388 smp->locks_end = locks_end;
389 smp->text = text;
390 smp->text_end = text_end;
391 DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
77bf90ed 392 __func__, smp->locks, smp->locks_end,
9a0b5817
GH
393 smp->text, smp->text_end, smp->name);
394
2f1dafe5 395 mutex_lock(&smp_alt);
9a0b5817
GH
396 list_add_tail(&smp->next, &smp_alt_modules);
397 if (boot_cpu_has(X86_FEATURE_UP))
398 alternatives_smp_unlock(smp->locks, smp->locks_end,
399 smp->text, smp->text_end);
2f1dafe5 400 mutex_unlock(&smp_alt);
9a0b5817
GH
401}
402
8b5a10fc 403void __init_or_module alternatives_smp_module_del(struct module *mod)
9a0b5817
GH
404{
405 struct smp_alt_module *item;
9a0b5817 406
b7fb4af0 407 if (smp_alt_once || noreplace_smp)
9a0b5817
GH
408 return;
409
2f1dafe5 410 mutex_lock(&smp_alt);
9a0b5817
GH
411 list_for_each_entry(item, &smp_alt_modules, next) {
412 if (mod != item->mod)
413 continue;
414 list_del(&item->next);
2f1dafe5 415 mutex_unlock(&smp_alt);
77bf90ed 416 DPRINTK("%s: %s\n", __func__, item->name);
9a0b5817
GH
417 kfree(item);
418 return;
419 }
2f1dafe5 420 mutex_unlock(&smp_alt);
9a0b5817
GH
421}
422
3fb82d56 423bool skip_smp_alternatives;
9a0b5817
GH
424void alternatives_smp_switch(int smp)
425{
426 struct smp_alt_module *mod;
9a0b5817 427
3047e99e
IM
428#ifdef CONFIG_LOCKDEP
429 /*
17abecfe
IM
430 * Older binutils section handling bug prevented
431 * alternatives-replacement from working reliably.
432 *
433 * If this still occurs then you should see a hang
434 * or crash shortly after this line:
3047e99e 435 */
c767a54b 436 pr_info("lockdep: fixing up alternatives\n");
3047e99e
IM
437#endif
438
3fb82d56 439 if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
9a0b5817
GH
440 return;
441 BUG_ON(!smp && (num_online_cpus() > 1));
442
2f1dafe5 443 mutex_lock(&smp_alt);
ca74a6f8
AK
444
445 /*
446 * Avoid unnecessary switches because it forces JIT based VMs to
447 * throw away all cached translations, which can be quite costly.
448 */
449 if (smp == smp_mode) {
450 /* nothing */
451 } else if (smp) {
c767a54b 452 pr_info("switching to SMP code\n");
53756d37
JF
453 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
454 clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
9a0b5817
GH
455 list_for_each_entry(mod, &smp_alt_modules, next)
456 alternatives_smp_lock(mod->locks, mod->locks_end,
457 mod->text, mod->text_end);
458 } else {
c767a54b 459 pr_info("switching to UP code\n");
53756d37
JF
460 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
461 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
9a0b5817
GH
462 list_for_each_entry(mod, &smp_alt_modules, next)
463 alternatives_smp_unlock(mod->locks, mod->locks_end,
464 mod->text, mod->text_end);
465 }
ca74a6f8 466 smp_mode = smp;
2f1dafe5 467 mutex_unlock(&smp_alt);
9a0b5817
GH
468}
469
2cfa1978
MH
470/* Return 1 if the address range is reserved for smp-alternatives */
471int alternatives_text_reserved(void *start, void *end)
472{
473 struct smp_alt_module *mod;
5967ed87 474 const s32 *poff;
076dc4a6
MH
475 u8 *text_start = start;
476 u8 *text_end = end;
2cfa1978
MH
477
478 list_for_each_entry(mod, &smp_alt_modules, next) {
076dc4a6 479 if (mod->text > text_end || mod->text_end < text_start)
2cfa1978 480 continue;
5967ed87
JB
481 for (poff = mod->locks; poff < mod->locks_end; poff++) {
482 const u8 *ptr = (const u8 *)poff + *poff;
483
484 if (text_start <= ptr && text_end > ptr)
2cfa1978 485 return 1;
5967ed87 486 }
2cfa1978
MH
487 }
488
489 return 0;
490}
8ec4d41f
GH
491#endif
492
#ifdef CONFIG_PARAVIRT
/*
 * Patch every paravirt site in [start, end): let pv_init_ops.patch()
 * rewrite a copy of the original instructions, pad what it did not use
 * with NOPs and write the result back over the site.
 * Does nothing when noreplace_paravirt is set.
 */
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *site;
	char insnbuf[MAX_PATCH_LEN];

	if (noreplace_paravirt)
		return;

	for (site = start; site < end; site++) {
		unsigned int used;

		BUG_ON(site->len > MAX_PATCH_LEN);

		/* prep the buffer with the original instructions */
		memcpy(insnbuf, site->instr, site->len);
		used = pv_init_ops.patch(site->instrtype, site->clobbers,
					 insnbuf,
					 (unsigned long)site->instr,
					 site->len);

		/* The patcher must not produce more than the site can hold. */
		BUG_ON(used > site->len);

		/* Pad the rest with nops */
		add_nops(insnbuf + used, site->len - used);
		text_poke_early(site->instr, insnbuf, site->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */
522
9a0b5817
GH
523void __init alternative_instructions(void)
524{
8f4e956b
AK
525 /* The patching is not fully atomic, so try to avoid local interruptions
526 that might execute the to be patched code.
527 Other CPUs are not running. */
528 stop_nmi();
123aa76e
AK
529
530 /*
531 * Don't stop machine check exceptions while patching.
532 * MCEs only happen when something got corrupted and in this
533 * case we must do something about the corruption.
534 * Ignoring it is worse than a unlikely patching race.
535 * Also machine checks tend to be broadcast and if one CPU
536 * goes into machine check the others follow quickly, so we don't
537 * expect a machine check to cause undue problems during to code
538 * patching.
539 */
8f4e956b 540
9a0b5817
GH
541 apply_alternatives(__alt_instructions, __alt_instructions_end);
542
543 /* switch to patch-once-at-boottime-only mode and free the
544 * tables in case we know the number of CPUs will never ever
545 * change */
546#ifdef CONFIG_HOTPLUG_CPU
547 if (num_possible_cpus() < 2)
548 smp_alt_once = 1;
9a0b5817
GH
549#endif
550
8ec4d41f 551#ifdef CONFIG_SMP
9a0b5817
GH
552 if (smp_alt_once) {
553 if (1 == num_possible_cpus()) {
c767a54b 554 pr_info("switching to UP code\n");
53756d37
JF
555 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
556 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
557
9a0b5817
GH
558 alternatives_smp_unlock(__smp_locks, __smp_locks_end,
559 _text, _etext);
560 }
9a0b5817 561 } else {
9a0b5817
GH
562 alternatives_smp_module_add(NULL, "core kernel",
563 __smp_locks, __smp_locks_end,
564 _text, _etext);
ca74a6f8
AK
565
566 /* Only switch to UP mode if we don't immediately boot others */
649c6653 567 if (num_present_cpus() == 1 || setup_max_cpus <= 1)
ca74a6f8 568 alternatives_smp_switch(0);
9a0b5817 569 }
8ec4d41f 570#endif
441d40dc 571 apply_paravirt(__parainstructions, __parainstructions_end);
8f4e956b 572
f68fd5f4
FW
573 if (smp_alt_once)
574 free_init_pages("SMP alternatives",
575 (unsigned long)__smp_locks,
576 (unsigned long)__smp_locks_end);
577
8f4e956b 578 restart_nmi();
9a0b5817 579}
19d36ccd 580
e587cadd
MD
581/**
582 * text_poke_early - Update instructions on a live kernel at boot time
583 * @addr: address to modify
584 * @opcode: source of the copy
585 * @len: length to copy
586 *
19d36ccd
AK
587 * When you use this code to patch more than one byte of an instruction
588 * you need to make sure that other CPUs cannot execute this code in parallel.
e587cadd
MD
589 * Also no thread must be currently preempted in the middle of these
590 * instructions. And on the local CPU you need to be protected again NMI or MCE
591 * handlers seeing an inconsistent instruction while you patch.
19d36ccd 592 */
fa6f2cc7 593void *__init_or_module text_poke_early(void *addr, const void *opcode,
8b5a10fc 594 size_t len)
19d36ccd 595{
e587cadd
MD
596 unsigned long flags;
597 local_irq_save(flags);
19d36ccd 598 memcpy(addr, opcode, len);
e587cadd 599 sync_core();
5367b688 600 local_irq_restore(flags);
e587cadd
MD
601 /* Could also do a CLFLUSH here to speed up CPU recovery; but
602 that causes hangs on some VIA CPUs. */
603 return addr;
604}
605
606/**
607 * text_poke - Update instructions on a live kernel
608 * @addr: address to modify
609 * @opcode: source of the copy
610 * @len: length to copy
611 *
612 * Only atomic text poke/set should be allowed when not doing early patching.
613 * It means the size must be writable atomically and the address must be aligned
614 * in a way that permits an atomic write. It also makes sure we fit on a single
615 * page.
78ff7fae
MH
616 *
617 * Note: Must be called under text_mutex.
e587cadd
MD
618 */
619void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
620{
78ff7fae 621 unsigned long flags;
e587cadd 622 char *vaddr;
b7b66baa
MD
623 struct page *pages[2];
624 int i;
e587cadd 625
b7b66baa
MD
626 if (!core_kernel_text((unsigned long)addr)) {
627 pages[0] = vmalloc_to_page(addr);
628 pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
15a601eb 629 } else {
b7b66baa 630 pages[0] = virt_to_page(addr);
00c6b2d5 631 WARN_ON(!PageReserved(pages[0]));
b7b66baa 632 pages[1] = virt_to_page(addr + PAGE_SIZE);
e587cadd 633 }
b7b66baa 634 BUG_ON(!pages[0]);
7cf49427 635 local_irq_save(flags);
78ff7fae
MH
636 set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
637 if (pages[1])
638 set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
639 vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
b7b66baa 640 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
78ff7fae
MH
641 clear_fixmap(FIX_TEXT_POKE0);
642 if (pages[1])
643 clear_fixmap(FIX_TEXT_POKE1);
644 local_flush_tlb();
19d36ccd 645 sync_core();
a534b679
AK
646 /* Could also do a CLFLUSH here to speed up CPU recovery; but
647 that causes hangs on some VIA CPUs. */
b7b66baa
MD
648 for (i = 0; i < len; i++)
649 BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
7cf49427 650 local_irq_restore(flags);
e587cadd 651 return addr;
19d36ccd 652}
3d55cc8a
MH
653
654/*
655 * Cross-modifying kernel text with stop_machine().
656 * This code originally comes from immediate value.
657 */
658static atomic_t stop_machine_first;
659static int wrote_text;
660
661struct text_poke_params {
7deb18dc
MH
662 struct text_poke_param *params;
663 int nparams;
3d55cc8a
MH
664};
665
666static int __kprobes stop_machine_text_poke(void *data)
667{
668 struct text_poke_params *tpp = data;
7deb18dc
MH
669 struct text_poke_param *p;
670 int i;
3d55cc8a 671
2f747590 672 if (atomic_xchg(&stop_machine_first, 0)) {
7deb18dc
MH
673 for (i = 0; i < tpp->nparams; i++) {
674 p = &tpp->params[i];
675 text_poke(p->addr, p->opcode, p->len);
676 }
3d55cc8a
MH
677 smp_wmb(); /* Make sure other cpus see that this has run */
678 wrote_text = 1;
679 } else {
680 while (!wrote_text)
e5a11016
MH
681 cpu_relax();
682 smp_mb(); /* Load wrote_text before following execution */
3d55cc8a
MH
683 }
684
7deb18dc
MH
685 for (i = 0; i < tpp->nparams; i++) {
686 p = &tpp->params[i];
687 flush_icache_range((unsigned long)p->addr,
688 (unsigned long)p->addr + p->len);
689 }
0e00f7ae
MD
690 /*
691 * Intel Archiecture Software Developer's Manual section 7.1.3 specifies
692 * that a core serializing instruction such as "cpuid" should be
693 * executed on _each_ core before the new instruction is made visible.
694 */
695 sync_core();
3d55cc8a
MH
696 return 0;
697}
698
699/**
700 * text_poke_smp - Update instructions on a live kernel on SMP
701 * @addr: address to modify
702 * @opcode: source of the copy
703 * @len: length to copy
704 *
705 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
706 * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
707 * should be allowed, since stop_machine() does _not_ protect code against
708 * NMI and MCE.
709 *
710 * Note: Must be called under get_online_cpus() and text_mutex.
711 */
712void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
713{
714 struct text_poke_params tpp;
7deb18dc 715 struct text_poke_param p;
3d55cc8a 716
7deb18dc
MH
717 p.addr = addr;
718 p.opcode = opcode;
719 p.len = len;
720 tpp.params = &p;
721 tpp.nparams = 1;
3d55cc8a
MH
722 atomic_set(&stop_machine_first, 1);
723 wrote_text = 0;
3caa3751 724 /* Use __stop_machine() because the caller already got online_cpus. */
404ba5d7 725 __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
3d55cc8a
MH
726 return addr;
727}
728
7deb18dc
MH
729/**
730 * text_poke_smp_batch - Update instructions on a live kernel on SMP
731 * @params: an array of text_poke parameters
732 * @n: the number of elements in params.
733 *
734 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
735 * stop_machine() is heavy task, it is better to aggregate text_poke requests
736 * and do it once if possible.
737 *
738 * Note: Must be called under get_online_cpus() and text_mutex.
739 */
740void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
741{
742 struct text_poke_params tpp = {.params = params, .nparams = n};
743
744 atomic_set(&stop_machine_first, 1);
745 wrote_text = 0;
78345d2e 746 __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
7deb18dc 747}