/* arch/x86/kernel/alternative.c */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/vsyscall.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/fixmap.h>

#define MAX_PATCH_LEN (255-1)

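/*
 * Boot-time parameters handled in this file:
 *   smp-alt-boot         do the SMP lock-prefix patching once at boot and
 *                        never switch between SMP/UP code at runtime
 *   debug-alternative    enable DPRINTK output while patching
 *   noreplace-smp        keep SMP lock prefixes even when running UP
 *   noreplace-paravirt   leave paravirt call sites unpatched
 */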
#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

static int __init bootonly(char *str)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif

static int debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, args...) if (debug_alternative) \
	printk(KERN_DEBUG fmt, args)

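/*
 * Each nop blob below is a flat byte array in .rodata: the 1-byte nop,
 * then the 2-byte nop, and so on up to the 8-byte nop.  The matching
 * *_nops[] pointer tables index that blob by length, so for example
 * intel_nops[n] points at an n-byte nop sequence.
 */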
#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.section .rodata, \"a\"\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8
    "\t.previous");
extern const unsigned char intelnops[];
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K8_NOP1
asm("\t.section .rodata, \"a\"\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8
    "\t.previous");
extern const unsigned char k8nops[];
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K7_NOP1
asm("\t.section .rodata, \"a\"\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8
    "\t.previous");
extern const unsigned char k7nops[];
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef P6_NOP1
asm("\t.section .rodata, \"a\"\np6nops: "
	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
	P6_NOP7 P6_NOP8
    "\t.previous");
extern const unsigned char p6nops[];
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
	NULL,
	p6nops,
	p6nops + 1,
	p6nops + 1 + 2,
	p6nops + 1 + 2 + 3,
	p6nops + 1 + 2 + 3 + 4,
	p6nops + 1 + 2 + 3 + 4 + 5,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef CONFIG_X86_64

extern char __vsyscall_0;
const unsigned char *const *find_nop_table(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_has(X86_FEATURE_NOPL))
		return p6_nops;
	else
		return k8_nops;
}

#else /* CONFIG_X86_64 */

const unsigned char *const *find_nop_table(void)
{
	if (boot_cpu_has(X86_FEATURE_K8))
		return k8_nops;
	else if (boot_cpu_has(X86_FEATURE_K7))
		return k7_nops;
	else if (boot_cpu_has(X86_FEATURE_NOPL))
		return p6_nops;
	else
		return intel_nops;
}

#endif /* CONFIG_X86_64 */

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
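/*
 * Illustrative example (hypothetical buffer and lengths): padding a
 * 7-byte patch site after copying in a 3-byte replacement, the same
 * pattern apply_alternatives() uses below:
 *
 *	memcpy(buf, replacement, 3);
 *	add_nops(buf + 3, 7 - 3);
 *	text_poke_early(site, buf, 7);
 *
 * The trailing four bytes are taken from the 4-byte entry of the nop
 * table selected for this CPU.
 */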
void add_nops(void *insns, unsigned int len)
{
	const unsigned char *const *noptable = find_nop_table();

	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, noptable[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}
EXPORT_SYMBOL_GPL(add_nops);

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

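/*
 * Each alt_instr descriptor records the original instruction, its
 * replacement bytes and the CPU feature that selects the replacement.
 * The descriptors are emitted by the alternative*() macros in
 * <asm/alternative.h> and collected between __alt_instructions and
 * __alt_instructions_end declared above.
 */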
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
	struct alt_instr *a;
	char insnbuf[MAX_PATCH_LEN];

	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
	for (a = start; a < end; a++) {
		u8 *instr = a->instr;
		BUG_ON(a->replacementlen > a->instrlen);
		BUG_ON(a->instrlen > sizeof(insnbuf));
		if (!boot_cpu_has(a->cpuid))
			continue;
#ifdef CONFIG_X86_64
		/* vsyscall code is not mapped yet. resolve it manually. */
		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
			DPRINTK("%s: vsyscall fixup: %p => %p\n",
				__func__, a->instr, instr);
		}
#endif
		memcpy(insnbuf, a->replacement, a->replacementlen);
		add_nops(insnbuf + a->replacementlen,
			 a->instrlen - a->replacementlen);
		text_poke_early(instr, insnbuf, a->instrlen);
	}
}

#ifdef CONFIG_SMP

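/*
 * The two helpers below flip SMP lock prefixes in place: 0xf0 is the
 * LOCK prefix and 0x3e is a DS segment override that has no effect on
 * the following instruction, so a UP kernel avoids the cost of locked
 * bus operations and an SMP switch can restore them later.
 */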
static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;

	mutex_lock(&text_mutex);
	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		text_poke(*ptr, ((unsigned char []){0xf0}), 1);
	}
	mutex_unlock(&text_mutex);
}

static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;

	if (noreplace_smp)
		return;

	mutex_lock(&text_mutex);
	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		text_poke(*ptr, ((unsigned char []){0x3E}), 1);
	}
	mutex_unlock(&text_mutex);
}

struct smp_alt_module {
	/* the module these lock prefixes belong to (NULL for the core kernel) */
	struct module *mod;
	char *name;

	/* ptrs to lock prefixes */
	u8 **locks;
	u8 **locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8 *text;
	u8 *text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1;	/* protected by smp_alt */

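/*
 * Register a range of lock prefixes for later SMP<->UP switching.
 * alternative_instructions() below registers the core kernel's
 * __smp_locks range this way; the module loader is expected to
 * register each module's lock-prefix list the same way.
 */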
void alternatives_smp_module_add(struct module *mod, char *name,
				 void *locks, void *locks_end,
				 void *text, void *text_end)
{
	struct smp_alt_module *smp;

	if (noreplace_smp)
		return;

	if (smp_alt_once) {
		if (boot_cpu_has(X86_FEATURE_UP))
			alternatives_smp_unlock(locks, locks_end,
						text, text_end);
		return;
	}

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		return; /* we'll run the (safe but slow) SMP code then ... */

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
		__func__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	mutex_lock(&smp_alt);
	list_add_tail(&smp->next, &smp_alt_modules);
	if (boot_cpu_has(X86_FEATURE_UP))
		alternatives_smp_unlock(smp->locks, smp->locks_end,
					smp->text, smp->text_end);
	mutex_unlock(&smp_alt);
}

void alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	if (smp_alt_once || noreplace_smp)
		return;

	mutex_lock(&smp_alt);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		mutex_unlock(&smp_alt);
		DPRINTK("%s: %s\n", __func__, item->name);
		kfree(item);
		return;
	}
	mutex_unlock(&smp_alt);
}

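/*
 * Switch every registered lock-prefix list between SMP and UP form.
 * Callers (e.g. the SMP boot and CPU hotplug paths) are expected to
 * invoke this when the number of online CPUs changes.
 */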
void alternatives_smp_switch(int smp)
{
	struct smp_alt_module *mod;

#ifdef CONFIG_LOCKDEP
	/*
	 * An older binutils section-handling bug prevented
	 * alternatives replacement from working reliably.
	 *
	 * If this still occurs then you should see a hang
	 * or crash shortly after this line:
	 */
	printk("lockdep: fixing up alternatives.\n");
#endif

	if (noreplace_smp || smp_alt_once)
		return;
	BUG_ON(!smp && (num_online_cpus() > 1));

	mutex_lock(&smp_alt);

	/*
	 * Avoid unnecessary switches because it forces JIT based VMs to
	 * throw away all cached translations, which can be quite costly.
	 */
	if (smp == smp_mode) {
		/* nothing */
	} else if (smp) {
		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
	} else {
		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_unlock(mod->locks, mod->locks_end,
						mod->text, mod->text_end);
	}
	smp_mode = smp;
	mutex_unlock(&smp_alt);
}

#endif

#ifdef CONFIG_PARAVIRT
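/*
 * Patch every recorded paravirt call site: pv_init_ops.patch() rewrites
 * the site into insnbuf and returns how many bytes it produced; the
 * remainder of the site is padded with nops before being poked back.
 */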
void apply_paravirt(struct paravirt_patch_site *start,
		    struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insnbuf[MAX_PATCH_LEN];

	if (noreplace_paravirt)
		return;

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insnbuf, p->instr, p->len);
		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
					 (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insnbuf + used, p->len - used);
		text_poke_early(p->instr, insnbuf, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

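/*
 * Boot-time entry point: run all of the patching above once, before the
 * secondary CPUs are started (expected to be called from the early boot
 * path, e.g. check_bugs()).
 */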
void __init alternative_instructions(void)
{
	/* The patching is not fully atomic, so try to avoid local
	   interruptions that might execute the code being patched.
	   Other CPUs are not running. */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	apply_alternatives(__alt_instructions, __alt_instructions_end);

	/* switch to patch-once-at-boottime-only mode and free the
	 * tables in case we know the number of CPUs will never ever
	 * change */
#ifdef CONFIG_HOTPLUG_CPU
	if (num_possible_cpus() < 2)
		smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
	if (smp_alt_once) {
		if (1 == num_possible_cpus()) {
			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);

			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
						_text, _etext);
		}
	} else {
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);

		/* Only switch to UP mode if we don't immediately boot others */
		if (num_present_cpus() == 1 || setup_max_cpus <= 1)
			alternatives_smp_switch(0);
	}
#endif
	apply_paravirt(__parainstructions, __parainstructions_end);

	if (smp_alt_once)
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);

	restart_nmi();
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI
 * or MCE handlers seeing an inconsistent instruction while you patch.
 */
void *text_poke_early(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;
	local_irq_save(flags);
	memcpy(addr, opcode, len);
	local_irq_restore(flags);
	sync_core();
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write. It also makes sure we fit on a single
 * page.
 *
 * Note: Must be called under text_mutex.
 */
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;
	char *vaddr;
	struct page *pages[2];
	int i;

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	BUG_ON(!pages[0]);
	local_irq_save(flags);
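	/*
	 * Kernel text may be mapped read-only, so write through a
	 * temporary fixmap alias of the target page(s) rather than
	 * through addr itself.
	 */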
	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
	if (pages[1])
		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
	clear_fixmap(FIX_TEXT_POKE0);
	if (pages[1])
		clear_fixmap(FIX_TEXT_POKE1);
	local_flush_tlb();
	sync_core();
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	for (i = 0; i < len; i++)
		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
	local_irq_restore(flags);
	return addr;
}
e587cadd 545 return addr;
19d36ccd 546}