x86: Fix alternatives and kprobes to remap write-protected kernel text
[linux-2.6-block.git] arch/i386/kernel/alternative.c
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>

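/*
 * With CONFIG_HOTPLUG_CPU the SMP lock-prefix tables are kept around so
 * the kernel can switch between SMP and UP variants as CPUs come and go;
 * the "smp-alt-boot" option below forces the older patch-once-at-boot
 * behaviour instead.
 */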
#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

static int __init bootonly(char *str)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif

static int debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, args...) if (debug_alternative) \
	printk(KERN_DEBUG fmt, args)

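/*
 * Each table below maps a length n (1..ASM_NOP_MAX == 8) to a NOP
 * sequence of exactly n bytes; nop_out() picks from the table that
 * best suits the boot CPU.
 */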
#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.data\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8);
extern unsigned char intelnops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K8_NOP1
asm("\t.data\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8);
extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K7_NOP1
asm("\t.data\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8);
extern unsigned char k7nops[];
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

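/*
 * Pick a NOP table: the 64-bit kernel always uses the K8 NOPs, while
 * the 32-bit kernel probes the boot CPU's feature bits to choose
 * between the generic Intel, K8 and K7 variants.
 */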
#ifdef CONFIG_X86_64

extern char __vsyscall_0;
static inline unsigned char** find_nop_table(void)
{
	return k8_nops;
}

#else /* CONFIG_X86_64 */

static struct nop {
	int cpuid;
	unsigned char **noptable;
} noptypes[] = {
	{ X86_FEATURE_K8, k8_nops },
	{ X86_FEATURE_K7, k7_nops },
	{ -1, NULL }
};

static unsigned char** find_nop_table(void)
{
	unsigned char **noptable = intel_nops;
	int i;

	for (i = 0; noptypes[i].cpuid >= 0; i++) {
		if (boot_cpu_has(noptypes[i].cpuid)) {
			noptable = noptypes[i].noptable;
			break;
		}
	}
	return noptable;
}

#endif /* CONFIG_X86_64 */

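/*
 * Fill a patched region with NOPs, at most ASM_NOP_MAX bytes per
 * instruction: a 10-byte hole becomes an 8-byte NOP followed by a
 * 2-byte NOP.
 */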
static void nop_out(void *insns, unsigned int len)
{
	unsigned char **noptable = find_nop_table();

	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		text_poke(insns, noptable[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr;
	int diff;

	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
	for (a = start; a < end; a++) {
		BUG_ON(a->replacementlen > a->instrlen);
		if (!boot_cpu_has(a->cpuid))
			continue;
		instr = a->instr;
#ifdef CONFIG_X86_64
		/* vsyscall code is not mapped yet. resolve it manually. */
		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
			DPRINTK("%s: vsyscall fixup: %p => %p\n",
				__FUNCTION__, a->instr, instr);
		}
#endif
		memcpy(instr, a->replacement, a->replacementlen);
		diff = a->instrlen - a->replacementlen;
		nop_out(instr + a->replacementlen, diff);
	}
}

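/*
 * UP/SMP lock-prefix patching: the build records the address of every
 * LOCK prefix in a table (__smp_locks for the core kernel).  On a UP
 * system the prefixes are NOPped out, and written back (0xf0) if the
 * machine later goes SMP.
 */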
#ifdef CONFIG_SMP

static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
	}
}

static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;

	if (noreplace_smp)
		return;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		nop_out(*ptr, 1);
	}
}

struct smp_alt_module {
	/* the module that owns this table; NULL for the core kernel */
	struct module *mod;
	char *name;

	/* ptrs to lock prefixes */
	u8 **locks;
	u8 **locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8 *text;
	u8 *text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);

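/*
 * Register a text range and its lock-prefix table so it can be
 * repatched on later SMP/UP transitions (modules pass their own
 * tables, the core kernel registers NULL/"core kernel").
 */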
void alternatives_smp_module_add(struct module *mod, char *name,
				 void *locks, void *locks_end,
				 void *text, void *text_end)
{
	struct smp_alt_module *smp;
	unsigned long flags;

	if (noreplace_smp)
		return;

	if (smp_alt_once) {
		if (boot_cpu_has(X86_FEATURE_UP))
			alternatives_smp_unlock(locks, locks_end,
						text, text_end);
		return;
	}

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		return; /* we'll run the (safe but slow) SMP code then ... */

	smp->mod = mod;
	smp->name = name;
	smp->locks = locks;
	smp->locks_end = locks_end;
	smp->text = text;
	smp->text_end = text_end;
	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
		__FUNCTION__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	spin_lock_irqsave(&smp_alt, flags);
	list_add_tail(&smp->next, &smp_alt_modules);
	if (boot_cpu_has(X86_FEATURE_UP))
		alternatives_smp_unlock(smp->locks, smp->locks_end,
					smp->text, smp->text_end);
	spin_unlock_irqrestore(&smp_alt, flags);
}

void alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;
	unsigned long flags;

	if (smp_alt_once || noreplace_smp)
		return;

	spin_lock_irqsave(&smp_alt, flags);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		spin_unlock_irqrestore(&smp_alt, flags);
		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
		kfree(item);
		return;
	}
	spin_unlock_irqrestore(&smp_alt, flags);
}

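/*
 * Repatch every registered region for SMP (smp != 0) or UP operation;
 * called when the system transitions between one and multiple online
 * CPUs.
 */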
void alternatives_smp_switch(int smp)
{
	struct smp_alt_module *mod;
	unsigned long flags;

#ifdef CONFIG_LOCKDEP
	/*
	 * A not yet fixed binutils section handling bug prevents
	 * alternatives-replacement from working reliably, so turn
	 * it off:
	 */
	printk("lockdep: not fixing up alternatives.\n");
	return;
#endif

	if (noreplace_smp || smp_alt_once)
		return;
	BUG_ON(!smp && (num_online_cpus() > 1));

	spin_lock_irqsave(&smp_alt, flags);
	if (smp) {
		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
	} else {
		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_unlock(mod->locks, mod->locks_end,
						mod->text, mod->text_end);
	}
	spin_unlock_irqrestore(&smp_alt, flags);
}

#endif /* CONFIG_SMP */

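/*
 * Paravirt patching: each paravirt_patch_site identifies an operation
 * that the active paravirt_ops backend may rewrite in place; whatever
 * part of the site the backend does not use is padded with NOPs.
 */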
#ifdef CONFIG_PARAVIRT
void apply_paravirt(struct paravirt_patch_site *start,
		    struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;

	if (noreplace_paravirt)
		return;

	for (p = start; p < end; p++) {
		unsigned int used;

		used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
					  p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		nop_out(p->instr + used, p->len - used);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */

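/*
 * Boot-time entry point: patch the core kernel image with interrupts
 * off on the boot CPU, before the other CPUs have been started.
 */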
void __init alternative_instructions(void)
{
	unsigned long flags;

	local_irq_save(flags);
	apply_alternatives(__alt_instructions, __alt_instructions_end);

	/* switch to patch-once-at-boottime-only mode and free the
	 * tables in case we know the number of CPUs will never ever
	 * change */
#ifdef CONFIG_HOTPLUG_CPU
	if (num_possible_cpus() < 2)
		smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
	if (smp_alt_once) {
		if (1 == num_possible_cpus()) {
			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
						_text, _etext);
		}
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
	} else {
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
		alternatives_smp_switch(0);
	}
#endif
	apply_paravirt(__parainstructions, __parainstructions_end);
	local_irq_restore(flags);
}
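/*
 * text_poke() is why this file can patch write-protected kernel text
 * (e.g. with DEBUG_RODATA): when the target page is not writable it
 * builds a temporary writable alias with vmap() and writes through
 * that, instead of changing the protections of the linear mapping.
 */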
/*
 * Warning:
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these instructions.
 * And on the local CPU you need to be protected against NMI or MCE handlers
 * seeing an inconsistent instruction while you patch.
 */
void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len)
{
	u8 *addr = oaddr;
	if (!pte_write(*lookup_address((unsigned long)addr))) {
		struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) };
		addr = vmap(p, 2, VM_MAP, PAGE_KERNEL);
		if (!addr)
			return;
		addr += ((unsigned long)oaddr) % PAGE_SIZE;
	}
	memcpy(addr, opcode, len);
	sync_core();
	/* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline
	   case. */
	if (cpu_has_clflush)
		asm("clflush (%0) " :: "r" (oaddr) : "memory");
	if (addr != oaddr)
		vunmap(addr);
}
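
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * planting an int3 breakpoint byte, as a kprobes-like user would,
 * subject to the constraints in the warning above:
 *
 *	unsigned char int3 = 0xcc;
 *	text_poke(kprobe_addr, &int3, 1);	(kprobe_addr: assumed target)
 *
 * A single-byte write needs no cross-CPU synchronization, which is why
 * the lock-prefix patching above is safe on a live system.
 */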