x86: honor _PAGE_PSE bit on page walks
[linux-2.6-block.git] / arch / x86 / kernel / traps_32.c
CommitLineData
1da177e4 1/*
1da177e4
LT
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 */
7
8/*
9 * 'Traps.c' handles hardware traps and faults after we have saved some
10 * state in 'asm.s'.
11 */
1da177e4
LT
12#include <linux/sched.h>
13#include <linux/kernel.h>
14#include <linux/string.h>
15#include <linux/errno.h>
16#include <linux/timer.h>
17#include <linux/mm.h>
18#include <linux/init.h>
19#include <linux/delay.h>
20#include <linux/spinlock.h>
21#include <linux/interrupt.h>
22#include <linux/highmem.h>
23#include <linux/kallsyms.h>
24#include <linux/ptrace.h>
25#include <linux/utsname.h>
26#include <linux/kprobes.h>
6e274d14 27#include <linux/kexec.h>
176a2718 28#include <linux/unwind.h>
1e2af92e 29#include <linux/uaccess.h>
a36df98a 30#include <linux/nmi.h>
91768d6c 31#include <linux/bug.h>
1da177e4
LT
32
33#ifdef CONFIG_EISA
34#include <linux/ioport.h>
35#include <linux/eisa.h>
36#endif
37
38#ifdef CONFIG_MCA
39#include <linux/mca.h>
40#endif
41
c0d12172
DJ
42#if defined(CONFIG_EDAC)
43#include <linux/edac.h>
44#endif
45
1da177e4
LT
46#include <asm/processor.h>
47#include <asm/system.h>
1da177e4
LT
48#include <asm/io.h>
49#include <asm/atomic.h>
50#include <asm/debugreg.h>
51#include <asm/desc.h>
52#include <asm/i387.h>
53#include <asm/nmi.h>
176a2718 54#include <asm/unwind.h>
1da177e4
LT
55#include <asm/smp.h>
56#include <asm/arch_hooks.h>
1eeb66a1 57#include <linux/kdebug.h>
2b14a78c 58#include <asm/stacktrace.h>
1da177e4 59
1da177e4
LT
60#include <linux/module.h>
61
62#include "mach_traps.h"
63
29cbc78b
AK
64int panic_on_unrecovered_nmi;
65
1da177e4
LT
66asmlinkage int system_call(void);
67
1da177e4
LT
68/* Do we ignore FPU interrupts ? */
69char ignore_fpu_irq = 0;
70
71/*
72 * The IDT has to be page-aligned to simplify the Pentium
73 * F0 0F bug workaround.. We have a special link segment
74 * for this.
75 */
76struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
77
78asmlinkage void divide_error(void);
79asmlinkage void debug(void);
80asmlinkage void nmi(void);
81asmlinkage void int3(void);
82asmlinkage void overflow(void);
83asmlinkage void bounds(void);
84asmlinkage void invalid_op(void);
85asmlinkage void device_not_available(void);
86asmlinkage void coprocessor_segment_overrun(void);
87asmlinkage void invalid_TSS(void);
88asmlinkage void segment_not_present(void);
89asmlinkage void stack_segment(void);
90asmlinkage void general_protection(void);
91asmlinkage void page_fault(void);
92asmlinkage void coprocessor_error(void);
93asmlinkage void simd_coprocessor_error(void);
94asmlinkage void alignment_check(void);
95asmlinkage void spurious_interrupt_bug(void);
96asmlinkage void machine_check(void);
97
0741f4d2 98int kstack_depth_to_print = 24;
86c41837 99static unsigned int code_bytes = 64;
e041c683 100
36ad4885 101static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
1da177e4
LT
102{
103 return p > (void *)tinfo &&
36ad4885 104 p <= (void *)tinfo + THREAD_SIZE - size;
1da177e4
LT
105}
106
36ad4885
LT
/*
 * Layout of the two words found at the top of every stack frame when the
 * kernel is built with frame pointers.
 */
struct stack_frame {
	/* saved frame pointer of the caller, i.e. the next frame to visit */
	struct stack_frame *next_frame;
	/* address execution resumes at once this frame returns */
	unsigned long return_address;
};
112
1da177e4 113static inline unsigned long print_context_stack(struct thread_info *tinfo,
7aa89746 114 unsigned long *stack, unsigned long ebp,
9689ba8a 115 const struct stacktrace_ops *ops, void *data)
1da177e4 116{
1da177e4 117#ifdef CONFIG_FRAME_POINTER
36ad4885
LT
118 struct stack_frame *frame = (struct stack_frame *)ebp;
119 while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) {
120 struct stack_frame *next;
121 unsigned long addr;
122
123 addr = frame->return_address;
2b14a78c 124 ops->address(data, addr);
b88d4f1d
IM
125 /*
126 * break out of recursive entries (such as
808dbbb6
LT
127 * end_of_stack_stop_unwind_function). Also,
128 * we can never allow a frame pointer to
129 * move downwards!
36ad4885
LT
130 */
131 next = frame->next_frame;
132 if (next <= frame)
b88d4f1d 133 break;
36ad4885 134 frame = next;
1da177e4
LT
135 }
136#else
36ad4885
LT
137 while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
138 unsigned long addr;
139
1da177e4 140 addr = *stack++;
7aa89746 141 if (__kernel_text_address(addr))
2b14a78c 142 ops->address(data, addr);
1da177e4
LT
143 }
144#endif
145 return ebp;
146}
147
b615ebda
AK
148#define MSG(msg) ops->warning(data, msg)
149
2b14a78c
AK
150void dump_trace(struct task_struct *task, struct pt_regs *regs,
151 unsigned long *stack,
9689ba8a 152 const struct stacktrace_ops *ops, void *data)
1da177e4 153{
a32cf397 154 unsigned long ebp = 0;
1da177e4
LT
155
156 if (!task)
157 task = current;
158
a32cf397 159 if (!stack) {
2b14a78c
AK
160 unsigned long dummy;
161 stack = &dummy;
028a690a 162 if (task != current)
2b14a78c 163 stack = (unsigned long *)task->thread.esp;
176a2718
JB
164 }
165
a32cf397
AK
166#ifdef CONFIG_FRAME_POINTER
167 if (!ebp) {
168 if (task == current) {
169 /* Grab ebp right from our regs */
170 asm ("movl %%ebp, %0" : "=r" (ebp) : );
171 } else {
172 /* ebp is the last reg pushed by switch_to */
173 ebp = *(unsigned long *) task->thread.esp;
174 }
1da177e4 175 }
a32cf397 176#endif
1da177e4
LT
177
178 while (1) {
179 struct thread_info *context;
180 context = (struct thread_info *)
181 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
2b14a78c
AK
182 ebp = print_context_stack(context, stack, ebp, ops, data);
183 /* Should be after the line below, but somewhere
184 in early boot context comes out corrupted and we
185 can't reference it -AK */
186 if (ops->stack(data, "IRQ") < 0)
187 break;
1da177e4
LT
188 stack = (unsigned long*)context->previous_esp;
189 if (!stack)
190 break;
a36df98a 191 touch_nmi_watchdog();
1da177e4
LT
192 }
193}
2b14a78c
AK
194EXPORT_SYMBOL(dump_trace);
195
/*
 * stacktrace_ops ->warning_symbol callback: print the log-level prefix
 * carried in @data, then @msg (a print_symbol() format) resolved against
 * @symbol, then a newline.
 *
 * The prefix is emitted through an explicit "%s" so that it is never
 * interpreted as a format string, the same way print_trace_warning() and
 * print_trace_address() treat it.
 */
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	printk("%s", (char *)data);
	print_symbol(msg, symbol);
	printk("\n");
}
203
/*
 * stacktrace_ops ->warning callback: emit @msg on a line of its own,
 * prefixed by the log-level string carried in @data.
 */
static void print_trace_warning(void *data, char *msg)
{
	char *log_lvl = data;

	printk("%s%s\n", log_lvl, msg);
}
208
/*
 * stacktrace_ops ->stack callback.  The printing backend has nothing to
 * say about stack switches; it always returns 0, which dump_trace() treats
 * as "keep going".
 */
static int print_trace_stack(void *data, char *name)
{
	(void)data;
	(void)name;

	return 0;
}
213
/*
 * Print one address/symbol entry per line.
 *
 * stacktrace_ops ->address callback: @data is the log-level prefix, @addr
 * the return address to show.  The NMI watchdog is touched after every
 * entry.
 */
static void print_trace_address(void *data, unsigned long addr)
{
	char *log_lvl = data;

	printk("%s [<%08lx>] ", log_lvl, addr);
	print_symbol("%s\n", addr);
	touch_nmi_watchdog();
}
223
9689ba8a 224static const struct stacktrace_ops print_trace_ops = {
2b14a78c
AK
225 .warning = print_trace_warning,
226 .warning_symbol = print_trace_warning_symbol,
227 .stack = print_trace_stack,
228 .address = print_trace_address,
229};
230
231static void
232show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
233 unsigned long * stack, char *log_lvl)
234{
235 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
236 printk("%s =======================\n", log_lvl);
237}
1da177e4 238
2b14a78c
AK
/* Print a call trace without any explicit log-level prefix. */
void show_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long * stack)
{
	char *no_log_lvl = "";

	show_trace_log_lvl(task, regs, stack, no_log_lvl);
}
244
176a2718
JB
245static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
246 unsigned long *esp, char *log_lvl)
1da177e4
LT
247{
248 unsigned long *stack;
249 int i;
250
251 if (esp == NULL) {
252 if (task)
253 esp = (unsigned long*)task->thread.esp;
254 else
255 esp = (unsigned long *)&esp;
256 }
257
258 stack = esp;
259 for(i = 0; i < kstack_depth_to_print; i++) {
260 if (kstack_end(stack))
261 break;
75874d5c
CE
262 if (i && ((i % 8) == 0))
263 printk("\n%s ", log_lvl);
1da177e4
LT
264 printk("%08lx ", *stack++);
265 }
75874d5c 266 printk("\n%sCall Trace:\n", log_lvl);
176a2718 267 show_trace_log_lvl(task, regs, esp, log_lvl);
7aa89746
CE
268}
269
270void show_stack(struct task_struct *task, unsigned long *esp)
271{
75874d5c 272 printk(" ");
176a2718 273 show_stack_log_lvl(task, NULL, esp, "");
1da177e4
LT
274}
275
276/*
277 * The architecture-independent dump_stack generator
278 */
279void dump_stack(void)
280{
281 unsigned long stack;
282
176a2718 283 show_trace(current, NULL, &stack);
1da177e4
LT
284}
285
286EXPORT_SYMBOL(dump_stack);
287
288void show_registers(struct pt_regs *regs)
289{
290 int i;
9d975ebd 291
1da177e4 292 print_modules();
9d975ebd 293 __show_registers(regs, 0);
7e04a118
CE
294 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
295 TASK_COMM_LEN, current->comm, current->pid,
c9f4f06d 296 current_thread_info(), current, task_thread_info(current));
1da177e4
LT
297 /*
298 * When in-kernel, we also print out the stack and code at the
299 * time of the fault..
300 */
9d975ebd 301 if (!user_mode_vm(regs)) {
11a4180c 302 u8 *eip;
86c41837
CE
303 unsigned int code_prologue = code_bytes * 43 / 64;
304 unsigned int code_len = code_bytes;
99325326 305 unsigned char c;
1da177e4 306
9c107805 307 printk("\n" KERN_EMERG "Stack: ");
9d975ebd 308 show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);
1da177e4 309
9c107805 310 printk(KERN_EMERG "Code: ");
1da177e4 311
86c41837 312 eip = (u8 *)regs->eip - code_prologue;
11a4180c
AK
313 if (eip < (u8 *)PAGE_OFFSET ||
314 probe_kernel_address(eip, c)) {
99325326 315 /* try starting at EIP */
11a4180c 316 eip = (u8 *)regs->eip;
86c41837 317 code_len = code_len - code_prologue + 1;
99325326 318 }
86c41837 319 for (i = 0; i < code_len; i++, eip++) {
11a4180c
AK
320 if (eip < (u8 *)PAGE_OFFSET ||
321 probe_kernel_address(eip, c)) {
1da177e4
LT
322 printk(" Bad EIP value.");
323 break;
324 }
11a4180c 325 if (eip == (u8 *)regs->eip)
1da177e4
LT
326 printk("<%02x> ", c);
327 else
328 printk("%02x ", c);
329 }
330 }
331 printk("\n");
332}
333
91768d6c 334int is_valid_bugaddr(unsigned long eip)
1da177e4
LT
335{
336 unsigned short ud2;
1da177e4
LT
337
338 if (eip < PAGE_OFFSET)
91768d6c 339 return 0;
11a4180c 340 if (probe_kernel_address((unsigned short *)eip, ud2))
91768d6c 341 return 0;
1da177e4 342
91768d6c 343 return ud2 == 0x0b0f;
1da177e4
LT
344}
345
91768d6c
JF
346/*
347 * This is gone through when something in the kernel has done something bad and
348 * is about to be terminated.
349 */
1da177e4
LT
350void die(const char * str, struct pt_regs * regs, long err)
351{
352 static struct {
353 spinlock_t lock;
354 u32 lock_owner;
355 int lock_owner_depth;
356 } die = {
6cfd76a2 357 .lock = __SPIN_LOCK_UNLOCKED(die.lock),
1da177e4
LT
358 .lock_owner = -1,
359 .lock_owner_depth = 0
360 };
361 static int die_counter;
e43d674f 362 unsigned long flags;
1da177e4 363
dd287796
AM
364 oops_enter();
365
39c715b7 366 if (die.lock_owner != raw_smp_processor_id()) {
1da177e4 367 console_verbose();
e43d674f 368 spin_lock_irqsave(&die.lock, flags);
1da177e4
LT
369 die.lock_owner = smp_processor_id();
370 die.lock_owner_depth = 0;
371 bust_spinlocks(1);
372 }
e43d674f
JB
373 else
374 local_save_flags(flags);
1da177e4
LT
375
376 if (++die.lock_owner_depth < 3) {
7bee5c0f
RD
377 unsigned long esp;
378 unsigned short ss;
379
608e2619 380 report_bug(regs->eip, regs);
91768d6c 381
9aa8d719
PE
382 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff,
383 ++die_counter);
1da177e4 384#ifdef CONFIG_PREEMPT
9aa8d719 385 printk("PREEMPT ");
1da177e4
LT
386#endif
387#ifdef CONFIG_SMP
388 printk("SMP ");
1da177e4
LT
389#endif
390#ifdef CONFIG_DEBUG_PAGEALLOC
391 printk("DEBUG_PAGEALLOC");
1da177e4 392#endif
9aa8d719
PE
393 printk("\n");
394
20c0d2d4
JB
395 if (notify_die(DIE_OOPS, str, regs, err,
396 current->thread.trap_no, SIGSEGV) !=
7bee5c0f 397 NOTIFY_STOP) {
20c0d2d4 398 show_registers(regs);
7bee5c0f
RD
399 /* Executive summary in case the oops scrolled away */
400 esp = (unsigned long) (&regs->esp);
401 savesegment(ss, ss);
402 if (user_mode(regs)) {
403 esp = regs->esp;
404 ss = regs->xss & 0xffff;
405 }
406 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->eip);
407 print_symbol("%s", regs->eip);
408 printk(" SS:ESP %04x:%08lx\n", ss, esp);
409 }
20c0d2d4
JB
410 else
411 regs = NULL;
1da177e4 412 } else
9c107805 413 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
1da177e4
LT
414
415 bust_spinlocks(0);
416 die.lock_owner = -1;
bcdcd8e7 417 add_taint(TAINT_DIE);
e43d674f 418 spin_unlock_irqrestore(&die.lock, flags);
6e274d14 419
20c0d2d4
JB
420 if (!regs)
421 return;
422
6e274d14
AN
423 if (kexec_should_crash(current))
424 crash_kexec(regs);
425
1da177e4
LT
426 if (in_interrupt())
427 panic("Fatal exception in interrupt");
428
cea6a4ba 429 if (panic_on_oops)
012c437d 430 panic("Fatal exception");
cea6a4ba 431
dd287796 432 oops_exit();
1da177e4
LT
433 do_exit(SIGSEGV);
434}
435
/* Call die() unless user_mode_vm() says @regs came from user mode. */
static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
{
	if (user_mode_vm(regs))
		return;

	die(str, regs, err);
}
441
3d97ae5b
PP
442static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
443 struct pt_regs * regs, long error_code,
444 siginfo_t *info)
1da177e4 445{
4f339ecb 446 struct task_struct *tsk = current;
4f339ecb 447
1da177e4
LT
448 if (regs->eflags & VM_MASK) {
449 if (vm86)
450 goto vm86_trap;
451 goto trap_signal;
452 }
453
717b594a 454 if (!user_mode(regs))
1da177e4
LT
455 goto kernel_trap;
456
457 trap_signal: {
d1895183
AK
458 /*
459 * We want error_code and trap_no set for userspace faults and
460 * kernelspace faults which result in die(), but not
461 * kernelspace faults which are fixed up. die() gives the
462 * process no chance to handle the signal and notice the
463 * kernel fault information, so that won't result in polluting
464 * the information about previously queued, but not yet
465 * delivered, faults. See also do_general_protection below.
466 */
467 tsk->thread.error_code = error_code;
468 tsk->thread.trap_no = trapnr;
469
1da177e4
LT
470 if (info)
471 force_sig_info(signr, info, tsk);
472 else
473 force_sig(signr, tsk);
474 return;
475 }
476
477 kernel_trap: {
d1895183
AK
478 if (!fixup_exception(regs)) {
479 tsk->thread.error_code = error_code;
480 tsk->thread.trap_no = trapnr;
1da177e4 481 die(str, regs, error_code);
d1895183 482 }
1da177e4
LT
483 return;
484 }
485
486 vm86_trap: {
487 int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
488 if (ret) goto trap_signal;
489 return;
490 }
491}
492
/*
 * Define do_<name>(): give the die notifiers a chance to claim the trap
 * and, unless one answers NOTIFY_STOP, pass it to do_trap() with no
 * siginfo and no vm86 handling.
 */
#define DO_ERROR(trapnr, signr, str, name)				\
fastcall void do_##name(struct pt_regs * regs, long error_code)	\
{									\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
						!= NOTIFY_STOP)		\
		do_trap(trapnr, signr, str, 0, regs, error_code, NULL);	\
}
501
/*
 * Like DO_ERROR, but the generated do_<name>() fills in a siginfo
 * (si_code = sicode, si_addr = siaddr) for do_trap() to deliver.  A
 * non-zero @irq makes the handler enable interrupts first.
 */
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq)	\
fastcall void do_##name(struct pt_regs * regs, long error_code)	\
{									\
	siginfo_t info;							\
									\
	if (irq)							\
		local_irq_enable();					\
									\
	info.si_signo = signr;						\
	info.si_errno = 0;						\
	info.si_code = sicode;						\
	info.si_addr = (void __user *)siaddr;				\
									\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
						!= NOTIFY_STOP)		\
		do_trap(trapnr, signr, str, 0, regs, error_code, &info);\
}
517
/*
 * Like DO_ERROR, but do_trap() is called with vm86 = 1, so a trap taken
 * with VM_MASK set is first offered to the vm86 code.
 */
#define DO_VM86_ERROR(trapnr, signr, str, name)				\
fastcall void do_##name(struct pt_regs * regs, long error_code)	\
{									\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
						!= NOTIFY_STOP)		\
		do_trap(trapnr, signr, str, 1, regs, error_code, NULL);	\
}
526
/*
 * Like DO_VM86_ERROR, but the generated do_<name>() additionally fills in
 * a siginfo (si_code = sicode, si_addr = siaddr) for do_trap() to deliver.
 */
#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)	\
fastcall void do_##name(struct pt_regs * regs, long error_code)	\
{									\
	siginfo_t info;							\
									\
	info.si_signo = signr;						\
	info.si_errno = 0;						\
	info.si_code = sicode;						\
	info.si_addr = (void __user *)siaddr;				\
									\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
						!= NOTIFY_STOP)		\
		do_trap(trapnr, signr, str, 1, regs, error_code, &info);\
}
540
541DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
542#ifndef CONFIG_KPROBES
543DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
544#endif
545DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
546DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
a10d9a71 547DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0)
1da177e4
LT
548DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
549DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
550DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
551DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
a10d9a71
PZ
552DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
553DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
1da177e4 554
3d97ae5b
PP
555fastcall void __kprobes do_general_protection(struct pt_regs * regs,
556 long error_code)
1da177e4
LT
557{
558 int cpu = get_cpu();
559 struct tss_struct *tss = &per_cpu(init_tss, cpu);
560 struct thread_struct *thread = &current->thread;
561
562 /*
563 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
564 * invalid offset set (the LAZY one) and the faulting thread has
565 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
566 * and we set the offset field correctly. Then we let the CPU to
567 * restart the faulting instruction.
568 */
a75c54f9 569 if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
1da177e4
LT
570 thread->io_bitmap_ptr) {
571 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
572 thread->io_bitmap_max);
573 /*
574 * If the previously set map was extending to higher ports
575 * than the current one, pad extra space with 0xff (no access).
576 */
577 if (thread->io_bitmap_max < tss->io_bitmap_max)
578 memset((char *) tss->io_bitmap +
579 thread->io_bitmap_max, 0xff,
580 tss->io_bitmap_max - thread->io_bitmap_max);
581 tss->io_bitmap_max = thread->io_bitmap_max;
a75c54f9 582 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
d5cd4aad 583 tss->io_bitmap_owner = thread;
1da177e4
LT
584 put_cpu();
585 return;
586 }
587 put_cpu();
588
589 if (regs->eflags & VM_MASK)
590 goto gp_in_vm86;
591
717b594a 592 if (!user_mode(regs))
1da177e4
LT
593 goto gp_in_kernel;
594
595 current->thread.error_code = error_code;
596 current->thread.trap_no = 13;
abd4f750
MAS
597 if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
598 printk_ratelimit())
599 printk(KERN_INFO
600 "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
601 current->comm, current->pid,
602 regs->eip, regs->esp, error_code);
603
1da177e4
LT
604 force_sig(SIGSEGV, current);
605 return;
606
607gp_in_vm86:
608 local_irq_enable();
609 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
610 return;
611
612gp_in_kernel:
613 if (!fixup_exception(regs)) {
d1895183
AK
614 current->thread.error_code = error_code;
615 current->thread.trap_no = 13;
1da177e4
LT
616 if (notify_die(DIE_GPF, "general protection fault", regs,
617 error_code, 13, SIGSEGV) == NOTIFY_STOP)
618 return;
619 die("general protection fault", regs, error_code);
620 }
621}
622