def_bool y
depends on X86_MPPARSE || X86_VOYAGER
-if ACPI
config X86_MPPARSE
- def_bool y
- bool "Enable MPS table"
+ bool "Enable MPS table" if ACPI
+ default y
depends on X86_LOCAL_APIC
help
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-endif
-
-if !ACPI
-config X86_MPPARSE
- def_bool y
- depends on X86_LOCAL_APIC
-endif
choice
prompt "Subarchitecture Type"
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
- /* Make -mregparm=3 work */
- regs->ax = sig;
- regs->dx = (unsigned long) &frame->info;
- regs->cx = (unsigned long) &frame->uc;
-
loadsegment(ds, __USER32_DS);
loadsegment(es, __USER32_DS);
*/
static inline void change_bit(int nr, volatile unsigned long *addr)
{
- asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr));
+ if (IS_IMMEDIATE(nr)) {
+ asm volatile(LOCK_PREFIX "xorb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((u8)CONST_MASK(nr)));
+ } else {
+ asm volatile(LOCK_PREFIX "btc %1,%0"
+ : BITOP_ADDR(addr)
+ : "Ir" (nr));
+ }
}
/**
#include <asm/types.h>
#include <linux/compiler.h>
-#ifdef __GNUC__
+#define __LITTLE_ENDIAN
-#ifdef __i386__
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
+static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
{
-#ifdef CONFIG_X86_BSWAP
- asm("bswap %0" : "=r" (x) : "0" (x));
-#else
+#ifdef __i386__
+# ifdef CONFIG_X86_BSWAP
+ asm("bswap %0" : "=r" (val) : "0" (val));
+# else
asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
"rorl $16,%0\n\t" /* swap words */
"xchgb %b0,%h0" /* swap higher bytes */
- : "=q" (x)
- : "0" (x));
+ : "=q" (val)
+ : "0" (val));
+# endif
+
+#else /* __i386__ */
+ asm("bswapl %0"
+ : "=r" (val)
+ : "0" (val));
#endif
- return x;
+ return val;
}
+#define __arch_swab32 __arch_swab32
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
+static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
{
+#ifdef __i386__
union {
struct {
__u32 a;
__u64 u;
} v;
v.u = val;
-#ifdef CONFIG_X86_BSWAP
+# ifdef CONFIG_X86_BSWAP
asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
-#else
- v.s.a = ___arch__swab32(v.s.a);
- v.s.b = ___arch__swab32(v.s.b);
+# else
+ v.s.a = __arch_swab32(v.s.a);
+ v.s.b = __arch_swab32(v.s.b);
asm("xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
-#endif
+# endif
return v.u;
-}
-
#else /* __i386__ */
-
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
-{
asm("bswapq %0"
- : "=r" (x)
- : "0" (x));
- return x;
-}
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
-{
- asm("bswapl %0"
- : "=r" (x)
- : "0" (x));
- return x;
-}
-
+ : "=r" (val)
+ : "0" (val));
+ return val;
#endif
+}
+#define __arch_swab64 __arch_swab64
-/* Do not define swab16. Gcc is smart enough to recognize "C" version and
- convert it into rotation or exhange. */
-
-#define __arch__swab64(x) ___arch__swab64(x)
-#define __arch__swab32(x) ___arch__swab32(x)
-
-#define __BYTEORDER_HAS_U64__
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/little_endian.h>
+#include <linux/byteorder.h>
#endif /* _ASM_X86_BYTEORDER_H */
# endif
#endif
-#ifdef CONFIG_IRQBALANCE
-extern int irqbalance_disable(char *str);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
extern void fixup_irqs(cpumask_t map);
#include <asm/percpu.h>
+#define ARCH_HAS_OWN_IRQ_REGS
+
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
+#ifdef CONFIG_X86_64
+extern long sys_arch_prctl(int, unsigned long);
+#endif /* CONFIG_X86_64 */
#endif /* _ASM_X86_PRCTL_H */
#ifndef __ASSEMBLY__
+# ifdef __KERNEL__
+extern void do_notify_resume(struct pt_regs *, void *, __u32);
+# endif /* __KERNEL__ */
+
#ifdef __i386__
# ifdef __KERNEL__
struct old_sigaction {
struct sigaction sa;
};
-extern void do_notify_resume(struct pt_regs *, void *, __u32);
-
# else /* __KERNEL__ */
/* Here we must cater to libcs that poke about in kernel headers. */
/* kernel/ioport.c */
asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+/* kernel/ldt.c */
+asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+
+/* kernel/tls.c */
+asmlinkage int sys_set_thread_area(struct user_desc __user *);
+asmlinkage int sys_get_thread_area(struct user_desc __user *);
+
/* X86_32 only */
#ifdef CONFIG_X86_32
/* kernel/process_32.c */
/* kernel/ioport.c */
asmlinkage long sys_iopl(unsigned long);
-/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
-
/* kernel/sys_i386_32.c */
asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
unsigned long, unsigned long, unsigned long);
struct oldold_utsname;
asmlinkage int sys_olduname(struct oldold_utsname __user *);
-/* kernel/tls.c */
-asmlinkage int sys_set_thread_area(struct user_desc __user *);
-asmlinkage int sys_get_thread_area(struct user_desc __user *);
-
/* kernel/vm86_32.c */
asmlinkage int sys_vm86old(struct pt_regs);
asmlinkage int sys_vm86(struct pt_regs);
# define AT_VECTOR_SIZE_ARCH 1
#endif
-#ifdef CONFIG_X86_32
-
struct task_struct; /* one of the stranger aspects of C forward declarations */
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
+#ifdef CONFIG_X86_32
+
/*
* Saving eflags is important. It switches not only IOPL between tasks,
* it also protects other tasks from NT leaking through sysenter etc.
struct thread_info {
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
- unsigned long flags; /* low level flags */
+ __u32 flags; /* low level flags */
__u32 status; /* thread synchronous flags */
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable,
dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
dotraplinkage void do_segment_not_present(struct pt_regs *, long);
dotraplinkage void do_stack_segment(struct pt_regs *, long);
+#ifdef CONFIG_X86_64
+dotraplinkage void do_double_fault(struct pt_regs *, long);
+asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
+#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
extern int panic_on_unrecovered_nmi;
extern int kstack_depth_to_print;
-#ifdef CONFIG_X86_32
void math_error(void __user *);
-unsigned long patch_espfix_desc(unsigned long, unsigned long);
asmlinkage void math_emulate(long);
+#ifdef CONFIG_X86_32
+unsigned long patch_espfix_desc(unsigned long, unsigned long);
+#else
+asmlinkage void smp_thermal_interrupt(void);
+asmlinkage void mce_threshold_interrupt(void);
#endif
#endif /* _ASM_X86_TRAPS_H */
static __always_inline cycles_t vget_cycles(void)
{
- cycles_t cycles;
-
/*
* We only do VDSOs on TSC capable CPUs, so this shouldnt
* access boot_cpu_data (which is not VDSO-safe):
if (!cpu_has_tsc)
return 0;
#endif
- rdtsc_barrier();
- cycles = (cycles_t)__native_read_tsc();
- rdtsc_barrier();
-
- return cycles;
+ return (cycles_t)__native_read_tsc();
}
extern void tsc_init(void);
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
-char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
void __cpuinit pda_init(int cpu)
{
}
}
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
- DEBUG_STKSZ] __page_aligned_bss;
+static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
+ DEBUG_STKSZ] __page_aligned_bss;
extern asmlinkage void ignore_sysret(void);
return show_shared_cpu_map_func(leaf, 1, buf);
}
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
- switch(this_leaf->eax.split.type) {
- case CACHE_TYPE_DATA:
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+{
+ switch (this_leaf->eax.split.type) {
+ case CACHE_TYPE_DATA:
return sprintf(buf, "Data\n");
- break;
- case CACHE_TYPE_INST:
+ case CACHE_TYPE_INST:
return sprintf(buf, "Instruction\n");
- break;
- case CACHE_TYPE_UNIFIED:
+ case CACHE_TYPE_UNIFIED:
return sprintf(buf, "Unified\n");
- break;
- default:
+ default:
return sprintf(buf, "Unknown\n");
- break;
}
}
static int __init disable_mtrr_cleanup_setup(char *str)
{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 0;
+ enable_mtrr_cleanup = 0;
return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
static int __init enable_mtrr_cleanup_setup(char *str)
{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 1;
+ enable_mtrr_cleanup = 1;
return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
*
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
- *
- * Normal syscalls and interrupts don't save a full stack frame, this is
+ *
+ * Normal syscalls and interrupts don't save a full stack frame, this is
* only done for syscall tracing, signals or fork/exec et.al.
- *
- * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
- * at the top of the kernel process stack.
+ *
+ * A note on terminology:
+ * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * at the top of the kernel process stack.
* - partial stack frame: partially saved registers upto R11.
- * - full stack frame: Like partial stack frame, but all register saved.
+ * - full stack frame: Like partial stack frame, but all register saved.
*
* Some macro usage:
* - CFI macros are used to generate dwarf2 unwind information for better
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
-#endif
+#endif
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
.endm
/*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
+ * C code is not supposed to know about undefined top of stack. Every time
+ * a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
- */
-
- /* %rsp:at FRAMEEND */
+ */
+
+ /* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
movq %gs:pda_oldrsp,\tmp
movq \tmp,RSP(%rsp)
.endm
/*
* A newly forked process directly context switches into this.
- */
-/* rdi: prev */
+ */
+/* rdi: prev */
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
push kernel_eflags(%rip)
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jnz rff_trace
-rff_action:
+rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
- GET_THREAD_INFO(%rcx)
+ GET_THREAD_INFO(%rcx)
jmp rff_action
CFI_ENDPROC
END(ret_from_fork)
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
*/
-
+
/*
- * Register setup:
+ * Register setup:
* rax system call number
* rdi arg0
- * rcx return address for syscall/sysret, C arg3
+ * rcx return address for syscall/sysret, C arg3
* rsi arg1
- * rdx arg2
+ * rdx arg2
* r10 arg3 (--> moved to rcx for C)
* r8 arg4
* r9 arg5
* r11 eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
- *
+ * r12-r15,rbp,rbx saved by C code, not touched.
+ *
* Interrupts are off on entry.
* Only called from user space.
*
* When user can change the frames always force IRET. That is because
* it deals with uncanonical addresses better. SYSRET has trouble
* with them due to bugs in both AMD and Intel CPUs.
- */
+ */
ENTRY(system_call)
CFI_STARTPROC simple
*/
ENTRY(system_call_after_swapgs)
- movq %rsp,%gs:pda_oldrsp
+ movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
* No need to follow this irqs off/on section - it's straight
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
+ movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
- */
+ * Has incomplete stack frame and undefined top of stack.
+ */
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
-sysret_check:
+sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl TI_flags(%rcx),%edx
andl %edi,%edx
- jnz sysret_careful
+ jnz sysret_careful
CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
CFI_RESTORE_STATE
/* Handle reschedules */
- /* edx: work, edi: workmask */
+ /* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
- /* Handle a signal */
+ /* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
-
+
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
#endif /* CONFIG_AUDITSYSCALL */
/* Do syscall tracing */
-tracesys:
+tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jz auditsys
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
-
-/*
+
+/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
- /* Check for syscall exit trace */
+ /* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
- leaq 8(%rsp),%rdi # &ptregs -> arg1
+ leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
-
+
int_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
jmp int_with_check
CFI_ENDPROC
END(system_call)
-
-/*
+
+/*
* Certain special system calls that need to save a complete full stack frame.
- */
-
+ */
+
.macro PTREGSCALL label,func,arg
.globl \label
\label:
ret
CFI_ENDPROC
END(ptregscall_common)
-
+
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
-
+
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
- */
+ */
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
-
+
/* Interrupt came from user space */
/*
* Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
- */
+ */
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
- popq %rdi
+ popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
-
+
retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
- movq $-1,ORIG_RAX(%rsp)
+ movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
-#endif
+#endif
CFI_ENDPROC
END(common_interrupt)
-
+
/*
* APIC interrupts.
- */
+ */
.macro apicinterrupt num,func
INTR_FRAME
pushq $~(\num)
apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)
.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
+ apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
-
+
/*
* Exception entry points.
- */
+ */
.macro zeroentry sym
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0 /* push error code/oldrax */
+ pushq $0 /* push error code/oldrax */
CFI_ADJUST_CFA_OFFSET 8
- pushq %rax /* push real oldrax to the rdi slot */
+ pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rax,0
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
- .endm
+ .endm
.macro errorentry sym
XCPT_FRAME
/*
* Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
+ * and the exception handler in %rax.
+ */
KPROBE_ENTRY(error_entry)
_frame RDI
CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
- cld
+ cld
subq $14*8,%rsp
CFI_ADJUST_CFA_OFFSET (14*8)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
CFI_REL_OFFSET rcx,RCX
- movq %rsi,10*8(%rsp) /* store rax */
+ movq %rsi,10*8(%rsp) /* store rax */
CFI_REL_OFFSET rax,RAX
movq %r8, 9*8(%rsp)
CFI_REL_OFFSET r8,R8
CFI_REL_OFFSET r10,R10
movq %r11,6*8(%rsp)
CFI_REL_OFFSET r11,R11
- movq %rbx,5*8(%rsp)
+ movq %rbx,5*8(%rsp)
CFI_REL_OFFSET rbx,RBX
- movq %rbp,4*8(%rsp)
+ movq %rbp,4*8(%rsp)
CFI_REL_OFFSET rbp,RBP
- movq %r12,3*8(%rsp)
+ movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12,R12
- movq %r13,2*8(%rsp)
+ movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13,R13
- movq %r14,1*8(%rsp)
+ movq %r14,1*8(%rsp)
CFI_REL_OFFSET r14,R14
- movq %r15,(%rsp)
+ movq %r15,(%rsp)
CFI_REL_OFFSET r15,R15
- xorl %ebx,%ebx
+ xorl %ebx,%ebx
testl $3,CS(%rsp)
je error_kernelspace
-error_swapgs:
+error_swapgs:
SWAPGS
error_sti:
TRACE_IRQS_OFF
- movq %rdi,RDI(%rsp)
+ movq %rdi,RDI(%rsp)
CFI_REL_OFFSET rdi,RDI
movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
+ GET_THREAD_INFO(%rcx)
testl %eax,%eax
jne retint_kernel
LOCKDEP_SYS_EXIT_IRQ
/* There are two places in the kernel that can potentially fault with
usergs. Handle them here. The exception handlers after
iret run with kernel gs again, so don't set the user space flag.
- B stepping K8s sometimes report an truncated RIP for IRET
+ B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP(%rsp)
je error_swapgs
jmp error_sti
KPROBE_END(error_entry)
-
+
/* Reload gs selector with exception handling */
- /* edi: new selector */
+ /* edi: new selector */
ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
SWAPGS
-gs_change:
- movl %edi,%gs
+gs_change:
+ movl %edi,%gs
2: mfence /* workaround */
SWAPGS
popf
ret
CFI_ENDPROC
ENDPROC(native_load_gs_index)
-
+
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
/* running with kernelgs */
-bad_gs:
+bad_gs:
SWAPGS /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
- .previous
-
+ .previous
+
/*
* Create a kernel thread.
*
xorl %r8d,%r8d
xorl %r9d,%r9d
-
+
# clone now
call do_fork
movq %rax,RAX(%rsp)
* so internally to the x86_64 port you can rely on kernel_thread()
* not to reschedule the child before returning, this avoids the need
* of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
+ * [Hopefully no generic code relies on the reschedule -AK]
*/
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
ENDPROC(kernel_thread)
-
+
child_rip:
pushq $0 # fake return address
CFI_STARTPROC
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
- SAVE_ALL
+ SAVE_ALL
movq %rsp,%rcx
call sys_execve
- movq %rax, RAX(%rsp)
+ movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
- zeroentry do_simd_coprocessor_error
+ zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)
ENTRY(device_not_available)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
- CFI_ADJUST_CFA_OFFSET 8
+ CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug, DEBUG_STACK
paranoidexit
KPROBE_END(debug)
- /* runs on exception stack */
+ /* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
END(bounds)
ENTRY(invalid_op)
- zeroentry do_invalid_op
+ zeroentry do_invalid_op
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq $0
- CFI_ADJUST_CFA_OFFSET 8
+ CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
jmp paranoid_exit1
CFI_ENDPROC
static __devinit void via_no_dac(struct pci_dev *dev)
{
if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
- printk(KERN_INFO "PCI: VIA PCI bridge detected."
- "Disabling DAC.\n");
+ printk(KERN_INFO
+ "PCI: VIA PCI bridge detected. Disabling DAC.\n");
forbid_dac = 1;
}
}
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <asm/idle.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/sched.h>
* @size: Size of the crashkernel memory to reserve.
* Returns the base address on success, and -1ULL on failure.
*/
+static
unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
{
const unsigned long long alignment = 16<<20; /* 16M */
#endif
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
- printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
- "by the BIOS.\n", hard_smp_processor_id());
+ printk(KERN_WARNING
+ "weird, boot CPU (#%d) not listed by the BIOS.\n",
+ hard_smp_processor_id());
+
physid_set(hard_smp_processor_id(), phys_cpu_present_map);
}
}
EXPORT_SYMBOL(profile_pc);
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
add_pda(irq0_irqs, 1);
{
}
-#ifdef CONFIG_DEBUG_PAGE_TYPE
-
-#ifdef CONFIG_X86_PAE
-#define MAX_BOOT_PTS (2048+4+1)
-#else
-#define MAX_BOOT_PTS (1024+1)
-#endif
-
-/*
- * During boot, mem_map is not yet available in paging_init, so stash
- * all the boot page allocations here.
- */
-static struct {
- u32 pfn;
- int type;
-} boot_page_allocations[MAX_BOOT_PTS];
-static int num_boot_page_allocations;
-static int boot_allocations_applied;
-
-void vmi_apply_boot_page_allocations(void)
-{
- int i;
- BUG_ON(!mem_map);
- for (i = 0; i < num_boot_page_allocations; i++) {
- struct page *page = pfn_to_page(boot_page_allocations[i].pfn);
- page->type = boot_page_allocations[i].type;
- page->type = boot_page_allocations[i].type &
- ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
- }
- boot_allocations_applied = 1;
-}
-
-static void record_page_type(u32 pfn, int type)
-{
- BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS);
- boot_page_allocations[num_boot_page_allocations].pfn = pfn;
- boot_page_allocations[num_boot_page_allocations].type = type;
- num_boot_page_allocations++;
-}
-
-static void check_zeroed_page(u32 pfn, int type, struct page *page)
-{
- u32 *ptr;
- int i;
- int limit = PAGE_SIZE / sizeof(int);
-
- if (page_address(page))
- ptr = (u32 *)page_address(page);
- else
- ptr = (u32 *)__va(pfn << PAGE_SHIFT);
- /*
- * When cloning the root in non-PAE mode, only the userspace
- * pdes need to be zeroed.
- */
- if (type & VMI_PAGE_CLONE)
- limit = KERNEL_PGD_BOUNDARY;
- for (i = 0; i < limit; i++)
- BUG_ON(ptr[i]);
-}
-
-/*
- * We stash the page type into struct page so we can verify the page
- * types are used properly.
- */
-static void vmi_set_page_type(u32 pfn, int type)
-{
- /* PAE can have multiple roots per page - don't track */
- if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
- return;
-
- if (boot_allocations_applied) {
- struct page *page = pfn_to_page(pfn);
- if (type != VMI_PAGE_NORMAL)
- BUG_ON(page->type);
- else
- BUG_ON(page->type == VMI_PAGE_NORMAL);
- page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
- if (type & VMI_PAGE_ZEROED)
- check_zeroed_page(pfn, type, page);
- } else {
- record_page_type(pfn, type);
- }
-}
-
-static void vmi_check_page_type(u32 pfn, int type)
-{
- /* PAE can have multiple roots per page - skip checks */
- if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
- return;
-
- type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
- if (boot_allocations_applied) {
- struct page *page = pfn_to_page(pfn);
- BUG_ON((page->type ^ type) & VMI_PAGE_PAE);
- BUG_ON(type == VMI_PAGE_NORMAL && page->type);
- BUG_ON((type & page->type) == 0);
- }
-}
-#else
-#define vmi_set_page_type(p,t) do { } while (0)
-#define vmi_check_page_type(p,t) do { } while (0)
-#endif
-
#ifdef CONFIG_HIGHPTE
static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
{
static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
{
- vmi_set_page_type(pfn, VMI_PAGE_L1);
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}
* It is called only for swapper_pg_dir, which already has
* data on it.
*/
- vmi_set_page_type(pfn, VMI_PAGE_L2);
vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
}
static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
{
- vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
- vmi_check_page_type(clonepfn, VMI_PAGE_L2);
vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
}
static void vmi_release_pte(unsigned long pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L1);
- vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
}
static void vmi_release_pmd(unsigned long pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L2);
- vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
}
/*
static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
}
static void vmi_set_pte(pte_t *ptep, pte_t pte)
{
/* XXX because of set_pmd_pte, this can be called on PT or PD layers */
- vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD);
vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
}
static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
- vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
{
#ifdef CONFIG_X86_PAE
const pte_t pte = { .pte = pmdval.pmd };
- vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
#else
const pte_t pte = { pmdval.pud.pgd.pgd };
- vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD);
#endif
vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
}
static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
- vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
}
{
/* Um, eww */
const pte_t pte = { .pte = pudval.pgd.pgd };
- vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
}
static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
const pte_t pte = { .pte = 0 };
- vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
static void vmi_pmd_clear(pmd_t *pmd)
{
const pte_t pte = { .pte = 0 };
- vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
}
#endif
gettimeofday(tv,NULL);
return;
}
+
+ /*
+ * Surround the RDTSC by barriers, to make sure it's not
+ * speculated to outside the seqlock critical section and
+ * does not cause time warps:
+ */
+ rdtsc_barrier();
now = vread();
+ rdtsc_barrier();
+
base = __vsyscall_gtod_data.clock.cycle_last;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
static int __initdata after_init_bootmem;
-static __init void *alloc_low_page(unsigned long *phys)
+static __init void *alloc_low_page(void)
{
unsigned long pfn = table_end++;
void *adr;
adr = __va(pfn * PAGE_SIZE);
memset(adr, 0, PAGE_SIZE);
- *phys = pfn * PAGE_SIZE;
return adr;
}
pmd_t *pmd_table;
#ifdef CONFIG_X86_PAE
- unsigned long phys;
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
if (after_init_bootmem)
pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
else
- pmd_table = (pmd_t *)alloc_low_page(&phys);
+ pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
+
+ return pmd_table;
}
#endif
pud = pud_offset(pgd, 0);
if (!page_table)
page_table =
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
- } else {
- unsigned long phys;
- page_table = (pte_t *)alloc_low_page(&phys);
- }
+ } else
+ page_table = (pte_t *)alloc_low_page();
paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));