x86 ptrace: unify syscall tracing
arch/x86/kernel/entry_64.S

/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like a partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers.
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
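
/*
 * Illustrative sketch of the frame layout described above (highest
 * address first, matching struct pt_regs; an orientation aid only,
 * not part of the original file):
 *
 *	SS, RSP, EFLAGS, CS, RIP	<- hardware frame / top of stack
 *	ORIG_RAX
 *	RDI, RSI, RDX, RCX, RAX,
 *	R8, R9, R10, R11		<- partial frame (SAVE_ARGS)
 *	RBX, RBP, R12, R13, R14, R15	<- added by SAVE_REST = full frame
 */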

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>

	.code64

#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)

	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	subq $MCOUNT_INSN_SIZE, %rdi

.globl mcount_call
mcount_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	retq
END(mcount)
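
/*
 * For orientation (an illustrative note, not from the original file):
 * with -pg the compiler emits a "call mcount" at each function entry,
 * e.g.
 *
 *	some_traced_function:
 *		call mcount		# instrumented call site
 *		...
 *
 * so 0x38(%rsp) above holds the return address inside the traced
 * function, and subtracting MCOUNT_INSN_SIZE recovers the address of
 * the call instruction itself for ftrace to record or patch.
 */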

ENTRY(ftrace_caller)

	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace
.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call *ftrace_trace_function

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
#endif /* CONFIG_PARAVIRT */


.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm
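
/*
 * Illustrative sketch (hypothetical C-side view, not part of this
 * file): a ptregs-taking function such as
 *
 *	asmlinkage long sys_example(struct pt_regs *regs)
 *	{
 *		return regs->rsp;
 *	}
 *
 * would read garbage for rsp/ss/cs/eflags on the SYSCALL fast path
 * unless FIXUP_TOP_OF_STACK has filled those slots in first.
 */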

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	$__KERNEL_DS	/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

	.macro	CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,TI_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *	and report it properly in ps. Unfortunately we haven't.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
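
/*
 * Worked example (illustrative; not part of the original file): for a
 * 4-argument call such as pwrite64(fd, buf, count, pos), userspace
 * arrives here with
 *
 *	rax = __NR_pwrite64
 *	rdi = fd, rsi = buf, rdx = count, r10 = pos
 *
 * and the "movq %r10,%rcx" below restores the C ABI (arg3 belongs in
 * rcx, which SYSCALL clobbered with the return address) before the
 * sys_call_table dispatch.
 */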

ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp, %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_WORK_MASK,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

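/*
 * Illustrative note (not from the original file): a ptracer stopped at
 * syscall entry may rewrite the syscall number, e.g.
 *
 *	ptrace(PTRACE_POKEUSER, pid, 8*ORIG_RAX, -1);
 *
 * which is why RAX(%rsp) is preset to -ENOSYS above and the argument
 * registers are reloaded with LOAD_ARGS after syscall_trace_enter.
 */
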
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl  $~TS_COMPAT,TI_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls need to save a complete full stack frame.
 */

	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq    -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm
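
/*
 * For illustration (a sketch of the macro expansion, not assembled
 * here): "PTREGSCALL stub_clone, sys_clone, %r8", used below, expands
 * to roughly
 *
 *	.globl stub_clone
 *	stub_clone:
 *		leaq	sys_clone(%rip),%rax
 *		leaq	-ARGOFFSET+8(%rsp),%r8	# 8 for return address
 *		jmp	ptregscall_common
 *	END(stub_clone)
 */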

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl	%gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push    %rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	 threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
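
/*
 * Illustrative difference (a note, not from the original file):
 * "zeroentry do_overflow" pushes a dummy 0 error code because the CPU
 * supplies none for that vector, while "errorentry do_page_fault"
 * relies on the error code the CPU already pushed; both then funnel
 * into error_entry with the handler address in %rax.
 */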

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	SWAPGS
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz   paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm
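
/*
 * Usage sketch (illustrative; mirrors the NMI path further down):
 *
 *	paranoidentry do_nmi, 0, 0	# ist=0, irqtrace=0
 *	paranoidexit 0			# no irq-flag tracing for NMI
 */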

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl  TI_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	jmp retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je   error_swapgs
	movl %ecx,%ecx	/* zero extend */
	cmpq %rcx,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp error_sti
KPROBE_END(error_entry)

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
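/*
 * Illustrative C-side usage (a sketch; the worker function and flag
 * choice here are hypothetical, not taken from this file):
 *
 *	static int worker(void *arg) { ... }
 *	long pid = kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES);
 */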
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	mov %eax, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
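/*
 * Illustrative C-side call (a sketch; this mirrors how the kernel
 * execs init, though the exact call site lives outside this file):
 *
 *	static char *argv[] = { "/sbin/init", NULL };
 *	static char *envp[] = { "HOME=/", "TERM=linux", NULL };
 *	kernel_execve("/sbin/init", argv, envp);
 */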
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)