x86, bts: add fork and exit handling
[linux-2.6-block.git] arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
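
/*
 * Usage sketch (illustrative, not part of the original file): a driver
 * interested in x86-64 idle transitions would hook this chain with a
 * notifier block; my_idle_cb/my_idle_nb below are hypothetical names:
 *
 *	static int my_idle_cb(struct notifier_block *nb,
 *			      unsigned long action, void *unused)
 *	{
 *		return NOTIFY_OK;	handle IDLE_START/IDLE_END here
 *	}
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_cb,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 */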

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
		regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
		es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
		cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}

	ds_exit_thread(current);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

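/*
 * copy_thread() below is the arch-specific hook called from
 * copy_process() in kernel/fork.c whenever a task is created by
 * fork/clone/vfork: it lays out the child's kernel stack (pt_regs),
 * duplicates the I/O permission bitmap if one exists, and inherits the
 * parent's segment and debug-store state.
 */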
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}

	ds_copy_thread(p, me);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;	/* X86_EFLAGS_IF: start with interrupts enabled */
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
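
/*
 * Usage sketch (illustrative, not part of the original file):
 * get_tsc_mode()/set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl()s,
 * so from userspace a task would start faulting on RDTSC after:
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);
 *
 * and can read the current mode back with prctl(PR_GET_TSC, &tsc_mode),
 * where tsc_mode is an int.
 */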

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer is not supported either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload. Also
	 * reload when it has changed. When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

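/*
 * get_wchan() walks the sleeping task's saved frame-pointer chain to
 * find where it blocked (this relies on a frame-pointer build). The
 * stack frame layout assumed below is:
 *
 *	fp	-> caller's saved frame pointer (next link in the chain)
 *	fp + 8	-> return address; the first one outside of
 *		   in_sched_functions() is reported as the wait channel
 */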
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
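
/*
 * Usage sketch (illustrative, not part of the original file): 64-bit
 * userspace reaches do_arch_prctl() via the arch_prctl(2) syscall, e.g.:
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x1000000UL);
 */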

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
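
/*
 * Worked example (illustrative): arch_align_stack() lowers the stack
 * top by up to 8 KiB of randomness and then 16-byte aligns it, as the
 * x86-64 ABI requires. For sp = 0x7fff0000 and a random offset of 0x12c:
 *
 *	0x7fff0000 - 0x12c = 0x7ffefed4
 *	0x7ffefed4 & ~0xf  = 0x7ffefed0
 */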

/*
 * Randomize the brk() base within a 32 MiB (0x02000000) window above
 * the current brk; fall back to the current brk if randomize_range()
 * yields zero.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
849}