[PATCH] x86-64: Fix vgetcpu when CONFIG_HOTPLUG_CPU is disabled
[linux-2.6-block.git] arch/x86_64/kernel/process.c
/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);
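/*
 * Illustrative usage sketch (not part of this file): a subsystem that
 * wants to hear about this CPU entering and leaving idle could register
 * a callback roughly like this; "my_idle_notify" is a hypothetical name.
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *unused)
 *	{
 *		if (action == IDLE_START)
 *			pr_debug("cpu going idle\n");
 *		else if (action == IDLE_END)
 *			pr_debug("cpu left idle\n");
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_notify,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 */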

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (read_pda(isidle) == 0)
		return;
	write_pda(isidle, 0);
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
static void default_idle(void)
{
	local_irq_enable();

	current_thread_info()->status &= ~TS_POLLING;
	smp_mb__after_clear_bit();
	while (!need_resched()) {
		local_irq_disable();
		if (!need_resched())
			safe_halt();
		else
			local_irq_enable();
	}
	current_thread_info()->status |= TS_POLLING;
}
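
/*
 * Note on the TS_POLLING dance above: while TS_POLLING is set, a remote
 * CPU doing a wakeup may skip the reschedule IPI because this CPU polls
 * need_resched. Before halting we therefore clear the flag and re-check
 * need_resched, with a barrier in between; otherwise a wakeup could slip
 * in between the check and the hlt and go unnoticed until the next
 * interrupt.
 */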

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
	local_irq_enable();

	asm volatile(
		"2:"
		"testl %0,%1;"
		"rep; nop;"
		"je 2b;"
		: :
		"i" (_TIF_NEED_RESCHED),
		"m" (current_thread_info()->flags));
}
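
/*
 * The inline asm above spins until the TIF_NEED_RESCHED bit shows up in
 * current_thread_info()->flags. "rep; nop" encodes the PAUSE
 * instruction, which hints to the CPU that this is a spin-wait loop,
 * reducing power use and avoiding memory-order pipeline flushes on
 * P4-class hardware.
 */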

void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) &&
					!per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
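
/*
 * cpu_idle_wait() is meant to be called after the idle handler (pm_idle)
 * is changed: it marks every online CPU's cpu_idle_state, and the idle
 * loop below clears that mark the next time the CPU comes around its
 * inner loop (after which it re-reads pm_idle). We rescan once a second
 * until every mark is gone, so each CPU has observed the update.
 */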

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(eax, ecx);
	}
}
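
/*
 * The eax/ecx arguments are passed straight through to MWAIT as hints.
 * Roughly (per the Intel manuals): on CPUs supporting the extensions,
 * eax bits 7:4 select the target C-state (0 meaning C1) and ecx bit 0
 * makes interrupts break events even while masked. mwait_idle() below
 * just passes zeroes for the plain C1 case.
 */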

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	local_irq_enable();
	while (!need_resched())
		mwait_idle_with_hints(0, 0);
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => All CPUs support mwait
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

static int __init idle_setup (char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	}

	boot_option_idle_override = 1;
	return 1;
}

__setup("idle=", idle_setup);
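
/*
 * Usage note: booting with "idle=poll" makes idle_setup() install
 * poll_idle as pm_idle; since pm_idle is then non-NULL,
 * select_idle_routine() above will not replace it with mwait_idle.
 * boot_option_idle_override records that an idle= option was given at
 * all; it is consulted by code outside this file (e.g. ACPI idle setup).
 */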

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
		regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;
	struct thread_info *t = current_thread_info();

	if (t->flags & _TIF_ABI_PENDING) {
		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
		if (t->flags & _TIF_IA32)
			current_thread_info()->status |= TS_COMPAT;
	}
	t->flags &= ~_TIF_DEBUG;

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}
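
/*
 * The three-way OR above reassembles the 32-bit segment base that the
 * descriptor format scatters across the GDT entry: base0 holds bits
 * 15..0, base1 bits 23..16 and base2 bits 31..24. For example, a base
 * of 0x12345678 is stored as base0=0x5678, base1=0x34, base2=0x12.
 */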

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL)
		childregs->rsp = (unsigned long)childregs;

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
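
/*
 * Layout note for copy_thread() above: the child's pt_regs frame is
 * placed at the very top of its kernel stack (childregs points just
 * below THREAD_SIZE), and childregs->rax is zeroed so that the child
 * returns 0 from fork()/clone() while the parent gets the child's pid.
 */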

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread,
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
			max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
__kprobes struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
	    || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}
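
/*
 * The GS handling above relies on swapgs: while a task runs in the
 * kernel, its user gs base lives in MSR_KERNEL_GS_BASE and is swapped
 * into MSR_GS_BASE on return to user mode. That is why __switch_to
 * writes the next task's 64-bit gs base to MSR_KERNEL_GS_BASE rather
 * than MSR_GS_BASE, which currently holds the kernel's PDA pointer.
 */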

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, rip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}
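
/*
 * get_wchan() above walks the sleeping task's frame-pointer chain: with
 * frame pointers enabled, each x86-64 frame starts with the saved rbp
 * (at fp) followed by the return address (at fp+8). The walk returns
 * the first return address outside the scheduler, bounded to 16 frames.
 */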

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
856/*
857 * Capture the user space registers if the task is not running (in user space)
858 */
859int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
860{
861 struct pt_regs *pp, ptregs;
862
bb049232 863 pp = task_pt_regs(tsk);
1da177e4
LT
864
865 ptregs = *pp;
866 ptregs.cs &= 0xffff;
867 ptregs.ss &= 0xffff;
868
869 elf_core_copy_regs(regs, &ptregs);
870
871 return 1;
872}

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
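
/*
 * arch_align_stack() above randomizes the initial user stack pointer by
 * up to 8KB (unless the task disabled this via ADDR_NO_RANDOMIZE or
 * randomize_va_space is off) and then rounds down to a 16-byte boundary,
 * the alignment the x86-64 ABI expects at process startup.
 */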