[PATCH] pi-futex: rt mutex futex api
[linux-2.6-block.git] / kernel / exit.c
CommitLineData
1da177e4
LT
1/*
2 * linux/kernel/exit.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7#include <linux/config.h>
8#include <linux/mm.h>
9#include <linux/slab.h>
10#include <linux/interrupt.h>
11#include <linux/smp_lock.h>
12#include <linux/module.h>
c59ede7b 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/completion.h>
15#include <linux/personality.h>
16#include <linux/tty.h>
17#include <linux/namespace.h>
18#include <linux/key.h>
19#include <linux/security.h>
20#include <linux/cpu.h>
21#include <linux/acct.h>
22#include <linux/file.h>
23#include <linux/binfmts.h>
24#include <linux/ptrace.h>
25#include <linux/profile.h>
26#include <linux/mount.h>
27#include <linux/proc_fs.h>
28#include <linux/mempolicy.h>
29#include <linux/cpuset.h>
30#include <linux/syscalls.h>
7ed20e1a 31#include <linux/signal.h>
6a14c5c9 32#include <linux/posix-timers.h>
9f46080c 33#include <linux/cn_proc.h>
de5097c2 34#include <linux/mutex.h>
0771dfef 35#include <linux/futex.h>
34f192c6 36#include <linux/compat.h>
b92ce558 37#include <linux/pipe_fs_i.h>
fa84cb93 38#include <linux/audit.h> /* for audit_free() */
83cc5ed3 39#include <linux/resource.h>
1da177e4
LT
40
41#include <asm/uaccess.h>
42#include <asm/unistd.h>
43#include <asm/pgtable.h>
44#include <asm/mmu_context.h>
45
/* External declarations; the definitions are not in the visible part of this file. */
extern void sem_exit(void);
extern struct task_struct *child_reaper;

/* Forward declaration so that code earlier in the file can call exit_mm(). */
static void exit_mm(struct task_struct *tsk);
50
1da177e4
LT
51static void __unhash_process(struct task_struct *p)
52{
53 nr_threads--;
54 detach_pid(p, PIDTYPE_PID);
1da177e4
LT
55 if (thread_group_leader(p)) {
56 detach_pid(p, PIDTYPE_PGID);
57 detach_pid(p, PIDTYPE_SID);
c97d9893 58
5e85d4ab 59 list_del_rcu(&p->tasks);
73b9ebfe 60 __get_cpu_var(process_counts)--;
1da177e4 61 }
47e65328 62 list_del_rcu(&p->thread_group);
c97d9893 63 remove_parent(p);
1da177e4
LT
64}
65
6a14c5c9
ON
66/*
67 * This function expects the tasklist_lock write-locked.
68 */
69static void __exit_signal(struct task_struct *tsk)
70{
71 struct signal_struct *sig = tsk->signal;
72 struct sighand_struct *sighand;
73
74 BUG_ON(!sig);
75 BUG_ON(!atomic_read(&sig->count));
76
77 rcu_read_lock();
78 sighand = rcu_dereference(tsk->sighand);
79 spin_lock(&sighand->siglock);
80
81 posix_cpu_timers_exit(tsk);
82 if (atomic_dec_and_test(&sig->count))
83 posix_cpu_timers_exit_group(tsk);
84 else {
85 /*
86 * If there is any task waiting for the group exit
87 * then notify it:
88 */
89 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
90 wake_up_process(sig->group_exit_task);
91 sig->group_exit_task = NULL;
92 }
93 if (tsk == sig->curr_target)
94 sig->curr_target = next_thread(tsk);
95 /*
96 * Accumulate here the counters for all threads but the
97 * group leader as they die, so they can be added into
98 * the process-wide totals when those are taken.
99 * The group leader stays around as a zombie as long
100 * as there are other threads. When it gets reaped,
101 * the exit.c code will add its counts into these totals.
102 * We won't ever get here for the group leader, since it
103 * will have been the last reference on the signal_struct.
104 */
105 sig->utime = cputime_add(sig->utime, tsk->utime);
106 sig->stime = cputime_add(sig->stime, tsk->stime);
107 sig->min_flt += tsk->min_flt;
108 sig->maj_flt += tsk->maj_flt;
109 sig->nvcsw += tsk->nvcsw;
110 sig->nivcsw += tsk->nivcsw;
111 sig->sched_time += tsk->sched_time;
112 sig = NULL; /* Marker for below. */
113 }
114
5876700c
ON
115 __unhash_process(tsk);
116
6a14c5c9 117 tsk->signal = NULL;
a7e5328a 118 tsk->sighand = NULL;
6a14c5c9
ON
119 spin_unlock(&sighand->siglock);
120 rcu_read_unlock();
121
a7e5328a 122 __cleanup_sighand(sighand);
6a14c5c9
ON
123 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
124 flush_sigqueue(&tsk->pending);
125 if (sig) {
126 flush_sigqueue(&sig->shared_pending);
127 __cleanup_signal(sig);
128 }
129}
130
8c7904a0
EB
131static void delayed_put_task_struct(struct rcu_head *rhp)
132{
133 put_task_struct(container_of(rhp, struct task_struct, rcu));
134}
135
1da177e4
LT
136void release_task(struct task_struct * p)
137{
138 int zap_leader;
139 task_t *leader;
1f09f974 140repeat:
1da177e4 141 atomic_dec(&p->user->processes);
1da177e4 142 write_lock_irq(&tasklist_lock);
1f09f974 143 ptrace_unlink(p);
1da177e4
LT
144 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
145 __exit_signal(p);
35f5cad8 146
1da177e4
LT
147 /*
148 * If we are the last non-leader member of the thread
149 * group, and the leader is zombie, then notify the
150 * group leader's parent process. (if it wants notification.)
151 */
152 zap_leader = 0;
153 leader = p->group_leader;
154 if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
155 BUG_ON(leader->exit_signal == -1);
156 do_notify_parent(leader, leader->exit_signal);
157 /*
158 * If we were the last child thread and the leader has
159 * exited already, and the leader's parent ignores SIGCHLD,
160 * then we are the one who should release the leader.
161 *
162 * do_notify_parent() will have marked it self-reaping in
163 * that case.
164 */
165 zap_leader = (leader->exit_signal == -1);
166 }
167
168 sched_exit(p);
169 write_unlock_irq(&tasklist_lock);
48e6484d 170 proc_flush_task(p);
1da177e4 171 release_thread(p);
8c7904a0 172 call_rcu(&p->rcu, delayed_put_task_struct);
1da177e4
LT
173
174 p = leader;
175 if (unlikely(zap_leader))
176 goto repeat;
177}
178
1da177e4
LT
179/*
180 * This checks not only the pgrp, but falls back on the pid if no
181 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
182 * without this...
183 */
184int session_of_pgrp(int pgrp)
185{
186 struct task_struct *p;
187 int sid = -1;
188
189 read_lock(&tasklist_lock);
190 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
191 if (p->signal->session > 0) {
192 sid = p->signal->session;
193 goto out;
194 }
195 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
196 p = find_task_by_pid(pgrp);
197 if (p)
198 sid = p->signal->session;
199out:
200 read_unlock(&tasklist_lock);
201
202 return sid;
203}
204
205/*
206 * Determine if a process group is "orphaned", according to the POSIX
207 * definition in 2.2.2.52. Orphaned process groups are not to be affected
208 * by terminal-generated stop signals. Newly orphaned process groups are
209 * to receive a SIGHUP and a SIGCONT.
210 *
211 * "I ask you, have you ever known what it is to be an orphan?"
212 */
213static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
214{
215 struct task_struct *p;
216 int ret = 1;
217
218 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
219 if (p == ignored_task
220 || p->exit_state
221 || p->real_parent->pid == 1)
222 continue;
223 if (process_group(p->real_parent) != pgrp
224 && p->real_parent->signal->session == p->signal->session) {
225 ret = 0;
226 break;
227 }
228 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
229 return ret; /* (sighing) "Often!" */
230}
231
232int is_orphaned_pgrp(int pgrp)
233{
234 int retval;
235
236 read_lock(&tasklist_lock);
237 retval = will_become_orphaned_pgrp(pgrp, NULL);
238 read_unlock(&tasklist_lock);
239
240 return retval;
241}
242
858119e1 243static int has_stopped_jobs(int pgrp)
1da177e4
LT
244{
245 int retval = 0;
246 struct task_struct *p;
247
248 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
249 if (p->state != TASK_STOPPED)
250 continue;
251
252 /* If p is stopped by a debugger on a signal that won't
253 stop it, then don't count p as stopped. This isn't
254 perfect but it's a good approximation. */
255 if (unlikely (p->ptrace)
256 && p->exit_code != SIGSTOP
257 && p->exit_code != SIGTSTP
258 && p->exit_code != SIGTTOU
259 && p->exit_code != SIGTTIN)
260 continue;
261
262 retval = 1;
263 break;
264 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
265 return retval;
266}
267
268/**
4dc3b16b 269 * reparent_to_init - Reparent the calling kernel thread to the init task.
1da177e4
LT
270 *
271 * If a kernel thread is launched as a result of a system call, or if
272 * it ever exits, it should generally reparent itself to init so that
273 * it is correctly cleaned up on exit.
274 *
275 * The various task state such as scheduling policy and priority may have
276 * been inherited from a user process, so we reset them to sane values here.
277 *
278 * NOTE that reparent_to_init() gives the caller full capabilities.
279 */
858119e1 280static void reparent_to_init(void)
1da177e4
LT
281{
282 write_lock_irq(&tasklist_lock);
283
284 ptrace_unlink(current);
285 /* Reparent to init */
9b678ece 286 remove_parent(current);
1da177e4
LT
287 current->parent = child_reaper;
288 current->real_parent = child_reaper;
9b678ece 289 add_parent(current);
1da177e4
LT
290
291 /* Set the exit signal to SIGCHLD so we signal init on exit */
292 current->exit_signal = SIGCHLD;
293
b0a9499c
IM
294 if ((current->policy == SCHED_NORMAL ||
295 current->policy == SCHED_BATCH)
296 && (task_nice(current) < 0))
1da177e4
LT
297 set_user_nice(current, 0);
298 /* cpus_allowed? */
299 /* rt_priority? */
300 /* signals? */
301 security_task_reparent_to_init(current);
302 memcpy(current->signal->rlim, init_task.signal->rlim,
303 sizeof(current->signal->rlim));
304 atomic_inc(&(INIT_USER->__count));
305 write_unlock_irq(&tasklist_lock);
306 switch_uid(INIT_USER);
307}
308
309void __set_special_pids(pid_t session, pid_t pgrp)
310{
e19f247a 311 struct task_struct *curr = current->group_leader;
1da177e4
LT
312
313 if (curr->signal->session != session) {
314 detach_pid(curr, PIDTYPE_SID);
315 curr->signal->session = session;
316 attach_pid(curr, PIDTYPE_SID, session);
317 }
318 if (process_group(curr) != pgrp) {
319 detach_pid(curr, PIDTYPE_PGID);
320 curr->signal->pgrp = pgrp;
321 attach_pid(curr, PIDTYPE_PGID, pgrp);
322 }
323}
324
325void set_special_pids(pid_t session, pid_t pgrp)
326{
327 write_lock_irq(&tasklist_lock);
328 __set_special_pids(session, pgrp);
329 write_unlock_irq(&tasklist_lock);
330}
331
332/*
333 * Let kernel threads use this to say that they
334 * allow a certain signal (since daemonize() will
335 * have disabled all of them by default).
336 */
337int allow_signal(int sig)
338{
7ed20e1a 339 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
340 return -EINVAL;
341
342 spin_lock_irq(&current->sighand->siglock);
343 sigdelset(&current->blocked, sig);
344 if (!current->mm) {
345 /* Kernel threads handle their own signals.
346 Let the signal code know it'll be handled, so
347 that they don't get converted to SIGKILL or
348 just silently dropped */
349 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
350 }
351 recalc_sigpending();
352 spin_unlock_irq(&current->sighand->siglock);
353 return 0;
354}
355
356EXPORT_SYMBOL(allow_signal);
357
358int disallow_signal(int sig)
359{
7ed20e1a 360 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
361 return -EINVAL;
362
363 spin_lock_irq(&current->sighand->siglock);
364 sigaddset(&current->blocked, sig);
365 recalc_sigpending();
366 spin_unlock_irq(&current->sighand->siglock);
367 return 0;
368}
369
370EXPORT_SYMBOL(disallow_signal);
371
372/*
373 * Put all the gunge required to become a kernel thread without
374 * attached user resources in one place where it belongs.
375 */
376
377void daemonize(const char *name, ...)
378{
379 va_list args;
380 struct fs_struct *fs;
381 sigset_t blocked;
382
383 va_start(args, name);
384 vsnprintf(current->comm, sizeof(current->comm), name, args);
385 va_end(args);
386
387 /*
388 * If we were started as result of loading a module, close all of the
389 * user space pages. We don't need them, and if we didn't close them
390 * they would be locked into memory.
391 */
392 exit_mm(current);
393
394 set_special_pids(1, 1);
70522e12 395 mutex_lock(&tty_mutex);
1da177e4 396 current->signal->tty = NULL;
70522e12 397 mutex_unlock(&tty_mutex);
1da177e4
LT
398
399 /* Block and flush all signals */
400 sigfillset(&blocked);
401 sigprocmask(SIG_BLOCK, &blocked, NULL);
402 flush_signals(current);
403
404 /* Become as one with the init task */
405
406 exit_fs(current); /* current->fs->count--; */
407 fs = init_task.fs;
408 current->fs = fs;
409 atomic_inc(&fs->count);