[PATCH] lightweight robust futexes: arch defaults
[linux-2.6-block.git] / kernel / exit.c
CommitLineData
1da177e4
LT
1/*
2 * linux/kernel/exit.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7#include <linux/config.h>
8#include <linux/mm.h>
9#include <linux/slab.h>
10#include <linux/interrupt.h>
11#include <linux/smp_lock.h>
12#include <linux/module.h>
c59ede7b 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/completion.h>
15#include <linux/personality.h>
16#include <linux/tty.h>
17#include <linux/namespace.h>
18#include <linux/key.h>
19#include <linux/security.h>
20#include <linux/cpu.h>
21#include <linux/acct.h>
22#include <linux/file.h>
23#include <linux/binfmts.h>
24#include <linux/ptrace.h>
25#include <linux/profile.h>
26#include <linux/mount.h>
27#include <linux/proc_fs.h>
28#include <linux/mempolicy.h>
29#include <linux/cpuset.h>
30#include <linux/syscalls.h>
7ed20e1a 31#include <linux/signal.h>
9f46080c 32#include <linux/cn_proc.h>
de5097c2 33#include <linux/mutex.h>
1da177e4
LT
34
35#include <asm/uaccess.h>
36#include <asm/unistd.h>
37#include <asm/pgtable.h>
38#include <asm/mmu_context.h>
39
40extern void sem_exit (void);
41extern struct task_struct *child_reaper;
42
43int getrusage(struct task_struct *, int, struct rusage __user *);
44
408b664a
AB
45static void exit_mm(struct task_struct * tsk);
46
1da177e4
LT
47static void __unhash_process(struct task_struct *p)
48{
49 nr_threads--;
50 detach_pid(p, PIDTYPE_PID);
51 detach_pid(p, PIDTYPE_TGID);
52 if (thread_group_leader(p)) {
53 detach_pid(p, PIDTYPE_PGID);
54 detach_pid(p, PIDTYPE_SID);
55 if (p->pid)
56 __get_cpu_var(process_counts)--;
57 }
58
59 REMOVE_LINKS(p);
60}
61
62void release_task(struct task_struct * p)
63{
64 int zap_leader;
65 task_t *leader;
66 struct dentry *proc_dentry;
67
68repeat:
69 atomic_dec(&p->user->processes);
70 spin_lock(&p->proc_lock);
71 proc_dentry = proc_pid_unhash(p);
72 write_lock_irq(&tasklist_lock);
73 if (unlikely(p->ptrace))
74 __ptrace_unlink(p);
75 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
76 __exit_signal(p);
71a2224d
CL
77 /*
78 * Note that the fastpath in sys_times depends on __exit_signal having
79 * updated the counters before a task is removed from the tasklist of
80 * the process by __unhash_process.
81 */
1da177e4
LT
82 __unhash_process(p);
83
84 /*
85 * If we are the last non-leader member of the thread
86 * group, and the leader is zombie, then notify the
87 * group leader's parent process. (if it wants notification.)
88 */
89 zap_leader = 0;
90 leader = p->group_leader;
91 if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
92 BUG_ON(leader->exit_signal == -1);
93 do_notify_parent(leader, leader->exit_signal);
94 /*
95 * If we were the last child thread and the leader has
96 * exited already, and the leader's parent ignores SIGCHLD,
97 * then we are the one who should release the leader.
98 *
99 * do_notify_parent() will have marked it self-reaping in
100 * that case.
101 */
102 zap_leader = (leader->exit_signal == -1);
103 }
104
105 sched_exit(p);
106 write_unlock_irq(&tasklist_lock);
107 spin_unlock(&p->proc_lock);
108 proc_pid_flush(proc_dentry);
109 release_thread(p);
110 put_task_struct(p);
111
112 p = leader;
113 if (unlikely(zap_leader))
114 goto repeat;
115}
116
117/* we are using it only for SMP init */
118
119void unhash_process(struct task_struct *p)
120{
121 struct dentry *proc_dentry;
122
123 spin_lock(&p->proc_lock);
124 proc_dentry = proc_pid_unhash(p);
125 write_lock_irq(&tasklist_lock);
126 __unhash_process(p);
127 write_unlock_irq(&tasklist_lock);
128 spin_unlock(&p->proc_lock);
129 proc_pid_flush(proc_dentry);
130}
131
132/*
133 * This checks not only the pgrp, but falls back on the pid if no
134 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
135 * without this...
136 */
137int session_of_pgrp(int pgrp)
138{
139 struct task_struct *p;
140 int sid = -1;
141
142 read_lock(&tasklist_lock);
143 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
144 if (p->signal->session > 0) {
145 sid = p->signal->session;
146 goto out;
147 }
148 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
149 p = find_task_by_pid(pgrp);
150 if (p)
151 sid = p->signal->session;
152out:
153 read_unlock(&tasklist_lock);
154
155 return sid;
156}
157
158/*
159 * Determine if a process group is "orphaned", according to the POSIX
160 * definition in 2.2.2.52. Orphaned process groups are not to be affected
161 * by terminal-generated stop signals. Newly orphaned process groups are
162 * to receive a SIGHUP and a SIGCONT.
163 *
164 * "I ask you, have you ever known what it is to be an orphan?"
165 */
166static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
167{
168 struct task_struct *p;
169 int ret = 1;
170
171 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
172 if (p == ignored_task
173 || p->exit_state
174 || p->real_parent->pid == 1)
175 continue;
176 if (process_group(p->real_parent) != pgrp
177 && p->real_parent->signal->session == p->signal->session) {
178 ret = 0;
179 break;
180 }
181 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
182 return ret; /* (sighing) "Often!" */
183}
184
185int is_orphaned_pgrp(int pgrp)
186{
187 int retval;
188
189 read_lock(&tasklist_lock);
190 retval = will_become_orphaned_pgrp(pgrp, NULL);
191 read_unlock(&tasklist_lock);
192
193 return retval;
194}
195
858119e1 196static int has_stopped_jobs(int pgrp)
1da177e4
LT
197{
198 int retval = 0;
199 struct task_struct *p;
200
201 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
202 if (p->state != TASK_STOPPED)
203 continue;
204
205 /* If p is stopped by a debugger on a signal that won't
206 stop it, then don't count p as stopped. This isn't
207 perfect but it's a good approximation. */
208 if (unlikely (p->ptrace)
209 && p->exit_code != SIGSTOP
210 && p->exit_code != SIGTSTP
211 && p->exit_code != SIGTTOU
212 && p->exit_code != SIGTTIN)
213 continue;
214
215 retval = 1;
216 break;
217 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
218 return retval;
219}
220
221/**
4dc3b16b 222 * reparent_to_init - Reparent the calling kernel thread to the init task.
1da177e4
LT
223 *
224 * If a kernel thread is launched as a result of a system call, or if
225 * it ever exits, it should generally reparent itself to init so that
226 * it is correctly cleaned up on exit.
227 *
228 * The various task state such as scheduling policy and priority may have
229 * been inherited from a user process, so we reset them to sane values here.
230 *
231 * NOTE that reparent_to_init() gives the caller full capabilities.
232 */
858119e1 233static void reparent_to_init(void)
1da177e4
LT
234{
235 write_lock_irq(&tasklist_lock);
236
237 ptrace_unlink(current);
238 /* Reparent to init */
239 REMOVE_LINKS(current);
240 current->parent = child_reaper;
241 current->real_parent = child_reaper;
242 SET_LINKS(current);
243
244 /* Set the exit signal to SIGCHLD so we signal init on exit */
245 current->exit_signal = SIGCHLD;
246
b0a9499c
IM
247 if ((current->policy == SCHED_NORMAL ||
248 current->policy == SCHED_BATCH)
249 && (task_nice(current) < 0))
1da177e4
LT
250 set_user_nice(current, 0);
251 /* cpus_allowed? */
252 /* rt_priority? */
253 /* signals? */
254 security_task_reparent_to_init(current);
255 memcpy(current->signal->rlim, init_task.signal->rlim,
256 sizeof(current->signal->rlim));
257 atomic_inc(&(INIT_USER->__count));
258 write_unlock_irq(&tasklist_lock);
259 switch_uid(INIT_USER);
260}
261
262void __set_special_pids(pid_t session, pid_t pgrp)
263{
e19f247a 264 struct task_struct *curr = current->group_leader;
1da177e4
LT
265
266 if (curr->signal->session != session) {
267 detach_pid(curr, PIDTYPE_SID);
268 curr->signal->session = session;
269 attach_pid(curr, PIDTYPE_SID, session);
270 }
271 if (process_group(curr) != pgrp) {
272 detach_pid(curr, PIDTYPE_PGID);
273 curr->signal->pgrp = pgrp;
274 attach_pid(curr, PIDTYPE_PGID, pgrp);
275 }
276}
277
278void set_special_pids(pid_t session, pid_t pgrp)
279{
280 write_lock_irq(&tasklist_lock);
281 __set_special_pids(session, pgrp);
282 write_unlock_irq(&tasklist_lock);
283}
284
285/*
286 * Let kernel threads use this to say that they
287 * allow a certain signal (since daemonize() will
288 * have disabled all of them by default).
289 */
290int allow_signal(int sig)
291{
7ed20e1a 292 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
293 return -EINVAL;
294
295 spin_lock_irq(&current->sighand->siglock);
296 sigdelset(&current->blocked, sig);
297 if (!current->mm) {
298 /* Kernel threads handle their own signals.
299 Let the signal code know it'll be handled, so
300 that they don't get converted to SIGKILL or
301 just silently dropped */
302 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
303 }
304 recalc_sigpending();
305 spin_unlock_irq(&current->sighand->siglock);
306 return 0;
307}
308
309EXPORT_SYMBOL(allow_signal);
310
311int disallow_signal(int sig)
312{
7ed20e1a 313 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
314 return -EINVAL;
315
316 spin_lock_irq(&current->sighand->siglock);
317 sigaddset(&current->blocked, sig);
318 recalc_sigpending();
319 spin_unlock_irq(&current->sighand->siglock);
320 return 0;
321}
322
323EXPORT_SYMBOL(disallow_signal);
324
325/*
326 * Put all the gunge required to become a kernel thread without
327 * attached user resources in one place where it belongs.
328 */
329
330void daemonize(const char *name, ...)
331{
332 va_list args;
333 struct fs_struct *fs;
334 sigset_t blocked;
335
336 va_start(args, name);
337 vsnprintf(current->comm, sizeof(current->comm), name, args);
338 va_end(args);
339
340 /*
341 * If we were started as result of loading a module, close all of the
342 * user space pages. We don't need them, and if we didn't close them
343 * they would be locked into memory.
344 */
345 exit_mm(current);
346
347 set_special_pids(1, 1);
70522e12 348 mutex_lock(&tty_mutex);
1da177e4 349 current->signal->tty = NULL;
70522e12 350 mutex_unlock(&tty_mutex);
1da177e4
LT
351
352 /* Block and flush all signals */
353 sigfillset(&blocked);
354 sigprocmask(SIG_BLOCK, &blocked, NULL);
355 flush_signals(current);
356
357 /* Become as one with the init task */
358
359 exit_fs(current); /* current->fs->count--; */
360 fs = init_task.fs;
361 current->fs = fs;
362 atomic_inc(&fs->count);