[PATCH] lightweight robust futexes: docs
[linux-block.git] / kernel / exit.c
CommitLineData
1da177e4
LT
1/*
2 * linux/kernel/exit.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7#include <linux/config.h>
8#include <linux/mm.h>
9#include <linux/slab.h>
10#include <linux/interrupt.h>
11#include <linux/smp_lock.h>
12#include <linux/module.h>
c59ede7b 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/completion.h>
15#include <linux/personality.h>
16#include <linux/tty.h>
17#include <linux/namespace.h>
18#include <linux/key.h>
19#include <linux/security.h>
20#include <linux/cpu.h>
21#include <linux/acct.h>
22#include <linux/file.h>
23#include <linux/binfmts.h>
24#include <linux/ptrace.h>
25#include <linux/profile.h>
26#include <linux/mount.h>
27#include <linux/proc_fs.h>
28#include <linux/mempolicy.h>
29#include <linux/cpuset.h>
30#include <linux/syscalls.h>
7ed20e1a 31#include <linux/signal.h>
9f46080c 32#include <linux/cn_proc.h>
de5097c2 33#include <linux/mutex.h>
0771dfef 34#include <linux/futex.h>
1da177e4
LT
35
36#include <asm/uaccess.h>
37#include <asm/unistd.h>
38#include <asm/pgtable.h>
39#include <asm/mmu_context.h>
40
41extern void sem_exit (void);
42extern struct task_struct *child_reaper;
43
44int getrusage(struct task_struct *, int, struct rusage __user *);
45
408b664a
AB
46static void exit_mm(struct task_struct * tsk);
47
1da177e4
LT
/*
 * Remove @p from all pid hash chains and from the task lists.
 *
 * Caller must hold tasklist_lock for writing -- both callers in this
 * file (release_task() and unhash_process()) take
 * write_lock_irq(&tasklist_lock) before calling here.
 */
static void __unhash_process(struct task_struct *p)
{
	nr_threads--;
	detach_pid(p, PIDTYPE_PID);
	detach_pid(p, PIDTYPE_TGID);
	if (thread_group_leader(p)) {
		/* Only the thread-group leader carries pgrp/session links. */
		detach_pid(p, PIDTYPE_PGID);
		detach_pid(p, PIDTYPE_SID);
		/*
		 * pid 0 is excluded from the per-cpu process count
		 * (presumably the idle tasks -- verify).
		 */
		if (p->pid)
			__get_cpu_var(process_counts)--;
	}

	REMOVE_LINKS(p);
}
62
/*
 * Final release of a dead task: drop its accounting, detach it from
 * all pid hashes and task lists, and free its task structure.
 *
 * If @p was the last non-leader thread of a group whose leader is
 * already a self-reaping zombie, the leader is released here as well
 * via the "repeat" loop.
 *
 * Lock ordering: p->proc_lock, then tasklist_lock (write, irq-off);
 * proc_pid_flush() must run only after both are dropped.
 */
void release_task(struct task_struct * p)
{
	int zap_leader;
	task_t *leader;
	struct dentry *proc_dentry;

repeat:
	atomic_dec(&p->user->processes);
	spin_lock(&p->proc_lock);
	proc_dentry = proc_pid_unhash(p);
	write_lock_irq(&tasklist_lock);
	if (unlikely(p->ptrace))
		__ptrace_unlink(p);
	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
	__exit_signal(p);
	/*
	 * Note that the fastpath in sys_times depends on __exit_signal having
	 * updated the counters before a task is removed from the tasklist of
	 * the process by __unhash_process.
	 */
	__unhash_process(p);

	/*
	 * If we are the last non-leader member of the thread
	 * group, and the leader is zombie, then notify the
	 * group leader's parent process. (if it wants notification.)
	 */
	zap_leader = 0;
	leader = p->group_leader;
	if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
		BUG_ON(leader->exit_signal == -1);
		do_notify_parent(leader, leader->exit_signal);
		/*
		 * If we were the last child thread and the leader has
		 * exited already, and the leader's parent ignores SIGCHLD,
		 * then we are the one who should release the leader.
		 *
		 * do_notify_parent() will have marked it self-reaping in
		 * that case.
		 */
		zap_leader = (leader->exit_signal == -1);
	}

	sched_exit(p);
	write_unlock_irq(&tasklist_lock);
	spin_unlock(&p->proc_lock);
	proc_pid_flush(proc_dentry);
	release_thread(p);
	put_task_struct(p);

	/* Loop once more to reap the leader if it became self-reaping. */
	p = leader;
	if (unlikely(zap_leader))
		goto repeat;
}
117
118/* we are using it only for SMP init */
119
120void unhash_process(struct task_struct *p)
121{
122 struct dentry *proc_dentry;
123
124 spin_lock(&p->proc_lock);
125 proc_dentry = proc_pid_unhash(p);
126 write_lock_irq(&tasklist_lock);
127 __unhash_process(p);
128 write_unlock_irq(&tasklist_lock);
129 spin_unlock(&p->proc_lock);
130 proc_pid_flush(proc_dentry);
131}
132
/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 *
 * Returns the session id for process group @pgrp, or -1 if none
 * could be determined.
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *p;
	int sid = -1;

	read_lock(&tasklist_lock);
	/* First choice: any group member with a positive session id. */
	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
		if (p->signal->session > 0) {
			sid = p->signal->session;
			goto out;
		}
	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
	/* Fallback: interpret @pgrp as a pid and use that task's session. */
	p = find_task_by_pid(pgrp);
	if (p)
		sid = p->signal->session;
out:
	read_unlock(&tasklist_lock);

	return sid;
}
158
/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52. Orphaned process groups are not to be affected
 * by terminal-generated stop signals. Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * @ignored_task is excluded from the scan (may be NULL), so a caller
 * can ask "will this pgrp be orphaned once @ignored_task is gone?".
 * Returns 1 if the group is (or will become) orphaned, 0 otherwise.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
{
	struct task_struct *p;
	int ret = 1;

	/* Caller must hold tasklist_lock -- see is_orphaned_pgrp(). */
	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
		/* Skip the excluded task, exiting tasks, and children of init. */
		if (p == ignored_task
				|| p->exit_state
				|| p->real_parent->pid == 1)
			continue;
		/*
		 * A living parent outside the group but in the same
		 * session keeps the group non-orphaned.
		 */
		if (process_group(p->real_parent) != pgrp
			    && p->real_parent->signal->session == p->signal->session) {
			ret = 0;
			break;
		}
	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
	return ret;	/* (sighing) "Often!" */
}
185
186int is_orphaned_pgrp(int pgrp)
187{
188 int retval;
189
190 read_lock(&tasklist_lock);
191 retval = will_become_orphaned_pgrp(pgrp, NULL);
192 read_unlock(&tasklist_lock);
193
194 return retval;
195}
196
/*
 * Return 1 if any member of process group @pgrp is genuinely stopped
 * (TASK_STOPPED), 0 otherwise.
 *
 * NOTE(review): caller is presumably expected to hold tasklist_lock,
 * like the other pgrp scanners here -- confirm at call sites.
 */
static int has_stopped_jobs(int pgrp)
{
	int retval = 0;
	struct task_struct *p;

	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
		if (p->state != TASK_STOPPED)
			continue;

		/* If p is stopped by a debugger on a signal that won't
		   stop it, then don't count p as stopped.  This isn't
		   perfect but it's a good approximation. */
		if (unlikely (p->ptrace)
		    && p->exit_code != SIGSTOP
		    && p->exit_code != SIGTSTP
		    && p->exit_code != SIGTTOU
		    && p->exit_code != SIGTTIN)
			continue;

		retval = 1;
		break;
	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
	return retval;
}
221
/**
 * reparent_to_init - Reparent the calling kernel thread to the init task.
 *
 * If a kernel thread is launched as a result of a system call, or if
 * it ever exits, it should generally reparent itself to init so that
 * it is correctly cleaned up on exit.
 *
 * The various task state such as scheduling policy and priority may have
 * been inherited from a user process, so we reset them to sane values here.
 *
 * NOTE that reparent_to_init() gives the caller full capabilities.
 */
static void reparent_to_init(void)
{
	write_lock_irq(&tasklist_lock);

	ptrace_unlink(current);
	/* Reparent to init */
	REMOVE_LINKS(current);
	current->parent = child_reaper;
	current->real_parent = child_reaper;
	SET_LINKS(current);

	/* Set the exit signal to SIGCHLD so we signal init on exit */
	current->exit_signal = SIGCHLD;

	/* Undo any inherited negative niceness for normal/batch policy. */
	if ((current->policy == SCHED_NORMAL ||
			current->policy == SCHED_BATCH)
				&& (task_nice(current) < 0))
		set_user_nice(current, 0);
	/* cpus_allowed? */
	/* rt_priority? */
	/* signals? */
	security_task_reparent_to_init(current);
	/* Take over init's rlimits and user accounting. */
	memcpy(current->signal->rlim, init_task.signal->rlim,
	       sizeof(current->signal->rlim));
	atomic_inc(&(INIT_USER->__count));
	write_unlock_irq(&tasklist_lock);
	/* switch_uid() presumably consumes the reference taken above. */
	switch_uid(INIT_USER);
}
262
/*
 * Move the current thread-group leader into session @session and
 * process group @pgrp, rehashing the SID/PGID pid links as needed.
 *
 * Caller must hold tasklist_lock for writing (see set_special_pids()).
 */
void __set_special_pids(pid_t session, pid_t pgrp)
{
	struct task_struct *curr = current->group_leader;

	if (curr->signal->session != session) {
		/* Detach before updating so the old hash link is removed. */
		detach_pid(curr, PIDTYPE_SID);
		curr->signal->session = session;
		attach_pid(curr, PIDTYPE_SID, session);
	}
	if (process_group(curr) != pgrp) {
		detach_pid(curr, PIDTYPE_PGID);
		curr->signal->pgrp = pgrp;
		attach_pid(curr, PIDTYPE_PGID, pgrp);
	}
}
278
/*
 * Locked wrapper for __set_special_pids(): serializes the pid-link
 * update against the task list.
 */
void set_special_pids(pid_t session, pid_t pgrp)
{
	write_lock_irq(&tasklist_lock);
	__set_special_pids(session, pgrp);
	write_unlock_irq(&tasklist_lock);
}
285
/*
 * Let kernel threads use this to say that they
 * allow a certain signal (since daemonize() will
 * have disabled all of them by default).
 *
 * Returns 0 on success, -EINVAL for an out-of-range signal number.
 */
int allow_signal(int sig)
{
	if (!valid_signal(sig) || sig < 1)
		return -EINVAL;

	spin_lock_irq(&current->sighand->siglock);
	sigdelset(&current->blocked, sig);
	if (!current->mm) {
		/* Kernel threads handle their own signals.
		   Let the signal code know it'll be handled, so
		   that they don't get converted to SIGKILL or
		   just silently dropped */
		/* The magic value 2 is a kernel-internal marker:
		   distinct from SIG_DFL (0) and SIG_IGN (1). */
		current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
	}
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);
	return 0;
}

EXPORT_SYMBOL(allow_signal);
311
312int disallow_signal(int sig)
313{
7ed20e1a 314 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
315 return -EINVAL;
316
317 spin_lock_irq(&current->sighand->siglock);
318 sigaddset(&current->blocked, sig);
319 recalc_sigpending();
320 spin_unlock_irq(&current->sighand->siglock);
321 return 0;
322}
323
324EXPORT_SYMBOL(disallow_signal);
325
326/*
327 * Put all the gunge required to become a kernel thread without
328 * attached user resources in one place where it belongs.
329 */
330
331void daemonize(const char *name, ...)
332{
333 va_list args;
334 struct fs_struct *fs;
335 sigset_t blocked;
336
337 va_start(args, name);
338 vsnprintf(current->comm, sizeof(current->comm), name, args);
339 va_end(args);
340
341 /*
342 * If we were started as result of loading a module, close all of the
343 * user space pages. We don't need them, and if we didn't close them
344 * they would be locked into memory.
345 */
346 exit_mm(current);
347
348 set_special_pids(1, 1);
70522e12 349 mutex_lock(&tty_mutex);
1da177e4 350 current->signal->tty = NULL;
70522e12 351 mutex_unlock(&tty_mutex);
1da177e4
LT
352
353 /* Block and flush all signals */
354 sigfillset(&blocked);
355 sigprocmask(SIG_BLOCK, &blocked, NULL);
356 flush_signals(current);
357
358 /* Become as one with the init task */
359
360 exit_fs(current); /* current->fs->count--; */
361 fs = init_task.fs;
362 current->fs = fs;
363 atomic_inc(&fs->count);