[PATCH] kthread: convert arch/i386/kernel/apm.c
[linux-2.6-block.git] / kernel / exit.c
CommitLineData
1da177e4
LT
1/*
2 * linux/kernel/exit.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
1da177e4
LT
7#include <linux/mm.h>
8#include <linux/slab.h>
9#include <linux/interrupt.h>
10#include <linux/smp_lock.h>
11#include <linux/module.h>
c59ede7b 12#include <linux/capability.h>
1da177e4
LT
13#include <linux/completion.h>
14#include <linux/personality.h>
15#include <linux/tty.h>
16#include <linux/namespace.h>
17#include <linux/key.h>
18#include <linux/security.h>
19#include <linux/cpu.h>
20#include <linux/acct.h>
21#include <linux/file.h>
22#include <linux/binfmts.h>
23#include <linux/ptrace.h>
24#include <linux/profile.h>
25#include <linux/mount.h>
26#include <linux/proc_fs.h>
27#include <linux/mempolicy.h>
c757249a 28#include <linux/taskstats_kern.h>
ca74e92b 29#include <linux/delayacct.h>
1da177e4
LT
30#include <linux/cpuset.h>
31#include <linux/syscalls.h>
7ed20e1a 32#include <linux/signal.h>
6a14c5c9 33#include <linux/posix-timers.h>
9f46080c 34#include <linux/cn_proc.h>
de5097c2 35#include <linux/mutex.h>
0771dfef 36#include <linux/futex.h>
34f192c6 37#include <linux/compat.h>
b92ce558 38#include <linux/pipe_fs_i.h>
fa84cb93 39#include <linux/audit.h> /* for audit_free() */
83cc5ed3 40#include <linux/resource.h>
1da177e4
LT
41
42#include <asm/uaccess.h>
43#include <asm/unistd.h>
44#include <asm/pgtable.h>
45#include <asm/mmu_context.h>
46
47extern void sem_exit (void);
48extern struct task_struct *child_reaper;
49
408b664a
AB
50static void exit_mm(struct task_struct * tsk);
51
1da177e4
LT
52static void __unhash_process(struct task_struct *p)
53{
54 nr_threads--;
55 detach_pid(p, PIDTYPE_PID);
1da177e4
LT
56 if (thread_group_leader(p)) {
57 detach_pid(p, PIDTYPE_PGID);
58 detach_pid(p, PIDTYPE_SID);
c97d9893 59
5e85d4ab 60 list_del_rcu(&p->tasks);
73b9ebfe 61 __get_cpu_var(process_counts)--;
1da177e4 62 }
47e65328 63 list_del_rcu(&p->thread_group);
c97d9893 64 remove_parent(p);
1da177e4
LT
65}
66
6a14c5c9
ON
67/*
68 * This function expects the tasklist_lock write-locked.
69 */
70static void __exit_signal(struct task_struct *tsk)
71{
72 struct signal_struct *sig = tsk->signal;
73 struct sighand_struct *sighand;
74
75 BUG_ON(!sig);
76 BUG_ON(!atomic_read(&sig->count));
77
78 rcu_read_lock();
79 sighand = rcu_dereference(tsk->sighand);
80 spin_lock(&sighand->siglock);
81
82 posix_cpu_timers_exit(tsk);
83 if (atomic_dec_and_test(&sig->count))
84 posix_cpu_timers_exit_group(tsk);
85 else {
86 /*
87 * If there is any task waiting for the group exit
88 * then notify it:
89 */
90 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
91 wake_up_process(sig->group_exit_task);
92 sig->group_exit_task = NULL;
93 }
94 if (tsk == sig->curr_target)
95 sig->curr_target = next_thread(tsk);
96 /*
97 * Accumulate here the counters for all threads but the
98 * group leader as they die, so they can be added into
99 * the process-wide totals when those are taken.
100 * The group leader stays around as a zombie as long
101 * as there are other threads. When it gets reaped,
102 * the exit.c code will add its counts into these totals.
103 * We won't ever get here for the group leader, since it
104 * will have been the last reference on the signal_struct.
105 */
106 sig->utime = cputime_add(sig->utime, tsk->utime);
107 sig->stime = cputime_add(sig->stime, tsk->stime);
108 sig->min_flt += tsk->min_flt;
109 sig->maj_flt += tsk->maj_flt;
110 sig->nvcsw += tsk->nvcsw;
111 sig->nivcsw += tsk->nivcsw;
112 sig->sched_time += tsk->sched_time;
113 sig = NULL; /* Marker for below. */
114 }
115
5876700c
ON
116 __unhash_process(tsk);
117
6a14c5c9 118 tsk->signal = NULL;
a7e5328a 119 tsk->sighand = NULL;
6a14c5c9
ON
120 spin_unlock(&sighand->siglock);
121 rcu_read_unlock();
122
a7e5328a 123 __cleanup_sighand(sighand);
6a14c5c9
ON
124 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
125 flush_sigqueue(&tsk->pending);
126 if (sig) {
127 flush_sigqueue(&sig->shared_pending);
128 __cleanup_signal(sig);
129 }
130}
131
8c7904a0
EB
132static void delayed_put_task_struct(struct rcu_head *rhp)
133{
134 put_task_struct(container_of(rhp, struct task_struct, rcu));
135}
136
1da177e4
LT
137void release_task(struct task_struct * p)
138{
36c8b586 139 struct task_struct *leader;
1da177e4 140 int zap_leader;
1f09f974 141repeat:
1da177e4 142 atomic_dec(&p->user->processes);
1da177e4 143 write_lock_irq(&tasklist_lock);
1f09f974 144 ptrace_unlink(p);
1da177e4
LT
145 BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
146 __exit_signal(p);
35f5cad8 147
1da177e4
LT
148 /*
149 * If we are the last non-leader member of the thread
150 * group, and the leader is zombie, then notify the
151 * group leader's parent process. (if it wants notification.)
152 */
153 zap_leader = 0;
154 leader = p->group_leader;
155 if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
156 BUG_ON(leader->exit_signal == -1);
157 do_notify_parent(leader, leader->exit_signal);
158 /*
159 * If we were the last child thread and the leader has
160 * exited already, and the leader's parent ignores SIGCHLD,
161 * then we are the one who should release the leader.
162 *
163 * do_notify_parent() will have marked it self-reaping in
164 * that case.
165 */
166 zap_leader = (leader->exit_signal == -1);
167 }
168
169 sched_exit(p);
170 write_unlock_irq(&tasklist_lock);
48e6484d 171 proc_flush_task(p);
1da177e4 172 release_thread(p);
8c7904a0 173 call_rcu(&p->rcu, delayed_put_task_struct);
1da177e4
LT
174
175 p = leader;
176 if (unlikely(zap_leader))
177 goto repeat;
178}
179
1da177e4
LT
180/*
181 * This checks not only the pgrp, but falls back on the pid if no
182 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
183 * without this...
184 */
185int session_of_pgrp(int pgrp)
186{
187 struct task_struct *p;
188 int sid = -1;
189
190 read_lock(&tasklist_lock);
191 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
192 if (p->signal->session > 0) {
193 sid = p->signal->session;
194 goto out;
195 }
196 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
197 p = find_task_by_pid(pgrp);
198 if (p)
199 sid = p->signal->session;
200out:
201 read_unlock(&tasklist_lock);
202
203 return sid;
204}
205
206/*
207 * Determine if a process group is "orphaned", according to the POSIX
208 * definition in 2.2.2.52. Orphaned process groups are not to be affected
209 * by terminal-generated stop signals. Newly orphaned process groups are
210 * to receive a SIGHUP and a SIGCONT.
211 *
212 * "I ask you, have you ever known what it is to be an orphan?"
213 */
36c8b586 214static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
1da177e4
LT
215{
216 struct task_struct *p;
217 int ret = 1;
218
219 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
220 if (p == ignored_task
221 || p->exit_state
222 || p->real_parent->pid == 1)
223 continue;
224 if (process_group(p->real_parent) != pgrp
225 && p->real_parent->signal->session == p->signal->session) {
226 ret = 0;
227 break;
228 }
229 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
230 return ret; /* (sighing) "Often!" */
231}
232
233int is_orphaned_pgrp(int pgrp)
234{
235 int retval;
236
237 read_lock(&tasklist_lock);
238 retval = will_become_orphaned_pgrp(pgrp, NULL);
239 read_unlock(&tasklist_lock);
240
241 return retval;
242}
243
858119e1 244static int has_stopped_jobs(int pgrp)
1da177e4
LT
245{
246 int retval = 0;
247 struct task_struct *p;
248
249 do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
250 if (p->state != TASK_STOPPED)
251 continue;
252
253 /* If p is stopped by a debugger on a signal that won't
254 stop it, then don't count p as stopped. This isn't
255 perfect but it's a good approximation. */
256 if (unlikely (p->ptrace)
257 && p->exit_code != SIGSTOP
258 && p->exit_code != SIGTSTP
259 && p->exit_code != SIGTTOU
260 && p->exit_code != SIGTTIN)
261 continue;
262
263 retval = 1;
264 break;
265 } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
266 return retval;
267}
268
269/**
4dc3b16b 270 * reparent_to_init - Reparent the calling kernel thread to the init task.
1da177e4
LT
271 *
272 * If a kernel thread is launched as a result of a system call, or if
273 * it ever exits, it should generally reparent itself to init so that
274 * it is correctly cleaned up on exit.
275 *
276 * The various task state such as scheduling policy and priority may have
277 * been inherited from a user process, so we reset them to sane values here.
278 *
279 * NOTE that reparent_to_init() gives the caller full capabilities.
280 */
858119e1 281static void reparent_to_init(void)
1da177e4
LT
282{
283 write_lock_irq(&tasklist_lock);
284
285 ptrace_unlink(current);
286 /* Reparent to init */
9b678ece 287 remove_parent(current);
1da177e4
LT
288 current->parent = child_reaper;
289 current->real_parent = child_reaper;
9b678ece 290 add_parent(current);
1da177e4
LT
291
292 /* Set the exit signal to SIGCHLD so we signal init on exit */
293 current->exit_signal = SIGCHLD;
294
b0a9499c
IM
295 if ((current->policy == SCHED_NORMAL ||
296 current->policy == SCHED_BATCH)
297 && (task_nice(current) < 0))
1da177e4
LT
298 set_user_nice(current, 0);
299 /* cpus_allowed? */
300 /* rt_priority? */
301 /* signals? */
302 security_task_reparent_to_init(current);
303 memcpy(current->signal->rlim, init_task.signal->rlim,
304 sizeof(current->signal->rlim));
305 atomic_inc(&(INIT_USER->__count));
306 write_unlock_irq(&tasklist_lock);
307 switch_uid(INIT_USER);
308}
309
310void __set_special_pids(pid_t session, pid_t pgrp)
311{
e19f247a 312 struct task_struct *curr = current->group_leader;
1da177e4
LT
313
314 if (curr->signal->session != session) {
315 detach_pid(curr, PIDTYPE_SID);
316 curr->signal->session = session;
317 attach_pid(curr, PIDTYPE_SID, session);
318 }
319 if (process_group(curr) != pgrp) {
320 detach_pid(curr, PIDTYPE_PGID);
321 curr->signal->pgrp = pgrp;
322 attach_pid(curr, PIDTYPE_PGID, pgrp);
323 }
324}
325
326void set_special_pids(pid_t session, pid_t pgrp)
327{
328 write_lock_irq(&tasklist_lock);
329 __set_special_pids(session, pgrp);
330 write_unlock_irq(&tasklist_lock);
331}
332
333/*
334 * Let kernel threads use this to say that they
335 * allow a certain signal (since daemonize() will
336 * have disabled all of them by default).
337 */
338int allow_signal(int sig)
339{
7ed20e1a 340 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
341 return -EINVAL;
342
343 spin_lock_irq(&current->sighand->siglock);
344 sigdelset(&current->blocked, sig);
345 if (!current->mm) {
346 /* Kernel threads handle their own signals.
347 Let the signal code know it'll be handled, so
348 that they don't get converted to SIGKILL or
349 just silently dropped */
350 current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
351 }
352 recalc_sigpending();
353 spin_unlock_irq(&current->sighand->siglock);
354 return 0;
355}
356
357EXPORT_SYMBOL(allow_signal);
358
359int disallow_signal(int sig)
360{
7ed20e1a 361 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
362 return -EINVAL;
363
364 spin_lock_irq(&current->sighand->siglock);
365 sigaddset(&current->blocked, sig);
366 recalc_sigpending();
367 spin_unlock_irq(&current->sighand->siglock);
368 return 0;
369}
370
371EXPORT_SYMBOL(disallow_signal);
372
373/*
374 * Put all the gunge required to become a kernel thread without
375 * attached user resources in one place where it belongs.
376 */
377
378void daemonize(const char *name, ...)
379{
380 va_list args;
381 struct fs_struct *fs;
382 sigset_t blocked;
383
384 va_start(args, name);
385 vsnprintf(current->comm, sizeof(current->comm), name, args);
386 va_end(args);
387
388 /*
389 * If we were started as result of loading a module, close all of the
390 * user space pages. We don't need them, and if we didn't close them
391 * they would be locked into memory.
392 */
393 exit_mm(current);
394
395 set_special_pids(1, 1);
70522e12 396 mutex_lock(&tty_mutex);
1da177e4 397 current->signal->tty = NULL;
70522e12 398 mutex_unlock(&tty_mutex);
1da177e4
LT
399
400 /* Block and flush all signals */
401 sigfillset(&blocked);
402 sigprocmask(SIG_BLOCK, &blocked, NULL);
403 flush_signals(current);
404
405 /* Become as one with the init task */
406
407 exit_fs(current); /* current->fs->count--; */
408 fs = init_task.fs;
409 current->fs = fs;
410 atomic_inc(&fs->count);