[PATCH] lightweight robust futexes: docs
[linux-block.git] / kernel / exit.c
CommitLineData
1da177e4
LT
1/*
2 * linux/kernel/exit.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7#include <linux/config.h>
8#include <linux/mm.h>
9#include <linux/slab.h>
10#include <linux/interrupt.h>
11#include <linux/smp_lock.h>
12#include <linux/module.h>
c59ede7b 13#include <linux/capability.h>
1da177e4
LT
14#include <linux/completion.h>
15#include <linux/personality.h>
16#include <linux/tty.h>
17#include <linux/namespace.h>
18#include <linux/key.h>
19#include <linux/security.h>
20#include <linux/cpu.h>
21#include <linux/acct.h>
22#include <linux/file.h>
23#include <linux/binfmts.h>
24#include <linux/ptrace.h>
25#include <linux/profile.h>
26#include <linux/mount.h>
27#include <linux/proc_fs.h>
28#include <linux/mempolicy.h>
29#include <linux/cpuset.h>
30#include <linux/syscalls.h>
7ed20e1a 31#include <linux/signal.h>
9f46080c 32#include <linux/cn_proc.h>
de5097c2 33#include <linux/mutex.h>
0771dfef 34#include <linux/futex.h>
1da177e4
LT
35
36#include <asm/uaccess.h>
37#include <asm/unistd.h>
38#include <asm/pgtable.h>
39#include <asm/mmu_context.h>
40
41extern void sem_exit (void);
42extern struct task_struct *child_reaper;
43
44int getrusage(struct task_struct *, int, struct rusage __user *);
45
408b664a
AB
46static void exit_mm(struct task_struct * tsk);
47
1da177e4
LT
/*
 * Remove @p from all pid hash chains and from the task lists.
 *
 * Caller must hold tasklist_lock for writing -- both callers in this
 * file (release_task() and unhash_process()) take
 * write_lock_irq(&tasklist_lock) before calling here.
 */
static void __unhash_process(struct task_struct *p)
{
	nr_threads--;
	detach_pid(p, PIDTYPE_PID);
	detach_pid(p, PIDTYPE_TGID);
	if (thread_group_leader(p)) {
		/* Only the thread-group leader carries pgrp/session links. */
		detach_pid(p, PIDTYPE_PGID);
		detach_pid(p, PIDTYPE_SID);
		/*
		 * pid 0 is excluded from the per-cpu process count
		 * (presumably the idle tasks -- verify).
		 */
		if (p->pid)
			__get_cpu_var(process_counts)--;
	}

	REMOVE_LINKS(p);
}
62
/*
 * Final release of a dead task: drop its accounting, detach it from
 * all pid hashes and task lists, and free its task structure.
 *
 * If @p was the last non-leader thread of a group whose leader is
 * already a self-reaping zombie, the leader is released here as well
 * via the "repeat" loop.
 *
 * Lock ordering: p->proc_lock, then tasklist_lock (write, irq-off);
 * proc_pid_flush() must run only after both are dropped.
 */
void release_task(struct task_struct * p)
{
	int zap_leader;
	task_t *leader;
	struct dentry *proc_dentry;

repeat:
	atomic_dec(&p->user->processes);
	spin_lock(&p->proc_lock);
	proc_dentry = proc_pid_unhash(p);
	write_lock_irq(&tasklist_lock);
	if (unlikely(p->ptrace))
		__ptrace_unlink(p);
	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
	__exit_signal(p);
	/*
	 * Note that the fastpath in sys_times depends on __exit_signal having
	 * updated the counters before a task is removed from the tasklist of
	 * the process by __unhash_process.
	 */
	__unhash_process(p);

	/*
	 * If we are the last non-leader member of the thread
	 * group, and the leader is zombie, then notify the
	 * group leader's parent process. (if it wants notification.)
	 */
	zap_leader = 0;
	leader = p->group_leader;
	if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
		BUG_ON(leader->exit_signal == -1);
		do_notify_parent(leader, leader->exit_signal);
		/*
		 * If we were the last child thread and the leader has
		 * exited already, and the leader's parent ignores SIGCHLD,
		 * then we are the one who should release the leader.
		 *
		 * do_notify_parent() will have marked it self-reaping in
		 * that case.
		 */
		zap_leader = (leader->exit_signal == -1);
	}

	sched_exit(p);
	write_unlock_irq(&tasklist_lock);
	spin_unlock(&p->proc_lock);
	proc_pid_flush(proc_dentry);
	release_thread(p);
	put_task_struct(p);

	/* Loop once more to reap the leader if it became self-reaping. */
	p = leader;
	if (unlikely(zap_leader))
		goto repeat;
}
117
118/* we are using it only for SMP init */
119
120void unhash_process(struct task_struct *p)
121{
122 struct dentry *proc_dentry;
123
124 spin_lock(&p->proc_lock);
125 proc_dentry = proc_pid_unhash(p);
126 write_lock_irq(&tasklist_lock);
127 __unhash_process(p);
128 write_unlock_irq(&tasklist_lock);
129 spin_unlock(&p->proc_lock);
130 proc_pid_flush(proc_dentry);
131}
132
/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 *
 * Returns the session id for process group @pgrp, or -1 if none
 * could be determined.
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *p;
	int sid = -1;

	read_lock(&tasklist_lock);
	/* First choice: any group member with a positive session id. */
	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
		if (p->signal->session > 0) {
			sid = p->signal->session;
			goto out;
		}
	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
	/* Fallback: interpret @pgrp as a pid and use that task's session. */
	p = find_task_by_pid(pgrp);
	if (p)
		sid = p->signal->session;
out:
	read_unlock(&tasklist_lock);

	return sid;
}
158
/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52. Orphaned process groups are not to be affected
 * by terminal-generated stop signals. Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * @ignored_task is excluded from the scan (may be NULL), so a caller
 * can ask "will this pgrp be orphaned once @ignored_task is gone?".
 * Returns 1 if the group is (or will become) orphaned, 0 otherwise.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
{
	struct task_struct *p;
	int ret = 1;

	/* Caller must hold tasklist_lock -- see is_orphaned_pgrp(). */
	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
		/* Skip the excluded task, exiting tasks, and children of init. */
		if (p == ignored_task
				|| p->exit_state
				|| p->real_parent->pid == 1)
			continue;
		/*
		 * A living parent outside the group but in the same
		 * session keeps the group non-orphaned.
		 */
		if (process_group(p->real_parent) != pgrp
			    && p->real_parent->signal->session == p->signal->session) {
			ret = 0;
			break;
		}
	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
	return ret;	/* (sighing) "Often!" */
}
185
186int is_orphaned_pgrp(int pgrp)
187{
188 int retval;
189
190 read_lock(&tasklist_lock);
191 retval = will_become_orphaned_pgrp(pgrp, NULL);
192 read_unlock(&tasklist_lock);
193
194 return retval;
195}
196
/*
 * Return 1 if any member of process group @pgrp is genuinely stopped
 * (TASK_STOPPED), 0 otherwise.
 *
 * NOTE(review): caller is presumably expected to hold tasklist_lock,
 * like the other pgrp scanners here -- confirm at call sites.
 */
static int has_stopped_jobs(int pgrp)
{
	int retval = 0;
	struct task_struct *p;

	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
		if (p->state != TASK_STOPPED)
			continue;

		/* If p is stopped by a debugger on a signal that won't
		   stop it, then don't count p as stopped.  This isn't
		   perfect but it's a good approximation. */
		if (unlikely (p->ptrace)
		    && p->exit_code != SIGSTOP
		    && p->exit_code != SIGTSTP
		    && p->exit_code != SIGTTOU
		    && p->exit_code != SIGTTIN)
			continue;

		retval = 1;
		break;
	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
	return retval;
}
221
/**
 * reparent_to_init - Reparent the calling kernel thread to the init task.
 *
 * If a kernel thread is launched as a result of a system call, or if
 * it ever exits, it should generally reparent itself to init so that
 * it is correctly cleaned up on exit.
 *
 * The various task state such as scheduling policy and priority may have
 * been inherited from a user process, so we reset them to sane values here.
 *
 * NOTE that reparent_to_init() gives the caller full capabilities.
 */
static void reparent_to_init(void)
{
	write_lock_irq(&tasklist_lock);

	ptrace_unlink(current);
	/* Reparent to init */
	REMOVE_LINKS(current);
	current->parent = child_reaper;
	current->real_parent = child_reaper;
	SET_LINKS(current);

	/* Set the exit signal to SIGCHLD so we signal init on exit */
	current->exit_signal = SIGCHLD;

	/* Undo any inherited negative niceness for normal/batch policy. */
	if ((current->policy == SCHED_NORMAL ||
			current->policy == SCHED_BATCH)
				&& (task_nice(current) < 0))
		set_user_nice(current, 0);
	/* cpus_allowed? */
	/* rt_priority? */
	/* signals? */
	security_task_reparent_to_init(current);
	/* Take over init's rlimits and user accounting. */
	memcpy(current->signal->rlim, init_task.signal->rlim,
	       sizeof(current->signal->rlim));
	atomic_inc(&(INIT_USER->__count));
	write_unlock_irq(&tasklist_lock);
	/* switch_uid() presumably consumes the reference taken above. */
	switch_uid(INIT_USER);
}
262
/*
 * Move the current thread-group leader into session @session and
 * process group @pgrp, rehashing the SID/PGID pid links as needed.
 *
 * Caller must hold tasklist_lock for writing (see set_special_pids()).
 */
void __set_special_pids(pid_t session, pid_t pgrp)
{
	struct task_struct *curr = current->group_leader;

	if (curr->signal->session != session) {
		/* Detach before updating so the old hash link is removed. */
		detach_pid(curr, PIDTYPE_SID);
		curr->signal->session = session;
		attach_pid(curr, PIDTYPE_SID, session);
	}
	if (process_group(curr) != pgrp) {
		detach_pid(curr, PIDTYPE_PGID);
		curr->signal->pgrp = pgrp;
		attach_pid(curr, PIDTYPE_PGID, pgrp);
	}
}
278
/*
 * Locked wrapper for __set_special_pids(): serializes the pid-link
 * update against the task list.
 */
void set_special_pids(pid_t session, pid_t pgrp)
{
	write_lock_irq(&tasklist_lock);
	__set_special_pids(session, pgrp);
	write_unlock_irq(&tasklist_lock);
}
285
/*
 * Let kernel threads use this to say that they
 * allow a certain signal (since daemonize() will
 * have disabled all of them by default).
 *
 * Returns 0 on success, -EINVAL for an out-of-range signal number.
 */
int allow_signal(int sig)
{
	if (!valid_signal(sig) || sig < 1)
		return -EINVAL;

	spin_lock_irq(&current->sighand->siglock);
	sigdelset(&current->blocked, sig);
	if (!current->mm) {
		/* Kernel threads handle their own signals.
		   Let the signal code know it'll be handled, so
		   that they don't get converted to SIGKILL or
		   just silently dropped */
		/* The magic value 2 is a kernel-internal marker:
		   distinct from SIG_DFL (0) and SIG_IGN (1). */
		current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
	}
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);
	return 0;
}

EXPORT_SYMBOL(allow_signal);
311
312int disallow_signal(int sig)
313{
7ed20e1a 314 if (!valid_signal(sig) || sig < 1)
1da177e4
LT
315 return -EINVAL;
316
317 spin_lock_irq(&current->sighand->siglock);
318 sigaddset(&current->blocked, sig);
319 recalc_sigpending();
320 spin_unlock_irq(&current->sighand->siglock);
321 return 0;
322}
323
324EXPORT_SYMBOL(disallow_signal);
325
326/*
327 * Put all the gunge required to become a kernel thread without
328 * attached user resources in one place where it belongs.
329 */
330
331void daemonize(const char *name, ...)
332{
333 va_list args;
334 struct fs_struct *fs;
335 sigset_t blocked;
336
337 va_start(args, name);
338 vsnprintf(current->comm, sizeof(current->comm), name, args);
339 va_end(args);
340
341 /*
342 * If we were started as result of loading a module, close all of the
343 * user space pages. We don't need them, and if we didn't close them
344 * they would be locked into memory.
345 */
346 exit_mm(current);
347
348 set_special_pids(1, 1);
70522e12 349 mutex_lock(&tty_mutex);
1da177e4 350 current->signal->tty = NULL;
70522e12 351 mutex_unlock(&tty_mutex);
1da177e4
LT
352
353 /* Block and flush all signals */
354 sigfillset(&blocked);
355 sigprocmask(SIG_BLOCK, &blocked, NULL);
356 flush_signals(current);
357
358 /* Become as one with the init task */
359
360 exit_fs(current); /* current->fs->count--; */
361 fs = init_task.fs;
362 current->fs = fs;
363 atomic_inc(&fs->count);