sched: Fix ancient race in do_exit()
[linux-2.6-block.git] / kernel / exit.c
index c44738267be770118b64203eb3cbd411d9656686..4b4042f9bc6ade78a14199eb4ce96dc4ce6236b3 100644 (file)
@@ -964,8 +964,7 @@ void do_exit(long code)
        acct_collect(code, group_dead);
        if (group_dead)
                tty_audit_exit();
-       if (unlikely(tsk->audit_context))
-               audit_free(tsk);
+       audit_free(tsk);
 
        tsk->exit_code = code;
        taskstats_exit(tsk, group_dead);
@@ -1039,6 +1038,22 @@ void do_exit(long code)
        if (tsk->nr_dirtied)
                __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
        exit_rcu();
+
+       /*
+        * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+        * when both of the following conditions hold:
+        *   - there is a race condition on mmap_sem (which is acquired by
+        *     exit_mm()), and
+        *   - an SMI occurs before TASK_RUNNING is set
+        *     (or a virtual machine's hypervisor switches to another guest).
+        * As a result, we may become TASK_RUNNING after becoming TASK_DEAD.
+        *
+        * To avoid this, we have to wait for tsk->pi_lock, which is held by
+        * try_to_wake_up(), to be released.
+        */
+       smp_mb();
+       raw_spin_unlock_wait(&tsk->pi_lock);
+
        /* causes final put_task_struct in finish_task_switch(). */
        tsk->state = TASK_DEAD;
        tsk->flags |= PF_NOFREEZE;      /* tell freezer to ignore us */