sched/hotplug: Consolidate task migration on CPU unplug

[linux-2.6-block.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index e1093c443ff9c1d2ec5bed01232616d375920482..6c89806c834b587966b5135e1296a415df15c201 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6741,120 +6741,6 @@ void idle_task_exit(void)
         /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
  }
  
-/*
- * Since this CPU is going 'away' for a while, fold any nr_active delta
- * we might have. Assumes we're called after migrate_tasks() so that the
- * nr_active count is stable. We need to take the teardown thread which
- * is calling this into account, so we hand in adjust = 1 to the load
- * calculation.
- *
- * Also see the comment "Global load-average calculations".
- */
-static void calc_load_migrate(struct rq *rq)
-{
-       long delta = calc_load_fold_active(rq, 1);
-       if (delta)
-               atomic_long_add(delta, &calc_load_tasks);
-}
-
-static struct task_struct *__pick_migrate_task(struct rq *rq)
-{
-       const struct sched_class *class;
-       struct task_struct *next;
-
-       for_each_class(class) {
-               next = class->pick_next_task(rq);
-               if (next) {
-                       next->sched_class->put_prev_task(rq, next);
-                       return next;
-               }
-       }
-
-       /* The idle class should always have a runnable task */
-       BUG();
-}
-
-/*
- * Migrate all tasks from the rq, sleeping tasks will be migrated by
- * try_to_wake_up()->select_task_rq().
- *
- * Called with rq->lock held even though we'er in stop_machine() and
- * there's no concurrency possible, we hold the required locks anyway
- * because of lock validation efforts.
- */
-static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
-{
-       struct rq *rq = dead_rq;
-       struct task_struct *next, *stop = rq->stop;
-       struct rq_flags orf = *rf;
-       int dest_cpu;
-
-       /*
-        * Fudge the rq selection such that the below task selection loop
-        * doesn't get stuck on the currently eligible stop task.
-        *
-        * We're currently inside stop_machine() and the rq is either stuck
-        * in the stop_machine_cpu_stop() loop, or we're executing this code,
-        * either way we should never end up calling schedule() until we're
-        * done here.
-        */
-       rq->stop = NULL;
-
-       /*
-        * put_prev_task() and pick_next_task() sched
-        * class method both need to have an up-to-date
-        * value of rq->clock[_task]
-        */
-       update_rq_clock(rq);
-
-       for (;;) {
-               /*
-                * There's this thread running, bail when that's the only
-                * remaining thread:
-                */
-               if (rq->nr_running == 1)
-                       break;
-
-               next = __pick_migrate_task(rq);
-
-               /*
-                * Rules for changing task_struct::cpus_mask are holding
-                * both pi_lock and rq->lock, such that holding either
-                * stabilizes the mask.
-                *
-                * Drop rq->lock is not quite as disastrous as it usually is
-                * because !cpu_active at this point, which means load-balance
-                * will not interfere. Also, stop-machine.
-                */
-               rq_unlock(rq, rf);
-               raw_spin_lock(&next->pi_lock);
-               rq_relock(rq, rf);
-
-               /*
-                * Since we're inside stop-machine, _nothing_ should have
-                * changed the task, WARN if weird stuff happened, because in
-                * that case the above rq->lock drop is a fail too.
-                */
-               if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
-                       raw_spin_unlock(&next->pi_lock);
-                       continue;
-               }
-
-               /* Find suitable destination for @next, with force if needed. */
-               dest_cpu = select_fallback_rq(dead_rq->cpu, next);
-               rq = __migrate_task(rq, rf, next, dest_cpu);
-               if (rq != dead_rq) {
-                       rq_unlock(rq, rf);
-                       rq = dead_rq;
-                       *rf = orf;
-                       rq_relock(rq, rf);
-               }
-               raw_spin_unlock(&next->pi_lock);
-       }
-
-       rq->stop = stop;
-}
-
  static int __balance_push_cpu_stop(void *arg)
  {
         struct task_struct *p = arg;
@@ -7123,10 +7009,6 @@ int sched_cpu_deactivate(unsigned int cpu)
                 return ret;
         }
         sched_domains_numa_masks_clear(cpu);
-
-       /* Wait for all non per CPU kernel threads to vanish. */
-       balance_hotplug_wait();
-
         return 0;
  }
  
@@ -7146,6 +7028,41 @@ int sched_cpu_starting(unsigned int cpu)
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Invoked immediately before the stopper thread is invoked to bring the
+ * CPU down completely. At this point all per CPU kthreads except the
+ * hotplug thread (current) and the stopper thread (inactive) have been
+ * either parked or have been unbound from the outgoing CPU. Ensure that
+ * any of those which might be on the way out are gone.
+ *
+ * If after this point a bound task is being woken on this CPU then the
+ * responsible hotplug callback has failed to do it's job.
+ * sched_cpu_dying() will catch it with the appropriate fireworks.
+ */
+int sched_cpu_wait_empty(unsigned int cpu)
+{
+       balance_hotplug_wait();
+       return 0;
+}
+
+/*
+ * Since this CPU is going 'away' for a while, fold any nr_active delta we
+ * might have. Called from the CPU stopper task after ensuring that the
+ * stopper is the last running task on the CPU, so nr_active count is
+ * stable. We need to take the teardown thread which is calling this into
+ * account, so we hand in adjust = 1 to the load calculation.
+ *
+ * Also see the comment "Global load-average calculations".
+ */
+static void calc_load_migrate(struct rq *rq)
+{
+       long delta = calc_load_fold_active(rq, 1);
+
+       if (delta)
+               atomic_long_add(delta, &calc_load_tasks);
+}
+
  int sched_cpu_dying(unsigned int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
@@ -7159,7 +7076,6 @@ int sched_cpu_dying(unsigned int cpu)
                 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                 set_rq_offline(rq);
         }
-       migrate_tasks(rq, &rf);
         BUG_ON(rq->nr_running != 1);
         rq_unlock_irqrestore(rq, &rf);