Merge branch 'cpu_stop' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc...

author Ingo Molnar <mingo@elte.hu>
Sat, 8 May 2010 16:11:19 +0000 (18:11 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sat, 8 May 2010 16:11:19 +0000 (18:11 +0200)
diff --combined arch/s390/kernel/time.c

index d906bf19c14a2a309d7aca26d0903a5503987559,03d96569f187b33ac4d46cf9c462f7f6b0d239fc..a2163c95eb9845ffac908bf09b7af1bb5084cf3c
--- 1/arch/s390/kernel/time.c
--- 2/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@@ -221,7 -221,6 +221,7 @@@ void update_vsyscall(struct timespec *w
         vdso_data->xtime_clock_nsec = wall_time->tv_nsec;
         vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
         vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
+ +      vdso_data->ntp_mult = mult;
         smp_wmb();
         ++vdso_data->tb_update_count;
   }
@@@ -391,7 -390,6 +391,6 @@@ static void __init time_init_wq(void
         if (time_sync_wq)
                 return;
         time_sync_wq = create_singlethread_workqueue("timesync");
-       stop_machine_create();
   }
   
   /*
diff --combined kernel/sched.c

index 11ac0eb0bce7cfc9d67b4a06f15c50bfdfea6900,fbaf3128d010ddd8079344d4c219a9b37f4a3973..39aa9c7e22c05492e2a66d1379fef4c1b8f2494d
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -55,9 -55,9 +55,9 @@@
   #include <linux/cpu.h>
   #include <linux/cpuset.h>
   #include <linux/percpu.h>
- #include <linux/kthread.h>
   #include <linux/proc_fs.h>
   #include <linux/seq_file.h>
+ #include <linux/stop_machine.h>
   #include <linux/sysctl.h>
   #include <linux/syscalls.h>
   #include <linux/times.h>
@@@ -539,15 -539,13 +539,13 @@@ struct rq 
         int post_schedule;
         int active_balance;
         int push_cpu;
+       struct cpu_stop_work active_balance_work;
         /* cpu of this runqueue: */
         int cpu;
         int online;
   
         unsigned long avg_load_per_task;
   
-       struct task_struct *migration_thread;
-       struct list_head migration_queue;
- 
         u64 rt_avg;
         u64 age_stamp;
         u64 idle_stamp;
@@@ -2037,21 -2035,18 +2035,18 @@@ void set_task_cpu(struct task_struct *p
         __set_task_cpu(p, new_cpu);
   }
   
- struct migration_req {
-       struct list_head list;
- 
+ struct migration_arg {
         struct task_struct *task;
         int dest_cpu;
- 
-       struct completion done;
   };
   
+ static int migration_cpu_stop(void *data);
+ 
   /*
    * The task's runqueue lock must be held.
    * Returns true if you have to wait for migration thread.
    */
- static int
- migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
+ static bool migrate_task(struct task_struct *p, int dest_cpu)
   {
         struct rq *rq = task_rq(p);
   
@@@ -2059,15 -2054,7 +2054,7 @@@
          * If the task is not on a runqueue (and not running), then
          * the next wake-up will properly place the task.
          */
-       if (!p->se.on_rq && !task_running(rq, p))
-               return 0;
- 
-       init_completion(&req->done);
-       req->task = p;
-       req->dest_cpu = dest_cpu;
-       list_add(&req->list, &rq->migration_queue);
- 
-       return 1;
+       return p->se.on_rq || task_running(rq, p);
   }
   
   /*
@@@ -2168,7 -2155,7 +2155,7 @@@ unsigned long wait_task_inactive(struc
                  * just go back and repeat.
                  */
                 rq = task_rq_lock(p, &flags);
- -              trace_sched_wait_task(rq, p);
+ +              trace_sched_wait_task(p);
                 running = task_running(rq, p);
                 on_rq = p->se.on_rq;
                 ncsw = 0;
@@@ -2439,7 -2426,7 +2426,7 @@@ out_activate
         success = 1;
   
   out_running:
- -      trace_sched_wakeup(rq, p, success);
+ +      trace_sched_wakeup(p, success);
         check_preempt_curr(rq, p, wake_flags);
   
         p->state = TASK_RUNNING;
@@@ -2613,7 -2600,7 +2600,7 @@@ void wake_up_new_task(struct task_struc
   
         rq = task_rq_lock(p, &flags);
         activate_task(rq, p, 0);
- -      trace_sched_wakeup_new(rq, p, 1);
+ +      trace_sched_wakeup_new(p, 1);
         check_preempt_curr(rq, p, WF_FORK);
   #ifdef CONFIG_SMP
         if (p->sched_class->task_woken)
@@@ -2833,7 -2820,7 +2820,7 @@@ context_switch(struct rq *rq, struct ta
         struct mm_struct *mm, *oldmm;
   
         prepare_task_switch(rq, prev, next);
- -      trace_sched_switch(rq, prev, next);
+ +      trace_sched_switch(prev, next);
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@@ -3110,7 -3097,6 +3097,6 @@@ static void update_cpu_load(struct rq *
   void sched_exec(void)
   {
         struct task_struct *p = current;
-       struct migration_req req;
         unsigned long flags;
         struct rq *rq;
         int dest_cpu;
@@@ -3124,17 -3110,11 +3110,11 @@@
          * select_task_rq() can race against ->cpus_allowed
          */
         if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-           likely(cpu_active(dest_cpu)) &&
-           migrate_task(p, dest_cpu, &req)) {
-               /* Need to wait for migration thread (might exit: take ref). */
-               struct task_struct *mt = rq->migration_thread;
+           likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
+               struct migration_arg arg = { p, dest_cpu };
   
-               get_task_struct(mt);
                 task_rq_unlock(rq, &flags);
-               wake_up_process(mt);
-               put_task_struct(mt);
-               wait_for_completion(&req.done);
- 
+               stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
                 return;
         }
   unlock:
@@@ -5290,17 -5270,15 +5270,15 @@@ static inline void sched_init_granulari
   /*
    * This is how migration works:
    *
-  * 1) we queue a struct migration_req structure in the source CPU's
-  *    runqueue and wake up that CPU's migration thread.
-  * 2) we down() the locked semaphore => thread blocks.
-  * 3) migration thread wakes up (implicitly it forces the migrated
-  *    thread off the CPU)
-  * 4) it gets the migration request and checks whether the migrated
-  *    task is still in the wrong runqueue.
-  * 5) if it's in the wrong runqueue then the migration thread removes
+  * 1) we invoke migration_cpu_stop() on the target CPU using
+  *    stop_one_cpu().
+  * 2) stopper starts to run (implicitly forcing the migrated thread
+  *    off the CPU)
+  * 3) it checks whether the migrated task is still in the wrong runqueue.
+  * 4) if it's in the wrong runqueue then the migration thread removes
    *    it and puts it into the right queue.
-  * 6) migration thread up()s the semaphore.
-  * 7) we wake up and the migration is done.
+  * 5) stopper completes and stop_one_cpu() returns and the migration
+  *    is done.
    */
   
   /*
@@@ -5314,9 -5292,9 +5292,9 @@@
    */
   int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
   {
-       struct migration_req req;
         unsigned long flags;
         struct rq *rq;
+       unsigned int dest_cpu;
         int ret = 0;
   
         /*
@@@ -5354,15 -5332,12 +5332,12 @@@ again
         if (cpumask_test_cpu(task_cpu(p), new_mask))
                 goto out;
   
-       if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
+       dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+       if (migrate_task(p, dest_cpu)) {
+               struct migration_arg arg = { p, dest_cpu };
                 /* Need help from migration thread: drop lock and wait. */
-               struct task_struct *mt = rq->migration_thread;
- 
-               get_task_struct(mt);
                 task_rq_unlock(rq, &flags);
-               wake_up_process(mt);
-               put_task_struct(mt);
-               wait_for_completion(&req.done);
+               stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
                 tlb_migrate_finish(p->mm);
                 return 0;
         }
@@@ -5420,70 -5395,22 +5395,22 @@@ fail
         return ret;
   }
   
- #define RCU_MIGRATION_IDLE    0
- #define RCU_MIGRATION_NEED_QS 1
- #define RCU_MIGRATION_GOT_QS  2
- #define RCU_MIGRATION_MUST_SYNC       3
- 
   /*
-  * migration_thread - this is a highprio system thread that performs
-  * thread migration by bumping thread off CPU then 'pushing' onto
-  * another runqueue.
+  * migration_cpu_stop - this will be executed by a highprio stopper thread
+  * and performs thread migration by bumping thread off CPU then
+  * 'pushing' onto another runqueue.
    */
- static int migration_thread(void *data)
+ static int migration_cpu_stop(void *data)
   {
-       int badcpu;
-       int cpu = (long)data;
-       struct rq *rq;
- 
-       rq = cpu_rq(cpu);
-       BUG_ON(rq->migration_thread != current);
- 
-       set_current_state(TASK_INTERRUPTIBLE);
-       while (!kthread_should_stop()) {
-               struct migration_req *req;
-               struct list_head *head;
- 
-               raw_spin_lock_irq(&rq->lock);
- 
-               if (cpu_is_offline(cpu)) {
-                       raw_spin_unlock_irq(&rq->lock);
-                       break;
-               }
- 
-               if (rq->active_balance) {
-                       active_load_balance(rq, cpu);
-                       rq->active_balance = 0;
-               }
- 
-               head = &rq->migration_queue;
- 
-               if (list_empty(head)) {
-                       raw_spin_unlock_irq(&rq->lock);
-                       schedule();
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       continue;
-               }
-               req = list_entry(head->next, struct migration_req, list);
-               list_del_init(head->next);
- 
-               if (req->task != NULL) {
-                       raw_spin_unlock(&rq->lock);
-                       __migrate_task(req->task, cpu, req->dest_cpu);
-               } else if (likely(cpu == (badcpu = smp_processor_id()))) {
-                       req->dest_cpu = RCU_MIGRATION_GOT_QS;
-                       raw_spin_unlock(&rq->lock);
-               } else {
-                       req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
-                       raw_spin_unlock(&rq->lock);
-                       WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
-               }
-               local_irq_enable();
- 
-               complete(&req->done);
-       }
-       __set_current_state(TASK_RUNNING);
+       struct migration_arg *arg = data;
   
+       /*
+        * The original target cpu might have gone down and we might
+        * be on another cpu but it doesn't matter.
+        */
+       local_irq_disable();
+       __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
+       local_irq_enable();
         return 0;
   }
   
@@@ -5850,35 -5777,20 +5777,20 @@@ static void set_rq_offline(struct rq *r
   static int __cpuinit
   migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
   {
-       struct task_struct *p;
         int cpu = (long)hcpu;
         unsigned long flags;
-       struct rq *rq;
+       struct rq *rq = cpu_rq(cpu);
   
         switch (action) {
   
         case CPU_UP_PREPARE:
         case CPU_UP_PREPARE_FROZEN:
-               p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
-               if (IS_ERR(p))
-                       return NOTIFY_BAD;
-               kthread_bind(p, cpu);
-               /* Must be high prio: stop_machine expects to yield to it. */
-               rq = task_rq_lock(p, &flags);
-               __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
-               task_rq_unlock(rq, &flags);
-               get_task_struct(p);
-               cpu_rq(cpu)->migration_thread = p;
                 rq->calc_load_update = calc_load_update;
                 break;
   
         case CPU_ONLINE:
         case CPU_ONLINE_FROZEN:
-               /* Strictly unnecessary, as first user will wake it. */
-               wake_up_process(cpu_rq(cpu)->migration_thread);
- 
                 /* Update our root-domain */
-               rq = cpu_rq(cpu);
                 raw_spin_lock_irqsave(&rq->lock, flags);
                 if (rq->rd) {
                         BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@@ -5889,25 -5801,9 +5801,9 @@@
                 break;
   
   #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               if (!cpu_rq(cpu)->migration_thread)
-                       break;
-               /* Unbind it from offline cpu so it can run. Fall thru. */
-               kthread_bind(cpu_rq(cpu)->migration_thread,
-                            cpumask_any(cpu_online_mask));
-               kthread_stop(cpu_rq(cpu)->migration_thread);
-               put_task_struct(cpu_rq(cpu)->migration_thread);
-               cpu_rq(cpu)->migration_thread = NULL;
-               break;
- 
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
                 migrate_live_tasks(cpu);
-               rq = cpu_rq(cpu);
-               kthread_stop(rq->migration_thread);
-               put_task_struct(rq->migration_thread);
-               rq->migration_thread = NULL;
                 /* Idle task back to normal (off runqueue, low prio) */
                 raw_spin_lock_irq(&rq->lock);
                 deactivate_task(rq, rq->idle, 0);
@@@ -5918,29 -5814,11 +5814,11 @@@
                 migrate_nr_uninterruptible(rq);
                 BUG_ON(rq->nr_running != 0);
                 calc_global_load_remove(rq);
-               /*
-                * No need to migrate the tasks: it was best-effort if
-                * they didn't take sched_hotcpu_mutex. Just wake up
-                * the requestors.
-                */
-               raw_spin_lock_irq(&rq->lock);
-               while (!list_empty(&rq->migration_queue)) {
-                       struct migration_req *req;
- 
-                       req = list_entry(rq->migration_queue.next,
-                                        struct migration_req, list);
-                       list_del_init(&req->list);
-                       raw_spin_unlock_irq(&rq->lock);
-                       complete(&req->done);
-                       raw_spin_lock_irq(&rq->lock);
-               }
-               raw_spin_unlock_irq(&rq->lock);
                 break;
   
         case CPU_DYING:
         case CPU_DYING_FROZEN:
                 /* Update our root-domain */
-               rq = cpu_rq(cpu);
                 raw_spin_lock_irqsave(&rq->lock, flags);
                 if (rq->rd) {
                         BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@@ -7757,10 -7635,8 +7635,8 @@@ void __init sched_init(void
                 rq->push_cpu = 0;
                 rq->cpu = i;
                 rq->online = 0;
-               rq->migration_thread = NULL;
                 rq->idle_stamp = 0;
                 rq->avg_idle = 2*sysctl_sched_migration_cost;
-               INIT_LIST_HEAD(&rq->migration_queue);
                 rq_attach_root(rq, &def_root_domain);
   #endif
                 init_rq_hrtick(rq);
@@@ -9054,43 -8930,32 +8930,32 @@@ struct cgroup_subsys cpuacct_subsys = 
   
   #ifndef CONFIG_SMP
   
- int rcu_expedited_torture_stats(char *page)
- {
-       return 0;
- }
- EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
- 
   void synchronize_sched_expedited(void)
   {
+       barrier();
   }
   EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
   
   #else /* #ifndef CONFIG_SMP */
   
- static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
- static DEFINE_MUTEX(rcu_sched_expedited_mutex);
- 
- #define RCU_EXPEDITED_STATE_POST -2
- #define RCU_EXPEDITED_STATE_IDLE -1
- 
- static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+ static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
   
- int rcu_expedited_torture_stats(char *page)
+ static int synchronize_sched_expedited_cpu_stop(void *data)
   {
-       int cnt = 0;
-       int cpu;
- 
-       cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
-       for_each_online_cpu(cpu) {
-                cnt += sprintf(&page[cnt], " %d:%d",
-                               cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
-       }
-       cnt += sprintf(&page[cnt], "\n");
-       return cnt;
+       /*
+        * There must be a full memory barrier on each affected CPU
+        * between the time that try_stop_cpus() is called and the
+        * time that it returns.
+        *
+        * In the current initial implementation of cpu_stop, the
+        * above condition is already met when the control reaches
+        * this point and the following smp_mb() is not strictly
+        * necessary.  Do smp_mb() anyway for documentation and
+        * robustness against future implementation changes.
+        */
+       smp_mb(); /* See above comment block. */
+       return 0;
   }
- EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
- 
- static long synchronize_sched_expedited_count;
   
   /*
    * Wait for an rcu-sched grace period to elapse, but use "big hammer"
@@@ -9104,18 -8969,14 +8969,14 @@@
    */
   void synchronize_sched_expedited(void)
   {
-       int cpu;
-       unsigned long flags;
-       bool need_full_sync = 0;
-       struct rq *rq;
-       struct migration_req *req;
-       long snap;
-       int trycount = 0;
+       int snap, trycount = 0;
   
         smp_mb();  /* ensure prior mod happens before capturing snap. */
-       snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
+       snap = atomic_read(&synchronize_sched_expedited_count) + 1;
         get_online_cpus();
-       while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
+       while (try_stop_cpus(cpu_online_mask,
+                            synchronize_sched_expedited_cpu_stop,
+                            NULL) == -EAGAIN) {
                 put_online_cpus();
                 if (trycount++ < 10)
                         udelay(trycount * num_online_cpus());
@@@ -9123,41 -8984,15 +8984,15 @@@
                         synchronize_sched();
                         return;
                 }
-               if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
+               if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
                         smp_mb(); /* ensure test happens before caller kfree */
                         return;
                 }
                 get_online_cpus();
         }
-       rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
-       for_each_online_cpu(cpu) {
-               rq = cpu_rq(cpu);
-               req = &per_cpu(rcu_migration_req, cpu);
-               init_completion(&req->done);
-               req->task = NULL;
-               req->dest_cpu = RCU_MIGRATION_NEED_QS;
-               raw_spin_lock_irqsave(&rq->lock, flags);
-               list_add(&req->list, &rq->migration_queue);
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
-               wake_up_process(rq->migration_thread);
-       }
-       for_each_online_cpu(cpu) {
-               rcu_expedited_state = cpu;
-               req = &per_cpu(rcu_migration_req, cpu);
-               rq = cpu_rq(cpu);
-               wait_for_completion(&req->done);
-               raw_spin_lock_irqsave(&rq->lock, flags);
-               if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
-                       need_full_sync = 1;
-               req->dest_cpu = RCU_MIGRATION_IDLE;
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
-       }
-       rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
-       synchronize_sched_expedited_count++;
-       mutex_unlock(&rcu_sched_expedited_mutex);
+       atomic_inc(&synchronize_sched_expedited_count);
+       smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
         put_online_cpus();
-       if (need_full_sync)
-               synchronize_sched();
   }
   EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
author	Ingo Molnar <mingo@elte.hu>
	Sat, 8 May 2010 16:11:19 +0000 (18:11 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Sat, 8 May 2010 16:11:19 +0000 (18:11 +0200)
		1	2
arch/s390/kernel/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history