smp/hotplug: Rewrite AP state machine core
author Peter Zijlstra <peterz@infradead.org>
Wed, 20 Sep 2017 17:00:17 +0000 (19:00 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Mon, 25 Sep 2017 20:11:42 +0000 (22:11 +0200)
There is currently no explicit state change on rollback. That is,
st->bringup, st->rollback and st->target are not consistent when doing
the rollback.

Rework the AP state handling to be more coherent. This does mean we
have to do a second AP kick-and-wait for rollback, but since rollback
is the slow path of a slowpath, this really should not matter.

Take this opportunity to simplify the AP thread function to only run a
single callback per invocation. This unifies the three single/up/down
modes it supports. The looping it used to do for up/down is achieved
by retaining should_run and relying on the main smpboot_thread_fn()
loop.

(I have most of a patch that does the same for the BP state handling,
but that's not critical and gets a little complicated because
CPUHP_BRINGUP_CPU does the AP handoff from a callback, which gets
recursive @st usage; I still have to de-fugly that.)

[ tglx: Move cpuhp_down_callbacks() et al. into the HOTPLUG_CPU section to
   avoid gcc complaining about unused functions. Make the HOTPLUG_CPU
   one piece instead of having two consecutive ifdef sections of the
   same type. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Cc: max.byungchul.park@gmail.com
Link: https://lkml.kernel.org/r/20170920170546.769658088@infradead.org
kernel/cpu.c

index 323b71050b54b9a6761b977cee3651e186ce78f3..1139063de5afbe3a1459f4fa91751911bb2a7177 100644 (file)
@@ -58,6 +58,7 @@ struct cpuhp_cpu_state {
        bool                    single;
        bool                    bringup;
        struct hlist_node       *node;
+       struct hlist_node       *last;
        enum cpuhp_state        cb_state;
        int                     result;
        struct completion       done;
@@ -112,6 +113,14 @@ static bool cpuhp_is_ap_state(enum cpuhp_state state)
        return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 }
 
+/*
+ * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
+ */
+static bool cpuhp_is_atomic_state(enum cpuhp_state state)
+{
+       return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
+}
+
 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 {
        struct cpuhp_step *sp;
@@ -286,7 +295,72 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
+static inline enum cpuhp_state
+cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state = st->state;
+
+       st->rollback = false;
+       st->last = NULL;
+
+       st->target = target;
+       st->single = false;
+       st->bringup = st->state < target;
+
+       return prev_state;
+}
+
+static inline void
+cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+{
+       st->rollback = true;
+
+       /*
+        * If we have st->last we need to undo partial multi_instance of this
+        * state first. Otherwise start undo at the previous state.
+        */
+       if (!st->last) {
+               if (st->bringup)
+                       st->state--;
+               else
+                       st->state++;
+       }
+
+       st->target = prev_state;
+       st->bringup = !st->bringup;
+}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
+{
+       if (!st->single && st->state == st->target)
+               return;
+
+       st->result = 0;
+       /*
+        * Make sure the above stores are visible before should_run becomes
+        * true. Paired with the mb() above in cpuhp_thread_fun()
+        */
+       smp_mb();
+       st->should_run = true;
+       wake_up_process(st->thread);
+       wait_for_completion(&st->done);
+}
+
+static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state;
+       int ret;
+
+       prev_state = cpuhp_set_state(st, target);
+       __cpuhp_kick_ap(st);
+       if ((ret = st->result)) {
+               cpuhp_reset_state(st, prev_state);
+               __cpuhp_kick_ap(st);
+       }
+
+       return ret;
+}
 
 static int bringup_wait_for_ap(unsigned int cpu)
 {
@@ -301,12 +375,10 @@ static int bringup_wait_for_ap(unsigned int cpu)
        stop_machine_unpark(cpu);
        kthread_unpark(st->thread);
 
-       /* Should we go further up ? */
-       if (st->target > CPUHP_AP_ONLINE_IDLE) {
-               __cpuhp_kick_ap_work(st);
-               wait_for_completion(&st->done);
-       }
-       return st->result;
+       if (st->target <= CPUHP_AP_ONLINE_IDLE)
+               return 0;
+
+       return cpuhp_kick_ap(st, st->target);
 }
 
 static int bringup_cpu(unsigned int cpu)
@@ -332,32 +404,6 @@ static int bringup_cpu(unsigned int cpu)
 /*
  * Hotplug state machine related functions
  */
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       for (st->state++; st->state < st->target; st->state++) {
-               struct cpuhp_step *step = cpuhp_get_step(st->state);
-
-               if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
-       }
-}
-
-static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-                               enum cpuhp_state target)
-{
-       enum cpuhp_state prev_state = st->state;
-       int ret = 0;
-
-       for (; st->state > target; st->state--) {
-               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
-               if (ret) {
-                       st->target = prev_state;
-                       undo_cpu_down(cpu, st);
-                       break;
-               }
-       }
-       return ret;
-}
 
 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 {
@@ -404,71 +450,90 @@ static int cpuhp_should_run(unsigned int cpu)
        return st->should_run;
 }
 
-/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
-static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
-
-       return cpuhp_down_callbacks(cpu, st, target);
-}
-
-/* Execute the online startup callbacks. Used to be CPU_ONLINE */
-static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       return cpuhp_up_callbacks(cpu, st, st->target);
-}
-
 /*
  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
  * callbacks when a state gets [un]installed at runtime.
+ *
+ * Each invocation of this function by the smpboot thread does a single AP
+ * state callback.
+ *
+ * It has 3 modes of operation:
+ *  - single: runs st->cb_state
+ *  - up:     runs ++st->state, while st->state < st->target
+ *  - down:   runs st->state--, while st->state > st->target
+ *
+ * When complete or on error, should_run is cleared and the completion is fired.
  */
 static void cpuhp_thread_fun(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
-       int ret = 0;
+       bool bringup = st->bringup;
+       enum cpuhp_state state;
 
        /*
-        * Paired with the mb() in cpuhp_kick_ap_work and
-        * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+        * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
+        * that if we see ->should_run we also see the rest of the state.
         */
        smp_mb();
-       if (!st->should_run)
-               return;
 
-       st->should_run = false;
+       if (WARN_ON_ONCE(!st->should_run))
+               return;
 
        lock_map_acquire(&cpuhp_state_lock_map);
-       /* Single callback invocation for [un]install ? */
+
        if (st->single) {
-               if (st->cb_state < CPUHP_AP_ONLINE) {
-                       local_irq_disable();
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                   st->bringup, st->node,
-                                                   NULL);
-                       local_irq_enable();
+               state = st->cb_state;
+               st->should_run = false;
+       } else {
+               if (bringup) {
+                       st->state++;
+                       state = st->state;
+                       st->should_run = (st->state < st->target);
+                       WARN_ON_ONCE(st->state > st->target);
                } else {
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                   st->bringup, st->node,
-                                                   NULL);
+                       state = st->state;
+                       st->state--;
+                       st->should_run = (st->state > st->target);
+                       WARN_ON_ONCE(st->state < st->target);
                }
-       } else if (st->rollback) {
-               BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+       }
+
+       WARN_ON_ONCE(!cpuhp_is_ap_state(state));
+
+       if (st->rollback) {
+               struct cpuhp_step *step = cpuhp_get_step(state);
+               if (step->skip_onerr)
+                       goto next;
+       }
+
+       if (cpuhp_is_atomic_state(state)) {
+               local_irq_disable();
+               st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+               local_irq_enable();
 
-               undo_cpu_down(cpu, st);
-               st->rollback = false;
+               /*
+                * STARTING/DYING must not fail!
+                */
+               WARN_ON_ONCE(st->result);
        } else {
-               /* Cannot happen .... */
-               BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
-
-               /* Regular hotplug work */
-               if (st->state < st->target)
-                       ret = cpuhp_ap_online(cpu, st);
-               else if (st->state > st->target)
-                       ret = cpuhp_ap_offline(cpu, st);
+               st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+       }
+
+       if (st->result) {
+               /*
+                * If we fail on a rollback, we're up a creek without no
+                * paddle, no way forward, no way back. We loose, thanks for
+                * playing.
+                */
+               WARN_ON_ONCE(st->rollback);
+               st->should_run = false;
        }
+
+next:
        lock_map_release(&cpuhp_state_lock_map);
-       st->result = ret;
-       complete(&st->done);
+
+       if (!st->should_run)
+               complete(&st->done);
 }
 
 /* Invoke a single callback on a remote cpu */
@@ -477,6 +542,7 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
                         struct hlist_node *node)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       int ret;
 
        if (!cpu_online(cpu))
                return 0;
@@ -491,48 +557,43 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
        if (!st->thread)
                return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 
+       st->rollback = false;
+       st->last = NULL;
+
+       st->node = node;
+       st->bringup = bringup;
        st->cb_state = state;
        st->single = true;
-       st->bringup = bringup;
-       st->node = node;
 
-       /*
-        * Make sure the above stores are visible before should_run becomes
-        * true. Paired with the mb() above in cpuhp_thread_fun()
-        */
-       smp_mb();
-       st->should_run = true;
-       wake_up_process(st->thread);
-       wait_for_completion(&st->done);
-       return st->result;
-}
+       __cpuhp_kick_ap(st);
 
-/* Regular hotplug invocation of the AP hotplug thread */
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
-{
-       st->result = 0;
-       st->single = false;
        /*
-        * Make sure the above stores are visible before should_run becomes
-        * true. Paired with the mb() above in cpuhp_thread_fun()
+        * If we failed and did a partial, do a rollback.
         */
-       smp_mb();
-       st->should_run = true;
-       wake_up_process(st->thread);
+       if ((ret = st->result) && st->last) {
+               st->rollback = true;
+               st->bringup = !bringup;
+
+               __cpuhp_kick_ap(st);
+       }
+
+       return ret;
 }
 
 static int cpuhp_kick_ap_work(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-       enum cpuhp_state state = st->state;
+       enum cpuhp_state prev_state = st->state;
+       int ret;
 
-       trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
        lock_map_acquire(&cpuhp_state_lock_map);
        lock_map_release(&cpuhp_state_lock_map);
-       __cpuhp_kick_ap_work(st);
-       wait_for_completion(&st->done);
-       trace_cpuhp_exit(cpu, st->state, state, st->result);
-       return st->result;
+
+       trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
+       ret = cpuhp_kick_ap(st, st->target);
+       trace_cpuhp_exit(cpu, st->state, prev_state, ret);
+
+       return ret;
 }
 
 static struct smp_hotplug_thread cpuhp_threads = {
@@ -693,11 +754,32 @@ void cpuhp_report_idle_dead(void)
                                 cpuhp_complete_idle_dead, st, 0);
 }
 
-#else
-#define takedown_cpu           NULL
-#endif
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+       for (st->state++; st->state < st->target; st->state++) {
+               struct cpuhp_step *step = cpuhp_get_step(st->state);
 
-#ifdef CONFIG_HOTPLUG_CPU
+               if (!step->skip_onerr)
+                       cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+       }
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+                               enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state = st->state;
+       int ret = 0;
+
+       for (; st->state > target; st->state--) {
+               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+               if (ret) {
+                       st->target = prev_state;
+                       undo_cpu_down(cpu, st);
+                       break;
+               }
+       }
+       return ret;
+}
 
 /* Requires cpu_add_remove_lock to be held */
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
@@ -716,13 +798,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       prev_state = st->state;
-       st->target = target;
+       prev_state = cpuhp_set_state(st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread.
         */
        if (st->state > CPUHP_TEARDOWN_CPU) {
+               st->target = max((int)target, CPUHP_TEARDOWN_CPU);
                ret = cpuhp_kick_ap_work(cpu);
                /*
                 * The AP side has done the error rollback already. Just
@@ -737,6 +819,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
                 */
                if (st->state > CPUHP_TEARDOWN_CPU)
                        goto out;
+
+               st->target = target;
        }
        /*
         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
@@ -744,9 +828,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
         */
        ret = cpuhp_down_callbacks(cpu, st, target);
        if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
-               st->target = prev_state;
-               st->rollback = true;
-               cpuhp_kick_ap_work(cpu);
+               cpuhp_reset_state(st, prev_state);
+               __cpuhp_kick_ap(st);
        }
 
 out:
@@ -771,11 +854,15 @@ out:
        cpu_maps_update_done();
        return err;
 }
+
 int cpu_down(unsigned int cpu)
 {
        return do_cpu_down(cpu, CPUHP_OFFLINE);
 }
 EXPORT_SYMBOL(cpu_down);
+
+#else
+#define takedown_cpu           NULL
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /**
@@ -846,7 +933,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       st->target = target;
+       cpuhp_set_state(st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread once more.
@@ -1313,6 +1400,10 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
        struct cpuhp_step *sp = cpuhp_get_step(state);
        int ret;
 
+       /*
+        * If there's nothing to do, we done.
+        * Relies on the union for multi_instance.
+        */
        if ((bringup && !sp->startup.single) ||
            (!bringup && !sp->teardown.single))
                return 0;