Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg...
[linux-block.git] / kernel / cpu.c
CommitLineData
1da177e4
LT
1/* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
bf2c59fc 6#include <linux/sched/mm.h>
1da177e4
LT
7#include <linux/proc_fs.h>
8#include <linux/smp.h>
9#include <linux/init.h>
10#include <linux/notifier.h>
3f07c014 11#include <linux/sched/signal.h>
ef8bd77f 12#include <linux/sched/hotplug.h>
9ca12ac0 13#include <linux/sched/isolation.h>
29930025 14#include <linux/sched/task.h>
a74cfffb 15#include <linux/sched/smt.h>
1da177e4
LT
16#include <linux/unistd.h>
17#include <linux/cpu.h>
cb79295e
AV
18#include <linux/oom.h>
19#include <linux/rcupdate.h>
6f062123 20#include <linux/delay.h>
9984de1a 21#include <linux/export.h>
e4cc2f87 22#include <linux/bug.h>
1da177e4
LT
23#include <linux/kthread.h>
24#include <linux/stop_machine.h>
81615b62 25#include <linux/mutex.h>
5a0e3ad6 26#include <linux/gfp.h>
79cfbdfa 27#include <linux/suspend.h>
a19423b9 28#include <linux/lockdep.h>
345527b1 29#include <linux/tick.h>
a8994181 30#include <linux/irq.h>
941154bd 31#include <linux/nmi.h>
4cb28ced 32#include <linux/smpboot.h>
e6d4989a 33#include <linux/relay.h>
6731d4f1 34#include <linux/slab.h>
dce1ca05 35#include <linux/scs.h>
fc8dffd3 36#include <linux/percpu-rwsem.h>
b22afcdf 37#include <linux/cpuset.h>
3191dd5a 38#include <linux/random.h>
bae1a962 39#include <linux/cc_platform.h>
cff7d378 40
bb3632c6 41#include <trace/events/power.h>
cff7d378
TG
42#define CREATE_TRACE_POINTS
43#include <trace/events/cpuhp.h>
1da177e4 44
38498a67
TG
45#include "smpboot.h"
46
cff7d378 47/**
11bc021d 48 * struct cpuhp_cpu_state - Per cpu hotplug state storage
cff7d378
TG
49 * @state: The current cpu state
50 * @target: The target state
11bc021d 51 * @fail: Current CPU hotplug callback state
4cb28ced
TG
52 * @thread: Pointer to the hotplug thread
53 * @should_run: Thread should execute
3b9d6da6 54 * @rollback: Perform a rollback
a724632c
TG
55 * @single: Single callback invocation
56 * @bringup: Single callback bringup or teardown selector
11bc021d
RD
57 * @cpu: CPU number
58 * @node: Remote CPU node; for multi-instance, do a
59 * single entry callback for install/remove
60 * @last: For multi-instance rollback, remember how far we got
a724632c 61 * @cb_state: The state for a single callback (install/uninstall)
4cb28ced 62 * @result: Result of the operation
6f062123 63 * @ap_sync_state: State for AP synchronization
5ebe7742
PZ
64 * @done_up: Signal completion to the issuer of the task for cpu-up
65 * @done_down: Signal completion to the issuer of the task for cpu-down
cff7d378
TG
66 */
67struct cpuhp_cpu_state {
68 enum cpuhp_state state;
69 enum cpuhp_state target;
1db49484 70 enum cpuhp_state fail;
4cb28ced
TG
71#ifdef CONFIG_SMP
72 struct task_struct *thread;
73 bool should_run;
3b9d6da6 74 bool rollback;
a724632c
TG
75 bool single;
76 bool bringup;
cf392d10 77 struct hlist_node *node;
4dddfb5f 78 struct hlist_node *last;
4cb28ced 79 enum cpuhp_state cb_state;
4cb28ced 80 int result;
6f062123 81 atomic_t ap_sync_state;
5ebe7742
PZ
82 struct completion done_up;
83 struct completion done_down;
4cb28ced 84#endif
cff7d378
TG
85};
86
1db49484
PZ
87static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
88 .fail = CPUHP_INVALID,
89};
cff7d378 90
e797bda3
TG
91#ifdef CONFIG_SMP
92cpumask_t cpus_booted_once_mask;
93#endif
94
49dfe2a6 95#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
5f4b55e1
PZ
96static struct lockdep_map cpuhp_state_up_map =
97 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
98static struct lockdep_map cpuhp_state_down_map =
99 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
100
101
76dc6c09 102static inline void cpuhp_lock_acquire(bool bringup)
5f4b55e1
PZ
103{
104 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
105}
106
76dc6c09 107static inline void cpuhp_lock_release(bool bringup)
5f4b55e1
PZ
108{
109 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
110}
111#else
112
76dc6c09
MM
113static inline void cpuhp_lock_acquire(bool bringup) { }
114static inline void cpuhp_lock_release(bool bringup) { }
5f4b55e1 115
49dfe2a6
TG
116#endif
117
cff7d378 118/**
11bc021d 119 * struct cpuhp_step - Hotplug state machine step
cff7d378
TG
120 * @name: Name of the step
121 * @startup: Startup function of the step
122 * @teardown: Teardown function of the step
757c989b 123 * @cant_stop: Bringup/teardown can't be stopped at this step
11bc021d 124 * @multi_instance: State has multiple instances which get added afterwards
cff7d378
TG
125 */
126struct cpuhp_step {
cf392d10
TG
127 const char *name;
128 union {
3c1627e9
TG
129 int (*single)(unsigned int cpu);
130 int (*multi)(unsigned int cpu,
131 struct hlist_node *node);
132 } startup;
cf392d10 133 union {
3c1627e9
TG
134 int (*single)(unsigned int cpu);
135 int (*multi)(unsigned int cpu,
136 struct hlist_node *node);
137 } teardown;
11bc021d 138 /* private: */
cf392d10 139 struct hlist_head list;
11bc021d 140 /* public: */
cf392d10
TG
141 bool cant_stop;
142 bool multi_instance;
cff7d378
TG
143};
144
98f8cdce 145static DEFINE_MUTEX(cpuhp_state_mutex);
17a2f1ce 146static struct cpuhp_step cpuhp_hp_states[];
cff7d378 147
a724632c
TG
148static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
149{
17a2f1ce 150 return cpuhp_hp_states + state;
a724632c
TG
151}
152
453e4108
VD
153static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
154{
155 return bringup ? !step->startup.single : !step->teardown.single;
156}
157
cff7d378 158/**
11bc021d 159 * cpuhp_invoke_callback - Invoke the callbacks for a given state
cff7d378 160 * @cpu: The cpu for which the callback should be invoked
96abb968 161 * @state: The state to do callbacks for
a724632c 162 * @bringup: True if the bringup callback should be invoked
96abb968
PZ
163 * @node: For multi-instance, do a single entry callback for install/remove
164 * @lastp: For multi-instance rollback, remember how far we got
cff7d378 165 *
cf392d10 166 * Called from cpu hotplug and from the state register machinery.
11bc021d
RD
167 *
168 * Return: %0 on success or a negative errno code
cff7d378 169 */
a724632c 170static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
96abb968
PZ
171 bool bringup, struct hlist_node *node,
172 struct hlist_node **lastp)
cff7d378
TG
173{
174 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
a724632c 175 struct cpuhp_step *step = cpuhp_get_step(state);
cf392d10
TG
176 int (*cbm)(unsigned int cpu, struct hlist_node *node);
177 int (*cb)(unsigned int cpu);
178 int ret, cnt;
179
1db49484
PZ
180 if (st->fail == state) {
181 st->fail = CPUHP_INVALID;
1db49484
PZ
182 return -EAGAIN;
183 }
184
453e4108
VD
185 if (cpuhp_step_empty(bringup, step)) {
186 WARN_ON_ONCE(1);
187 return 0;
188 }
189
cf392d10 190 if (!step->multi_instance) {
96abb968 191 WARN_ON_ONCE(lastp && *lastp);
3c1627e9 192 cb = bringup ? step->startup.single : step->teardown.single;
453e4108 193
a724632c 194 trace_cpuhp_enter(cpu, st->target, state, cb);
cff7d378 195 ret = cb(cpu);
a724632c 196 trace_cpuhp_exit(cpu, st->state, state, ret);
cf392d10
TG
197 return ret;
198 }
3c1627e9 199 cbm = bringup ? step->startup.multi : step->teardown.multi;
cf392d10
TG
200
201 /* Single invocation for instance add/remove */
202 if (node) {
96abb968 203 WARN_ON_ONCE(lastp && *lastp);
cf392d10
TG
204 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
205 ret = cbm(cpu, node);
206 trace_cpuhp_exit(cpu, st->state, state, ret);
207 return ret;
208 }
209
210 /* State transition. Invoke on all instances */
211 cnt = 0;
212 hlist_for_each(node, &step->list) {
96abb968
PZ
213 if (lastp && node == *lastp)
214 break;
215
cf392d10
TG
216 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
217 ret = cbm(cpu, node);
218 trace_cpuhp_exit(cpu, st->state, state, ret);
96abb968
PZ
219 if (ret) {
220 if (!lastp)
221 goto err;
222
223 *lastp = node;
224 return ret;
225 }
cf392d10
TG
226 cnt++;
227 }
96abb968
PZ
228 if (lastp)
229 *lastp = NULL;
cf392d10
TG
230 return 0;
231err:
232 /* Rollback the instances if one failed */
3c1627e9 233 cbm = !bringup ? step->startup.multi : step->teardown.multi;
cf392d10
TG
234 if (!cbm)
235 return ret;
236
237 hlist_for_each(node, &step->list) {
238 if (!cnt--)
239 break;
724a8688
PZ
240
241 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
242 ret = cbm(cpu, node);
243 trace_cpuhp_exit(cpu, st->state, state, ret);
244 /*
245 * Rollback must not fail,
246 */
247 WARN_ON_ONCE(ret);
cff7d378
TG
248 }
249 return ret;
250}
251
98a79d6a 252#ifdef CONFIG_SMP
fcb3029a
AB
253static bool cpuhp_is_ap_state(enum cpuhp_state state)
254{
255 /*
256 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
257 * purposes as that state is handled explicitly in cpu_down.
258 */
259 return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
260}
261
5ebe7742
PZ
262static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
263{
264 struct completion *done = bringup ? &st->done_up : &st->done_down;
265 wait_for_completion(done);
266}
267
268static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
269{
270 struct completion *done = bringup ? &st->done_up : &st->done_down;
271 complete(done);
272}
273
274/*
275 * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
276 */
277static bool cpuhp_is_atomic_state(enum cpuhp_state state)
278{
279 return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
280}
281
6f062123
TG
282/* Synchronization state management */
283enum cpuhp_sync_state {
284 SYNC_STATE_DEAD,
285 SYNC_STATE_KICKED,
286 SYNC_STATE_SHOULD_DIE,
287 SYNC_STATE_ALIVE,
288 SYNC_STATE_SHOULD_ONLINE,
289 SYNC_STATE_ONLINE,
290};
291
292#ifdef CONFIG_HOTPLUG_CORE_SYNC
293/**
294 * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
295 * @state: The synchronization state to set
296 *
297 * No synchronization point. Just update of the synchronization state, but implies
298 * a full barrier so that the AP changes are visible before the control CPU proceeds.
299 */
300static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
301{
302 atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
303
304 (void)atomic_xchg(st, state);
305}
306
307void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
308
309static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
310 enum cpuhp_sync_state next_state)
311{
312 atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
313 ktime_t now, end, start = ktime_get();
314 int sync;
315
316 end = start + 10ULL * NSEC_PER_SEC;
317
318 sync = atomic_read(st);
319 while (1) {
320 if (sync == state) {
321 if (!atomic_try_cmpxchg(st, &sync, next_state))
322 continue;
323 return true;
324 }
325
326 now = ktime_get();
327 if (now > end) {
328 /* Timeout. Leave the state unchanged */
329 return false;
330 } else if (now - start < NSEC_PER_MSEC) {
331 /* Poll for one millisecond */
332 arch_cpuhp_sync_state_poll();
333 } else {
334 usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
335 }
336 sync = atomic_read(st);
337 }
338 return true;
339}
340#else /* CONFIG_HOTPLUG_CORE_SYNC */
341static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
342#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
343
344#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
345/**
346 * cpuhp_ap_report_dead - Update synchronization state to DEAD
347 *
348 * No synchronization point. Just update of the synchronization state.
349 */
350void cpuhp_ap_report_dead(void)
351{
352 cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
353}
354
355void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
356
357/*
358 * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
359 * because the AP cannot issue complete() at this stage.
360 */
361static void cpuhp_bp_sync_dead(unsigned int cpu)
362{
363 atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
364 int sync = atomic_read(st);
365
366 do {
367 /* CPU can have reported dead already. Don't overwrite that! */
368 if (sync == SYNC_STATE_DEAD)
369 break;
370 } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
371
372 if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
373 /* CPU reached dead state. Invoke the cleanup function */
374 arch_cpuhp_cleanup_dead_cpu(cpu);
375 return;
376 }
377
378 /* No further action possible. Emit message and give up. */
379 pr_err("CPU%u failed to report dead state\n", cpu);
380}
381#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
382static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
383#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
384
385#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
386/**
387 * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
388 *
389 * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
390 * for the BP to release it.
391 */
392void cpuhp_ap_sync_alive(void)
393{
394 atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
395
396 cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
397
398 /* Wait for the control CPU to release it. */
399 while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
400 cpu_relax();
401}
402
403static bool cpuhp_can_boot_ap(unsigned int cpu)
404{
405 atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
406 int sync = atomic_read(st);
407
408again:
409 switch (sync) {
410 case SYNC_STATE_DEAD:
411 /* CPU is properly dead */
412 break;
413 case SYNC_STATE_KICKED:
414 /* CPU did not come up in previous attempt */
415 break;
416 case SYNC_STATE_ALIVE:
417 /* CPU is stuck cpuhp_ap_sync_alive(). */
418 break;
419 default:
420 /* CPU failed to report online or dead and is in limbo state. */
421 return false;
422 }
423
424 /* Prepare for booting */
425 if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
426 goto again;
427
428 return true;
429}
430
431void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
432
433/*
434 * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
435 * because the AP cannot issue complete() so early in the bringup.
436 */
437static int cpuhp_bp_sync_alive(unsigned int cpu)
438{
439 int ret = 0;
440
441 if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
442 return 0;
443
444 if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
445 pr_err("CPU%u failed to report alive state\n", cpu);
446 ret = -EIO;
447 }
448
449 /* Let the architecture cleanup the kick alive mechanics. */
450 arch_cpuhp_cleanup_kick_cpu(cpu);
451 return ret;
452}
453#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
454static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
455static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
456#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
457
b3199c02 458/* Serializes the updates to cpu_online_mask, cpu_present_mask */
aa953877 459static DEFINE_MUTEX(cpu_add_remove_lock);
090e77c3
TG
460bool cpuhp_tasks_frozen;
461EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
1da177e4 462
79a6cdeb 463/*
93ae4f97
SB
464 * The following two APIs (cpu_maps_update_begin/done) must be used when
465 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
79a6cdeb
LJ
466 */
467void cpu_maps_update_begin(void)
468{
469 mutex_lock(&cpu_add_remove_lock);
470}
471
472void cpu_maps_update_done(void)
473{
474 mutex_unlock(&cpu_add_remove_lock);
475}
1da177e4 476
fc8dffd3
TG
477/*
478 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
e3920fb4
RW
479 * Should always be manipulated under cpu_add_remove_lock
480 */
481static int cpu_hotplug_disabled;
482
79a6cdeb
LJ
483#ifdef CONFIG_HOTPLUG_CPU
484
fc8dffd3 485DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
a19423b9 486
8f553c49 487void cpus_read_lock(void)
a9d9baa1 488{
fc8dffd3 489 percpu_down_read(&cpu_hotplug_lock);
a9d9baa1 490}
8f553c49 491EXPORT_SYMBOL_GPL(cpus_read_lock);
90d45d17 492
6f4ceee9
WL
493int cpus_read_trylock(void)
494{
495 return percpu_down_read_trylock(&cpu_hotplug_lock);
496}
497EXPORT_SYMBOL_GPL(cpus_read_trylock);
498
8f553c49 499void cpus_read_unlock(void)
a9d9baa1 500{
fc8dffd3 501 percpu_up_read(&cpu_hotplug_lock);
a9d9baa1 502}
8f553c49 503EXPORT_SYMBOL_GPL(cpus_read_unlock);
a9d9baa1 504
8f553c49 505void cpus_write_lock(void)
d221938c 506{
fc8dffd3 507 percpu_down_write(&cpu_hotplug_lock);
d221938c 508}
87af9e7f 509
8f553c49 510void cpus_write_unlock(void)
d221938c 511{
fc8dffd3 512 percpu_up_write(&cpu_hotplug_lock);
d221938c
GS
513}
514
fc8dffd3 515void lockdep_assert_cpus_held(void)
d221938c 516{
ce48c457
VS
517 /*
518 * We can't have hotplug operations before userspace starts running,
519 * and some init codepaths will knowingly not take the hotplug lock.
520 * This is all valid, so mute lockdep until it makes sense to report
521 * unheld locks.
522 */
523 if (system_state < SYSTEM_RUNNING)
524 return;
525
fc8dffd3 526 percpu_rwsem_assert_held(&cpu_hotplug_lock);
d221938c 527}
79a6cdeb 528
43759fe5
FW
529#ifdef CONFIG_LOCKDEP
530int lockdep_is_cpus_held(void)
531{
532 return percpu_rwsem_is_held(&cpu_hotplug_lock);
533}
534#endif
535
cb92173d
PZ
536static void lockdep_acquire_cpus_lock(void)
537{
1751060e 538 rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
cb92173d
PZ
539}
540
541static void lockdep_release_cpus_lock(void)
542{
1751060e 543 rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
cb92173d
PZ
544}
545
16e53dbf
SB
546/*
547 * Wait for currently running CPU hotplug operations to complete (if any) and
548 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
549 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
550 * hotplug path before performing hotplug operations. So acquiring that lock
551 * guarantees mutual exclusion from any currently running hotplug operations.
552 */
553void cpu_hotplug_disable(void)
554{
555 cpu_maps_update_begin();
89af7ba5 556 cpu_hotplug_disabled++;
16e53dbf
SB
557 cpu_maps_update_done();
558}
32145c46 559EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
16e53dbf 560
01b41159
LW
561static void __cpu_hotplug_enable(void)
562{
563 if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
564 return;
565 cpu_hotplug_disabled--;
566}
567
16e53dbf
SB
568void cpu_hotplug_enable(void)
569{
570 cpu_maps_update_begin();
01b41159 571 __cpu_hotplug_enable();
16e53dbf
SB
572 cpu_maps_update_done();
573}
32145c46 574EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
cb92173d
PZ
575
576#else
577
578static void lockdep_acquire_cpus_lock(void)
579{
580}
581
582static void lockdep_release_cpus_lock(void)
583{
584}
585
b9d10be7 586#endif /* CONFIG_HOTPLUG_CPU */
79a6cdeb 587
a74cfffb
TG
588/*
589 * Architectures that need SMT-specific errata handling during SMT hotplug
590 * should override this.
591 */
592void __weak arch_smt_update(void) { }
593
0cc3cd21
TG
594#ifdef CONFIG_HOTPLUG_SMT
595enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
bc2d8d26 596
8e1b706b 597void __init cpu_smt_disable(bool force)
0cc3cd21 598{
e1572f1d 599 if (!cpu_smt_possible())
8e1b706b
JK
600 return;
601
602 if (force) {
0cc3cd21
TG
603 pr_info("SMT: Force disabled\n");
604 cpu_smt_control = CPU_SMT_FORCE_DISABLED;
8e1b706b 605 } else {
d0e7d144 606 pr_info("SMT: disabled\n");
8e1b706b 607 cpu_smt_control = CPU_SMT_DISABLED;
0cc3cd21 608 }
8e1b706b
JK
609}
610
fee0aede
TG
611/*
612 * The decision whether SMT is supported can only be done after the full
b284909a 613 * CPU identification. Called from architecture code.
bc2d8d26
TG
614 */
615void __init cpu_smt_check_topology(void)
616{
b284909a 617 if (!topology_smt_supported())
bc2d8d26
TG
618 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
619}
620
8e1b706b
JK
621static int __init smt_cmdline_disable(char *str)
622{
623 cpu_smt_disable(str && !strcmp(str, "force"));
0cc3cd21
TG
624 return 0;
625}
626early_param("nosmt", smt_cmdline_disable);
627
628static inline bool cpu_smt_allowed(unsigned int cpu)
629{
b284909a 630 if (cpu_smt_control == CPU_SMT_ENABLED)
0cc3cd21
TG
631 return true;
632
b284909a 633 if (topology_is_primary_thread(cpu))
0cc3cd21
TG
634 return true;
635
636 /*
637 * On x86 it's required to boot all logical CPUs at least once so
638 * that the init code can get a chance to set CR4.MCE on each
182e073f 639 * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
0cc3cd21
TG
640 * core will shutdown the machine.
641 */
e797bda3 642 return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
0cc3cd21 643}
e1572f1d
VK
644
645/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
646bool cpu_smt_possible(void)
647{
648 return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
649 cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
650}
651EXPORT_SYMBOL_GPL(cpu_smt_possible);
18415f33
TG
652
653static inline bool cpuhp_smt_aware(void)
654{
655 return topology_smt_supported();
656}
657
658static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
659{
660 return cpu_primary_thread_mask;
661}
0cc3cd21
TG
662#else
663static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
18415f33
TG
664static inline bool cpuhp_smt_aware(void) { return false; }
665static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
666{
667 return cpu_present_mask;
668}
0cc3cd21
TG
669#endif
670
4dddfb5f 671static inline enum cpuhp_state
b7ba6d8d 672cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
4dddfb5f
PZ
673{
674 enum cpuhp_state prev_state = st->state;
2ea46c6f 675 bool bringup = st->state < target;
4dddfb5f
PZ
676
677 st->rollback = false;
678 st->last = NULL;
679
680 st->target = target;
681 st->single = false;
2ea46c6f 682 st->bringup = bringup;
b7ba6d8d
SP
683 if (cpu_dying(cpu) != !bringup)
684 set_cpu_dying(cpu, !bringup);
4dddfb5f
PZ
685
686 return prev_state;
687}
688
689static inline void
b7ba6d8d
SP
690cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
691 enum cpuhp_state prev_state)
4dddfb5f 692{
2ea46c6f
PZ
693 bool bringup = !st->bringup;
694
453e4108
VD
695 st->target = prev_state;
696
697 /*
698 * Already rolling back. No need invert the bringup value or to change
699 * the current state.
700 */
701 if (st->rollback)
702 return;
703
4dddfb5f
PZ
704 st->rollback = true;
705
706 /*
707 * If we have st->last we need to undo partial multi_instance of this
708 * state first. Otherwise start undo at the previous state.
709 */
710 if (!st->last) {
711 if (st->bringup)
712 st->state--;
713 else
714 st->state++;
715 }
716
2ea46c6f 717 st->bringup = bringup;
b7ba6d8d
SP
718 if (cpu_dying(cpu) != !bringup)
719 set_cpu_dying(cpu, !bringup);
4dddfb5f
PZ
720}
721
722/* Regular hotplug invocation of the AP hotplug thread */
723static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
724{
725 if (!st->single && st->state == st->target)
726 return;
727
728 st->result = 0;
729 /*
730 * Make sure the above stores are visible before should_run becomes
731 * true. Paired with the mb() above in cpuhp_thread_fun()
732 */
733 smp_mb();
734 st->should_run = true;
735 wake_up_process(st->thread);
5ebe7742 736 wait_for_ap_thread(st, st->bringup);
4dddfb5f
PZ
737}
738
b7ba6d8d
SP
739static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
740 enum cpuhp_state target)
4dddfb5f
PZ
741{
742 enum cpuhp_state prev_state;
743 int ret;
744
b7ba6d8d 745 prev_state = cpuhp_set_state(cpu, st, target);
4dddfb5f
PZ
746 __cpuhp_kick_ap(st);
747 if ((ret = st->result)) {
b7ba6d8d 748 cpuhp_reset_state(cpu, st, prev_state);
4dddfb5f
PZ
749 __cpuhp_kick_ap(st);
750 }
751
752 return ret;
753}
9cd4f1a4 754
22b612e2 755static int bringup_wait_for_ap_online(unsigned int cpu)
8df3e07e
TG
756{
757 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
758
9cd4f1a4 759 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
5ebe7742 760 wait_for_ap_thread(st, true);
dea1d0f5
TG
761 if (WARN_ON_ONCE((!cpu_online(cpu))))
762 return -ECANCELED;
9cd4f1a4 763
45178ac0 764 /* Unpark the hotplug thread of the target cpu */
9cd4f1a4
TG
765 kthread_unpark(st->thread);
766
0cc3cd21
TG
767 /*
768 * SMT soft disabling on X86 requires to bring the CPU out of the
769 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
f5602011 770 * CPU marked itself as booted_once in notify_cpu_starting() so the
0cc3cd21
TG
771 * cpu_smt_allowed() check will now return false if this is not the
772 * primary sibling.
773 */
774 if (!cpu_smt_allowed(cpu))
775 return -ECANCELED;
22b612e2 776 return 0;
8df3e07e
TG
777}
778
a631be92
TG
779#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
780static int cpuhp_kick_ap_alive(unsigned int cpu)
781{
782 if (!cpuhp_can_boot_ap(cpu))
783 return -EAGAIN;
784
785 return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
786}
787
788static int cpuhp_bringup_ap(unsigned int cpu)
789{
790 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
791 int ret;
792
793 /*
794 * Some architectures have to walk the irq descriptors to
795 * setup the vector space for the cpu which comes online.
796 * Prevent irq alloc/free across the bringup.
797 */
798 irq_lock_sparse();
799
800 ret = cpuhp_bp_sync_alive(cpu);
801 if (ret)
802 goto out_unlock;
803
804 ret = bringup_wait_for_ap_online(cpu);
805 if (ret)
806 goto out_unlock;
807
808 irq_unlock_sparse();
809
810 if (st->target <= CPUHP_AP_ONLINE_IDLE)
811 return 0;
812
813 return cpuhp_kick_ap(cpu, st, st->target);
814
815out_unlock:
816 irq_unlock_sparse();
817 return ret;
818}
819#else
ba997462
TG
820static int bringup_cpu(unsigned int cpu)
821{
22b612e2 822 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
ba997462
TG
823 struct task_struct *idle = idle_thread_get(cpu);
824 int ret;
825
6f062123
TG
826 if (!cpuhp_can_boot_ap(cpu))
827 return -EAGAIN;
828
aa877175
BO
829 /*
830 * Some architectures have to walk the irq descriptors to
831 * setup the vector space for the cpu which comes online.
22b612e2
TG
832 *
833 * Prevent irq alloc/free across the bringup by acquiring the
834 * sparse irq lock. Hold it until the upcoming CPU completes the
835 * startup in cpuhp_online_idle() which allows to avoid
836 * intermediate synchronization points in the architecture code.
aa877175
BO
837 */
838 irq_lock_sparse();
839
ba997462 840 ret = __cpu_up(cpu, idle);
530e9b76 841 if (ret)
22b612e2
TG
842 goto out_unlock;
843
6f062123
TG
844 ret = cpuhp_bp_sync_alive(cpu);
845 if (ret)
846 goto out_unlock;
847
22b612e2
TG
848 ret = bringup_wait_for_ap_online(cpu);
849 if (ret)
850 goto out_unlock;
851
852 irq_unlock_sparse();
853
854 if (st->target <= CPUHP_AP_ONLINE_IDLE)
855 return 0;
856
857 return cpuhp_kick_ap(cpu, st, st->target);
858
859out_unlock:
860 irq_unlock_sparse();
861 return ret;
ba997462 862}
a631be92 863#endif
ba997462 864
bf2c59fc
PZ
865static int finish_cpu(unsigned int cpu)
866{
867 struct task_struct *idle = idle_thread_get(cpu);
868 struct mm_struct *mm = idle->active_mm;
869
870 /*
871 * idle_task_exit() will have switched to &init_mm, now
872 * clean up any remaining active_mm state.
873 */
874 if (mm != &init_mm)
875 idle->active_mm = &init_mm;
aa464ba9 876 mmdrop_lazy_tlb(mm);
bf2c59fc
PZ
877 return 0;
878}
879
2e1a3483
TG
880/*
881 * Hotplug state machine related functions
882 */
2e1a3483 883
453e4108
VD
884/*
885 * Get the next state to run. Empty ones will be skipped. Returns true if a
886 * state must be run.
887 *
888 * st->state will be modified ahead of time, to match state_to_run, as if it
889 * has already ran.
890 */
891static bool cpuhp_next_state(bool bringup,
892 enum cpuhp_state *state_to_run,
893 struct cpuhp_cpu_state *st,
894 enum cpuhp_state target)
2e1a3483 895{
453e4108
VD
896 do {
897 if (bringup) {
898 if (st->state >= target)
899 return false;
900
901 *state_to_run = ++st->state;
902 } else {
903 if (st->state <= target)
904 return false;
905
906 *state_to_run = st->state--;
907 }
908
909 if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
910 break;
911 } while (true);
912
913 return true;
914}
915
6f855b39
VD
916static int __cpuhp_invoke_callback_range(bool bringup,
917 unsigned int cpu,
918 struct cpuhp_cpu_state *st,
919 enum cpuhp_state target,
920 bool nofail)
453e4108
VD
921{
922 enum cpuhp_state state;
6f855b39 923 int ret = 0;
453e4108
VD
924
925 while (cpuhp_next_state(bringup, &state, st, target)) {
6f855b39
VD
926 int err;
927
453e4108 928 err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
6f855b39
VD
929 if (!err)
930 continue;
931
932 if (nofail) {
933 pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
934 cpu, bringup ? "UP" : "DOWN",
935 cpuhp_get_step(st->state)->name,
936 st->state, err);
937 ret = -1;
938 } else {
939 ret = err;
453e4108 940 break;
6f855b39 941 }
453e4108
VD
942 }
943
6f855b39
VD
944 return ret;
945}
946
947static inline int cpuhp_invoke_callback_range(bool bringup,
948 unsigned int cpu,
949 struct cpuhp_cpu_state *st,
950 enum cpuhp_state target)
951{
952 return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
953}
954
955static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
956 unsigned int cpu,
957 struct cpuhp_cpu_state *st,
958 enum cpuhp_state target)
959{
960 __cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
2e1a3483
TG
961}
962
206b9235
TG
963static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
964{
965 if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
966 return true;
967 /*
968 * When CPU hotplug is disabled, then taking the CPU down is not
969 * possible because takedown_cpu() and the architecture and
970 * subsystem specific mechanisms are not available. So the CPU
971 * which would be completely unplugged again needs to stay around
972 * in the current state.
973 */
974 return st->state <= CPUHP_BRINGUP_CPU;
975}
976
2e1a3483 977static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
a724632c 978 enum cpuhp_state target)
2e1a3483
TG
979{
980 enum cpuhp_state prev_state = st->state;
981 int ret = 0;
982
453e4108
VD
983 ret = cpuhp_invoke_callback_range(true, cpu, st, target);
984 if (ret) {
ebca71a8
DZ
985 pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
986 ret, cpu, cpuhp_get_step(st->state)->name,
987 st->state);
988
b7ba6d8d 989 cpuhp_reset_state(cpu, st, prev_state);
453e4108
VD
990 if (can_rollback_cpu(st))
991 WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
992 prev_state));
2e1a3483
TG
993 }
994 return ret;
995}
996
4cb28ced
TG
997/*
998 * The cpu hotplug threads manage the bringup and teardown of the cpus
999 */
4cb28ced
TG
1000static int cpuhp_should_run(unsigned int cpu)
1001{
1002 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1003
1004 return st->should_run;
1005}
1006
4cb28ced
TG
1007/*
1008 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
1009 * callbacks when a state gets [un]installed at runtime.
4dddfb5f
PZ
1010 *
1011 * Each invocation of this function by the smpboot thread does a single AP
1012 * state callback.
1013 *
1014 * It has 3 modes of operation:
1015 * - single: runs st->cb_state
1016 * - up: runs ++st->state, while st->state < st->target
1017 * - down: runs st->state--, while st->state > st->target
1018 *
1019 * When complete or on error, should_run is cleared and the completion is fired.
4cb28ced
TG
1020 */
1021static void cpuhp_thread_fun(unsigned int cpu)
1022{
1023 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
4dddfb5f
PZ
1024 bool bringup = st->bringup;
1025 enum cpuhp_state state;
4cb28ced 1026
f8b7530a
NU
1027 if (WARN_ON_ONCE(!st->should_run))
1028 return;
1029
4cb28ced 1030 /*
4dddfb5f
PZ
1031 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
1032 * that if we see ->should_run we also see the rest of the state.
4cb28ced
TG
1033 */
1034 smp_mb();
4cb28ced 1035
cb92173d
PZ
1036 /*
1037 * The BP holds the hotplug lock, but we're now running on the AP,
1038 * ensure that anybody asserting the lock is held, will actually find
1039 * it so.
1040 */
1041 lockdep_acquire_cpus_lock();
5f4b55e1 1042 cpuhp_lock_acquire(bringup);
4dddfb5f 1043
a724632c 1044 if (st->single) {
4dddfb5f
PZ
1045 state = st->cb_state;
1046 st->should_run = false;
1047 } else {
453e4108
VD
1048 st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
1049 if (!st->should_run)
1050 goto end;
4dddfb5f
PZ
1051 }
1052
1053 WARN_ON_ONCE(!cpuhp_is_ap_state(state));
1054
4dddfb5f
PZ
1055 if (cpuhp_is_atomic_state(state)) {
1056 local_irq_disable();
1057 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
1058 local_irq_enable();
3b9d6da6 1059
4dddfb5f
PZ
1060 /*
1061 * STARTING/DYING must not fail!
1062 */
1063 WARN_ON_ONCE(st->result);
4cb28ced 1064 } else {
4dddfb5f
PZ
1065 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
1066 }
1067
1068 if (st->result) {
1069 /*
1070 * If we fail on a rollback, we're up a creek without no
1071 * paddle, no way forward, no way back. We loose, thanks for
1072 * playing.
1073 */
1074 WARN_ON_ONCE(st->rollback);
1075 st->should_run = false;
4cb28ced 1076 }
4dddfb5f 1077
453e4108 1078end:
5f4b55e1 1079 cpuhp_lock_release(bringup);
cb92173d 1080 lockdep_release_cpus_lock();
4dddfb5f
PZ
1081
1082 if (!st->should_run)
5ebe7742 1083 complete_ap_thread(st, bringup);
4cb28ced
TG
1084}
1085
1086/* Invoke a single callback on a remote cpu */
a724632c 1087static int
cf392d10
TG
1088cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
1089 struct hlist_node *node)
4cb28ced
TG
1090{
1091 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
4dddfb5f 1092 int ret;
4cb28ced
TG
1093
1094 if (!cpu_online(cpu))
1095 return 0;
1096
5f4b55e1
PZ
1097 cpuhp_lock_acquire(false);
1098 cpuhp_lock_release(false);
1099
1100 cpuhp_lock_acquire(true);
1101 cpuhp_lock_release(true);
49dfe2a6 1102
6a4e2451
TG
1103 /*
1104 * If we are up and running, use the hotplug thread. For early calls
1105 * we invoke the thread function directly.
1106 */
1107 if (!st->thread)
96abb968 1108 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
6a4e2451 1109
4dddfb5f
PZ
1110 st->rollback = false;
1111 st->last = NULL;
1112
1113 st->node = node;
1114 st->bringup = bringup;
4cb28ced 1115 st->cb_state = state;
a724632c 1116 st->single = true;
a724632c 1117
4dddfb5f 1118 __cpuhp_kick_ap(st);
4cb28ced 1119
4cb28ced 1120 /*
4dddfb5f 1121 * If we failed and did a partial, do a rollback.
4cb28ced 1122 */
4dddfb5f
PZ
1123 if ((ret = st->result) && st->last) {
1124 st->rollback = true;
1125 st->bringup = !bringup;
1126
1127 __cpuhp_kick_ap(st);
1128 }
1129
1f7c70d6
TG
1130 /*
1131 * Clean up the leftovers so the next hotplug operation wont use stale
1132 * data.
1133 */
1134 st->node = st->last = NULL;
4dddfb5f 1135 return ret;
1cf4f629
TG
1136}
1137
1138static int cpuhp_kick_ap_work(unsigned int cpu)
1139{
1140 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
4dddfb5f
PZ
1141 enum cpuhp_state prev_state = st->state;
1142 int ret;
1cf4f629 1143
5f4b55e1
PZ
1144 cpuhp_lock_acquire(false);
1145 cpuhp_lock_release(false);
1146
1147 cpuhp_lock_acquire(true);
1148 cpuhp_lock_release(true);
4dddfb5f
PZ
1149
1150 trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
b7ba6d8d 1151 ret = cpuhp_kick_ap(cpu, st, st->target);
4dddfb5f
PZ
1152 trace_cpuhp_exit(cpu, st->state, prev_state, ret);
1153
1154 return ret;
4cb28ced
TG
1155}
1156
1157static struct smp_hotplug_thread cpuhp_threads = {
1158 .store = &cpuhp_state.thread,
4cb28ced
TG
1159 .thread_should_run = cpuhp_should_run,
1160 .thread_fn = cpuhp_thread_fun,
1161 .thread_comm = "cpuhp/%u",
1162 .selfparking = true,
1163};
1164
d308077e
SP
1165static __init void cpuhp_init_state(void)
1166{
1167 struct cpuhp_cpu_state *st;
1168 int cpu;
1169
1170 for_each_possible_cpu(cpu) {
1171 st = per_cpu_ptr(&cpuhp_state, cpu);
1172 init_completion(&st->done_up);
1173 init_completion(&st->done_down);
1174 }
1175}
1176
4cb28ced
TG
1177void __init cpuhp_threads_init(void)
1178{
d308077e 1179 cpuhp_init_state();
4cb28ced
TG
1180 BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
1181 kthread_unpark(this_cpu_read(cpuhp_state.thread));
1182}
1183
b22afcdf
TG
1184/*
1185 *
1186 * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
1187 * protected region.
1188 *
1189 * The operation is still serialized against concurrent CPU hotplug via
1190 * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
1191 * serialized against other hotplug related activity like adding or
1192 * removing of state callbacks and state instances, which invoke either the
1193 * startup or the teardown callback of the affected state.
1194 *
1195 * This is required for subsystems which are unfixable vs. CPU hotplug and
1196 * evade lock inversion problems by scheduling work which has to be
1197 * completed _before_ cpu_up()/_cpu_down() returns.
1198 *
1199 * Don't even think about adding anything to this for any new code or even
1200 * drivers. It's only purpose is to keep existing lock order trainwrecks
1201 * working.
1202 *
1203 * For cpu_down() there might be valid reasons to finish cleanups which are
1204 * not required to be done under cpu_hotplug_lock, but that's a different
1205 * story and would be not invoked via this.
1206 */
1207static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
1208{
1209 /*
1210 * cpusets delegate hotplug operations to a worker to "solve" the
1211 * lock order problems. Wait for the worker, but only if tasks are
1212 * _not_ frozen (suspend, hibernate) as that would wait forever.
1213 *
1214 * The wait is required because otherwise the hotplug operation
1215 * returns with inconsistent state, which could even be observed in
1216 * user space when a new CPU is brought up. The CPU plug uevent
1217 * would be delivered and user space reacting on it would fail to
1218 * move tasks to the newly plugged CPU up to the point where the
1219 * work has finished because up to that point the newly plugged CPU
1220 * is not assignable in cpusets/cgroups. On unplug that's not
1221 * necessarily a visible issue, but it is still inconsistent state,
1222 * which is the real problem which needs to be "fixed". This can't
1223 * prevent the transient state between scheduling the work and
1224 * returning from waiting for it.
1225 */
1226 if (!tasks_frozen)
1227 cpuset_wait_for_hotplug();
1228}
1229
777c6e0d 1230#ifdef CONFIG_HOTPLUG_CPU
8ff00399
NP
1231#ifndef arch_clear_mm_cpumask_cpu
1232#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
1233#endif
1234
e4cc2f87
AV
1235/**
1236 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
1237 * @cpu: a CPU id
1238 *
1239 * This function walks all processes, finds a valid mm struct for each one and
1240 * then clears a corresponding bit in mm's cpumask. While this all sounds
1241 * trivial, there are various non-obvious corner cases, which this function
1242 * tries to solve in a safe manner.
1243 *
1244 * Also note that the function uses a somewhat relaxed locking scheme, so it may
1245 * be called only for an already offlined CPU.
1246 */
cb79295e
AV
1247void clear_tasks_mm_cpumask(int cpu)
1248{
1249 struct task_struct *p;
1250
1251 /*
1252 * This function is called after the cpu is taken down and marked
1253 * offline, so its not like new tasks will ever get this cpu set in
1254 * their mm mask. -- Peter Zijlstra
1255 * Thus, we may use rcu_read_lock() here, instead of grabbing
1256 * full-fledged tasklist_lock.
1257 */
e4cc2f87 1258 WARN_ON(cpu_online(cpu));
cb79295e
AV
1259 rcu_read_lock();
1260 for_each_process(p) {
1261 struct task_struct *t;
1262
e4cc2f87
AV
1263 /*
1264 * Main thread might exit, but other threads may still have
1265 * a valid mm. Find one.
1266 */
cb79295e
AV
1267 t = find_lock_task_mm(p);
1268 if (!t)
1269 continue;
8ff00399 1270 arch_clear_mm_cpumask_cpu(cpu, t->mm);
cb79295e
AV
1271 task_unlock(t);
1272 }
1273 rcu_read_unlock();
1274}
1275
1da177e4 1276/* Take this CPU down. */
71cf5aee 1277static int take_cpu_down(void *_param)
1da177e4 1278{
4baa0afc
TG
1279 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1280 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
090e77c3 1281 int err, cpu = smp_processor_id();
1da177e4 1282
1da177e4
LT
1283 /* Ensure this CPU doesn't handle any more interrupts. */
1284 err = __cpu_disable();
1285 if (err < 0)
f3705136 1286 return err;
1da177e4 1287
a724632c 1288 /*
453e4108
VD
1289 * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
1290 * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
a724632c 1291 */
453e4108
VD
1292 WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
1293
453e4108 1294 /*
6f855b39 1295 * Invoke the former CPU_DYING callbacks. DYING must not fail!
453e4108 1296 */
6f855b39 1297 cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
4baa0afc 1298
52c063d1
TG
1299 /* Give up timekeeping duties */
1300 tick_handover_do_timer();
1b72d432
TG
1301 /* Remove CPU from timer broadcasting */
1302 tick_offline_cpu(cpu);
14e568e7 1303 /* Park the stopper thread */
090e77c3 1304 stop_machine_park(cpu);
f3705136 1305 return 0;
1da177e4
LT
1306}
1307
98458172 1308static int takedown_cpu(unsigned int cpu)
1da177e4 1309{
e69aab13 1310 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
98458172 1311 int err;
1da177e4 1312
2a58c527 1313 /* Park the smpboot threads */
13070833 1314 kthread_park(st->thread);
1cf4f629 1315
6acce3ef 1316 /*
a8994181
TG
1317 * Prevent irq alloc/free while the dying cpu reorganizes the
1318 * interrupt affinities.
6acce3ef 1319 */
a8994181 1320 irq_lock_sparse();
6acce3ef 1321
a8994181
TG
1322 /*
1323 * So now all preempt/rcu users must observe !cpu_active().
1324 */
210e2133 1325 err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
04321587 1326 if (err) {
3b9d6da6 1327 /* CPU refused to die */
a8994181 1328 irq_unlock_sparse();
3b9d6da6 1329 /* Unpark the hotplug thread so we can rollback there */
13070833 1330 kthread_unpark(st->thread);
98458172 1331 return err;
8fa1d7d3 1332 }
04321587 1333 BUG_ON(cpu_online(cpu));
1da177e4 1334
48c5ccae 1335 /*
5b1ead68
BJ
1336 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1337 * all runnable tasks from the CPU, there's only the idle task left now
48c5ccae 1338 * that the migration thread is done doing the stop_machine thing.
51a96c77
PZ
1339 *
1340 * Wait for the stop thread to go away.
48c5ccae 1341 */
5ebe7742 1342 wait_for_ap_thread(st, false);
e69aab13 1343 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1da177e4 1344
a8994181
TG
1345 /* Interrupts are moved away from the dying cpu, reenable alloc/free */
1346 irq_unlock_sparse();
1347
345527b1 1348 hotplug_cpu__broadcast_tick_pull(cpu);
1da177e4
LT
1349 /* This actually kills the CPU. */
1350 __cpu_die(cpu);
1351
6f062123
TG
1352 cpuhp_bp_sync_dead(cpu);
1353
a49b116d 1354 tick_cleanup_dead_cpu(cpu);
a58163d8 1355 rcutree_migrate_callbacks(cpu);
98458172
TG
1356 return 0;
1357}
1da177e4 1358
71f87b2f
TG
1359static void cpuhp_complete_idle_dead(void *arg)
1360{
1361 struct cpuhp_cpu_state *st = arg;
1362
5ebe7742 1363 complete_ap_thread(st, false);
71f87b2f
TG
1364}
1365
e69aab13
TG
1366void cpuhp_report_idle_dead(void)
1367{
1368 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1369
1370 BUG_ON(st->state != CPUHP_AP_OFFLINE);
27d50c7e 1371 rcu_report_dead(smp_processor_id());
71f87b2f
TG
1372 st->state = CPUHP_AP_IDLE_DEAD;
1373 /*
1374 * We cannot call complete after rcu_report_dead() so we delegate it
1375 * to an online cpu.
1376 */
1377 smp_call_function_single(cpumask_first(cpu_online_mask),
1378 cpuhp_complete_idle_dead, st, 0);
e69aab13
TG
1379}
1380
4dddfb5f
PZ
1381static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1382 enum cpuhp_state target)
1383{
1384 enum cpuhp_state prev_state = st->state;
1385 int ret = 0;
1386
453e4108
VD
1387 ret = cpuhp_invoke_callback_range(false, cpu, st, target);
1388 if (ret) {
ebca71a8
DZ
1389 pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
1390 ret, cpu, cpuhp_get_step(st->state)->name,
1391 st->state);
453e4108 1392
b7ba6d8d 1393 cpuhp_reset_state(cpu, st, prev_state);
453e4108
VD
1394
1395 if (st->state < prev_state)
1396 WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
1397 prev_state));
4dddfb5f 1398 }
453e4108 1399
4dddfb5f
PZ
1400 return ret;
1401}
cff7d378 1402
98458172 1403/* Requires cpu_add_remove_lock to be held */
af1f4045
TG
1404static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1405 enum cpuhp_state target)
98458172 1406{
cff7d378
TG
1407 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1408 int prev_state, ret = 0;
98458172
TG
1409
1410 if (num_online_cpus() == 1)
1411 return -EBUSY;
1412
757c989b 1413 if (!cpu_present(cpu))
98458172
TG
1414 return -EINVAL;
1415
8f553c49 1416 cpus_write_lock();
98458172
TG
1417
1418 cpuhp_tasks_frozen = tasks_frozen;
1419
b7ba6d8d 1420 prev_state = cpuhp_set_state(cpu, st, target);
1cf4f629
TG
1421 /*
1422 * If the current CPU state is in the range of the AP hotplug thread,
1423 * then we need to kick the thread.
1424 */
8df3e07e 1425 if (st->state > CPUHP_TEARDOWN_CPU) {
4dddfb5f 1426 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1cf4f629
TG
1427 ret = cpuhp_kick_ap_work(cpu);
1428 /*
1429 * The AP side has done the error rollback already. Just
1430 * return the error code..
1431 */
1432 if (ret)
1433 goto out;
1434
1435 /*
1436 * We might have stopped still in the range of the AP hotplug
1437 * thread. Nothing to do anymore.
1438 */
8df3e07e 1439 if (st->state > CPUHP_TEARDOWN_CPU)
1cf4f629 1440 goto out;
4dddfb5f
PZ
1441
1442 st->target = target;
1cf4f629
TG
1443 }
1444 /*
8df3e07e 1445 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1cf4f629
TG
1446 * to do the further cleanups.
1447 */
a724632c 1448 ret = cpuhp_down_callbacks(cpu, st, target);
62f25069
VD
1449 if (ret && st->state < prev_state) {
1450 if (st->state == CPUHP_TEARDOWN_CPU) {
b7ba6d8d 1451 cpuhp_reset_state(cpu, st, prev_state);
62f25069
VD
1452 __cpuhp_kick_ap(st);
1453 } else {
1454 WARN(1, "DEAD callback error for CPU%d", cpu);
1455 }
3b9d6da6 1456 }
98458172 1457
1cf4f629 1458out:
8f553c49 1459 cpus_write_unlock();
941154bd
TG
1460 /*
1461 * Do post unplug cleanup. This is still protected against
1462 * concurrent CPU hotplug via cpu_add_remove_lock.
1463 */
1464 lockup_detector_cleanup();
a74cfffb 1465 arch_smt_update();
b22afcdf 1466 cpu_up_down_serialize_trainwrecks(tasks_frozen);
cff7d378 1467 return ret;
e3920fb4
RW
1468}
1469
cc1fe215
TG
1470static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1471{
bae1a962
KS
1472 /*
1473 * If the platform does not support hotplug, report it explicitly to
1474 * differentiate it from a transient offlining failure.
1475 */
1476 if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
1477 return -EOPNOTSUPP;
cc1fe215
TG
1478 if (cpu_hotplug_disabled)
1479 return -EBUSY;
1480 return _cpu_down(cpu, 0, target);
1481}
1482
33c3736e 1483static int cpu_down(unsigned int cpu, enum cpuhp_state target)
e3920fb4 1484{
9ea09af3 1485 int err;
e3920fb4 1486
d221938c 1487 cpu_maps_update_begin();
cc1fe215 1488 err = cpu_down_maps_locked(cpu, target);
d221938c 1489 cpu_maps_update_done();
1da177e4
LT
1490 return err;
1491}
4dddfb5f 1492
33c3736e
QY
1493/**
1494 * cpu_device_down - Bring down a cpu device
1495 * @dev: Pointer to the cpu device to offline
1496 *
1497 * This function is meant to be used by device core cpu subsystem only.
1498 *
1499 * Other subsystems should use remove_cpu() instead.
11bc021d
RD
1500 *
1501 * Return: %0 on success or a negative errno code
33c3736e
QY
1502 */
1503int cpu_device_down(struct device *dev)
af1f4045 1504{
33c3736e 1505 return cpu_down(dev->id, CPUHP_OFFLINE);
af1f4045 1506}
4dddfb5f 1507
93ef1429
QY
1508int remove_cpu(unsigned int cpu)
1509{
1510 int ret;
1511
1512 lock_device_hotplug();
1513 ret = device_offline(get_cpu_device(cpu));
1514 unlock_device_hotplug();
1515
1516 return ret;
1517}
1518EXPORT_SYMBOL_GPL(remove_cpu);
1519
0441a559
QY
1520void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1521{
1522 unsigned int cpu;
1523 int error;
1524
1525 cpu_maps_update_begin();
1526
1527 /*
1528 * Make certain the cpu I'm about to reboot on is online.
1529 *
1530 * This is inline to what migrate_to_reboot_cpu() already do.
1531 */
1532 if (!cpu_online(primary_cpu))
1533 primary_cpu = cpumask_first(cpu_online_mask);
1534
1535 for_each_online_cpu(cpu) {
1536 if (cpu == primary_cpu)
1537 continue;
1538
1539 error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1540 if (error) {
1541 pr_err("Failed to offline CPU%d - error=%d",
1542 cpu, error);
1543 break;
1544 }
1545 }
1546
1547 /*
1548 * Ensure all but the reboot CPU are offline.
1549 */
1550 BUG_ON(num_online_cpus() > 1);
1551
1552 /*
1553 * Make sure the CPUs won't be enabled by someone else after this
1554 * point. Kexec will reboot to a new kernel shortly resetting
1555 * everything along the way.
1556 */
1557 cpu_hotplug_disabled++;
1558
1559 cpu_maps_update_done();
af1f4045 1560}
4dddfb5f
PZ
1561
1562#else
1563#define takedown_cpu NULL
1da177e4
LT
1564#endif /*CONFIG_HOTPLUG_CPU*/
1565
4baa0afc 1566/**
ee1e714b 1567 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
4baa0afc
TG
1568 * @cpu: cpu that just started
1569 *
4baa0afc
TG
1570 * It must be called by the arch code on the new cpu, before the new cpu
1571 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1572 */
1573void notify_cpu_starting(unsigned int cpu)
1574{
1575 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1576 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1577
0c6d4576 1578 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
e797bda3 1579 cpumask_set_cpu(cpu, &cpus_booted_once_mask);
453e4108
VD
1580
1581 /*
1582 * STARTING must not fail!
1583 */
6f855b39 1584 cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
4baa0afc
TG
1585}
1586
949338e3 1587/*
9cd4f1a4 1588 * Called from the idle task. Wake up the controlling task which brings the
45178ac0
PZ
1589 * hotplug thread of the upcoming CPU up and then delegates the rest of the
1590 * online bringup to the hotplug thread.
949338e3 1591 */
8df3e07e 1592void cpuhp_online_idle(enum cpuhp_state state)
949338e3 1593{
8df3e07e 1594 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
8df3e07e
TG
1595
1596 /* Happens for the boot cpu */
1597 if (state != CPUHP_AP_ONLINE_IDLE)
1598 return;
1599
6f062123
TG
1600 cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
1601
45178ac0 1602 /*
6f062123 1603 * Unpark the stopper thread before we start the idle loop (and start
45178ac0
PZ
1604 * scheduling); this ensures the stopper task is always available.
1605 */
1606 stop_machine_unpark(smp_processor_id());
1607
8df3e07e 1608 st->state = CPUHP_AP_ONLINE_IDLE;
5ebe7742 1609 complete_ap_thread(st, true);
949338e3
TG
1610}
1611
e3920fb4 1612/* Requires cpu_add_remove_lock to be held */
af1f4045 1613static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1da177e4 1614{
cff7d378 1615 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
3bb5d2ee 1616 struct task_struct *idle;
2e1a3483 1617 int ret = 0;
1da177e4 1618
8f553c49 1619 cpus_write_lock();
38498a67 1620
757c989b 1621 if (!cpu_present(cpu)) {
5e5041f3
YI
1622 ret = -EINVAL;
1623 goto out;
1624 }
1625
757c989b 1626 /*
33c3736e
QY
1627 * The caller of cpu_up() might have raced with another
1628 * caller. Nothing to do.
757c989b
TG
1629 */
1630 if (st->state >= target)
38498a67 1631 goto out;
757c989b
TG
1632
1633 if (st->state == CPUHP_OFFLINE) {
1634 /* Let it fail before we try to bring the cpu up */
1635 idle = idle_thread_get(cpu);
1636 if (IS_ERR(idle)) {
1637 ret = PTR_ERR(idle);
1638 goto out;
1639 }
6d712b9b
DW
1640
1641 /*
1642 * Reset stale stack state from the last time this CPU was online.
1643 */
1644 scs_task_reset(idle);
1645 kasan_unpoison_task_stack(idle);
3bb5d2ee 1646 }
38498a67 1647
ba997462
TG
1648 cpuhp_tasks_frozen = tasks_frozen;
1649
b7ba6d8d 1650 cpuhp_set_state(cpu, st, target);
1cf4f629
TG
1651 /*
1652 * If the current CPU state is in the range of the AP hotplug thread,
1653 * then we need to kick the thread once more.
1654 */
8df3e07e 1655 if (st->state > CPUHP_BRINGUP_CPU) {
1cf4f629
TG
1656 ret = cpuhp_kick_ap_work(cpu);
1657 /*
1658 * The AP side has done the error rollback already. Just
1659 * return the error code..
1660 */
1661 if (ret)
1662 goto out;
1663 }
1664
1665 /*
1666 * Try to reach the target state. We max out on the BP at
8df3e07e 1667 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1cf4f629
TG
1668 * responsible for bringing it up to the target state.
1669 */
8df3e07e 1670 target = min((int)target, CPUHP_BRINGUP_CPU);
a724632c 1671 ret = cpuhp_up_callbacks(cpu, st, target);
38498a67 1672out:
8f553c49 1673 cpus_write_unlock();
a74cfffb 1674 arch_smt_update();
b22afcdf 1675 cpu_up_down_serialize_trainwrecks(tasks_frozen);
e3920fb4
RW
1676 return ret;
1677}
1678
33c3736e 1679static int cpu_up(unsigned int cpu, enum cpuhp_state target)
e3920fb4
RW
1680{
1681 int err = 0;
cf23422b 1682
e0b582ec 1683 if (!cpu_possible(cpu)) {
84117da5
FF
1684 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1685 cpu);
87d5e023 1686#if defined(CONFIG_IA64)
84117da5 1687 pr_err("please check additional_cpus= boot parameter\n");
73e753a5
KH
1688#endif
1689 return -EINVAL;
1690 }
e3920fb4 1691
01b0f197
TK
1692 err = try_online_node(cpu_to_node(cpu));
1693 if (err)
1694 return err;
cf23422b 1695
d221938c 1696 cpu_maps_update_begin();
e761b772
MK
1697
1698 if (cpu_hotplug_disabled) {
e3920fb4 1699 err = -EBUSY;
e761b772
MK
1700 goto out;
1701 }
05736e4a
TG
1702 if (!cpu_smt_allowed(cpu)) {
1703 err = -EPERM;
1704 goto out;
1705 }
e761b772 1706
af1f4045 1707 err = _cpu_up(cpu, 0, target);
e761b772 1708out:
d221938c 1709 cpu_maps_update_done();
e3920fb4
RW
1710 return err;
1711}
af1f4045 1712
33c3736e
QY
1713/**
1714 * cpu_device_up - Bring up a cpu device
1715 * @dev: Pointer to the cpu device to online
1716 *
1717 * This function is meant to be used by device core cpu subsystem only.
1718 *
1719 * Other subsystems should use add_cpu() instead.
11bc021d
RD
1720 *
1721 * Return: %0 on success or a negative errno code
33c3736e
QY
1722 */
1723int cpu_device_up(struct device *dev)
af1f4045 1724{
33c3736e 1725 return cpu_up(dev->id, CPUHP_ONLINE);
af1f4045 1726}
e3920fb4 1727
93ef1429
QY
1728int add_cpu(unsigned int cpu)
1729{
1730 int ret;
1731
1732 lock_device_hotplug();
1733 ret = device_online(get_cpu_device(cpu));
1734 unlock_device_hotplug();
1735
1736 return ret;
1737}
1738EXPORT_SYMBOL_GPL(add_cpu);
1739
d720f986
QY
1740/**
1741 * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1742 * @sleep_cpu: The cpu we hibernated on and should be brought up.
1743 *
1744 * On some architectures like arm64, we can hibernate on any CPU, but on
1745 * wake up the CPU we hibernated on might be offline as a side effect of
1746 * using maxcpus= for example.
11bc021d
RD
1747 *
1748 * Return: %0 on success or a negative errno code
d720f986
QY
1749 */
1750int bringup_hibernate_cpu(unsigned int sleep_cpu)
af1f4045 1751{
d720f986
QY
1752 int ret;
1753
1754 if (!cpu_online(sleep_cpu)) {
1755 pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
33c3736e 1756 ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
d720f986
QY
1757 if (ret) {
1758 pr_err("Failed to bring hibernate-CPU up!\n");
1759 return ret;
1760 }
1761 }
1762 return 0;
1763}
1764
18415f33
TG
1765static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
1766 enum cpuhp_state target)
b99a2659
QY
1767{
1768 unsigned int cpu;
1769
18415f33
TG
1770 for_each_cpu(cpu, mask) {
1771 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1772
18415f33
TG
1773 if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
1774 /*
1775 * If this failed then cpu_up() might have only
1776 * rolled back to CPUHP_BP_KICK_AP for the final
1777 * online. Clean it up. NOOP if already rolled back.
1778 */
1779 WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
1780 }
06c6796e
TG
1781
1782 if (!--ncpus)
1783 break;
b99a2659 1784 }
af1f4045 1785}
e3920fb4 1786
18415f33
TG
1787#ifdef CONFIG_HOTPLUG_PARALLEL
1788static bool __cpuhp_parallel_bringup __ro_after_init = true;
1789
1790static int __init parallel_bringup_parse_param(char *arg)
1791{
1792 return kstrtobool(arg, &__cpuhp_parallel_bringup);
1793}
1794early_param("cpuhp.parallel", parallel_bringup_parse_param);
1795
1796/*
1797 * On architectures which have enabled parallel bringup this invokes all BP
1798 * prepare states for each of the to be onlined APs first. The last state
1799 * sends the startup IPI to the APs. The APs proceed through the low level
1800 * bringup code in parallel and then wait for the control CPU to release
1801 * them one by one for the final onlining procedure.
1802 *
1803 * This avoids waiting for each AP to respond to the startup IPI in
1804 * CPUHP_BRINGUP_CPU.
1805 */
1806static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
1807{
1808 const struct cpumask *mask = cpu_present_mask;
1809
1810 if (__cpuhp_parallel_bringup)
1811 __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
1812 if (!__cpuhp_parallel_bringup)
1813 return false;
1814
1815 if (cpuhp_smt_aware()) {
1816 const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
1817 static struct cpumask tmp_mask __initdata;
1818
1819 /*
1820 * X86 requires that the SMT siblings remain stopped while the
1821 * primary thread does a microcode update, for various reasons.
1822 * Bring the primary threads up first.
1823 */
1824 cpumask_and(&tmp_mask, mask, pmask);
1825 cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
1826 cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
1827 /* Account for the online CPUs */
1828 ncpus -= num_online_cpus();
1829 if (!ncpus)
1830 return true;
1831 /* Create the mask for secondary CPUs */
1832 cpumask_andnot(&tmp_mask, mask, pmask);
1833 mask = &tmp_mask;
1834 }
1835
1836 /* Bring the not-yet started CPUs up */
1837 cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
1838 cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
1839 return true;
1840}
1841#else
1842static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
1843#endif /* CONFIG_HOTPLUG_PARALLEL */
1844
1845void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
1846{
1847 /* Try parallel bringup optimization if enabled */
1848 if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
1849 return;
1850
1851 /* Full per CPU serialized bringup */
1852 cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
1853}
1854
f3de4be9 1855#ifdef CONFIG_PM_SLEEP_SMP
e0b582ec 1856static cpumask_var_t frozen_cpus;
e3920fb4 1857
fb7fb84a 1858int freeze_secondary_cpus(int primary)
e3920fb4 1859{
d391e552 1860 int cpu, error = 0;
e3920fb4 1861
d221938c 1862 cpu_maps_update_begin();
9ca12ac0 1863 if (primary == -1) {
d391e552 1864 primary = cpumask_first(cpu_online_mask);
04d4e665
FW
1865 if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
1866 primary = housekeeping_any_cpu(HK_TYPE_TIMER);
9ca12ac0
NP
1867 } else {
1868 if (!cpu_online(primary))
1869 primary = cpumask_first(cpu_online_mask);
1870 }
1871
9ee349ad
XF
1872 /*
1873 * We take down all of the non-boot CPUs in one shot to avoid races
e3920fb4
RW
1874 * with userspace trying to use CPU hotplug at the same time.
1875 */
e0b582ec 1876 cpumask_clear(frozen_cpus);
6ad4c188 1877
84117da5 1878 pr_info("Disabling non-boot CPUs ...\n");
e3920fb4 1879 for_each_online_cpu(cpu) {
d391e552 1880 if (cpu == primary)
e3920fb4 1881 continue;
a66d955e 1882
fb7fb84a 1883 if (pm_wakeup_pending()) {
a66d955e
PK
1884 pr_info("Wakeup pending. Abort CPU freeze\n");
1885 error = -EBUSY;
1886 break;
1887 }
1888
bb3632c6 1889 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
af1f4045 1890 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
bb3632c6 1891 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
feae3203 1892 if (!error)
e0b582ec 1893 cpumask_set_cpu(cpu, frozen_cpus);
feae3203 1894 else {
84117da5 1895 pr_err("Error taking CPU%d down: %d\n", cpu, error);
e3920fb4
RW
1896 break;
1897 }
1898 }
86886e55 1899
89af7ba5 1900 if (!error)
e3920fb4 1901 BUG_ON(num_online_cpus() > 1);
89af7ba5 1902 else
84117da5 1903 pr_err("Non-boot CPUs are not disabled\n");
89af7ba5
VK
1904
1905 /*
1906 * Make sure the CPUs won't be enabled by someone else. We need to do
56555855
QY
1907 * this even in case of failure as all freeze_secondary_cpus() users are
1908 * supposed to do thaw_secondary_cpus() on the failure path.
89af7ba5
VK
1909 */
1910 cpu_hotplug_disabled++;
1911
d221938c 1912 cpu_maps_update_done();
e3920fb4
RW
1913 return error;
1914}
1915
56555855 1916void __weak arch_thaw_secondary_cpus_begin(void)
d0af9eed
SS
1917{
1918}
1919
56555855 1920void __weak arch_thaw_secondary_cpus_end(void)
d0af9eed
SS
1921{
1922}
1923
56555855 1924void thaw_secondary_cpus(void)
e3920fb4
RW
1925{
1926 int cpu, error;
1927
1928 /* Allow everyone to use the CPU hotplug again */
d221938c 1929 cpu_maps_update_begin();
01b41159 1930 __cpu_hotplug_enable();
e0b582ec 1931 if (cpumask_empty(frozen_cpus))
1d64b9cb 1932 goto out;
e3920fb4 1933
84117da5 1934 pr_info("Enabling non-boot CPUs ...\n");
d0af9eed 1935
56555855 1936 arch_thaw_secondary_cpus_begin();
d0af9eed 1937
e0b582ec 1938 for_each_cpu(cpu, frozen_cpus) {
bb3632c6 1939 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
af1f4045 1940 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
bb3632c6 1941 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
e3920fb4 1942 if (!error) {
84117da5 1943 pr_info("CPU%d is up\n", cpu);
e3920fb4
RW
1944 continue;
1945 }
84117da5 1946 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
e3920fb4 1947 }
d0af9eed 1948
56555855 1949 arch_thaw_secondary_cpus_end();
d0af9eed 1950
e0b582ec 1951 cpumask_clear(frozen_cpus);
1d64b9cb 1952out:
d221938c 1953 cpu_maps_update_done();
1da177e4 1954}
e0b582ec 1955
d7268a31 1956static int __init alloc_frozen_cpus(void)
e0b582ec
RR
1957{
1958 if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1959 return -ENOMEM;
1960 return 0;
1961}
1962core_initcall(alloc_frozen_cpus);
79cfbdfa 1963
79cfbdfa
SB
1964/*
1965 * When callbacks for CPU hotplug notifications are being executed, we must
1966 * ensure that the state of the system with respect to the tasks being frozen
1967 * or not, as reported by the notification, remains unchanged *throughout the
1968 * duration* of the execution of the callbacks.
1969 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1970 *
1971 * This synchronization is implemented by mutually excluding regular CPU
1972 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1973 * Hibernate notifications.
1974 */
1975static int
1976cpu_hotplug_pm_callback(struct notifier_block *nb,
1977 unsigned long action, void *ptr)
1978{
1979 switch (action) {
1980
1981 case PM_SUSPEND_PREPARE:
1982 case PM_HIBERNATION_PREPARE:
16e53dbf 1983 cpu_hotplug_disable();
79cfbdfa
SB
1984 break;
1985
1986 case PM_POST_SUSPEND:
1987 case PM_POST_HIBERNATION:
16e53dbf 1988 cpu_hotplug_enable();
79cfbdfa
SB
1989 break;
1990
1991 default:
1992 return NOTIFY_DONE;
1993 }
1994
1995 return NOTIFY_OK;
1996}
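As a hedged aside, the same disable/enable pairing used by the PM callback above is available to any other in-kernel path that must keep the CPU topology stable across a longer section; the caller below is hypothetical, only cpu_hotplug_disable() and cpu_hotplug_enable() are real API.

/* Hypothetical section that must not race with CPU online/offline. */
static void example_stable_topology_section(void)
{
	cpu_hotplug_disable();
	/* ... inspect or reconfigure per-CPU resources ... */
	cpu_hotplug_enable();
}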
1997
1998
d7268a31 1999static int __init cpu_hotplug_pm_sync_init(void)
79cfbdfa 2000{
6e32d479
FY
2001 /*
2002 * cpu_hotplug_pm_callback has higher priority than the x86
2003 * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
2004 * disabling cpu hotplug to avoid a cpu hotplug race.
2005 */
79cfbdfa
SB
2006 pm_notifier(cpu_hotplug_pm_callback, 0);
2007 return 0;
2008}
2009core_initcall(cpu_hotplug_pm_sync_init);
2010
f3de4be9 2011#endif /* CONFIG_PM_SLEEP_SMP */
68f4f1ec 2012
8ce371f9
PZ
2013int __boot_cpu_id;
2014
68f4f1ec 2015#endif /* CONFIG_SMP */
b8d317d1 2016
cff7d378 2017/* Boot processor state steps */
17a2f1ce 2018static struct cpuhp_step cpuhp_hp_states[] = {
cff7d378
TG
2019 [CPUHP_OFFLINE] = {
2020 .name = "offline",
3c1627e9
TG
2021 .startup.single = NULL,
2022 .teardown.single = NULL,
cff7d378
TG
2023 },
2024#ifdef CONFIG_SMP
2025 [CPUHP_CREATE_THREADS]= {
677f6646 2026 .name = "threads:prepare",
3c1627e9
TG
2027 .startup.single = smpboot_create_threads,
2028 .teardown.single = NULL,
757c989b 2029 .cant_stop = true,
cff7d378 2030 },
00e16c3d 2031 [CPUHP_PERF_PREPARE] = {
3c1627e9
TG
2032 .name = "perf:prepare",
2033 .startup.single = perf_event_init_cpu,
2034 .teardown.single = perf_event_exit_cpu,
00e16c3d 2035 },
3191dd5a
JD
2036 [CPUHP_RANDOM_PREPARE] = {
2037 .name = "random:prepare",
2038 .startup.single = random_prepare_cpu,
2039 .teardown.single = NULL,
2040 },
7ee681b2 2041 [CPUHP_WORKQUEUE_PREP] = {
3c1627e9
TG
2042 .name = "workqueue:prepare",
2043 .startup.single = workqueue_prepare_cpu,
2044 .teardown.single = NULL,
7ee681b2 2045 },
27590dc1 2046 [CPUHP_HRTIMERS_PREPARE] = {
3c1627e9
TG
2047 .name = "hrtimers:prepare",
2048 .startup.single = hrtimers_prepare_cpu,
2049 .teardown.single = hrtimers_dead_cpu,
27590dc1 2050 },
31487f83 2051 [CPUHP_SMPCFD_PREPARE] = {
677f6646 2052 .name = "smpcfd:prepare",
3c1627e9
TG
2053 .startup.single = smpcfd_prepare_cpu,
2054 .teardown.single = smpcfd_dead_cpu,
31487f83 2055 },
e6d4989a
RW
2056 [CPUHP_RELAY_PREPARE] = {
2057 .name = "relay:prepare",
2058 .startup.single = relay_prepare_cpu,
2059 .teardown.single = NULL,
2060 },
6731d4f1
SAS
2061 [CPUHP_SLAB_PREPARE] = {
2062 .name = "slab:prepare",
2063 .startup.single = slab_prepare_cpu,
2064 .teardown.single = slab_dead_cpu,
31487f83 2065 },
4df83742 2066 [CPUHP_RCUTREE_PREP] = {
677f6646 2067 .name = "RCU/tree:prepare",
3c1627e9
TG
2068 .startup.single = rcutree_prepare_cpu,
2069 .teardown.single = rcutree_dead_cpu,
4df83742 2070 },
4fae16df
RC
2071 /*
2072 * On the tear-down path, timers_dead_cpu() must be invoked
2073 * before blk_mq_queue_reinit_notify() from notify_dead(),
2074 * otherwise an RCU stall occurs.
2075 */
26456f87 2076 [CPUHP_TIMERS_PREPARE] = {
d018031f 2077 .name = "timers:prepare",
26456f87 2078 .startup.single = timers_prepare_cpu,
3c1627e9 2079 .teardown.single = timers_dead_cpu,
4fae16df 2080 },
a631be92
TG
2081
2082#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
2083 /*
2084 * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
2085 * the next step releases it.
2086 */
2087 [CPUHP_BP_KICK_AP] = {
2088 .name = "cpu:kick_ap",
2089 .startup.single = cpuhp_kick_ap_alive,
2090 },
2091
2092 /*
2093 * Waits for the AP to reach cpuhp_ap_sync_alive() and then
2094 * releases it for the complete bringup.
2095 */
2096 [CPUHP_BRINGUP_CPU] = {
2097 .name = "cpu:bringup",
2098 .startup.single = cpuhp_bringup_ap,
2099 .teardown.single = finish_cpu,
2100 .cant_stop = true,
2101 },
2102#else
2103 /*
2104 * All-in-one CPU bringup state which includes the kick alive.
2105 */
cff7d378
TG
2106 [CPUHP_BRINGUP_CPU] = {
2107 .name = "cpu:bringup",
3c1627e9 2108 .startup.single = bringup_cpu,
bf2c59fc 2109 .teardown.single = finish_cpu,
757c989b 2110 .cant_stop = true,
4baa0afc 2111 },
a631be92 2112#endif
d10ef6f9
TG
2113 /* Final state before CPU kills itself */
2114 [CPUHP_AP_IDLE_DEAD] = {
2115 .name = "idle:dead",
2116 },
2117 /*
2118 * Last state before CPU enters the idle loop to die. Transient state
2119 * for synchronization.
2120 */
2121 [CPUHP_AP_OFFLINE] = {
2122 .name = "ap:offline",
2123 .cant_stop = true,
2124 },
9cf7243d
TG
2125 /* First state is scheduler control. Interrupts are disabled */
2126 [CPUHP_AP_SCHED_STARTING] = {
2127 .name = "sched:starting",
3c1627e9
TG
2128 .startup.single = sched_cpu_starting,
2129 .teardown.single = sched_cpu_dying,
9cf7243d 2130 },
4df83742 2131 [CPUHP_AP_RCUTREE_DYING] = {
677f6646 2132 .name = "RCU/tree:dying",
3c1627e9
TG
2133 .startup.single = NULL,
2134 .teardown.single = rcutree_dying_cpu,
4baa0afc 2135 },
46febd37
LJ
2136 [CPUHP_AP_SMPCFD_DYING] = {
2137 .name = "smpcfd:dying",
2138 .startup.single = NULL,
2139 .teardown.single = smpcfd_dying_cpu,
2140 },
d10ef6f9
TG
2141 /* Entry state on starting. Interrupts enabled from here on. Transient
2142 * state for synchronization. */
2143 [CPUHP_AP_ONLINE] = {
2144 .name = "ap:online",
2145 },
17a2f1ce 2146 /*
1cf12e08 2147 * Handled on control processor until the plugged processor manages
17a2f1ce
LJ
2148 * this itself.
2149 */
2150 [CPUHP_TEARDOWN_CPU] = {
2151 .name = "cpu:teardown",
2152 .startup.single = NULL,
2153 .teardown.single = takedown_cpu,
2154 .cant_stop = true,
2155 },
1cf12e08
TG
2156
2157 [CPUHP_AP_SCHED_WAIT_EMPTY] = {
2158 .name = "sched:waitempty",
2159 .startup.single = NULL,
2160 .teardown.single = sched_cpu_wait_empty,
2161 },
2162
d10ef6f9 2163 /* Handle smpboot threads park/unpark */
1cf4f629 2164 [CPUHP_AP_SMPBOOT_THREADS] = {
677f6646 2165 .name = "smpboot/threads:online",
3c1627e9 2166 .startup.single = smpboot_unpark_threads,
c4de6569 2167 .teardown.single = smpboot_park_threads,
1cf4f629 2168 },
c5cb83bb
TG
2169 [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
2170 .name = "irq/affinity:online",
2171 .startup.single = irq_affinity_online_cpu,
2172 .teardown.single = NULL,
2173 },
00e16c3d 2174 [CPUHP_AP_PERF_ONLINE] = {
3c1627e9
TG
2175 .name = "perf:online",
2176 .startup.single = perf_event_init_cpu,
2177 .teardown.single = perf_event_exit_cpu,
00e16c3d 2178 },
9cf57731
PZ
2179 [CPUHP_AP_WATCHDOG_ONLINE] = {
2180 .name = "lockup_detector:online",
2181 .startup.single = lockup_detector_online_cpu,
2182 .teardown.single = lockup_detector_offline_cpu,
2183 },
7ee681b2 2184 [CPUHP_AP_WORKQUEUE_ONLINE] = {
3c1627e9
TG
2185 .name = "workqueue:online",
2186 .startup.single = workqueue_online_cpu,
2187 .teardown.single = workqueue_offline_cpu,
7ee681b2 2188 },
3191dd5a
JD
2189 [CPUHP_AP_RANDOM_ONLINE] = {
2190 .name = "random:online",
2191 .startup.single = random_online_cpu,
2192 .teardown.single = NULL,
2193 },
4df83742 2194 [CPUHP_AP_RCUTREE_ONLINE] = {
677f6646 2195 .name = "RCU/tree:online",
3c1627e9
TG
2196 .startup.single = rcutree_online_cpu,
2197 .teardown.single = rcutree_offline_cpu,
4df83742 2198 },
4baa0afc 2199#endif
d10ef6f9
TG
2200 /*
2201 * The dynamically registered state space is here
2202 */
2203
aaddd7d1
TG
2204#ifdef CONFIG_SMP
2205 /* Last state is scheduler control setting the cpu active */
2206 [CPUHP_AP_ACTIVE] = {
2207 .name = "sched:active",
3c1627e9
TG
2208 .startup.single = sched_cpu_activate,
2209 .teardown.single = sched_cpu_deactivate,
aaddd7d1
TG
2210 },
2211#endif
2212
d10ef6f9 2213 /* CPU is fully up and running. */
4baa0afc
TG
2214 [CPUHP_ONLINE] = {
2215 .name = "online",
3c1627e9
TG
2216 .startup.single = NULL,
2217 .teardown.single = NULL,
4baa0afc
TG
2218 },
2219};
2220
5b7aa87e
TG
2221/* Sanity check for callbacks */
2222static int cpuhp_cb_check(enum cpuhp_state state)
2223{
2224 if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
2225 return -EINVAL;
2226 return 0;
2227}
2228
dc280d93
TG
2229/*
2230 * Returns a free slot for dynamic state assignment. The states are
2231 * protected by the cpuhp_state_mutex and an empty slot is identified
2232 * by having no name assigned.
2233 */
2234static int cpuhp_reserve_state(enum cpuhp_state state)
2235{
4205e478
TG
2236 enum cpuhp_state i, end;
2237 struct cpuhp_step *step;
dc280d93 2238
4205e478
TG
2239 switch (state) {
2240 case CPUHP_AP_ONLINE_DYN:
17a2f1ce 2241 step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
4205e478
TG
2242 end = CPUHP_AP_ONLINE_DYN_END;
2243 break;
2244 case CPUHP_BP_PREPARE_DYN:
17a2f1ce 2245 step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
4205e478
TG
2246 end = CPUHP_BP_PREPARE_DYN_END;
2247 break;
2248 default:
2249 return -EINVAL;
2250 }
2251
2252 for (i = state; i <= end; i++, step++) {
2253 if (!step->name)
dc280d93
TG
2254 return i;
2255 }
2256 WARN(1, "No more dynamic states available for CPU hotplug\n");
2257 return -ENOSPC;
2258}
2259
2260static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
2261 int (*startup)(unsigned int cpu),
2262 int (*teardown)(unsigned int cpu),
2263 bool multi_instance)
5b7aa87e
TG
2264{
2265 /* (Un)Install the callbacks for further cpu hotplug operations */
2266 struct cpuhp_step *sp;
dc280d93 2267 int ret = 0;
5b7aa87e 2268
0c96b273
EB
2269 /*
2270 * If name is NULL, then the state gets removed.
2271 *
2272 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
2273 * the first allocation from these dynamic ranges, so the removal
2274 * would trigger a new allocation and clear the wrong (already
2275 * empty) state, leaving the callbacks of the to be cleared state
2276 * dangling, which causes wreckage on the next hotplug operation.
2277 */
2278 if (name && (state == CPUHP_AP_ONLINE_DYN ||
2279 state == CPUHP_BP_PREPARE_DYN)) {
dc280d93
TG
2280 ret = cpuhp_reserve_state(state);
2281 if (ret < 0)
dc434e05 2282 return ret;
dc280d93
TG
2283 state = ret;
2284 }
5b7aa87e 2285 sp = cpuhp_get_step(state);
dc434e05
SAS
2286 if (name && sp->name)
2287 return -EBUSY;
2288
3c1627e9
TG
2289 sp->startup.single = startup;
2290 sp->teardown.single = teardown;
5b7aa87e 2291 sp->name = name;
cf392d10
TG
2292 sp->multi_instance = multi_instance;
2293 INIT_HLIST_HEAD(&sp->list);
dc280d93 2294 return ret;
5b7aa87e
TG
2295}
2296
2297static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
2298{
3c1627e9 2299 return cpuhp_get_step(state)->teardown.single;
5b7aa87e
TG
2300}
2301
5b7aa87e
TG
2302/*
2303 * Call the startup/teardown function for a step either on the AP or
2304 * on the current CPU.
2305 */
cf392d10
TG
2306static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
2307 struct hlist_node *node)
5b7aa87e 2308{
a724632c 2309 struct cpuhp_step *sp = cpuhp_get_step(state);
5b7aa87e
TG
2310 int ret;
2311
4dddfb5f
PZ
2312 /*
2313 * If there's nothing to do, we're done.
2314 * Relies on the union for multi_instance.
2315 */
453e4108 2316 if (cpuhp_step_empty(bringup, sp))
5b7aa87e 2317 return 0;
5b7aa87e
TG
2318 /*
2319 * The non-AP-bound callbacks can fail on bringup. On teardown,
2320 * e.g. module removal, we crash for now.
2321 */
1cf4f629
TG
2322#ifdef CONFIG_SMP
2323 if (cpuhp_is_ap_state(state))
cf392d10 2324 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1cf4f629 2325 else
96abb968 2326 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1cf4f629 2327#else
96abb968 2328 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1cf4f629 2329#endif
5b7aa87e
TG
2330 BUG_ON(ret && !bringup);
2331 return ret;
2332}
2333
2334/*
2335 * Called from __cpuhp_setup_state on a recoverable failure.
2336 *
2337 * Note: The teardown callbacks for rollback are not allowed to fail!
2338 */
2339static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
cf392d10 2340 struct hlist_node *node)
5b7aa87e
TG
2341{
2342 int cpu;
2343
5b7aa87e
TG
2344 /* Roll back the already executed steps on the other cpus */
2345 for_each_present_cpu(cpu) {
2346 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2347 int cpustate = st->state;
2348
2349 if (cpu >= failedcpu)
2350 break;
2351
2352 /* Did we invoke the startup call on that cpu ? */
2353 if (cpustate >= state)
cf392d10 2354 cpuhp_issue_call(cpu, state, false, node);
5b7aa87e
TG
2355 }
2356}
2357
9805c673
TG
2358int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
2359 struct hlist_node *node,
2360 bool invoke)
cf392d10
TG
2361{
2362 struct cpuhp_step *sp;
2363 int cpu;
2364 int ret;
2365
9805c673
TG
2366 lockdep_assert_cpus_held();
2367
cf392d10
TG
2368 sp = cpuhp_get_step(state);
2369 if (sp->multi_instance == false)
2370 return -EINVAL;
2371
dc434e05 2372 mutex_lock(&cpuhp_state_mutex);
cf392d10 2373
3c1627e9 2374 if (!invoke || !sp->startup.multi)
cf392d10
TG
2375 goto add_node;
2376
2377 /*
2378 * Try to call the startup callback for each present cpu
2379 * depending on the hotplug state of the cpu.
2380 */
2381 for_each_present_cpu(cpu) {
2382 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2383 int cpustate = st->state;
2384
2385 if (cpustate < state)
2386 continue;
2387
2388 ret = cpuhp_issue_call(cpu, state, true, node);
2389 if (ret) {
3c1627e9 2390 if (sp->teardown.multi)
cf392d10 2391 cpuhp_rollback_install(cpu, state, node);
dc434e05 2392 goto unlock;
cf392d10
TG
2393 }
2394 }
2395add_node:
2396 ret = 0;
cf392d10 2397 hlist_add_head(node, &sp->list);
dc434e05 2398unlock:
cf392d10 2399 mutex_unlock(&cpuhp_state_mutex);
9805c673
TG
2400 return ret;
2401}
2402
2403int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
2404 bool invoke)
2405{
2406 int ret;
2407
2408 cpus_read_lock();
2409 ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
8f553c49 2410 cpus_read_unlock();
cf392d10
TG
2411 return ret;
2412}
2413EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
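A hedged sketch of how a driver typically consumes the multi-instance variant: it embeds a struct hlist_node in its per-device data and registers each instance through the cpuhp_state_add_instance() wrapper. The mydrv names are invented for illustration; only the cpuhp_* call is existing API.

/* Hypothetical per-device data carrying the hotplug instance node. */
struct mydrv_device {
	struct hlist_node node;
	/* ... driver specific state ... */
};

static int mydrv_register_instance(enum cpuhp_state online_state,
				   struct mydrv_device *md)
{
	/*
	 * Runs the state's startup.multi callback on every CPU at or
	 * above @online_state and links @md->node into the state.
	 */
	return cpuhp_state_add_instance(online_state, &md->node);
}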
2414
5b7aa87e 2415/**
71def423 2416 * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
dc280d93 2417 * @state: The state to setup
ed3cd1da 2418 * @name: Name of the step
dc280d93
TG
2419 * @invoke: If true, the startup function is invoked for cpus where
2420 * cpu state >= @state
2421 * @startup: startup callback function
2422 * @teardown: teardown callback function
2423 * @multi_instance: State is set up for multiple instances which get
2424 * added afterwards.
5b7aa87e 2425 *
71def423 2426 * The caller needs to hold cpus read locked while calling this function.
11bc021d 2427 * Return:
512f0980 2428 * On success:
11bc021d 2429 * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
512f0980
BO
2430 * 0 for all other states
2431 * On failure: proper (negative) error code
5b7aa87e 2432 */
71def423
SAS
2433int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
2434 const char *name, bool invoke,
2435 int (*startup)(unsigned int cpu),
2436 int (*teardown)(unsigned int cpu),
2437 bool multi_instance)
5b7aa87e
TG
2438{
2439 int cpu, ret = 0;
b9d9d691 2440 bool dynstate;
5b7aa87e 2441
71def423
SAS
2442 lockdep_assert_cpus_held();
2443
5b7aa87e
TG
2444 if (cpuhp_cb_check(state) || !name)
2445 return -EINVAL;
2446
dc434e05 2447 mutex_lock(&cpuhp_state_mutex);
5b7aa87e 2448
dc280d93
TG
2449 ret = cpuhp_store_callbacks(state, name, startup, teardown,
2450 multi_instance);
5b7aa87e 2451
b9d9d691
TG
2452 dynstate = state == CPUHP_AP_ONLINE_DYN;
2453 if (ret > 0 && dynstate) {
2454 state = ret;
2455 ret = 0;
2456 }
2457
dc280d93 2458 if (ret || !invoke || !startup)
5b7aa87e
TG
2459 goto out;
2460
2461 /*
2462 * Try to call the startup callback for each present cpu
2463 * depending on the hotplug state of the cpu.
2464 */
2465 for_each_present_cpu(cpu) {
2466 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2467 int cpustate = st->state;
2468
2469 if (cpustate < state)
2470 continue;
2471
cf392d10 2472 ret = cpuhp_issue_call(cpu, state, true, NULL);
5b7aa87e 2473 if (ret) {
a724632c 2474 if (teardown)
cf392d10
TG
2475 cpuhp_rollback_install(cpu, state, NULL);
2476 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
5b7aa87e
TG
2477 goto out;
2478 }
2479 }
2480out:
dc434e05 2481 mutex_unlock(&cpuhp_state_mutex);
dc280d93
TG
2482 /*
2483 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
2484 * dynamically allocated state in case of success.
2485 */
b9d9d691 2486 if (!ret && dynstate)
5b7aa87e
TG
2487 return state;
2488 return ret;
2489}
71def423
SAS
2490EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2491
2492int __cpuhp_setup_state(enum cpuhp_state state,
2493 const char *name, bool invoke,
2494 int (*startup)(unsigned int cpu),
2495 int (*teardown)(unsigned int cpu),
2496 bool multi_instance)
2497{
2498 int ret;
2499
2500 cpus_read_lock();
2501 ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
2502 teardown, multi_instance);
2503 cpus_read_unlock();
2504 return ret;
2505}
5b7aa87e
TG
2506EXPORT_SYMBOL(__cpuhp_setup_state);
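To make the return-value convention above concrete, here is a hedged sketch of a driver registering a dynamically allocated online state through the cpuhp_setup_state() convenience wrapper around __cpuhp_setup_state(). The mydrv names are hypothetical.

/* Hypothetical callbacks; the startup side may fail and be rolled back. */
static int mydrv_cpu_online(unsigned int cpu)
{
	return 0;
}

static int mydrv_cpu_offline(unsigned int cpu)
{
	return 0;	/* teardown is not allowed to fail */
}

static enum cpuhp_state mydrv_hp_online;

static int __init mydrv_init(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mydrv:online",
				mydrv_cpu_online, mydrv_cpu_offline);
	if (ret < 0)
		return ret;

	mydrv_hp_online = ret;	/* dynamically allocated state number */
	return 0;
}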
2507
cf392d10
TG
2508int __cpuhp_state_remove_instance(enum cpuhp_state state,
2509 struct hlist_node *node, bool invoke)
2510{
2511 struct cpuhp_step *sp = cpuhp_get_step(state);
2512 int cpu;
2513
2514 BUG_ON(cpuhp_cb_check(state));
2515
2516 if (!sp->multi_instance)
2517 return -EINVAL;
2518
8f553c49 2519 cpus_read_lock();
dc434e05
SAS
2520 mutex_lock(&cpuhp_state_mutex);
2521
cf392d10
TG
2522 if (!invoke || !cpuhp_get_teardown_cb(state))
2523 goto remove;
2524 /*
2525 * Call the teardown callback for each present cpu depending
2526 * on the hotplug state of the cpu. This function is not
2527 * allowed to fail currently!
2528 */
2529 for_each_present_cpu(cpu) {
2530 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2531 int cpustate = st->state;
2532
2533 if (cpustate >= state)
2534 cpuhp_issue_call(cpu, state, false, node);
2535 }
2536
2537remove:
cf392d10
TG
2538 hlist_del(node);
2539 mutex_unlock(&cpuhp_state_mutex);
8f553c49 2540 cpus_read_unlock();
cf392d10
TG
2541
2542 return 0;
2543}
2544EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
dc434e05 2545
5b7aa87e 2546/**
71def423 2547 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
5b7aa87e
TG
2548 * @state: The state to remove
2549 * @invoke: If true, the teardown function is invoked for cpus where
2550 * cpu state >= @state
2551 *
71def423 2552 * The caller needs to hold cpus read locked while calling this function.
5b7aa87e
TG
2553 * The teardown callback is currently not allowed to fail. Think
2554 * about module removal!
2555 */
71def423 2556void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
5b7aa87e 2557{
cf392d10 2558 struct cpuhp_step *sp = cpuhp_get_step(state);
5b7aa87e
TG
2559 int cpu;
2560
2561 BUG_ON(cpuhp_cb_check(state));
2562
71def423 2563 lockdep_assert_cpus_held();
5b7aa87e 2564
dc434e05 2565 mutex_lock(&cpuhp_state_mutex);
cf392d10
TG
2566 if (sp->multi_instance) {
2567 WARN(!hlist_empty(&sp->list),
2568 "Error: Removing state %d which has instances left.\n",
2569 state);
2570 goto remove;
2571 }
2572
a724632c 2573 if (!invoke || !cpuhp_get_teardown_cb(state))
5b7aa87e
TG
2574 goto remove;
2575
2576 /*
2577 * Call the teardown callback for each present cpu depending
2578 * on the hotplug state of the cpu. This function is not
2579 * allowed to fail currently!
2580 */
2581 for_each_present_cpu(cpu) {
2582 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2583 int cpustate = st->state;
2584
2585 if (cpustate >= state)
cf392d10 2586 cpuhp_issue_call(cpu, state, false, NULL);
5b7aa87e
TG
2587 }
2588remove:
cf392d10 2589 cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
dc434e05 2590 mutex_unlock(&cpuhp_state_mutex);
71def423
SAS
2591}
2592EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2593
2594void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2595{
2596 cpus_read_lock();
2597 __cpuhp_remove_state_cpuslocked(state, invoke);
8f553c49 2598 cpus_read_unlock();
5b7aa87e
TG
2599}
2600EXPORT_SYMBOL(__cpuhp_remove_state);
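Continuing the hypothetical mydrv sketch from the setup example above, removal on module exit is a single call; cpuhp_remove_state() is the convenience wrapper around __cpuhp_remove_state() and takes the cpus read lock itself.

static void __exit mydrv_exit(void)
{
	/*
	 * Invokes the teardown callback on all online CPUs and then
	 * frees the dynamically allocated slot stored by mydrv_init().
	 */
	cpuhp_remove_state(mydrv_hp_online);
}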
2601
dc8d37ed
AB
2602#ifdef CONFIG_HOTPLUG_SMT
2603static void cpuhp_offline_cpu_device(unsigned int cpu)
2604{
2605 struct device *dev = get_cpu_device(cpu);
2606
2607 dev->offline = true;
2608 /* Tell user space about the state change */
2609 kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2610}
2611
2612static void cpuhp_online_cpu_device(unsigned int cpu)
2613{
2614 struct device *dev = get_cpu_device(cpu);
2615
2616 dev->offline = false;
2617 /* Tell user space about the state change */
2618 kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2619}
2620
2621int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2622{
2623 int cpu, ret = 0;
2624
2625 cpu_maps_update_begin();
2626 for_each_online_cpu(cpu) {
2627 if (topology_is_primary_thread(cpu))
2628 continue;
2629 ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2630 if (ret)
2631 break;
2632 /*
2633 * As this needs to hold the cpu maps lock it's impossible
2634 * to call device_offline() because that ends up calling
2635 * cpu_down() which takes cpu maps lock. cpu maps lock
2636 * needs to be held as this might race against in-kernel
2637 * abusers of the hotplug machinery (thermal management).
2638 *
2639 * So nothing would update device:offline state. That would
2640 * leave the sysfs entry stale and prevent onlining after
2641 * smt control has been changed to 'off' again. This is
2642 * called under the sysfs hotplug lock, so it is properly
2643 * serialized against the regular offline usage.
2644 */
2645 cpuhp_offline_cpu_device(cpu);
2646 }
2647 if (!ret)
2648 cpu_smt_control = ctrlval;
2649 cpu_maps_update_done();
2650 return ret;
2651}
2652
2653int cpuhp_smt_enable(void)
2654{
2655 int cpu, ret = 0;
2656
2657 cpu_maps_update_begin();
2658 cpu_smt_control = CPU_SMT_ENABLED;
2659 for_each_present_cpu(cpu) {
2660 /* Skip online CPUs and CPUs on offline nodes */
2661 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2662 continue;
2663 ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2664 if (ret)
2665 break;
2666 /* See comment in cpuhp_smt_disable() */
2667 cpuhp_online_cpu_device(cpu);
2668 }
2669 cpu_maps_update_done();
2670 return ret;
2671}
2672#endif
2673
98f8cdce 2674#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1782dc87
Y
2675static ssize_t state_show(struct device *dev,
2676 struct device_attribute *attr, char *buf)
98f8cdce
TG
2677{
2678 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2679
2680 return sprintf(buf, "%d\n", st->state);
2681}
1782dc87 2682static DEVICE_ATTR_RO(state);
98f8cdce 2683
1782dc87
Y
2684static ssize_t target_store(struct device *dev, struct device_attribute *attr,
2685 const char *buf, size_t count)
757c989b
TG
2686{
2687 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2688 struct cpuhp_step *sp;
2689 int target, ret;
2690
2691 ret = kstrtoint(buf, 10, &target);
2692 if (ret)
2693 return ret;
2694
2695#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2696 if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2697 return -EINVAL;
2698#else
2699 if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2700 return -EINVAL;
2701#endif
2702
2703 ret = lock_device_hotplug_sysfs();
2704 if (ret)
2705 return ret;
2706
2707 mutex_lock(&cpuhp_state_mutex);
2708 sp = cpuhp_get_step(target);
2709 ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2710 mutex_unlock(&cpuhp_state_mutex);
2711 if (ret)
40da1b11 2712 goto out;
757c989b
TG
2713
2714 if (st->state < target)
33c3736e 2715 ret = cpu_up(dev->id, target);
64ea6e44 2716 else if (st->state > target)
33c3736e 2717 ret = cpu_down(dev->id, target);
64ea6e44
PA
2718 else if (WARN_ON(st->target != target))
2719 st->target = target;
40da1b11 2720out:
757c989b
TG
2721 unlock_device_hotplug();
2722 return ret ? ret : count;
2723}
2724
1782dc87
Y
2725static ssize_t target_show(struct device *dev,
2726 struct device_attribute *attr, char *buf)
98f8cdce
TG
2727{
2728 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2729
2730 return sprintf(buf, "%d\n", st->target);
2731}
1782dc87 2732static DEVICE_ATTR_RW(target);
1db49484 2733
1782dc87
Y
2734static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
2735 const char *buf, size_t count)
1db49484
PZ
2736{
2737 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2738 struct cpuhp_step *sp;
2739 int fail, ret;
2740
2741 ret = kstrtoint(buf, 10, &fail);
2742 if (ret)
2743 return ret;
2744
3ae70c25
VD
2745 if (fail == CPUHP_INVALID) {
2746 st->fail = fail;
2747 return count;
2748 }
2749
33d4a5a7
ET
2750 if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2751 return -EINVAL;
2752
1db49484
PZ
2753 /*
2754 * Cannot fail STARTING/DYING callbacks.
2755 */
2756 if (cpuhp_is_atomic_state(fail))
2757 return -EINVAL;
2758
62f25069
VD
2759 /*
2760 * DEAD callbacks cannot fail...
2761 * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
2762 * triggers the STARTING callbacks, so a failure in this state would
2763 * hinder rollback.
2764 */
2765 if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
2766 return -EINVAL;
2767
1db49484
PZ
2768 /*
2769 * Cannot fail anything that doesn't have callbacks.
2770 */
2771 mutex_lock(&cpuhp_state_mutex);
2772 sp = cpuhp_get_step(fail);
2773 if (!sp->startup.single && !sp->teardown.single)
2774 ret = -EINVAL;
2775 mutex_unlock(&cpuhp_state_mutex);
2776 if (ret)
2777 return ret;
2778
2779 st->fail = fail;
2780
2781 return count;
2782}
2783
1782dc87
Y
2784static ssize_t fail_show(struct device *dev,
2785 struct device_attribute *attr, char *buf)
1db49484
PZ
2786{
2787 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2788
2789 return sprintf(buf, "%d\n", st->fail);
2790}
2791
1782dc87 2792static DEVICE_ATTR_RW(fail);
1db49484 2793
98f8cdce
TG
2794static struct attribute *cpuhp_cpu_attrs[] = {
2795 &dev_attr_state.attr,
2796 &dev_attr_target.attr,
1db49484 2797 &dev_attr_fail.attr,
98f8cdce
TG
2798 NULL
2799};
2800
993647a2 2801static const struct attribute_group cpuhp_cpu_attr_group = {
98f8cdce
TG
2802 .attrs = cpuhp_cpu_attrs,
2803 .name = "hotplug",
2804 NULL
2805};
2806
1782dc87 2807static ssize_t states_show(struct device *dev,
98f8cdce
TG
2808 struct device_attribute *attr, char *buf)
2809{
2810 ssize_t cur, res = 0;
2811 int i;
2812
2813 mutex_lock(&cpuhp_state_mutex);
757c989b 2814 for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
98f8cdce
TG
2815 struct cpuhp_step *sp = cpuhp_get_step(i);
2816
2817 if (sp->name) {
2818 cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2819 buf += cur;
2820 res += cur;
2821 }
2822 }
2823 mutex_unlock(&cpuhp_state_mutex);
2824 return res;
2825}
1782dc87 2826static DEVICE_ATTR_RO(states);
98f8cdce
TG
2827
2828static struct attribute *cpuhp_cpu_root_attrs[] = {
2829 &dev_attr_states.attr,
2830 NULL
2831};
2832
993647a2 2833static const struct attribute_group cpuhp_cpu_root_attr_group = {
98f8cdce
TG
2834 .attrs = cpuhp_cpu_root_attrs,
2835 .name = "hotplug",
2836 NULL
2837};
2838
05736e4a
TG
2839#ifdef CONFIG_HOTPLUG_SMT
2840
05736e4a 2841static ssize_t
de7b77e5
JP
2842__store_smt_control(struct device *dev, struct device_attribute *attr,
2843 const char *buf, size_t count)
05736e4a
TG
2844{
2845 int ctrlval, ret;
2846
2847 if (sysfs_streq(buf, "on"))
2848 ctrlval = CPU_SMT_ENABLED;
2849 else if (sysfs_streq(buf, "off"))
2850 ctrlval = CPU_SMT_DISABLED;
2851 else if (sysfs_streq(buf, "forceoff"))
2852 ctrlval = CPU_SMT_FORCE_DISABLED;
2853 else
2854 return -EINVAL;
2855
2856 if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2857 return -EPERM;
2858
2859 if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2860 return -ENODEV;
2861
2862 ret = lock_device_hotplug_sysfs();
2863 if (ret)
2864 return ret;
2865
2866 if (ctrlval != cpu_smt_control) {
2867 switch (ctrlval) {
2868 case CPU_SMT_ENABLED:
215af549 2869 ret = cpuhp_smt_enable();
05736e4a
TG
2870 break;
2871 case CPU_SMT_DISABLED:
2872 case CPU_SMT_FORCE_DISABLED:
2873 ret = cpuhp_smt_disable(ctrlval);
2874 break;
2875 }
2876 }
2877
2878 unlock_device_hotplug();
2879 return ret ? ret : count;
2880}
de7b77e5
JP
2881
2882#else /* !CONFIG_HOTPLUG_SMT */
2883static ssize_t
2884__store_smt_control(struct device *dev, struct device_attribute *attr,
2885 const char *buf, size_t count)
2886{
2887 return -ENODEV;
2888}
2889#endif /* CONFIG_HOTPLUG_SMT */
2890
2891static const char *smt_states[] = {
2892 [CPU_SMT_ENABLED] = "on",
2893 [CPU_SMT_DISABLED] = "off",
2894 [CPU_SMT_FORCE_DISABLED] = "forceoff",
2895 [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2896 [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2897};
2898
1782dc87
Y
2899static ssize_t control_show(struct device *dev,
2900 struct device_attribute *attr, char *buf)
de7b77e5
JP
2901{
2902 const char *state = smt_states[cpu_smt_control];
2903
2904 return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2905}
2906
1782dc87
Y
2907static ssize_t control_store(struct device *dev, struct device_attribute *attr,
2908 const char *buf, size_t count)
de7b77e5
JP
2909{
2910 return __store_smt_control(dev, attr, buf, count);
2911}
1782dc87 2912static DEVICE_ATTR_RW(control);
05736e4a 2913
1782dc87
Y
2914static ssize_t active_show(struct device *dev,
2915 struct device_attribute *attr, char *buf)
05736e4a 2916{
de7b77e5 2917 return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
05736e4a 2918}
1782dc87 2919static DEVICE_ATTR_RO(active);
05736e4a
TG
2920
2921static struct attribute *cpuhp_smt_attrs[] = {
2922 &dev_attr_control.attr,
2923 &dev_attr_active.attr,
2924 NULL
2925};
2926
2927static const struct attribute_group cpuhp_smt_attr_group = {
2928 .attrs = cpuhp_smt_attrs,
2929 .name = "smt",
2930 NULL
2931};
2932
de7b77e5 2933static int __init cpu_smt_sysfs_init(void)
05736e4a 2934{
db281d59
GKH
2935 struct device *dev_root;
2936 int ret = -ENODEV;
2937
2938 dev_root = bus_get_dev_root(&cpu_subsys);
2939 if (dev_root) {
2940 ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group);
2941 put_device(dev_root);
2942 }
2943 return ret;
05736e4a
TG
2944}
2945
98f8cdce
TG
2946static int __init cpuhp_sysfs_init(void)
2947{
db281d59 2948 struct device *dev_root;
98f8cdce
TG
2949 int cpu, ret;
2950
de7b77e5 2951 ret = cpu_smt_sysfs_init();
05736e4a
TG
2952 if (ret)
2953 return ret;
2954
db281d59
GKH
2955 dev_root = bus_get_dev_root(&cpu_subsys);
2956 if (dev_root) {
2957 ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group);
2958 put_device(dev_root);
2959 if (ret)
2960 return ret;
2961 }
98f8cdce
TG
2962
2963 for_each_possible_cpu(cpu) {
2964 struct device *dev = get_cpu_device(cpu);
2965
2966 if (!dev)
2967 continue;
2968 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2969 if (ret)
2970 return ret;
2971 }
2972 return 0;
2973}
2974device_initcall(cpuhp_sysfs_init);
de7b77e5 2975#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
98f8cdce 2976
e56b3bc7
LT
2977/*
2978 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2979 * represents all NR_CPUS bits binary values of 1<<nr.
2980 *
e0b582ec 2981 * It is used by cpumask_of() to get a constant address to a CPU
e56b3bc7
LT
2982 * mask value that has a single bit set only.
2983 */
b8d317d1 2984
e56b3bc7 2985/* cpu_bit_bitmap[0] is empty - so we can back into it */
4d51985e 2986#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
e56b3bc7
LT
2987#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2988#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2989#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
b8d317d1 2990
e56b3bc7
LT
2991const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2992
2993 MASK_DECLARE_8(0), MASK_DECLARE_8(8),
2994 MASK_DECLARE_8(16), MASK_DECLARE_8(24),
2995#if BITS_PER_LONG > 32
2996 MASK_DECLARE_8(32), MASK_DECLARE_8(40),
2997 MASK_DECLARE_8(48), MASK_DECLARE_8(56),
b8d317d1
MT
2998#endif
2999};
e56b3bc7 3000EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
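For context, a hedged sketch of roughly how this table is consumed; it mirrors the get_cpu_mask() helper behind cpumask_of() in include/linux/cpumask.h, renamed here to mark it as an illustration.

static inline const struct cpumask *example_get_cpu_mask(unsigned int cpu)
{
	/* Row 1 + (cpu % BITS_PER_LONG) has one bit set at that offset. */
	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];

	/*
	 * Stepping back by cpu / BITS_PER_LONG longs makes the set bit
	 * land at position 'cpu' when read as a full cpumask; the words
	 * walked over belong to the tail of the previous row and are zero.
	 */
	p -= cpu / BITS_PER_LONG;
	return to_cpumask(p);
}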
2d3854a3
RR
3001
3002const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
3003EXPORT_SYMBOL(cpu_all_bits);
b3199c02
RR
3004
3005#ifdef CONFIG_INIT_ALL_POSSIBLE
4b804c85 3006struct cpumask __cpu_possible_mask __read_mostly
c4c54dd1 3007 = {CPU_BITS_ALL};
b3199c02 3008#else
4b804c85 3009struct cpumask __cpu_possible_mask __read_mostly;
b3199c02 3010#endif
4b804c85 3011EXPORT_SYMBOL(__cpu_possible_mask);
b3199c02 3012
4b804c85
RV
3013struct cpumask __cpu_online_mask __read_mostly;
3014EXPORT_SYMBOL(__cpu_online_mask);
b3199c02 3015
4b804c85
RV
3016struct cpumask __cpu_present_mask __read_mostly;
3017EXPORT_SYMBOL(__cpu_present_mask);
b3199c02 3018
4b804c85
RV
3019struct cpumask __cpu_active_mask __read_mostly;
3020EXPORT_SYMBOL(__cpu_active_mask);
3fa41520 3021
e40f74c5
PZ
3022struct cpumask __cpu_dying_mask __read_mostly;
3023EXPORT_SYMBOL(__cpu_dying_mask);
3024
0c09ab96
TG
3025atomic_t __num_online_cpus __read_mostly;
3026EXPORT_SYMBOL(__num_online_cpus);
3027
3fa41520
RR
3028void init_cpu_present(const struct cpumask *src)
3029{
c4c54dd1 3030 cpumask_copy(&__cpu_present_mask, src);
3fa41520
RR
3031}
3032
3033void init_cpu_possible(const struct cpumask *src)
3034{
c4c54dd1 3035 cpumask_copy(&__cpu_possible_mask, src);
3fa41520
RR
3036}
3037
3038void init_cpu_online(const struct cpumask *src)
3039{
c4c54dd1 3040 cpumask_copy(&__cpu_online_mask, src);
3fa41520 3041}
cff7d378 3042
0c09ab96
TG
3043void set_cpu_online(unsigned int cpu, bool online)
3044{
3045 /*
3046 * atomic_inc/dec() is required to handle the horrid abuse of this
3047 * function by the reboot and kexec code which invoke it from
3048 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
3049 * regular CPU hotplug is properly serialized.
3050 *
3051 * Note, that the fact that __num_online_cpus is of type atomic_t
3052 * does not protect readers which are not serialized against
3053 * concurrent hotplug operations.
3054 */
3055 if (online) {
3056 if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
3057 atomic_inc(&__num_online_cpus);
3058 } else {
3059 if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
3060 atomic_dec(&__num_online_cpus);
3061 }
3062}
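A hedged illustration of the caveat above: a reader that needs a stable view of the online mask, not just a coherent counter, must serialize against hotplug itself. The helper below is hypothetical; cpus_read_lock(), cpus_read_unlock() and num_online_cpus() are real API.

static unsigned int example_num_online_stable(void)
{
	unsigned int nr;

	cpus_read_lock();	/* block concurrent online/offline */
	nr = num_online_cpus();
	cpus_read_unlock();

	return nr;
}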
3063
cff7d378
TG
3064/*
3065 * Activate the first processor.
3066 */
3067void __init boot_cpu_init(void)
3068{
3069 int cpu = smp_processor_id();
3070
3071 /* Mark the boot cpu "present", "online" etc for SMP and UP case */
3072 set_cpu_online(cpu, true);
3073 set_cpu_active(cpu, true);
3074 set_cpu_present(cpu, true);
3075 set_cpu_possible(cpu, true);
8ce371f9
PZ
3076
3077#ifdef CONFIG_SMP
3078 __boot_cpu_id = cpu;
3079#endif
cff7d378
TG
3080}
3081
3082/*
3083 * Must be called _AFTER_ setting up the per_cpu areas
3084 */
b5b1404d 3085void __init boot_cpu_hotplug_init(void)
cff7d378 3086{
269777aa 3087#ifdef CONFIG_SMP
e797bda3 3088 cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
6f062123 3089 atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
269777aa 3090#endif
0cc3cd21 3091 this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
d385febc 3092 this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
cff7d378 3093}
98af8452 3094
731dc9df
TH
3095/*
3096 * These are used for a global "mitigations=" cmdline option for toggling
3097 * optional CPU mitigations.
3098 */
3099enum cpu_mitigations {
3100 CPU_MITIGATIONS_OFF,
3101 CPU_MITIGATIONS_AUTO,
3102 CPU_MITIGATIONS_AUTO_NOSMT,
3103};
3104
3105static enum cpu_mitigations cpu_mitigations __ro_after_init =
3106 CPU_MITIGATIONS_AUTO;
98af8452
JP
3107
3108static int __init mitigations_parse_cmdline(char *arg)
3109{
3110 if (!strcmp(arg, "off"))
3111 cpu_mitigations = CPU_MITIGATIONS_OFF;
3112 else if (!strcmp(arg, "auto"))
3113 cpu_mitigations = CPU_MITIGATIONS_AUTO;
3114 else if (!strcmp(arg, "auto,nosmt"))
3115 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
1bf72720
GU
3116 else
3117 pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
3118 arg);
98af8452
JP
3119
3120 return 0;
3121}
3122early_param("mitigations", mitigations_parse_cmdline);
731dc9df
TH
3123
3124/* mitigations=off */
3125bool cpu_mitigations_off(void)
3126{
3127 return cpu_mitigations == CPU_MITIGATIONS_OFF;
3128}
3129EXPORT_SYMBOL_GPL(cpu_mitigations_off);
3130
3131/* mitigations=auto,nosmt */
3132bool cpu_mitigations_auto_nosmt(void)
3133{
3134 return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
3135}
3136EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
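To show how these helpers are meant to be consumed, a hedged sketch of arch-side selection logic; the example_ mitigation and its flag are hypothetical, only cpu_mitigations_off() and cpu_mitigations_auto_nosmt() come from this file.

static bool __ro_after_init example_mitigation_enabled;

static void __init example_select_mitigation(void)
{
	/* "mitigations=off" disables optional mitigations entirely. */
	if (cpu_mitigations_off())
		return;

	example_mitigation_enabled = true;

	/* "mitigations=auto,nosmt" additionally asks for SMT to be disabled. */
	if (cpu_mitigations_auto_nosmt())
		pr_info("example: disabling SMT is recommended for full protection\n");
}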