// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic helpers for smp ipi calls
 *
 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/irq_work.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/sched/idle.h>
#include <linux/hypervisor.h>
#include <linux/sched/clock.h>
#include <linux/nmi.h>
#include <linux/sched/debug.h>
#include <linux/jump_label.h>

#include "smpboot.h"
#include "sched/smp.h"

#define CSD_TYPE(_csd)	((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)

struct call_function_data {
	call_single_data_t	__percpu *csd;
	cpumask_var_t		cpumask;
	cpumask_var_t		cpumask_ipi;
};

static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);

static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);

static void __flush_smp_call_function_queue(bool warn_cpu_offline);

int smpcfd_prepare_cpu(unsigned int cpu)
{
	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

	if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
				     cpu_to_node(cpu)))
		return -ENOMEM;
	if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
				     cpu_to_node(cpu))) {
		free_cpumask_var(cfd->cpumask);
		return -ENOMEM;
	}
	cfd->csd = alloc_percpu(call_single_data_t);
	if (!cfd->csd) {
		free_cpumask_var(cfd->cpumask);
		free_cpumask_var(cfd->cpumask_ipi);
		return -ENOMEM;
	}

	return 0;
}

int smpcfd_dead_cpu(unsigned int cpu)
{
	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

	free_cpumask_var(cfd->cpumask);
	free_cpumask_var(cfd->cpumask_ipi);
	free_percpu(cfd->csd);
	return 0;
}

int smpcfd_dying_cpu(unsigned int cpu)
{
	/*
	 * The IPIs for the smp-call-function callbacks queued by other
	 * CPUs might arrive late, either due to hardware latencies or
	 * because this CPU disabled interrupts (inside stop-machine)
	 * before the IPIs were sent. So flush out any pending callbacks
	 * explicitly (without waiting for the IPIs to arrive), to
	 * ensure that the outgoing CPU doesn't go offline with work
	 * still pending.
	 */
	__flush_smp_call_function_queue(false);
	irq_work_run();
	return 0;
}
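
/*
 * Editorial note, not in the original file: smpcfd_prepare_cpu(),
 * smpcfd_dead_cpu() and smpcfd_dying_cpu() are not called from this file
 * (apart from the boot-CPU call in call_function_init() below); they are
 * registered as CPU hotplug callbacks elsewhere in the kernel so that each
 * CPU's call_function_data is set up before the CPU comes online and torn
 * down once it is gone. As a rough sketch of that pattern, a subsystem with
 * similar prepare/dead callbacks could register them for a dynamic hotplug
 * state like this (names hypothetical):
 *
 *	ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "smp/example:prepare",
 *				example_prepare_cpu, example_dead_cpu);
 *
 * The real smpcfd callbacks use fixed entries in the hotplug state table
 * rather than a dynamically allocated state.
 */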

void __init call_function_init(void)
{
	int i;

	for_each_possible_cpu(i)
		init_llist_head(&per_cpu(call_single_queue, i));

	smpcfd_prepare_cpu(smp_processor_id());
}

#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG

static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);

/*
 * Parse the csdlock_debug= kernel boot parameter.
 *
 * If you need to restore the old "ext" value that once provided
 * additional debugging information, reapply the following commits:
 *
 * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging")
 * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging")
 */
static int __init csdlock_debug(char *str)
{
	unsigned int val = 0;

	get_option(&str, &val);
	if (val)
		static_branch_enable(&csdlock_debug_enabled);

	return 1;
}
__setup("csdlock_debug=", csdlock_debug);

static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
static DEFINE_PER_CPU(void *, cur_csd_info);

static ulong csd_lock_timeout = 5000;	/* CSD lock timeout in milliseconds. */
module_param(csd_lock_timeout, ulong, 0444);

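/*
 * Editorial note, not part of the original file: with
 * CONFIG_CSD_LOCK_WAIT_DEBUG enabled, the checks above are usually switched
 * on from the kernel command line, for example:
 *
 *	csdlock_debug=1			enable the static branch at boot
 *	smp.csd_lock_timeout=10000	report after 10 s instead of 5 s
 *
 * The "smp." prefix for the module parameter is an assumption based on this
 * file being built in (KBUILD_MODNAME is "smp"); check the exact spelling
 * against Documentation/admin-guide/kernel-parameters.txt before relying on it.
 */
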
static atomic_t csd_bug_count = ATOMIC_INIT(0);

/* Record current CSD work for current CPU, NULL to erase. */
static void __csd_lock_record(struct __call_single_data *csd)
{
	if (!csd) {
		smp_mb(); /* NULL cur_csd after unlock. */
		__this_cpu_write(cur_csd, NULL);
		return;
	}
	__this_cpu_write(cur_csd_func, csd->func);
	__this_cpu_write(cur_csd_info, csd->info);
	smp_wmb(); /* func and info before csd. */
	__this_cpu_write(cur_csd, csd);
	smp_mb(); /* Update cur_csd before function call. */
		  /* Or before unlock, as the case may be. */
}

static __always_inline void csd_lock_record(struct __call_single_data *csd)
{
	if (static_branch_unlikely(&csdlock_debug_enabled))
		__csd_lock_record(csd);
}

static int csd_lock_wait_getcpu(struct __call_single_data *csd)
{
	unsigned int csd_type;

	csd_type = CSD_TYPE(csd);
	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
		return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
	return -1;
}

/*
 * Complain if too much time is spent waiting. Note that only
 * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
 * so waiting on other types gets much less information.
 */
static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id)
{
	int cpu = -1;
	int cpux;
	bool firsttime;
	u64 ts2, ts_delta;
	call_single_data_t *cpu_cur_csd;
	unsigned int flags = READ_ONCE(csd->node.u_flags);
	unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;

	if (!(flags & CSD_FLAG_LOCK)) {
		if (!unlikely(*bug_id))
			return true;
		cpu = csd_lock_wait_getcpu(csd);
		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
			 *bug_id, raw_smp_processor_id(), cpu);
		return true;
	}

	ts2 = sched_clock();
	ts_delta = ts2 - *ts1;
	if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
		return false;

	firsttime = !*bug_id;
	if (firsttime)
		*bug_id = atomic_inc_return(&csd_bug_count);
	cpu = csd_lock_wait_getcpu(csd);
	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
		cpux = 0;
	else
		cpux = cpu;
	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
		 cpu, csd->func, csd->info);
	if (cpu_cur_csd && csd != cpu_cur_csd) {
		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
	} else {
		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
	}
	if (cpu >= 0) {
		dump_cpu_task(cpu);
		if (!cpu_cur_csd) {
			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
			arch_send_call_function_single_ipi(cpu);
		}
	}
	dump_stack();
	*ts1 = ts2;

	return false;
}

/*
 * csd_lock/csd_unlock are used to serialize access to per-cpu csd resources.
 *
 * For non-synchronous ipi calls the csd can still be in use by the
 * previous function call. For multi-cpu calls it's even more interesting
 * as we'll have to ensure no other cpu is observing our csd.
 */
static void __csd_lock_wait(struct __call_single_data *csd)
{
	int bug_id = 0;
	u64 ts0, ts1;

	ts1 = ts0 = sched_clock();
	for (;;) {
		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
			break;
		cpu_relax();
	}
	smp_acquire__after_ctrl_dep();
}

static __always_inline void csd_lock_wait(struct __call_single_data *csd)
{
	if (static_branch_unlikely(&csdlock_debug_enabled)) {
		__csd_lock_wait(csd);
		return;
	}

	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
}
#else
static void csd_lock_record(struct __call_single_data *csd)
{
}

static __always_inline void csd_lock_wait(struct __call_single_data *csd)
{
	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
}
#endif

static __always_inline void csd_lock(struct __call_single_data *csd)
{
	csd_lock_wait(csd);
	csd->node.u_flags |= CSD_FLAG_LOCK;

	/*
	 * prevent CPU from reordering the above assignment
	 * to ->flags with any subsequent assignments to other
	 * fields of the specified call_single_data_t structure:
	 */
	smp_wmb();
}

static __always_inline void csd_unlock(struct __call_single_data *csd)
{
	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));

	/*
	 * ensure we're all done before releasing data:
	 */
	smp_store_release(&csd->node.u_flags, 0);
}

static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);

void __smp_call_single_queue(int cpu, struct llist_node *node)
{
	/*
	 * The list addition should be visible to the IPI handler (which
	 * locks the list to pull the entry off it) before the IPI is
	 * sent, because of normal cache coherency rules implied by
	 * spinlocks.
	 *
	 * If IPIs can go out of order to the cache coherency protocol
	 * in an architecture, sufficient synchronisation should be added
	 * to arch code to make it appear to obey cache coherency WRT
	 * locking and barrier primitives. Generic code isn't really
	 * equipped to do the right thing...
	 */
	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
		send_call_function_single_ipi(cpu);
}

/*
 * Insert a previously allocated call_single_data_t element
 * for execution on the given CPU. The csd must already have
 * ->func, ->info, and ->flags set.
 */
static int generic_exec_single(int cpu, struct __call_single_data *csd)
{
	if (cpu == smp_processor_id()) {
		smp_call_func_t func = csd->func;
		void *info = csd->info;
		unsigned long flags;

		/*
		 * We can unlock early even for the synchronous on-stack case,
		 * since we're doing this from the same CPU.
		 */
		csd_lock_record(csd);
		csd_unlock(csd);
		local_irq_save(flags);
		func(info);
		csd_lock_record(NULL);
		local_irq_restore(flags);
		return 0;
	}

	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
		csd_unlock(csd);
		return -ENXIO;
	}

	__smp_call_single_queue(cpu, &csd->node.llist);

	return 0;
}

/**
 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
 *
 * Invoked by arch to handle an IPI for call function single.
 * Must be called with interrupts disabled.
 */
void generic_smp_call_function_single_interrupt(void)
{
	__flush_smp_call_function_queue(true);
}

/**
 * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *
 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
 *		      offline CPU. Skip this check if set to 'false'.
 *
 * Flush any pending smp-call-function callbacks queued on this CPU. This is
 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
 * to ensure that all pending IPI callbacks are run before it goes completely
 * offline.
 *
 * Loop through the call_single_queue and run all the queued callbacks.
 * Must be called with interrupts disabled.
 */
static void __flush_smp_call_function_queue(bool warn_cpu_offline)
{
	call_single_data_t *csd, *csd_next;
	struct llist_node *entry, *prev;
	struct llist_head *head;
	static bool warned;

	lockdep_assert_irqs_disabled();

	head = this_cpu_ptr(&call_single_queue);
	entry = llist_del_all(head);
	entry = llist_reverse_order(entry);

	/* There shouldn't be any pending callbacks on an offline CPU. */
	if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
		     !warned && entry != NULL)) {
		warned = true;
		WARN(1, "IPI on offline CPU %d\n", smp_processor_id());

		/*
		 * We don't have to use the _safe() variant here
		 * because we are not invoking the IPI handlers yet.
		 */
		llist_for_each_entry(csd, entry, node.llist) {
			switch (CSD_TYPE(csd)) {
			case CSD_TYPE_ASYNC:
			case CSD_TYPE_SYNC:
			case CSD_TYPE_IRQ_WORK:
				pr_warn("IPI callback %pS sent to offline CPU\n",
					csd->func);
				break;

			case CSD_TYPE_TTWU:
				pr_warn("IPI task-wakeup sent to offline CPU\n");
				break;

			default:
				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
					CSD_TYPE(csd));
				break;
			}
		}
	}

	/*
	 * First; run all SYNC callbacks, people are waiting for us.
	 */
	prev = NULL;
	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
		/* Do we wait until *after* callback? */
		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
			smp_call_func_t func = csd->func;
			void *info = csd->info;

			if (prev) {
				prev->next = &csd_next->node.llist;
			} else {
				entry = &csd_next->node.llist;
			}

			csd_lock_record(csd);
			func(info);
			csd_unlock(csd);
			csd_lock_record(NULL);
		} else {
			prev = &csd->node.llist;
		}
	}

	if (!entry)
		return;

	/*
	 * Second; run all !SYNC callbacks.
	 */
	prev = NULL;
	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
		int type = CSD_TYPE(csd);

		if (type != CSD_TYPE_TTWU) {
			if (prev) {
				prev->next = &csd_next->node.llist;
			} else {
				entry = &csd_next->node.llist;
			}

			if (type == CSD_TYPE_ASYNC) {
				smp_call_func_t func = csd->func;
				void *info = csd->info;

				csd_lock_record(csd);
				csd_unlock(csd);
				func(info);
				csd_lock_record(NULL);
			} else if (type == CSD_TYPE_IRQ_WORK) {
				irq_work_single(csd);
			}

		} else {
			prev = &csd->node.llist;
		}
	}

	/*
	 * Third; only CSD_TYPE_TTWU is left, issue those.
	 */
	if (entry)
		sched_ttwu_pending(entry);
}


/**
 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *				   from task context (idle, migration thread)
 *
 * When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
 * set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
 * setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
 * handle queued SMP function calls before scheduling.
 *
 * The migration thread has to ensure that a possibly pending wakeup has
 * been handled before it migrates a task.
 */
void flush_smp_call_function_queue(void)
{
	unsigned int was_pending;
	unsigned long flags;

	if (llist_empty(this_cpu_ptr(&call_single_queue)))
		return;

	local_irq_save(flags);
	/* Get the already pending soft interrupts for RT enabled kernels */
	was_pending = local_softirq_pending();
	__flush_smp_call_function_queue(true);
	if (local_softirq_pending())
		do_softirq_post_smp_call_flush(was_pending);

	local_irq_restore(flags);
}
515
516/*
517 * smp_call_function_single - Run a function on a specific CPU
518 * @func: The function to run. This must be fast and non-blocking.
519 * @info: An arbitrary pointer to pass to the function.
3d442233
JA
520 * @wait: If true, wait until function has completed on other CPUs.
521 *
72f279b2 522 * Returns 0 on success, else a negative status code.
3d442233 523 */
3a5f65df 524int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
8691e5a8 525 int wait)
3d442233 526{
966a9671
YH
527 call_single_data_t *csd;
528 call_single_data_t csd_stack = {
545b8c8d 529 .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
966a9671 530 };
0b13fda1 531 int this_cpu;
8b28499a 532 int err;
3d442233 533
0b13fda1
IM
534 /*
535 * prevent preemption and reschedule on another processor,
536 * as well as CPU removal
537 */
538 this_cpu = get_cpu();
539
269c861b
SS
540 /*
541 * Can deadlock when called with interrupts disabled.
542 * We allow cpu's that are not yet online though, as no one else can
543 * send smp call function interrupt to this cpu and as such deadlocks
544 * can't happen.
545 */
546 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
547 && !oops_in_progress);
3d442233 548
19dbdcb8
PZ
549 /*
550 * When @wait we can deadlock when we interrupt between llist_add() and
551 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
552 * csd_lock() on because the interrupt context uses the same csd
553 * storage.
554 */
555 WARN_ON_ONCE(!in_task());
556
8053871d
LT
557 csd = &csd_stack;
558 if (!wait) {
559 csd = this_cpu_ptr(&csd_data);
560 csd_lock(csd);
561 }
562
4b44a21d
PZ
563 csd->func = func;
564 csd->info = info;
35feb604 565#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
545b8c8d
PZ
566 csd->node.src = smp_processor_id();
567 csd->node.dst = cpu;
e48c15b7 568#endif
4b44a21d
PZ
569
570 err = generic_exec_single(cpu, csd);
8053871d
LT
571
572 if (wait)
573 csd_lock_wait(csd);
3d442233
JA
574
575 put_cpu();
0b13fda1 576
f73be6de 577 return err;
3d442233
JA
578}
579EXPORT_SYMBOL(smp_call_function_single);
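
/*
 * Editorial example, not part of the original file: a minimal sketch of how a
 * caller might use smp_call_function_single(). The names below are
 * hypothetical. The callback runs in IPI (hardirq) context on the target CPU,
 * so it must be fast and must not sleep; wait=1 makes the call synchronous.
 */
static void example_read_clock(void *info)
{
	*(u64 *)info = sched_clock();	/* Runs on the target CPU. */
}

static u64 __maybe_unused example_clock_on_cpu(int cpu)
{
	u64 val = 0;

	/* Returns -ENXIO if @cpu is invalid or offline, 0 on success. */
	if (smp_call_function_single(cpu, example_read_clock, &val, 1))
		return 0;

	return val;
}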

/**
 * smp_call_function_single_async() - Run an asynchronous function on a
 *				      specific CPU.
 * @cpu: The CPU to run on.
 * @csd: Pre-allocated and setup data structure
 *
 * Like smp_call_function_single(), but the call is asynchronous and
 * can thus be done from contexts with disabled interrupts.
 *
 * The caller passes its own pre-allocated data structure
 * (ie: embedded in an object) and is responsible for synchronizing it
 * such that the IPIs performed on the @csd are strictly serialized.
 *
 * If this function is called with a csd that has not yet been processed
 * by a previous call to smp_call_function_single_async(), it returns
 * immediately with -EBUSY, showing that the csd object is still in
 * progress.
 *
 * NOTE: Be careful, there is unfortunately no current debugging facility to
 * validate the correctness of this serialization.
 *
 * Return: %0 on success or negative errno value on error
 */
int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
{
	int err = 0;

	preempt_disable();

	if (csd->node.u_flags & CSD_FLAG_LOCK) {
		err = -EBUSY;
		goto out;
	}

	csd->node.u_flags = CSD_FLAG_LOCK;
	smp_wmb();

	err = generic_exec_single(cpu, csd);

out:
	preempt_enable();

	return err;
}
EXPORT_SYMBOL_GPL(smp_call_function_single_async);
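
/*
 * Editorial example, not part of the original file: a sketch of using
 * smp_call_function_single_async() with a preallocated csd. All names are
 * hypothetical. The csd must stay valid until the callback has run, and the
 * caller must serialize requests on it; -EBUSY means the previous request on
 * this csd has not been processed yet.
 */
static void example_poke(void *info)
{
	/* Runs in IPI context on the target CPU; must not sleep. */
}

static call_single_data_t example_poke_csd = {
	.func = example_poke,
};

static int __maybe_unused example_poke_cpu(int cpu)
{
	int ret;

	ret = smp_call_function_single_async(cpu, &example_poke_csd);
	if (ret == -EBUSY)
		pr_debug("previous poke of CPU%d still pending\n", cpu);

	return ret;
}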

/*
 * smp_call_function_any - Run a function on any of the given cpus
 * @mask: The mask of cpus it can run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed.
 *
 * Returns 0 on success, else a negative status code (if no cpus were online).
 *
 * Selection preference:
 *	1) current cpu if in @mask
 *	2) any cpu of current node if in @mask
 *	3) any other online cpu in @mask
 */
int smp_call_function_any(const struct cpumask *mask,
			  smp_call_func_t func, void *info, int wait)
{
	unsigned int cpu;
	const struct cpumask *nodemask;
	int ret;

	/* Try for same CPU (cheapest) */
	cpu = get_cpu();
	if (cpumask_test_cpu(cpu, mask))
		goto call;

	/* Try for same node. */
	nodemask = cpumask_of_node(cpu_to_node(cpu));
	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
		if (cpu_online(cpu))
			goto call;
	}

	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
	cpu = cpumask_any_and(mask, cpu_online_mask);
call:
	ret = smp_call_function_single(cpu, func, info, wait);
	put_cpu();
	return ret;
}
EXPORT_SYMBOL_GPL(smp_call_function_any);
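
/*
 * Editorial example, not part of the original file: a sketch of using
 * smp_call_function_any() to run a quick callback on whichever online CPU of
 * a caller-supplied mask is cheapest to reach (ideally the current CPU or a
 * NUMA-local one). The names are hypothetical.
 */
static void example_touch(void *info)
{
	atomic_t *counter = info;

	atomic_inc(counter);	/* Fast, non-blocking work. */
}

static int __maybe_unused example_touch_any(const struct cpumask *mask,
					    atomic_t *counter)
{
	/* Returns a negative status code if no CPU in @mask is online. */
	return smp_call_function_any(mask, example_touch, counter, 1);
}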

/*
 * Flags to be used as scf_flags argument of smp_call_function_many_cond().
 *
 * %SCF_WAIT:		Wait until function execution is completed
 * %SCF_RUN_LOCAL:	Run also locally if local cpu is set in cpumask
 */
#define SCF_WAIT	(1U << 0)
#define SCF_RUN_LOCAL	(1U << 1)

static void smp_call_function_many_cond(const struct cpumask *mask,
					smp_call_func_t func, void *info,
					unsigned int scf_flags,
					smp_cond_func_t cond_func)
{
	int cpu, last_cpu, this_cpu = smp_processor_id();
	struct call_function_data *cfd;
	bool wait = scf_flags & SCF_WAIT;
	bool run_remote = false;
	bool run_local = false;
	int nr_cpus = 0;

	lockdep_assert_preemption_disabled();

	/*
	 * Can deadlock when called with interrupts disabled.
	 * We allow CPUs that are not yet online though, as no one else can
	 * send an smp call function interrupt to this cpu and as such
	 * deadlocks can't happen.
	 */
	if (cpu_online(this_cpu) && !oops_in_progress &&
	    !early_boot_irqs_disabled)
		lockdep_assert_irqs_enabled();

	/*
	 * When @wait we can deadlock when we interrupt between llist_add() and
	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
	 * csd_lock() because the interrupt context uses the same csd
	 * storage.
	 */
	WARN_ON_ONCE(!in_task());

	/* Check if we need local execution. */
	if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
		run_local = true;

	/* Check if we need remote execution, i.e., any CPU excluding this one. */
	cpu = cpumask_first_and(mask, cpu_online_mask);
	if (cpu == this_cpu)
		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (cpu < nr_cpu_ids)
		run_remote = true;

	if (run_remote) {
		cfd = this_cpu_ptr(&cfd_data);
		cpumask_and(cfd->cpumask, mask, cpu_online_mask);
		__cpumask_clear_cpu(this_cpu, cfd->cpumask);

		cpumask_clear(cfd->cpumask_ipi);
		for_each_cpu(cpu, cfd->cpumask) {
			call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);

			if (cond_func && !cond_func(cpu, info))
				continue;

			csd_lock(csd);
			if (wait)
				csd->node.u_flags |= CSD_TYPE_SYNC;
			csd->func = func;
			csd->info = info;
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
			csd->node.src = smp_processor_id();
			csd->node.dst = cpu;
#endif
			if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
				__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
				nr_cpus++;
				last_cpu = cpu;
			}
		}

		/*
		 * Choose the most efficient way to send an IPI. Note that the
		 * number of CPUs might be zero due to concurrent changes to the
		 * provided mask.
		 */
		if (nr_cpus == 1)
			send_call_function_single_ipi(last_cpu);
		else if (likely(nr_cpus > 1))
			arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
	}

	if (run_local && (!cond_func || cond_func(this_cpu, info))) {
		unsigned long flags;

		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
	}

	if (run_remote && wait) {
		for_each_cpu(cpu, cfd->cpumask) {
			call_single_data_t *csd;

			csd = per_cpu_ptr(cfd->csd, cpu);
			csd_lock_wait(csd);
		}
	}
}

/**
 * smp_call_function_many(): Run a function on a set of CPUs.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 */
void smp_call_function_many(const struct cpumask *mask,
			    smp_call_func_t func, void *info, bool wait)
{
	smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
}
EXPORT_SYMBOL(smp_call_function_many);
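
/*
 * Editorial example, not part of the original file: a sketch of running a
 * fast callback on every other online CPU in a caller-supplied mask and
 * waiting for completion. Names are hypothetical; preemption must be
 * disabled across the call, as required by smp_call_function_many().
 */
static void example_sync_state(void *info)
{
	/* Fast and non-blocking; runs in IPI context on each selected CPU. */
}

static void __maybe_unused example_sync_all(const struct cpumask *mask)
{
	preempt_disable();
	smp_call_function_many(mask, example_sync_state, NULL, true);
	preempt_enable();
}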

/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
void smp_call_function(smp_call_func_t func, void *info, int wait)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();
}
EXPORT_SYMBOL(smp_call_function);

/* Setup configured maximum number of CPUs to activate */
unsigned int setup_max_cpus = NR_CPUS;
EXPORT_SYMBOL(setup_max_cpus);


/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */

void __weak arch_disable_smp_support(void) { }

static int __init nosmp(char *str)
{
	setup_max_cpus = 0;
	arch_disable_smp_support();

	return 0;
}

early_param("nosmp", nosmp);

/* this is a hard limit */
static int __init nrcpus(char *str)
{
	int nr_cpus;

	if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
		set_nr_cpu_ids(nr_cpus);

	return 0;
}

early_param("nr_cpus", nrcpus);

static int __init maxcpus(char *str)
{
	get_option(&str, &setup_max_cpus);
	if (setup_max_cpus == 0)
		arch_disable_smp_support();

	return 0;
}

early_param("maxcpus", maxcpus);

#if (NR_CPUS > 1) && !defined(CONFIG_FORCE_NR_CPUS)
/* Setup number of possible processor ids */
unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
#endif

/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
void __init setup_nr_cpu_ids(void)
{
	set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1);
}

/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
	int num_nodes, num_cpus;

	idle_threads_init();
	cpuhp_threads_init();

	pr_info("Bringing up secondary CPUs ...\n");

	bringup_nonboot_cpus(setup_max_cpus);

	num_nodes = num_online_nodes();
	num_cpus  = num_online_cpus();
	pr_info("Brought up %d node%s, %d CPU%s\n",
		num_nodes, (num_nodes > 1 ? "s" : ""),
		num_cpus,  (num_cpus  > 1 ? "s" : ""));

	/* Any cleanup work */
	smp_cpus_done(setup_max_cpus);
}

/*
 * on_each_cpu_cond_mask(): Call a function on each processor for which
 * the supplied function cond_func returns true, optionally waiting
 * for all the required CPUs to finish. This may include the local
 * processor.
 * @cond_func:	A callback function that is passed a cpu id and
 *		the info parameter. The function is called
 *		with preemption disabled. The function should
 *		return a boolean value indicating whether to IPI
 *		the specified CPU.
 * @func:	The function to run on all applicable CPUs.
 *		This must be fast and non-blocking.
 * @info:	An arbitrary pointer to pass to both functions.
 * @wait:	If true, wait (atomically) until function has
 *		completed on other CPUs.
 * @mask:	The set of CPUs to consider (only the online subset is used).
 *
 * Preemption is disabled to protect against CPUs going offline but not online.
 * CPUs going online during the call will not be seen or sent an IPI.
 *
 * You must not call this function with disabled interrupts or
 * from a hardware interrupt handler or from a bottom half handler.
 */
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
			   void *info, bool wait, const struct cpumask *mask)
{
	unsigned int scf_flags = SCF_RUN_LOCAL;

	if (wait)
		scf_flags |= SCF_WAIT;

	preempt_disable();
	smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
	preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond_mask);
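
/*
 * Editorial example, not part of the original file: a sketch of using
 * on_each_cpu_cond_mask() to IPI only those CPUs for which a predicate holds.
 * All names are hypothetical; the predicate is called with preemption
 * disabled and the callback runs in IPI context.
 */
static DEFINE_PER_CPU(bool, example_needs_flush);

static bool example_should_flush(int cpu, void *info)
{
	return per_cpu(example_needs_flush, cpu);
}

static void example_do_flush(void *info)
{
	this_cpu_write(example_needs_flush, false);
}

static void __maybe_unused example_flush_marked(const struct cpumask *mask)
{
	on_each_cpu_cond_mask(example_should_flush, example_do_flush,
			      NULL, true, mask);
}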

static void do_nothing(void *unused)
{
}

/**
 * kick_all_cpus_sync - Force all cpus out of idle
 *
 * Used to synchronize the update of the pm_idle function pointer. It's
 * called after the pointer is updated and returns after the dummy
 * callback function has been executed on all cpus. The execution of
 * the function can only happen on the remote cpus after they have
 * left the idle function which had been called via the pm_idle function
 * pointer. So it's guaranteed that nothing uses the previous pointer
 * anymore.
 */
void kick_all_cpus_sync(void)
{
	/* Make sure the change is visible before we kick the cpus */
	smp_mb();
	smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
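
/*
 * Editorial example, not part of the original file: the usual pattern behind
 * kick_all_cpus_sync(), sketched with hypothetical names. After publishing a
 * new handler pointer, kicking all CPUs guarantees that no CPU is still
 * running through the old pointer once the call returns.
 */
static void (*example_handler)(void);

static void __maybe_unused example_set_handler(void (*new_handler)(void))
{
	WRITE_ONCE(example_handler, new_handler);
	kick_all_cpus_sync();	/* Includes the needed barrier and waits. */
	/* The old handler is no longer in use and may now be freed. */
}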

/**
 * wake_up_all_idle_cpus - break all cpus out of idle
 *
 * Tries to break all CPUs that are in an idle state out of idle, including
 * CPUs that are idle-polling. CPUs that are not idle are left alone.
 */
void wake_up_all_idle_cpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		preempt_disable();
		if (cpu != smp_processor_id() && cpu_online(cpu))
			wake_up_if_idle(cpu);
		preempt_enable();
	}
}
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);

/**
 * struct smp_call_on_cpu_struct - Call a function on a specific CPU
 * @work: &work_struct
 * @done: &completion to signal
 * @func: function to call
 * @data: function's data argument
 * @ret: return value from @func
 * @cpu: target CPU (%-1 for any CPU)
 *
 * Used to call a function on a specific cpu and wait for it to return.
 * Optionally make sure the call is done on a specified physical cpu via vcpu
 * pinning in order to support virtualized environments.
 */
struct smp_call_on_cpu_struct {
	struct work_struct	work;
	struct completion	done;
	int			(*func)(void *);
	void			*data;
	int			ret;
	int			cpu;
};

static void smp_call_on_cpu_callback(struct work_struct *work)
{
	struct smp_call_on_cpu_struct *sscs;

	sscs = container_of(work, struct smp_call_on_cpu_struct, work);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(sscs->cpu);
	sscs->ret = sscs->func(sscs->data);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(-1);

	complete(&sscs->done);
}

int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{
	struct smp_call_on_cpu_struct sscs = {
		.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
		.func = func,
		.data = par,
		.cpu  = phys ? cpu : -1,
	};

	INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);

	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
		return -ENXIO;

	queue_work_on(cpu, system_wq, &sscs.work);
	wait_for_completion(&sscs.done);

	return sscs.ret;
}
EXPORT_SYMBOL_GPL(smp_call_on_cpu);
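
/*
 * Editorial example, not part of the original file: a sketch of using
 * smp_call_on_cpu() for work that may sleep, in contrast to the IPI-based
 * helpers above. The names are hypothetical; the callback runs from a
 * workqueue worker bound to the chosen CPU and may block.
 */
static int example_sleepy_probe(void *data)
{
	/* Runs in process context on the target CPU and may sleep. */
	return 0;
}

static int __maybe_unused example_probe_on(unsigned int cpu)
{
	/* phys=true additionally pins the vCPU when running virtualized. */
	return smp_call_on_cpu(cpu, example_sleepy_probe, NULL, true);
}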