/*
 * SMP support for ppc.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
 * deal of code from the sparc and intel versions.
 *
 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
 *
 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>
#include <linux/random.h>
#include <linux/stackprotector.h>

#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/kexec.h>
#include <asm/asm-prototypes.h>
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>

#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef CONFIG_HOTPLUG_CPU
/* State of each CPU during hotplug phases */
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

struct thread_info *secondary_ti;
bool has_big_cores;

DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
EXPORT_SYMBOL_GPL(has_big_cores);

#define MAX_THREAD_LIST_SIZE 8
#define THREAD_GROUP_SHARE_L1 1
struct thread_groups {
	unsigned int property;
	unsigned int nr_groups;
	unsigned int threads_per_group;
	unsigned int thread_list[MAX_THREAD_LIST_SIZE];
};

/*
 * On big-core systems, cpu_l1_cache_map for each CPU corresponds to
 * the set of its siblings that share the L1-cache.
 */
DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);

/* SMP operations for this machine */
struct smp_ops_t *smp_ops;

/* Can't be static due to PowerMac hackery */
volatile unsigned int cpu_callin_map[NR_CPUS];

int smt_enabled_at_boot = 1;

/*
 * Returns 1 if the specified cpu should be brought up during boot.
 * Used to inhibit booting threads if they've been disabled or
 * limited on the command line
 */
int smp_generic_cpu_bootable(unsigned int nr)
{
	/* Special case - we inhibit secondary thread startup
	 * during boot if the user requests it.
	 */
	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
			return 0;
		if (smt_enabled_at_boot
		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
			return 0;
	}

	return 1;
}
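
/*
 * Editorial example (not part of the original source): if smt_enabled_at_boot
 * is 2 (e.g. set via the smt-enabled= command line option on PPC64), then
 * during boot only threads 0 and 1 of each SMT-capable core pass this check;
 * the remaining threads of each core stay offline until brought up later via
 * CPU hotplug.
 */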


#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
	if (nr < 0 || nr >= nr_cpu_ids)
		return -EINVAL;

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero. After we set cpu_start,
	 * the processor will continue on to secondary_start
	 */
	if (!paca_ptrs[nr]->cpu_start) {
		paca_ptrs[nr]->cpu_start = 1;
		smp_mb();
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Ok it's not there, so it might be soft-unplugged, let's
	 * try to bring it back
	 */
	generic_set_cpu_up(nr);
	smp_wmb();
	smp_send_reschedule(nr);
#endif /* CONFIG_HOTPLUG_CPU */

	return 0;
}
#endif /* CONFIG_PPC64 */

static irqreturn_t call_function_action(int irq, void *data)
{
	generic_smp_call_function_interrupt();
	return IRQ_HANDLED;
}

static irqreturn_t reschedule_action(int irq, void *data)
{
	scheduler_ipi();
	return IRQ_HANDLED;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
	timer_broadcast_interrupt();
	return IRQ_HANDLED;
}
#endif

#ifdef CONFIG_NMI_IPI
static irqreturn_t nmi_ipi_action(int irq, void *data)
{
	smp_handle_nmi_ipi(get_irq_regs());
	return IRQ_HANDLED;
}
#endif

static irq_handler_t smp_ipi_action[] = {
	[PPC_MSG_CALL_FUNCTION] = call_function_action,
	[PPC_MSG_RESCHEDULE] = reschedule_action,
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
#endif
#ifdef CONFIG_NMI_IPI
	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
#endif
};

/*
 * The NMI IPI is a fallback and not truly non-maskable. It is simpler
 * than going through the call function infrastructure, and strongly
 * serialized, so it is more appropriate for debugging.
 */
const char *smp_ipi_name[] = {
	[PPC_MSG_CALL_FUNCTION] = "ipi call function",
	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
#endif
#ifdef CONFIG_NMI_IPI
	[PPC_MSG_NMI_IPI] = "nmi ipi",
#endif
};

/* optional function to request ipi, for controllers with >= 4 ipis */
int smp_request_message_ipi(int virq, int msg)
{
	int err;

	if (msg < 0 || msg > PPC_MSG_NMI_IPI)
		return -EINVAL;
#ifndef CONFIG_NMI_IPI
	if (msg == PPC_MSG_NMI_IPI)
		return 1;
#endif

	err = request_irq(virq, smp_ipi_action[msg],
			  IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
			  smp_ipi_name[msg], NULL);
	WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
	     virq, smp_ipi_name[msg], err);

	return err;
}
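
/*
 * Editorial usage sketch (not part of the original source): an interrupt
 * controller driver that exposes one hardware IPI per message type would
 * typically call this once per message, e.g.
 *
 *	smp_request_message_ipi(virq, PPC_MSG_RESCHEDULE);
 *
 * where virq is a hypothetical virtual irq the driver has already mapped for
 * that IPI. Controllers with fewer IPIs use the muxed path below instead.
 */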

#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
	long messages;	/* current messages */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);

void smp_muxed_ipi_set_message(int cpu, int msg)
{
	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
	char *message = (char *)&info->messages;

	/*
	 * Order previous accesses before accesses in the IPI handler.
	 */
	smp_mb();
	message[msg] = 1;
}

void smp_muxed_ipi_message_pass(int cpu, int msg)
{
	smp_muxed_ipi_set_message(cpu, msg);

	/*
	 * cause_ipi functions are required to include a full barrier
	 * before doing whatever causes the IPI.
	 */
	smp_ops->cause_ipi(cpu);
}

#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
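
/*
 * Editorial worked example: smp_muxed_ipi_set_message() stores a 1 into byte
 * 'msg' of info->messages, so each message type occupies one byte of the
 * long. IPI_MESSAGE(msg) selects the low bit of that same byte: on a 64-bit
 * big-endian kernel IPI_MESSAGE(0) is 1UL << 56 (byte 0 is the most
 * significant byte of the value), while on little-endian it is 1UL << 0.
 * The demux loop below tests these bits against the value returned by xchg().
 */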

irqreturn_t smp_ipi_demux(void)
{
	mb();	/* order any irq clear */

	return smp_ipi_demux_relaxed();
}

/* sync-free variant. Callers should ensure synchronization */
irqreturn_t smp_ipi_demux_relaxed(void)
{
	struct cpu_messages *info;
	unsigned long all;

	info = this_cpu_ptr(&ipi_message);
	do {
		all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
		/*
		 * Must check for PPC_MSG_RM_HOST_ACTION messages
		 * before PPC_MSG_CALL_FUNCTION messages because when
		 * a VM is destroyed, we call kick_all_cpus_sync()
		 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
		 * messages have completed before we free any VCPUs.
		 */
		if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
			kvmppc_xics_ipi_action();
#endif
		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
			generic_smp_call_function_interrupt();
		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
			scheduler_ipi();
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
			timer_broadcast_interrupt();
#endif
#ifdef CONFIG_NMI_IPI
		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
			nmi_ipi_action(0, NULL);
#endif
	} while (info->messages);

	return IRQ_HANDLED;
}
#endif /* CONFIG_PPC_SMP_MUXED_IPI */

static inline void do_message_pass(int cpu, int msg)
{
	if (smp_ops->message_pass)
		smp_ops->message_pass(cpu, msg);
#ifdef CONFIG_PPC_SMP_MUXED_IPI
	else
		smp_muxed_ipi_message_pass(cpu, msg);
#endif
}

void smp_send_reschedule(int cpu)
{
	if (likely(smp_ops))
		do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(smp_send_reschedule);

void arch_send_call_function_single_ipi(int cpu)
{
	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

#ifdef CONFIG_NMI_IPI

/*
 * "NMI IPI" system.
 *
 * NMI IPIs may not be recoverable, so should not be used as ongoing part of
 * a running system. They can be used for crash, debug, halt/reboot, etc.
 *
 * The IPI call waits with interrupts disabled until all targets enter the
 * NMI handler, then returns. Subsequent IPIs can be issued before targets
 * have returned from their handlers, so there is no guarantee about
 * concurrency or re-entrancy.
 *
 * A new NMI can be issued before all targets exit the handler.
 *
 * The IPI call may time out without all targets entering the NMI handler.
 * In that case, there is some logic to recover (and ignore subsequent
 * NMI interrupts that may eventually be raised), but the platform interrupt
 * handler may not be able to distinguish this from other exception causes,
 * which may cause a crash.
 */

static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
static bool nmi_ipi_busy = false;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;

static void nmi_ipi_lock_start(unsigned long *flags)
{
	raw_local_irq_save(*flags);
	hard_irq_disable();
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
		raw_local_irq_restore(*flags);
		spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}

static void nmi_ipi_lock(void)
{
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
		spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}

static void nmi_ipi_unlock(void)
{
	smp_mb();
	WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
	atomic_set(&__nmi_ipi_lock, 0);
}

static void nmi_ipi_unlock_end(unsigned long *flags)
{
	nmi_ipi_unlock();
	raw_local_irq_restore(*flags);
}

/*
 * Platform NMI handler calls this to ack
 */
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
	void (*fn)(struct pt_regs *) = NULL;
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 0;

	/*
	 * Unexpected NMIs are possible here because the interrupt may not
	 * be able to distinguish NMI IPIs from other types of NMIs, or
	 * because the caller may have timed out.
	 */
	nmi_ipi_lock_start(&flags);
	if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
		fn = READ_ONCE(nmi_ipi_function);
		WARN_ON_ONCE(!fn);
		ret = 1;
	}
	nmi_ipi_unlock_end(&flags);

	if (fn)
		fn(regs);

	return ret;
}

static void do_smp_send_nmi_ipi(int cpu, bool safe)
{
	if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
		return;

	if (cpu >= 0) {
		do_message_pass(cpu, PPC_MSG_NMI_IPI);
	} else {
		int c;

		for_each_online_cpu(c) {
			if (c == raw_smp_processor_id())
				continue;
			do_message_pass(c, PPC_MSG_NMI_IPI);
		}
	}
}

/*
 * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
 * - fn is the target callback function.
 * - delay_us > 0 is the delay before giving up waiting for targets to
 *   begin executing the handler, == 0 specifies indefinite delay.
 */
static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
			      u64 delay_us, bool safe)
{
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 1;

	BUG_ON(cpu == me);
	BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);

	if (unlikely(!smp_ops))
		return 0;

	nmi_ipi_lock_start(&flags);
	while (nmi_ipi_busy) {
		nmi_ipi_unlock_end(&flags);
		spin_until_cond(!nmi_ipi_busy);
		nmi_ipi_lock_start(&flags);
	}
	nmi_ipi_busy = true;
	nmi_ipi_function = fn;

	WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));

	if (cpu < 0) {
		/* ALL_OTHERS */
		cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	} else {
		cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
	}

	nmi_ipi_unlock();

	/* Interrupts remain hard disabled */

	do_smp_send_nmi_ipi(cpu, safe);

	nmi_ipi_lock();
	/* nmi_ipi_busy is set here, so unlock/lock is okay */
	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
		nmi_ipi_unlock();
		udelay(1);
		nmi_ipi_lock();
		if (delay_us) {
			delay_us--;
			if (!delay_us)
				break;
		}
	}

	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
		/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
		ret = 0;
		cpumask_clear(&nmi_ipi_pending_mask);
	}

	nmi_ipi_function = NULL;
	nmi_ipi_busy = false;

	nmi_ipi_unlock_end(&flags);

	return ret;
}

int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
	return __smp_send_nmi_ipi(cpu, fn, delay_us, false);
}

int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
	return __smp_send_nmi_ipi(cpu, fn, delay_us, true);
}
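
/*
 * Editorial usage note: callers in this file pass either a specific target
 * CPU or NMI_IPI_ALL_OTHERS plus a timeout in microseconds. For example,
 * smp_send_debugger_break() below calls
 * smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000),
 * i.e. it waits up to one second for all other CPUs to enter the handler.
 */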
#endif /* CONFIG_NMI_IPI */

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
}
#endif

#ifdef CONFIG_DEBUGGER
void debugger_ipi_callback(struct pt_regs *regs)
{
	debugger_ipi(regs);
}

void smp_send_debugger_break(void)
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif

#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
	int cpu;

	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
	if (kdump_in_progress() && crash_wake_offline) {
		for_each_present_cpu(cpu) {
			if (cpu_online(cpu))
				continue;
			/*
			 * crash_ipi_callback will wait for
			 * all cpus, including offline CPUs.
			 * We don't care about nmi_ipi_function.
			 * Offline cpus will jump straight into
			 * crash_ipi_callback, we can skip the
			 * entire NMI dance and waiting for
			 * cpus to clear pending mask, etc.
			 */
			do_smp_send_nmi_ipi(cpu, false);
		}
	}
}
#endif

#ifdef CONFIG_NMI_IPI
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
	/*
	 * IRQs are already hard disabled by the smp_handle_nmi_ipi.
	 */
	spin_begin();
	while (1)
		spin_cpu_relax();
}

void smp_send_stop(void)
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
}

#else /* CONFIG_NMI_IPI */

static void stop_this_cpu(void *dummy)
{
	hard_irq_disable();
	spin_begin();
	while (1)
		spin_cpu_relax();
}

void smp_send_stop(void)
{
	static bool stopped = false;

	/*
	 * Prevent waiting on csd lock from a previous smp_send_stop.
	 * This is racy, but in general callers try to do the right
	 * thing and only fire off one smp_send_stop (e.g., see
	 * kernel/panic.c)
	 */
	if (stopped)
		return;

	stopped = true;

	smp_call_function(stop_this_cpu, NULL, 0);
}
#endif /* CONFIG_NMI_IPI */

struct thread_info *current_set[NR_CPUS];

static void smp_store_cpu_info(int id)
{
	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
#ifdef CONFIG_PPC_FSL_BOOK3E
	per_cpu(next_tlbcam_idx, id)
		= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
}

/*
 * Relationships between CPUs are maintained in a set of per-cpu cpumasks, so
 * rather than just passing around the cpumask we pass around a function that
 * returns that cpumask for the given CPU.
 */
static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
{
	cpumask_set_cpu(i, get_cpumask(j));
	cpumask_set_cpu(j, get_cpumask(i));
}

#ifdef CONFIG_HOTPLUG_CPU
static void set_cpus_unrelated(int i, int j,
			       struct cpumask *(*get_cpumask)(int))
{
	cpumask_clear_cpu(i, get_cpumask(j));
	cpumask_clear_cpu(j, get_cpumask(i));
}
#endif

/*
 * parse_thread_groups: Parses the "ibm,thread-groups" device tree
 *                      property for the CPU device node @dn and stores
 *                      the parsed output in the thread_groups
 *                      structure @tg if the ibm,thread-groups[0]
 *                      matches @property.
 *
 * @dn: The device node of the CPU device.
 * @tg: Pointer to a thread group structure into which the parsed
 *      output of "ibm,thread-groups" is stored.
 * @property: The property of the thread-group that the caller is
 *            interested in.
 *
 * ibm,thread-groups[0..N-1] array defines which group of threads in
 * the CPU-device node can be grouped together based on the property.
 *
 * ibm,thread-groups[0] tells us the property based on which the
 * threads are being grouped together. If this value is 1, it implies
 * that the threads in the same group share L1, translation cache.
 *
 * ibm,thread-groups[1] tells us how many such thread groups exist.
 *
 * ibm,thread-groups[2] tells us the number of threads in each such
 * group.
 *
 * ibm,thread-groups[3..N-1] is the list of threads identified by
 * "ibm,ppc-interrupt-server#s" arranged as per their membership in
 * the grouping.
 *
 * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
 * implies that there are 2 groups of 4 threads each, where each group
 * of threads share L1, translation cache.
 *
 * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
 * and the "ibm,ppc-interrupt-server#s" of the second group is
 * {9, 10, 11, 12}.
 *
 * Returns 0 on success, -EINVAL if the property does not exist,
 * -ENODATA if property does not have a value, and -EOVERFLOW if the
 * property data isn't large enough.
 */
static int parse_thread_groups(struct device_node *dn,
			       struct thread_groups *tg,
			       unsigned int property)
{
	int i;
	u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
	u32 *thread_list;
	size_t total_threads;
	int ret;

	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
					 thread_group_array, 3);
	if (ret)
		return ret;

	tg->property = thread_group_array[0];
	tg->nr_groups = thread_group_array[1];
	tg->threads_per_group = thread_group_array[2];
	if (tg->property != property ||
	    tg->nr_groups < 1 ||
	    tg->threads_per_group < 1)
		return -ENODATA;

	total_threads = tg->nr_groups * tg->threads_per_group;

	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
					 thread_group_array,
					 3 + total_threads);
	if (ret)
		return ret;

	thread_list = &thread_group_array[3];

	for (i = 0 ; i < total_threads; i++)
		tg->thread_list[i] = thread_list[i];

	return 0;
}

/*
 * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
 *                              that @cpu belongs to.
 *
 * @cpu : The logical CPU whose thread group is being searched.
 * @tg : The thread-group structure of the CPU node which @cpu belongs
 *       to.
 *
 * Returns the index to tg->thread_list that points to the start
 * of the thread_group that @cpu belongs to.
 *
 * Returns -1 if cpu doesn't belong to any of the groups pointed to by
 * tg->thread_list.
 */
static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
{
	int hw_cpu_id = get_hard_smp_processor_id(cpu);
	int i, j;

	for (i = 0; i < tg->nr_groups; i++) {
		int group_start = i * tg->threads_per_group;

		for (j = 0; j < tg->threads_per_group; j++) {
			int idx = group_start + j;

			if (tg->thread_list[idx] == hw_cpu_id)
				return group_start;
		}
	}

	return -1;
}
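
/*
 * Editorial worked example: with the ibm,thread-groups value
 * [1,2,4,5,6,7,8,9,10,11,12] described above, tg->thread_list is
 * {5,6,7,8,9,10,11,12} arranged as 2 groups of 4 threads. A CPU whose
 * "ibm,ppc-interrupt-server#s" is 10 falls in the second group, so this
 * function returns 4, the index of that group's first entry (9).
 */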

static int init_cpu_l1_cache_map(int cpu)
{
	struct device_node *dn = of_get_cpu_node(cpu, NULL);
	struct thread_groups tg = {.property = 0,
				   .nr_groups = 0,
				   .threads_per_group = 0};
	int first_thread = cpu_first_thread_sibling(cpu);
	int i, cpu_group_start = -1, err = 0;

	if (!dn)
		return -ENODATA;

	err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
	if (err)
		goto out;

	zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
				GFP_KERNEL,
				cpu_to_node(cpu));

	cpu_group_start = get_cpu_thread_group_start(cpu, &tg);

	if (unlikely(cpu_group_start == -1)) {
		WARN_ON_ONCE(1);
		err = -ENODATA;
		goto out;
	}

	for (i = first_thread; i < first_thread + threads_per_core; i++) {
		int i_group_start = get_cpu_thread_group_start(i, &tg);

		if (unlikely(i_group_start == -1)) {
			WARN_ON_ONCE(1);
			err = -ENODATA;
			goto out;
		}

		if (i_group_start == cpu_group_start)
			cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
	}

out:
	of_node_put(dn);
	return err;
}

static int init_big_cores(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int err = init_cpu_l1_cache_map(cpu);

		if (err)
			return err;

		zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
					GFP_KERNEL,
					cpu_to_node(cpu));
	}

	has_big_cores = true;
	return 0;
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int cpu;

	DBG("smp_prepare_cpus\n");

	/*
	 * setup_cpu may need to be called on the boot cpu. We haven't
	 * spun any cpus up but let's be paranoid.
	 */
	BUG_ON(boot_cpuid != smp_processor_id());

	/* Fixup boot cpu */
	smp_store_cpu_info(boot_cpuid);
	cpu_callin_map[boot_cpuid] = 1;

	for_each_possible_cpu(cpu) {
		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		/*
		 * numa_node_id() works after this.
		 */
		if (cpu_present(cpu)) {
			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
			set_cpu_numa_mem(cpu,
				local_memory_node(numa_cpu_lookup_table[cpu]));
		}
	}

	/* Init the cpumasks so the boot CPU is related to itself */
	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

	init_big_cores();
	if (has_big_cores) {
		cpumask_set_cpu(boot_cpuid,
				cpu_smallcore_mask(boot_cpuid));
	}

	if (smp_ops && smp_ops->probe)
		smp_ops->probe();
}

void smp_prepare_boot_cpu(void)
{
	BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
	paca_ptrs[boot_cpuid]->__current = current;
#endif
	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
	current_set[boot_cpuid] = task_thread_info(current);
}

#ifdef CONFIG_HOTPLUG_CPU

int generic_cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();

	if (cpu == boot_cpuid)
		return -EBUSY;

	set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64
	vdso_data->processorCount--;
#endif
	/* Update affinity of all IRQs previously aimed at this CPU */
	irq_migrate_all_off_this_cpu();

	/*
	 * Depending on the details of the interrupt controller, it's possible
	 * that one of the interrupts we just migrated away from this CPU is
	 * actually already pending on this CPU. If we leave it in that state
	 * the interrupt will never be EOI'ed, and will never fire again. So
	 * temporarily enable interrupts here, to allow any pending interrupt
	 * to be received (and EOI'ed), before we take this CPU offline.
	 */
	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	return 0;
}

void generic_cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (is_cpu_dead(cpu))
			return;
		msleep(100);
	}
	printk(KERN_ERR "CPU%d didn't die...\n", cpu);
}

void generic_set_cpu_dead(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_DEAD;
}

/*
 * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
 * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
 * which makes the delay in generic_cpu_die() not happen.
 */
void generic_set_cpu_up(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
}

int generic_check_cpu_restart(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}

int is_cpu_dead(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_DEAD;
}

static bool secondaries_inhibited(void)
{
	return kvm_hv_mode_active();
}

#else /* HOTPLUG_CPU */

#define secondaries_inhibited()		0

#endif

static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
	struct thread_info *ti = task_thread_info(idle);

#ifdef CONFIG_PPC64
	paca_ptrs[cpu]->__current = idle;
	paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
	ti->cpu = cpu;
	secondary_ti = current_set[cpu] = ti;
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int rc, c;

	/*
	 * Don't allow secondary threads to come online if inhibited
	 */
	if (threads_per_core > 1 && secondaries_inhibited() &&
	    cpu_thread_in_subcore(cpu))
		return -EBUSY;

	if (smp_ops == NULL ||
	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
		return -EINVAL;

	cpu_idle_thread_init(cpu, tidle);

	/*
	 * The platform might need to allocate resources prior to bringing
	 * up the CPU
	 */
	if (smp_ops->prepare_cpu) {
		rc = smp_ops->prepare_cpu(cpu);
		if (rc)
			return rc;
	}

	/* Make sure callin-map entry is 0 (can be leftover from a CPU
	 * hotplug)
	 */
	cpu_callin_map[cpu] = 0;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
	smp_mb();

	/* wake up cpus */
	DBG("smp: kicking cpu %d\n", cpu);
	rc = smp_ops->kick_cpu(cpu);
	if (rc) {
		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
		return rc;
	}

	/*
	 * wait to see if the cpu made a callin (is actually up).
	 * use this value that I found through experimentation.
	 * -- Cort
	 */
	if (system_state < SYSTEM_RUNNING)
		for (c = 50000; c && !cpu_callin_map[cpu]; c--)
			udelay(100);
#ifdef CONFIG_HOTPLUG_CPU
	else
		/*
		 * CPUs can take much longer to come up in the
		 * hotplug case. Wait five seconds.
		 */
		for (c = 5000; c && !cpu_callin_map[cpu]; c--)
			msleep(1);
#endif

	if (!cpu_callin_map[cpu]) {
		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
		return -ENOENT;
	}

	DBG("Processor %u found.\n", cpu);

	if (smp_ops->give_timebase)
		smp_ops->give_timebase();

	/* Wait until cpu puts itself in the online & active maps */
	spin_until_cond(cpu_online(cpu));

	return 0;
}

/* Return the value of the reg property corresponding to the given
 * logical cpu.
 */
int cpu_to_core_id(int cpu)
{
	struct device_node *np;
	const __be32 *reg;
	int id = -1;

	np = of_get_cpu_node(cpu, NULL);
	if (!np)
		goto out;

	reg = of_get_property(np, "reg", NULL);
	if (!reg)
		goto out;

	id = be32_to_cpup(reg);
out:
	of_node_put(np);
	return id;
}
EXPORT_SYMBOL_GPL(cpu_to_core_id);

/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
{
	return cpu >> threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);

int cpu_first_thread_of_core(int core)
{
	return core << threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
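
/*
 * Editorial worked example (threads_shift value assumed for illustration):
 * on an SMT8 system threads_shift is 3, so cpu_core_index_of_thread(13)
 * returns 1 (thread 13 sits in core 1) and cpu_first_thread_of_core(2)
 * returns 16 (the first thread of core 2).
 */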

/* Must be called when no change can occur to cpu_present_mask,
 * i.e. during cpu online or offline.
 */
static struct device_node *cpu_to_l2cache(int cpu)
{
	struct device_node *np;
	struct device_node *cache;

	if (!cpu_present(cpu))
		return NULL;

	np = of_get_cpu_node(cpu, NULL);
	if (np == NULL)
		return NULL;

	cache = of_find_next_cache_node(np);

	of_node_put(np);

	return cache;
}

static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
{
	struct device_node *l2_cache, *np;
	int i;

	l2_cache = cpu_to_l2cache(cpu);
	if (!l2_cache)
		return false;

	for_each_cpu(i, cpu_online_mask) {
		/*
		 * when updating the masks, the current CPU has not been marked
		 * online, but we need to update the cache masks
		 */
		np = cpu_to_l2cache(i);
		if (!np)
			continue;

		if (np == l2_cache)
			set_cpus_related(cpu, i, mask_fn);

		of_node_put(np);
	}
	of_node_put(l2_cache);

	return true;
}

#ifdef CONFIG_HOTPLUG_CPU
static void remove_cpu_from_masks(int cpu)
{
	int i;

	/* NB: cpu_core_mask is a superset of the others */
	for_each_cpu(i, cpu_core_mask(cpu)) {
		set_cpus_unrelated(cpu, i, cpu_core_mask);
		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
		if (has_big_cores)
			set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
	}
}
#endif

static inline void add_cpu_to_smallcore_masks(int cpu)
{
	struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
	int i, first_thread = cpu_first_thread_sibling(cpu);

	if (!has_big_cores)
		return;

	cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));

	for (i = first_thread; i < first_thread + threads_per_core; i++) {
		if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
			set_cpus_related(i, cpu, cpu_smallcore_mask);
	}
}

static void add_cpu_to_masks(int cpu)
{
	int first_thread = cpu_first_thread_sibling(cpu);
	int chipid = cpu_to_chip_id(cpu);
	int i;

	/*
	 * This CPU will not be in the online mask yet so we need to manually
	 * add it to its own thread sibling mask.
	 */
	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));

	for (i = first_thread; i < first_thread + threads_per_core; i++)
		if (cpu_online(i))
			set_cpus_related(i, cpu, cpu_sibling_mask);

	add_cpu_to_smallcore_masks(cpu);
	/*
	 * Copy the thread sibling mask into the cache sibling mask
	 * and mark any CPUs that share an L2 with this CPU.
	 */
	for_each_cpu(i, cpu_sibling_mask(cpu))
		set_cpus_related(cpu, i, cpu_l2_cache_mask);
	update_mask_by_l2(cpu, cpu_l2_cache_mask);

	/*
	 * Copy the cache sibling mask into core sibling mask and mark
	 * any CPUs on the same chip as this CPU.
	 */
	for_each_cpu(i, cpu_l2_cache_mask(cpu))
		set_cpus_related(cpu, i, cpu_core_mask);

	if (chipid == -1)
		return;

	for_each_cpu(i, cpu_online_mask)
		if (cpu_to_chip_id(i) == chipid)
			set_cpus_related(cpu, i, cpu_core_mask);
}

static bool shared_caches;

/* Activate a secondary processor. */
void start_secondary(void *unused)
{
	unsigned int cpu = smp_processor_id();
	struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;

	mmgrab(&init_mm);
	current->active_mm = &init_mm;

	smp_store_cpu_info(cpu);
	set_dec(tb_ticks_per_jiffy);
	preempt_disable();
	cpu_callin_map[cpu] = 1;

	if (smp_ops->setup_cpu)
		smp_ops->setup_cpu(cpu);
	if (smp_ops->take_timebase)
		smp_ops->take_timebase();

	secondary_cpu_time_init();

#ifdef CONFIG_PPC64
	if (system_state == SYSTEM_RUNNING)
		vdso_data->processorCount++;

	vdso_getcpu_init();
#endif
	/* Update topology CPU masks */
	add_cpu_to_masks(cpu);

	if (has_big_cores)
		sibling_mask = cpu_smallcore_mask;
	/*
	 * Check for any shared caches. Note that this must be done on a
	 * per-core basis because one core in the pair might be disabled.
	 */
	if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
		shared_caches = true;

	set_numa_node(numa_cpu_lookup_table[cpu]);
	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));

	smp_wmb();
	notify_cpu_starting(cpu);
	set_cpu_online(cpu, true);

	boot_init_stack_canary();

	local_irq_enable();

	/* We can enable ftrace for secondary cpus now */
	this_cpu_enable_ftrace();

	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

	BUG();
}

int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;

	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
		flags |= SD_ASYM_PACKING;
	}
	return flags;
}
#endif

static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

/*
 * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
 * This topology makes it *much* cheaper to migrate tasks between adjacent cores
 * since the migrated task remains cache hot. We want to take advantage of this
 * at the scheduler level so an extra topology level is required.
 */
static int powerpc_shared_cache_flags(void)
{
	return SD_SHARE_PKG_RESOURCES;
}

/*
 * We can't just pass cpu_l2_cache_mask() directly because it returns a
 * non-const pointer and the compiler barfs on that.
 */
static const struct cpumask *shared_cache_mask(int cpu)
{
	return cpu_l2_cache_mask(cpu);
}

#ifdef CONFIG_SCHED_SMT
static const struct cpumask *smallcore_smt_mask(int cpu)
{
	return cpu_smallcore_mask(cpu);
}
#endif

static struct sched_domain_topology_level power9_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

void __init smp_cpus_done(unsigned int max_cpus)
{
	/*
	 * We are running pinned to the boot CPU, see rest_init().
	 */
	if (smp_ops && smp_ops->setup_cpu)
		smp_ops->setup_cpu(boot_cpuid);

	if (smp_ops && smp_ops->bringup_done)
		smp_ops->bringup_done();

	/*
	 * On a shared LPAR, associativity needs to be requested.
	 * Hence, get numa topology before dumping cpu topology
	 */
	shared_proc_topology_init();
	dump_numa_cpu_topology();

#ifdef CONFIG_SCHED_SMT
	if (has_big_cores) {
		pr_info("Using small cores at SMT level\n");
		power9_topology[0].mask = smallcore_smt_mask;
		powerpc_topology[0].mask = smallcore_smt_mask;
	}
#endif
	/*
	 * If any CPU detects that it's sharing a cache with another CPU then
	 * use the deeper topology that is aware of this sharing.
	 */
	if (shared_caches) {
		pr_info("Using shared cache scheduler topology\n");
		set_sched_topology(power9_topology);
	} else {
		pr_info("Using standard scheduler topology\n");
		set_sched_topology(powerpc_topology);
	}
}

#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	int err;

	if (!smp_ops->cpu_disable)
		return -ENOSYS;

	this_cpu_disable_ftrace();

	err = smp_ops->cpu_disable();
	if (err)
		return err;

	/* Update sibling maps */
	remove_cpu_from_masks(cpu);

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	if (smp_ops->cpu_die)
		smp_ops->cpu_die(cpu);
}

void cpu_die(void)
{
	/*
	 * Disable on the down path. This will be re-enabled by
	 * start_secondary() via start_secondary_resume() below
	 */
	this_cpu_disable_ftrace();

	if (ppc_md.cpu_die)
		ppc_md.cpu_die();

	/* If we return, we re-enter start_secondary */
	start_secondary_resume();
}

#endif