Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * SMP support for pSeries machines. | |
3 | * | |
4 | * Dave Engebretsen, Peter Bergner, and | |
5 | * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com | |
6 | * | |
7 | * Plus various changes from other IBM teams... | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; either version | |
12 | * 2 of the License, or (at your option) any later version. | |
13 | */ | |
14 | ||
15 | #undef DEBUG | |
16 | ||
17 | #include <linux/config.h> | |
18 | #include <linux/kernel.h> | |
19 | #include <linux/module.h> | |
20 | #include <linux/sched.h> | |
21 | #include <linux/smp.h> | |
22 | #include <linux/interrupt.h> | |
23 | #include <linux/delay.h> | |
24 | #include <linux/init.h> | |
25 | #include <linux/spinlock.h> | |
26 | #include <linux/cache.h> | |
27 | #include <linux/err.h> | |
28 | #include <linux/sysdev.h> | |
29 | #include <linux/cpu.h> | |
30 | ||
31 | #include <asm/ptrace.h> | |
32 | #include <asm/atomic.h> | |
33 | #include <asm/irq.h> | |
34 | #include <asm/page.h> | |
35 | #include <asm/pgtable.h> | |
36 | #include <asm/io.h> | |
37 | #include <asm/prom.h> | |
38 | #include <asm/smp.h> | |
39 | #include <asm/paca.h> | |
40 | #include <asm/time.h> | |
41 | #include <asm/machdep.h> | |
42 | #include <asm/xics.h> | |
43 | #include <asm/cputable.h> | |
44 | #include <asm/system.h> | |
45 | #include <asm/rtas.h> | |
46 | #include <asm/plpar_wrappers.h> | |
47 | #include <asm/pSeries_reconfig.h> | |
48 | ||
49 | #include "mpic.h" | |
50 | ||
/*
 * Debug tracing helper: DBG() forwards its arguments to udbg_printf()
 * when DEBUG is defined and expands to nothing otherwise.
 */
#ifndef DEBUG
#define DBG(fmt...)
#else
#define DBG(fmt...) udbg_printf(fmt)
#endif
56 | ||
57 | /* | |
58 | * The primary thread of each non-boot processor is recorded here before | |
59 | * smp init. | |
60 | */ | |
61 | static cpumask_t of_spin_map; | |
62 | ||
63 | extern void pSeries_secondary_smp_init(unsigned long); | |
64 | ||
65 | #ifdef CONFIG_HOTPLUG_CPU | |
66 | ||
67 | /* Get state of physical CPU. | |
68 | * Return codes: | |
69 | * 0 - The processor is in the RTAS stopped state | |
70 | * 1 - stop-self is in progress | |
71 | * 2 - The processor is not in the RTAS stopped state | |
72 | * -1 - Hardware Error | |
73 | * -2 - Hardware Busy, Try again later. | |
74 | */ | |
75 | static int query_cpu_stopped(unsigned int pcpu) | |
76 | { | |
77 | int cpu_status; | |
78 | int status, qcss_tok; | |
79 | ||
80 | qcss_tok = rtas_token("query-cpu-stopped-state"); | |
81 | if (qcss_tok == RTAS_UNKNOWN_SERVICE) | |
82 | return -1; | |
83 | status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); | |
84 | if (status != 0) { | |
85 | printk(KERN_ERR | |
86 | "RTAS query-cpu-stopped-state failed: %i\n", status); | |
87 | return status; | |
88 | } | |
89 | ||
90 | return cpu_status; | |
91 | } | |
92 | ||
93 | int pSeries_cpu_disable(void) | |
94 | { | |
95 | systemcfg->processorCount--; | |
96 | ||
97 | /*fix boot_cpuid here*/ | |
98 | if (smp_processor_id() == boot_cpuid) | |
99 | boot_cpuid = any_online_cpu(cpu_online_map); | |
100 | ||
101 | /* FIXME: abstract this to not be platform specific later on */ | |
102 | xics_migrate_irqs_away(); | |
103 | return 0; | |
104 | } | |
105 | ||
106 | void pSeries_cpu_die(unsigned int cpu) | |
107 | { | |
108 | int tries; | |
109 | int cpu_status; | |
110 | unsigned int pcpu = get_hard_smp_processor_id(cpu); | |
111 | ||
112 | for (tries = 0; tries < 25; tries++) { | |
113 | cpu_status = query_cpu_stopped(pcpu); | |
114 | if (cpu_status == 0 || cpu_status == -1) | |
115 | break; | |
116 | msleep(200); | |
117 | } | |
118 | if (cpu_status != 0) { | |
119 | printk("Querying DEAD? cpu %i (%i) shows %i\n", | |
120 | cpu, pcpu, cpu_status); | |
121 | } | |
122 | ||
123 | /* Isolation and deallocation are definatly done by | |
124 | * drslot_chrp_cpu. If they were not they would be | |
125 | * done here. Change isolate state to Isolate and | |
126 | * change allocation-state to Unusable. | |
127 | */ | |
128 | paca[cpu].cpu_start = 0; | |
129 | } | |
130 | ||
131 | /* | |
132 | * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle | |
133 | * here is that a cpu device node may represent up to two logical cpus | |
134 | * in the SMT case. We must honor the assumption in other code that | |
135 | * the logical ids for sibling SMT threads x and y are adjacent, such | |
136 | * that x^1 == y and y^1 == x. | |
137 | */ | |
138 | static int pSeries_add_processor(struct device_node *np) | |
139 | { | |
140 | unsigned int cpu; | |
141 | cpumask_t candidate_map, tmp = CPU_MASK_NONE; | |
142 | int err = -ENOSPC, len, nthreads, i; | |
143 | u32 *intserv; | |
144 | ||
145 | intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); | |
146 | if (!intserv) | |
147 | return 0; | |
148 | ||
149 | nthreads = len / sizeof(u32); | |
150 | for (i = 0; i < nthreads; i++) | |
151 | cpu_set(i, tmp); | |
152 | ||
153 | lock_cpu_hotplug(); | |
154 | ||
155 | BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); | |
156 | ||
157 | /* Get a bitmap of unoccupied slots. */ | |
158 | cpus_xor(candidate_map, cpu_possible_map, cpu_present_map); | |
159 | if (cpus_empty(candidate_map)) { | |
160 | /* If we get here, it most likely means that NR_CPUS is | |
161 | * less than the partition's max processors setting. | |
162 | */ | |
163 | printk(KERN_ERR "Cannot add cpu %s; this system configuration" | |
164 | " supports %d logical cpus.\n", np->full_name, | |
165 | cpus_weight(cpu_possible_map)); | |
166 | goto out_unlock; | |
167 | } | |
168 | ||
169 | while (!cpus_empty(tmp)) | |
170 | if (cpus_subset(tmp, candidate_map)) | |
171 | /* Found a range where we can insert the new cpu(s) */ | |
172 | break; | |
173 | else | |
174 | cpus_shift_left(tmp, tmp, nthreads); | |
175 | ||
176 | if (cpus_empty(tmp)) { | |
177 | printk(KERN_ERR "Unable to find space in cpu_present_map for" | |
178 | " processor %s with %d thread(s)\n", np->name, | |
179 | nthreads); | |
180 | goto out_unlock; | |
181 | } | |
182 | ||
183 | for_each_cpu_mask(cpu, tmp) { | |
184 | BUG_ON(cpu_isset(cpu, cpu_present_map)); | |
185 | cpu_set(cpu, cpu_present_map); | |
186 | set_hard_smp_processor_id(cpu, *intserv++); | |
187 | } | |
188 | err = 0; | |
189 | out_unlock: | |
190 | unlock_cpu_hotplug(); | |
191 | return err; | |
192 | } | |
193 | ||
194 | /* | |
195 | * Update the present map for a cpu node which is going away, and set | |
196 | * the hard id in the paca(s) to -1 to be consistent with boot time | |
197 | * convention for non-present cpus. | |
198 | */ | |
199 | static void pSeries_remove_processor(struct device_node *np) | |
200 | { | |
201 | unsigned int cpu; | |
202 | int len, nthreads, i; | |
203 | u32 *intserv; | |
204 | ||
205 | intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len); | |
206 | if (!intserv) | |
207 | return; | |
208 | ||
209 | nthreads = len / sizeof(u32); | |
210 | ||
211 | lock_cpu_hotplug(); | |
212 | for (i = 0; i < nthreads; i++) { | |
213 | for_each_present_cpu(cpu) { | |
214 | if (get_hard_smp_processor_id(cpu) != intserv[i]) | |
215 | continue; | |
216 | BUG_ON(cpu_online(cpu)); | |
217 | cpu_clear(cpu, cpu_present_map); | |
218 | set_hard_smp_processor_id(cpu, -1); | |
219 | break; | |
220 | } | |
221 | if (cpu == NR_CPUS) | |
222 | printk(KERN_WARNING "Could not find cpu to remove " | |
223 | "with physical id 0x%x\n", intserv[i]); | |
224 | } | |
225 | unlock_cpu_hotplug(); | |
226 | } | |
227 | ||
228 | static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node) | |
229 | { | |
230 | int err = NOTIFY_OK; | |
231 | ||
232 | switch (action) { | |
233 | case PSERIES_RECONFIG_ADD: | |
234 | if (pSeries_add_processor(node)) | |
235 | err = NOTIFY_BAD; | |
236 | break; | |
237 | case PSERIES_RECONFIG_REMOVE: | |
238 | pSeries_remove_processor(node); | |
239 | break; | |
240 | default: | |
241 | err = NOTIFY_DONE; | |
242 | break; | |
243 | } | |
244 | return err; | |
245 | } | |
246 | ||
/*
 * Notifier block that routes reconfiguration events to
 * pSeries_smp_notifier(); registered by smp_init_pSeries() on LPAR
 * platforms.
 */
static struct notifier_block pSeries_smp_nb = {
	.notifier_call = pSeries_smp_notifier,
};
250 | ||
251 | #endif /* CONFIG_HOTPLUG_CPU */ | |
252 | ||
253 | /** | |
254 | * smp_startup_cpu() - start the given cpu | |
255 | * | |
256 | * At boot time, there is nothing to do for primary threads which were | |
257 | * started from Open Firmware. For anything else, call RTAS with the | |
258 | * appropriate start location. | |
259 | * | |
260 | * Returns: | |
261 | * 0 - failure | |
262 | * 1 - success | |
263 | */ | |
264 | static inline int __devinit smp_startup_cpu(unsigned int lcpu) | |
265 | { | |
266 | int status; | |
267 | unsigned long start_here = __pa((u32)*((unsigned long *) | |
268 | pSeries_secondary_smp_init)); | |
269 | unsigned int pcpu; | |
270 | ||
271 | if (cpu_isset(lcpu, of_spin_map)) | |
272 | /* Already started by OF and sitting in spin loop */ | |
273 | return 1; | |
274 | ||
275 | pcpu = get_hard_smp_processor_id(lcpu); | |
276 | ||
277 | /* Fixup atomic count: it exited inside IRQ handler. */ | |
278 | paca[lcpu].__current->thread_info->preempt_count = 0; | |
279 | ||
280 | status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL, | |
281 | pcpu, start_here, lcpu); | |
282 | if (status != 0) { | |
283 | printk(KERN_ERR "start-cpu failed: %i\n", status); | |
284 | return 0; | |
285 | } | |
286 | return 1; | |
287 | } | |
288 | ||
/*
 * Post message @msg to @cpu: set the message bit in that cpu's
 * xics_ipi_message word, then raise an IPI on it.
 */
static inline void smp_xics_do_message(int cpu, int msg)
{
	set_bit(msg, &xics_ipi_message[cpu].value);
	/* Full barrier between setting the message bit and raising the IPI. */
	mb();
	xics_cause_IPI(cpu);
}
295 | ||
296 | static void smp_xics_message_pass(int target, int msg) | |
297 | { | |
298 | unsigned int i; | |
299 | ||
300 | if (target < NR_CPUS) { | |
301 | smp_xics_do_message(target, msg); | |
302 | } else { | |
303 | for_each_online_cpu(i) { | |
304 | if (target == MSG_ALL_BUT_SELF | |
305 | && i == smp_processor_id()) | |
306 | continue; | |
307 | smp_xics_do_message(i, msg); | |
308 | } | |
309 | } | |
310 | } | |
311 | ||
312 | static int __init smp_xics_probe(void) | |
313 | { | |
314 | xics_request_IPIs(); | |
315 | ||
316 | return cpus_weight(cpu_possible_map); | |
317 | } | |
318 | ||
319 | static void __devinit smp_xics_setup_cpu(int cpu) | |
320 | { | |
321 | if (cpu != boot_cpuid) | |
322 | xics_setup_cpu(); | |
323 | ||
324 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) | |
325 | vpa_init(cpu); | |
326 | ||
327 | cpu_clear(cpu, of_spin_map); | |
328 | ||
329 | /* | |
330 | * Put the calling processor into the GIQ. This is really only | |
331 | * necessary from a secondary thread as the OF start-cpu interface | |
332 | * performs this function for us on primary threads. | |
333 | */ | |
334 | rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, | |
335 | (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); | |
336 | } | |
337 | ||
/*
 * State shared by pSeries_give_timebase() and pSeries_take_timebase().
 * The giver stores the timebase value in "timebase" under the lock; the
 * taker loads it and resets it to zero.  Zero means "nothing pending".
 */
static DEFINE_SPINLOCK(timebase_lock);
static unsigned long timebase;
340 | ||
341 | static void __devinit pSeries_give_timebase(void) | |
342 | { | |
343 | spin_lock(&timebase_lock); | |
344 | rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); | |
345 | timebase = get_tb(); | |
346 | spin_unlock(&timebase_lock); | |
347 | ||
348 | while (timebase) | |
349 | barrier(); | |
350 | rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); | |
351 | } | |
352 | ||
353 | static void __devinit pSeries_take_timebase(void) | |
354 | { | |
355 | while (!timebase) | |
356 | barrier(); | |
357 | spin_lock(&timebase_lock); | |
358 | set_tb(timebase >> 32, timebase & 0xffffffff); | |
359 | timebase = 0; | |
360 | spin_unlock(&timebase_lock); | |
361 | } | |
362 | ||
363 | static void __devinit smp_pSeries_kick_cpu(int nr) | |
364 | { | |
365 | BUG_ON(nr < 0 || nr >= NR_CPUS); | |
366 | ||
367 | if (!smp_startup_cpu(nr)) | |
368 | return; | |
369 | ||
370 | /* | |
371 | * The processor is currently spinning, waiting for the | |
372 | * cpu_start field to become non-zero After we set cpu_start, | |
373 | * the processor will continue on to secondary_start | |
374 | */ | |
375 | paca[nr].cpu_start = 1; | |
376 | } | |
377 | ||
378 | static int smp_pSeries_cpu_bootable(unsigned int nr) | |
379 | { | |
380 | /* Special case - we inhibit secondary thread startup | |
381 | * during boot if the user requests it. Odd-numbered | |
382 | * cpus are assumed to be secondary threads. | |
383 | */ | |
384 | if (system_state < SYSTEM_RUNNING && | |
385 | cur_cpu_spec->cpu_features & CPU_FTR_SMT && | |
386 | !smt_enabled_at_boot && nr % 2 != 0) | |
387 | return 0; | |
388 | ||
389 | return 1; | |
390 | } | |
391 | ||
392 | static struct smp_ops_t pSeries_mpic_smp_ops = { | |
393 | .message_pass = smp_mpic_message_pass, | |
394 | .probe = smp_mpic_probe, | |
395 | .kick_cpu = smp_pSeries_kick_cpu, | |
396 | .setup_cpu = smp_mpic_setup_cpu, | |
397 | }; | |
398 | ||
399 | static struct smp_ops_t pSeries_xics_smp_ops = { | |
400 | .message_pass = smp_xics_message_pass, | |
401 | .probe = smp_xics_probe, | |
402 | .kick_cpu = smp_pSeries_kick_cpu, | |
403 | .setup_cpu = smp_xics_setup_cpu, | |
404 | .cpu_bootable = smp_pSeries_cpu_bootable, | |
405 | }; | |
406 | ||
407 | /* This is called very early */ | |
408 | void __init smp_init_pSeries(void) | |
409 | { | |
410 | int i; | |
411 | ||
412 | DBG(" -> smp_init_pSeries()\n"); | |
413 | ||
414 | if (ppc64_interrupt_controller == IC_OPEN_PIC) | |
415 | smp_ops = &pSeries_mpic_smp_ops; | |
416 | else | |
417 | smp_ops = &pSeries_xics_smp_ops; | |
418 | ||
419 | #ifdef CONFIG_HOTPLUG_CPU | |
420 | smp_ops->cpu_disable = pSeries_cpu_disable; | |
421 | smp_ops->cpu_die = pSeries_cpu_die; | |
422 | ||
423 | /* Processors can be added/removed only on LPAR */ | |
424 | if (systemcfg->platform == PLATFORM_PSERIES_LPAR) | |
425 | pSeries_reconfig_notifier_register(&pSeries_smp_nb); | |
426 | #endif | |
427 | ||
428 | /* Mark threads which are still spinning in hold loops. */ | |
429 | if (cur_cpu_spec->cpu_features & CPU_FTR_SMT) | |
430 | for_each_present_cpu(i) { | |
431 | if (i % 2 == 0) | |
432 | /* | |
433 | * Even-numbered logical cpus correspond to | |
434 | * primary threads. | |
435 | */ | |
436 | cpu_set(i, of_spin_map); | |
437 | } | |
438 | else | |
439 | of_spin_map = cpu_present_map; | |
440 | ||
441 | cpu_clear(boot_cpuid, of_spin_map); | |
442 | ||
443 | /* Non-lpar has additional take/give timebase */ | |
444 | if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { | |
445 | smp_ops->give_timebase = pSeries_give_timebase; | |
446 | smp_ops->take_timebase = pSeries_take_timebase; | |
447 | } | |
448 | ||
449 | DBG(" <- smp_init_pSeries()\n"); | |
450 | } | |
451 |