Commit | Line | Data |
---|---|---|
0332c2d4 ME |
1 | /* |
2 | * pseries CPU Hotplug infrastructure. | |
3 | * | |
413f7c40 ME |
4 | * Split out from arch/powerpc/platforms/pseries/setup.c |
5 | * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c | |
0332c2d4 ME |
6 | * |
7 | * Peter Bergner, IBM March 2001. | |
8 | * Copyright (C) 2001 IBM. | |
413f7c40 ME |
9 | * Dave Engebretsen, Peter Bergner, and |
10 | * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com | |
11 | * Plus various changes from other IBM teams... | |
0332c2d4 ME |
12 | * |
13 | * Copyright (C) 2006 Michael Ellerman, IBM Corporation | |
14 | * | |
15 | * This program is free software; you can redistribute it and/or | |
16 | * modify it under the terms of the GNU General Public License | |
17 | * as published by the Free Software Foundation; either version | |
18 | * 2 of the License, or (at your option) any later version. | |
19 | */ | |
20 | ||
21 | #include <linux/kernel.h> | |
0b05ac6e | 22 | #include <linux/interrupt.h> |
0332c2d4 | 23 | #include <linux/delay.h> |
62fe91bb | 24 | #include <linux/sched.h> /* for idle_task_exit */ |
0332c2d4 | 25 | #include <linux/cpu.h> |
1cf3d8b3 | 26 | #include <linux/of.h> |
0332c2d4 ME |
27 | #include <asm/prom.h> |
28 | #include <asm/rtas.h> | |
29 | #include <asm/firmware.h> | |
30 | #include <asm/machdep.h> | |
31 | #include <asm/vdso_datapage.h> | |
0b05ac6e | 32 | #include <asm/xics.h> |
212bebb4 DD |
33 | #include <asm/plpar_wrappers.h> |
34 | ||
3aa565f5 | 35 | #include "offline_states.h" |
0332c2d4 ME |
36 | |
37 | /* This version can't take the spinlock, because it never returns */ | |
38 | static struct rtas_args rtas_stop_self_args = { | |
39 | .token = RTAS_UNKNOWN_SERVICE, | |
40 | .nargs = 0, | |
41 | .nret = 1, | |
42 | .rets = &rtas_stop_self_args.args[0], | |
43 | }; | |
44 | ||
3aa565f5 GS |
45 | static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = |
46 | CPU_STATE_OFFLINE; | |
47 | static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; | |
48 | ||
49 | static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; | |
50 | ||
51 | static int cede_offline_enabled __read_mostly = 1; | |
52 | ||
53 | /* | |
54 | * Enable/disable cede_offline when available. | |
55 | */ | |
56 | static int __init setup_cede_offline(char *str) | |
57 | { | |
58 | if (!strcmp(str, "off")) | |
59 | cede_offline_enabled = 0; | |
60 | else if (!strcmp(str, "on")) | |
61 | cede_offline_enabled = 1; | |
62 | else | |
63 | return 0; | |
64 | return 1; | |
65 | } | |
66 | ||
67 | __setup("cede_offline=", setup_cede_offline); | |
68 | ||
69 | enum cpu_state_vals get_cpu_current_state(int cpu) | |
70 | { | |
71 | return per_cpu(current_state, cpu); | |
72 | } | |
73 | ||
74 | void set_cpu_current_state(int cpu, enum cpu_state_vals state) | |
75 | { | |
76 | per_cpu(current_state, cpu) = state; | |
77 | } | |
78 | ||
79 | enum cpu_state_vals get_preferred_offline_state(int cpu) | |
80 | { | |
81 | return per_cpu(preferred_offline_state, cpu); | |
82 | } | |
83 | ||
84 | void set_preferred_offline_state(int cpu, enum cpu_state_vals state) | |
85 | { | |
86 | per_cpu(preferred_offline_state, cpu) = state; | |
87 | } | |
88 | ||
89 | void set_default_offline_state(int cpu) | |
90 | { | |
91 | per_cpu(preferred_offline_state, cpu) = default_offline_state; | |
92 | } | |
93 | ||
04da6af9 | 94 | static void rtas_stop_self(void) |
0332c2d4 ME |
95 | { |
96 | struct rtas_args *args = &rtas_stop_self_args; | |
97 | ||
98 | local_irq_disable(); | |
99 | ||
100 | BUG_ON(args->token == RTAS_UNKNOWN_SERVICE); | |
101 | ||
102 | printk("cpu %u (hwid %u) Ready to die...\n", | |
103 | smp_processor_id(), hard_smp_processor_id()); | |
104 | enter_rtas(__pa(args)); | |
105 | ||
106 | panic("Alas, I survived.\n"); | |
107 | } | |
108 | ||
06ba30b6 | 109 | static void pseries_mach_cpu_die(void) |
04da6af9 | 110 | { |
3aa565f5 GS |
111 | unsigned int cpu = smp_processor_id(); |
112 | unsigned int hwcpu = hard_smp_processor_id(); | |
113 | u8 cede_latency_hint = 0; | |
114 | ||
04da6af9 ME |
115 | local_irq_disable(); |
116 | idle_task_exit(); | |
c3e8506c | 117 | xics_teardown_cpu(); |
3aa565f5 GS |
118 | |
119 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { | |
120 | set_cpu_current_state(cpu, CPU_STATE_INACTIVE); | |
32d8ad4e BK |
121 | if (ppc_md.suspend_disable_cpu) |
122 | ppc_md.suspend_disable_cpu(); | |
123 | ||
3aa565f5 GS |
124 | cede_latency_hint = 2; |
125 | ||
126 | get_lppaca()->idle = 1; | |
f13c13a0 | 127 | if (!lppaca_shared_proc(get_lppaca())) |
3aa565f5 GS |
128 | get_lppaca()->donate_dedicated_cpu = 1; |
129 | ||
3aa565f5 | 130 | while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
fb912568 LZ |
131 | while (!prep_irq_for_idle()) { |
132 | local_irq_enable(); | |
133 | local_irq_disable(); | |
134 | } | |
135 | ||
3aa565f5 | 136 | extended_cede_processor(cede_latency_hint); |
3aa565f5 GS |
137 | } |
138 | ||
fb912568 LZ |
139 | local_irq_disable(); |
140 | ||
f13c13a0 | 141 | if (!lppaca_shared_proc(get_lppaca())) |
3aa565f5 GS |
142 | get_lppaca()->donate_dedicated_cpu = 0; |
143 | get_lppaca()->idle = 0; | |
3aa565f5 | 144 | |
0212f260 | 145 | if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { |
598c8231 | 146 | unregister_slb_shadow(hwcpu); |
3aa565f5 | 147 | |
fb912568 | 148 | hard_irq_disable(); |
0212f260 VS |
149 | /* |
150 | * Call to start_secondary_resume() will not return. | |
151 | * Kernel stack will be reset and start_secondary() | |
152 | * will be called to continue the online operation. | |
153 | */ | |
154 | start_secondary_resume(); | |
155 | } | |
156 | } | |
3aa565f5 | 157 | |
0212f260 VS |
158 | /* Requested state is CPU_STATE_OFFLINE at this point */ |
159 | WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); | |
3aa565f5 | 160 | |
0212f260 | 161 | set_cpu_current_state(cpu, CPU_STATE_OFFLINE); |
598c8231 | 162 | unregister_slb_shadow(hwcpu); |
0212f260 | 163 | rtas_stop_self(); |
3aa565f5 | 164 | |
04da6af9 ME |
165 | /* Should never get here... */ |
166 | BUG(); | |
167 | for(;;); | |
168 | } | |
169 | ||
06ba30b6 | 170 | static int pseries_cpu_disable(void) |
413f7c40 ME |
171 | { |
172 | int cpu = smp_processor_id(); | |
173 | ||
ea0f1cab | 174 | set_cpu_online(cpu, false); |
413f7c40 ME |
175 | vdso_data->processorCount--; |
176 | ||
177 | /*fix boot_cpuid here*/ | |
178 | if (cpu == boot_cpuid) | |
8729faaa | 179 | boot_cpuid = cpumask_any(cpu_online_mask); |
413f7c40 ME |
180 | |
181 | /* FIXME: abstract this to not be platform specific later on */ | |
182 | xics_migrate_irqs_away(); | |
183 | return 0; | |
184 | } | |
185 | ||
3aa565f5 GS |
186 | /* |
187 | * pseries_cpu_die: Wait for the cpu to die. | |
188 | * @cpu: logical processor id of the CPU whose death we're awaiting. | |
189 | * | |
190 | * This function is called from the context of the thread which is performing | |
191 | * the cpu-offline. Here we wait for long enough to allow the cpu in question | |
192 | * to self-destroy so that the cpu-offline thread can send the CPU_DEAD | |
193 | * notifications. | |
194 | * | |
195 | * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to | |
196 | * self-destruct. | |
197 | */ | |
06ba30b6 | 198 | static void pseries_cpu_die(unsigned int cpu) |
413f7c40 ME |
199 | { |
200 | int tries; | |
3aa565f5 | 201 | int cpu_status = 1; |
413f7c40 ME |
202 | unsigned int pcpu = get_hard_smp_processor_id(cpu); |
203 | ||
3aa565f5 GS |
204 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
205 | cpu_status = 1; | |
940ce422 | 206 | for (tries = 0; tries < 5000; tries++) { |
3aa565f5 GS |
207 | if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { |
208 | cpu_status = 0; | |
209 | break; | |
210 | } | |
940ce422 | 211 | msleep(1); |
3aa565f5 GS |
212 | } |
213 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | |
214 | ||
215 | for (tries = 0; tries < 25; tries++) { | |
f8b67691 MN |
216 | cpu_status = smp_query_cpu_stopped(pcpu); |
217 | if (cpu_status == QCSS_STOPPED || | |
218 | cpu_status == QCSS_HARDWARE_ERROR) | |
3aa565f5 GS |
219 | break; |
220 | cpu_relax(); | |
221 | } | |
413f7c40 | 222 | } |
3aa565f5 | 223 | |
413f7c40 ME |
224 | if (cpu_status != 0) { |
225 | printk("Querying DEAD? cpu %i (%i) shows %i\n", | |
226 | cpu, pcpu, cpu_status); | |
227 | } | |
228 | ||
25985edc | 229 | /* Isolation and deallocation are definitely done by |
413f7c40 ME |
230 | * drslot_chrp_cpu. If they were not they would be |
231 | * done here. Change isolate state to Isolate and | |
232 | * change allocation-state to Unusable. | |
233 | */ | |
234 | paca[cpu].cpu_start = 0; | |
235 | } | |
236 | ||
237 | /* | |
828a6986 | 238 | * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle |
413f7c40 ME |
239 | * here is that a cpu device node may represent up to two logical cpus |
240 | * in the SMT case. We must honor the assumption in other code that | |
241 | * the logical ids for sibling SMT threads x and y are adjacent, such | |
242 | * that x^1 == y and y^1 == x. | |
243 | */ | |
06ba30b6 | 244 | static int pseries_add_processor(struct device_node *np) |
413f7c40 ME |
245 | { |
246 | unsigned int cpu; | |
8729faaa | 247 | cpumask_var_t candidate_mask, tmp; |
413f7c40 ME |
248 | int err = -ENOSPC, len, nthreads, i; |
249 | const u32 *intserv; | |
250 | ||
e2eb6392 | 251 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
252 | if (!intserv) |
253 | return 0; | |
254 | ||
8729faaa AB |
255 | zalloc_cpumask_var(&candidate_mask, GFP_KERNEL); |
256 | zalloc_cpumask_var(&tmp, GFP_KERNEL); | |
257 | ||
413f7c40 ME |
258 | nthreads = len / sizeof(u32); |
259 | for (i = 0; i < nthreads; i++) | |
8729faaa | 260 | cpumask_set_cpu(i, tmp); |
413f7c40 | 261 | |
86ef5c9a | 262 | cpu_maps_update_begin(); |
413f7c40 | 263 | |
8729faaa | 264 | BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); |
413f7c40 ME |
265 | |
266 | /* Get a bitmap of unoccupied slots. */ | |
8729faaa AB |
267 | cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); |
268 | if (cpumask_empty(candidate_mask)) { | |
413f7c40 ME |
269 | /* If we get here, it most likely means that NR_CPUS is |
270 | * less than the partition's max processors setting. | |
271 | */ | |
272 | printk(KERN_ERR "Cannot add cpu %s; this system configuration" | |
273 | " supports %d logical cpus.\n", np->full_name, | |
8729faaa | 274 | cpumask_weight(cpu_possible_mask)); |
413f7c40 ME |
275 | goto out_unlock; |
276 | } | |
277 | ||
8729faaa AB |
278 | while (!cpumask_empty(tmp)) |
279 | if (cpumask_subset(tmp, candidate_mask)) | |
413f7c40 ME |
280 | /* Found a range where we can insert the new cpu(s) */ |
281 | break; | |
282 | else | |
8729faaa | 283 | cpumask_shift_left(tmp, tmp, nthreads); |
413f7c40 | 284 | |
8729faaa | 285 | if (cpumask_empty(tmp)) { |
828a6986 | 286 | printk(KERN_ERR "Unable to find space in cpu_present_mask for" |
413f7c40 ME |
287 | " processor %s with %d thread(s)\n", np->name, |
288 | nthreads); | |
289 | goto out_unlock; | |
290 | } | |
291 | ||
8729faaa | 292 | for_each_cpu(cpu, tmp) { |
104699c0 | 293 | BUG_ON(cpu_present(cpu)); |
ea0f1cab | 294 | set_cpu_present(cpu, true); |
413f7c40 ME |
295 | set_hard_smp_processor_id(cpu, *intserv++); |
296 | } | |
297 | err = 0; | |
298 | out_unlock: | |
86ef5c9a | 299 | cpu_maps_update_done(); |
8729faaa AB |
300 | free_cpumask_var(candidate_mask); |
301 | free_cpumask_var(tmp); | |
413f7c40 ME |
302 | return err; |
303 | } | |
304 | ||
305 | /* | |
306 | * Update the present map for a cpu node which is going away, and set | |
307 | * the hard id in the paca(s) to -1 to be consistent with boot time | |
308 | * convention for non-present cpus. | |
309 | */ | |
06ba30b6 | 310 | static void pseries_remove_processor(struct device_node *np) |
413f7c40 ME |
311 | { |
312 | unsigned int cpu; | |
313 | int len, nthreads, i; | |
314 | const u32 *intserv; | |
315 | ||
e2eb6392 | 316 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
317 | if (!intserv) |
318 | return; | |
319 | ||
320 | nthreads = len / sizeof(u32); | |
321 | ||
86ef5c9a | 322 | cpu_maps_update_begin(); |
413f7c40 ME |
323 | for (i = 0; i < nthreads; i++) { |
324 | for_each_present_cpu(cpu) { | |
325 | if (get_hard_smp_processor_id(cpu) != intserv[i]) | |
326 | continue; | |
327 | BUG_ON(cpu_online(cpu)); | |
ea0f1cab | 328 | set_cpu_present(cpu, false); |
413f7c40 ME |
329 | set_hard_smp_processor_id(cpu, -1); |
330 | break; | |
331 | } | |
8729faaa | 332 | if (cpu >= nr_cpu_ids) |
413f7c40 ME |
333 | printk(KERN_WARNING "Could not find cpu to remove " |
334 | "with physical id 0x%x\n", intserv[i]); | |
335 | } | |
86ef5c9a | 336 | cpu_maps_update_done(); |
413f7c40 ME |
337 | } |
338 | ||
06ba30b6 ME |
339 | static int pseries_smp_notifier(struct notifier_block *nb, |
340 | unsigned long action, void *node) | |
413f7c40 | 341 | { |
de2780a3 | 342 | int err = 0; |
413f7c40 ME |
343 | |
344 | switch (action) { | |
1cf3d8b3 | 345 | case OF_RECONFIG_ATTACH_NODE: |
de2780a3 | 346 | err = pseries_add_processor(node); |
413f7c40 | 347 | break; |
1cf3d8b3 | 348 | case OF_RECONFIG_DETACH_NODE: |
06ba30b6 | 349 | pseries_remove_processor(node); |
413f7c40 | 350 | break; |
413f7c40 | 351 | } |
de2780a3 | 352 | return notifier_from_errno(err); |
413f7c40 ME |
353 | } |
354 | ||
06ba30b6 ME |
355 | static struct notifier_block pseries_smp_nb = { |
356 | .notifier_call = pseries_smp_notifier, | |
413f7c40 ME |
357 | }; |
358 | ||
3aa565f5 GS |
359 | #define MAX_CEDE_LATENCY_LEVELS 4 |
360 | #define CEDE_LATENCY_PARAM_LENGTH 10 | |
361 | #define CEDE_LATENCY_PARAM_MAX_LENGTH \ | |
362 | (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) | |
363 | #define CEDE_LATENCY_TOKEN 45 | |
364 | ||
365 | static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; | |
366 | ||
367 | static int parse_cede_parameters(void) | |
368 | { | |
3aa565f5 | 369 | memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); |
20a8ab97 AB |
370 | return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, |
371 | NULL, | |
372 | CEDE_LATENCY_TOKEN, | |
373 | __pa(cede_parameters), | |
374 | CEDE_LATENCY_PARAM_MAX_LENGTH); | |
3aa565f5 GS |
375 | } |
376 | ||
0332c2d4 ME |
377 | static int __init pseries_cpu_hotplug_init(void) |
378 | { | |
64f27585 OJ |
379 | struct device_node *np; |
380 | const char *typep; | |
3aa565f5 | 381 | int cpu; |
f8b67691 | 382 | int qcss_tok; |
64f27585 OJ |
383 | |
384 | for_each_node_by_name(np, "interrupt-controller") { | |
385 | typep = of_get_property(np, "compatible", NULL); | |
386 | if (strstr(typep, "open-pic")) { | |
387 | of_node_put(np); | |
388 | ||
389 | printk(KERN_INFO "CPU Hotplug not supported on " | |
390 | "systems using MPIC\n"); | |
391 | return 0; | |
392 | } | |
393 | } | |
394 | ||
0332c2d4 | 395 | rtas_stop_self_args.token = rtas_token("stop-self"); |
674fa677 | 396 | qcss_tok = rtas_token("query-cpu-stopped-state"); |
0332c2d4 | 397 | |
674fa677 ME |
398 | if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE || |
399 | qcss_tok == RTAS_UNKNOWN_SERVICE) { | |
400 | printk(KERN_INFO "CPU Hotplug not supported by firmware " | |
401 | "- disabling.\n"); | |
402 | return 0; | |
403 | } | |
04da6af9 | 404 | |
06ba30b6 ME |
405 | ppc_md.cpu_die = pseries_mach_cpu_die; |
406 | smp_ops->cpu_disable = pseries_cpu_disable; | |
407 | smp_ops->cpu_die = pseries_cpu_die; | |
413f7c40 ME |
408 | |
409 | /* Processors can be added/removed only on LPAR */ | |
3aa565f5 | 410 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
1cf3d8b3 | 411 | of_reconfig_notifier_register(&pseries_smp_nb); |
3aa565f5 GS |
412 | cpu_maps_update_begin(); |
413 | if (cede_offline_enabled && parse_cede_parameters() == 0) { | |
414 | default_offline_state = CPU_STATE_INACTIVE; | |
415 | for_each_online_cpu(cpu) | |
416 | set_default_offline_state(cpu); | |
417 | } | |
418 | cpu_maps_update_done(); | |
419 | } | |
413f7c40 | 420 | |
0332c2d4 ME |
421 | return 0; |
422 | } | |
423 | arch_initcall(pseries_cpu_hotplug_init); |