Commit | Line | Data |
---|---|---|
e2186023 ME |
1 | /* |
2 | * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | */ | |
9 | ||
10 | #define pr_fmt(fmt) "powernv: " fmt | |
11 | ||
12 | #include <linux/kernel.h> | |
13 | #include <linux/cpu.h> | |
14 | #include <linux/cpumask.h> | |
15 | #include <linux/device.h> | |
16 | #include <linux/gfp.h> | |
17 | #include <linux/smp.h> | |
18 | #include <linux/stop_machine.h> | |
19 | ||
20 | #include <asm/cputhreads.h> | |
21 | #include <asm/kvm_ppc.h> | |
22 | #include <asm/machdep.h> | |
23 | #include <asm/opal.h> | |
24 | #include <asm/smp.h> | |
25 | ||
26 | #include "subcore.h" | |
1217d34b | 27 | #include "powernv.h" |
e2186023 ME |
28 | |
29 | ||
30 | /* | |
31 | * Split/unsplit procedure: | |
32 | * | |
33 | * A core can be in one of three states, unsplit, 2-way split, and 4-way split. | |
34 | * | |
35 | * The mapping to subcores_per_core is simple: | |
36 | * | |
37 | * State | subcores_per_core | |
38 | * ------------|------------------ | |
39 | * Unsplit | 1 | |
40 | * 2-way split | 2 | |
41 | * 4-way split | 4 | |
42 | * | |
43 | * The core is split along thread boundaries, the mapping between subcores and | |
44 | * threads is as follows: | |
45 | * | |
46 | * Unsplit: | |
47 | * ---------------------------- | |
48 | * Subcore | 0 | | |
49 | * ---------------------------- | |
50 | * Thread | 0 1 2 3 4 5 6 7 | | |
51 | * ---------------------------- | |
52 | * | |
53 | * 2-way split: | |
54 | * ------------------------------------- | |
55 | * Subcore | 0 | 1 | | |
56 | * ------------------------------------- | |
57 | * Thread | 0 1 2 3 | 4 5 6 7 | | |
58 | * ------------------------------------- | |
59 | * | |
60 | * 4-way split: | |
61 | * ----------------------------------------- | |
62 | * Subcore | 0 | 1 | 2 | 3 | | |
63 | * ----------------------------------------- | |
64 | * Thread | 0 1 | 2 3 | 4 5 | 6 7 | | |
65 | * ----------------------------------------- | |
66 | * | |
67 | * | |
68 | * Transitions | |
69 | * ----------- | |
70 | * | |
71 | * It is not possible to transition between either of the split states, the | |
72 | * core must first be unsplit. The legal transitions are: | |
73 | * | |
74 | * ----------- --------------- | |
75 | * | | <----> | 2-way split | | |
76 | * | | --------------- | |
77 | * | Unsplit | | |
78 | * | | --------------- | |
79 | * | | <----> | 4-way split | | |
80 | * ----------- --------------- | |
81 | * | |
82 | * Unsplitting | |
83 | * ----------- | |
84 | * | |
85 | * Unsplitting is the simpler procedure. It requires thread 0 to request the | |
86 | * unsplit while all other threads NAP. | |
87 | * | |
88 | * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells | |
89 | * the hardware that if all threads except 0 are napping, the hardware should | |
90 | * unsplit the core. | |
91 | * | |
92 | * Non-zero threads are sent to a NAP loop, they don't exit the loop until they | |
93 | * see the core unsplit. | |
94 | * | |
95 | * Thread 0 spins waiting for the hardware to see all the other threads napping | |
96 | * and perform the unsplit. | |
97 | * | |
98 | * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them | |
99 | * out of NAP. They will then see the core unsplit and exit the NAP loop. | |
100 | * | |
101 | * Splitting | |
102 | * --------- | |
103 | * | |
104 | * The basic splitting procedure is fairly straightforward. However it is | |
105 | * complicated by the fact that after the split occurs, the newly created | |
106 | * subcores are not in a fully initialised state. | |
107 | * | |
108 | * Most notably the subcores do not have the correct value for SDR1, which | |
109 | * means they must not be running in virtual mode when the split occurs. The | |
110 | * subcores have separate timebase SPRs but these are pre-synchronised by | |
111 | * opal. | |
112 | * | |
113 | * To begin with secondary threads are sent to an assembly routine. There they | |
114 | * switch to real mode, so they are immune to the uninitialised SDR1 value. | |
115 | * Once in real mode they indicate that they are in real mode, and spin waiting | |
116 | * to see the core split. | |
117 | * | |
118 | * Thread 0 waits to see that all secondaries are in real mode, and then begins | |
119 | * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which | |
120 | * prevents the hardware from unsplitting. Then it sets the appropriate HID bit | |
121 | * to request the split, and spins waiting to see that the split has happened. | |
122 | * | |
123 | * Concurrently the secondaries will notice the split. When they do they set up | |
124 | * their SPRs, notably SDR1, and then they can return to virtual mode and exit | |
125 | * the procedure. | |
126 | */ | |
127 | ||
/*
 * Initialised at boot by subcore_init(). Only written by the master cpu at
 * the end of cpu_update_split_mode(); starts out as 0 until then.
 */
static int subcores_per_core;

/*
 * Used to communicate to offline cpus that we want them to pop out of the
 * offline loop and do a split or unsplit.
 *
 * 0 - no split happening
 * 1 - unsplit in progress
 * 2 - split to 2 in progress
 * 4 - split to 4 in progress
 */
static int new_split_mode;

/*
 * Present-but-not-online cpus, recomputed by the master cpu in
 * cpu_update_split_mode() so it knows which cpus to send a reschedule IPI.
 * Allocated once in subcore_init().
 */
static cpumask_var_t cpu_offline_mask;

/*
 * Per-cpu bookkeeping for one split/unsplit operation.
 *
 * step:   progress marker holding one of the SYNC_STEP_* values; other cpus
 *         poll it (see wait_for_sync_step() and cpu_update_split_mode()).
 * master: non-zero only on the cpu that called set_subcores_per_core(); that
 *         cpu publishes the new mode once every cpu has finished.
 */
struct split_state {
	u8 step;
	u8 master;
};

static DEFINE_PER_CPU(struct split_state, split_state);
150 | ||
151 | static void wait_for_sync_step(int step) | |
152 | { | |
153 | int i, cpu = smp_processor_id(); | |
154 | ||
155 | for (i = cpu + 1; i < cpu + threads_per_core; i++) | |
156 | while(per_cpu(split_state, i).step < step) | |
157 | barrier(); | |
158 | ||
159 | /* Order the wait loop vs any subsequent loads/stores. */ | |
160 | mb(); | |
161 | } | |
162 | ||
77b54e9f SP |
163 | static void update_hid_in_slw(u64 hid0) |
164 | { | |
165 | u64 idle_states = pnv_get_supported_cpuidle_states(); | |
166 | ||
167 | if (idle_states & OPAL_PM_WINKLE_ENABLED) { | |
168 | /* OPAL call to patch slw with the new HID0 value */ | |
169 | u64 cpu_pir = hard_smp_processor_id(); | |
170 | ||
171 | opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0); | |
172 | } | |
173 | } | |
174 | ||
e2186023 ME |
/*
 * Return the core this cpu belongs to to the unsplit state.
 *
 * Secondary threads (thread index != 0) nap until HID0 shows neither split
 * mode bit, then record SYNC_STEP_UNSPLIT and return.
 *
 * Thread 0 clears HID0_POWER8_DYNLPARDIS, spins until the split mode bits
 * clear, sends a reschedule IPI to each sibling and finally waits for all of
 * them to reach SYNC_STEP_UNSPLIT.
 */
static void unsplit_core(void)
{
	u64 hid0, mask;
	int i, cpu;

	/* Either bit being set in HID0 means the core is still split */
	mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;

	cpu = smp_processor_id();
	if (cpu_thread_in_core(cpu) != 0) {
		/* Secondary: keep napping until the split mode bits are gone */
		while (mfspr(SPRN_HID0) & mask)
			power7_nap(0);

		/* Tell thread 0 (see wait_for_sync_step()) that we saw the unsplit */
		per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
		return;
	}

	/* Thread 0: clear the Dynamic LPAR Disable bit to request the unsplit */
	hid0 = mfspr(SPRN_HID0);
	hid0 &= ~HID0_POWER8_DYNLPARDIS;
	update_power8_hid0(hid0);
	/* Also hand the new value to OPAL when winkle is enabled */
	update_hid_in_slw(hid0);

	/* Spin until HID0 no longer reports a split mode */
	while (mfspr(SPRN_HID0) & mask)
		cpu_relax();

	/* Wake secondaries out of NAP */
	for (i = cpu + 1; i < cpu + threads_per_core; i++)
		smp_send_reschedule(i);

	/* Do not continue until every sibling has left its nap loop */
	wait_for_sync_step(SYNC_STEP_UNSPLIT);
}
205 | ||
206 | static void split_core(int new_mode) | |
207 | { | |
208 | struct { u64 value; u64 mask; } split_parms[2] = { | |
209 | { HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE }, | |
210 | { HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE } | |
211 | }; | |
212 | int i, cpu; | |
213 | u64 hid0; | |
214 | ||
215 | /* Convert new_mode (2 or 4) into an index into our parms array */ | |
216 | i = (new_mode >> 1) - 1; | |
217 | BUG_ON(i < 0 || i > 1); | |
218 | ||
219 | cpu = smp_processor_id(); | |
220 | if (cpu_thread_in_core(cpu) != 0) { | |
221 | split_core_secondary_loop(&per_cpu(split_state, cpu).step); | |
222 | return; | |
223 | } | |
224 | ||
225 | wait_for_sync_step(SYNC_STEP_REAL_MODE); | |
226 | ||
227 | /* Write new mode */ | |
228 | hid0 = mfspr(SPRN_HID0); | |
229 | hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; | |
e63dbd16 | 230 | update_power8_hid0(hid0); |
77b54e9f | 231 | update_hid_in_slw(hid0); |
e2186023 ME |
232 | |
233 | /* Wait for it to happen */ | |
234 | while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) | |
235 | cpu_relax(); | |
236 | } | |
237 | ||
/*
 * Move the calling cpu's core to @new_mode subcores per core.
 *
 * The core is first unsplit unless the kernel already believes it is
 * unsplit, then split again when @new_mode is not 1. On completion this
 * cpu's split_state.step is set to SYNC_STEP_FINISHED so the master cpu in
 * cpu_update_split_mode() can see that it is done.
 */
static void cpu_do_split(int new_mode)
{
	/*
	 * At boot subcores_per_core will be 0, so we will always unsplit at
	 * boot. In the usual case where the core is already unsplit it's a
	 * nop, and this just ensures the kernel's notion of the mode is
	 * consistent with the hardware.
	 */
	if (subcores_per_core != 1)
		unsplit_core();

	if (new_mode != 1)
		split_core(new_mode);

	/* Complete everything above before announcing that we are finished */
	mb();
	per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
}
255 | ||
256 | bool cpu_core_split_required(void) | |
257 | { | |
258 | smp_rmb(); | |
259 | ||
260 | if (!new_split_mode) | |
261 | return false; | |
262 | ||
263 | cpu_do_split(new_split_mode); | |
264 | ||
265 | return true; | |
266 | } | |
267 | ||
77b54e9f SP |
268 | void update_subcore_sibling_mask(void) |
269 | { | |
270 | int cpu; | |
271 | /* | |
272 | * sibling mask for the first cpu. Left shift this by required bits | |
273 | * to get sibling mask for the rest of the cpus. | |
274 | */ | |
275 | int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1; | |
276 | ||
277 | for_each_possible_cpu(cpu) { | |
278 | int tid = cpu_thread_in_core(cpu); | |
279 | int offset = (tid / threads_per_subcore) * threads_per_subcore; | |
280 | int mask = sibling_mask_first_cpu << offset; | |
281 | ||
282 | paca[cpu].subcore_sibling_mask = mask; | |
283 | ||
284 | } | |
285 | } | |
286 | ||
e2186023 ME |
/*
 * stop_machine() callback, invoked from set_subcores_per_core().
 *
 * @data points at the int holding the requested mode (1, 2 or 4).
 *
 * Every cpu that runs this performs the split/unsplit via cpu_do_split().
 * The cpu flagged as master additionally kicks the offline cpus beforehand
 * and, once all cpus have finished, publishes the new mode in the globals.
 *
 * Always returns 0.
 */
static int cpu_update_split_mode(void *data)
{
	int cpu, new_mode = *(int *)data;

	if (this_cpu_ptr(&split_state)->master) {
		/* Publish the request for cpu_core_split_required() to pick up */
		new_split_mode = new_mode;
		smp_wmb();

		/* Offline here means present but not online */
		cpumask_andnot(cpu_offline_mask, cpu_present_mask,
			       cpu_online_mask);

		/* This should work even though the cpu is offline */
		for_each_cpu(cpu, cpu_offline_mask)
			smp_send_reschedule(cpu);
	}

	cpu_do_split(new_mode);

	if (this_cpu_ptr(&split_state)->master) {
		/* Wait for all cpus to finish before we touch subcores_per_core */
		for_each_present_cpu(cpu) {
			/* Stop waiting once cpu numbers reach setup_max_cpus */
			if (cpu >= setup_max_cpus)
				break;

			while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
				barrier();
		}

		/* The operation is over; clear the pending request */
		new_split_mode = 0;

		/* Make the new mode public */
		subcores_per_core = new_mode;
		threads_per_subcore = threads_per_core / subcores_per_core;
		update_subcore_sibling_mask();

		/* Make sure the new mode is written before we exit */
		mb();
	}

	return 0;
}
328 | ||
329 | static int set_subcores_per_core(int new_mode) | |
330 | { | |
331 | struct split_state *state; | |
332 | int cpu; | |
333 | ||
334 | if (kvm_hv_mode_active()) { | |
335 | pr_err("Unable to change split core mode while KVM active.\n"); | |
336 | return -EBUSY; | |
337 | } | |
338 | ||
339 | /* | |
340 | * We are only called at boot, or from the sysfs write. If that ever | |
341 | * changes we'll need a lock here. | |
342 | */ | |
343 | BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3); | |
344 | ||
345 | for_each_present_cpu(cpu) { | |
346 | state = &per_cpu(split_state, cpu); | |
347 | state->step = SYNC_STEP_INITIAL; | |
348 | state->master = 0; | |
349 | } | |
350 | ||
f9a69931 | 351 | cpus_read_lock(); |
e2186023 ME |
352 | |
353 | /* This cpu will update the globals before exiting stop machine */ | |
354 | this_cpu_ptr(&split_state)->master = 1; | |
355 | ||
356 | /* Ensure state is consistent before we call the other cpus */ | |
357 | mb(); | |
358 | ||
f9a69931 SAS |
359 | stop_machine_cpuslocked(cpu_update_split_mode, &new_mode, |
360 | cpu_online_mask); | |
e2186023 | 361 | |
f9a69931 | 362 | cpus_read_unlock(); |
e2186023 ME |
363 | |
364 | return 0; | |
365 | } | |
366 | ||
367 | static ssize_t __used store_subcores_per_core(struct device *dev, | |
368 | struct device_attribute *attr, const char *buf, | |
369 | size_t count) | |
370 | { | |
371 | unsigned long val; | |
372 | int rc; | |
373 | ||
374 | /* We are serialised by the attribute lock */ | |
375 | ||
376 | rc = sscanf(buf, "%lx", &val); | |
377 | if (rc != 1) | |
378 | return -EINVAL; | |
379 | ||
380 | switch (val) { | |
381 | case 1: | |
382 | case 2: | |
383 | case 4: | |
384 | if (subcores_per_core == val) | |
385 | /* Nothing to do */ | |
386 | goto out; | |
387 | break; | |
388 | default: | |
389 | return -EINVAL; | |
390 | } | |
391 | ||
392 | rc = set_subcores_per_core(val); | |
393 | if (rc) | |
394 | return rc; | |
395 | ||
396 | out: | |
397 | return count; | |
398 | } | |
399 | ||
400 | static ssize_t show_subcores_per_core(struct device *dev, | |
401 | struct device_attribute *attr, char *buf) | |
402 | { | |
403 | return sprintf(buf, "%x\n", subcores_per_core); | |
404 | } | |
405 | ||
/*
 * Device attribute "subcores_per_core" with mode 0644, read through
 * show_subcores_per_core() and written through store_subcores_per_core().
 * Registered by subcore_init().
 */
static DEVICE_ATTR(subcores_per_core, 0644,
		show_subcores_per_core, store_subcores_per_core);
408 | ||
409 | static int subcore_init(void) | |
410 | { | |
ce5732a2 | 411 | if (!cpu_has_feature(CPU_FTR_SUBCORE)) |
e2186023 ME |
412 | return 0; |
413 | ||
414 | /* | |
415 | * We need all threads in a core to be present to split/unsplit so | |
416 | * continue only if max_cpus are aligned to threads_per_core. | |
417 | */ | |
418 | if (setup_max_cpus % threads_per_core) | |
419 | return 0; | |
420 | ||
421 | BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL)); | |
422 | ||
423 | set_subcores_per_core(1); | |
424 | ||
425 | return device_create_file(cpu_subsys.dev_root, | |
426 | &dev_attr_subcores_per_core); | |
427 | } | |
428 | machine_device_initcall(powernv, subcore_init); |