// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
 */

#define pr_fmt(fmt) "powernv: " fmt

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/smp.h>

#include <trace/events/ipi.h>

#include "subcore.h"
#include "powernv.h"

/*
 * Split/unsplit procedure:
 *
 * A core can be in one of three states: unsplit, 2-way split, and 4-way split.
 *
 * The mapping to subcores_per_core is simple:
 *
 *  State       | subcores_per_core
 *  ------------|------------------
 *  Unsplit     |        1
 *  2-way split |        2
 *  4-way split |        4
 *
 * The core is split along thread boundaries, the mapping between subcores and
 * threads is as follows:
 *
 *  Unsplit:
 *          ----------------------------
 *  Subcore |            0             |
 *          ----------------------------
 *  Thread  |  0  1  2  3  4  5  6  7  |
 *          ----------------------------
 *
 *  2-way split:
 *          -------------------------------------
 *  Subcore |        0        |        1        |
 *          -------------------------------------
 *  Thread  |  0  1  2  3     |  4  5  6  7     |
 *          -------------------------------------
 *
 *  4-way split:
 *          -----------------------------------------
 *  Subcore |    0    |    1    |    2    |    3    |
 *          -----------------------------------------
 *  Thread  |  0  1   |  2  3   |  4  5   |  6  7   |
 *          -----------------------------------------
 *
 *
 * Transitions
 * -----------
 *
 * It is not possible to transition directly between the two split states; the
 * core must first be unsplit. The legal transitions are:
 *
 *  -----------          ---------------
 *  |         |  <---->  | 2-way split |
 *  |         |          ---------------
 *  | Unsplit |
 *  |         |          ---------------
 *  |         |  <---->  | 4-way split |
 *  -----------          ---------------
 *
 * Unsplitting
 * -----------
 *
 * Unsplitting is the simpler procedure. It requires thread 0 to request the
 * unsplit while all other threads NAP.
 *
 * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
 * the hardware that if all threads except 0 are napping, the hardware should
 * unsplit the core.
 *
 * Non-zero threads are sent to a NAP loop, they don't exit the loop until
 * they see the core unsplit.
 *
 * Thread 0 spins waiting for the hardware to see all the other threads
 * napping and perform the unsplit.
 *
 * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
 * out of NAP. They will then see the core unsplit and exit the NAP loop.
 *
 * Splitting
 * ---------
 *
 * The basic splitting procedure is fairly straightforward. However it is
 * complicated by the fact that after the split occurs, the newly created
 * subcores are not in a fully initialised state.
 *
 * Most notably the subcores do not have the correct value for SDR1, which
 * means they must not be running in virtual mode when the split occurs. The
 * subcores have separate timebase SPRs, but these are pre-synchronised by
 * opal.
 *
 * To begin with, secondary threads are sent to an assembly routine. There
 * they switch to real mode, so they are immune to the uninitialised SDR1
 * value. Once in real mode they indicate that they are in real mode, and
 * spin waiting to see the core split.
 *
 * Thread 0 waits to see that all secondaries are in real mode, and then
 * begins the splitting procedure. It first sets HID0_POWER8_DYNLPARDIS,
 * which prevents the hardware from unsplitting. Then it sets the appropriate
 * HID bit to request the split, and spins waiting to see that the split has
 * happened.
 *
 * Concurrently the secondaries will notice the split. When they do they set
 * up their SPRs, notably SDR1, and then they can return to virtual mode and
 * exit the procedure.
 */
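
/*
 * A note on the helper used below (editor's assumption, not in the original
 * file): split_core_secondary_loop(), the real-mode spin loop entered by
 * secondary threads during a split, is declared in subcore.h and implemented
 * in assembly (subcore-asm.S in mainline), since it must run without a valid
 * SDR1.
 */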

/* Initialised at boot by subcore_init() */
static int subcores_per_core;

/*
 * Used to communicate to offline cpus that we want them to pop out of the
 * offline loop and do a split or unsplit.
 *
 * 0 - no split happening
 * 1 - unsplit in progress
 * 2 - split to 2 in progress
 * 4 - split to 4 in progress
 */
static int new_split_mode;

static cpumask_var_t cpu_offline_mask;

struct split_state {
	u8 step;
	u8 master;
};

static DEFINE_PER_CPU(struct split_state, split_state);
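
/*
 * Threads rendezvous by advancing their per-cpu split_state.step through the
 * SYNC_STEP_* values (see subcore.h). wait_for_sync_step() below is used by
 * thread 0 to spin until every other thread in the core has reached at least
 * the given step.
 */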
static void wait_for_sync_step(int step)
{
	int i, cpu = smp_processor_id();

	for (i = cpu + 1; i < cpu + threads_per_core; i++)
		while (per_cpu(split_state, i).step < step)
			barrier();

	/* Order the wait loop vs any subsequent loads/stores. */
	mb();
}

static void update_hid_in_slw(u64 hid0)
{
	u64 idle_states = pnv_get_supported_cpuidle_states();

	if (idle_states & OPAL_PM_WINKLE_ENABLED) {
		/* OPAL call to patch slw with the new HID0 value */
		u64 cpu_pir = hard_smp_processor_id();

		opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
	}
}

static inline void update_power8_hid0(unsigned long hid0)
{
	/*
	 * On Power8 the HID0 update must at the very least be preceded by a
	 * SYNC instruction and followed by an ISYNC instruction.
	 */
	asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
}

static void unsplit_core(void)
{
	u64 hid0, mask;
	int i, cpu;

	mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;

	cpu = smp_processor_id();
	if (cpu_thread_in_core(cpu) != 0) {
		while (mfspr(SPRN_HID0) & mask)
			power7_idle_type(PNV_THREAD_NAP);

		per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
		return;
	}

	hid0 = mfspr(SPRN_HID0);
	hid0 &= ~HID0_POWER8_DYNLPARDIS;
	update_power8_hid0(hid0);
	update_hid_in_slw(hid0);

	while (mfspr(SPRN_HID0) & mask)
		cpu_relax();

	/* Wake secondaries out of NAP */
	for (i = cpu + 1; i < cpu + threads_per_core; i++)
		smp_send_reschedule(i);

	wait_for_sync_step(SYNC_STEP_UNSPLIT);
}

static void split_core(int new_mode)
{
	struct { u64 value; u64 mask; } split_parms[2] = {
		{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
		{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
	};
	int i, cpu;
	u64 hid0;

	/*
	 * Convert new_mode (2 or 4) into an index into our parms array:
	 * 2 -> 0, 4 -> 1.
	 */
	i = (new_mode >> 1) - 1;
	BUG_ON(i < 0 || i > 1);

	cpu = smp_processor_id();
	if (cpu_thread_in_core(cpu) != 0) {
		split_core_secondary_loop(&per_cpu(split_state, cpu).step);
		return;
	}

	wait_for_sync_step(SYNC_STEP_REAL_MODE);

	/* Write new mode */
	hid0 = mfspr(SPRN_HID0);
	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
	update_power8_hid0(hid0);
	update_hid_in_slw(hid0);

	/* Wait for it to happen */
	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
		cpu_relax();
}

static void cpu_do_split(int new_mode)
{
	/*
	 * At boot subcores_per_core will be 0, so we will always unsplit at
	 * boot. In the usual case where the core is already unsplit it's a
	 * nop, and this just ensures the kernel's notion of the mode is
	 * consistent with the hardware.
	 */
	if (subcores_per_core != 1)
		unsplit_core();

	if (new_mode != 1)
		split_core(new_mode);

	mb();
	per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
}

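/*
 * Polled by offline cpus after they are woken by the master (see the comment
 * above new_split_mode). Returns true if the wakeup was a request to take
 * part in a split/unsplit rather than to come back online. (The caller sits
 * in the powernv offline loop, outside this file.)
 */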
bool cpu_core_split_required(void)
{
	smp_rmb();

	if (!new_split_mode)
		return false;

	cpu_do_split(new_split_mode);

	return true;
}

void update_subcore_sibling_mask(void)
{
	int cpu;
	/*
	 * Sibling mask for the first cpu of each subcore. Left shift this by
	 * the required number of bits to get the sibling mask for the rest
	 * of the cpus.
	 */
	int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;

	for_each_possible_cpu(cpu) {
		int tid = cpu_thread_in_core(cpu);
		int offset = (tid / threads_per_subcore) * threads_per_subcore;
		int mask = sibling_mask_first_cpu << offset;

		paca_ptrs[cpu]->subcore_sibling_mask = mask;
	}
}
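
/*
 * Worked example (illustrative, not from the original source): with
 * threads_per_core = 8 and threads_per_subcore = 2 (a 4-way split),
 * sibling_mask_first_cpu = (1 << 2) - 1 = 0x3. A cpu with thread id 5 gets
 * offset = (5 / 2) * 2 = 4, so its subcore_sibling_mask is
 * 0x3 << 4 = 0x30, i.e. threads 4 and 5 of the core.
 */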

static int cpu_update_split_mode(void *data)
{
	int cpu, new_mode = *(int *)data;

	if (this_cpu_ptr(&split_state)->master) {
		new_split_mode = new_mode;
		smp_wmb();

		cpumask_andnot(cpu_offline_mask, cpu_present_mask,
			       cpu_online_mask);

		/* This should work even though the cpu is offline */
		for_each_cpu(cpu, cpu_offline_mask)
			smp_send_reschedule(cpu);
	}

	cpu_do_split(new_mode);

	if (this_cpu_ptr(&split_state)->master) {
		/* Wait for all cpus to finish before we touch subcores_per_core */
		for_each_present_cpu(cpu) {
			if (cpu >= setup_max_cpus)
				break;

			while (per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
				barrier();
		}

		new_split_mode = 0;

		/* Make the new mode public */
		subcores_per_core = new_mode;
		threads_per_subcore = threads_per_core / subcores_per_core;
		update_subcore_sibling_mask();

		/* Make sure the new mode is written before we exit */
		mb();
	}

	return 0;
}

static int set_subcores_per_core(int new_mode)
{
	struct split_state *state;
	int cpu;

	if (kvm_hv_mode_active()) {
		pr_err("Unable to change split core mode while KVM active.\n");
		return -EBUSY;
	}

	/*
	 * We are only called at boot, or from the sysfs write. If that ever
	 * changes we'll need a lock here.
	 */
	BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);

	for_each_present_cpu(cpu) {
		state = &per_cpu(split_state, cpu);
		state->step = SYNC_STEP_INITIAL;
		state->master = 0;
	}

	cpus_read_lock();

	/* This cpu will update the globals before exiting stop machine */
	this_cpu_ptr(&split_state)->master = 1;

	/* Ensure state is consistent before we call the other cpus */
	mb();

	stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
				cpu_online_mask);

	cpus_read_unlock();

	return 0;
}

static ssize_t __used store_subcores_per_core(struct device *dev,
		struct device_attribute *attr, const char *buf,
		size_t count)
{
	unsigned long val;
	int rc;

	/* We are serialised by the attribute lock */

	rc = sscanf(buf, "%lx", &val);
	if (rc != 1)
		return -EINVAL;

	switch (val) {
	case 1:
	case 2:
	case 4:
		if (subcores_per_core == val)
			/* Nothing to do */
			goto out;
		break;
	default:
		return -EINVAL;
	}

	rc = set_subcores_per_core(val);
	if (rc)
		return rc;

out:
	return count;
}

static ssize_t show_subcores_per_core(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%x\n", subcores_per_core);
}

static DEVICE_ATTR(subcores_per_core, 0644,
		show_subcores_per_core, store_subcores_per_core);
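
/*
 * Usage sketch (hypothetical shell session): the attribute is created on the
 * cpu subsystem root, so it typically appears as
 * /sys/devices/system/cpu/subcores_per_core.
 *
 *   # cat /sys/devices/system/cpu/subcores_per_core
 *   1
 *   # echo 2 > /sys/devices/system/cpu/subcores_per_core    <- 2-way split
 *   # echo 1 > /sys/devices/system/cpu/subcores_per_core    <- unsplit
 *
 * Values are parsed as hex, but only 1, 2 and 4 are accepted.
 */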

static int subcore_init(void)
{
	struct device *dev_root;
	unsigned pvr_ver;
	int rc = 0;

	pvr_ver = PVR_VER(mfspr(SPRN_PVR));

	if (pvr_ver != PVR_POWER8 &&
	    pvr_ver != PVR_POWER8E &&
	    pvr_ver != PVR_POWER8NVL &&
	    pvr_ver != PVR_HX_C2000)
		return 0;

	/*
	 * We need all threads in a core to be present to split/unsplit so
	 * continue only if max_cpus are aligned to threads_per_core.
	 */
	if (setup_max_cpus % threads_per_core)
		return 0;

	BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));

	set_subcores_per_core(1);

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		rc = device_create_file(dev_root, &dev_attr_subcores_per_core);
		put_device(dev_root);
	}
	return rc;
}
machine_device_initcall(powernv, subcore_init);