Commit | Line | Data |
---|---|---|
6ee97d35 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2ef7a295 JL |
2 | /* |
3 | * Arch specific cpu topology information | |
4 | * | |
5 | * Copyright (C) 2016, ARM Ltd. | |
6 | * Written by: Juri Lelli, ARM Ltd. | |
2ef7a295 JL |
7 | */ |
8 | ||
9 | #include <linux/acpi.h> | |
10 | #include <linux/cpu.h> | |
11 | #include <linux/cpufreq.h> | |
12 | #include <linux/device.h> | |
13 | #include <linux/of.h> | |
14 | #include <linux/slab.h> | |
15 | #include <linux/string.h> | |
16 | #include <linux/sched/topology.h> | |
bb1fbdd3 | 17 | #include <linux/cpuset.h> |
60c1b220 AP |
18 | #include <linux/cpumask.h> |
19 | #include <linux/init.h> | |
20 | #include <linux/percpu.h> | |
21 | #include <linux/sched.h> | |
22 | #include <linux/smp.h> | |
2ef7a295 | 23 | |
/*
 * Per-CPU frequency scale factor in units of SCHED_CAPACITY_SCALE;
 * defaults to full scale until cpufreq reports otherwise.
 */
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

/*
 * arch_set_freq_scale - update freq_scale for a frequency domain.
 * @cpus:     CPUs sharing the frequency domain
 * @cur_freq: current frequency of the domain
 * @max_freq: maximum frequency of the domain
 *
 * Stores cur_freq/max_freq, scaled by SCHED_CAPACITY_SHIFT, into each
 * CPU's freq_scale. NOTE(review): assumes max_freq != 0 — presumably
 * guaranteed by the cpufreq callers; confirm.
 */
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
			 unsigned long max_freq)
{
	unsigned long scale;
	int i;

	/* fixed-point ratio: (cur << SCHED_CAPACITY_SHIFT) / max */
	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
}
37 | ||
/*
 * Per-CPU compute capacity, normalized to SCHED_CAPACITY_SCALE for the
 * most capable CPU (see topology_normalize_cpu_scale()).
 */
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

/* Publish the normalized capacity of @cpu. */
void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}
44 | ||
/*
 * sysfs "cpu_capacity" show handler: prints the CPU's capacity as
 * reported by topology_get_cpu_scale().
 */
static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);

	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}
53 | ||
/*
 * Rebuilding sched domains after a topology-flag change must happen in
 * process context (see update_topology_flags_workfn() below), so it is
 * deferred through this work item.
 */
static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);
2ef7a295 JL |
58 | |
/*
 * Create the per-CPU "cpu_capacity" sysfs attribute for every possible
 * CPU. Runs at subsys_initcall time, after CPU devices are registered.
 */
static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}
		/* NOTE(review): device_create_file() result intentionally ignored. */
		device_create_file(cpu, &dev_attr_cpu_capacity);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);
77 | ||
bb1fbdd3 MR |
/*
 * Nonzero while the sched-domain rebuild below is in flight; sampled by
 * the scheduler through topology_update_cpu_topology().
 */
static int update_topology;

int topology_update_cpu_topology(void)
{
	return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	/* Set the flag only for the duration of the rebuild. */
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}
96 | ||
2ef7a295 JL |
/* Largest raw capacity seen while parsing; normalization denominator. */
static u32 capacity_scale;
/* Per-possible-CPU raw "capacity-dmips-mhz" values; freed once normalized. */
static u32 *raw_capacity;

/*
 * Release the temporary raw capacity table. Returns 0 so this can also
 * serve as an initcall when CONFIG_CPU_FREQ is disabled (end of file).
 */
static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}
2ef7a295 | 107 | |
4ca4f26a | 108 | void topology_normalize_cpu_scale(void) |
2ef7a295 JL |
109 | { |
110 | u64 capacity; | |
111 | int cpu; | |
112 | ||
62de1161 | 113 | if (!raw_capacity) |
2ef7a295 JL |
114 | return; |
115 | ||
116 | pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); | |
2ef7a295 JL |
117 | for_each_possible_cpu(cpu) { |
118 | pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n", | |
119 | cpu, raw_capacity[cpu]); | |
120 | capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT) | |
121 | / capacity_scale; | |
4ca4f26a | 122 | topology_set_cpu_scale(cpu, capacity); |
2ef7a295 | 123 | pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", |
8ec59c0f | 124 | cpu, topology_get_cpu_scale(cpu)); |
2ef7a295 | 125 | } |
2ef7a295 JL |
126 | } |
127 | ||
/*
 * Parse the "capacity-dmips-mhz" property of @cpu_node into
 * raw_capacity[cpu], tracking the maximum seen in capacity_scale.
 *
 * All-or-nothing: if any CPU lacks the property (or allocation fails),
 * partial data is freed and parsing is permanently latched off via the
 * static cap_parsing_failed, so later calls return false immediately.
 *
 * Returns true iff a capacity was read for this CPU.
 */
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	static bool cap_parsing_failed;
	int ret;
	u32 cpu_capacity;

	if (cap_parsing_failed)
		return false;

	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
				   &cpu_capacity);
	if (!ret) {
		if (!raw_capacity) {
			/* First successful parse: one slot per possible CPU. */
			raw_capacity = kcalloc(num_possible_cpus(),
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
				cap_parsing_failed = true;
				return false;
			}
		}
		capacity_scale = max(cpu_capacity, capacity_scale);
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
			 cpu_node, raw_capacity[cpu]);
	} else {
		/* Property missing: only complain if some CPUs did have it. */
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
			       cpu_node);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		free_raw_capacity();
	}

	return !ret;
}
165 | ||
#ifdef CONFIG_CPU_FREQ
/* CPUs whose cpufreq policy the notifier below has not yet seen. */
static cpumask_var_t cpus_to_visit;
/* Deferred teardown: unregistering cannot be done from the notifier itself. */
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
2ef7a295 | 170 | |
/*
 * cpufreq policy notifier: weight each CPU's DT capacity by its maximum
 * frequency as policies appear. Once every possible CPU's policy has
 * been seen, normalize the capacities, queue the sched-domain rebuild
 * and schedule this notifier's own removal.
 */
static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	/* Nothing to do if DT capacity parsing never produced data. */
	if (!raw_capacity)
		return 0;

	if (val != CPUFREQ_NOTIFY)
		return 0;

	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
		 cpumask_pr_args(policy->related_cpus),
		 cpumask_pr_args(cpus_to_visit));

	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

	for_each_cpu(cpu, policy->related_cpus) {
		/*
		 * Scale capacity by max frequency; /1000 presumably converts
		 * cpufreq's kHz to MHz — confirm against cpufreq docs.
		 */
		raw_capacity[cpu] = topology_get_cpu_scale(cpu) *
				    policy->cpuinfo.max_freq / 1000UL;
		capacity_scale = max(raw_capacity[cpu], capacity_scale);
	}

	if (cpumask_empty(cpus_to_visit)) {
		topology_normalize_cpu_scale();
		schedule_work(&update_topology_flags_work);
		free_raw_capacity();
		pr_debug("cpu_capacity: parsing done\n");
		/* Unregister from a work item, not from notifier context. */
		schedule_work(&parsing_done_work);
	}

	return 0;
}
207 | ||
static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

/*
 * Register the capacity-rescaling cpufreq policy notifier. Bails out
 * (-EINVAL) on ACPI systems or when DT capacity parsing produced nothing.
 */
static int __init register_cpufreq_notifier(void)
{
	int ret;

	/*
	 * on ACPI-based systems we need to use the default cpu capacity
	 * until we have the necessary code to parse the cpu capacity, so
	 * skip registering cpufreq notifier.
	 */
	if (!acpi_disabled || !raw_capacity)
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
		return -ENOMEM;

	/* Every possible CPU's policy must be seen before parsing is "done". */
	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
					CPUFREQ_POLICY_NOTIFIER);

	/* Don't leak the cpumask if registration failed. */
	if (ret)
		free_cpumask_var(cpus_to_visit);

	return ret;
}
core_initcall(register_cpufreq_notifier);
238 | ||
/*
 * One-shot teardown once all policies were visited: drop the notifier
 * and release the tracking cpumask.
 */
static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
				    CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}

#else
/* Without CONFIG_CPU_FREQ the raw table is never consumed; just free it. */
core_initcall(free_raw_capacity);
#endif
60c1b220 AP |
249 | |
250 | #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) | |
251 | static int __init get_cpu_for_node(struct device_node *node) | |
252 | { | |
253 | struct device_node *cpu_node; | |
254 | int cpu; | |
255 | ||
256 | cpu_node = of_parse_phandle(node, "cpu", 0); | |
257 | if (!cpu_node) | |
258 | return -1; | |
259 | ||
260 | cpu = of_cpu_node_to_id(cpu_node); | |
261 | if (cpu >= 0) | |
262 | topology_parse_cpu_capacity(cpu_node, cpu); | |
263 | else | |
264 | pr_crit("Unable to find CPU node for %pOF\n", cpu_node); | |
265 | ||
266 | of_node_put(cpu_node); | |
267 | return cpu; | |
268 | } | |
269 | ||
/*
 * Parse one cpu-map "core" node. threadN children make this an SMT core
 * whose threads get package/core/thread ids; otherwise the core's own
 * "cpu" phandle identifies the (leaf) CPU.
 *
 * Returns 0 on success, -EINVAL on malformed DT: a core with both
 * threads and a direct cpu, or an unresolvable CPU reference.
 */
static int __init parse_core(struct device_node *core, int package_id,
			     int core_id)
{
	char name[10];
	bool leaf = true;
	int i = 0;
	int cpu;
	struct device_node *t;

	/* Walk thread0, thread1, ... until a name is absent. */
	do {
		snprintf(name, sizeof(name), "thread%d", i);
		t = of_get_child_by_name(core, name);
		if (t) {
			leaf = false;
			cpu = get_cpu_for_node(t);
			if (cpu >= 0) {
				cpu_topology[cpu].package_id = package_id;
				cpu_topology[cpu].core_id = core_id;
				cpu_topology[cpu].thread_id = i;
			} else {
				pr_err("%pOF: Can't get CPU for thread\n",
				       t);
				of_node_put(t);
				return -EINVAL;
			}
			of_node_put(t);
		}
		i++;
	} while (t);

	cpu = get_cpu_for_node(core);
	if (cpu >= 0) {
		/* A core may reference a cpu directly only if it has no threads. */
		if (!leaf) {
			pr_err("%pOF: Core has both threads and CPU\n",
			       core);
			return -EINVAL;
		}

		cpu_topology[cpu].package_id = package_id;
		cpu_topology[cpu].core_id = core_id;
	} else if (leaf) {
		pr_err("%pOF: Can't get CPU for leaf core\n", core);
		return -EINVAL;
	}

	return 0;
}
317 | ||
/*
 * Recursively parse a cpu-map "cluster" node at nesting @depth.
 * Child clusterN nodes are recursed into; coreN children of a leaf
 * cluster are handed to parse_core() with a fresh core_id sequence.
 * The static package_id increments once per leaf cluster, flattening
 * nested clusters into a flat package list for the scheduler.
 *
 * Returns 0 on success, -EINVAL on malformed DT.
 */
static int __init parse_cluster(struct device_node *cluster, int depth)
{
	char name[10];
	bool leaf = true;
	bool has_cores = false;
	struct device_node *c;
	/* Flat package counter shared across the whole recursive walk. */
	static int package_id __initdata;
	int core_id = 0;
	int i, ret;

	/*
	 * First check for child clusters; we currently ignore any
	 * information about the nesting of clusters and present the
	 * scheduler with a flat list of them.
	 */
	i = 0;
	do {
		snprintf(name, sizeof(name), "cluster%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			leaf = false;
			ret = parse_cluster(c, depth + 1);
			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	/* Now check for cores */
	i = 0;
	do {
		snprintf(name, sizeof(name), "core%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			has_cores = true;

			/* depth 0 is the cpu-map root: cores may not live there. */
			if (depth == 0) {
				pr_err("%pOF: cpu-map children should be clusters\n",
				       c);
				of_node_put(c);
				return -EINVAL;
			}

			if (leaf) {
				ret = parse_core(c, package_id, core_id++);
			} else {
				pr_err("%pOF: Non-leaf cluster with core %s\n",
				       cluster, name);
				ret = -EINVAL;
			}

			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	if (leaf && !has_cores)
		pr_warn("%pOF: empty cluster\n", cluster);

	/* Each leaf cluster becomes one package in the flattened view. */
	if (leaf)
		package_id++;

	return 0;
}
385 | ||
/*
 * Parse /cpus/cpu-map from the device tree into cpu_topology[] and
 * normalize parsed capacities.
 *
 * Returns 0 when there is no /cpus node or no cpu-map (not an error),
 * the parse_cluster() error on malformed DT, or -EINVAL if any possible
 * CPU was left without a package id after parsing.
 */
static int __init parse_dt_topology(void)
{
	struct device_node *cn, *map;
	int ret = 0;
	int cpu;

	cn = of_find_node_by_path("/cpus");
	if (!cn) {
		pr_err("No CPU information found in DT\n");
		return 0;
	}

	/*
	 * When topology is provided cpu-map is essentially a root
	 * cluster with restricted subnodes.
	 */
	map = of_get_child_by_name(cn, "cpu-map");
	if (!map)
		goto out;

	ret = parse_cluster(map, 0);
	if (ret != 0)
		goto out_map;

	topology_normalize_cpu_scale();

	/*
	 * Check that all cores are in the topology; the SMP code will
	 * only mark cores described in the DT as possible.
	 */
	for_each_possible_cpu(cpu)
		if (cpu_topology[cpu].package_id == -1)
			ret = -EINVAL;

out_map:
	of_node_put(map);
out:
	of_node_put(cn);
	return ret;
}
426 | ||
/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

/*
 * Return the scheduling group mask for @cpu: the smallest of the NUMA
 * node mask, the package (core_sibling) mask, and — when an LLC id is
 * known — the LLC sibling mask.
 */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

	/* Find the smaller of NUMA, core or LLC siblings */
	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
		/* not numa in package, lets use the package siblings */
		core_mask = &cpu_topology[cpu].core_sibling;
	}
	if (cpu_topology[cpu].llc_id != -1) {
		/* Prefer the LLC mask when it is contained in the current pick. */
		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
			core_mask = &cpu_topology[cpu].llc_sibling;
	}

	return core_mask;
}
449 | ||
/*
 * Fold @cpuid into the sibling masks of every online CPU (and vice
 * versa): LLC siblings by llc_id, core siblings by package_id, thread
 * siblings by core_id. Called when a CPU comes up.
 */
void update_siblings_masks(unsigned int cpuid)
{
	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
	int cpu;

	/* update core and thread sibling masks */
	for_each_online_cpu(cpu) {
		cpu_topo = &cpu_topology[cpu];

		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
		}

		/* Different package: neither core nor thread siblings. */
		if (cpuid_topo->package_id != cpu_topo->package_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

		/* Same package but different core: not thread siblings. */
		if (cpuid_topo->core_id != cpu_topo->core_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	}
}
477 | ||
478 | static void clear_cpu_topology(int cpu) | |
479 | { | |
480 | struct cpu_topology *cpu_topo = &cpu_topology[cpu]; | |
481 | ||
482 | cpumask_clear(&cpu_topo->llc_sibling); | |
483 | cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); | |
484 | ||
485 | cpumask_clear(&cpu_topo->core_sibling); | |
486 | cpumask_set_cpu(cpu, &cpu_topo->core_sibling); | |
487 | cpumask_clear(&cpu_topo->thread_sibling); | |
488 | cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); | |
489 | } | |
490 | ||
/*
 * Reset the whole topology table: invalidate all ids (-1) and reduce
 * every possible CPU's sibling masks to just itself. Used at init and
 * to discard partially parsed information on error.
 */
static void __init reset_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *cpu_topo = &cpu_topology[cpu];

		cpu_topo->thread_id = -1;
		cpu_topo->core_id = -1;
		cpu_topo->package_id = -1;
		cpu_topo->llc_id = -1;

		clear_cpu_topology(cpu);
	}
}
506 | ||
/*
 * Remove @cpu from every sibling's core/thread/LLC mask and reset its
 * own masks. Called when a CPU goes offline.
 */
void remove_cpu_topology(unsigned int cpu)
{
	int sibling;

	for_each_cpu(sibling, topology_core_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, topology_llc_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

	clear_cpu_topology(cpu);
}
520 | ||
/*
 * Weak default: no ACPI topology available. Architectures that can
 * parse topology from ACPI override this; 0 means "no error".
 */
__weak int __init parse_acpi_topology(void)
{
	return 0;
}
525 | ||
/*
 * Boot-time topology setup: start from a clean table, then try ACPI
 * first and fall back to the device tree.
 */
void __init init_cpu_topology(void)
{
	reset_cpu_topology();

	/*
	 * Discard anything that was parsed if we hit an error so we
	 * don't use partial information.
	 */
	if (parse_acpi_topology())
		reset_cpu_topology();
	else if (of_have_populated_dt() && parse_dt_topology())
		reset_cpu_topology();
}
#endif