[linux-block.git] / drivers / base / arch_topology.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

/*
 * Default (weak) implementation: always reports that no frequency counters
 * are available for @cpus. Because it is __weak, another definition of the
 * same symbol replaces it at link time.
 */
__weak bool arch_freq_counters_available(struct cpumask *cpus)
{
	return false;
}
/* Per-CPU frequency scale factor; starts out at SCHED_CAPACITY_SCALE. */
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

/*
 * arch_set_freq_scale() - record the current/maximum frequency ratio.
 * @cpus:     CPUs whose freq_scale value is updated
 * @cur_freq: current frequency
 * @max_freq: maximum frequency, in the same unit as @cur_freq
 *
 * Stores (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq in freq_scale for
 * every CPU in @cpus. Nothing is written when frequency counters are in
 * use for @cpus or when @max_freq is zero.
 */
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
			 unsigned long max_freq)
{
	unsigned long scale;
	int i;

	/*
	 * If the use of counters for FIE is enabled, just return as we don't
	 * want to update the scale factor with information from CPUFREQ.
	 * Instead the scale factor will be updated from arch_scale_freq_tick.
	 */
	if (arch_freq_counters_available(cpus))
		return;

	/*
	 * A zero max_freq would make the division below fault. Keep the
	 * previously stored scale rather than crash on bad input.
	 */
	if (!max_freq)
		return;

	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
}

/* Per-CPU capacity value; starts out at SCHED_CAPACITY_SCALE. */
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

/* Set the cpu_scale value of @cpu to @capacity. */
void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}

DEFINE_PER_CPU(unsigned long, thermal_pressure);

void topology_set_thermal_pressure(const struct cpumask *cpus,
			       unsigned long th_pressure)
{
	int cpu;

	for_each_cpu(cpu, cpus)
		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
}

/*
 * sysfs "show" handler for the cpu_capacity attribute.
 *
 * Looks up the struct cpu that embeds @dev and prints that CPU's capacity
 * into @buf as an unsigned decimal followed by a newline. Returns the
 * number of characters written, as reported by sprintf().
 */
static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *owner = container_of(dev, struct cpu, dev);
	unsigned long capacity = topology_get_cpu_scale(owner->dev.id);

	return sprintf(buf, "%lu\n", capacity);
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

/*
 * Create the cpu_capacity attribute file on the device of every possible
 * CPU.
 *
 * A CPU whose device cannot be obtained yet is reported and skipped. A
 * failure to create the file for one CPU is reported as well, but does not
 * stop the remaining CPUs from being handled. Always returns 0.
 */
static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		int err;

		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}

		/* Do not swallow the error: a missing file is worth a log line. */
		err = device_create_file(cpu, &dev_attr_cpu_capacity);
		if (err)
			pr_err("%s: failed to create cpu_capacity file for CPU%d: %d\n",
			       __func__, i, err);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

/* Set to 1 by update_topology_flags_workfn() for the duration of its rebuild, 0 otherwise. */
static int update_topology;

/* Return the current value of the update_topology flag. */
int topology_update_cpu_topology(void)
{
	return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 *
 * The update_topology flag is raised before rebuild_sched_domains() is
 * called and lowered again afterwards, so topology_update_cpu_topology()
 * returns 1 only while this handler is performing the rebuild.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}

/* Per-CPU multiplier applied to the raw capacity when normalizing; defaults to 1. */
static DEFINE_PER_CPU(u32, freq_factor) = 1;
/* Raw capacity values indexed by CPU number; NULL while no values are recorded. */
static u32 *raw_capacity;

/*
 * Free the raw capacity array and reset the pointer to NULL, so that code
 * checking raw_capacity sees that no raw data is available any more.
 * Always returns 0; the int return type lets it be registered as an
 * initcall.
 */
static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}

/*
 * topology_normalize_cpu_scale() - derive CPU capacities from raw values.
 *
 * Does nothing when no raw capacities are recorded. Otherwise, for every
 * possible CPU the raw capacity is multiplied by the CPU's freq_factor, and
 * each product is scaled by the largest product found, so that the largest
 * one maps to 1 << SCHED_CAPACITY_SHIFT. The result is stored through
 * topology_set_cpu_scale().
 */
void topology_normalize_cpu_scale(void)
{
	u64 capacity;
	u64 capacity_scale;
	int cpu;

	if (!raw_capacity)
		return;

	/* Start at 1 so the division below never sees a zero divisor. */
	capacity_scale = 1;
	for_each_possible_cpu(cpu) {
		/*
		 * Both factors are u32: widen one of them first, otherwise
		 * the product is computed in 32 bits and may wrap before it
		 * is stored in the u64.
		 */
		capacity = (u64)raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity_scale = max(capacity, capacity_scale);
	}

	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
	for_each_possible_cpu(cpu) {
		capacity = (u64)raw_capacity[cpu] * per_cpu(freq_factor, cpu);
		capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
			capacity_scale);
		topology_set_cpu_scale(cpu, capacity);
		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
			cpu, topology_get_cpu_scale(cpu));
	}
}

/*
 * topology_parse_cpu_capacity() - record the raw capacity of one CPU.
 * @cpu_node: device tree node of the CPU
 * @cpu:      CPU number used as index into the raw capacity array
 *
 * Reads the "capacity-dmips-mhz" property of @cpu_node. On success the
 * value is stored in raw_capacity[] (allocated on first use) and, when a
 * clock can be obtained for the node, freq_factor of @cpu is set to the
 * clock rate divided by 1000.
 *
 * Once a CPU lacks the property, or the array cannot be allocated, parsing
 * is marked as failed and every later call returns false immediately.
 *
 * Returns true when the property was read and recorded, false otherwise.
 */
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	static bool cap_parsing_failed;
	struct clk *clk;
	u32 dmips;

	if (cap_parsing_failed)
		return false;

	if (of_property_read_u32(cpu_node, "capacity-dmips-mhz", &dmips)) {
		/* Only complain if earlier CPUs did provide a value. */
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
				cpu_node);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		free_raw_capacity();
		return false;
	}

	if (!raw_capacity) {
		raw_capacity = kcalloc(num_possible_cpus(),
				       sizeof(*raw_capacity),
				       GFP_KERNEL);
		if (!raw_capacity) {
			cap_parsing_failed = true;
			return false;
		}
	}

	raw_capacity[cpu] = dmips;
	pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
		cpu_node, raw_capacity[cpu]);

	/*
	 * Update freq_factor for calculating early boot cpu capacities.
	 * For non-clk CPU DVFS mechanism, there's no way to get the
	 * frequency value now, assuming they are running at the same
	 * frequency (by keeping the initial freq_factor value).
	 */
	clk = of_clk_get(cpu_node, 0);
	if (PTR_ERR_OR_ZERO(clk))
		return true;

	per_cpu(freq_factor, cpu) = clk_get_rate(clk) / 1000;
	clk_put(clk);

	return true;
}

#ifdef CONFIG_CPU_FREQ
/* CPUs not yet covered by a policy seen in init_cpu_capacity_callback(). */
static cpumask_var_t cpus_to_visit;
/* Work item scheduled by the callback once cpus_to_visit becomes empty. */
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	if (!raw_capacity)
		return 0;

	if (val != CPUFREQ_CREATE_POLICY)
		return 0;

	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
		 cpumask_pr_args(policy->related_cpus),
		 cpumask_pr_args(cpus_to_visit));

	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

	for_each_cpu(cpu, policy->related_cpus)
		per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;

	if (cpumask_empty(cpus_to_visit)) {
		topology_normalize_cpu_scale();
		schedule_work(&update_topology_flags_work);
		free_raw_capacity();
		pr_debug("cpu_capacity: parsing done\n");
		schedule_work(&parsing_done_work);
	}

	return 0;
}

/* Notifier block that routes cpufreq policy events to init_cpu_capacity_callback(). */
static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

/*
 * Register the cpufreq policy notifier used to finish capacity parsing.
 *
 * Returns -EINVAL when ACPI is in use or no raw capacities were recorded,
 * -ENOMEM when the cpus_to_visit mask cannot be allocated, otherwise the
 * result of cpufreq_register_notifier(). The mask is released again if
 * registration fails.
 */
static int __init register_cpufreq_notifier(void)
{
	int err;

	/*
	 * on ACPI-based systems we need to use the default cpu capacity
	 * until we have the necessary code to parse the cpu capacity, so
	 * skip registering cpufreq notifier.
	 */
	if (!(acpi_disabled && raw_capacity))
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
		return -ENOMEM;

	/* Every possible CPU has to be visited before parsing is complete. */
	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	err = cpufreq_register_notifier(&init_cpu_capacity_notifier,
					CPUFREQ_POLICY_NOTIFIER);
	if (!err)
		return 0;

	free_cpumask_var(cpus_to_visit);
	return err;
}
core_initcall(register_cpufreq_notifier);

/*
 * Work handler scheduled once every CPU has been visited: unregisters the
 * cpufreq policy notifier and frees the cpus_to_visit mask.
 */
static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
					 CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}

#else
/* Without CONFIG_CPU_FREQ no notifier consumes the raw capacities, so free them via an initcall. */
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * Resolve the logical CPU number referenced by the "cpu" phandle of @node.
 *
 * There are basically three kinds of return values:
 * (1) a logical CPU number, which is >= 0. In this case the capacity of
 *     the CPU is also parsed from the referenced node.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT
 *     but there is no possible logical CPU in the kernel to match. This
 *     happens when CONFIG_NR_CPUS is configured to be smaller than the
 *     number of CPU nodes in DT. We need to just ignore this case.
 * (3) -1 if @node has no "cpu" phandle.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
	struct device_node *cpu_node = of_parse_phandle(node, "cpu", 0);
	int cpu;

	if (!cpu_node)
		return -1;

	cpu = of_cpu_node_to_id(cpu_node);
	if (cpu < 0)
		pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
			cpu_node, cpumask_pr_args(cpu_possible_mask));
	else
		topology_parse_cpu_capacity(cpu_node, cpu);

	of_node_put(cpu_node);
	return cpu;
}

/*
 * parse_core() - fill in the topology of one "coreN" node.
 * @core:       device tree node of the core
 * @package_id: package id assigned to every CPU found under @core
 * @core_id:    core id assigned to every CPU found under @core
 *
 * Walks the children named "thread0", "thread1", ... until one is missing
 * and records package, core and thread id for each CPU they reference. A
 * core without thread children must reference a CPU itself; a core with
 * thread children must not.
 *
 * Returns 0 on success and -EINVAL on an inconsistent description. A
 * lookup result of -ENODEV is ignored.
 */
static int __init parse_core(struct device_node *core, int package_id,
			     int core_id)
{
	struct device_node *thread;
	char name[20];
	bool has_threads = false;
	int idx;
	int cpu;

	for (idx = 0; ; idx++) {
		snprintf(name, sizeof(name), "thread%d", idx);
		thread = of_get_child_by_name(core, name);
		if (!thread)
			break;

		has_threads = true;
		cpu = get_cpu_for_node(thread);
		if (cpu < 0 && cpu != -ENODEV) {
			pr_err("%pOF: Can't get CPU for thread\n", thread);
			of_node_put(thread);
			return -EINVAL;
		}

		if (cpu >= 0) {
			cpu_topology[cpu].package_id = package_id;
			cpu_topology[cpu].core_id = core_id;
			cpu_topology[cpu].thread_id = idx;
		}
		of_node_put(thread);
	}

	/* The core node itself is looked up even when threads were found. */
	cpu = get_cpu_for_node(core);
	if (cpu < 0) {
		if (!has_threads && cpu != -ENODEV) {
			pr_err("%pOF: Can't get CPU for leaf core\n", core);
			return -EINVAL;
		}
		return 0;
	}

	if (has_threads) {
		pr_err("%pOF: Core has both threads and CPU\n",
		       core);
		return -EINVAL;
	}

	cpu_topology[cpu].package_id = package_id;
	cpu_topology[cpu].core_id = core_id;

	return 0;
}

/*
 * parse_cluster() - recursively parse a "clusterN" (or cpu-map) node.
 * @cluster: device tree node to parse
 * @depth:   nesting level; 0 for the cpu-map root
 *
 * Child clusters ("cluster0", "cluster1", ...) are parsed first, then the
 * cores ("core0", "core1", ...). Cores are only accepted in a cluster that
 * has no child clusters and that is not the root. Each such leaf cluster
 * consumes one package id from a counter shared across all calls.
 *
 * Returns 0 on success or a negative error code.
 */
static int __init parse_cluster(struct device_node *cluster, int depth)
{
	static int package_id __initdata;
	struct device_node *child;
	char name[20];
	bool has_clusters = false;
	bool has_cores = false;
	int core_id = 0;
	int idx, ret;

	/*
	 * First check for child clusters; we currently ignore any
	 * information about the nesting of clusters and present the
	 * scheduler with a flat list of them.
	 */
	for (idx = 0; ; idx++) {
		snprintf(name, sizeof(name), "cluster%d", idx);
		child = of_get_child_by_name(cluster, name);
		if (!child)
			break;

		has_clusters = true;
		ret = parse_cluster(child, depth + 1);
		of_node_put(child);
		if (ret)
			return ret;
	}

	/* Now check for cores */
	for (idx = 0; ; idx++) {
		snprintf(name, sizeof(name), "core%d", idx);
		child = of_get_child_by_name(cluster, name);
		if (!child)
			break;

		has_cores = true;

		if (depth == 0) {
			pr_err("%pOF: cpu-map children should be clusters\n",
			       child);
			of_node_put(child);
			return -EINVAL;
		}

		if (has_clusters) {
			pr_err("%pOF: Non-leaf cluster with core %s\n",
			       cluster, name);
			ret = -EINVAL;
		} else {
			ret = parse_core(child, package_id, core_id++);
		}

		of_node_put(child);
		if (ret)
			return ret;
	}

	if (!has_clusters) {
		if (!has_cores)
			pr_warn("%pOF: empty cluster\n", cluster);

		/* Each leaf cluster is presented as its own package. */
		package_id++;
	}

	return 0;
}

/*
 * parse_dt_topology() - build the CPU topology from the device tree.
 *
 * Parses the "cpu-map" child of "/cpus", normalizes the CPU capacities and
 * checks that every possible CPU received a package id.
 *
 * Returns 0 when "/cpus" or "cpu-map" is absent or when parsing succeeded,
 * the error from parse_cluster() when parsing failed, and -EINVAL when a
 * possible CPU is missing from the topology.
 */
static int __init parse_dt_topology(void)
{
	struct device_node *cpus, *cpu_map;
	int ret = 0;
	int cpu;

	cpus = of_find_node_by_path("/cpus");
	if (!cpus) {
		pr_err("No CPU information found in DT\n");
		return 0;
	}

	/*
	 * When topology is provided cpu-map is essentially a root
	 * cluster with restricted subnodes.
	 */
	cpu_map = of_get_child_by_name(cpus, "cpu-map");
	if (cpu_map) {
		ret = parse_cluster(cpu_map, 0);
		if (!ret) {
			topology_normalize_cpu_scale();

			/*
			 * Check that all cores are in the topology; the SMP
			 * code will only mark cores described in the DT as
			 * possible.
			 */
			for_each_possible_cpu(cpu) {
				if (cpu_topology[cpu].package_id == -1)
					ret = -EINVAL;
			}
		}
		of_node_put(cpu_map);
	}

	of_node_put(cpus);
	return ret;
}
#endif

/*
 * cpu topology table: one entry per CPU number, NR_CPUS entries in total.
 * Exported (GPL-only) for use outside this file.
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

	/* Find the smaller of NUMA, core or LLC siblings */
	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
		/* not numa in package, lets use the package siblings */
		core_mask = &cpu_topology[cpu].core_sibling;
	}
	if (cpu_topology[cpu].llc_id != -1) {
		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
			core_mask = &cpu_topology[cpu].llc_sibling;
	}

	return core_mask;
}

/*
 * update_siblings_masks() - link @cpuid with its siblings.
 *
 * For every online CPU, @cpuid and that CPU are added to each other's
 * LLC sibling mask when their LLC ids match, to each other's core sibling
 * mask when their package ids match, and additionally to each other's
 * thread sibling mask when their core ids match as well.
 */
void update_siblings_masks(unsigned int cpuid)
{
	struct cpu_topology *self = &cpu_topology[cpuid];
	int other;

	/* update core and thread sibling masks */
	for_each_online_cpu(other) {
		struct cpu_topology *peer = &cpu_topology[other];

		if (self->llc_id == peer->llc_id) {
			cpumask_set_cpu(other, &self->llc_sibling);
			cpumask_set_cpu(cpuid, &peer->llc_sibling);
		}

		if (self->package_id == peer->package_id) {
			cpumask_set_cpu(cpuid, &peer->core_sibling);
			cpumask_set_cpu(other, &self->core_sibling);

			/* Thread siblings must share the package and the core. */
			if (self->core_id == peer->core_id) {
				cpumask_set_cpu(cpuid, &peer->thread_sibling);
				cpumask_set_cpu(other, &self->thread_sibling);
			}
		}
	}
}

/* Empty @mask, then mark @cpu as its only member. */
static void reset_sibling_mask(struct cpumask *mask, int cpu)
{
	cpumask_clear(mask);
	cpumask_set_cpu(cpu, mask);
}

/*
 * Reset the LLC, core and thread sibling masks of @cpu so that each of
 * them contains only @cpu itself. The id fields are left untouched.
 */
static void clear_cpu_topology(int cpu)
{
	struct cpu_topology *topo = &cpu_topology[cpu];

	reset_sibling_mask(&topo->llc_sibling, cpu);
	reset_sibling_mask(&topo->core_sibling, cpu);
	reset_sibling_mask(&topo->thread_sibling, cpu);
}

/*
 * Put the topology entry of every possible CPU back into its initial
 * state: all ids are set to -1 and each sibling mask contains only the CPU
 * itself.
 */
void __init reset_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *topo = &cpu_topology[cpu];

		topo->llc_id = -1;
		topo->package_id = -1;
		topo->core_id = -1;
		topo->thread_id = -1;

		clear_cpu_topology(cpu);
	}
}

/*
 * remove_cpu_topology() - detach @cpu from all of its siblings.
 *
 * Clears @cpu from the core, thread and LLC sibling masks of every CPU
 * listed in @cpu's own corresponding mask, then resets @cpu's masks so
 * that they contain only @cpu.
 */
void remove_cpu_topology(unsigned int cpu)
{
	int sibling;

	/*
	 * Each loop walks one of @cpu's own masks; @cpu is normally a member
	 * of it, so its own bit is cleared during the walk as well.
	 */
	for_each_cpu(sibling, topology_core_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, topology_llc_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

	/* Restore the "only myself" state of @cpu's masks. */
	clear_cpu_topology(cpu);
}

/*
 * Default (weak) ACPI topology parser: does nothing and returns 0, which
 * init_cpu_topology() treats as "no error". Because it is __weak, another
 * definition of the same symbol replaces it at link time.
 */
__weak int __init parse_acpi_topology(void)
{
	return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * init_cpu_topology() - initialise the CPU topology table.
 *
 * Resets the table, then runs the ACPI parser. If that reports no error
 * and a device tree is populated, the device tree parser runs as well. The
 * table is reset again when either parser fails.
 */
void __init init_cpu_topology(void)
{
	bool failed;

	reset_cpu_topology();

	failed = parse_acpi_topology() != 0;
	if (!failed && of_have_populated_dt())
		failed = parse_dt_topology() != 0;

	/*
	 * Discard anything that was parsed if we hit an error so we
	 * don't use partial information.
	 */
	if (failed)
		reset_cpu_topology();
}
#endif
Commit	Line	Data
6ee97d35	1	// SPDX-License-Identifier: GPL-2.0
2ef7a295 JL	2	/*
	3	* Arch specific cpu topology information
	4	*
	5	* Copyright (C) 2016, ARM Ltd.
	6	* Written by: Juri Lelli, ARM Ltd.
2ef7a295 JL	7	*/
	8
	9	#include <linux/acpi.h>
	10	#include <linux/cpu.h>
	11	#include <linux/cpufreq.h>
	12	#include <linux/device.h>
	13	#include <linux/of.h>
	14	#include <linux/slab.h>
	15	#include <linux/string.h>
	16	#include <linux/sched/topology.h>
bb1fbdd3	17	#include <linux/cpuset.h>
60c1b220 AP	18	#include <linux/cpumask.h>
	19	#include <linux/init.h>
	20	#include <linux/percpu.h>
	21	#include <linux/sched.h>
	22	#include <linux/smp.h>
2ef7a295	23
cd0ed03a IV	24	__weak bool arch_freq_counters_available(struct cpumask *cpus)
	25	{
	26	return false;
	27	}
0e27c567	28	DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
2ef7a295	29
0e27c567 DE	30	void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
0e27c567 DE	31	unsigned long max_freq)
2ef7a295	32	{
0e27c567 DE	33	unsigned long scale;
	34	int i;
	35
cd0ed03a IV	36	/*
	37	* If the use of counters for FIE is enabled, just return as we don't
	38	* want to update the scale factor with information from CPUFREQ.
	39	* Instead the scale factor will be updated from arch_scale_freq_tick.
	40	*/
	41	if (arch_freq_counters_available(cpus))
	42	return;
	43
0e27c567 DE	44	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
	45
	46	for_each_cpu(i, cpus)
	47	per_cpu(freq_scale, i) = scale;
2ef7a295 JL	48	}
2ef7a295 JL	49
8216f588	50	DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
2ef7a295	51
4ca4f26a	52	void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
2ef7a295 JL	53	{
	54	per_cpu(cpu_scale, cpu) = capacity;
	55	}
	56
25980c7a VS	57	DEFINE_PER_CPU(unsigned long, thermal_pressure);
	58
	59	void topology_set_thermal_pressure(const struct cpumask *cpus,
	60	unsigned long th_pressure)
	61	{
	62	int cpu;
	63
	64	for_each_cpu(cpu, cpus)
	65	WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
	66	}
	67
2ef7a295 JL	68	static ssize_t cpu_capacity_show(struct device *dev,
	69	struct device_attribute *attr,
	70	char *buf)
	71	{
	72	struct cpu *cpu = container_of(dev, struct cpu, dev);
	73
8ec59c0f	74	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
2ef7a295 JL	75	}
2ef7a295 JL	76
bb1fbdd3 MR	77	static void update_topology_flags_workfn(struct work_struct *work);
	78	static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
	79
5d777b18	80	static DEVICE_ATTR_RO(cpu_capacity);
2ef7a295 JL	81
	82	static int register_cpu_capacity_sysctl(void)
	83	{
	84	int i;
	85	struct device *cpu;
	86
	87	for_each_possible_cpu(i) {
	88	cpu = get_cpu_device(i);
	89	if (!cpu) {
	90	pr_err("%s: too early to get CPU%d device!\n",
	91	__func__, i);
	92	continue;
	93	}
	94	device_create_file(cpu, &dev_attr_cpu_capacity);
	95	}
	96
	97	return 0;
	98	}
	99	subsys_initcall(register_cpu_capacity_sysctl);
	100
bb1fbdd3 MR	101	static int update_topology;
	102
	103	int topology_update_cpu_topology(void)
	104	{
	105	return update_topology;
	106	}
	107
	108	/*
	109	* Updating the sched_domains can't be done directly from cpufreq callbacks
	110	* due to locking, so queue the work for later.
	111	*/
	112	static void update_topology_flags_workfn(struct work_struct *work)
	113	{
	114	update_topology = 1;
	115	rebuild_sched_domains();
	116	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	117	update_topology = 0;
	118	}
	119
b8fe128d	120	static DEFINE_PER_CPU(u32, freq_factor) = 1;
2ef7a295	121	static u32 *raw_capacity;
62de1161	122
82d8ba71	123	static int free_raw_capacity(void)
62de1161 VK	124	{
	125	kfree(raw_capacity);
	126	raw_capacity = NULL;
	127
	128	return 0;
	129	}
2ef7a295	130
4ca4f26a	131	void topology_normalize_cpu_scale(void)
2ef7a295 JL	132	{
2ef7a295 JL	133	u64 capacity;
b8fe128d	134	u64 capacity_scale;
2ef7a295 JL	135	int cpu;
2ef7a295 JL	136
62de1161	137	if (!raw_capacity)
2ef7a295 JL	138	return;
2ef7a295 JL	139
b8fe128d	140	capacity_scale = 1;
2ef7a295	141	for_each_possible_cpu(cpu) {
b8fe128d JC	142	capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
	143	capacity_scale = max(capacity, capacity_scale);
	144	}
	145
	146	pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
	147	for_each_possible_cpu(cpu) {
	148	capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
	149	capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
	150	capacity_scale);
4ca4f26a	151	topology_set_cpu_scale(cpu, capacity);
2ef7a295	152	pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
8ec59c0f	153	cpu, topology_get_cpu_scale(cpu));
2ef7a295	154	}
2ef7a295 JL	155	}
2ef7a295 JL	156
805df296	157	bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
2ef7a295	158	{
b8fe128d	159	struct clk *cpu_clk;
62de1161	160	static bool cap_parsing_failed;
805df296	161	int ret;
2ef7a295 JL	162	u32 cpu_capacity;
	163
	164	if (cap_parsing_failed)
805df296	165	return false;
2ef7a295	166
3eeba1a2	167	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
2ef7a295 JL	168	&cpu_capacity);
	169	if (!ret) {
	170	if (!raw_capacity) {
	171	raw_capacity = kcalloc(num_possible_cpus(),
	172	sizeof(*raw_capacity),
	173	GFP_KERNEL);
	174	if (!raw_capacity) {
2ef7a295	175	cap_parsing_failed = true;
805df296	176	return false;
2ef7a295 JL	177	}
2ef7a295 JL	178	}
2ef7a295	179	raw_capacity[cpu] = cpu_capacity;
6ef2541f RH	180	pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
6ef2541f RH	181	cpu_node, raw_capacity[cpu]);
b8fe128d JC	182
	183	/*
	184	* Update freq_factor for calculating early boot cpu capacities.
	185	* For non-clk CPU DVFS mechanism, there's no way to get the
	186	* frequency value now, assuming they are running at the same
	187	* frequency (by keeping the initial freq_factor value).
	188	*/
	189	cpu_clk = of_clk_get(cpu_node, 0);
4dfff3d5	190	if (!PTR_ERR_OR_ZERO(cpu_clk)) {
b8fe128d JC	191	per_cpu(freq_factor, cpu) =
b8fe128d JC	192	clk_get_rate(cpu_clk) / 1000;
4dfff3d5 JC	193	clk_put(cpu_clk);
4dfff3d5 JC	194	}
2ef7a295 JL	195	} else {
2ef7a295 JL	196	if (raw_capacity) {
6ef2541f RH	197	pr_err("cpu_capacity: missing %pOF raw capacity\n",
6ef2541f RH	198	cpu_node);
2ef7a295 JL	199	pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
	200	}
	201	cap_parsing_failed = true;
62de1161	202	free_raw_capacity();
2ef7a295 JL	203	}
	204
	205	return !ret;
	206	}
	207
	208	#ifdef CONFIG_CPU_FREQ
9de9a449 GI	209	static cpumask_var_t cpus_to_visit;
	210	static void parsing_done_workfn(struct work_struct *work);
	211	static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
2ef7a295	212
9de9a449	213	static int
2ef7a295 JL	214	init_cpu_capacity_callback(struct notifier_block *nb,
	215	unsigned long val,
	216	void *data)
	217	{
	218	struct cpufreq_policy *policy = data;
	219	int cpu;
	220
d8bcf4db	221	if (!raw_capacity)
2ef7a295 JL	222	return 0;
2ef7a295 JL	223
40f0fc2a	224	if (val != CPUFREQ_CREATE_POLICY)
93a57081 VK	225	return 0;
	226
	227	pr_debug("cpu_capacity: init cpu capacity for CPUs [%pbl] (to_visit=%pbl)\n",
	228	cpumask_pr_args(policy->related_cpus),
	229	cpumask_pr_args(cpus_to_visit));
	230
	231	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
	232
b8fe128d JC	233	for_each_cpu(cpu, policy->related_cpus)
b8fe128d JC	234	per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
93a57081 VK	235
	236	if (cpumask_empty(cpus_to_visit)) {
	237	topology_normalize_cpu_scale();
bb1fbdd3	238	schedule_work(&update_topology_flags_work);
62de1161	239	free_raw_capacity();
93a57081	240	pr_debug("cpu_capacity: parsing done\n");
93a57081 VK	241	schedule_work(&parsing_done_work);
	242	}
	243
2ef7a295 JL	244	return 0;
	245	}
	246
9de9a449	247	static struct notifier_block init_cpu_capacity_notifier = {
2ef7a295 JL	248	.notifier_call = init_cpu_capacity_callback,
	249	};
	250
	251	static int __init register_cpufreq_notifier(void)
	252	{
5408211a DE	253	int ret;
5408211a DE	254
2ef7a295 JL	255	/*
	256	* on ACPI-based systems we need to use the default cpu capacity
	257	* until we have the necessary code to parse the cpu capacity, so
	258	* skip registering cpufreq notifier.
	259	*/
c105aa31	260	if (!acpi_disabled \|\| !raw_capacity)
2ef7a295 JL	261	return -EINVAL;
2ef7a295 JL	262
0fd33116	263	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
2ef7a295	264	return -ENOMEM;
2ef7a295 JL	265
	266	cpumask_copy(cpus_to_visit, cpu_possible_mask);
	267
5408211a DE	268	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
	269	CPUFREQ_POLICY_NOTIFIER);
	270
	271	if (ret)
	272	free_cpumask_var(cpus_to_visit);
	273
	274	return ret;
2ef7a295 JL	275	}
	276	core_initcall(register_cpufreq_notifier);
	277
9de9a449	278	static void parsing_done_workfn(struct work_struct *work)
2ef7a295 JL	279	{
	280	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
	281	CPUFREQ_POLICY_NOTIFIER);
5408211a	282	free_cpumask_var(cpus_to_visit);
2ef7a295 JL	283	}
	284
	285	#else
2ef7a295 JL	286	core_initcall(free_raw_capacity);
2ef7a295 JL	287	#endif
60c1b220 AP	288
60c1b220 AP	289	#if defined(CONFIG_ARM64) \|\| defined(CONFIG_RISCV)
f3c19481 ZT	290	/*
	291	* This function returns the logic cpu number of the node.
	292	* There are basically three kinds of return values:
	293	* (1) logic cpu number which is > 0.
	294	* (2) -ENODEV when the device tree(DT) node is valid and found in the DT but
	295	* there is no possible logical CPU in the kernel to match. This happens
	296	* when CONFIG_NR_CPUS is configure to be smaller than the number of
	297	* CPU nodes in DT. We need to just ignore this case.
	298	* (3) -1 if the node does not exist in the device tree
	299	*/
60c1b220 AP	300	static int __init get_cpu_for_node(struct device_node *node)
	301	{
	302	struct device_node *cpu_node;
	303	int cpu;
	304
	305	cpu_node = of_parse_phandle(node, "cpu", 0);
	306	if (!cpu_node)
	307	return -1;
	308
	309	cpu = of_cpu_node_to_id(cpu_node);
	310	if (cpu >= 0)
	311	topology_parse_cpu_capacity(cpu_node, cpu);
	312	else
f3c19481 ZT	313	pr_info("CPU node for %pOF exist but the possible cpu range is :%*pbl\n",
f3c19481 ZT	314	cpu_node, cpumask_pr_args(cpu_possible_mask));
60c1b220 AP	315
	316	of_node_put(cpu_node);
	317	return cpu;
	318	}
	319
	320	static int __init parse_core(struct device_node *core, int package_id,
	321	int core_id)
	322	{
4a33691c	323	char name[20];
60c1b220 AP	324	bool leaf = true;
	325	int i = 0;
	326	int cpu;
	327	struct device_node *t;
	328
	329	do {
	330	snprintf(name, sizeof(name), "thread%d", i);
	331	t = of_get_child_by_name(core, name);
	332	if (t) {
	333	leaf = false;
	334	cpu = get_cpu_for_node(t);
	335	if (cpu >= 0) {
	336	cpu_topology[cpu].package_id = package_id;
	337	cpu_topology[cpu].core_id = core_id;
	338	cpu_topology[cpu].thread_id = i;
f3c19481 ZT	339	} else if (cpu != -ENODEV) {
f3c19481 ZT	340	pr_err("%pOF: Can't get CPU for thread\n", t);
60c1b220 AP	341	of_node_put(t);
	342	return -EINVAL;
	343	}
	344	of_node_put(t);
	345	}
	346	i++;
	347	} while (t);
	348
	349	cpu = get_cpu_for_node(core);
	350	if (cpu >= 0) {
	351	if (!leaf) {
	352	pr_err("%pOF: Core has both threads and CPU\n",
	353	core);
	354	return -EINVAL;
	355	}
	356
	357	cpu_topology[cpu].package_id = package_id;
	358	cpu_topology[cpu].core_id = core_id;
f3c19481	359	} else if (leaf && cpu != -ENODEV) {
60c1b220 AP	360	pr_err("%pOF: Can't get CPU for leaf core\n", core);
	361	return -EINVAL;
	362	}
	363
	364	return 0;
	365	}
	366
	367	static int __init parse_cluster(struct device_node *cluster, int depth)
	368	{
4a33691c	369	char name[20];
60c1b220 AP	370	bool leaf = true;
	371	bool has_cores = false;
	372	struct device_node *c;
	373	static int package_id __initdata;
	374	int core_id = 0;
	375	int i, ret;
	376
	377	/*
	378	* First check for child clusters; we currently ignore any
	379	* information about the nesting of clusters and present the
	380	* scheduler with a flat list of them.
	381	*/
	382	i = 0;
	383	do {
	384	snprintf(name, sizeof(name), "cluster%d", i);
	385	c = of_get_child_by_name(cluster, name);
	386	if (c) {
	387	leaf = false;
	388	ret = parse_cluster(c, depth + 1);
	389	of_node_put(c);
	390	if (ret != 0)
	391	return ret;
	392	}
	393	i++;
	394	} while (c);
	395
	396	/* Now check for cores */
	397	i = 0;
	398	do {
	399	snprintf(name, sizeof(name), "core%d", i);
	400	c = of_get_child_by_name(cluster, name);
	401	if (c) {
	402	has_cores = true;
	403
	404	if (depth == 0) {
	405	pr_err("%pOF: cpu-map children should be clusters\n",
	406	c);
	407	of_node_put(c);
	408	return -EINVAL;
	409	}
	410
	411	if (leaf) {
	412	ret = parse_core(c, package_id, core_id++);
	413	} else {
	414	pr_err("%pOF: Non-leaf cluster with core %s\n",
	415	cluster, name);
	416	ret = -EINVAL;
	417	}
	418
	419	of_node_put(c);
	420	if (ret != 0)
	421	return ret;
	422	}
	423	i++;
	424	} while (c);
	425
	426	if (leaf && !has_cores)
	427	pr_warn("%pOF: empty cluster\n", cluster);
	428
	429	if (leaf)
	430	package_id++;
	431
	432	return 0;
	433	}
434
435	static int __init parse_dt_topology(void)
436	{
437	struct device_node cn, map;
438	int ret = 0;
439	int cpu;
440
441	cn = of_find_node_by_path("/cpus");
442	if (!cn) {
443	pr_err("No CPU information found in DT\n");
444	return 0;
445	}
446
447	/*
448	* When topology is provided cpu-map is essentially a root
449	* cluster with restricted subnodes.
450	*/
451	map = of_get_child_by_name(cn, "cpu-map");
452	if (!map)
453	goto out;
454
455	ret = parse_cluster(map, 0);
456	if (ret != 0)
457	goto out_map;
458
459	topology_normalize_cpu_scale();
460
461	/*
462	* Check that all cores are in the topology; the SMP code will
463	* only mark cores described in the DT as possible.
464	*/
465	for_each_possible_cpu(cpu)
466	if (cpu_topology[cpu].package_id == -1)
467	ret = -EINVAL;
468
469	out_map:
470	of_node_put(map);
471	out:
472	of_node_put(cn);
473	return ret;
474	}
ca74b316	475	#endif
60c1b220 AP	476
	477	/*
	478	* cpu topology table
	479	*/
	480	struct cpu_topology cpu_topology[NR_CPUS];
	481	EXPORT_SYMBOL_GPL(cpu_topology);
	482
	483	const struct cpumask *cpu_coregroup_mask(int cpu)
	484	{
	485	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
	486
	487	/* Find the smaller of NUMA, core or LLC siblings */
	488	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
	489	/* not numa in package, lets use the package siblings */
	490	core_mask = &cpu_topology[cpu].core_sibling;
	491	}
	492	if (cpu_topology[cpu].llc_id != -1) {
	493	if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
	494	core_mask = &cpu_topology[cpu].llc_sibling;
	495	}
	496
	497	return core_mask;
	498	}
	499
	500	void update_siblings_masks(unsigned int cpuid)
	501	{
	502	struct cpu_topology cpu_topo, cpuid_topo = &cpu_topology[cpuid];
	503	int cpu;
	504
	505	/* update core and thread sibling masks */
	506	for_each_online_cpu(cpu) {
	507	cpu_topo = &cpu_topology[cpu];
	508
	509	if (cpuid_topo->llc_id == cpu_topo->llc_id) {
	510	cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
	511	cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
	512	}
	513
	514	if (cpuid_topo->package_id != cpu_topo->package_id)
	515	continue;
	516
	517	cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
	518	cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
	519
	520	if (cpuid_topo->core_id != cpu_topo->core_id)
	521	continue;
	522
	523	cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
	524	cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	525	}
	526	}
	527
	528	static void clear_cpu_topology(int cpu)
	529	{
	530	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
	531
	532	cpumask_clear(&cpu_topo->llc_sibling);
	533	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
	534
	535	cpumask_clear(&cpu_topo->core_sibling);
	536	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
	537	cpumask_clear(&cpu_topo->thread_sibling);
	538	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
	539	}
540
ca74b316	541	void __init reset_cpu_topology(void)
60c1b220 AP	542	{
	543	unsigned int cpu;
	544
	545	for_each_possible_cpu(cpu) {
	546	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
	547
	548	cpu_topo->thread_id = -1;
	549	cpu_topo->core_id = -1;
	550	cpu_topo->package_id = -1;
	551	cpu_topo->llc_id = -1;
	552
	553	clear_cpu_topology(cpu);
	554	}
	555	}
	556
	557	void remove_cpu_topology(unsigned int cpu)
	558	{
	559	int sibling;
	560
	561	for_each_cpu(sibling, topology_core_cpumask(cpu))
	562	cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	563	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
	564	cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	565	for_each_cpu(sibling, topology_llc_cpumask(cpu))
	566	cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
	567
	568	clear_cpu_topology(cpu);
	569	}
	570
	571	__weak int __init parse_acpi_topology(void)
	572	{
	573	return 0;
	574	}
	575
ca74b316	576	#if defined(CONFIG_ARM64) \|\| defined(CONFIG_RISCV)
60c1b220 AP	577	void __init init_cpu_topology(void)
	578	{
	579	reset_cpu_topology();
	580
	581	/*
	582	* Discard anything that was parsed if we hit an error so we
	583	* don't use partial information.
	584	*/
	585	if (parse_acpi_topology())
	586	reset_cpu_topology();
	587	else if (of_have_populated_dt() && parse_dt_topology())
	588	reset_cpu_topology();
	589	}
	590	#endif