Commit | Line | Data |
---|---|---|
27871f7a QP |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Energy Model of CPUs | |
4 | * | |
5 | * Copyright (c) 2018, Arm ltd. | |
6 | * Written by: Quentin Perret, Arm ltd. | |
7 | */ | |
8 | ||
9 | #define pr_fmt(fmt) "energy_model: " fmt | |
10 | ||
11 | #include <linux/cpu.h> | |
12 | #include <linux/cpumask.h> | |
9cac42d0 | 13 | #include <linux/debugfs.h> |
27871f7a QP |
14 | #include <linux/energy_model.h> |
15 | #include <linux/sched/topology.h> | |
16 | #include <linux/slab.h> | |
17 | ||
18 | /* Mapping of each CPU to the performance domain to which it belongs. */ | |
19 | static DEFINE_PER_CPU(struct em_perf_domain *, em_data); | |
20 | ||
21 | /* | |
22 | * Mutex serializing the registrations of performance domains and letting | |
23 | * callbacks defined by drivers sleep. | |
24 | */ | |
25 | static DEFINE_MUTEX(em_pd_mutex); | |
26 | ||
9cac42d0 QP |
27 | #ifdef CONFIG_DEBUG_FS |
28 | static struct dentry *rootdir; | |
29 | ||
30 | static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd) | |
31 | { | |
32 | struct dentry *d; | |
33 | char name[24]; | |
34 | ||
35 | snprintf(name, sizeof(name), "cs:%lu", cs->frequency); | |
36 | ||
37 | /* Create per-cs directory */ | |
38 | d = debugfs_create_dir(name, pd); | |
39 | debugfs_create_ulong("frequency", 0444, d, &cs->frequency); | |
40 | debugfs_create_ulong("power", 0444, d, &cs->power); | |
41 | debugfs_create_ulong("cost", 0444, d, &cs->cost); | |
42 | } | |
43 | ||
44 | static int em_debug_cpus_show(struct seq_file *s, void *unused) | |
45 | { | |
46 | seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private))); | |
47 | ||
48 | return 0; | |
49 | } | |
50 | DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); | |
51 | ||
52 | static void em_debug_create_pd(struct em_perf_domain *pd, int cpu) | |
53 | { | |
54 | struct dentry *d; | |
55 | char name[8]; | |
56 | int i; | |
57 | ||
58 | snprintf(name, sizeof(name), "pd%d", cpu); | |
59 | ||
60 | /* Create the directory of the performance domain */ | |
61 | d = debugfs_create_dir(name, rootdir); | |
62 | ||
63 | debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops); | |
64 | ||
65 | /* Create a sub-directory for each capacity state */ | |
66 | for (i = 0; i < pd->nr_cap_states; i++) | |
67 | em_debug_create_cs(&pd->table[i], d); | |
68 | } | |
69 | ||
70 | static int __init em_debug_init(void) | |
71 | { | |
72 | /* Create /sys/kernel/debug/energy_model directory */ | |
73 | rootdir = debugfs_create_dir("energy_model", NULL); | |
74 | ||
75 | return 0; | |
76 | } | |
77 | core_initcall(em_debug_init); | |
78 | #else /* CONFIG_DEBUG_FS */ | |
/* Without CONFIG_DEBUG_FS there is nothing to expose: empty stub. */
static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
{
}
80 | #endif | |
27871f7a QP |
81 | static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states, |
82 | struct em_data_callback *cb) | |
83 | { | |
84 | unsigned long opp_eff, prev_opp_eff = ULONG_MAX; | |
85 | unsigned long power, freq, prev_freq = 0; | |
86 | int i, ret, cpu = cpumask_first(span); | |
87 | struct em_cap_state *table; | |
88 | struct em_perf_domain *pd; | |
89 | u64 fmax; | |
90 | ||
91 | if (!cb->active_power) | |
92 | return NULL; | |
93 | ||
94 | pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); | |
95 | if (!pd) | |
96 | return NULL; | |
97 | ||
98 | table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL); | |
99 | if (!table) | |
100 | goto free_pd; | |
101 | ||
102 | /* Build the list of capacity states for this performance domain */ | |
103 | for (i = 0, freq = 0; i < nr_states; i++, freq++) { | |
104 | /* | |
105 | * active_power() is a driver callback which ceils 'freq' to | |
106 | * lowest capacity state of 'cpu' above 'freq' and updates | |
107 | * 'power' and 'freq' accordingly. | |
108 | */ | |
109 | ret = cb->active_power(&power, &freq, cpu); | |
110 | if (ret) { | |
111 | pr_err("pd%d: invalid cap. state: %d\n", cpu, ret); | |
112 | goto free_cs_table; | |
113 | } | |
114 | ||
115 | /* | |
116 | * We expect the driver callback to increase the frequency for | |
117 | * higher capacity states. | |
118 | */ | |
119 | if (freq <= prev_freq) { | |
120 | pr_err("pd%d: non-increasing freq: %lu\n", cpu, freq); | |
121 | goto free_cs_table; | |
122 | } | |
123 | ||
124 | /* | |
125 | * The power returned by active_state() is expected to be | |
126 | * positive, in milli-watts and to fit into 16 bits. | |
127 | */ | |
128 | if (!power || power > EM_CPU_MAX_POWER) { | |
129 | pr_err("pd%d: invalid power: %lu\n", cpu, power); | |
130 | goto free_cs_table; | |
131 | } | |
132 | ||
133 | table[i].power = power; | |
134 | table[i].frequency = prev_freq = freq; | |
135 | ||
136 | /* | |
137 | * The hertz/watts efficiency ratio should decrease as the | |
138 | * frequency grows on sane platforms. But this isn't always | |
139 | * true in practice so warn the user if a higher OPP is more | |
140 | * power efficient than a lower one. | |
141 | */ | |
142 | opp_eff = freq / power; | |
143 | if (opp_eff >= prev_opp_eff) | |
144 | pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state%d\n", | |
145 | cpu, i, i - 1); | |
146 | prev_opp_eff = opp_eff; | |
147 | } | |
148 | ||
149 | /* Compute the cost of each capacity_state. */ | |
150 | fmax = (u64) table[nr_states - 1].frequency; | |
151 | for (i = 0; i < nr_states; i++) { | |
152 | table[i].cost = div64_u64(fmax * table[i].power, | |
153 | table[i].frequency); | |
154 | } | |
155 | ||
156 | pd->table = table; | |
157 | pd->nr_cap_states = nr_states; | |
158 | cpumask_copy(to_cpumask(pd->cpus), span); | |
159 | ||
9cac42d0 QP |
160 | em_debug_create_pd(pd, cpu); |
161 | ||
27871f7a QP |
162 | return pd; |
163 | ||
164 | free_cs_table: | |
165 | kfree(table); | |
166 | free_pd: | |
167 | kfree(pd); | |
168 | ||
169 | return NULL; | |
170 | } | |
171 | ||
172 | /** | |
173 | * em_cpu_get() - Return the performance domain for a CPU | |
174 | * @cpu : CPU to find the performance domain for | |
175 | * | |
176 | * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't | |
177 | * exist. | |
178 | */ | |
179 | struct em_perf_domain *em_cpu_get(int cpu) | |
180 | { | |
181 | return READ_ONCE(per_cpu(em_data, cpu)); | |
182 | } | |
183 | EXPORT_SYMBOL_GPL(em_cpu_get); | |
184 | ||
185 | /** | |
186 | * em_register_perf_domain() - Register the Energy Model of a performance domain | |
187 | * @span : Mask of CPUs in the performance domain | |
188 | * @nr_states : Number of capacity states to register | |
189 | * @cb : Callback functions providing the data of the Energy Model | |
190 | * | |
191 | * Create Energy Model tables for a performance domain using the callbacks | |
192 | * defined in cb. | |
193 | * | |
194 | * If multiple clients register the same performance domain, all but the first | |
195 | * registration will be ignored. | |
196 | * | |
197 | * Return 0 on success | |
198 | */ | |
199 | int em_register_perf_domain(cpumask_t *span, unsigned int nr_states, | |
200 | struct em_data_callback *cb) | |
201 | { | |
202 | unsigned long cap, prev_cap = 0; | |
203 | struct em_perf_domain *pd; | |
204 | int cpu, ret = 0; | |
205 | ||
206 | if (!span || !nr_states || !cb) | |
207 | return -EINVAL; | |
208 | ||
209 | /* | |
210 | * Use a mutex to serialize the registration of performance domains and | |
211 | * let the driver-defined callback functions sleep. | |
212 | */ | |
213 | mutex_lock(&em_pd_mutex); | |
214 | ||
215 | for_each_cpu(cpu, span) { | |
216 | /* Make sure we don't register again an existing domain. */ | |
217 | if (READ_ONCE(per_cpu(em_data, cpu))) { | |
218 | ret = -EEXIST; | |
219 | goto unlock; | |
220 | } | |
221 | ||
222 | /* | |
223 | * All CPUs of a domain must have the same micro-architecture | |
224 | * since they all share the same table. | |
225 | */ | |
8ec59c0f | 226 | cap = arch_scale_cpu_capacity(cpu); |
27871f7a QP |
227 | if (prev_cap && prev_cap != cap) { |
228 | pr_err("CPUs of %*pbl must have the same capacity\n", | |
229 | cpumask_pr_args(span)); | |
230 | ret = -EINVAL; | |
231 | goto unlock; | |
232 | } | |
233 | prev_cap = cap; | |
234 | } | |
235 | ||
236 | /* Create the performance domain and add it to the Energy Model. */ | |
237 | pd = em_create_pd(span, nr_states, cb); | |
238 | if (!pd) { | |
239 | ret = -EINVAL; | |
240 | goto unlock; | |
241 | } | |
242 | ||
243 | for_each_cpu(cpu, span) { | |
244 | /* | |
245 | * The per-cpu array can be read concurrently from em_cpu_get(). | |
246 | * The barrier enforces the ordering needed to make sure readers | |
247 | * can only access well formed em_perf_domain structs. | |
248 | */ | |
249 | smp_store_release(per_cpu_ptr(&em_data, cpu), pd); | |
250 | } | |
251 | ||
252 | pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span)); | |
253 | unlock: | |
254 | mutex_unlock(&em_pd_mutex); | |
255 | ||
256 | return ret; | |
257 | } | |
258 | EXPORT_SYMBOL_GPL(em_register_perf_domain); |