Commit | Line | Data |
---|---|---|
9bdcb44e RW |
1 | /* |
2 | * CPUFreq governor based on scheduler-provided CPU utilization data. | |
3 | * | |
4 | * Copyright (C) 2016, Intel Corporation | |
5 | * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | */ | |
11 | ||
60f05e86 VK |
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
13 | ||
9bdcb44e RW |
14 | #include <linux/cpufreq.h> |
15 | #include <linux/module.h> | |
16 | #include <linux/slab.h> | |
17 | #include <trace/events/power.h> | |
18 | ||
19 | #include "sched.h" | |
20 | ||
21 | struct sugov_tunables { | |
22 | struct gov_attr_set attr_set; | |
23 | unsigned int rate_limit_us; | |
24 | }; | |
25 | ||
26 | struct sugov_policy { | |
27 | struct cpufreq_policy *policy; | |
28 | ||
29 | struct sugov_tunables *tunables; | |
30 | struct list_head tunables_hook; | |
31 | ||
32 | raw_spinlock_t update_lock; /* For shared policies */ | |
33 | u64 last_freq_update_time; | |
34 | s64 freq_update_delay_ns; | |
35 | unsigned int next_freq; | |
36 | ||
37 | /* The next fields are only needed if fast switch cannot be used. */ | |
38 | struct irq_work irq_work; | |
39 | struct work_struct work; | |
40 | struct mutex work_lock; | |
41 | bool work_in_progress; | |
42 | ||
43 | bool need_freq_update; | |
44 | }; | |
45 | ||
46 | struct sugov_cpu { | |
47 | struct update_util_data update_util; | |
48 | struct sugov_policy *sg_policy; | |
49 | ||
5cbea469 SM |
50 | unsigned int cached_raw_freq; |
51 | ||
9bdcb44e RW |
52 | /* The fields below are only needed when sharing a policy. */ |
53 | unsigned long util; | |
54 | unsigned long max; | |
55 | u64 last_update; | |
56 | }; | |
57 | ||
58 | static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); | |
59 | ||
60 | /************************ Governor internals ***********************/ | |
61 | ||
62 | static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) | |
63 | { | |
64 | s64 delta_ns; | |
65 | ||
66 | if (sg_policy->work_in_progress) | |
67 | return false; | |
68 | ||
69 | if (unlikely(sg_policy->need_freq_update)) { | |
70 | sg_policy->need_freq_update = false; | |
71 | /* | |
72 | * This happens when limits change, so forget the previous | |
73 | * next_freq value and force an update. | |
74 | */ | |
75 | sg_policy->next_freq = UINT_MAX; | |
76 | return true; | |
77 | } | |
78 | ||
79 | delta_ns = time - sg_policy->last_freq_update_time; | |
80 | return delta_ns >= sg_policy->freq_update_delay_ns; | |
81 | } | |
82 | ||
83 | static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, | |
84 | unsigned int next_freq) | |
85 | { | |
86 | struct cpufreq_policy *policy = sg_policy->policy; | |
87 | ||
88 | sg_policy->last_freq_update_time = time; | |
89 | ||
90 | if (policy->fast_switch_enabled) { | |
91 | if (sg_policy->next_freq == next_freq) { | |
92 | trace_cpu_frequency(policy->cur, smp_processor_id()); | |
93 | return; | |
94 | } | |
95 | sg_policy->next_freq = next_freq; | |
96 | next_freq = cpufreq_driver_fast_switch(policy, next_freq); | |
97 | if (next_freq == CPUFREQ_ENTRY_INVALID) | |
98 | return; | |
99 | ||
100 | policy->cur = next_freq; | |
101 | trace_cpu_frequency(next_freq, smp_processor_id()); | |
102 | } else if (sg_policy->next_freq != next_freq) { | |
103 | sg_policy->next_freq = next_freq; | |
104 | sg_policy->work_in_progress = true; | |
105 | irq_work_queue(&sg_policy->irq_work); | |
106 | } | |
107 | } | |
108 | ||
109 | /** | |
110 | * get_next_freq - Compute a new frequency for a given cpufreq policy. | |
5cbea469 | 111 | * @sg_cpu: schedutil cpu object to compute the new frequency for. |
9bdcb44e RW |
112 | * @util: Current CPU utilization. |
113 | * @max: CPU capacity. | |
114 | * | |
115 | * If the utilization is frequency-invariant, choose the new frequency to be | |
116 | * proportional to it, that is | |
117 | * | |
118 | * next_freq = C * max_freq * util / max | |
119 | * | |
120 | * Otherwise, approximate the would-be frequency-invariant utilization by | |
121 | * util_raw * (curr_freq / max_freq) which leads to | |
122 | * | |
123 | * next_freq = C * curr_freq * util_raw / max | |
124 | * | |
125 | * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. | |
5cbea469 SM |
126 | * |
127 | * The lowest driver-supported frequency which is equal or greater than the raw | |
128 | * next_freq (as calculated above) is returned, subject to policy min/max and | |
129 | * cpufreq driver limitations. | |
9bdcb44e | 130 | */ |
5cbea469 SM |
131 | static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, |
132 | unsigned long max) | |
9bdcb44e | 133 | { |
5cbea469 SM |
134 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; |
135 | struct cpufreq_policy *policy = sg_policy->policy; | |
9bdcb44e RW |
136 | unsigned int freq = arch_scale_freq_invariant() ? |
137 | policy->cpuinfo.max_freq : policy->cur; | |
138 | ||
5cbea469 SM |
139 | freq = (freq + (freq >> 2)) * util / max; |
140 | ||
141 | if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) | |
142 | return sg_policy->next_freq; | |
143 | sg_cpu->cached_raw_freq = freq; | |
144 | return cpufreq_driver_resolve_freq(policy, freq); | |
9bdcb44e RW |
145 | } |
146 | ||
147 | static void sugov_update_single(struct update_util_data *hook, u64 time, | |
148 | unsigned long util, unsigned long max) | |
149 | { | |
150 | struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); | |
151 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; | |
152 | struct cpufreq_policy *policy = sg_policy->policy; | |
153 | unsigned int next_f; | |
154 | ||
155 | if (!sugov_should_update_freq(sg_policy, time)) | |
156 | return; | |
157 | ||
158 | next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : | |
5cbea469 | 159 | get_next_freq(sg_cpu, util, max); |
9bdcb44e RW |
160 | sugov_update_commit(sg_policy, time, next_f); |
161 | } | |
162 | ||
5cbea469 | 163 | static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, |
9bdcb44e RW |
164 | unsigned long util, unsigned long max) |
165 | { | |
5cbea469 | 166 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; |
9bdcb44e RW |
167 | struct cpufreq_policy *policy = sg_policy->policy; |
168 | unsigned int max_f = policy->cpuinfo.max_freq; | |
169 | u64 last_freq_update_time = sg_policy->last_freq_update_time; | |
170 | unsigned int j; | |
171 | ||
172 | if (util == ULONG_MAX) | |
173 | return max_f; | |
174 | ||
175 | for_each_cpu(j, policy->cpus) { | |
176 | struct sugov_cpu *j_sg_cpu; | |
177 | unsigned long j_util, j_max; | |
178 | s64 delta_ns; | |
179 | ||
180 | if (j == smp_processor_id()) | |
181 | continue; | |
182 | ||
183 | j_sg_cpu = &per_cpu(sugov_cpu, j); | |
184 | /* | |
185 | * If the CPU utilization was last updated before the previous | |
186 | * frequency update and the time elapsed between the last update | |
187 | * of the CPU utilization and the last frequency update is long | |
188 | * enough, don't take the CPU into account as it probably is | |
189 | * idle now. | |
190 | */ | |
191 | delta_ns = last_freq_update_time - j_sg_cpu->last_update; | |
192 | if (delta_ns > TICK_NSEC) | |
193 | continue; | |
194 | ||
195 | j_util = j_sg_cpu->util; | |
196 | if (j_util == ULONG_MAX) | |
197 | return max_f; | |
198 | ||
199 | j_max = j_sg_cpu->max; | |
200 | if (j_util * max > j_max * util) { | |
201 | util = j_util; | |
202 | max = j_max; | |
203 | } | |
204 | } | |
205 | ||
5cbea469 | 206 | return get_next_freq(sg_cpu, util, max); |
9bdcb44e RW |
207 | } |
208 | ||
209 | static void sugov_update_shared(struct update_util_data *hook, u64 time, | |
210 | unsigned long util, unsigned long max) | |
211 | { | |
212 | struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); | |
213 | struct sugov_policy *sg_policy = sg_cpu->sg_policy; | |
214 | unsigned int next_f; | |
215 | ||
216 | raw_spin_lock(&sg_policy->update_lock); | |
217 | ||
218 | sg_cpu->util = util; | |
219 | sg_cpu->max = max; | |
220 | sg_cpu->last_update = time; | |
221 | ||
222 | if (sugov_should_update_freq(sg_policy, time)) { | |
5cbea469 | 223 | next_f = sugov_next_freq_shared(sg_cpu, util, max); |
9bdcb44e RW |
224 | sugov_update_commit(sg_policy, time, next_f); |
225 | } | |
226 | ||
227 | raw_spin_unlock(&sg_policy->update_lock); | |
228 | } | |
229 | ||
230 | static void sugov_work(struct work_struct *work) | |
231 | { | |
232 | struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); | |
233 | ||
234 | mutex_lock(&sg_policy->work_lock); | |
235 | __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq, | |
236 | CPUFREQ_RELATION_L); | |
237 | mutex_unlock(&sg_policy->work_lock); | |
238 | ||
239 | sg_policy->work_in_progress = false; | |
240 | } | |
241 | ||
242 | static void sugov_irq_work(struct irq_work *irq_work) | |
243 | { | |
244 | struct sugov_policy *sg_policy; | |
245 | ||
246 | sg_policy = container_of(irq_work, struct sugov_policy, irq_work); | |
247 | schedule_work_on(smp_processor_id(), &sg_policy->work); | |
248 | } | |
249 | ||
250 | /************************** sysfs interface ************************/ | |
251 | ||
252 | static struct sugov_tunables *global_tunables; | |
253 | static DEFINE_MUTEX(global_tunables_lock); | |
254 | ||
255 | static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) | |
256 | { | |
257 | return container_of(attr_set, struct sugov_tunables, attr_set); | |
258 | } | |
259 | ||
260 | static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) | |
261 | { | |
262 | struct sugov_tunables *tunables = to_sugov_tunables(attr_set); | |
263 | ||
264 | return sprintf(buf, "%u\n", tunables->rate_limit_us); | |
265 | } | |
266 | ||
267 | static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, | |
268 | size_t count) | |
269 | { | |
270 | struct sugov_tunables *tunables = to_sugov_tunables(attr_set); | |
271 | struct sugov_policy *sg_policy; | |
272 | unsigned int rate_limit_us; | |
273 | ||
274 | if (kstrtouint(buf, 10, &rate_limit_us)) | |
275 | return -EINVAL; | |
276 | ||
277 | tunables->rate_limit_us = rate_limit_us; | |
278 | ||
279 | list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) | |
280 | sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; | |
281 | ||
282 | return count; | |
283 | } | |
284 | ||
285 | static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); | |
286 | ||
287 | static struct attribute *sugov_attributes[] = { | |
288 | &rate_limit_us.attr, | |
289 | NULL | |
290 | }; | |
291 | ||
292 | static struct kobj_type sugov_tunables_ktype = { | |
293 | .default_attrs = sugov_attributes, | |
294 | .sysfs_ops = &governor_sysfs_ops, | |
295 | }; | |
296 | ||
297 | /********************** cpufreq governor interface *********************/ | |
298 | ||
299 | static struct cpufreq_governor schedutil_gov; | |
300 | ||
301 | static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) | |
302 | { | |
303 | struct sugov_policy *sg_policy; | |
304 | ||
305 | sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); | |
306 | if (!sg_policy) | |
307 | return NULL; | |
308 | ||
309 | sg_policy->policy = policy; | |
310 | init_irq_work(&sg_policy->irq_work, sugov_irq_work); | |
311 | INIT_WORK(&sg_policy->work, sugov_work); | |
312 | mutex_init(&sg_policy->work_lock); | |
313 | raw_spin_lock_init(&sg_policy->update_lock); | |
314 | return sg_policy; | |
315 | } | |
316 | ||
317 | static void sugov_policy_free(struct sugov_policy *sg_policy) | |
318 | { | |
319 | mutex_destroy(&sg_policy->work_lock); | |
320 | kfree(sg_policy); | |
321 | } | |
322 | ||
323 | static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) | |
324 | { | |
325 | struct sugov_tunables *tunables; | |
326 | ||
327 | tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); | |
328 | if (tunables) { | |
329 | gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); | |
330 | if (!have_governor_per_policy()) | |
331 | global_tunables = tunables; | |
332 | } | |
333 | return tunables; | |
334 | } | |
335 | ||
336 | static void sugov_tunables_free(struct sugov_tunables *tunables) | |
337 | { | |
338 | if (!have_governor_per_policy()) | |
339 | global_tunables = NULL; | |
340 | ||
341 | kfree(tunables); | |
342 | } | |
343 | ||
344 | static int sugov_init(struct cpufreq_policy *policy) | |
345 | { | |
346 | struct sugov_policy *sg_policy; | |
347 | struct sugov_tunables *tunables; | |
348 | unsigned int lat; | |
349 | int ret = 0; | |
350 | ||
351 | /* State should be equivalent to EXIT */ | |
352 | if (policy->governor_data) | |
353 | return -EBUSY; | |
354 | ||
355 | sg_policy = sugov_policy_alloc(policy); | |
356 | if (!sg_policy) | |
357 | return -ENOMEM; | |
358 | ||
359 | mutex_lock(&global_tunables_lock); | |
360 | ||
361 | if (global_tunables) { | |
362 | if (WARN_ON(have_governor_per_policy())) { | |
363 | ret = -EINVAL; | |
364 | goto free_sg_policy; | |
365 | } | |
366 | policy->governor_data = sg_policy; | |
367 | sg_policy->tunables = global_tunables; | |
368 | ||
369 | gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook); | |
370 | goto out; | |
371 | } | |
372 | ||
373 | tunables = sugov_tunables_alloc(sg_policy); | |
374 | if (!tunables) { | |
375 | ret = -ENOMEM; | |
376 | goto free_sg_policy; | |
377 | } | |
378 | ||
379 | tunables->rate_limit_us = LATENCY_MULTIPLIER; | |
380 | lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; | |
381 | if (lat) | |
382 | tunables->rate_limit_us *= lat; | |
383 | ||
384 | policy->governor_data = sg_policy; | |
385 | sg_policy->tunables = tunables; | |
386 | ||
387 | ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype, | |
388 | get_governor_parent_kobj(policy), "%s", | |
389 | schedutil_gov.name); | |
390 | if (ret) | |
391 | goto fail; | |
392 | ||
393 | out: | |
394 | mutex_unlock(&global_tunables_lock); | |
395 | ||
396 | cpufreq_enable_fast_switch(policy); | |
397 | return 0; | |
398 | ||
399 | fail: | |
400 | policy->governor_data = NULL; | |
401 | sugov_tunables_free(tunables); | |
402 | ||
403 | free_sg_policy: | |
404 | mutex_unlock(&global_tunables_lock); | |
405 | ||
406 | sugov_policy_free(sg_policy); | |
60f05e86 | 407 | pr_err("initialization failed (error %d)\n", ret); |
9bdcb44e RW |
408 | return ret; |
409 | } | |
410 | ||
e788892b | 411 | static void sugov_exit(struct cpufreq_policy *policy) |
9bdcb44e RW |
412 | { |
413 | struct sugov_policy *sg_policy = policy->governor_data; | |
414 | struct sugov_tunables *tunables = sg_policy->tunables; | |
415 | unsigned int count; | |
416 | ||
6c9d9c81 RW |
417 | cpufreq_disable_fast_switch(policy); |
418 | ||
9bdcb44e RW |
419 | mutex_lock(&global_tunables_lock); |
420 | ||
421 | count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); | |
422 | policy->governor_data = NULL; | |
423 | if (!count) | |
424 | sugov_tunables_free(tunables); | |
425 | ||
426 | mutex_unlock(&global_tunables_lock); | |
427 | ||
428 | sugov_policy_free(sg_policy); | |
9bdcb44e RW |
429 | } |
430 | ||
431 | static int sugov_start(struct cpufreq_policy *policy) | |
432 | { | |
433 | struct sugov_policy *sg_policy = policy->governor_data; | |
434 | unsigned int cpu; | |
435 | ||
436 | sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; | |
437 | sg_policy->last_freq_update_time = 0; | |
438 | sg_policy->next_freq = UINT_MAX; | |
439 | sg_policy->work_in_progress = false; | |
440 | sg_policy->need_freq_update = false; | |
441 | ||
442 | for_each_cpu(cpu, policy->cpus) { | |
443 | struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); | |
444 | ||
445 | sg_cpu->sg_policy = sg_policy; | |
446 | if (policy_is_shared(policy)) { | |
447 | sg_cpu->util = ULONG_MAX; | |
448 | sg_cpu->max = 0; | |
449 | sg_cpu->last_update = 0; | |
5cbea469 | 450 | sg_cpu->cached_raw_freq = 0; |
9bdcb44e RW |
451 | cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, |
452 | sugov_update_shared); | |
453 | } else { | |
454 | cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, | |
455 | sugov_update_single); | |
456 | } | |
457 | } | |
458 | return 0; | |
459 | } | |
460 | ||
e788892b | 461 | static void sugov_stop(struct cpufreq_policy *policy) |
9bdcb44e RW |
462 | { |
463 | struct sugov_policy *sg_policy = policy->governor_data; | |
464 | unsigned int cpu; | |
465 | ||
466 | for_each_cpu(cpu, policy->cpus) | |
467 | cpufreq_remove_update_util_hook(cpu); | |
468 | ||
469 | synchronize_sched(); | |
470 | ||
471 | irq_work_sync(&sg_policy->irq_work); | |
472 | cancel_work_sync(&sg_policy->work); | |
9bdcb44e RW |
473 | } |
474 | ||
e788892b | 475 | static void sugov_limits(struct cpufreq_policy *policy) |
9bdcb44e RW |
476 | { |
477 | struct sugov_policy *sg_policy = policy->governor_data; | |
478 | ||
479 | if (!policy->fast_switch_enabled) { | |
480 | mutex_lock(&sg_policy->work_lock); | |
bf2be2de | 481 | cpufreq_policy_apply_limits(policy); |
9bdcb44e RW |
482 | mutex_unlock(&sg_policy->work_lock); |
483 | } | |
484 | ||
485 | sg_policy->need_freq_update = true; | |
9bdcb44e RW |
486 | } |
487 | ||
488 | static struct cpufreq_governor schedutil_gov = { | |
489 | .name = "schedutil", | |
9bdcb44e | 490 | .owner = THIS_MODULE, |
e788892b RW |
491 | .init = sugov_init, |
492 | .exit = sugov_exit, | |
493 | .start = sugov_start, | |
494 | .stop = sugov_stop, | |
495 | .limits = sugov_limits, | |
9bdcb44e RW |
496 | }; |
497 | ||
498 | static int __init sugov_module_init(void) | |
499 | { | |
500 | return cpufreq_register_governor(&schedutil_gov); | |
501 | } | |
502 | ||
503 | static void __exit sugov_module_exit(void) | |
504 | { | |
505 | cpufreq_unregister_governor(&schedutil_gov); | |
506 | } | |
507 | ||
508 | MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>"); | |
509 | MODULE_DESCRIPTION("Utilization-based CPU frequency selection"); | |
510 | MODULE_LICENSE("GPL"); | |
511 | ||
512 | #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL | |
513 | struct cpufreq_governor *cpufreq_default_governor(void) | |
514 | { | |
515 | return &schedutil_gov; | |
516 | } | |
517 | ||
518 | fs_initcall(sugov_module_init); | |
519 | #else | |
520 | module_init(sugov_module_init); | |
521 | #endif | |
522 | module_exit(sugov_module_exit); |