/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock; /* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;
	unsigned int cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;
	unsigned int cpu;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	unsigned int iowait_boost_max;
	u64 last_update;

	/* The fields below are only needed when sharing a policy. */
	unsigned long util_cfs;
	unsigned long util_dl;
	unsigned long max;
	unsigned int flags;

	/* The field below is for single-CPU policies only. */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-cpu data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-cpu
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * For the slow switching platforms, the kthread is always scheduled on
	 * the right set of CPUs and any CPU can find the next frequency and
	 * schedule the kthread.
	 */
	if (sg_policy->policy->fast_switch_enabled &&
	    !cpufreq_can_do_remote_dvfs(sg_policy->policy))
		return false;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;
	return delta_ns >= sg_policy->freq_update_delay_ns;
}

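/*
 * Commit the frequency selected by the update callbacks.  Fast-switch capable
 * drivers are asked to change the frequency right here, in scheduler context;
 * otherwise the request is queued via irq_work and carried out by the
 * governor kthread in sugov_work().
 */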
static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	if (sg_policy->next_freq == next_freq)
		return;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (!next_freq)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
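 *
 * As an illustration (hypothetical numbers, not taken from any driver): with
 * frequency-invariant utilization, max_freq = 2000000 kHz and util / max = 0.5,
 * the raw next_freq works out to 1.25 * 2000000 * 0.5 = 1250000 kHz.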
 *
 * The lowest driver-supported frequency which is equal or greater than the raw
 * next_freq (as calculated above) is returned, subject to policy min/max and
 * cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

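	/* freq + (freq >> 2) is 1.25 * freq, i.e. the C = 1.25 margin above. */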
	freq = (freq + (freq >> 2)) * util / max;

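	/*
	 * Avoid the cpufreq driver lookup if the raw frequency has not changed
	 * since the last evaluation and a valid next_freq is already cached.
	 */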
	if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
		return sg_policy->next_freq;
	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}

static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);

	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
	sg_cpu->util_cfs = cpu_util_cfs(rq);
	sg_cpu->util_dl = cpu_util_dl(rq);
}

static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
{
	/*
	 * Ideally we would like to set util_dl as min/guaranteed freq and
	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
	 * ready for such an interface. So, we only do the latter for now.
	 */
	return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
}

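/*
 * An SCHED_CPUFREQ_IOWAIT update starts the boost at policy->min and doubles
 * it on each subsequent iowait wakeup, capped at iowait_boost_max.  The boost
 * is dropped if the CPU appears to have been idle for longer than a tick.
 */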
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
{
	if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
		if (sg_cpu->iowait_boost_pending)
			return;

		sg_cpu->iowait_boost_pending = true;

		if (sg_cpu->iowait_boost) {
			sg_cpu->iowait_boost <<= 1;
			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
		} else {
			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
		}
	} else if (sg_cpu->iowait_boost) {
		s64 delta_ns = time - sg_cpu->last_update;

		/* Clear iowait_boost if the CPU appears to have been idle. */
		if (delta_ns > TICK_NSEC) {
			sg_cpu->iowait_boost = 0;
			sg_cpu->iowait_boost_pending = false;
		}
	}
}

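/*
 * Apply the iowait boost to the (util, max) pair used for frequency selection.
 * Unless a fresh boost is pending, the boost is halved on each use and cleared
 * once it falls below policy->min.
 */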
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
			       unsigned long *max)
{
	unsigned int boost_util, boost_max;

	if (!sg_cpu->iowait_boost)
		return;

	if (sg_cpu->iowait_boost_pending) {
		sg_cpu->iowait_boost_pending = false;
	} else {
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	boost_util = sg_cpu->iowait_boost;
	boost_max = sg_cpu->iowait_boost_max;

	if (*util * boost_max < *max * boost_util) {
		*util = boost_util;
		*max = boost_max;
	}
}

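/*
 * A CPU is considered busy if it has not entered idle since the previous
 * check, i.e. the NOHZ idle-calls counter has not advanced.  This is used to
 * avoid reducing the frequency prematurely for a CPU that never went idle.
 */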
#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util, max;
	unsigned int next_f;
	bool busy;

	sugov_set_iowait_boost(sg_cpu, time);
	sg_cpu->last_update = time;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	busy = sugov_cpu_is_busy(sg_cpu);

	if (flags & SCHED_CPUFREQ_RT) {
		next_f = policy->cpuinfo.max_freq;
	} else {
		sugov_get_util(sg_cpu);
		max = sg_cpu->max;
		util = sugov_aggregate_util(sg_cpu);
		sugov_iowait_boost(sg_cpu, &util, &max);
		next_f = get_next_freq(sg_policy, util, max);
		/*
		 * Do not reduce the frequency if the CPU has not been idle
		 * recently, as the reduction is likely to be premature then.
		 */
		if (busy && next_f < sg_policy->next_freq) {
			next_f = sg_policy->next_freq;

			/* Reset cached freq as next_freq has changed */
			sg_policy->cached_raw_freq = 0;
		}
	}
	sugov_update_commit(sg_policy, time, next_f);
}

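/*
 * For a shared policy the next frequency is driven by the CPU with the
 * highest utilization/capacity ratio among policy->cpus; any CPU whose last
 * update carried SCHED_CPUFREQ_RT pushes the whole policy to cpuinfo.max_freq.
 */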
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max = 1;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long j_util, j_max;
		s64 delta_ns;

		/*
		 * If the CFS CPU utilization was last updated before the
		 * previous frequency update and the time elapsed between the
		 * last update of the CPU utilization and the last frequency
		 * update is long enough, reset iowait_boost and util_cfs, as
		 * they are now probably stale. However, still consider the
		 * CPU contribution if it has some DEADLINE utilization
		 * (util_dl).
		 */
		delta_ns = time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC) {
			j_sg_cpu->iowait_boost = 0;
			j_sg_cpu->iowait_boost_pending = false;
			j_sg_cpu->util_cfs = 0;
			if (j_sg_cpu->util_dl == 0)
				continue;
		}
		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
			return policy->cpuinfo.max_freq;

		j_max = j_sg_cpu->max;
		j_util = sugov_aggregate_util(j_sg_cpu);
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}

		sugov_iowait_boost(j_sg_cpu, &util, &max);
	}

	return get_next_freq(sg_policy, util, max);
}

static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_get_util(sg_cpu);
	sg_cpu->flags = flags;

	sugov_set_iowait_boost(sg_cpu, time);
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		if (flags & SCHED_CPUFREQ_RT)
			next_f = sg_policy->policy->cpuinfo.max_freq;
		else
			next_f = sugov_next_freq_shared(sg_cpu, time);

		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	/*
	 * For RT tasks, the schedutil governor shoots the frequency to maximum.
	 * Special care must be taken to ensure that this kthread doesn't result
	 * in the same behavior.
	 *
	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
	 * updated only at the end of the sugov_work() function and before that
	 * the schedutil governor rejects all other frequency scaling requests.
	 *
	 * There is a very rare case though, where the RT thread yields right
	 * after the work_in_progress flag is cleared. The effects of that are
	 * neglected for now.
	 */
	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
				   size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

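/*
 * rate_limit_us is exposed through the governor's sysfs directory; depending
 * on have_governor_per_policy() that is typically either
 * /sys/devices/system/cpu/cpufreq/policy<N>/schedutil/ or
 * /sys/devices/system/cpu/cpufreq/schedutil/ (paths shown for illustration
 * only).  Writes update freq_update_delay_ns for every policy on the set.
 */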
static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size = sizeof(struct sched_attr),
		.sched_policy = SCHED_DEADLINE,
		.sched_flags = SCHED_FLAG_SUGOV,
		.sched_nice = 0,
		.sched_priority = 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime = 1000000,
		.sched_deadline = 10000000,
		.sched_period = 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;

	/* Kthread is bound to all CPUs by default */
	if (!policy->dvfs_possible_from_any_cpu)
		kthread_bind_mask(thread, policy->related_cpus);

	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

stop_kthread:
	sugov_kthread_stop(sg_policy);

free_sg_policy:
	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;
	sg_policy->cached_raw_freq = 0;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
		sg_cpu->flags = 0;
		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
	}

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
					     policy_is_shared(policy) ?
							sugov_update_shared :
							sugov_update_single);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

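	/*
	 * Wait for any update_util callbacks already running on other CPUs to
	 * complete before tearing down the irq_work and kthread work below.
	 */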
	synchronize_sched();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
}

static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.owner = THIS_MODULE,
	.dynamic_switching = true,
	.init = sugov_init,
	.exit = sugov_exit,
	.start = sugov_start,
	.stop = sugov_stop,
	.limits = sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

static int __init sugov_register(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);