/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "sched.h"

#include <trace/events/power.h>

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock; /* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;
	unsigned int cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used: */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;
	unsigned int cpu;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	unsigned int iowait_boost_max;
	u64 last_update;

	/* The fields below are only needed when sharing a policy: */
	unsigned long util_cfs;
	unsigned long util_dl;
	unsigned long max;

	/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * For the slow switching platforms, the kthread is always scheduled on
	 * the right set of CPUs and any CPU can find the next frequency and
	 * schedule the kthread.
	 */
	if (sg_policy->policy->fast_switch_enabled &&
	    !cpufreq_can_do_remote_dvfs(sg_policy->policy))
		return false;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;

	return delta_ns >= sg_policy->freq_update_delay_ns;
}

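/*
 * Example of the rate limiting above, with an illustrative tunable value:
 * if rate_limit_us is 500, then freq_update_delay_ns = 500 * NSEC_PER_USEC
 * = 500000. An update_util callback arriving 300000 ns after the last
 * frequency update sees delta_ns = 300000 < 500000, so
 * sugov_should_update_freq() returns false and no new frequency is
 * computed until at least 500 us have elapsed.
 */
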
static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	if (sg_policy->next_freq == next_freq)
		return;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (!next_freq)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal to or greater than
 * the raw next_freq (as calculated above) is returned, subject to policy
 * min/max and cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	freq = (freq + (freq >> 2)) * util / max;

	if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
		return sg_policy->next_freq;
	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}

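/*
 * Worked example of the formula above, with illustrative numbers: on a
 * frequency-invariant platform with cpuinfo.max_freq = 2000000 (kHz),
 * util = 819 and max = 1024 (i.e. util / max ~= 0.8):
 *
 *   freq = (2000000 + (2000000 >> 2)) * 819 / 1024
 *        = 2500000 * 819 / 1024 ~= 1999511
 *
 * so a CPU that is about 80% utilized maps to roughly max_freq, which is
 * the C = 1.25 "tipping point" mentioned in the kernel-doc. The raw value
 * is then passed to cpufreq_driver_resolve_freq(), which picks the lowest
 * driver-supported frequency at or above it.
 */
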
static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);

	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
	sg_cpu->util_cfs = cpu_util_cfs(rq);
	sg_cpu->util_dl = cpu_util_dl(rq);
}

static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);
	unsigned long util;

	if (rq->rt.rt_nr_running) {
		util = sg_cpu->max;
	} else {
		util = sg_cpu->util_dl;
		if (rq->cfs.h_nr_running)
			util += sg_cpu->util_cfs;
	}

	/*
	 * Ideally we would like to set util_dl as min/guaranteed freq and
	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
	 * ready for such an interface. So, we only do the latter for now.
	 */
	return min(util, sg_cpu->max);
}

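/*
 * Example of the aggregation above, with illustrative numbers: on a CPU
 * with max = 1024, any runnable RT task forces util = 1024 (i.e. request
 * the maximum frequency); otherwise util_dl = 100 and util_cfs = 300
 * combine into util = 400, and the result is clamped so that it never
 * exceeds sg_cpu->max.
 */
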
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
{
	if (flags & SCHED_CPUFREQ_IOWAIT) {
		if (sg_cpu->iowait_boost_pending)
			return;

		sg_cpu->iowait_boost_pending = true;

		if (sg_cpu->iowait_boost) {
			sg_cpu->iowait_boost <<= 1;
			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
		} else {
			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
		}
	} else if (sg_cpu->iowait_boost) {
		s64 delta_ns = time - sg_cpu->last_update;

		/* Clear iowait_boost if the CPU appears to have been idle. */
		if (delta_ns > TICK_NSEC) {
			sg_cpu->iowait_boost = 0;
			sg_cpu->iowait_boost_pending = false;
		}
	}
}

static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
			       unsigned long *max)
{
	unsigned int boost_util, boost_max;

	if (!sg_cpu->iowait_boost)
		return;

	if (sg_cpu->iowait_boost_pending) {
		sg_cpu->iowait_boost_pending = false;
	} else {
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	boost_util = sg_cpu->iowait_boost;
	boost_max = sg_cpu->iowait_boost_max;

	if (*util * boost_max < *max * boost_util) {
		*util = boost_util;
		*max = boost_max;
	}
}

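/*
 * Example of the boost lifecycle, with illustrative policy limits of
 * min = 400000 and iowait_boost_max = 2000000 (kHz): consecutive
 * SCHED_CPUFREQ_IOWAIT updates ramp the boost 400000 -> 800000 ->
 * 1600000 -> 2000000 (capped) in sugov_set_iowait_boost(), while updates
 * without the flag halve it in sugov_iowait_boost() until it drops below
 * policy->min (2000000 -> 1000000 -> 500000 -> 0, since 250000 < 400000).
 * The boosted value only replaces (util, max) in the comparison above if
 * boost_util / boost_max exceeds the current util / max ratio.
 */
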
#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long util, max;
	unsigned int next_f;
	bool busy;

	sugov_set_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	busy = sugov_cpu_is_busy(sg_cpu);

	sugov_get_util(sg_cpu);
	max = sg_cpu->max;
	util = sugov_aggregate_util(sg_cpu);
	sugov_iowait_boost(sg_cpu, &util, &max);
	next_f = get_next_freq(sg_policy, util, max);
	/*
	 * Do not reduce the frequency if the CPU has not been idle
	 * recently, as the reduction is likely to be premature then.
	 */
	if (busy && next_f < sg_policy->next_freq) {
		next_f = sg_policy->next_freq;

		/* Reset cached freq as next_freq has changed */
		sg_policy->cached_raw_freq = 0;
	}

	sugov_update_commit(sg_policy, time, next_f);
}

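/*
 * Example of the busy-CPU filter above, with illustrative frequencies: if
 * the CPU took no new idle calls since the last check (busy == true),
 * sg_policy->next_freq is 1800000 and the freshly computed next_f is
 * 1200000, the reduction is skipped and next_f stays at 1800000.
 * cached_raw_freq is zeroed so that the stale cached value cannot
 * short-circuit the next get_next_freq() evaluation.
 */
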
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max = 1;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long j_util, j_max;
		s64 delta_ns;

		sugov_get_util(j_sg_cpu);

		/*
		 * If the CFS CPU utilization was last updated before the
		 * previous frequency update and the time elapsed between the
		 * last update of the CPU utilization and the last frequency
		 * update is long enough, reset iowait_boost and util_cfs, as
		 * they are now probably stale. However, still consider the
		 * CPU contribution if it has some DEADLINE utilization
		 * (util_dl).
		 */
		delta_ns = time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC) {
			j_sg_cpu->iowait_boost = 0;
			j_sg_cpu->iowait_boost_pending = false;
		}

		j_max = j_sg_cpu->max;
		j_util = sugov_aggregate_util(j_sg_cpu);
		sugov_iowait_boost(j_sg_cpu, &j_util, &j_max);
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}
	}

	return get_next_freq(sg_policy, util, max);
}

static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_set_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, time);
		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	/*
	 * For RT tasks, the schedutil governor shoots the frequency to
	 * maximum. Special care must be taken to ensure that this kthread
	 * doesn't result in the same behavior.
	 *
	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
	 * updated only at the end of the sugov_work() function and before that
	 * the schedutil governor rejects all other frequency scaling requests.
	 *
	 * There is a very rare case though, where the RT thread yields right
	 * after the work_in_progress flag is cleared. The effects of that are
	 * neglected for now.
	 */
	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};

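/*
 * The rate_limit_us tunable defined above is exposed through sysfs. As an
 * illustration (the exact path depends on whether the governor tunables
 * are per-policy on the given system), something like:
 *
 *   cat /sys/devices/system/cpu/cpufreq/policy0/schedutil/rate_limit_us
 *   echo 2000 > /sys/devices/system/cpu/cpufreq/policy0/schedutil/rate_limit_us
 *
 * reads and updates the tunable; the store path above then recomputes
 * freq_update_delay_ns for every policy attached to the attr_set.
 */
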
/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size = sizeof(struct sched_attr),
		.sched_policy = SCHED_DEADLINE,
		.sched_flags = SCHED_FLAG_SUGOV,
		.sched_nice = 0,
		.sched_priority = 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime = 1000000,
		.sched_deadline = 10000000,
		.sched_period = 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;

	/* Kthread is bound to all CPUs by default */
	if (!policy->dvfs_possible_from_any_cpu)
		kthread_bind_mask(thread, policy->related_cpus);

	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

stop_kthread:
	sugov_kthread_stop(sg_policy);
	/*
	 * Unlock here rather than under free_sg_policy: the kthread-creation
	 * failure path jumps to free_sg_policy before the mutex is taken, so
	 * unlocking there would unlock a mutex that was never locked.
	 */
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;
	sg_policy->cached_raw_freq = 0;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
	}

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
					     policy_is_shared(policy) ?
							sugov_update_shared :
							sugov_update_single);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_sched();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
}

static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.owner = THIS_MODULE,
	.dynamic_switching = true,
	.init = sugov_init,
	.exit = sugov_exit,
	.start = sugov_start,
	.stop = sugov_stop,
	.limits = sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

static int __init sugov_register(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);