sched/deadline: Clean up various coding style details
[linux-2.6-block.git] / kernel / sched / cpufreq_schedutil.c
CommitLineData
9bdcb44e
RW
1/*
2 * CPUFreq governor based on scheduler-provided CPU utilization data.
3 *
4 * Copyright (C) 2016, Intel Corporation
5 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
60f05e86
VK
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
9bdcb44e 14#include <linux/cpufreq.h>
02a7b1ee 15#include <linux/kthread.h>
ae7e81c0 16#include <uapi/linux/sched/types.h>
9bdcb44e
RW
17#include <linux/slab.h>
18#include <trace/events/power.h>
19
20#include "sched.h"
21
22struct sugov_tunables {
23 struct gov_attr_set attr_set;
24 unsigned int rate_limit_us;
25};
26
27struct sugov_policy {
28 struct cpufreq_policy *policy;
29
30 struct sugov_tunables *tunables;
31 struct list_head tunables_hook;
32
33 raw_spinlock_t update_lock; /* For shared policies */
34 u64 last_freq_update_time;
35 s64 freq_update_delay_ns;
36 unsigned int next_freq;
6c4f0fa6 37 unsigned int cached_raw_freq;
9bdcb44e
RW
38
39 /* The next fields are only needed if fast switch cannot be used. */
40 struct irq_work irq_work;
02a7b1ee 41 struct kthread_work work;
9bdcb44e 42 struct mutex work_lock;
02a7b1ee
VK
43 struct kthread_worker worker;
44 struct task_struct *thread;
9bdcb44e
RW
45 bool work_in_progress;
46
47 bool need_freq_update;
48};
49
50struct sugov_cpu {
51 struct update_util_data update_util;
52 struct sugov_policy *sg_policy;
674e7541 53 unsigned int cpu;
9bdcb44e 54
a5a0809b 55 bool iowait_boost_pending;
251accf9
JF
56 unsigned int iowait_boost;
57 unsigned int iowait_boost_max;
21ca6d2c 58 u64 last_update;
5cbea469 59
9bdcb44e 60 /* The fields below are only needed when sharing a policy. */
d18be45d
JL
61 unsigned long util_cfs;
62 unsigned long util_dl;
9bdcb44e 63 unsigned long max;
58919e83 64 unsigned int flags;
b7eaf1aa
RW
65
66 /* The field below is for single-CPU policies only. */
67#ifdef CONFIG_NO_HZ_COMMON
68 unsigned long saved_idle_calls;
69#endif
9bdcb44e
RW
70};
71
72static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
73
74/************************ Governor internals ***********************/
75
76static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
77{
78 s64 delta_ns;
79
674e7541
VK
80 /*
81 * Since cpufreq_update_util() is called with rq->lock held for
82 * the @target_cpu, our per-cpu data is fully serialized.
83 *
84 * However, drivers cannot in general deal with cross-cpu
85 * requests, so while get_next_freq() will work, our
c49cbc19 86 * sugov_update_commit() call may not for the fast switching platforms.
674e7541
VK
87 *
88 * Hence stop here for remote requests if they aren't supported
89 * by the hardware, as calculating the frequency is pointless if
90 * we cannot in fact act on it.
c49cbc19
VK
91 *
92 * For the slow switching platforms, the kthread is always scheduled on
93 * the right set of CPUs and any CPU can find the next frequency and
94 * schedule the kthread.
674e7541 95 */
c49cbc19
VK
96 if (sg_policy->policy->fast_switch_enabled &&
97 !cpufreq_can_do_remote_dvfs(sg_policy->policy))
674e7541
VK
98 return false;
99
9bdcb44e
RW
100 if (sg_policy->work_in_progress)
101 return false;
102
103 if (unlikely(sg_policy->need_freq_update)) {
104 sg_policy->need_freq_update = false;
105 /*
106 * This happens when limits change, so forget the previous
107 * next_freq value and force an update.
108 */
109 sg_policy->next_freq = UINT_MAX;
110 return true;
111 }
112
113 delta_ns = time - sg_policy->last_freq_update_time;
114 return delta_ns >= sg_policy->freq_update_delay_ns;
115}
116
117static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
118 unsigned int next_freq)
119{
120 struct cpufreq_policy *policy = sg_policy->policy;
121
38d4ea22
RW
122 if (sg_policy->next_freq == next_freq)
123 return;
124
125 sg_policy->next_freq = next_freq;
9bdcb44e
RW
126 sg_policy->last_freq_update_time = time;
127
128 if (policy->fast_switch_enabled) {
9bdcb44e 129 next_freq = cpufreq_driver_fast_switch(policy, next_freq);
209887e6 130 if (!next_freq)
9bdcb44e
RW
131 return;
132
133 policy->cur = next_freq;
134 trace_cpu_frequency(next_freq, smp_processor_id());
38d4ea22 135 } else {
9bdcb44e
RW
136 sg_policy->work_in_progress = true;
137 irq_work_queue(&sg_policy->irq_work);
138 }
139}
140
141/**
142 * get_next_freq - Compute a new frequency for a given cpufreq policy.
655cb1eb 143 * @sg_policy: schedutil policy object to compute the new frequency for.
9bdcb44e
RW
144 * @util: Current CPU utilization.
145 * @max: CPU capacity.
146 *
147 * If the utilization is frequency-invariant, choose the new frequency to be
148 * proportional to it, that is
149 *
150 * next_freq = C * max_freq * util / max
151 *
152 * Otherwise, approximate the would-be frequency-invariant utilization by
153 * util_raw * (curr_freq / max_freq) which leads to
154 *
155 * next_freq = C * curr_freq * util_raw / max
156 *
157 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
5cbea469
SM
158 *
159 * The lowest driver-supported frequency which is equal or greater than the raw
160 * next_freq (as calculated above) is returned, subject to policy min/max and
161 * cpufreq driver limitations.
9bdcb44e 162 */
655cb1eb
VK
163static unsigned int get_next_freq(struct sugov_policy *sg_policy,
164 unsigned long util, unsigned long max)
9bdcb44e 165{
5cbea469 166 struct cpufreq_policy *policy = sg_policy->policy;
9bdcb44e
RW
167 unsigned int freq = arch_scale_freq_invariant() ?
168 policy->cpuinfo.max_freq : policy->cur;
169
5cbea469
SM
170 freq = (freq + (freq >> 2)) * util / max;
171
6c4f0fa6 172 if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
5cbea469 173 return sg_policy->next_freq;
6c4f0fa6 174 sg_policy->cached_raw_freq = freq;
5cbea469 175 return cpufreq_driver_resolve_freq(policy, freq);
9bdcb44e
RW
176}
177
d18be45d 178static void sugov_get_util(struct sugov_cpu *sg_cpu)
58919e83 179{
d18be45d 180 struct rq *rq = cpu_rq(sg_cpu->cpu);
8314bc83 181
d18be45d
JL
182 sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
183 sg_cpu->util_cfs = cpu_util_cfs(rq);
184 sg_cpu->util_dl = cpu_util_dl(rq);
185}
58919e83 186
d18be45d
JL
187static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
188{
d4edd662
JL
189 /*
190 * Ideally we would like to set util_dl as min/guaranteed freq and
191 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
192 * ready for such an interface. So, we only do the latter for now.
193 */
d18be45d 194 return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
58919e83
RW
195}
196
5083452f 197static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
21ca6d2c 198{
5083452f 199 if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
a5a0809b
JF
200 if (sg_cpu->iowait_boost_pending)
201 return;
202
203 sg_cpu->iowait_boost_pending = true;
204
205 if (sg_cpu->iowait_boost) {
206 sg_cpu->iowait_boost <<= 1;
207 if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
208 sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
209 } else {
210 sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
211 }
21ca6d2c
RW
212 } else if (sg_cpu->iowait_boost) {
213 s64 delta_ns = time - sg_cpu->last_update;
214
215 /* Clear iowait_boost if the CPU apprears to have been idle. */
a5a0809b 216 if (delta_ns > TICK_NSEC) {
21ca6d2c 217 sg_cpu->iowait_boost = 0;
a5a0809b
JF
218 sg_cpu->iowait_boost_pending = false;
219 }
21ca6d2c
RW
220 }
221}
222
223static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
224 unsigned long *max)
225{
251accf9 226 unsigned int boost_util, boost_max;
21ca6d2c 227
a5a0809b 228 if (!sg_cpu->iowait_boost)
21ca6d2c
RW
229 return;
230
a5a0809b
JF
231 if (sg_cpu->iowait_boost_pending) {
232 sg_cpu->iowait_boost_pending = false;
233 } else {
234 sg_cpu->iowait_boost >>= 1;
235 if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
236 sg_cpu->iowait_boost = 0;
237 return;
238 }
239 }
240
241 boost_util = sg_cpu->iowait_boost;
242 boost_max = sg_cpu->iowait_boost_max;
243
21ca6d2c
RW
244 if (*util * boost_max < *max * boost_util) {
245 *util = boost_util;
246 *max = boost_max;
247 }
21ca6d2c
RW
248}
249
b7eaf1aa
RW
250#ifdef CONFIG_NO_HZ_COMMON
251static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
252{
466a2b42 253 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
b7eaf1aa
RW
254 bool ret = idle_calls == sg_cpu->saved_idle_calls;
255
256 sg_cpu->saved_idle_calls = idle_calls;
257 return ret;
258}
259#else
260static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
261#endif /* CONFIG_NO_HZ_COMMON */
262
9bdcb44e 263static void sugov_update_single(struct update_util_data *hook, u64 time,
58919e83 264 unsigned int flags)
9bdcb44e
RW
265{
266 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
267 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
268 struct cpufreq_policy *policy = sg_policy->policy;
58919e83 269 unsigned long util, max;
9bdcb44e 270 unsigned int next_f;
b7eaf1aa 271 bool busy;
9bdcb44e 272
5083452f 273 sugov_set_iowait_boost(sg_cpu, time);
21ca6d2c
RW
274 sg_cpu->last_update = time;
275
9bdcb44e
RW
276 if (!sugov_should_update_freq(sg_policy, time))
277 return;
278
b7eaf1aa
RW
279 busy = sugov_cpu_is_busy(sg_cpu);
280
d4edd662 281 if (flags & SCHED_CPUFREQ_RT) {
58919e83
RW
282 next_f = policy->cpuinfo.max_freq;
283 } else {
d18be45d
JL
284 sugov_get_util(sg_cpu);
285 max = sg_cpu->max;
286 util = sugov_aggregate_util(sg_cpu);
21ca6d2c 287 sugov_iowait_boost(sg_cpu, &util, &max);
655cb1eb 288 next_f = get_next_freq(sg_policy, util, max);
b7eaf1aa
RW
289 /*
290 * Do not reduce the frequency if the CPU has not been idle
291 * recently, as the reduction is likely to be premature then.
292 */
07458f6a 293 if (busy && next_f < sg_policy->next_freq) {
b7eaf1aa 294 next_f = sg_policy->next_freq;
07458f6a
VK
295
296 /* Reset cached freq as next_freq has changed */
297 sg_policy->cached_raw_freq = 0;
298 }
58919e83 299 }
9bdcb44e
RW
300 sugov_update_commit(sg_policy, time, next_f);
301}
302
d86ab9cf 303static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
9bdcb44e 304{
5cbea469 305 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
9bdcb44e 306 struct cpufreq_policy *policy = sg_policy->policy;
cba1dfb5 307 unsigned long util = 0, max = 1;
9bdcb44e
RW
308 unsigned int j;
309
9bdcb44e 310 for_each_cpu(j, policy->cpus) {
cba1dfb5 311 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
9bdcb44e
RW
312 unsigned long j_util, j_max;
313 s64 delta_ns;
314
9bdcb44e 315 /*
0fa7d181
JL
316 * If the CFS CPU utilization was last updated before the
317 * previous frequency update and the time elapsed between the
318 * last update of the CPU utilization and the last frequency
319 * update is long enough, reset iowait_boost and util_cfs, as
320 * they are now probably stale. However, still consider the
321 * CPU contribution if it has some DEADLINE utilization
322 * (util_dl).
9bdcb44e 323 */
d86ab9cf 324 delta_ns = time - j_sg_cpu->last_update;
21ca6d2c
RW
325 if (delta_ns > TICK_NSEC) {
326 j_sg_cpu->iowait_boost = 0;
a5a0809b 327 j_sg_cpu->iowait_boost_pending = false;
0fa7d181
JL
328 j_sg_cpu->util_cfs = 0;
329 if (j_sg_cpu->util_dl == 0)
330 continue;
21ca6d2c 331 }
d4edd662 332 if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
cba1dfb5 333 return policy->cpuinfo.max_freq;
9bdcb44e
RW
334
335 j_max = j_sg_cpu->max;
d18be45d 336 j_util = sugov_aggregate_util(j_sg_cpu);
9bdcb44e
RW
337 if (j_util * max > j_max * util) {
338 util = j_util;
339 max = j_max;
340 }
21ca6d2c
RW
341
342 sugov_iowait_boost(j_sg_cpu, &util, &max);
9bdcb44e
RW
343 }
344
655cb1eb 345 return get_next_freq(sg_policy, util, max);
9bdcb44e
RW
346}
347
348static void sugov_update_shared(struct update_util_data *hook, u64 time,
58919e83 349 unsigned int flags)
9bdcb44e
RW
350{
351 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
352 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
353 unsigned int next_f;
354
355 raw_spin_lock(&sg_policy->update_lock);
356
d18be45d 357 sugov_get_util(sg_cpu);
58919e83 358 sg_cpu->flags = flags;
21ca6d2c 359
5083452f 360 sugov_set_iowait_boost(sg_cpu, time);
9bdcb44e
RW
361 sg_cpu->last_update = time;
362
363 if (sugov_should_update_freq(sg_policy, time)) {
d4edd662 364 if (flags & SCHED_CPUFREQ_RT)
cba1dfb5
VK
365 next_f = sg_policy->policy->cpuinfo.max_freq;
366 else
d86ab9cf 367 next_f = sugov_next_freq_shared(sg_cpu, time);
cba1dfb5 368
9bdcb44e
RW
369 sugov_update_commit(sg_policy, time, next_f);
370 }
371
372 raw_spin_unlock(&sg_policy->update_lock);
373}
374
02a7b1ee 375static void sugov_work(struct kthread_work *work)
9bdcb44e
RW
376{
377 struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
378
379 mutex_lock(&sg_policy->work_lock);
380 __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
381 CPUFREQ_RELATION_L);
382 mutex_unlock(&sg_policy->work_lock);
383
384 sg_policy->work_in_progress = false;
385}
386
387static void sugov_irq_work(struct irq_work *irq_work)
388{
389 struct sugov_policy *sg_policy;
390
391 sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
02a7b1ee
VK
392
393 /*
d4edd662
JL
394 * For RT tasks, the schedutil governor shoots the frequency to maximum.
395 * Special care must be taken to ensure that this kthread doesn't result
396 * in the same behavior.
02a7b1ee
VK
397 *
398 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
d06e622d
VK
399 * updated only at the end of the sugov_work() function and before that
400 * the schedutil governor rejects all other frequency scaling requests.
02a7b1ee 401 *
d06e622d 402 * There is a very rare case though, where the RT thread yields right
02a7b1ee
VK
403 * after the work_in_progress flag is cleared. The effects of that are
404 * neglected for now.
405 */
406 kthread_queue_work(&sg_policy->worker, &sg_policy->work);
9bdcb44e
RW
407}
408
409/************************** sysfs interface ************************/
410
/*
 * Tunables shared by all policies when governors are not per-policy; set in
 * sugov_tunables_alloc() and cleared in sugov_tunables_free().
 * global_tunables_lock is held around their setup and teardown in
 * sugov_init() and sugov_exit().
 */
static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);
413
414static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
415{
416 return container_of(attr_set, struct sugov_tunables, attr_set);
417}
418
419static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
420{
421 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
422
423 return sprintf(buf, "%u\n", tunables->rate_limit_us);
424}
425
426static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
427 size_t count)
428{
429 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
430 struct sugov_policy *sg_policy;
431 unsigned int rate_limit_us;
432
433 if (kstrtouint(buf, 10, &rate_limit_us))
434 return -EINVAL;
435
436 tunables->rate_limit_us = rate_limit_us;
437
438 list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
439 sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
440
441 return count;
442}
443
444static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
445
446static struct attribute *sugov_attributes[] = {
447 &rate_limit_us.attr,
448 NULL
449};
450
451static struct kobj_type sugov_tunables_ktype = {
452 .default_attrs = sugov_attributes,
453 .sysfs_ops = &governor_sysfs_ops,
454};
455
456/********************** cpufreq governor interface *********************/
457
458static struct cpufreq_governor schedutil_gov;
459
460static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
461{
462 struct sugov_policy *sg_policy;
463
464 sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
465 if (!sg_policy)
466 return NULL;
467
468 sg_policy->policy = policy;
9bdcb44e
RW
469 raw_spin_lock_init(&sg_policy->update_lock);
470 return sg_policy;
471}
472
/* Release a struct sugov_policy obtained from sugov_policy_alloc(). */
static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}
477
02a7b1ee
VK
478static int sugov_kthread_create(struct sugov_policy *sg_policy)
479{
480 struct task_struct *thread;
794a56eb
JL
481 struct sched_attr attr = {
482 .size = sizeof(struct sched_attr),
483 .sched_policy = SCHED_DEADLINE,
484 .sched_flags = SCHED_FLAG_SUGOV,
485 .sched_nice = 0,
486 .sched_priority = 0,
487 /*
488 * Fake (unused) bandwidth; workaround to "fix"
489 * priority inheritance.
490 */
491 .sched_runtime = 1000000,
492 .sched_deadline = 10000000,
493 .sched_period = 10000000,
494 };
02a7b1ee
VK
495 struct cpufreq_policy *policy = sg_policy->policy;
496 int ret;
497
498 /* kthread only required for slow path */
499 if (policy->fast_switch_enabled)
500 return 0;
501
502 kthread_init_work(&sg_policy->work, sugov_work);
503 kthread_init_worker(&sg_policy->worker);
504 thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
505 "sugov:%d",
506 cpumask_first(policy->related_cpus));
507 if (IS_ERR(thread)) {
508 pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
509 return PTR_ERR(thread);
510 }
511
794a56eb 512 ret = sched_setattr_nocheck(thread, &attr);
02a7b1ee
VK
513 if (ret) {
514 kthread_stop(thread);
794a56eb 515 pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
02a7b1ee
VK
516 return ret;
517 }
518
519 sg_policy->thread = thread;
e2cabe48
VK
520
521 /* Kthread is bound to all CPUs by default */
522 if (!policy->dvfs_possible_from_any_cpu)
523 kthread_bind_mask(thread, policy->related_cpus);
524
21ef5729
VK
525 init_irq_work(&sg_policy->irq_work, sugov_irq_work);
526 mutex_init(&sg_policy->work_lock);
527
02a7b1ee
VK
528 wake_up_process(thread);
529
530 return 0;
531}
532
533static void sugov_kthread_stop(struct sugov_policy *sg_policy)
534{
535 /* kthread only required for slow path */
536 if (sg_policy->policy->fast_switch_enabled)
537 return;
538
539 kthread_flush_worker(&sg_policy->worker);
540 kthread_stop(sg_policy->thread);
21ef5729 541 mutex_destroy(&sg_policy->work_lock);
02a7b1ee
VK
542}
543
9bdcb44e
RW
544static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
545{
546 struct sugov_tunables *tunables;
547
548 tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
549 if (tunables) {
550 gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
551 if (!have_governor_per_policy())
552 global_tunables = tunables;
553 }
554 return tunables;
555}
556
557static void sugov_tunables_free(struct sugov_tunables *tunables)
558{
559 if (!have_governor_per_policy())
560 global_tunables = NULL;
561
562 kfree(tunables);
563}
564
565static int sugov_init(struct cpufreq_policy *policy)
566{
567 struct sugov_policy *sg_policy;
568 struct sugov_tunables *tunables;
9bdcb44e
RW
569 int ret = 0;
570
571 /* State should be equivalent to EXIT */
572 if (policy->governor_data)
573 return -EBUSY;
574
4a71ce43
VK
575 cpufreq_enable_fast_switch(policy);
576
9bdcb44e 577 sg_policy = sugov_policy_alloc(policy);
4a71ce43
VK
578 if (!sg_policy) {
579 ret = -ENOMEM;
580 goto disable_fast_switch;
581 }
9bdcb44e 582
02a7b1ee
VK
583 ret = sugov_kthread_create(sg_policy);
584 if (ret)
585 goto free_sg_policy;
586
9bdcb44e
RW
587 mutex_lock(&global_tunables_lock);
588
589 if (global_tunables) {
590 if (WARN_ON(have_governor_per_policy())) {
591 ret = -EINVAL;
02a7b1ee 592 goto stop_kthread;
9bdcb44e
RW
593 }
594 policy->governor_data = sg_policy;
595 sg_policy->tunables = global_tunables;
596
597 gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
598 goto out;
599 }
600
601 tunables = sugov_tunables_alloc(sg_policy);
602 if (!tunables) {
603 ret = -ENOMEM;
02a7b1ee 604 goto stop_kthread;
9bdcb44e
RW
605 }
606
aa7519af 607 tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
9bdcb44e
RW
608
609 policy->governor_data = sg_policy;
610 sg_policy->tunables = tunables;
611
612 ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
613 get_governor_parent_kobj(policy), "%s",
614 schedutil_gov.name);
615 if (ret)
616 goto fail;
617
8e2ddb03 618out:
9bdcb44e 619 mutex_unlock(&global_tunables_lock);
9bdcb44e
RW
620 return 0;
621
8e2ddb03 622fail:
9bdcb44e
RW
623 policy->governor_data = NULL;
624 sugov_tunables_free(tunables);
625
02a7b1ee
VK
626stop_kthread:
627 sugov_kthread_stop(sg_policy);
628
8e2ddb03 629free_sg_policy:
9bdcb44e
RW
630 mutex_unlock(&global_tunables_lock);
631
632 sugov_policy_free(sg_policy);
4a71ce43
VK
633
634disable_fast_switch:
635 cpufreq_disable_fast_switch(policy);
636
60f05e86 637 pr_err("initialization failed (error %d)\n", ret);
9bdcb44e
RW
638 return ret;
639}
640
e788892b 641static void sugov_exit(struct cpufreq_policy *policy)
9bdcb44e
RW
642{
643 struct sugov_policy *sg_policy = policy->governor_data;
644 struct sugov_tunables *tunables = sg_policy->tunables;
645 unsigned int count;
646
647 mutex_lock(&global_tunables_lock);
648
649 count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
650 policy->governor_data = NULL;
651 if (!count)
652 sugov_tunables_free(tunables);
653
654 mutex_unlock(&global_tunables_lock);
655
02a7b1ee 656 sugov_kthread_stop(sg_policy);
9bdcb44e 657 sugov_policy_free(sg_policy);
4a71ce43 658 cpufreq_disable_fast_switch(policy);
9bdcb44e
RW
659}
660
661static int sugov_start(struct cpufreq_policy *policy)
662{
663 struct sugov_policy *sg_policy = policy->governor_data;
664 unsigned int cpu;
665
666 sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
667 sg_policy->last_freq_update_time = 0;
668 sg_policy->next_freq = UINT_MAX;
669 sg_policy->work_in_progress = false;
670 sg_policy->need_freq_update = false;
6c4f0fa6 671 sg_policy->cached_raw_freq = 0;
9bdcb44e
RW
672
673 for_each_cpu(cpu, policy->cpus) {
674 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
675
4296f23e 676 memset(sg_cpu, 0, sizeof(*sg_cpu));
d62d813c 677 sg_cpu->cpu = cpu;
9bdcb44e 678 sg_cpu->sg_policy = sg_policy;
6257e704 679 sg_cpu->flags = 0;
4296f23e 680 sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
ab2f7cf1
VM
681 }
682
683 for_each_cpu(cpu, policy->cpus) {
684 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
685
4296f23e
RW
686 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
687 policy_is_shared(policy) ?
688 sugov_update_shared :
689 sugov_update_single);
9bdcb44e
RW
690 }
691 return 0;
692}
693
e788892b 694static void sugov_stop(struct cpufreq_policy *policy)
9bdcb44e
RW
695{
696 struct sugov_policy *sg_policy = policy->governor_data;
697 unsigned int cpu;
698
699 for_each_cpu(cpu, policy->cpus)
700 cpufreq_remove_update_util_hook(cpu);
701
702 synchronize_sched();
703
21ef5729
VK
704 if (!policy->fast_switch_enabled) {
705 irq_work_sync(&sg_policy->irq_work);
706 kthread_cancel_work_sync(&sg_policy->work);
707 }
9bdcb44e
RW
708}
709
e788892b 710static void sugov_limits(struct cpufreq_policy *policy)
9bdcb44e
RW
711{
712 struct sugov_policy *sg_policy = policy->governor_data;
713
714 if (!policy->fast_switch_enabled) {
715 mutex_lock(&sg_policy->work_lock);
bf2be2de 716 cpufreq_policy_apply_limits(policy);
9bdcb44e
RW
717 mutex_unlock(&sg_policy->work_lock);
718 }
719
720 sg_policy->need_freq_update = true;
9bdcb44e
RW
721}
722
723static struct cpufreq_governor schedutil_gov = {
724 .name = "schedutil",
9bdcb44e 725 .owner = THIS_MODULE,
560c6e45 726 .dynamic_switching = true,
e788892b
RW
727 .init = sugov_init,
728 .exit = sugov_exit,
729 .start = sugov_start,
730 .stop = sugov_stop,
731 .limits = sugov_limits,
9bdcb44e
RW
732};
733
9bdcb44e
RW
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
/* Return schedutil as the default governor when so configured. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif /* CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL */
58919e83
RW
740
741static int __init sugov_register(void)
742{
743 return cpufreq_register_governor(&schedutil_gov);
744}
745fs_initcall(sugov_register);