afs: Provide a splice-read wrapper
[linux-block.git] / drivers / cpufreq / cpufreq_ondemand.c
CommitLineData
d2912cb1 1// SPDX-License-Identifier: GPL-2.0-only
1da177e4
LT
2/*
3 * drivers/cpufreq/cpufreq_ondemand.c
4 *
5 * Copyright (C) 2001 Russell King
6 * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
7 * Jun Nakajima <jun.nakajima@intel.com>
1da177e4
LT
8 */
9
/* Prefix prepended to the format string of this file's pr_*() messages. */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
11
#include <linux/cpu.h>
#include <linux/math64.h>
#include <linux/percpu-defs.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/sched/cpufreq.h>

#include "cpufreq_ondemand.h"
1da177e4 19
/*
 * On-demand governor tunable defaults and limits.
 */

/* up_threshold default when get_cpu_idle_time_us() reports -1 (od_init()). */
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
/* Initial sampling_down_factor set by od_init(). */
#define DEF_SAMPLING_DOWN_FACTOR		(1)
/* Largest value accepted by sampling_down_factor_store(). */
#define MAX_SAMPLING_DOWN_FACTOR		(100000)
/* up_threshold default when idle micro accounting is available (od_init()). */
#define MICRO_FREQUENCY_UP_THRESHOLD		(95)
/* NOTE(review): not referenced in the visible part of this file. */
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
/* Inclusive bounds enforced by up_threshold_store(). */
#define MIN_FREQUENCY_UP_THRESHOLD		(1)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)
28
fb30809e
JS
29static struct od_ops od_ops;
30
c2837558
JS
31static unsigned int default_powersave_bias;
32
/*
 * Decide the default for the io_is_busy tunable.
 *
 * Whether IO wait time should count as busy time depends on how efficient
 * idling at a higher frequency/voltage is. Pavel Machek says it is not
 * efficient on various generations of AMD and old Intel systems, and
 * Mike Chan (android.com) claims the same for ARM. So only a whitelist of
 * known CPU series defaults to "busy"; everything else is left to the user.
 *
 * Returns 1 if IO wait should be treated as busy by default, 0 otherwise.
 */
static int should_io_be_busy(void)
{
	int io_busy = 0;

#if defined(CONFIG_X86)
	/* Intel Core 2 (family 6, model 15) and later have an efficient idle. */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model >= 15)
		io_busy = 1;
#endif

	return io_busy;
}
55
05ca0350
AS
56/*
57 * Find right freq to be set now with powersave_bias on.
07aa4402
RW
58 * Returns the freq_hi to be used right now and will set freq_hi_delay_us,
59 * freq_lo, and freq_lo_delay_us in percpu area for averaging freqs.
05ca0350 60 */
fb30809e 61static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
4471a34f 62 unsigned int freq_next, unsigned int relation)
05ca0350
AS
63{
64 unsigned int freq_req, freq_reduc, freq_avg;
65 unsigned int freq_hi, freq_lo;
d218ed77 66 unsigned int index;
07aa4402 67 unsigned int delay_hi_us;
bc505475 68 struct policy_dbs_info *policy_dbs = policy->governor_data;
7d5a9956 69 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
bc505475 70 struct dbs_data *dbs_data = policy_dbs->dbs_data;
4d5dcc42 71 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
34ac5d7a 72 struct cpufreq_frequency_table *freq_table = policy->freq_table;
05ca0350 73
34ac5d7a 74 if (!freq_table) {
05ca0350 75 dbs_info->freq_lo = 0;
07aa4402 76 dbs_info->freq_lo_delay_us = 0;
05ca0350
AS
77 return freq_next;
78 }
79
d218ed77 80 index = cpufreq_frequency_table_target(policy, freq_next, relation);
34ac5d7a 81 freq_req = freq_table[index].frequency;
4d5dcc42 82 freq_reduc = freq_req * od_tuners->powersave_bias / 1000;
05ca0350
AS
83 freq_avg = freq_req - freq_reduc;
84
85 /* Find freq bounds for freq_avg in freq_table */
1f39fa0d
VD
86 index = cpufreq_table_find_index_h(policy, freq_avg,
87 relation & CPUFREQ_RELATION_E);
34ac5d7a 88 freq_lo = freq_table[index].frequency;
1f39fa0d
VD
89 index = cpufreq_table_find_index_l(policy, freq_avg,
90 relation & CPUFREQ_RELATION_E);
34ac5d7a 91 freq_hi = freq_table[index].frequency;
05ca0350
AS
92
93 /* Find out how long we have to be in hi and lo freqs */
94 if (freq_hi == freq_lo) {
95 dbs_info->freq_lo = 0;
07aa4402 96 dbs_info->freq_lo_delay_us = 0;
05ca0350
AS
97 return freq_lo;
98 }
07aa4402
RW
99 delay_hi_us = (freq_avg - freq_lo) * dbs_data->sampling_rate;
100 delay_hi_us += (freq_hi - freq_lo) / 2;
101 delay_hi_us /= freq_hi - freq_lo;
102 dbs_info->freq_hi_delay_us = delay_hi_us;
05ca0350 103 dbs_info->freq_lo = freq_lo;
07aa4402 104 dbs_info->freq_lo_delay_us = dbs_data->sampling_rate - delay_hi_us;
05ca0350
AS
105 return freq_hi;
106}
107
d1db75ff 108static void ondemand_powersave_bias_init(struct cpufreq_policy *policy)
05ca0350 109{
7d5a9956 110 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
d1db75ff 111
d1db75ff 112 dbs_info->freq_lo = 0;
05ca0350
AS
113}
114
3a3e9e06 115static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
4471a34f 116{
bc505475
RW
117 struct policy_dbs_info *policy_dbs = policy->governor_data;
118 struct dbs_data *dbs_data = policy_dbs->dbs_data;
4d5dcc42
VK
119 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
120
121 if (od_tuners->powersave_bias)
3a3e9e06 122 freq = od_ops.powersave_bias_target(policy, freq,
b894d20e 123 CPUFREQ_RELATION_HE);
3a3e9e06 124 else if (policy->cur == policy->max)
4471a34f 125 return;
0e625ac1 126
3a3e9e06 127 __cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ?
b894d20e 128 CPUFREQ_RELATION_LE : CPUFREQ_RELATION_HE);
4471a34f
VK
129}
130
131/*
132 * Every sampling_rate, we check, if current idle time is less than 20%
dfa5bb62
SK
133 * (default), then we try to increase frequency. Else, we adjust the frequency
134 * proportional to load.
4471a34f 135 */
4cccf755 136static void od_update(struct cpufreq_policy *policy)
1da177e4 137{
7d5a9956
RW
138 struct policy_dbs_info *policy_dbs = policy->governor_data;
139 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
bc505475 140 struct dbs_data *dbs_data = policy_dbs->dbs_data;
4d5dcc42 141 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
4cccf755 142 unsigned int load = dbs_update(policy);
4471a34f
VK
143
144 dbs_info->freq_lo = 0;
145
146 /* Check for frequency increase */
ff4b1789 147 if (load > dbs_data->up_threshold) {
4471a34f
VK
148 /* If switching to max speed, apply sampling_down_factor */
149 if (policy->cur < policy->max)
57dc3bcd 150 policy_dbs->rate_mult = dbs_data->sampling_down_factor;
4471a34f 151 dbs_freq_increase(policy, policy->max);
dfa5bb62
SK
152 } else {
153 /* Calculate the next frequency proportional to load */
6393d6a1
SK
154 unsigned int freq_next, min_f, max_f;
155
156 min_f = policy->cpuinfo.min_freq;
157 max_f = policy->cpuinfo.max_freq;
158 freq_next = min_f + load * (max_f - min_f) / 100;
4471a34f
VK
159
160 /* No longer fully busy, reset rate_mult */
57dc3bcd 161 policy_dbs->rate_mult = 1;
4471a34f 162
a7f35cff
RW
163 if (od_tuners->powersave_bias)
164 freq_next = od_ops.powersave_bias_target(policy,
165 freq_next,
b894d20e 166 CPUFREQ_RELATION_LE);
a7f35cff 167
b894d20e 168 __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_CE);
4471a34f 169 }
1da177e4
LT
170}
171
26f0dbc9 172static unsigned int od_dbs_update(struct cpufreq_policy *policy)
4471a34f 173{
bc505475
RW
174 struct policy_dbs_info *policy_dbs = policy->governor_data;
175 struct dbs_data *dbs_data = policy_dbs->dbs_data;
7d5a9956 176 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
6e96c5b3 177 int sample_type = dbs_info->sample_type;
4447266b 178
4471a34f 179 /* Common NORMAL_SAMPLE setup */
43e0ee36 180 dbs_info->sample_type = OD_NORMAL_SAMPLE;
4cccf755
RW
181 /*
182 * OD_SUB_SAMPLE doesn't make sense if sample_delay_ns is 0, so ignore
183 * it then.
184 */
185 if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) {
43e0ee36 186 __cpufreq_driver_target(policy, dbs_info->freq_lo,
b894d20e 187 CPUFREQ_RELATION_HE);
07aa4402 188 return dbs_info->freq_lo_delay_us;
6e96c5b3
RW
189 }
190
191 od_update(policy);
192
193 if (dbs_info->freq_lo) {
26f0dbc9 194 /* Setup SUB_SAMPLE */
6e96c5b3 195 dbs_info->sample_type = OD_SUB_SAMPLE;
07aa4402 196 return dbs_info->freq_hi_delay_us;
4471a34f
VK
197 }
198
07aa4402 199 return dbs_data->sampling_rate * policy_dbs->rate_mult;
da53d61e
FB
200}
201
4471a34f 202/************************** sysfs interface ************************/
7bdad34d 203static struct dbs_governor od_dbs_gov;
1da177e4 204
85750bcd 205static ssize_t io_is_busy_store(struct gov_attr_set *attr_set, const char *buf,
0dd3c1d6 206 size_t count)
19379b11 207{
0dd3c1d6 208 struct dbs_data *dbs_data = to_dbs_data(attr_set);
19379b11
AV
209 unsigned int input;
210 int ret;
211
212 ret = sscanf(buf, "%u", &input);
213 if (ret != 1)
214 return -EINVAL;
8847e038 215 dbs_data->io_is_busy = !!input;
9366d840
SK
216
217 /* we need to re-evaluate prev_cpu_idle */
8c8f77fd 218 gov_update_cpu_data(dbs_data);
a33cce1c 219
19379b11
AV
220 return count;
221}
222
85750bcd 223static ssize_t up_threshold_store(struct gov_attr_set *attr_set,
0dd3c1d6 224 const char *buf, size_t count)
1da177e4 225{
0dd3c1d6 226 struct dbs_data *dbs_data = to_dbs_data(attr_set);
1da177e4
LT
227 unsigned int input;
228 int ret;
ffac80e9 229 ret = sscanf(buf, "%u", &input);
1da177e4 230
32ee8c3e 231 if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
c29f1403 232 input < MIN_FREQUENCY_UP_THRESHOLD) {
1da177e4
LT
233 return -EINVAL;
234 }
4bd4e428 235
ff4b1789 236 dbs_data->up_threshold = input;
1da177e4
LT
237 return count;
238}
239
85750bcd 240static ssize_t sampling_down_factor_store(struct gov_attr_set *attr_set,
0dd3c1d6 241 const char *buf, size_t count)
3f78a9f7 242{
0dd3c1d6 243 struct dbs_data *dbs_data = to_dbs_data(attr_set);
57dc3bcd
RW
244 struct policy_dbs_info *policy_dbs;
245 unsigned int input;
3f78a9f7
DN
246 int ret;
247 ret = sscanf(buf, "%u", &input);
248
249 if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
250 return -EINVAL;
57dc3bcd 251
ff4b1789 252 dbs_data->sampling_down_factor = input;
3f78a9f7
DN
253
254 /* Reset down sampling multiplier in case it was active */
0dd3c1d6 255 list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
57dc3bcd
RW
256 /*
257 * Doing this without locking might lead to using different
26f0dbc9 258 * rate_mult values in od_update() and od_dbs_update().
57dc3bcd 259 */
26f0dbc9 260 mutex_lock(&policy_dbs->update_mutex);
57dc3bcd 261 policy_dbs->rate_mult = 1;
26f0dbc9 262 mutex_unlock(&policy_dbs->update_mutex);
3f78a9f7 263 }
57dc3bcd 264
3f78a9f7
DN
265 return count;
266}
267
85750bcd 268static ssize_t ignore_nice_load_store(struct gov_attr_set *attr_set,
0dd3c1d6 269 const char *buf, size_t count)
3d5ee9e5 270{
0dd3c1d6 271 struct dbs_data *dbs_data = to_dbs_data(attr_set);
3d5ee9e5
DJ
272 unsigned int input;
273 int ret;
274
ffac80e9 275 ret = sscanf(buf, "%u", &input);
2b03f891 276 if (ret != 1)
3d5ee9e5
DJ
277 return -EINVAL;
278
2b03f891 279 if (input > 1)
3d5ee9e5 280 input = 1;
32ee8c3e 281
ff4b1789 282 if (input == dbs_data->ignore_nice_load) { /* nothing to do */
3d5ee9e5
DJ
283 return count;
284 }
ff4b1789 285 dbs_data->ignore_nice_load = input;
3d5ee9e5 286
ccb2fe20 287 /* we need to re-evaluate prev_cpu_idle */
8c8f77fd 288 gov_update_cpu_data(dbs_data);
1ca3abdb 289
3d5ee9e5
DJ
290 return count;
291}
292
85750bcd 293static ssize_t powersave_bias_store(struct gov_attr_set *attr_set,
0dd3c1d6 294 const char *buf, size_t count)
05ca0350 295{
0dd3c1d6 296 struct dbs_data *dbs_data = to_dbs_data(attr_set);
4d5dcc42 297 struct od_dbs_tuners *od_tuners = dbs_data->tuners;
d1db75ff 298 struct policy_dbs_info *policy_dbs;
05ca0350
AS
299 unsigned int input;
300 int ret;
301 ret = sscanf(buf, "%u", &input);
302
303 if (ret != 1)
304 return -EINVAL;
305
306 if (input > 1000)
307 input = 1000;
308
4d5dcc42 309 od_tuners->powersave_bias = input;
d1db75ff 310
0dd3c1d6 311 list_for_each_entry(policy_dbs, &attr_set->policy_list, list)
d1db75ff
RW
312 ondemand_powersave_bias_init(policy_dbs->policy);
313
05ca0350
AS
314 return count;
315}
316
/*
 * Attribute boilerplate. The gov_show_one_common(), gov_show_one() and
 * gov_attr_rw() macros are defined outside this file; presumably they emit
 * the show handlers and the attribute objects listed in od_attrs[].
 * NOTE(review): sampling_rate has no store handler in this file, so its
 * store side must come from elsewhere - confirm.
 */
gov_show_one_common(sampling_rate);
gov_show_one_common(up_threshold);
gov_show_one_common(sampling_down_factor);
gov_show_one_common(ignore_nice_load);
gov_show_one_common(io_is_busy);
gov_show_one(od, powersave_bias);

gov_attr_rw(sampling_rate);
gov_attr_rw(io_is_busy);
gov_attr_rw(up_threshold);
gov_attr_rw(sampling_down_factor);
gov_attr_rw(ignore_nice_load);
gov_attr_rw(powersave_bias);
c4435630 330
fe262d5c 331static struct attribute *od_attrs[] = {
c4435630
VK
332 &sampling_rate.attr,
333 &up_threshold.attr,
334 &sampling_down_factor.attr,
335 &ignore_nice_load.attr,
336 &powersave_bias.attr,
337 &io_is_busy.attr,
1da177e4
LT
338 NULL
339};
fe262d5c 340ATTRIBUTE_GROUPS(od);
1da177e4 341
1da177e4
LT
342/************************** sysfs end ************************/
343
7d5a9956
RW
344static struct policy_dbs_info *od_alloc(void)
345{
346 struct od_policy_dbs_info *dbs_info;
347
348 dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
349 return dbs_info ? &dbs_info->policy_dbs : NULL;
350}
351
/* Free the od_policy_dbs_info that contains @policy_dbs. */
static void od_free(struct policy_dbs_info *policy_dbs)
{
	struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs);

	kfree(od_info);
}
356
9a15fb2c 357static int od_init(struct dbs_data *dbs_data)
4d5dcc42
VK
358{
359 struct od_dbs_tuners *tuners;
360 u64 idle_time;
361 int cpu;
362
d5b73cd8 363 tuners = kzalloc(sizeof(*tuners), GFP_KERNEL);
a69d6b29 364 if (!tuners)
4d5dcc42 365 return -ENOMEM;
4d5dcc42
VK
366
367 cpu = get_cpu();
368 idle_time = get_cpu_idle_time_us(cpu, NULL);
369 put_cpu();
370 if (idle_time != -1ULL) {
371 /* Idle micro accounting is supported. Use finer thresholds */
ff4b1789 372 dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
4d5dcc42 373 } else {
ff4b1789 374 dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
4d5dcc42
VK
375 }
376
ff4b1789
VK
377 dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
378 dbs_data->ignore_nice_load = 0;
c2837558 379 tuners->powersave_bias = default_powersave_bias;
8847e038 380 dbs_data->io_is_busy = should_io_be_busy();
4d5dcc42
VK
381
382 dbs_data->tuners = tuners;
4d5dcc42
VK
383 return 0;
384}
385
9a15fb2c 386static void od_exit(struct dbs_data *dbs_data)
4d5dcc42
VK
387{
388 kfree(dbs_data->tuners);
389}
390
702c9e54
RW
391static void od_start(struct cpufreq_policy *policy)
392{
7d5a9956 393 struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
702c9e54
RW
394
395 dbs_info->sample_type = OD_NORMAL_SAMPLE;
d1db75ff 396 ondemand_powersave_bias_init(policy);
702c9e54
RW
397}
398
4471a34f 399static struct od_ops od_ops = {
fb30809e 400 .powersave_bias_target = generic_powersave_bias_target,
4471a34f 401};
2f8a835c 402
7bdad34d 403static struct dbs_governor od_dbs_gov = {
e788892b 404 .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"),
fe262d5c 405 .kobj_type = { .default_groups = od_groups },
26f0dbc9 406 .gov_dbs_update = od_dbs_update,
7d5a9956
RW
407 .alloc = od_alloc,
408 .free = od_free,
4d5dcc42
VK
409 .init = od_init,
410 .exit = od_exit,
702c9e54 411 .start = od_start,
4471a34f 412};
1da177e4 413
10dd8573 414#define CPU_FREQ_GOV_ONDEMAND (od_dbs_gov.gov)
af926185 415
fb30809e
JS
416static void od_set_powersave_bias(unsigned int powersave_bias)
417{
fb30809e 418 unsigned int cpu;
3e5c04f9
ZL
419 cpumask_var_t done;
420
421 if (!alloc_cpumask_var(&done, GFP_KERNEL))
422 return;
fb30809e 423
c2837558 424 default_powersave_bias = powersave_bias;
3e5c04f9 425 cpumask_clear(done);
fb30809e 426
09681a07 427 cpus_read_lock();
fb30809e 428 for_each_online_cpu(cpu) {
8c8f77fd 429 struct cpufreq_policy *policy;
e40e7b25 430 struct policy_dbs_info *policy_dbs;
8c8f77fd
RW
431 struct dbs_data *dbs_data;
432 struct od_dbs_tuners *od_tuners;
44152cb8 433
3e5c04f9 434 if (cpumask_test_cpu(cpu, done))
fb30809e
JS
435 continue;
436
8c8f77fd 437 policy = cpufreq_cpu_get_raw(cpu);
10dd8573 438 if (!policy || policy->governor != &CPU_FREQ_GOV_ONDEMAND)
8c8f77fd
RW
439 continue;
440
441 policy_dbs = policy->governor_data;
e40e7b25 442 if (!policy_dbs)
c2837558 443 continue;
fb30809e 444
3e5c04f9 445 cpumask_or(done, done, policy->cpus);
c2837558 446
bc505475 447 dbs_data = policy_dbs->dbs_data;
c2837558
JS
448 od_tuners = dbs_data->tuners;
449 od_tuners->powersave_bias = default_powersave_bias;
fb30809e 450 }
09681a07 451 cpus_read_unlock();
3e5c04f9
ZL
452
453 free_cpumask_var(done);
fb30809e
JS
454}
455
456void od_register_powersave_bias_handler(unsigned int (*f)
457 (struct cpufreq_policy *, unsigned int, unsigned int),
458 unsigned int powersave_bias)
459{
460 od_ops.powersave_bias_target = f;
461 od_set_powersave_bias(powersave_bias);
462}
463EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler);
464
465void od_unregister_powersave_bias_handler(void)
466{
467 od_ops.powersave_bias_target = generic_powersave_bias_target;
468 od_set_powersave_bias(0);
469}
470EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
471
ffac80e9
VP
472MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
473MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
474MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
2b03f891 475 "Low Latency Frequency Transition capable processors");
ffac80e9 476MODULE_LICENSE("GPL");
1da177e4 477
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
/* Return the ondemand governor; built only when it is the configured default. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	struct cpufreq_governor *gov = &CPU_FREQ_GOV_ONDEMAND;

	return gov;
}
#endif
10dd8573
QP
484
/*
 * Hook the governor into the init/exit helper macros, which are defined
 * outside this file.
 */
cpufreq_governor_init(CPU_FREQ_GOV_ONDEMAND);
cpufreq_governor_exit(CPU_FREQ_GOV_ONDEMAND);