/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>

/*
 * dbs is used in this file as a shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

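/*
 * up_threshold is the percentage of busy (non-idle) time in a sampling
 * interval above which the governor jumps straight to policy->max.  It is
 * tunable through sysfs within the MIN/MAX bounds below.
 */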
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor.  The default polling frequency is 1000 times the
 * transition latency of the processor.  The governor will work on any
 * processor with a transition latency <= 10 ms, using an appropriate
 * sampling rate.  For CPUs with a transition latency > 10 ms (mostly
 * drivers with CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in uS.
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE			(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
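/*
 * Worked example for the sampling-rate defaults above (the values are
 * computed in cpufreq_governor_dbs() below): for a CPU that reports a
 * transition latency of 10,000 nS, latency = 10 uS and the default sampling
 * rate becomes 10 * 1000 = 10,000 uS (10 ms).  If that product falls below
 * MIN_STAT_SAMPLING_RATE (20 ticks worth of uS), it is raised to that floor
 * so each sample spans enough timer ticks for meaningful idle statistics.
 */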
#define DEF_SAMPLING_DOWN_FACTOR		(1)
#define MAX_SAMPLING_DOWN_FACTOR		(10)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)

static void do_dbs_timer(void *data);

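/*
 * Per-CPU bookkeeping: prev_cpu_idle_up is the idle-time snapshot taken on
 * every sample and is the baseline for the frequency-increase check;
 * prev_cpu_idle_down is refreshed only every sampling_down_factor samples
 * (or whenever the up path fires) and is the baseline for the slower
 * frequency-decrease check.
 */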
struct cpu_dbs_info_s {
	struct cpufreq_policy	*cur_policy;
	unsigned int		prev_cpu_idle_up;
	unsigned int		prev_cpu_idle_down;
	unsigned int		enable;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;	/* number of CPUs using this policy */

static DECLARE_MUTEX	(dbs_sem);
static DECLARE_WORK	(dbs_work, do_dbs_timer, NULL);

struct dbs_tuners {
	unsigned int		sampling_rate;
	unsigned int		sampling_down_factor;
	unsigned int		up_threshold;
	unsigned int		ignore_nice;
};

static struct dbs_tuners dbs_tuners_ins = {
	.up_threshold		= DEF_FREQUENCY_UP_THRESHOLD,
	.sampling_down_factor	= DEF_SAMPLING_DOWN_FACTOR,
};

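/*
 * "Idle time" for a CPU, in jiffies, read from kstat_cpu: idle plus iowait
 * ticks, and additionally nice ticks when ignore_nice is set, so that niced
 * load is treated as idle time rather than as demand.
 */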
static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
	return	kstat_cpu(cpu).cpustat.idle +
		kstat_cpu(cpu).cpustat.iowait +
		(dbs_tuners_ins.ignore_nice ?
		 kstat_cpu(cpu).cpustat.nice :
		 0);
}

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
	return sprintf (buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
	return sprintf (buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)					\
static ssize_t show_##file_name						\
(struct cpufreq_policy *unused, char *buf)				\
{									\
	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
		return -EINVAL;

	down(&dbs_sem);
	dbs_tuners_ins.sampling_down_factor = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.sampling_rate = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_up_threshold(struct cpufreq_policy *unused,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;
	ret = sscanf (buf, "%u", &input);

	down(&dbs_sem);
	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
			input < MIN_FREQUENCY_UP_THRESHOLD) {
		up(&dbs_sem);
		return -EINVAL;
	}

	dbs_tuners_ins.up_threshold = input;
	up(&dbs_sem);

	return count;
}

static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
		const char *buf, size_t count)
{
	unsigned int input;
	int ret;

	unsigned int j;

	ret = sscanf (buf, "%u", &input);
	if (ret != 1)
		return -EINVAL;

	if (input > 1)
		input = 1;

	down(&dbs_sem);
	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
		up(&dbs_sem);
		return count;
	}
	dbs_tuners_ins.ignore_nice = input;

	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
	for_each_online_cpu(j) {
		struct cpu_dbs_info_s *j_dbs_info;
		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
	}
	up(&dbs_sem);

	return count;
}

#define define_one_rw(_name)		\
static struct freq_attr _name =		\
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(ignore_nice_load);

static struct attribute * dbs_attributes[] = {
	&sampling_rate_max.attr,
	&sampling_rate_min.attr,
	&sampling_rate.attr,
	&sampling_down_factor.attr,
	&up_threshold.attr,
	&ignore_nice_load.attr,
	NULL
};

static struct attribute_group dbs_attr_group = {
	.attrs = dbs_attributes,
	.name = "ondemand",
};
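/*
 * With the group name "ondemand" registered on the policy kobject, these
 * tunables typically show up as files under the policy's cpufreq directory,
 * e.g. /sys/devices/system/cpu/cpu0/cpufreq/ondemand/sampling_rate.
 */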

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
	unsigned int idle_ticks, up_idle_ticks, total_ticks;
	unsigned int freq_next;
	unsigned int freq_down_sampling_rate;
	static int down_skip[NR_CPUS];
	struct cpu_dbs_info_s *this_dbs_info;

	struct cpufreq_policy *policy;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
	if (!this_dbs_info->enable)
		return;

	policy = this_dbs_info->cur_policy;
	/*
	 * Every sampling_rate, we check whether the current idle time is
	 * less than 20% (default); if it is, we try to increase the
	 * frequency.  Every sampling_rate * sampling_down_factor, we look
	 * for the lowest frequency which can sustain the load while keeping
	 * idle time over 30%.  If such a frequency exists, we try to
	 * decrease to it.
	 *
	 * Any frequency increase takes the CPU to the maximum frequency.
	 * Frequency reduction happens in minimum steps of 5% (default)
	 * of the current frequency.
	 */
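	/*
	 * Worked example for the increase check below: with the default
	 * up_threshold of 80 and a sampling rate of S microseconds, the
	 * smallest per-CPU idle delta in the policy (in jiffies) is scaled
	 * by 100 and compared with (100 - 80) * usecs_to_jiffies(S).  So if
	 * the busiest CPU was idle for less than ~20% of the interval, the
	 * whole policy is pushed to policy->max.
	 */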

	/* Check for frequency increase */
	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		total_idle_ticks = get_cpu_idle_time(j);
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_up;
		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	/* Scale idle ticks by 100 and compare with up and down ticks */
	idle_ticks *= 100;
	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

	if (idle_ticks < up_idle_ticks) {
		down_skip[cpu] = 0;
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;

			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->prev_cpu_idle_down =
					j_dbs_info->prev_cpu_idle_up;
		}
		/* if we are already at full speed then break out early */
		if (policy->cur == policy->max)
			return;

		__cpufreq_driver_target(policy, policy->max,
			CPUFREQ_RELATION_H);
		return;
	}

	/* Check for frequency decrease */
	down_skip[cpu]++;
	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
		return;

	idle_ticks = UINT_MAX;
	for_each_cpu_mask(j, policy->cpus) {
		unsigned int tmp_idle_ticks, total_idle_ticks;
		struct cpu_dbs_info_s *j_dbs_info;

		j_dbs_info = &per_cpu(cpu_dbs_info, j);
		/* Check for frequency decrease */
		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
		tmp_idle_ticks = total_idle_ticks -
			j_dbs_info->prev_cpu_idle_down;
		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;

		if (tmp_idle_ticks < idle_ticks)
			idle_ticks = tmp_idle_ticks;
	}

	down_skip[cpu] = 0;
	/* if we cannot reduce the frequency anymore, break out early */
	if (policy->cur == policy->min)
		return;

	/* Compute how many ticks there are between two measurements */
	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
		dbs_tuners_ins.sampling_down_factor;
	total_ticks = usecs_to_jiffies(freq_down_sampling_rate);

	/*
	 * The optimal frequency is the lowest frequency that can support
	 * the current CPU usage without triggering the up policy.  To be
	 * safe, we aim 10 points under the threshold.
	 */
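	/*
	 * Worked example with the default up_threshold of 80: if the CPUs
	 * were busy for 40% of the down window, freq_next below becomes
	 * (40 * policy->cur) / (80 - 10), i.e. roughly 57% of the current
	 * frequency, and the switch only happens because that is at most
	 * 95% of policy->cur.
	 */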
	freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks;
	freq_next = (freq_next * policy->cur) /
			(dbs_tuners_ins.up_threshold - 10);

	if (freq_next <= ((policy->cur * 95) / 100))
		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}

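/*
 * A single delayed work item samples every online CPU and then re-arms
 * itself every sampling_rate microseconds; dbs_sem serializes it against
 * the sysfs store handlers and governor start/stop.
 */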
static void do_dbs_timer(void *data)
{
	int i;
	down(&dbs_sem);
	for_each_online_cpu(i)
		dbs_check_cpu(i);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	up(&dbs_sem);
}

static inline void dbs_timer_init(void)
{
	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
	schedule_delayed_work(&dbs_work,
			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
	return;
}

static inline void dbs_timer_exit(void)
{
	cancel_delayed_work(&dbs_work);
	return;
}

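/*
 * Governor entry point.  CPUFREQ_GOV_START wires up the per-CPU state,
 * creates the sysfs group and, for the first user, picks a default sampling
 * rate and starts the timer work; CPUFREQ_GOV_STOP tears that down and stops
 * the timer once the last CPU leaves; CPUFREQ_GOV_LIMITS clamps the current
 * frequency into the new [min, max] range.
 */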
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
				   unsigned int event)
{
	unsigned int cpu = policy->cpu;
	struct cpu_dbs_info_s *this_dbs_info;
	unsigned int j;

	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

	switch (event) {
	case CPUFREQ_GOV_START:
		if ((!cpu_online(cpu)) ||
		    (!policy->cur))
			return -EINVAL;

		if (policy->cpuinfo.transition_latency >
				(TRANSITION_LATENCY_LIMIT * 1000))
			return -EINVAL;
		if (this_dbs_info->enable) /* Already enabled */
			break;

		down(&dbs_sem);
		for_each_cpu_mask(j, policy->cpus) {
			struct cpu_dbs_info_s *j_dbs_info;
			j_dbs_info = &per_cpu(cpu_dbs_info, j);
			j_dbs_info->cur_policy = policy;

			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
			j_dbs_info->prev_cpu_idle_down
				= j_dbs_info->prev_cpu_idle_up;
		}
		this_dbs_info->enable = 1;
		sysfs_create_group(&policy->kobj, &dbs_attr_group);
		dbs_enable++;
		/*
		 * Start the timer work when this governor is used
		 * for the first time
		 */
		if (dbs_enable == 1) {
			unsigned int latency;
			/* policy latency is in nS. Convert it to uS first */
			latency = policy->cpuinfo.transition_latency / 1000;
			if (latency == 0)
				latency = 1;

			def_sampling_rate = latency *
					DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

			if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
				def_sampling_rate = MIN_STAT_SAMPLING_RATE;

			dbs_tuners_ins.sampling_rate = def_sampling_rate;
			dbs_tuners_ins.ignore_nice = 0;

			dbs_timer_init();
		}

		up(&dbs_sem);
		break;

	case CPUFREQ_GOV_STOP:
		down(&dbs_sem);
		this_dbs_info->enable = 0;
		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
		dbs_enable--;
		/*
		 * Stop the timer work when the last CPU using this
		 * governor goes away
		 */
		if (dbs_enable == 0)
			dbs_timer_exit();

		up(&dbs_sem);

		break;

	case CPUFREQ_GOV_LIMITS:
		down(&dbs_sem);
		if (policy->max < this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > this_dbs_info->cur_policy->cur)
			__cpufreq_driver_target(
					this_dbs_info->cur_policy,
					policy->min, CPUFREQ_RELATION_L);
		up(&dbs_sem);
		break;
	}
	return 0;
}

static struct cpufreq_governor cpufreq_gov_dbs = {
	.name		= "ondemand",
	.governor	= cpufreq_governor_dbs,
	.owner		= THIS_MODULE,
};

static int __init cpufreq_gov_dbs_init(void)
{
	return cpufreq_register_governor(&cpufreq_gov_dbs);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
	/* Make sure that the scheduled work is indeed not running */
	flush_scheduled_work();

	cpufreq_unregister_governor(&cpufreq_gov_dbs);
}


MODULE_AUTHOR ("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_DESCRIPTION ("'cpufreq_ondemand' - A dynamic cpufreq governor for "
		"Low Latency Frequency Transition capable processors");
MODULE_LICENSE ("GPL");

module_init(cpufreq_gov_dbs_init);
module_exit(cpufreq_gov_dbs_exit);