Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * drivers/cpufreq/cpufreq_ondemand.c | |
3 | * | |
4 | * Copyright (C) 2001 Russell King | |
5 | * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. | |
6 | * Jun Nakajima <jun.nakajima@intel.com> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License version 2 as | |
10 | * published by the Free Software Foundation. | |
11 | */ | |
12 | ||
13 | #include <linux/kernel.h> | |
14 | #include <linux/module.h> | |
1da177e4 | 15 | #include <linux/init.h> |
1da177e4 | 16 | #include <linux/cpufreq.h> |
138a0128 | 17 | #include <linux/cpu.h> |
1da177e4 LT |
18 | #include <linux/jiffies.h> |
19 | #include <linux/kernel_stat.h> | |
3fc54d37 | 20 | #include <linux/mutex.h> |
1da177e4 LT |
21 | |
22 | /* | |
23 | * "dbs" is used in this file as a short form for demand-based switching. | |
24 | * It helps to keep variable names shorter and simpler. | |
25 | */ | |
26 | ||
/*
 * Default and allowed range of the up_threshold tunable.  The value is
 * compared against a load expressed in percent.  The governor subtracts 10
 * from up_threshold and divides by the result, so the minimum has to stay
 * above 10.
 */
#define DEF_FREQUENCY_UP_THRESHOLD		(80)
#define MIN_FREQUENCY_UP_THRESHOLD		(11)
#define MAX_FREQUENCY_UP_THRESHOLD		(100)
30 | ||
/*
 * How often the governor samples depends on how fast the processor can
 * switch frequencies: the default sampling period is 1000 times the
 * transition latency.  Any processor with a transition latency of at most
 * 10ms can be driven with a suitable sampling rate; processors with a
 * higher latency (mostly drivers reporting CPUFREQ_ETERNAL) are rejected.
 *
 * All times here are in microseconds.
 */
static unsigned int def_sampling_rate;

#define MIN_SAMPLING_RATE_RATIO			(2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE			\
	(MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE			\
	(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
#define TRANSITION_LATENCY_LIMIT		(10 * 1000)
1da177e4 LT |
49 | |
50 | static void do_dbs_timer(void *data); | |
51 | ||
52 | struct cpu_dbs_info_s { | |
ccb2fe20 VP |
53 | cputime64_t prev_cpu_idle; |
54 | cputime64_t prev_cpu_wall; | |
32ee8c3e | 55 | struct cpufreq_policy *cur_policy; |
2f8a835c | 56 | struct work_struct work; |
32ee8c3e | 57 | unsigned int enable; |
1da177e4 LT |
58 | }; |
59 | static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); | |
60 | ||
/* Number of successful governor starts that have not been stopped yet. */
static unsigned int dbs_enable;

/*
 * DEADLOCK ALERT! There is an ordering requirement between the cpu_hotplug
 * lock and dbs_mutex: the cpu_hotplug lock must always be taken before
 * dbs_mutex.  Whenever a function that may take the cpu_hotplug lock (like
 * __cpufreq_driver_target()) is called with dbs_mutex held, the cpu_hotplug
 * lock has to be taken first.  Note that the cpu_hotplug lock is recursive
 * for the same process. -Venki
 */
static DEFINE_MUTEX(dbs_mutex);

/* Workqueue on which the per-CPU sampling work items are queued. */
static struct workqueue_struct *kondemand_wq;
6810b548 | 74 | |
1da177e4 | 75 | struct dbs_tuners { |
32ee8c3e | 76 | unsigned int sampling_rate; |
32ee8c3e DJ |
77 | unsigned int up_threshold; |
78 | unsigned int ignore_nice; | |
1da177e4 LT |
79 | }; |
80 | ||
81 | static struct dbs_tuners dbs_tuners_ins = { | |
32ee8c3e | 82 | .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, |
9cbad61b | 83 | .ignore_nice = 0, |
1da177e4 LT |
84 | }; |
85 | ||
ccb2fe20 | 86 | static inline cputime64_t get_cpu_idle_time(unsigned int cpu) |
dac1c1a5 | 87 | { |
ccb2fe20 VP |
88 | cputime64_t retval; |
89 | ||
90 | retval = cputime64_add(kstat_cpu(cpu).cpustat.idle, | |
91 | kstat_cpu(cpu).cpustat.iowait); | |
92 | ||
93 | if (dbs_tuners_ins.ignore_nice) | |
94 | retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice); | |
95 | ||
96 | return retval; | |
dac1c1a5 DJ |
97 | } |
98 | ||
1da177e4 LT |
99 | /************************** sysfs interface ************************/ |
100 | static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) | |
101 | { | |
102 | return sprintf (buf, "%u\n", MAX_SAMPLING_RATE); | |
103 | } | |
104 | ||
105 | static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) | |
106 | { | |
107 | return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); | |
108 | } | |
109 | ||
32ee8c3e DJ |
110 | #define define_one_ro(_name) \ |
111 | static struct freq_attr _name = \ | |
1da177e4 LT |
112 | __ATTR(_name, 0444, show_##_name, NULL) |
113 | ||
114 | define_one_ro(sampling_rate_max); | |
115 | define_one_ro(sampling_rate_min); | |
116 | ||
117 | /* cpufreq_ondemand Governor Tunables */ | |
118 | #define show_one(file_name, object) \ | |
119 | static ssize_t show_##file_name \ | |
120 | (struct cpufreq_policy *unused, char *buf) \ | |
121 | { \ | |
122 | return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ | |
123 | } | |
124 | show_one(sampling_rate, sampling_rate); | |
1da177e4 | 125 | show_one(up_threshold, up_threshold); |
001893cd | 126 | show_one(ignore_nice_load, ignore_nice); |
1da177e4 | 127 | |
32ee8c3e | 128 | static ssize_t store_sampling_rate(struct cpufreq_policy *unused, |
1da177e4 LT |
129 | const char *buf, size_t count) |
130 | { | |
131 | unsigned int input; | |
132 | int ret; | |
ffac80e9 | 133 | ret = sscanf(buf, "%u", &input); |
1da177e4 | 134 | |
3fc54d37 | 135 | mutex_lock(&dbs_mutex); |
1da177e4 | 136 | if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) { |
3fc54d37 | 137 | mutex_unlock(&dbs_mutex); |
1da177e4 LT |
138 | return -EINVAL; |
139 | } | |
140 | ||
141 | dbs_tuners_ins.sampling_rate = input; | |
3fc54d37 | 142 | mutex_unlock(&dbs_mutex); |
1da177e4 LT |
143 | |
144 | return count; | |
145 | } | |
146 | ||
32ee8c3e | 147 | static ssize_t store_up_threshold(struct cpufreq_policy *unused, |
1da177e4 LT |
148 | const char *buf, size_t count) |
149 | { | |
150 | unsigned int input; | |
151 | int ret; | |
ffac80e9 | 152 | ret = sscanf(buf, "%u", &input); |
1da177e4 | 153 | |
3fc54d37 | 154 | mutex_lock(&dbs_mutex); |
32ee8c3e | 155 | if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || |
c29f1403 | 156 | input < MIN_FREQUENCY_UP_THRESHOLD) { |
3fc54d37 | 157 | mutex_unlock(&dbs_mutex); |
1da177e4 LT |
158 | return -EINVAL; |
159 | } | |
160 | ||
161 | dbs_tuners_ins.up_threshold = input; | |
3fc54d37 | 162 | mutex_unlock(&dbs_mutex); |
1da177e4 LT |
163 | |
164 | return count; | |
165 | } | |
166 | ||
001893cd | 167 | static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, |
3d5ee9e5 DJ |
168 | const char *buf, size_t count) |
169 | { | |
170 | unsigned int input; | |
171 | int ret; | |
172 | ||
173 | unsigned int j; | |
32ee8c3e | 174 | |
ffac80e9 | 175 | ret = sscanf(buf, "%u", &input); |
3d5ee9e5 DJ |
176 | if ( ret != 1 ) |
177 | return -EINVAL; | |
178 | ||
179 | if ( input > 1 ) | |
180 | input = 1; | |
32ee8c3e | 181 | |
3fc54d37 | 182 | mutex_lock(&dbs_mutex); |
3d5ee9e5 | 183 | if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ |
3fc54d37 | 184 | mutex_unlock(&dbs_mutex); |
3d5ee9e5 DJ |
185 | return count; |
186 | } | |
187 | dbs_tuners_ins.ignore_nice = input; | |
188 | ||
ccb2fe20 | 189 | /* we need to re-evaluate prev_cpu_idle */ |
dac1c1a5 | 190 | for_each_online_cpu(j) { |
ccb2fe20 VP |
191 | struct cpu_dbs_info_s *dbs_info; |
192 | dbs_info = &per_cpu(cpu_dbs_info, j); | |
193 | dbs_info->prev_cpu_idle = get_cpu_idle_time(j); | |
194 | dbs_info->prev_cpu_wall = get_jiffies_64(); | |
3d5ee9e5 | 195 | } |
3fc54d37 | 196 | mutex_unlock(&dbs_mutex); |
3d5ee9e5 DJ |
197 | |
198 | return count; | |
199 | } | |
200 | ||
1da177e4 LT |
201 | #define define_one_rw(_name) \ |
202 | static struct freq_attr _name = \ | |
203 | __ATTR(_name, 0644, show_##_name, store_##_name) | |
204 | ||
205 | define_one_rw(sampling_rate); | |
1da177e4 | 206 | define_one_rw(up_threshold); |
001893cd | 207 | define_one_rw(ignore_nice_load); |
1da177e4 LT |
208 | |
209 | static struct attribute * dbs_attributes[] = { | |
210 | &sampling_rate_max.attr, | |
211 | &sampling_rate_min.attr, | |
212 | &sampling_rate.attr, | |
1da177e4 | 213 | &up_threshold.attr, |
001893cd | 214 | &ignore_nice_load.attr, |
1da177e4 LT |
215 | NULL |
216 | }; | |
217 | ||
218 | static struct attribute_group dbs_attr_group = { | |
219 | .attrs = dbs_attributes, | |
220 | .name = "ondemand", | |
221 | }; | |
222 | ||
223 | /************************** sysfs end ************************/ | |
224 | ||
2f8a835c | 225 | static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) |
1da177e4 | 226 | { |
ccb2fe20 VP |
227 | unsigned int idle_ticks, total_ticks; |
228 | unsigned int load; | |
ccb2fe20 | 229 | cputime64_t cur_jiffies; |
1da177e4 LT |
230 | |
231 | struct cpufreq_policy *policy; | |
232 | unsigned int j; | |
233 | ||
1da177e4 LT |
234 | if (!this_dbs_info->enable) |
235 | return; | |
236 | ||
237 | policy = this_dbs_info->cur_policy; | |
ccb2fe20 VP |
238 | cur_jiffies = jiffies64_to_cputime64(get_jiffies_64()); |
239 | total_ticks = (unsigned int) cputime64_sub(cur_jiffies, | |
240 | this_dbs_info->prev_cpu_wall); | |
241 | this_dbs_info->prev_cpu_wall = cur_jiffies; | |
32ee8c3e | 242 | /* |
c29f1403 DJ |
243 | * Every sampling_rate, we check, if current idle time is less |
244 | * than 20% (default), then we try to increase frequency | |
ccb2fe20 | 245 | * Every sampling_rate, we look for a the lowest |
c29f1403 DJ |
246 | * frequency which can sustain the load while keeping idle time over |
247 | * 30%. If such a frequency exist, we try to decrease to this frequency. | |
1da177e4 | 248 | * |
32ee8c3e DJ |
249 | * Any frequency increase takes it to the maximum frequency. |
250 | * Frequency reduction happens at minimum steps of | |
251 | * 5% (default) of current frequency | |
1da177e4 LT |
252 | */ |
253 | ||
ccb2fe20 | 254 | /* Get Idle Time */ |
9c7d269b | 255 | idle_ticks = UINT_MAX; |
1da177e4 | 256 | for_each_cpu_mask(j, policy->cpus) { |
ccb2fe20 VP |
257 | cputime64_t total_idle_ticks; |
258 | unsigned int tmp_idle_ticks; | |
1da177e4 LT |
259 | struct cpu_dbs_info_s *j_dbs_info; |
260 | ||
1da177e4 | 261 | j_dbs_info = &per_cpu(cpu_dbs_info, j); |
dac1c1a5 | 262 | total_idle_ticks = get_cpu_idle_time(j); |
ccb2fe20 VP |
263 | tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks, |
264 | j_dbs_info->prev_cpu_idle); | |
265 | j_dbs_info->prev_cpu_idle = total_idle_ticks; | |
1da177e4 LT |
266 | |
267 | if (tmp_idle_ticks < idle_ticks) | |
268 | idle_ticks = tmp_idle_ticks; | |
269 | } | |
ccb2fe20 | 270 | load = (100 * (total_ticks - idle_ticks)) / total_ticks; |
1da177e4 | 271 | |
ccb2fe20 VP |
272 | /* Check for frequency increase */ |
273 | if (load > dbs_tuners_ins.up_threshold) { | |
c11420a6 DJ |
274 | /* if we are already at full speed then break out early */ |
275 | if (policy->cur == policy->max) | |
276 | return; | |
32ee8c3e DJ |
277 | |
278 | __cpufreq_driver_target(policy, policy->max, | |
1da177e4 | 279 | CPUFREQ_RELATION_H); |
1da177e4 LT |
280 | return; |
281 | } | |
282 | ||
283 | /* Check for frequency decrease */ | |
c29f1403 DJ |
284 | /* if we cannot reduce the frequency anymore, break out early */ |
285 | if (policy->cur == policy->min) | |
286 | return; | |
1da177e4 | 287 | |
c29f1403 DJ |
288 | /* |
289 | * The optimal frequency is the frequency that is the lowest that | |
290 | * can support the current CPU usage without triggering the up | |
291 | * policy. To be safe, we focus 10 points under the threshold. | |
292 | */ | |
ccb2fe20 VP |
293 | if (load < (dbs_tuners_ins.up_threshold - 10)) { |
294 | unsigned int freq_next; | |
295 | freq_next = (policy->cur * load) / | |
c29f1403 | 296 | (dbs_tuners_ins.up_threshold - 10); |
1da177e4 | 297 | |
c29f1403 | 298 | __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); |
ccb2fe20 | 299 | } |
1da177e4 LT |
300 | } |
301 | ||
302 | static void do_dbs_timer(void *data) | |
32ee8c3e | 303 | { |
2f8a835c VP |
304 | unsigned int cpu = smp_processor_id(); |
305 | struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); | |
306 | ||
307 | dbs_check_cpu(dbs_info); | |
308 | queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, | |
309 | usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); | |
32ee8c3e | 310 | } |
1da177e4 | 311 | |
2f8a835c | 312 | static inline void dbs_timer_init(unsigned int cpu) |
1da177e4 | 313 | { |
2f8a835c VP |
314 | struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); |
315 | ||
316 | INIT_WORK(&dbs_info->work, do_dbs_timer, 0); | |
317 | queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, | |
318 | usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); | |
1da177e4 LT |
319 | return; |
320 | } | |
321 | ||
2f8a835c | 322 | static inline void dbs_timer_exit(unsigned int cpu) |
1da177e4 | 323 | { |
2f8a835c VP |
324 | struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu); |
325 | ||
326 | cancel_rearming_delayed_workqueue(kondemand_wq, &dbs_info->work); | |
1da177e4 LT |
327 | } |
328 | ||
329 | static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | |
330 | unsigned int event) | |
331 | { | |
332 | unsigned int cpu = policy->cpu; | |
333 | struct cpu_dbs_info_s *this_dbs_info; | |
334 | unsigned int j; | |
335 | ||
336 | this_dbs_info = &per_cpu(cpu_dbs_info, cpu); | |
337 | ||
338 | switch (event) { | |
339 | case CPUFREQ_GOV_START: | |
ffac80e9 | 340 | if ((!cpu_online(cpu)) || (!policy->cur)) |
1da177e4 LT |
341 | return -EINVAL; |
342 | ||
343 | if (policy->cpuinfo.transition_latency > | |
ff8c288d EP |
344 | (TRANSITION_LATENCY_LIMIT * 1000)) { |
345 | printk(KERN_WARNING "ondemand governor failed to load " | |
346 | "due to too long transition latency\n"); | |
1da177e4 | 347 | return -EINVAL; |
ff8c288d | 348 | } |
1da177e4 LT |
349 | if (this_dbs_info->enable) /* Already enabled */ |
350 | break; | |
32ee8c3e | 351 | |
3fc54d37 | 352 | mutex_lock(&dbs_mutex); |
2f8a835c VP |
353 | dbs_enable++; |
354 | if (dbs_enable == 1) { | |
355 | kondemand_wq = create_workqueue("kondemand"); | |
356 | if (!kondemand_wq) { | |
357 | printk(KERN_ERR "Creation of kondemand failed\n"); | |
358 | dbs_enable--; | |
359 | mutex_unlock(&dbs_mutex); | |
360 | return -ENOSPC; | |
361 | } | |
362 | } | |
1da177e4 LT |
363 | for_each_cpu_mask(j, policy->cpus) { |
364 | struct cpu_dbs_info_s *j_dbs_info; | |
365 | j_dbs_info = &per_cpu(cpu_dbs_info, j); | |
366 | j_dbs_info->cur_policy = policy; | |
32ee8c3e | 367 | |
ccb2fe20 VP |
368 | j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); |
369 | j_dbs_info->prev_cpu_wall = get_jiffies_64(); | |
1da177e4 LT |
370 | } |
371 | this_dbs_info->enable = 1; | |
372 | sysfs_create_group(&policy->kobj, &dbs_attr_group); | |
1da177e4 LT |
373 | /* |
374 | * Start the timerschedule work, when this governor | |
375 | * is used for first time | |
376 | */ | |
377 | if (dbs_enable == 1) { | |
378 | unsigned int latency; | |
379 | /* policy latency is in nS. Convert it to uS first */ | |
df8b59be DJ |
380 | latency = policy->cpuinfo.transition_latency / 1000; |
381 | if (latency == 0) | |
382 | latency = 1; | |
1da177e4 | 383 | |
df8b59be | 384 | def_sampling_rate = latency * |
1da177e4 | 385 | DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; |
df8b59be DJ |
386 | |
387 | if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) | |
388 | def_sampling_rate = MIN_STAT_SAMPLING_RATE; | |
389 | ||
1da177e4 | 390 | dbs_tuners_ins.sampling_rate = def_sampling_rate; |
1da177e4 | 391 | } |
2f8a835c | 392 | dbs_timer_init(policy->cpu); |
32ee8c3e | 393 | |
3fc54d37 | 394 | mutex_unlock(&dbs_mutex); |
1da177e4 LT |
395 | break; |
396 | ||
397 | case CPUFREQ_GOV_STOP: | |
3fc54d37 | 398 | mutex_lock(&dbs_mutex); |
2f8a835c | 399 | dbs_timer_exit(policy->cpu); |
1da177e4 LT |
400 | this_dbs_info->enable = 0; |
401 | sysfs_remove_group(&policy->kobj, &dbs_attr_group); | |
402 | dbs_enable--; | |
32ee8c3e | 403 | if (dbs_enable == 0) |
2f8a835c | 404 | destroy_workqueue(kondemand_wq); |
32ee8c3e | 405 | |
3fc54d37 | 406 | mutex_unlock(&dbs_mutex); |
1da177e4 LT |
407 | |
408 | break; | |
409 | ||
410 | case CPUFREQ_GOV_LIMITS: | |
4ec223d0 | 411 | lock_cpu_hotplug(); |
3fc54d37 | 412 | mutex_lock(&dbs_mutex); |
1da177e4 | 413 | if (policy->max < this_dbs_info->cur_policy->cur) |
ffac80e9 VP |
414 | __cpufreq_driver_target(this_dbs_info->cur_policy, |
415 | policy->max, | |
416 | CPUFREQ_RELATION_H); | |
1da177e4 | 417 | else if (policy->min > this_dbs_info->cur_policy->cur) |
ffac80e9 VP |
418 | __cpufreq_driver_target(this_dbs_info->cur_policy, |
419 | policy->min, | |
420 | CPUFREQ_RELATION_L); | |
3fc54d37 | 421 | mutex_unlock(&dbs_mutex); |
4ec223d0 | 422 | unlock_cpu_hotplug(); |
1da177e4 LT |
423 | break; |
424 | } | |
425 | return 0; | |
426 | } | |
427 | ||
7f335d4e | 428 | static struct cpufreq_governor cpufreq_gov_dbs = { |
ffac80e9 VP |
429 | .name = "ondemand", |
430 | .governor = cpufreq_governor_dbs, | |
431 | .owner = THIS_MODULE, | |
1da177e4 | 432 | }; |
1da177e4 LT |
433 | |
434 | static int __init cpufreq_gov_dbs_init(void) | |
435 | { | |
436 | return cpufreq_register_governor(&cpufreq_gov_dbs); | |
437 | } | |
438 | ||
439 | static void __exit cpufreq_gov_dbs_exit(void) | |
440 | { | |
1da177e4 LT |
441 | cpufreq_unregister_governor(&cpufreq_gov_dbs); |
442 | } | |
443 | ||
444 | ||
ffac80e9 VP |
445 | MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); |
446 | MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); | |
447 | MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " | |
448 | "Low Latency Frequency Transition capable processors"); | |
449 | MODULE_LICENSE("GPL"); | |
1da177e4 LT |
450 | |
451 | module_init(cpufreq_gov_dbs_init); | |
452 | module_exit(cpufreq_gov_dbs_exit); |