thermal: cpu_cooling: create structure for idle time stats
[linux-2.6-block.git] / drivers / thermal / cpu_cooling.c
CommitLineData
02361418
ADK
1/*
2 * linux/drivers/thermal/cpu_cooling.c
3 *
4 * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
5 * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
6 *
73904cbc
VK
7 * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
8 *
02361418
ADK
9 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
02361418
ADK
25#include <linux/module.h>
26#include <linux/thermal.h>
02361418
ADK
27#include <linux/cpufreq.h>
28#include <linux/err.h>
ae606089 29#include <linux/idr.h>
c36cf071 30#include <linux/pm_opp.h>
02361418
ADK
31#include <linux/slab.h>
32#include <linux/cpu.h>
33#include <linux/cpu_cooling.h>
34
6828a471
JM
35#include <trace/events/thermal.h>
36
07d888d8
VK
37/*
38 * Cooling state <-> CPUFreq frequency
39 *
40 * Cooling states are translated to frequencies throughout this driver and this
41 * is the relation between them.
42 *
43 * Highest cooling state corresponds to lowest possible frequency.
44 *
45 * i.e.
46 * level 0 --> 1st Max Freq
47 * level 1 --> 2nd Max Freq
48 * ...
49 */
50
c36cf071 51/**
349d39dc 52 * struct freq_table - frequency table along with power entries
c36cf071
JM
53 * @frequency: frequency in KHz
54 * @power: power in mW
55 *
56 * This structure is built when the cooling device registers and helps
349d39dc 57 * in translating frequency to power and vice versa.
c36cf071 58 */
349d39dc 59struct freq_table {
c36cf071
JM
60 u32 frequency;
61 u32 power;
62};
63
81ee14da
VK
64/**
65 * struct time_in_idle - Idle time stats
66 * @time: previous reading of the absolute time that this cpu was idle
67 * @timestamp: wall time of the last invocation of get_cpu_idle_time_us()
68 */
69struct time_in_idle {
70 u64 time;
71 u64 timestamp;
72};
73
02361418 74/**
3b3c0748 75 * struct cpufreq_cooling_device - data for cooling device with cpufreq
02361418
ADK
76 * @id: unique integer value corresponding to each cpufreq_cooling_device
77 * registered.
04bdbdf9 78 * @cdev: thermal_cooling_device pointer to keep track of the
3b3c0748 79 * registered cooling device.
b12b6519 80 * @policy: cpufreq policy.
02361418
ADK
81 * @cpufreq_state: integer value representing the current state of cpufreq
82 * cooling devices.
59f0d218 83 * @clipped_freq: integer value representing the absolute value of the clipped
02361418 84 * frequency.
dcc6c7fd
VK
85 * @max_level: maximum cooling level. One less than total number of valid
86 * cpufreq frequencies.
fc4de356 87 * @node: list_head to link all cpufreq_cooling_device together.
0744f130 88 * @last_load: load measured by the latest call to cpufreq_get_requested_power()
81ee14da 89 * @idle_time: idle time stats
02bacb21 90 * @cpu_dev: the cpu_device of policy->cpu.
c36cf071 91 * @plat_get_static_power: callback to calculate the static power
02361418 92 *
beca6053
VK
93 * This structure is required for keeping information of each registered
94 * cpufreq_cooling_device.
02361418
ADK
95 */
96struct cpufreq_cooling_device {
97 int id;
04bdbdf9 98 struct thermal_cooling_device *cdev;
b12b6519 99 struct cpufreq_policy *policy;
02361418 100 unsigned int cpufreq_state;
59f0d218 101 unsigned int clipped_freq;
dcc6c7fd 102 unsigned int max_level;
349d39dc 103 struct freq_table *freq_table; /* In descending order */
2dcd851f 104 struct list_head node;
c36cf071 105 u32 last_load;
81ee14da 106 struct time_in_idle *idle_time;
c36cf071
JM
107 struct device *cpu_dev;
108 get_static_t plat_get_static_power;
02361418 109};
02361418 110
/* Source of the ids handed out to cpufreq cooling devices */
static DEFINE_IDA(cpufreq_ida);
/* Serialises access to cpufreq_cdev_list */
static DEFINE_MUTEX(cooling_list_lock);
/* Every registered cpufreq cooling device, linked through ->node */
static LIST_HEAD(cpufreq_cdev_list);
02361418 114
02361418
ADK
115/* Below code defines functions to be used for cpufreq as cooling device */
116
117/**
4843c4a1 118 * get_level: Find the level for a particular frequency
1dea432a 119 * @cpufreq_cdev: cpufreq_cdev for which the property is required
4843c4a1 120 * @freq: Frequency
82b9ee40 121 *
4843c4a1 122 * Return: level on success, THERMAL_CSTATE_INVALID on error.
02361418 123 */
1dea432a 124static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
4843c4a1 125 unsigned int freq)
02361418 126{
4843c4a1 127 unsigned long level;
a116776f 128
1dea432a 129 for (level = 0; level <= cpufreq_cdev->max_level; level++) {
349d39dc 130 if (freq == cpufreq_cdev->freq_table[level].frequency)
4843c4a1 131 return level;
02361418 132
349d39dc 133 if (freq > cpufreq_cdev->freq_table[level].frequency)
4843c4a1 134 break;
fc35b35c 135 }
02361418 136
4843c4a1 137 return THERMAL_CSTATE_INVALID;
fc35b35c
ZR
138}
139
02361418
ADK
140/**
141 * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
142 * @nb: struct notifier_block * with callback info.
143 * @event: value showing cpufreq event for which this function invoked.
144 * @data: callback-specific data
bab30554 145 *
9746b6e7 146 * Callback to hijack the notification on cpufreq policy transition.
bab30554
EV
147 * Every time there is a change in policy, we will intercept and
148 * update the cpufreq policy with thermal constraints.
149 *
150 * Return: 0 (success)
02361418
ADK
151 */
152static int cpufreq_thermal_notifier(struct notifier_block *nb,
5fda7f68 153 unsigned long event, void *data)
02361418
ADK
154{
155 struct cpufreq_policy *policy = data;
abcbcc25 156 unsigned long clipped_freq;
1dea432a 157 struct cpufreq_cooling_device *cpufreq_cdev;
02361418 158
a24af233
VK
159 if (event != CPUFREQ_ADJUST)
160 return NOTIFY_DONE;
02361418 161
a24af233 162 mutex_lock(&cooling_list_lock);
1dea432a 163 list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
ba76dd9d
VK
164 /*
165 * A new copy of the policy is sent to the notifier and can't
166 * compare that directly.
167 */
168 if (policy->cpu != cpufreq_cdev->policy->cpu)
a24af233 169 continue;
c36cf071 170
1afb9c53
VK
171 /*
172 * policy->max is the maximum allowed frequency defined by user
173 * and clipped_freq is the maximum that thermal constraints
174 * allow.
175 *
176 * If clipped_freq is lower than policy->max, then we need to
177 * readjust policy->max.
178 *
179 * But, if clipped_freq is greater than policy->max, we don't
180 * need to do anything.
181 */
1dea432a 182 clipped_freq = cpufreq_cdev->clipped_freq;
c36cf071 183
1afb9c53 184 if (policy->max > clipped_freq)
abcbcc25 185 cpufreq_verify_within_limits(policy, 0, clipped_freq);
c36cf071 186 break;
c36cf071 187 }
a24af233 188 mutex_unlock(&cooling_list_lock);
c36cf071
JM
189
190 return NOTIFY_OK;
191}
192
193/**
349d39dc
VK
194 * update_freq_table() - Update the freq table with power numbers
195 * @cpufreq_cdev: the cpufreq cooling device in which to update the table
c36cf071
JM
196 * @capacitance: dynamic power coefficient for these cpus
197 *
349d39dc
VK
198 * Update the freq table with power numbers. This table will be used in
199 * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
200 * frequency efficiently. Power is stored in mW, frequency in KHz. The
201 * resulting table is in descending order.
c36cf071 202 *
459ac375 203 * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
349d39dc 204 * or -ENOMEM if we run out of memory.
c36cf071 205 */
349d39dc
VK
206static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
207 u32 capacitance)
c36cf071 208{
349d39dc 209 struct freq_table *freq_table = cpufreq_cdev->freq_table;
c36cf071
JM
210 struct dev_pm_opp *opp;
211 struct device *dev = NULL;
349d39dc 212 int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
c36cf071 213
02bacb21
VK
214 dev = get_cpu_device(cpu);
215 if (unlikely(!dev)) {
216 dev_warn(&cpufreq_cdev->cdev->device,
217 "No cpu device for cpu %d\n", cpu);
218 return -ENODEV;
c36cf071 219 }
02361418 220
02bacb21
VK
221 num_opps = dev_pm_opp_get_opp_count(dev);
222 if (num_opps < 0)
223 return num_opps;
224
349d39dc
VK
225 /*
226 * The cpufreq table is also built from the OPP table and so the count
227 * should match.
228 */
229 if (num_opps != cpufreq_cdev->max_level + 1) {
230 dev_warn(dev, "Number of OPPs not matching with max_levels\n");
459ac375 231 return -EINVAL;
349d39dc 232 }
02361418 233
349d39dc
VK
234 for (i = 0; i <= cpufreq_cdev->max_level; i++) {
235 unsigned long freq = freq_table[i].frequency * 1000;
236 u32 freq_mhz = freq_table[i].frequency / 1000;
c36cf071 237 u64 power;
349d39dc 238 u32 voltage_mv;
c36cf071 239
349d39dc
VK
240 /*
241 * Find ceil frequency as 'freq' may be slightly lower than OPP
242 * freq due to truncation while converting to kHz.
243 */
244 opp = dev_pm_opp_find_freq_ceil(dev, &freq);
245 if (IS_ERR(opp)) {
246 dev_err(dev, "failed to get opp for %lu frequency\n",
247 freq);
248 return -EINVAL;
459ac375
JM
249 }
250
c36cf071 251 voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
8a31d9d9 252 dev_pm_opp_put(opp);
c36cf071
JM
253
254 /*
255 * Do the multiplication with MHz and millivolt so as
256 * to not overflow.
257 */
258 power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
259 do_div(power, 1000000000);
260
c36cf071 261 /* power is stored in mW */
349d39dc 262 freq_table[i].power = power;
eba4f88d 263 }
c36cf071 264
1dea432a 265 cpufreq_cdev->cpu_dev = dev;
c36cf071 266
459ac375 267 return 0;
c36cf071
JM
268}
269
1dea432a 270static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
c36cf071
JM
271 u32 freq)
272{
273 int i;
349d39dc 274 struct freq_table *freq_table = cpufreq_cdev->freq_table;
c36cf071 275
349d39dc
VK
276 for (i = 1; i <= cpufreq_cdev->max_level; i++)
277 if (freq > freq_table[i].frequency)
c36cf071
JM
278 break;
279
349d39dc 280 return freq_table[i - 1].power;
c36cf071
JM
281}
282
1dea432a 283static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
c36cf071
JM
284 u32 power)
285{
286 int i;
349d39dc 287 struct freq_table *freq_table = cpufreq_cdev->freq_table;
c36cf071 288
349d39dc
VK
289 for (i = 1; i <= cpufreq_cdev->max_level; i++)
290 if (power > freq_table[i].power)
c36cf071
JM
291 break;
292
349d39dc 293 return freq_table[i - 1].frequency;
c36cf071
JM
294}
295
296/**
297 * get_load() - get load for a cpu since last updated
1dea432a 298 * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
c36cf071 299 * @cpu: cpu number
ba76dd9d 300 * @cpu_idx: index of the cpu in time_in_idle*
c36cf071
JM
301 *
302 * Return: The average load of cpu @cpu in percentage since this
303 * function was last called.
304 */
1dea432a 305static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
a53b8394 306 int cpu_idx)
c36cf071
JM
307{
308 u32 load;
309 u64 now, now_idle, delta_time, delta_idle;
81ee14da 310 struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];
c36cf071
JM
311
312 now_idle = get_cpu_idle_time(cpu, &now, 0);
81ee14da
VK
313 delta_idle = now_idle - idle_time->time;
314 delta_time = now - idle_time->timestamp;
c36cf071
JM
315
316 if (delta_time <= delta_idle)
317 load = 0;
318 else
319 load = div64_u64(100 * (delta_time - delta_idle), delta_time);
320
81ee14da
VK
321 idle_time->time = now_idle;
322 idle_time->timestamp = now;
c36cf071
JM
323
324 return load;
325}
326
327/**
328 * get_static_power() - calculate the static power consumed by the cpus
1dea432a 329 * @cpufreq_cdev: struct &cpufreq_cooling_device for this cpu cdev
c36cf071
JM
330 * @tz: thermal zone device in which we're operating
331 * @freq: frequency in KHz
332 * @power: pointer in which to store the calculated static power
333 *
334 * Calculate the static power consumed by the cpus described by
335 * @cpu_actor running at frequency @freq. This function relies on a
336 * platform specific function that should have been provided when the
337 * actor was registered. If it wasn't, the static power is assumed to
338 * be negligible. The calculated static power is stored in @power.
339 *
340 * Return: 0 on success, -E* on failure.
341 */
1dea432a 342static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
c36cf071
JM
343 struct thermal_zone_device *tz, unsigned long freq,
344 u32 *power)
345{
346 struct dev_pm_opp *opp;
347 unsigned long voltage;
ba76dd9d 348 struct cpumask *cpumask = cpufreq_cdev->policy->related_cpus;
c36cf071
JM
349 unsigned long freq_hz = freq * 1000;
350
1dea432a 351 if (!cpufreq_cdev->plat_get_static_power || !cpufreq_cdev->cpu_dev) {
c36cf071
JM
352 *power = 0;
353 return 0;
354 }
355
1dea432a 356 opp = dev_pm_opp_find_freq_exact(cpufreq_cdev->cpu_dev, freq_hz,
c36cf071 357 true);
3ea3217c 358 if (IS_ERR(opp)) {
1dea432a 359 dev_warn_ratelimited(cpufreq_cdev->cpu_dev,
3ea3217c
VK
360 "Failed to find OPP for frequency %lu: %ld\n",
361 freq_hz, PTR_ERR(opp));
362 return -EINVAL;
363 }
364
c36cf071 365 voltage = dev_pm_opp_get_voltage(opp);
8a31d9d9 366 dev_pm_opp_put(opp);
c36cf071
JM
367
368 if (voltage == 0) {
1dea432a 369 dev_err_ratelimited(cpufreq_cdev->cpu_dev,
3ea3217c
VK
370 "Failed to get voltage for frequency %lu\n",
371 freq_hz);
c36cf071
JM
372 return -EINVAL;
373 }
374
1dea432a
VK
375 return cpufreq_cdev->plat_get_static_power(cpumask, tz->passive_delay,
376 voltage, power);
c36cf071
JM
377}
378
379/**
380 * get_dynamic_power() - calculate the dynamic power
1dea432a 381 * @cpufreq_cdev: &cpufreq_cooling_device for this cdev
c36cf071
JM
382 * @freq: current frequency
383 *
384 * Return: the dynamic power consumed by the cpus described by
1dea432a 385 * @cpufreq_cdev.
c36cf071 386 */
1dea432a 387static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
c36cf071
JM
388 unsigned long freq)
389{
390 u32 raw_cpu_power;
391
1dea432a
VK
392 raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
393 return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
02361418
ADK
394}
395
1b9e3526 396/* cpufreq cooling device callback functions are defined below */
02361418
ADK
397
398/**
399 * cpufreq_get_max_state - callback function to get the max cooling state.
400 * @cdev: thermal cooling device pointer.
401 * @state: fill this variable with the max cooling state.
62c00421
EV
402 *
403 * Callback for the thermal cooling device to return the cpufreq
404 * max cooling state.
405 *
406 * Return: 0 on success, an error code otherwise.
02361418
ADK
407 */
408static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
409 unsigned long *state)
410{
1dea432a 411 struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
9c51b05a 412
1dea432a 413 *state = cpufreq_cdev->max_level;
dcc6c7fd 414 return 0;
02361418
ADK
415}
416
417/**
418 * cpufreq_get_cur_state - callback function to get the current cooling state.
419 * @cdev: thermal cooling device pointer.
420 * @state: fill this variable with the current cooling state.
3672552d
EV
421 *
422 * Callback for the thermal cooling device to return the cpufreq
423 * current cooling state.
424 *
425 * Return: 0 on success, an error code otherwise.
02361418
ADK
426 */
427static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
428 unsigned long *state)
429{
1dea432a 430 struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
02361418 431
1dea432a 432 *state = cpufreq_cdev->cpufreq_state;
79491e53 433
160b7d80 434 return 0;
02361418
ADK
435}
436
437/**
438 * cpufreq_set_cur_state - callback function to set the current cooling state.
439 * @cdev: thermal cooling device pointer.
440 * @state: set this variable to the current cooling state.
56e05fdb
EV
441 *
442 * Callback for the thermal cooling device to change the cpufreq
443 * current cooling state.
444 *
445 * Return: 0 on success, an error code otherwise.
02361418
ADK
446 */
447static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
448 unsigned long state)
449{
1dea432a 450 struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
5194fe46 451 unsigned int clip_freq;
4843c4a1
VK
452
453 /* Request state should be less than max_level */
1dea432a 454 if (WARN_ON(state > cpufreq_cdev->max_level))
4843c4a1 455 return -EINVAL;
5194fe46
VK
456
457 /* Check if the old cooling action is same as new cooling action */
1dea432a 458 if (cpufreq_cdev->cpufreq_state == state)
5194fe46 459 return 0;
02361418 460
349d39dc 461 clip_freq = cpufreq_cdev->freq_table[state].frequency;
1dea432a
VK
462 cpufreq_cdev->cpufreq_state = state;
463 cpufreq_cdev->clipped_freq = clip_freq;
5194fe46 464
ba76dd9d 465 cpufreq_update_policy(cpufreq_cdev->policy->cpu);
5194fe46
VK
466
467 return 0;
02361418
ADK
468}
469
c36cf071
JM
470/**
471 * cpufreq_get_requested_power() - get the current power
472 * @cdev: &thermal_cooling_device pointer
473 * @tz: a valid thermal zone device pointer
474 * @power: pointer in which to store the resulting power
475 *
476 * Calculate the current power consumption of the cpus in milliwatts
477 * and store it in @power. This function should actually calculate
478 * the requested power, but it's hard to get the frequency that
479 * cpufreq would have assigned if there were no thermal limits.
480 * Instead, we calculate the current power on the assumption that the
481 * immediate future will look like the immediate past.
482 *
483 * We use the current frequency and the average load since this
484 * function was last called. In reality, there could have been
485 * multiple opps since this function was last called and that affects
486 * the load calculation. While it's not perfectly accurate, this
487 * simplification is good enough and works. REVISIT this, as more
488 * complex code may be needed if experiments show that it's not
489 * accurate enough.
490 *
491 * Return: 0 on success, -E* if getting the static power failed.
492 */
493static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
494 struct thermal_zone_device *tz,
495 u32 *power)
496{
497 unsigned long freq;
6828a471 498 int i = 0, cpu, ret;
c36cf071 499 u32 static_power, dynamic_power, total_load = 0;
1dea432a 500 struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
ba76dd9d 501 struct cpufreq_policy *policy = cpufreq_cdev->policy;
6828a471 502 u32 *load_cpu = NULL;
c36cf071 503
ba76dd9d 504 freq = cpufreq_quick_get(policy->cpu);
c36cf071 505
6828a471 506 if (trace_thermal_power_cpu_get_power_enabled()) {
ba76dd9d 507 u32 ncpus = cpumask_weight(policy->related_cpus);
6828a471 508
a71544cd 509 load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
6828a471
JM
510 }
511
ba76dd9d 512 for_each_cpu(cpu, policy->related_cpus) {
c36cf071
JM
513 u32 load;
514
515 if (cpu_online(cpu))
1dea432a 516 load = get_load(cpufreq_cdev, cpu, i);
c36cf071
JM
517 else
518 load = 0;
519
520 total_load += load;
6828a471
JM
521 if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
522 load_cpu[i] = load;
523
524 i++;
c36cf071
JM
525 }
526
1dea432a 527 cpufreq_cdev->last_load = total_load;
c36cf071 528
1dea432a
VK
529 dynamic_power = get_dynamic_power(cpufreq_cdev, freq);
530 ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
6828a471 531 if (ret) {
a71544cd 532 kfree(load_cpu);
c36cf071 533 return ret;
6828a471
JM
534 }
535
536 if (load_cpu) {
ba76dd9d
VK
537 trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
538 load_cpu, i, dynamic_power,
539 static_power);
6828a471 540
a71544cd 541 kfree(load_cpu);
6828a471 542 }
c36cf071
JM
543
544 *power = static_power + dynamic_power;
545 return 0;
546}
547
548/**
549 * cpufreq_state2power() - convert a cpu cdev state to power consumed
550 * @cdev: &thermal_cooling_device pointer
551 * @tz: a valid thermal zone device pointer
552 * @state: cooling device state to be converted
553 * @power: pointer in which to store the resulting power
554 *
555 * Convert cooling device state @state into power consumption in
556 * milliwatts assuming 100% load. Store the calculated power in
557 * @power.
558 *
559 * Return: 0 on success, -EINVAL if the cooling device state could not
560 * be converted into a frequency or other -E* if there was an error
561 * when calculating the static power.
562 */
563static int cpufreq_state2power(struct thermal_cooling_device *cdev,
564 struct thermal_zone_device *tz,
565 unsigned long state, u32 *power)
566{
567 unsigned int freq, num_cpus;
c36cf071
JM
568 u32 static_power, dynamic_power;
569 int ret;
1dea432a 570 struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
c36cf071 571
ba76dd9d 572 num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
c36cf071 573
349d39dc 574 freq = cpufreq_cdev->freq_table[state].frequency;
ba76dd9d
VK
575 if (!freq)
576 return -EINVAL;
c36cf071 577
1dea432a
VK
578 dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
579 ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
c36cf071 580 if (ret)
ba76dd9d 581 return ret;
c36cf071
JM
582
583 *power = static_power + dynamic_power;
d9cc34a6 584 return ret;
c36cf071
JM
585}
586
587/**
588 * cpufreq_power2state() - convert power to a cooling device state
589 * @cdev: &thermal_cooling_device pointer
590 * @tz: a valid thermal zone device pointer
591 * @power: power in milliwatts to be converted
592 * @state: pointer in which to store the resulting state
593 *
594 * Calculate a cooling device state for the cpus described by @cdev
595 * that would allow them to consume at most @power mW and store it in
596 * @state. Note that this calculation depends on external factors
597 * such as the cpu load or the current static power. Calling this
598 * function with the same power as input can yield different cooling
599 * device states depending on those external factors.
600 *
601 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
602 * the calculated frequency could not be converted to a valid state.
603 * The latter should not happen unless the frequencies available to
604 * cpufreq have changed since the initialization of the cpu cooling
605 * device.
606 */
607static int cpufreq_power2state(struct thermal_cooling_device *cdev,
608 struct thermal_zone_device *tz, u32 power,
609 unsigned long *state)
610{
ba76dd9d 611 unsigned int cur_freq, target_freq;
c36cf071
JM
612 int ret;
613 s32 dyn_power;
614 u32 last_load, normalised_power, static_power;
1dea432a 615 struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
ba76dd9d 616 struct cpufreq_policy *policy = cpufreq_cdev->policy;
c36cf071 617
ba76dd9d 618 cur_freq = cpufreq_quick_get(policy->cpu);
1dea432a 619 ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power);
c36cf071
JM
620 if (ret)
621 return ret;
622
623 dyn_power = power - static_power;
624 dyn_power = dyn_power > 0 ? dyn_power : 0;
1dea432a 625 last_load = cpufreq_cdev->last_load ?: 1;
c36cf071 626 normalised_power = (dyn_power * 100) / last_load;
1dea432a 627 target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
c36cf071 628
3e08b2df 629 *state = get_level(cpufreq_cdev, target_freq);
c36cf071 630 if (*state == THERMAL_CSTATE_INVALID) {
9aec9082
VK
631 dev_err_ratelimited(&cdev->device,
632 "Failed to convert %dKHz for cpu %d into a cdev state\n",
ba76dd9d 633 target_freq, policy->cpu);
c36cf071
JM
634 return -EINVAL;
635 }
636
ba76dd9d
VK
637 trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
638 power);
c36cf071
JM
639 return 0;
640}
641
02361418 642/* Bind cpufreq callbacks to thermal cooling device ops */
a305a438 643
c36cf071 644static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
02361418
ADK
645 .get_max_state = cpufreq_get_max_state,
646 .get_cur_state = cpufreq_get_cur_state,
647 .set_cur_state = cpufreq_set_cur_state,
648};
649
a305a438
BJ
650static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
651 .get_max_state = cpufreq_get_max_state,
652 .get_cur_state = cpufreq_get_cur_state,
653 .set_cur_state = cpufreq_set_cur_state,
654 .get_requested_power = cpufreq_get_requested_power,
655 .state2power = cpufreq_state2power,
656 .power2state = cpufreq_power2state,
657};
658
02361418
ADK
659/* Notifier for cpufreq policy change */
660static struct notifier_block thermal_cpufreq_notifier_block = {
661 .notifier_call = cpufreq_thermal_notifier,
662};
663
f6859014
VK
664static unsigned int find_next_max(struct cpufreq_frequency_table *table,
665 unsigned int prev_max)
666{
667 struct cpufreq_frequency_table *pos;
668 unsigned int max = 0;
669
670 cpufreq_for_each_valid_entry(pos, table) {
671 if (pos->frequency > max && pos->frequency < prev_max)
672 max = pos->frequency;
673 }
674
675 return max;
676}
677
02361418 678/**
39d99cff
EV
679 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
680 * @np: a valid struct device_node to the cooling device device tree node
4d753aa7 681 * @policy: cpufreq policy
405fb825 682 * Normally this should be same as cpufreq policy->related_cpus.
c36cf071
JM
683 * @capacitance: dynamic power coefficient for these cpus
684 * @plat_static_func: function to calculate the static power consumed by these
685 * cpus (optional)
12cb08ba
EV
686 *
687 * This interface function registers the cpufreq cooling device with the name
688 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
39d99cff
EV
689 * cooling devices. It also gives the opportunity to link the cooling device
690 * with a device tree node, in order to bind it via the thermal DT code.
12cb08ba
EV
691 *
692 * Return: a valid struct thermal_cooling_device pointer on success,
693 * on failure, it returns a corresponding ERR_PTR().
02361418 694 */
39d99cff
EV
695static struct thermal_cooling_device *
696__cpufreq_cooling_register(struct device_node *np,
4d753aa7 697 struct cpufreq_policy *policy, u32 capacitance,
c36cf071 698 get_static_t plat_static_func)
02361418 699{
04bdbdf9 700 struct thermal_cooling_device *cdev;
1dea432a 701 struct cpufreq_cooling_device *cpufreq_cdev;
02361418 702 char dev_name[THERMAL_NAME_LENGTH];
c36cf071 703 unsigned int freq, i, num_cpus;
405fb825 704 int ret;
a305a438 705 struct thermal_cooling_device_ops *cooling_ops;
088db931 706 bool first;
02361418 707
4d753aa7
VK
708 if (IS_ERR_OR_NULL(policy)) {
709 pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy);
710 return ERR_PTR(-EINVAL);
f8bfc116
VK
711 }
712
55d85293
VK
713 i = cpufreq_table_count_valid_entries(policy);
714 if (!i) {
715 pr_debug("%s: CPUFreq table not found or has no valid entries\n",
716 __func__);
4d753aa7 717 return ERR_PTR(-ENODEV);
02361418 718 }
0f1be51c 719
1dea432a 720 cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
4d753aa7
VK
721 if (!cpufreq_cdev)
722 return ERR_PTR(-ENOMEM);
02361418 723
b12b6519 724 cpufreq_cdev->policy = policy;
4d753aa7 725 num_cpus = cpumask_weight(policy->related_cpus);
81ee14da
VK
726 cpufreq_cdev->idle_time = kcalloc(num_cpus,
727 sizeof(*cpufreq_cdev->idle_time),
728 GFP_KERNEL);
729 if (!cpufreq_cdev->idle_time) {
04bdbdf9 730 cdev = ERR_PTR(-ENOMEM);
c36cf071
JM
731 goto free_cdev;
732 }
733
55d85293
VK
734 /* max_level is an index, not a counter */
735 cpufreq_cdev->max_level = i - 1;
dcc6c7fd 736
55d85293
VK
737 cpufreq_cdev->freq_table = kmalloc(sizeof(*cpufreq_cdev->freq_table) * i,
738 GFP_KERNEL);
1dea432a 739 if (!cpufreq_cdev->freq_table) {
04bdbdf9 740 cdev = ERR_PTR(-ENOMEM);
81ee14da 741 goto free_idle_time;
f6859014
VK
742 }
743
ae606089
MW
744 ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
745 if (ret < 0) {
04bdbdf9 746 cdev = ERR_PTR(ret);
349d39dc 747 goto free_table;
02361418 748 }
1dea432a 749 cpufreq_cdev->id = ret;
02361418 750
349d39dc
VK
751 snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
752 cpufreq_cdev->id);
753
f6859014 754 /* Fill freq-table in descending order of frequencies */
1dea432a 755 for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
55d85293 756 freq = find_next_max(policy->freq_table, freq);
349d39dc 757 cpufreq_cdev->freq_table[i].frequency = freq;
f6859014
VK
758
759 /* Warn for duplicate entries */
760 if (!freq)
761 pr_warn("%s: table has duplicate entries\n", __func__);
762 else
763 pr_debug("%s: freq:%u KHz\n", __func__, freq);
02361418 764 }
f6859014 765
349d39dc
VK
766 if (capacitance) {
767 cpufreq_cdev->plat_get_static_power = plat_static_func;
768
769 ret = update_freq_table(cpufreq_cdev, capacitance);
770 if (ret) {
771 cdev = ERR_PTR(ret);
772 goto remove_ida;
773 }
774
775 cooling_ops = &cpufreq_power_cooling_ops;
776 } else {
777 cooling_ops = &cpufreq_cooling_ops;
778 }
f840ab18 779
04bdbdf9
VK
780 cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
781 cooling_ops);
782 if (IS_ERR(cdev))
ae606089 783 goto remove_ida;
f840ab18 784
349d39dc 785 cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
04bdbdf9 786 cpufreq_cdev->cdev = cdev;
92e615ec 787
02373d7c 788 mutex_lock(&cooling_list_lock);
088db931 789 /* Register the notifier for first cpufreq cooling device */
1dea432a
VK
790 first = list_empty(&cpufreq_cdev_list);
791 list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
088db931 792 mutex_unlock(&cooling_list_lock);
02373d7c 793
088db931 794 if (first)
02361418 795 cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
5fda7f68 796 CPUFREQ_POLICY_NOTIFIER);
79491e53 797
4d753aa7 798 return cdev;
730abe06 799
ae606089 800remove_ida:
1dea432a 801 ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
f6859014 802free_table:
1dea432a 803 kfree(cpufreq_cdev->freq_table);
81ee14da
VK
804free_idle_time:
805 kfree(cpufreq_cdev->idle_time);
730abe06 806free_cdev:
1dea432a 807 kfree(cpufreq_cdev);
04bdbdf9 808 return cdev;
02361418 809}
39d99cff
EV
810
811/**
812 * cpufreq_cooling_register - function to create cpufreq cooling device.
4d753aa7 813 * @policy: cpufreq policy
39d99cff
EV
814 *
815 * This interface function registers the cpufreq cooling device with the name
816 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
817 * cooling devices.
818 *
819 * Return: a valid struct thermal_cooling_device pointer on success,
820 * on failure, it returns a corresponding ERR_PTR().
821 */
822struct thermal_cooling_device *
4d753aa7 823cpufreq_cooling_register(struct cpufreq_policy *policy)
39d99cff 824{
4d753aa7 825 return __cpufreq_cooling_register(NULL, policy, 0, NULL);
39d99cff 826}
243dbd9c 827EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
02361418 828
39d99cff
EV
829/**
830 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
831 * @np: a valid struct device_node to the cooling device device tree node
4d753aa7 832 * @policy: cpufreq policy
39d99cff
EV
833 *
834 * This interface function registers the cpufreq cooling device with the name
835 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
836 * cooling devices. Using this API, the cpufreq cooling device will be
837 * linked to the device tree node provided.
838 *
839 * Return: a valid struct thermal_cooling_device pointer on success,
840 * on failure, it returns a corresponding ERR_PTR().
841 */
842struct thermal_cooling_device *
843of_cpufreq_cooling_register(struct device_node *np,
4d753aa7 844 struct cpufreq_policy *policy)
39d99cff
EV
845{
846 if (!np)
847 return ERR_PTR(-EINVAL);
848
4d753aa7 849 return __cpufreq_cooling_register(np, policy, 0, NULL);
39d99cff
EV
850}
851EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
852
c36cf071
JM
853/**
854 * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
4d753aa7 855 * @policy: cpufreq policy
c36cf071
JM
856 * @capacitance: dynamic power coefficient for these cpus
857 * @plat_static_func: function to calculate the static power consumed by these
858 * cpus (optional)
859 *
860 * This interface function registers the cpufreq cooling device with
861 * the name "thermal-cpufreq-%x". This api can support multiple
862 * instances of cpufreq cooling devices. Using this function, the
863 * cooling device will implement the power extensions by using a
864 * simple cpu power model. The cpus must have registered their OPPs
865 * using the OPP library.
866 *
867 * An optional @plat_static_func may be provided to calculate the
868 * static power consumed by these cpus. If the platform's static
869 * power consumption is unknown or negligible, make it NULL.
870 *
871 * Return: a valid struct thermal_cooling_device pointer on success,
872 * on failure, it returns a corresponding ERR_PTR().
873 */
874struct thermal_cooling_device *
4d753aa7 875cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance,
c36cf071
JM
876 get_static_t plat_static_func)
877{
4d753aa7 878 return __cpufreq_cooling_register(NULL, policy, capacitance,
c36cf071
JM
879 plat_static_func);
880}
881EXPORT_SYMBOL(cpufreq_power_cooling_register);
882
883/**
884 * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
885 * @np: a valid struct device_node to the cooling device device tree node
4d753aa7 886 * @policy: cpufreq policy
c36cf071
JM
887 * @capacitance: dynamic power coefficient for these cpus
888 * @plat_static_func: function to calculate the static power consumed by these
889 * cpus (optional)
890 *
891 * This interface function registers the cpufreq cooling device with
892 * the name "thermal-cpufreq-%x". This api can support multiple
893 * instances of cpufreq cooling devices. Using this API, the cpufreq
894 * cooling device will be linked to the device tree node provided.
895 * Using this function, the cooling device will implement the power
896 * extensions by using a simple cpu power model. The cpus must have
897 * registered their OPPs using the OPP library.
898 *
899 * An optional @plat_static_func may be provided to calculate the
900 * static power consumed by these cpus. If the platform's static
901 * power consumption is unknown or negligible, make it NULL.
902 *
903 * Return: a valid struct thermal_cooling_device pointer on success,
904 * on failure, it returns a corresponding ERR_PTR().
905 */
906struct thermal_cooling_device *
907of_cpufreq_power_cooling_register(struct device_node *np,
4d753aa7 908 struct cpufreq_policy *policy,
c36cf071
JM
909 u32 capacitance,
910 get_static_t plat_static_func)
911{
912 if (!np)
913 return ERR_PTR(-EINVAL);
914
4d753aa7 915 return __cpufreq_cooling_register(np, policy, capacitance,
c36cf071
JM
916 plat_static_func);
917}
918EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
919
02361418
ADK
920/**
921 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
922 * @cdev: thermal cooling device pointer.
135266b4
EV
923 *
924 * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
02361418
ADK
925 */
926void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
927{
1dea432a 928 struct cpufreq_cooling_device *cpufreq_cdev;
088db931 929 bool last;
02361418 930
50e66c7e
EV
931 if (!cdev)
932 return;
933
1dea432a 934 cpufreq_cdev = cdev->devdata;
02361418 935
ae606089 936 mutex_lock(&cooling_list_lock);
1dea432a 937 list_del(&cpufreq_cdev->node);
02361418 938 /* Unregister the notifier for the last cpufreq cooling device */
1dea432a 939 last = list_empty(&cpufreq_cdev_list);
088db931
MW
940 mutex_unlock(&cooling_list_lock);
941
942 if (last)
02361418 943 cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
5fda7f68 944 CPUFREQ_POLICY_NOTIFIER);
02373d7c 945
04bdbdf9 946 thermal_cooling_device_unregister(cpufreq_cdev->cdev);
1dea432a 947 ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
81ee14da 948 kfree(cpufreq_cdev->idle_time);
1dea432a
VK
949 kfree(cpufreq_cdev->freq_table);
950 kfree(cpufreq_cdev);
02361418 951}
243dbd9c 952EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);