thermal: introduce the Power Allocator governor
[linux-2.6-block.git] / drivers / thermal / power_allocator.c
CommitLineData
6b775e87
JM
1/*
2 * A power allocator to manage temperature
3 *
4 * Copyright (C) 2014 ARM Ltd.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
11 * kind, whether express or implied; without even the implied warranty
12 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16#define pr_fmt(fmt) "Power allocator: " fmt
17
18#include <linux/rculist.h>
19#include <linux/slab.h>
20#include <linux/thermal.h>
21
22#include "thermal_core.h"
23
/*
 * Fixed-point helpers. Values in fixed-point format carry FRAC_BITS
 * fractional bits.
 *
 * int_to_frac() scales by multiplication instead of "<<": it is applied
 * to values that can be negative (e.g. the temperature error in the PID
 * controller) and left-shifting a negative value is undefined behaviour
 * in C. The result is the same for every input that does not overflow.
 */
#define FRAC_BITS 10
#define int_to_frac(x) ((x) * (1 << FRAC_BITS))
#define frac_to_int(x) ((x) >> FRAC_BITS)
27
28/**
29 * mul_frac() - multiply two fixed-point numbers
30 * @x: first multiplicand
31 * @y: second multiplicand
32 *
33 * Return: the result of multiplying two fixed-point numbers. The
34 * result is also a fixed-point number.
35 */
36static inline s64 mul_frac(s64 x, s64 y)
37{
38 return (x * y) >> FRAC_BITS;
39}
40
41/**
42 * div_frac() - divide two fixed-point numbers
43 * @x: the dividend
44 * @y: the divisor
45 *
46 * Return: the result of dividing two fixed-point numbers. The
47 * result is also a fixed-point number.
48 */
49static inline s64 div_frac(s64 x, s64 y)
50{
51 return div_s64(x << FRAC_BITS, y);
52}
53
54/**
55 * struct power_allocator_params - parameters for the power allocator governor
56 * @err_integral: accumulated error in the PID controller.
57 * @prev_err: error in the previous iteration of the PID controller.
58 * Used to calculate the derivative term.
59 * @trip_switch_on: first passive trip point of the thermal zone. The
60 * governor switches on when this trip point is crossed.
61 * @trip_max_desired_temperature: last passive trip point of the thermal
62 * zone. The temperature we are
63 * controlling for.
64 */
65struct power_allocator_params {
66 s64 err_integral;
67 s32 prev_err;
68 int trip_switch_on;
69 int trip_max_desired_temperature;
70};
71
72/**
73 * pid_controller() - PID controller
74 * @tz: thermal zone we are operating in
75 * @current_temp: the current temperature in millicelsius
76 * @control_temp: the target temperature in millicelsius
77 * @max_allocatable_power: maximum allocatable power for this thermal zone
78 *
79 * This PID controller increases the available power budget so that the
80 * temperature of the thermal zone gets as close as possible to
81 * @control_temp and limits the power if it exceeds it. k_po is the
82 * proportional term when we are overshooting, k_pu is the
83 * proportional term when we are undershooting. integral_cutoff is a
84 * threshold below which we stop accumulating the error. The
85 * accumulated error is only valid if the requested power will make
86 * the system warmer. If the system is mostly idle, there's no point
87 * in accumulating positive error.
88 *
89 * Return: The power budget for the next period.
90 */
91static u32 pid_controller(struct thermal_zone_device *tz,
92 unsigned long current_temp,
93 unsigned long control_temp,
94 u32 max_allocatable_power)
95{
96 s64 p, i, d, power_range;
97 s32 err, max_power_frac;
98 struct power_allocator_params *params = tz->governor_data;
99
100 max_power_frac = int_to_frac(max_allocatable_power);
101
102 err = ((s32)control_temp - (s32)current_temp);
103 err = int_to_frac(err);
104
105 /* Calculate the proportional term */
106 p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err);
107
108 /*
109 * Calculate the integral term
110 *
111 * if the error is less than cut off allow integration (but
112 * the integral is limited to max power)
113 */
114 i = mul_frac(tz->tzp->k_i, params->err_integral);
115
116 if (err < int_to_frac(tz->tzp->integral_cutoff)) {
117 s64 i_next = i + mul_frac(tz->tzp->k_i, err);
118
119 if (abs64(i_next) < max_power_frac) {
120 i = i_next;
121 params->err_integral += err;
122 }
123 }
124
125 /*
126 * Calculate the derivative term
127 *
128 * We do err - prev_err, so with a positive k_d, a decreasing
129 * error (i.e. driving closer to the line) results in less
130 * power being applied, slowing down the controller)
131 */
132 d = mul_frac(tz->tzp->k_d, err - params->prev_err);
133 d = div_frac(d, tz->passive_delay);
134 params->prev_err = err;
135
136 power_range = p + i + d;
137
138 /* feed-forward the known sustainable dissipatable power */
139 power_range = tz->tzp->sustainable_power + frac_to_int(power_range);
140
141 return clamp(power_range, (s64)0, (s64)max_allocatable_power);
142}
143
144/**
145 * divvy_up_power() - divvy the allocated power between the actors
146 * @req_power: each actor's requested power
147 * @max_power: each actor's maximum available power
148 * @num_actors: size of the @req_power, @max_power and @granted_power's array
149 * @total_req_power: sum of @req_power
150 * @power_range: total allocated power
151 * @granted_power: output array: each actor's granted power
152 * @extra_actor_power: an appropriately sized array to be used in the
153 * function as temporary storage of the extra power given
154 * to the actors
155 *
156 * This function divides the total allocated power (@power_range)
157 * fairly between the actors. It first tries to give each actor a
158 * share of the @power_range according to how much power it requested
159 * compared to the rest of the actors. For example, if only one actor
160 * requests power, then it receives all the @power_range. If
161 * three actors each requests 1mW, each receives a third of the
162 * @power_range.
163 *
164 * If any actor received more than their maximum power, then that
165 * surplus is re-divvied among the actors based on how far they are
166 * from their respective maximums.
167 *
168 * Granted power for each actor is written to @granted_power, which
169 * should've been allocated by the calling function.
170 */
171static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors,
172 u32 total_req_power, u32 power_range,
173 u32 *granted_power, u32 *extra_actor_power)
174{
175 u32 extra_power, capped_extra_power;
176 int i;
177
178 /*
179 * Prevent division by 0 if none of the actors request power.
180 */
181 if (!total_req_power)
182 total_req_power = 1;
183
184 capped_extra_power = 0;
185 extra_power = 0;
186 for (i = 0; i < num_actors; i++) {
187 u64 req_range = req_power[i] * power_range;
188
189 granted_power[i] = div_u64(req_range, total_req_power);
190
191 if (granted_power[i] > max_power[i]) {
192 extra_power += granted_power[i] - max_power[i];
193 granted_power[i] = max_power[i];
194 }
195
196 extra_actor_power[i] = max_power[i] - granted_power[i];
197 capped_extra_power += extra_actor_power[i];
198 }
199
200 if (!extra_power)
201 return;
202
203 /*
204 * Re-divvy the reclaimed extra among actors based on
205 * how far they are from the max
206 */
207 extra_power = min(extra_power, capped_extra_power);
208 if (capped_extra_power > 0)
209 for (i = 0; i < num_actors; i++)
210 granted_power[i] += (extra_actor_power[i] *
211 extra_power) / capped_extra_power;
212}
213
214static int allocate_power(struct thermal_zone_device *tz,
215 unsigned long current_temp,
216 unsigned long control_temp)
217{
218 struct thermal_instance *instance;
219 struct power_allocator_params *params = tz->governor_data;
220 u32 *req_power, *max_power, *granted_power, *extra_actor_power;
221 u32 total_req_power, max_allocatable_power;
222 u32 power_range;
223 int i, num_actors, total_weight, ret = 0;
224 int trip_max_desired_temperature = params->trip_max_desired_temperature;
225
226 mutex_lock(&tz->lock);
227
228 num_actors = 0;
229 total_weight = 0;
230 list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
231 if ((instance->trip == trip_max_desired_temperature) &&
232 cdev_is_power_actor(instance->cdev)) {
233 num_actors++;
234 total_weight += instance->weight;
235 }
236 }
237
238 /*
239 * We need to allocate three arrays of the same size:
240 * req_power, max_power and granted_power. They are going to
241 * be needed until this function returns. Allocate them all
242 * in one go to simplify the allocation and deallocation
243 * logic.
244 */
245 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power));
246 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power));
247 BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power));
248 req_power = devm_kcalloc(&tz->device, num_actors * 4,
249 sizeof(*req_power), GFP_KERNEL);
250 if (!req_power) {
251 ret = -ENOMEM;
252 goto unlock;
253 }
254
255 max_power = &req_power[num_actors];
256 granted_power = &req_power[2 * num_actors];
257 extra_actor_power = &req_power[3 * num_actors];
258
259 i = 0;
260 total_req_power = 0;
261 max_allocatable_power = 0;
262
263 list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
264 int weight;
265 struct thermal_cooling_device *cdev = instance->cdev;
266
267 if (instance->trip != trip_max_desired_temperature)
268 continue;
269
270 if (!cdev_is_power_actor(cdev))
271 continue;
272
273 if (cdev->ops->get_requested_power(cdev, tz, &req_power[i]))
274 continue;
275
276 if (!total_weight)
277 weight = 1 << FRAC_BITS;
278 else
279 weight = instance->weight;
280
281 req_power[i] = frac_to_int(weight * req_power[i]);
282
283 if (power_actor_get_max_power(cdev, tz, &max_power[i]))
284 continue;
285
286 total_req_power += req_power[i];
287 max_allocatable_power += max_power[i];
288
289 i++;
290 }
291
292 power_range = pid_controller(tz, current_temp, control_temp,
293 max_allocatable_power);
294
295 divvy_up_power(req_power, max_power, num_actors, total_req_power,
296 power_range, granted_power, extra_actor_power);
297
298 i = 0;
299 list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
300 if (instance->trip != trip_max_desired_temperature)
301 continue;
302
303 if (!cdev_is_power_actor(instance->cdev))
304 continue;
305
306 power_actor_set_power(instance->cdev, instance,
307 granted_power[i]);
308
309 i++;
310 }
311
312 devm_kfree(&tz->device, req_power);
313unlock:
314 mutex_unlock(&tz->lock);
315
316 return ret;
317}
318
319static int get_governor_trips(struct thermal_zone_device *tz,
320 struct power_allocator_params *params)
321{
322 int i, ret, last_passive;
323 bool found_first_passive;
324
325 found_first_passive = false;
326 last_passive = -1;
327 ret = -EINVAL;
328
329 for (i = 0; i < tz->trips; i++) {
330 enum thermal_trip_type type;
331
332 ret = tz->ops->get_trip_type(tz, i, &type);
333 if (ret)
334 return ret;
335
336 if (!found_first_passive) {
337 if (type == THERMAL_TRIP_PASSIVE) {
338 params->trip_switch_on = i;
339 found_first_passive = true;
340 }
341 } else if (type == THERMAL_TRIP_PASSIVE) {
342 last_passive = i;
343 } else {
344 break;
345 }
346 }
347
348 if (last_passive != -1) {
349 params->trip_max_desired_temperature = last_passive;
350 ret = 0;
351 } else {
352 ret = -EINVAL;
353 }
354
355 return ret;
356}
357
358static void reset_pid_controller(struct power_allocator_params *params)
359{
360 params->err_integral = 0;
361 params->prev_err = 0;
362}
363
364static void allow_maximum_power(struct thermal_zone_device *tz)
365{
366 struct thermal_instance *instance;
367 struct power_allocator_params *params = tz->governor_data;
368
369 list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
370 if ((instance->trip != params->trip_max_desired_temperature) ||
371 (!cdev_is_power_actor(instance->cdev)))
372 continue;
373
374 instance->target = 0;
375 instance->cdev->updated = false;
376 thermal_cdev_update(instance->cdev);
377 }
378}
379
380/**
381 * power_allocator_bind() - bind the power_allocator governor to a thermal zone
382 * @tz: thermal zone to bind it to
383 *
384 * Check that the thermal zone is valid for this governor, that is, it
385 * has two thermal trips. If so, initialize the PID controller
386 * parameters and bind it to the thermal zone.
387 *
388 * Return: 0 on success, -EINVAL if the trips were invalid or -ENOMEM
389 * if we ran out of memory.
390 */
391static int power_allocator_bind(struct thermal_zone_device *tz)
392{
393 int ret;
394 struct power_allocator_params *params;
395 unsigned long switch_on_temp, control_temp;
396 u32 temperature_threshold;
397
398 if (!tz->tzp || !tz->tzp->sustainable_power) {
399 dev_err(&tz->device,
400 "power_allocator: missing sustainable_power\n");
401 return -EINVAL;
402 }
403
404 params = devm_kzalloc(&tz->device, sizeof(*params), GFP_KERNEL);
405 if (!params)
406 return -ENOMEM;
407
408 ret = get_governor_trips(tz, params);
409 if (ret) {
410 dev_err(&tz->device,
411 "thermal zone %s has wrong trip setup for power allocator\n",
412 tz->type);
413 goto free;
414 }
415
416 ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
417 &switch_on_temp);
418 if (ret)
419 goto free;
420
421 ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
422 &control_temp);
423 if (ret)
424 goto free;
425
426 temperature_threshold = control_temp - switch_on_temp;
427
428 tz->tzp->k_po = tz->tzp->k_po ?:
429 int_to_frac(tz->tzp->sustainable_power) / temperature_threshold;
430 tz->tzp->k_pu = tz->tzp->k_pu ?:
431 int_to_frac(2 * tz->tzp->sustainable_power) /
432 temperature_threshold;
433 tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000;
434 /*
435 * The default for k_d and integral_cutoff is 0, so we can
436 * leave them as they are.
437 */
438
439 reset_pid_controller(params);
440
441 tz->governor_data = params;
442
443 return 0;
444
445free:
446 devm_kfree(&tz->device, params);
447 return ret;
448}
449
450static void power_allocator_unbind(struct thermal_zone_device *tz)
451{
452 dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
453 devm_kfree(&tz->device, tz->governor_data);
454 tz->governor_data = NULL;
455}
456
457static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
458{
459 int ret;
460 unsigned long switch_on_temp, control_temp, current_temp;
461 struct power_allocator_params *params = tz->governor_data;
462
463 /*
464 * We get called for every trip point but we only need to do
465 * our calculations once
466 */
467 if (trip != params->trip_max_desired_temperature)
468 return 0;
469
470 ret = thermal_zone_get_temp(tz, &current_temp);
471 if (ret) {
472 dev_warn(&tz->device, "Failed to get temperature: %d\n", ret);
473 return ret;
474 }
475
476 ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
477 &switch_on_temp);
478 if (ret) {
479 dev_warn(&tz->device,
480 "Failed to get switch on temperature: %d\n", ret);
481 return ret;
482 }
483
484 if (current_temp < switch_on_temp) {
485 tz->passive = 0;
486 reset_pid_controller(params);
487 allow_maximum_power(tz);
488 return 0;
489 }
490
491 tz->passive = 1;
492
493 ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
494 &control_temp);
495 if (ret) {
496 dev_warn(&tz->device,
497 "Failed to get the maximum desired temperature: %d\n",
498 ret);
499 return ret;
500 }
501
502 return allocate_power(tz, current_temp, control_temp);
503}
504
505static struct thermal_governor thermal_gov_power_allocator = {
506 .name = "power_allocator",
507 .bind_to_tz = power_allocator_bind,
508 .unbind_from_tz = power_allocator_unbind,
509 .throttle = power_allocator_throttle,
510};
511
512int thermal_gov_power_allocator_register(void)
513{
514 return thermal_register_governor(&thermal_gov_power_allocator);
515}
516
517void thermal_gov_power_allocator_unregister(void)
518{
519 thermal_unregister_governor(&thermal_gov_power_allocator);
520}