// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * processor_thermal.c - Passive cooling submodule of the ACPI processor driver
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *			- Added processor hotplug support
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/acpi.h>
#include <acpi/processor.h>
#include <linux/uaccess.h>

#include "internal.h"

#ifdef CONFIG_CPU_FREQ

/* If a passive cooling situation is detected, primarily CPUfreq is used, as it
 * offers (in most cases) voltage scaling in addition to frequency scaling, and
 * thus a cubic (instead of linear) reduction of energy. Also, we allow for
 * _any_ cpufreq driver and not only the acpi-cpufreq driver.
 */
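
/*
 * (Rationale sketch, not from the original comment: dynamic CPU power is
 * roughly P ~ C * V^2 * f, so where voltage scales down together with
 * frequency, power falls close to cubically in f, whereas clock throttling
 * alone reduces it only linearly.)
 */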

#define CPUFREQ_THERMAL_MIN_STEP 0

static int cpufreq_thermal_max_step __read_mostly = 3;

/*
 * Minimum throttle percentage for the processor_thermal cooling device.
 * The processor_thermal driver uses it to calculate the percentage by which
 * the CPU frequency must be reduced for each cooling state. It is also used
 * to calculate the maximum number of throttling steps (cooling states).
 */
static int cpufreq_thermal_reduction_pctg __read_mostly = 20;
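
/*
 * Example with the default value of 20: cooling states 0, 1, 2 and 3 reduce
 * the maximum frequency by 0%, 20%, 40% and 60% respectively, so even the
 * deepest cpufreq cooling state leaves 40% of the maximum frequency.
 */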

static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_step);

#define reduction_step(cpu) \
	per_cpu(cpufreq_thermal_reduction_step, phys_package_first_cpu(cpu))

/*
 * Emulate "per package data" using per-CPU data (which should really be
 * provided elsewhere).
 *
 * Note that we can lose a CPU on CPU hot-unplug; in that case we forget its
 * state temporarily. Fortunately that's not a big issue here (I hope).
 */
static int phys_package_first_cpu(int cpu)
{
	int i;
	int id = topology_physical_package_id(cpu);

	for_each_online_cpu(i)
		if (topology_physical_package_id(i) == id)
			return i;
	return 0;
}

static int cpu_has_cpufreq(unsigned int cpu)
{
	struct cpufreq_policy *policy;

	if (!acpi_processor_cpufreq_init)
		return 0;

	policy = cpufreq_cpu_get(cpu);
	if (policy) {
		cpufreq_cpu_put(policy);
		return 1;
	}
	return 0;
}

static int cpufreq_get_max_state(unsigned int cpu)
{
	if (!cpu_has_cpufreq(cpu))
		return 0;

	return cpufreq_thermal_max_step;
}

static int cpufreq_get_cur_state(unsigned int cpu)
{
	if (!cpu_has_cpufreq(cpu))
		return 0;

	return reduction_step(cpu);
}

static int cpufreq_set_cur_state(unsigned int cpu, int state)
{
	struct cpufreq_policy *policy;
	struct acpi_processor *pr;
	unsigned long max_freq;
	int i, ret;

	if (!cpu_has_cpufreq(cpu))
		return 0;

	reduction_step(cpu) = state;

	/*
	 * Update all the CPUs in the same package because they all
	 * contribute to the temperature and often share the same
	 * frequency.
	 */
	for_each_online_cpu(i) {
		if (topology_physical_package_id(i) !=
		    topology_physical_package_id(cpu))
			continue;

		pr = per_cpu(processors, i);
		if (!pr)
			continue;

		if (unlikely(!freq_qos_request_active(&pr->thermal_req)))
			continue;

		policy = cpufreq_cpu_get(i);
		if (!policy)
			return -EINVAL;

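		/*
		 * Example: with cpuinfo.max_freq = 3000000 kHz, a reduction
		 * step of 2 and the default 20% per step, this yields
		 * 3000000 * (100 - 40) / 100 = 1800000 kHz.
		 */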
		max_freq = (policy->cpuinfo.max_freq *
			    (100 - reduction_step(i) * cpufreq_thermal_reduction_pctg)) / 100;

		cpufreq_cpu_put(policy);

		ret = freq_qos_update_request(&pr->thermal_req, max_freq);
		if (ret < 0) {
			pr_warn("Failed to update thermal freq constraint: CPU%d (%d)\n",
				pr->id, ret);
		}
	}
	return 0;
}

static void acpi_thermal_cpufreq_config(void)
{
	int cpufreq_pctg = acpi_arch_thermal_cpufreq_pctg();

	if (!cpufreq_pctg)
		return;

	cpufreq_thermal_reduction_pctg = cpufreq_pctg;

	/*
	 * Derive the max step from the reduction percentage so that the
	 * resulting frequency never becomes negative, and cap it so that
	 * CPU performance doesn't drop to 0 at the deepest step.
	 */
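	/*
	 * Example: with cpufreq_pctg = 20 this yields 100/20 - 2 = 3 steps,
	 * so the deepest state still leaves 100 - 3 * 20 = 40% of the
	 * maximum frequency (subtracting only 1 would leave just 20%).
	 */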
	cpufreq_thermal_max_step = (100 / cpufreq_pctg) - 2;
}

void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
{
	unsigned int cpu;

	acpi_thermal_cpufreq_config();

	for_each_cpu(cpu, policy->related_cpus) {
		struct acpi_processor *pr = per_cpu(processors, cpu);
		int ret;

		if (!pr)
			continue;

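		/*
		 * Start with no constraint (INT_MAX); cooling state changes
		 * lower it later via freq_qos_update_request().
		 */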
		ret = freq_qos_add_request(&policy->constraints,
					   &pr->thermal_req,
					   FREQ_QOS_MAX, INT_MAX);
		if (ret < 0) {
			pr_err("Failed to add freq constraint for CPU%d (%d)\n",
			       cpu, ret);
			continue;
		}

		thermal_cooling_device_update(pr->cdev);
	}
}

void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
{
	unsigned int cpu;

	for_each_cpu(cpu, policy->related_cpus) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		freq_qos_remove_request(&pr->thermal_req);

		thermal_cooling_device_update(pr->cdev);
	}
}
#else /* ! CONFIG_CPU_FREQ */
static int cpufreq_get_max_state(unsigned int cpu)
{
	return 0;
}

static int cpufreq_get_cur_state(unsigned int cpu)
{
	return 0;
}

static int cpufreq_set_cur_state(unsigned int cpu, int state)
{
	return 0;
}

#endif

/* thermal cooling device callbacks */
static int acpi_processor_max_state(struct acpi_processor *pr)
{
	int max_state = 0;

	/*
	 * By default there are four states according to
	 * cpufreq_thermal_reduction_step: 0, 1, 2, 3.
	 */
	max_state += cpufreq_get_max_state(pr->id);
	if (pr->flags.throttling)
		max_state += (pr->throttling.state_count - 1);

	return max_state;
}

static int
processor_get_max_state(struct thermal_cooling_device *cdev,
			unsigned long *state)
{
	struct acpi_device *device = cdev->devdata;
	struct acpi_processor *pr;

	if (!device)
		return -EINVAL;

	pr = acpi_driver_data(device);
	if (!pr)
		return -EINVAL;

	*state = acpi_processor_max_state(pr);
	return 0;
}

static int
processor_get_cur_state(struct thermal_cooling_device *cdev,
			unsigned long *cur_state)
{
	struct acpi_device *device = cdev->devdata;
	struct acpi_processor *pr;

	if (!device)
		return -EINVAL;

	pr = acpi_driver_data(device);
	if (!pr)
		return -EINVAL;

	*cur_state = cpufreq_get_cur_state(pr->id);
	if (pr->flags.throttling)
		*cur_state += pr->throttling.state;
	return 0;
}

static int
processor_set_cur_state(struct thermal_cooling_device *cdev,
			unsigned long state)
{
	struct acpi_device *device = cdev->devdata;
	struct acpi_processor *pr;
	int result = 0;
	int max_pstate;

	if (!device)
		return -EINVAL;

	pr = acpi_driver_data(device);
	if (!pr)
		return -EINVAL;

	max_pstate = cpufreq_get_max_state(pr->id);

	if (state > acpi_processor_max_state(pr))
		return -EINVAL;

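	/*
	 * States 0..max_pstate are served by cpufreq; anything above that is
	 * handed to ACPI T-state throttling. E.g. with max_pstate = 3 and
	 * 8 T-states, state 5 means cpufreq step 3 plus throttling state
	 * 5 - 3 = 2.
	 */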
	if (state <= max_pstate) {
		if (pr->flags.throttling && pr->throttling.state)
			result = acpi_processor_set_throttling(pr, 0, false);
		cpufreq_set_cur_state(pr->id, state);
	} else {
		cpufreq_set_cur_state(pr->id, max_pstate);
		result = acpi_processor_set_throttling(pr,
						       state - max_pstate, false);
	}
	return result;
}

const struct thermal_cooling_device_ops processor_cooling_ops = {
	.get_max_state = processor_get_max_state,
	.get_cur_state = processor_get_cur_state,
	.set_cur_state = processor_set_cur_state,
};

int acpi_processor_thermal_init(struct acpi_processor *pr,
				struct acpi_device *device)
{
	int result = 0;

	pr->cdev = thermal_cooling_device_register("Processor", device,
						   &processor_cooling_ops);
	if (IS_ERR(pr->cdev)) {
		result = PTR_ERR(pr->cdev);
		return result;
	}

	dev_dbg(&device->dev, "registered as cooling_device%d\n",
		pr->cdev->id);

	result = sysfs_create_link(&device->dev.kobj,
				   &pr->cdev->device.kobj,
				   "thermal_cooling");
	if (result) {
		dev_err(&device->dev,
			"Failed to create sysfs link 'thermal_cooling'\n");
		goto err_thermal_unregister;
	}

	result = sysfs_create_link(&pr->cdev->device.kobj,
				   &device->dev.kobj,
				   "device");
	if (result) {
		dev_err(&pr->cdev->device,
			"Failed to create sysfs link 'device'\n");
		goto err_remove_sysfs_thermal;
	}

	return 0;

err_remove_sysfs_thermal:
	sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
err_thermal_unregister:
	thermal_cooling_device_unregister(pr->cdev);

	return result;
}

void acpi_processor_thermal_exit(struct acpi_processor *pr,
				 struct acpi_device *device)
{
	if (pr->cdev) {
		sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
		sysfs_remove_link(&pr->cdev->device.kobj, "device");
		thermal_cooling_device_unregister(pr->cdev);
		pr->cdev = NULL;
	}
}