| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * coretemp.c - Linux kernel module for hardware monitoring |
| 4 | * |
| 5 | * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz> |
| 6 | * |
| 7 | * Inspired from many hwmon drivers |
| 8 | */ |
| 9 | |
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 11 | |
| 12 | #include <linux/module.h> |
| 13 | #include <linux/init.h> |
| 14 | #include <linux/slab.h> |
| 15 | #include <linux/jiffies.h> |
| 16 | #include <linux/hwmon.h> |
| 17 | #include <linux/sysfs.h> |
| 18 | #include <linux/hwmon-sysfs.h> |
| 19 | #include <linux/err.h> |
| 20 | #include <linux/mutex.h> |
| 21 | #include <linux/list.h> |
| 22 | #include <linux/platform_device.h> |
| 23 | #include <linux/cpu.h> |
| 24 | #include <linux/smp.h> |
| 25 | #include <linux/moduleparam.h> |
| 26 | #include <linux/pci.h> |
| 27 | #include <asm/msr.h> |
| 28 | #include <asm/processor.h> |
| 29 | #include <asm/cpu_device_id.h> |
| 30 | #include <linux/sched/isolation.h> |
| 31 | |
| 32 | #define DRVNAME "coretemp" |
| 33 | |
| 34 | /* |
| 35 | * force_tjmax only matters when TjMax can't be read from the CPU itself. |
| 36 | * When set, it replaces the driver's suboptimal heuristic. |
| 37 | */ |
| 38 | static int force_tjmax; |
| 39 | module_param_named(tjmax, force_tjmax, int, 0444); |
| 40 | MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); |
| 41 | |
| 42 | #define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ |
| 43 | #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ |
| 44 | #define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ |
| 45 | #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ |
| 46 | #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ |
| 47 | #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) |
| 48 | #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO) |
| 49 | |
#ifdef CONFIG_SMP
/* Iterate @i over all hyperthread siblings of @cpu */
#define for_each_sibling(i, cpu) \
	for_each_cpu(i, topology_sibling_cpumask(cpu))
#else
/* UP build: a CPU has no siblings, so the loop body never runs */
#define for_each_sibling(i, cpu) for (i = 0; false; )
#endif
| 56 | |
| 57 | /* |
| 58 | * Per-Core Temperature Data |
| 59 | * @tjmax: The static tjmax value when tjmax cannot be retrieved from |
| 60 | * IA32_TEMPERATURE_TARGET MSR. |
| 61 | * @last_updated: The time when the current temperature value was updated |
| 62 | * earlier (in jiffies). |
| 63 | * @cpu_core_id: The CPU Core from which temperature values should be read |
| 64 | * This value is passed as "id" field to rdmsr/wrmsr functions. |
| 65 | * @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS, |
| 66 | * from where the temperature values should be read. |
 * @attr_size: Total number of per-core attrs displayed in the sysfs.
| 68 | * @is_pkg_data: If this is 1, the temp_data holds pkgtemp data. |
| 69 | * Otherwise, temp_data holds coretemp data. |
| 70 | */ |
struct temp_data {
	int temp;			/* cached temperature, millidegrees C */
	int tjmax;			/* cached static TjMax; 0 while TjMax is read dynamically */
	unsigned long last_updated;	/* jiffies at the last temperature refresh */
	unsigned int cpu;		/* CPU used for the MSR accesses */
	u32 cpu_core_id;
	u32 status_reg;
	int attr_size;			/* number of entries used in sd_attrs[]/attrs[] */
	bool is_pkg_data;
	struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
	char attr_name[TOTAL_ATTRS][CORETEMP_NAME_LENGTH];
	struct attribute *attrs[TOTAL_ATTRS + 1];	/* NULL-terminated for sysfs */
	struct attribute_group attr_group;
	struct mutex update_lock;	/* serializes MSR reads and cached-value updates */
};
| 86 | |
/* Platform Data per Physical CPU (one instance per package/die zone) */
struct platform_data {
	struct device *hwmon_dev;	/* NULL until the zone's first CPU comes online */
	u16 pkg_id;			/* zone id, also used as the platform device id */
	u16 cpu_map[NUM_REAL_CORES];	/* ida slot -> topology core id */
	struct ida ida;			/* allocates core slots in core_data[] */
	struct cpumask cpumask;		/* online CPUs belonging to this zone */
	struct temp_data *core_data[MAX_CORE_DATA];	/* [1] = package, [2..] = cores */
	struct device_attribute name_attr;
};
| 97 | |
/* Static TjMax lookup keyed by PCI host bridge device ID */
struct tjmax_pci {
	unsigned int device;
	int tjmax;		/* millidegrees Celsius */
};

static const struct tjmax_pci tjmax_pci_table[] = {
	{ 0x0708, 110000 },	/* CE41x0 (Sodaville ) */
	{ 0x0c72, 102000 },	/* Atom S1240 (Centerton) */
	{ 0x0c73, 95000 },	/* Atom S1220 (Centerton) */
	{ 0x0c75, 95000 },	/* Atom S1260 (Centerton) */
};
| 109 | |
/* Static TjMax lookup keyed by substrings of the CPU model string */
struct tjmax {
	char const *id;
	int tjmax;		/* millidegrees Celsius */
};

static const struct tjmax tjmax_table[] = {
	{ "CPU 230", 100000 },	/* Model 0x1c, stepping 2 */
	{ "CPU 330", 125000 },	/* Model 0x1c, stepping 2 */
};
| 119 | |
/* Static TjMax lookup keyed by CPU model and (optionally) stepping */
struct tjmax_model {
	u8 model;
	u8 mask;		/* stepping to match, or ANY */
	int tjmax;		/* millidegrees Celsius */
};

#define ANY 0xff

static const struct tjmax_model tjmax_model_table[] = {
	{ 0x1c, 10, 100000 },	/* D4xx, K4xx, N4xx, D5xx, K5xx, N5xx */
	{ 0x1c, ANY, 90000 },	/* Z5xx, N2xx, possibly others
				 * Note: Also matches 230 and 330,
				 * which are covered by tjmax_table
				 */
	{ 0x26, ANY, 90000 },	/* Atom Tunnel Creek (Exx), Lincroft (Z6xx)
				 * Note: TjMax for E6xxT is 110C, but CPU type
				 * is undetectable by software
				 */
	{ 0x27, ANY, 90000 },	/* Atom Medfield (Z2460) */
	{ 0x35, ANY, 90000 },	/* Atom Clover Trail/Cloverview (Z27x0) */
	{ 0x36, ANY, 100000 },	/* Atom Cedar Trail/Cedarview (N2xxx, D2xxx)
				 * Also matches S12x0 (stepping 9), covered by
				 * PCI table
				 */
};
| 145 | |
/*
 * Guess TjMax for CPUs that do not report it in
 * MSR_IA32_TEMPERATURE_TARGET. Lookup order: PCI host bridge ID table,
 * model-string matches, model/stepping matches, then a mobile/desktop
 * heuristic based on MSRs 0x17 and 0xEE.
 *
 * @c:   cpuinfo of the CPU being probed
 * @id:  CPU number used for the rdmsr_safe_on_cpu() calls
 * @dev: device used for warning/notice messages
 *
 * Returns TjMax in millidegrees Celsius; the result may be a guess.
 */
static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
{
	/* The 100C is default for both mobile and non mobile CPUs */

	int tjmax = 100000;
	int tjmax_ee = 85000;
	int usemsr_ee = 1;
	int err;
	u32 eax, edx;
	int i;
	u16 devfn = PCI_DEVFN(0, 0);
	struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn);

	/*
	 * Explicit tjmax table entries override heuristics.
	 * First try PCI host bridge IDs, followed by model ID strings
	 * and model/stepping information.
	 */
	if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) {
		for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) {
			if (host_bridge->device == tjmax_pci_table[i].device) {
				pci_dev_put(host_bridge);
				return tjmax_pci_table[i].tjmax;
			}
		}
	}
	pci_dev_put(host_bridge);	/* pci_dev_put(NULL) is a no-op */

	for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) {
		if (strstr(c->x86_model_id, tjmax_table[i].id))
			return tjmax_table[i].tjmax;
	}

	for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) {
		const struct tjmax_model *tm = &tjmax_model_table[i];
		if (c->x86_model == tm->model &&
		    (tm->mask == ANY || c->x86_stepping == tm->mask))
			return tm->tjmax;
	}

	/* Early chips have no MSR for TjMax */

	if (c->x86_model == 0xf && c->x86_stepping < 4)
		usemsr_ee = 0;

	if (c->x86_model > 0xe && usemsr_ee) {
		u8 platform_id;

		/*
		 * Now we can detect the mobile CPU using Intel provided table
		 * http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
		 * For Core2 cores, check MSR 0x17, bit 28 1 = Mobile CPU
		 */
		err = rdmsr_safe_on_cpu(id, 0x17, &eax, &edx);
		if (err) {
			dev_warn(dev,
				 "Unable to access MSR 0x17, assuming desktop"
				 " CPU\n");
			usemsr_ee = 0;
		} else if (c->x86_model < 0x17 && !(eax & 0x10000000)) {
			/*
			 * Trust bit 28 up to Penryn, I could not find any
			 * documentation on that; if you happen to know
			 * someone at Intel please ask
			 */
			usemsr_ee = 0;
		} else {
			/* Platform ID bits 52:50 (EDX starts at bit 32) */
			platform_id = (edx >> 18) & 0x7;

			/*
			 * Mobile Penryn CPU seems to be platform ID 7 or 5
			 * (guesswork)
			 */
			if (c->x86_model == 0x17 &&
			    (platform_id == 5 || platform_id == 7)) {
				/*
				 * If MSR EE bit is set, set it to 90 degrees C,
				 * otherwise 105 degrees C
				 */
				tjmax_ee = 90000;
				tjmax = 105000;
			}
		}
	}

	if (usemsr_ee) {
		err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx);
		if (err) {
			dev_warn(dev,
				 "Unable to access MSR 0xEE, for Tjmax, left"
				 " at default\n");
		} else if (eax & 0x40000000) {
			/* The EE bit selects the lower TjMax variant */
			tjmax = tjmax_ee;
		}
	} else if (tjmax == 100000) {
		/*
		 * If we don't use msr EE it means we are desktop CPU
		 * (with exception of Atom)
		 */
		dev_warn(dev, "Using relative temperature scale!\n");
	}

	return tjmax;
}
| 251 | |
| 252 | static bool cpu_has_tjmax(struct cpuinfo_x86 *c) |
| 253 | { |
| 254 | u8 model = c->x86_model; |
| 255 | |
| 256 | return model > 0xe && |
| 257 | model != 0x1c && |
| 258 | model != 0x26 && |
| 259 | model != 0x27 && |
| 260 | model != 0x35 && |
| 261 | model != 0x36; |
| 262 | } |
| 263 | |
/*
 * Return TjMax for tdata->cpu in millidegrees Celsius.
 *
 * Reads MSR_IA32_TEMPERATURE_TARGET if possible. Otherwise falls back
 * to the "tjmax" module parameter or to the adjust_tjmax() heuristics,
 * caching that static result in tdata->tjmax for later calls.
 */
static int get_tjmax(struct temp_data *tdata, struct device *dev)
{
	struct cpuinfo_x86 *c = &cpu_data(tdata->cpu);
	int err;
	u32 eax, edx;
	u32 val;

	/* use static tjmax once it is set */
	if (tdata->tjmax)
		return tdata->tjmax;

	/*
	 * A new feature of current Intel(R) processors, the
	 * IA32_TEMPERATURE_TARGET contains the TjMax value
	 */
	err = rdmsr_safe_on_cpu(tdata->cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
	if (err) {
		/* Only warn on CPUs expected to provide the MSR */
		if (cpu_has_tjmax(c))
			dev_warn(dev, "Unable to read TjMax from CPU %u\n", tdata->cpu);
	} else {
		val = (eax >> 16) & 0xff;
		/* A zero TjMax field is treated as invalid; fall back below */
		if (val)
			return val * 1000;
	}

	if (force_tjmax) {
		dev_notice(dev, "TjMax forced to %d degrees C by user\n",
			   force_tjmax);
		tdata->tjmax = force_tjmax * 1000;
	} else {
		/*
		 * An assumption is made for early CPUs and unreadable MSR.
		 * NOTE: the calculated value may not be correct.
		 */
		tdata->tjmax = adjust_tjmax(c, tdata->cpu, dev);
	}
	return tdata->tjmax;
}
| 302 | |
| 303 | static int get_ttarget(struct temp_data *tdata, struct device *dev) |
| 304 | { |
| 305 | u32 eax, edx; |
| 306 | int tjmax, ttarget_offset, ret; |
| 307 | |
| 308 | /* |
| 309 | * ttarget is valid only if tjmax can be retrieved from |
| 310 | * MSR_IA32_TEMPERATURE_TARGET |
| 311 | */ |
| 312 | if (tdata->tjmax) |
| 313 | return -ENODEV; |
| 314 | |
| 315 | ret = rdmsr_safe_on_cpu(tdata->cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); |
| 316 | if (ret) |
| 317 | return ret; |
| 318 | |
| 319 | tjmax = (eax >> 16) & 0xff; |
| 320 | |
| 321 | /* Read the still undocumented bits 8:15 of IA32_TEMPERATURE_TARGET. */ |
| 322 | ttarget_offset = (eax >> 8) & 0xff; |
| 323 | |
| 324 | return (tjmax - ttarget_offset) * 1000; |
| 325 | } |
| 326 | |
| 327 | /* Keep track of how many zone pointers we allocated in init() */ |
| 328 | static int max_zones __read_mostly; |
| 329 | /* Array of zone pointers. Serialized by cpu hotplug lock */ |
| 330 | static struct platform_device **zone_devices; |
| 331 | |
| 332 | static ssize_t show_label(struct device *dev, |
| 333 | struct device_attribute *devattr, char *buf) |
| 334 | { |
| 335 | struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); |
| 336 | struct platform_data *pdata = dev_get_drvdata(dev); |
| 337 | struct temp_data *tdata = pdata->core_data[attr->index]; |
| 338 | |
| 339 | if (tdata->is_pkg_data) |
| 340 | return sprintf(buf, "Package id %u\n", pdata->pkg_id); |
| 341 | |
| 342 | return sprintf(buf, "Core %u\n", tdata->cpu_core_id); |
| 343 | } |
| 344 | |
| 345 | static ssize_t show_crit_alarm(struct device *dev, |
| 346 | struct device_attribute *devattr, char *buf) |
| 347 | { |
| 348 | u32 eax, edx; |
| 349 | struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); |
| 350 | struct platform_data *pdata = dev_get_drvdata(dev); |
| 351 | struct temp_data *tdata = pdata->core_data[attr->index]; |
| 352 | |
| 353 | mutex_lock(&tdata->update_lock); |
| 354 | rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx); |
| 355 | mutex_unlock(&tdata->update_lock); |
| 356 | |
| 357 | return sprintf(buf, "%d\n", (eax >> 5) & 1); |
| 358 | } |
| 359 | |
| 360 | static ssize_t show_tjmax(struct device *dev, |
| 361 | struct device_attribute *devattr, char *buf) |
| 362 | { |
| 363 | struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); |
| 364 | struct platform_data *pdata = dev_get_drvdata(dev); |
| 365 | struct temp_data *tdata = pdata->core_data[attr->index]; |
| 366 | int tjmax; |
| 367 | |
| 368 | mutex_lock(&tdata->update_lock); |
| 369 | tjmax = get_tjmax(tdata, dev); |
| 370 | mutex_unlock(&tdata->update_lock); |
| 371 | |
| 372 | return sprintf(buf, "%d\n", tjmax); |
| 373 | } |
| 374 | |
| 375 | static ssize_t show_ttarget(struct device *dev, |
| 376 | struct device_attribute *devattr, char *buf) |
| 377 | { |
| 378 | struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); |
| 379 | struct platform_data *pdata = dev_get_drvdata(dev); |
| 380 | struct temp_data *tdata = pdata->core_data[attr->index]; |
| 381 | int ttarget; |
| 382 | |
| 383 | mutex_lock(&tdata->update_lock); |
| 384 | ttarget = get_ttarget(tdata, dev); |
| 385 | mutex_unlock(&tdata->update_lock); |
| 386 | |
| 387 | if (ttarget < 0) |
| 388 | return ttarget; |
| 389 | return sprintf(buf, "%d\n", ttarget); |
| 390 | } |
| 391 | |
/*
 * sysfs tempX_input: current temperature in millidegrees Celsius,
 * computed as TjMax minus the digital readout of the thermal status
 * MSR. Readings are cached and refreshed at most once per second.
 */
static ssize_t show_temp(struct device *dev,
			 struct device_attribute *devattr, char *buf)
{
	u32 eax, edx;
	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
	struct platform_data *pdata = dev_get_drvdata(dev);
	struct temp_data *tdata = pdata->core_data[attr->index];
	int tjmax;

	mutex_lock(&tdata->update_lock);

	tjmax = get_tjmax(tdata, dev);
	/* Check whether the time interval has elapsed */
	if (time_after(jiffies, tdata->last_updated + HZ)) {
		rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
		/*
		 * Ignore the valid bit. In all observed cases the register
		 * value is either low or zero if the valid bit is 0.
		 * Return it instead of reporting an error which doesn't
		 * really help at all.
		 */
		tdata->temp = tjmax - ((eax >> 16) & 0x7f) * 1000;
		tdata->last_updated = jiffies;
	}

	mutex_unlock(&tdata->update_lock);
	return sprintf(buf, "%d\n", tdata->temp);
}
| 420 | |
| 421 | static int create_core_attrs(struct temp_data *tdata, struct device *dev, |
| 422 | int index) |
| 423 | { |
| 424 | int i; |
| 425 | static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev, |
| 426 | struct device_attribute *devattr, char *buf) = { |
| 427 | show_label, show_crit_alarm, show_temp, show_tjmax, |
| 428 | show_ttarget }; |
| 429 | static const char *const suffixes[TOTAL_ATTRS] = { |
| 430 | "label", "crit_alarm", "input", "crit", "max" |
| 431 | }; |
| 432 | |
| 433 | for (i = 0; i < tdata->attr_size; i++) { |
| 434 | /* |
| 435 | * We map the attr number to core id of the CPU |
| 436 | * The attr number is always core id + 2 |
| 437 | * The Pkgtemp will always show up as temp1_*, if available |
| 438 | */ |
| 439 | int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2; |
| 440 | |
| 441 | snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, |
| 442 | "temp%d_%s", attr_no, suffixes[i]); |
| 443 | sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr); |
| 444 | tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i]; |
| 445 | tdata->sd_attrs[i].dev_attr.attr.mode = 0444; |
| 446 | tdata->sd_attrs[i].dev_attr.show = rd_ptr[i]; |
| 447 | tdata->sd_attrs[i].index = index; |
| 448 | tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr; |
| 449 | } |
| 450 | tdata->attr_group.attrs = tdata->attrs; |
| 451 | return sysfs_create_group(&dev->kobj, &tdata->attr_group); |
| 452 | } |
| 453 | |
| 454 | |
| 455 | static int chk_ucode_version(unsigned int cpu) |
| 456 | { |
| 457 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 458 | |
| 459 | /* |
| 460 | * Check if we have problem with errata AE18 of Core processors: |
| 461 | * Readings might stop update when processor visited too deep sleep, |
| 462 | * fixed for stepping D0 (6EC). |
| 463 | */ |
| 464 | if (c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { |
| 465 | pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); |
| 466 | return -ENODEV; |
| 467 | } |
| 468 | return 0; |
| 469 | } |
| 470 | |
| 471 | static struct platform_device *coretemp_get_pdev(unsigned int cpu) |
| 472 | { |
| 473 | int id = topology_logical_die_id(cpu); |
| 474 | |
| 475 | if (id >= 0 && id < max_zones) |
| 476 | return zone_devices[id]; |
| 477 | return NULL; |
| 478 | } |
| 479 | |
| 480 | static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag) |
| 481 | { |
| 482 | struct temp_data *tdata; |
| 483 | |
| 484 | tdata = kzalloc(sizeof(struct temp_data), GFP_KERNEL); |
| 485 | if (!tdata) |
| 486 | return NULL; |
| 487 | |
| 488 | tdata->status_reg = pkg_flag ? MSR_IA32_PACKAGE_THERM_STATUS : |
| 489 | MSR_IA32_THERM_STATUS; |
| 490 | tdata->is_pkg_data = pkg_flag; |
| 491 | tdata->cpu = cpu; |
| 492 | tdata->cpu_core_id = topology_core_id(cpu); |
| 493 | tdata->attr_size = MAX_CORE_ATTRS; |
| 494 | mutex_init(&tdata->update_lock); |
| 495 | return tdata; |
| 496 | } |
| 497 | |
/*
 * Set up the sensor interface for one core of the zone device @pdev, or
 * for the package sensor when @pkg_flag is set.
 *
 * Returns 0 on success (or silently when @cpu is excluded from
 * housekeeping), a negative errno otherwise.
 */
static int create_core_data(struct platform_device *pdev, unsigned int cpu,
			    int pkg_flag)
{
	struct temp_data *tdata;
	struct platform_data *pdata = platform_get_drvdata(pdev);
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	u32 eax, edx;
	int err, index;

	/* Do not expose sensors on CPUs isolated from housekeeping work */
	if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
		return 0;

	/*
	 * Get the index of tdata in pdata->core_data[]
	 * tdata for package: pdata->core_data[1]
	 * tdata for core: pdata->core_data[2] .. pdata->core_data[NUM_REAL_CORES + 1]
	 */
	if (pkg_flag) {
		index = PKG_SYSFS_ATTR_NO;
	} else {
		index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL);
		if (index < 0)
			return index;

		/* Remember the core id so the offline path can find this slot */
		pdata->cpu_map[index] = topology_core_id(cpu);
		index += BASE_SYSFS_ATTR_NO;
	}

	tdata = init_temp_data(cpu, pkg_flag);
	if (!tdata) {
		err = -ENOMEM;
		goto ida_free;
	}

	/* Test if we can access the status register */
	err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
	if (err)
		goto exit_free;

	/* Make sure tdata->tjmax is a valid indicator for dynamic/static tjmax */
	get_tjmax(tdata, &pdev->dev);

	/*
	 * The target temperature is available on older CPUs but not in the
	 * MSR_IA32_TEMPERATURE_TARGET register. Atoms don't have the register
	 * at all.
	 */
	if (c->x86_model > 0xe && c->x86_model != 0x1c)
		if (get_ttarget(tdata, &pdev->dev) >= 0)
			tdata->attr_size++;	/* also expose tempX_max */

	pdata->core_data[index] = tdata;

	/* Create sysfs interfaces */
	err = create_core_attrs(tdata, pdata->hwmon_dev, index);
	if (err)
		goto exit_free;

	return 0;
exit_free:
	pdata->core_data[index] = NULL;
	kfree(tdata);
ida_free:
	if (!pkg_flag)
		ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO);
	return err;
}
| 565 | |
| 566 | static void |
| 567 | coretemp_add_core(struct platform_device *pdev, unsigned int cpu, int pkg_flag) |
| 568 | { |
| 569 | if (create_core_data(pdev, cpu, pkg_flag)) |
| 570 | dev_err(&pdev->dev, "Adding Core %u failed\n", cpu); |
| 571 | } |
| 572 | |
| 573 | static void coretemp_remove_core(struct platform_data *pdata, int indx) |
| 574 | { |
| 575 | struct temp_data *tdata = pdata->core_data[indx]; |
| 576 | |
| 577 | /* if we errored on add then this is already gone */ |
| 578 | if (!tdata) |
| 579 | return; |
| 580 | |
| 581 | /* Remove the sysfs attributes */ |
| 582 | sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); |
| 583 | |
| 584 | kfree(pdata->core_data[indx]); |
| 585 | pdata->core_data[indx] = NULL; |
| 586 | |
| 587 | if (indx >= BASE_SYSFS_ATTR_NO) |
| 588 | ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO); |
| 589 | } |
| 590 | |
/*
 * Allocate and register the platform device plus per-zone data for
 * @zoneid. The hwmon device itself is registered later, when the first
 * CPU of the zone comes online.
 */
static int coretemp_device_add(int zoneid)
{
	struct platform_device *pdev;
	struct platform_data *pdata;
	int err;

	/* Initialize the per-zone data structures */
	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
	if (!pdata)
		return -ENOMEM;

	pdata->pkg_id = zoneid;
	ida_init(&pdata->ida);

	pdev = platform_device_alloc(DRVNAME, zoneid);
	if (!pdev) {
		err = -ENOMEM;
		goto err_free_pdata;
	}

	err = platform_device_add(pdev);
	if (err)
		goto err_put_dev;

	platform_set_drvdata(pdev, pdata);
	zone_devices[zoneid] = pdev;
	return 0;

err_put_dev:
	/* Drops the reference taken by platform_device_alloc() */
	platform_device_put(pdev);
err_free_pdata:
	kfree(pdata);
	return err;
}
| 625 | |
| 626 | static void coretemp_device_remove(int zoneid) |
| 627 | { |
| 628 | struct platform_device *pdev = zone_devices[zoneid]; |
| 629 | struct platform_data *pdata = platform_get_drvdata(pdev); |
| 630 | |
| 631 | ida_destroy(&pdata->ida); |
| 632 | kfree(pdata); |
| 633 | platform_device_unregister(pdev); |
| 634 | } |
| 635 | |
/*
 * CPU hotplug online callback. On the first online CPU of a zone this
 * registers the hwmon device and, when package thermal status (PTS) is
 * supported, the package temperature interface. For the first online
 * thread of each core it adds the per-core interface.
 */
static int coretemp_cpu_online(unsigned int cpu)
{
	struct platform_device *pdev = coretemp_get_pdev(cpu);
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct platform_data *pdata;

	/*
	 * Don't execute this on resume as the offline callback did
	 * not get executed on suspend.
	 */
	if (cpuhp_tasks_frozen)
		return 0;

	/*
	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
	 * sensors. We check this bit only, all the early CPUs
	 * without thermal sensors will be filtered out.
	 */
	if (!cpu_has(c, X86_FEATURE_DTHERM))
		return -ENODEV;

	pdata = platform_get_drvdata(pdev);
	if (!pdata->hwmon_dev) {
		struct device *hwmon;

		/* Check the microcode version of the CPU */
		if (chk_ucode_version(cpu))
			return -EINVAL;

		/*
		 * Alright, we have DTS support.
		 * We are bringing the _first_ core in this pkg
		 * online. So, initialize per-pkg data structures and
		 * then bring this core online.
		 */
		hwmon = hwmon_device_register_with_groups(&pdev->dev, DRVNAME,
							  pdata, NULL);
		if (IS_ERR(hwmon))
			return PTR_ERR(hwmon);
		pdata->hwmon_dev = hwmon;

		/*
		 * Check whether pkgtemp support is available.
		 * If so, add interfaces for pkgtemp.
		 */
		if (cpu_has(c, X86_FEATURE_PTS))
			coretemp_add_core(pdev, cpu, 1);
	}

	/*
	 * Check whether a thread sibling is already online. If not add the
	 * interface for this CPU core.
	 */
	if (!cpumask_intersects(&pdata->cpumask, topology_sibling_cpumask(cpu)))
		coretemp_add_core(pdev, cpu, 0);

	cpumask_set_cpu(cpu, &pdata->cpumask);
	return 0;
}
| 695 | |
/*
 * CPU hotplug offline callback. Migrates the core and package sensor
 * target CPU to a still-online sibling when possible, otherwise tears
 * down the respective sysfs interfaces; unregisters the hwmon device
 * when the last CPU of the zone goes away.
 */
static int coretemp_cpu_offline(unsigned int cpu)
{
	struct platform_device *pdev = coretemp_get_pdev(cpu);
	struct platform_data *pd;
	struct temp_data *tdata;
	int i, indx = -1, target;

	/* No need to tear down any interfaces for suspend */
	if (cpuhp_tasks_frozen)
		return 0;

	/* If the physical CPU device does not exist, just return */
	pd = platform_get_drvdata(pdev);
	if (!pd->hwmon_dev)
		return 0;

	/* Find this core's slot via the core id recorded on the add path */
	for (i = 0; i < NUM_REAL_CORES; i++) {
		if (pd->cpu_map[i] == topology_core_id(cpu)) {
			indx = i + BASE_SYSFS_ATTR_NO;
			break;
		}
	}

	/* Too many cores and this core is not populated, just return */
	if (indx < 0)
		return 0;

	tdata = pd->core_data[indx];

	cpumask_clear_cpu(cpu, &pd->cpumask);

	/*
	 * If this is the last thread sibling, remove the CPU core
	 * interface, If there is still a sibling online, transfer the
	 * target cpu of that core interface to it.
	 */
	target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
	if (target >= nr_cpu_ids) {
		coretemp_remove_core(pd, indx);
	} else if (tdata && tdata->cpu == cpu) {
		/* Take the lock so in-flight MSR reads see a consistent cpu */
		mutex_lock(&tdata->update_lock);
		tdata->cpu = target;
		mutex_unlock(&tdata->update_lock);
	}

	/*
	 * If all cores in this pkg are offline, remove the interface.
	 */
	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
	if (cpumask_empty(&pd->cpumask)) {
		if (tdata)
			coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
		hwmon_device_unregister(pd->hwmon_dev);
		pd->hwmon_dev = NULL;
		return 0;
	}

	/*
	 * Check whether this core is the target for the package
	 * interface. We need to assign it to some other cpu.
	 */
	if (tdata && tdata->cpu == cpu) {
		target = cpumask_first(&pd->cpumask);
		mutex_lock(&tdata->update_lock);
		tdata->cpu = target;
		mutex_unlock(&tdata->update_lock);
	}
	return 0;
}
/* Match any Intel CPU advertising a digital thermal sensor (DTHERM) */
static const struct x86_cpu_id __initconst coretemp_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_DTHERM, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, coretemp_ids);

/* Dynamic hotplug state returned by cpuhp_setup_state(), used at exit */
static enum cpuhp_state coretemp_hp_online;
| 772 | |
/*
 * Module init: create one platform device per package/die zone up front,
 * then register the CPU hotplug callbacks which add/remove the actual
 * hwmon interfaces as CPUs come and go.
 */
static int __init coretemp_init(void)
{
	int i, err;

	/*
	 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
	 * sensors. We check this bit only, all the early CPUs
	 * without thermal sensors will be filtered out.
	 */
	if (!x86_match_cpu(coretemp_ids))
		return -ENODEV;

	max_zones = topology_max_packages() * topology_max_dies_per_package();
	zone_devices = kcalloc(max_zones, sizeof(struct platform_device *),
			       GFP_KERNEL);
	if (!zone_devices)
		return -ENOMEM;

	for (i = 0; i < max_zones; i++) {
		err = coretemp_device_add(i);
		if (err)
			goto outzone;
	}

	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hwmon/coretemp:online",
				coretemp_cpu_online, coretemp_cpu_offline);
	if (err < 0)
		goto outzone;
	coretemp_hp_online = err;
	return 0;

outzone:
	/*
	 * Unwind the zone devices created so far; i == max_zones when
	 * cpuhp_setup_state() failed, removing all of them.
	 */
	while (i--)
		coretemp_device_remove(i);
	kfree(zone_devices);
	return err;
}
| 810 | module_init(coretemp_init) |
| 811 | |
| 812 | static void __exit coretemp_exit(void) |
| 813 | { |
| 814 | int i; |
| 815 | |
| 816 | cpuhp_remove_state(coretemp_hp_online); |
| 817 | for (i = 0; i < max_zones; i++) |
| 818 | coretemp_device_remove(i); |
| 819 | kfree(zone_devices); |
| 820 | } |
| 821 | module_exit(coretemp_exit) |
| 822 | |
| 823 | MODULE_AUTHOR("Rudolf Marek <r.marek@assembler.cz>"); |
| 824 | MODULE_DESCRIPTION("Intel Core temperature monitor"); |
| 825 | MODULE_LICENSE("GPL"); |