Commit | Line | Data |
---|---|---|
512d1027 AH |
1 | /* |
2 | * fam15h_power.c - AMD Family 15h processor power monitoring | |
3 | * | |
4 | * Copyright (c) 2011 Advanced Micro Devices, Inc. | |
d034fbf0 | 5 | * Author: Andreas Herrmann <herrmann.der.user@googlemail.com> |
512d1027 AH |
6 | * |
7 | * | |
8 | * This driver is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License; either | |
10 | * version 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This driver is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
15 | * See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this driver; if not, see <http://www.gnu.org/licenses/>. | |
19 | */ | |
20 | ||
21 | #include <linux/err.h> | |
22 | #include <linux/hwmon.h> | |
23 | #include <linux/hwmon-sysfs.h> | |
24 | #include <linux/init.h> | |
25 | #include <linux/module.h> | |
26 | #include <linux/pci.h> | |
27 | #include <linux/bitops.h> | |
fa794344 HR |
28 | #include <linux/cpu.h> |
29 | #include <linux/cpumask.h> | |
512d1027 | 30 | #include <asm/processor.h> |
3b5ea47d | 31 | #include <asm/msr.h> |
512d1027 AH |
32 | |
33 | MODULE_DESCRIPTION("AMD Family 15h CPU processor power monitor"); | |
d034fbf0 | 34 | MODULE_AUTHOR("Andreas Herrmann <herrmann.der.user@googlemail.com>"); |
512d1027 AH |
35 | MODULE_LICENSE("GPL"); |
36 | ||
/* D18F3: config-space register read through PCI function 3 */
#define REG_NORTHBRIDGE_CAP		0xe8

/* D18F4: config-space register read through the bound function-4 device */
#define REG_PROCESSOR_TDP		0x1b8

/* D18F5: config-space registers read/written through PCI function 5 */
#define REG_TDP_RUNNING_AVERAGE		0xe0
#define REG_TDP_LIMIT3			0xe8

/*
 * Base size of the attribute pointer array allocated in
 * fam15h_power_init_attrs(); one more slot is added when power1_input
 * is exposed.
 */
#define FAM15H_MIN_NUM_ATTRS		2
/* Size of the groups[] array in struct fam15h_power_data */
#define FAM15H_NUM_GROUPS		2
/* Size of the per-compute-unit arrays in struct fam15h_power_data */
#define MAX_CUS				8

/* MSRs read with rdmsrl_safe() for the accumulated-power support */
#define MSR_F15H_CU_PWR_ACCUMULATOR	0xc001007a
#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR	0xc001007b
#define MSR_F15H_PTSC			0xc0010280

/* Device ID defined locally and used in fam15h_power_id_table[] */
#define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
56 | ||
/*
 * Per-device driver state. Allocated zeroed in fam15h_power_probe(),
 * filled in by fam15h_power_init_data() and passed to the hwmon core as
 * driver data.
 */
struct fam15h_power_data {
	/* PCI device this driver instance is bound to */
	struct pci_dev *pdev;
	/* conversion factor decoded from REG_TDP_LIMIT3 */
	unsigned int tdp_to_watts;
	/* upper 16 bits of REG_PROCESSOR_TDP */
	unsigned int base_tdp;
	/* value reported through power1_crit, stored in microwatts */
	unsigned int processor_pwr_watts;
	/* CPUID Fn8000_0007:ECX, stored only when EDX[12] is set */
	unsigned int cpu_pwr_sample_ratio;
	/* groups[0] points at 'group'; the other entry stays NULL */
	const struct attribute_group *groups[FAM15H_NUM_GROUPS];
	/* attribute group whose attrs are built in fam15h_power_init_attrs() */
	struct attribute_group group;
	/* maximum accumulated power of a compute unit */
	u64 max_cu_acc_power;
	/* accumulated power of the compute units */
	u64 cu_acc_power[MAX_CUS];
	/* performance timestamp counter */
	u64 cpu_sw_pwr_ptsc[MAX_CUS];
};
72 | ||
73 | static ssize_t show_power(struct device *dev, | |
74 | struct device_attribute *attr, char *buf) | |
75 | { | |
76 | u32 val, tdp_limit, running_avg_range; | |
77 | s32 running_avg_capture; | |
78 | u64 curr_pwr_watts; | |
512d1027 | 79 | struct fam15h_power_data *data = dev_get_drvdata(dev); |
562dc973 | 80 | struct pci_dev *f4 = data->pdev; |
512d1027 AH |
81 | |
82 | pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), | |
83 | REG_TDP_RUNNING_AVERAGE, &val); | |
e9cd4d55 HR |
84 | |
85 | /* | |
86 | * On Carrizo and later platforms, TdpRunAvgAccCap bit field | |
87 | * is extended to 4:31 from 4:25. | |
88 | */ | |
89 | if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) { | |
90 | running_avg_capture = val >> 4; | |
91 | running_avg_capture = sign_extend32(running_avg_capture, 27); | |
92 | } else { | |
93 | running_avg_capture = (val >> 4) & 0x3fffff; | |
94 | running_avg_capture = sign_extend32(running_avg_capture, 21); | |
95 | } | |
96 | ||
941a956b | 97 | running_avg_range = (val & 0xf) + 1; |
512d1027 AH |
98 | |
99 | pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), | |
100 | REG_TDP_LIMIT3, &val); | |
101 | ||
60dee3ca GK |
102 | /* |
103 | * On Carrizo and later platforms, ApmTdpLimit bit field | |
104 | * is extended to 16:31 from 16:28. | |
105 | */ | |
106 | if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) | |
107 | tdp_limit = val >> 16; | |
108 | else | |
109 | tdp_limit = (val >> 16) & 0x1fff; | |
110 | ||
62867d49 GR |
111 | curr_pwr_watts = ((u64)(tdp_limit + |
112 | data->base_tdp)) << running_avg_range; | |
941a956b | 113 | curr_pwr_watts -= running_avg_capture; |
512d1027 AH |
114 | curr_pwr_watts *= data->tdp_to_watts; |
115 | ||
116 | /* | |
117 | * Convert to microWatt | |
118 | * | |
119 | * power is in Watt provided as fixed point integer with | |
120 | * scaling factor 1/(2^16). For conversion we use | |
121 | * (10^6)/(2^16) = 15625/(2^10) | |
122 | */ | |
941a956b | 123 | curr_pwr_watts = (curr_pwr_watts * 15625) >> (10 + running_avg_range); |
512d1027 AH |
124 | return sprintf(buf, "%u\n", (unsigned int) curr_pwr_watts); |
125 | } | |
126 | static DEVICE_ATTR(power1_input, S_IRUGO, show_power, NULL); | |
127 | ||
128 | static ssize_t show_power_crit(struct device *dev, | |
129 | struct device_attribute *attr, char *buf) | |
130 | { | |
131 | struct fam15h_power_data *data = dev_get_drvdata(dev); | |
132 | ||
133 | return sprintf(buf, "%u\n", data->processor_pwr_watts); | |
134 | } | |
135 | static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); | |
136 | ||
fa794344 HR |
137 | static void do_read_registers_on_cu(void *_data) |
138 | { | |
139 | struct fam15h_power_data *data = _data; | |
140 | int cpu, cu; | |
141 | ||
142 | cpu = smp_processor_id(); | |
143 | ||
144 | /* | |
145 | * With the new x86 topology modelling, cpu core id actually | |
146 | * is compute unit id. | |
147 | */ | |
148 | cu = cpu_data(cpu).cpu_core_id; | |
149 | ||
150 | rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]); | |
cdb9e110 | 151 | rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]); |
fa794344 HR |
152 | } |
153 | ||
154 | /* | |
155 | * This function is only able to be called when CPUID | |
156 | * Fn8000_0007:EDX[12] is set. | |
157 | */ | |
158 | static int read_registers(struct fam15h_power_data *data) | |
159 | { | |
160 | int this_cpu, ret, cpu; | |
161 | int core, this_core; | |
162 | cpumask_var_t mask; | |
163 | ||
164 | ret = zalloc_cpumask_var(&mask, GFP_KERNEL); | |
165 | if (!ret) | |
166 | return -ENOMEM; | |
167 | ||
168 | get_online_cpus(); | |
169 | this_cpu = smp_processor_id(); | |
170 | ||
171 | /* | |
172 | * Choose the first online core of each compute unit, and then | |
173 | * read their MSR value of power and ptsc in a single IPI, | |
174 | * because the MSR value of CPU core represent the compute | |
175 | * unit's. | |
176 | */ | |
177 | core = -1; | |
178 | ||
179 | for_each_online_cpu(cpu) { | |
180 | this_core = topology_core_id(cpu); | |
181 | ||
182 | if (this_core == core) | |
183 | continue; | |
184 | ||
185 | core = this_core; | |
186 | ||
187 | /* get any CPU on this compute unit */ | |
188 | cpumask_set_cpu(cpumask_any(topology_sibling_cpumask(cpu)), mask); | |
189 | } | |
190 | ||
191 | if (cpumask_test_cpu(this_cpu, mask)) | |
192 | do_read_registers_on_cu(data); | |
193 | ||
194 | smp_call_function_many(mask, do_read_registers_on_cu, data, true); | |
195 | put_online_cpus(); | |
196 | ||
197 | free_cpumask_var(mask); | |
198 | ||
199 | return 0; | |
200 | } | |
201 | ||
7deb14b1 HR |
202 | static int fam15h_power_init_attrs(struct pci_dev *pdev, |
203 | struct fam15h_power_data *data) | |
961a2378 | 204 | { |
7deb14b1 HR |
205 | int n = FAM15H_MIN_NUM_ATTRS; |
206 | struct attribute **fam15h_power_attrs; | |
46f29c2b | 207 | struct cpuinfo_x86 *c = &boot_cpu_data; |
961a2378 | 208 | |
46f29c2b HR |
209 | if (c->x86 == 0x15 && |
210 | (c->x86_model <= 0xf || | |
eff2a945 | 211 | (c->x86_model >= 0x60 && c->x86_model <= 0x7f))) |
7deb14b1 | 212 | n += 1; |
961a2378 | 213 | |
7deb14b1 HR |
214 | fam15h_power_attrs = devm_kcalloc(&pdev->dev, n, |
215 | sizeof(*fam15h_power_attrs), | |
216 | GFP_KERNEL); | |
512d1027 | 217 | |
7deb14b1 HR |
218 | if (!fam15h_power_attrs) |
219 | return -ENOMEM; | |
220 | ||
221 | n = 0; | |
222 | fam15h_power_attrs[n++] = &dev_attr_power1_crit.attr; | |
46f29c2b HR |
223 | if (c->x86 == 0x15 && |
224 | (c->x86_model <= 0xf || | |
eff2a945 | 225 | (c->x86_model >= 0x60 && c->x86_model <= 0x7f))) |
7deb14b1 HR |
226 | fam15h_power_attrs[n++] = &dev_attr_power1_input.attr; |
227 | ||
228 | data->group.attrs = fam15h_power_attrs; | |
229 | ||
230 | return 0; | |
231 | } | |
512d1027 | 232 | |
d83e92b3 | 233 | static bool should_load_on_this_node(struct pci_dev *f4) |
512d1027 AH |
234 | { |
235 | u32 val; | |
236 | ||
237 | pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 3), | |
238 | REG_NORTHBRIDGE_CAP, &val); | |
239 | if ((val & BIT(29)) && ((val >> 30) & 3)) | |
240 | return false; | |
241 | ||
242 | return true; | |
243 | } | |
244 | ||
00250ec9 AP |
245 | /* |
246 | * Newer BKDG versions have an updated recommendation on how to properly | |
247 | * initialize the running average range (was: 0xE, now: 0x9). This avoids | |
248 | * counter saturations resulting in bogus power readings. | |
249 | * We correct this value ourselves to cope with older BIOSes. | |
250 | */ | |
/* Devices that tweak_runavg_range() is allowed to modify. */
static const struct pci_device_id affected_device[] = {
	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
	{ 0 }	/* terminator */
};
255 | ||
5f0ecb90 | 256 | static void tweak_runavg_range(struct pci_dev *pdev) |
00250ec9 AP |
257 | { |
258 | u32 val; | |
00250ec9 AP |
259 | |
260 | /* | |
261 | * let this quirk apply only to the current version of the | |
262 | * northbridge, since future versions may change the behavior | |
263 | */ | |
c3e40a99 | 264 | if (!pci_match_id(affected_device, pdev)) |
00250ec9 AP |
265 | return; |
266 | ||
267 | pci_bus_read_config_dword(pdev->bus, | |
268 | PCI_DEVFN(PCI_SLOT(pdev->devfn), 5), | |
269 | REG_TDP_RUNNING_AVERAGE, &val); | |
270 | if ((val & 0xf) != 0xe) | |
271 | return; | |
272 | ||
273 | val &= ~0xf; | |
274 | val |= 0x9; | |
275 | pci_bus_write_config_dword(pdev->bus, | |
276 | PCI_DEVFN(PCI_SLOT(pdev->devfn), 5), | |
277 | REG_TDP_RUNNING_AVERAGE, val); | |
278 | } | |
279 | ||
5f0ecb90 AH |
#ifdef CONFIG_PM
/*
 * PCI resume callback: re-applies tweak_runavg_range() to the device.
 * Always returns 0.
 * NOTE(review): presumably needed because the register value is not
 * preserved across suspend -- confirm.
 */
static int fam15h_power_resume(struct pci_dev *pdev)
{
	tweak_runavg_range(pdev);
	return 0;
}
#else
/* Without CONFIG_PM the driver's .resume hook is left NULL. */
#define fam15h_power_resume NULL
#endif
289 | ||
7deb14b1 HR |
290 | static int fam15h_power_init_data(struct pci_dev *f4, |
291 | struct fam15h_power_data *data) | |
512d1027 | 292 | { |
1ed32160 | 293 | u32 val, eax, ebx, ecx, edx; |
512d1027 | 294 | u64 tmp; |
7deb14b1 | 295 | int ret; |
512d1027 AH |
296 | |
297 | pci_read_config_dword(f4, REG_PROCESSOR_TDP, &val); | |
298 | data->base_tdp = val >> 16; | |
299 | tmp = val & 0xffff; | |
300 | ||
301 | pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), | |
302 | REG_TDP_LIMIT3, &val); | |
303 | ||
304 | data->tdp_to_watts = ((val & 0x3ff) << 6) | ((val >> 10) & 0x3f); | |
305 | tmp *= data->tdp_to_watts; | |
306 | ||
307 | /* result not allowed to be >= 256W */ | |
308 | if ((tmp >> 16) >= 256) | |
b55f3757 GR |
309 | dev_warn(&f4->dev, |
310 | "Bogus value for ProcessorPwrWatts (processor_pwr_watts>=%u)\n", | |
512d1027 AH |
311 | (unsigned int) (tmp >> 16)); |
312 | ||
313 | /* convert to microWatt */ | |
314 | data->processor_pwr_watts = (tmp * 15625) >> 10; | |
1ed32160 | 315 | |
7deb14b1 HR |
316 | ret = fam15h_power_init_attrs(f4, data); |
317 | if (ret) | |
318 | return ret; | |
319 | ||
1ed32160 HR |
320 | cpuid(0x80000007, &eax, &ebx, &ecx, &edx); |
321 | ||
322 | /* CPUID Fn8000_0007:EDX[12] indicates to support accumulated power */ | |
323 | if (!(edx & BIT(12))) | |
7deb14b1 | 324 | return 0; |
1ed32160 HR |
325 | |
326 | /* | |
327 | * determine the ratio of the compute unit power accumulator | |
328 | * sample period to the PTSC counter period by executing CPUID | |
329 | * Fn8000_0007:ECX | |
330 | */ | |
331 | data->cpu_pwr_sample_ratio = ecx; | |
7deb14b1 | 332 | |
3b5ea47d HR |
333 | if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &tmp)) { |
334 | pr_err("Failed to read max compute unit power accumulator MSR\n"); | |
335 | return -ENODEV; | |
336 | } | |
337 | ||
338 | data->max_cu_acc_power = tmp; | |
339 | ||
fa794344 | 340 | return read_registers(data); |
512d1027 AH |
341 | } |
342 | ||
6c931ae1 | 343 | static int fam15h_power_probe(struct pci_dev *pdev, |
7deb14b1 | 344 | const struct pci_device_id *id) |
512d1027 AH |
345 | { |
346 | struct fam15h_power_data *data; | |
87432a2e | 347 | struct device *dev = &pdev->dev; |
562dc973 | 348 | struct device *hwmon_dev; |
7deb14b1 | 349 | int ret; |
512d1027 | 350 | |
00250ec9 AP |
351 | /* |
352 | * though we ignore every other northbridge, we still have to | |
353 | * do the tweaking on _each_ node in MCM processors as the counters | |
354 | * are working hand-in-hand | |
355 | */ | |
356 | tweak_runavg_range(pdev); | |
357 | ||
d83e92b3 | 358 | if (!should_load_on_this_node(pdev)) |
87432a2e GR |
359 | return -ENODEV; |
360 | ||
361 | data = devm_kzalloc(dev, sizeof(struct fam15h_power_data), GFP_KERNEL); | |
362 | if (!data) | |
363 | return -ENOMEM; | |
512d1027 | 364 | |
7deb14b1 HR |
365 | ret = fam15h_power_init_data(pdev, data); |
366 | if (ret) | |
367 | return ret; | |
368 | ||
562dc973 | 369 | data->pdev = pdev; |
512d1027 | 370 | |
7deb14b1 HR |
371 | data->groups[0] = &data->group; |
372 | ||
562dc973 AL |
373 | hwmon_dev = devm_hwmon_device_register_with_groups(dev, "fam15h_power", |
374 | data, | |
7deb14b1 | 375 | &data->groups[0]); |
562dc973 | 376 | return PTR_ERR_OR_ZERO(hwmon_dev); |
512d1027 AH |
377 | } |
378 | ||
/*
 * PCI devices this driver binds to. The table is exported with
 * MODULE_DEVICE_TABLE() below.
 */
static const struct pci_device_id fam15h_power_id_table[] = {
	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) },
	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M70H_NB_F4) },
	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
	{}	/* terminator */
};
MODULE_DEVICE_TABLE(pci, fam15h_power_id_table);
389 | ||
/*
 * PCI driver description. No .remove callback is set; probe allocates
 * and registers through devm_* helpers only.
 */
static struct pci_driver fam15h_power_driver = {
	.name = "fam15h_power",
	.id_table = fam15h_power_id_table,
	.probe = fam15h_power_probe,
	/* NULL when CONFIG_PM is not set */
	.resume = fam15h_power_resume,
};

module_pci_driver(fam15h_power_driver);