nouveau: offload fence uevents work to workqueue
[linux-block.git] / drivers / powercap / intel_rapl_common.c
CommitLineData
f6cc69f1 1// SPDX-License-Identifier: GPL-2.0-only
2d281d81 2/*
3382388d
ZR
3 * Common code for Intel Running Average Power Limit (RAPL) support.
4 * Copyright (c) 2019, Intel Corporation.
2d281d81
JP
5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/list.h>
11#include <linux/types.h>
12#include <linux/device.h>
13#include <linux/slab.h>
14#include <linux/log2.h>
15#include <linux/bitmap.h>
16#include <linux/delay.h>
17#include <linux/sysfs.h>
18#include <linux/cpu.h>
19#include <linux/powercap.h>
52b3672c 20#include <linux/suspend.h>
ff956826 21#include <linux/intel_rapl.h>
3382388d 22#include <linux/processor.h>
abcfaeb3
ZR
23#include <linux/platform_device.h>
24
25#include <asm/iosf_mbi.h>
2d281d81 26#include <asm/cpu_device_id.h>
62d16733 27#include <asm/intel-family.h>
2d281d81
JP
28
29/* bitmasks for RAPL MSRs, used by primitive access functions */
30#define ENERGY_STATUS_MASK 0xffffffff
31
32#define POWER_LIMIT1_MASK 0x7FFF
33#define POWER_LIMIT1_ENABLE BIT(15)
34#define POWER_LIMIT1_CLAMP BIT(16)
35
36#define POWER_LIMIT2_MASK (0x7FFFULL<<32)
37#define POWER_LIMIT2_ENABLE BIT_ULL(47)
38#define POWER_LIMIT2_CLAMP BIT_ULL(48)
0c2ddedd
ZR
39#define POWER_HIGH_LOCK BIT_ULL(63)
40#define POWER_LOW_LOCK BIT(31)
2d281d81 41
8365a898
SP
42#define POWER_LIMIT4_MASK 0x1FFF
43
2d281d81
JP
44#define TIME_WINDOW1_MASK (0x7FULL<<17)
45#define TIME_WINDOW2_MASK (0x7FULL<<49)
46
47#define POWER_UNIT_OFFSET 0
48#define POWER_UNIT_MASK 0x0F
49
50#define ENERGY_UNIT_OFFSET 0x08
51#define ENERGY_UNIT_MASK 0x1F00
52
53#define TIME_UNIT_OFFSET 0x10
54#define TIME_UNIT_MASK 0xF0000
55
56#define POWER_INFO_MAX_MASK (0x7fffULL<<32)
57#define POWER_INFO_MIN_MASK (0x7fffULL<<16)
58#define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
59#define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
60
61#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
62#define PP_POLICY_MASK 0x1F
63
931da6a0
ZR
64/*
65 * SPR has different layout for Psys Domain PowerLimit registers.
66 * There are 17 bits of PL1 and PL2 instead of 15 bits.
67 * The Enable bits and TimeWindow bits are also shifted as a result.
68 */
69#define PSYS_POWER_LIMIT1_MASK 0x1FFFF
70#define PSYS_POWER_LIMIT1_ENABLE BIT(17)
71
72#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
73#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
74
75#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
76#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
77
e12dee18
ZR
78/* bitmasks for RAPL TPMI, used by primitive access functions */
79#define TPMI_POWER_LIMIT_MASK 0x3FFFF
80#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
81#define TPMI_TIME_WINDOW_MASK (0x7FULL<<18)
82#define TPMI_INFO_SPEC_MASK 0x3FFFF
83#define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18)
84#define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36)
85#define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
86
2d281d81 87/* Non HW constants */
3382388d 88#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
2d281d81
JP
89#define RAPL_PRIMITIVE_DUMMY BIT(2)
90
2d281d81
JP
91#define TIME_WINDOW_MAX_MSEC 40000
92#define TIME_WINDOW_MIN_MSEC 250
3382388d 93#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
/*
 * Kind of unit conversion applied to a primitive's raw value by
 * rapl_unit_xlate().
 */
enum unit_type {
	ARBITRARY_UNIT,		/* value is passed through, no translation */
	POWER_UNIT,		/* scaled by the domain's power_unit */
	ENERGY_UNIT,		/* scaled by the domain's energy_unit */
	TIME_UNIT,		/* converted by the compute_time_window callback */
};
100
2d281d81 101/* per domain data, some are optional */
2d281d81
JP
102#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
103
2d281d81
JP
104#define DOMAIN_STATE_INACTIVE BIT(0)
105#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
2d281d81 106
9050a9cd
ZR
107static const char *pl_names[NR_POWER_LIMITS] = {
108 [POWER_LIMIT1] = "long_term",
109 [POWER_LIMIT2] = "short_term",
110 [POWER_LIMIT4] = "peak_power",
111};
112
/*
 * Generic per-power-limit knobs. get_pl_prim() maps a (power limit, knob)
 * pair onto the concrete enum rapl_primitives value.
 */
enum pl_prims {
	PL_ENABLE,		/* enable bit of the limit */
	PL_CLAMP,		/* clamp bit of the limit */
	PL_LIMIT,		/* the power limit value itself */
	PL_TIME_WINDOW,		/* time window of the limit */
	PL_MAX_POWER,		/* maximum power reported for the limit */
	PL_LOCK,		/* lock bit of the limit */
};
121
122static bool is_pl_valid(struct rapl_domain *rd, int pl)
123{
124 if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4)
125 return false;
126 return rd->rpl[pl].name ? true : false;
127}
128
f442bd27
ZR
129static int get_pl_lock_prim(struct rapl_domain *rd, int pl)
130{
e12dee18
ZR
131 if (rd->rp->priv->type == RAPL_IF_TPMI) {
132 if (pl == POWER_LIMIT1)
133 return PL1_LOCK;
134 if (pl == POWER_LIMIT2)
135 return PL2_LOCK;
136 if (pl == POWER_LIMIT4)
137 return PL4_LOCK;
138 }
139
140 /* MSR/MMIO Interface doesn't have Lock bit for PL4 */
141 if (pl == POWER_LIMIT4)
142 return -EINVAL;
143
f442bd27
ZR
144 /*
145 * Power Limit register that supports two power limits has a different
146 * bit position for the Lock bit.
147 */
148 if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2))
149 return FW_HIGH_LOCK;
150 return FW_LOCK;
151}
152
153static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
9050a9cd
ZR
154{
155 switch (pl) {
156 case POWER_LIMIT1:
157 if (prim == PL_ENABLE)
158 return PL1_ENABLE;
e12dee18 159 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
9050a9cd
ZR
160 return PL1_CLAMP;
161 if (prim == PL_LIMIT)
162 return POWER_LIMIT1;
163 if (prim == PL_TIME_WINDOW)
164 return TIME_WINDOW1;
165 if (prim == PL_MAX_POWER)
166 return THERMAL_SPEC_POWER;
f442bd27
ZR
167 if (prim == PL_LOCK)
168 return get_pl_lock_prim(rd, pl);
9050a9cd
ZR
169 return -EINVAL;
170 case POWER_LIMIT2:
171 if (prim == PL_ENABLE)
172 return PL2_ENABLE;
e12dee18 173 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
9050a9cd
ZR
174 return PL2_CLAMP;
175 if (prim == PL_LIMIT)
176 return POWER_LIMIT2;
177 if (prim == PL_TIME_WINDOW)
178 return TIME_WINDOW2;
179 if (prim == PL_MAX_POWER)
180 return MAX_POWER;
f442bd27
ZR
181 if (prim == PL_LOCK)
182 return get_pl_lock_prim(rd, pl);
9050a9cd
ZR
183 return -EINVAL;
184 case POWER_LIMIT4:
185 if (prim == PL_LIMIT)
186 return POWER_LIMIT4;
187 if (prim == PL_ENABLE)
188 return PL4_ENABLE;
189 /* PL4 would be around two times PL2, use same prim as PL2. */
190 if (prim == PL_MAX_POWER)
191 return MAX_POWER;
e12dee18
ZR
192 if (prim == PL_LOCK)
193 return get_pl_lock_prim(rd, pl);
9050a9cd
ZR
194 return -EINVAL;
195 default:
196 return -EINVAL;
197 }
198}
2d281d81 199
2d281d81
JP
200#define power_zone_to_rapl_domain(_zone) \
201 container_of(_zone, struct rapl_domain, power_zone)
202
087e9cba 203struct rapl_defaults {
51b63409 204 u8 floor_freq_reg_addr;
693c1d78 205 int (*check_unit)(struct rapl_domain *rd);
087e9cba 206 void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
cb532e72 207 u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
3382388d 208 bool to_raw);
d474a4d3 209 unsigned int dram_domain_energy_unit;
2d798d9f 210 unsigned int psys_domain_energy_unit;
931da6a0 211 bool spr_psys_bits;
087e9cba 212};
b4288ce7 213static struct rapl_defaults *defaults_msr;
e12dee18 214static const struct rapl_defaults defaults_tpmi;
087e9cba 215
e8e28c2a
ZR
216static struct rapl_defaults *get_defaults(struct rapl_package *rp)
217{
218 return rp->priv->defaults;
219}
220
3c2c0845 221/* Sideband MBI registers */
51b63409
AT
222#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
223#define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
3c2c0845 224
2d281d81
JP
225#define PACKAGE_PLN_INT_SAVED BIT(0)
226#define MAX_PRIM_NAME (32)
227
228/* per domain data. used to describe individual knobs such that access function
229 * can be consolidated into one instead of many inline functions.
230 */
231struct rapl_primitive_info {
232 const char *name;
233 u64 mask;
234 int shift;
f7c4e0c8 235 enum rapl_domain_reg_id id;
2d281d81
JP
236 enum unit_type unit;
237 u32 flag;
238};
239
240#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
241 .name = #p, \
242 .mask = m, \
243 .shift = s, \
244 .id = i, \
245 .unit = u, \
246 .flag = f \
247 }
248
249static void rapl_init_domains(struct rapl_package *rp);
250static int rapl_read_data_raw(struct rapl_domain *rd,
3382388d
ZR
251 enum rapl_primitives prim,
252 bool xlate, u64 *data);
2d281d81 253static int rapl_write_data_raw(struct rapl_domain *rd,
3382388d
ZR
254 enum rapl_primitives prim,
255 unsigned long long value);
9050a9cd
ZR
256static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
257 enum pl_prims pl_prim,
258 bool xlate, u64 *data);
259static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
260 enum pl_prims pl_prim,
261 unsigned long long value);
309557f5 262static u64 rapl_unit_xlate(struct rapl_domain *rd,
3382388d 263 enum unit_type type, u64 value, int to_raw);
309557f5 264static void package_power_limit_irq_save(struct rapl_package *rp);
2d281d81 265
3382388d 266static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */
2d281d81 267
/*
 * Default zone names; rapl_init_domains() indexes this table with the
 * domain type.
 */
static const char *const rapl_domain_names[] = {
	"package",
	"core",
	"uncore",
	"dram",
	"psys",
};
275
3382388d
ZR
276static int get_energy_counter(struct powercap_zone *power_zone,
277 u64 *energy_raw)
2d281d81
JP
278{
279 struct rapl_domain *rd;
280 u64 energy_now;
281
282 /* prevent CPU hotplug, make sure the RAPL domain does not go
283 * away while reading the counter.
284 */
5d4c779c 285 cpus_read_lock();
2d281d81
JP
286 rd = power_zone_to_rapl_domain(power_zone);
287
288 if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
289 *energy_raw = energy_now;
5d4c779c 290 cpus_read_unlock();
2d281d81
JP
291
292 return 0;
293 }
5d4c779c 294 cpus_read_unlock();
2d281d81
JP
295
296 return -EIO;
297}
298
299static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
300{
d474a4d3
JP
301 struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev);
302
309557f5 303 *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
2d281d81
JP
304 return 0;
305}
306
307static int release_zone(struct powercap_zone *power_zone)
308{
309 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
309557f5 310 struct rapl_package *rp = rd->rp;
2d281d81
JP
311
312 /* package zone is the last zone of a package, we can free
313 * memory here since all children has been unregistered.
314 */
315 if (rd->id == RAPL_DOMAIN_PACKAGE) {
2d281d81
JP
316 kfree(rd);
317 rp->domains = NULL;
318 }
319
320 return 0;
321
322}
323
324static int find_nr_power_limit(struct rapl_domain *rd)
325{
e1399ba2 326 int i, nr_pl = 0;
2d281d81
JP
327
328 for (i = 0; i < NR_POWER_LIMITS; i++) {
9050a9cd 329 if (is_pl_valid(rd, i))
e1399ba2 330 nr_pl++;
2d281d81
JP
331 }
332
e1399ba2 333 return nr_pl;
2d281d81
JP
334}
335
336static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
337{
338 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
e8e28c2a 339 struct rapl_defaults *defaults = get_defaults(rd->rp);
9050a9cd 340 int ret;
3c2c0845 341
5d4c779c 342 cpus_read_lock();
9050a9cd
ZR
343 ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
344 if (!ret && defaults->set_floor_freq)
e8e28c2a 345 defaults->set_floor_freq(rd, mode);
5d4c779c 346 cpus_read_unlock();
2d281d81 347
9050a9cd 348 return ret;
2d281d81
JP
349}
350
351static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
352{
353 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
354 u64 val;
9050a9cd 355 int ret;
2d281d81 356
f442bd27 357 if (rd->rpl[POWER_LIMIT1].locked) {
2d281d81
JP
358 *mode = false;
359 return 0;
360 }
5d4c779c 361 cpus_read_lock();
9050a9cd
ZR
362 ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val);
363 if (!ret)
364 *mode = val;
5d4c779c 365 cpus_read_unlock();
2d281d81 366
9050a9cd 367 return ret;
2d281d81
JP
368}
369
370/* per RAPL domain ops, in the order of rapl_domain_type */
600c395b 371static const struct powercap_zone_ops zone_ops[] = {
2d281d81
JP
372 /* RAPL_DOMAIN_PACKAGE */
373 {
3382388d
ZR
374 .get_energy_uj = get_energy_counter,
375 .get_max_energy_range_uj = get_max_energy_counter,
376 .release = release_zone,
377 .set_enable = set_domain_enable,
378 .get_enable = get_domain_enable,
379 },
2d281d81
JP
380 /* RAPL_DOMAIN_PP0 */
381 {
3382388d
ZR
382 .get_energy_uj = get_energy_counter,
383 .get_max_energy_range_uj = get_max_energy_counter,
384 .release = release_zone,
385 .set_enable = set_domain_enable,
386 .get_enable = get_domain_enable,
387 },
2d281d81
JP
388 /* RAPL_DOMAIN_PP1 */
389 {
3382388d
ZR
390 .get_energy_uj = get_energy_counter,
391 .get_max_energy_range_uj = get_max_energy_counter,
392 .release = release_zone,
393 .set_enable = set_domain_enable,
394 .get_enable = get_domain_enable,
395 },
2d281d81
JP
396 /* RAPL_DOMAIN_DRAM */
397 {
3382388d
ZR
398 .get_energy_uj = get_energy_counter,
399 .get_max_energy_range_uj = get_max_energy_counter,
400 .release = release_zone,
401 .set_enable = set_domain_enable,
402 .get_enable = get_domain_enable,
403 },
3521ba1c
SP
404 /* RAPL_DOMAIN_PLATFORM */
405 {
3382388d
ZR
406 .get_energy_uj = get_energy_counter,
407 .get_max_energy_range_uj = get_max_energy_counter,
408 .release = release_zone,
409 .set_enable = set_domain_enable,
410 .get_enable = get_domain_enable,
411 },
2d281d81
JP
412};
413
e1399ba2
JP
414/*
415 * Constraint index used by powercap can be different than power limit (PL)
3382388d 416 * index in that some PLs maybe missing due to non-existent MSRs. So we
e1399ba2
JP
417 * need to convert here by finding the valid PLs only (name populated).
418 */
419static int contraint_to_pl(struct rapl_domain *rd, int cid)
420{
421 int i, j;
422
9050a9cd
ZR
423 for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) {
424 if (is_pl_valid(rd, i) && j++ == cid) {
e1399ba2
JP
425 pr_debug("%s: index %d\n", __func__, i);
426 return i;
427 }
428 }
cb43f81b 429 pr_err("Cannot find matching power limit for constraint %d\n", cid);
e1399ba2
JP
430
431 return -EINVAL;
432}
433
434static int set_power_limit(struct powercap_zone *power_zone, int cid,
3382388d 435 u64 power_limit)
2d281d81
JP
436{
437 struct rapl_domain *rd;
438 struct rapl_package *rp;
439 int ret = 0;
e1399ba2 440 int id;
2d281d81 441
5d4c779c 442 cpus_read_lock();
2d281d81 443 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2 444 id = contraint_to_pl(rd, cid);
309557f5 445 rp = rd->rp;
2d281d81 446
9050a9cd 447 ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit);
2d281d81 448 if (!ret)
309557f5 449 package_power_limit_irq_save(rp);
5d4c779c 450 cpus_read_unlock();
2d281d81
JP
451 return ret;
452}
453
e1399ba2 454static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
3382388d 455 u64 *data)
2d281d81
JP
456{
457 struct rapl_domain *rd;
458 u64 val;
2d281d81 459 int ret = 0;
e1399ba2 460 int id;
2d281d81 461
5d4c779c 462 cpus_read_lock();
2d281d81 463 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2 464 id = contraint_to_pl(rd, cid);
cb43f81b 465
9050a9cd
ZR
466 ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val);
467 if (!ret)
2d281d81
JP
468 *data = val;
469
5d4c779c 470 cpus_read_unlock();
2d281d81
JP
471
472 return ret;
473}
474
e1399ba2 475static int set_time_window(struct powercap_zone *power_zone, int cid,
3382388d 476 u64 window)
2d281d81
JP
477{
478 struct rapl_domain *rd;
479 int ret = 0;
e1399ba2 480 int id;
2d281d81 481
5d4c779c 482 cpus_read_lock();
2d281d81 483 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2
JP
484 id = contraint_to_pl(rd, cid);
485
9050a9cd 486 ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window);
cb43f81b 487
5d4c779c 488 cpus_read_unlock();
2d281d81
JP
489 return ret;
490}
491
3382388d
ZR
492static int get_time_window(struct powercap_zone *power_zone, int cid,
493 u64 *data)
2d281d81
JP
494{
495 struct rapl_domain *rd;
496 u64 val;
497 int ret = 0;
e1399ba2 498 int id;
2d281d81 499
5d4c779c 500 cpus_read_lock();
2d281d81 501 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2
JP
502 id = contraint_to_pl(rd, cid);
503
9050a9cd 504 ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val);
2d281d81
JP
505 if (!ret)
506 *data = val;
cb43f81b 507
5d4c779c 508 cpus_read_unlock();
2d281d81
JP
509
510 return ret;
511}
512
3382388d
ZR
513static const char *get_constraint_name(struct powercap_zone *power_zone,
514 int cid)
2d281d81 515{
2d281d81 516 struct rapl_domain *rd;
e1399ba2 517 int id;
2d281d81
JP
518
519 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2
JP
520 id = contraint_to_pl(rd, cid);
521 if (id >= 0)
522 return rd->rpl[id].name;
2d281d81 523
e1399ba2 524 return NULL;
2d281d81
JP
525}
526
9050a9cd 527static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data)
2d281d81
JP
528{
529 struct rapl_domain *rd;
530 u64 val;
2d281d81 531 int ret = 0;
9050a9cd 532 int id;
2d281d81 533
5d4c779c 534 cpus_read_lock();
2d281d81 535 rd = power_zone_to_rapl_domain(power_zone);
9050a9cd
ZR
536 id = contraint_to_pl(rd, cid);
537
538 ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val);
539 if (!ret)
2d281d81
JP
540 *data = val;
541
8365a898 542 /* As a generalization rule, PL4 would be around two times PL2. */
9050a9cd 543 if (id == POWER_LIMIT4)
8365a898
SP
544 *data = *data * 2;
545
5d4c779c 546 cpus_read_unlock();
2d281d81
JP
547
548 return ret;
549}
550
600c395b 551static const struct powercap_zone_constraint_ops constraint_ops = {
2d281d81
JP
552 .set_power_limit_uw = set_power_limit,
553 .get_power_limit_uw = get_current_power_limit,
554 .set_time_window_us = set_time_window,
555 .get_time_window_us = get_time_window,
556 .get_max_power_uw = get_max_power,
557 .get_name = get_constraint_name,
558};
559
bf44b901
ZR
560/* Return the id used for read_raw/write_raw callback */
561static int get_rid(struct rapl_package *rp)
562{
563 return rp->lead_cpu >= 0 ? rp->lead_cpu : rp->id;
564}
565
2d281d81
JP
566/* called after domain detection and package level data are set */
567static void rapl_init_domains(struct rapl_package *rp)
568{
0c2ddedd
ZR
569 enum rapl_domain_type i;
570 enum rapl_domain_reg_id j;
2d281d81
JP
571 struct rapl_domain *rd = rp->domains;
572
573 for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
574 unsigned int mask = rp->domain_map & (1 << i);
9050a9cd 575 int t;
7fde2712 576
0c2ddedd
ZR
577 if (!mask)
578 continue;
579
580 rd->rp = rp;
f1e8d756
ZR
581
582 if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
583 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
bf44b901
ZR
584 rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) :
585 rp->id);
586 } else {
f1e8d756
ZR
587 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
588 rapl_domain_names[i]);
bf44b901 589 }
f1e8d756 590
0c2ddedd 591 rd->id = i;
a38f300b
ZR
592
593 /* PL1 is supported by default */
594 rp->priv->limits[i] |= BIT(POWER_LIMIT1);
8365a898 595
9050a9cd
ZR
596 for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) {
597 if (rp->priv->limits[i] & BIT(t))
598 rd->rpl[t].name = pl_names[t];
8365a898
SP
599 }
600
0c2ddedd
ZR
601 for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
602 rd->regs[j] = rp->priv->regs[i][j];
603
0c2ddedd 604 rd++;
2d281d81
JP
605 }
606}
607
309557f5 608static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
3382388d 609 u64 value, int to_raw)
2d281d81 610{
3c2c0845 611 u64 units = 1;
cb532e72 612 struct rapl_defaults *defaults = get_defaults(rd->rp);
d474a4d3 613 u64 scale = 1;
2d281d81 614
2d281d81
JP
615 switch (type) {
616 case POWER_UNIT:
cb532e72 617 units = rd->power_unit;
2d281d81
JP
618 break;
619 case ENERGY_UNIT:
d474a4d3 620 scale = ENERGY_UNIT_SCALE;
cb532e72 621 units = rd->energy_unit;
2d281d81
JP
622 break;
623 case TIME_UNIT:
cb532e72 624 return defaults->compute_time_window(rd, value, to_raw);
2d281d81
JP
625 case ARBITRARY_UNIT:
626 default:
627 return value;
a8193af7 628 }
2d281d81
JP
629
630 if (to_raw)
d474a4d3 631 return div64_u64(value, units) * scale;
3c2c0845
JP
632
633 value *= units;
634
d474a4d3 635 return div64_u64(value, scale);
2d281d81
JP
636}
637
e12dee18 638/* RAPL primitives for MSR and MMIO I/F */
b4288ce7 639static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
2d281d81 640 /* name, mask, shift, msr index, unit divisor */
11edbe5c 641 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
3382388d 642 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 643 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
3382388d 644 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 645 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
8365a898 646 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
045610c3
ZR
647 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
648 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
11edbe5c 649 [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
3382388d 650 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
f442bd27
ZR
651 [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
652 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 653 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
3382388d 654 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 655 [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
3382388d 656 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 657 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
3382388d 658 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 659 [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
3382388d 660 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 661 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
3382388d 662 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
11edbe5c 663 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
3382388d 664 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
11edbe5c 665 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
3382388d 666 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
11edbe5c 667 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
3382388d 668 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
11edbe5c 669 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
3382388d 670 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
11edbe5c 671 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
3382388d 672 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
11edbe5c 673 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
3382388d 674 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
11edbe5c 675 [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
3382388d 676 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
11edbe5c 677 [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
931da6a0 678 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 679 [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
931da6a0 680 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 681 [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
931da6a0 682 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 683 [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
931da6a0 684 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 685 [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
931da6a0 686 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
11edbe5c 687 [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
931da6a0 688 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
2d281d81 689 /* non-hardware */
11edbe5c 690 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
3382388d 691 RAPL_PRIMITIVE_DERIVED),
2d281d81
JP
692};
693
e12dee18
ZR
694/* RAPL primitives for TPMI I/F */
695static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
696 /* name, mask, shift, msr index, unit divisor */
697 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
698 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
699 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
700 RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
701 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
702 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
703 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
704 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
705 [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63,
706 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
707 [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63,
708 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
709 [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63,
710 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
711 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
712 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
713 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
714 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
715 [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
716 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
717 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
718 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
719 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
720 RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
721 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
722 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
723 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
724 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
725 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
726 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
727 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54,
728 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
729 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
730 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
731 /* non-hardware */
732 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0,
733 POWER_UNIT, RAPL_PRIMITIVE_DERIVED),
734};
735
98ff639a
ZR
736static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
737{
738 struct rapl_primitive_info *rpi = rp->priv->rpi;
739
740 if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi)
741 return NULL;
742
743 return &rpi[prim];
744}
745
e8e28c2a
ZR
746static int rapl_config(struct rapl_package *rp)
747{
b4288ce7
ZR
748 switch (rp->priv->type) {
749 /* MMIO I/F shares the same register layout as MSR registers */
750 case RAPL_IF_MMIO:
751 case RAPL_IF_MSR:
752 rp->priv->defaults = (void *)defaults_msr;
753 rp->priv->rpi = (void *)rpi_msr;
754 break;
e12dee18
ZR
755 case RAPL_IF_TPMI:
756 rp->priv->defaults = (void *)&defaults_tpmi;
757 rp->priv->rpi = (void *)rpi_tpmi;
758 break;
b4288ce7
ZR
759 default:
760 return -EINVAL;
761 }
e8e28c2a
ZR
762 return 0;
763}
764
931da6a0
ZR
765static enum rapl_primitives
766prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
767{
e8e28c2a
ZR
768 struct rapl_defaults *defaults = get_defaults(rd->rp);
769
770 if (!defaults->spr_psys_bits)
931da6a0
ZR
771 return prim;
772
773 if (rd->id != RAPL_DOMAIN_PLATFORM)
774 return prim;
775
776 switch (prim) {
777 case POWER_LIMIT1:
778 return PSYS_POWER_LIMIT1;
779 case POWER_LIMIT2:
780 return PSYS_POWER_LIMIT2;
781 case PL1_ENABLE:
782 return PSYS_PL1_ENABLE;
783 case PL2_ENABLE:
784 return PSYS_PL2_ENABLE;
785 case TIME_WINDOW1:
786 return PSYS_TIME_WINDOW1;
787 case TIME_WINDOW2:
788 return PSYS_TIME_WINDOW2;
789 default:
790 return prim;
791 }
792}
793
2d281d81
JP
794/* Read primitive data based on its related struct rapl_primitive_info.
795 * if xlate flag is set, return translated data based on data units, i.e.
796 * time, energy, and power.
797 * RAPL MSRs are non-architectual and are laid out not consistently across
798 * domains. Here we use primitive info to allow writing consolidated access
799 * functions.
800 * For a given primitive, it is processed by MSR mask and shift. Unit conversion
801 * is pre-assigned based on RAPL unit MSRs read at init time.
802 * 63-------------------------- 31--------------------------- 0
803 * | xxxxx (mask) |
804 * | |<- shift ----------------|
805 * 63-------------------------- 31--------------------------- 0
806 */
807static int rapl_read_data_raw(struct rapl_domain *rd,
3382388d 808 enum rapl_primitives prim, bool xlate, u64 *data)
2d281d81 809{
beea8df8 810 u64 value;
931da6a0 811 enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
98ff639a 812 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
beea8df8 813 struct reg_action ra;
2d281d81 814
98ff639a 815 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
2d281d81
JP
816 return -EINVAL;
817
98ff639a 818 ra.reg = rd->regs[rpi->id];
16e95a62 819 if (!ra.reg.val)
2d281d81 820 return -EINVAL;
323ee64a 821
2d281d81 822 /* non-hardware data are collected by the polling thread */
98ff639a 823 if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
2d281d81
JP
824 *data = rd->rdd.primitives[prim];
825 return 0;
826 }
827
98ff639a 828 ra.mask = rpi->mask;
beea8df8 829
bf44b901 830 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
16e95a62 831 pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
2d281d81
JP
832 return -EIO;
833 }
834
98ff639a 835 value = ra.value >> rpi->shift;
beea8df8 836
2d281d81 837 if (xlate)
98ff639a 838 *data = rapl_unit_xlate(rd, rpi->unit, value, 0);
2d281d81 839 else
beea8df8 840 *data = value;
2d281d81
JP
841
842 return 0;
843}
844
845/* Similar use of primitive info in the read counterpart */
846static int rapl_write_data_raw(struct rapl_domain *rd,
3382388d
ZR
847 enum rapl_primitives prim,
848 unsigned long long value)
2d281d81 849{
931da6a0 850 enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
98ff639a 851 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
f14a1396 852 u64 bits;
beea8df8 853 struct reg_action ra;
f14a1396 854 int ret;
2d281d81 855
98ff639a
ZR
856 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
857 return -EINVAL;
858
98ff639a
ZR
859 bits = rapl_unit_xlate(rd, rpi->unit, value, 1);
860 bits <<= rpi->shift;
861 bits &= rpi->mask;
edbdabc6 862
beea8df8 863 memset(&ra, 0, sizeof(ra));
f14a1396 864
98ff639a
ZR
865 ra.reg = rd->regs[rpi->id];
866 ra.mask = rpi->mask;
beea8df8 867 ra.value = bits;
f14a1396 868
bf44b901 869 ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra);
f14a1396
JP
870
871 return ret;
2d281d81
JP
872}
873
9050a9cd
ZR
874static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
875 enum pl_prims pl_prim, bool xlate, u64 *data)
876{
f442bd27 877 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim);
9050a9cd
ZR
878
879 if (!is_pl_valid(rd, pl))
880 return -EINVAL;
881
882 return rapl_read_data_raw(rd, prim, xlate, data);
883}
884
885static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
886 enum pl_prims pl_prim,
887 unsigned long long value)
888{
f442bd27 889 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim);
9050a9cd
ZR
890
891 if (!is_pl_valid(rd, pl))
892 return -EINVAL;
893
f442bd27 894 if (rd->rpl[pl].locked) {
a60ec448 895 pr_debug("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]);
9050a9cd
ZR
896 return -EACCES;
897 }
898
899 return rapl_write_data_raw(rd, prim, value);
900}
3c2c0845
JP
901/*
902 * Raw RAPL data stored in MSRs are in certain scales. We need to
903 * convert them into standard units based on the units reported in
904 * the RAPL unit MSRs. This is specific to CPUs as the method to
905 * calculate units differ on different CPUs.
906 * We convert the units to below format based on CPUs.
907 * i.e.
d474a4d3 908 * energy unit: picoJoules : Represented in picoJoules by default
3c2c0845
JP
909 * power unit : microWatts : Represented in milliWatts by default
910 * time unit : microseconds: Represented in seconds by default
911 */
693c1d78 912static int rapl_check_unit_core(struct rapl_domain *rd)
2d281d81 913{
1193b165 914 struct reg_action ra;
2d281d81
JP
915 u32 value;
916
cb532e72 917 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
1193b165 918 ra.mask = ~0;
bf44b901
ZR
919 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
920 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
16e95a62 921 ra.reg.val, rd->rp->name, rd->name);
2d281d81
JP
922 return -ENODEV;
923 }
924
1193b165 925 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
cb532e72 926 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
2d281d81 927
1193b165 928 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
cb532e72 929 rd->power_unit = 1000000 / (1 << value);
2d281d81 930
1193b165 931 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
cb532e72 932 rd->time_unit = 1000000 / (1 << value);
2d281d81 933
cb532e72
ZR
934 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
935 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
2d281d81
JP
936
937 return 0;
938}
939
693c1d78 940static int rapl_check_unit_atom(struct rapl_domain *rd)
3c2c0845 941{
1193b165 942 struct reg_action ra;
3c2c0845
JP
943 u32 value;
944
cb532e72 945 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
1193b165 946 ra.mask = ~0;
bf44b901
ZR
947 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
948 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
16e95a62 949 ra.reg.val, rd->rp->name, rd->name);
3c2c0845
JP
950 return -ENODEV;
951 }
1193b165
ZR
952
953 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
cb532e72 954 rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
3c2c0845 955
1193b165 956 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
cb532e72 957 rd->power_unit = (1 << value) * 1000;
3c2c0845 958
1193b165 959 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
cb532e72 960 rd->time_unit = 1000000 / (1 << value);
3c2c0845 961
cb532e72
ZR
962 pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
963 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
3c2c0845
JP
964
965 return 0;
966}
967
f14a1396
JP
968static void power_limit_irq_save_cpu(void *info)
969{
970 u32 l, h = 0;
971 struct rapl_package *rp = (struct rapl_package *)info;
972
973 /* save the state of PLN irq mask bit before disabling it */
974 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
975 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) {
976 rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE;
977 rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED;
978 }
979 l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
980 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
981}
982
2d281d81
JP
983/* REVISIT:
984 * When package power limit is set artificially low by RAPL, LVT
985 * thermal interrupt for package power limit should be ignored
986 * since we are not really exceeding the real limit. The intention
987 * is to avoid excessive interrupts while we are trying to save power.
988 * A useful feature might be routing the package_power_limit interrupt
989 * to userspace via eventfd. once we have a usecase, this is simple
990 * to do by adding an atomic notifier.
991 */
992
309557f5 993static void package_power_limit_irq_save(struct rapl_package *rp)
2d281d81 994{
bf44b901
ZR
995 if (rp->lead_cpu < 0)
996 return;
997
f14a1396
JP
998 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
999 return;
1000
323ee64a 1001 smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1);
f14a1396
JP
1002}
1003
58705069
TG
1004/*
1005 * Restore per package power limit interrupt enable state. Called from cpu
1006 * hotplug code on package removal.
1007 */
1008static void package_power_limit_irq_restore(struct rapl_package *rp)
f14a1396 1009{
58705069
TG
1010 u32 l, h;
1011
bf44b901
ZR
1012 if (rp->lead_cpu < 0)
1013 return;
1014
58705069
TG
1015 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
1016 return;
1017
1018 /* irq enable state not saved, nothing to restore */
1019 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED))
1020 return;
f14a1396
JP
1021
1022 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
1023
1024 if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE)
1025 l |= PACKAGE_THERM_INT_PLN_ENABLE;
1026 else
1027 l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
1028
1029 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2d281d81
JP
1030}
1031
3c2c0845
JP
1032static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
1033{
9050a9cd 1034 int i;
3c2c0845
JP
1035
1036 /* always enable clamp such that p-state can go below OS requested
1037 * range. power capping priority over guranteed frequency.
1038 */
9050a9cd 1039 rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);
3c2c0845 1040
9050a9cd
ZR
1041 for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) {
1042 rapl_write_pl_data(rd, i, PL_ENABLE, mode);
1043 rapl_write_pl_data(rd, i, PL_CLAMP, mode);
3c2c0845
JP
1044 }
1045}
1046
1047static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
1048{
1049 static u32 power_ctrl_orig_val;
e8e28c2a 1050 struct rapl_defaults *defaults = get_defaults(rd->rp);
3c2c0845
JP
1051 u32 mdata;
1052
e8e28c2a 1053 if (!defaults->floor_freq_reg_addr) {
51b63409
AT
1054 pr_err("Invalid floor frequency config register\n");
1055 return;
1056 }
1057
3c2c0845 1058 if (!power_ctrl_orig_val)
4077a387 1059 iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
e8e28c2a 1060 defaults->floor_freq_reg_addr,
4077a387 1061 &power_ctrl_orig_val);
3c2c0845
JP
1062 mdata = power_ctrl_orig_val;
1063 if (enable) {
1064 mdata &= ~(0x7f << 8);
1065 mdata |= 1 << 8;
1066 }
4077a387 1067 iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
e8e28c2a 1068 defaults->floor_freq_reg_addr, mdata);
3c2c0845
JP
1069}
1070
cb532e72 1071static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
3382388d 1072 bool to_raw)
3c2c0845 1073{
3382388d 1074 u64 f, y; /* fraction and exp. used for time unit */
3c2c0845
JP
1075
1076 /*
1077 * Special processing based on 2^Y*(1+F/4), refer
1078 * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
1079 */
1080 if (!to_raw) {
1081 f = (value & 0x60) >> 5;
1082 y = value & 0x1f;
cb532e72 1083 value = (1 << y) * (4 + f) * rd->time_unit / 4;
3c2c0845 1084 } else {
cb532e72 1085 if (value < rd->time_unit)
2d935400
CQ
1086 return 0;
1087
cb532e72 1088 do_div(value, rd->time_unit);
3c2c0845 1089 y = ilog2(value);
cf835b00
ZR
1090
1091 /*
1092 * The target hardware field is 7 bits wide, so return all ones
1093 * if the exponent is too large.
1094 */
1095 if (y > 0x1f)
1096 return 0x7f;
1097
1098 f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);
3c2c0845
JP
1099 value = (y & 0x1f) | ((f & 0x3) << 5);
1100 }
1101 return value;
1102}
1103
cb532e72 1104static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
3382388d 1105 bool to_raw)
3c2c0845
JP
1106{
1107 /*
1108 * Atom time unit encoding is straight forward val * time_unit,
1109 * where time_unit is default to 1 sec. Never 0.
1110 */
1111 if (!to_raw)
cb532e72 1112 return (value) ? value * rd->time_unit : rd->time_unit;
3382388d 1113
cb532e72 1114 value = div64_u64(value, rd->time_unit);
3c2c0845
JP
1115
1116 return value;
1117}
1118
e12dee18
ZR
1119/* TPMI Unit register has different layout */
1120#define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET
1121#define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK
1122#define TPMI_ENERGY_UNIT_OFFSET 0x06
1123#define TPMI_ENERGY_UNIT_MASK 0x7C0
1124#define TPMI_TIME_UNIT_OFFSET 0x0C
1125#define TPMI_TIME_UNIT_MASK 0xF000
1126
1127static int rapl_check_unit_tpmi(struct rapl_domain *rd)
1128{
1129 struct reg_action ra;
1130 u32 value;
1131
1132 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
1133 ra.mask = ~0;
1134 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
1135 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
16e95a62 1136 ra.reg.val, rd->rp->name, rd->name);
e12dee18
ZR
1137 return -ENODEV;
1138 }
1139
1140 value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
1141 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
1142
1143 value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
1144 rd->power_unit = 1000000 / (1 << value);
1145
1146 value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
1147 rd->time_unit = 1000000 / (1 << value);
1148
1149 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
1150 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
1151
1152 return 0;
1153}
1154
1155static const struct rapl_defaults defaults_tpmi = {
1156 .check_unit = rapl_check_unit_tpmi,
1157 /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
1158 .set_floor_freq = set_floor_freq_default,
1159 .compute_time_window = rapl_compute_time_window_core,
1160};
1161
087e9cba 1162static const struct rapl_defaults rapl_defaults_core = {
51b63409 1163 .floor_freq_reg_addr = 0,
3c2c0845
JP
1164 .check_unit = rapl_check_unit_core,
1165 .set_floor_freq = set_floor_freq_default,
1166 .compute_time_window = rapl_compute_time_window_core,
087e9cba
JP
1167};
1168
d474a4d3
JP
1169static const struct rapl_defaults rapl_defaults_hsw_server = {
1170 .check_unit = rapl_check_unit_core,
1171 .set_floor_freq = set_floor_freq_default,
1172 .compute_time_window = rapl_compute_time_window_core,
1173 .dram_domain_energy_unit = 15300,
1174};
1175
2d798d9f
ZR
1176static const struct rapl_defaults rapl_defaults_spr_server = {
1177 .check_unit = rapl_check_unit_core,
1178 .set_floor_freq = set_floor_freq_default,
1179 .compute_time_window = rapl_compute_time_window_core,
2d798d9f 1180 .psys_domain_energy_unit = 1000000000,
931da6a0 1181 .spr_psys_bits = true,
2d798d9f
ZR
1182};
1183
51b63409
AT
1184static const struct rapl_defaults rapl_defaults_byt = {
1185 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
1186 .check_unit = rapl_check_unit_atom,
1187 .set_floor_freq = set_floor_freq_atom,
1188 .compute_time_window = rapl_compute_time_window_atom,
1189};
1190
1191static const struct rapl_defaults rapl_defaults_tng = {
1192 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
3c2c0845
JP
1193 .check_unit = rapl_check_unit_atom,
1194 .set_floor_freq = set_floor_freq_atom,
1195 .compute_time_window = rapl_compute_time_window_atom,
087e9cba
JP
1196};
1197
51b63409
AT
1198static const struct rapl_defaults rapl_defaults_ann = {
1199 .floor_freq_reg_addr = 0,
1200 .check_unit = rapl_check_unit_atom,
1201 .set_floor_freq = NULL,
1202 .compute_time_window = rapl_compute_time_window_atom,
1203};
1204
1205static const struct rapl_defaults rapl_defaults_cht = {
1206 .floor_freq_reg_addr = 0,
1207 .check_unit = rapl_check_unit_atom,
1208 .set_floor_freq = NULL,
1209 .compute_time_window = rapl_compute_time_window_atom,
1210};
1211
43756a29
VD
1212static const struct rapl_defaults rapl_defaults_amd = {
1213 .check_unit = rapl_check_unit_core,
1214};
1215
ea85dbca 1216static const struct x86_cpu_id rapl_ids[] __initconst = {
f0722512
TG
1217 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &rapl_defaults_core),
1218 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &rapl_defaults_core),
1219
1220 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &rapl_defaults_core),
1221 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &rapl_defaults_core),
1222
1223 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &rapl_defaults_core),
1224 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &rapl_defaults_core),
1225 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &rapl_defaults_core),
1226 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &rapl_defaults_hsw_server),
1227
1228 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &rapl_defaults_core),
1229 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &rapl_defaults_core),
1230 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &rapl_defaults_core),
1231 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &rapl_defaults_hsw_server),
1232
1233 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &rapl_defaults_core),
1234 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &rapl_defaults_core),
1235 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &rapl_defaults_hsw_server),
1236 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &rapl_defaults_core),
1237 X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &rapl_defaults_core),
1238 X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &rapl_defaults_core),
1239 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &rapl_defaults_core),
1240 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &rapl_defaults_core),
1241 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &rapl_defaults_core),
1242 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &rapl_defaults_hsw_server),
1243 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &rapl_defaults_hsw_server),
1244 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core),
1245 X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core),
1246 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core),
57a2fb06 1247 X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core),
64e5f367 1248 X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core),
ba92a420 1249 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core),
cca26b66 1250 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core),
882cdb06 1251 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &rapl_defaults_core),
ae0dc7ed 1252 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
27557146 1253 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core),
0d7a23b5 1254 X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core),
bdaad038
ZR
1255 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core),
1256 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core),
2d798d9f 1257 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
7adc6885 1258 X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server),
e1c2d96c 1259 X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
f0722512
TG
1260
1261 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt),
1262 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht),
1263 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &rapl_defaults_tng),
1264 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID, &rapl_defaults_ann),
1265 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &rapl_defaults_core),
1266 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
1267 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &rapl_defaults_core),
33c98003 1268 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &rapl_defaults_core),
f0722512
TG
1269 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &rapl_defaults_core),
1270 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &rapl_defaults_core),
1271
1272 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &rapl_defaults_hsw_server),
1273 X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &rapl_defaults_hsw_server),
43756a29
VD
1274
1275 X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
8a9d881f 1276 X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
a7405612 1277 X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
2d281d81
JP
1278 {}
1279};
1280MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
1281
bed5ab63
TG
1282/* Read once for all raw primitive data for domains */
1283static void rapl_update_domain_data(struct rapl_package *rp)
2d281d81
JP
1284{
1285 int dmn, prim;
1286 u64 val;
2d281d81 1287
bed5ab63 1288 for (dmn = 0; dmn < rp->nr_domains; dmn++) {
9ea7612c 1289 pr_debug("update %s domain %s data\n", rp->name,
bed5ab63
TG
1290 rp->domains[dmn].name);
1291 /* exclude non-raw primitives */
1292 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
98ff639a
ZR
1293 struct rapl_primitive_info *rpi = get_rpi(rp, prim);
1294
bed5ab63 1295 if (!rapl_read_data_raw(&rp->domains[dmn], prim,
98ff639a 1296 rpi->unit, &val))
3382388d 1297 rp->domains[dmn].rdd.primitives[prim] = val;
2d281d81
JP
1298 }
1299 }
1300
1301}
1302
2d281d81
JP
1303static int rapl_package_register_powercap(struct rapl_package *rp)
1304{
1305 struct rapl_domain *rd;
2d281d81 1306 struct powercap_zone *power_zone = NULL;
01857cf7 1307 int nr_pl, ret;
bed5ab63
TG
1308
1309 /* Update the domain data of the new package */
1310 rapl_update_domain_data(rp);
2d281d81 1311
3382388d 1312 /* first we register package domain as the parent zone */
2d281d81
JP
1313 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1314 if (rd->id == RAPL_DOMAIN_PACKAGE) {
1315 nr_pl = find_nr_power_limit(rd);
9ea7612c 1316 pr_debug("register package domain %s\n", rp->name);
2d281d81 1317 power_zone = powercap_register_zone(&rd->power_zone,
3382388d
ZR
1318 rp->priv->control_type, rp->name,
1319 NULL, &zone_ops[rd->id], nr_pl,
1320 &constraint_ops);
2d281d81 1321 if (IS_ERR(power_zone)) {
9ea7612c 1322 pr_debug("failed to register power zone %s\n",
3382388d 1323 rp->name);
bed5ab63 1324 return PTR_ERR(power_zone);
2d281d81
JP
1325 }
1326 /* track parent zone in per package/socket data */
1327 rp->power_zone = power_zone;
1328 /* done, only one package domain per socket */
1329 break;
1330 }
1331 }
1332 if (!power_zone) {
1333 pr_err("no package domain found, unknown topology!\n");
bed5ab63 1334 return -ENODEV;
2d281d81 1335 }
3382388d 1336 /* now register domains as children of the socket/package */
2d281d81 1337 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
f1e8d756
ZR
1338 struct powercap_zone *parent = rp->power_zone;
1339
2d281d81
JP
1340 if (rd->id == RAPL_DOMAIN_PACKAGE)
1341 continue;
f1e8d756
ZR
1342 if (rd->id == RAPL_DOMAIN_PLATFORM)
1343 parent = NULL;
2d281d81
JP
1344 /* number of power limits per domain varies */
1345 nr_pl = find_nr_power_limit(rd);
1346 power_zone = powercap_register_zone(&rd->power_zone,
3382388d 1347 rp->priv->control_type,
f1e8d756 1348 rd->name, parent,
3382388d
ZR
1349 &zone_ops[rd->id], nr_pl,
1350 &constraint_ops);
2d281d81
JP
1351
1352 if (IS_ERR(power_zone)) {
9ea7612c 1353 pr_debug("failed to register power_zone, %s:%s\n",
3382388d 1354 rp->name, rd->name);
2d281d81
JP
1355 ret = PTR_ERR(power_zone);
1356 goto err_cleanup;
1357 }
1358 }
bed5ab63 1359 return 0;
2d281d81 1360
2d281d81 1361err_cleanup:
58705069
TG
1362 /*
1363 * Clean up previously initialized domains within the package if we
2d281d81
JP
1364 * failed after the first domain setup.
1365 */
1366 while (--rd >= rp->domains) {
9ea7612c 1367 pr_debug("unregister %s domain %s\n", rp->name, rd->name);
3382388d
ZR
1368 powercap_unregister_zone(rp->priv->control_type,
1369 &rd->power_zone);
2d281d81
JP
1370 }
1371
1372 return ret;
1373}
1374
693c1d78 1375static int rapl_check_domain(int domain, struct rapl_package *rp)
2d281d81 1376{
1193b165 1377 struct reg_action ra;
2d281d81
JP
1378
1379 switch (domain) {
1380 case RAPL_DOMAIN_PACKAGE:
2d281d81 1381 case RAPL_DOMAIN_PP0:
2d281d81 1382 case RAPL_DOMAIN_PP1:
2d281d81 1383 case RAPL_DOMAIN_DRAM:
f1e8d756 1384 case RAPL_DOMAIN_PLATFORM:
1193b165 1385 ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS];
2d281d81
JP
1386 break;
1387 default:
1388 pr_err("invalid domain id %d\n", domain);
1389 return -EINVAL;
1390 }
9d31c676
JP
1391 /* make sure domain counters are available and contains non-zero
1392 * values, otherwise skip it.
7b874772 1393 */
1193b165 1394
7a57e9f1 1395 ra.mask = ENERGY_STATUS_MASK;
bf44b901 1396 if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value)
9d31c676 1397 return -ENODEV;
2d281d81 1398
9d31c676 1399 return 0;
2d281d81
JP
1400}
1401
cb532e72
ZR
1402/*
1403 * Get per domain energy/power/time unit.
1404 * RAPL Interfaces without per domain unit register will use the package
1405 * scope unit register to set per domain units.
1406 */
1407static int rapl_get_domain_unit(struct rapl_domain *rd)
1408{
1409 struct rapl_defaults *defaults = get_defaults(rd->rp);
1410 int ret;
1411
16e95a62
ZR
1412 if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
1413 if (!rd->rp->priv->reg_unit.val) {
cb532e72
ZR
1414 pr_err("No valid Unit register found\n");
1415 return -ENODEV;
1416 }
1417 rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit;
1418 }
1419
1420 if (!defaults->check_unit) {
1421 pr_err("missing .check_unit() callback\n");
1422 return -ENODEV;
1423 }
1424
693c1d78 1425 ret = defaults->check_unit(rd);
cb532e72
ZR
1426 if (ret)
1427 return ret;
1428
1429 if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit)
1430 rd->energy_unit = defaults->dram_domain_energy_unit;
1431 if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit)
1432 rd->energy_unit = defaults->psys_domain_energy_unit;
1433 return 0;
1434}
1435
e1399ba2
JP
1436/*
1437 * Check if power limits are available. Two cases when they are not available:
1438 * 1. Locked by BIOS, in this case we still provide read-only access so that
1439 * users can see what limit is set by the BIOS.
1440 * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not
3382388d 1441 * exist at all. In this case, we do not show the constraints in powercap.
e1399ba2
JP
1442 *
1443 * Called after domains are detected and initialized.
1444 */
1445static void rapl_detect_powerlimit(struct rapl_domain *rd)
1446{
1447 u64 val64;
1448 int i;
1449
f442bd27
ZR
1450 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
1451 if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) {
1452 if (val64) {
1453 rd->rpl[i].locked = true;
1454 pr_info("%s:%s:%s locked by BIOS\n",
1455 rd->rp->name, rd->name, pl_names[i]);
1456 }
e1399ba2 1457 }
3382388d 1458
081690e9 1459 if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
e1399ba2
JP
1460 rd->rpl[i].name = NULL;
1461 }
1462}
1463
2d281d81
JP
1464/* Detect active and valid domains for the given CPU, caller must
1465 * ensure the CPU belongs to the targeted package and CPU hotlug is disabled.
1466 */
693c1d78 1467static int rapl_detect_domains(struct rapl_package *rp)
2d281d81 1468{
2d281d81 1469 struct rapl_domain *rd;
58705069 1470 int i;
2d281d81
JP
1471
1472 for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
1473 /* use physical package id to read counters */
693c1d78 1474 if (!rapl_check_domain(i, rp)) {
2d281d81 1475 rp->domain_map |= 1 << i;
fcdf1797
JP
1476 pr_info("Found RAPL domain %s\n", rapl_domain_names[i]);
1477 }
2d281d81 1478 }
3382388d 1479 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
2d281d81 1480 if (!rp->nr_domains) {
9ea7612c 1481 pr_debug("no valid rapl domains found in %s\n", rp->name);
58705069 1482 return -ENODEV;
2d281d81 1483 }
9ea7612c 1484 pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
2d281d81 1485
2fa00769 1486 rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain),
3382388d 1487 GFP_KERNEL);
58705069
TG
1488 if (!rp->domains)
1489 return -ENOMEM;
1490
2d281d81
JP
1491 rapl_init_domains(rp);
1492
cb532e72
ZR
1493 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1494 rapl_get_domain_unit(rd);
e1399ba2 1495 rapl_detect_powerlimit(rd);
cb532e72 1496 }
e1399ba2 1497
2d281d81
JP
1498 return 0;
1499}
1500
1501/* called from CPU hotplug notifier, hotplug lock held */
3382388d 1502void rapl_remove_package(struct rapl_package *rp)
2d281d81
JP
1503{
1504 struct rapl_domain *rd, *rd_package = NULL;
1505
58705069
TG
1506 package_power_limit_irq_restore(rp);
1507
2d281d81 1508 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
9050a9cd
ZR
1509 int i;
1510
1511 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
1512 rapl_write_pl_data(rd, i, PL_ENABLE, 0);
1513 rapl_write_pl_data(rd, i, PL_CLAMP, 0);
58705069 1514 }
9050a9cd 1515
2d281d81
JP
1516 if (rd->id == RAPL_DOMAIN_PACKAGE) {
1517 rd_package = rd;
1518 continue;
1519 }
9ea7612c
ZR
1520 pr_debug("remove package, undo power limit on %s: %s\n",
1521 rp->name, rd->name);
3382388d
ZR
1522 powercap_unregister_zone(rp->priv->control_type,
1523 &rd->power_zone);
2d281d81
JP
1524 }
1525 /* do parent zone last */
3382388d
ZR
1526 powercap_unregister_zone(rp->priv->control_type,
1527 &rd_package->power_zone);
2d281d81
JP
1528 list_del(&rp->plist);
1529 kfree(rp);
1530}
3382388d
ZR
1531EXPORT_SYMBOL_GPL(rapl_remove_package);
1532
1533/* caller to ensure CPU hotplug lock is held */
bf44b901 1534struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
3382388d 1535{
3382388d 1536 struct rapl_package *rp;
bf44b901
ZR
1537 int uid;
1538
1539 if (id_is_cpu)
1540 uid = topology_logical_die_id(id);
1541 else
1542 uid = id;
3382388d
ZR
1543
1544 list_for_each_entry(rp, &rapl_packages, plist) {
bf44b901 1545 if (rp->id == uid
3382388d
ZR
1546 && rp->priv->control_type == priv->control_type)
1547 return rp;
1548 }
1549
1550 return NULL;
1551}
1552EXPORT_SYMBOL_GPL(rapl_find_package_domain);
2d281d81
JP
1553
1554/* called from CPU hotplug notifier, hotplug lock held */
bf44b901 1555struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
2d281d81 1556{
2d281d81 1557 struct rapl_package *rp;
b4005e92 1558 int ret;
2d281d81 1559
2d281d81
JP
1560 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
1561 if (!rp)
b4005e92 1562 return ERR_PTR(-ENOMEM);
2d281d81 1563
bf44b901
ZR
1564 if (id_is_cpu) {
1565 rp->id = topology_logical_die_id(id);
1566 rp->lead_cpu = id;
1567 if (topology_max_die_per_package() > 1)
1568 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d",
1569 topology_physical_package_id(id), topology_die_id(id));
1570 else
1571 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
1572 topology_physical_package_id(id));
1573 } else {
1574 rp->id = id;
1575 rp->lead_cpu = -1;
1576 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id);
1577 }
323ee64a 1578
bf44b901 1579 rp->priv = priv;
e8e28c2a
ZR
1580 ret = rapl_config(rp);
1581 if (ret)
1582 goto err_free_package;
1583
2d281d81 1584 /* check if the package contains valid domains */
693c1d78 1585 if (rapl_detect_domains(rp)) {
2d281d81
JP
1586 ret = -ENODEV;
1587 goto err_free_package;
1588 }
a74f4367
TG
1589 ret = rapl_package_register_powercap(rp);
1590 if (!ret) {
2d281d81
JP
1591 INIT_LIST_HEAD(&rp->plist);
1592 list_add(&rp->plist, &rapl_packages);
b4005e92 1593 return rp;
2d281d81
JP
1594 }
1595
1596err_free_package:
1597 kfree(rp->domains);
1598 kfree(rp);
b4005e92 1599 return ERR_PTR(ret);
2d281d81 1600}
3382388d 1601EXPORT_SYMBOL_GPL(rapl_add_package);
2d281d81 1602
52b3672c
ZH
1603static void power_limit_state_save(void)
1604{
1605 struct rapl_package *rp;
1606 struct rapl_domain *rd;
9050a9cd 1607 int ret, i;
52b3672c 1608
5d4c779c 1609 cpus_read_lock();
52b3672c
ZH
1610 list_for_each_entry(rp, &rapl_packages, plist) {
1611 if (!rp->power_zone)
1612 continue;
1613 rd = power_zone_to_rapl_domain(rp->power_zone);
9050a9cd
ZR
1614 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
1615 ret = rapl_read_pl_data(rd, i, PL_LIMIT, true,
3382388d 1616 &rd->rpl[i].last_power_limit);
9050a9cd
ZR
1617 if (ret)
1618 rd->rpl[i].last_power_limit = 0;
52b3672c
ZH
1619 }
1620 }
5d4c779c 1621 cpus_read_unlock();
52b3672c
ZH
1622}
1623
1624static void power_limit_state_restore(void)
1625{
1626 struct rapl_package *rp;
1627 struct rapl_domain *rd;
9050a9cd 1628 int i;
52b3672c 1629
5d4c779c 1630 cpus_read_lock();
52b3672c
ZH
1631 list_for_each_entry(rp, &rapl_packages, plist) {
1632 if (!rp->power_zone)
1633 continue;
1634 rd = power_zone_to_rapl_domain(rp->power_zone);
9050a9cd
ZR
1635 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++)
1636 if (rd->rpl[i].last_power_limit)
1637 rapl_write_pl_data(rd, i, PL_LIMIT,
1638 rd->rpl[i].last_power_limit);
52b3672c 1639 }
5d4c779c 1640 cpus_read_unlock();
52b3672c
ZH
1641}
1642
1643static int rapl_pm_callback(struct notifier_block *nb,
3382388d 1644 unsigned long mode, void *_unused)
52b3672c
ZH
1645{
1646 switch (mode) {
1647 case PM_SUSPEND_PREPARE:
1648 power_limit_state_save();
1649 break;
1650 case PM_POST_SUSPEND:
1651 power_limit_state_restore();
1652 break;
1653 }
1654 return NOTIFY_OK;
1655}
1656
1657static struct notifier_block rapl_pm_notifier = {
1658 .notifier_call = rapl_pm_callback,
1659};
1660
abcfaeb3
ZR
1661static struct platform_device *rapl_msr_platdev;
1662
1663static int __init rapl_init(void)
2d281d81 1664{
087e9cba 1665 const struct x86_cpu_id *id;
58705069 1666 int ret;
2d281d81 1667
087e9cba 1668 id = x86_match_cpu(rapl_ids);
1488ac99 1669 if (id) {
b4288ce7 1670 defaults_msr = (struct rapl_defaults *)id->driver_data;
2d281d81 1671
1488ac99
ZR
1672 rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
1673 if (!rapl_msr_platdev)
1674 return -ENOMEM;
52b3672c 1675
1488ac99
ZR
1676 ret = platform_device_add(rapl_msr_platdev);
1677 if (ret) {
1678 platform_device_put(rapl_msr_platdev);
1679 return ret;
1680 }
abcfaeb3
ZR
1681 }
1682
1488ac99
ZR
1683 ret = register_pm_notifier(&rapl_pm_notifier);
1684 if (ret && rapl_msr_platdev) {
1685 platform_device_del(rapl_msr_platdev);
abcfaeb3 1686 platform_device_put(rapl_msr_platdev);
1488ac99 1687 }
abcfaeb3
ZR
1688
1689 return ret;
2d281d81
JP
1690}
1691
abcfaeb3 1692static void __exit rapl_exit(void)
2d281d81 1693{
abcfaeb3 1694 platform_device_unregister(rapl_msr_platdev);
52b3672c 1695 unregister_pm_notifier(&rapl_pm_notifier);
2d281d81
JP
1696}
1697
f76cb066 1698fs_initcall(rapl_init);
abcfaeb3
ZR
1699module_exit(rapl_exit);
1700
3382388d 1701MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
2d281d81
JP
1702MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
1703MODULE_LICENSE("GPL v2");