Commit | Line | Data |
---|---|---|
457c8996 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
de0428a7 | 2 | #include <linux/perf_event.h> |
d5616bac | 3 | #include <linux/jump_label.h> |
1018faa6 | 4 | #include <linux/export.h> |
de0428a7 KW |
5 | #include <linux/types.h> |
6 | #include <linux/init.h> | |
7 | #include <linux/slab.h> | |
914123fa | 8 | #include <linux/delay.h> |
df4d2973 | 9 | #include <linux/jiffies.h> |
d6eed550 | 10 | #include <asm/apicdef.h> |
7685665c | 11 | #include <asm/apic.h> |
3966c3fe | 12 | #include <asm/nmi.h> |
de0428a7 | 13 | |
27f6d22b | 14 | #include "../perf_event.h" |
f22f54f4 | 15 | |
df4d2973 TL |
16 | static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); |
17 | static unsigned long perf_nmi_window; | |
6d3edaae | 18 | |
57388912 KP |
19 | /* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */ |
20 | #define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL) | |
21 | #define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE) | |
22 | ||
21d59e3e SD |
23 | /* PMC Enable and Overflow bits for PerfCntrGlobal* registers */ |
24 | static u64 amd_pmu_global_cntr_mask __read_mostly; | |
25 | ||
caaa8be3 | 26 | static __initconst const u64 amd_hw_cache_event_ids |
f22f54f4 PZ |
27 | [PERF_COUNT_HW_CACHE_MAX] |
28 | [PERF_COUNT_HW_CACHE_OP_MAX] | |
29 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | |
30 | { | |
31 | [ C(L1D) ] = { | |
32 | [ C(OP_READ) ] = { | |
33 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | |
83112e68 | 34 | [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */ |
f22f54f4 PZ |
35 | }, |
36 | [ C(OP_WRITE) ] = { | |
9cc2617d | 37 | [ C(RESULT_ACCESS) ] = 0, |
f22f54f4 PZ |
38 | [ C(RESULT_MISS) ] = 0, |
39 | }, | |
40 | [ C(OP_PREFETCH) ] = { | |
41 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | |
42 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | |
43 | }, | |
44 | }, | |
45 | [ C(L1I ) ] = { | |
46 | [ C(OP_READ) ] = { | |
47 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | |
48 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | |
49 | }, | |
50 | [ C(OP_WRITE) ] = { | |
51 | [ C(RESULT_ACCESS) ] = -1, | |
52 | [ C(RESULT_MISS) ] = -1, | |
53 | }, | |
54 | [ C(OP_PREFETCH) ] = { | |
55 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | |
56 | [ C(RESULT_MISS) ] = 0, | |
57 | }, | |
58 | }, | |
59 | [ C(LL ) ] = { | |
60 | [ C(OP_READ) ] = { | |
61 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | |
62 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | |
63 | }, | |
64 | [ C(OP_WRITE) ] = { | |
65 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | |
66 | [ C(RESULT_MISS) ] = 0, | |
67 | }, | |
68 | [ C(OP_PREFETCH) ] = { | |
69 | [ C(RESULT_ACCESS) ] = 0, | |
70 | [ C(RESULT_MISS) ] = 0, | |
71 | }, | |
72 | }, | |
73 | [ C(DTLB) ] = { | |
74 | [ C(OP_READ) ] = { | |
75 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | |
ba0cef3d | 76 | [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */ |
f22f54f4 PZ |
77 | }, |
78 | [ C(OP_WRITE) ] = { | |
79 | [ C(RESULT_ACCESS) ] = 0, | |
80 | [ C(RESULT_MISS) ] = 0, | |
81 | }, | |
82 | [ C(OP_PREFETCH) ] = { | |
83 | [ C(RESULT_ACCESS) ] = 0, | |
84 | [ C(RESULT_MISS) ] = 0, | |
85 | }, | |
86 | }, | |
87 | [ C(ITLB) ] = { | |
88 | [ C(OP_READ) ] = { | |
89 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ |
ba0cef3d | 90 | [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ |
f22f54f4 PZ |
91 | }, |
92 | [ C(OP_WRITE) ] = { | |
93 | [ C(RESULT_ACCESS) ] = -1, | |
94 | [ C(RESULT_MISS) ] = -1, | |
95 | }, | |
96 | [ C(OP_PREFETCH) ] = { | |
97 | [ C(RESULT_ACCESS) ] = -1, | |
98 | [ C(RESULT_MISS) ] = -1, | |
99 | }, | |
100 | }, | |
101 | [ C(BPU ) ] = { | |
102 | [ C(OP_READ) ] = { | |
103 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | |
104 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | |
105 | }, | |
106 | [ C(OP_WRITE) ] = { | |
107 | [ C(RESULT_ACCESS) ] = -1, | |
108 | [ C(RESULT_MISS) ] = -1, | |
109 | }, | |
110 | [ C(OP_PREFETCH) ] = { | |
111 | [ C(RESULT_ACCESS) ] = -1, | |
112 | [ C(RESULT_MISS) ] = -1, | |
113 | }, | |
114 | }, | |
89d6c0b5 PZ |
115 | [ C(NODE) ] = { |
116 | [ C(OP_READ) ] = { | |
117 | [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ | |
118 | [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ | |
119 | }, | |
120 | [ C(OP_WRITE) ] = { | |
121 | [ C(RESULT_ACCESS) ] = -1, | |
122 | [ C(RESULT_MISS) ] = -1, | |
123 | }, | |
124 | [ C(OP_PREFETCH) ] = { | |
125 | [ C(RESULT_ACCESS) ] = -1, | |
126 | [ C(RESULT_MISS) ] = -1, | |
127 | }, | |
128 | }, | |
f22f54f4 PZ |
129 | }; |
130 | ||
0e3b74e2 KP |
131 | static __initconst const u64 amd_hw_cache_event_ids_f17h |
132 | [PERF_COUNT_HW_CACHE_MAX] | |
133 | [PERF_COUNT_HW_CACHE_OP_MAX] | |
134 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | |
135 | [C(L1D)] = { | |
136 | [C(OP_READ)] = { | |
137 | [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */ | |
138 | [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */ | |
139 | }, | |
140 | [C(OP_WRITE)] = { | |
141 | [C(RESULT_ACCESS)] = 0, | |
142 | [C(RESULT_MISS)] = 0, | |
143 | }, | |
144 | [C(OP_PREFETCH)] = { | |
145 | [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */ | |
146 | [C(RESULT_MISS)] = 0, | |
147 | }, | |
148 | }, | |
149 | [C(L1I)] = { | |
150 | [C(OP_READ)] = { | |
151 | [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */ | |
152 | [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */ | |
153 | }, | |
154 | [C(OP_WRITE)] = { | |
155 | [C(RESULT_ACCESS)] = -1, | |
156 | [C(RESULT_MISS)] = -1, | |
157 | }, | |
158 | [C(OP_PREFETCH)] = { | |
159 | [C(RESULT_ACCESS)] = 0, | |
160 | [C(RESULT_MISS)] = 0, | |
161 | }, | |
162 | }, | |
163 | [C(LL)] = { | |
164 | [C(OP_READ)] = { | |
165 | [C(RESULT_ACCESS)] = 0, | |
166 | [C(RESULT_MISS)] = 0, | |
167 | }, | |
168 | [C(OP_WRITE)] = { | |
169 | [C(RESULT_ACCESS)] = 0, | |
170 | [C(RESULT_MISS)] = 0, | |
171 | }, | |
172 | [C(OP_PREFETCH)] = { | |
173 | [C(RESULT_ACCESS)] = 0, | |
174 | [C(RESULT_MISS)] = 0, | |
175 | }, | |
176 | }, | |
177 | [C(DTLB)] = { | |
178 | [C(OP_READ)] = { | |
179 | [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */ | |
180 | [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */ | |
181 | }, | |
182 | [C(OP_WRITE)] = { | |
183 | [C(RESULT_ACCESS)] = 0, | |
184 | [C(RESULT_MISS)] = 0, | |
185 | }, | |
186 | [C(OP_PREFETCH)] = { | |
187 | [C(RESULT_ACCESS)] = 0, | |
188 | [C(RESULT_MISS)] = 0, | |
189 | }, | |
190 | }, | |
191 | [C(ITLB)] = { | |
192 | [C(OP_READ)] = { | |
193 | [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */ | |
194 | [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */ | |
195 | }, | |
196 | [C(OP_WRITE)] = { | |
197 | [C(RESULT_ACCESS)] = -1, | |
198 | [C(RESULT_MISS)] = -1, | |
199 | }, | |
200 | [C(OP_PREFETCH)] = { | |
201 | [C(RESULT_ACCESS)] = -1, | |
202 | [C(RESULT_MISS)] = -1, | |
203 | }, | |
204 | }, | |
205 | [C(BPU)] = { | |
206 | [C(OP_READ)] = { | |
207 | [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */ | |
208 | [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */ | |
209 | }, | |
210 | [C(OP_WRITE)] = { | |
211 | [C(RESULT_ACCESS)] = -1, | |
212 | [C(RESULT_MISS)] = -1, | |
213 | }, | |
214 | [C(OP_PREFETCH)] = { | |
215 | [C(RESULT_ACCESS)] = -1, | |
216 | [C(RESULT_MISS)] = -1, | |
217 | }, | |
218 | }, | |
219 | [C(NODE)] = { | |
220 | [C(OP_READ)] = { | |
221 | [C(RESULT_ACCESS)] = 0, | |
222 | [C(RESULT_MISS)] = 0, | |
223 | }, | |
224 | [C(OP_WRITE)] = { | |
225 | [C(RESULT_ACCESS)] = -1, | |
226 | [C(RESULT_MISS)] = -1, | |
227 | }, | |
228 | [C(OP_PREFETCH)] = { | |
229 | [C(RESULT_ACCESS)] = -1, | |
230 | [C(RESULT_MISS)] = -1, | |
231 | }, | |
232 | }, | |
233 | }; | |
234 | ||
f22f54f4 | 235 | /* |
3fe3331b | 236 | * AMD Performance Monitor K7 and later, up to and including Family 16h: |
f22f54f4 | 237 | */ |
0a25556f | 238 | static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = |
f22f54f4 | 239 | { |
3fe3331b KP |
240 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, |
241 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | |
242 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, | |
243 | [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, | |
244 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, | |
245 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, | |
246 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ | |
247 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ | |
248 | }; | |
249 | ||
250 | /* | |
251 | * AMD Performance Monitor Family 17h and later: | |
252 | */ | |
253 | static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = | |
254 | { | |
255 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | |
256 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | |
257 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, | |
25d38728 | 258 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, |
3fe3331b KP |
259 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, |
260 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, | |
261 | [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, | |
262 | [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, | |
f22f54f4 PZ |
263 | }; |
264 | ||
265 | static u64 amd_pmu_event_map(int hw_event) | |
266 | { | |
3fe3331b KP |
267 | if (boot_cpu_data.x86 >= 0x17) |
268 | return amd_f17h_perfmon_event_map[hw_event]; | |
269 | ||
f22f54f4 PZ |
270 | return amd_perfmon_event_map[hw_event]; |
271 | } | |
272 | ||
4c1fd17a JS |
273 | /* |
274 | * Previously calculated offsets | |
275 | */ | |
276 | static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; | |
277 | static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; | |
278 | ||
279 | /* | |
280 | * Legacy CPUs: | |
281 | * 4 counters starting at 0xc0010000 each offset by 1 | |
282 | * | |
283 | * CPUs with core performance counter extensions: | |
284 | * 6 counters starting at 0xc0010200 each offset by 2 | |
285 | */ | |
286 | static inline int amd_pmu_addr_offset(int index, bool eventsel) | |
287 | { | |
0cf5f432 | 288 | int offset; |
4c1fd17a JS |
289 | |
290 | if (!index) | |
291 | return index; | |
292 | ||
293 | if (eventsel) | |
294 | offset = event_offsets[index]; | |
295 | else | |
296 | offset = count_offsets[index]; | |
297 | ||
298 | if (offset) | |
299 | return offset; | |
300 | ||
362f924b | 301 | if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) |
4c1fd17a JS |
302 | offset = index; |
303 | else | |
304 | offset = index << 1; | |
305 | ||
306 | if (eventsel) | |
307 | event_offsets[index] = offset; | |
308 | else | |
309 | count_offsets[index] = offset; | |
310 | ||
311 | return offset; | |
312 | } | |
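/*
 * Rough illustration (an editorial sketch, not kernel text; the counter
 * register bases are assumed from MSR_K7_PERFCTR0 and MSR_F15H_PERF_CTR):
 * for index 2,
 *   legacy:        offset = 2      -> event select 0xc0010000 + 2 = 0xc0010002
 *   PERFCTR_CORE:  offset = 2 << 1 -> event select 0xc0010200 + 4 = 0xc0010204,
 *                                     counter      0xc0010201 + 4 = 0xc0010205
 */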
313 | ||
471af006 KP |
314 | /* |
315 | * AMD64 events are detected based on their event codes. | |
316 | */ | |
317 | static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) | |
318 | { | |
319 | return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); | |
320 | } | |
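/*
 * Worked example (illustrative only): EventSelect[7:0] lives in config[7:0]
 * and EventSelect[11:8] in config[35:32], so a raw config of 0x1000000d6
 * decodes as ((0x1000000d6 >> 24) & 0x0f00) | (0x1000000d6 & 0x00ff)
 * = 0x100 | 0xd6 = 0x1d6.
 */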
321 | ||
322 | static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) | |
323 | { | |
324 | if (!(x86_pmu.flags & PMU_FL_PAIR)) | |
325 | return false; | |
326 | ||
327 | switch (amd_get_event_code(hwc)) { | |
328 | case 0x003: return true; /* Retired SSE/AVX FLOPs */ | |
329 | default: return false; | |
330 | } | |
331 | } | |
332 | ||
706460a9 SD |
333 | DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config); |
334 | ||
e259514e JS |
335 | static int amd_core_hw_config(struct perf_event *event) |
336 | { | |
011af857 JR |
337 | if (event->attr.exclude_host && event->attr.exclude_guest) |
338 | /* | |
339 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 | |
340 | * and will count in both modes. We don't want to count in that | |
341 | * case so we emulate no-counting by setting US = OS = 0. | |
342 | */ | |
343 | event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | | |
344 | ARCH_PERFMON_EVENTSEL_OS); | |
345 | else if (event->attr.exclude_host) | |
9f19010a | 346 | event->hw.config |= AMD64_EVENTSEL_GUESTONLY; |
011af857 | 347 | else if (event->attr.exclude_guest) |
9f19010a | 348 | event->hw.config |= AMD64_EVENTSEL_HOSTONLY; |
011af857 | 349 | |
57388912 KP |
350 | if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) |
351 | event->hw.flags |= PERF_X86_EVENT_PAIR; | |
352 | ||
b40d0156 | 353 | if (has_branch_stack(event)) |
706460a9 | 354 | return static_call(amd_pmu_branch_hw_config)(event); |
ada54345 | 355 | |
b40d0156 | 356 | return 0; |
e259514e | 357 | } |
b4cdc5c2 | 358 | |
f22f54f4 PZ |
359 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) |
360 | { | |
361 | return (hwc->config & 0xe0) == 0xe0; | |
362 | } | |
363 | ||
b38b24ea PZ |
364 | static inline int amd_has_nb(struct cpu_hw_events *cpuc) |
365 | { | |
366 | struct amd_nb *nb = cpuc->amd_nb; | |
367 | ||
368 | return nb && nb->nb_id != -1; | |
369 | } | |
370 | ||
e259514e JS |
371 | static int amd_pmu_hw_config(struct perf_event *event) |
372 | { | |
373 | int ret; | |
374 | ||
375 | /* pass precise event sampling to ibs: */ | |
376 | if (event->attr.precise_ip && get_ibs_caps()) | |
377 | return -ENOENT; | |
378 | ||
ada54345 | 379 | if (has_branch_stack(event) && !x86_pmu.lbr_nr) |
e259514e JS |
380 | return -EOPNOTSUPP; |
381 | ||
382 | ret = x86_pmu_hw_config(event); | |
383 | if (ret) | |
384 | return ret; | |
385 | ||
386 | if (event->attr.type == PERF_TYPE_RAW) | |
387 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; | |
388 | ||
e259514e JS |
389 | return amd_core_hw_config(event); |
390 | } | |
391 | ||
4dd4c2ae RR |
392 | static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, |
393 | struct perf_event *event) | |
f22f54f4 | 394 | { |
f22f54f4 PZ |
395 | struct amd_nb *nb = cpuc->amd_nb; |
396 | int i; | |
397 | ||
f22f54f4 PZ |
398 | /* |
399 | * need to scan whole list because event may not have | |
400 | * been assigned during scheduling | |
401 | * | |
402 | * no race condition possible because event can only | |
403 | * be removed on one CPU at a time AND PMU is disabled | |
404 | * when we come here | |
405 | */ | |
948b1bb8 | 406 | for (i = 0; i < x86_pmu.num_counters; i++) { |
5f09fc68 | 407 | if (cmpxchg(nb->owners + i, event, NULL) == event) |
f22f54f4 | 408 | break; |
f22f54f4 PZ |
409 | } |
410 | } | |
411 | ||
412 | /* | |
413 | * AMD64 NorthBridge events need special treatment because | |
414 | * counter access needs to be synchronized across all cores | |
415 | * of a package. Refer to BKDG section 3.12 | |
416 | * | |
417 | * NB events are events measuring L3 cache, Hypertransport | |
418 | * traffic. They are identified by an event code >= 0xe00. | |
419 | * They measure events on the NorthBridge which is shared |
420 | * by all cores on a package. NB events are counted on a | |
421 | * shared set of counters. When a NB event is programmed | |
422 | * in a counter, the data actually comes from a shared | |
423 | * counter. Thus, access to those counters needs to be | |
424 | * synchronized. | |
425 | * | |
426 | * We implement the synchronization such that no two cores | |
427 | * can be measuring NB events using the same counters. Thus, | |
428 | * we maintain a per-NB allocation table. The available slot | |
429 | * is propagated using the event_constraint structure. | |
430 | * | |
431 | * We provide only one choice for each NB event based on | |
432 | * the fact that only NB events have restrictions. Consequently, | |
433 | * if a counter is available, there is a guarantee the NB event | |
434 | * will be assigned to it. If no slot is available, an empty | |
435 | * constraint is returned and scheduling will eventually fail | |
436 | * for this event. | |
437 | * | |
438 | * Note that all cores attached to the same NB compete for the same |
439 | * counters to host NB events, this is why we use atomic ops. Some | |
440 | * multi-chip CPUs may have more than one NB. | |
441 | * | |
442 | * Given that resources are allocated (cmpxchg), they must be | |
443 | * eventually freed for others to use. This is accomplished by | |
4dd4c2ae | 444 | * calling __amd_put_nb_event_constraints() |
f22f54f4 PZ |
445 | * |
446 | * Non NB events are not impacted by this restriction. | |
447 | */ | |
448 | static struct event_constraint * | |
4dd4c2ae RR |
449 | __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, |
450 | struct event_constraint *c) | |
f22f54f4 PZ |
451 | { |
452 | struct hw_perf_event *hwc = &event->hw; | |
453 | struct amd_nb *nb = cpuc->amd_nb; | |
2c53c3dd RR |
454 | struct perf_event *old; |
455 | int idx, new = -1; | |
f22f54f4 | 456 | |
e259514e JS |
457 | if (!c) |
458 | c = &unconstrained; | |
459 | ||
460 | if (cpuc->is_fake) | |
461 | return c; | |
462 | ||
f22f54f4 PZ |
463 | /* |
464 | * detect if already present, if so reuse | |
465 | * | |
466 | * cannot merge with actual allocation | |
467 | * because of possible holes | |
468 | * | |
469 | * event can already be present yet not assigned (in hwc->idx) | |
470 | * because of successive calls to x86_schedule_events() from | |
471 | * hw_perf_group_sched_in() without hw_perf_enable() | |
472 | */ | |
4dd4c2ae | 473 | for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { |
2c53c3dd RR |
474 | if (new == -1 || hwc->idx == idx) |
475 | /* assign free slot, prefer hwc->idx */ | |
476 | old = cmpxchg(nb->owners + idx, NULL, event); | |
477 | else if (nb->owners[idx] == event) | |
478 | /* event already present */ | |
479 | old = event; | |
480 | else | |
481 | continue; | |
482 | ||
483 | if (old && old != event) | |
484 | continue; | |
485 | ||
486 | /* reassign to this slot */ | |
487 | if (new != -1) | |
488 | cmpxchg(nb->owners + new, event, NULL); | |
489 | new = idx; | |
f22f54f4 PZ |
490 | |
491 | /* already present, reuse */ | |
2c53c3dd | 492 | if (old == event) |
f22f54f4 | 493 | break; |
2c53c3dd RR |
494 | } |
495 | ||
496 | if (new == -1) | |
497 | return &emptyconstraint; | |
498 | ||
499 | return &nb->event_constraints[new]; | |
f22f54f4 PZ |
500 | } |
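/*
 * Sketch of the claim protocol above for a hypothetical pair of cores A and B
 * sharing one northbridge: A's cmpxchg(nb->owners + 0, NULL, eventA) wins
 * slot 0, so B's attempt on slot 0 sees a foreign owner and continues to
 * claim slot 1 instead.  If every slot is already owned by other events, the
 * loop finishes with new == -1 and the caller is handed &emptyconstraint,
 * i.e. the event cannot be scheduled in this pass.
 */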
501 | ||
c079c791 | 502 | static struct amd_nb *amd_alloc_nb(int cpu) |
f22f54f4 PZ |
503 | { |
504 | struct amd_nb *nb; | |
505 | int i; | |
506 | ||
7bfb7e6b | 507 | nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu)); |
f22f54f4 PZ |
508 | if (!nb) |
509 | return NULL; | |
510 | ||
c079c791 | 511 | nb->nb_id = -1; |
f22f54f4 PZ |
512 | |
513 | /* | |
514 | * initialize all possible NB constraints | |
515 | */ | |
948b1bb8 | 516 | for (i = 0; i < x86_pmu.num_counters; i++) { |
34538ee7 | 517 | __set_bit(i, nb->event_constraints[i].idxmsk); |
f22f54f4 PZ |
518 | nb->event_constraints[i].weight = 1; |
519 | } | |
520 | return nb; | |
521 | } | |
522 | ||
706460a9 SD |
523 | typedef void (amd_pmu_branch_reset_t)(void); |
524 | DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t); | |
525 | ||
21d59e3e SD |
526 | static void amd_pmu_cpu_reset(int cpu) |
527 | { | |
706460a9 SD |
528 | if (x86_pmu.lbr_nr) |
529 | static_call(amd_pmu_branch_reset)(); | |
530 | ||
21d59e3e SD |
531 | if (x86_pmu.version < 2) |
532 | return; | |
533 | ||
534 | /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ | |
535 | wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); | |
536 | ||
537 | /* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */ | |
538 | wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask); | |
539 | } | |
540 | ||
b38b24ea PZ |
541 | static int amd_pmu_cpu_prepare(int cpu) |
542 | { | |
543 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | |
544 | ||
f4f925da SD |
545 | cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL, |
546 | cpu_to_node(cpu)); | |
547 | if (!cpuc->lbr_sel) | |
548 | return -ENOMEM; | |
549 | ||
b38b24ea PZ |
550 | WARN_ON_ONCE(cpuc->amd_nb); |
551 | ||
32b62f44 | 552 | if (!x86_pmu.amd_nb_constraints) |
95ca792c | 553 | return 0; |
b38b24ea | 554 | |
c079c791 | 555 | cpuc->amd_nb = amd_alloc_nb(cpu); |
f4f925da SD |
556 | if (cpuc->amd_nb) |
557 | return 0; | |
b38b24ea | 558 | |
f4f925da SD |
559 | kfree(cpuc->lbr_sel); |
560 | cpuc->lbr_sel = NULL; | |
561 | ||
562 | return -ENOMEM; | |
b38b24ea PZ |
563 | } |
564 | ||
565 | static void amd_pmu_cpu_starting(int cpu) | |
f22f54f4 | 566 | { |
b38b24ea | 567 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
90413464 | 568 | void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; |
b38b24ea | 569 | struct amd_nb *nb; |
f22f54f4 PZ |
570 | int i, nb_id; |
571 | ||
9f19010a | 572 | cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; |
1018faa6 | 573 | |
32b62f44 | 574 | if (!x86_pmu.amd_nb_constraints) |
f22f54f4 PZ |
575 | return; |
576 | ||
db970bd2 | 577 | nb_id = topology_die_id(cpu); |
b38b24ea | 578 | WARN_ON_ONCE(nb_id == BAD_APICID); |
f22f54f4 | 579 | |
f22f54f4 | 580 | for_each_online_cpu(i) { |
b38b24ea PZ |
581 | nb = per_cpu(cpu_hw_events, i).amd_nb; |
582 | if (WARN_ON_ONCE(!nb)) | |
f22f54f4 | 583 | continue; |
f22f54f4 | 584 | |
b38b24ea | 585 | if (nb->nb_id == nb_id) { |
90413464 | 586 | *onln = cpuc->amd_nb; |
b38b24ea PZ |
587 | cpuc->amd_nb = nb; |
588 | break; | |
589 | } | |
f22f54f4 | 590 | } |
b38b24ea PZ |
591 | |
592 | cpuc->amd_nb->nb_id = nb_id; | |
593 | cpuc->amd_nb->refcnt++; | |
ada54345 | 594 | |
21d59e3e | 595 | amd_pmu_cpu_reset(cpu); |
f22f54f4 PZ |
596 | } |
597 | ||
b38b24ea | 598 | static void amd_pmu_cpu_dead(int cpu) |
f22f54f4 | 599 | { |
f4f925da SD |
600 | struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); |
601 | ||
602 | kfree(cpuhw->lbr_sel); | |
603 | cpuhw->lbr_sel = NULL; | |
f22f54f4 | 604 | |
32b62f44 | 605 | if (!x86_pmu.amd_nb_constraints) |
f22f54f4 PZ |
606 | return; |
607 | ||
a90110c6 | 608 | if (cpuhw->amd_nb) { |
b38b24ea PZ |
609 | struct amd_nb *nb = cpuhw->amd_nb; |
610 | ||
611 | if (nb->nb_id == -1 || --nb->refcnt == 0) | |
612 | kfree(nb); | |
f22f54f4 | 613 | |
a90110c6 RW |
614 | cpuhw->amd_nb = NULL; |
615 | } | |
21d59e3e SD |
616 | |
617 | amd_pmu_cpu_reset(cpu); | |
f22f54f4 PZ |
618 | } |
619 | ||
9622e67e SD |
620 | static inline void amd_pmu_set_global_ctl(u64 ctl) |
621 | { | |
622 | wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); | |
623 | } | |
624 | ||
7685665c SD |
625 | static inline u64 amd_pmu_get_global_status(void) |
626 | { | |
627 | u64 status; | |
628 | ||
629 | /* PerfCntrGlobalStatus is read-only */ | |
630 | rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); | |
631 | ||
ca5b7c0d | 632 | return status; |
7685665c SD |
633 | } |
634 | ||
635 | static inline void amd_pmu_ack_global_status(u64 status) | |
636 | { | |
637 | /* | |
638 | * PerfCntrGlobalStatus is read-only but an overflow acknowledgment | |
639 | * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr | |
640 | * clears the same bit in PerfCntrGlobalStatus | |
641 | */ | |
642 | ||
7685665c SD |
643 | wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); |
644 | } | |
645 | ||
646 | static bool amd_pmu_test_overflow_topbit(int idx) | |
647 | { | |
648 | u64 counter; | |
649 | ||
650 | rdmsrl(x86_pmu_event_addr(idx), counter); | |
651 | ||
652 | return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1)); | |
653 | } | |
654 | ||
655 | static bool amd_pmu_test_overflow_status(int idx) | |
656 | { | |
657 | return amd_pmu_get_global_status() & BIT_ULL(idx); | |
658 | } | |
659 | ||
660 | DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit); | |
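/*
 * Note on the two probes above (a gloss, not original kernel text): counters
 * are armed with the negated sampling period and count up towards bit 47
 * (cntval_bits - 1), so on legacy hardware a cleared top bit means the
 * counter has wrapped and has not yet been re-armed by the NMI handler.
 * With PerfMonV2 the same information is read from the per-counter bit of
 * PerfCntrGlobalStatus instead.
 */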
661 | ||
914123fa LT |
662 | /* |
663 | * When a PMC counter overflows, an NMI is used to process the event and | |
664 | * reset the counter. NMI latency can result in the counter being updated | |
665 | * before the NMI can run, which can result in what appear to be spurious | |
666 | * NMIs. This function is intended to wait for the NMI to run and reset | |
667 | * the counter to avoid possible unhandled NMI messages. | |
668 | */ | |
669 | #define OVERFLOW_WAIT_COUNT 50 | |
670 | ||
671 | static void amd_pmu_wait_on_overflow(int idx) | |
672 | { | |
673 | unsigned int i; | |
914123fa LT |
674 | |
675 | /* | |
676 | * Wait for the counter to be reset if it has overflowed. This loop | |
677 | * should exit very, very quickly, but just in case, don't wait | |
678 | * forever... | |
679 | */ | |
680 | for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { | |
7685665c | 681 | if (!static_call(amd_pmu_test_overflow)(idx)) |
914123fa LT |
682 | break; |
683 | ||
684 | /* Might be in IRQ context, so can't sleep */ | |
685 | udelay(1); | |
686 | } | |
687 | } | |
688 | ||
9622e67e | 689 | static void amd_pmu_check_overflow(void) |
914123fa LT |
690 | { |
691 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | |
692 | int idx; | |
693 | ||
914123fa LT |
694 | /* |
695 | * This shouldn't be called from NMI context, but add a safeguard here | |
696 | * to return, since if we're in NMI context we can't wait for an NMI | |
697 | * to reset an overflowed counter value. | |
698 | */ | |
699 | if (in_nmi()) | |
700 | return; | |
701 | ||
702 | /* | |
703 | * Check each counter for overflow and wait for it to be reset by the | |
704 | * NMI if it has overflowed. This relies on the fact that all active | |
d9f6e12f | 705 | * counters are always enabled when this function is called and |
914123fa LT |
706 | * ARCH_PERFMON_EVENTSEL_INT is always set. |
707 | */ | |
708 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | |
709 | if (!test_bit(idx, cpuc->active_mask)) | |
710 | continue; | |
711 | ||
712 | amd_pmu_wait_on_overflow(idx); | |
713 | } | |
714 | } | |
715 | ||
ada54345 SE |
716 | static void amd_pmu_enable_event(struct perf_event *event) |
717 | { | |
718 | x86_pmu_enable_event(event); | |
719 | } | |
720 | ||
721 | static void amd_pmu_enable_all(int added) | |
722 | { | |
723 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | |
ada54345 SE |
724 | int idx; |
725 | ||
726 | amd_brs_enable_all(); | |
727 | ||
728 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | |
ada54345 SE |
729 | /* only activate events which are marked as active */ |
730 | if (!test_bit(idx, cpuc->active_mask)) | |
731 | continue; | |
732 | ||
733 | amd_pmu_enable_event(cpuc->events[idx]); | |
734 | } | |
735 | } | |
736 | ||
9622e67e SD |
737 | static void amd_pmu_v2_enable_event(struct perf_event *event) |
738 | { | |
739 | struct hw_perf_event *hwc = &event->hw; | |
740 | ||
741 | /* | |
742 | * Testing cpu_hw_events.enabled should be skipped in this case unlike | |
743 | * in x86_pmu_enable_event(). | |
744 | * | |
745 | * Since cpu_hw_events.enabled is set only after returning from | |
746 | * x86_pmu_start(), the PMCs must be programmed and kept ready. | |
747 | * Counting starts only after x86_pmu_enable_all() is called. | |
748 | */ | |
749 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | |
750 | } | |
751 | ||
ca5b7c0d | 752 | static __always_inline void amd_pmu_core_enable_all(void) |
9622e67e SD |
753 | { |
754 | amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask); | |
755 | } | |
756 | ||
ca5b7c0d SD |
757 | static void amd_pmu_v2_enable_all(int added) |
758 | { | |
759 | amd_pmu_lbr_enable_all(); | |
760 | amd_pmu_core_enable_all(); | |
761 | } | |
762 | ||
3966c3fe LT |
763 | static void amd_pmu_disable_event(struct perf_event *event) |
764 | { | |
765 | x86_pmu_disable_event(event); | |
766 | ||
767 | /* | |
768 | * This can be called from NMI context (via x86_pmu_stop). The counter | |
769 | * may have overflowed, but either way, we'll never see it get reset | |
770 | * by the NMI if we're already in the NMI. And the NMI latency support | |
771 | * below will take care of any pending NMI that might have been | |
772 | * generated by the overflow. | |
773 | */ | |
774 | if (in_nmi()) | |
775 | return; | |
776 | ||
777 | amd_pmu_wait_on_overflow(event->hw.idx); | |
778 | } | |
779 | ||
9622e67e SD |
780 | static void amd_pmu_disable_all(void) |
781 | { | |
782 | amd_brs_disable_all(); | |
783 | x86_pmu_disable_all(); | |
784 | amd_pmu_check_overflow(); | |
785 | } | |
786 | ||
ca5b7c0d | 787 | static __always_inline void amd_pmu_core_disable_all(void) |
9622e67e | 788 | { |
9622e67e | 789 | amd_pmu_set_global_ctl(0); |
ca5b7c0d SD |
790 | } |
791 | ||
792 | static void amd_pmu_v2_disable_all(void) | |
793 | { | |
794 | amd_pmu_core_disable_all(); | |
795 | amd_pmu_lbr_disable_all(); | |
9622e67e SD |
796 | amd_pmu_check_overflow(); |
797 | } | |
798 | ||
706460a9 SD |
799 | DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add); |
800 | ||
ada54345 SE |
801 | static void amd_pmu_add_event(struct perf_event *event) |
802 | { | |
803 | if (needs_branch_stack(event)) | |
706460a9 | 804 | static_call(amd_pmu_branch_add)(event); |
ada54345 SE |
805 | } |
806 | ||
706460a9 SD |
807 | DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del); |
808 | ||
ada54345 SE |
809 | static void amd_pmu_del_event(struct perf_event *event) |
810 | { | |
811 | if (needs_branch_stack(event)) | |
706460a9 | 812 | static_call(amd_pmu_branch_del)(event); |
ada54345 SE |
813 | } |
814 | ||
6d3edaae LT |
815 | /* |
816 | * Because of NMI latency, if multiple PMC counters are active or other sources | |
817 | * of NMIs are received, the perf NMI handler can handle one or more overflowed | |
818 | * PMC counters outside of the NMI associated with the PMC overflow. If the NMI | |
819 | * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel | |
820 | * back-to-back NMI support won't be active. This PMC handler needs to take into | |
821 | * account that this can occur, otherwise this could result in unknown NMI | |
822 | * messages being issued. Examples of this are PMC overflow while in the NMI |
823 | * handler when multiple PMCs are active or PMC overflow while handling some | |
824 | * other source of an NMI. | |
825 | * | |
df4d2973 TL |
826 | * Attempt to mitigate this by creating an NMI window in which un-handled NMIs |
827 | * received during this window will be claimed. This prevents extending the | |
828 | * window past when it is possible that latent NMIs should be received. The | |
829 | * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has | |
830 | * handled a counter. When an un-handled NMI is received, it will be claimed | |
831 | * only if arriving within that window. | |
6d3edaae | 832 | */ |
7685665c SD |
833 | static inline int amd_pmu_adjust_nmi_window(int handled) |
834 | { | |
835 | /* | |
836 | * If a counter was handled, record a timestamp such that un-handled | |
837 | * NMIs will be claimed if arriving within that window. | |
838 | */ | |
839 | if (handled) { | |
840 | this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window); | |
841 | ||
842 | return handled; | |
843 | } | |
844 | ||
845 | if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) | |
846 | return NMI_DONE; | |
847 | ||
848 | return NMI_HANDLED; | |
849 | } | |
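/*
 * Example timeline (illustrative, using the ~100ms window configured in
 * amd_core_pmu_init()): a counter handled at jiffies == 1000 records
 * perf_nmi_tstamp = 1000 + perf_nmi_window; an otherwise unhandled NMI that
 * fires while jiffies is still inside that window is claimed (NMI_HANDLED),
 * while one arriving after the window has expired returns NMI_DONE and may
 * be reported as an unknown NMI.
 */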
850 | ||
6d3edaae LT |
851 | static int amd_pmu_handle_irq(struct pt_regs *regs) |
852 | { | |
ada54345 | 853 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
8f05c1ff | 854 | int handled; |
ada54345 SE |
855 | int pmu_enabled; |
856 | ||
857 | /* | |
858 | * Save the PMU state. | |
859 | * It needs to be restored when leaving the handler. | |
860 | */ | |
861 | pmu_enabled = cpuc->enabled; | |
862 | cpuc->enabled = 0; | |
863 | ||
864 | /* stop everything (includes BRS) */ | |
865 | amd_pmu_disable_all(); | |
866 | ||
867 | /* Drain BRS if in use (could be inactive) */ |
868 | if (cpuc->lbr_users) | |
869 | amd_brs_drain(); | |
6d3edaae LT |
870 | |
871 | /* Process any counter overflows */ | |
872 | handled = x86_pmu_handle_irq(regs); | |
873 | ||
ada54345 SE |
874 | cpuc->enabled = pmu_enabled; |
875 | if (pmu_enabled) | |
876 | amd_pmu_enable_all(0); | |
877 | ||
7685665c SD |
878 | return amd_pmu_adjust_nmi_window(handled); |
879 | } | |
880 | ||
881 | static int amd_pmu_v2_handle_irq(struct pt_regs *regs) | |
882 | { | |
883 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); | |
884 | struct perf_sample_data data; | |
885 | struct hw_perf_event *hwc; | |
886 | struct perf_event *event; | |
887 | int handled = 0, idx; | |
888 | u64 status, mask; | |
889 | bool pmu_enabled; | |
890 | ||
6d3edaae | 891 | /* |
7685665c SD |
892 | * Save the PMU state as it needs to be restored when leaving the |
893 | * handler | |
6d3edaae | 894 | */ |
7685665c SD |
895 | pmu_enabled = cpuc->enabled; |
896 | cpuc->enabled = 0; | |
6d3edaae | 897 | |
ca5b7c0d SD |
898 | /* Stop counting but do not disable LBR */ |
899 | amd_pmu_core_disable_all(); | |
7685665c SD |
900 | |
901 | status = amd_pmu_get_global_status(); | |
902 | ||
903 | /* Check if any overflows are pending */ | |
904 | if (!status) | |
905 | goto done; | |
906 | ||
ca5b7c0d SD |
907 | /* Read branch records before unfreezing */ |
908 | if (status & GLOBAL_STATUS_LBRS_FROZEN) { | |
909 | amd_pmu_lbr_read(); | |
910 | status &= ~GLOBAL_STATUS_LBRS_FROZEN; | |
911 | } | |
912 | ||
7685665c SD |
913 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
914 | if (!test_bit(idx, cpuc->active_mask)) | |
915 | continue; | |
916 | ||
917 | event = cpuc->events[idx]; | |
918 | hwc = &event->hw; | |
919 | x86_perf_event_update(event); | |
920 | mask = BIT_ULL(idx); | |
921 | ||
922 | if (!(status & mask)) | |
923 | continue; | |
924 | ||
925 | /* Event overflow */ | |
926 | handled++; | |
927 | perf_sample_data_init(&data, 0, hwc->last_period); | |
928 | ||
929 | if (!x86_perf_event_set_period(event)) | |
930 | continue; | |
931 | ||
a9a931e2 | 932 | if (has_branch_stack(event)) { |
ca5b7c0d | 933 | data.br_stack = &cpuc->lbr_stack; |
a9a931e2 KL |
934 | data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; |
935 | } | |
ca5b7c0d | 936 | |
7685665c SD |
937 | if (perf_event_overflow(event, &data, regs)) |
938 | x86_pmu_stop(event, 0); | |
939 | ||
940 | status &= ~mask; | |
6d3edaae LT |
941 | } |
942 | ||
7685665c SD |
943 | /* |
944 | * It should never be the case that some overflows are not handled as | |
945 | * the corresponding PMCs are expected to be inactive according to the | |
946 | * active_mask | |
947 | */ | |
948 | WARN_ON(status > 0); | |
6d3edaae | 949 | |
ca5b7c0d | 950 | /* Clear overflow and freeze bits */ |
7685665c SD |
951 | amd_pmu_ack_global_status(~status); |
952 | ||
953 | /* | |
954 | * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT | |
955 | * PMI entry is not set by the local APIC when a PMC overflow occurs | |
956 | */ | |
957 | inc_irq_stat(apic_perf_irqs); | |
958 | ||
959 | done: | |
960 | cpuc->enabled = pmu_enabled; | |
961 | ||
962 | /* Resume counting only if PMU is active */ | |
963 | if (pmu_enabled) | |
ca5b7c0d | 964 | amd_pmu_core_enable_all(); |
7685665c SD |
965 | |
966 | return amd_pmu_adjust_nmi_window(handled); | |
6d3edaae LT |
967 | } |
968 | ||
4dd4c2ae | 969 | static struct event_constraint * |
79cba822 SE |
970 | amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
971 | struct perf_event *event) | |
4dd4c2ae RR |
972 | { |
973 | /* | |
974 | * if not NB event or no NB, then no constraints | |
975 | */ | |
976 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) | |
977 | return &unconstrained; | |
978 | ||
0cf5f432 | 979 | return __amd_get_nb_event_constraints(cpuc, event, NULL); |
4dd4c2ae RR |
980 | } |
981 | ||
982 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |
983 | struct perf_event *event) | |
984 | { | |
985 | if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) | |
986 | __amd_put_nb_event_constraints(cpuc, event); | |
987 | } | |
988 | ||
641cc938 JO |
989 | PMU_FORMAT_ATTR(event, "config:0-7,32-35"); |
990 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | |
991 | PMU_FORMAT_ATTR(edge, "config:18" ); | |
992 | PMU_FORMAT_ATTR(inv, "config:23" ); | |
993 | PMU_FORMAT_ATTR(cmask, "config:24-31" ); | |
994 | ||
995 | static struct attribute *amd_format_attr[] = { | |
996 | &format_attr_event.attr, | |
997 | &format_attr_umask.attr, | |
998 | &format_attr_edge.attr, | |
999 | &format_attr_inv.attr, | |
1000 | &format_attr_cmask.attr, | |
1001 | NULL, | |
1002 | }; | |
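/*
 * Usage example (the perf-tool syntax here is an assumption, not part of this
 * file): since "event" spans config[7:0] and config[35:32], an event code
 * such as 0x1d6 would be requested as cpu/event=0x1d6,umask=0x00/ and lands
 * in the config as (0x1ULL << 32) | 0xd6, which is exactly what
 * amd_get_event_code() reassembles above.
 */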
1003 | ||
4979d272 RR |
1004 | /* AMD Family 15h */ |
1005 | ||
1006 | #define AMD_EVENT_TYPE_MASK 0x000000F0ULL | |
1007 | ||
1008 | #define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL | |
1009 | #define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL | |
1010 | #define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL | |
1011 | #define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL | |
1012 | #define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL | |
1013 | #define AMD_EVENT_EX_LS 0x000000C0ULL | |
1014 | #define AMD_EVENT_DE 0x000000D0ULL | |
1015 | #define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL | |
1016 | ||
1017 | /* | |
1018 | * AMD family 15h event code/PMC mappings: | |
1019 | * | |
1020 | * type = event_code & 0x0F0: | |
1021 | * | |
1022 | * 0x000 FP PERF_CTL[5:3] | |
1023 | * 0x010 FP PERF_CTL[5:3] | |
1024 | * 0x020 LS PERF_CTL[5:0] | |
1025 | * 0x030 LS PERF_CTL[5:0] | |
1026 | * 0x040 DC PERF_CTL[5:0] | |
1027 | * 0x050 DC PERF_CTL[5:0] | |
1028 | * 0x060 CU PERF_CTL[2:0] | |
1029 | * 0x070 CU PERF_CTL[2:0] | |
1030 | * 0x080 IC/DE PERF_CTL[2:0] | |
1031 | * 0x090 IC/DE PERF_CTL[2:0] | |
1032 | * 0x0A0 --- | |
1033 | * 0x0B0 --- | |
1034 | * 0x0C0 EX/LS PERF_CTL[5:0] | |
1035 | * 0x0D0 DE PERF_CTL[2:0] | |
1036 | * 0x0E0 NB NB_PERF_CTL[3:0] | |
1037 | * 0x0F0 NB NB_PERF_CTL[3:0] | |
1038 | * | |
1039 | * Exceptions: | |
1040 | * | |
855357a2 | 1041 | * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*) |
4979d272 | 1042 | * 0x003 FP PERF_CTL[3] |
855357a2 | 1043 | * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*) |
4979d272 RR |
1044 | * 0x00B FP PERF_CTL[3] |
1045 | * 0x00D FP PERF_CTL[3] | |
1046 | * 0x023 DE PERF_CTL[2:0] | |
1047 | * 0x02D LS PERF_CTL[3] | |
1048 | * 0x02E LS PERF_CTL[3,0] | |
5bcdf5e4 | 1049 | * 0x031 LS PERF_CTL[2:0] (**) |
4979d272 RR |
1050 | * 0x043 CU PERF_CTL[2:0] |
1051 | * 0x045 CU PERF_CTL[2:0] | |
1052 | * 0x046 CU PERF_CTL[2:0] | |
1053 | * 0x054 CU PERF_CTL[2:0] | |
1054 | * 0x055 CU PERF_CTL[2:0] | |
1055 | * 0x08F IC PERF_CTL[0] | |
1056 | * 0x187 DE PERF_CTL[0] | |
1057 | * 0x188 DE PERF_CTL[0] | |
1058 | * 0x0DB EX PERF_CTL[5:0] | |
1059 | * 0x0DC LS PERF_CTL[5:0] | |
1060 | * 0x0DD LS PERF_CTL[5:0] | |
1061 | * 0x0DE LS PERF_CTL[5:0] | |
1062 | * 0x0DF LS PERF_CTL[5:0] | |
5bcdf5e4 | 1063 | * 0x1C0 EX PERF_CTL[5:3] |
4979d272 RR |
1064 | * 0x1D6 EX PERF_CTL[5:0] |
1065 | * 0x1D8 EX PERF_CTL[5:0] | |
855357a2 | 1066 | * |
5bcdf5e4 RR |
1067 | * (*) depending on the umask all FPU counters may be used |
1068 | * (**) only one unitmask enabled at a time | |
4979d272 RR |
1069 | */ |
1070 | ||
1071 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | |
1072 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); | |
1073 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); | |
bc1738f6 | 1074 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); |
4979d272 RR |
1075 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); |
1076 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | |
1077 | ||
1078 | static struct event_constraint * | |
79cba822 SE |
1079 | amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, |
1080 | struct perf_event *event) | |
4979d272 | 1081 | { |
855357a2 RR |
1082 | struct hw_perf_event *hwc = &event->hw; |
1083 | unsigned int event_code = amd_get_event_code(hwc); | |
4979d272 RR |
1084 | |
1085 | switch (event_code & AMD_EVENT_TYPE_MASK) { | |
1086 | case AMD_EVENT_FP: | |
1087 | switch (event_code) { | |
855357a2 RR |
1088 | case 0x000: |
1089 | if (!(hwc->config & 0x0000F000ULL)) | |
1090 | break; | |
1091 | if (!(hwc->config & 0x00000F00ULL)) | |
1092 | break; | |
1093 | return &amd_f15_PMC3; | |
1094 | case 0x004: | |
1095 | if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) | |
1096 | break; | |
1097 | return &amd_f15_PMC3; | |
4979d272 RR |
1098 | case 0x003: |
1099 | case 0x00B: | |
1100 | case 0x00D: | |
1101 | return &amd_f15_PMC3; | |
4979d272 | 1102 | } |
855357a2 | 1103 | return &amd_f15_PMC53; |
4979d272 RR |
1104 | case AMD_EVENT_LS: |
1105 | case AMD_EVENT_DC: | |
1106 | case AMD_EVENT_EX_LS: | |
1107 | switch (event_code) { | |
1108 | case 0x023: | |
1109 | case 0x043: | |
1110 | case 0x045: | |
1111 | case 0x046: | |
1112 | case 0x054: | |
1113 | case 0x055: | |
1114 | return &amd_f15_PMC20; | |
1115 | case 0x02D: | |
1116 | return &amd_f15_PMC3; | |
1117 | case 0x02E: | |
1118 | return &amd_f15_PMC30; | |
5bcdf5e4 RR |
1119 | case 0x031: |
1120 | if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) | |
1121 | return &amd_f15_PMC20; | |
1122 | return &emptyconstraint; | |
1123 | case 0x1C0: | |
1124 | return &amd_f15_PMC53; | |
4979d272 RR |
1125 | default: |
1126 | return &amd_f15_PMC50; | |
1127 | } | |
1128 | case AMD_EVENT_CU: | |
1129 | case AMD_EVENT_IC_DE: | |
1130 | case AMD_EVENT_DE: | |
1131 | switch (event_code) { | |
1132 | case 0x08F: | |
1133 | case 0x187: | |
1134 | case 0x188: | |
1135 | return &amd_f15_PMC0; | |
1136 | case 0x0DB ... 0x0DF: | |
1137 | case 0x1D6: | |
1138 | case 0x1D8: | |
1139 | return &amd_f15_PMC50; | |
1140 | default: | |
1141 | return &amd_f15_PMC20; | |
1142 | } | |
1143 | case AMD_EVENT_NB: | |
940b2f2f | 1144 | /* moved to uncore.c */ |
0cf5f432 | 1145 | return &emptyconstraint; |
4979d272 RR |
1146 | default: |
1147 | return &emptyconstraint; | |
1148 | } | |
1149 | } | |
1150 | ||
471af006 KP |
1151 | static struct event_constraint pair_constraint; |
1152 | ||
1153 | static struct event_constraint * | |
1154 | amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, | |
1155 | struct perf_event *event) | |
1156 | { | |
1157 | struct hw_perf_event *hwc = &event->hw; | |
1158 | ||
1159 | if (amd_is_pair_event_code(hwc)) | |
1160 | return &pair_constraint; | |
1161 | ||
1162 | return &unconstrained; | |
1163 | } | |
1164 | ||
57388912 KP |
1165 | static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc, |
1166 | struct perf_event *event) | |
1167 | { | |
1168 | struct hw_perf_event *hwc = &event->hw; | |
1169 | ||
1170 | if (is_counter_pair(hwc)) | |
1171 | --cpuc->n_pair; | |
1172 | } | |
1173 | ||
ada54345 SE |
1174 | /* |
1175 | * Because of the way BRS operates with inactive and active phases, and |
1176 | * the link to one counter, it is not possible to have two events using BRS | |
1177 | * scheduled at the same time. There would be an issue with enforcing the | |
1178 | * period of each one and given that the BRS saturates, it would not be possible | |
1179 | * to guarantee correlated content for all events. Therefore, in situations | |
1180 | * where multiple events want to use BRS, the kernel enforces mutual exclusion. | |
1181 | * Exclusion is enforced by choosing only one counter for events using BRS. |
1182 | * The event scheduling logic will then automatically multiplex the | |
1183 | * events and ensure that at most one event is actively using BRS. | |
1184 | * | |
1185 | * The BRS counter could be any counter, but there is no constraint on Fam19h, | |
1186 | * therefore all counters are equal and thus we pick the first one: PMC0 | |
1187 | */ | |
1188 | static struct event_constraint amd_fam19h_brs_cntr0_constraint = | |
1189 | EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK); | |
1190 | ||
1191 | static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint = | |
1192 | __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR); | |
1193 | ||
1194 | static struct event_constraint * | |
1195 | amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx, | |
1196 | struct perf_event *event) | |
1197 | { | |
1198 | struct hw_perf_event *hwc = &event->hw; | |
1199 | bool has_brs = has_amd_brs(hwc); | |
1200 | ||
1201 | /* | |
1202 | * In case BRS is used with an event requiring a counter pair, | |
1203 | * the kernel allows it but only on counters 0 & 1 to enforce the |
1204 | * multiplexing required to protect BRS in case of multiple |
1205 | * BRS users | |
1206 | */ | |
1207 | if (amd_is_pair_event_code(hwc)) { | |
1208 | return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint | |
1209 | : &pair_constraint; | |
1210 | } | |
1211 | ||
1212 | if (has_brs) | |
1213 | return &amd_fam19h_brs_cntr0_constraint; | |
1214 | ||
1215 | return &unconstrained; | |
1216 | } | |
1217 | ||
1218 | ||
0bf79d44 JO |
1219 | static ssize_t amd_event_sysfs_show(char *page, u64 config) |
1220 | { | |
1221 | u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | | |
1222 | (config & AMD64_EVENTSEL_EVENT) >> 24; | |
1223 | ||
1224 | return x86_event_sysfs_show(page, config, event); | |
1225 | } | |
1226 | ||
28f0f3c4 | 1227 | static void amd_pmu_limit_period(struct perf_event *event, s64 *left) |
3c27b0c6 PZ |
1228 | { |
1229 | /* | |
1230 | * Decrease period by the depth of the BRS feature to get the last N | |
1231 | * taken branches and approximate the desired period | |
1232 | */ | |
28f0f3c4 PZ |
1233 | if (has_branch_stack(event) && *left > x86_pmu.lbr_nr) |
1234 | *left -= x86_pmu.lbr_nr; | |
3c27b0c6 PZ |
1235 | } |
1236 | ||
b1dc3c48 RR |
1237 | static __initconst const struct x86_pmu amd_pmu = { |
1238 | .name = "AMD", | |
6d3edaae | 1239 | .handle_irq = amd_pmu_handle_irq, |
914123fa | 1240 | .disable_all = amd_pmu_disable_all, |
ada54345 SE |
1241 | .enable_all = amd_pmu_enable_all, |
1242 | .enable = amd_pmu_enable_event, | |
3966c3fe | 1243 | .disable = amd_pmu_disable_event, |
4979d272 RR |
1244 | .hw_config = amd_pmu_hw_config, |
1245 | .schedule_events = x86_schedule_events, | |
b1dc3c48 RR |
1246 | .eventsel = MSR_K7_EVNTSEL0, |
1247 | .perfctr = MSR_K7_PERFCTR0, | |
4c1fd17a | 1248 | .addr_offset = amd_pmu_addr_offset, |
4979d272 RR |
1249 | .event_map = amd_pmu_event_map, |
1250 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | |
b1dc3c48 | 1251 | .num_counters = AMD64_NUM_COUNTERS, |
ada54345 SE |
1252 | .add = amd_pmu_add_event, |
1253 | .del = amd_pmu_del_event, | |
4979d272 RR |
1254 | .cntval_bits = 48, |
1255 | .cntval_mask = (1ULL << 48) - 1, | |
1256 | .apic = 1, | |
1257 | /* use highest bit to detect overflow */ | |
1258 | .max_period = (1ULL << 47) - 1, | |
b1dc3c48 | 1259 | .get_event_constraints = amd_get_event_constraints, |
4979d272 RR |
1260 | .put_event_constraints = amd_put_event_constraints, |
1261 | ||
b1dc3c48 | 1262 | .format_attrs = amd_format_attr, |
0bf79d44 | 1263 | .events_sysfs_show = amd_event_sysfs_show, |
b1dc3c48 | 1264 | |
4979d272 | 1265 | .cpu_prepare = amd_pmu_cpu_prepare, |
1018faa6 | 1266 | .cpu_starting = amd_pmu_cpu_starting, |
b1dc3c48 | 1267 | .cpu_dead = amd_pmu_cpu_dead, |
32b62f44 PZ |
1268 | |
1269 | .amd_nb_constraints = 1, | |
4979d272 RR |
1270 | }; |
1271 | ||
ada54345 SE |
1272 | static ssize_t branches_show(struct device *cdev, |
1273 | struct device_attribute *attr, | |
1274 | char *buf) | |
1275 | { | |
1276 | return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); | |
1277 | } | |
1278 | ||
1279 | static DEVICE_ATTR_RO(branches); | |
1280 | ||
9603aa79 | 1281 | static struct attribute *amd_pmu_branches_attrs[] = { |
ada54345 SE |
1282 | &dev_attr_branches.attr, |
1283 | NULL, | |
1284 | }; | |
1285 | ||
1286 | static umode_t | |
9603aa79 | 1287 | amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i) |
ada54345 SE |
1288 | { |
1289 | return x86_pmu.lbr_nr ? attr->mode : 0; | |
1290 | } | |
1291 | ||
9603aa79 | 1292 | static struct attribute_group group_caps_amd_branches = { |
ada54345 | 1293 | .name = "caps", |
9603aa79 SD |
1294 | .attrs = amd_pmu_branches_attrs, |
1295 | .is_visible = amd_branches_is_visible, | |
ada54345 SE |
1296 | }; |
1297 | ||
9603aa79 SD |
1298 | #ifdef CONFIG_PERF_EVENTS_AMD_BRS |
1299 | ||
44175993 SE |
1300 | EVENT_ATTR_STR(branch-brs, amd_branch_brs, |
1301 | "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n"); | |
1302 | ||
1303 | static struct attribute *amd_brs_events_attrs[] = { | |
1304 | EVENT_PTR(amd_branch_brs), | |
1305 | NULL, | |
1306 | }; | |
1307 | ||
9603aa79 SD |
1308 | static umode_t |
1309 | amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i) | |
1310 | { | |
1311 | return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ? | |
1312 | attr->mode : 0; | |
1313 | } | |
1314 | ||
44175993 SE |
1315 | static struct attribute_group group_events_amd_brs = { |
1316 | .name = "events", | |
1317 | .attrs = amd_brs_events_attrs, | |
1318 | .is_visible = amd_brs_is_visible, | |
1319 | }; | |
1320 | ||
9603aa79 SD |
1321 | #endif /* CONFIG_PERF_EVENTS_AMD_BRS */ |
1322 | ||
ada54345 | 1323 | static const struct attribute_group *amd_attr_update[] = { |
9603aa79 SD |
1324 | &group_caps_amd_branches, |
1325 | #ifdef CONFIG_PERF_EVENTS_AMD_BRS | |
44175993 | 1326 | &group_events_amd_brs, |
9603aa79 | 1327 | #endif |
ada54345 SE |
1328 | NULL, |
1329 | }; | |
1330 | ||
1b45adcd | 1331 | static int __init amd_core_pmu_init(void) |
b1dc3c48 | 1332 | { |
56e026a7 | 1333 | union cpuid_0x80000022_ebx ebx; |
471af006 KP |
1334 | u64 even_ctr_mask = 0ULL; |
1335 | int i; | |
1336 | ||
362f924b | 1337 | if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) |
1b45adcd PZ |
1338 | return 0; |
1339 | ||
471af006 | 1340 | /* Avoid calculating the value each time in the NMI handler */ |
df4d2973 TL |
1341 | perf_nmi_window = msecs_to_jiffies(100); |
1342 | ||
b1dc3c48 RR |
1343 | /* |
1344 | * If core performance counter extensions exist, we must use |
1345 | * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also | |
1b45adcd | 1346 | * amd_pmu_addr_offset(). |
b1dc3c48 RR |
1347 | */ |
1348 | x86_pmu.eventsel = MSR_F15H_PERF_CTL; | |
1349 | x86_pmu.perfctr = MSR_F15H_PERF_CTR; | |
1350 | x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; | |
21d59e3e SD |
1351 | |
1352 | /* Check for Performance Monitoring v2 support */ | |
1353 | if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { | |
56e026a7 SD |
1354 | ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); |
1355 | ||
21d59e3e SD |
1356 | /* Update PMU version for later usage */ |
1357 | x86_pmu.version = 2; | |
1358 | ||
56e026a7 SD |
1359 | /* Find the number of available Core PMCs */ |
1360 | x86_pmu.num_counters = ebx.split.num_core_pmc; | |
1361 | ||
21d59e3e | 1362 | amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1; |
9622e67e SD |
1363 | |
1364 | /* Update PMC handling functions */ | |
1365 | x86_pmu.enable_all = amd_pmu_v2_enable_all; | |
1366 | x86_pmu.disable_all = amd_pmu_v2_disable_all; | |
1367 | x86_pmu.enable = amd_pmu_v2_enable_event; | |
7685665c SD |
1368 | x86_pmu.handle_irq = amd_pmu_v2_handle_irq; |
1369 | static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status); | |
21d59e3e SD |
1370 | } |
1371 | ||
32b62f44 PZ |
1372 | /* |
1373 | * AMD Core perfctr has separate MSRs for the NB events, see | |
1374 | * the amd/uncore.c driver. | |
1375 | */ | |
1376 | x86_pmu.amd_nb_constraints = 0; | |
b1dc3c48 | 1377 | |
471af006 KP |
1378 | if (boot_cpu_data.x86 == 0x15) { |
1379 | pr_cont("Fam15h "); | |
1380 | x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; | |
1381 | } | |
1382 | if (boot_cpu_data.x86 >= 0x17) { | |
1383 | pr_cont("Fam17h+ "); | |
1384 | /* | |
1385 | * Family 17h and compatibles have constraints for Large | |
1386 | * Increment per Cycle events: they may only be assigned an | |
1387 | * even numbered counter that has a consecutive adjacent odd | |
1388 | * numbered counter following it. | |
1389 | */ | |
1390 | for (i = 0; i < x86_pmu.num_counters - 1; i += 2) | |
1391 | even_ctr_mask |= 1 << i; | |
1392 | ||
1393 | pair_constraint = (struct event_constraint) | |
1394 | __EVENT_CONSTRAINT(0, even_ctr_mask, 0, | |
1395 | x86_pmu.num_counters / 2, 0, | |
1396 | PERF_X86_EVENT_PAIR); | |
1397 | ||
1398 | x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; | |
57388912 KP |
1399 | x86_pmu.put_event_constraints = amd_put_event_constraints_f17h; |
1400 | x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE; | |
471af006 KP |
1401 | x86_pmu.flags |= PMU_FL_PAIR; |
1402 | } | |
1403 | ||
703fb765 | 1404 | /* LBR and BRS are mutually exclusive features */ |
ca5b7c0d SD |
1405 | if (!amd_pmu_lbr_init()) { |
1406 | /* LBR requires flushing on context switch */ | |
1407 | x86_pmu.sched_task = amd_pmu_lbr_sched_task; | |
1408 | static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config); | |
1409 | static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); | |
1410 | static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); | |
1411 | static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); | |
1412 | } else if (!amd_brs_init()) { | |
703fb765 SD |
1413 | /* |
1414 | * BRS requires special event constraints and flushing on ctxsw. | |
1415 | */ | |
ada54345 | 1416 | x86_pmu.get_event_constraints = amd_get_event_constraints_f19h; |
706460a9 | 1417 | x86_pmu.sched_task = amd_pmu_brs_sched_task; |
3c27b0c6 | 1418 | x86_pmu.limit_period = amd_pmu_limit_period; |
706460a9 SD |
1419 | |
1420 | static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config); | |
1421 | static_call_update(amd_pmu_branch_reset, amd_brs_reset); | |
1422 | static_call_update(amd_pmu_branch_add, amd_pmu_brs_add); | |
1423 | static_call_update(amd_pmu_branch_del, amd_pmu_brs_del); | |
1424 | ||
ada54345 SE |
1425 | /* |
1426 | * put_event_constraints callback same as Fam17h, set above | |
1427 | */ | |
d5616bac SE |
1428 | |
1429 | /* branch sampling must be stopped when entering low power */ | |
1430 | amd_brs_lopwr_init(); | |
ada54345 SE |
1431 | } |
1432 | ||
1433 | x86_pmu.attr_update = amd_attr_update; | |
1434 | ||
1b45adcd | 1435 | pr_cont("core perfctr, "); |
b1dc3c48 RR |
1436 | return 0; |
1437 | } | |
1438 | ||
de0428a7 | 1439 | __init int amd_pmu_init(void) |
f22f54f4 | 1440 | { |
1b45adcd PZ |
1441 | int ret; |
1442 | ||
f22f54f4 PZ |
1443 | /* Performance-monitoring supported from K7 and later: */ |
1444 | if (boot_cpu_data.x86 < 6) | |
1445 | return -ENODEV; | |
1446 | ||
b1dc3c48 RR |
1447 | x86_pmu = amd_pmu; |
1448 | ||
1b45adcd PZ |
1449 | ret = amd_core_pmu_init(); |
1450 | if (ret) | |
1451 | return ret; | |
f22f54f4 | 1452 | |
32b62f44 PZ |
1453 | if (num_possible_cpus() == 1) { |
1454 | /* | |
1455 | * No point in allocating data structures to serialize | |
1456 | * against other CPUs, when there is only the one CPU. | |
1457 | */ | |
1458 | x86_pmu.amd_nb_constraints = 0; | |
1459 | } | |
1460 | ||
0e3b74e2 KP |
1461 | if (boot_cpu_data.x86 >= 0x17) |
1462 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids)); | |
1463 | else | |
1464 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); | |
f22f54f4 | 1465 | |
f22f54f4 PZ |
1466 | return 0; |
1467 | } | |
1018faa6 | 1468 | |
bae19fdd SD |
1469 | static inline void amd_pmu_reload_virt(void) |
1470 | { | |
1471 | if (x86_pmu.version >= 2) { | |
1472 | /* | |
1473 | * Clear global enable bits, reprogram the PERF_CTL | |
1474 | * registers with updated perf_ctr_virt_mask and then | |
1475 | * set global enable bits once again | |
1476 | */ | |
1477 | amd_pmu_v2_disable_all(); | |
1478 | amd_pmu_enable_all(0); | |
1479 | amd_pmu_v2_enable_all(0); | |
1480 | return; | |
1481 | } | |
1482 | ||
1483 | amd_pmu_disable_all(); | |
1484 | amd_pmu_enable_all(0); | |
1485 | } | |
1486 | ||
1018faa6 JR |
1487 | void amd_pmu_enable_virt(void) |
1488 | { | |
89cbc767 | 1489 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1018faa6 JR |
1490 | |
1491 | cpuc->perf_ctr_virt_mask = 0; | |
1492 | ||
1493 | /* Reload all events */ | |
bae19fdd | 1494 | amd_pmu_reload_virt(); |
1018faa6 JR |
1495 | } |
1496 | EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); | |
1497 | ||
1498 | void amd_pmu_disable_virt(void) | |
1499 | { | |
89cbc767 | 1500 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
1018faa6 JR |
1501 | |
1502 | /* | |
1503 | * We only mask out the Host-only bit so that host-only counting works | |
1504 | * when SVM is disabled. If someone sets up a guest-only counter when | |
1505 | * SVM is disabled, the Guest-only bits still get set and the counter |
1506 | * will not count anything. | |
1507 | */ | |
9f19010a | 1508 | cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; |
1018faa6 JR |
1509 | |
1510 | /* Reload all events */ | |
bae19fdd | 1511 | amd_pmu_reload_virt(); |
1018faa6 JR |
1512 | } |
1513 | EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); |