#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#include "perf_event.h"

enum {
        LBR_FORMAT_32           = 0x00,
        LBR_FORMAT_LIP          = 0x01,
        LBR_FORMAT_EIP          = 0x02,
        LBR_FORMAT_EIP_FLAGS    = 0x03,
};
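
/*
 * Note: the format code used below comes from x86_pmu.intel_cap.lbr_format,
 * which is expected to reflect the LBR format field of the
 * IA32_PERF_CAPABILITIES MSR; it selects how the FROM/TO MSRs are decoded
 * by the read functions further down.
 */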

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT          0 /* do not capture at ring0 */
#define LBR_USER_BIT            1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT             2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT        3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT        4 /* do not capture indirect calls */
#define LBR_RETURN_BIT          5 /* do not capture near returns */
#define LBR_IND_JMP_BIT         6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT         7 /* do not capture relative jumps */
#define LBR_FAR_BIT             8 /* do not capture far branches */

#define LBR_KERNEL      (1 << LBR_KERNEL_BIT)
#define LBR_USER        (1 << LBR_USER_BIT)
#define LBR_JCC         (1 << LBR_JCC_BIT)
#define LBR_REL_CALL    (1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL    (1 << LBR_IND_CALL_BIT)
#define LBR_RETURN      (1 << LBR_RETURN_BIT)
#define LBR_REL_JMP     (1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP     (1 << LBR_IND_JMP_BIT)
#define LBR_FAR         (1 << LBR_FAR_BIT)

#define LBR_PLM         (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK    0x1ff   /* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP    -1      /* LBR filter not supported */
#define LBR_IGN         0       /* ignored */

#define LBR_ANY          \
        (LBR_JCC        |\
         LBR_REL_CALL   |\
         LBR_IND_CALL   |\
         LBR_RETURN     |\
         LBR_REL_JMP    |\
         LBR_IND_JMP    |\
         LBR_FAR)

#define LBR_FROM_FLAG_MISPRED   (1ULL << 63)
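
/*
 * With the EIP_FLAGS format, bit 63 of the LBR "from" MSR records whether
 * the branch was mispredicted; intel_pmu_lbr_read_64() extracts that flag
 * and then strips it to recover the branch source address.
 */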
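/*
 * Iterate over each PERF_SAMPLE_BRANCH_* bit in turn, from
 * PERF_SAMPLE_BRANCH_USER up to (but not including) PERF_SAMPLE_BRANCH_MAX.
 */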
#define for_each_branch_sample_type(x) \
        for ((x) = PERF_SAMPLE_BRANCH_USER; \
             (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
 * otherwise it becomes near impossible to get a reliable stack.
 */

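/*
 * DEBUGCTLMSR_LBR turns last-branch recording on, and
 * DEBUGCTLMSR_FREEZE_LBRS_ON_PMI makes the hardware stop updating the LBR
 * stack once a PMI is raised, which is what the comment above relies on.
 */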
static void __intel_pmu_lbr_enable(void)
{
        u64 debugctl;
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_sel)
                wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
        u64 debugctl;

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void intel_pmu_lbr_reset_32(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++)
                wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                wrmsrl(x86_pmu.lbr_from + i, 0);
                wrmsrl(x86_pmu.lbr_to + i, 0);
        }
}

void intel_pmu_lbr_reset(void)
{
        if (!x86_pmu.lbr_nr)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_reset_32();
        else
                intel_pmu_lbr_reset_64();
}

void intel_pmu_lbr_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        /*
         * Reset the LBR stack if we changed task context to
         * avoid data leaks.
         */
        if (event->ctx->task && cpuc->lbr_context != event->ctx) {
                intel_pmu_lbr_reset();
                cpuc->lbr_context = event->ctx;
        }

        cpuc->lbr_users++;
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);

        if (cpuc->enabled && !cpuc->lbr_users) {
                __intel_pmu_lbr_disable();
                /* avoid stale pointer */
                cpuc->lbr_context = NULL;
        }
}

void intel_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_enable();
}

void intel_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_disable();
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
        u64 tos;

        rdmsrl(x86_pmu.lbr_tos, tos);

        return tos;
}

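/*
 * The LBR stack is a ring of x86_pmu.lbr_nr FROM/TO MSR pairs; TOS indexes
 * the most recently written slot. Since lbr_nr is a power of two (4, 8 or
 * 16 in the init functions below), (tos - i) & (lbr_nr - 1) walks the ring
 * from newest to oldest, so lbr_entries[0] ends up holding the most recent
 * branch.
 */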
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                union {
                        struct {
                                u32 from;
                                u32 to;
                        };
                        u64 lbr;
                } msr_lastbranch;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

                cpuc->lbr_entries[i].from      = msr_lastbranch.from;
                cpuc->lbr_entries[i].to        = msr_lastbranch.to;
                cpuc->lbr_entries[i].mispred   = 0;
                cpuc->lbr_entries[i].predicted = 0;
                cpuc->lbr_entries[i].reserved  = 0;
        }
        cpuc->lbr_stack.nr = i;
}

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        int lbr_format = x86_pmu.intel_cap.lbr_format;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                u64 from, to, mis = 0, pred = 0;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
                rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

                if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
                        pred = !mis;
                        /*
                         * Shift out the MISPRED flag in bit 63 while
                         * sign-extending bit 62, so the address stays
                         * canonical.
                         */
                        from = (u64)((((s64)from) << 1) >> 1);
                }

                cpuc->lbr_entries[i].from      = from;
                cpuc->lbr_entries[i].to        = to;
                cpuc->lbr_entries[i].mispred   = mis;
                cpuc->lbr_entries[i].predicted = pred;
                cpuc->lbr_entries[i].reserved  = 0;
        }
        cpuc->lbr_stack.nr = i;
}

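/*
 * Typically called from the PMI handler; the FREEZE_LBRS_ON_PMI behaviour
 * noted at the top keeps the handler's own branches from overwriting the
 * entries being read here.
 */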
void intel_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!cpuc->lbr_users)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_read_32(cpuc);
        else
                intel_pmu_lbr_read_64(cpuc);
}

/*
 * Set up the HW LBR filter.
 * Used only when available; it may not be enough to disambiguate
 * all branches and may need the help of the SW filter.
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, m;
        u64 v;

        for_each_branch_sample_type(m) {
                if (!(br_type & m))
                        continue;

                v = x86_pmu.lbr_sel_map[m];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;
                mask |= v;

                if (m == PERF_SAMPLE_BRANCH_ANY)
                        break;
        }
        reg = &event->hw.branch_reg;
        reg->idx = EXTRA_REG_LBR;

        /* LBR_SELECT operates in suppress mode so invert mask */
        reg->config = ~mask & x86_pmu.lbr_sel_mask;

        return 0;
}

/*
 * all the bits supported on some flavor of x86 LBR
 * we ignore BRANCH_HV because it is not supported
 */
#define PERF_SAMPLE_BRANCH_X86_ALL      \
        (PERF_SAMPLE_BRANCH_ANY         |\
         PERF_SAMPLE_BRANCH_USER        |\
         PERF_SAMPLE_BRANCH_KERNEL)

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
        u64 br_type = event->attr.branch_sample_type;

        /*
         * no LBR on this PMU
         */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        /*
         * if no LBR HW filter, users can only
         * capture all branches
         */
        if (!x86_pmu.lbr_sel_map) {
                if (br_type != PERF_SAMPLE_BRANCH_X86_ALL)
                        return -EOPNOTSUPP;
                return 0;
        }
        /*
         * we ignore branch priv levels we do not
         * know about: BRANCH_HV
         */

        return intel_pmu_setup_hw_lbr_filter(event);
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
        [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
                                        | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
         */
        [PERF_SAMPLE_BRANCH_ANY_CALL] =
         LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
         */
        [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
};
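
/*
 * Because of the erratum workarounds above, NHM/WSM capture more branch
 * types than strictly requested (for example, plain jumps when only calls
 * were asked for); as noted earlier, the hardware filter may then need the
 * help of the SW filter to report exactly what was requested.
 */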

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
        [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_CALL]   = LBR_REL_CALL | LBR_IND_CALL
                                        | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL]   = LBR_IND_CALL,
};
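
/*
 * Worked example: requesting PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_CALL
 * on Sandy Bridge accumulates mask = LBR_USER | LBR_REL_CALL | LBR_IND_CALL |
 * LBR_FAR = 0x11a, so intel_pmu_setup_hw_lbr_filter() programs
 * ~0x11a & 0x1ff = 0x0e5 into LBR_SELECT, i.e. it suppresses ring0 branches,
 * conditionals, returns and jumps, leaving user-level call-type branches
 * captured.
 */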

/* core */
void intel_pmu_lbr_init_core(void)
{
        x86_pmu.lbr_nr   = 4;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to   = MSR_LBR_CORE_TO;

        pr_cont("4-deep LBR, ");
}

/* nehalem/westmere */
void intel_pmu_lbr_init_nhm(void)
{
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

        pr_cont("16-deep LBR, ");
}

/* sandy bridge */
void intel_pmu_lbr_init_snb(void)
{
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

        pr_cont("16-deep LBR, ");
}

/* atom */
void intel_pmu_lbr_init_atom(void)
{
        /*
         * only models starting at stepping 10 seem
         * to have an operational LBR which can freeze
         * on PMU interrupt (x86_mask is the CPU stepping)
         */
        if (boot_cpu_data.x86_mask < 10) {
                pr_cont("LBR disabled due to erratum");
                return;
        }

        x86_pmu.lbr_nr   = 8;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to   = MSR_LBR_CORE_TO;

        pr_cont("8-deep LBR, ");
}