// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for Intel CPUs
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "nested.h"
#include "pmu.h"

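/*
 * A full-width counter MSR index (MSR_IA32_PMC0 = 0x4c1) differs from its
 * legacy alias (MSR_IA32_PERFCTR0 = 0xc1) by a single bit, 0x400, so the
 * delta below can be tested as a flag to identify full-width accesses.
 */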
#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)

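/*
 * Write the new fixed counter control value and reprogram any fixed counter
 * whose per-counter control field (enable ring levels, PMI) actually changed.
 */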
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
        struct kvm_pmc *pmc;
        u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
        int i;

        pmu->fixed_ctr_ctrl = data;
        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                u8 new_ctrl = fixed_ctrl_field(data, i);
                u8 old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);

                if (old_ctrl == new_ctrl)
                        continue;

                pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);

                __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
                kvm_pmu_request_counter_reprogram(pmc);
        }
}

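/*
 * Map a global PMC index to a kvm_pmc: general purpose counters occupy
 * indices [0, nr_arch_gp_counters), fixed counters start at
 * INTEL_PMC_IDX_FIXED.
 */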
static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
        if (pmc_idx < INTEL_PMC_IDX_FIXED) {
                return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
                                  MSR_P6_EVNTSEL0);
        } else {
                u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;

                return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
        }
}

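/*
 * Validate the ECX value for RDPMC: bit 30 selects the fixed counter
 * space; bits 30 and 31 are stripped before checking the index against
 * the vCPU's number of fixed or general purpose counters.
 */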
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        bool fixed = idx & (1u << 30);

        idx &= ~(3u << 30);

        return fixed ? idx < pmu->nr_arch_fixed_counters
                     : idx < pmu->nr_arch_gp_counters;
}

static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
                                              unsigned int idx, u64 *mask)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        bool fixed = idx & (1u << 30);
        struct kvm_pmc *counters;
        unsigned int num_counters;

        idx &= ~(3u << 30);
        if (fixed) {
                counters = pmu->fixed_counters;
                num_counters = pmu->nr_arch_fixed_counters;
        } else {
                counters = pmu->gp_counters;
                num_counters = pmu->nr_arch_gp_counters;
        }
        if (idx >= num_counters)
                return NULL;
        *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
        return &counters[array_index_nospec(idx, num_counters)];
}

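/*
 * IA32_PERF_CAPABILITIES is visible to the guest only if PDCM is
 * enumerated in the guest's CPUID; otherwise report empty capabilities.
 */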
static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
        if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
                return 0;

        return vcpu->arch.perf_capabilities;
}

static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
        return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
}

static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
{
        if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
                return NULL;

        return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

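/*
 * An MSR is a valid LBR MSR if it is MSR_LBR_SELECT, MSR_LBR_TOS, or falls
 * within the vCPU's from/to (and, if present, info) LBR record ranges.
 */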
static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
{
        struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
        bool ret = false;

        if (!intel_pmu_lbr_is_enabled(vcpu))
                return ret;

        ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
                (index >= records->from && index < records->from + records->nr) ||
                (index >= records->to && index < records->to + records->nr);

        if (!ret && records->info)
                ret = (index >= records->info && index < records->info + records->nr);

        return ret;
}

static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        u64 perf_capabilities;
        int ret;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                return kvm_pmu_has_perf_global_ctrl(pmu);
        case MSR_IA32_PEBS_ENABLE:
                ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
                break;
        case MSR_IA32_DS_AREA:
                ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
                break;
        case MSR_PEBS_DATA_CFG:
                perf_capabilities = vcpu_get_perf_capabilities(vcpu);
                ret = (perf_capabilities & PERF_CAP_PEBS_BASELINE) &&
                        ((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3);
                break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
                        get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
                        get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
                        intel_pmu_is_valid_lbr_msr(vcpu, msr);
                break;
        }

        return ret;
}

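/*
 * Look up a counter by MSR index, trying the fixed counter MSRs first, then
 * the event select MSRs, then the legacy general purpose counter MSRs.
 */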
static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;

        pmc = get_fixed_pmc(pmu, msr);
        pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
        pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);

        return pmc;
}

static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (lbr_desc->event) {
                perf_event_release_kernel(lbr_desc->event);
                lbr_desc->event = NULL;
                vcpu_to_pmu(vcpu)->event_count--;
        }
}

int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct perf_event *event;

        /*
         * The perf_event_attr is constructed in the minimal efficient way:
         * - set 'pinned = true' to make it task pinned so that if another
         *   cpu pinned event reclaims LBR, the event->oncpu will be set to -1;
         * - set '.exclude_host = true' to record guest branches behavior;
         *
         * - set '.config = INTEL_FIXED_VLBR_EVENT' to indicate to host perf
         *   that it should schedule the event without a real HW counter but
         *   with a fake one; check is_guest_lbr_event() and
         *   __intel_get_event_constraints();
         *
         * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
         *   'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
         *   PERF_SAMPLE_BRANCH_USER' to configure it as an LBR callstack
         *   event, which helps KVM save/restore guest LBR records across
         *   host context switches and avoids considerable overhead; check
         *   branch_user_callstack() and intel_pmu_lbr_sched_task();
         */
        struct perf_event_attr attr = {
                .type = PERF_TYPE_RAW,
                .size = sizeof(attr),
                .config = INTEL_FIXED_VLBR_EVENT,
                .sample_type = PERF_SAMPLE_BRANCH_STACK,
                .pinned = true,
                .exclude_host = true,
                .branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
                                        PERF_SAMPLE_BRANCH_USER,
        };

        if (unlikely(lbr_desc->event)) {
                __set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
                return 0;
        }

        event = perf_event_create_kernel_counter(&attr, -1,
                                                current, NULL, NULL);
        if (IS_ERR(event)) {
                pr_debug_ratelimited("%s: failed %ld\n",
                                        __func__, PTR_ERR(event));
                return PTR_ERR(event);
        }
        lbr_desc->event = event;
        pmu->event_count++;
        __set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
        return 0;
}

/*
 * It's safe to access LBR MSRs from the guest when they have not been
 * passed through, since the host will restore or reset the LBR MSRs
 * when the guest LBR event is scheduled in.
 */
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
                                             struct msr_data *msr_info, bool read)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        u32 index = msr_info->index;

        if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
                return false;

        if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
                goto dummy;

        /*
         * Disable IRQs to ensure the LBR feature doesn't get reclaimed by the
         * host at the time the value is read from the MSR, which prevents the
         * host LBR value from being leaked to the guest. If LBR has been
         * reclaimed, return 0 on guest reads.
         */
        local_irq_disable();
        if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
                if (read)
                        rdmsrl(index, msr_info->data);
                else
                        wrmsrl(index, msr_info->data);
                __set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
                local_irq_enable();
                return true;
        }
        clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
        local_irq_enable();

dummy:
        if (read)
                msr_info->data = 0;
        return true;
}

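/*
 * Reads of counter MSRs are masked to the vCPU's counter bit width; the
 * legacy PERFCTRn and full-width PMCn indices alias the same counters.
 */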
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u32 msr = msr_info->index;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                msr_info->data = pmu->fixed_ctr_ctrl;
                break;
        case MSR_IA32_PEBS_ENABLE:
                msr_info->data = pmu->pebs_enable;
                break;
        case MSR_IA32_DS_AREA:
                msr_info->data = pmu->ds_area;
                break;
        case MSR_PEBS_DATA_CFG:
                msr_info->data = pmu->pebs_data_cfg;
                break;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
                        u64 val = pmc_read_counter(pmc);
                        msr_info->data =
                                val & pmu->counter_bitmask[KVM_PMC_GP];
                        break;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        u64 val = pmc_read_counter(pmc);
                        msr_info->data =
                                val & pmu->counter_bitmask[KVM_PMC_FIXED];
                        break;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        msr_info->data = pmc->eventsel;
                        break;
                } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
                        break;
                }
                return 1;
        }

        return 0;
}

static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u32 msr = msr_info->index;
        u64 data = msr_info->data;
        u64 reserved_bits, diff;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                if (data & pmu->fixed_ctr_ctrl_mask)
                        return 1;

                if (pmu->fixed_ctr_ctrl != data)
                        reprogram_fixed_counters(pmu, data);
                break;
        case MSR_IA32_PEBS_ENABLE:
                if (data & pmu->pebs_enable_mask)
                        return 1;

                if (pmu->pebs_enable != data) {
                        diff = pmu->pebs_enable ^ data;
                        pmu->pebs_enable = data;
                        reprogram_counters(pmu, diff);
                }
                break;
        case MSR_IA32_DS_AREA:
                if (is_noncanonical_address(data, vcpu))
                        return 1;

                pmu->ds_area = data;
                break;
        case MSR_PEBS_DATA_CFG:
                if (data & pmu->pebs_data_cfg_mask)
                        return 1;

                pmu->pebs_data_cfg = data;
                break;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
                        if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
                            (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
                                return 1;

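                        /*
                         * Hardware sign-extends writes to the legacy 32-bit
                         * counter MSRs; emulate that behavior for writes that
                         * come from the guest rather than from host userspace.
                         */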
                        if (!msr_info->host_initiated &&
                            !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                data = (s64)(s32)data;
                        pmc_write_counter(pmc, data);
                        break;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        pmc_write_counter(pmc, data);
                        break;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        reserved_bits = pmu->reserved_bits;
                        if ((pmc->idx == 2) &&
                            (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
                                reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
                        if (data & reserved_bits)
                                return 1;

                        if (data != pmc->eventsel) {
                                pmc->eventsel = data;
                                kvm_pmu_request_counter_reprogram(pmc);
                        }
                        break;
                } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
                        break;
                }
                /* Not a known PMU MSR. */
                return 1;
        }

        return 0;
}

/*
 * Map fixed counter events to architectural general purpose event encodings.
 * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
 * and so KVM instead programs the architectural event to effectively request
 * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
 * instead program the encoding into a general purpose counter, e.g. if a
 * different perf_event is already utilizing the requested counter, but the end
 * result is the same (ignoring the fact that using a general purpose counter
 * will likely exacerbate counter contention).
 *
 * Note, reference cycles is counted using a perf-defined "pseudo-encoding",
 * as there is no architectural general purpose encoding for reference cycles.
 */
static u64 intel_get_fixed_pmc_eventsel(int index)
{
        const struct {
                u8 event;
                u8 unit_mask;
        } fixed_pmc_events[] = {
                [0] = { 0xc0, 0x00 }, /* Instructions Retired / PERF_COUNT_HW_INSTRUCTIONS. */
                [1] = { 0x3c, 0x00 }, /* CPU Cycles / PERF_COUNT_HW_CPU_CYCLES. */
                [2] = { 0x00, 0x03 }, /* Reference Cycles / PERF_COUNT_HW_REF_CPU_CYCLES. */
        };

        BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);

        return (fixed_pmc_events[index].unit_mask << 8) |
               fixed_pmc_events[index].event;
}

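/*
 * Rebuild the vCPU's PMU state from guest CPUID leaf 0xA and the vCPU's
 * IA32_PERF_CAPABILITIES, clamping the number and width of counters to
 * what host perf reports as supported.
 */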
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        struct kvm_cpuid_entry2 *entry;
        union cpuid10_eax eax;
        union cpuid10_edx edx;
        u64 perf_capabilities;
        u64 counter_mask;
        int i;

        pmu->nr_arch_gp_counters = 0;
        pmu->nr_arch_fixed_counters = 0;
        pmu->counter_bitmask[KVM_PMC_GP] = 0;
        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
        pmu->version = 0;
        pmu->reserved_bits = 0xffffffff00200000ull;
        pmu->raw_event_mask = X86_RAW_EVENT_MASK;
        pmu->global_ctrl_mask = ~0ull;
        pmu->global_status_mask = ~0ull;
        pmu->fixed_ctr_ctrl_mask = ~0ull;
        pmu->pebs_enable_mask = ~0ull;
        pmu->pebs_data_cfg_mask = ~0ull;

        memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));

        /*
         * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
         * and PMU refresh is disallowed after the vCPU has run, i.e. this code
         * should never be reached while KVM is passing through MSRs.
         */
        if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
                return;

        entry = kvm_find_cpuid_entry(vcpu, 0xa);
        if (!entry || !vcpu->kvm->arch.enable_pmu)
                return;
        eax.full = entry->eax;
        edx.full = entry->edx;

        pmu->version = eax.split.version_id;
        if (!pmu->version)
                return;

        pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
                                         kvm_pmu_cap.num_counters_gp);
        eax.split.bit_width = min_t(int, eax.split.bit_width,
                                    kvm_pmu_cap.bit_width_gp);
        pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
        eax.split.mask_length = min_t(int, eax.split.mask_length,
                                      kvm_pmu_cap.events_mask_len);
        pmu->available_event_types = ~entry->ebx &
                                        ((1ull << eax.split.mask_length) - 1);

        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
                pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
                                                    kvm_pmu_cap.num_counters_fixed);
                edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
                                                  kvm_pmu_cap.bit_width_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << edx.split.bit_width_fixed) - 1;
        }

        for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
                pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
        counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
                (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
        pmu->global_ctrl_mask = counter_mask;

        /*
         * GLOBAL_STATUS and GLOBAL_OVF_CONTROL (a.k.a. GLOBAL_STATUS_RESET)
         * share reserved bit definitions.  The kernel just happens to use
         * OVF_CTRL for the names.
         */
        pmu->global_status_mask = pmu->global_ctrl_mask
                        & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
                            MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
        if (vmx_pt_mode_is_host_guest())
                pmu->global_status_mask &=
                                ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;

        entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
        if (entry &&
            (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
            (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
                pmu->reserved_bits ^= HSW_IN_TX;
                pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
        }

        bitmap_set(pmu->all_valid_pmc_idx,
                0, pmu->nr_arch_gp_counters);
        bitmap_set(pmu->all_valid_pmc_idx,
                INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

        perf_capabilities = vcpu_get_perf_capabilities(vcpu);
        if (cpuid_model_is_consistent(vcpu) &&
            (perf_capabilities & PMU_CAP_LBR_FMT))
                x86_perf_get_lbr(&lbr_desc->records);
        else
                lbr_desc->records.nr = 0;

        if (lbr_desc->records.nr)
                bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);

        if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
                if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
                        pmu->pebs_enable_mask = counter_mask;
                        pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
                        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                                pmu->fixed_ctr_ctrl_mask &=
                                        ~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
                        }
                        pmu->pebs_data_cfg_mask = ~0xff00000full;
                } else {
                        pmu->pebs_enable_mask =
                                ~((1ull << pmu->nr_arch_gp_counters) - 1);
                }
        }
}

static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
                pmu->gp_counters[i].current_config = 0;
        }

        for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
                pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
                pmu->fixed_counters[i].current_config = 0;
                pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i);
        }

        lbr_desc->records.nr = 0;
        lbr_desc->event = NULL;
        lbr_desc->msr_passthrough = false;
}

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
        intel_pmu_release_guest_lbr_event(vcpu);
}

/*
 * Emulate LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If Freeze_LBR_On_PMI = 1, the LBR is frozen on PMI and
 * KVM emulates the freeze by clearing the LBR bit (bit 0)
 * in IA32_DEBUGCTL.
 *
 * The guest needs to re-enable LBR to resume recording branches.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
        u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);

        if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
                data &= ~DEBUGCTLMSR_LBR;
                vmcs_write64(GUEST_IA32_DEBUGCTL, data);
        }
}

static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
        u8 version = vcpu_to_pmu(vcpu)->version;

        if (!intel_pmu_lbr_is_enabled(vcpu))
                return;

        if (version > 1 && version < 4)
                intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}

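/*
 * Toggle interception for every LBR record MSR (from/to and, if present,
 * info, for each of the lbr->nr entries) plus MSR_LBR_SELECT and
 * MSR_LBR_TOS.
 */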
static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
{
        struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
        int i;

        for (i = 0; i < lbr->nr; i++) {
                vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
                vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set);
                if (lbr->info)
                        vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
        }

        vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
        vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
}

static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (!lbr_desc->msr_passthrough)
                return;

        vmx_update_intercept_for_lbr_msrs(vcpu, true);
        lbr_desc->msr_passthrough = false;
}

static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (lbr_desc->msr_passthrough)
                return;

        vmx_update_intercept_for_lbr_msrs(vcpu, false);
        lbr_desc->msr_passthrough = true;
}

/*
 * Higher priority host perf events (e.g. CPU pinned) could reclaim the
 * PMU resources (e.g. LBR) that were assigned to the guest. This is
 * usually done via IPI calls (more details in perf_install_in_context).
 *
 * Before entering non-root mode (with IRQs disabled here), double
 * confirm that the PMU features enabled for the guest have not been
 * reclaimed by higher priority host events. Otherwise, disallow vCPU
 * access to the reclaimed features.
 */
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (!lbr_desc->event) {
                vmx_disable_lbr_msrs_passthrough(vcpu);
                if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
                        goto warn;
                if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
                        goto warn;
                return;
        }

        if (lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE) {
                vmx_disable_lbr_msrs_passthrough(vcpu);
                __clear_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
                goto warn;
        } else
                vmx_enable_lbr_msrs_passthrough(vcpu);

        return;

warn:
        pr_warn_ratelimited("vcpu-%d: fail to passthrough LBR.\n", vcpu->vcpu_id);
}

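/*
 * PMU cleanup hook: drop the guest LBR event if the guest has disabled
 * LBR in IA32_DEBUGCTL, so an idle vLBR doesn't pin the host's LBR.
 */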
static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
        if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
                intel_pmu_release_guest_lbr_event(vcpu);
}

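/*
 * Note which in-use guest counters are backed by a host counter with a
 * different index, i.e. are "cross-mapped", via host_cross_mapped_mask.
 */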
void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
{
        struct kvm_pmc *pmc = NULL;
        int bit, hw_idx;

        for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
                         X86_PMC_IDX_MAX) {
                pmc = intel_pmc_idx_to_pmc(pmu, bit);

                if (!pmc || !pmc_speculative_in_use(pmc) ||
                    !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
                        continue;

                /*
                 * A negative index indicates the event isn't mapped to a
                 * physical counter in the host, e.g. due to contention.
                 */
                hw_idx = pmc->perf_event->hw.idx;
                if (hw_idx != pmc->idx && hw_idx > -1)
                        pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
        }
}

struct kvm_pmu_ops intel_pmu_ops __initdata = {
        .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
        .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
        .msr_idx_to_pmc = intel_msr_idx_to_pmc,
        .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
        .is_valid_msr = intel_is_valid_msr,
        .get_msr = intel_pmu_get_msr,
        .set_msr = intel_pmu_set_msr,
        .refresh = intel_pmu_refresh,
        .init = intel_pmu_init,
        .reset = intel_pmu_reset,
        .deliver_pmi = intel_pmu_deliver_pmi,
        .cleanup = intel_pmu_cleanup,
        .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
        .MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC,
        .MIN_NR_GP_COUNTERS = 1,
};