| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support |
| 4 | * |
| 5 | * Copyright 2015 Red Hat, Inc. and/or its affiliates. |
| 6 | * |
| 7 | * Authors: |
| 8 | * Avi Kivity <avi@redhat.com> |
| 9 | * Gleb Natapov <gleb@redhat.com> |
| 10 | * Wei Huang <wei@redhat.com> |
| 11 | */ |
| 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 13 | |
| 14 | #include <linux/types.h> |
| 15 | #include <linux/kvm_host.h> |
| 16 | #include <linux/perf_event.h> |
| 17 | #include <linux/bsearch.h> |
| 18 | #include <linux/sort.h> |
| 19 | #include <asm/perf_event.h> |
| 20 | #include <asm/cpu_device_id.h> |
| 21 | #include "x86.h" |
| 22 | #include "cpuid.h" |
| 23 | #include "lapic.h" |
| 24 | #include "pmu.h" |
| 25 | |
| 26 | /* This is enough to filter the vast majority of currently defined events. */ |
| 27 | #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 |
| 28 | |
| 29 | struct x86_pmu_capability __read_mostly kvm_pmu_cap; |
| 30 | EXPORT_SYMBOL_GPL(kvm_pmu_cap); |
| 31 | |
| 32 | /* Precise Distribution of Instructions Retired (PDIR) */ |
| 33 | static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = { |
| 34 | X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), |
| 35 | X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL), |
| 36 | /* Instruction-Accurate PDIR (PDIR++) */ |
| 37 | X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL), |
| 38 | {} |
| 39 | }; |
| 40 | |
| 41 | /* Precise Distribution (PDist) */ |
| 42 | static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = { |
| 43 | X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL), |
| 44 | {} |
| 45 | }; |
| 46 | |
| 47 | /* NOTE: |
| 48 | * - Each perf counter is defined as "struct kvm_pmc"; |
| 49 | * - There are two types of perf counters: general purpose (gp) and fixed. |
| 50 | * gp counters are stored in gp_counters[] and fixed counters are stored |
| 51 | * in fixed_counters[] respectively. Both of them are part of "struct |
| 52 | * kvm_pmu"; |
| 53 | * - pmu.c understands the difference between gp counters and fixed counters. |
| 54 | *   However, AMD doesn't support fixed counters;
| 55 | * - There are three types of index to access perf counters (PMC): |
| 56 | * 1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD |
| 57 | * has MSR_K7_PERFCTRn and, for families 15H and later, |
| 58 | * MSR_F15H_PERF_CTRn, where MSR_F15H_PERF_CTR[0-3] are |
| 59 | * aliased to MSR_K7_PERFCTRn. |
| 60 | *   2. MSR Index (named idx): This is normally used by the RDPMC instruction.
| 61 | *        For instance, the AMD RDPMC instruction uses 0000_0003h in ECX to access
| 62 | *        C001_0007h (MSR_K7_PERFCTR3). Intel has a similar mechanism, except
| 63 | *        that it also supports fixed counters. idx can be used as an index to
| 64 | *        gp and fixed counters.
| 65 | * 3. Global PMC Index (named pmc): pmc is an index specific to PMU |
| 66 | * code. Each pmc, stored in kvm_pmc.idx field, is unique across |
| 67 | * all perf counters (both gp and fixed). The mapping relationship |
| 68 | * between pmc and perf counters is as the following: |
| 69 | * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters |
| 70 | * [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed |
| 71 | * * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H |
| 72 | * and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters |
| 73 | */ |
| 74 | |
| 75 | static struct kvm_pmu_ops kvm_pmu_ops __read_mostly; |
| 76 | |
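|  | /*
|  | * Define a NULL-initialized static call for each vendor PMU op; the calls
|  | * are patched to the Intel or AMD implementation by kvm_pmu_ops_update()
|  | * when the vendor module registers its kvm_pmu_ops.
|  | */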
| 77 | #define KVM_X86_PMU_OP(func) \ |
| 78 | DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func, \ |
| 79 | *(((struct kvm_pmu_ops *)0)->func)); |
| 80 | #define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP |
| 81 | #include <asm/kvm-x86-pmu-ops.h> |
| 82 | |
| 83 | void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) |
| 84 | { |
| 85 | memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops)); |
| 86 | |
| 87 | #define __KVM_X86_PMU_OP(func) \ |
| 88 | static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func); |
| 89 | #define KVM_X86_PMU_OP(func) \ |
| 90 | WARN_ON(!kvm_pmu_ops.func); __KVM_X86_PMU_OP(func) |
| 91 | #define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP |
| 92 | #include <asm/kvm-x86-pmu-ops.h> |
| 93 | #undef __KVM_X86_PMU_OP |
| 94 | } |
| 95 | |
| 96 | static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) |
| 97 | { |
| 98 | struct kvm_pmu *pmu = pmc_to_pmu(pmc); |
| 99 | bool skip_pmi = false; |
| 100 | |
| 101 | if (pmc->perf_event && pmc->perf_event->attr.precise_ip) { |
| 102 | if (!in_pmi) { |
| 103 | /* |
| 104 | * TODO: KVM is currently _choosing_ to not generate records |
| 105 | * for emulated instructions, avoiding BUFFER_OVF PMI when |
| 106 | * there are no records. Strictly speaking, KVM should generate such
| 107 | * records in the right context as well, to improve sampling accuracy.
| 108 | */ |
| 109 | skip_pmi = true; |
| 110 | } else { |
| 111 | /* Indicate PEBS overflow PMI to guest. */ |
| 112 | skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, |
| 113 | (unsigned long *)&pmu->global_status); |
| 114 | } |
| 115 | } else { |
| 116 | __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); |
| 117 | } |
| 118 | |
| 119 | if (pmc->intr && !skip_pmi) |
| 120 | kvm_make_request(KVM_REQ_PMI, pmc->vcpu); |
| 121 | } |
| 122 | |
| 123 | static void kvm_perf_overflow(struct perf_event *perf_event, |
| 124 | struct perf_sample_data *data, |
| 125 | struct pt_regs *regs) |
| 126 | { |
| 127 | struct kvm_pmc *pmc = perf_event->overflow_handler_context; |
| 128 | |
| 129 | /* |
| 130 | * Ignore asynchronous overflow events for counters that are scheduled |
| 131 | * to be reprogrammed, e.g. if a PMI for the previous event races with |
| 132 | * KVM's handling of a related guest WRMSR. |
| 133 | */ |
| 134 | if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi)) |
| 135 | return; |
| 136 | |
| 137 | __kvm_perf_overflow(pmc, true); |
| 138 | |
| 139 | kvm_make_request(KVM_REQ_PMU, pmc->vcpu); |
| 140 | } |
| 141 | |
| 142 | static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc) |
| 143 | { |
| 144 | /* |
| 145 | * For model-specific PEBS counters with special capabilities
| 146 | * (PDIR, PDIR++, PDist), KVM needs to raise the event's precise
| 147 | * level to the maximum value (currently 3, backwards compatible)
| 148 | * so that the perf subsystem assigns a hardware counter with that
| 149 | * capability to the vPMC.
| 150 | */ |
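|  | /* Only GP counter 0 supports PDist; only fixed counter 0 (idx 32) supports PDIR/PDIR++. */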
| 151 | if ((pmc->idx == 0 && x86_match_cpu(vmx_pebs_pdist_cpu)) || |
| 152 | (pmc->idx == 32 && x86_match_cpu(vmx_pebs_pdir_cpu))) |
| 153 | return 3; |
| 154 | |
| 155 | /* |
| 156 | * A non-zero precision level turns an ordinary guest event into a
| 157 | * guest PEBS event and triggers the host PEBS PMI handler to
| 158 | * determine whether a PEBS overflow PMI comes from the host
| 159 | * counters or the guest.
| 160 | */ |
| 161 | return 1; |
| 162 | } |
| 163 | |
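|  | /*
|  | * The sample period is the number of events needed for the counter to
|  | * overflow, i.e. the two's complement of the current value truncated to
|  | * the counter's width.  E.g. a 48-bit counter holding 0xffffffffff00
|  | * yields a period of 0x100; a counter value of zero maps to a full-width
|  | * period.
|  | */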
| 164 | static u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value) |
| 165 | { |
| 166 | u64 sample_period = (-counter_value) & pmc_bitmask(pmc); |
| 167 | |
| 168 | if (!sample_period) |
| 169 | sample_period = pmc_bitmask(pmc) + 1; |
| 170 | return sample_period; |
| 171 | } |
| 172 | |
| 173 | static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, |
| 174 | bool exclude_user, bool exclude_kernel, |
| 175 | bool intr) |
| 176 | { |
| 177 | struct kvm_pmu *pmu = pmc_to_pmu(pmc); |
| 178 | struct perf_event *event; |
| 179 | struct perf_event_attr attr = { |
| 180 | .type = type, |
| 181 | .size = sizeof(attr), |
| 182 | .pinned = true, |
| 183 | .exclude_idle = true, |
| 184 | .exclude_host = 1, |
| 185 | .exclude_user = exclude_user, |
| 186 | .exclude_kernel = exclude_kernel, |
| 187 | .config = config, |
| 188 | }; |
| 189 | bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable); |
| 190 | |
| 191 | attr.sample_period = get_sample_period(pmc, pmc->counter); |
| 192 | |
| 193 | if ((attr.config & HSW_IN_TX_CHECKPOINTED) && |
| 194 | guest_cpuid_is_intel(pmc->vcpu)) { |
| 195 | /* |
| 196 | * HSW_IN_TX_CHECKPOINTED is not supported with nonzero |
| 197 | * period. Just clear the sample period so at least |
| 198 | * allocating the counter doesn't fail. |
| 199 | */ |
| 200 | attr.sample_period = 0; |
| 201 | } |
| 202 | if (pebs) { |
| 203 | /* |
| 204 | * For most PEBS hardware events, the difference in the software |
| 205 | * precision levels of guest and host PEBS events will not affect |
| 206 | * the accuracy of the PEBS profiling result, because the "event IP" |
| 207 | * in the PEBS record is calibrated on the guest side. |
| 208 | */ |
| 209 | attr.precise_ip = pmc_get_pebs_precise_level(pmc); |
| 210 | } |
| 211 | |
| 212 | event = perf_event_create_kernel_counter(&attr, -1, current, |
| 213 | kvm_perf_overflow, pmc); |
| 214 | if (IS_ERR(event)) { |
| 215 | pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n", |
| 216 | PTR_ERR(event), pmc->idx); |
| 217 | return PTR_ERR(event); |
| 218 | } |
| 219 | |
| 220 | pmc->perf_event = event; |
| 221 | pmc_to_pmu(pmc)->event_count++; |
| 222 | pmc->is_paused = false; |
| 223 | pmc->intr = intr || pebs; |
| 224 | return 0; |
| 225 | } |
| 226 | |
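|  | /*
|  | * Pause the underlying perf_event and fold the hardware and emulated counts
|  | * into pmc->counter.  Returns true if accumulating the emulated count
|  | * wrapped the counter, i.e. KVM needs to emulate an overflow (overflows
|  | * driven by hardware are reported by perf via the PMI callback).
|  | */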
| 227 | static bool pmc_pause_counter(struct kvm_pmc *pmc) |
| 228 | { |
| 229 | u64 counter = pmc->counter; |
| 230 | u64 prev_counter; |
| 231 | |
| 232 | /* update counter, reset event value to avoid redundant accumulation */ |
| 233 | if (pmc->perf_event && !pmc->is_paused) |
| 234 | counter += perf_event_pause(pmc->perf_event, true); |
| 235 | |
| 236 | /* |
| 237 | * Snapshot the previous counter *after* accumulating state from perf. |
| 238 | * If overflow already happened, hardware (via perf) is responsible for |
| 239 | * generating a PMI. KVM just needs to detect overflow on emulated |
| 240 | * counter events that haven't yet been processed. |
| 241 | */ |
| 242 | prev_counter = counter & pmc_bitmask(pmc); |
| 243 | |
| 244 | counter += pmc->emulated_counter; |
| 245 | pmc->counter = counter & pmc_bitmask(pmc); |
| 246 | |
| 247 | pmc->emulated_counter = 0; |
| 248 | pmc->is_paused = true; |
| 249 | |
| 250 | return pmc->counter < prev_counter; |
| 251 | } |
| 252 | |
| 253 | static bool pmc_resume_counter(struct kvm_pmc *pmc) |
| 254 | { |
| 255 | if (!pmc->perf_event) |
| 256 | return false; |
| 257 | |
| 258 | /* recalibrate sample period and check if it's accepted by perf core */ |
| 259 | if (is_sampling_event(pmc->perf_event) && |
| 260 | perf_event_period(pmc->perf_event, |
| 261 | get_sample_period(pmc, pmc->counter))) |
| 262 | return false; |
| 263 | |
| 264 | if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) != |
| 265 | (!!pmc->perf_event->attr.precise_ip)) |
| 266 | return false; |
| 267 | |
| 268 | /* Reuse the perf_event instead of recreating it via pmc_reprogram_counter(). */
| 269 | perf_event_enable(pmc->perf_event); |
| 270 | pmc->is_paused = false; |
| 271 | |
| 272 | return true; |
| 273 | } |
| 274 | |
| 275 | static void pmc_release_perf_event(struct kvm_pmc *pmc) |
| 276 | { |
| 277 | if (pmc->perf_event) { |
| 278 | perf_event_release_kernel(pmc->perf_event); |
| 279 | pmc->perf_event = NULL; |
| 280 | pmc->current_config = 0; |
| 281 | pmc_to_pmu(pmc)->event_count--; |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | static void pmc_stop_counter(struct kvm_pmc *pmc) |
| 286 | { |
| 287 | if (pmc->perf_event) { |
| 288 | pmc->counter = pmc_read_counter(pmc); |
| 289 | pmc_release_perf_event(pmc); |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | static void pmc_update_sample_period(struct kvm_pmc *pmc) |
| 294 | { |
| 295 | if (!pmc->perf_event || pmc->is_paused || |
| 296 | !is_sampling_event(pmc->perf_event)) |
| 297 | return; |
| 298 | |
| 299 | perf_event_period(pmc->perf_event, |
| 300 | get_sample_period(pmc, pmc->counter)); |
| 301 | } |
| 302 | |
| 303 | void pmc_write_counter(struct kvm_pmc *pmc, u64 val) |
| 304 | { |
| 305 | /* |
| 306 | * Drop any unconsumed accumulated counts; the WRMSR is a write, not a
| 307 | * read-modify-write. Adjust the counter value so that its value is |
| 308 | * relative to the current count, as reading the current count from |
| 309 | * perf is faster than pausing and reprogramming the event in order to
| 310 | * reset it to '0'. Note, this very sneakily offsets the accumulated |
| 311 | * emulated count too, by using pmc_read_counter()! |
| 312 | */ |
| 313 | pmc->emulated_counter = 0; |
| 314 | pmc->counter += val - pmc_read_counter(pmc); |
| 315 | pmc->counter &= pmc_bitmask(pmc); |
| 316 | pmc_update_sample_period(pmc); |
| 317 | } |
| 318 | EXPORT_SYMBOL_GPL(pmc_write_counter); |
| 319 | |
| 320 | static int filter_cmp(const void *pa, const void *pb, u64 mask) |
| 321 | { |
| 322 | u64 a = *(u64 *)pa & mask; |
| 323 | u64 b = *(u64 *)pb & mask; |
| 324 | |
| 325 | return (a > b) - (a < b); |
| 326 | } |
| 327 | |
| 328 | |
| 329 | static int filter_sort_cmp(const void *pa, const void *pb) |
| 330 | { |
| 331 | return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT | |
| 332 | KVM_PMU_MASKED_ENTRY_EXCLUDE)); |
| 333 | } |
| 334 | |
| 335 | /* |
| 336 | * For the event filter, searching is done on the 'includes' list and |
| 337 | * 'excludes' list separately rather than on the 'events' list (which |
| 338 | * has both). As a result the exclude bit can be ignored. |
| 339 | */ |
| 340 | static int filter_event_cmp(const void *pa, const void *pb) |
| 341 | { |
| 342 | return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT)); |
| 343 | } |
| 344 | |
| 345 | static int find_filter_index(u64 *events, u64 nevents, u64 key) |
| 346 | { |
| 347 | u64 *fe = bsearch(&key, events, nevents, sizeof(events[0]), |
| 348 | filter_event_cmp); |
| 349 | |
| 350 | if (!fe) |
| 351 | return -1; |
| 352 | |
| 353 | return fe - events; |
| 354 | } |
| 355 | |
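|  | /*
|  | * A masked filter entry carries the unit mask "mask" in bits 63:56 and the
|  | * unit mask "match" in bits 15:8.  Shifting the entry right by
|  | * (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8) aligns the mask with the
|  | * unit mask field of the guest's event select (asserted by the
|  | * BUILD_BUG_ON below); the entry matches iff (umask & mask) == match.
|  | */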
| 356 | static bool is_filter_entry_match(u64 filter_event, u64 umask) |
| 357 | { |
| 358 | u64 mask = filter_event >> (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8); |
| 359 | u64 match = filter_event & KVM_PMU_MASKED_ENTRY_UMASK_MATCH; |
| 360 | |
| 361 | BUILD_BUG_ON((KVM_PMU_ENCODE_MASKED_ENTRY(0, 0xff, 0, false) >> |
| 362 | (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8)) != |
| 363 | ARCH_PERFMON_EVENTSEL_UMASK); |
| 364 | |
| 365 | return (umask & mask) == match; |
| 366 | } |
| 367 | |
| 368 | static bool filter_contains_match(u64 *events, u64 nevents, u64 eventsel) |
| 369 | { |
| 370 | u64 event_select = eventsel & kvm_pmu_ops.EVENTSEL_EVENT; |
| 371 | u64 umask = eventsel & ARCH_PERFMON_EVENTSEL_UMASK; |
| 372 | int i, index; |
| 373 | |
| 374 | index = find_filter_index(events, nevents, event_select); |
| 375 | if (index < 0) |
| 376 | return false; |
| 377 | |
| 378 | /* |
| 379 | * Entries are sorted by the event select. Walk the list in both |
| 380 | * directions to process all entries with the targeted event select. |
| 381 | */ |
| 382 | for (i = index; i < nevents; i++) { |
| 383 | if (filter_event_cmp(&events[i], &event_select)) |
| 384 | break; |
| 385 | |
| 386 | if (is_filter_entry_match(events[i], umask)) |
| 387 | return true; |
| 388 | } |
| 389 | |
| 390 | for (i = index - 1; i >= 0; i--) { |
| 391 | if (filter_event_cmp(&events[i], &event_select)) |
| 392 | break; |
| 393 | |
| 394 | if (is_filter_entry_match(events[i], umask)) |
| 395 | return true; |
| 396 | } |
| 397 | |
| 398 | return false; |
| 399 | } |
| 400 | |
| 401 | static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f, |
| 402 | u64 eventsel) |
| 403 | { |
| 404 | if (filter_contains_match(f->includes, f->nr_includes, eventsel) && |
| 405 | !filter_contains_match(f->excludes, f->nr_excludes, eventsel)) |
| 406 | return f->action == KVM_PMU_EVENT_ALLOW; |
| 407 | |
| 408 | return f->action == KVM_PMU_EVENT_DENY; |
| 409 | } |
| 410 | |
| 411 | static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter, |
| 412 | int idx) |
| 413 | { |
| 414 | int fixed_idx = idx - KVM_FIXED_PMC_BASE_IDX; |
| 415 | |
| 416 | if (filter->action == KVM_PMU_EVENT_DENY && |
| 417 | test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap)) |
| 418 | return false; |
| 419 | if (filter->action == KVM_PMU_EVENT_ALLOW && |
| 420 | !test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap)) |
| 421 | return false; |
| 422 | |
| 423 | return true; |
| 424 | } |
| 425 | |
| 426 | static bool check_pmu_event_filter(struct kvm_pmc *pmc) |
| 427 | { |
| 428 | struct kvm_x86_pmu_event_filter *filter; |
| 429 | struct kvm *kvm = pmc->vcpu->kvm; |
| 430 | |
| 431 | filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); |
| 432 | if (!filter) |
| 433 | return true; |
| 434 | |
| 435 | if (pmc_is_gp(pmc)) |
| 436 | return is_gp_event_allowed(filter, pmc->eventsel); |
| 437 | |
| 438 | return is_fixed_event_allowed(filter, pmc->idx); |
| 439 | } |
| 440 | |
| 441 | static bool pmc_event_is_allowed(struct kvm_pmc *pmc) |
| 442 | { |
| 443 | return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) && |
| 444 | check_pmu_event_filter(pmc); |
| 445 | } |
| 446 | |
| 447 | static int reprogram_counter(struct kvm_pmc *pmc) |
| 448 | { |
| 449 | struct kvm_pmu *pmu = pmc_to_pmu(pmc); |
| 450 | u64 eventsel = pmc->eventsel; |
| 451 | u64 new_config = eventsel; |
| 452 | bool emulate_overflow; |
| 453 | u8 fixed_ctr_ctrl; |
| 454 | |
| 455 | emulate_overflow = pmc_pause_counter(pmc); |
| 456 | |
| 457 | if (!pmc_event_is_allowed(pmc)) |
| 458 | return 0; |
| 459 | |
| 460 | if (emulate_overflow) |
| 461 | __kvm_perf_overflow(pmc, false); |
| 462 | |
| 463 | if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) |
| 464 | printk_once("kvm pmu: pin control bit is ignored\n"); |
| 465 | |
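|  | /*
|  | * Fixed counters have no event select; the enable bits in FIXED_CTR_CTRL
|  | * take its place: bit 0 enables counting in ring 0, bit 1 in rings > 0,
|  | * and bit 3 enables the PMI on overflow.  Translate them to the
|  | * equivalent eventsel bits so fixed and gp counters share one
|  | * programming path.
|  | */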
| 466 | if (pmc_is_fixed(pmc)) { |
| 467 | fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, |
| 468 | pmc->idx - KVM_FIXED_PMC_BASE_IDX); |
| 469 | if (fixed_ctr_ctrl & 0x1) |
| 470 | eventsel |= ARCH_PERFMON_EVENTSEL_OS; |
| 471 | if (fixed_ctr_ctrl & 0x2) |
| 472 | eventsel |= ARCH_PERFMON_EVENTSEL_USR; |
| 473 | if (fixed_ctr_ctrl & 0x8) |
| 474 | eventsel |= ARCH_PERFMON_EVENTSEL_INT; |
| 475 | new_config = (u64)fixed_ctr_ctrl; |
| 476 | } |
| 477 | |
| 478 | if (pmc->current_config == new_config && pmc_resume_counter(pmc)) |
| 479 | return 0; |
| 480 | |
| 481 | pmc_release_perf_event(pmc); |
| 482 | |
| 483 | pmc->current_config = new_config; |
| 484 | |
| 485 | return pmc_reprogram_counter(pmc, PERF_TYPE_RAW, |
| 486 | (eventsel & pmu->raw_event_mask), |
| 487 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), |
| 488 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), |
| 489 | eventsel & ARCH_PERFMON_EVENTSEL_INT); |
| 490 | } |
| 491 | |
| 492 | void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) |
| 493 | { |
| 494 | DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX); |
| 495 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 496 | struct kvm_pmc *pmc; |
| 497 | int bit; |
| 498 | |
| 499 | bitmap_copy(bitmap, pmu->reprogram_pmi, X86_PMC_IDX_MAX); |
| 500 | |
| 501 | /* |
| 502 | * The reprogramming bitmap can be written asynchronously by something |
| 503 | * other than the task that holds vcpu->mutex; take care to clear only
| 504 | * the bits that will actually be processed.
| 505 | */ |
| 506 | BUILD_BUG_ON(sizeof(bitmap) != sizeof(atomic64_t)); |
| 507 | atomic64_andnot(*(s64 *)bitmap, &pmu->__reprogram_pmi); |
| 508 | |
| 509 | kvm_for_each_pmc(pmu, pmc, bit, bitmap) { |
| 510 | /* |
| 511 | * If reprogramming fails, e.g. due to contention, re-set the |
| 512 | * reprogram bit, i.e. opportunistically try again on the
| 513 | * next PMU refresh. Don't make a new request as doing so can |
| 514 | * stall the guest if reprogramming repeatedly fails. |
| 515 | */ |
| 516 | if (reprogram_counter(pmc)) |
| 517 | set_bit(pmc->idx, pmu->reprogram_pmi); |
| 518 | } |
| 519 | |
| 520 | /* |
| 521 | * Unused perf_events are only released if the corresponding MSRs |
| 522 | * weren't accessed during the last vCPU time slice. kvm_arch_sched_in |
| 523 | * triggers KVM_REQ_PMU if cleanup is needed. |
| 524 | */ |
| 525 | if (unlikely(pmu->need_cleanup)) |
| 526 | kvm_pmu_cleanup(vcpu); |
| 527 | } |
| 528 | |
| 529 | int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx) |
| 530 | { |
| 531 | /* |
| 532 | * On Intel, VMX interception has priority over RDPMC exceptions that |
| 533 | * aren't already handled by the emulator, i.e. there are no additional |
| 534 | * checks needed for Intel PMUs.
| 535 | * |
| 536 | * On AMD, _all_ exceptions on RDPMC have priority over SVM intercepts, |
| 537 | * i.e. an invalid PMC results in a #GP, not #VMEXIT. |
| 538 | */ |
| 539 | if (!kvm_pmu_ops.check_rdpmc_early) |
| 540 | return 0; |
| 541 | |
| 542 | return static_call(kvm_x86_pmu_check_rdpmc_early)(vcpu, idx); |
| 543 | } |
| 544 | |
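|  | /*
|  | * VMware's paravirtual backdoor lets guests read the host TSC and elapsed
|  | * real/apparent time via RDPMC with magic counter indices; KVM emulates
|  | * those reads for compatibility with such guests.
|  | */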
| 545 | bool is_vmware_backdoor_pmc(u32 pmc_idx) |
| 546 | { |
| 547 | switch (pmc_idx) { |
| 548 | case VMWARE_BACKDOOR_PMC_HOST_TSC: |
| 549 | case VMWARE_BACKDOOR_PMC_REAL_TIME: |
| 550 | case VMWARE_BACKDOOR_PMC_APPARENT_TIME: |
| 551 | return true; |
| 552 | } |
| 553 | return false; |
| 554 | } |
| 555 | |
| 556 | static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) |
| 557 | { |
| 558 | u64 ctr_val; |
| 559 | |
| 560 | switch (idx) { |
| 561 | case VMWARE_BACKDOOR_PMC_HOST_TSC: |
| 562 | ctr_val = rdtsc(); |
| 563 | break; |
| 564 | case VMWARE_BACKDOOR_PMC_REAL_TIME: |
| 565 | ctr_val = ktime_get_boottime_ns(); |
| 566 | break; |
| 567 | case VMWARE_BACKDOOR_PMC_APPARENT_TIME: |
| 568 | ctr_val = ktime_get_boottime_ns() + |
| 569 | vcpu->kvm->arch.kvmclock_offset; |
| 570 | break; |
| 571 | default: |
| 572 | return 1; |
| 573 | } |
| 574 | |
| 575 | *data = ctr_val; |
| 576 | return 0; |
| 577 | } |
| 578 | |
| 579 | int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) |
| 580 | { |
| 581 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 582 | struct kvm_pmc *pmc; |
| 583 | u64 mask = ~0ull; |
| 584 | |
| 585 | if (!pmu->version) |
| 586 | return 1; |
| 587 | |
| 588 | if (is_vmware_backdoor_pmc(idx)) |
| 589 | return kvm_pmu_rdpmc_vmware(vcpu, idx, data); |
| 590 | |
| 591 | pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask); |
| 592 | if (!pmc) |
| 593 | return 1; |
| 594 | |
| 595 | if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCE) && |
| 596 | (static_call(kvm_x86_get_cpl)(vcpu) != 0) && |
| 597 | kvm_is_cr0_bit_set(vcpu, X86_CR0_PE)) |
| 598 | return 1; |
| 599 | |
| 600 | *data = pmc_read_counter(pmc) & mask; |
| 601 | return 0; |
| 602 | } |
| 603 | |
| 604 | void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) |
| 605 | { |
| 606 | if (lapic_in_kernel(vcpu)) { |
| 607 | static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu); |
| 608 | kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); |
| 609 | } |
| 610 | } |
| 611 | |
| 612 | bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) |
| 613 | { |
| 614 | switch (msr) { |
| 615 | case MSR_CORE_PERF_GLOBAL_STATUS: |
| 616 | case MSR_CORE_PERF_GLOBAL_CTRL: |
| 617 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: |
| 618 | return kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)); |
| 619 | default: |
| 620 | break; |
| 621 | } |
| 622 | return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) || |
| 623 | static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr); |
| 624 | } |
| 625 | |
| 626 | static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) |
| 627 | { |
| 628 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 629 | struct kvm_pmc *pmc = static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr); |
| 630 | |
| 631 | if (pmc) |
| 632 | __set_bit(pmc->idx, pmu->pmc_in_use); |
| 633 | } |
| 634 | |
| 635 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
| 636 | { |
| 637 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 638 | u32 msr = msr_info->index; |
| 639 | |
| 640 | switch (msr) { |
| 641 | case MSR_CORE_PERF_GLOBAL_STATUS: |
| 642 | case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: |
| 643 | msr_info->data = pmu->global_status; |
| 644 | break; |
| 645 | case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: |
| 646 | case MSR_CORE_PERF_GLOBAL_CTRL: |
| 647 | msr_info->data = pmu->global_ctrl; |
| 648 | break; |
| 649 | case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: |
| 650 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: |
| 651 | msr_info->data = 0; |
| 652 | break; |
| 653 | default: |
| 654 | return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info); |
| 655 | } |
| 656 | |
| 657 | return 0; |
| 658 | } |
| 659 | |
| 660 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
| 661 | { |
| 662 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 663 | u32 msr = msr_info->index; |
| 664 | u64 data = msr_info->data; |
| 665 | u64 diff; |
| 666 | |
| 667 | /* |
| 668 | * Note, AMD ignores writes to reserved bits and read-only PMU MSRs, |
| 669 | * whereas Intel generates #GP on attempts to write reserved/RO MSRs. |
| 670 | */ |
| 671 | switch (msr) { |
| 672 | case MSR_CORE_PERF_GLOBAL_STATUS: |
| 673 | if (!msr_info->host_initiated) |
| 674 | return 1; /* RO MSR */ |
| 675 | fallthrough; |
| 676 | case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: |
| 677 | /* Per PPR, Read-only MSR. Writes are ignored. */ |
| 678 | if (!msr_info->host_initiated) |
| 679 | break; |
| 680 | |
| 681 | if (data & pmu->global_status_mask) |
| 682 | return 1; |
| 683 | |
| 684 | pmu->global_status = data; |
| 685 | break; |
| 686 | case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: |
| 687 | data &= ~pmu->global_ctrl_mask; |
| 688 | fallthrough; |
| 689 | case MSR_CORE_PERF_GLOBAL_CTRL: |
| 690 | if (!kvm_valid_perf_global_ctrl(pmu, data)) |
| 691 | return 1; |
| 692 | |
| 693 | if (pmu->global_ctrl != data) { |
| 694 | diff = pmu->global_ctrl ^ data; |
| 695 | pmu->global_ctrl = data; |
| 696 | reprogram_counters(pmu, diff); |
| 697 | } |
| 698 | break; |
| 699 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: |
| 700 | /* |
| 701 | * GLOBAL_OVF_CTRL, a.k.a. GLOBAL_STATUS_RESET, clears bits in
| 702 | * GLOBAL_STATUS, and so the set of reserved bits is the same. |
| 703 | */ |
| 704 | if (data & pmu->global_status_mask) |
| 705 | return 1; |
| 706 | fallthrough; |
| 707 | case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: |
| 708 | if (!msr_info->host_initiated) |
| 709 | pmu->global_status &= ~data; |
| 710 | break; |
| 711 | default: |
| 712 | kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); |
| 713 | return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info); |
| 714 | } |
| 715 | |
| 716 | return 0; |
| 717 | } |
| 718 | |
| 719 | static void kvm_pmu_reset(struct kvm_vcpu *vcpu) |
| 720 | { |
| 721 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 722 | struct kvm_pmc *pmc; |
| 723 | int i; |
| 724 | |
| 725 | pmu->need_cleanup = false; |
| 726 | |
| 727 | bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX); |
| 728 | |
| 729 | kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) { |
| 730 | pmc_stop_counter(pmc); |
| 731 | pmc->counter = 0; |
| 732 | pmc->emulated_counter = 0; |
| 733 | |
| 734 | if (pmc_is_gp(pmc)) |
| 735 | pmc->eventsel = 0; |
| 736 | } |
| 737 | |
| 738 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0; |
| 739 | |
| 740 | static_call_cond(kvm_x86_pmu_reset)(vcpu); |
| 741 | } |
| 742 | |
| 743 | |
| 744 | /* |
| 745 | * Refresh the PMU configuration for the vCPU, e.g. if userspace changes CPUID |
| 746 | * and/or PERF_CAPABILITIES. |
| 747 | */ |
| 748 | void kvm_pmu_refresh(struct kvm_vcpu *vcpu) |
| 749 | { |
| 750 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 751 | |
| 752 | if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm)) |
| 753 | return; |
| 754 | |
| 755 | /* |
| 756 | * Stop/release all existing counters/events before realizing the new |
| 757 | * vPMU model. |
| 758 | */ |
| 759 | kvm_pmu_reset(vcpu); |
| 760 | |
| 761 | pmu->version = 0; |
| 762 | pmu->nr_arch_gp_counters = 0; |
| 763 | pmu->nr_arch_fixed_counters = 0; |
| 764 | pmu->counter_bitmask[KVM_PMC_GP] = 0; |
| 765 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; |
| 766 | pmu->reserved_bits = 0xffffffff00200000ull; |
| 767 | pmu->raw_event_mask = X86_RAW_EVENT_MASK; |
| 768 | pmu->global_ctrl_mask = ~0ull; |
| 769 | pmu->global_status_mask = ~0ull; |
| 770 | pmu->fixed_ctr_ctrl_mask = ~0ull; |
| 771 | pmu->pebs_enable_mask = ~0ull; |
| 772 | pmu->pebs_data_cfg_mask = ~0ull; |
| 773 | bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); |
| 774 | |
| 775 | if (vcpu->kvm->arch.enable_pmu) |
| 776 | static_call(kvm_x86_pmu_refresh)(vcpu); |
| 777 | } |
| 778 | |
| 779 | void kvm_pmu_init(struct kvm_vcpu *vcpu) |
| 780 | { |
| 781 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 782 | |
| 783 | memset(pmu, 0, sizeof(*pmu)); |
| 784 | static_call(kvm_x86_pmu_init)(vcpu); |
| 785 | kvm_pmu_refresh(vcpu); |
| 786 | } |
| 787 | |
| 788 | /* Release perf_events for vPMCs that have been unused for a full time slice. */ |
| 789 | void kvm_pmu_cleanup(struct kvm_vcpu *vcpu) |
| 790 | { |
| 791 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 792 | struct kvm_pmc *pmc = NULL; |
| 793 | DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX); |
| 794 | int i; |
| 795 | |
| 796 | pmu->need_cleanup = false; |
| 797 | |
| 798 | bitmap_andnot(bitmask, pmu->all_valid_pmc_idx, |
| 799 | pmu->pmc_in_use, X86_PMC_IDX_MAX); |
| 800 | |
| 801 | kvm_for_each_pmc(pmu, pmc, i, bitmask) { |
| 802 | if (pmc->perf_event && !pmc_speculative_in_use(pmc)) |
| 803 | pmc_stop_counter(pmc); |
| 804 | } |
| 805 | |
| 806 | static_call_cond(kvm_x86_pmu_cleanup)(vcpu); |
| 807 | |
| 808 | bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX); |
| 809 | } |
| 810 | |
| 811 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu) |
| 812 | { |
| 813 | kvm_pmu_reset(vcpu); |
| 814 | } |
| 815 | |
| 816 | static void kvm_pmu_incr_counter(struct kvm_pmc *pmc) |
| 817 | { |
| 818 | pmc->emulated_counter++; |
| 819 | kvm_pmu_request_counter_reprogram(pmc); |
| 820 | } |
| 821 | |
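|  | /*
|  | * Compare the vPMC's event select + unit mask against the architectural
|  | * encoding of the given generic perf event.  AMD64_RAW_EVENT_MASK_NB
|  | * covers only those two fields, so edge detect, pin control, invert and
|  | * CMASK are deliberately ignored (see the caller).
|  | */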
| 822 | static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, |
| 823 | unsigned int perf_hw_id) |
| 824 | { |
| 825 | return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) & |
| 826 | AMD64_RAW_EVENT_MASK_NB); |
| 827 | } |
| 828 | |
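|  | /*
|  | * Check whether the counter is configured to count at the vCPU's current
|  | * privilege level: gp counters use the OS/USR bits of the event select,
|  | * fixed counters the ring-0/ring-3 enable bits in FIXED_CTR_CTRL.
|  | */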
| 829 | static inline bool cpl_is_matched(struct kvm_pmc *pmc) |
| 830 | { |
| 831 | bool select_os, select_user; |
| 832 | u64 config; |
| 833 | |
| 834 | if (pmc_is_gp(pmc)) { |
| 835 | config = pmc->eventsel; |
| 836 | select_os = config & ARCH_PERFMON_EVENTSEL_OS; |
| 837 | select_user = config & ARCH_PERFMON_EVENTSEL_USR; |
| 838 | } else { |
| 839 | config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl, |
| 840 | pmc->idx - KVM_FIXED_PMC_BASE_IDX); |
| 841 | select_os = config & 0x1; |
| 842 | select_user = config & 0x2; |
| 843 | } |
| 844 | |
| 845 | return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user; |
| 846 | } |
| 847 | |
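|  | /*
|  | * Bump all enabled counters that are programmed to count the given generic
|  | * perf event at the current CPL, e.g. instructions retired when KVM
|  | * emulates an instruction.
|  | */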
| 848 | void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id) |
| 849 | { |
| 850 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 851 | struct kvm_pmc *pmc; |
| 852 | int i; |
| 853 | |
| 854 | kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) { |
| 855 | if (!pmc_event_is_allowed(pmc)) |
| 856 | continue; |
| 857 | |
| 858 | /* Ignore checks for edge detect, pin control, invert and CMASK bits */ |
| 859 | if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc)) |
| 860 | kvm_pmu_incr_counter(pmc); |
| 861 | } |
| 862 | } |
| 863 | EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event); |
| 864 | |
| 865 | static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter) |
| 866 | { |
| 867 | u64 mask = kvm_pmu_ops.EVENTSEL_EVENT | |
| 868 | KVM_PMU_MASKED_ENTRY_UMASK_MASK | |
| 869 | KVM_PMU_MASKED_ENTRY_UMASK_MATCH | |
| 870 | KVM_PMU_MASKED_ENTRY_EXCLUDE; |
| 871 | int i; |
| 872 | |
| 873 | for (i = 0; i < filter->nevents; i++) { |
| 874 | if (filter->events[i] & ~mask) |
| 875 | return false; |
| 876 | } |
| 877 | |
| 878 | return true; |
| 879 | } |
| 880 | |
| 881 | static void convert_to_masked_filter(struct kvm_x86_pmu_event_filter *filter) |
| 882 | { |
| 883 | int i, j; |
| 884 | |
| 885 | for (i = 0, j = 0; i < filter->nevents; i++) { |
| 886 | /* |
| 887 | * Skip events that are impossible to match against a guest |
| 888 | * event. When filtering, only the event select + unit mask |
| 889 | * of the guest event is used. To maintain backwards |
| 890 | * compatibility, impossible filters can't be rejected :-( |
| 891 | */ |
| 892 | if (filter->events[i] & ~(kvm_pmu_ops.EVENTSEL_EVENT | |
| 893 | ARCH_PERFMON_EVENTSEL_UMASK)) |
| 894 | continue; |
| 895 | /* |
| 896 | * Convert userspace events to a common in-kernel event so |
| 897 | * only one code path is needed to support both events. For |
| 898 | * the in-kernel events use masked events because they are |
| 899 | * flexible enough to handle both cases. To convert to masked |
| 900 | * events, all that's needed is to add an "all ones" umask_mask
| 901 | * (unmasked filter events don't support EXCLUDE). |
| 902 | */ |
| 903 | filter->events[j++] = filter->events[i] | |
| 904 | (0xFFULL << KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT); |
| 905 | } |
| 906 | |
| 907 | filter->nevents = j; |
| 908 | } |
| 909 | |
| 910 | static int prepare_filter_lists(struct kvm_x86_pmu_event_filter *filter) |
| 911 | { |
| 912 | int i; |
| 913 | |
| 914 | if (!(filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS)) |
| 915 | convert_to_masked_filter(filter); |
| 916 | else if (!is_masked_filter_valid(filter)) |
| 917 | return -EINVAL; |
| 918 | |
| 919 | /* |
| 920 | * Sort entries by event select and includes vs. excludes so that all |
| 921 | * entries for a given event select can be processed efficiently during |
| 922 | * filtering. The EXCLUDE flag uses a more significant bit than the |
| 923 | * event select, and so the sorted list is also effectively split into |
| 924 | * includes and excludes sub-lists. |
| 925 | */ |
| 926 | sort(&filter->events, filter->nevents, sizeof(filter->events[0]), |
| 927 | filter_sort_cmp, NULL); |
| 928 | |
| 929 | i = filter->nevents; |
| 930 | /* Find the first EXCLUDE event (only supported for masked events). */ |
| 931 | if (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS) { |
| 932 | for (i = 0; i < filter->nevents; i++) { |
| 933 | if (filter->events[i] & KVM_PMU_MASKED_ENTRY_EXCLUDE) |
| 934 | break; |
| 935 | } |
| 936 | } |
| 937 | |
| 938 | filter->nr_includes = i; |
| 939 | filter->nr_excludes = filter->nevents - filter->nr_includes; |
| 940 | filter->includes = filter->events; |
| 941 | filter->excludes = filter->events + filter->nr_includes; |
| 942 | |
| 943 | return 0; |
| 944 | } |
| 945 | |
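|  | /*
|  | * Install a new PMU event filter for the VM.  The old filter (if any) is
|  | * swapped out under kvm->lock and freed only after an expedited SRCU grace
|  | * period, and every vCPU is told to reprogram its counters against the new
|  | * filter.
|  | */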
| 946 | int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) |
| 947 | { |
| 948 | struct kvm_pmu_event_filter __user *user_filter = argp; |
| 949 | struct kvm_x86_pmu_event_filter *filter; |
| 950 | struct kvm_pmu_event_filter tmp; |
| 951 | struct kvm_vcpu *vcpu; |
| 952 | unsigned long i; |
| 953 | size_t size; |
| 954 | int r; |
| 955 | |
| 956 | if (copy_from_user(&tmp, user_filter, sizeof(tmp))) |
| 957 | return -EFAULT; |
| 958 | |
| 959 | if (tmp.action != KVM_PMU_EVENT_ALLOW && |
| 960 | tmp.action != KVM_PMU_EVENT_DENY) |
| 961 | return -EINVAL; |
| 962 | |
| 963 | if (tmp.flags & ~KVM_PMU_EVENT_FLAGS_VALID_MASK) |
| 964 | return -EINVAL; |
| 965 | |
| 966 | if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS) |
| 967 | return -E2BIG; |
| 968 | |
| 969 | size = struct_size(filter, events, tmp.nevents); |
| 970 | filter = kzalloc(size, GFP_KERNEL_ACCOUNT); |
| 971 | if (!filter) |
| 972 | return -ENOMEM; |
| 973 | |
| 974 | filter->action = tmp.action; |
| 975 | filter->nevents = tmp.nevents; |
| 976 | filter->fixed_counter_bitmap = tmp.fixed_counter_bitmap; |
| 977 | filter->flags = tmp.flags; |
| 978 | |
| 979 | r = -EFAULT; |
| 980 | if (copy_from_user(filter->events, user_filter->events, |
| 981 | sizeof(filter->events[0]) * filter->nevents)) |
| 982 | goto cleanup; |
| 983 | |
| 984 | r = prepare_filter_lists(filter); |
| 985 | if (r) |
| 986 | goto cleanup; |
| 987 | |
| 988 | mutex_lock(&kvm->lock); |
| 989 | filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter, |
| 990 | mutex_is_locked(&kvm->lock)); |
| 991 | mutex_unlock(&kvm->lock); |
| 992 | synchronize_srcu_expedited(&kvm->srcu); |
| 993 | |
| 994 | BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) > |
| 995 | sizeof(((struct kvm_pmu *)0)->__reprogram_pmi)); |
| 996 | |
| 997 | kvm_for_each_vcpu(i, vcpu, kvm) |
| 998 | atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull); |
| 999 | |
| 1000 | kvm_make_all_cpus_request(kvm, KVM_REQ_PMU); |
| 1001 | |
| 1002 | r = 0; |
| 1003 | cleanup: |
| 1004 | kfree(filter); |
| 1005 | return r; |
| 1006 | } |