KVM: x86/vPMU: Reuse perf_event to avoid unnecessary pmc_reprogram_counter
authorLike Xu <like.xu@linux.intel.com>
Sun, 27 Oct 2019 10:52:42 +0000 (18:52 +0800)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 15 Nov 2019 10:44:09 +0000 (11:44 +0100)
The perf_event_create_kernel_counter() in the pmc_reprogram_counter() is
a heavyweight and high-frequency operation, especially when host disables
the watchdog (maximum 21000000 ns) which leads to an unacceptable latency
of the guest NMI handler. It limits the use of vPMUs in the guest.

When a vPMC is fully enabled, the legacy reprogram_*_counter() would stop
and release its existing perf_event (if any) every time, even though in most
cases an almost identical perf_event would be created and configured again.

For each vPMC, if the requested config ('u64 eventsel' for gp and 'u8 ctrl'
for fixed) is the same as its current config AND a new sample period based
on pmc->counter is accepted by the host perf interface, the current event can
be reused safely, as if it were newly created. Otherwise, release the
undesirable perf_event and reprogram a new one as usual.

It is lightweight to call pmc_pause_counter (disable, read and reset event)
and pmc_resume_counter (recalibrate period and re-enable event) as the guest
expects, instead of releasing and re-creating the event on every condition.
Rather than relying on the filterable event->attr or hw.config, a new
'u64 current_config' field is added to save the last originally programmed
config for each vPMC.

Based on this implementation, the number of calls to pmc_reprogram_counter
is reduced by ~82.5% for a gp sampling event and ~99.9% for a fixed event.
When using multiplexed perf sampling mode, the average latency of the
guest NMI handler is reduced from 104923 ns to 48393 ns (~2.16x speed up).
If the host disables the watchdog, the minimum latency of the guest NMI
handler can be sped up by ~3413x (from 20407603 ns to 5979 ns) and by ~786x
on average.

Suggested-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/pmu_amd.c
arch/x86/kvm/vmx/pmu_intel.c

index 6f6b8886a8ebca9e8a53de12a51fb786fa0eea56..a87a6c98adeeef6a41f0af88d9970375d41f5601 100644 (file)
@@ -449,6 +449,11 @@ struct kvm_pmc {
        u64 eventsel;
        struct perf_event *perf_event;
        struct kvm_vcpu *vcpu;
+       /*
+        * eventsel value for general purpose counters,
+        * ctrl value for fixed counters.
+        */
+       u64 current_config;
 };
 
 struct kvm_pmu {
index 472b69b3b6c3782fd17ad3cc46d0a3d388ec4d7d..99565de5410a4ffbfd49a4d75b349ff12049f1f4 100644 (file)
@@ -138,6 +138,35 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
        clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 }
 
+static void pmc_pause_counter(struct kvm_pmc *pmc)
+{
+       u64 counter = pmc->counter;
+
+       if (!pmc->perf_event)
+               return;
+
+       /* update counter, reset event value to avoid redundant accumulation */
+       counter += perf_event_pause(pmc->perf_event, true);
+       pmc->counter = counter & pmc_bitmask(pmc);
+}
+
+static bool pmc_resume_counter(struct kvm_pmc *pmc)
+{
+       if (!pmc->perf_event)
+               return false;
+
+       /* recalibrate sample period and check if it's accepted by perf core */
+       if (perf_event_period(pmc->perf_event,
+                       (-pmc->counter) & pmc_bitmask(pmc)))
+               return false;
+
+       /* reuse perf_event to serve as pmc_reprogram_counter() does */
+       perf_event_enable(pmc->perf_event);
+
+       clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+       return true;
+}
+
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 {
        unsigned config, type = PERF_TYPE_RAW;
@@ -152,7 +181,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 
        pmc->eventsel = eventsel;
 
-       pmc_stop_counter(pmc);
+       pmc_pause_counter(pmc);
 
        if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
                return;
@@ -191,6 +220,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
        if (type == PERF_TYPE_RAW)
                config = eventsel & X86_RAW_EVENT_MASK;
 
+       if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
+               return;
+
+       pmc_release_perf_event(pmc);
+
+       pmc->current_config = eventsel;
        pmc_reprogram_counter(pmc, type, config,
                              !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                              !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
@@ -207,7 +242,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
        struct kvm_pmu_event_filter *filter;
        struct kvm *kvm = pmc->vcpu->kvm;
 
-       pmc_stop_counter(pmc);
+       pmc_pause_counter(pmc);
 
        if (!en_field || !pmc_is_enabled(pmc))
                return;
@@ -222,6 +257,12 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
                        return;
        }
 
+       if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
+               return;
+
+       pmc_release_perf_event(pmc);
+
+       pmc->current_config = (u64)ctrl;
        pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
                              kvm_x86_ops->pmu_ops->find_fixed_event(idx),
                              !(en_field & 0x2), /* exclude user */
index b253dd5e56cfa151c1451734e28ab7f110a8ff34..7eba298587dc51377e478ad53c16e6e1e8b3f1ad 100644 (file)
@@ -56,12 +56,20 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
        return counter & pmc_bitmask(pmc);
 }
 
-static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
 {
        if (pmc->perf_event) {
-               pmc->counter = pmc_read_counter(pmc);
                perf_event_release_kernel(pmc->perf_event);
                pmc->perf_event = NULL;
+               pmc->current_config = 0;
+       }
+}
+
+static inline void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+       if (pmc->perf_event) {
+               pmc->counter = pmc_read_counter(pmc);
+               pmc_release_perf_event(pmc);
        }
 }
 
index e8609ccd0b627738e525ef413051cbeabcb640dd..e87d34136047f6b64a53fc0b9a4a1f41c457cd16 100644 (file)
@@ -292,6 +292,7 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
+               pmu->gp_counters[i].current_config = 0;
        }
 }
 
index dcde142327ca37510a07a20997482e4ef798ca46..9b1ddc42f604c8e17b50eede090d782738b1375c 100644 (file)
@@ -340,12 +340,14 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
+               pmu->gp_counters[i].current_config = 0;
        }
 
        for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
                pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+               pmu->fixed_counters[i].current_config = 0;
        }
 }