KVM: selftests: Expand PMU counters test to verify LLC events
author Sean Christopherson <seanjc@google.com>
Tue, 9 Jan 2024 23:02:41 +0000 (15:02 -0800)
committer Sean Christopherson <seanjc@google.com>
Tue, 30 Jan 2024 23:29:40 +0000 (15:29 -0800)
Expand the PMU counters test to verify that LLC references and misses have
non-zero counts when the code being executed while the LLC event(s) is
active is evicted via CLFLUSH{,OPT}.  Note, CLFLUSH{,OPT} requires a fence
of some kind to ensure the cache lines are flushed before execution
continues.  Use MFENCE for simplicity (performance is not a concern).
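
For reference, a minimal sketch of the flush+fence idiom (illustrative
only; the flush_self() helper and the empty "1:" label are hypothetical,
not part of this patch):

	static inline void flush_self(void)
	{
		/*
		 * Evict the cache line holding the code at local label
		 * "1:", then fence so the flush completes before that
		 * code is fetched, forcing an LLC reference (and miss)
		 * on the fetch.
		 */
		__asm__ __volatile__("clflushopt 1f\n\t" /* or "clflush 1f" */
				     "mfence\n\t"
				     "1:"
				     ::: "memory");
	}

Neither CLFLUSH nor CLFLUSHOPT is guaranteed to complete before later
instructions execute, hence the MFENCE before reaching the flushed line.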

Suggested-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Tested-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://lore.kernel.org/r/20240109230250.424295-22-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
tools/testing/selftests/kvm/x86_64/pmu_counters_test.c

index f5dedd1124714df8f0ffccc9047ae73818361663..4c7133ddcda8d5a1cf2e6cdf50d532fde3f65be9 100644
@@ -14,9 +14,9 @@
 /*
  * Number of "extra" instructions that will be counted, i.e. the number of
  * instructions that are needed to set up the loop and then disable the
- * counter.  2 MOV, 2 XOR, 1 WRMSR.
+ * counter.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
  */
-#define NUM_EXTRA_INSNS                5
+#define NUM_EXTRA_INSNS                7
 #define NUM_INSNS_RETIRED      (NUM_BRANCHES + NUM_EXTRA_INSNS)
 
 static uint8_t kvm_pmu_version;
@@ -107,6 +107,12 @@ static void guest_assert_event_count(uint8_t idx,
        case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
                GUEST_ASSERT_EQ(count, NUM_BRANCHES);
                break;
+       case INTEL_ARCH_LLC_REFERENCES_INDEX:
+       case INTEL_ARCH_LLC_MISSES_INDEX:
+               if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+                   !this_cpu_has(X86_FEATURE_CLFLUSH))
+                       break;
+               fallthrough;
        case INTEL_ARCH_CPU_CYCLES_INDEX:
        case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
                GUEST_ASSERT_NE(count, 0);
@@ -123,29 +129,44 @@ sanity_checks:
        GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
 }
 
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence.  Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * start of the loop to force LLC references and misses, i.e. to allow testing
+ * that those events actually count.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush)                             \
+do {                                                                           \
+       __asm__ __volatile__("wrmsr\n\t"                                        \
+                            clflush "\n\t"                                     \
+                            "mfence\n\t"                                       \
+                            "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
+                            "loop .\n\t"                                       \
+                            "mov %%edi, %%ecx\n\t"                             \
+                            "xor %%eax, %%eax\n\t"                             \
+                            "xor %%edx, %%edx\n\t"                             \
+                            "wrmsr\n\t"                                        \
+                            :: "a"((uint32_t)_value), "d"(_value >> 32),       \
+                               "c"(_msr), "D"(_msr)                            \
+       );                                                                      \
+} while (0)
+
 static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
                                    uint32_t pmc, uint32_t pmc_msr,
                                    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
 {
        wrmsr(pmc_msr, 0);
 
-       /*
-        * Enable and disable the PMC in a monolithic asm blob to ensure that
-        * the compiler can't insert _any_ code into the measured sequence.
-        * Note, ECX doesn't need to be clobbered as the input value, @pmc_msr,
-        * is restored before the end of the sequence.
-        */
-       __asm__ __volatile__("wrmsr\n\t"
-                            "mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"
-                            "loop .\n\t"
-                            "mov %%edi, %%ecx\n\t"
-                            "xor %%eax, %%eax\n\t"
-                            "xor %%edx, %%edx\n\t"
-                            "wrmsr\n\t"
-                            :: "a"((uint32_t)ctrl_msr_value),
-                               "d"(ctrl_msr_value >> 32),
-                               "c"(ctrl_msr), "D"(ctrl_msr)
-                            );
+       if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))
+               GUEST_MEASURE_EVENT(ctrl_msr, ctrl_msr_value, "clflushopt 1f");
+       else if (this_cpu_has(X86_FEATURE_CLFLUSH))
+               GUEST_MEASURE_EVENT(ctrl_msr, ctrl_msr_value, "clflush 1f");
+       else
+               GUEST_MEASURE_EVENT(ctrl_msr, ctrl_msr_value, "nop");
 
        guest_assert_event_count(idx, event, pmc, pmc_msr);
 }