KVM: x86: Virtualize FLUSH_L1D and passthrough MSR_IA32_FLUSH_CMD
author Sean Christopherson <seanjc@google.com>
Wed, 22 Mar 2023 01:14:39 +0000 (18:14 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 6 Apr 2023 17:37:37 +0000 (13:37 -0400)
Virtualize FLUSH_L1D so that the guest can use the performant L1D flush
if one of the many mitigations might require a flush in the guest, e.g.
Linux provides an option to flush the L1D when switching mms.

Pass through MSR_IA32_FLUSH_CMD for writes when it's supported in hardware
and exposed to the guest, i.e. always let the guest write it directly if
FLUSH_L1D is fully supported.

Forward writes to hardware in host context on the off chance that KVM
ends up emulating a WRMSR, or in the really unlikely scenario where
userspace wants to force a flush.  Restrict these forwarded WRMSRs to
the known command out of an abundance of caution.  Passing through the
MSR means the guest can throw any and all values at hardware, but doing
so in host context is arguably a bit more dangerous.

Link: https://lkml.kernel.org/r/CALMp9eTt3xzAEoQ038bJQ9LN0ZOXrSWsN7xnNUD%2B0SS%3DWwF7Pg%40mail.gmail.com
Link: https://lore.kernel.org/all/20230201132905.549148-2-eesposit@redhat.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230322011440.2195485-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/cpuid.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c

index 599aebec2d52c75313c0e4c63a9bf49ffe51a386..9583a110cf5f26b0b3fed60ca7a5321f9ad53d14 100644 (file)
@@ -653,7 +653,7 @@ void kvm_set_cpu_caps(void)
                F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
                F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
                F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
-               F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
+               F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
        );
 
        /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
index 85bb535fc3213e9b9d4ce6c1434781f965318043..b32edaf5a74b741a69a5e972e76dbba4923481d0 100644 (file)
@@ -95,6 +95,7 @@ static const struct svm_direct_access_msrs {
 #endif
        { .index = MSR_IA32_SPEC_CTRL,                  .always = false },
        { .index = MSR_IA32_PRED_CMD,                   .always = false },
+       { .index = MSR_IA32_FLUSH_CMD,                  .always = false },
        { .index = MSR_IA32_LASTBRANCHFROMIP,           .always = false },
        { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
        { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
@@ -4140,6 +4141,10 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0,
                                     !!guest_has_pred_cmd_msr(vcpu));
 
+       if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+               set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
+                                    !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
        /* For sev guests, the memory encryption bit is not reserved in CR3.  */
        if (sev_guest(vcpu->kvm)) {
                best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
index 1bc2b80273c97f92947c4ffd649ceb2426fb4fa5..f63b28f46a713313d911f643ea0dba342cf35240 100644 (file)
@@ -654,6 +654,9 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
        nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
                                         MSR_IA32_PRED_CMD, MSR_TYPE_W);
 
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
+
        kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
        vmx->nested.force_msr_bitmap_recalc = false;
index 29807be219b971de9df09a3ff98e398a7d5b9df9..56e0c7ae961d37005a6d1bbaef4cec1ad373014c 100644 (file)
@@ -164,6 +164,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
        MSR_IA32_SPEC_CTRL,
        MSR_IA32_PRED_CMD,
+       MSR_IA32_FLUSH_CMD,
        MSR_IA32_TSC,
 #ifdef CONFIG_X86_64
        MSR_FS_BASE,
@@ -7720,6 +7721,10 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
                                          !guest_has_pred_cmd_msr(vcpu));
 
+       if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+               vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+                                         !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
        set_cr4_guest_host_mask(vmx);
 
        vmx_write_encls_bitmap(vcpu, NULL);
index 2acdc54bc34b18bd71d3020b837617c7ad488dd9..cb766f65a3ebcab90271e2da5751e5c18be5f01e 100644 (file)
@@ -369,7 +369,7 @@ struct vcpu_vmx {
        struct lbr_desc lbr_desc;
 
        /* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS  15
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS  16
        struct {
                DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
                DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
index c83ec88da0434162785d4a8eb39863f61ff48b05..3c58dbae7b4c71c203838de116e77dd9b9e4eef4 100644 (file)
@@ -3628,6 +3628,18 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
                wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
                break;
+       case MSR_IA32_FLUSH_CMD:
+               if (!msr_info->host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+                       return 1;
+
+               if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
+                       return 1;
+               if (!data)
+                       break;
+
+               wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+               break;
        case MSR_EFER:
                return set_efer(vcpu, msr_info);
        case MSR_K7_HWCR: