KVM: x86: Quirk initialization of feature MSRs to KVM's max configuration
author: Sean Christopherson <seanjc@google.com>
Fri, 2 Aug 2024 18:55:05 +0000 (11:55 -0700)
committer: Sean Christopherson <seanjc@google.com>
Fri, 1 Nov 2024 16:22:31 +0000 (09:22 -0700)
Add a quirk to control KVM's misguided initialization of select feature
MSRs to KVM's max configuration, as enabling features by default violates
KVM's approach of letting userspace own the vCPU model, and is actively
problematic for MSRs that are conditionally supported, as the vCPU will
end up with an MSR value that userspace can't restore.  E.g. if the vCPU
is configured with PDCM=0, userspace will save and attempt to restore a
non-zero PERF_CAPABILITIES, thanks to KVM's meddling.

Link: https://lore.kernel.org/r/20240802185511.305849-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Documentation/virt/kvm/api.rst
arch/x86/include/asm/kvm_host.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c

index edc070c6e19b210f11755d9e675a697eb5eab49c..061ec93d9ecb77d7152e7e8987977c4085e71bb2 100644 (file)
@@ -8107,6 +8107,28 @@ KVM_X86_QUIRK_SLOT_ZAP_ALL          By default, for KVM_X86_DEFAULT_VM VMs, KVM
                                     or moved memslot isn't reachable, i.e KVM
                                     _may_ invalidate only SPTEs related to the
                                     memslot.
+
+KVM_X86_QUIRK_STUFF_FEATURE_MSRS    By default, at vCPU creation, KVM sets the
+                                    vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
+                                    MSR_IA32_ARCH_CAPABILITIES (0x10a),
+                                    MSR_PLATFORM_INFO (0xce), and all VMX MSRs
+                                    (0x480..0x492) to the maximal capabilities
+                                    supported by KVM.  KVM also sets
+                                    MSR_IA32_UCODE_REV (0x8b) to an arbitrary
+                                    value (which is different for Intel vs.
+                                    AMD).  Lastly, when guest CPUID is set (by
+                                    userspace), KVM modifies select VMX MSR
+                                    fields to force consistency between guest
+                                    CPUID and L2's effective ISA.  When this
+                                    quirk is disabled, KVM zeroes the vCPU's MSR
+                                    values (with two exceptions, see below),
+                                    i.e. treats the feature MSRs like CPUID
+                                    leaves and gives userspace full control of
+                                    the vCPU model definition.  This quirk does
+                                    not affect VMX MSRs CR0/CR4_FIXED1 (0x487
+                                    and 0x489), as KVM does not allow them to
+                                    be set by userspace (KVM sets them based on
+                                    guest CPUID, for safety purposes).
 =================================== ============================================
 
 7.32 KVM_CAP_MAX_VCPU_ID
index 3ae90df0a17725830392cff59c0ead39093e94ca..dbe0ea509642c7204493efd7b9151e05d4666ca4 100644 (file)
@@ -2360,7 +2360,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
         KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT |   \
         KVM_X86_QUIRK_FIX_HYPERCALL_INSN |     \
         KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS |  \
-        KVM_X86_QUIRK_SLOT_ZAP_ALL)
+        KVM_X86_QUIRK_SLOT_ZAP_ALL |           \
+        KVM_X86_QUIRK_STUFF_FEATURE_MSRS)
 
 /*
  * KVM previously used a u32 field in kvm_run to indicate the hypercall was
index a8debbf2f70280595573a479d81f17bc1176b22e..88585c1de416fa6f81bf0d1e12dff3fec89f0ecc 100644 (file)
@@ -440,6 +440,7 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN       (1 << 5)
 #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS    (1 << 6)
 #define KVM_X86_QUIRK_SLOT_ZAP_ALL             (1 << 7)
+#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS       (1 << 8)
 
 #define KVM_STATE_NESTED_FORMAT_VMX    0
 #define KVM_STATE_NESTED_FORMAT_SVM    1
index 50f6b0e03d041bf57b2e83249be86bc3a47a31dd..237e72b8a999f69a464d65611961c695411cb889 100644 (file)
@@ -1390,7 +1390,9 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
        svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
        svm_init_osvw(vcpu);
-       vcpu->arch.microcode_version = 0x01000065;
+
+       if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+               vcpu->arch.microcode_version = 0x01000065;
        svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio;
 
        svm->nmi_masked = false;
index 115ec4617a5fbfd0b0f7ee77382c0d97b8c35f2f..f0326927d4e1f75845951a37d1b8c8eb25b5d8a5 100644 (file)
@@ -4572,7 +4572,8 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
         * Update the nested MSR settings so that a nested VMM can/can't set
         * controls for features that are/aren't exposed to the guest.
         */
-       if (nested) {
+       if (nested &&
+           kvm_check_has_quirk(vmx->vcpu.kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
                /*
                 * All features that can be added or removed to VMX MSRs must
                 * be supported in the first place for nested virtualization.
@@ -4862,7 +4863,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
        init_vmcs(vmx);
 
-       if (nested)
+       if (nested &&
+           kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
                memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));
 
        vcpu_setup_sgx_lepubkeyhash(vcpu);
@@ -4875,7 +4877,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
 #endif
 
-       vcpu->arch.microcode_version = 0x100000000ULL;
+       if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+               vcpu->arch.microcode_version = 0x100000000ULL;
        vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
 
        /*
index 16804637ba97b390c8b6a32890be364d663574de..34a7359d2bf385882577633217314f1bbbae0f55 100644 (file)
@@ -12314,9 +12314,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
        kvm_async_pf_hash_reset(vcpu);
 
-       vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
-       vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
-       vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
+       if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
+               vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
+               vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
+               vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
+       }
        kvm_pmu_init(vcpu);
 
        vcpu->arch.pending_external_vector = -1;