As a result of a recent investigation, it was determined that x86 CPUs
which support 5-level paging, don't always respect CR4.LA57 when doing
canonical checks.
In particular:
1. MSRs which contain a linear address, allow full 57-bitcanonical address
regardless of CR4.LA57 state. For example: MSR_KERNEL_GS_BASE.
2. All hidden segment bases and GDT/IDT bases also behave like MSRs.
This means that full 57-bit canonical address can be loaded to them
regardless of CR4.LA57, both using MSRS (e.g GS_BASE) and instructions
(e.g LGDT).
3. TLB invalidation instructions also allow the user to use full 57-bit
address regardless of the CR4.LA57.
Finally, it must be noted that the CPU doesn't prevent the user from
disabling 5-level paging, even when the full 57-bit canonical address is
present in one of the registers mentioned above (e.g GDT base).
In fact, this can happen without any userspace help, when the CPU enters
SMM mode - some MSRs, for example MSR_KERNEL_GS_BASE are left to contain
a non-canonical address in regard to the new mode.
Since most of the affected MSRs and all segment bases can be read and
written freely by the guest without any KVM intervention, this patch makes
the emulator closely follow hardware behavior, which means that the
emulator doesn't take in the account the guest CPUID support for 5-level
paging, and only takes in the account the host CPU support.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20240906221824.491834-4-mlevitsk@redhat.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
/* It's actually a GPA for vcpu->arch.guest_mmu. */
if (mmu != &vcpu->arch.guest_mmu) {
/* INVLPG on a non-canonical address is a NOP according to the SDM. */
- if (is_noncanonical_address(addr, vcpu))
+ if (is_noncanonical_invlpg_address(addr, vcpu))
return;
kvm_x86_call(flush_tlb_gva)(vcpu, addr);
CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
return -EINVAL;
- if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
+ if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
+ CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
return -EINVAL;
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
CC(vmcs12->host_ss_selector == 0 && !ia32e))
return -EINVAL;
- if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
+ if (CC(is_noncanonical_base_address(vmcs12->host_fs_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_gs_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_gdtr_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_idtr_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_tr_base, vcpu)) ||
+ CC(is_noncanonical_address(vmcs12->host_rip, vcpu, 0)))
return -EINVAL;
/*
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
- (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
+ (CC(is_noncanonical_msr_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
return -EINVAL;
* non-canonical form. This is the only check on the memory
* destination for long mode!
*/
- exn = is_noncanonical_address(*ret, vcpu);
+ exn = is_noncanonical_address(*ret, vcpu, 0);
} else {
/*
* When not in long mode, the virtual/linear address is
* invalidation.
*/
if (!operand.vpid ||
- is_noncanonical_address(operand.gla, vcpu))
+ is_noncanonical_invlpg_address(operand.gla, vcpu))
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
vpid_sync_vcpu_addr(vpid02, operand.gla);
}
break;
case MSR_IA32_DS_AREA:
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
pmu->ds_area = data;
fault = true;
} else if (likely(is_64_bit_mode(vcpu))) {
*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
- fault = is_noncanonical_address(*gva, vcpu);
+ fault = is_noncanonical_address(*gva, vcpu, 0);
} else {
*gva &= 0xffffffff;
fault = (s.unusable) ||
(!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
return 1;
- if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
+ if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) ||
(data & MSR_IA32_BNDCFGS_RSVD))
return 1;
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
if (index >= 2 * vmx->pt_desc.num_address_ranges)
return 1;
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
if (index % 2)
vmx->pt_desc.guest.addr_b[index / 2] = data;
case MSR_KERNEL_GS_BASE:
case MSR_CSTAR:
case MSR_LSTAR:
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
break;
case MSR_IA32_SYSENTER_EIP:
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
- data = __canonical_address(data, vcpu_virt_addr_bits(vcpu));
+ data = __canonical_address(data, max_host_virt_addr_bits());
break;
case MSR_TSC_AUX:
if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
static bool emulator_is_canonical_addr(struct x86_emulate_ctxt *ctxt,
gva_t addr, unsigned int flags)
{
- return !is_noncanonical_address(addr, emul_to_vcpu(ctxt));
+ return !is_noncanonical_address(addr, emul_to_vcpu(ctxt), flags);
}
static const struct x86_emulate_ops emulate_ops = {
* invalidation.
*/
if ((!pcid_enabled && (operand.pcid != 0)) ||
- is_noncanonical_address(operand.gla, vcpu)) {
+ is_noncanonical_invlpg_address(operand.gla, vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
+#include "cpuid.h"
struct kvm_caps {
/* control of guest tsc rate supported? */
return kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 57 : 48;
}
-static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
+static inline u8 max_host_virt_addr_bits(void)
{
- return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
+ return kvm_cpu_cap_has(X86_FEATURE_LA57) ? 57 : 48;
+}
+
+/*
+ * x86 MSRs which contain linear addresses, x86 hidden segment bases, and
+ * IDT/GDT bases have static canonicality checks, the size of which depends
+ * only on the CPU's support for 5-level paging, rather than on the state of
+ * CR4.LA57. This applies to both WRMSR and to other instructions that set
+ * their values, e.g. SGDT.
+ *
+ * KVM passes through most of these MSRS and also doesn't intercept the
+ * instructions that set the hidden segment bases.
+ *
+ * Because of this, to be consistent with hardware, even if the guest doesn't
+ * have LA57 enabled in its CPUID, perform canonicality checks based on *host*
+ * support for 5 level paging.
+ *
+ * Finally, instructions which are related to MMU invalidation of a given
+ * linear address, also have a similar static canonical check on address.
+ * This allows for example to invalidate 5-level addresses of a guest from a
+ * host which uses 4-level paging.
+ */
+static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu,
+ unsigned int flags)
+{
+ if (flags & (X86EMUL_F_INVLPG | X86EMUL_F_MSR | X86EMUL_F_DT_LOAD))
+ return !__is_canonical_address(la, max_host_virt_addr_bits());
+ else
+ return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
+}
+
+static inline bool is_noncanonical_msr_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_MSR);
+}
+
+static inline bool is_noncanonical_base_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_DT_LOAD);
+}
+
+static inline bool is_noncanonical_invlpg_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_INVLPG);
}
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,