#endif
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
{ .index = MSR_IA32_PRED_CMD, .always = false },
+ { .index = MSR_IA32_FLUSH_CMD, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
return 0;
}
-static int svm_set_msr_ia32_cmd(struct kvm_vcpu *vcpu, struct msr_data *msr,
- bool guest_has_feat, u64 cmd,
- int x86_feature_bit)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (!msr->host_initiated && !guest_has_feat)
- return 1;
-
- if (!(msr->data & ~cmd))
- return 1;
- if (!boot_cpu_has(x86_feature_bit))
- return 1;
- if (!msr->data)
- return 0;
-
- wrmsrl(msr->index, cmd);
- set_msr_interception(vcpu, svm->msrpm, msr->index, 0, 1);
-
- return 0;
-}
-
static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct vcpu_svm *svm = to_svm(vcpu);
- int r;
+ int ret = 0;
u32 ecx = msr->index;
u64 data = msr->data;
*/
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
break;
- case MSR_IA32_PRED_CMD:
- r = svm_set_msr_ia32_cmd(vcpu, msr,
- guest_has_pred_cmd_msr(vcpu),
- PRED_CMD_IBPB, X86_FEATURE_IBPB);
- break;
- case MSR_IA32_FLUSH_CMD:
- r = svm_set_msr_ia32_cmd(vcpu, msr,
- guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D),
- L1D_FLUSH, X86_FEATURE_FLUSH_L1D);
- break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
* guest via direct_access_msrs, and switch it via user return.
*/
preempt_disable();
- r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
+ ret = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
preempt_enable();
- if (r)
- return 1;
+ if (ret)
+ break;
svm->tsc_aux = data;
break;
default:
return kvm_set_msr_common(vcpu, msr);
}
- return 0;
+ return ret;
}
static int msr_interception(struct kvm_vcpu *vcpu)
svm_recalc_instruction_intercepts(vcpu, svm);
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0,
+ !!guest_has_pred_cmd_msr(vcpu));
+
+ if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
+ !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
void *insn, int insn_len)
{
bool smep, smap, is_user;
- unsigned long cr4;
u64 error_code;
/* Emulation is always possible when KVM has access to all guest state. */
if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
goto resume_guest;
- cr4 = kvm_read_cr4(vcpu);
- smep = cr4 & X86_CR4_SMEP;
- smap = cr4 & X86_CR4_SMAP;
+ smep = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMEP);
+ smap = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMAP);
is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) {
pr_err_ratelimited("SEV Guest triggered AMD Erratum 1096\n");
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_IA32_SPEC_CTRL,
MSR_IA32_PRED_CMD,
+ MSR_IA32_FLUSH_CMD,
MSR_IA32_TSC,
#ifdef CONFIG_X86_64
MSR_FS_BASE,
return debugctl;
}
-static int vmx_set_msr_ia32_cmd(struct kvm_vcpu *vcpu,
- struct msr_data *msr_info,
- bool guest_has_feat, u64 cmd,
- int x86_feature_bit)
-{
- if (!msr_info->host_initiated && !guest_has_feat)
- return 1;
-
- if (!(msr_info->data & ~cmd))
- return 1;
- if (!boot_cpu_has(x86_feature_bit))
- return 1;
- if (!msr_info->data)
- return 0;
-
- wrmsrl(msr_info->index, cmd);
-
- /*
- * For non-nested:
- * When it's written (to non-zero) for the first time, pass
- * it through.
- *
- * For nested:
- * The handling of the MSR bitmap for L2 guests is done in
- * nested_vmx_prepare_msr_bitmap. We should not touch the
- * vmcs02.msr_bitmap here since it gets completely overwritten
- * in the merging.
- */
- vmx_disable_intercept_for_msr(vcpu, msr_info->index, MSR_TYPE_W);
-
- return 0;
-}
-
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
return 1;
goto find_uret_msr;
- case MSR_IA32_PRED_CMD:
- ret = vmx_set_msr_ia32_cmd(vcpu, msr_info,
- guest_has_pred_cmd_msr(vcpu),
- PRED_CMD_IBPB,
- X86_FEATURE_IBPB);
- break;
- case MSR_IA32_FLUSH_CMD:
- ret = vmx_set_msr_ia32_cmd(vcpu, msr_info,
- guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D),
- L1D_FLUSH,
- X86_FEATURE_FLUSH_L1D);
- break;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
return 1;
/* 22.2.1, 20.8.1 */
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
- vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+ vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
set_cr4_guest_host_mask(vmx);
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
return true;
- return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
+ return vmx_get_cpl(vcpu) == 3 && kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}
break;
case 3: /* lmsw */
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
+ trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
kvm_lmsw(vcpu, val);
return kvm_skip_emulated_instruction(vcpu);
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
- if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
+ if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
cache = MTRR_TYPE_WRBACK;
else
vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
!guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+ !guest_has_pred_cmd_msr(vcpu));
+
+ if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+ !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
set_cr4_guest_host_mask(vmx);
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 16
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
(1 << VCPU_EXREG_EXIT_INFO_1) | \
(1 << VCPU_EXREG_EXIT_INFO_2))
+ static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
+ {
+ unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
+
+ /*
+ * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
+ * in order to construct shadow PTEs with the correct protections.
+ * Note! CR0.WP technically can be passed through to the guest if
+ * paging is disabled, but checking CR0.PG would generate a cyclical
+ * dependency of sorts due to forcing the caller to ensure CR0 holds
+ * the correct value prior to determining which CR0 bits can be owned
+ * by L1. Keep it simple and limit the optimization to EPT.
+ */
+ if (!enable_ept)
+ bits &= ~X86_CR0_WP;
+ return bits;
+ }
+
static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
return container_of(kvm, struct kvm_vmx, kvm);
module_param(eager_page_split, bool, 0644);
/* Enable/disable SMT_RSB bug mitigation */
- bool __read_mostly mitigate_smt_rsb;
+ static bool __read_mostly mitigate_smt_rsb;
module_param(mitigate_smt_rsb, bool, 0444);
/*
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
{
- if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+ if ((dr != 4 && dr != 5) || !kvm_is_cr4_bit_set(vcpu, X86_CR4_DE))
return true;
kvm_queue_exception(vcpu, UD_VECTOR);
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
{
+ /*
+ * CR0.WP is incorporated into the MMU role, but only for non-nested,
+ * indirect shadow MMUs. If paging is disabled, no updates are needed
+ * as there are no permission bits to emulate. If TDP is enabled, the
+ * MMU's metadata needs to be updated, e.g. so that emulating guest
+ * translations does the right thing, but there's no need to unload the
+ * root as CR0.WP doesn't affect SPTEs.
+ */
+ if ((cr0 ^ old_cr0) == X86_CR0_WP) {
+ if (!(cr0 & X86_CR0_PG))
+ return;
+
+ if (tdp_enabled) {
+ kvm_init_mmu(vcpu);
+ return;
+ }
+ }
+
if ((cr0 ^ old_cr0) & X86_CR0_PG) {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
return 1;
if (!(cr0 & X86_CR0_PG) &&
- (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
+ (is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)))
return 1;
static_call(kvm_x86_set_cr0)(vcpu, cr0);
if (vcpu->arch.guest_state_protected)
return;
- if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
if (static_cpu_has(X86_FEATURE_PKU) &&
vcpu->arch.pkru != vcpu->arch.host_pkru &&
((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE)))
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
write_pkru(vcpu->arch.pkru);
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
}
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (static_cpu_has(X86_FEATURE_PKU) &&
((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
- kvm_read_cr4_bits(vcpu, X86_CR4_PKE))) {
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) {
vcpu->arch.pkru = rdpkru();
if (vcpu->arch.pkru != vcpu->arch.host_pkru)
write_pkru(vcpu->arch.host_pkru);
}
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
- if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
return 1;
if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
- return 1;
-
/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
return 1;
* PCIDs for them are also 0, because MOV to CR3 always flushes the TLB
* with PCIDE=0.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE))
return;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
bool skip_tlb_flush = false;
unsigned long pcid = 0;
#ifdef CONFIG_X86_64
- bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
-
- if (pcid_enabled) {
+ if (kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)) {
skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
cr3 &= ~X86_CR3_PCID_NOFLUSH;
pcid = cr3 & X86_CR3_PCID_MASK;
vcpu->arch.perf_capabilities = data;
kvm_pmu_refresh(vcpu);
return 0;
+ case MSR_IA32_PRED_CMD:
+ if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
+ return 1;
+
+ if (!boot_cpu_has(X86_FEATURE_IBPB) || (data & ~PRED_CMD_IBPB))
+ return 1;
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+ return 1;
+
+ if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
+ return 1;
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ break;
case MSR_EFER:
return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
return 0;
if (mce->status & MCI_STATUS_UC) {
if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
- !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
+ !kvm_is_cr4_bit_set(vcpu, X86_CR4_MCE)) {
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
return 0;
}
vcpu->run->hypercall.args[0] = gpa;
vcpu->run->hypercall.args[1] = npages;
vcpu->run->hypercall.args[2] = attrs;
- vcpu->run->hypercall.longmode = op_64_bit;
+ vcpu->run->hypercall.flags = 0;
+ if (op_64_bit)
+ vcpu->run->hypercall.flags |= KVM_EXIT_HYPERCALL_LONG_MODE;
+
+ WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
vcpu->arch.complete_userspace_io = complete_hypercall_exit;
return 0;
}
return 1;
}
- pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+ pcid_enabled = kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE);
switch (type) {
case INVPCID_TYPE_INDIV_ADDR: