From: Paolo Bonzini Date: Wed, 31 Mar 2021 11:35:52 +0000 (-0400) Subject: Merge branch 'kvm-fix-svm-races' into HEAD X-Git-Tag: v5.13-rc1~76^2~161 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=cb9b6a1b199b9a4f409d4b8fd70434c80f5389c2;p=linux-block.git Merge branch 'kvm-fix-svm-races' into HEAD --- cb9b6a1b199b9a4f409d4b8fd70434c80f5389c2 diff --cc arch/x86/kvm/svm/nested.c index 8523f60adb92,c9e7b86350d6..9bed4847ad55 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@@ -247,60 -225,50 +247,59 @@@ static bool nested_vmcb_check_controls( return true; } -static bool nested_vmcb_check_save(struct vcpu_svm *svm, struct vmcb *vmcb12) +static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu, + struct vmcb_save_area *save) { - bool vmcb12_lma; + /* + * These checks are also performed by KVM_SET_SREGS, + * except that EFER.LMA is not checked by SVM against + * CR0.PG && EFER.LME. + */ + if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) { + if (CC(!(save->cr4 & X86_CR4_PAE)) || + CC(!(save->cr0 & X86_CR0_PE)) || + CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3))) + return false; + } + + if (CC(!kvm_is_valid_cr4(vcpu, save->cr4))) + return false; + return true; +} + +/* Common checks that apply to both L1 and L2 state. */ +static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, + struct vmcb_save_area *save) +{ + /* + * FIXME: these should be done after copying the fields, + * to avoid TOC/TOU races. For these save area checks + * the possible damage is limited since kvm_set_cr0 and + * kvm_set_cr4 handle failure; EFER_SVME is an exception + * so it is force-set later in nested_prepare_vmcb_save. + */ - if ((vmcb12->save.efer & EFER_SVME) == 0) + if (CC(!(save->efer & EFER_SVME))) return false; - if (((vmcb12->save.cr0 & X86_CR0_CD) == 0) && (vmcb12->save.cr0 & X86_CR0_NW)) + if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) || + CC(save->cr0 & ~0xffffffffULL)) return false; - if (!kvm_dr6_valid(vmcb12->save.dr6) || !kvm_dr7_valid(vmcb12->save.dr7)) + if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7))) return false; - vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG); + if (!nested_vmcb_check_cr3_cr4(vcpu, save)) + return false; - if (!vmcb12_lma) { - if (vmcb12->save.cr4 & X86_CR4_PAE) { - if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK) - return false; - } else { - if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK) - return false; - } - } else { - if (!(vmcb12->save.cr4 & X86_CR4_PAE) || - !(vmcb12->save.cr0 & X86_CR0_PE) || - (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK)) - return false; - } - if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) + if (CC(!kvm_valid_efer(vcpu, save->efer))) return false; return true; } - static bool nested_vmcb_checks(struct kvm_vcpu *vcpu, struct vmcb *vmcb12) - { - if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save)) - return false; - - return nested_vmcb_check_controls(&vmcb12->control); - } - -static void load_nested_vmcb_control(struct vcpu_svm *svm, - struct vmcb_control_area *control) +static void nested_load_control_from_vmcb12(struct vcpu_svm *svm, + struct vmcb_control_area *control) { copy_vmcb_control_area(&svm->nested.ctl, control); @@@ -411,50 -379,27 +410,57 @@@ static int nested_svm_load_cr3(struct k return 0; } -static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12) +void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm) +{ + if (!svm->nested.vmcb02.ptr) + return; + + /* FIXME: merge g_pat from vmcb01 and vmcb12. */ + svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat; +} + +static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12) { + bool new_vmcb12 = false; + + nested_vmcb02_compute_g_pat(svm); + /* Load the nested guest state */ - svm->vmcb->save.es = vmcb12->save.es; - svm->vmcb->save.cs = vmcb12->save.cs; - svm->vmcb->save.ss = vmcb12->save.ss; - svm->vmcb->save.ds = vmcb12->save.ds; - svm->vmcb->save.gdtr = vmcb12->save.gdtr; - svm->vmcb->save.idtr = vmcb12->save.idtr; - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags); + + if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) { + new_vmcb12 = true; + svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa; + } + + if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) { + svm->vmcb->save.es = vmcb12->save.es; + svm->vmcb->save.cs = vmcb12->save.cs; + svm->vmcb->save.ss = vmcb12->save.ss; + svm->vmcb->save.ds = vmcb12->save.ds; + svm->vmcb->save.cpl = vmcb12->save.cpl; + vmcb_mark_dirty(svm->vmcb, VMCB_SEG); + } + + if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) { + svm->vmcb->save.gdtr = vmcb12->save.gdtr; + svm->vmcb->save.idtr = vmcb12->save.idtr; + vmcb_mark_dirty(svm->vmcb, VMCB_DT); + } + + kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); - svm_set_efer(&svm->vcpu, vmcb12->save.efer); + + /* + * Force-set EFER_SVME even though it is checked earlier on the + * VMCB12, because the guest can flip the bit between the check + * and now. Clearing EFER_SVME would call svm_free_nested. + */ + svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME); + svm_set_cr0(&svm->vcpu, vmcb12->save.cr0); svm_set_cr4(&svm->vcpu, vmcb12->save.cr4); - svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = vmcb12->save.cr2; + + svm->vcpu.arch.cr2 = vmcb12->save.cr2; + kvm_rax_write(&svm->vcpu, vmcb12->save.rax); kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp); kvm_rip_write(&svm->vcpu, vmcb12->save.rip); @@@ -521,54 -440,22 +527,53 @@@ static void nested_vmcb02_prepare_contr enter_guest_mode(&svm->vcpu); /* - * Merge guest and host intercepts - must be called with vcpu in - * guest-mode to take affect here + * Merge guest and host intercepts - must be called with vcpu in + * guest-mode to take effect. */ recalc_intercepts(svm); +} - vmcb_mark_all_dirty(svm->vmcb); +static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +{ + /* + * Some VMCB state is shared between L1 and L2 and thus has to be + * moved at the time of nested vmrun and vmexit. + * + * VMLOAD/VMSAVE state would also belong in this category, but KVM + * always performs VMLOAD and VMSAVE from the VMCB01. + */ + to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl; } -int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa, +int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, struct vmcb *vmcb12) { + struct vcpu_svm *svm = to_svm(vcpu); int ret; + trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa, + vmcb12->save.rip, + vmcb12->control.int_ctl, + vmcb12->control.event_inj, + vmcb12->control.nested_ctl); + + trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff, + vmcb12->control.intercepts[INTERCEPT_CR] >> 16, + vmcb12->control.intercepts[INTERCEPT_EXCEPTION], + vmcb12->control.intercepts[INTERCEPT_WORD3], + vmcb12->control.intercepts[INTERCEPT_WORD4], + vmcb12->control.intercepts[INTERCEPT_WORD5]); + + svm->nested.vmcb12_gpa = vmcb12_gpa; - nested_prepare_vmcb_save(svm, vmcb12); - nested_prepare_vmcb_control(svm); + + WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr); + + nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); - nested_load_control_from_vmcb12(svm, &vmcb12->control); + + svm_switch_vmcb(svm, &svm->nested.vmcb02); + nested_vmcb02_prepare_control(svm); + nested_vmcb02_prepare_save(svm, vmcb12); ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3, nested_npt_enabled(svm)); @@@ -614,7 -497,10 +619,10 @@@ int nested_svm_vmrun(struct kvm_vcpu *v if (WARN_ON_ONCE(!svm->nested.initialized)) return -EINVAL; - if (!nested_vmcb_checks(vcpu, vmcb12)) { - load_nested_vmcb_control(svm, &vmcb12->control); ++ nested_load_control_from_vmcb12(svm, &vmcb12->control); + - if (!nested_vmcb_check_save(svm, vmcb12) || ++ if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) || + !nested_vmcb_check_controls(&svm->nested.ctl)) { vmcb12->control.exit_code = SVM_EXIT_ERR; vmcb12->control.exit_code_hi = 0; vmcb12->control.exit_info_1 = 0; @@@ -1251,7 -1189,7 +1259,7 @@@ static int svm_set_nested_state(struct /* * Processor state contains L2 state. Check that it is -- * valid for guest mode (see nested_vmcb_checks). ++ * valid for guest mode (see nested_vmcb_check_save). */ cr0 = kvm_read_cr0(vcpu); if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))