KVM: x86: hyper-v: Introduce TLB flush fifo
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 58f0077d935799eaa5922bf9da4dde5e4a02d2b2..91352d69284524c36ab0012a1c6ad2827ca45ffe 100644
@@ -6,6 +6,7 @@
 #include "mmu.h"
 #include "kvm_cache_regs.h"
 #include "x86.h"
+#include "smm.h"
 #include "cpuid.h"
 #include "pmu.h"
 
@@ -245,7 +246,7 @@ struct kvm_ldttss_desc {
        u32 zero1;
 } __attribute__((packed));
 
-DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+DEFINE_PER_CPU(struct svm_cpu_data, svm_data);
 
 /*
  * Only MSR_TSC_AUX is switched via the user return hook.  EFER is switched via
@@ -346,12 +347,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
        return 0;
 }
 
-static int is_external_interrupt(u32 info)
-{
-       info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
-       return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
-}
-
 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -581,12 +576,7 @@ static int svm_hardware_enable(void)
                pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
                return -EINVAL;
        }
-       sd = per_cpu(svm_data, me);
-       if (!sd) {
-               pr_err("%s: svm_data is NULL on %d\n", __func__, me);
-               return -EINVAL;
-       }
-
+       sd = per_cpu_ptr(&svm_data, me);
        sd->asid_generation = 1;
        sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
        sd->next_asid = sd->max_asid + 1;
@@ -597,7 +587,7 @@ static int svm_hardware_enable(void)
 
        wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-       wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
+       wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
 
        if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
                /*
@@ -646,42 +636,37 @@ static int svm_hardware_enable(void)
 
 static void svm_cpu_uninit(int cpu)
 {
-       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
 
-       if (!sd)
+       if (!sd->save_area)
                return;
 
-       per_cpu(svm_data, cpu) = NULL;
        kfree(sd->sev_vmcbs);
        __free_page(sd->save_area);
-       kfree(sd);
+       sd->save_area_pa = 0;
+       sd->save_area = NULL;
 }
 
 static int svm_cpu_init(int cpu)
 {
-       struct svm_cpu_data *sd;
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
        int ret = -ENOMEM;
 
-       sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
-       if (!sd)
-               return ret;
-       sd->cpu = cpu;
+       memset(sd, 0, sizeof(struct svm_cpu_data));
        sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!sd->save_area)
-               goto free_cpu_data;
+               return ret;
 
        ret = sev_cpu_init(sd);
        if (ret)
                goto free_save_area;
 
-       per_cpu(svm_data, cpu) = sd;
-
+       sd->save_area_pa = __sme_page_pa(sd->save_area);
        return 0;
 
 free_save_area:
        __free_page(sd->save_area);
-free_cpu_data:
-       kfree(sd);
+       sd->save_area = NULL;
        return ret;
 
 }
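The hunk above follows the usual kernel pattern for converting a dynamically allocated per-CPU pointer into a statically allocated per-CPU structure. A minimal, generic sketch of the idiom (demo names, not the KVM code itself); note that the accessor also works on a member of the per-CPU variable, as the svm_clear_current_vmcb() hunk further down relies on:

#include <linux/atomic.h>
#include <linux/percpu.h>

struct demo_cpu_data {
	unsigned long counter;
	void *current_thing;
};

/* One instance per possible CPU, reserved statically instead of via kzalloc(). */
static DEFINE_PER_CPU(struct demo_cpu_data, demo_data);

static void demo_touch(int cpu)
{
	/* per_cpu_ptr() never returns NULL, so no allocation or NULL checks. */
	struct demo_cpu_data *d = per_cpu_ptr(&demo_data, cpu);

	d->counter++;
}

static void demo_clear_thing(void *thing, int cpu)
{
	/* Addressing a member of the per-CPU variable is equally valid. */
	cmpxchg(per_cpu_ptr(&demo_data.current_thing, cpu), thing, NULL);
}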
@@ -730,6 +715,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
        u32 offset;
        u32 *msrpm;
 
+       /*
+        * For non-nested case:
+        * If the L01 MSR bitmap does not intercept the MSR, then we need to
+        * save it.
+        *
+        * For nested case:
+        * If the L02 MSR bitmap does not intercept the MSR, then we need to
+        * save it.
+        */
        msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
                                      to_svm(vcpu)->msrpm;
 
@@ -1425,7 +1419,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb)
        int i;
 
        for_each_online_cpu(i)
-               cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+               cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
 }
 
 static void svm_vcpu_free(struct kvm_vcpu *vcpu)
@@ -1439,6 +1433,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
         */
        svm_clear_current_vmcb(svm->vmcb);
 
+       svm_leave_nested(vcpu);
        svm_free_nested(svm);
 
        sev_free_vcpu(vcpu);
@@ -1450,7 +1445,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
 static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
 
        if (sev_es_guest(vcpu->kvm))
                sev_es_unmap_ghcb(svm);
@@ -1462,7 +1457,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
         * Save additional host state that will be restored on VMEXIT (sev-es)
         * or subsequent vmload of host save area.
         */
-       vmsave(__sme_page_pa(sd->save_area));
+       vmsave(sd->save_area_pa);
        if (sev_es_guest(vcpu->kvm)) {
                struct sev_es_save_area *hostsa;
                hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@@ -1487,7 +1482,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
 
        if (sd->current_vmcb != svm->vmcb) {
                sd->current_vmcb = svm->vmcb;
@@ -2714,8 +2709,6 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
                if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
                        msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
                break;
-       case MSR_IA32_PERF_CAPABILITIES:
-               return 0;
        default:
                return KVM_MSR_RET_INVALID;
        }
@@ -3426,15 +3419,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                return 0;
        }
 
-       if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
-           exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
-           exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
-           exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
-               printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
-                      "exit_code 0x%x\n",
-                      __func__, svm->vmcb->control.exit_int_info,
-                      exit_code);
-
        if (exit_fastpath != EXIT_FASTPATH_NONE)
                return 1;
 
@@ -3443,7 +3427,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
 static void reload_tss(struct kvm_vcpu *vcpu)
 {
-       struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
 
        sd->tss_desc->type = 9; /* available 32/64-bit TSS */
        load_TR_desc();
@@ -3451,7 +3435,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
 
 static void pre_svm_run(struct kvm_vcpu *vcpu)
 {
-       struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
        struct vcpu_svm *svm = to_svm(vcpu);
 
        /*
@@ -3738,6 +3722,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * Unlike VMX, SVM doesn't provide a way to flush only NPT TLB entries.
+        * A TLB flush for the current ASID flushes both "host" and "guest" TLB
+        * entries, and thus is a superset of Hyper-V's fine grained flushing.
+        */
+       kvm_hv_vcpu_purge_flush_tlb(vcpu);
+
        /*
         * Flush only the current ASID even if the TLB flush was invoked via
         * kvm_flush_remote_tlbs().  Although flushing remote TLBs requires all
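The new kvm_hv_vcpu_purge_flush_tlb() call empties the per-vCPU Hyper-V TLB flush fifo that this series introduces, since the full-ASID flush below supersedes any queued fine-grained requests. A simplified sketch of what the helper is expected to do; the fifo field name and kfifo usage are reconstructed from the series, not quoted from it:

static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	/* Nothing to purge if the vCPU never enabled Hyper-V emulation. */
	if (!hv_vcpu)
		return;

	/*
	 * Drop every queued fine-grained flush request; the caller is about
	 * to flush the whole ASID, which covers all of them.
	 */
	kfifo_reset_out(&hv_vcpu->tlb_flush_fifo.entries);
}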
@@ -3911,30 +3902,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
        return EXIT_FASTPATH_NONE;
 }
 
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       unsigned long vmcb_pa = svm->current_vmcb->pa;
 
        guest_state_enter_irqoff();
 
-       if (sev_es_guest(vcpu->kvm)) {
-               __svm_sev_es_vcpu_run(vmcb_pa);
-       } else {
-               struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
-
-               /*
-                * Use a single vmcb (vmcb01 because it's always valid) for
-                * context switching guest state via VMLOAD/VMSAVE, that way
-                * the state doesn't need to be copied between vmcb01 and
-                * vmcb02 when switching vmcbs for nested virtualization.
-                */
-               vmload(svm->vmcb01.pa);
-               __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs);
-               vmsave(svm->vmcb01.pa);
-
-               vmload(__sme_page_pa(sd->save_area));
-       }
+       if (sev_es_guest(vcpu->kvm))
+               __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+       else
+               __svm_vcpu_run(svm, spec_ctrl_intercepted);
 
        guest_state_exit_irqoff();
 }
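The VMLOAD/VMSAVE choreography deleted above does not disappear; it moves into the __svm_vcpu_run() assembly stub, which now receives the vcpu_svm pointer (plus the SPEC_CTRL intercept state) and can locate the per-CPU host save area itself. In C-equivalent terms the stub now performs roughly the following; this is a sketch only, the real sequence lives in vmenter.S:

	vmload(svm->vmcb01.pa);		/* vmcb01 always holds the VMLOAD/VMSAVE state */
	/* ...switch to guest SPEC_CTRL if !spec_ctrl_intercepted, load guest GPRs... */
	/* ...VMRUN with svm->current_vmcb->pa, then save guest GPRs... */
	vmsave(svm->vmcb01.pa);
	vmload(per_cpu_ptr(&svm_data, vcpu->cpu)->save_area_pa);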
@@ -3942,6 +3919,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
 
        trace_kvm_entry(vcpu);
 
@@ -3998,34 +3976,15 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
         * being speculatively taken.
         */
        if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
-               x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+               x86_spec_ctrl_set_guest(svm->virt_spec_ctrl);
 
-       svm_vcpu_enter_exit(vcpu);
-
-       /*
-        * We do not use IBRS in the kernel. If this vCPU has used the
-        * SPEC_CTRL MSR it may have left it on; save the value and
-        * turn it off. This is much more efficient than blindly adding
-        * it to the atomic save/restore list. Especially as the former
-        * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
-        *
-        * For non-nested case:
-        * If the L01 MSR bitmap does not intercept the MSR, then we need to
-        * save it.
-        *
-        * For nested case:
-        * If the L02 MSR bitmap does not intercept the MSR, then we need to
-        * save it.
-        */
-       if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
-           unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
-               svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+       svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
 
        if (!sev_es_guest(vcpu->kvm))
                reload_tss(vcpu);
 
        if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
-               x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+               x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);
 
        if (!sev_es_guest(vcpu->kvm)) {
                vcpu->arch.cr2 = svm->vmcb->save.cr2;
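Similarly, the SPEC_CTRL read deleted here is folded into that assembly stub: spec_ctrl_intercepted, computed once at the top of svm_vcpu_run(), tells it whether the guest could have written the MSR behind KVM's back. The logic is equivalent to the removed C (sketch):

	/* Needed only when the CPU lacks V_SPEC_CTRL and the guest owns the MSR. */
	if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && unlikely(!spec_ctrl_intercepted))
		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);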
@@ -4149,6 +4108,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                return false;
        case MSR_IA32_SMBASE:
+               if (!IS_ENABLED(CONFIG_KVM_SMM))
+                       return false;
                /* SEV-ES guests do not support SMM, so report false */
                if (kvm && sev_es_guest(kvm))
                        return false;
@@ -4405,6 +4366,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
        vcpu->arch.mcg_cap &= 0x1ff;
 }
 
+#ifdef CONFIG_KVM_SMM
 bool svm_smi_blocked(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -4432,7 +4394,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
        return 1;
 }
 
-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct kvm_host_map map_save;
@@ -4441,10 +4403,16 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
        if (!is_guest_mode(vcpu))
                return 0;
 
-       /* FED8h - SVM Guest */
-       put_smstate(u64, smstate, 0x7ed8, 1);
-       /* FEE0h - SVM Guest VMCB Physical Address */
-       put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+       /*
+        * 32-bit SMRAM format doesn't preserve EFER and SVM state.  Userspace is
+        * responsible for ensuring nested SVM and SMIs are mutually exclusive.
+        */
+
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+               return 1;
+
+       smram->smram64.svm_guest_flag = 1;
+       smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
 
        svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
        svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4466,8 +4434,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
         * that, see svm_prepare_switch_to_guest()) which must be
         * preserved.
         */
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
-                        &map_save) == -EINVAL)
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
                return 1;
 
        BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
@@ -4479,34 +4446,33 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
        return 0;
 }
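The raw put_smstate()/GET_SMSTATE() offsets (FED0h, FED8h, FEE0h) are replaced by named fields of the new union kvm_smram from smm.h, included at the top of this diff. Only the members this hunk touches are sketched below; the types, padding and neighbouring fields are illustrative approximations, not the authoritative definition:

struct kvm_smram_state_64 {
	/* ... */
	u64 efer;			/* was GET_SMSTATE(u64, smstate, 0x7ed0) */
	u64 svm_guest_flag;		/* was put_smstate(u64, smstate, 0x7ed8, 1) */
	u64 svm_guest_vmcb_gpa;		/* was put_smstate(u64, smstate, 0x7ee0, ...) */
	/* ... */
};

union kvm_smram {
	struct kvm_smram_state_64 smram64;
	struct kvm_smram_state_32 smram32;
	u8 bytes[512];
};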
 
-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct kvm_host_map map, map_save;
-       u64 saved_efer, vmcb12_gpa;
        struct vmcb *vmcb12;
        int ret;
 
+       const struct kvm_smram_state_64 *smram64 = &smram->smram64;
+
        if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
                return 0;
 
        /* Non-zero if SMI arrived while vCPU was in guest mode. */
-       if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+       if (!smram64->svm_guest_flag)
                return 0;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
                return 1;
 
-       saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
-       if (!(saved_efer & EFER_SVME))
+       if (!(smram64->efer & EFER_SVME))
                return 1;
 
-       vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map))
                return 1;
 
        ret = 1;
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
                goto unmap_map;
 
        if (svm_allocate_nested(svm))
@@ -4528,7 +4494,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
        vmcb12 = map.hva;
        nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
        nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
-       ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+       ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false);
 
        if (ret)
                goto unmap_save;
@@ -4554,6 +4520,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
                /* We must be in SMM; RSM will cause a vmexit anyway.  */
        }
 }
+#endif
 
 static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
                                        void *insn, int insn_len)
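One detail shared by the svm_enter_smm() and svm_leave_smm() hunks above: kvm_vcpu_map() returns 0 on success and a negative errno on failure (not only -EINVAL), so the reworked calls treat any non-zero return as an error. Illustrative pattern, with "gpa" standing in for whichever guest physical address is being mapped:

	/* Catch every failure mode, not just -EINVAL. */
	if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
		return 1;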
@@ -4829,10 +4796,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .pi_update_irte = avic_pi_update_irte,
        .setup_mce = svm_setup_mce,
 
+#ifdef CONFIG_KVM_SMM
        .smi_allowed = svm_smi_allowed,
        .enter_smm = svm_enter_smm,
        .leave_smm = svm_leave_smm,
        .enable_smi_window = svm_enable_smi_window,
+#endif
 
        .mem_enc_ioctl = sev_mem_enc_ioctl,
        .mem_enc_register_region = sev_mem_enc_register_region,
@@ -4898,6 +4867,7 @@ static __init void svm_set_cpu_caps(void)
 {
        kvm_set_cpu_caps();
 
+       kvm_caps.supported_perf_cap = 0;
        kvm_caps.supported_xss = 0;
 
        /* CPUID 0x80000001 and 0x8000000A (SVM features) */