KVM: x86: hyper-v: Introduce TLB flush fifo

[linux-2.6-block.git] / arch / x86 / kvm / svm / svm.c
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c

index 58f0077d935799eaa5922bf9da4dde5e4a02d2b2..91352d69284524c36ab0012a1c6ad2827ca45ffe 100644 (file)
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -6,6 +6,7 @@
  #include "mmu.h"
  #include "kvm_cache_regs.h"
  #include "x86.h"
+#include "smm.h"
  #include "cpuid.h"
  #include "pmu.h"
  
@@ -245,7 +246,7 @@ struct kvm_ldttss_desc {
         u32 zero1;
  } __attribute__((packed));
  
-DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+DEFINE_PER_CPU(struct svm_cpu_data, svm_data);
  
  /*
   * Only MSR_TSC_AUX is switched via the user return hook.  EFER is switched via
@@ -346,12 +347,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
         return 0;
  }
  
-static int is_external_interrupt(u32 info)
-{
-       info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
-       return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
-}
-
  static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
@@ -581,12 +576,7 @@ static int svm_hardware_enable(void)
                 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
                 return -EINVAL;
         }
-       sd = per_cpu(svm_data, me);
-       if (!sd) {
-               pr_err("%s: svm_data is NULL on %d\n", __func__, me);
-               return -EINVAL;
-       }
-
+       sd = per_cpu_ptr(&svm_data, me);
         sd->asid_generation = 1;
         sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
         sd->next_asid = sd->max_asid + 1;
@@ -597,7 +587,7 @@ static int svm_hardware_enable(void)
  
         wrmsrl(MSR_EFER, efer | EFER_SVME);
  
-       wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
+       wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
  
         if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
                 /*
@@ -646,42 +636,37 @@ static int svm_hardware_enable(void)
  
  static void svm_cpu_uninit(int cpu)
  {
-       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
  
-       if (!sd)
+       if (!sd->save_area)
                 return;
  
-       per_cpu(svm_data, cpu) = NULL;
         kfree(sd->sev_vmcbs);
         __free_page(sd->save_area);
-       kfree(sd);
+       sd->save_area_pa = 0;
+       sd->save_area = NULL;
  }
  
  static int svm_cpu_init(int cpu)
  {
-       struct svm_cpu_data *sd;
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
         int ret = -ENOMEM;
  
-       sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
-       if (!sd)
-               return ret;
-       sd->cpu = cpu;
+       memset(sd, 0, sizeof(struct svm_cpu_data));
         sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
         if (!sd->save_area)
-               goto free_cpu_data;
+               return ret;
  
         ret = sev_cpu_init(sd);
         if (ret)
                 goto free_save_area;
  
-       per_cpu(svm_data, cpu) = sd;
-
+       sd->save_area_pa = __sme_page_pa(sd->save_area);
         return 0;
  
  free_save_area:
         __free_page(sd->save_area);
-free_cpu_data:
-       kfree(sd);
+       sd->save_area = NULL;
         return ret;
  
  }
@@ -730,6 +715,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
         u32 offset;
         u32 *msrpm;
  
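+       /*
+        * For non-nested case:
+        * If the L01 MSR bitmap does not intercept writes to the MSR, then the
+        * guest may have changed it and its value must be saved after VM-exit.
+        *
+        * For nested case:
+        * If the L02 MSR bitmap does not intercept writes to the MSR, then the
+        * guest may have changed it and its value must be saved after VM-exit.
+        */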
         msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
                                       to_svm(vcpu)->msrpm;
  
@@ -1425,7 +1419,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb)
         int i;
  
         for_each_online_cpu(i)
-               cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+               cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
  }
  
  static void svm_vcpu_free(struct kvm_vcpu *vcpu)
@@ -1439,6 +1433,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
          */
         svm_clear_current_vmcb(svm->vmcb);
  
+       svm_leave_nested(vcpu);
         svm_free_nested(svm);
  
         sev_free_vcpu(vcpu);
@@ -1450,7 +1445,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
  static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
-       struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
  
         if (sev_es_guest(vcpu->kvm))
                 sev_es_unmap_ghcb(svm);
@@ -1462,7 +1457,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
          * Save additional host state that will be restored on VMEXIT (sev-es)
          * or subsequent vmload of host save area.
          */
-       vmsave(__sme_page_pa(sd->save_area));
+       vmsave(sd->save_area_pa);
         if (sev_es_guest(vcpu->kvm)) {
                 struct sev_es_save_area *hostsa;
                 hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
@@ -1487,7 +1482,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
  static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
-       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
  
         if (sd->current_vmcb != svm->vmcb) {
                 sd->current_vmcb = svm->vmcb;
@@ -2714,8 +2709,6 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
                 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
                         msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
                 break;
-       case MSR_IA32_PERF_CAPABILITIES:
-               return 0;
         default:
                 return KVM_MSR_RET_INVALID;
         }
@@ -3426,15 +3419,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                 return 0;
         }
  
-       if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
-           exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
-           exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
-           exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
-               printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
-                      "exit_code 0x%x\n",
-                      __func__, svm->vmcb->control.exit_int_info,
-                      exit_code);
-
         if (exit_fastpath != EXIT_FASTPATH_NONE)
                 return 1;
  
@@ -3443,7 +3427,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
  
  static void reload_tss(struct kvm_vcpu *vcpu)
  {
-       struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
  
         sd->tss_desc->type = 9; /* available 32/64-bit TSS */
         load_TR_desc();
@@ -3451,7 +3435,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
  
  static void pre_svm_run(struct kvm_vcpu *vcpu)
  {
-       struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
         struct vcpu_svm *svm = to_svm(vcpu);
  
         /*
@@ -3738,6 +3722,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
  
+       /*
+        * Unlike VMX, SVM doesn't provide a way to flush only NPT TLB entries.
+        * A TLB flush for the current ASID flushes both "host" and "guest" TLB
+        * entries, and thus is a superset of Hyper-V's fine grained flushing.
+        */
+       kvm_hv_vcpu_purge_flush_tlb(vcpu);
+
         /*
          * Flush only the current ASID even if the TLB flush was invoked via
          * kvm_flush_remote_tlbs().  Although flushing remote TLBs requires all
@@ -3911,30 +3902,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
         return EXIT_FASTPATH_NONE;
  }
  
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
-       unsigned long vmcb_pa = svm->current_vmcb->pa;
  
         guest_state_enter_irqoff();
  
-       if (sev_es_guest(vcpu->kvm)) {
-               __svm_sev_es_vcpu_run(vmcb_pa);
-       } else {
-               struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
-
-               /*
-                * Use a single vmcb (vmcb01 because it's always valid) for
-                * context switching guest state via VMLOAD/VMSAVE, that way
-                * the state doesn't need to be copied between vmcb01 and
-                * vmcb02 when switching vmcbs for nested virtualization.
-                */
-               vmload(svm->vmcb01.pa);
-               __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs);
-               vmsave(svm->vmcb01.pa);
-
-               vmload(__sme_page_pa(sd->save_area));
-       }
+       if (sev_es_guest(vcpu->kvm))
+               __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+       else
+               __svm_vcpu_run(svm, spec_ctrl_intercepted);
  
         guest_state_exit_irqoff();
  }
@@ -3942,6 +3919,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
  static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
+       bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
  
         trace_kvm_entry(vcpu);
  
@@ -3998,34 +3976,15 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
          * being speculatively taken.
          */
         if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
-               x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+               x86_spec_ctrl_set_guest(svm->virt_spec_ctrl);
  
-       svm_vcpu_enter_exit(vcpu);
-
-       /*
-        * We do not use IBRS in the kernel. If this vCPU has used the
-        * SPEC_CTRL MSR it may have left it on; save the value and
-        * turn it off. This is much more efficient than blindly adding
-        * it to the atomic save/restore list. Especially as the former
-        * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
-        *
-        * For non-nested case:
-        * If the L01 MSR bitmap does not intercept the MSR, then we need to
-        * save it.
-        *
-        * For nested case:
-        * If the L02 MSR bitmap does not intercept the MSR, then we need to
-        * save it.
-        */
-       if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
-           unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
-               svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+       svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
  
         if (!sev_es_guest(vcpu->kvm))
                 reload_tss(vcpu);
  
         if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
-               x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+               x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);
  
         if (!sev_es_guest(vcpu->kvm)) {
                 vcpu->arch.cr2 = svm->vmcb->save.cr2;
@@ -4149,6 +4108,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
         case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                 return false;
         case MSR_IA32_SMBASE:
+               if (!IS_ENABLED(CONFIG_KVM_SMM))
+                       return false;
                 /* SEV-ES guests do not support SMM, so report false */
                 if (kvm && sev_es_guest(kvm))
                         return false;
@@ -4405,6 +4366,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
         vcpu->arch.mcg_cap &= 0x1ff;
  }
  
+#ifdef CONFIG_KVM_SMM
  bool svm_smi_blocked(struct kvm_vcpu *vcpu)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
@@ -4432,7 +4394,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
         return 1;
  }
  
-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
         struct kvm_host_map map_save;
@@ -4441,10 +4403,16 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
         if (!is_guest_mode(vcpu))
                 return 0;
  
-       /* FED8h - SVM Guest */
-       put_smstate(u64, smstate, 0x7ed8, 1);
-       /* FEE0h - SVM Guest VMCB Physical Address */
-       put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+       /*
+        * 32-bit SMRAM format doesn't preserve EFER and SVM state.  Userspace is
+        * responsible for ensuring nested SVM and SMIs are mutually exclusive.
+        */
+
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+               return 1;
+
+       smram->smram64.svm_guest_flag = 1;
+       smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
  
         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4466,8 +4434,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
          * that, see svm_prepare_switch_to_guest()) which must be
          * preserved.
          */
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
-                        &map_save) == -EINVAL)
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
                 return 1;
  
         BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
@@ -4479,34 +4446,33 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
         return 0;
  }
  
-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
  {
         struct vcpu_svm *svm = to_svm(vcpu);
         struct kvm_host_map map, map_save;
-       u64 saved_efer, vmcb12_gpa;
         struct vmcb *vmcb12;
         int ret;
  
+       const struct kvm_smram_state_64 *smram64 = &smram->smram64;
+
         if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
                 return 0;
  
         /* Non-zero if SMI arrived while vCPU was in guest mode. */
-       if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+       if (!smram64->svm_guest_flag)
                 return 0;
  
         if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
                 return 1;
  
-       saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
-       if (!(saved_efer & EFER_SVME))
+       if (!(smram64->efer & EFER_SVME))
                 return 1;
  
-       vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map))
                 return 1;
  
         ret = 1;
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+       if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
                 goto unmap_map;
  
         if (svm_allocate_nested(svm))
@@ -4528,7 +4494,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
         vmcb12 = map.hva;
         nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
         nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
-       ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+       ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false);
  
         if (ret)
                 goto unmap_save;
@@ -4554,6 +4520,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
                 /* We must be in SMM; RSM will cause a vmexit anyway.  */
         }
  }
+#endif
  
  static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
                                         void *insn, int insn_len)
@@ -4829,10 +4796,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
         .pi_update_irte = avic_pi_update_irte,
         .setup_mce = svm_setup_mce,
  
+#ifdef CONFIG_KVM_SMM
         .smi_allowed = svm_smi_allowed,
         .enter_smm = svm_enter_smm,
         .leave_smm = svm_leave_smm,
         .enable_smi_window = svm_enable_smi_window,
+#endif
  
         .mem_enc_ioctl = sev_mem_enc_ioctl,
         .mem_enc_register_region = sev_mem_enc_register_region,
@@ -4898,6 +4867,7 @@ static __init void svm_set_cpu_caps(void)
  {
         kvm_set_cpu_caps();
  
+       kvm_caps.supported_perf_cap = 0;
         kvm_caps.supported_xss = 0;
  
         /* CPUID 0x80000001 and 0x8000000A (SVM features) */