kvm: nested: Introduce read_and_check_msr_entry()
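
Pull the guest-memory read and the subsequent validity check of a
vmx_msr_entry out of nested_vmx_store_msr() and into a small helper,
read_and_check_msr_entry(), so the sequence can be shared.  A minimal
sketch of the resulting call pattern, distilled from the hunks below
(vcpu, gpa, count and e are the variables nested_vmx_store_msr()
already uses; this is illustration, not additional code):

	struct vmx_msr_entry e;
	u32 i;

	for (i = 0; i < count; i++) {
		/* Read entry i of the MSR list at gpa and validate it. */
		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return -EINVAL;
		/* ... consume e.index as before ... */
	}
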
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e76eb4f07f6c93a2b8bad15afefbc1087d9f487c..3ef529cc72fb88f538f320633485f6971254fbac 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -10,6 +10,7 @@
 #include "hyperv.h"
 #include "mmu.h"
 #include "nested.h"
+#include "pmu.h"
 #include "trace.h"
 #include "x86.h"
 
@@ -27,6 +28,16 @@ module_param(nested_early_check, bool, S_IRUGO);
        failed;                                                         \
 })
 
+#define SET_MSR_OR_WARN(vcpu, idx, data)                               \
+({                                                                     \
+       bool failed = kvm_set_msr(vcpu, idx, data);                     \
+       if (failed)                                                     \
+               pr_warn_ratelimited(                                    \
+                               "%s cannot write MSR (0x%x, 0x%llx)\n", \
+                               __func__, idx, data);                   \
+       failed;                                                         \
+})
+
 /*
  * Hyper-V requires all of these, so mark them as supported even though
  * they are just treated the same as all-context.
@@ -929,6 +940,26 @@ fail:
        return i + 1;
 }
 
+static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
+                                    struct vmx_msr_entry *e)
+{
+       if (kvm_vcpu_read_guest(vcpu,
+                               gpa + i * sizeof(*e),
+                               e, 2 * sizeof(u32))) {
+               pr_debug_ratelimited(
+                       "%s cannot read MSR entry (%u, 0x%08llx)\n",
+                       __func__, i, gpa + i * sizeof(*e));
+               return false;
+       }
+       if (nested_vmx_store_msr_check(vcpu, e)) {
+               pr_debug_ratelimited(
+                       "%s check failed (%u, 0x%x, 0x%x)\n",
+                       __func__, i, e->index, e->reserved);
+               return false;
+       }
+       return true;
+}
+
 static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 {
        u64 data;
@@ -940,20 +971,9 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
                if (unlikely(i >= max_msr_list_size))
                        return -EINVAL;
 
-               if (kvm_vcpu_read_guest(vcpu,
-                                       gpa + i * sizeof(e),
-                                       &e, 2 * sizeof(u32))) {
-                       pr_debug_ratelimited(
-                               "%s cannot read MSR entry (%u, 0x%08llx)\n",
-                               __func__, i, gpa + i * sizeof(e));
-                       return -EINVAL;
-               }
-               if (nested_vmx_store_msr_check(vcpu, &e)) {
-                       pr_debug_ratelimited(
-                               "%s check failed (%u, 0x%x, 0x%x)\n",
-                               __func__, i, e.index, e.reserved);
+               if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
                        return -EINVAL;
-               }
+
                if (kvm_get_msr(vcpu, e.index, &data)) {
                        pr_debug_ratelimited(
                                "%s cannot read MSR (%u, 0x%x)\n",
@@ -1012,7 +1032,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
                kvm_mmu_new_cr3(vcpu, cr3, false);
 
        vcpu->arch.cr3 = cr3;
-       __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
        kvm_init_mmu(vcpu, false);
 
@@ -2073,6 +2093,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
        exec_control &= ~CPU_BASED_TPR_SHADOW;
        exec_control |= vmcs12->cpu_based_vm_exec_control;
 
+       vmx->nested.l1_tpr_threshold = -1;
        if (exec_control & CPU_BASED_TPR_SHADOW)
                vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
 #ifdef CONFIG_X86_64
@@ -2418,6 +2439,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                                entry_failure_code))
                return -EINVAL;
 
+       /*
+        * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12
+        * on nested VM-Exit, which can occur without actually running L2 and
+        * thus without hitting vmx_set_cr3(), e.g. if L1 is entering L2 with
+        * vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
+        * transition to HLT instead of running L2.
+        */
+       if (enable_ept)
+               vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
+
        /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
        if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
            is_pae_paging(vcpu)) {
@@ -2430,6 +2461,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        if (!enable_ept)
                vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
 
+       if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+           SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+                           vmcs12->guest_ia32_perf_global_ctrl))
+               return -EINVAL;
+
        kvm_rsp_write(vcpu, vmcs12->guest_rsp);
        kvm_rip_write(vcpu, vmcs12->guest_rip);
        return 0;
@@ -2664,6 +2700,11 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
            CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
                return -EINVAL;
 
+       if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+           CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+                                          vmcs12->host_ia32_perf_global_ctrl)))
+               return -EINVAL;
+
 #ifdef CONFIG_X86_64
        ia32e = !!(vcpu->arch.efer & EFER_LMA);
 #else
@@ -2779,6 +2820,11 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
                return -EINVAL;
        }
 
+       if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+           CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
+                                          vmcs12->guest_ia32_perf_global_ctrl)))
+               return -EINVAL;
+
        /*
         * If the load IA32_EFER VM-entry control is 1, the following checks
         * are performed on the field for the IA32_EFER MSR:
@@ -3453,6 +3499,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
                test_bit(KVM_APIC_INIT, &apic->pending_events)) {
                if (block_nested_events)
                        return -EBUSY;
+               clear_bit(KVM_APIC_INIT, &apic->pending_events);
                nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
                return 0;
        }
@@ -3856,8 +3903,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
                vcpu->arch.pat = vmcs12->host_ia32_pat;
        }
        if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
-               vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
-                       vmcs12->host_ia32_perf_global_ctrl);
+               SET_MSR_OR_WARN(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+                               vmcs12->host_ia32_perf_global_ctrl);
 
        /* Set L1 segment info according to Intel SDM
            27.5.2 Loading Host Segment and Descriptor-Table Registers */
@@ -3976,7 +4023,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
 
        nested_ept_uninit_mmu_context(vcpu);
        vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-       __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
        /*
         * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -4104,6 +4151,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
        vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+       if (vmx->nested.l1_tpr_threshold != -1)
+               vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
 
        if (kvm_has_tsc_control)
                decache_tsc_multiplier(vmx);
@@ -4319,6 +4368,27 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
        return 0;
 }
 
+void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx;
+
+       if (!nested_vmx_allowed(vcpu))
+               return;
+
+       vmx = to_vmx(vcpu);
+       if (kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
+               vmx->nested.msrs.entry_ctls_high |=
+                               VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+               vmx->nested.msrs.exit_ctls_high |=
+                               VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+       } else {
+               vmx->nested.msrs.entry_ctls_high &=
+                               ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+               vmx->nested.msrs.exit_ctls_high &=
+                               ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+       }
+}
+
 static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 {
        gva_t gva;
@@ -5758,7 +5828,7 @@ error_guest_mode:
        return ret;
 }
 
-void nested_vmx_vcpu_setup(void)
+void nested_vmx_set_vmcs_shadowing_bitmap(void)
 {
        if (enable_shadow_vmcs) {
                vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));