KVM: x86: rename process_smi to enter_smm, process_smi_request to process_smi
[linux-2.6-block.git] arch/x86/kvm/x86.c
index 9b7798c7b210e75499644ed1ca35b643fe743208..1785415ebff3bf554c3cb77ad42013bdd45c6333 100644
@@ -91,6 +91,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
+static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 struct kvm_x86_ops *kvm_x86_ops __read_mostly;
@@ -161,6 +162,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "halt_exits", VCPU_STAT(halt_exits) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+       { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "hypercalls", VCPU_STAT(hypercalls) },
        { "request_irq", VCPU_STAT(request_irq_exits) },
@@ -2002,22 +2004,8 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.pv_time_enabled = false;
 }
 
-static void accumulate_steal_time(struct kvm_vcpu *vcpu)
-{
-       u64 delta;
-
-       if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
-               return;
-
-       delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
-       vcpu->arch.st.last_steal = current->sched_info.run_delay;
-       vcpu->arch.st.accum_steal = delta;
-}
-
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-       accumulate_steal_time(vcpu);
-
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -2025,9 +2013,26 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                return;
 
-       vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
-       vcpu->arch.st.steal.version += 2;
-       vcpu->arch.st.accum_steal = 0;
+       if (vcpu->arch.st.steal.version & 1)
+               vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
+
+       vcpu->arch.st.steal.version += 1;
+
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+               &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+       smp_wmb();
+
+       vcpu->arch.st.steal.steal += current->sched_info.run_delay -
+               vcpu->arch.st.last_steal;
+       vcpu->arch.st.last_steal = current->sched_info.run_delay;
+
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
+               &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
+
+       smp_wmb();
+
+       vcpu->arch.st.steal.version += 1;
 
        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
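The rewritten record_steal_time() above publishes steal time with a seqcount-style protocol: the version is first forced even ("first time write"), bumped to an odd value while the update is in flight, and bumped back to even once both writes and barriers have completed. A guest must therefore retry whenever it sees an odd version or a version change across its read. A minimal guest-side reader sketch, assuming the vCPU has already registered its kvm_steal_time area through MSR_KVM_STEAL_TIME; the per-CPU variable name and helper are illustrative, not part of this patch:

#include <linux/percpu.h>
#include <linux/kvm_para.h>
#include <asm/barrier.h>

static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);

/* Read this CPU's accumulated steal time without tearing. */
static u64 read_steal_time(int cpu)
{
	struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
	u32 version;
	u64 steal;

	do {
		version = src->version;
		rmb();			/* pairs with the host's smp_wmb() */
		steal = src->steal;
		rmb();
		/* odd: update in flight; mismatch: raced with an update */
	} while ((version & 1) || version != src->version);

	return steal;
}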
@@ -2310,6 +2315,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_AMD64_NB_CFG:
        case MSR_FAM10H_MMIO_CONF_BASE:
        case MSR_AMD64_BU_CFG2:
+       case MSR_IA32_PERF_CTL:
                msr_info->data = 0;
                break;
        case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2611,7 +2617,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = KVM_MAX_MCE_BANKS;
                break;
        case KVM_CAP_XCRS:
-               r = cpu_has_xsave;
+               r = boot_cpu_has(X86_FEATURE_XSAVE);
                break;
        case KVM_CAP_TSC_CONTROL:
                r = kvm_has_tsc_control;
@@ -2968,6 +2974,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                              | KVM_VCPUEVENT_VALID_SMM))
                return -EINVAL;
 
+       if (events->exception.injected &&
+           (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
+               return -EINVAL;
+
        process_nmi(vcpu);
        vcpu->arch.exception.pending = events->exception.injected;
        vcpu->arch.exception.nr = events->exception.nr;
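The new check rejects a pending ("injected") exception whose vector is above 31 or equal to NMI_VECTOR. A hedged userspace sketch of a request that now fails with -EINVAL, assuming vcpu_fd is an already-created vCPU file descriptor (the helper name and error handling are illustrative):

#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

static int try_bogus_exception(int vcpu_fd)
{
	struct kvm_vcpu_events ev;

	memset(&ev, 0, sizeof(ev));
	ev.exception.injected = 1;
	ev.exception.nr = 32;	/* beyond the architectural exception vectors */

	if (ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &ev) < 0 && errno == EINVAL) {
		printf("rejected as expected\n");
		return 0;
	}
	return -1;	/* kernels without this check accept the value */
}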
@@ -3032,6 +3042,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        if (dbgregs->flags)
                return -EINVAL;
 
+       if (dbgregs->dr6 & ~0xffffffffull)
+               return -EINVAL;
+       if (dbgregs->dr7 & ~0xffffffffull)
+               return -EINVAL;
+
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
@@ -3094,7 +3109,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 
        /* Set XSTATE_BV and possibly XCOMP_BV.  */
        xsave->header.xfeatures = xstate_bv;
-       if (cpu_has_xsaves)
+       if (boot_cpu_has(X86_FEATURE_XSAVES))
                xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
        /*
@@ -3121,7 +3136,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
-       if (cpu_has_xsave) {
+       if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                memset(guest_xsave, 0, sizeof(struct kvm_xsave));
                fill_xsave((u8 *) guest_xsave->region, vcpu);
        } else {
@@ -3139,7 +3154,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
        u64 xstate_bv =
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
 
-       if (cpu_has_xsave) {
+       if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                /*
                 * Here we allow setting states that are not present in
                 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
@@ -3160,7 +3175,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
                                        struct kvm_xcrs *guest_xcrs)
 {
-       if (!cpu_has_xsave) {
+       if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
                guest_xcrs->nr_xcrs = 0;
                return;
        }
@@ -3176,7 +3191,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
 {
        int i, r = 0;
 
-       if (!cpu_has_xsave)
+       if (!boot_cpu_has(X86_FEATURE_XSAVE))
                return -EINVAL;
 
        if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
@@ -5288,13 +5303,8 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
                /* This is a good place to trace that we are exiting SMM.  */
                trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
 
-               if (unlikely(vcpu->arch.smi_pending)) {
-                       kvm_make_request(KVM_REQ_SMI, vcpu);
-                       vcpu->arch.smi_pending = 0;
-               } else {
-                       /* Process a latched INIT, if any.  */
-                       kvm_make_request(KVM_REQ_EVENT, vcpu);
-               }
+               /* Process a latched INIT or SMI, if any.  */
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
        }
 
        kvm_mmu_reset_context(vcpu);
@@ -5865,7 +5875,7 @@ int kvm_arch_init(void *opaque)
 
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
-       if (cpu_has_xsave)
+       if (boot_cpu_has(X86_FEATURE_XSAVE))
                host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
        kvm_lapic_init();
@@ -6094,7 +6104,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
        }
 
        /* try to inject new event if pending */
-       if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+       if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+               vcpu->arch.smi_pending = false;
+               enter_smm(vcpu);
+       } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
                --vcpu->arch.nmi_pending;
                vcpu->arch.nmi_injected = true;
                kvm_x86_ops->set_nmi(vcpu);
@@ -6117,6 +6130,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
                        kvm_x86_ops->set_irq(vcpu);
                }
        }
+
        return 0;
 }
 
@@ -6140,7 +6154,7 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 #define put_smstate(type, buf, offset, val)                      \
        *(type *)((buf) + (offset) - 0x7e00) = val
 
-static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
+static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
 {
        u32 flags = 0;
        flags |= seg->g       << 23;
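put_smstate() above stores a value at an architectural state-save offset into a local 512-byte buffer. The offsets used by the save routines are relative to smbase + 0x8000, and the state-save area is the top 512 bytes of that 64 KiB SMRAM image (0x7e00-0x7fff); since enter_smm() later writes the buffer to guest physical smbase + 0xfe00, subtracting 0x7e00 rebases each offset into buf. A small standalone demonstration of the arithmetic, using the SMBASE field at offset 0x7ef8 from the 32-bit save routine (the SMBASE value is arbitrary):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same macro as above, parenthesized for standalone use. */
#define put_smstate(type, buf, offset, val) \
	(*(type *)((buf) + (offset) - 0x7e00) = (val))

int main(void)
{
	char buf[512];
	uint64_t smbase = 0x30000;	/* arbitrary example SMBASE */

	memset(buf, 0, sizeof(buf));
	put_smstate(uint32_t, buf, 0x7ef8, (uint32_t)smbase);

	/* Index 0xf8 of buf; after the 512-byte write to smbase + 0xfe00
	 * this lands at guest physical smbase + 0xfef8. */
	printf("buf index = 0x%x\n", 0x7ef8 - 0x7e00);
	printf("guest gpa = 0x%" PRIx64 "\n",
	       smbase + 0xfe00 + (0x7ef8 - 0x7e00));
	return 0;
}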
@@ -6154,7 +6168,7 @@ static u32 process_smi_get_segment_flags(struct kvm_segment *seg)
        return flags;
 }
 
-static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
 {
        struct kvm_segment seg;
        int offset;
@@ -6169,11 +6183,11 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
 
        put_smstate(u32, buf, offset + 8, seg.base);
        put_smstate(u32, buf, offset + 4, seg.limit);
-       put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
+       put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
 }
 
 #ifdef CONFIG_X86_64
-static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 {
        struct kvm_segment seg;
        int offset;
@@ -6182,7 +6196,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
        kvm_get_segment(vcpu, &seg, n);
        offset = 0x7e00 + n * 16;
 
-       flags = process_smi_get_segment_flags(&seg) >> 8;
+       flags = enter_smm_get_segment_flags(&seg) >> 8;
        put_smstate(u16, buf, offset, seg.selector);
        put_smstate(u16, buf, offset + 2, flags);
        put_smstate(u32, buf, offset + 4, seg.limit);
@@ -6190,7 +6204,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 }
 #endif
 
-static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 {
        struct desc_ptr dt;
        struct kvm_segment seg;
@@ -6214,13 +6228,13 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
        put_smstate(u32, buf, 0x7fc4, seg.selector);
        put_smstate(u32, buf, 0x7f64, seg.base);
        put_smstate(u32, buf, 0x7f60, seg.limit);
-       put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
+       put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
 
        kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
        put_smstate(u32, buf, 0x7fc0, seg.selector);
        put_smstate(u32, buf, 0x7f80, seg.base);
        put_smstate(u32, buf, 0x7f7c, seg.limit);
-       put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
+       put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
 
        kvm_x86_ops->get_gdt(vcpu, &dt);
        put_smstate(u32, buf, 0x7f74, dt.address);
@@ -6231,7 +6245,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
        put_smstate(u32, buf, 0x7f54, dt.size);
 
        for (i = 0; i < 6; i++)
-               process_smi_save_seg_32(vcpu, buf, i);
+               enter_smm_save_seg_32(vcpu, buf, i);
 
        put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
 
@@ -6240,7 +6254,7 @@ static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
        put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
 }
 
-static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 {
 #ifdef CONFIG_X86_64
        struct desc_ptr dt;
@@ -6272,7 +6286,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 
        kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
        put_smstate(u16, buf, 0x7e90, seg.selector);
-       put_smstate(u16, buf, 0x7e92, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
        put_smstate(u32, buf, 0x7e94, seg.limit);
        put_smstate(u64, buf, 0x7e98, seg.base);
 
@@ -6282,7 +6296,7 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 
        kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
        put_smstate(u16, buf, 0x7e70, seg.selector);
-       put_smstate(u16, buf, 0x7e72, process_smi_get_segment_flags(&seg) >> 8);
+       put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
        put_smstate(u32, buf, 0x7e74, seg.limit);
        put_smstate(u64, buf, 0x7e78, seg.base);
 
@@ -6291,31 +6305,26 @@ static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
        put_smstate(u64, buf, 0x7e68, dt.address);
 
        for (i = 0; i < 6; i++)
-               process_smi_save_seg_64(vcpu, buf, i);
+               enter_smm_save_seg_64(vcpu, buf, i);
 #else
        WARN_ON_ONCE(1);
 #endif
 }
 
-static void process_smi(struct kvm_vcpu *vcpu)
+static void enter_smm(struct kvm_vcpu *vcpu)
 {
        struct kvm_segment cs, ds;
        struct desc_ptr dt;
        char buf[512];
        u32 cr0;
 
-       if (is_smm(vcpu)) {
-               vcpu->arch.smi_pending = true;
-               return;
-       }
-
        trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
        vcpu->arch.hflags |= HF_SMM_MASK;
        memset(buf, 0, 512);
        if (guest_cpuid_has_longmode(vcpu))
-               process_smi_save_state_64(vcpu, buf);
+               enter_smm_save_state_64(vcpu, buf);
        else
-               process_smi_save_state_32(vcpu, buf);
+               enter_smm_save_state_32(vcpu, buf);
 
        kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
 
@@ -6371,6 +6380,12 @@ static void process_smi(struct kvm_vcpu *vcpu)
        kvm_mmu_reset_context(vcpu);
 }
 
+static void process_smi(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.smi_pending = true;
+       kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
        kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
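After this split, process_smi() is only the request side: it latches smi_pending and asks for event re-evaluation, while the actual world switch is done by enter_smm() from inject_pending_event(). One way the request is raised is the KVM_SMI vCPU ioctl; a minimal userspace sketch, assuming vcpu_fd is a vCPU created on a host advertising KVM_CAP_X86_SMM (the helper name is illustrative):

#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <stdio.h>

/* Ask KVM to inject a System Management Interrupt into one vCPU. */
static int inject_smi(int vcpu_fd)
{
	if (ioctl(vcpu_fd, KVM_SMI, 0) < 0) {
		perror("KVM_SMI");
		return -1;
	}
	/* On the next KVM_RUN, KVM_REQ_SMI is handled by process_smi() and
	 * enter_smm() performs the entry once injection is possible. */
	return 0;
}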
@@ -6565,8 +6580,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
                if (inject_pending_event(vcpu, req_int_win) != 0)
                        req_immediate_exit = true;
-               /* enable NMI/IRQ window open exits if needed */
                else {
+                       /* Enable NMI/IRQ window open exits if needed.
+                        *
+                        * SMIs have two cases: 1) they can be nested, and
+                        * then there is nothing to do here because RSM will
+                        * cause a vmexit anyway; 2) or the SMI can be pending
+                        * because inject_pending_event has completed the
+                        * injection of an IRQ or NMI from the previous vmexit,
+                        * and then we request an immediate exit to inject the SMI.
+                        */
+                       if (vcpu->arch.smi_pending && !is_smm(vcpu))
+                               req_immediate_exit = true;
                        if (vcpu->arch.nmi_pending)
                                kvm_x86_ops->enable_nmi_window(vcpu);
                        if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
@@ -6617,8 +6642,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        kvm_load_guest_xcr0(vcpu);
 
-       if (req_immediate_exit)
+       if (req_immediate_exit) {
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
                smp_send_reschedule(vcpu->cpu);
+       }
 
        trace_kvm_entry(vcpu->vcpu_id);
        wait_lapic_expire(vcpu);
@@ -7293,7 +7320,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 static void fx_init(struct kvm_vcpu *vcpu)
 {
        fpstate_init(&vcpu->arch.guest_fpu.state);
-       if (cpu_has_xsaves)
+       if (boot_cpu_has(X86_FEATURE_XSAVES))
                vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
                        host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
@@ -7419,6 +7446,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
        vcpu->arch.hflags = 0;
 
+       vcpu->arch.smi_pending = 0;
        atomic_set(&vcpu->arch.nmi_queued, 0);
        vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
@@ -7752,6 +7780,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm_page_track_init(kvm);
        kvm_mmu_init_vm(kvm);
 
+       if (kvm_x86_ops->vm_init)
+               return kvm_x86_ops->vm_init(kvm);
+
        return 0;
 }
 
@@ -7808,7 +7839,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 
        slot = id_to_memslot(slots, id);
        if (size) {
-               if (WARN_ON(slot->npages))
+               if (slot->npages)
                        return -EEXIST;
 
                /*
@@ -7873,6 +7904,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
                x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
        }
+       if (kvm_x86_ops->vm_destroy)
+               kvm_x86_ops->vm_destroy(kvm);
        kvm_iommu_unmap_guest(kvm);
        kfree(kvm->arch.vpic);
        kfree(kvm->arch.vioapic);
@@ -8355,19 +8388,21 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+bool kvm_arch_has_irq_bypass(void)
+{
+       return kvm_x86_ops->update_pi_irte != NULL;
+}
+
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
                                      struct irq_bypass_producer *prod)
 {
        struct kvm_kernel_irqfd *irqfd =
                container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-       if (kvm_x86_ops->update_pi_irte) {
-               irqfd->producer = prod;
-               return kvm_x86_ops->update_pi_irte(irqfd->kvm,
-                               prod->irq, irqfd->gsi, 1);
-       }
+       irqfd->producer = prod;
 
-       return -EINVAL;
+       return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+                                          prod->irq, irqfd->gsi, 1);
 }
 
 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -8377,11 +8412,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
        struct kvm_kernel_irqfd *irqfd =
                container_of(cons, struct kvm_kernel_irqfd, consumer);
 
-       if (!kvm_x86_ops->update_pi_irte) {
-               WARN_ON(irqfd->producer != NULL);
-               return;
-       }
-
        WARN_ON(irqfd->producer != prod);
        irqfd->producer = NULL;
 
@@ -8429,3 +8459,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);