cpufreq: Call transition notifier only once for each policy
[linux-2.6-block.git] / arch / x86 / kvm / x86.c
index 099b851dabafd7e2980f96472209777f9cc8f77b..22cc90baa67fe673a8a28fa6112af00892f07f65 100644 (file)
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
 static u32 __read_mostly tsc_tolerance_ppm = 250;
 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 
-/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 1000;
+/*
+ * lapic timer advance (tscdeadline mode only) in nanoseconds.  '-1' enables
+ * adaptive tuning starting from default advancment of 1000ns.  '0' disables
+ * advancement entirely.  Any other value is used as-is and disables adaptive
+ * tuning, i.e. allows priveleged userspace to set an exact advancement time.
+ */
+static int __read_mostly lapic_timer_advance_ns = -1;
 module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
-EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
 
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
@@ -800,7 +804,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
-static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
 {
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
                        !vcpu->guest_xcr0_loaded) {
@@ -810,8 +814,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
                vcpu->guest_xcr0_loaded = 1;
        }
 }
+EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
 
-static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 {
        if (vcpu->guest_xcr0_loaded) {
                if (vcpu->arch.xcr0 != host_xcr0)
@@ -819,6 +824,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
                vcpu->guest_xcr0_loaded = 0;
        }
 }
+EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
 
 static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
@@ -3093,7 +3099,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                break;
        case KVM_CAP_NESTED_STATE:
                r = kvm_x86_ops->get_nested_state ?
-                       kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+                       kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
                break;
        default:
                break;
@@ -3528,7 +3534,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
        memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
+static void kvm_smm_changed(struct kvm_vcpu *vcpu);
 
 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                                              struct kvm_vcpu_events *events)
@@ -3588,12 +3594,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                vcpu->arch.apic->sipi_vector = events->sipi_vector;
 
        if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
-               u32 hflags = vcpu->arch.hflags;
-               if (events->smi.smm)
-                       hflags |= HF_SMM_MASK;
-               else
-                       hflags &= ~HF_SMM_MASK;
-               kvm_set_hflags(vcpu, hflags);
+               if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+                       if (events->smi.smm)
+                               vcpu->arch.hflags |= HF_SMM_MASK;
+                       else
+                               vcpu->arch.hflags &= ~HF_SMM_MASK;
+                       kvm_smm_changed(vcpu);
+               }
 
                vcpu->arch.smi_pending = events->smi.pending;
 
@@ -4270,7 +4277,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
 }
 
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
-                                         u32 kvm_nr_mmu_pages)
+                                        unsigned long kvm_nr_mmu_pages)
 {
        if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
                return -EINVAL;
@@ -4284,7 +4291,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
        return 0;
 }
 
-static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
+static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
 {
        return kvm->arch.n_max_mmu_pages;
 }
@@ -5958,12 +5965,18 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
 
 static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
 {
-       kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+       emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+}
+
+static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
+                                 const char *smstate)
+{
+       return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
 }
 
-static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
 {
-       return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase);
+       kvm_smm_changed(emul_to_vcpu(ctxt));
 }
 
 static const struct x86_emulate_ops emulate_ops = {
@@ -6006,6 +6019,7 @@ static const struct x86_emulate_ops emulate_ops = {
        .get_hflags          = emulator_get_hflags,
        .set_hflags          = emulator_set_hflags,
        .pre_leave_smm       = emulator_pre_leave_smm,
+       .post_leave_smm      = emulator_post_leave_smm,
 };
 
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -6247,16 +6261,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
        kvm_mmu_reset_context(vcpu);
 }
 
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
-{
-       unsigned changed = vcpu->arch.hflags ^ emul_flags;
-
-       vcpu->arch.hflags = emul_flags;
-
-       if (changed & HF_SMM_MASK)
-               kvm_smm_changed(vcpu);
-}
-
 static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
                                unsigned long *db)
 {
@@ -6535,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
 
+static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.pio.count = 0;
+       return 1;
+}
+
 static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.pio.count = 0;
@@ -6551,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
        unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
        int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
                                            size, port, &val, 1);
+       if (ret)
+               return ret;
 
-       if (!ret) {
+       /*
+        * Workaround userspace that relies on old KVM behavior of %rip being
+        * incremented prior to exiting to userspace to handle "OUT 0x7e".
+        */
+       if (port == 0x7e &&
+           kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
+               vcpu->arch.complete_userspace_io =
+                       complete_fast_pio_out_port_0x7e;
+               kvm_skip_emulated_instruction(vcpu);
+       } else {
                vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
                vcpu->arch.complete_userspace_io = complete_fast_pio_out;
        }
-       return ret;
+       return 0;
 }
 
 static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
@@ -6677,10 +6698,8 @@ static void kvm_hyperv_tsc_notifier(void)
 }
 #endif
 
-static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-                                    void *data)
+static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
 {
-       struct cpufreq_freqs *freq = data;
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i, send_ipi = 0;
@@ -6724,17 +6743,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
         *
         */
 
-       if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
-               return 0;
-       if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
-               return 0;
-
-       smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+       smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
 
        spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
-                       if (vcpu->cpu != freq->cpu)
+                       if (vcpu->cpu != cpu)
                                continue;
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                        if (vcpu->cpu != smp_processor_id())
@@ -6756,8 +6770,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                 * guest context is entered kvmclock will be updated,
                 * so the guest will not see stale values.
                 */
-               smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+               smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
        }
+}
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+                                    void *data)
+{
+       struct cpufreq_freqs *freq = data;
+       int cpu;
+
+       if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+               return 0;
+       if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+               return 0;
+
+       for_each_cpu(cpu, freq->policy->cpus)
+               __kvmclock_cpufreq_notifier(freq, cpu);
+
        return 0;
 }
 
@@ -7441,9 +7471,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
        put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
 }
 
+#ifdef CONFIG_X86_64
 static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 {
-#ifdef CONFIG_X86_64
        struct desc_ptr dt;
        struct kvm_segment seg;
        unsigned long val;
@@ -7493,10 +7523,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 
        for (i = 0; i < 6; i++)
                enter_smm_save_seg_64(vcpu, buf, i);
-#else
-       WARN_ON_ONCE(1);
-#endif
 }
+#endif
 
 static void enter_smm(struct kvm_vcpu *vcpu)
 {
@@ -7507,9 +7535,11 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 
        trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
        memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                enter_smm_save_state_64(vcpu, buf);
        else
+#endif
                enter_smm_save_state_32(vcpu, buf);
 
        /*
@@ -7567,8 +7597,10 @@ static void enter_smm(struct kvm_vcpu *vcpu)
        kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
 
+#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                kvm_x86_ops->set_efer(vcpu, 0);
+#endif
 
        kvm_update_cpuid(vcpu);
        kvm_mmu_reset_context(vcpu);
@@ -7865,15 +7897,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                goto cancel_injection;
        }
 
-       kvm_load_guest_xcr0(vcpu);
-
        if (req_immediate_exit) {
                kvm_make_request(KVM_REQ_EVENT, vcpu);
                kvm_x86_ops->request_immediate_exit(vcpu);
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
-       if (lapic_timer_advance_ns)
+       if (lapic_in_kernel(vcpu) &&
+           vcpu->arch.apic->lapic_timer.timer_advance_ns)
                wait_lapic_expire(vcpu);
        guest_enter_irqoff();
 
@@ -7919,8 +7950,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
 
-       kvm_put_guest_xcr0(vcpu);
-
        kvm_before_interrupt(vcpu);
        kvm_x86_ops->handle_external_intr(vcpu);
        kvm_after_interrupt(vcpu);
@@ -9063,7 +9092,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        if (irqchip_in_kernel(vcpu->kvm)) {
                vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
-               r = kvm_create_lapic(vcpu);
+               r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
                if (r < 0)
                        goto fail_mmu_destroy;
        } else