Merge tag 'kvmarm-fixes-5.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5e1deed8ea5d21a661c4c2606cfb9aed5ced8f45..9b6bca61692912099fcc7eaa9ab8d49d512be67d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -336,7 +336,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
        }
 }
 
-int kvm_probe_user_return_msr(u32 msr)
+static int kvm_probe_user_return_msr(u32 msr)
 {
        u64 val;
        int ret;
@@ -350,16 +350,18 @@ out:
        preempt_enable();
        return ret;
 }
-EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr);
 
-void kvm_define_user_return_msr(unsigned slot, u32 msr)
+int kvm_add_user_return_msr(u32 msr)
 {
-       BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-       kvm_uret_msrs_list[slot] = msr;
-       if (slot >= kvm_nr_uret_msrs)
-               kvm_nr_uret_msrs = slot + 1;
+       BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
+
+       if (kvm_probe_user_return_msr(msr))
+               return -1;
+
+       kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
+       return kvm_nr_uret_msrs++;
 }
-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
 
 int kvm_find_user_return_msr(u32 msr)
 {
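For reference, the registration contract changes here: vendor code no longer defines a fixed slot up front, it asks common x86 code to probe the MSR on the host and append it, getting back the slot index (or -1 if the host cannot restore that MSR on return to userspace). A minimal sketch of a caller of the new API (the choice of MSR_TSC_AUX and the error handling are illustrative, not copied from vmx.c or svm.c):

	/* Illustrative vendor-module setup code (sketch, not vmx.c/svm.c). */
	static int example_uret_tsc_aux_slot;

	static int example_hardware_setup(void)
	{
		/* Probe-and-append: returns the slot index, or -1 on failure. */
		example_uret_tsc_aux_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
		if (example_uret_tsc_aux_slot < 0)
			return -EIO;

		/* Later code can also look the slot up instead of caching it. */
		WARN_ON_ONCE(kvm_find_user_return_msr(MSR_TSC_AUX) !=
			     example_uret_tsc_aux_slot);
		return 0;
	}
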
@@ -1174,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
                fixed |= DR6_RTM;
+
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+               fixed |= DR6_BUS_LOCK;
        return fixed;
 }
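The effect of the hunk above is that DR6_BUS_LOCK (bit 11, which architecturally reads as 1 when no bus lock has been detected, and on CPUs without Bus Lock Detect at all) is forced to 1 for guests whose CPUID does not advertise X86_FEATURE_BUS_LOCK_DETECT, mirroring the existing DR6_RTM handling. A rough sketch of how such a fixed mask is typically folded into a guest DR6 write (illustrative only, not the exact kvm_set_dr() code):

	/* Illustrative: combine guest-writable bits with the fixed bits. */
	static u64 example_sanitize_dr6(struct kvm_vcpu *vcpu, u64 val)
	{
		/* Keep only the bits the guest may legitimately toggle... */
		u64 dr6 = val & DR6_VOLATILE;

		/* ...and force the feature-dependent bits computed above. */
		return dr6 | kvm_dr6_fixed(vcpu);
	}
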
 
@@ -1640,6 +1645,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
                 * invokes 64-bit SYSENTER.
                 */
                data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+               break;
+       case MSR_TSC_AUX:
+               if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+                       return 1;
+
+               if (!host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+                       return 1;
+
+               /*
+                * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+                * incomplete and conflicting architectural behavior.  Current
+                * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+                * reserved and always read as zeros.  Enforce Intel's reserved
+                * bits check if and only if the guest CPU is Intel, and clear
+                * the bits in all other cases.  This ensures cross-vendor
+                * migration will provide consistent behavior for the guest.
+                */
+               if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+                       return 1;
+
+               data = (u32)data;
+               break;
        }
 
        msr.data = data;
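A host-initiated write (e.g. a VMM restoring state during migration) skips the guest CPUID checks above, but it is still subject to the vendor-dependent treatment of bits 63:32: rejected on an Intel guest, silently truncated otherwise. A hypothetical userspace sketch of such a write via the standard KVM_SET_MSRS ioctl (the helper name is made up; the ioctl and structures are the regular KVM API):

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	#define MSR_TSC_AUX 0xc0000103

	/* Returns the number of MSRs written: 1 on success, 0 on rejection. */
	static int example_set_tsc_aux(int vcpu_fd, __u64 value)
	{
		struct {
			struct kvm_msrs hdr;
			struct kvm_msr_entry entry;
		} msrs;

		memset(&msrs, 0, sizeof(msrs));
		msrs.hdr.nmsrs = 1;
		msrs.entry.index = MSR_TSC_AUX;
		msrs.entry.data = value;

		return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs);
	}
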
@@ -1676,6 +1705,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
        if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
                return KVM_MSR_RET_FILTERED;
 
+       switch (index) {
+       case MSR_TSC_AUX:
+               if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+                       return 1;
+
+               if (!host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+                       return 1;
+               break;
+       }
+
        msr.index = index;
        msr.host_initiated = host_initiated;
 
@@ -8053,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
 
+/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+       queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
 /*
  * Notification about pvclock gtod data update.
  */
@@ -8064,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
 
        update_pvclock_gtod(tk);
 
-       /* disable master clock if host does not trust, or does not
-        * use, TSC based clocksource.
+       /*
+        * Disable master clock if host does not trust, or does not use,
+        * TSC based clocksource. Delegate queue_work() to irq_work as
+        * this is invoked with tk_core.seq write held.
         */
        if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
            atomic_read(&kvm_guest_has_master_clock) != 0)
-               queue_work(system_long_wq, &pvclock_gtod_work);
-
+               irq_work_queue(&pvclock_irq_work);
        return 0;
 }
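The pattern introduced above is generic: pvclock_gtod_notify() runs with the timekeeper's tk_core.seq write side held, so it must not call queue_work() directly (the workqueue path can contend with time accessors); instead it raises an irq_work, which later runs with no such locks held and performs the queue_work() there. A stripped-down sketch of the same pattern outside of KVM (all names are illustrative):

	#include <linux/irq_work.h>
	#include <linux/workqueue.h>

	static void example_work_fn(struct work_struct *work)
	{
		/* Heavy lifting; free to sleep, take locks, read time, etc. */
	}
	static DECLARE_WORK(example_work, example_work_fn);

	/* Runs later in IRQ context, outside the caller's locked region. */
	static void example_irq_work_fn(struct irq_work *work)
	{
		queue_work(system_long_wq, &example_work);
	}
	static DEFINE_IRQ_WORK(example_irq_work, example_irq_work_fn);

	/* Called while holding a hot lock: only raise the irq_work here. */
	static void example_notifier(void)
	{
		irq_work_queue(&example_irq_work);
	}
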
 
@@ -8132,6 +8186,7 @@ int kvm_arch_init(void *opaque)
                printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
                goto out_free_x86_emulator_cache;
        }
+       kvm_nr_uret_msrs = 0;
 
        r = kvm_mmu_module_init();
        if (r)
@@ -8182,6 +8237,8 @@ void kvm_arch_exit(void)
        cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
        pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+       irq_work_sync(&pvclock_irq_work);
+       cancel_work_sync(&pvclock_gtod_work);
 #endif
        kvm_x86_ops.hardware_enable = NULL;
        kvm_mmu_module_exit();
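Note the teardown ordering in the hunk above: the notifier is unregistered first so nothing new is raised, then irq_work_sync() waits out an already-raised irq_work (which may still queue the work item), and only then is the work item itself flushed with cancel_work_sync(). For the illustrative names used earlier, the corresponding exit path would be (sketch):

	static void example_teardown(void)
	{
		/* Producers are already unregistered at this point. */
		irq_work_sync(&example_irq_work);	/* may still queue_work() */
		cancel_work_sync(&example_work);	/* nothing can requeue it now */
	}
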
@@ -9329,6 +9386,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        local_irq_disable();
        kvm_after_interrupt(vcpu);
 
+       /*
+        * Wait until after servicing IRQs to account guest time so that any
+        * ticks that occurred while running the guest are properly accounted
+        * to the guest.  Waiting until IRQs are enabled degrades the accuracy
+        * of accounting via context tracking, but the loss of accuracy is
+        * acceptable for all known use cases.
+        */
+       vtime_account_guest_exit();
+
        if (lapic_in_kernel(vcpu)) {
                s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
                if (delta != S64_MIN) {