Merge tag 'kvmarm-fixes-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Paolo Bonzini <pbonzini@redhat.com>
Sat, 5 Feb 2022 05:58:25 +0000 (00:58 -0500)
committer Paolo Bonzini <pbonzini@redhat.com>
Sat, 5 Feb 2022 05:58:25 +0000 (00:58 -0500)
KVM/arm64 fixes for 5.17, take #2

- A couple of fixes when handling an exception while an SError has been
  delivered

- Workaround for Cortex-A510's single-step erratum
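
For reference, the arch/arm64/kvm/arm.c hunk below restructures the vCPU run
loop around the split guest entry/exit helpers this branch is merged on top
of. A simplified sketch of the resulting ordering (the run_vcpu_once() wrapper
name is invented for illustration; statistics, tracepoints, the request loop
and error handling are elided; the helpers are the ones documented in the
include/linux/kvm_host.h hunk at the end of this diff):

static int run_vcpu_once(struct kvm_vcpu *vcpu)
{
        int ret;

        local_irq_disable();

        /* Start accounting time to the guest. */
        guest_timing_enter_irqoff();

        /* noinstr region: enter the RCU extended quiescent state. */
        guest_state_enter_irqoff();
        ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
        guest_state_exit_irqoff();

        /*
         * Transiently unmask interrupts so a pending tick is taken while
         * still inside guest timing; the ISB ensures the pending interrupt
         * is actually taken (ARM DDI 0487G.b section D1.13.4).
         */
        local_irq_enable();
        isb();
        local_irq_disable();

        /* Stop accounting guest time, then run with interrupts on again. */
        guest_timing_exit_irqoff();
        local_irq_enable();

        return ret;
}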

arch/arm64/kvm/arm.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
include/linux/kvm_host.h

diff --combined arch/arm64/kvm/arm.c
index a069d5925f77c10f309fca6e68b6baa3d24cad70,a4a0063df456ca0f963f62ece504e9991edcf5da..ecc5958e27fe2b3fc69b9b1121a626495cb13c46
@@@ -482,6 -482,13 +482,13 @@@ bool kvm_arch_vcpu_in_kernel(struct kvm
        return vcpu_mode_priv(vcpu);
  }
  
+ #ifdef CONFIG_GUEST_PERF_EVENTS
+ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+ {
+       return *vcpu_pc(vcpu);
+ }
+ #endif
+
  /* Just ensure a guest exit from a particular CPU */
  static void exit_vm_noop(void *info)
  {
@@@ -790,24 -797,6 +797,24 @@@ static bool kvm_vcpu_exit_request(struc
                        xfer_to_guest_mode_work_pending();
  }
  
 +/*
 + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 + * the vCPU is running.
 + *
 + * This must be noinstr as instrumentation may make use of RCU, and this is not
 + * safe during the EQS.
 + */
 +static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 +{
 +      int ret;
 +
 +      guest_state_enter_irqoff();
 +      ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
 +      guest_state_exit_irqoff();
 +
 +      return ret;
 +}
 +
  /**
   * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
   * @vcpu:     The VCPU pointer
@@@ -892,9 -881,9 +899,9 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                 * Enter the guest
                 */
                trace_kvm_entry(*vcpu_pc(vcpu));
 -              guest_enter_irqoff();
 +              guest_timing_enter_irqoff();
  
 -              ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
 +              ret = kvm_arm_vcpu_enter_exit(vcpu);
  
                vcpu->mode = OUTSIDE_GUEST_MODE;
                vcpu->stat.exits++;
                kvm_arch_vcpu_ctxsync_fp(vcpu);
  
                /*
 -               * We may have taken a host interrupt in HYP mode (ie
 -               * while executing the guest). This interrupt is still
 -               * pending, as we haven't serviced it yet!
 +               * We must ensure that any pending interrupts are taken before
 +               * we exit guest timing so that timer ticks are accounted as
 +               * guest time. Transiently unmask interrupts so that any
 +               * pending interrupts are taken.
                 *
 -               * We're now back in SVC mode, with interrupts
 -               * disabled.  Enabling the interrupts now will have
 -               * the effect of taking the interrupt again, in SVC
 -               * mode this time.
 +               * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
 +               * context synchronization event) is necessary to ensure that
 +               * pending interrupts are taken.
                 */
                local_irq_enable();
 +              isb();
 +              local_irq_disable();
 +
 +              guest_timing_exit_irqoff();
 +
 +              local_irq_enable();
  
 -              /*
 -               * We do local_irq_enable() before calling guest_exit() so
 -               * that if a timer interrupt hits while running the guest we
 -               * account that tick as being spent in the guest.  We enable
 -               * preemption after calling guest_exit() so that if we get
 -               * preempted we make sure ticks after that is not counted as
 -               * guest time.
 -               */
 -              guest_exit();
                trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
  
                /* Exit types that need handling before we can be preempted */
@@@ -1810,7 -1802,8 +1817,8 @@@ static int init_subsystems(void
        if (err)
                goto out;
  
-       kvm_perf_init();
+       kvm_register_perf_callbacks(NULL);
        kvm_sys_reg_table_init();
  
  out:
@@@ -2198,7 -2191,7 +2206,7 @@@ out_err
  /* NOP: Compiling as a module not supported */
  void kvm_arch_exit(void)
  {
-       kvm_perf_teardown();
+       kvm_unregister_perf_callbacks();
  }
  
  static int __init early_kvm_mode_cfg(char *arg)
diff --combined arch/x86/include/asm/kvm_host.h
index 80e285533371f46b45d77b3069c5c1d8c105e3b0,6e7c545bc7ee158f8ca63af11d016b438d8c10bf..6dcccb304775411a8f38738526c79a5aa670c820
@@@ -782,6 -782,7 +782,7 @@@ struct kvm_vcpu_arch 
        unsigned nmi_pending; /* NMI queued after currently running handler */
        bool nmi_injected;    /* Trying to inject an NMI this entry */
        bool smi_pending;    /* SMI queued after currently running handler */
+       u8 handling_intr_from_guest;
  
        struct kvm_mtrr mtrr_state;
        u64 pat;
@@@ -1409,8 -1410,7 +1410,8 @@@ struct kvm_x86_ops 
        void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
        void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
        void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
 -      int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 +      void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
 +                                int trig_mode, int vector);
        int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
        int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
        int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
@@@ -1520,6 -1520,7 +1521,7 @@@ struct kvm_x86_init_ops 
        int (*disabled_by_bios)(void);
        int (*check_processor_compatibility)(void);
        int (*hardware_setup)(void);
+       unsigned int (*handle_intel_pt_intr)(void);
  
        struct kvm_x86_ops *runtime_ops;
  };
@@@ -1569,6 -1570,9 +1571,9 @@@ static inline int kvm_arch_flush_remote
                return -ENOTSUPP;
  }
  
+ #define kvm_arch_pmi_in_guest(vcpu) \
+       ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
+
  int kvm_mmu_module_init(void);
  void kvm_mmu_module_exit(void);
  
@@@ -1899,8 -1903,6 +1904,6 @@@ int kvm_skip_emulated_instruction(struc
  int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
  void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
  
- int kvm_is_in_guest(void);
  void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                                     u32 size);
  bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
diff --combined arch/x86/kvm/svm/svm.c
index ea2f7f3614aff6a6df17cb93a4cb4c0de830617b,6d97629655e3d03439c0c030aeb3cbfe8061644a..a290efb272ad1641f36c33b8b6bbca22bdf89a10
@@@ -3291,21 -3291,6 +3291,21 @@@ static void svm_set_irq(struct kvm_vcp
                SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
  }
  
 +static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
 +                                int trig_mode, int vector)
 +{
 +      struct kvm_vcpu *vcpu = apic->vcpu;
 +
 +      if (svm_deliver_avic_intr(vcpu, vector)) {
 +              kvm_lapic_set_irr(vector, apic);
 +              kvm_make_request(KVM_REQ_EVENT, vcpu);
 +              kvm_vcpu_kick(vcpu);
 +      } else {
 +              trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
 +                                         trig_mode, vector);
 +      }
 +}
 +
  static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -3630,7 -3615,7 +3630,7 @@@ static noinstr void svm_vcpu_enter_exit
        struct vcpu_svm *svm = to_svm(vcpu);
        unsigned long vmcb_pa = svm->current_vmcb->pa;
  
 -      kvm_guest_enter_irqoff();
 +      guest_state_enter_irqoff();
  
        if (sev_es_guest(vcpu->kvm)) {
                __svm_sev_es_vcpu_run(vmcb_pa);
                vmload(__sme_page_pa(sd->save_area));
        }
  
 -      kvm_guest_exit_irqoff();
 +      guest_state_exit_irqoff();
  }
  
  static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        vcpu->arch.regs_dirty = 0;
  
        if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
-               kvm_before_interrupt(vcpu);
+               kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
  
        kvm_load_host_xsave_state(vcpu);
        stgi();
@@@ -4560,7 -4545,7 +4560,7 @@@ static struct kvm_x86_ops svm_x86_ops _
        .pmu_ops = &amd_pmu_ops,
        .nested_ops = &svm_nested_ops,
  
 -      .deliver_posted_interrupt = svm_deliver_avic_intr,
 +      .deliver_interrupt = svm_deliver_interrupt,
        .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
        .update_pi_irte = svm_update_pi_irte,
        .setup_mce = svm_setup_mce,
diff --combined arch/x86/kvm/vmx/vmx.c
index c0c256c33d2126e90f405d2d2369196e330eff61,aca3ae2a02f34dd1a30c078803a98878a856f865..6c27bd0c89e1e613782c85637634e6215928ac24
@@@ -4041,21 -4041,6 +4041,21 @@@ static int vmx_deliver_posted_interrupt
        return 0;
  }
  
 +static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
 +                                int trig_mode, int vector)
 +{
 +      struct kvm_vcpu *vcpu = apic->vcpu;
 +
 +      if (vmx_deliver_posted_interrupt(vcpu, vector)) {
 +              kvm_lapic_set_irr(vector, apic);
 +              kvm_make_request(KVM_REQ_EVENT, vcpu);
 +              kvm_vcpu_kick(vcpu);
 +      } else {
 +              trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
 +                                         trig_mode, vector);
 +      }
 +}
 +
  /*
   * Set up the vmcs's constant host-state fields, i.e., host-state fields that
   * will not change in the lifetime of the guest.
@@@ -6495,7 -6480,9 +6495,9 @@@ void vmx_do_interrupt_nmi_irqoff(unsign
  static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
                                        unsigned long entry)
  {
-       kvm_before_interrupt(vcpu);
+       bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist;
+       kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
        vmx_do_interrupt_nmi_irqoff(entry);
        kvm_after_interrupt(vcpu);
  }
@@@ -6767,7 -6754,7 +6769,7 @@@ static fastpath_t vmx_exit_handlers_fas
  static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                                        struct vcpu_vmx *vmx)
  {
 -      kvm_guest_enter_irqoff();
 +      guest_state_enter_irqoff();
  
        /* L1D Flush includes CPU buffer clear to mitigate MDS */
        if (static_branch_unlikely(&vmx_l1d_should_flush))
  
        vcpu->arch.cr2 = native_read_cr2();
  
 -      kvm_guest_exit_irqoff();
 +      guest_state_exit_irqoff();
  }
  
  static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@@ -7781,7 -7768,7 +7783,7 @@@ static struct kvm_x86_ops vmx_x86_ops _
        .hwapic_isr_update = vmx_hwapic_isr_update,
        .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
        .sync_pir_to_irr = vmx_sync_pir_to_irr,
 -      .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 +      .deliver_interrupt = vmx_deliver_interrupt,
        .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
  
        .set_tss_addr = vmx_set_tss_addr,
        .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
  };
  
+ static unsigned int vmx_handle_intel_pt_intr(void)
+ {
+       struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+       /* '0' on failure so that the !PT case can use a RET0 static call. */
+       if (!kvm_arch_pmi_in_guest(vcpu))
+               return 0;
+
+       kvm_make_request(KVM_REQ_PMI, vcpu);
+       __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
+                 (unsigned long *)&vcpu->arch.pmu.global_status);
+       return 1;
+ }
+
  static __init void vmx_setup_user_return_msrs(void)
  {
  
                kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
  }
  
+ static struct kvm_x86_init_ops vmx_init_ops __initdata;
+
  static __init int hardware_setup(void)
  {
        unsigned long host_bndcfgs;
                return -EINVAL;
        if (!enable_ept || !cpu_has_vmx_intel_pt())
                pt_mode = PT_MODE_SYSTEM;
+       if (pt_mode == PT_MODE_HOST_GUEST)
+               vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
+       else
+               vmx_init_ops.handle_intel_pt_intr = NULL;
  
        setup_default_sgx_lepubkeyhash();
  
@@@ -8043,6 -8050,7 +8065,7 @@@ static struct kvm_x86_init_ops vmx_init
        .disabled_by_bios = vmx_disabled_by_bios,
        .check_processor_compatibility = vmx_check_processor_compat,
        .hardware_setup = hardware_setup,
+       .handle_intel_pt_intr = NULL,
  
        .runtime_ops = &vmx_x86_ops,
  };
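
hardware_setup() above only installs vmx_handle_intel_pt_intr() when PT is in
host/guest mode, and the pointer reaches perf via
kvm_register_perf_callbacks(ops->handle_intel_pt_intr) in the x86.c hunk
below. A minimal sketch of that registration path on the common-code side
(not part of this diff; it assumes generic KVM keeps a single static
perf_guest_info_callbacks instance, here called kvm_guest_cbs as in the old
x86 code being removed below):

void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void))
{
        kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler;
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
}

void kvm_unregister_perf_callbacks(void)
{
        perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
}
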
diff --combined arch/x86/kvm/x86.c
index b533aab9817281d51f8f9524efe16343674204dd,74b53a16f38a72062a123dd6ce04abd84a8c7264..7131d735b1ef3fb888beb9144795b0f312923a05
@@@ -90,8 -90,6 +90,8 @@@
  u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
  EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
  
 +#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
 +
  #define emul_to_vcpu(ctxt) \
        ((struct kvm_vcpu *)(ctxt)->vcpu)
  
@@@ -4342,7 -4340,7 +4342,7 @@@ static inline void __user *kvm_get_attr
        void __user *uaddr = (void __user*)(unsigned long)attr->addr;
  
        if ((u64)(unsigned long)uaddr != attr->addr)
 -              return ERR_PTR(-EFAULT);
 +              return ERR_PTR_USR(-EFAULT);
        return uaddr;
  }
  
@@@ -8722,50 -8720,6 +8722,6 @@@ static void kvm_timer_init(void
                          kvmclock_cpu_online, kvmclock_cpu_down_prep);
  }
  
- DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
- EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
- int kvm_is_in_guest(void)
- {
-       return __this_cpu_read(current_vcpu) != NULL;
- }
- static int kvm_is_user_mode(void)
- {
-       int user_mode = 3;
-       if (__this_cpu_read(current_vcpu))
-               user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
-       return user_mode != 0;
- }
- static unsigned long kvm_get_guest_ip(void)
- {
-       unsigned long ip = 0;
-       if (__this_cpu_read(current_vcpu))
-               ip = kvm_rip_read(__this_cpu_read(current_vcpu));
-       return ip;
- }
- static void kvm_handle_intel_pt_intr(void)
- {
-       struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
-       kvm_make_request(KVM_REQ_PMI, vcpu);
-       __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
-                       (unsigned long *)&vcpu->arch.pmu.global_status);
- }
- static struct perf_guest_info_callbacks kvm_guest_cbs = {
-       .is_in_guest            = kvm_is_in_guest,
-       .is_user_mode           = kvm_is_user_mode,
-       .get_guest_ip           = kvm_get_guest_ip,
-       .handle_intel_pt_intr   = kvm_handle_intel_pt_intr,
- };
  #ifdef CONFIG_X86_64
  static void pvclock_gtod_update_fn(struct work_struct *work)
  {
@@@ -8878,8 -8832,6 +8834,6 @@@ int kvm_arch_init(void *opaque
  
        kvm_timer_init();
  
-       perf_register_guest_info_callbacks(&kvm_guest_cbs);
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
                supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@@@ -8911,7 -8863,6 +8865,6 @@@ void kvm_arch_exit(void
                clear_hv_tscchange_cb();
  #endif
        kvm_lapic_exit();
-       perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
  
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
@@@ -10090,8 -10041,6 +10043,8 @@@ static int vcpu_enter_guest(struct kvm_
                set_debugreg(0, 7);
        }
  
 +      guest_timing_enter_irqoff();
 +
        for (;;) {
                /*
                 * Assert that vCPU vs. VM APICv state is consistent.  An APICv
         * interrupts on processors that implement an interrupt shadow, the
         * stat.exits increment will do nicely.
         */
-       kvm_before_interrupt(vcpu);
+       kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
        local_irq_enable();
        ++vcpu->stat.exits;
        local_irq_disable();
         * of accounting via context tracking, but the loss of accuracy is
         * acceptable for all known use cases.
         */
 -      vtime_account_guest_exit();
 +      guest_timing_exit_irqoff();
  
        if (lapic_in_kernel(vcpu)) {
                s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
@@@ -11522,6 -11471,8 +11475,8 @@@ int kvm_arch_hardware_setup(void *opaqu
        memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
        kvm_ops_static_call_update();
  
+       kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
+
        if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
                supported_xss = 0;
  
  
  void kvm_arch_hardware_unsetup(void)
  {
+       kvm_unregister_perf_callbacks();
+
        static_call(kvm_x86_hardware_unsetup)();
  }
  
@@@ -11686,6 -11639,8 +11643,6 @@@ void kvm_arch_sync_events(struct kvm *k
        kvm_free_pit(kvm);
  }
  
 -#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
 -
  /**
   * __x86_set_memory_region: Setup KVM internal memory slot
   *
@@@ -12140,6 -12095,11 +12097,11 @@@ bool kvm_arch_vcpu_in_kernel(struct kvm
        return vcpu->arch.preempted_in_kernel;
  }
  
+ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+ {
+       return kvm_rip_read(vcpu);
+ }
+
  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  {
        return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
diff --combined arch/x86/kvm/x86.h
index 20c7a1fb90bb0d07199f7637d3b7f06c689a323b,635b75f9e14540aff2aceb6e4b1c5c3221c444ab..767ec7f9951608f984b4ac69a1c3205ce0a93ebc
  
  void kvm_spurious_fault(void);
  
 -static __always_inline void kvm_guest_enter_irqoff(void)
 -{
 -      /*
 -       * VMENTER enables interrupts (host state), but the kernel state is
 -       * interrupts disabled when this is invoked. Also tell RCU about
 -       * it. This is the same logic as for exit_to_user_mode().
 -       *
 -       * This ensures that e.g. latency analysis on the host observes
 -       * guest mode as interrupt enabled.
 -       *
 -       * guest_enter_irqoff() informs context tracking about the
 -       * transition to guest mode and if enabled adjusts RCU state
 -       * accordingly.
 -       */
 -      instrumentation_begin();
 -      trace_hardirqs_on_prepare();
 -      lockdep_hardirqs_on_prepare(CALLER_ADDR0);
 -      instrumentation_end();
 -
 -      guest_enter_irqoff();
 -      lockdep_hardirqs_on(CALLER_ADDR0);
 -}
 -
 -static __always_inline void kvm_guest_exit_irqoff(void)
 -{
 -      /*
 -       * VMEXIT disables interrupts (host state), but tracing and lockdep
 -       * have them in state 'on' as recorded before entering guest mode.
 -       * Same as enter_from_user_mode().
 -       *
 -       * context_tracking_guest_exit() restores host context and reinstates
 -       * RCU if enabled and required.
 -       *
 -       * This needs to be done immediately after VM-Exit, before any code
 -       * that might contain tracepoints or call out to the greater world,
 -       * e.g. before x86_spec_ctrl_restore_host().
 -       */
 -      lockdep_hardirqs_off(CALLER_ADDR0);
 -      context_tracking_guest_exit();
 -
 -      instrumentation_begin();
 -      trace_hardirqs_off_finish();
 -      instrumentation_end();
 -}
 -
  #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check)               \
  ({                                                                    \
        bool failed = (consistency_check);                              \
@@@ -347,18 -392,27 +347,27 @@@ static inline bool kvm_cstate_in_guest(
        return kvm->arch.cstate_in_guest;
  }
  
- DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+ enum kvm_intr_type {
+       /* Values are arbitrary, but must be non-zero. */
+       KVM_HANDLING_IRQ = 1,
+       KVM_HANDLING_NMI,
+ };
  
- static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
+ static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
+                                       enum kvm_intr_type intr)
  {
-       __this_cpu_write(current_vcpu, vcpu);
+       WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
  }
  
  static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
  {
-       __this_cpu_write(current_vcpu, NULL);
+       WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
  }
  
+ static inline bool kvm_handling_nmi_from_guest(struct kvm_vcpu *vcpu)
+ {
+       return vcpu->arch.handling_intr_from_guest == KVM_HANDLING_NMI;
+ }
  
  static inline bool kvm_pat_valid(u64 data)
  {
diff --combined include/linux/kvm_host.h
index b3810976a27f82d730444c5c7a0e4064ebff50a8,06912d6b39d051013b731ae04b3b26c9f68a5efb..f11039944c08ffd7d5bfe6675f95c8b9f2d3dd86
@@@ -29,9 -29,7 +29,9 @@@
  #include <linux/refcount.h>
  #include <linux/nospec.h>
  #include <linux/notifier.h>
 +#include <linux/ftrace.h>
  #include <linux/hashtable.h>
 +#include <linux/instrumentation.h>
  #include <linux/interval_tree.h>
  #include <linux/rbtree.h>
  #include <linux/xarray.h>
@@@ -370,11 -368,8 +370,11 @@@ struct kvm_vcpu 
        u64 last_used_slot_gen;
  };
  
 -/* must be called with irqs disabled */
 -static __always_inline void guest_enter_irqoff(void)
 +/*
 + * Start accounting time towards a guest.
 + * Must be called before entering guest context.
 + */
 +static __always_inline void guest_timing_enter_irqoff(void)
  {
        /*
         * This is running in ioctl context so its safe to assume that it's the
        instrumentation_begin();
        vtime_account_guest_enter();
        instrumentation_end();
 +}
  
 +/*
 + * Enter guest context and enter an RCU extended quiescent state.
 + *
 + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
 + * unsafe to use any code which may directly or indirectly use RCU, tracing
 + * (including IRQ flag tracing), or lockdep. All code in this period must be
 + * non-instrumentable.
 + */
 +static __always_inline void guest_context_enter_irqoff(void)
 +{
        /*
         * KVM does not hold any references to rcu protected data when it
         * switches CPU into a guest mode. In fact switching to a guest mode
        }
  }
  
 -static __always_inline void guest_exit_irqoff(void)
 +/*
 + * Deprecated. Architectures should move to guest_timing_enter_irqoff() and
 + * guest_state_enter_irqoff().
 + */
 +static __always_inline void guest_enter_irqoff(void)
 +{
 +      guest_timing_enter_irqoff();
 +      guest_context_enter_irqoff();
 +}
 +
 +/**
 + * guest_state_enter_irqoff - Fixup state when entering a guest
 + *
 + * Entry to a guest will enable interrupts, but the kernel state is interrupts
 + * disabled when this is invoked. Also tell RCU about it.
 + *
 + * 1) Trace interrupts on state
 + * 2) Invoke context tracking if enabled to adjust RCU state
 + * 3) Tell lockdep that interrupts are enabled
 + *
 + * Invoked from architecture specific code before entering a guest.
 + * Must be called with interrupts disabled and the caller must be
 + * non-instrumentable.
 + * The caller has to invoke guest_timing_enter_irqoff() before this.
 + *
 + * Note: this is analogous to exit_to_user_mode().
 + */
 +static __always_inline void guest_state_enter_irqoff(void)
 +{
 +      instrumentation_begin();
 +      trace_hardirqs_on_prepare();
 +      lockdep_hardirqs_on_prepare(CALLER_ADDR0);
 +      instrumentation_end();
 +
 +      guest_context_enter_irqoff();
 +      lockdep_hardirqs_on(CALLER_ADDR0);
 +}
 +
 +/*
 + * Exit guest context and exit an RCU extended quiescent state.
 + *
 + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
 + * unsafe to use any code which may directly or indirectly use RCU, tracing
 + * (including IRQ flag tracing), or lockdep. All code in this period must be
 + * non-instrumentable.
 + */
 +static __always_inline void guest_context_exit_irqoff(void)
  {
        context_tracking_guest_exit();
 +}
  
 +/*
 + * Stop accounting time towards a guest.
 + * Must be called after exiting guest context.
 + */
 +static __always_inline void guest_timing_exit_irqoff(void)
 +{
        instrumentation_begin();
        /* Flush the guest cputime we spent on the guest */
        vtime_account_guest_exit();
        instrumentation_end();
  }
  
 +/*
 + * Deprecated. Architectures should move to guest_state_exit_irqoff() and
 + * guest_timing_exit_irqoff().
 + */
 +static __always_inline void guest_exit_irqoff(void)
 +{
 +      guest_context_exit_irqoff();
 +      guest_timing_exit_irqoff();
 +}
 +
  static inline void guest_exit(void)
  {
        unsigned long flags;
        local_irq_restore(flags);
  }
  
 +/**
 + * guest_state_exit_irqoff - Establish state when returning from guest mode
 + *
 + * Entry from a guest disables interrupts, but guest mode is traced as
 + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
 + *
 + * 1) Tell lockdep that interrupts are disabled
 + * 2) Invoke context tracking if enabled to reactivate RCU
 + * 3) Trace interrupts off state
 + *
 + * Invoked from architecture specific code after exiting a guest.
 + * Must be invoked with interrupts disabled and the caller must be
 + * non-instrumentable.
 + * The caller has to invoke guest_timing_exit_irqoff() after this.
 + *
 + * Note: this is analogous to enter_from_user_mode().
 + */
 +static __always_inline void guest_state_exit_irqoff(void)
 +{
 +      lockdep_hardirqs_off(CALLER_ADDR0);
 +      guest_context_exit_irqoff();
 +
 +      instrumentation_begin();
 +      trace_hardirqs_off_finish();
 +      instrumentation_end();
 +}
 +
  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
  {
        /*
@@@ -1527,6 -1421,16 +1527,16 @@@ static inline bool kvm_arch_intc_initia
  }
  #endif
  
+ #ifdef CONFIG_GUEST_PERF_EVENTS
+ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu);
+
+ void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void));
+ void kvm_unregister_perf_callbacks(void);
+ #else
+ static inline void kvm_register_perf_callbacks(void *ign) {}
+ static inline void kvm_unregister_perf_callbacks(void) {}
+ #endif /* CONFIG_GUEST_PERF_EVENTS */
+
  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
  void kvm_arch_destroy_vm(struct kvm *kvm);
  void kvm_arch_sync_events(struct kvm *kvm);
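
With the declarations above, the generic side (virt/kvm, not shown in this
diff) can service perf's guest callbacks through the per-arch hooks. A rough
sketch of those consumers, assuming the common code mirrors the x86 callbacks
it replaces:

static unsigned int kvm_guest_state(void)
{
        struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
        unsigned int state;

        if (!kvm_arch_pmi_in_guest(vcpu))
                return 0;

        state = PERF_GUEST_ACTIVE;
        if (!kvm_arch_vcpu_in_kernel(vcpu))
                state |= PERF_GUEST_USER;

        return state;
}

static unsigned long kvm_guest_get_ip(void)
{
        struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

        return vcpu ? kvm_arch_vcpu_get_ip(vcpu) : 0;
}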