Merge tag 'kvmarm-fixes-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Paolo Bonzini <pbonzini@redhat.com>
Sat, 5 Feb 2022 05:58:25 +0000 (00:58 -0500)
committer Paolo Bonzini <pbonzini@redhat.com>
Sat, 5 Feb 2022 05:58:25 +0000 (00:58 -0500)
diff --combined arch/arm64/kvm/arm.c

index a069d5925f77c10f309fca6e68b6baa3d24cad70,a4a0063df456ca0f963f62ece504e9991edcf5da..ecc5958e27fe2b3fc69b9b1121a626495cb13c46
--- 1/arch/arm64/kvm/arm.c
--- 2/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@@ -482,6 -482,13 +482,13 @@@ bool kvm_arch_vcpu_in_kernel(struct kvm
         return vcpu_mode_priv(vcpu);
   }
   
+ #ifdef CONFIG_GUEST_PERF_EVENTS
+ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+ {
+       return *vcpu_pc(vcpu);
+ }
+ #endif
+ 
   /* Just ensure a guest exit from a particular CPU */
   static void exit_vm_noop(void *info)
   {
@@@ -790,24 -797,6 +797,24 @@@ static bool kvm_vcpu_exit_request(struc
                         xfer_to_guest_mode_work_pending();
   }
   
+ +/*
+ + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ + * the vCPU is running.
+ + *
+ + * This must be noinstr as instrumentation may make use of RCU, and this is not
+ + * safe during the EQS.
+ + */
+ +static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+ +{
+ +      int ret;
+ +
+ +      guest_state_enter_irqoff();
+ +      ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ +      guest_state_exit_irqoff();
+ +
+ +      return ret;
+ +}
+ +
   /**
    * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
    * @vcpu:     The VCPU pointer
@@@ -892,9 -881,9 +899,9 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                  * Enter the guest
                  */
                 trace_kvm_entry(*vcpu_pc(vcpu));
- -              guest_enter_irqoff();
+ +              guest_timing_enter_irqoff();
   
- -              ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ +              ret = kvm_arm_vcpu_enter_exit(vcpu);
   
                 vcpu->mode = OUTSIDE_GUEST_MODE;
                 vcpu->stat.exits++;
@@@ -929,23 -918,26 +936,23 @@@
                 kvm_arch_vcpu_ctxsync_fp(vcpu);
   
                 /*
- -               * We may have taken a host interrupt in HYP mode (ie
- -               * while executing the guest). This interrupt is still
- -               * pending, as we haven't serviced it yet!
+ +               * We must ensure that any pending interrupts are taken before
+ +               * we exit guest timing so that timer ticks are accounted as
+ +               * guest time. Transiently unmask interrupts so that any
+ +               * pending interrupts are taken.
                  *
- -               * We're now back in SVC mode, with interrupts
- -               * disabled.  Enabling the interrupts now will have
- -               * the effect of taking the interrupt again, in SVC
- -               * mode this time.
+ +               * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
+ +               * context synchronization event) is necessary to ensure that
+ +               * pending interrupts are taken.
                  */
                 local_irq_enable();
+ +              isb();
+ +              local_irq_disable();
+ +
+ +              guest_timing_exit_irqoff();
+ +
+ +              local_irq_enable();
   
- -              /*
- -               * We do local_irq_enable() before calling guest_exit() so
- -               * that if a timer interrupt hits while running the guest we
- -               * account that tick as being spent in the guest.  We enable
- -               * preemption after calling guest_exit() so that if we get
- -               * preempted we make sure ticks after that is not counted as
- -               * guest time.
- -               */
- -              guest_exit();
                 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
   
                 /* Exit types that need handling before we can be preempted */
@@@ -1810,7 -1802,8 +1817,8 @@@ static int init_subsystems(void
         if (err)
                 goto out;
   
-       kvm_perf_init();
+       kvm_register_perf_callbacks(NULL);
+ 
         kvm_sys_reg_table_init();
   
   out:
@@@ -2198,7 -2191,7 +2206,7 @@@ out_err
   /* NOP: Compiling as a module not supported */
   void kvm_arch_exit(void)
   {
-       kvm_perf_teardown();
+       kvm_unregister_perf_callbacks();
   }
   
   static int __init early_kvm_mode_cfg(char *arg)
diff --combined arch/x86/include/asm/kvm_host.h

index 80e285533371f46b45d77b3069c5c1d8c105e3b0,6e7c545bc7ee158f8ca63af11d016b438d8c10bf..6dcccb304775411a8f38738526c79a5aa670c820
--- 1/arch/x86/include/asm/kvm_host.h
--- 2/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@@ -782,6 -782,7 +782,7 @@@ struct kvm_vcpu_arch 
         unsigned nmi_pending; /* NMI queued after currently running handler */
         bool nmi_injected;    /* Trying to inject an NMI this entry */
         bool smi_pending;    /* SMI queued after currently running handler */
+       u8 handling_intr_from_guest;
   
         struct kvm_mtrr mtrr_state;
         u64 pat;
@@@ -1409,8 -1410,7 +1410,8 @@@ struct kvm_x86_ops 
         void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
         void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
         void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
- -      int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ +      void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
+ +                                int trig_mode, int vector);
         int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
         int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
         int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
@@@ -1520,6 -1520,7 +1521,7 @@@ struct kvm_x86_init_ops 
         int (*disabled_by_bios)(void);
         int (*check_processor_compatibility)(void);
         int (*hardware_setup)(void);
+       unsigned int (*handle_intel_pt_intr)(void);
   
         struct kvm_x86_ops *runtime_ops;
   };
@@@ -1569,6 -1570,9 +1571,9 @@@ static inline int kvm_arch_flush_remote
                 return -ENOTSUPP;
   }
   
+ #define kvm_arch_pmi_in_guest(vcpu) \
+       ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
+ 
   int kvm_mmu_module_init(void);
   void kvm_mmu_module_exit(void);
   
@@@ -1899,8 -1903,6 +1904,6 @@@ int kvm_skip_emulated_instruction(struc
   int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
   void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
   
- int kvm_is_in_guest(void);
- 
   void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                                      u32 size);
   bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
diff --combined arch/x86/kvm/svm/svm.c

index ea2f7f3614aff6a6df17cb93a4cb4c0de830617b,6d97629655e3d03439c0c030aeb3cbfe8061644a..a290efb272ad1641f36c33b8b6bbca22bdf89a10
--- 1/arch/x86/kvm/svm/svm.c
--- 2/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@@ -3291,21 -3291,6 +3291,21 @@@ static void svm_set_irq(struct kvm_vcp
                 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
   }
   
+ +static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ +                                int trig_mode, int vector)
+ +{
+ +      struct kvm_vcpu *vcpu = apic->vcpu;
+ +
+ +      if (svm_deliver_avic_intr(vcpu, vector)) {
+ +              kvm_lapic_set_irr(vector, apic);
+ +              kvm_make_request(KVM_REQ_EVENT, vcpu);
+ +              kvm_vcpu_kick(vcpu);
+ +      } else {
+ +              trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+ +                                         trig_mode, vector);
+ +      }
+ +}
+ +
   static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
@@@ -3630,7 -3615,7 +3630,7 @@@ static noinstr void svm_vcpu_enter_exit
         struct vcpu_svm *svm = to_svm(vcpu);
         unsigned long vmcb_pa = svm->current_vmcb->pa;
   
- -      kvm_guest_enter_irqoff();
+ +      guest_state_enter_irqoff();
   
         if (sev_es_guest(vcpu->kvm)) {
                 __svm_sev_es_vcpu_run(vmcb_pa);
@@@ -3650,7 -3635,7 +3650,7 @@@
                 vmload(__sme_page_pa(sd->save_area));
         }
   
- -      kvm_guest_exit_irqoff();
+ +      guest_state_exit_irqoff();
   }
   
   static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
@@@ -3750,7 -3735,7 +3750,7 @@@
         vcpu->arch.regs_dirty = 0;
   
         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
-               kvm_before_interrupt(vcpu);
+               kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
   
         kvm_load_host_xsave_state(vcpu);
         stgi();
@@@ -4560,7 -4545,7 +4560,7 @@@ static struct kvm_x86_ops svm_x86_ops _
         .pmu_ops = &amd_pmu_ops,
         .nested_ops = &svm_nested_ops,
   
- -      .deliver_posted_interrupt = svm_deliver_avic_intr,
+ +      .deliver_interrupt = svm_deliver_interrupt,
         .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
         .update_pi_irte = svm_update_pi_irte,
         .setup_mce = svm_setup_mce,
diff --combined arch/x86/kvm/vmx/vmx.c

index c0c256c33d2126e90f405d2d2369196e330eff61,aca3ae2a02f34dd1a30c078803a98878a856f865..6c27bd0c89e1e613782c85637634e6215928ac24
--- 1/arch/x86/kvm/vmx/vmx.c
--- 2/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@@ -4041,21 -4041,6 +4041,21 @@@ static int vmx_deliver_posted_interrupt
         return 0;
   }
   
+ +static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ +                                int trig_mode, int vector)
+ +{
+ +      struct kvm_vcpu *vcpu = apic->vcpu;
+ +
+ +      if (vmx_deliver_posted_interrupt(vcpu, vector)) {
+ +              kvm_lapic_set_irr(vector, apic);
+ +              kvm_make_request(KVM_REQ_EVENT, vcpu);
+ +              kvm_vcpu_kick(vcpu);
+ +      } else {
+ +              trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+ +                                         trig_mode, vector);
+ +      }
+ +}
+ +
   /*
    * Set up the vmcs's constant host-state fields, i.e., host-state fields that
    * will not change in the lifetime of the guest.
@@@ -6495,7 -6480,9 +6495,9 @@@ void vmx_do_interrupt_nmi_irqoff(unsign
   static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
                                         unsigned long entry)
   {
-       kvm_before_interrupt(vcpu);
+       bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist;
+ 
+       kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
         vmx_do_interrupt_nmi_irqoff(entry);
         kvm_after_interrupt(vcpu);
   }
@@@ -6767,7 -6754,7 +6769,7 @@@ static fastpath_t vmx_exit_handlers_fas
   static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                                         struct vcpu_vmx *vmx)
   {
- -      kvm_guest_enter_irqoff();
+ +      guest_state_enter_irqoff();
   
         /* L1D Flush includes CPU buffer clear to mitigate MDS */
         if (static_branch_unlikely(&vmx_l1d_should_flush))
@@@ -6783,7 -6770,7 +6785,7 @@@
   
         vcpu->arch.cr2 = native_read_cr2();
   
- -      kvm_guest_exit_irqoff();
+ +      guest_state_exit_irqoff();
   }
   
   static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@@ -7781,7 -7768,7 +7783,7 @@@ static struct kvm_x86_ops vmx_x86_ops _
         .hwapic_isr_update = vmx_hwapic_isr_update,
         .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
         .sync_pir_to_irr = vmx_sync_pir_to_irr,
- -      .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+ +      .deliver_interrupt = vmx_deliver_interrupt,
         .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
   
         .set_tss_addr = vmx_set_tss_addr,
@@@ -7839,6 -7826,20 +7841,20 @@@
         .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
   };
   
+ static unsigned int vmx_handle_intel_pt_intr(void)
+ {
+       struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+ 
+       /* '0' on failure so that the !PT case can use a RET0 static call. */
+       if (!kvm_arch_pmi_in_guest(vcpu))
+               return 0;
+ 
+       kvm_make_request(KVM_REQ_PMI, vcpu);
+       __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
+                 (unsigned long *)&vcpu->arch.pmu.global_status);
+       return 1;
+ }
+ 
   static __init void vmx_setup_user_return_msrs(void)
   {
   
@@@ -7865,6 -7866,8 +7881,8 @@@
                 kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
   }
   
+ static struct kvm_x86_init_ops vmx_init_ops __initdata;
+ 
   static __init int hardware_setup(void)
   {
         unsigned long host_bndcfgs;
@@@ -8015,6 -8018,10 +8033,10 @@@
                 return -EINVAL;
         if (!enable_ept || !cpu_has_vmx_intel_pt())
                 pt_mode = PT_MODE_SYSTEM;
+       if (pt_mode == PT_MODE_HOST_GUEST)
+               vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
+       else
+               vmx_init_ops.handle_intel_pt_intr = NULL;
   
         setup_default_sgx_lepubkeyhash();
   
@@@ -8043,6 -8050,7 +8065,7 @@@ static struct kvm_x86_init_ops vmx_init
         .disabled_by_bios = vmx_disabled_by_bios,
         .check_processor_compatibility = vmx_check_processor_compat,
         .hardware_setup = hardware_setup,
+       .handle_intel_pt_intr = NULL,
   
         .runtime_ops = &vmx_x86_ops,
   };
diff --combined arch/x86/kvm/x86.c

index b533aab9817281d51f8f9524efe16343674204dd,74b53a16f38a72062a123dd6ce04abd84a8c7264..7131d735b1ef3fb888beb9144795b0f312923a05
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -90,8 -90,6 +90,8 @@@
   u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
   EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
   
+ +#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
+ +
   #define emul_to_vcpu(ctxt) \
         ((struct kvm_vcpu *)(ctxt)->vcpu)
   
@@@ -4342,7 -4340,7 +4342,7 @@@ static inline void __user *kvm_get_attr
         void __user *uaddr = (void __user*)(unsigned long)attr->addr;
   
         if ((u64)(unsigned long)uaddr != attr->addr)
- -              return ERR_PTR(-EFAULT);
+ +              return ERR_PTR_USR(-EFAULT);
         return uaddr;
   }
   
@@@ -8722,50 -8720,6 +8722,6 @@@ static void kvm_timer_init(void
                           kvmclock_cpu_online, kvmclock_cpu_down_prep);
   }
   
- DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
- EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
- 
- int kvm_is_in_guest(void)
- {
-       return __this_cpu_read(current_vcpu) != NULL;
- }
- 
- static int kvm_is_user_mode(void)
- {
-       int user_mode = 3;
- 
-       if (__this_cpu_read(current_vcpu))
-               user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
- 
-       return user_mode != 0;
- }
- 
- static unsigned long kvm_get_guest_ip(void)
- {
-       unsigned long ip = 0;
- 
-       if (__this_cpu_read(current_vcpu))
-               ip = kvm_rip_read(__this_cpu_read(current_vcpu));
- 
-       return ip;
- }
- 
- static void kvm_handle_intel_pt_intr(void)
- {
-       struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
- 
-       kvm_make_request(KVM_REQ_PMI, vcpu);
-       __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
-                       (unsigned long *)&vcpu->arch.pmu.global_status);
- }
- 
- static struct perf_guest_info_callbacks kvm_guest_cbs = {
-       .is_in_guest            = kvm_is_in_guest,
-       .is_user_mode           = kvm_is_user_mode,
-       .get_guest_ip           = kvm_get_guest_ip,
-       .handle_intel_pt_intr   = kvm_handle_intel_pt_intr,
- };
- 
   #ifdef CONFIG_X86_64
   static void pvclock_gtod_update_fn(struct work_struct *work)
   {
@@@ -8878,8 -8832,6 +8834,6 @@@ int kvm_arch_init(void *opaque
   
         kvm_timer_init();
   
-       perf_register_guest_info_callbacks(&kvm_guest_cbs);
- 
         if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
                 supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@@@ -8911,7 -8863,6 +8865,6 @@@ void kvm_arch_exit(void
                 clear_hv_tscchange_cb();
   #endif
         kvm_lapic_exit();
-       perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
   
         if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
@@@ -10090,8 -10041,6 +10043,8 @@@ static int vcpu_enter_guest(struct kvm_
                 set_debugreg(0, 7);
         }
   
+ +      guest_timing_enter_irqoff();
+ +
         for (;;) {
                 /*
                  * Assert that vCPU vs. VM APICv state is consistent.  An APICv
@@@ -10163,7 -10112,7 +10116,7 @@@
          * interrupts on processors that implement an interrupt shadow, the
          * stat.exits increment will do nicely.
          */
-       kvm_before_interrupt(vcpu);
+       kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
         local_irq_enable();
         ++vcpu->stat.exits;
         local_irq_disable();
@@@ -10176,7 -10125,7 +10129,7 @@@
          * of accounting via context tracking, but the loss of accuracy is
          * acceptable for all known use cases.
          */
- -      vtime_account_guest_exit();
+ +      guest_timing_exit_irqoff();
   
         if (lapic_in_kernel(vcpu)) {
                 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
@@@ -11522,6 -11471,8 +11475,8 @@@ int kvm_arch_hardware_setup(void *opaqu
         memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
         kvm_ops_static_call_update();
   
+       kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
+ 
         if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
                 supported_xss = 0;
   
@@@ -11549,6 -11500,8 +11504,8 @@@
   
   void kvm_arch_hardware_unsetup(void)
   {
+       kvm_unregister_perf_callbacks();
+ 
         static_call(kvm_x86_hardware_unsetup)();
   }
   
@@@ -11686,6 -11639,8 +11643,6 @@@ void kvm_arch_sync_events(struct kvm *k
         kvm_free_pit(kvm);
   }
   
- -#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
- -
   /**
    * __x86_set_memory_region: Setup KVM internal memory slot
    *
@@@ -12140,6 -12095,11 +12097,11 @@@ bool kvm_arch_vcpu_in_kernel(struct kvm
         return vcpu->arch.preempted_in_kernel;
   }
   
+ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+ {
+       return kvm_rip_read(vcpu);
+ }
+ 
   int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
   {
         return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
diff --combined arch/x86/kvm/x86.h

index 20c7a1fb90bb0d07199f7637d3b7f06c689a323b,635b75f9e14540aff2aceb6e4b1c5c3221c444ab..767ec7f9951608f984b4ac69a1c3205ce0a93ebc
--- 1/arch/x86/kvm/x86.h
--- 2/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@@ -10,6 -10,51 +10,6 @@@
   
   void kvm_spurious_fault(void);
   
- -static __always_inline void kvm_guest_enter_irqoff(void)
- -{
- -      /*
- -       * VMENTER enables interrupts (host state), but the kernel state is
- -       * interrupts disabled when this is invoked. Also tell RCU about
- -       * it. This is the same logic as for exit_to_user_mode().
- -       *
- -       * This ensures that e.g. latency analysis on the host observes
- -       * guest mode as interrupt enabled.
- -       *
- -       * guest_enter_irqoff() informs context tracking about the
- -       * transition to guest mode and if enabled adjusts RCU state
- -       * accordingly.
- -       */
- -      instrumentation_begin();
- -      trace_hardirqs_on_prepare();
- -      lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- -      instrumentation_end();
- -
- -      guest_enter_irqoff();
- -      lockdep_hardirqs_on(CALLER_ADDR0);
- -}
- -
- -static __always_inline void kvm_guest_exit_irqoff(void)
- -{
- -      /*
- -       * VMEXIT disables interrupts (host state), but tracing and lockdep
- -       * have them in state 'on' as recorded before entering guest mode.
- -       * Same as enter_from_user_mode().
- -       *
- -       * context_tracking_guest_exit() restores host context and reinstates
- -       * RCU if enabled and required.
- -       *
- -       * This needs to be done immediately after VM-Exit, before any code
- -       * that might contain tracepoints or call out to the greater world,
- -       * e.g. before x86_spec_ctrl_restore_host().
- -       */
- -      lockdep_hardirqs_off(CALLER_ADDR0);
- -      context_tracking_guest_exit();
- -
- -      instrumentation_begin();
- -      trace_hardirqs_off_finish();
- -      instrumentation_end();
- -}
- -
   #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check)               \
   ({                                                                    \
         bool failed = (consistency_check);                              \
@@@ -347,18 -392,27 +347,27 @@@ static inline bool kvm_cstate_in_guest(
         return kvm->arch.cstate_in_guest;
   }
   
- DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+ enum kvm_intr_type {
+       /* Values are arbitrary, but must be non-zero. */
+       KVM_HANDLING_IRQ = 1,
+       KVM_HANDLING_NMI,
+ };
   
- static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
+ static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
+                                       enum kvm_intr_type intr)
   {
-       __this_cpu_write(current_vcpu, vcpu);
+       WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
   }
   
   static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
   {
-       __this_cpu_write(current_vcpu, NULL);
+       WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
   }
   
+ static inline bool kvm_handling_nmi_from_guest(struct kvm_vcpu *vcpu)
+ {
+       return vcpu->arch.handling_intr_from_guest == KVM_HANDLING_NMI;
+ }
   
   static inline bool kvm_pat_valid(u64 data)
   {
diff --combined include/linux/kvm_host.h

index b3810976a27f82d730444c5c7a0e4064ebff50a8,06912d6b39d051013b731ae04b3b26c9f68a5efb..f11039944c08ffd7d5bfe6675f95c8b9f2d3dd86
--- 1/include/linux/kvm_host.h
--- 2/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@@ -29,9 -29,7 +29,9 @@@
   #include <linux/refcount.h>
   #include <linux/nospec.h>
   #include <linux/notifier.h>
+ +#include <linux/ftrace.h>
   #include <linux/hashtable.h>
+ +#include <linux/instrumentation.h>
   #include <linux/interval_tree.h>
   #include <linux/rbtree.h>
   #include <linux/xarray.h>
@@@ -370,11 -368,8 +370,11 @@@ struct kvm_vcpu 
         u64 last_used_slot_gen;
   };
   
- -/* must be called with irqs disabled */
- -static __always_inline void guest_enter_irqoff(void)
+ +/*
+ + * Start accounting time towards a guest.
+ + * Must be called before entering guest context.
+ + */
+ +static __always_inline void guest_timing_enter_irqoff(void)
   {
         /*
          * This is running in ioctl context so its safe to assume that it's the
@@@ -383,18 -378,7 +383,18 @@@
         instrumentation_begin();
         vtime_account_guest_enter();
         instrumentation_end();
+ +}
   
+ +/*
+ + * Enter guest context and enter an RCU extended quiescent state.
+ + *
+ + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
+ + * unsafe to use any code which may directly or indirectly use RCU, tracing
+ + * (including IRQ flag tracing), or lockdep. All code in this period must be
+ + * non-instrumentable.
+ + */
+ +static __always_inline void guest_context_enter_irqoff(void)
+ +{
         /*
          * KVM does not hold any references to rcu protected data when it
          * switches CPU into a guest mode. In fact switching to a guest mode
@@@ -410,79 -394,16 +410,79 @@@
         }
   }
   
- -static __always_inline void guest_exit_irqoff(void)
+ +/*
+ + * Deprecated. Architectures should move to guest_timing_enter_irqoff() and
+ + * guest_state_enter_irqoff().
+ + */
+ +static __always_inline void guest_enter_irqoff(void)
+ +{
+ +      guest_timing_enter_irqoff();
+ +      guest_context_enter_irqoff();
+ +}
+ +
+ +/**
+ + * guest_state_enter_irqoff - Fixup state when entering a guest
+ + *
+ + * Entry to a guest will enable interrupts, but the kernel state is interrupts
+ + * disabled when this is invoked. Also tell RCU about it.
+ + *
+ + * 1) Trace interrupts on state
+ + * 2) Invoke context tracking if enabled to adjust RCU state
+ + * 3) Tell lockdep that interrupts are enabled
+ + *
+ + * Invoked from architecture specific code before entering a guest.
+ + * Must be called with interrupts disabled and the caller must be
+ + * non-instrumentable.
+ + * The caller has to invoke guest_timing_enter_irqoff() before this.
+ + *
+ + * Note: this is analogous to exit_to_user_mode().
+ + */
+ +static __always_inline void guest_state_enter_irqoff(void)
+ +{
+ +      instrumentation_begin();
+ +      trace_hardirqs_on_prepare();
+ +      lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ +      instrumentation_end();
+ +
+ +      guest_context_enter_irqoff();
+ +      lockdep_hardirqs_on(CALLER_ADDR0);
+ +}
+ +
+ +/*
+ + * Exit guest context and exit an RCU extended quiescent state.
+ + *
+ + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is
+ + * unsafe to use any code which may directly or indirectly use RCU, tracing
+ + * (including IRQ flag tracing), or lockdep. All code in this period must be
+ + * non-instrumentable.
+ + */
+ +static __always_inline void guest_context_exit_irqoff(void)
   {
         context_tracking_guest_exit();
+ +}
   
+ +/*
+ + * Stop accounting time towards a guest.
+ + * Must be called after exiting guest context.
+ + */
+ +static __always_inline void guest_timing_exit_irqoff(void)
+ +{
         instrumentation_begin();
         /* Flush the guest cputime we spent on the guest */
         vtime_account_guest_exit();
         instrumentation_end();
   }
   
+ +/*
+ + * Deprecated. Architectures should move to guest_state_exit_irqoff() and
+ + * guest_timing_exit_irqoff().
+ + */
+ +static __always_inline void guest_exit_irqoff(void)
+ +{
+ +      guest_context_exit_irqoff();
+ +      guest_timing_exit_irqoff();
+ +}
+ +
   static inline void guest_exit(void)
   {
         unsigned long flags;
@@@ -492,33 -413,6 +492,33 @@@
         local_irq_restore(flags);
   }
   
+ +/**
+ + * guest_state_exit_irqoff - Establish state when returning from guest mode
+ + *
+ + * Entry from a guest disables interrupts, but guest mode is traced as
+ + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
+ + *
+ + * 1) Tell lockdep that interrupts are disabled
+ + * 2) Invoke context tracking if enabled to reactivate RCU
+ + * 3) Trace interrupts off state
+ + *
+ + * Invoked from architecture specific code after exiting a guest.
+ + * Must be invoked with interrupts disabled and the caller must be
+ + * non-instrumentable.
+ + * The caller has to invoke guest_timing_exit_irqoff() after this.
+ + *
+ + * Note: this is analogous to enter_from_user_mode().
+ + */
+ +static __always_inline void guest_state_exit_irqoff(void)
+ +{
+ +      lockdep_hardirqs_off(CALLER_ADDR0);
+ +      guest_context_exit_irqoff();
+ +
+ +      instrumentation_begin();
+ +      trace_hardirqs_off_finish();
+ +      instrumentation_end();
+ +}
+ +
   static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
   {
         /*
@@@ -1527,6 -1421,16 +1527,16 @@@ static inline bool kvm_arch_intc_initia
   }
   #endif
   
+ #ifdef CONFIG_GUEST_PERF_EVENTS
+ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu);
+ 
+ void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void));
+ void kvm_unregister_perf_callbacks(void);
+ #else
+ static inline void kvm_register_perf_callbacks(void *ign) {}
+ static inline void kvm_unregister_perf_callbacks(void) {}
+ #endif /* CONFIG_GUEST_PERF_EVENTS */
+ 
   int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
   void kvm_arch_destroy_vm(struct kvm *kvm);
   void kvm_arch_sync_events(struct kvm *kvm);
author	Paolo Bonzini <pbonzini@redhat.com>
	Sat, 5 Feb 2022 05:58:25 +0000 (00:58 -0500)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Sat, 5 Feb 2022 05:58:25 +0000 (00:58 -0500)
		1	2
arch/arm64/kvm/arm.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/kvm_host.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/svm/svm.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/vmx/vmx.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/kvm_host.h	patch \|	diff1 \|	diff2 \|	blob \| history