KVM: PPC: Book3S HV: Speed up wakeups of CPUs on HV KVM
author Benjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 17 Apr 2013 20:30:50 +0000 (20:30 +0000)
committer Alexander Graf <agraf@suse.de>
Fri, 26 Apr 2013 18:27:31 +0000 (20:27 +0200)
Currently, we wake up a CPU by sending a host IPI with
smp_send_reschedule() to thread 0 of that core, which will take all
threads out of the guest, and cause them to re-evaluate their
interrupt status on the way back in.

This adds a mechanism to differentiate real host IPIs from IPIs sent
by KVM for guest threads to poke each other, in order to target the
guest threads precisely when possible and avoid that global switch of
the core to host state.

We then use this new facility in the in-kernel XICS code.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
arch/powerpc/include/asm/kvm_book3s_asm.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/sysdev/xics/icp-native.c

index cdc3d2717cc6e0feb9858e2cf1502958e8b3d711..9039d3c97eecd2a5324ab759c7511d448ed1a49c 100644 (file)
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+/* XICS ICP register offsets */
+#define XICS_XIRR              4
+#define XICS_MFRR              0xc
+#define XICS_IPI               2       /* interrupt source # for IPIs */
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -81,10 +86,11 @@ struct kvmppc_host_state {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
        u8 hwthread_req;
        u8 hwthread_state;
-
+       u8 host_ipi;
        struct kvm_vcpu *kvm_vcpu;
        struct kvmppc_vcore *kvm_vcore;
        unsigned long xics_phys;
+       u32 saved_xirr;
        u64 dabr;
        u64 host_mmcr[3];
        u32 host_pmc[8];
index 6582eed321bac0f8c1df4459f13c1ef31caf3943..1589fd8bf0630d0e8bdcf15a9c1bdd5116dbe4da 100644 (file)
@@ -264,6 +264,21 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
        paca[cpu].kvm_hstate.xics_phys = addr;
 }
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+       u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+
+       get_paca()->kvm_hstate.saved_xirr = 0;
+
+       return xirr;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{
+       paca[cpu].kvm_hstate.host_ipi = host_ipi;
+}
+
+extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);
 
 #else
@@ -273,6 +288,18 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 static inline void kvm_linear_init(void)
 {}
 
+static inline u32 kvmppc_get_xics_latch(void)
+{
+       return 0;
+}
+
+static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+{}
+
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+       kvm_vcpu_kick(vcpu);
+}
 #endif
 
 #ifdef CONFIG_KVM_XICS
@@ -393,4 +420,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
        return ea;
 }
 
+extern void xics_wake_cpu(int cpu);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
index dbfd5498f4404e0feb353a3076d2466ca66eac1c..a791229329cfa2a8bd713f6ad206122198d635ec 100644 (file)
@@ -574,6 +574,8 @@ int main(void)
        HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
        HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
        HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+       HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+       HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
        HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
        HSTATE_FIELD(HSTATE_PMC, host_pmc);
        HSTATE_FIELD(HSTATE_PURR, host_purr);
index 82ba00f68b074392a886a9de191fd227297e97cb..16191915e8d0f2f8b5d8662aecd5d6cbc749ee9c 100644 (file)
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+       int me;
+       int cpu = vcpu->cpu;
+       wait_queue_head_t *wqp;
+
+       wqp = kvm_arch_vcpu_wq(vcpu);
+       if (waitqueue_active(wqp)) {
+               wake_up_interruptible(wqp);
+               ++vcpu->stat.halt_wakeup;
+       }
+
+       me = get_cpu();
+
+       /* CPU points to the first thread of the core */
+       if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
+               int real_cpu = cpu + vcpu->arch.ptid;
+               if (paca[real_cpu].kvm_hstate.xics_phys)
+                       xics_wake_cpu(real_cpu);
+               else if (cpu_online(cpu))
+                       smp_send_reschedule(cpu);
+       }
+       put_cpu();
+}
+
 /*
  * We use the vcpu_load/put functions to measure stolen time.
  * Stolen time is counted as time when either the vcpu is able to
@@ -985,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 }
 
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern void xics_wake_cpu(int cpu);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
                                   struct kvm_vcpu *vcpu)
index 0f23bb851711aa5c98fbebaecec6b3196622a05f..56f8927b0ddf5a3d03c3016bbcd16a845fe1cf3b 100644 (file)
@@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
  *                                                                            *
  *****************************************************************************/
 
-#define XICS_XIRR              4
-#define XICS_QIRR              0xc
-#define XICS_IPI               2       /* interrupt source # for IPIs */
-
 /*
  * We come in here when wakened from nap mode on a secondary hw thread.
  * Relocation is off and most register values are lost.
@@ -122,7 +118,7 @@ kvm_start_guest:
        beq     27f
 25:    ld      r5,HSTATE_XICS_PHYS(r13)
        li      r0,0xff
-       li      r6,XICS_QIRR
+       li      r6,XICS_MFRR
        li      r7,XICS_XIRR
        lwzcix  r8,r5,r7                /* get and ack the interrupt */
        sync
@@ -678,17 +674,91 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        cmpwi   r12,BOOK3S_INTERRUPT_SYSCALL
        beq     hcall_try_real_mode
 
-       /* Check for mediated interrupts (could be done earlier really ...) */
+       /* Only handle external interrupts here on arch 206 and later */
 BEGIN_FTR_SECTION
-       cmpwi   r12,BOOK3S_INTERRUPT_EXTERNAL
-       bne+    1f
-       andi.   r0,r11,MSR_EE
-       beq     1f
-       mfspr   r5,SPRN_LPCR
-       andi.   r0,r5,LPCR_MER
+       b       ext_interrupt_to_host
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+       /* External interrupt ? */
+       cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
+       bne+    ext_interrupt_to_host
+
+       /* External interrupt, first check for host_ipi. If this is
+        * set, we know the host wants us out so let's do it now
+        */
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       cmpwi   r0, 0
+       bne     ext_interrupt_to_host
+
+       /* Now read the interrupt from the ICP */
+       ld      r5, HSTATE_XICS_PHYS(r13)
+       li      r7, XICS_XIRR
+       cmpdi   r5, 0
+       beq-    ext_interrupt_to_host
+       lwzcix  r3, r5, r7
+       rlwinm. r0, r3, 0, 0xffffff
+       sync
+       bne     1f
+
+       /* Nothing pending in the ICP, check for mediated interrupts
+        * and bounce it to the guest
+        */
+       andi.   r0, r11, MSR_EE
+       beq     ext_interrupt_to_host /* shouldn't happen ?? */
+       mfspr   r5, SPRN_LPCR
+       andi.   r0, r5, LPCR_MER
        bne     bounce_ext_interrupt
-1:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+       b       ext_interrupt_to_host /* shouldn't happen ?? */
+
+1:     /* We found something in the ICP...
+        *
+        * If it's not an IPI, stash it in the PACA and return to
+        * the host, we don't (yet) handle directing real external
+        * interrupts directly to the guest
+        */
+       cmpwi   r0, XICS_IPI
+       bne     ext_stash_for_host
+
+       /* It's an IPI, clear the MFRR and EOI it */
+       li      r0, 0xff
+       li      r6, XICS_MFRR
+       stbcix  r0, r5, r6              /* clear the IPI */
+       stwcix  r3, r5, r7              /* EOI it */
+       sync
+
+       /* We need to re-check host IPI now in case it got set in the
+        * meantime. If it's clear, we bounce the interrupt to the
+        * guest
+        */
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       cmpwi   r0, 0
+       bne-    1f
+
+       /* Allright, looks like an IPI for the guest, we need to set MER */
+       mfspr   r8,SPRN_LPCR
+       ori     r8,r8,LPCR_MER
+       mtspr   SPRN_LPCR,r8
+
+       /* And if the guest EE is set, we can deliver immediately, else
+        * we return to the guest with MER set
+        */
+       andi.   r0, r11, MSR_EE
+       bne     bounce_ext_interrupt
+       mr      r4, r9
+       b       fast_guest_return
+
+       /* We raced with the host, we need to resend that IPI, bummer */
+1:     li      r0, IPI_PRIORITY
+       stbcix  r0, r5, r6              /* set the IPI */
+       sync
+       b       ext_interrupt_to_host
+
+ext_stash_for_host:
+       /* It's not an IPI and it's for the host, stash it in the PACA
+        * before exit, it will be picked up by the host ICP driver
+        */
+       stw     r3, HSTATE_SAVED_XIRR(r13)
+ext_interrupt_to_host:
 
 guest_exit_cont:               /* r9 = vcpu, r12 = trap, r13 = paca */
        /* Save DEC */
@@ -831,7 +901,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        beq     44f
        ld      r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
        li      r0,IPI_PRIORITY
-       li      r7,XICS_QIRR
+       li      r7,XICS_MFRR
        stbcix  r0,r7,r8                /* trigger the IPI */
 44:    srdi.   r3,r3,1
        addi    r6,r6,PACA_SIZE
@@ -1630,7 +1700,7 @@ secondary_nap:
        beq     37f
        sync
        li      r0, 0xff
-       li      r6, XICS_QIRR
+       li      r6, XICS_MFRR
        stbcix  r0, r5, r6              /* clear the IPI */
        stwcix  r3, r5, r7              /* EOI it */
 37:    sync
index 53af848116f21234f40022be86548e93eb978d6e..1417e65b6bbd1d0419898376af72335c044481df 100644 (file)
@@ -227,7 +227,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
                kvmppc_book3s_queue_irqprio(icp->vcpu,
                                            BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
                if (!change_self)
-                       kvm_vcpu_kick(icp->vcpu);
+                       kvmppc_fast_vcpu_kick(icp->vcpu);
        }
  bail:
        return success;
index 48861d3fcd070cbc56f4e4189e4396f7196938c4..20b328bb494d7e16f0d646777d9a1b9ea37e7a57 100644 (file)
@@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
 static inline unsigned int icp_native_get_xirr(void)
 {
        int cpu = smp_processor_id();
+       unsigned int xirr;
+
+       /* Handled an interrupt latched by KVM */
+       xirr = kvmppc_get_xics_latch();
+       if (xirr)
+               return xirr;
 
        return in_be32(&icp_native_regs[cpu]->xirr.word);
 }
@@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void)
 
 static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
+       kvmppc_set_host_ipi(cpu, 1);
        icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
 {
        int cpu = smp_processor_id();
 
+       kvmppc_set_host_ipi(cpu, 0);
        icp_native_set_qirr(cpu, 0xff);
 
        return smp_ipi_demux();