Merge tag 'kvm-ppc-next-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Radim Krčmář <rkrcmar@redhat.com>

Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)

committer Radim Krčmář <rkrcmar@redhat.com>

Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
author Radim Krčmář <rkrcmar@redhat.com>
Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
committer Radim Krčmář <rkrcmar@redhat.com>
Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt

index 70d3368adba9fbfaa143b9e1dd611e5d8cc11bb5..792fa8717d133e1aa7d6c73a8b948d53150e6d78 100644 (file)
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1841,6 +1841,7 @@ registers, find a list below:
    PPC  | KVM_REG_PPC_DBSR              | 32
    PPC   | KVM_REG_PPC_TIDR              | 64
    PPC   | KVM_REG_PPC_PSSCR             | 64
+  PPC   | KVM_REG_PPC_DEC_EXPIRY        | 64
    PPC   | KVM_REG_PPC_TM_GPR0           | 64
            ...
    PPC   | KVM_REG_PPC_TM_GPR31          | 64
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h

index 85b7a1a21e228571df158782f36a79e20728cff9..9c14f7b5c46cd9bcb46a287ec229928c80c5c4ff 100644 (file)
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
  static inline void wait_for_subcore_guest_exit(void) { }
  static inline void wait_for_tb_resync(void) { }
  #endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
  #endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h

index 735cfa35298ac73adca40d91ef058215f5600b1e..998f7b7aaa9e5c1e905d5b202d9e2b091fc037b4 100644 (file)
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
         lphi = (l >> 16) & 0xf;
         switch ((l >> 12) & 0xf) {
         case 0:
-               return !lphi ? 24 : -1;         /* 16MB */
+               return !lphi ? 24 : 0;          /* 16MB */
                 break;
         case 1:
                 return 16;                      /* 64kB */
                 break;
         case 3:
-               return !lphi ? 34 : -1;         /* 16GB */
+               return !lphi ? 34 : 0;          /* 16GB */
                 break;
         case 7:
                 return (16 << 8) + 12;          /* 64kB in 4kB */
@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
                         return (24 << 8) + 12;  /* 16MB in 4kB */
                 break;
         }
-       return -1;
+       return 0;
  }
  
  static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
  
  static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
  {
-       return 1ul << kvmppc_hpte_actual_page_shift(v, r);
+       int shift = kvmppc_hpte_actual_page_shift(v, r);
+
+       if (shift)
+               return 1ul << shift;
+       return 0;
  }
  
  static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
                 va_low ^= v >> (SID_SHIFT_1T - 16);
         va_low &= 0x7ff;
  
-       if (b_pgshift == 12) {
+       if (b_pgshift <= 12) {
                 if (a_pgshift > 12) {
                         sllp = (a_pgshift == 16) ? 5 : 4;
                         rb |= sllp << 5;        /*  AP field */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h

index 3aa5b577cd609cea0c183b1949ec11cc5f2df9b7..fef8133becc85c290d2366fed81dfaffa2f5f7bb 100644 (file)
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -709,6 +709,7 @@ struct kvm_vcpu_arch {
         u8 ceded;
         u8 prodded;
         u8 doorbell_request;
+       u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
         u32 last_inst;
  
         struct swait_queue_head *wqp;
@@ -738,8 +739,11 @@ struct kvm_vcpu_arch {
         struct kvmppc_icp *icp; /* XICS presentation controller */
         struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
         __be32 xive_cam_word;    /* Cooked W2 in proper endian with valid bit */
-       u32 xive_pushed;         /* Is the VP pushed on the physical CPU ? */
+       u8 xive_pushed;          /* Is the VP pushed on the physical CPU ? */
+       u8 xive_esc_on;          /* Is the escalation irq enabled ? */
         union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+       u64 xive_esc_raddr;      /* Escalation interrupt ESB real addr */
+       u64 xive_esc_vaddr;      /* Escalation interrupt ESB virt addr */
  #endif
  
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h

index 233c7504b1f20bf036b1131eb0c9db6dcecdafaa..fc926743647ec6f5b79eabadc93d88b2c5a77072 100644 (file)
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1073,6 +1073,7 @@ enum {
  /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
  enum {
         OPAL_XIVE_VP_ENABLED            = 0x00000001,
+       OPAL_XIVE_VP_SINGLE_ESCALATION  = 0x00000002,
  };
  
  /* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h

index b779f3ccd4126d2de2ab8303654bef44a6a2af58..14e41b84395279e26123dc58cf0f87fa37e9abc8 100644 (file)
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -432,8 +432,9 @@
  #define SPRN_LPID      0x13F   /* Logical Partition Identifier */
  #endif
  #define   LPID_RSVD    0x3ff           /* Reserved LPID for partn switching */
-#define        SPRN_HMER       0x150   /* Hardware m? error recovery */
-#define        SPRN_HMEER      0x151   /* Hardware m? enable error recovery */
+#define        SPRN_HMER       0x150   /* Hypervisor maintenance exception reg */
+#define   HMER_DEBUG_TRIG      (1ul << (63 - 17)) /* Debug trigger */
+#define        SPRN_HMEER      0x151   /* Hyp maintenance exception enable reg */
  #define SPRN_PCR       0x152   /* Processor compatibility register */
  #define   PCR_VEC_DIS  (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
  #define   PCR_VSX_DIS  (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h

index 1d3f2be5ae39e9044a6dd56457ee146337fd2e3b..fa4288822b681db4337ecc7461ea118d95e36f3f 100644 (file)
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -9,6 +9,41 @@
  #ifndef _ASM_POWERPC_XIVE_REGS_H
  #define _ASM_POWERPC_XIVE_REGS_H
  
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI     0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI      0x000 /* Load */
+#define XIVE_ESB_GET           0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00     0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01     0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10     0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11     0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P         0x2
+#define XIVE_ESB_VAL_Q         0x1
+
  /*
   * Thread Management (aka "TM") registers
   */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h

index 371fbebf1ec9e4c43739e0d44439145b34e791d6..e602903c3029e88c61ca2141901cb68e8d67f5ae 100644 (file)
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
  #define XIVE_IRQ_FLAG_EOI_FW   0x10
  #define XIVE_IRQ_FLAG_H_INT_ESB        0x20
  
+/* Special flag set by KVM for escalation interrupts */
+#define XIVE_IRQ_NO_EOI                0x80
+
  #define XIVE_INVALID_CHIP_ID   -1
  
  /* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
         atomic_t                pending_count;
  };
  
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI     0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI      0x000 /* Load */
-#define XIVE_ESB_GET           0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00     0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01     0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10     0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11     0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P         0x2
-#define XIVE_ESB_VAL_Q         0x1
-
  /* Global enable flags for the XIVE support */
  extern bool __xive_enabled;
  
@@ -143,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
  
  extern void xive_native_sync_source(u32 hw_irq);
  extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
  extern int xive_native_disable_vp(u32 vp_id);
  extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
  
  #else
  
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h

index 637b7263cb867f09618cc2a5e7b525686a0ea267..833ed9a16adfd03e0b6cb70adc19fe03055f7344 100644 (file)
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
  #define KVM_REG_PPC_TIDR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
  #define KVM_REG_PPC_PSSCR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
  
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
  /* Transactional Memory checkpointed state:
   * This is all GPRs, all VSX regs and a subset of SPRs
   */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c

index f390d57cf2e1a711335bbd66cf7819e9dcd8442f..ff6ce2fd7579434710bf6f1f5d5a15f66ff110ae 100644 (file)
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -519,6 +519,7 @@ int main(void)
         OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
         OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
         OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+       OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
         OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
         OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
         OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@ -738,6 +739,9 @@ int main(void)
         DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
                                             arch.xive_cam_word));
         DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+       DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+       DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+       DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
  #endif
  
  #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c

index 742e4658c5dc1d4ec2df625eb82ea34473f24e39..d2fecaec4fec041590d0e96810ce7f69afeb3238 100644 (file)
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
         return handled;
  }
  
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+       DTRIG_UNKNOWN,
+       DTRIG_VECTOR_CI,        /* need to emulate vector CI load instr */
+       DTRIG_SUSPEND_ESCAPE,   /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
  {
-       __this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-       /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
-       if (pvr_version_is(PVR_POWER9)) {
-               unsigned long hmer = mfspr(SPRN_HMER);
-
-               /* Do we have the debug bit set */
-               if (hmer & PPC_BIT(17)) {
-                       hmer &= ~PPC_BIT(17);
-                       mtspr(SPRN_HMER, hmer);
-
-                       /*
-                        * Now to avoid problems with soft-disable we
-                        * only do the emulation if we are coming from
-                        * user space
-                        */
-                       if (user_mode(regs))
-                               local_paca->hmi_p9_special_emu = 1;
-
-                       /*
-                        * Don't bother going to OPAL if that's the
-                        * only relevant bit.
-                        */
-                       if (!(hmer & mfspr(SPRN_HMEER)))
-                               return local_paca->hmi_p9_special_emu;
+       int pvr;
+       struct device_node *cpun;
+       struct property *prop = NULL;
+       const char *str;
+
+       /* First look in the device tree */
+       preempt_disable();
+       cpun = of_get_cpu_node(smp_processor_id(), NULL);
+       if (cpun) {
+               of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+                                           prop, str) {
+                       if (strcmp(str, "bit17-vector-ci-load") == 0)
+                               hmer_debug_trig_function = DTRIG_VECTOR_CI;
+                       else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+                               hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
                 }
+               of_node_put(cpun);
+       }
+       preempt_enable();
+
+       /* If we found the property, don't look at PVR */
+       if (prop)
+               goto out;
+
+       pvr = mfspr(SPRN_PVR);
+       /* Check for POWER9 Nimbus (scale-out) */
+       if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+               /* DD2.2 and later */
+               if ((pvr & 0xfff) >= 0x202)
+                       hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+               /* DD2.0 and DD2.1 - used for vector CI load emulation */
+               else if ((pvr & 0xfff) >= 0x200)
+                       hmer_debug_trig_function = DTRIG_VECTOR_CI;
+       }
+
+ out:
+       switch (hmer_debug_trig_function) {
+       case DTRIG_VECTOR_CI:
+               pr_debug("HMI debug trigger used for vector CI load\n");
+               break;
+       case DTRIG_SUSPEND_ESCAPE:
+               pr_debug("HMI debug trigger used for TM suspend escape\n");
+               break;
+       default:
+               break;
         }
-#endif /* CONFIG_PPC_BOOK3S_64 */
+       return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ *  0 means no further handling is required
+ *  1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+       unsigned long hmer = mfspr(SPRN_HMER);
+       long ret = 0;
+
+       /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+       if (!((hmer & HMER_DEBUG_TRIG)
+             && hmer_debug_trig_function != DTRIG_UNKNOWN))
+               return -1;
+               
+       hmer &= ~HMER_DEBUG_TRIG;
+       /* HMER is a write-AND register */
+       mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+       switch (hmer_debug_trig_function) {
+       case DTRIG_VECTOR_CI:
+               /*
+                * Now to avoid problems with soft-disable we
+                * only do the emulation if we are coming from
+                * host user space
+                */
+               if (regs && user_mode(regs))
+                       ret = local_paca->hmi_p9_special_emu = 1;
+
+               break;
+
+       default:
+               break;
+       }
+
+       /*
+        * See if any other HMI causes remain to be handled
+        */
+       if (hmer & mfspr(SPRN_HMEER))
+               return -1;
+
+       return ret;
+}
+
+/*
+ * Return values:
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{      
+       int ret;
+
+       __this_cpu_inc(irq_stat.hmi_exceptions);
+
+       ret = hmi_handle_debugtrig(regs);
+       if (ret >= 0)
+               return ret;
  
         wait_for_subcore_guest_exit();
  
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c

index 58618f644c56bd12efc74f6479d37d7ba539d434..0c854816e653e25238f87c1cf9c44a8ed911df44 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
                 j = i + 1;
                 if (npages) {
                         set_dirty_bits(map, i, npages);
-                       i = j + npages;
+                       j = i + npages;
                 }
         }
         return 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index 2d46037ce93664199adee27806b8972d9130368d..e5f81fc108e094c100b3d73fc2c702603e692e47 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -118,6 +118,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
  MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
  #endif
  
+/* If set, the threads on each CPU core have to be in the same MMU mode */
+static bool no_mixing_hpt_and_radix;
+
  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
  
@@ -1497,6 +1500,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
         case KVM_REG_PPC_ARCH_COMPAT:
                 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
                 break;
+       case KVM_REG_PPC_DEC_EXPIRY:
+               *val = get_reg_val(id, vcpu->arch.dec_expires +
+                                  vcpu->arch.vcore->tb_offset);
+               break;
         default:
                 r = -EINVAL;
                 break;
@@ -1724,6 +1731,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
         case KVM_REG_PPC_ARCH_COMPAT:
                 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
                 break;
+       case KVM_REG_PPC_DEC_EXPIRY:
+               vcpu->arch.dec_expires = set_reg_val(id, *val) -
+                       vcpu->arch.vcore->tb_offset;
+               break;
         default:
                 r = -EINVAL;
                 break;
@@ -2378,8 +2389,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
  static bool subcore_config_ok(int n_subcores, int n_threads)
  {
         /*
-        * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
-        * mode, with one thread per subcore.
+        * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+        * split-core mode, with one thread per subcore.
          */
         if (cpu_has_feature(CPU_FTR_ARCH_300))
                 return n_subcores <= 4 && n_threads == 1;
@@ -2415,8 +2426,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
         if (!cpu_has_feature(CPU_FTR_ARCH_207S))
                 return false;
  
-       /* POWER9 currently requires all threads to be in the same MMU mode */
-       if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+       /* Some POWER9 chips require all threads to be in the same MMU mode */
+       if (no_mixing_hpt_and_radix &&
             kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
                 return false;
  
@@ -2679,9 +2690,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
          * threads are offline.  Also check if the number of threads in this
          * guest are greater than the current system threads per guest.
          * On POWER9, we need to be not in independent-threads mode if
-        * this is a HPT guest on a radix host.
+        * this is a HPT guest on a radix host machine where the
+        * CPU threads may not be in different MMU modes.
          */
-       hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+       hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
+               !kvm_is_radix(vc->kvm);
         if (((controlled_threads > 1) &&
              ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
             (hpt_on_radix && vc->kvm->arch.threads_indep)) {
@@ -2831,7 +2844,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                  */
                 if (!thr0_done)
                         kvmppc_start_thread(NULL, pvc);
-               thr += pvc->num_threads;
         }
  
         /*
@@ -2987,7 +2999,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
  {
         if (!xive_enabled())
                 return false;
-       return vcpu->arch.xive_saved_state.pipr <
+       return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
                 vcpu->arch.xive_saved_state.cppr;
  }
  #else
@@ -3176,17 +3188,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
          * this thread straight away and have it join in.
          */
         if (!signal_pending(current)) {
-               if (vc->vcore_state == VCORE_PIGGYBACK) {
-                       if (spin_trylock(&vc->lock)) {
-                               if (vc->vcore_state == VCORE_RUNNING &&
-                                   !VCORE_IS_EXITING(vc)) {
-                                       kvmppc_create_dtl_entry(vcpu, vc);
-                                       kvmppc_start_thread(vcpu, vc);
-                                       trace_kvm_guest_enter(vcpu);
-                               }
-                               spin_unlock(&vc->lock);
-                       }
-               } else if (vc->vcore_state == VCORE_RUNNING &&
+               if ((vc->vcore_state == VCORE_PIGGYBACK ||
+                    vc->vcore_state == VCORE_RUNNING) &&
                            !VCORE_IS_EXITING(vc)) {
                         kvmppc_create_dtl_entry(vcpu, vc);
                         kvmppc_start_thread(vcpu, vc);
@@ -4448,6 +4451,19 @@ static int kvmppc_book3s_init_hv(void)
  
         if (kvmppc_radix_possible())
                 r = kvmppc_radix_init();
+
+       /*
+        * POWER9 chips before version 2.02 can't have some threads in
+        * HPT mode and some in radix mode on the same core.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               unsigned int pvr = mfspr(SPRN_PVR);
+               if ((pvr >> 16) == PVR_POWER9 &&
+                   (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+                    ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+                       no_mixing_hpt_and_radix = true;
+       }
+
         return r;
  }
  
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c

index c356f9a40b244e8715eaabd4d7c5818aba547399..c296343d0dcc6489ade68bafd1919bc5a83ea1c5 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void)
   *   secondary threads to proceed.
   * - All secondary threads will eventually call opal hmi handler on
   *   their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
   */
  
  long kvmppc_realmode_hmi_handler(void)
  {
-       int ptid = local_paca->kvm_hstate.ptid;
         bool resync_req;
  
-       /* This is only called on primary thread. */
-       BUG_ON(ptid != 0);
         __this_cpu_inc(irq_stat.hmi_exceptions);
  
+       if (hmi_handle_debugtrig(NULL) >= 0)
+               return 1;
+
         /*
          * By now primary thread has already completed guest->host
          * partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 9c61f736c75b2d0761ec4f9d2e385df75b6dc882..b64f10a5f5e7d74d713689f43e186909fe7ead68 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -617,13 +617,6 @@ kvmppc_hv_entry:
         lbz     r0, KVM_RADIX(r9)
         cmpwi   cr7, r0, 0
  
-       /* Clear out SLB if hash */
-       bne     cr7, 2f
-       li      r6,0
-       slbmte  r6,r6
-       slbia
-       ptesync
-2:
         /*
          * POWER7/POWER8 host -> guest partition switch code.
          * We don't have to lock against concurrent tlbies,
@@ -738,19 +731,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  10:    cmpdi   r4, 0
         beq     kvmppc_primary_no_guest
  kvmppc_got_guest:
-
-       /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
-       lwz     r5,VCPU_SLB_MAX(r4)
-       cmpwi   r5,0
-       beq     9f
-       mtctr   r5
-       addi    r6,r4,VCPU_SLB
-1:     ld      r8,VCPU_SLB_E(r6)
-       ld      r9,VCPU_SLB_V(r6)
-       slbmte  r9,r8
-       addi    r6,r6,VCPU_SLB_SIZE
-       bdnz    1b
-9:
         /* Increment yield count if they have a VPA */
         ld      r3, VCPU_VPA(r4)
         cmpdi   r3, 0
@@ -957,7 +937,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
         mftb    r7
         subf    r3,r7,r8
         mtspr   SPRN_DEC,r3
-       std     r3,VCPU_DEC(r4)
  
         ld      r5, VCPU_SPRG0(r4)
         ld      r6, VCPU_SPRG1(r4)
@@ -1018,6 +997,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
         cmpdi   r3, 512         /* 1 microsecond */
         blt     hdec_soon
  
+       /* For hash guest, clear out and reload the SLB */
+       ld      r6, VCPU_KVM(r4)
+       lbz     r0, KVM_RADIX(r6)
+       cmpwi   r0, 0
+       bne     9f
+       li      r6, 0
+       slbmte  r6, r6
+       slbia
+       ptesync
+
+       /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+       lwz     r5,VCPU_SLB_MAX(r4)
+       cmpwi   r5,0
+       beq     9f
+       mtctr   r5
+       addi    r6,r4,VCPU_SLB
+1:     ld      r8,VCPU_SLB_E(r6)
+       ld      r9,VCPU_SLB_V(r6)
+       slbmte  r9,r8
+       addi    r6,r6,VCPU_SLB_SIZE
+       bdnz    1b
+9:
+
  #ifdef CONFIG_KVM_XICS
         /* We are entering the guest on that thread, push VCPU to XIVE */
         ld      r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@ -1031,8 +1033,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
         li      r9, TM_QW1_OS + TM_WORD2
         stwcix  r11,r9,r10
         li      r9, 1
-       stw     r9, VCPU_XIVE_PUSHED(r4)
+       stb     r9, VCPU_XIVE_PUSHED(r4)
         eieio
+
+       /*
+        * We clear the irq_pending flag. There is a small chance of a
+        * race vs. the escalation interrupt happening on another
+        * processor setting it again, but the only consequence is to
+        * cause a spurious wakeup on the next H_CEDE which is not an
+        * issue.
+        */
+       li      r0,0
+       stb     r0, VCPU_IRQ_PENDING(r4)
+
+       /*
+        * In single escalation mode, if the escalation interrupt is
+        * on, we mask it.
+        */
+       lbz     r0, VCPU_XIVE_ESC_ON(r4)
+       cmpwi   r0,0
+       beq     1f
+       ld      r10, VCPU_XIVE_ESC_RADDR(r4)
+       li      r9, XIVE_ESB_SET_PQ_01
+       ldcix   r0, r10, r9
+       sync
+
+       /* We have a possible subtle race here: The escalation interrupt might
+        * have fired and be on its way to the host queue while we mask it,
+        * and if we unmask it early enough (re-cede right away), there is
+        * a theoretical possibility that it fires again, thus landing in the
+        * target queue more than once which is a big no-no.
+        *
+        * Fortunately, solving this is rather easy. If the above load setting
+        * PQ to 01 returns a previous value where P is set, then we know the
+        * escalation interrupt is somewhere on its way to the host. In that
+        * case we simply don't clear the xive_esc_on flag below. It will be
+        * eventually cleared by the handler for the escalation interrupt.
+        *
+        * Then, when doing a cede, we check that flag again before re-enabling
+        * the escalation interrupt, and if set, we abort the cede.
+        */
+       andi.   r0, r0, XIVE_ESB_VAL_P
+       bne-    1f
+
+       /* Now P is 0, we can clear the flag */
+       li      r0, 0
+       stb     r0, VCPU_XIVE_ESC_ON(r4)
+1:
  no_xive:
  #endif /* CONFIG_KVM_XICS */
  
@@ -1193,7 +1240,7 @@ hdec_soon:
         addi    r3, r4, VCPU_TB_RMEXIT
         bl      kvmhv_accumulate_time
  #endif
-       b       guest_exit_cont
+       b       guest_bypass
  
  /******************************************************************************
   *                                                                            *
@@ -1423,15 +1470,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         blt     deliver_guest_interrupt
  
  guest_exit_cont:               /* r9 = vcpu, r12 = trap, r13 = paca */
+       /* Save more register state  */
+       mfdar   r6
+       mfdsisr r7
+       std     r6, VCPU_DAR(r9)
+       stw     r7, VCPU_DSISR(r9)
+       /* don't overwrite fault_dar/fault_dsisr if HDSI */
+       cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+       beq     mc_cont
+       std     r6, VCPU_FAULT_DAR(r9)
+       stw     r7, VCPU_FAULT_DSISR(r9)
+
+       /* See if it is a machine check */
+       cmpwi   r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+       beq     machine_check_realmode
+mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r9, VCPU_TB_RMEXIT
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
+#endif
  #ifdef CONFIG_KVM_XICS
         /* We are exiting, pull the VP from the XIVE */
-       lwz     r0, VCPU_XIVE_PUSHED(r9)
+       lbz     r0, VCPU_XIVE_PUSHED(r9)
         cmpwi   cr0, r0, 0
         beq     1f
         li      r7, TM_SPC_PULL_OS_CTX
         li      r6, TM_QW1_OS
         mfmsr   r0
-       andi.   r0, r0, MSR_IR          /* in real mode? */
+       andi.   r0, r0, MSR_DR          /* in real mode? */
         beq     2f
         ld      r10, HSTATE_XIVE_TIMA_VIRT(r13)
         cmpldi  cr0, r10, 0
@@ -1454,33 +1521,42 @@ guest_exit_cont:                /* r9 = vcpu, r12 = trap, r13 = paca */
         /* Fixup some of the state for the next load */
         li      r10, 0
         li      r0, 0xff
-       stw     r10, VCPU_XIVE_PUSHED(r9)
+       stb     r10, VCPU_XIVE_PUSHED(r9)
         stb     r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
         stb     r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
         eieio
  1:
  #endif /* CONFIG_KVM_XICS */
-       /* Save more register state  */
-       mfdar   r6
-       mfdsisr r7
-       std     r6, VCPU_DAR(r9)
-       stw     r7, VCPU_DSISR(r9)
-       /* don't overwrite fault_dar/fault_dsisr if HDSI */
-       cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-       beq     mc_cont
-       std     r6, VCPU_FAULT_DAR(r9)
-       stw     r7, VCPU_FAULT_DSISR(r9)
  
-       /* See if it is a machine check */
-       cmpwi   r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-       beq     machine_check_realmode
-mc_cont:
-#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
-       addi    r3, r9, VCPU_TB_RMEXIT
-       mr      r4, r9
-       bl      kvmhv_accumulate_time
-#endif
+       /* For hash guest, read the guest SLB and save it away */
+       ld      r5, VCPU_KVM(r9)
+       lbz     r0, KVM_RADIX(r5)
+       li      r5, 0
+       cmpwi   r0, 0
+       bne     3f                      /* for radix, save 0 entries */
+       lwz     r0,VCPU_SLB_NR(r9)      /* number of entries in SLB */
+       mtctr   r0
+       li      r6,0
+       addi    r7,r9,VCPU_SLB
+1:     slbmfee r8,r6
+       andis.  r0,r8,SLB_ESID_V@h
+       beq     2f
+       add     r8,r8,r6                /* put index in */
+       slbmfev r3,r6
+       std     r8,VCPU_SLB_E(r7)
+       std     r3,VCPU_SLB_V(r7)
+       addi    r7,r7,VCPU_SLB_SIZE
+       addi    r5,r5,1
+2:     addi    r6,r6,1
+       bdnz    1b
+       /* Finally clear out the SLB */
+       li      r0,0
+       slbmte  r0,r0
+       slbia
+       ptesync
+3:     stw     r5,VCPU_SLB_MAX(r9)
  
+guest_bypass:
         mr      r3, r12
         /* Increment exit count, poke other threads to exit */
         bl      kvmhv_commence_exit
@@ -1501,31 +1577,6 @@ mc_cont:
         ori     r6,r6,1
         mtspr   SPRN_CTRLT,r6
  4:
-       /* Check if we are running hash or radix and store it in cr2 */
-       ld      r5, VCPU_KVM(r9)
-       lbz     r0, KVM_RADIX(r5)
-       cmpwi   cr2,r0,0
-
-       /* Read the guest SLB and save it away */
-       li      r5, 0
-       bne     cr2, 3f                 /* for radix, save 0 entries */
-       lwz     r0,VCPU_SLB_NR(r9)      /* number of entries in SLB */
-       mtctr   r0
-       li      r6,0
-       addi    r7,r9,VCPU_SLB
-1:     slbmfee r8,r6
-       andis.  r0,r8,SLB_ESID_V@h
-       beq     2f
-       add     r8,r8,r6                /* put index in */
-       slbmfev r3,r6
-       std     r8,VCPU_SLB_E(r7)
-       std     r3,VCPU_SLB_V(r7)
-       addi    r7,r7,VCPU_SLB_SIZE
-       addi    r5,r5,1
-2:     addi    r6,r6,1
-       bdnz    1b
-3:     stw     r5,VCPU_SLB_MAX(r9)
-
         /*
          * Save the guest PURR/SPURR
          */
@@ -1803,7 +1854,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         ld      r5, VCPU_KVM(r9)
         lbz     r0, KVM_RADIX(r5)
         cmpwi   cr2, r0, 0
-       beq     cr2, 3f
+       beq     cr2, 4f
  
         /* Radix: Handle the case where the guest used an illegal PID */
         LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -1839,15 +1890,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  BEGIN_FTR_SECTION
         PPC_INVALIDATE_ERAT
  END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
-       b       4f
+4:
  #endif /* CONFIG_PPC_RADIX_MMU */
  
-       /* Hash: clear out SLB */
-3:     li      r5,0
-       slbmte  r5,r5
-       slbia
-       ptesync
-4:
         /*
          * POWER7/POWER8 guest -> host partition switch code.
          * We don't have to lock against tlbies but we do
@@ -1908,16 +1953,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         bne     27f
         bl      kvmppc_realmode_hmi_handler
         nop
+       cmpdi   r3, 0
         li      r12, BOOK3S_INTERRUPT_HMI
         /*
-        * At this point kvmppc_realmode_hmi_handler would have resync-ed
-        * the TB. Hence it is not required to subtract guest timebase
-        * offset from timebase. So, skip it.
+        * At this point kvmppc_realmode_hmi_handler may have resync-ed
+        * the TB, and if it has, we must not subtract the guest timebase
+        * offset from the timebase. So, skip it.
          *
          * Also, do not call kvmppc_subcore_exit_guest() because it has
          * been invoked as part of kvmppc_realmode_hmi_handler().
          */
-       b       30f
+       beq     30f
  
  27:
         /* Subtract timebase offset from timebase */
@@ -2744,7 +2790,32 @@ kvm_cede_prodded:
         /* we've ceded but we want to give control to the host */
  kvm_cede_exit:
         ld      r9, HSTATE_KVM_VCPU(r13)
-       b       guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+       /* Abort if we still have a pending escalation */
+       lbz     r5, VCPU_XIVE_ESC_ON(r9)
+       cmpwi   r5, 0
+       beq     1f
+       li      r0, 0
+       stb     r0, VCPU_CEDED(r9)
+1:     /* Enable XIVE escalation */
+       li      r5, XIVE_ESB_SET_PQ_00
+       mfmsr   r0
+       andi.   r0, r0, MSR_DR          /* in real mode? */
+       beq     1f
+       ld      r10, VCPU_XIVE_ESC_VADDR(r9)
+       cmpdi   r10, 0
+       beq     3f
+       ldx     r0, r10, r5
+       b       2f
+1:     ld      r10, VCPU_XIVE_ESC_RADDR(r9)
+       cmpdi   r10, 0
+       beq     3f
+       ldcix   r0, r10, r5
+2:     sync
+       li      r0, 1
+       stb     r0, VCPU_XIVE_ESC_ON(r9)
+#endif /* CONFIG_KVM_XICS */
+3:     b       guest_exit_cont
  
         /* Try to handle a machine check in real mode */
  machine_check_realmode:
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c

index 0d750d274c4e21a3324eb3505bbd73c86a58cdc9..badfdbb857a28cfcf4a25ecc0144639477775efe 100644 (file)
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
  {
         struct kvm_vcpu *vcpu = data;
  
-       /* We use the existing H_PROD mechanism to wake up the target */
-       vcpu->arch.prodded = 1;
+       vcpu->arch.irq_pending = 1;
         smp_mb();
         if (vcpu->arch.ceded)
                 kvmppc_fast_vcpu_kick(vcpu);
  
+       /* Since we have the no-EOI flag, the interrupt is effectively
+        * disabled now. Clearing xive_esc_on means we won't bother
+        * doing so on the next entry.
+        *
+        * This also allows the entry code to know that if a PQ combination
+        * of 10 is observed while xive_esc_on is true, it means the queue
+        * contains an unprocessed escalation interrupt. We don't make use of
+        * that knowledge today but might (see comment in book3s_hv_rmhandlers.S)
+        */
+       vcpu->arch.xive_esc_on = false;
+
         return IRQ_HANDLED;
  }
  
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
                 return -EIO;
         }
  
-       /*
-        * Future improvement: start with them disabled
-        * and handle DD2 and later scheme of merged escalation
-        * interrupts
-        */
-       name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-                        vcpu->kvm->arch.lpid, xc->server_num, prio);
+       if (xc->xive->single_escalation)
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num);
+       else
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num, prio);
         if (!name) {
                 pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
                        prio, xc->server_num);
                 rc = -ENOMEM;
                 goto error;
         }
+
+       pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
         rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
                          IRQF_NO_THREAD, name, vcpu);
         if (rc) {
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
                 goto error;
         }
         xc->esc_virq_names[prio] = name;
+
+       /* In single escalation mode, we grab the ESB MMIO of the
+        * interrupt and mask it. Also populate the VCPU v/raddr
+        * of the ESB page for use by asm entry/exit code. Finally
+        * set the XIVE_IRQ_NO_EOI flag which will prevent the
+        * core code from performing an EOI on the escalation
+        * interrupt, thus leaving it effectively masked after
+        * it fires once.
+        */
+       if (xc->xive->single_escalation) {
+               struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+               xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+               vcpu->arch.xive_esc_raddr = xd->eoi_page;
+               vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+               xd->flags |= XIVE_IRQ_NO_EOI;
+       }
+
         return 0;
  error:
         irq_dispose_mapping(xc->esc_virq[prio]);
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
  
         pr_devel("Provisioning prio... %d\n", prio);
  
-       /* Provision each VCPU and enable escalations */
+       /* Provision each VCPU and enable escalations if needed */
         kvm_for_each_vcpu(i, vcpu, kvm) {
                 if (!vcpu->arch.xive_vcpu)
                         continue;
                 rc = xive_provision_queue(vcpu, prio);
-               if (rc == 0)
+               if (rc == 0 && !xive->single_escalation)
                         xive_attach_escalation(vcpu, prio);
                 if (rc)
                         return rc;
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
         /* Allocate IPI */
         xc->vp_ipi = xive_native_alloc_irq();
         if (!xc->vp_ipi) {
+               pr_err("Failed to allocate xive irq for VCPU IPI\n");
                 r = -EIO;
                 goto bail;
         }
@@ -1091,19 +1123,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
         if (r)
                 goto bail;
  
+       /*
+        * Enable the VP first as the single escalation mode will
+        * affect escalation interrupt numbering
+        */
+       r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+       if (r) {
+               pr_err("Failed to enable VP in OPAL, err %d\n", r);
+               goto bail;
+       }
+
         /*
          * Initialize queues. Initially we set them all for no queueing
          * and we enable escalation for queue 0 only which we'll use for
          * our mfrr change notifications. If the VCPU is hot-plugged, we
-        * do handle provisioning however.
+        * do handle provisioning however based on the existing "map"
+        * of enabled queues.
          */
         for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                 struct xive_q *q = &xc->queues[i];
  
+               /* Single escalation, no queue 7 */
+               if (i == 7 && xive->single_escalation)
+                       break;
+
                 /* Is queue already enabled ? Provision it */
                 if (xive->qmap & (1 << i)) {
                         r = xive_provision_queue(vcpu, i);
-                       if (r == 0)
+                       if (r == 0 && !xive->single_escalation)
                                 xive_attach_escalation(vcpu, i);
                         if (r)
                                 goto bail;
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
         if (r)
                 goto bail;
  
-       /* Enable the VP */
-       r = xive_native_enable_vp(xc->vp_id);
-       if (r)
-               goto bail;
-
         /* Route the IPI */
         r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
         if (!r)
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
  
         pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
                  val, server, guest_prio);
+
         /*
          * If the source doesn't already have an IPI, allocate
          * one and get the corresponding data
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
         if (xive->vp_base == XIVE_INVALID_VP)
                 ret = -ENOMEM;
  
+       xive->single_escalation = xive_native_has_single_escalation();
+
         if (ret) {
                 kfree(xive);
                 return ret;
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
  
         kvm_for_each_vcpu(i, vcpu, kvm) {
                 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+               unsigned int i;
  
                 if (!xc)
                         continue;
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
                            xc->server_num, xc->cppr, xc->hw_cppr,
                            xc->mfrr, xc->pending,
                            xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+               for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+                       struct xive_q *q = &xc->queues[i];
+                       u32 i0, i1, idx;
+
+                       if (!q->qpage && !xc->esc_virq[i])
+                               continue;
+
+                       seq_printf(m, " [q%d]: ", i);
+
+                       if (q->qpage) {
+                               idx = q->idx;
+                               i0 = be32_to_cpup(q->qpage + idx);
+                               idx = (idx + 1) & q->msk;
+                               i1 = be32_to_cpup(q->qpage + idx);
+                               seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
+                       }
+                       if (xc->esc_virq[i]) {
+                               struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+                               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+                               u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+                               seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+                                          (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+                                          (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+                                          xc->esc_virq[i], pq, xd->eoi_page);
+                               seq_printf(m, "\n");
+                       }
+               }
  
                 t_rm_h_xirr += xc->stat_rm_h_xirr;
                 t_rm_h_ipoll += xc->stat_rm_h_ipoll;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h

index 6ba63f8e8a614ed4aad2a5890236834f3e08c0a7..a08ae6fd4c51fc54b9c79ffe48290b7f86b58956 100644 (file)
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -120,6 +120,8 @@ struct kvmppc_xive {
         u32     q_order;
         u32     q_page_order;
  
+       /* Flags */
+       u8      single_escalation;
  };
  
  #define KVMPPC_XIVE_Q_COUNT    8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
   * is as follow.
   *
   * Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
   *
   * Similar mapping is done for CPPR values
   */
  static inline u8 xive_prio_from_guest(u8 prio)
  {
-       if (prio == 0xff || prio < 8)
+       if (prio == 0xff || prio < 6)
                 return prio;
-       return 7;
+       return 6;
  }
  
  static inline u8 xive_prio_to_guest(u8 prio)
  {
-       if (prio == 0xff || prio < 7)
-               return prio;
-       return 0xb;
+       return prio;
  }
  
  static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c

index 545a230f675f652702193eb6089e6afe791ece46..748562ec9a0425f19f226da9e14833da0602bab0 100644 (file)
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -763,7 +763,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
  
         hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
         vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
-       vcpu->arch.dec_expires = ~(u64)0;
+       vcpu->arch.dec_expires = get_tb();
  
  #ifdef CONFIG_KVM_EXIT_TIMING
         mutex_init(&vcpu->arch.exit_timing_lock);
@@ -1106,11 +1106,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
  {
         enum emulation_result emulated = EMULATE_DONE;
  
-       /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
-       if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
-               (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+       /* Currently, mmio_vsx_copy_nums is only allowed to be 4 or less */
+       if (vcpu->arch.mmio_vsx_copy_nums > 4)
                 return EMULATE_FAIL;
-       }
  
         while (vcpu->arch.mmio_vsx_copy_nums) {
                 emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
@@ -1252,11 +1250,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
  
         vcpu->arch.io_gpr = rs;
  
-       /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
-       if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
-               (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+       /* Currently, mmio_vsx_copy_nums is only allowed to be 4 or less */
+       if (vcpu->arch.mmio_vsx_copy_nums > 4)
                 return EMULATE_FAIL;
-       }
  
         while (vcpu->arch.mmio_vsx_copy_nums) {
                 if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c

index e44d2b2ea97e34a2dd7538ab8b484757513a6c8e..1c03c978eb184c30e86cc519697f2f4f7b8125d0 100644 (file)
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
         int i;
         u64 min, max, sum, sum_quad;
  
-       seq_printf(m, "%s", "type       count   min     max     sum     sum_squared\n");
-
+       seq_puts(m, "type       count   min     max     sum     sum_squared\n");
  
         for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
  
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c

index a3b8d7d1316eb1863f19ffa19ff34f0c761ada33..2547b6021e6a05d105b2ac98c57161fc97327650 100644 (file)
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
          * EOI the source if it hasn't been disabled and hasn't
          * been passed-through to a KVM guest
          */
-       if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+       if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+           !(xd->flags & XIVE_IRQ_NO_EOI))
                 xive_do_source_eoi(irqd_to_hwirq(d), xd);
  
         /*
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c

index ebc244b08d6748512c19199446d25f7ac49fca9b..d22aeb0b69e107636e94e7811785e573f60880ce 100644 (file)
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
  static u32 xive_queue_shift;
  static u32 xive_pool_vps = XIVE_INVALID_VP;
  static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
  
  int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
  {
@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
                         break;
         }
  
+       /* Do we support single escalation */
+       if (of_get_property(np, "single-escalation-support", NULL) != NULL)
+               xive_has_single_esc = true;
+
         /* Configure Thread Management areas for KVM */
         for_each_possible_cpu(cpu)
                 kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
  }
  EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
  
-int xive_native_enable_vp(u32 vp_id)
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
  {
         s64 rc;
+       u64 flags = OPAL_XIVE_VP_ENABLED;
  
+       if (single_escalation)
+               flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
         for (;;) {
-               rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+               rc = opal_xive_set_vp_info(vp_id, flags, 0);
                 if (rc != OPAL_BUSY)
                         break;
                 msleep(1);
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
         return 0;
  }
  EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+       return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
author	Radim Krčmář <rkrcmar@redhat.com>
	Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
committer	Radim Krčmář <rkrcmar@redhat.com>
	Thu, 1 Feb 2018 15:13:07 +0000 (16:13 +0100)
Documentation/virtual/kvm/api.txt		patch \| blob \| blame \| history
arch/powerpc/include/asm/hmi.h		patch \| blob \| blame \| history
arch/powerpc/include/asm/kvm_book3s_64.h		patch \| blob \| blame \| history
arch/powerpc/include/asm/kvm_host.h		patch \| blob \| blame \| history
arch/powerpc/include/asm/opal-api.h		patch \| blob \| blame \| history
arch/powerpc/include/asm/reg.h		patch \| blob \| blame \| history
arch/powerpc/include/asm/xive-regs.h		patch \| blob \| blame \| history
arch/powerpc/include/asm/xive.h		patch \| blob \| blame \| history
arch/powerpc/include/uapi/asm/kvm.h		patch \| blob \| blame \| history
arch/powerpc/kernel/asm-offsets.c		patch \| blob \| blame \| history
arch/powerpc/kernel/mce.c		patch \| blob \| blame \| history
arch/powerpc/kvm/book3s_64_mmu_radix.c		patch \| blob \| blame \| history
arch/powerpc/kvm/book3s_hv.c		patch \| blob \| blame \| history
arch/powerpc/kvm/book3s_hv_ras.c		patch \| blob \| blame \| history
arch/powerpc/kvm/book3s_hv_rmhandlers.S		patch \| blob \| blame \| history
arch/powerpc/kvm/book3s_xive.c		patch \| blob \| blame \| history
arch/powerpc/kvm/book3s_xive.h		patch \| blob \| blame \| history
arch/powerpc/kvm/powerpc.c		patch \| blob \| blame \| history
arch/powerpc/kvm/timing.c		patch \| blob \| blame \| history
arch/powerpc/sysdev/xive/common.c		patch \| blob \| blame \| history
arch/powerpc/sysdev/xive/native.c		patch \| blob \| blame \| history