Merge tag 'kvm-ppc-next-4.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / arch / powerpc / kvm / book3s_xive.c
index 0d750d274c4e21a3324eb3505bbd73c86a58cdc9..badfdbb857a28cfcf4a25ecc0144639477775efe 100644 (file)
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 {
        struct kvm_vcpu *vcpu = data;
 
-       /* We use the existing H_PROD mechanism to wake up the target */
-       vcpu->arch.prodded = 1;
+       vcpu->arch.irq_pending = 1;
        smp_mb();
        if (vcpu->arch.ceded)
                kvmppc_fast_vcpu_kick(vcpu);
 
+       /* Since we have the no-EOI flag, the interrupt is effectively
+        * disabled now. Clearing xive_esc_on means we won't bother
+        * doing so on the next entry.
+        *
+        * This also allows the entry code to know that if a PQ combination
+        * of 10 is observed while xive_esc_on is true, it means the queue
+        * contains an unprocessed escalation interrupt. We don't make use of
+        * that knowledge today but might (see comment in book3s_hv_rmhandlers.S)
+        */
+       vcpu->arch.xive_esc_on = false;
+
        return IRQ_HANDLED;
 }
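
Two conventions in the handler above are worth spelling out: the smp_mb() orders the irq_pending store before the ceded test (pairing with the guest ceding path, which sets ceded and then looks for pending wakeups), and clearing xive_esc_on records that the now-fired, no-EOI escalation is effectively masked. A minimal sketch of the PQ=10 test the comment alludes to, reusing this file's ESB helpers (the function name is hypothetical, not part of the patch):

static bool xive_esc_unprocessed(struct kvm_vcpu *vcpu,
				 struct xive_irq_data *xd)
{
	u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);

	/* PQ=10 (P set, Q clear) while xive_esc_on is still true:
	 * the escalation fired but its queue entry has not been
	 * consumed yet.
	 */
	return vcpu->arch.xive_esc_on &&
	       (pq & XIVE_ESB_VAL_P) && !(pq & XIVE_ESB_VAL_Q);
}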
 
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
                return -EIO;
        }
 
-       /*
-        * Future improvement: start with them disabled
-        * and handle DD2 and later scheme of merged escalation
-        * interrupts
-        */
-       name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-                        vcpu->kvm->arch.lpid, xc->server_num, prio);
+       if (xc->xive->single_escalation)
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num);
+       else
+               name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+                                vcpu->kvm->arch.lpid, xc->server_num, prio);
        if (!name) {
                pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
                       prio, xc->server_num);
                rc = -ENOMEM;
                goto error;
        }
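
For illustration, with made-up numbers (lpid 1, server 30, prio 5), the two kasprintf() formats above yield interrupt names such as:

	kvm-1-30      single escalation: one merged escalation per vCPU
	kvm-1-30-5    per-priority escalation: one per queue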
+
+       pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
        rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
                         IRQF_NO_THREAD, name, vcpu);
        if (rc) {
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
                goto error;
        }
        xc->esc_virq_names[prio] = name;
+
+       /* In single escalation mode, we grab the ESB MMIO of the
+        * interrupt and mask it. Also populate the VCPU v/raddr
+        * of the ESB page for use by asm entry/exit code. Finally
+        * set the XIVE_IRQ_NO_EOI flag which will prevent the
+        * core code from performing an EOI on the escalation
+        * interrupt, thus leaving it effectively masked after
+        * it fires once.
+        */
+       if (xc->xive->single_escalation) {
+               struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+               xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+               vcpu->arch.xive_esc_raddr = xd->eoi_page;
+               vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+               xd->flags |= XIVE_IRQ_NO_EOI;
+       }
+
        return 0;
 error:
        irq_dispose_mapping(xc->esc_virq[prio]);
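
ESB pages decode the load offset as a command: the XIVE_ESB_SET_PQ_01 load above atomically moves the source to the masked "01" state and returns the previous PQ bits. A hedged sketch of the inverse operation, re-arming a masked escalation with the same helper (this function is illustrative, not part of the patch):

static void xive_esc_rearm(struct xive_irq_data *xd)
{
	/* A load from the PQ=00 offset re-enables the source so the
	 * next event is routed again; the previous PQ bits returned
	 * by the load are ignored here.
	 */
	xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
}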
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 
        pr_devel("Provisioning prio... %d\n", prio);
 
-       /* Provision each VCPU and enable escalations */
+       /* Provision each VCPU and enable escalations if needed */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (!vcpu->arch.xive_vcpu)
                        continue;
                rc = xive_provision_queue(vcpu, prio);
-               if (rc == 0)
+               if (rc == 0 && !xive->single_escalation)
                        xive_attach_escalation(vcpu, prio);
                if (rc)
                        return rc;
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        /* Allocate IPI */
        xc->vp_ipi = xive_native_alloc_irq();
        if (!xc->vp_ipi) {
+               pr_err("Failed to allocate xive irq for VCPU IPI\n");
                r = -EIO;
                goto bail;
        }
@@ -1091,19 +1123,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        if (r)
                goto bail;
 
+       /*
+        * Enable the VP first, as single escalation mode affects
+        * the numbering of the escalation interrupts
+        */
+       r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+       if (r) {
+               pr_err("Failed to enable VP in OPAL, err %d\n", r);
+               goto bail;
+       }
+
        /*
         * Initialize queues. Initially we set them all for no queueing
         * and we enable escalation for queue 0 only which we'll use for
         * our mfrr change notifications. If the VCPU is hot-plugged, we
-        * do handle provisioning however.
+        * do handle provisioning, however, based on the existing "map"
+        * of enabled queues.
         */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                struct xive_q *q = &xc->queues[i];
 
+               /* Single escalation, no queue 7 */
+               if (i == 7 && xive->single_escalation)
+                       break;
+
                /* Is queue already enabled ? Provision it */
                if (xive->qmap & (1 << i)) {
                        r = xive_provision_queue(vcpu, i);
-                       if (r == 0)
+                       if (r == 0 && !xive->single_escalation)
                                xive_attach_escalation(vcpu, i);
                        if (r)
                                goto bail;
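
The qmap test above is the "map" the updated comment refers to: a per-guest bitmap with one bit per priority, set when xive_check_provisioning() first provisions that priority, so vCPUs that connect later replay the same set of queues. A sketch, with a hypothetical helper name:

static bool xive_prio_provisioned(struct kvmppc_xive *xive, u8 prio)
{
	/* Bit 'prio' is set once any guest source is configured at
	 * that priority; each new vCPU must provision the matching
	 * queue.
	 */
	return !!(xive->qmap & (1 << prio));
}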
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        if (r)
                goto bail;
 
-       /* Enable the VP */
-       r = xive_native_enable_vp(xc->vp_id);
-       if (r)
-               goto bail;
-
        /* Route the IPI */
        r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
        if (!r)
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
        pr_devel("  val=0x016%llx (server=0x%x, guest_prio=%d)\n",
                 val, server, guest_prio);
+
        /*
         * If the source doesn't already have an IPI, allocate
         * one and get the corresponding data
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
        if (xive->vp_base == XIVE_INVALID_VP)
                ret = -ENOMEM;
 
+       xive->single_escalation = xive_native_has_single_escalation();
+
        if (ret) {
                kfree(xive);
                return ret;
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+               unsigned int q_nr;
 
                if (!xc)
                        continue;
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
                           xc->server_num, xc->cppr, xc->hw_cppr,
                           xc->mfrr, xc->pending,
                           xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+               for (q_nr = 0; q_nr < KVMPPC_XIVE_Q_COUNT; q_nr++) {
+                       struct xive_q *q = &xc->queues[q_nr];
+                       u32 i0, i1, idx;
+
+                       if (!q->qpage && !xc->esc_virq[q_nr])
+                               continue;
+
+                       seq_printf(m, " [q%d]: ", q_nr);
+
+                       if (q->qpage) {
+                               idx = q->idx;
+                               i0 = be32_to_cpup(q->qpage + idx);
+                               idx = (idx + 1) & q->msk;
+                               i1 = be32_to_cpup(q->qpage + idx);
+                               seq_printf(m, "T=%d %08x %08x ...\n", q->toggle, i0, i1);
+                       }
+                       if (xc->esc_virq[q_nr]) {
+                               struct irq_data *d = irq_get_irq_data(xc->esc_virq[q_nr]);
+                               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+                               u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+                               seq_printf(m, "E:%c%c I(%d:%llx:%llx)\n",
+                                          (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+                                          (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+                                          xc->esc_virq[q_nr], pq, xd->eoi_page);
+                       }
+               }
 
                t_rm_h_xirr += xc->stat_rm_h_xirr;
                t_rm_h_ipoll += xc->stat_rm_h_ipoll;
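
The debug dump above peeks at the next two ring entries without consuming them. Each XIVE event-queue entry is a big-endian 32-bit word whose top bit is a generation flag that flips on every wrap of the producer; assuming the convention used by the core XIVE driver, an entry is live when that bit differs from the consumer's q->toggle. A sketch with a hypothetical helper:

static bool xive_q_entry_live(struct xive_q *q, u32 idx)
{
	u32 entry = be32_to_cpup(q->qpage + idx);

	/* The generation bit flips on each producer wrap; an entry
	 * is live when its generation differs from the consumer's
	 * toggle.
	 */
	return (entry >> 31) != q->toggle;
}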