powerpc/pseries/mm: call H_BLOCK_REMOVE
authorLaurent Dufour <ldufour@linux.vnet.ibm.com>
Mon, 20 Aug 2018 14:29:36 +0000 (16:29 +0200)
committerMichael Ellerman <mpe@ellerman.id.au>
Mon, 17 Sep 2018 11:17:25 +0000 (21:17 +1000)
This hypervisor call allows the removal of up to 8 PTEs with a single
tlbie operation.

The virtual pages must be all within the same naturally aligned 8 pages
virtual address block and have the same page and segment size encodings.

Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/hvcall.h
arch/powerpc/platforms/pseries/lpar.c

index a0b17f9f1ea4e5c40e1b2d7a2c6e1aa913759ea5..c349d3960d636c065a8a801ddc83dd5d1c2214f5 100644 (file)
 #define H_COP                  0x304
 #define H_GET_MPP_X            0x314
 #define H_SET_MODE             0x31C
+#define H_BLOCK_REMOVE         0x328
 #define H_CLEAR_HPT            0x358
 #define H_REQUEST_VMC          0x360
 #define H_RESIZE_HPT_PREPARE   0x36C
index ebc852e3607d521c6dc4bc3180fd729493a07b42..0b5081085a445b5edc5d3bae432f1eeac6a7fc2f 100644 (file)
@@ -417,6 +417,79 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
        BUG_ON(lpar_rc != H_SUCCESS);
 }
 
+
+/*
+ * As defined in the PAPR's section 14.5.4.1.8
+ * The control mask doesn't include the returned reference and change bit from
+ * the processed PTE.
+ */
+#define HBLKR_AVPN             0x0100000000000000UL
+#define HBLKR_CTRL_MASK                0xf800000000000000UL
+#define HBLKR_CTRL_SUCCESS     0x8000000000000000UL
+#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
+#define HBLKR_CTRL_ERRBUSY     0xa000000000000000UL
+
+/**
+ * H_BLOCK_REMOVE caller.
+ * @idx is the index of the first unused @param entry, i.e. one past the
+ * last @param entry set with a PTEX (param[idx] is terminated with HBR_END
+ * when the buffer is not full).
+ * If a PTE cannot be processed because another CPU has already locked that
+ * group, those entries are put back in @param starting at index 1.
+ * If entries have to be retried and @retry_busy is set to true, these entries
+ * are retried until success. If @retry_busy is set to false, the returned
+ * value is the number of entries yet to process.
+ */
+static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
+                                      bool retry_busy)
+{
+       unsigned long i, rc, new_idx;
+       unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+       if (idx < 2) {
+               pr_warn("Unexpected empty call to H_BLOCK_REMOVE");
+               return 0;
+       }
+again:
+       new_idx = 0;
+       if (idx > PLPAR_HCALL9_BUFSIZE) {
+               pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx);
+               idx = PLPAR_HCALL9_BUFSIZE;
+       } else if (idx < PLPAR_HCALL9_BUFSIZE)
+               param[idx] = HBR_END;
+
+       rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
+                         param[0], /* AVA */
+                         param[1],  param[2],  param[3],  param[4], /* TS0-7 */
+                         param[5],  param[6],  param[7],  param[8]);
+       if (rc == H_SUCCESS)
+               return 0;
+
+       BUG_ON(rc != H_PARTIAL);
+
+       /* Check that the unprocessed entries were 'not found' or 'busy' */
+       for (i = 0; i < idx-1; i++) {
+               unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
+
+               if (ctrl == HBLKR_CTRL_ERRBUSY) {
+                       param[++new_idx] = param[i+1];
+                       continue;
+               }
+
+               BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
+                      && ctrl != HBLKR_CTRL_ERRNOTFOUND);
+       }
+
+       /*
+        * If there were entries found busy, retry these entries if requested,
+        * or if all the entries have to be retried.
+        */
+       if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
+               idx = new_idx + 1;
+               goto again;
+       }
+
+       return new_idx;
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
@@ -424,17 +497,57 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
  */
 #define PPC64_HUGE_HPTE_BATCH 12
 
-static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
-                                            unsigned long *vpn, int count,
-                                            int psize, int ssize)
+static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
+                                     int count, int psize, int ssize)
 {
        unsigned long param[PLPAR_HCALL9_BUFSIZE];
-       int i = 0, pix = 0, rc;
-       unsigned long flags = 0;
-       int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+       unsigned long shift, current_vpgb, vpgb;
+       int i, pix = 0;
 
-       if (lock_tlbie)
-               spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+       shift = mmu_psize_defs[psize].shift;
+
+       for (i = 0; i < count; i++) {
+               /*
+                * Shifting 3 more bits to the right to get an
+                * 8 pages aligned virtual address.
+                */
+               vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
+               if (!pix || vpgb != current_vpgb) {
+                       /*
+                        * Need to start a new 8 pages block, flush
+                        * the current one if needed.
+                        * (current_vpgb is only read once pix != 0,
+                        * i.e. after it has been set below.)
+                        */
+                       if (pix)
+                               (void)call_block_remove(pix, param, true);
+                       current_vpgb = vpgb;
+                       param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
+                       pix = 1;
+               }
+
+               param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
+               if (pix == PLPAR_HCALL9_BUFSIZE) {
+                       pix = call_block_remove(pix, param, false);
+                       /*
+                        * pix = 0 means that all the entries were
+                        * removed, we can start a new block.
+                        * Otherwise, this means that there are entries
+                        * to retry, and pix points to the last one, so
+                        * we should increment it and try to continue
+                        * the same block.
+                        */
+                       if (pix)
+                               pix++;
+               }
+       }
+       if (pix)
+               (void)call_block_remove(pix, param, true);
+}
+
+static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
+                                    int count, int psize, int ssize)
+{
+       unsigned long param[PLPAR_HCALL9_BUFSIZE];
+       int i = 0, pix = 0, rc;
 
        for (i = 0; i < count; i++) {
 
@@ -462,6 +575,23 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
                                  param[6], param[7]);
                BUG_ON(rc != H_SUCCESS);
        }
+}
+
+static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+                                                     unsigned long *vpn,
+                                                     int count, int psize,
+                                                     int ssize)
+{
+       unsigned long flags = 0;
+       int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+       if (lock_tlbie)
+               spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+       if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+               hugepage_block_invalidate(slot, vpn, count, psize, ssize);
+       else
+               hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
 
        if (lock_tlbie)
                spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
@@ -564,6 +694,68 @@ static inline unsigned long compute_slot(real_pte_t pte,
        return slot;
 }
 
+/**
+ * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed are
+ * "all within the same naturally aligned 8 page virtual address block".
+ */
+static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
+                           unsigned long *param)
+{
+       unsigned long vpn;
+       unsigned long i, pix = 0;
+       unsigned long index, shift, slot, current_vpgb, vpgb;
+       real_pte_t pte;
+       int psize, ssize;
+
+       psize = batch->psize;
+       ssize = batch->ssize;
+
+       for (i = 0; i < number; i++) {
+               vpn = batch->vpn[i];
+               pte = batch->pte[i];
+               pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+                       /*
+                        * Shifting 3 more bits to the right to get an
+                        * 8 pages aligned virtual address.
+                        */
+                       vpgb = (vpn >> (shift - VPN_SHIFT + 3));
+                       if (!pix || vpgb != current_vpgb) {
+                               /*
+                                * Need to start a new 8 pages block, flush
+                                * the current one if needed.
+                                */
+                               if (pix)
+                                       (void)call_block_remove(pix, param,
+                                                               true);
+                               current_vpgb = vpgb;
+                               param[0] = hpte_encode_avpn(vpn, psize,
+                                                           ssize);
+                               pix = 1;
+                       }
+
+                       slot = compute_slot(pte, vpn, index, shift, ssize);
+                       param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
+
+                       if (pix == PLPAR_HCALL9_BUFSIZE) {
+                               pix = call_block_remove(pix, param, false);
+                               /*
+                                * pix = 0 means that all the entries were
+                                * removed, we can start a new block.
+                                * Otherwise, this means that there are entries
+                                * to retry, and pix points to the last one, so
+                                * we should increment it and try to continue
+                                * the same block.
+                                */
+                               if (pix)
+                                       pix++;
+                       }
+               } pte_iterate_hashed_end();
+       }
+
+       if (pix)
+               (void)call_block_remove(pix, param, true);
+}
+
 /*
  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
  * lock.
@@ -583,6 +775,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
        if (lock_tlbie)
                spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
 
+       if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+               do_block_remove(number, batch, param);
+               goto out;
+       }
+
        psize = batch->psize;
        ssize = batch->ssize;
        pix = 0;
@@ -621,6 +818,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
                BUG_ON(rc != H_SUCCESS);
        }
 
+out:
        if (lock_tlbie)
                spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
 }