RDMA/cxgb4: Use the BAR2/WC path for kernel QPs and T5 devices
authorSteve Wise <swise@opengridcomputing.com>
Wed, 9 Apr 2014 14:38:25 +0000 (09:38 -0500)
committerRoland Dreier <roland@purestorage.com>
Fri, 11 Apr 2014 18:36:01 +0000 (11:36 -0700)
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
[ Fix cast from u64* to integer.  - Roland ]

Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h

index 9489a388376ceed97d42fc88064cebbd2403e536..f4fa50a609e21f5ef1fe0ee3660bc921fdded72e 100644 (file)
@@ -682,7 +682,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
        idr_destroy(&ctx->dev->hwtid_idr);
        idr_destroy(&ctx->dev->stid_idr);
        idr_destroy(&ctx->dev->atid_idr);
-       iounmap(ctx->dev->rdev.oc_mw_kva);
+       if (ctx->dev->rdev.bar2_kva)
+               iounmap(ctx->dev->rdev.bar2_kva);
+       if (ctx->dev->rdev.oc_mw_kva)
+               iounmap(ctx->dev->rdev.oc_mw_kva);
        ib_dealloc_device(&ctx->dev->ibdev);
        ctx->dev = NULL;
 }
@@ -722,11 +725,31 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
        }
        devp->rdev.lldi = *infop;
 
-       devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
-               (pci_resource_len(devp->rdev.lldi.pdev, 2) -
-                roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
-       devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
-                                              devp->rdev.lldi.vr->ocq.size);
+       /*
+        * For T5 devices, we map all of BAR2 with WC.
+        * For T4 devices with onchip qp mem, we map only that part
+        * of BAR2 with WC.
+        */
+       devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
+       if (is_t5(devp->rdev.lldi.adapter_type)) {
+               devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
+                       pci_resource_len(devp->rdev.lldi.pdev, 2));
+               if (!devp->rdev.bar2_kva) {
+                       pr_err(MOD "Unable to ioremap BAR2\n");
+                       return ERR_PTR(-EINVAL);
+               }
+       } else if (ocqp_supported(infop)) {
+               devp->rdev.oc_mw_pa =
+                       pci_resource_start(devp->rdev.lldi.pdev, 2) +
+                       pci_resource_len(devp->rdev.lldi.pdev, 2) -
+                       roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
+               devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
+                       devp->rdev.lldi.vr->ocq.size);
+               if (!devp->rdev.oc_mw_kva) {
+                       pr_err(MOD "Unable to ioremap onchip mem\n");
+                       return ERR_PTR(-EINVAL);
+               }
+       }
 
        PDBG(KERN_INFO MOD "ocq memory: "
               "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
@@ -1003,9 +1026,11 @@ static int enable_qp_db(int id, void *p, void *data)
 static void resume_rc_qp(struct c4iw_qp *qp)
 {
        spin_lock(&qp->lock);
-       t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc);
+       t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
+                     is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
        qp->wq.sq.wq_pidx_inc = 0;
-       t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc);
+       t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
+                     is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
        qp->wq.rq.wq_pidx_inc = 0;
        spin_unlock(&qp->lock);
 }
index e872203c5424926b4e04f13171a20aac5702e7d1..7b8c5806a09d84d912d274d4a5da814109e921b8 100644 (file)
@@ -149,6 +149,8 @@ struct c4iw_rdev {
        struct gen_pool *ocqp_pool;
        u32 flags;
        struct cxgb4_lld_info lldi;
+       unsigned long bar2_pa;
+       void __iomem *bar2_kva;
        unsigned long oc_mw_pa;
        void __iomem *oc_mw_kva;
        struct c4iw_stats stats;
index cb76eb5eee1fd6d09fa94015eff08dbeb0a2cb8b..e2fcbf4814f27962367d9efead102e169c05c8ed 100644 (file)
@@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
        wq->db = rdev->lldi.db_reg;
        wq->gts = rdev->lldi.gts_reg;
-       if (user) {
-               wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-                                       (wq->sq.qid << rdev->qpshift);
-               wq->sq.udb &= PAGE_MASK;
-               wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-                                       (wq->rq.qid << rdev->qpshift);
-               wq->rq.udb &= PAGE_MASK;
+       if (user || is_t5(rdev->lldi.adapter_type)) {
+               u32 off;
+
+               off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK;
+               if (user) {
+                       wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+               } else {
+                       off += 128 * (wq->sq.qid & rdev->qpmask) + 8;
+                       wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+               }
+               off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK;
+               if (user) {
+                       wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+               } else {
+                       off += 128 * (wq->rq.qid & rdev->qpmask) + 8;
+                       wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+               }
        }
        wq->rdev = rdev;
        wq->rq.msn = 1;
@@ -299,9 +309,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
        if (ret)
                goto free_dma;
 
-       PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
+       PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n",
             __func__, wq->sq.qid, wq->rq.qid, wq->db,
-            (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
+            (__force unsigned long) wq->sq.udb,
+            (__force unsigned long) wq->rq.udb);
 
        return 0;
 free_dma:
@@ -650,9 +661,10 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
 
        spin_lock_irqsave(&qhp->rhp->lock, flags);
        spin_lock(&qhp->lock);
-       if (qhp->rhp->db_state == NORMAL) {
-               t4_ring_sq_db(&qhp->wq, inc);
-       } else {
+       if (qhp->rhp->db_state == NORMAL)
+               t4_ring_sq_db(&qhp->wq, inc,
+                             is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+       else {
                add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
                qhp->wq.sq.wq_pidx_inc += inc;
        }
@@ -667,9 +679,10 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
 
        spin_lock_irqsave(&qhp->rhp->lock, flags);
        spin_lock(&qhp->lock);
-       if (qhp->rhp->db_state == NORMAL) {
-               t4_ring_rq_db(&qhp->wq, inc);
-       } else {
+       if (qhp->rhp->db_state == NORMAL)
+               t4_ring_rq_db(&qhp->wq, inc,
+                             is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+       else {
                add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
                qhp->wq.rq.wq_pidx_inc += inc;
        }
@@ -686,7 +699,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        enum fw_wr_opcodes fw_opcode = 0;
        enum fw_ri_wr_flags fw_flags;
        struct c4iw_qp *qhp;
-       union t4_wr *wqe;
+       union t4_wr *wqe = NULL;
        u32 num_wrs;
        struct t4_swsqe *swsqe;
        unsigned long flag;
@@ -792,7 +805,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
        }
        if (!qhp->rhp->rdev.status_page->db_off) {
-               t4_ring_sq_db(&qhp->wq, idx);
+               t4_ring_sq_db(&qhp->wq, idx,
+                             is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
                spin_unlock_irqrestore(&qhp->lock, flag);
        } else {
                spin_unlock_irqrestore(&qhp->lock, flag);
@@ -806,7 +820,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 {
        int err = 0;
        struct c4iw_qp *qhp;
-       union t4_recv_wr *wqe;
+       union t4_recv_wr *wqe = NULL;
        u32 num_wrs;
        u8 len16 = 0;
        unsigned long flag;
@@ -858,7 +872,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                num_wrs--;
        }
        if (!qhp->rhp->rdev.status_page->db_off) {
-               t4_ring_rq_db(&qhp->wq, idx);
+               t4_ring_rq_db(&qhp->wq, idx,
+                             is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
                spin_unlock_irqrestore(&qhp->lock, flag);
        } else {
                spin_unlock_irqrestore(&qhp->lock, flag);
@@ -1677,11 +1692,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
                mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
                insert_mmap(ucontext, mm2);
                mm3->key = uresp.sq_db_gts_key;
-               mm3->addr = qhp->wq.sq.udb;
+               mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
                mm3->len = PAGE_SIZE;
                insert_mmap(ucontext, mm3);
                mm4->key = uresp.rq_db_gts_key;
-               mm4->addr = qhp->wq.rq.udb;
+               mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
                mm4->len = PAGE_SIZE;
                insert_mmap(ucontext, mm4);
                if (mm5) {
index eeca8b1e63764cbbd7e8f1a0760028730bd9e944..931bfd105c49549a1530f4e19266d4954e1abc3d 100644 (file)
@@ -292,7 +292,7 @@ struct t4_sq {
        unsigned long phys_addr;
        struct t4_swsqe *sw_sq;
        struct t4_swsqe *oldest_read;
-       u64 udb;
+       u64 __iomem *udb;
        size_t memsize;
        u32 qid;
        u16 in_use;
@@ -314,7 +314,7 @@ struct t4_rq {
        dma_addr_t dma_addr;
        DEFINE_DMA_UNMAP_ADDR(mapping);
        struct t4_swrqe *sw_rq;
-       u64 udb;
+       u64 __iomem *udb;
        size_t memsize;
        u32 qid;
        u32 msn;
@@ -435,15 +435,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq)
                return wq->sq.size * T4_SQ_NUM_SLOTS;
 }
 
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
+/* This function copies 64 byte coalesced work request to memory
+ * mapped BAR2 space. For coalesced WRs, the SGE fetches data
+ * from the FIFO instead of from Host.
+ */
+static inline void pio_copy(u64 __iomem *dst, u64 *src)
+{
+       int count = 8;
+
+       while (count) {
+               writeq(*src, dst);
+               src++;
+               dst++;
+               count--;
+       }
+}
+
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
+                                union t4_wr *wqe)
 {
+
+       /* Flush host queue memory writes. */
        wmb();
+       if (t5) {
+               if (inc == 1 && wqe) {
+                       PDBG("%s: WC wq->sq.pidx = %d\n",
+                            __func__, wq->sq.pidx);
+                       pio_copy(wq->sq.udb + 7, (void *)wqe);
+               } else {
+                       PDBG("%s: DB wq->sq.pidx = %d\n",
+                            __func__, wq->sq.pidx);
+                       writel(PIDX_T5(inc), wq->sq.udb);
+               }
+
+               /* Flush user doorbell area writes. */
+               wmb();
+               return;
+       }
        writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
 }
 
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+                                union t4_recv_wr *wqe)
 {
+
+       /* Flush host queue memory writes. */
        wmb();
+       if (t5) {
+               if (inc == 1 && wqe) {
+                       PDBG("%s: WC wq->rq.pidx = %d\n",
+                            __func__, wq->rq.pidx);
+                       pio_copy(wq->rq.udb + 7, (void *)wqe);
+               } else {
+                       PDBG("%s: DB wq->rq.pidx = %d\n",
+                            __func__, wq->rq.pidx);
+                       writel(PIDX_T5(inc), wq->rq.udb);
+               }
+
+               /* Flush user doorbell area writes. */
+               wmb();
+               return;
+       }
        writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
 }