Merge HFI1 updates into k.o/for-next
author	Jason Gunthorpe <jgg@mellanox.com>
	Wed, 3 Apr 2019 18:28:05 +0000 (15:28 -0300)
committer	Jason Gunthorpe <jgg@mellanox.com>
	Wed, 3 Apr 2019 18:28:05 +0000 (15:28 -0300)
Based on rdma.git for-rc for dependencies.

From Dennis Dalessandro:

====================

Here are some code improvement patches, plus fixes for less serious TID RDMA
bugs than the ones we sent for the RC cycle.

====================

* HFI1 updates:
  IB/hfi1: Implement CCA for TID RDMA protocol
  IB/hfi1: Remove WARN_ON when freeing expected receive groups
  IB/hfi1: Unify the software PSN check for TID RDMA READ/WRITE
  IB/hfi1: Add a function to read next expected psn from hardware flow
  IB/hfi1: Delay the release of destination mr for TID RDMA WRITE DATA

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/exp_rcv.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/rc.h
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h
drivers/infiniband/hw/hfi1/trace_tid.h
drivers/infiniband/hw/mlx5/odp.c

index 6150567c0b51a64d362488a288c6cd3e838a521c..229d5d4cafe80e6c29099147dff651aec2304f51 100644
@@ -13232,7 +13232,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
        int total_contexts;
        int ret;
        unsigned ngroups;
-       int qos_rmt_count;
+       int rmt_count;
        int user_rmt_reduced;
        u32 n_usr_ctxts;
        u32 send_contexts = chip_send_contexts(dd);
@@ -13294,10 +13294,23 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
                n_usr_ctxts = rcv_contexts - total_contexts;
        }
 
-       /* each user context requires an entry in the RMT */
-       qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
-       if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
-               user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
+       /*
+        * The RMT entries are currently allocated as shown below:
+        * 1. QOS (0 to 128 entries);
+        * 2. FECN (num_kernel_contexts - 1 + num_user_contexts +
+        *    num_vnic_contexts);
+        * 3. VNIC (num_vnic_contexts).
+        * It should be noted that FECN oversubscribes num_vnic_contexts
+        * entries of RMT because both VNIC and PSM could allocate any receive
+        * context between dd->first_dyn_alloc_ctxt and dd->num_rcv_contexts,
+        * and PSM FECN must reserve an RMT entry for each possible PSM receive
+        * context.
+        */
+       rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
+       if (HFI1_CAP_IS_KSET(TID_RDMA))
+               rmt_count += num_kernel_contexts - 1;
+       if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
+               user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
                dd_dev_err(dd,
                           "RMT size is reducing the number of user receive contexts from %u to %d\n",
                           n_usr_ctxts,
@@ -14278,35 +14291,43 @@ bail:
        init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
 }
 
-static void init_user_fecn_handling(struct hfi1_devdata *dd,
-                                   struct rsm_map_table *rmt)
+static void init_fecn_handling(struct hfi1_devdata *dd,
+                              struct rsm_map_table *rmt)
 {
        struct rsm_rule_data rrd;
        u64 reg;
-       int i, idx, regoff, regidx;
+       int i, idx, regoff, regidx, start;
        u8 offset;
+       u32 total_cnt;
+
+       if (HFI1_CAP_IS_KSET(TID_RDMA))
+               /* Exclude context 0 */
+               start = 1;
+       else
+               start = dd->first_dyn_alloc_ctxt;
+
+       total_cnt = dd->num_rcv_contexts - start;
 
        /* there needs to be enough room in the map table */
-       if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
-               dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
+       if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
+               dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n");
                return;
        }
 
        /*
         * RSM will extract the destination context as an index into the
         * map table.  The destination contexts are a sequential block
-        * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
+        * in the range start...num_rcv_contexts-1 (inclusive).
         * Map entries are accessed as offset + extracted value.  Adjust
         * the added offset so this sequence can be placed anywhere in
         * the table - as long as the entries themselves do not wrap.
         * There are only enough bits in offset for the table size, so
         * start with that to allow for a "negative" offset.
         */
-       offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
-                                               (int)dd->first_dyn_alloc_ctxt);
+       offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start);
 
-       for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
-                               i < dd->num_rcv_contexts; i++, idx++) {
+       for (i = start, idx = rmt->used; i < dd->num_rcv_contexts;
+            i++, idx++) {
                /* replace with identity mapping */
                regoff = (idx % 8) * 8;
                regidx = idx / 8;
@@ -14341,7 +14362,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
        /* add rule 1 */
        add_rsm_rule(dd, RSM_INS_FECN, &rrd);
 
-       rmt->used += dd->num_user_contexts;
+       rmt->used += total_cnt;
 }
 
 /* Initialize RSM for VNIC */
@@ -14428,7 +14449,7 @@ static void init_rxe(struct hfi1_devdata *dd)
        rmt = alloc_rsm_map_table(dd);
        /* set up QOS, including the QPN map table */
        init_qos(dd, rmt);
-       init_user_fecn_handling(dd, rmt);
+       init_fecn_handling(dd, rmt);
        complete_rsm_map_table(dd, rmt);
        /* record number of used rsm map entries for vnic */
        dd->vnic.rmt_start = rmt->used;
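A note on the arithmetic in the comment block above, which is the heart of this chip.c change. Below is a minimal standalone sketch of the new RMT budget; NUM_MAP_ENTRIES and the context counts are illustrative stand-ins, not driver values.

#include <stdio.h>

#define NUM_MAP_ENTRIES 256     /* illustrative RSM map size */

/* Sketch of the rmt_count math in set_up_context_variables(). */
static int rmt_budget(int qos_entries, int num_vnic_contexts,
                      int num_kernel_contexts, int tid_rdma_enabled)
{
        /* FECN oversubscribes the VNIC contexts, hence the factor of 2. */
        int count = qos_entries + 2 * num_vnic_contexts;

        /* TID RDMA adds a FECN entry per kernel context except context 0. */
        if (tid_rdma_enabled)
                count += num_kernel_contexts - 1;
        return count;
}

int main(void)
{
        int rmt_count = rmt_budget(128, 8, 16, 1);
        int n_usr_ctxts = 64;

        /* Mirror of the overflow check: shrink the user contexts to fit. */
        if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES)
                n_usr_ctxts = NUM_MAP_ENTRIES - rmt_count;
        printf("rmt_count=%d n_usr_ctxts=%d\n", rmt_count, n_usr_ctxts);
        return 0;
}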
index 867b4e10018f0c06c85f712b7c38ad67b4420374..129e48ec9ee0fbded8f126e29b016d412c2dea37 100644
@@ -514,7 +514,9 @@ bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
         */
        do_cnp = prescan ||
                (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
-                opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE);
+                opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) ||
+               opcode == TID_OP(READ_RESP) ||
+               opcode == TID_OP(ACK);
 
        /* Call appropriate CNP handler */
        if (!ignore_fecn && do_cnp && fecn)
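This driver.c hunk is the requester-facing half of "Implement CCA for TID RDMA protocol": TID READ RESP and TID ACK packets must now also trigger a CNP when they arrive with FECN set. A hedged sketch of the widened predicate follows; the RC range check is valid only because the IBTA RC response opcodes are numerically contiguous, and the two TID values below are assumed placeholders, not the real encodings.

#include <stdbool.h>

enum {
        RC_RDMA_READ_RESPONSE_FIRST = 0x0d,     /* IBTA RC opcodes */
        RC_ATOMIC_ACKNOWLEDGE       = 0x12,
        TID_READ_RESP               = 0xe4,     /* assumed placeholder */
        TID_ACK                     = 0xe7,     /* assumed placeholder */
};

/* Shape of the extended do_cnp decision in hfi1_process_ecn_slowpath(). */
static bool wants_cnp(unsigned int opcode, bool prescan)
{
        return prescan ||
               (opcode >= RC_RDMA_READ_RESPONSE_FIRST &&
                opcode <= RC_ATOMIC_ACKNOWLEDGE) ||
               opcode == TID_READ_RESP || opcode == TID_ACK;
}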
index 1be49a0d9c11a44a41169b8a8e38c33b0700c247..e9d5cc8b771a2227ab27b711e8b9d82b323eef6a 100644
@@ -112,9 +112,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
  */
 void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
 {
-       WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
-       WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));
-
        kfree(rcd->groups);
        rcd->groups = NULL;
        hfi1_exp_tid_group_init(rcd);
index 1390172b488e20ad1aa1958651f39a9fd98df2c3..4e0e9fc0a777c2b4f1184964333797115242ea57 100644
@@ -900,7 +900,9 @@ void notify_error_qp(struct rvt_qp *qp)
                if (!list_empty(&priv->s_iowait.list) &&
                    !(qp->s_flags & RVT_S_BUSY) &&
                    !(priv->s_flags & RVT_S_BUSY)) {
-                       qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+                       qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
+                       iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+                       iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
                        list_del_init(&priv->s_iowait.list);
                        priv->s_iowait.lock = NULL;
                        rvt_put_qp(qp);
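The point of the extra clears in qp.c is ordering: once the QP is unlinked from the iowait list, nothing will ever clear its IOWAIT_PENDING_IB/IOWAIT_PENDING_TID bits, so both engines' pending state must be dropped before list_del_init(). A reduced sketch of that teardown order, using stand-in types rather than the real iowait API:

#include <stdio.h>

#define IOWAIT_PENDING_IB  (1u << 0)    /* stand-ins; the driver clears  */
#define IOWAIT_PENDING_TID (1u << 1)    /* real bit numbers per engine   */
#define ANY_WAIT_IO        (0xffu << 8) /* stand-in for HFI1_S_ANY_WAIT_IO */

struct waiter { unsigned int flags; int linked; };

/* Clear wait and pending state first, then unlink, then drop the ref. */
static void drain_waiter(struct waiter *w, unsigned int *s_flags)
{
        *s_flags &= ~ANY_WAIT_IO;
        w->flags &= ~(IOWAIT_PENDING_IB | IOWAIT_PENDING_TID);
        w->linked = 0;                  /* list_del_init() in the driver */
}

int main(void)
{
        struct waiter w = { .flags = IOWAIT_PENDING_TID, .linked = 1 };
        unsigned int s_flags = ANY_WAIT_IO;

        drain_waiter(&w, &s_flags);
        printf("s_flags=0x%x iowait=0x%x linked=%d\n",
               s_flags, w.flags, w.linked);
        return 0;
}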
index e6726c1ab8669a66722835b43d8b6b3481a11754..5ba39a9f65adcd4ea71aefa58dd597419d9d911e 100644
@@ -140,10 +140,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
        case OP(RDMA_READ_RESPONSE_LAST):
        case OP(RDMA_READ_RESPONSE_ONLY):
                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
-               if (e->rdma_sge.mr) {
-                       rvt_put_mr(e->rdma_sge.mr);
-                       e->rdma_sge.mr = NULL;
-               }
+               release_rdma_sge_mr(e);
                /* FALLTHROUGH */
        case OP(ATOMIC_ACKNOWLEDGE):
                /*
@@ -343,7 +340,8 @@ write_resp:
                        break;
 
                e->sent = 1;
-               qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+               /* Do not free e->rdma_sge until all data are received */
+               qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
                break;
 
        case TID_OP(READ_RESP):
@@ -2643,10 +2641,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
                len = be32_to_cpu(reth->length);
                if (unlikely(offset + len != e->rdma_sge.sge_length))
                        goto unlock_done;
-               if (e->rdma_sge.mr) {
-                       rvt_put_mr(e->rdma_sge.mr);
-                       e->rdma_sge.mr = NULL;
-               }
+               release_rdma_sge_mr(e);
                if (len != 0) {
                        u32 rkey = be32_to_cpu(reth->rkey);
                        u64 vaddr = get_ib_reth_vaddr(reth);
@@ -3088,10 +3083,7 @@ send_last:
                        update_ack_queue(qp, next);
                }
                e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-                       rvt_put_mr(e->rdma_sge.mr);
-                       e->rdma_sge.mr = NULL;
-               }
+               release_rdma_sge_mr(e);
                reth = &ohdr->u.rc.reth;
                len = be32_to_cpu(reth->length);
                if (len) {
@@ -3166,10 +3158,7 @@ send_last:
                        update_ack_queue(qp, next);
                }
                e = &qp->s_ack_queue[qp->r_head_ack_queue];
-               if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-                       rvt_put_mr(e->rdma_sge.mr);
-                       e->rdma_sge.mr = NULL;
-               }
+               release_rdma_sge_mr(e);
                /* Process OPFN special virtual address */
                if (opfn) {
                        opfn_conn_response(qp, e, ateth);
index 8e0935b9bf2a6166881580f5780a39e3c38b3e37..5ed5e85d58413b57155672c637359e0b49ada25f 100644
@@ -41,6 +41,14 @@ static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
        return rvt_restart_sge(ss, wqe, len);
 }
 
+static inline void release_rdma_sge_mr(struct rvt_ack_entry *e)
+{
+       if (e->rdma_sge.mr) {
+               rvt_put_mr(e->rdma_sge.mr);
+               e->rdma_sge.mr = NULL;
+       }
+}
+
 struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
                                      u8 *prev_ack, bool *scheduled);
 int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
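The new helper collapses four open-coded copies of the same put-and-NULL sequence (three in rc.c above, one more in tid_rdma.c below). NULLing the pointer after the put is what makes the call idempotent, which the delayed-release path for TID RDMA WRITE DATA relies on. A toy demonstration with stand-in types, not rdmavt code:

#include <stdio.h>

struct mr { int refcount; };
struct ack_entry { struct mr *mr; };

static void put_mr(struct mr *mr) { mr->refcount--; }

/* Same shape as release_rdma_sge_mr(): drop once, make repeats no-ops. */
static void release_mr(struct ack_entry *e)
{
        if (e->mr) {
                put_mr(e->mr);
                e->mr = NULL;
        }
}

int main(void)
{
        struct mr m = { .refcount = 1 };
        struct ack_entry e = { .mr = &m };

        release_mr(&e);
        release_mr(&e);                         /* safe: pointer is NULL */
        printf("refcount=%d\n", m.refcount);    /* 0, not -1 */
        return 0;
}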
index fdda33aca77f2031ea2357435c029de0508ffdec..eae6f05ca2fa1e018acde5c82a36e9d60458a034 100644
@@ -67,8 +67,6 @@ static u32 mask_generation(u32 a)
 #define TID_RDMA_DESTQP_FLOW_SHIFT      11
 #define TID_RDMA_DESTQP_FLOW_MASK       0x1f
 
-#define TID_FLOW_SW_PSN BIT(0)
-
 #define TID_OPFN_QP_CTXT_MASK 0xff
 #define TID_OPFN_QP_CTXT_SHIFT 56
 #define TID_OPFN_QP_KDETH_MASK 0xff
@@ -128,6 +126,15 @@ static int make_tid_rdma_ack(struct rvt_qp *qp,
                             struct ib_other_headers *ohdr,
                             struct hfi1_pkt_state *ps);
 static void hfi1_do_tid_send(struct rvt_qp *qp);
+static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
+static void tid_rdma_rcv_err(struct hfi1_packet *packet,
+                            struct ib_other_headers *ohdr,
+                            struct rvt_qp *qp, u32 psn, int diff, bool fecn);
+static void update_r_next_psn_fecn(struct hfi1_packet *packet,
+                                  struct hfi1_qp_priv *priv,
+                                  struct hfi1_ctxtdata *rcd,
+                                  struct tid_rdma_flow *flow,
+                                  bool fecn);
 
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
@@ -776,7 +783,6 @@ int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
                rcd->flows[fs->index].generation = fs->generation;
        fs->generation = kern_setup_hw_flow(rcd, fs->index);
        fs->psn = 0;
-       fs->flags = 0;
        dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
        /* get head before dropping lock */
        fqp = first_qp(rcd, &rcd->flow_queue);
@@ -1807,6 +1813,7 @@ sync_check:
                        goto done;
 
                hfi1_kern_clear_hw_flow(req->rcd, qp);
+               qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
                req->state = TID_REQUEST_ACTIVE;
        }
 
@@ -2036,10 +2043,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
                if (psn != e->psn || len != req->total_len)
                        goto unlock;
 
-               if (e->rdma_sge.mr) {
-                       rvt_put_mr(e->rdma_sge.mr);
-                       e->rdma_sge.mr = NULL;
-               }
+               release_rdma_sge_mr(e);
 
                rkey = be32_to_cpu(reth->rkey);
                vaddr = get_ib_reth_vaddr(reth);
@@ -2238,7 +2242,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
        struct ib_reth *reth;
        struct hfi1_qp_priv *qpriv = qp->priv;
        u32 bth0, psn, len, rkey;
-       bool is_fecn;
+       bool fecn;
        u8 next;
        u64 vaddr;
        int diff;
@@ -2248,7 +2252,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
        if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
 
@@ -2267,9 +2271,8 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
 
        diff = delta_psn(psn, qp->r_psn);
        if (unlikely(diff)) {
-               if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff))
-                       return;
-               goto send_ack;
+               tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
+               return;
        }
 
        /* We've verified the request, insert it into the ack queue. */
@@ -2285,10 +2288,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
                update_ack_queue(qp, next);
        }
        e = &qp->s_ack_queue[qp->r_head_ack_queue];
-       if (e->rdma_sge.mr) {
-               rvt_put_mr(e->rdma_sge.mr);
-               e->rdma_sge.mr = NULL;
-       }
+       release_rdma_sge_mr(e);
 
        rkey = be32_to_cpu(reth->rkey);
        qp->r_len = len;
@@ -2324,11 +2324,11 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
 
        /* Schedule the send tasklet. */
        qp->s_flags |= RVT_S_RESP_PENDING;
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        hfi1_schedule_send(qp);
 
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               goto send_ack;
        return;
 
 nack_inv_unlock:
@@ -2345,8 +2345,6 @@ nack_acc:
        rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
        qp->r_ack_psn = qp->r_psn;
-send_ack:
-       hfi1_send_rc_ack(packet, is_fecn);
 }
 
 u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
@@ -2463,12 +2461,12 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
        struct tid_rdma_request *req;
        struct tid_rdma_flow *flow;
        u32 opcode, aeth;
-       bool is_fecn;
+       bool fecn;
        unsigned long flags;
        u32 kpsn, ipsn;
 
        trace_hfi1_sender_rcv_tid_read_resp(qp);
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
@@ -2481,8 +2479,43 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
 
        flow = &req->flows[req->clear_tail];
        /* When header suppression is disabled */
-       if (cmp_psn(ipsn, flow->flow_state.ib_lpsn))
+       if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
+               update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
+
+               if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
+                       goto ack_done;
+               flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
+               /*
+                * Copy the payload to the destination buffer if this packet
+                * is delivered as an eager packet due to an RSM rule and
+                * FECN. The RSM rule matches on the FECN bit in the BTH and
+                * the SH bit in the KDETH header, and therefore will not
+                * match the last packet of each segment, which has the SH
+                * bit cleared.
+                */
+               if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
+                       struct rvt_sge_state ss;
+                       u32 len;
+                       u32 tlen = packet->tlen;
+                       u16 hdrsize = packet->hlen;
+                       u8 pad = packet->pad;
+                       u8 extra_bytes = pad + packet->extra_byte +
+                               (SIZE_OF_CRC << 2);
+                       u32 pmtu = qp->pmtu;
+
+                       if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
+                               goto ack_op_err;
+                       len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
+                       if (unlikely(len < pmtu))
+                               goto ack_op_err;
+                       rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
+                                    false);
+                       /* Raise the sw sequence check flag for the next packet */
+                       priv->s_flags |= HFI1_R_TID_SW_PSN;
+               }
+
                goto ack_done;
+       }
+       flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
        req->ack_pending--;
        priv->pending_tid_r_segs--;
        qp->s_num_rd_atomic--;
@@ -2524,6 +2557,7 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
             req->comp_seg == req->cur_seg) ||
            priv->tid_r_comp == priv->tid_r_reqs) {
                hfi1_kern_clear_hw_flow(priv->rcd, qp);
+               priv->s_flags &= ~HFI1_R_TID_SW_PSN;
                if (req->state == TID_REQUEST_SYNC)
                        req->state = TID_REQUEST_ACTIVE;
        }
@@ -2545,8 +2579,6 @@ ack_op_err:
 
 ack_done:
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               hfi1_send_rc_ack(packet, is_fecn);
 }
 
 void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
@@ -2773,9 +2805,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                                rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
                                return ret;
                        }
-                       if (priv->flow_state.flags & TID_FLOW_SW_PSN) {
+                       if (priv->s_flags & HFI1_R_TID_SW_PSN) {
                                diff = cmp_psn(psn,
-                                              priv->flow_state.r_next_psn);
+                                              flow->flow_state.r_next_psn);
                                if (diff > 0) {
                                        if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
                                                restart_tid_rdma_read_req(rcd,
@@ -2811,22 +2843,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                                                qp->r_flags &=
                                                        ~RVT_R_RDMAR_SEQ;
                                }
-                               priv->flow_state.r_next_psn++;
+                               flow->flow_state.r_next_psn =
+                                       mask_psn(psn + 1);
                        } else {
-                               u64 reg;
                                u32 last_psn;
 
-                               /*
-                                * The only sane way to get the amount of
-                                * progress is to read the HW flow state.
-                                */
-                               reg = read_uctxt_csr(dd, rcd->ctxt,
-                                                    RCV_TID_FLOW_TABLE +
-                                                    (8 * flow->idx));
-                               last_psn = mask_psn(reg);
-
-                               priv->flow_state.r_next_psn = last_psn;
-                               priv->flow_state.flags |= TID_FLOW_SW_PSN;
+                               last_psn = read_r_next_psn(dd, rcd->ctxt,
+                                                          flow->idx);
+                               flow->flow_state.r_next_psn = last_psn;
+                               priv->s_flags |= HFI1_R_TID_SW_PSN;
                                /*
                                 * If no request has been restarted yet,
                                 * restart the current one.
@@ -2891,6 +2916,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
        struct rvt_ack_entry *e;
        struct tid_rdma_request *req;
        struct tid_rdma_flow *flow;
+       int diff = 0;
 
        trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
                                           packet->rhf);
@@ -2974,17 +3000,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                switch (rte) {
                case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
                        if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
-                               u64 reg;
-
                                qpriv->s_flags |= HFI1_R_TID_SW_PSN;
-                               /*
-                                * The only sane way to get the amount of
-                                * progress is to read the HW flow state.
-                                */
-                               reg = read_uctxt_csr(dd, rcd->ctxt,
-                                                    RCV_TID_FLOW_TABLE +
-                                                    (8 * flow->idx));
-                               flow->flow_state.r_next_psn = mask_psn(reg);
+                               flow->flow_state.r_next_psn =
+                                       read_r_next_psn(dd, rcd->ctxt,
+                                                       flow->idx);
                                qpriv->r_next_psn_kdeth =
                                        flow->flow_state.r_next_psn;
                                goto nak_psn;
@@ -2997,10 +3016,12 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                                 * mismatch could be due to packets that were
                                 * already in flight.
                                 */
-                               if (psn != flow->flow_state.r_next_psn) {
-                                       psn = flow->flow_state.r_next_psn;
+                               diff = cmp_psn(psn,
+                                              flow->flow_state.r_next_psn);
+                               if (diff > 0)
                                        goto nak_psn;
-                               }
+                               else if (diff < 0)
+                                       break;
 
                                qpriv->s_nak_state = 0;
                                /*
@@ -3011,8 +3032,10 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
                                if (psn == full_flow_psn(flow,
                                                         flow->flow_state.lpsn))
                                        ret = false;
+                               flow->flow_state.r_next_psn =
+                                       mask_psn(psn + 1);
                                qpriv->r_next_psn_kdeth =
-                                       ++flow->flow_state.r_next_psn;
+                                       flow->flow_state.r_next_psn;
                        }
                        break;
 
@@ -3517,8 +3540,10 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
                        /* If all data has been received, clear the flow */
                        if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
-                           !qpriv->alloc_w_segs)
+                           !qpriv->alloc_w_segs) {
                                hfi1_kern_clear_hw_flow(rcd, qp);
+                               qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
+                       }
                        break;
                }
 
@@ -3544,8 +3569,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
                        hfi1_kern_clear_hw_flow(rcd, qp);
                        qpriv->sync_pt = false;
-                       if (qpriv->s_flags & HFI1_R_TID_SW_PSN)
-                               qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
+                       qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
                }
 
                /* Allocate flow if we don't have one */
@@ -3687,7 +3711,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
        struct hfi1_qp_priv *qpriv = qp->priv;
        struct tid_rdma_request *req;
        u32 bth0, psn, len, rkey, num_segs;
-       bool is_fecn;
+       bool fecn;
        u8 next;
        u64 vaddr;
        int diff;
@@ -3696,7 +3720,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
        if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        trace_hfi1_rsp_rcv_tid_write_req(qp, psn);
 
@@ -3713,9 +3737,8 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
        num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
        diff = delta_psn(psn, qp->r_psn);
        if (unlikely(diff)) {
-               if (tid_rdma_rcv_error(packet, ohdr, qp, psn, diff))
-                       return;
-               goto send_ack;
+               tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
+               return;
        }
 
        /*
@@ -3751,10 +3774,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
                goto update_head;
        }
 
-       if (e->rdma_sge.mr) {
-               rvt_put_mr(e->rdma_sge.mr);
-               e->rdma_sge.mr = NULL;
-       }
+       release_rdma_sge_mr(e);
 
        /* The length needs to be in multiples of PAGE_SIZE */
        if (!len || len & ~PAGE_MASK)
@@ -3834,11 +3854,11 @@ update_head:
 
        /* Schedule the send tasklet. */
        qp->s_flags |= RVT_S_RESP_PENDING;
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        hfi1_schedule_send(qp);
 
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               goto send_ack;
        return;
 
 nack_inv_unlock:
@@ -3855,8 +3875,6 @@ nack_acc:
        rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
        qp->r_ack_psn = qp->r_psn;
-send_ack:
-       hfi1_send_rc_ack(packet, is_fecn);
 }
 
 u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
@@ -4073,10 +4091,10 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
        struct tid_rdma_flow *flow;
        enum ib_wc_status status;
        u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
-       bool is_fecn;
+       bool fecn;
        unsigned long flags;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
@@ -4216,7 +4234,6 @@ void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
                qpriv->s_tid_cur = i;
        }
        qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
-
        hfi1_schedule_tid_send(qp);
        goto ack_done;
 
@@ -4225,9 +4242,9 @@ ack_op_err:
 ack_err:
        rvt_error_qp(qp, status);
 ack_done:
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        spin_unlock_irqrestore(&qp->s_lock, flags);
-       if (is_fecn)
-               hfi1_send_rc_ack(packet, is_fecn);
 }
 
 bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
@@ -4307,7 +4324,9 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        unsigned long flags;
        u32 psn, next;
        u8 opcode;
+       bool fecn;
 
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
 
@@ -4320,9 +4339,53 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        req = ack_to_tid_req(e);
        flow = &req->flows[req->clear_tail];
        if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
+               update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
+
                if (cmp_psn(psn, flow->flow_state.r_next_psn))
                        goto send_nak;
-               flow->flow_state.r_next_psn++;
+
+               flow->flow_state.r_next_psn = mask_psn(psn + 1);
+               /*
+                * Copy the payload to the destination buffer if this packet
+                * is delivered as an eager packet due to an RSM rule and
+                * FECN. The RSM rule matches on the FECN bit in the BTH and
+                * the SH bit in the KDETH header, and therefore will not
+                * match the last packet of each segment, which has the SH
+                * bit cleared.
+                */
+               if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
+                       struct rvt_sge_state ss;
+                       u32 len;
+                       u32 tlen = packet->tlen;
+                       u16 hdrsize = packet->hlen;
+                       u8 pad = packet->pad;
+                       u8 extra_bytes = pad + packet->extra_byte +
+                               (SIZE_OF_CRC << 2);
+                       u32 pmtu = qp->pmtu;
+
+                       if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
+                               goto send_nak;
+                       len = req->comp_seg * req->seg_len;
+                       len += delta_psn(psn,
+                               full_flow_psn(flow, flow->flow_state.spsn)) *
+                               pmtu;
+                       if (unlikely(req->total_len - len < pmtu))
+                               goto send_nak;
+
+                       /*
+                        * The e->rdma_sge field is set when TID RDMA WRITE REQ
+                        * is first received and is never modified thereafter.
+                        */
+                       ss.sge = e->rdma_sge;
+                       ss.sg_list = NULL;
+                       ss.num_sge = 1;
+                       ss.total_len = req->total_len;
+                       rvt_skip_sge(&ss, len, false);
+                       rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
+                                    false);
+                       /* Raise the sw sequence check flag for the next packet */
+                       priv->r_next_psn_kdeth = mask_psn(psn + 1);
+                       priv->s_flags |= HFI1_R_TID_SW_PSN;
+               }
                goto exit;
        }
        flow->flow_state.r_next_psn = mask_psn(psn + 1);
@@ -4347,6 +4410,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
                priv->r_tid_ack = priv->r_tid_tail;
 
        if (opcode == TID_OP(WRITE_DATA_LAST)) {
+               release_rdma_sge_mr(e);
                for (next = priv->r_tid_tail + 1; ; next++) {
                        if (next > rvt_size_atomic(&dev->rdi))
                                next = 0;
@@ -4386,6 +4450,8 @@ done:
        hfi1_schedule_tid_send(qp);
 exit:
        priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        spin_unlock_irqrestore(&qp->s_lock, flags);
        return;
 
@@ -4487,12 +4553,11 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
        struct tid_rdma_request *req;
        struct tid_rdma_flow *flow;
        u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn;
-       bool is_fecn;
        unsigned long flags;
        u16 fidx;
 
        trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
-       is_fecn = process_ecn(qp, packet);
+       process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
        aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
        req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
@@ -4846,10 +4911,10 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
        struct tid_rdma_flow *flow;
        struct tid_flow_state *fs = &qpriv->flow_state;
        u32 psn, generation, idx, gen_next;
-       bool is_fecn;
+       bool fecn;
        unsigned long flags;
 
-       is_fecn = process_ecn(qp, packet);
+       fecn = process_ecn(qp, packet);
        psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
 
        generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
@@ -4940,6 +5005,8 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
        qpriv->s_flags |= RVT_S_ACK_PENDING;
        hfi1_schedule_tid_send(qp);
 bail:
+       if (fecn)
+               qp->s_flags |= RVT_S_ECN;
        spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
@@ -5464,3 +5531,48 @@ bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
        }
        return false;
 }
+
+static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
+{
+       u64 reg;
+
+       /*
+        * The only sane way to get the amount of progress is to read the
+        * HW flow state.
+        */
+       reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
+       return mask_psn(reg);
+}
+
+static void tid_rdma_rcv_err(struct hfi1_packet *packet,
+                            struct ib_other_headers *ohdr,
+                            struct rvt_qp *qp, u32 psn, int diff, bool fecn)
+{
+       unsigned long flags;
+
+       tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
+       if (fecn) {
+               spin_lock_irqsave(&qp->s_lock, flags);
+               qp->s_flags |= RVT_S_ECN;
+               spin_unlock_irqrestore(&qp->s_lock, flags);
+       }
+}
+
+static void update_r_next_psn_fecn(struct hfi1_packet *packet,
+                                  struct hfi1_qp_priv *priv,
+                                  struct hfi1_ctxtdata *rcd,
+                                  struct tid_rdma_flow *flow,
+                                  bool fecn)
+{
+       /*
+        * If a start/middle packet is delivered here due to an RSM rule
+        * and FECN, we need to update the r_next_psn.
+        */
+       if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
+           !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
+               struct hfi1_devdata *dd = rcd->dd;
+
+               flow->flow_state.r_next_psn =
+                       read_r_next_psn(dd, rcd->ctxt, flow->idx);
+       }
+}
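Much of the churn in tid_rdma.c moves the software PSN state from the per-QP flow_state into the per-flow state plus the HFI1_R_TID_SW_PSN flag, and the new diff > 0 / diff < 0 branches depend on wraparound-safe PSN comparison. As a refresher, here is a self-contained sketch of mask_psn()/cmp_psn()-style serial-number arithmetic; the 24-bit width is illustrative, not necessarily the driver's constant.

#include <stdint.h>
#include <stdio.h>

#define PSN_BITS 24
#define PSN_MASK ((1u << PSN_BITS) - 1)

static uint32_t mask_psn(uint32_t psn) { return psn & PSN_MASK; }

/* Wraparound-safe compare: <0 late/duplicate, 0 in sequence, >0 gap. */
static int32_t cmp_psn(uint32_t a, uint32_t b)
{
        return ((int32_t)((a - b) << (32 - PSN_BITS))) >> (32 - PSN_BITS);
}

int main(void)
{
        uint32_t r_next = mask_psn(PSN_MASK);   /* about to wrap */
        uint32_t psn = mask_psn(r_next + 1);    /* wraps to 0 */

        printf("%d\n", cmp_psn(psn, r_next));   /* 1: one ahead of r_next */
        return 0;
}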
index 53ab24ef4f02a1ea3a90e672a83612009cee3565..1c536185261ee966e0f55bd6b310af73bd2736e5 100644
@@ -76,10 +76,8 @@ struct tid_rdma_qp_params {
 struct tid_flow_state {
        u32 generation;
        u32 psn;
-       u32 r_next_psn;      /* next PSN to be received (in TID space) */
        u8 index;
        u8 last_index;
-       u8 flags;
 };
 
 enum tid_rdma_req_state {
index 548dfc45a40794add98fb5fa3c9aaa4a62950574..4388b594ed1b7c3f6000b40e8b8254275cf364a3 100644
@@ -53,7 +53,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
                            "tid_r_comp %u pending_tid_r_segs %u " \
                            "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
                            "s_state 0x%x hw_flow_index %u generation 0x%x " \
-                           "fpsn 0x%x flow_flags 0x%x"
+                           "fpsn 0x%x"
 
 #define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \
                    "cur_seg %u comp_seg %u ack_seg %u alloc_seg %u " \
@@ -71,7 +71,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
                            "pending_tid_w_segs %u sync_pt %s " \
                            "ps_nak_psn 0x%x ps_nak_state 0x%x " \
                            "prnr_nak_state 0x%x hw_flow_index %u generation "\
-                           "0x%x fpsn 0x%x flow_flags 0x%x resync %s" \
+                           "0x%x fpsn 0x%x resync %s" \
                            "r_next_psn_kdeth 0x%x"
 
 #define TID_WRITE_SENDER_PRN "[%s] qpn 0x%x newreq %u s_tid_cur %u " \
@@ -973,7 +973,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
                __field(u32, hw_flow_index)
                __field(u32, generation)
                __field(u32, fpsn)
-               __field(u32, flow_flags)
        ),
        TP_fast_assign(/* assign */
                struct hfi1_qp_priv *priv = qp->priv;
@@ -991,7 +990,6 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
                __entry->hw_flow_index = priv->flow_state.index;
                __entry->generation = priv->flow_state.generation;
                __entry->fpsn = priv->flow_state.psn;
-               __entry->flow_flags = priv->flow_state.flags;
        ),
        TP_printk(/* print */
                TID_READ_SENDER_PRN,
@@ -1007,8 +1005,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
                __entry->s_state,
                __entry->hw_flow_index,
                __entry->generation,
-               __entry->fpsn,
-               __entry->flow_flags
+               __entry->fpsn
        )
 );
 
@@ -1338,7 +1335,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */
                __field(u32, hw_flow_index)
                __field(u32, generation)
                __field(u32, fpsn)
-               __field(u32, flow_flags)
                __field(bool, resync)
                __field(u32, r_next_psn_kdeth)
        ),
@@ -1360,7 +1356,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */
                __entry->hw_flow_index = priv->flow_state.index;
                __entry->generation = priv->flow_state.generation;
                __entry->fpsn = priv->flow_state.psn;
-               __entry->flow_flags = priv->flow_state.flags;
                __entry->resync = priv->resync;
                __entry->r_next_psn_kdeth = priv->r_next_psn_kdeth;
        ),
@@ -1381,7 +1376,6 @@ DECLARE_EVENT_CLASS(/* tid_write_sp */
                __entry->hw_flow_index,
                __entry->generation,
                __entry->fpsn,
-               __entry->flow_flags,
                __entry->resync ? "yes" : "no",
                __entry->r_next_psn_kdeth
        )
index 2bc4d67b3e4208636fdc3face10d75b837d039c2..91669e35c6ca8dca4f2b3845134841b89049c23c 100644
@@ -585,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
        struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
        bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
        bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
-       u64 access_mask = ODP_READ_ALLOWED_BIT;
+       u64 access_mask;
        u64 start_idx, page_mask;
        struct ib_umem_odp *odp;
        size_t size;
@@ -607,6 +607,7 @@ next_mr:
        page_shift = mr->umem->page_shift;
        page_mask = ~(BIT(page_shift) - 1);
        start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+       access_mask = ODP_READ_ALLOWED_BIT;
 
        if (prefetch && !downgrade && !mr->umem->writable) {
                /* prefetch with write-access must
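Finally, the odp.c hunk is a standalone bug fix rather than an HFI1 change: access_mask was initialized once at its declaration, so when pagefault_mr() loops back to next_mr for another MR, a write bit OR'd in by an earlier pass leaked into the next one, granting write access to pages that should have been read-only. Moving the initialization under the label resets it per MR. A reduced model of the bug class and the fix, with stand-in flag names:

#include <stdint.h>
#include <stdio.h>

#define ODP_READ_BIT  (1u << 0)         /* stand-ins for the       */
#define ODP_WRITE_BIT (1u << 1)         /* ODP_*_ALLOWED_BIT flags */

/* Reduced model of pagefault_mr(): "goto next_mr" re-enters the body,
 * so per-MR state must be re-initialized under the label. */
static void fault_mrs(int nr_mrs, int first_is_writable)
{
        uint32_t access_mask;
        int i = 0;

next_mr:
        access_mask = ODP_READ_BIT;     /* the fix: re-init on every pass */
        if (i == 0 && first_is_writable)
                access_mask |= ODP_WRITE_BIT;
        printf("mr %d mask 0x%x\n", i, access_mask);
        if (++i < nr_mrs)
                goto next_mr;
}

int main(void)
{
        /* Before the fix, mr 1 would inherit ODP_WRITE_BIT from mr 0. */
        fault_mrs(2, 1);
        return 0;
}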