1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /* Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2021, Alibaba Group */
6 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
7 /* Copyright (c) 2008-2019, IBM Corporation */
10 #include "erdma_verbs.h"
/*
 * erdma_qp_llp_close() - react to the lower-layer (TCP) connection closing.
 * @qp: queue pair whose underlying connection went away.
 *
 * Runs with qp->state_lock held for writing, so it is serialized against
 * other QP state transitions. Active/transitional states are pushed toward
 * CLOSING through erdma_modify_qp_internal(); a QP already CLOSING is
 * forced back to IDLE directly.
 *
 * NOTE(review): the embedded file line numbers jump (24->26, 27->34, 34->38),
 * so break statements, the default branch, and the cep NULL-ing that likely
 * precedes erdma_cep_put() are not visible in this excerpt.
 */
12 void erdma_qp_llp_close(struct erdma_qp *qp)
14 	struct erdma_qp_attrs qp_attrs;
16 	down_write(&qp->state_lock);
18 	switch (qp->attrs.state) {
	/* Any state with a live connection: request transition to CLOSING. */
19 	case ERDMA_QP_STATE_RTS:
20 	case ERDMA_QP_STATE_RTR:
21 	case ERDMA_QP_STATE_IDLE:
22 	case ERDMA_QP_STATE_TERMINATE:
23 		qp_attrs.state = ERDMA_QP_STATE_CLOSING;
24 		erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
	/* Already closing: the LLP teardown completes the close; go IDLE. */
26 	case ERDMA_QP_STATE_CLOSING:
27 		qp->attrs.state = ERDMA_QP_STATE_IDLE;
	/* Drop the connection endpoint reference held by this QP. */
34 		erdma_cep_put(qp->cep);
38 	up_write(&qp->state_lock);
/*
 * erdma_get_ibqp() - look up the ib_qp for a QP number.
 * @ibdev: ib_device owning the QP table.
 * @id:    QP number to resolve.
 *
 * NOTE(review): the excerpt ends before the return statement; presumably it
 * returns &qp->ibqp (or NULL when the lookup fails) — confirm in full source.
 */
41 struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
43 	struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);
/*
 * erdma_modify_qp_state_to_rts() - move a QP to RTS and program the HW.
 * @qp:    queue pair being transitioned.
 * @attrs: requested attributes (state is taken from qp->attrs after update).
 * @mask:  which attributes are valid; LLP_HANDLE and MPA are mandatory here.
 *
 * Collects the TCP 4-tuple from the connection endpoint's socket plus the
 * TCP send/receive sequence numbers, then posts a MODIFY_QP command to the
 * device command queue and waits for completion.
 *
 * Return: 0 on success, or the error from the cmdq (error returns for the
 * missing-mask and getname failures are elided from this excerpt).
 */
51 static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
52 					struct erdma_qp_attrs *attrs,
53 					enum erdma_qp_attr_mask mask)
56 	struct erdma_dev *dev = qp->dev;
57 	struct erdma_cmdq_modify_qp_req req;
59 	struct erdma_cep *cep = qp->cep;
60 	struct sockaddr_storage local_addr, remote_addr;
	/* Both the LLP socket handle and the MPA parameters must be present. */
62 	if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
65 	if (!(mask & ERDMA_QP_ATTR_MPA))
	/* Resolve local and peer socket addresses of the TCP connection. */
68 	ret = getname_local(cep->sock, &local_addr);
72 	ret = getname_peer(cep->sock, &remote_addr);
76 	qp->attrs.state = ERDMA_QP_STATE_RTS;
78 	tp = tcp_sk(qp->cep->sock->sk);
80 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
81 				CMDQ_OPCODE_MODIFY_QP);
83 	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
84 		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
85 		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
	/* MPA extension cookie arrives big-endian on the wire. */
87 	req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
	/* IPv4 4-tuple for the offloaded connection (addresses/ports kept in
	 * network byte order as stored in sockaddr_in). */
88 	req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
89 	req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
90 	req.dport = to_sockaddr_in(remote_addr).sin_port;
91 	req.sport = to_sockaddr_in(local_addr).sin_port;
93 	req.send_nxt = tp->snd_nxt;
94 	/* rsvd tcp seq for mpa-rsp in server. */
95 	if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
96 		req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
97 	req.recv_nxt = tp->rcv_nxt;
99 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
/*
 * erdma_modify_qp_state_to_stop() - transition a QP into a stopped state.
 * @qp:    queue pair being transitioned.
 * @attrs: carries the target state (CLOSING/TERMINATE/ERROR per callers).
 * @mask:  attribute mask (unused here beyond the caller's STATE check).
 *
 * Records the new state in software, then tells the device via a MODIFY_QP
 * command and waits for its completion.
 */
102 static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
103 					 struct erdma_qp_attrs *attrs,
104 					 enum erdma_qp_attr_mask mask)
106 	struct erdma_dev *dev = qp->dev;
107 	struct erdma_cmdq_modify_qp_req req;
	/* Update the software copy first; HW is informed below. */
109 	qp->attrs.state = attrs->state;
111 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
112 				CMDQ_OPCODE_MODIFY_QP);
114 	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
115 		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
117 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
/*
 * erdma_modify_qp_internal() - core QP state machine.
 * @qp:    queue pair to modify (caller holds qp->state_lock per the driver's
 *         locking scheme — see erdma_qp_llp_close(); confirm for other callers).
 * @attrs: requested attribute values.
 * @mask:  which fields of @attrs are valid; only STATE handling is visible.
 *
 * Dispatches on the CURRENT state and the REQUESTED state. Illegal
 * CLOSING-state requests yield -ECONNABORTED.
 *
 * NOTE(review): several lines are elided from this excerpt (the use of
 * drop_conn, break statements, default branches, and the final return), so
 * the conditions under which the connection manager is dropped are only
 * partially visible.
 */
120 int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
121 			     enum erdma_qp_attr_mask mask)
123 	int drop_conn, ret = 0;
	/* Nothing to do unless a state change was requested. */
128 	if (!(mask & ERDMA_QP_ATTR_STATE))
131 	switch (qp->attrs.state) {
132 	case ERDMA_QP_STATE_IDLE:
133 	case ERDMA_QP_STATE_RTR:
134 		if (attrs->state == ERDMA_QP_STATE_RTS) {
135 			ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
136 		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
			/* Not yet connected: just mark ERROR and drop the cep. */
137 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
139 				erdma_cep_put(qp->cep);
142 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
145 	case ERDMA_QP_STATE_RTS:
		/* Connected QP: stop HW first; ordering of the SW state write
		 * differs per target state (before stop for TERMINATE, after
		 * stop for ERROR). */
148 		if (attrs->state == ERDMA_QP_STATE_CLOSING) {
149 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
151 		} else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
152 			qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
153 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
155 		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
156 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
157 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
		/* Tear down the connection endpoint (condition elided). */
162 			erdma_qp_cm_drop(qp);
165 	case ERDMA_QP_STATE_TERMINATE:
166 		if (attrs->state == ERDMA_QP_STATE_ERROR)
167 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
169 	case ERDMA_QP_STATE_CLOSING:
170 		if (attrs->state == ERDMA_QP_STATE_IDLE) {
171 			qp->attrs.state = ERDMA_QP_STATE_IDLE;
172 		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
173 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
174 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
175 		} else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
			/* Any other transition out of CLOSING is invalid. */
176 			return -ECONNABORTED;
/*
 * erdma_qp_safe_free() - kref release callback for a QP.
 *
 * Signals qp->safe_free so that whoever waits on that completion (the
 * destroy path, presumably — not visible here) knows the last reference
 * is gone. No memory is freed in this callback itself.
 */
186 static void erdma_qp_safe_free(struct kref *ref)
188 	struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
190 	complete(&qp->safe_free);
/*
 * erdma_qp_put() - drop a QP reference.
 *
 * When the count reaches zero, erdma_qp_safe_free() completes qp->safe_free.
 * The WARN_ON catches puts on an already-released QP.
 */
193 void erdma_qp_put(struct erdma_qp *qp)
195 	WARN_ON(kref_read(&qp->ref) < 1);
196 	kref_put(&qp->ref, erdma_qp_safe_free);
/*
 * erdma_qp_get() - take a QP reference.
 *
 * NOTE(review): the body is elided from this excerpt; presumably a
 * kref_get(&qp->ref) paired with erdma_qp_put() — confirm in full source.
 */
199 void erdma_qp_get(struct erdma_qp *qp)
/*
 * fill_inline_data() - copy SGE payloads inline into the SQ ring.
 * @qp:           queue pair owning the kernel SQ buffer.
 * @send_wr:      work request whose sg_list supplies the data.
 * @wqe_idx:      index of the WQE's first squeue block (SQEBB).
 * @sgl_offset:   byte offset within the WQE where inline data starts.
 * @length_field: WQE field receiving the total inline byte count (LE32).
 *
 * Data is copied SQEBB by SQEBB; when a copy reaches an SQEBB boundary the
 * destination pointer is re-resolved through get_queue_entry(), which
 * handles ring wrap-around via the sq_size mask.
 *
 * Return: appears to return a count/negative errno; the tail of the
 * function (including the over-size error return near line 219) is elided,
 * so the exact success value is not visible here.
 */
204 static int fill_inline_data(struct erdma_qp *qp,
205 			    const struct ib_send_wr *send_wr, u16 wqe_idx,
206 			    u32 sgl_offset, __le32 *length_field)
208 	u32 remain_size, copy_size, data_off, bytes = 0;
	/* Normalize: fold whole SQEBBs out of the offset into the index. */
212 	wqe_idx += (sgl_offset >> SQEBB_SHIFT);
213 	sgl_offset &= (SQEBB_SIZE - 1);
214 	data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
217 	while (i < send_wr->num_sge) {
218 		bytes += send_wr->sg_list[i].length;
		/* Inline payload must fit the device's inline limit. */
219 		if (bytes > (int)ERDMA_MAX_INLINE)
222 		remain_size = send_wr->sg_list[i].length;
		/* Copy at most up to the end of the current SQEBB. */
226 			copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);
228 			memcpy(data + sgl_offset,
229 			       (void *)(uintptr_t)send_wr->sg_list[i].addr +
232 			remain_size -= copy_size;
233 			data_off += copy_size;
234 			sgl_offset += copy_size;
235 			wqe_idx += (sgl_offset >> SQEBB_SHIFT);
236 			sgl_offset &= (SQEBB_SIZE - 1);
			/* Re-resolve destination: may have wrapped the ring. */
238 			data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
239 					       qp->attrs.sq_size, SQEBB_SHIFT);
246 	*length_field = cpu_to_le32(bytes);
/*
 * fill_sgl() - copy the work request's SGEs into the WQE's SGL area.
 * @qp:           queue pair owning the kernel SQ buffer.
 * @send_wr:      work request supplying sg_list/num_sge.
 * @wqe_idx:      index of the WQE's first squeue block.
 * @sgl_offset:   byte offset of the SGL area; must be 16-byte aligned
 *                (sizeof(struct ib_sge)), enforced by the 0xF check.
 * @length_field: WQE field receiving the summed SGE lengths (LE32).
 *
 * Unlike fill_inline_data(), only the ib_sge descriptors are copied, not
 * the payload. Each iteration re-resolves the destination SQEBB so the
 * SGL may span ring wrap-around. Error returns for the two guard checks
 * and the final return are elided from this excerpt.
 */
251 static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
252 		    u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
	/* Device limit on scatter/gather entries per send WR. */
258 	if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
261 	if (sgl_offset & 0xF)
264 	while (i < send_wr->num_sge) {
265 		wqe_idx += (sgl_offset >> SQEBB_SHIFT);
266 		sgl_offset &= (SQEBB_SIZE - 1);
267 		sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
268 				      qp->attrs.sq_size, SQEBB_SHIFT);
270 		bytes += send_wr->sg_list[i].length;
		/* Copy the descriptor itself; HW fetches the payload by DMA. */
271 		memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
272 		       sizeof(struct ib_sge));
274 		sgl_offset += sizeof(struct ib_sge);
278 	*length_field = cpu_to_le32(bytes);
/*
 * erdma_push_one_sqe() - build one send queue entry from an ib_send_wr.
 * @qp:      queue pair whose kernel SQ receives the entry.
 * @pi:      in/out producer index; idx below is its ring-masked value.
 * @send_wr: the verbs work request to translate.
 *
 * Builds the common WQE header flags (completion, solicited, fence,
 * inline, QPN), then fills an opcode-specific SQE layout, and finally
 * attaches either inline data or an SGL. The WQEBB count and producer
 * index are folded into the header at the end.
 *
 * NOTE(review): many lines are elided (case labels for IB_WR_SEND and
 * IB_WR_REG_MR, break statements, the doorbell/pi advance at the tail,
 * and the final return), so control flow below is only partially visible.
 */
282 static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
283 			      const struct ib_send_wr *send_wr)
285 	u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
286 	u32 idx = *pi & (qp->attrs.sq_size - 1);
287 	enum ib_wr_opcode op = send_wr->opcode;
288 	struct erdma_readreq_sqe *read_sqe;
289 	struct erdma_reg_mr_sqe *regmr_sge;
290 	struct erdma_write_sqe *write_sqe;
291 	struct erdma_send_sqe *send_sqe;
292 	struct ib_rdma_wr *rdma_wr;
294 	__le32 *length_field;
300 	entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
303 	/* Clear the SQE header section. */
	/* Remember wr_id so the CQ poller can report it on completion. */
306 	qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
307 	flags = send_wr->send_flags;
	/* CE: generate a completion if signaled or the QP is sig_all. */
308 	wqe_hdr = FIELD_PREP(
309 		ERDMA_SQE_HDR_CE_MASK,
310 		((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
311 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
312 			      flags & IB_SEND_SOLICITED ? 1 : 0);
313 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
314 			      flags & IB_SEND_FENCE ? 1 : 0);
315 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
316 			      flags & IB_SEND_INLINE ? 1 : 0);
317 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));
320 	case IB_WR_RDMA_WRITE:
321 	case IB_WR_RDMA_WRITE_WITH_IMM:
322 		hw_op = ERDMA_OP_WRITE;
323 		if (op == IB_WR_RDMA_WRITE_WITH_IMM)
324 			hw_op = ERDMA_OP_WRITE_WITH_IMM;
325 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
326 		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
327 		write_sqe = (struct erdma_write_sqe *)entry;
		/* imm_data is already big-endian per the verbs ABI; copied raw. */
329 		write_sqe->imm_data = send_wr->ex.imm_data;
330 		write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
331 		write_sqe->sink_to_h =
332 			cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
333 		write_sqe->sink_to_l =
334 			cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));
336 		length_field = &write_sqe->length;
337 		wqe_size = sizeof(struct erdma_write_sqe);
338 		sgl_offset = wqe_size;
340 	case IB_WR_RDMA_READ:
341 	case IB_WR_RDMA_READ_WITH_INV:
342 		read_sqe = (struct erdma_readreq_sqe *)entry;
		/* HW read request supports exactly one local SGE. */
343 		if (unlikely(send_wr->num_sge != 1))
345 		hw_op = ERDMA_OP_READ;
346 		if (op == IB_WR_RDMA_READ_WITH_INV) {
347 			hw_op = ERDMA_OP_READ_WITH_INV;
348 			read_sqe->invalid_stag =
349 				cpu_to_le32(send_wr->ex.invalidate_rkey);
352 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
353 		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
		/* "sink" is the local buffer the read response lands in. */
354 		read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
355 		read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
356 		read_sqe->sink_to_l =
357 			cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
358 		read_sqe->sink_to_h =
359 			cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));
		/* Remote (source) side goes into a trailing SGE slot. */
361 		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
362 				      qp->attrs.sq_size, SQEBB_SHIFT);
363 		sge->addr = rdma_wr->remote_addr;
364 		sge->lkey = rdma_wr->rkey;
365 		sge->length = send_wr->sg_list[0].length;
366 		wqe_size = sizeof(struct erdma_readreq_sqe) +
367 			   send_wr->num_sge * sizeof(struct ib_sge);
	/* NOTE(review): the IB_WR_SEND case label (~line 370) is elided. */
371 	case IB_WR_SEND_WITH_IMM:
372 	case IB_WR_SEND_WITH_INV:
373 		send_sqe = (struct erdma_send_sqe *)entry;
374 		hw_op = ERDMA_OP_SEND;
375 		if (op == IB_WR_SEND_WITH_IMM) {
376 			hw_op = ERDMA_OP_SEND_WITH_IMM;
377 			send_sqe->imm_data = send_wr->ex.imm_data;
378 		} else if (op == IB_WR_SEND_WITH_INV) {
379 			hw_op = ERDMA_OP_SEND_WITH_INV;
380 			send_sqe->invalid_stag =
381 				cpu_to_le32(send_wr->ex.invalidate_rkey);
383 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
384 		length_field = &send_sqe->length;
385 		wqe_size = sizeof(struct erdma_send_sqe);
386 		sgl_offset = wqe_size;
	/* NOTE(review): the IB_WR_REG_MR case label and the start of the
	 * wqe_hdr |= ... statement (~lines 389-390) are elided. */
391 			    FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
392 		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
393 		mr = to_emr(reg_wr(send_wr)->mr);
		/* Local-read is always granted in addition to requested access. */
395 		mr->access = ERDMA_MR_ACC_LR |
396 			     to_erdma_access_flags(reg_wr(send_wr)->access);
397 		regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
398 		regmr_sge->length = cpu_to_le32(mr->ibmr.length);
399 		regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
400 		attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) |
401 			FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
402 			FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
		/* Small MTTs are embedded in the SQE; larger ones stay external. */
405 		if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) {
406 			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
407 			/* Copy SGLs to SQE content to accelerate */
408 			memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
409 					       qp->attrs.sq_size, SQEBB_SHIFT),
410 			       mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents));
411 			wqe_size = sizeof(struct erdma_reg_mr_sqe) +
412 				   MTT_SIZE(mr->mem.mtt_nents);
414 			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
415 			wqe_size = sizeof(struct erdma_reg_mr_sqe);
418 		regmr_sge->attrs = cpu_to_le32(attrs);
420 	case IB_WR_LOCAL_INV:
421 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
423 		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
424 		regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
425 		wqe_size = sizeof(struct erdma_reg_mr_sqe);
	/* Attach payload: inline copy or SGL descriptors, per IB_SEND_INLINE. */
431 	if (flags & IB_SEND_INLINE) {
432 		ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
437 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
439 		ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
442 		wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
443 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
	/* HW wants the WQEBB count minus one, plus the raw producer index. */
448 	wqebb_cnt = SQEBB_COUNT(wqe_size);
449 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
451 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);
/*
 * kick_sq_db() - ring the SQ doorbell for the given producer index.
 *
 * The doorbell value (QPN + producer index) is mirrored into the sq_db_info
 * shadow area before the MMIO write; presumably the device reads the shadow
 * on doorbell recovery — confirm against the hardware spec.
 */
458 static void kick_sq_db(struct erdma_qp *qp, u16 pi)
460 	u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
461 		      FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);
463 	*(u64 *)qp->kern_qp.sq_db_info = db_data;
464 	writeq(db_data, qp->kern_qp.hw_sq_db);
/*
 * erdma_post_send() - verbs post_send entry point for kernel QPs.
 * @ibqp:        the verbs QP.
 * @send_wr:     chain of work requests to post.
 * @bad_send_wr: out pointer set to the first WR that failed.
 *
 * Serializes SQ producers with qp->lock (irqsave). For each WR it checks
 * for SQ ring fullness, pushes one SQE, then publishes the new producer
 * index and rings the doorbell once at the end.
 *
 * NOTE(review): the loop construct, error bookkeeping after
 * erdma_push_one_sqe() failure, and the final return are elided from this
 * excerpt.
 */
467 int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
468 		    const struct ib_send_wr **bad_send_wr)
470 	struct erdma_qp *qp = to_eqp(ibqp);
472 	const struct ib_send_wr *wr = send_wr;
479 	spin_lock_irqsave(&qp->lock, flags);
480 	sq_pi = qp->kern_qp.sq_pi;
	/* Ring-full check: outstanding entries must stay below sq_size. */
483 		if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
485 			*bad_send_wr = send_wr;
489 		ret = erdma_push_one_sqe(qp, &sq_pi, wr);
	/* Publish the producer index and ring the doorbell once. */
494 		qp->kern_qp.sq_pi = sq_pi;
495 		kick_sq_db(qp, sq_pi);
499 	spin_unlock_irqrestore(&qp->lock, flags);
/*
 * erdma_post_recv_one() - build and post a single receive queue entry.
 * @qp:      queue pair owning the kernel RQ.
 * @recv_wr: the receive work request; at most one SGE is supported here
 *           (num_sge == 0 is accepted as a zero-length receive; the >1
 *           rejection path is elided from this excerpt).
 *
 * The whole first 8 bytes of the RQE double as the doorbell value: they
 * are mirrored to rq_db_info and written to the HW RQ doorbell.
 */
504 static int erdma_post_recv_one(struct erdma_qp *qp,
505 			       const struct ib_recv_wr *recv_wr)
507 	struct erdma_rqe *rqe =
508 		get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
509 				qp->attrs.rq_size, RQE_SHIFT);
	/* qe_idx is 1-based relative to the current producer index. */
511 	rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
512 	rqe->qpn = cpu_to_le32(QP_ID(qp));
514 	if (recv_wr->num_sge == 0) {
516 	} else if (recv_wr->num_sge == 1) {
517 		rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
518 		rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
519 		rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
	/* First 8 bytes of the RQE are the doorbell payload. */
524 	*(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe;
525 	writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);
	/* Record wr_id for completion reporting (assignment RHS elided). */
527 	qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
/*
 * erdma_post_recv() - verbs post_recv entry point for kernel QPs.
 * @ibqp:        the verbs QP.
 * @recv_wr:     chain of receive work requests to post.
 * @bad_recv_wr: out pointer set to the first WR that failed.
 *
 * Holds qp->lock (irqsave) across the chain and posts each WR via
 * erdma_post_recv_one(). The loop construct, failure bookkeeping, and the
 * final return are elided from this excerpt.
 */
534 int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
535 		    const struct ib_recv_wr **bad_recv_wr)
537 	const struct ib_recv_wr *wr = recv_wr;
538 	struct erdma_qp *qp = to_eqp(ibqp);
542 	spin_lock_irqsave(&qp->lock, flags);
545 		ret = erdma_post_recv_one(qp, wr);
553 	spin_unlock_irqrestore(&qp->lock, flags);