1 /* QLogic qedr NIC Driver
2 * Copyright (c) 2015-2016 QLogic Corporation
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and /or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
37 #include <linux/iommu.h>
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 #include "qedr_iw_cm.h"
/* SRQ geometry: element size and the max SGEs (plus one header element)
 * that fit in a single SRQ WQE.
 */
56 #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm)
57 #define RDMA_MAX_SGE_PER_SRQ (4)
58 #define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1)
/* Convert a PWM doorbell offset enum value into a byte offset. */
60 #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
/* Mmap-entry flavors: write-combined doorbell BAR I/O vs. a plain kernel
 * page (doorbell-recovery data). NOTE(review): the enum declaration line
 * itself is not visible in this chunk.
 */
63 QEDR_USER_MMAP_IO_WC = 0,
64 QEDR_USER_MMAP_PHYS_PAGE,
/* Copy a response to user space, clamped to the user's output buffer
 * length so a shorter (older-ABI) consumer is never overrun.
 */
67 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
70 size_t min_len = min_t(size_t, len, udata->outlen);
72 return ib_copy_to_udata(udata, src, min_len);
75 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
77 if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
80 *pkey = QEDR_ROCE_PKEY_DEFAULT;
/* iWARP GID query: the GID is synthesized from the netdev MAC address
 * (zeroed first, then the Ethernet address copied in).
 */
84 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
85 int index, union ib_gid *sgid)
87 struct qedr_dev *dev = get_qedr_dev(ibdev);
89 memset(sgid->raw, 0, sizeof(sgid->raw));
90 ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
92 DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
93 sgid->global.interface_id, sgid->global.subnet_prefix);
/* Fill @srq_attr with the SRQ's current limit and the device-wide
 * maximum WR/SGE capabilities.
 */
98 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
100 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
101 struct qedr_device_attr *qattr = &dev->attr;
102 struct qedr_srq *srq = get_qedr_srq(ibsrq);
104 srq_attr->srq_limit = srq->srq_limit;
105 srq_attr->max_wr = qattr->max_srq_wr;
106 srq_attr->max_sge = qattr->max_sge;
/* Translate the qed-layer device attributes into the ib_core
 * ib_device_attr structure. Fails early if the RDMA context was never
 * initialized.
 */
111 int qedr_query_device(struct ib_device *ibdev,
112 struct ib_device_attr *attr, struct ib_udata *udata)
114 struct qedr_dev *dev = get_qedr_dev(ibdev);
115 struct qedr_device_attr *qattr = &dev->attr;
117 if (!dev->rdma_ctx) {
119 "qedr_query_device called with invalid params rdma_ctx=%p\n",
124 memset(attr, 0, sizeof(*attr));
126 attr->fw_ver = qattr->fw_ver;
127 attr->sys_image_guid = qattr->sys_image_guid;
128 attr->max_mr_size = qattr->max_mr_size;
129 attr->page_size_cap = qattr->page_size_caps;
130 attr->vendor_id = qattr->vendor_id;
131 attr->vendor_part_id = qattr->vendor_part_id;
132 attr->hw_ver = qattr->hw_ver;
133 attr->max_qp = qattr->max_qp;
134 attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
135 attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
136 IB_DEVICE_RC_RNR_NAK_GEN |
137 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
139 attr->max_send_sge = qattr->max_sge;
140 attr->max_recv_sge = qattr->max_sge;
141 attr->max_sge_rd = qattr->max_sge;
142 attr->max_cq = qattr->max_cq;
143 attr->max_cqe = qattr->max_cqe;
144 attr->max_mr = qattr->max_mr;
145 attr->max_mw = qattr->max_mw;
146 attr->max_pd = qattr->max_pd;
147 attr->atomic_cap = dev->atomic_cap;
/* Round the read-atomic resource counts down to a power of two via
 * fls(); the responder value is additionally capped by the initiator
 * value.
 */
148 attr->max_qp_init_rd_atom =
149 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
150 attr->max_qp_rd_atom =
151 min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
152 attr->max_qp_init_rd_atom);
154 attr->max_srq = qattr->max_srq;
155 attr->max_srq_sge = qattr->max_srq_sge;
156 attr->max_srq_wr = qattr->max_srq_wr;
158 attr->local_ca_ack_delay = qattr->dev_ack_delay;
159 attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
160 attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
161 attr->max_ah = qattr->max_ah;
/* Map an Ethernet link speed (Mb/s) onto an (IB speed, IB width) pair
 * whose product approximates it. NOTE(review): the switch/case label
 * lines for the individual speed values are not visible in this chunk;
 * only the per-case assignments and the default (SDR/1X) remain.
 */
166 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
171 *ib_speed = IB_SPEED_SDR;
172 *ib_width = IB_WIDTH_1X;
175 *ib_speed = IB_SPEED_QDR;
176 *ib_width = IB_WIDTH_1X;
180 *ib_speed = IB_SPEED_DDR;
181 *ib_width = IB_WIDTH_4X;
185 *ib_speed = IB_SPEED_EDR;
186 *ib_width = IB_WIDTH_1X;
190 *ib_speed = IB_SPEED_QDR;
191 *ib_width = IB_WIDTH_4X;
195 *ib_speed = IB_SPEED_HDR;
196 *ib_width = IB_WIDTH_1X;
200 *ib_speed = IB_SPEED_EDR;
201 *ib_width = IB_WIDTH_4X;
/* Unknown speeds fall back to the slowest advertised combination. */
206 *ib_speed = IB_SPEED_SDR;
207 *ib_width = IB_WIDTH_1X;
/* Report port attributes (state, MTU, GID/P_Key table sizes, speed and
 * width) from the qed port query. iWARP exposes a single GID entry;
 * RoCE exposes the full SGID table plus a P_Key table.
 */
211 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
213 struct qedr_dev *dev;
214 struct qed_rdma_port *rdma_port;
216 dev = get_qedr_dev(ibdev);
218 if (!dev->rdma_ctx) {
219 DP_ERR(dev, "rdma_ctx is NULL\n");
223 rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
225 /* *attr being zeroed by the caller, avoid zeroing it here */
226 if (rdma_port->port_state == QED_RDMA_PORT_UP) {
227 attr->state = IB_PORT_ACTIVE;
228 attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
230 attr->state = IB_PORT_DOWN;
231 attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
233 attr->max_mtu = IB_MTU_4096;
234 attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
239 attr->ip_gids = true;
240 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
241 attr->gid_tbl_len = 1;
243 attr->gid_tbl_len = QEDR_MAX_SGID;
244 attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
246 attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
247 attr->qkey_viol_cntr = 0;
248 get_link_speed_and_width(rdma_port->link_speed,
249 &attr->active_speed, &attr->active_width);
250 attr->max_msg_sz = rdma_port->max_msg_size;
251 attr->max_vl_num = 4;
/* Create a user context: parse the user's request flags, allocate a DPI
 * (doorbell page) from the qed layer, publish the doorbell BAR region
 * through an rdma mmap entry, and return the ABI response (DPM flags,
 * doorbell offset/size, queue limits) to user space.
 */
256 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
258 struct ib_device *ibdev = uctx->device;
260 struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
261 struct qedr_alloc_ucontext_resp uresp = {};
262 struct qedr_alloc_ucontext_req ureq = {};
263 struct qedr_dev *dev = get_qedr_dev(ibdev);
264 struct qed_rdma_add_user_out_params oparams;
265 struct qedr_user_mmap_entry *entry;
271 rc = ib_copy_from_udata(&ureq, udata,
272 min(sizeof(ureq), udata->inlen));
274 DP_ERR(dev, "Problem copying data from user space\n");
/* Cache the user-requested enhanced-DPM mode and doorbell-recovery
 * support bits for later queue setup.
 */
277 ctx->edpm_mode = !!(ureq.context_flags &
278 QEDR_ALLOC_UCTX_EDPM_MODE);
279 ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
282 rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
285 "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
290 ctx->dpi = oparams.dpi;
291 ctx->dpi_addr = oparams.dpi_addr;
292 ctx->dpi_phys_addr = oparams.dpi_phys_addr;
293 ctx->dpi_size = oparams.dpi_size;
/* Expose the DPI's physical doorbell region to user space as a
 * write-combined mmap entry.
 */
294 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
300 entry->io_address = ctx->dpi_phys_addr;
301 entry->length = ctx->dpi_size;
302 entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
303 entry->dpi = ctx->dpi;
305 rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
311 ctx->db_mmap_entry = &entry->rdma_entry;
/* Advertise direct-push (DPM) capabilities: none, iWARP legacy, or the
 * full RoCE set.
 */
313 if (!dev->user_dpm_enabled)
315 else if (rdma_protocol_iwarp(&dev->ibdev, 1))
316 uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
318 uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
319 QEDR_DPM_TYPE_ROCE_LEGACY |
320 QEDR_DPM_TYPE_ROCE_EDPM_MODE;
322 if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
323 uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
324 uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
325 uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
326 uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
329 uresp.wids_enabled = 1;
330 uresp.wid_count = oparams.wid_count;
331 uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
332 uresp.db_size = ctx->dpi_size;
333 uresp.max_send_wr = dev->attr.max_sqe;
334 uresp.max_recv_wr = dev->attr.max_rqe;
335 uresp.max_srq_wr = dev->attr.max_srq_wr;
336 uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
337 uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
338 uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
339 uresp.max_cqes = QEDR_MAX_CQES;
341 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
347 DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
/* Error path: if no mmap entry was created the DPI must be removed
 * directly; otherwise removing the entry triggers qedr_mmap_free().
 */
352 if (!ctx->db_mmap_entry)
353 dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
355 rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
/* Tear down a user context. Removing the mmap entry drops its
 * reference; the DPI itself is released in qedr_mmap_free().
 */
360 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
362 struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
364 DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
367 rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
/* Final-release callback for an rdma mmap entry: free the backing
 * kernel page or return the DPI to the qed layer, depending on the
 * entry's flavor.
 */
370 void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
372 struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
373 struct qedr_dev *dev = entry->dev;
375 if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
376 free_page((unsigned long)entry->address);
377 else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
378 dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
/* mmap handler: look up the rdma mmap entry for the VMA's pgoff and map
 * it — write-combined I/O for doorbell BAR regions, vm_insert_page for
 * doorbell-recovery pages.
 */
383 int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
385 struct ib_device *dev = ucontext->device;
386 size_t length = vma->vm_end - vma->vm_start;
387 struct rdma_user_mmap_entry *rdma_entry;
388 struct qedr_user_mmap_entry *entry;
393 "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
394 vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
396 rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
398 ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
402 entry = get_qedr_mmap_entry(rdma_entry);
404 "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
405 entry->io_address, length, entry->mmap_flag);
407 switch (entry->mmap_flag) {
408 case QEDR_USER_MMAP_IO_WC:
409 pfn = entry->io_address >> PAGE_SHIFT;
410 rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
411 pgprot_writecombine(vma->vm_page_prot),
414 case QEDR_USER_MMAP_PHYS_PAGE:
415 rc = vm_insert_page(vma, vma->vm_start,
416 virt_to_page(entry->address));
424 "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
425 entry->io_address, length, entry->mmap_flag, rc);
/* Drop the lookup reference taken by rdma_user_mmap_entry_get(). */
427 rdma_user_mmap_entry_put(rdma_entry);
/* Allocate a protection domain from the qed layer and, for user
 * consumers, return the PD id through udata (undoing the allocation if
 * the copy fails).
 */
431 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
433 struct ib_device *ibdev = ibpd->device;
434 struct qedr_dev *dev = get_qedr_dev(ibdev);
435 struct qedr_pd *pd = get_qedr_pd(ibpd);
439 DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
440 udata ? "User Lib" : "Kernel");
442 if (!dev->rdma_ctx) {
443 DP_ERR(dev, "invalid RDMA context\n");
447 rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
454 struct qedr_alloc_pd_uresp uresp = {
457 struct qedr_ucontext *context = rdma_udata_to_drv_context(
458 udata, struct qedr_ucontext, ibucontext);
460 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
462 DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
463 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
/* Return the PD id to the qed layer. */
474 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
476 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
477 struct qedr_pd *pd = get_qedr_pd(ibpd);
479 DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
480 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
/* Free every DMA-coherent page of a PBL (page buffer list) table. */
483 static void qedr_free_pbl(struct qedr_dev *dev,
484 struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
486 struct pci_dev *pdev = dev->pdev;
489 for (i = 0; i < pbl_info->num_pbls; i++) {
492 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
493 pbl[i].va, pbl[i].pa);
/* FW PBL page-size bounds and derived capacities: how many 64-bit PBEs
 * fit on one PBL page, and the maximum addressable with a two-layer PBL.
 */
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

/* Parenthesize the parameter so expressions expand safely (PRE01-C). */
#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
/* Allocate a PBL table: one bookkeeping array plus a DMA-coherent page
 * per PBL. For two-layer tables the first page is filled with the
 * physical addresses of the remaining pages. Returns ERR_PTR(-ENOMEM)
 * on failure after freeing everything allocated so far.
 */
506 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
507 struct qedr_pbl_info *pbl_info,
510 struct pci_dev *pdev = dev->pdev;
511 struct qedr_pbl *pbl_table;
512 dma_addr_t *pbl_main_tbl;
517 pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
519 return ERR_PTR(-ENOMEM);
521 for (i = 0; i < pbl_info->num_pbls; i++) {
522 va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
527 pbl_table[i].va = va;
528 pbl_table[i].pa = pa;
531 /* Two-Layer PBLs, if we have more than one pbl we need to initialize
532 * the first one with physical pointers to all of the rest
534 pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
535 for (i = 0; i < pbl_info->num_pbls - 1; i++)
536 pbl_main_tbl[i] = pbl_table[i + 1].pa;
/* Unwind: free the pages allocated before the failing iteration. */
541 for (i--; i >= 0; i--)
542 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
543 pbl_table[i].va, pbl_table[i].pa);
545 qedr_free_pbl(dev, pbl_info, pbl_table);
547 return ERR_PTR(-ENOMEM);
/* Compute the PBL geometry (page size, page count, one vs two layers)
 * needed to describe @num_pbes page-buffer entries, and record it in
 * @pbl_info. Two layers are used only when the count exceeds a single
 * page's capacity and the caller allows it.
 */
550 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
551 struct qedr_pbl_info *pbl_info,
552 u32 num_pbes, int two_layer_capable)
558 if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
559 if (num_pbes > MAX_PBES_TWO_LAYER) {
560 DP_ERR(dev, "prepare pbl table: too many pages %d\n",
565 /* calculate required pbl page size */
566 pbl_size = MIN_FW_PBL_PAGE_SIZE;
567 pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
568 NUM_PBES_ON_PAGE(pbl_size);
/* Grow the page size until a two-layer table can hold num_pbes. */
570 while (pbl_capacity < num_pbes) {
572 pbl_capacity = pbl_size / sizeof(u64);
573 pbl_capacity = pbl_capacity * pbl_capacity;
576 num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
577 num_pbls++; /* One for the layer0 ( points to the pbls) */
578 pbl_info->two_layered = true;
580 /* One layered PBL */
582 pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
583 roundup_pow_of_two((num_pbes * sizeof(u64))));
584 pbl_info->two_layered = false;
587 pbl_info->num_pbls = num_pbls;
588 pbl_info->pbl_size = pbl_size;
589 pbl_info->num_pbes = num_pbes;
591 DP_DEBUG(dev, QEDR_MSG_MR,
592 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
593 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
/* Walk the umem's DMA pages and write each FW-sized page address as a
 * little-endian regpair into the PBL pages, advancing to the next PBL
 * page when the current one fills up. @pg_shift is the FW page shift,
 * which may be smaller than the host PAGE_SHIFT.
 */
598 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
599 struct qedr_pbl *pbl,
600 struct qedr_pbl_info *pbl_info, u32 pg_shift)
602 int pbe_cnt, total_num_pbes = 0;
603 u32 fw_pg_cnt, fw_pg_per_umem_pg;
604 struct qedr_pbl *pbl_tbl;
605 struct sg_dma_page_iter sg_iter;
609 if (!pbl_info->num_pbes)
612 /* If we have a two layered pbl, the first pbl points to the rest
613 * of the pbls and the first entry lays on the second pbl in the table
615 if (pbl_info->two_layered)
620 pbe = (struct regpair *)pbl_tbl->va;
622 DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
/* Number of FW pages contained in one host page. */
628 fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
630 for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
631 pg_addr = sg_page_iter_dma_address(&sg_iter);
632 for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
633 pbe->lo = cpu_to_le32(pg_addr);
634 pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
636 pg_addr += BIT(pg_shift);
641 if (total_num_pbes == pbl_info->num_pbes)
644 /* If the given pbl is full storing the pbes,
647 if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
649 pbe = (struct regpair *)pbl_tbl->va;
/* Register a doorbell address/data pair with the common doorbell
 * recovery mechanism (skipped for old user libs without db_rec).
 */
658 static int qedr_db_recovery_add(struct qedr_dev *dev,
659 void __iomem *db_addr,
661 enum qed_db_rec_width db_width,
662 enum qed_db_rec_space db_space)
665 DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
669 return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
/* Unregister a doorbell from the recovery mechanism (no-op for old
 * user libs).
 */
673 static void qedr_db_recovery_del(struct qedr_dev *dev,
674 void __iomem *db_addr,
678 DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
682 /* Ignore return code as there is not much we can do about it. Error
683 * log will be printed inside.
685 dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
/* Build and copy the create-CQ user response: doorbell offset, icid,
 * and (when doorbell recovery is active) the db-recovery mmap offset.
 */
688 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
689 struct qedr_cq *cq, struct ib_udata *udata,
692 struct qedr_create_cq_uresp uresp;
695 memset(&uresp, 0, sizeof(uresp));
697 uresp.db_offset = db_offset;
698 uresp.icid = cq->icid;
699 if (cq->q.db_mmap_entry)
701 rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
703 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
705 DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
/* Advance the CQ consumer to the next CQE, flipping the ownership
 * toggle bit when the chain wraps past its last element.
 */
710 static void consume_cqe(struct qedr_cq *cq)
712 if (cq->latest_cqe == cq->toggle_cqe)
713 cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
715 cq->latest_cqe = qed_chain_consume(&cq->pbl);
718 static inline int qedr_align_cq_entries(int entries)
720 u64 size, aligned_size;
722 /* We allocate an extra entry that we don't report to the FW. */
723 size = (entries + 1) * QEDR_CQE_SIZE;
724 aligned_size = ALIGN(size, PAGE_SIZE);
726 return aligned_size / QEDR_CQE_SIZE;
/* For queues that need doorbell recovery (and a supporting user lib),
 * allocate a zeroed page to hold the last doorbell value and publish it
 * to user space through an rdma mmap entry.
 */
729 static int qedr_init_user_db_rec(struct ib_udata *udata,
730 struct qedr_dev *dev, struct qedr_userq *q,
731 bool requires_db_rec)
733 struct qedr_ucontext *uctx =
734 rdma_udata_to_drv_context(udata, struct qedr_ucontext,
736 struct qedr_user_mmap_entry *entry;
739 /* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */
740 if (requires_db_rec == 0 || !uctx->db_rec)
743 /* Allocate a page for doorbell recovery, add to mmap */
744 q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
745 if (!q->db_rec_data) {
746 DP_ERR(dev, "get_zeroed_page failed\n");
750 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
752 goto err_free_db_data;
754 entry->address = q->db_rec_data;
755 entry->length = PAGE_SIZE;
756 entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
757 rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
763 q->db_mmap_entry = &entry->rdma_entry;
/* Error path: release the recovery page. */
771 free_page((unsigned long)q->db_rec_data);
772 q->db_rec_data = NULL;
/* Pin a user buffer for a queue (umem), build the PBL describing it in
 * FW-sized pages, and optionally set up doorbell recovery. When
 * @alloc_and_init is false only the pbl_info bookkeeping struct is
 * allocated (e.g. iWARP defers the actual PBL allocation).
 */
776 static inline int qedr_init_user_queue(struct ib_udata *udata,
777 struct qedr_dev *dev,
778 struct qedr_userq *q, u64 buf_addr,
779 size_t buf_len, bool requires_db_rec,
786 q->buf_addr = buf_addr;
787 q->buf_len = buf_len;
788 q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
789 if (IS_ERR(q->umem)) {
790 DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
792 return PTR_ERR(q->umem);
/* Convert host pages to the (smaller) FW page granularity. */
795 fw_pages = ib_umem_page_count(q->umem) <<
796 (PAGE_SHIFT - FW_PAGE_SHIFT);
798 rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
802 if (alloc_and_init) {
803 q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
804 if (IS_ERR(q->pbl_tbl)) {
805 rc = PTR_ERR(q->pbl_tbl);
808 qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
811 q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
818 /* mmap the user address used to store doorbell data for recovery */
819 return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
/* Error path: unpin the user memory. */
822 ib_umem_release(q->umem);
/* Fill the qed create-CQ input params. The CQ pointer itself is used
 * as the 64-bit CQ handle; the DPI comes from the user context when
 * present, otherwise from the kernel's device DPI.
 */
828 static inline void qedr_init_cq_params(struct qedr_cq *cq,
829 struct qedr_ucontext *ctx,
830 struct qedr_dev *dev, int vector,
831 int chain_entries, int page_cnt,
833 struct qed_rdma_create_cq_in_params
836 memset(params, 0, sizeof(*params));
837 params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
838 params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
839 params->cnq_id = vector;
/* One entry is reserved; the FW sees chain_entries - 1. */
840 params->cq_size = chain_entries - 1;
841 params->dpi = (ctx) ? ctx->dpi : dev->dpi;
842 params->pbl_num_pages = page_cnt;
843 params->pbl_ptr = pbl_ptr;
844 params->pbl_two_level = 0;
/* Ring the CQ doorbell: latch the arm flags and consumer index into the
 * doorbell data, then write the whole 64-bit value to the doorbell
 * address in one access.
 */
847 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
849 cq->db.data.agg_flags = flags;
850 cq->db.data.value = cpu_to_le32(cons);
851 writeq(cq->db.raw, cq->db_addr);
/* Request a completion notification: accumulate the solicited/next-comp
 * arm flags under the CQ lock and ring the doorbell with the current
 * consumer index. GSI CQs are software-managed and never armed.
 */
854 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
856 struct qedr_cq *cq = get_qedr_cq(ibcq);
857 unsigned long sflags;
858 struct qedr_dev *dev;
860 dev = get_qedr_dev(ibcq->device);
864 "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
870 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
873 spin_lock_irqsave(&cq->cq_lock, sflags);
877 if (flags & IB_CQ_SOLICITED)
878 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
880 if (flags & IB_CQ_NEXT_COMP)
881 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
883 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
885 spin_unlock_irqrestore(&cq->cq_lock, sflags);
/* Create a CQ. User CQs pin the user buffer and build a PBL; kernel
 * CQs allocate a qed PBL chain. The CQ is then created in the qed
 * layer, the response copied to user space (user case) or the doorbell
 * initialized and registered for recovery (kernel case). Errors unwind
 * in reverse order.
 */
890 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
891 struct ib_udata *udata)
893 struct ib_device *ibdev = ibcq->device;
894 struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
895 udata, struct qedr_ucontext, ibucontext);
896 struct qed_rdma_destroy_cq_out_params destroy_oparams;
897 struct qed_rdma_destroy_cq_in_params destroy_iparams;
898 struct qed_chain_init_params chain_params = {
899 .mode = QED_CHAIN_MODE_PBL,
900 .intended_use = QED_CHAIN_USE_TO_CONSUME,
901 .cnt_type = QED_CHAIN_CNT_TYPE_U32,
902 .elem_size = sizeof(union rdma_cqe),
904 struct qedr_dev *dev = get_qedr_dev(ibdev);
905 struct qed_rdma_create_cq_in_params params;
906 struct qedr_create_cq_ureq ureq = {};
907 int vector = attr->comp_vector;
908 int entries = attr->cqe;
909 struct qedr_cq *cq = get_qedr_cq(ibcq);
917 DP_DEBUG(dev, QEDR_MSG_INIT,
918 "create_cq: called from %s. entries=%d, vector=%d\n",
919 udata ? "User Lib" : "Kernel", entries, vector);
921 if (entries > QEDR_MAX_CQES) {
923 "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
924 entries, QEDR_MAX_CQES);
/* Pad to a whole number of pages, then re-clamp to the device max. */
928 chain_entries = qedr_align_cq_entries(entries);
929 chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
930 chain_params.num_elems = chain_entries;
932 /* calc db offset. user will add DPI base, kernel will add db addr */
933 db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
936 if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
939 "create cq: problem copying data from user space\n");
945 "create cq: cannot create a cq with 0 entries\n");
949 cq->cq_type = QEDR_CQ_TYPE_USER;
951 rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
952 ureq.len, true, IB_ACCESS_LOCAL_WRITE,
957 pbl_ptr = cq->q.pbl_tbl->pa;
958 page_cnt = cq->q.pbl_info.num_pbes;
960 cq->ibcq.cqe = chain_entries;
961 cq->q.db_addr = ctx->dpi_addr + db_offset;
963 cq->cq_type = QEDR_CQ_TYPE_KERNEL;
965 rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
970 page_cnt = qed_chain_get_page_cnt(&cq->pbl);
971 pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
972 cq->ibcq.cqe = cq->pbl.capacity;
975 qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
978 rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
983 cq->sig = QEDR_CQ_MAGIC_NUMBER;
984 spin_lock_init(&cq->cq_lock);
987 rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
991 rc = qedr_db_recovery_add(dev, cq->q.db_addr,
992 &cq->q.db_rec_data->db_data,
999 /* Generate doorbell address. */
1000 cq->db.data.icid = cq->icid;
1001 cq->db_addr = dev->db_addr + db_offset;
1002 cq->db.data.params = DB_AGG_CMD_SET <<
1003 RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1005 /* point to the very last element, passing it we will toggle */
1006 cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1007 cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1008 cq->latest_cqe = NULL;
1010 cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1012 rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
1013 DB_REC_WIDTH_64B, DB_REC_KERNEL);
1018 DP_DEBUG(dev, QEDR_MSG_CQ,
1019 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1020 cq->icid, cq, params.cq_size);
/* Error unwind: destroy the FW CQ, then free PBL/umem or the kernel
 * chain depending on which path allocated resources.
 */
1025 destroy_iparams.icid = cq->icid;
1026 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1030 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1031 ib_umem_release(cq->q.umem);
1032 if (cq->q.db_mmap_entry)
1033 rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1035 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1041 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1043 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1044 struct qedr_cq *cq = get_qedr_cq(ibcq);
1046 DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
/* Destroy-CQ drain loop: number of polls and per-poll delay
 * (microseconds for the spin pass, milliseconds for the sleep pass).
 */
1051 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
1052 #define QEDR_DESTROY_CQ_ITER_DURATION (10)
/* Destroy a CQ: deregister doorbells, destroy the FW object, free the
 * chain or the user PBL/umem, then wait for any outstanding CNQ
 * notifications so the IRQ handler cannot touch a dead CQ.
 */
1054 void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1056 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1057 struct qed_rdma_destroy_cq_out_params oparams;
1058 struct qed_rdma_destroy_cq_in_params iparams;
1059 struct qedr_cq *cq = get_qedr_cq(ibcq);
1062 DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1066 /* GSI CQs are handled by driver, so they don't exist in the FW */
1067 if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
1068 qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1072 iparams.icid = cq->icid;
1073 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1074 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1077 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1078 ib_umem_release(cq->q.umem);
1080 if (cq->q.db_rec_data) {
1081 qedr_db_recovery_del(dev, cq->q.db_addr,
1082 &cq->q.db_rec_data->db_data);
1083 rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
1086 qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
1089 /* We don't want the IRQ handler to handle a non-existing CQ so we
1090 * wait until all CNQ interrupts, if any, are received. This will always
1091 * happen and will always happen very fast. If not, then a serious error
1092 * has occurred. That is why we can use a long delay.
1093 * We spin for a short time so we don't lose time on context switching
1094 * in case all the completions are handled in that span. Otherwise
1095 * we sleep for a while and check again. Since the CNQ may be
1096 * associated with (only) the current CPU we use msleep to allow the
1097 * current CPU to be freed.
1098 * The CNQ notification is increased in qedr_irq_handler().
1100 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1101 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1102 udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1106 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1107 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1108 msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1112 /* Note that we don't need to have explicit code to wait for the
1113 * completion of the event handler because it is invoked from the EQ.
1114 * Since the destroy CQ ramrod has also been received on the EQ we can
1115 * be certain that there's no event handler in process.
/* Extract source/destination GIDs, VLAN and RoCE mode from the AH's
 * GID attribute into the qed modify-QP params, by network type
 * (IPv6/IB: raw GIDs; IPv4: embedded IPv4 addresses). GID dwords are
 * byte-swapped to host order for the qed layer.
 */
1119 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1120 struct ib_qp_attr *attr,
1122 struct qed_rdma_modify_qp_in_params
1125 const struct ib_gid_attr *gid_attr;
1126 enum rdma_network_type nw_type;
1127 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1132 gid_attr = grh->sgid_attr;
1133 ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1137 nw_type = rdma_gid_attr_network_type(gid_attr);
1139 case RDMA_NETWORK_IPV6:
1140 memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1141 sizeof(qp_params->sgid));
1142 memcpy(&qp_params->dgid.bytes[0],
1144 sizeof(qp_params->dgid));
1145 qp_params->roce_mode = ROCE_V2_IPV6;
1146 SET_FIELD(qp_params->modify_flags,
1147 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1149 case RDMA_NETWORK_IB:
1150 memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1151 sizeof(qp_params->sgid));
1152 memcpy(&qp_params->dgid.bytes[0],
1154 sizeof(qp_params->dgid));
1155 qp_params->roce_mode = ROCE_V1;
1157 case RDMA_NETWORK_IPV4:
1158 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1159 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1160 ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1161 qp_params->sgid.ipv4_addr = ipv4_addr;
1163 qedr_get_ipv4_from_gid(grh->dgid.raw);
1164 qp_params->dgid.ipv4_addr = ipv4_addr;
1165 SET_FIELD(qp_params->modify_flags,
1166 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1167 qp_params->roce_mode = ROCE_V2_IPV4;
/* qed expects GID dwords in host byte order. */
1171 for (i = 0; i < 4; i++) {
1172 qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1173 qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
/* An out-of-range VLAN id means "no VLAN". */
1176 if (qp_params->vlan_id >= VLAN_CFI_MASK)
1177 qp_params->vlan_id = 0;
/* Validate create-QP attributes against device capabilities: QP type
 * (RC or GSI only), SQ depth, inline data, SGE counts, and that user
 * space is not trying to create the privileged GSI QP.
 */
1182 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1183 struct ib_qp_init_attr *attrs,
1184 struct ib_udata *udata)
1186 struct qedr_device_attr *qattr = &dev->attr;
1188 /* QP0... attrs->qp_type == IB_QPT_GSI */
1189 if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1190 DP_DEBUG(dev, QEDR_MSG_QP,
1191 "create qp: unsupported qp type=0x%x requested\n",
1196 if (attrs->cap.max_send_wr > qattr->max_sqe) {
1198 "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1199 attrs->cap.max_send_wr, qattr->max_sqe);
1203 if (attrs->cap.max_inline_data > qattr->max_inline) {
1205 "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1206 attrs->cap.max_inline_data, qattr->max_inline);
1210 if (attrs->cap.max_send_sge > qattr->max_sge) {
1212 "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1213 attrs->cap.max_send_sge, qattr->max_sge);
1217 if (attrs->cap.max_recv_sge > qattr->max_sge) {
1219 "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1220 attrs->cap.max_recv_sge, qattr->max_sge);
1224 /* Unprivileged user space cannot create special QP */
1225 if (udata && attrs->qp_type == IB_QPT_GSI) {
1227 "create qp: userspace can't create special QPs of type=0x%x\n",
/* Return the new SRQ's id to user space. */
1235 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1236 struct qedr_srq *srq, struct ib_udata *udata)
1238 struct qedr_create_srq_uresp uresp = {};
1241 uresp.srq_id = srq->srq_id;
1243 rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1245 DP_ERR(dev, "create srq: problem copying data to user space\n");
/* Fill the RQ part of the create-QP user response: doorbell offset(s),
 * icid, and the doorbell-recovery mmap offset when present.
 */
1250 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1251 struct qedr_create_qp_uresp *uresp,
1254 /* iWARP requires two doorbells per RQ. */
1255 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1256 uresp->rq_db_offset =
1257 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1258 uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1260 uresp->rq_db_offset =
1261 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1264 uresp->rq_icid = qp->icid;
1265 if (qp->urq.db_mmap_entry)
1266 uresp->rq_db_rec_addr =
1267 rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
/* Fill the SQ part of the create-QP user response. RoCE SQs use
 * icid + 1 because the RQ occupies the base icid.
 */
1270 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1271 struct qedr_create_qp_uresp *uresp,
1274 uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1276 /* iWARP uses the same cid for rq and sq */
1277 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1278 uresp->sq_icid = qp->icid;
1280 uresp->sq_icid = qp->icid + 1;
1282 if (qp->usq.db_mmap_entry)
1283 uresp->sq_db_rec_addr =
1284 rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
/* Assemble the full create-QP user response (SQ + RQ parts, atomic
 * capability, qp_id) and copy it to user space.
 */
1287 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1288 struct qedr_qp *qp, struct ib_udata *udata,
1289 struct qedr_create_qp_uresp *uresp)
1293 memset(uresp, 0, sizeof(*uresp));
1294 qedr_copy_sq_uresp(dev, uresp, qp);
1295 qedr_copy_rq_uresp(dev, uresp, qp);
1297 uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1298 uresp->qp_id = qp->qp_id;
1300 rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
1303 "create qp: failed a copy to user space with qp icid=0x%x.\n",
/* Initialize the driver-side QP state shared by user and kernel QPs:
 * lock, iWARP refcount/completion, capabilities copied from the create
 * attributes, and the attached CQs/SRQ.
 */
1309 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1312 struct ib_qp_init_attr *attrs)
1314 spin_lock_init(&qp->q_lock);
1315 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1316 kref_init(&qp->refcnt);
1317 init_completion(&qp->iwarp_cm_comp);
1320 qp->qp_type = attrs->qp_type;
1321 qp->max_inline_data = attrs->cap.max_inline_data;
1322 qp->sq.max_sges = attrs->cap.max_send_sge;
1323 qp->state = QED_ROCE_QP_STATE_RESET;
1324 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1325 qp->sq_cq = get_qedr_cq(attrs->send_cq);
1329 qp->srq = get_qedr_srq(attrs->srq);
1331 qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1332 qp->rq.max_sges = attrs->cap.max_recv_sge;
1333 DP_DEBUG(dev, QEDR_MSG_QP,
1334 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1335 qp->rq.max_sges, qp->rq_cq->icid);
1338 DP_DEBUG(dev, QEDR_MSG_QP,
1339 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1340 pd->pd_id, qp->qp_type, qp->max_inline_data,
1341 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1342 DP_DEBUG(dev, QEDR_MSG_QP,
1343 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1344 qp->sq.max_sges, qp->sq_cq->icid);
/* Program the kernel-QP RoCE doorbell addresses (SQ uses icid + 1, RQ
 * uses icid) and register both with the doorbell-recovery mechanism.
 * On RQ registration failure the already-added SQ entry is removed.
 */
1347 static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1351 qp->sq.db = dev->db_addr +
1352 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1353 qp->sq.db_data.data.icid = qp->icid + 1;
1354 rc = qedr_db_recovery_add(dev, qp->sq.db,
1362 qp->rq.db = dev->db_addr +
1363 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1364 qp->rq.db_data.data.icid = qp->icid;
1366 rc = qedr_db_recovery_add(dev, qp->rq.db,
/* Unwind: drop the SQ recovery entry if adding the RQ one failed. */
1371 qedr_db_recovery_del(dev, qp->sq.db,
/* Validate requested SRQ attributes (max_wr, max_sge) against the
 * device capability limits; logs and rejects out-of-range requests.
 */
1378 static int qedr_check_srq_params(struct qedr_dev *dev,
1379 struct ib_srq_init_attr *attrs,
1380 struct ib_udata *udata)
1382 struct qedr_device_attr *qattr = &dev->attr;
1384 if (attrs->attr.max_wr > qattr->max_srq_wr) {
1386 "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1387 attrs->attr.max_wr, qattr->max_srq_wr);
1391 if (attrs->attr.max_sge > qattr->max_sge) {
1393 "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1394 attrs->attr.max_sge, qattr->max_sge);
/* Release user-SRQ resources: the PBL, the SRQ buffer umem and the
 * producer-pair umem.
 */
1401 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1403 qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1404 ib_umem_release(srq->usrq.umem);
1405 ib_umem_release(srq->prod_umem);
/* Release kernel-SRQ resources: the HW chain and the DMA-coherent
 * producer-pair page allocated in qedr_alloc_srq_kernel_params().
 */
1408 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1410 struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1411 struct qedr_dev *dev = srq->dev;
1413 dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1415 dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1416 hw_srq->virt_prod_pair_addr,
1417 hw_srq->phy_prod_pair_addr);
/* Map the user SRQ buffer and its producer pair into the kernel:
 * pins the SRQ queue via qedr_init_user_queue(), then pins the small
 * producer-pair region and records its DMA address. On producer-pin
 * failure the already-pinned queue resources are released.
 */
1420 static int qedr_init_srq_user_params(struct ib_udata *udata,
1421 struct qedr_srq *srq,
1422 struct qedr_create_srq_ureq *ureq,
1425 struct scatterlist *sg;
1428 rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1429 ureq->srq_len, false, access, 1);
1433 srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
1434 sizeof(struct rdma_srq_producers), access);
1435 if (IS_ERR(srq->prod_umem)) {
/* Unwind the queue pinning done above before returning the error. */
1436 qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1437 ib_umem_release(srq->usrq.umem);
1439 "create srq: failed ib_umem_get for producer, got %ld\n",
1440 PTR_ERR(srq->prod_umem));
1441 return PTR_ERR(srq->prod_umem);
/* The producer pair fits one sg entry; take its DMA address directly. */
1444 sg = srq->prod_umem->sg_head.sgl;
1445 srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
/* Allocate kernel-SRQ resources: a DMA-coherent producer-pair page and
 * a PBL-mode HW chain sized max_wr * RDMA_MAX_SRQ_WQE_SIZE elements.
 * The coherent page is freed if the chain allocation fails.
 */
1450 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1451 struct qedr_dev *dev,
1452 struct ib_srq_init_attr *init_attr)
1454 struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1455 struct qed_chain_init_params params = {
1456 .mode = QED_CHAIN_MODE_PBL,
1457 .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1458 .cnt_type = QED_CHAIN_CNT_TYPE_U32,
1459 .elem_size = QEDR_SRQ_WQE_ELEM_SIZE,
1461 dma_addr_t phy_prod_pair_addr;
1466 va = dma_alloc_coherent(&dev->pdev->dev,
1467 sizeof(struct rdma_srq_producers),
1468 &phy_prod_pair_addr, GFP_KERNEL);
1471 "create srq: failed to allocate dma memory for producer\n");
1475 hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1476 hw_srq->virt_prod_pair_addr = va;
1478 num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1479 params.num_elems = num_elems;
/* NOTE(review): "¶ms" looks like mojibake of "&params" — confirm
 * against the upstream source.
 */
1481 rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, ¶ms);
1485 hw_srq->num_elems = num_elems;
/* Error path: free the coherent producer page allocated above. */
1490 dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1491 va, phy_prod_pair_addr);
/* ib_device op: create an SRQ.
 * Flow: validate attrs -> init per-SRQ state -> build the queue either
 * from user memory (udata path) or from a kernel chain -> issue the
 * rdma_create_srq firmware call -> copy the uresp to user space ->
 * publish the SRQ in dev->srqs. Error paths (visible at the bottom)
 * destroy the firmware SRQ and free user/kernel queue resources.
 */
1495 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1496 struct ib_udata *udata)
1498 struct qed_rdma_destroy_srq_in_params destroy_in_params;
1499 struct qed_rdma_create_srq_in_params in_params = {};
1500 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1501 struct qed_rdma_create_srq_out_params out_params;
1502 struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1503 struct qedr_create_srq_ureq ureq = {};
1504 u64 pbl_base_addr, phy_prod_pair_addr;
1505 struct qedr_srq_hwq_info *hw_srq;
1506 u32 page_cnt, page_size;
1507 struct qedr_srq *srq = get_qedr_srq(ibsrq);
1510 DP_DEBUG(dev, QEDR_MSG_QP,
1511 "create SRQ called from %s (pd %p)\n",
1512 (udata) ? "User lib" : "kernel", pd);
1514 rc = qedr_check_srq_params(dev, init_attr, udata);
1519 hw_srq = &srq->hw_srq;
1520 spin_lock_init(&srq->lock);
1522 hw_srq->max_wr = init_attr->attr.max_wr;
1523 hw_srq->max_sges = init_attr->attr.max_sge;
/* User path: copy the ureq and pin the user-provided queue memory. */
1526 if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
1529 "create srq: problem copying data from user space\n");
1533 rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
1537 page_cnt = srq->usrq.pbl_info.num_pbes;
1538 pbl_base_addr = srq->usrq.pbl_tbl->pa;
1539 phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1540 page_size = PAGE_SIZE;
/* Kernel path: allocate the chain and take its PBL geometry. */
1542 struct qed_chain *pbl;
1544 rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1549 page_cnt = qed_chain_get_page_cnt(pbl);
1550 pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1551 phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1552 page_size = QED_CHAIN_PAGE_SIZE;
1555 in_params.pd_id = pd->pd_id;
1556 in_params.pbl_base_addr = pbl_base_addr;
1557 in_params.prod_pair_addr = phy_prod_pair_addr;
1558 in_params.num_pages = page_cnt;
1559 in_params.page_size = page_size;
1561 rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1565 srq->srq_id = out_params.srq_id;
1568 rc = qedr_copy_srq_uresp(dev, srq, udata);
/* Make the SRQ reachable by id (used by async-event lookups). */
1573 rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1577 DP_DEBUG(dev, QEDR_MSG_SRQ,
1578 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
/* Error unwind: destroy firmware SRQ, then free queue resources. */
1582 destroy_in_params.srq_id = srq->srq_id;
1584 dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1587 qedr_free_srq_user_params(srq);
1589 qedr_free_srq_kernel_params(srq);
/* ib_device op: destroy an SRQ. Unpublishes it from dev->srqs, tells
 * the firmware to destroy it, then frees the user- or kernel-side
 * queue resources depending on how it was created.
 */
1594 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1596 struct qed_rdma_destroy_srq_in_params in_params = {};
1597 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1598 struct qedr_srq *srq = get_qedr_srq(ibsrq);
1600 xa_erase_irq(&dev->srqs, srq->srq_id);
1601 in_params.srq_id = srq->srq_id;
1602 dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1605 qedr_free_srq_user_params(srq);
1607 qedr_free_srq_kernel_params(srq);
1609 DP_DEBUG(dev, QEDR_MSG_SRQ,
1610 "destroy srq: destroyed srq with srq_id=0x%0x\n",
/* ib_device op: modify an SRQ. Resizing (IB_SRQ_MAX_WR) is rejected as
 * unsupported; IB_SRQ_LIMIT validates the requested limit against
 * max_wr and programs the firmware WQE limit, caching it in srq.
 */
1614 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1615 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1617 struct qed_rdma_modify_srq_in_params in_params = {};
1618 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1619 struct qedr_srq *srq = get_qedr_srq(ibsrq);
/* SRQ resize is not supported by this driver. */
1622 if (attr_mask & IB_SRQ_MAX_WR) {
1624 "modify srq: invalid attribute mask=0x%x specified for %p\n",
1629 if (attr_mask & IB_SRQ_LIMIT) {
1630 if (attr->srq_limit >= srq->hw_srq.max_wr) {
1632 "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1633 attr->srq_limit, srq->hw_srq.max_wr);
1637 in_params.srq_id = srq->srq_id;
1638 in_params.wqe_limit = attr->srq_limit;
1639 rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1644 srq->srq_limit = attr->srq_limit;
1646 DP_DEBUG(dev, QEDR_MSG_SRQ,
1647 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
/* Populate the firmware create-QP input params common to user and
 * kernel QPs: async-event QP handle (split into lo/hi 32 bits of the
 * qp pointer), signaling mode, PD/DPI, CQ ids and SRQ binding.
 */
1653 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1656 struct ib_qp_init_attr *attrs,
1657 bool fmr_and_reserved_lkey,
1658 struct qed_rdma_create_qp_in_params *params)
1660 /* QP handle to be written in an async event */
1661 params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1662 params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1664 params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1665 params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1666 params->pd = pd->pd_id;
/* User QPs use the context DPI; kernel QPs use the device DPI. */
1667 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1668 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1669 params->stats_queue = 0;
1671 params->use_srq = false;
1674 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
/* SRQ-attached variant: also pass the srq_id and flag its use. */
1677 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1678 params->srq_id = qp->srq->srq_id;
1679 params->use_srq = true;
/* Debug-log the buffer addresses/lengths of a newly created user QP. */
1683 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1685 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1694 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
/* iWARP user QP: copy the PBL addresses returned by the firmware
 * create-QP call into the SQ/RQ pbl tables, then fill them with the
 * pinned user pages (FW_PAGE_SHIFT granularity).
 */
1698 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1700 struct qed_rdma_create_qp_out_params *out_params)
1702 qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1703 qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1705 qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1706 &qp->usq.pbl_info, FW_PAGE_SHIFT);
1708 qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1709 qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1712 qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1713 &qp->urq.pbl_info, FW_PAGE_SHIFT);
/* Tear down user-QP resources: release SQ/RQ umems (pointers NULLed to
 * guard against double release), free the PBLs (RoCE frees the tables
 * via qedr_free_pbl; the other path kfrees them directly), remove the
 * doorbell-recovery entries and their mmap entries, plus the iWARP
 * second-doorbell recovery entry.
 */
1716 static void qedr_cleanup_user(struct qedr_dev *dev,
1717 struct qedr_ucontext *ctx,
1720 ib_umem_release(qp->usq.umem);
1721 qp->usq.umem = NULL;
1723 ib_umem_release(qp->urq.umem);
1724 qp->urq.umem = NULL;
1726 if (rdma_protocol_roce(&dev->ibdev, 1)) {
1727 qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1728 qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1730 kfree(qp->usq.pbl_tbl);
1731 kfree(qp->urq.pbl_tbl);
1734 if (qp->usq.db_rec_data) {
1735 qedr_db_recovery_del(dev, qp->usq.db_addr,
1736 &qp->usq.db_rec_data->db_data);
1737 rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
1740 if (qp->urq.db_rec_data) {
1741 qedr_db_recovery_del(dev, qp->urq.db_addr,
1742 &qp->urq.db_rec_data->db_data);
1743 rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
/* iWARP has a second RQ doorbell registered for recovery. */
1746 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1747 qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
1748 &qp->urq.db_rec_db2_data);
/* Create a QP backed by user-space queues.
 * Flow: copy the ureq -> pin SQ and RQ user memory (PBLs are allocated
 * and initialized up front only for RoCE, see alloc_and_init) -> issue
 * the firmware create-QP -> (iWARP) populate PBLs from the returned
 * addresses -> copy the uresp -> compute doorbell addresses from the
 * context DPI + uresp offsets -> register doorbell-recovery entries.
 * Error paths destroy the firmware QP and run qedr_cleanup_user().
 */
1751 static int qedr_create_user_qp(struct qedr_dev *dev,
1754 struct ib_udata *udata,
1755 struct ib_qp_init_attr *attrs)
1757 struct qed_rdma_create_qp_in_params in_params;
1758 struct qed_rdma_create_qp_out_params out_params;
1759 struct qedr_pd *pd = get_qedr_pd(ibpd);
1760 struct qedr_create_qp_uresp uresp;
1761 struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
1762 struct qedr_create_qp_ureq ureq;
/* RoCE allocates+fills PBLs at pin time; iWARP defers (see below). */
1763 int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1766 qp->create_type = QEDR_QP_CREATE_USER;
1767 memset(&ureq, 0, sizeof(ureq));
1768 rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen));
1770 DP_ERR(dev, "Problem copying data from user space\n");
1774 /* SQ - read access only (0) */
1775 rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1776 ureq.sq_len, true, 0, alloc_and_init);
1781 /* RQ - read access only (0) */
1782 rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1783 ureq.rq_len, true, 0, alloc_and_init);
1788 memset(&in_params, 0, sizeof(in_params));
1789 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1790 in_params.qp_handle_lo = ureq.qp_handle_lo;
1791 in_params.qp_handle_hi = ureq.qp_handle_hi;
1792 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1793 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1795 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1796 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1800 SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
1802 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1803 &in_params, &out_params);
1810 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1811 qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1813 qp->qp_id = out_params.qp_id;
1814 qp->icid = out_params.icid;
1816 rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
1820 /* db offset was calculated in copy_qp_uresp, now set in the user q */
1822 qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
1823 qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
1825 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1826 qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
1828 /* calculate the db_rec_db2 data since it is constant so no
1829 * need to reflect from user
1831 qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
1832 qp->urq.db_rec_db2_data.data.value =
1833 cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
1836 rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
1837 &qp->usq.db_rec_data->db_data,
1843 rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
1844 &qp->urq.db_rec_data->db_data,
1850 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1851 rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
1852 &qp->urq.db_rec_db2_data,
1858 qedr_qp_user_print(dev, qp);
/* Error unwind: destroy the firmware QP, then free user resources. */
1862 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1864 DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1867 qedr_cleanup_user(dev, ctx, qp);
/* Program the kernel-QP iWARP doorbell addresses (SQ and RQ both use
 * qp->icid; the RQ has a second "flags" doorbell carrying the post-RQ
 * command) and register all of them for doorbell recovery.
 */
1871 static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1875 qp->sq.db = dev->db_addr +
1876 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1877 qp->sq.db_data.data.icid = qp->icid;
1879 rc = qedr_db_recovery_add(dev, qp->sq.db,
1886 qp->rq.db = dev->db_addr +
1887 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1888 qp->rq.db_data.data.icid = qp->icid;
1889 qp->rq.iwarp_db2 = dev->db_addr +
1890 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1891 qp->rq.iwarp_db2_data.data.icid = qp->icid;
1892 qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1894 rc = qedr_db_recovery_add(dev, qp->rq.db,
1901 rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
1902 &qp->rq.iwarp_db2_data,
/* RoCE kernel QP: allocate the SQ and RQ chains first, pass their PBL
 * geometry to the firmware create-QP call, then program the RoCE
 * doorbells. (Contrast with the iWARP variant, which creates the
 * firmware QP first and builds chains on the returned external PBLs.)
 */
1909 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1911 struct qed_rdma_create_qp_in_params *in_params,
1912 u32 n_sq_elems, u32 n_rq_elems)
1914 struct qed_rdma_create_qp_out_params out_params;
1915 struct qed_chain_init_params params = {
1916 .mode = QED_CHAIN_MODE_PBL,
1917 .cnt_type = QED_CHAIN_CNT_TYPE_U32,
1921 params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
1922 params.num_elems = n_sq_elems;
1923 params.elem_size = QEDR_SQE_ELEMENT_SIZE;
/* NOTE(review): "¶ms" looks like mojibake of "&params" — confirm
 * against the upstream source.
 */
1925 rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, ¶ms);
1929 in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1930 in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1932 params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
1933 params.num_elems = n_rq_elems;
1934 params.elem_size = QEDR_RQE_ELEMENT_SIZE;
1936 rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, ¶ms);
1940 in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1941 in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1943 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1944 in_params, &out_params);
1949 qp->qp_id = out_params.qp_id;
1950 qp->icid = out_params.icid;
1952 return qedr_set_roce_db_info(dev, qp);
/* iWARP kernel QP: compute page counts up front, create the firmware
 * QP first, then allocate the SQ/RQ chains on the external PBLs the
 * firmware returned (ext_pbl_virt/phys). Ends by programming the iWARP
 * doorbells; the error path destroys the firmware QP.
 */
1956 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1958 struct qed_rdma_create_qp_in_params *in_params,
1959 u32 n_sq_elems, u32 n_rq_elems)
1961 struct qed_rdma_create_qp_out_params out_params;
1962 struct qed_chain_init_params params = {
1963 .mode = QED_CHAIN_MODE_PBL,
1964 .cnt_type = QED_CHAIN_CNT_TYPE_U32,
1968 in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1969 QEDR_SQE_ELEMENT_SIZE,
1970 QED_CHAIN_PAGE_SIZE,
1971 QED_CHAIN_MODE_PBL);
1972 in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1973 QEDR_RQE_ELEMENT_SIZE,
1974 QED_CHAIN_PAGE_SIZE,
1975 QED_CHAIN_MODE_PBL);
1977 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1978 in_params, &out_params);
1983 /* Now we allocate the chain */
1985 params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
1986 params.num_elems = n_sq_elems;
1987 params.elem_size = QEDR_SQE_ELEMENT_SIZE;
1988 params.ext_pbl_virt = out_params.sq_pbl_virt;
1989 params.ext_pbl_phys = out_params.sq_pbl_phys;
/* NOTE(review): "¶ms" looks like mojibake of "&params" — confirm
 * against the upstream source.
 */
1991 rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, ¶ms);
1995 params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
1996 params.num_elems = n_rq_elems;
1997 params.elem_size = QEDR_RQE_ELEMENT_SIZE;
1998 params.ext_pbl_virt = out_params.rq_pbl_virt;
1999 params.ext_pbl_phys = out_params.rq_pbl_phys;
2001 rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, ¶ms);
2005 qp->qp_id = out_params.qp_id;
2006 qp->icid = out_params.icid;
2008 return qedr_set_iwarp_db_info(dev, qp);
/* Error path: destroy the firmware QP created above. */
2011 dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
/* Tear down kernel-QP resources: free the SQ/RQ chains and the shadow
 * wr_id arrays, then (for non-GSI QPs) remove the doorbell-recovery
 * entries, including the iWARP second RQ doorbell.
 */
2016 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
2018 dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
2019 kfree(qp->wqe_wr_id);
2021 dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
2022 kfree(qp->rqe_wr_id);
2024 /* GSI qp is not registered to db mechanism so no need to delete */
2025 if (qp->qp_type == IB_QPT_GSI)
2028 qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
2031 qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
2033 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2034 qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
2035 &qp->rq.iwarp_db2_data);
/* Create a kernel (non-user) QP: allocate shadow wr_id arrays for SQ
 * and RQ, fill the common firmware params, compute element counts and
 * dispatch to the iWARP or RoCE chain/QP creation helper. Failure of
 * the helper triggers qedr_cleanup_kernel().
 */
2039 static int qedr_create_kernel_qp(struct qedr_dev *dev,
2042 struct ib_qp_init_attr *attrs)
2044 struct qed_rdma_create_qp_in_params in_params;
2045 struct qedr_pd *pd = get_qedr_pd(ibpd);
2051 memset(&in_params, 0, sizeof(in_params));
2052 qp->create_type = QEDR_QP_CREATE_KERNEL;
2054 /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
2055 * the ring. The ring should allow at least a single WR, even if the
2056 * user requested none, due to allocation issues.
2057 * We should add an extra WR since the prod and cons indices of
2058 * wqe_wr_id are managed in such a way that the WQ is considered full
2059 * when (prod+1)%max_wr==cons. We currently don't do that because we
2060 * double the number of entries due an iSER issue that pushes far more
2061 * WRs than indicated. If we decline its ib_post_send() then we get
2062 * error prints in the dmesg we'd like to avoid.
2064 qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
2067 qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
2069 if (!qp->wqe_wr_id) {
2070 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
2074 /* QP handle to be written in CQE */
2075 in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
2076 in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
2078 /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
2079 * the ring. There ring should allow at least a single WR, even if the
2080 * user requested none, due to allocation issues.
2082 qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
2084 /* Allocate driver internal RQ array */
2085 qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
2087 if (!qp->rqe_wr_id) {
2089 "create qp: failed RQ shadow memory allocation\n");
/* Free the SQ shadow array allocated above. */
2090 kfree(qp->wqe_wr_id);
2094 qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
/* Clamp SQ entries to device max, keep at least one, then scale to
 * ring elements.
 */
2096 n_sq_entries = attrs->cap.max_send_wr;
2097 n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
2098 n_sq_entries = max_t(u32, n_sq_entries, 1);
2099 n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2101 n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
2103 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2104 rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
2105 n_sq_elems, n_rq_elems);
2107 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
2108 n_sq_elems, n_rq_elems);
2110 qedr_cleanup_kernel(dev, qp);
/* ib_device op: create a QP. Validates attrs, allocates the qedr_qp,
 * sets common parameters, then dispatches: GSI QPs to the GSI helper,
 * user QPs (udata) to qedr_create_user_qp(), kernel QPs to
 * qedr_create_kernel_qp(). iWARP QPs are also published in dev->qps.
 */
2115 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
2116 struct ib_qp_init_attr *attrs,
2117 struct ib_udata *udata)
2119 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2120 struct qedr_pd *pd = get_qedr_pd(ibpd);
2125 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
2126 udata ? "user library" : "kernel", pd);
2128 rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
2132 DP_DEBUG(dev, QEDR_MSG_QP,
2133 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2134 udata ? "user library" : "kernel", attrs->event_handler, pd,
2135 get_qedr_cq(attrs->send_cq),
2136 get_qedr_cq(attrs->send_cq)->icid,
2137 get_qedr_cq(attrs->recv_cq),
2138 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
2140 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2142 DP_ERR(dev, "create qp: failed allocating memory\n");
2143 return ERR_PTR(-ENOMEM);
2146 qedr_set_common_qp_params(dev, qp, pd, attrs);
2148 if (attrs->qp_type == IB_QPT_GSI) {
2149 ibqp = qedr_create_gsi_qp(dev, attrs, qp);
2156 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
2158 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
2163 qp->ibqp.qp_num = qp->qp_id;
/* iWARP: publish the QP for lookup by id. */
2165 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
2166 rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
2176 return ERR_PTR(-EFAULT);
/* Map a firmware (qed_roce) QP state to the corresponding IB verbs
 * QP state.
 */
2179 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
2182 case QED_ROCE_QP_STATE_RESET:
2183 return IB_QPS_RESET;
2184 case QED_ROCE_QP_STATE_INIT:
2186 case QED_ROCE_QP_STATE_RTR:
2188 case QED_ROCE_QP_STATE_RTS:
2190 case QED_ROCE_QP_STATE_SQD:
2192 case QED_ROCE_QP_STATE_ERR:
2194 case QED_ROCE_QP_STATE_SQE:
/* Map an IB verbs QP state to the firmware (qed_roce) QP state;
 * unmapped states fall through to ERR.
 */
2200 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
2201 enum ib_qp_state qp_state)
2205 return QED_ROCE_QP_STATE_RESET;
2207 return QED_ROCE_QP_STATE_INIT;
2209 return QED_ROCE_QP_STATE_RTR;
2211 return QED_ROCE_QP_STATE_RTS;
2213 return QED_ROCE_QP_STATE_SQD;
2215 return QED_ROCE_QP_STATE_ERR;
2217 return QED_ROCE_QP_STATE_ERR;
/* Reset one HW queue (SQ or RQ): rewind its chain and clear the
 * doorbell data value.
 */
2221 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
2223 qed_chain_reset(&qph->pbl);
2227 qph->db_data.data.value = cpu_to_le16(0);
/* Validate and apply a QP state transition on the software side.
 * Nested switch implements the allowed-transition table; a no-op
 * transition (new == cur) returns early. RESET->INIT resets the SQ/RQ
 * HW queue info; INIT->RTR rings the RQ doorbell (RoCE) in case
 * post_recv ran before the move; ERR->RESET warns when the rings are
 * not yet drained.
 */
2230 static int qedr_update_qp_state(struct qedr_dev *dev,
2232 enum qed_roce_qp_state cur_state,
2233 enum qed_roce_qp_state new_state)
2237 if (new_state == cur_state)
2240 switch (cur_state) {
2241 case QED_ROCE_QP_STATE_RESET:
2242 switch (new_state) {
2243 case QED_ROCE_QP_STATE_INIT:
2244 qp->prev_wqe_size = 0;
2245 qedr_reset_qp_hwq_info(&qp->sq);
2246 qedr_reset_qp_hwq_info(&qp->rq);
2253 case QED_ROCE_QP_STATE_INIT:
2254 switch (new_state) {
2255 case QED_ROCE_QP_STATE_RTR:
2256 /* Update doorbell (in case post_recv was
2257 * done before move to RTR)
2260 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2261 writel(qp->rq.db_data.raw, qp->rq.db);
2264 case QED_ROCE_QP_STATE_ERR:
2267 /* Invalid state change. */
2272 case QED_ROCE_QP_STATE_RTR:
2274 switch (new_state) {
2275 case QED_ROCE_QP_STATE_RTS:
2277 case QED_ROCE_QP_STATE_ERR:
2280 /* Invalid state change. */
2285 case QED_ROCE_QP_STATE_RTS:
2287 switch (new_state) {
2288 case QED_ROCE_QP_STATE_SQD:
2290 case QED_ROCE_QP_STATE_ERR:
2293 /* Invalid state change. */
2298 case QED_ROCE_QP_STATE_SQD:
2300 switch (new_state) {
2301 case QED_ROCE_QP_STATE_RTS:
2302 case QED_ROCE_QP_STATE_ERR:
2305 /* Invalid state change. */
2310 case QED_ROCE_QP_STATE_ERR:
2312 switch (new_state) {
2313 case QED_ROCE_QP_STATE_RESET:
2314 if ((qp->rq.prod != qp->rq.cons) ||
2315 (qp->sq.prod != qp->sq.cons)) {
2317 "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2318 qp->rq.prod, qp->rq.cons, qp->sq.prod,
/* ib_device op: modify a QP.
 * Translates the ib_qp_attr mask into firmware modify_flags/params
 * (state, pkey, access flags, AV/MTU, timeouts, retry counts, PSNs,
 * rd_atomic limits, dest QPN), validates the transition for RoCE via
 * ib_modify_qp_is_ok(), issues the firmware modify for non-GSI QPs and
 * finally updates the software state via qedr_update_qp_state().
 */
2336 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2337 int attr_mask, struct ib_udata *udata)
2339 struct qedr_qp *qp = get_qedr_qp(ibqp);
2340 struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2341 struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2342 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2343 enum ib_qp_state old_qp_state, new_qp_state;
2344 enum qed_roce_qp_state cur_state;
2347 DP_DEBUG(dev, QEDR_MSG_QP,
2348 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2351 old_qp_state = qedr_get_ibqp_state(qp->state);
2352 if (attr_mask & IB_QP_STATE)
2353 new_qp_state = attr->qp_state;
2355 new_qp_state = old_qp_state;
/* RoCE only: reject transitions the IB spec does not allow. */
2357 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2358 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2359 ibqp->qp_type, attr_mask)) {
2361 "modify qp: invalid attribute mask=0x%x specified for\n"
2362 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2363 attr_mask, qp->qp_id, ibqp->qp_type,
2364 old_qp_state, new_qp_state);
2370 /* Translate the masks... */
2371 if (attr_mask & IB_QP_STATE) {
2372 SET_FIELD(qp_params.modify_flags,
2373 QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2374 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2377 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2378 qp_params.sqd_async = true;
2380 if (attr_mask & IB_QP_PKEY_INDEX) {
2381 SET_FIELD(qp_params.modify_flags,
2382 QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2383 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2388 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2391 if (attr_mask & IB_QP_QKEY)
2392 qp->qkey = attr->qkey;
2394 if (attr_mask & IB_QP_ACCESS_FLAGS) {
2395 SET_FIELD(qp_params.modify_flags,
2396 QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2397 qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2398 IB_ACCESS_REMOTE_READ;
2399 qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2400 IB_ACCESS_REMOTE_WRITE;
2401 qp_params.incoming_atomic_en = attr->qp_access_flags &
2402 IB_ACCESS_REMOTE_ATOMIC;
/* Address vector / path MTU handling (skipped for iWARP). */
2405 if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2406 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2409 if (attr_mask & IB_QP_PATH_MTU) {
2410 if (attr->path_mtu < IB_MTU_256 ||
2411 attr->path_mtu > IB_MTU_4096) {
2412 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
/* Clamp the requested MTU to the underlying netdev MTU. */
2416 qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2417 ib_mtu_enum_to_int(iboe_get_mtu
2423 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2424 pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2427 SET_FIELD(qp_params.modify_flags,
2428 QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2430 qp_params.traffic_class_tos = grh->traffic_class;
2431 qp_params.flow_label = grh->flow_label;
2432 qp_params.hop_limit_ttl = grh->hop_limit;
2434 qp->sgid_idx = grh->sgid_index;
2436 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2439 "modify qp: problems with GID index %d (rc=%d)\n",
2440 grh->sgid_index, rc);
2444 rc = qedr_get_dmac(dev, &attr->ah_attr,
2445 qp_params.remote_mac_addr);
2449 qp_params.use_local_mac = true;
2450 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2452 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2453 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2454 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2455 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2456 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2457 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2458 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2459 qp_params.remote_mac_addr);
2461 qp_params.mtu = qp->mtu;
2462 qp_params.lb_indication = false;
2465 if (!qp_params.mtu) {
2466 /* Stay with current MTU */
2468 qp_params.mtu = qp->mtu;
2471 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2474 if (attr_mask & IB_QP_TIMEOUT) {
2475 SET_FIELD(qp_params.modify_flags,
2476 QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2478 /* The received timeout value is an exponent used like this:
2479 * "12.7.34 LOCAL ACK TIMEOUT
2480 * Value representing the transport (ACK) timeout for use by
2481 * the remote, expressed as: 4.096 * 2^timeout [usec]"
2482 * The FW expects timeout in msec so we need to divide the usec
2483 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2484 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2485 * The value of zero means infinite so we use a 'max_t' to make
2486 * sure that sub 1 msec values will be configured as 1 msec.
2489 qp_params.ack_timeout =
2490 1 << max_t(int, attr->timeout - 8, 0);
2492 qp_params.ack_timeout = 0;
2495 if (attr_mask & IB_QP_RETRY_CNT) {
2496 SET_FIELD(qp_params.modify_flags,
2497 QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2498 qp_params.retry_cnt = attr->retry_cnt;
2501 if (attr_mask & IB_QP_RNR_RETRY) {
2502 SET_FIELD(qp_params.modify_flags,
2503 QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2504 qp_params.rnr_retry_cnt = attr->rnr_retry;
2507 if (attr_mask & IB_QP_RQ_PSN) {
2508 SET_FIELD(qp_params.modify_flags,
2509 QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2510 qp_params.rq_psn = attr->rq_psn;
2511 qp->rq_psn = attr->rq_psn;
2514 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2515 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2518 "unsupported max_rd_atomic=%d, supported=%d\n",
2519 attr->max_rd_atomic,
2520 dev->attr.max_qp_req_rd_atomic_resc);
2524 SET_FIELD(qp_params.modify_flags,
2525 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2526 qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2529 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2530 SET_FIELD(qp_params.modify_flags,
2531 QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2532 qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2535 if (attr_mask & IB_QP_SQ_PSN) {
2536 SET_FIELD(qp_params.modify_flags,
2537 QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2538 qp_params.sq_psn = attr->sq_psn;
2539 qp->sq_psn = attr->sq_psn;
2542 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2543 if (attr->max_dest_rd_atomic >
2544 dev->attr.max_qp_resp_rd_atomic_resc) {
2546 "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2547 attr->max_dest_rd_atomic,
2548 dev->attr.max_qp_resp_rd_atomic_resc);
2554 SET_FIELD(qp_params.modify_flags,
2555 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2556 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2559 if (attr_mask & IB_QP_DEST_QPN) {
2560 SET_FIELD(qp_params.modify_flags,
2561 QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2563 qp_params.dest_qp = attr->dest_qp_num;
2564 qp->dest_qp_num = attr->dest_qp_num;
2567 cur_state = qp->state;
2569 /* Update the QP state before the actual ramrod to prevent a race with
2570 * fast path. Modifying the QP state to error will cause the device to
2571 * flush the CQEs and while polling the flushed CQEs will considered as
2572 * a potential issue if the QP isn't in error state.
2574 if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2575 !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2576 qp->state = QED_ROCE_QP_STATE_ERR;
/* GSI QPs have no firmware QP to modify. */
2578 if (qp->qp_type != IB_QPT_GSI)
2579 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2580 qp->qed_qp, &qp_params);
2582 if (attr_mask & IB_QP_STATE) {
2583 if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2584 rc = qedr_update_qp_state(dev, qp, cur_state,
2585 qp_params.new_state);
2586 qp->state = qp_params.new_state;
/* Convert firmware query-QP enable bits into IB access flags; local
 * write is always reported.
 */
2593 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2595 int ib_qp_acc_flags = 0;
2597 if (params->incoming_rdma_write_en)
2598 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2599 if (params->incoming_rdma_read_en)
2600 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2601 if (params->incoming_atomic_en)
2602 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2603 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2604 return ib_qp_acc_flags;
/* ib_device op: query a QP. Reads the firmware state via rdma_query_qp
 * and fills qp_attr/qp_init_attr: states, MTU, PSNs, access flags,
 * capabilities cached in the software QP, the (RoCE) AH/GRH fields and
 * timers. Alternate-path fields are zeroed (not supported).
 */
2607 int qedr_query_qp(struct ib_qp *ibqp,
2608 struct ib_qp_attr *qp_attr,
2609 int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2611 struct qed_rdma_query_qp_out_params params;
2612 struct qedr_qp *qp = get_qedr_qp(ibqp);
2613 struct qedr_dev *dev = qp->dev;
/* NOTE(review): "¶ms" looks like mojibake of "&params" — confirm
 * against the upstream source.
 */
2616 memset(¶ms, 0, sizeof(params));
2618 rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms);
2622 memset(qp_attr, 0, sizeof(*qp_attr));
2623 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2625 qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2626 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2627 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2628 qp_attr->path_mig_state = IB_MIG_MIGRATED;
2629 qp_attr->rq_psn = params.rq_psn;
2630 qp_attr->sq_psn = params.sq_psn;
2631 qp_attr->dest_qp_num = params.dest_qp;
2633 qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(¶ms);
/* Capabilities come from the cached software QP, not the firmware. */
2635 qp_attr->cap.max_send_wr = qp->sq.max_wr;
2636 qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2637 qp_attr->cap.max_send_sge = qp->sq.max_sges;
2638 qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2639 qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2640 qp_init_attr->cap = qp_attr->cap;
2642 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2643 rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2644 params.flow_label, qp->sgid_idx,
2645 params.hop_limit_ttl, params.traffic_class_tos);
2646 rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]);
2647 rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2648 rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2649 qp_attr->timeout = params.timeout;
2650 qp_attr->rnr_retry = params.rnr_retry;
2651 qp_attr->retry_cnt = params.retry_cnt;
2652 qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2653 qp_attr->pkey_index = params.pkey_index;
2654 qp_attr->port_num = 1;
2655 rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2656 rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
/* Alternate path is not supported: report zeros. */
2657 qp_attr->alt_pkey_index = 0;
2658 qp_attr->alt_port_num = 0;
2659 qp_attr->alt_timeout = 0;
2660 memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2662 qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2663 qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2664 qp_attr->max_rd_atomic = params.max_rd_atomic;
2665 qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2667 DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2668 qp_attr->cap.max_inline_data);
/* Release the device-side QP object (non-GSI only) and then the
 * user- or kernel-mode queue resources, depending on how the QP was
 * created.
 */
2674 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2675 struct ib_udata *udata)
2677 struct qedr_ucontext *ctx =
2678 rdma_udata_to_drv_context(udata, struct qedr_ucontext,
2682 if (qp->qp_type != IB_QPT_GSI) {
2683 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2688 if (qp->create_type == QEDR_QP_CREATE_USER)
2689 qedr_cleanup_user(dev, ctx, qp);
2691 qedr_cleanup_kernel(dev, qp);
/* .destroy_qp verb.
 * RoCE: if the QP is not already RESET/ERR/INIT, first move it to ERR
 * via qedr_modify_qp() so outstanding work is flushed.
 * iWARP: wait for any in-flight connect and graceful-disconnect CM work
 * to complete (see the comments below for why one completion suffices),
 * then drop the xarray entry before freeing resources to avoid qp_id
 * reuse races.
 */
2696 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2698 struct qedr_qp *qp = get_qedr_qp(ibqp);
2699 struct qedr_dev *dev = qp->dev;
2700 struct ib_qp_attr attr;
2703 DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2706 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2707 if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2708 (qp->state != QED_ROCE_QP_STATE_ERR) &&
2709 (qp->state != QED_ROCE_QP_STATE_INIT)) {
2711 attr.qp_state = IB_QPS_ERR;
2712 attr_mask |= IB_QP_STATE;
2714 /* Change the QP state to ERROR */
2715 qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2718 /* If connection establishment started the WAIT_FOR_CONNECT
2719 * bit will be on and we need to Wait for the establishment
2720 * to complete before destroying the qp.
2722 if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
2723 &qp->iwarp_cm_flags))
2724 wait_for_completion(&qp->iwarp_cm_comp);
2726 /* If graceful disconnect started, the WAIT_FOR_DISCONNECT
2727 * bit will be on, and we need to wait for the disconnect to
2728 * complete before continuing. We can use the same completion,
2729 * iwarp_cm_comp, since this is the only place that waits for
2730 * this completion and it is sequential. In addition,
2731 * disconnect can't occur before the connection is fully
2732 * established, therefore if WAIT_FOR_DISCONNECT is on it
2733 * means WAIT_FOR_CONNECT is also on and the completion for
2734 * CONNECT already occurred.
2736 if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
2737 &qp->iwarp_cm_flags))
2738 wait_for_completion(&qp->iwarp_cm_comp);
2741 if (qp->qp_type == IB_QPT_GSI)
2742 qedr_destroy_gsi_qp(dev);
2744 /* We need to remove the entry from the xarray before we release the
2745 * qp_id to avoid a race of the qp_id being reallocated and failing
2748 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2749 xa_erase(&dev->qps, qp->qp_id)
2751 qedr_free_qp_resources(dev, qp, udata);
2753 if (rdma_protocol_iwarp(&dev->ibdev, 1))
2754 qedr_iw_qp_rem_ref(&qp->ibqp);
/* .create_ah verb: software-only AH — just copy the caller's ah_attr
 * into the driver AH object (GRH reference taken by rdma_copy_ah_attr).
 */
2759 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2760 struct ib_udata *udata)
2762 struct qedr_ah *ah = get_qedr_ah(ibah);
2764 rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
/* .destroy_ah verb: drop the reference taken by rdma_copy_ah_attr(). */
2769 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2771 struct qedr_ah *ah = get_qedr_ah(ibah);
2773 rdma_destroy_ah_attr(&ah->attr);
/* Free all PBL tables of an MR: move the primary table and any
 * still-in-use tables onto the free list, then free everything on it.
 */
2776 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2778 struct qedr_pbl *pbl, *tmp;
2780 if (info->pbl_table)
2781 list_add_tail(&info->pbl_table->list_entry,
2782 &info->free_pbl_list);
2784 if (!list_empty(&info->inuse_pbl_list))
2785 list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2787 list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2788 list_del(&pbl->list_entry);
2789 qedr_free_pbl(dev, &info->pbl_info, pbl);
/* Initialize MR PBL bookkeeping: compute the PBL layout for
 * @page_list_len pages, allocate the primary PBL table, and
 * opportunistically allocate a spare one onto the free list (failure
 * of the spare is tolerated — see the comment below). On error the
 * partially built state is torn down with free_mr_info().
 */
2793 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2794 size_t page_list_len, bool two_layered)
2796 struct qedr_pbl *tmp;
2799 INIT_LIST_HEAD(&info->free_pbl_list);
2800 INIT_LIST_HEAD(&info->inuse_pbl_list);
2802 rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2803 page_list_len, two_layered);
2807 info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2808 if (IS_ERR(info->pbl_table)) {
2809 rc = PTR_ERR(info->pbl_table);
2813 DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2814 &info->pbl_table->pa);
2816 /* in usual case we use 2 PBLs, so we add one to free
2817 * list and allocating another one
2819 tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2821 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2825 list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2827 DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2831 free_mr_info(dev, info);
/* .reg_user_mr verb: pin the user buffer (ib_umem_get), build PBLs for
 * its pages, allocate a HW TID and register it with the firmware.
 * lkey/rkey are formed as (itid << 8 | key); rkey is only set when any
 * remote access was requested. Error paths below unwind tid/pbl/umem.
 */
2836 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2837 u64 usr_addr, int acc, struct ib_udata *udata)
2839 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2844 pd = get_qedr_pd(ibpd);
2845 DP_DEBUG(dev, QEDR_MSG_MR,
2846 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2847 pd->pd_id, start, len, usr_addr, acc);
/* IB spec: remote write permission requires local write permission. */
2849 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2850 return ERR_PTR(-EINVAL);
2852 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2856 mr->type = QEDR_MR_USER;
2858 mr->umem = ib_umem_get(ibpd->device, start, len, acc);
2859 if (IS_ERR(mr->umem)) {
2864 rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2868 qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2869 &mr->info.pbl_info, PAGE_SHIFT);
2871 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2873 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2877 /* Index only, 18 bit long, lkey = itid << 8 | key */
2878 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2880 mr->hw_mr.pd = pd->pd_id;
2881 mr->hw_mr.local_read = 1;
2882 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2883 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2884 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2885 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2886 mr->hw_mr.mw_bind = false;
2887 mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2888 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2889 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2890 mr->hw_mr.page_size_log = PAGE_SHIFT;
2891 mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2892 mr->hw_mr.length = len;
2893 mr->hw_mr.vaddr = usr_addr;
2894 mr->hw_mr.zbva = false;
2895 mr->hw_mr.phy_mr = false;
2896 mr->hw_mr.dma_mr = false;
2898 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2900 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2904 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2905 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2906 mr->hw_mr.remote_atomic)
2907 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2909 DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
/* Error unwind: free the HW TID, then the PBL table. */
2914 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2916 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
/* .dereg_mr verb: deregister the TID from firmware, free it, release
 * the PBL bookkeeping (non-DMA MRs only) and unpin the umem if any.
 */
2922 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2924 struct qedr_mr *mr = get_qedr_mr(ib_mr);
2925 struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2928 rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2932 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2934 if (mr->type != QEDR_MR_DMA)
2935 free_mr_info(dev, &mr->info);
2937 /* it could be user registered memory. */
2938 ib_umem_release(mr->umem);
/* Allocate a fast-register (FRMR) MR: PBL bookkeeping sized for
 * @max_page_list_len pages, a HW TID registered as QED_RDMA_TID_FMR
 * with local-read-only access (real access bits are set per-WR at
 * fast-register time, see qedr_prepare_reg()). rkey == lkey here.
 */
2945 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2946 int max_page_list_len)
2948 struct qedr_pd *pd = get_qedr_pd(ibpd);
2949 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2953 DP_DEBUG(dev, QEDR_MSG_MR,
2954 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2957 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2962 mr->type = QEDR_MR_FRMR;
2964 rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2968 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2970 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2974 /* Index only, 18 bit long, lkey = itid << 8 | key */
2975 mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2977 mr->hw_mr.pd = pd->pd_id;
2978 mr->hw_mr.local_read = 1;
2979 mr->hw_mr.local_write = 0;
2980 mr->hw_mr.remote_read = 0;
2981 mr->hw_mr.remote_write = 0;
2982 mr->hw_mr.remote_atomic = 0;
2983 mr->hw_mr.mw_bind = false;
2984 mr->hw_mr.pbl_ptr = 0;
2985 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2986 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2988 mr->hw_mr.length = 0;
2989 mr->hw_mr.vaddr = 0;
2990 mr->hw_mr.zbva = false;
2991 mr->hw_mr.phy_mr = true;
2992 mr->hw_mr.dma_mr = false;
2994 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2996 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3000 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3001 mr->ibmr.rkey = mr->ibmr.lkey;
3003 DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
/* Error unwind: release the HW TID allocated above. */
3007 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
/* .alloc_mr verb: only IB_MR_TYPE_MEM_REG is supported; delegates to
 * __qedr_alloc_mr().
 */
3013 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
3018 if (mr_type != IB_MR_TYPE_MEM_REG)
3019 return ERR_PTR(-EINVAL);
3021 mr = __qedr_alloc_mr(ibpd, max_num_sg);
3024 return ERR_PTR(-EINVAL);
/* ib_sg_to_pages() callback: store one page address as a little-endian
 * hi/lo regpair in the MR's PBL. Fails when the PBL is already full
 * (npages reached num_pbes). The target PBE is located by dividing the
 * page index by PBEs-per-table (pbl_size / sizeof(u64)).
 */
3029 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
3031 struct qedr_mr *mr = get_qedr_mr(ibmr);
3032 struct qedr_pbl *pbl_table;
3033 struct regpair *pbe;
3036 if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
3037 DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
3041 DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
3044 pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
3045 pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
3046 pbe = (struct regpair *)pbl_table->va;
3047 pbe += mr->npages % pbes_in_page;
3048 pbe->lo = cpu_to_le32((u32)addr);
3049 pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
/* Recycle PBL tables of fast-register MRs whose work has completed:
 * for every completion not yet handled (minus one kept in flight),
 * move the oldest in-use PBL back to the free list.
 */
3056 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
3058 int work = info->completed - info->completed_handled - 1;
3060 DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
3061 while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
3062 struct qedr_pbl *pbl;
3064 /* Free all the page list that are possible to be freed
3065 * (all the ones that were invalidated), under the assumption
3066 * that if an FMR was completed successfully that means that
3067 * if there was an invalidate operation before it also ended
3069 pbl = list_first_entry(&info->inuse_pbl_list,
3070 struct qedr_pbl, list_entry);
3071 list_move_tail(&pbl->list_entry, &info->free_pbl_list);
3072 info->completed_handled++;
/* .map_mr_sg verb: first recycle completed FMR PBLs, then map the
 * scatterlist into the MR's PBL one page at a time via qedr_set_page().
 */
3076 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
3077 int sg_nents, unsigned int *sg_offset)
3079 struct qedr_mr *mr = get_qedr_mr(ibmr);
3083 handle_completed_mrs(mr->dev, &mr->info);
3084 return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
/* .get_dma_mr verb: register a DMA MR (dma_mr = true, no PBL) covering
 * the whole address space with the requested access flags. rkey is only
 * set when remote access is enabled.
 */
3087 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
3089 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
3090 struct qedr_pd *pd = get_qedr_pd(ibpd);
3094 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3096 return ERR_PTR(-ENOMEM);
3098 mr->type = QEDR_MR_DMA;
3100 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
3102 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
3106 /* index only, 18 bit long, lkey = itid << 8 | key */
3107 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
3108 mr->hw_mr.pd = pd->pd_id;
3109 mr->hw_mr.local_read = 1;
3110 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3111 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3112 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3113 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3114 mr->hw_mr.dma_mr = true;
3116 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
3118 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
3122 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3123 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
3124 mr->hw_mr.remote_atomic)
3125 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
3127 DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
/* Error unwind: release the HW TID allocated above. */
3131 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
/* True when advancing the producer by one (mod max_wr) would catch the
 * consumer, i.e. the work queue has no free slot.
 */
3137 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
3139 return (((wq->prod + 1) % wq->max_wr) == wq->cons);
/* Sum of the lengths of the first @num_sge entries of @sg_list. */
3142 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
3146 for (i = 0; i < num_sge; i++)
3147 len += sg_list[i].length;
/* Byte-swap one WQE segment (QEDR_SQE_ELEMENT_SIZE bytes) in place,
 * 64 bits at a time, into the endianness the firmware expects for
 * inline data.
 */
3152 static void swap_wqe_data64(u64 *p)
3156 for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
3157 *p = cpu_to_be64(cpu_to_le64(*p));
/* Copy the WR's SGE payload inline into SQ WQE segments, producing new
 * chain elements as each segment fills, and byte-swapping each
 * completed (and the final partial) segment via swap_wqe_data64().
 * Rejects payloads larger than ROCE_REQ_MAX_INLINE_DATA_SIZE.
 * Returns the total inline data size.
 */
3160 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
3161 struct qedr_qp *qp, u8 *wqe_size,
3162 const struct ib_send_wr *wr,
3163 const struct ib_send_wr **bad_wr,
3166 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
3167 char *seg_prt, *wqe;
3170 if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
3171 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
3185 /* Copy data inline */
3186 for (i = 0; i < wr->num_sge; i++) {
3187 u32 len = wr->sg_list[i].length;
3188 void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
3193 /* New segment required */
3195 wqe = (char *)qed_chain_produce(&qp->sq.pbl);
3197 seg_siz = sizeof(struct rdma_sq_common_wqe);
3201 /* Calculate currently allowed length */
3202 cur = min_t(u32, len, seg_siz);
3203 memcpy(seg_prt, src, cur);
3205 /* Update segment variables */
3209 /* Update sge variables */
3213 /* Swap fully-completed segments */
3215 swap_wqe_data64((u64 *)wqe);
3219 /* swap last not completed segment */
3221 swap_wqe_data64((u64 *)wqe);
/* Helper macros to populate RQ/SRQ descriptors in little-endian form:
 * RQ_SGE_SET  — fill an RQ SGE (address regpair, length, flags).
 * SRQ_HDR_SET — fill an SRQ WQE header (wr_id regpair, SGE count).
 * SRQ_SGE_SET — fill an SRQ SGE (address regpair, length, lkey).
 */
3226 #define RQ_SGE_SET(sge, vaddr, vlength, vflags) \
3228 DMA_REGPAIR_LE(sge->addr, vaddr); \
3229 (sge)->length = cpu_to_le32(vlength); \
3230 (sge)->flags = cpu_to_le32(vflags); \
3233 #define SRQ_HDR_SET(hdr, vwr_id, num_sge) \
3235 DMA_REGPAIR_LE(hdr->wr_id, vwr_id); \
3236 (hdr)->num_sges = num_sge; \
3239 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \
3241 DMA_REGPAIR_LE(sge->addr, vaddr); \
3242 (sge)->length = cpu_to_le32(vlength); \
3243 (sge)->l_key = cpu_to_le32(vlkey); \
/* Produce one SQ chain element per SGE (addr/lkey/length, LE),
 * accumulate the total data size, and account the SGEs in *wqe_size.
 * Returns the total byte count of the SGEs.
 */
3246 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
3247 const struct ib_send_wr *wr)
3252 for (i = 0; i < wr->num_sge; i++) {
3253 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
3255 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
3256 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
3257 sge->length = cpu_to_le32(wr->sg_list[i].length);
3258 data_size += wr->sg_list[i].length;
3262 *wqe_size += wr->num_sge;
/* Fill the second RDMA WQE (rkey + remote VA), then attach the payload:
 * inline (for RDMA WRITE / WRITE_WITH_IMM with IB_SEND_INLINE) or as
 * SGEs. Returns the payload size.
 */
3267 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3269 struct rdma_sq_rdma_wqe_1st *rwqe,
3270 struct rdma_sq_rdma_wqe_2nd *rwqe2,
3271 const struct ib_send_wr *wr,
3272 const struct ib_send_wr **bad_wr)
3274 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3275 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3277 if (wr->send_flags & IB_SEND_INLINE &&
3278 (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3279 wr->opcode == IB_WR_RDMA_WRITE)) {
3282 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3283 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3284 bad_wr, &rwqe->flags, flags);
3287 return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
/* Zero the second SEND WQE and attach the payload: inline when
 * IB_SEND_INLINE is set, otherwise as SGEs. Returns the payload size.
 */
3290 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3292 struct rdma_sq_send_wqe_1st *swqe,
3293 struct rdma_sq_send_wqe_2st *swqe2,
3294 const struct ib_send_wr *wr,
3295 const struct ib_send_wr **bad_wr)
3297 memset(swqe2, 0, sizeof(*swqe2));
3298 if (wr->send_flags & IB_SEND_INLINE) {
3301 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3302 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3303 bad_wr, &swqe->flags, flags);
3306 return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
/* Build a fast-register (IB_WR_REG_MR) FMR WQE pair: first WQE carries
 * iova and lkey, second WQE carries the access bits, page-size log
 * (relative to 4K, hence "- 12"), MR length and PBL address. The MR is
 * remembered in wqe_wr_id so completion handling can bump its
 * 'completed' counter (see qedr_chk_if_fmr()).
 */
3309 static int qedr_prepare_reg(struct qedr_qp *qp,
3310 struct rdma_sq_fmr_wqe_1st *fwqe1,
3311 const struct ib_reg_wr *wr)
3313 struct qedr_mr *mr = get_qedr_mr(wr->mr);
3314 struct rdma_sq_fmr_wqe_2nd *fwqe2;
3316 fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3317 fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3318 fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3319 fwqe1->l_key = wr->key;
3321 fwqe2->access_ctrl = 0;
3323 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3324 !!(wr->access & IB_ACCESS_REMOTE_READ));
3325 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3326 !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3327 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3328 !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3329 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3330 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3331 !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3332 fwqe2->fmr_ctrl = 0;
/* Page size is encoded as log2 relative to 4K (2^12). */
3334 SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3335 ilog2(mr->ibmr.page_size) - 12);
3337 fwqe2->length_hi = 0;
3338 fwqe2->length_lo = mr->ibmr.length;
3339 fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3340 fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3342 qp->wqe_wr_id[qp->sq.prod].mr = mr;
/* Map an ib_wr_opcode (posted WR) to the ib_wc_opcode reported in its
 * completion.
 */
3347 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3350 case IB_WR_RDMA_WRITE:
3351 case IB_WR_RDMA_WRITE_WITH_IMM:
3352 return IB_WC_RDMA_WRITE;
3353 case IB_WR_SEND_WITH_IMM:
3355 case IB_WR_SEND_WITH_INV:
3357 case IB_WR_RDMA_READ:
3358 case IB_WR_RDMA_READ_WITH_INV:
3359 return IB_WC_RDMA_READ;
3360 case IB_WR_ATOMIC_CMP_AND_SWP:
3361 return IB_WC_COMP_SWAP;
3362 case IB_WR_ATOMIC_FETCH_AND_ADD:
3363 return IB_WC_FETCH_ADD;
3365 return IB_WC_REG_MR;
3366 case IB_WR_LOCAL_INV:
3367 return IB_WC_LOCAL_INV;
/* Validate a send WR before posting: reject when the SQ ring is full,
 * the WR has too many SGEs, or the SQ PBL lacks room for a maximal
 * WQE. Each failure class logs once per QP, rate-limited via
 * qp->err_bitmap.
 */
3373 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3374 const struct ib_send_wr *wr)
3376 int wq_is_full, err_wr, pbl_is_full;
3377 struct qedr_dev *dev = qp->dev;
3379 /* prevent SQ overflow and/or processing of a bad WR */
3380 err_wr = wr->num_sge > qp->sq.max_sges;
3381 wq_is_full = qedr_wq_is_full(&qp->sq);
3382 pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3383 QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3384 if (wq_is_full || err_wr || pbl_is_full) {
3385 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3387 "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3389 qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3392 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3394 "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3396 qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3400 !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3402 "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3404 qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
/* Build and enqueue one send WR on the SQ (caller holds qp->q_lock and
 * has already validated the QP state). Produces the first WQE, sets
 * the common flags (solicited / completion / fence), then dispatches
 * on wr->opcode to fill the opcode-specific WQE(s). On a per-opcode
 * failure the chain producer and prev_wqe_size are rolled back so the
 * partially built WQE is discarded.
 */
3411 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3412 const struct ib_send_wr **bad_wr)
3414 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3415 struct qedr_qp *qp = get_qedr_qp(ibqp);
3416 struct rdma_sq_atomic_wqe_1st *awqe1;
3417 struct rdma_sq_atomic_wqe_2nd *awqe2;
3418 struct rdma_sq_atomic_wqe_3rd *awqe3;
3419 struct rdma_sq_send_wqe_2st *swqe2;
3420 struct rdma_sq_local_inv_wqe *iwqe;
3421 struct rdma_sq_rdma_wqe_2nd *rwqe2;
3422 struct rdma_sq_send_wqe_1st *swqe;
3423 struct rdma_sq_rdma_wqe_1st *rwqe;
3424 struct rdma_sq_fmr_wqe_1st *fwqe1;
3425 struct rdma_sq_common_wqe *wqe;
3430 if (!qedr_can_post_send(qp, wr)) {
3435 wqe = qed_chain_produce(&qp->sq.pbl);
3436 qp->wqe_wr_id[qp->sq.prod].signaled =
3437 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3440 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3441 !!(wr->send_flags & IB_SEND_SOLICITED));
3442 comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3443 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3444 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3445 !!(wr->send_flags & IB_SEND_FENCE));
3446 wqe->prev_wqe_size = qp->prev_wqe_size;
3448 qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3450 switch (wr->opcode) {
3451 case IB_WR_SEND_WITH_IMM:
/* Immediate data is not supported over iWARP. */
3452 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3457 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3458 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3460 swqe2 = qed_chain_produce(&qp->sq.pbl);
3462 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3463 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3465 swqe->length = cpu_to_le32(length);
3466 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3467 qp->prev_wqe_size = swqe->wqe_size;
3468 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3471 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3472 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3475 swqe2 = qed_chain_produce(&qp->sq.pbl);
3476 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3478 swqe->length = cpu_to_le32(length);
3479 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3480 qp->prev_wqe_size = swqe->wqe_size;
3481 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3483 case IB_WR_SEND_WITH_INV:
3484 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3485 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3486 swqe2 = qed_chain_produce(&qp->sq.pbl);
3488 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3489 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3491 swqe->length = cpu_to_le32(length);
3492 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3493 qp->prev_wqe_size = swqe->wqe_size;
3494 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3497 case IB_WR_RDMA_WRITE_WITH_IMM:
/* Immediate data is not supported over iWARP. */
3498 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3503 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3504 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
/* NOTE(review): htonl(cpu_to_le32(...)) applies two conversions to
 * imm_data; compare with the SEND_WITH_IMM path above, which uses
 * cpu_to_le32(be32_to_cpu(...)) — confirm intended wire format.
 */
3507 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3508 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3509 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3511 rwqe->length = cpu_to_le32(length);
3512 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3513 qp->prev_wqe_size = rwqe->wqe_size;
3514 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3516 case IB_WR_RDMA_WRITE:
3517 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3518 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3521 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3522 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3524 rwqe->length = cpu_to_le32(length);
3525 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3526 qp->prev_wqe_size = rwqe->wqe_size;
3527 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3529 case IB_WR_RDMA_READ_WITH_INV:
3530 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3531 fallthrough; /* same is identical to RDMA READ */
3533 case IB_WR_RDMA_READ:
3534 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3535 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3538 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3539 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3541 rwqe->length = cpu_to_le32(length);
3542 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3543 qp->prev_wqe_size = rwqe->wqe_size;
3544 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3547 case IB_WR_ATOMIC_CMP_AND_SWP:
3548 case IB_WR_ATOMIC_FETCH_AND_ADD:
/* Atomics use a fixed 3-WQE layout plus one SGE (wqe_size = 4). */
3549 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3550 awqe1->wqe_size = 4;
3552 awqe2 = qed_chain_produce(&qp->sq.pbl);
3553 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3554 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3556 awqe3 = qed_chain_produce(&qp->sq.pbl);
3558 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3559 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3560 DMA_REGPAIR_LE(awqe3->swap_data,
3561 atomic_wr(wr)->compare_add);
3563 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3564 DMA_REGPAIR_LE(awqe3->swap_data,
3565 atomic_wr(wr)->swap);
3566 DMA_REGPAIR_LE(awqe3->cmp_data,
3567 atomic_wr(wr)->compare_add);
3570 qedr_prepare_sq_sges(qp, NULL, wr);
3572 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3573 qp->prev_wqe_size = awqe1->wqe_size;
3576 case IB_WR_LOCAL_INV:
3577 iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3580 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3581 iwqe->inv_l_key = wr->ex.invalidate_rkey;
3582 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3583 qp->prev_wqe_size = iwqe->wqe_size;
3586 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3587 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3588 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3589 fwqe1->wqe_size = 2;
3591 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3593 DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3598 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3599 qp->prev_wqe_size = fwqe1->wqe_size;
3602 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3611 /* Restore prod to its position before
3612 * this WR was processed
3614 value = le16_to_cpu(qp->sq.db_data.data.value);
3615 qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3617 /* Restore prev_wqe_size */
3618 qp->prev_wqe_size = wqe->prev_wqe_size;
3620 DP_ERR(dev, "POST SEND FAILED\n");
/* .post_send verb: GSI QPs take the dedicated GSI path; otherwise,
 * under q_lock, reject posting unless the QP is in RTS/ERR/SQD (RoCE
 * only), build each WR via __qedr_post_send(), record wr_id, advance
 * the software producer, and finally ring the SQ doorbell once after
 * an smp_wmb() (see the ordering comments below).
 */
3626 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3627 const struct ib_send_wr **bad_wr)
3629 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3630 struct qedr_qp *qp = get_qedr_qp(ibqp);
3631 unsigned long flags;
3636 if (qp->qp_type == IB_QPT_GSI)
3637 return qedr_gsi_post_send(ibqp, wr, bad_wr);
3639 spin_lock_irqsave(&qp->q_lock, flags);
3641 if (rdma_protocol_roce(&dev->ibdev, 1)) {
3642 if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3643 (qp->state != QED_ROCE_QP_STATE_ERR) &&
3644 (qp->state != QED_ROCE_QP_STATE_SQD)) {
3645 spin_unlock_irqrestore(&qp->q_lock, flags);
3647 DP_DEBUG(dev, QEDR_MSG_CQ,
3648 "QP in wrong state! QP icid=0x%x state %d\n",
3649 qp->icid, qp->state);
3655 rc = __qedr_post_send(ibqp, wr, bad_wr);
3659 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3661 qedr_inc_sw_prod(&qp->sq);
3663 qp->sq.db_data.data.value++;
3669 * If there was a failure in the first WR then it will be triggered in
3670 * vane. However this is not harmful (as long as the producer value is
3671 * unchanged). For performance reasons we avoid checking for this
3672 * redundant doorbell.
3674 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3675 * soon as we give the doorbell, we could get a completion
3676 * for this wr, therefore we need to make sure that the
3677 * memory is updated before giving the doorbell.
3678 * During qedr_poll_cq, rmb is called before accessing the
3679 * cqe. This covers for the smp_rmb as well.
3682 writel(qp->sq.db_data.raw, qp->sq.db);
3684 spin_unlock_irqrestore(&qp->q_lock, flags);
/* Number of free SRQ WR slots: max_wr minus outstanding
 * (producer count minus consumer count).
 */
3689 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3693 /* Calculate number of elements used based on producer
3694 * count and consumer count and subtract it from max
3695 * work request supported so that we get elements left.
3697 used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
3699 return hw_srq->max_wr - used;
/* .post_srq_recv verb: under the SRQ lock, for each WR check free-slot
 * and SGE limits, produce a WQE header plus one chain element per SGE,
 * then publish the new SGE producer before the WQE producer (ordering
 * enforced by the barriers noted below) via the shared producer pair
 * the hardware polls.
 */
3702 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3703 const struct ib_recv_wr **bad_wr)
3705 struct qedr_srq *srq = get_qedr_srq(ibsrq);
3706 struct qedr_srq_hwq_info *hw_srq;
3707 struct qedr_dev *dev = srq->dev;
3708 struct qed_chain *pbl;
3709 unsigned long flags;
3713 spin_lock_irqsave(&srq->lock, flags);
3715 hw_srq = &srq->hw_srq;
3716 pbl = &srq->hw_srq.pbl;
3718 struct rdma_srq_wqe_header *hdr;
3721 if (!qedr_srq_elem_left(hw_srq) ||
3722 wr->num_sge > srq->hw_srq.max_sges) {
3723 DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
3724 hw_srq->wr_prod_cnt,
3725 atomic_read(&hw_srq->wr_cons_cnt),
3726 wr->num_sge, srq->hw_srq.max_sges);
3732 hdr = qed_chain_produce(pbl);
3733 num_sge = wr->num_sge;
3734 /* Set number of sge and work request id in header */
3735 SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3737 srq->hw_srq.wr_prod_cnt++;
3741 DP_DEBUG(dev, QEDR_MSG_SRQ,
3742 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3743 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3745 for (i = 0; i < wr->num_sge; i++) {
3746 struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3748 /* Set SGE length, lkey and address */
3749 SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3750 wr->sg_list[i].length, wr->sg_list[i].lkey);
3752 DP_DEBUG(dev, QEDR_MSG_SRQ,
3753 "[%d]: len %d key %x addr %x:%x\n",
3754 i, srq_sge->length, srq_sge->l_key,
3755 srq_sge->addr.hi, srq_sge->addr.lo);
3759 /* Update WQE and SGE information before
3760 * updating producer.
3764 /* SRQ producer is 8 bytes. Need to update SGE producer index
3765 * in first 4 bytes and need to update WQE producer in
3768 srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
3769 /* Make sure sge producer is updated first */
3771 srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
3776 DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3777 qed_chain_get_elem_left(pbl));
3778 spin_unlock_irqrestore(&srq->lock, flags);
/* .post_recv verb: GSI QPs take the dedicated GSI path; otherwise,
 * under q_lock, reject posting in RESET state, check ring room and
 * SGE limits per WR, produce one RQ SGE per sg_list entry (the first
 * carries the SGE count, the last carries the completion lkey field),
 * post a single zero-length SGE when num_sge == 0 (the FW requires
 * 1-4 SGEs), then ring the RQ doorbell — and the second iWARP
 * doorbell when applicable.
 */
3783 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3784 const struct ib_recv_wr **bad_wr)
3786 struct qedr_qp *qp = get_qedr_qp(ibqp);
3787 struct qedr_dev *dev = qp->dev;
3788 unsigned long flags;
3791 if (qp->qp_type == IB_QPT_GSI)
3792 return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3794 spin_lock_irqsave(&qp->q_lock, flags);
3796 if (qp->state == QED_ROCE_QP_STATE_RESET) {
3797 spin_unlock_irqrestore(&qp->q_lock, flags);
3805 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3806 QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3807 wr->num_sge > qp->rq.max_sges) {
3808 DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3809 qed_chain_get_elem_left_u32(&qp->rq.pbl),
3810 QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3816 for (i = 0; i < wr->num_sge; i++) {
3818 struct rdma_rq_sge *rqe =
3819 qed_chain_produce(&qp->rq.pbl);
3821 /* First one must include the number
3822 * of SGE in the list
3825 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3828 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3829 wr->sg_list[i].lkey);
3831 RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3832 wr->sg_list[i].length, flags);
3835 /* Special case of no sges. FW requires between 1-4 sges...
3836 * in this case we need to post 1 sge with length zero. this is
3837 * because rdma write with immediate consumes an RQ.
3841 struct rdma_rq_sge *rqe =
3842 qed_chain_produce(&qp->rq.pbl);
3844 /* First one must include the number
3845 * of SGE in the list
3847 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3848 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3850 RQ_SGE_SET(rqe, 0, 0, flags);
3854 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3855 qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3857 qedr_inc_sw_prod(&qp->rq);
3859 /* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3860 * soon as we give the doorbell, we could get a completion
3861 * for this wr, therefore we need to make sure that the
3862 * memory is update before giving the doorbell.
3863 * During qedr_poll_cq, rmb is called before accessing the
3864 * cqe. This covers for the smp_rmb as well.
3868 qp->rq.db_data.data.value++;
3870 writel(qp->rq.db_data.raw, qp->rq.db);
3872 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3873 writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3879 spin_unlock_irqrestore(&qp->q_lock, flags);
/* is_valid_cqe() - test whether @cqe is a new, not-yet-processed entry.
 * Compares the CQE's toggle bit against the CQ's expected phase
 * (presumably cq->pbl_toggle, flipped each time the chain wraps —
 * the comparison target sits on a line elided from this extract).
 * The requester layout is used only to reach the shared flags byte.
 */
3884 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3886 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3888 return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
/* cqe_get_qp() - recover the driver's qedr_qp pointer from a CQE.
 * The QP pointer was handed to the FW as a 64-bit handle split into
 * hi/lo 32-bit halves; HILO_GEN() reassembles it and the uintptr_t
 * cast converts it back to a pointer.
 */
3892 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3894 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3897 qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3898 resp_cqe->qp_handle.lo,
/* cqe_get_type() - extract the CQE type (requester/responder/...) from
 * the flags field.  The field position is shared across CQE layouts,
 * so the requester view is safe for any CQE.
 */
3903 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3905 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3907 return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3910 /* Return the latest CQE that still needs processing (cached on the CQ) */
3911 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3913 return cq->latest_cqe;
3916 /* In fmr we need to increase the number of fmr completed counter for the fmr
3917 * algorithm determining whether we can free a pbl or not.
3918 * we need to perform this whether the work request was signaled or not. for
3919 * this purpose we call this function from the condition that checks if a wr
3920 * should be skipped, to make sure we don't miss it (possibly this fmr
3921 * operation was not signaled)
/* If the WR at the current SQ consumer is a memory-registration,
 * bump its MR's completed counter even though the WC is skipped.
 */
3923 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3925 if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3926 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
/* process_req() - drain send-queue completions up to @hw_cons into @wc.
 *
 * Walks the SW SQ consumer toward the HW consumer index, emitting one
 * work completion per signaled WR (or per WR unconditionally when
 * @force is set, e.g. on flush), all carrying @status.  Unsignaled WRs
 * are skipped but still checked via qedr_chk_if_fmr() so MR completion
 * accounting stays correct.  For each processed WR, its chain elements
 * are consumed and the SW consumer advanced.
 * Returns the number of WCs written (return path elided in extract).
 */
3929 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3930 struct qedr_cq *cq, int num_entries,
3931 struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3936 while (num_entries && qp->sq.wqe_cons != hw_cons) {
/* Skip unsignaled WRs (unless forced), but never miss FMR accounting. */
3937 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3938 qedr_chk_if_fmr(qp);
3944 wc->status = status;
3947 wc->src_qp = qp->id;
3950 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3951 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3953 switch (wc->opcode) {
3954 case IB_WC_RDMA_WRITE:
3955 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3957 case IB_WC_COMP_SWAP:
3958 case IB_WC_FETCH_ADD:
/* MR-related opcode: count the completion for pbl-free tracking. */
3962 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3964 case IB_WC_RDMA_READ:
3966 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
/* Consume this WR's chain elements and advance the SW consumer. */
3976 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3977 qed_chain_consume(&qp->sq.pbl);
3978 qedr_inc_sw_cons(&qp->sq);
/* qedr_poll_cq_req() - handle a requester (send-side) CQE.
 *
 * On RDMA_CQE_REQ_STS_OK, completes all WRs up to req->sq_cons with
 * IB_WC_SUCCESS.  On WORK_REQUEST_FLUSHED_ERR, force-flushes them with
 * IB_WC_WR_FLUSH_ERR.  For any other (fatal) status, the QP is moved to
 * ERR, all WRs strictly before the failing one complete successfully,
 * and — if a WC slot remains — one extra WC is emitted for the failing
 * WR with the IB status mapped from the FW status below.
 * Returns the number of WCs produced (return path elided in extract).
 */
3984 static int qedr_poll_cq_req(struct qedr_dev *dev,
3985 struct qedr_qp *qp, struct qedr_cq *cq,
3986 int num_entries, struct ib_wc *wc,
3987 struct rdma_cqe_requester *req)
3991 switch (req->status) {
3992 case RDMA_CQE_REQ_STS_OK:
3993 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3996 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
/* A flush CQE is expected in ERR state; warn (debug) otherwise. */
3997 if (qp->state != QED_ROCE_QP_STATE_ERR)
3998 DP_DEBUG(dev, QEDR_MSG_CQ,
3999 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4000 cq->icid, qp->icid);
4001 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
4002 IB_WC_WR_FLUSH_ERR, 1);
4005 /* process all WQE before the consumer */
4006 qp->state = QED_ROCE_QP_STATE_ERR;
4007 cnt = process_req(dev, qp, cq, num_entries, wc,
4008 req->sq_cons - 1, IB_WC_SUCCESS, 0);
4010 /* if we have extra WC fill it with actual error info */
4011 if (cnt < num_entries) {
4012 enum ib_wc_status wc_status;
/* Map the FW requester status to the corresponding IB WC status. */
4014 switch (req->status) {
4015 case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
4017 "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4018 cq->icid, qp->icid);
4019 wc_status = IB_WC_BAD_RESP_ERR;
4021 case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
4023 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4024 cq->icid, qp->icid);
4025 wc_status = IB_WC_LOC_LEN_ERR;
4027 case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
4029 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4030 cq->icid, qp->icid);
4031 wc_status = IB_WC_LOC_QP_OP_ERR;
4033 case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
4035 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4036 cq->icid, qp->icid);
4037 wc_status = IB_WC_LOC_PROT_ERR;
4039 case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
4041 "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4042 cq->icid, qp->icid);
4043 wc_status = IB_WC_MW_BIND_ERR;
4045 case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
4047 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4048 cq->icid, qp->icid);
4049 wc_status = IB_WC_REM_INV_REQ_ERR;
4051 case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
4053 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4054 cq->icid, qp->icid);
4055 wc_status = IB_WC_REM_ACCESS_ERR;
4057 case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
4059 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4060 cq->icid, qp->icid);
4061 wc_status = IB_WC_REM_OP_ERR;
4063 case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
4065 "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4066 cq->icid, qp->icid);
4067 wc_status = IB_WC_RNR_RETRY_EXC_ERR;
4069 case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
4071 "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4072 cq->icid, qp->icid);
4073 wc_status = IB_WC_RETRY_EXC_ERR;
4077 "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
4078 cq->icid, qp->icid);
4079 wc_status = IB_WC_GENERAL_ERR;
/* Emit exactly one WC for the failing WR, force-completed. */
4081 cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
/* qedr_cqe_resp_status_to_ib() - map a FW responder CQE status to the
 * corresponding IB work-completion status.  Unknown statuses fall
 * through to IB_WC_GENERAL_ERR.
 */
4089 static inline int qedr_cqe_resp_status_to_ib(u8 status)
4092 case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
4093 return IB_WC_LOC_ACCESS_ERR;
4094 case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
4095 return IB_WC_LOC_LEN_ERR;
4096 case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
4097 return IB_WC_LOC_QP_OP_ERR;
4098 case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
4099 return IB_WC_LOC_PROT_ERR;
4100 case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
4101 return IB_WC_MW_BIND_ERR;
4102 case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
4103 return IB_WC_REM_INV_RD_REQ_ERR;
4104 case RDMA_CQE_RESP_STS_OK:
4105 return IB_WC_SUCCESS;
4107 return IB_WC_GENERAL_ERR;
/* qedr_set_ok_cqe_resp_wc() - fill @wc for a successful responder CQE.
 *
 * Sets status/byte_len, then decodes the IMM/INV/RDMA flag combination:
 * immediate data (network order per IBTA, hence cpu_to_be32), invalidate
 * rkey, or RDMA-write-with-immediate opcode.  Presumably returns nonzero
 * for an invalid flag combination (the caller logs invalid flags) —
 * the return statements are elided from this extract.
 */
4111 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
4114 wc->status = IB_WC_SUCCESS;
4115 wc->byte_len = le32_to_cpu(resp->length);
4117 if (resp->flags & QEDR_RESP_IMM) {
/* ib_wc.ex.imm_data is carried in big-endian (network) byte order. */
4118 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
4119 wc->wc_flags |= IB_WC_WITH_IMM;
4121 if (resp->flags & QEDR_RESP_RDMA)
4122 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
4124 if (resp->flags & QEDR_RESP_INV)
/* INV: the same union field holds the invalidated rkey (CPU order). */
4127 } else if (resp->flags & QEDR_RESP_INV) {
4128 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
4129 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
4131 if (resp->flags & QEDR_RESP_RDMA)
4134 } else if (resp->flags & QEDR_RESP_RDMA) {
/* __process_resp_one() - common WC construction for one responder CQE,
 * shared by the SRQ and regular-RQ paths.  On RDMA_CQE_RESP_STS_OK the
 * detailed fields come from qedr_set_ok_cqe_resp_wc(); otherwise the FW
 * status is translated to an IB status.  Finally the generic WC fields
 * (src_qp, etc.) are filled for the given @wr_id.
 */
4141 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4142 struct qedr_cq *cq, struct ib_wc *wc,
4143 struct rdma_cqe_responder *resp, u64 wr_id)
4145 /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
4146 wc->opcode = IB_WC_RECV;
4149 if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
4150 if (qedr_set_ok_cqe_resp_wc(resp, wc))
4152 "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
4153 cq, cq->icid, resp->flags);
4156 wc->status = qedr_cqe_resp_status_to_ib(resp->status);
4157 if (wc->status == IB_WC_GENERAL_ERR)
4159 "CQ %p (icid=%d) contains an invalid CQE status %d\n",
4160 cq, cq->icid, resp->status);
4163 /* Fill the rest of the WC */
4165 wc->src_qp = qp->id;
/* process_resp_one_srq() - complete one responder CQE for an SRQ QP.
 *
 * The wr_id travels inside the CQE itself (hi/lo halves) because SRQ
 * WRs have no per-QP ordering.  Flush errors get a minimal flush WC;
 * otherwise __process_resp_one() builds the full WC.  The SRQ HW
 * consumer counter is then advanced.  Returns the WC count
 * (return elided in this extract).
 */
4170 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4171 struct qedr_cq *cq, struct ib_wc *wc,
4172 struct rdma_cqe_responder *resp)
4174 struct qedr_srq *srq = qp->srq;
/* Reassemble the 64-bit wr_id carried in the CQE. */
4177 wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4178 le32_to_cpu(resp->srq_wr_id.lo), u64);
4180 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4181 wc->status = IB_WC_WR_FLUSH_ERR;
4185 wc->src_qp = qp->id;
4189 __process_resp_one(dev, qp, cq, wc, resp, wr_id);
/* One SRQ WR consumed by HW; used for SRQ free-slot accounting. */
4191 atomic_inc(&srq->hw_srq.wr_cons_cnt);
/* process_resp_one() - complete one responder CQE for a regular-RQ QP.
 * The wr_id comes from the per-QP bookkeeping recorded at post_recv
 * time; the WR's chain elements are consumed and the SW RQ consumer
 * advanced.  Returns the WC count (return elided in this extract).
 */
4195 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
4196 struct qedr_cq *cq, struct ib_wc *wc,
4197 struct rdma_cqe_responder *resp)
4199 u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
4201 __process_resp_one(dev, qp, cq, wc, resp, wr_id);
4203 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4204 qed_chain_consume(&qp->rq.pbl);
4205 qedr_inc_sw_cons(&qp->rq);
/* process_resp_flush() - flush all outstanding RQ WRs up to @hw_cons,
 * emitting IB_WC_WR_FLUSH_ERR completions (up to @num_entries of them)
 * and consuming each WR's chain elements.  Mirrors process_req()'s
 * flush handling on the send side.
 */
4210 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
4211 int num_entries, struct ib_wc *wc, u16 hw_cons)
4215 while (num_entries && qp->rq.wqe_cons != hw_cons) {
4217 wc->status = IB_WC_WR_FLUSH_ERR;
4220 wc->src_qp = qp->id;
4222 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
/* Release this WR's chain elements and advance the SW consumer. */
4227 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
4228 qed_chain_consume(&qp->rq.pbl);
4229 qedr_inc_sw_cons(&qp->rq);
/* try_consume_resp_cqe() - consume the responder CQE once the SW RQ
 * consumer has caught up with the index reported in the CQE; the body
 * of the match case is elided from this extract (presumably it consumes
 * the CQ chain element and sets *update).
 */
4235 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4236 struct rdma_cqe_responder *resp, int *update)
4238 if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
/* qedr_poll_cq_resp_srq() - handle one responder CQE on the SRQ path.
 * Thin wrapper around process_resp_one_srq(); returns the WC count
 * (remaining statements elided in this extract).
 */
4244 static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4245 struct qedr_cq *cq, int num_entries,
4247 struct rdma_cqe_responder *resp)
4251 cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
/* qedr_poll_cq_resp() - handle a responder CQE for a regular-RQ QP.
 * A flush status drains all WRs up to the reported consumer with
 * IB_WC_WR_FLUSH_ERR; otherwise one WR is completed normally.  In the
 * flush case the CQE can be consumed immediately via
 * try_consume_resp_cqe().  Returns the WC count (return elided).
 */
4257 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4258 struct qedr_cq *cq, int num_entries,
4259 struct ib_wc *wc, struct rdma_cqe_responder *resp,
4264 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4265 cnt = process_resp_flush(qp, cq, num_entries, wc,
4266 resp->rq_cons_or_srq_id);
4267 try_consume_resp_cqe(cq, qp, resp, update);
4269 cnt = process_resp_one(dev, qp, cq, wc, resp);
/* try_consume_req_cqe() - requester-side counterpart of
 * try_consume_resp_cqe(): consume the CQE once the SW SQ consumer
 * matches the CQE's sq_cons (case body elided from this extract).
 */
4277 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4278 struct rdma_cqe_requester *req, int *update)
4280 if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
/* qedr_poll_cq() - ib_poll_cq verb: harvest up to @num_entries WCs.
 *
 * Polling a destroyed CQ is warned about; GSI CQs use the dedicated
 * qedr_gsi_poll_cq() path.  Under cq->cq_lock, valid CQEs are walked
 * and dispatched by type (requester / responder-RQ / responder-SRQ),
 * the CQ consumer is advanced by however many chain elements were
 * consumed, and the doorbell is updated when anything changed.
 * Returns the number of WCs written (return elided in this extract).
 */
4286 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4288 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4289 struct qedr_cq *cq = get_qedr_cq(ibcq);
4290 union rdma_cqe *cqe;
4291 u32 old_cons, new_cons;
4292 unsigned long flags;
4296 if (cq->destroyed) {
4298 "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4303 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4304 return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4306 spin_lock_irqsave(&cq->cq_lock, flags);
4307 cqe = cq->latest_cqe;
4308 old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4309 while (num_entries && is_valid_cqe(cq, cqe)) {
4313 /* prevent speculative reads of any field of CQE */
4316 qp = cqe_get_qp(cqe);
4318 WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
/* Dispatch by CQE type; each handler returns how many WCs it wrote. */
4324 switch (cqe_get_type(cqe)) {
4325 case RDMA_CQE_TYPE_REQUESTER:
4326 cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4328 try_consume_req_cqe(cq, qp, &cqe->req, &update);
4330 case RDMA_CQE_TYPE_RESPONDER_RQ:
4331 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4332 &cqe->resp, &update);
4334 case RDMA_CQE_TYPE_RESPONDER_SRQ:
4335 cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4339 case RDMA_CQE_TYPE_INVALID:
4341 DP_ERR(dev, "Error: invalid CQE type = %d\n",
/* Advance the SW CQ consumer by however many elements were consumed. */
4350 new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4352 cq->cq_cons += new_cons - old_cons;
4355 /* doorbell notifies about the latest VALID entry,
4356 * but the chain already points to the next INVALID one
4358 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4360 spin_unlock_irqrestore(&cq->cq_lock, flags);
/* qedr_process_mad() - ib_process_mad callback stub.
 * The device does not process MADs in the driver; reporting
 * IB_MAD_RESULT_SUCCESS lets the core treat the MAD as handled
 * without generating a reply.
 */
4364 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4365 u8 port_num, const struct ib_wc *in_wc,
4366 const struct ib_grh *in_grh, const struct ib_mad *in,
4367 struct ib_mad *out_mad, size_t *out_mad_size,
4368 u16 *out_mad_pkey_index)
4370 return IB_MAD_RESULT_SUCCESS;