/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016  QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and /or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include <linux/qed/common_hsi.h>
#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"

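/* Convert an offset within the doorbell BAR's PWM region from PWM units
 * (1 << DB_PWM_ADDR_OFFSET_SHIFT bytes each) into the byte offset that is
 * added to a mapped doorbell address or reported to user space below.
 */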
#define DB_ADDR_SHIFT(addr)             ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

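/* Clamp the response length to udata->outlen so that older user-space
 * libraries, which allocate a smaller response structure, keep working.
 */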
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
                                        size_t len)
{
        size_t min_len = min_t(size_t, len, udata->outlen);

        return ib_copy_to_udata(udata, src, min_len);
}

int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
        if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
                return -EINVAL;

        *pkey = QEDR_ROCE_PKEY_DEFAULT;
        return 0;
}

int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
                      int index, union ib_gid *sgid)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);

        memset(sgid->raw, 0, sizeof(sgid->raw));
        ether_addr_copy(sgid->raw, dev->ndev->dev_addr);

        DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
                 sgid->global.interface_id, sgid->global.subnet_prefix);

        return 0;
}

int qedr_query_device(struct ib_device *ibdev,
                      struct ib_device_attr *attr, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qedr_device_attr *qattr = &dev->attr;

        if (!dev->rdma_ctx) {
                DP_ERR(dev,
                       "qedr_query_device called with invalid params rdma_ctx=%p\n",
                       dev->rdma_ctx);
                return -EINVAL;
        }

        memset(attr, 0, sizeof(*attr));

        attr->fw_ver = qattr->fw_ver;
        attr->sys_image_guid = qattr->sys_image_guid;
        attr->max_mr_size = qattr->max_mr_size;
        attr->page_size_cap = qattr->page_size_caps;
        attr->vendor_id = qattr->vendor_id;
        attr->vendor_part_id = qattr->vendor_part_id;
        attr->hw_ver = qattr->hw_ver;
        attr->max_qp = qattr->max_qp;
        attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
        attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
            IB_DEVICE_RC_RNR_NAK_GEN |
            IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

        attr->max_sge = qattr->max_sge;
        attr->max_sge_rd = qattr->max_sge;
        attr->max_cq = qattr->max_cq;
        attr->max_cqe = qattr->max_cqe;
        attr->max_mr = qattr->max_mr;
        attr->max_mw = qattr->max_mw;
        attr->max_pd = qattr->max_pd;
        attr->atomic_cap = dev->atomic_cap;
        attr->max_fmr = qattr->max_fmr;
        attr->max_map_per_fmr = 16;
        attr->max_qp_init_rd_atom =
            1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
        attr->max_qp_rd_atom =
            min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
                attr->max_qp_init_rd_atom);

        attr->max_srq = qattr->max_srq;
        attr->max_srq_sge = qattr->max_srq_sge;
        attr->max_srq_wr = qattr->max_srq_wr;

        attr->local_ca_ack_delay = qattr->dev_ack_delay;
        attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
        attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
        attr->max_ah = qattr->max_ah;

        return 0;
}

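/* These values mirror enum ib_port_speed from <rdma/ib_verbs.h>:
 * SDR = 1, DDR = 2, QDR = 4, FDR10 = 8, FDR = 16, EDR = 32.
 */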
#define QEDR_SPEED_SDR          (1)
#define QEDR_SPEED_DDR          (2)
#define QEDR_SPEED_QDR          (4)
#define QEDR_SPEED_FDR10        (8)
#define QEDR_SPEED_FDR          (16)
#define QEDR_SPEED_EDR          (32)

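/* Translate the Ethernet link speed reported by qed (in Mbps) into an IB
 * (speed, width) pair whose product approximates the link rate; e.g.
 * 100G is reported as EDR (~25Gbps per lane) over four lanes.
 */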
static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
                                            u8 *ib_width)
{
        switch (speed) {
        case 1000:
                *ib_speed = QEDR_SPEED_SDR;
                *ib_width = IB_WIDTH_1X;
                break;
        case 10000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_1X;
                break;

        case 20000:
                *ib_speed = QEDR_SPEED_DDR;
                *ib_width = IB_WIDTH_4X;
                break;

        case 25000:
                *ib_speed = QEDR_SPEED_EDR;
                *ib_width = IB_WIDTH_1X;
                break;

        case 40000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_4X;
                break;

        case 50000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_4X;
                break;

        case 100000:
                *ib_speed = QEDR_SPEED_EDR;
                *ib_width = IB_WIDTH_4X;
                break;

        default:
                /* Unsupported */
                *ib_speed = QEDR_SPEED_SDR;
                *ib_width = IB_WIDTH_1X;
        }
}

int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
        struct qedr_dev *dev;
        struct qed_rdma_port *rdma_port;

        dev = get_qedr_dev(ibdev);
        if (port > 1) {
                DP_ERR(dev, "invalid_port=0x%x\n", port);
                return -EINVAL;
        }

        if (!dev->rdma_ctx) {
                DP_ERR(dev, "rdma_ctx is NULL\n");
                return -EINVAL;
        }

        rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);

        /* *attr is zeroed by the caller; avoid zeroing it again here */
        if (rdma_port->port_state == QED_RDMA_PORT_UP) {
                attr->state = IB_PORT_ACTIVE;
                attr->phys_state = 5;
        } else {
                attr->state = IB_PORT_DOWN;
                attr->phys_state = 3;
        }
        attr->max_mtu = IB_MTU_4096;
        attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
        attr->lid = 0;
        attr->lmc = 0;
        attr->sm_lid = 0;
        attr->sm_sl = 0;
        attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                attr->gid_tbl_len = 1;
                attr->pkey_tbl_len = 1;
        } else {
                attr->gid_tbl_len = QEDR_MAX_SGID;
                attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
        }
        attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
        attr->qkey_viol_cntr = 0;
        get_link_speed_and_width(rdma_port->link_speed,
                                 &attr->active_speed, &attr->active_width);
        attr->max_msg_sz = rdma_port->max_msg_size;
        attr->max_vl_num = 4;

        return 0;
}

int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
                     struct ib_port_modify *props)
{
        struct qedr_dev *dev;

        dev = get_qedr_dev(ibdev);
        if (port > 1) {
                DP_ERR(dev, "invalid_port=0x%x\n", port);
                return -EINVAL;
        }

        return 0;
}

static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
                         unsigned long len)
{
        struct qedr_mm *mm;

        mm = kzalloc(sizeof(*mm), GFP_KERNEL);
        if (!mm)
                return -ENOMEM;

        mm->key.phy_addr = phy_addr;
        /* This function might be called with a length which is not a multiple
         * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
         * forces this granularity by increasing the requested size if needed.
         * When qedr_mmap is called, it will search the list with the updated
         * length as a key. To prevent search failures, the length is rounded up
         * in advance to PAGE_SIZE.
         */
        mm->key.len = roundup(len, PAGE_SIZE);
        INIT_LIST_HEAD(&mm->entry);

        mutex_lock(&uctx->mm_list_lock);
        list_add(&mm->entry, &uctx->mm_head);
        mutex_unlock(&uctx->mm_list_lock);

        DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
                 (unsigned long long)mm->key.phy_addr,
                 (unsigned long)mm->key.len, uctx);

        return 0;
}

static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
                             unsigned long len)
{
        bool found = false;
        struct qedr_mm *mm;

        mutex_lock(&uctx->mm_list_lock);
        list_for_each_entry(mm, &uctx->mm_head, entry) {
                if (len != mm->key.len || phy_addr != mm->key.phy_addr)
                        continue;

                found = true;
                break;
        }
        mutex_unlock(&uctx->mm_list_lock);
        /* Don't dereference the list cursor here: when nothing was found it
         * does not point at a valid entry, so log the requested key instead.
         */
        DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
                 phy_addr, len, uctx, found);

        return found;
}

struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
                                        struct ib_udata *udata)
{
        int rc;
        struct qedr_ucontext *ctx;
        struct qedr_alloc_ucontext_resp uresp;
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qed_rdma_add_user_out_params oparams;

        if (!udata)
                return ERR_PTR(-EFAULT);

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);

        rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
        if (rc) {
                DP_ERR(dev,
                       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
                       rc);
                goto err;
        }

        ctx->dpi = oparams.dpi;
        ctx->dpi_addr = oparams.dpi_addr;
        ctx->dpi_phys_addr = oparams.dpi_phys_addr;
        ctx->dpi_size = oparams.dpi_size;
        INIT_LIST_HEAD(&ctx->mm_head);
        mutex_init(&ctx->mm_list_lock);

        memset(&uresp, 0, sizeof(uresp));

        uresp.dpm_enabled = dev->user_dpm_enabled;
        uresp.wids_enabled = 1;
        uresp.wid_count = oparams.wid_count;
        uresp.db_pa = ctx->dpi_phys_addr;
        uresp.db_size = ctx->dpi_size;
        uresp.max_send_wr = dev->attr.max_sqe;
        uresp.max_recv_wr = dev->attr.max_rqe;
        uresp.max_srq_wr = dev->attr.max_srq_wr;
        uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
        uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
        uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
        uresp.max_cqes = QEDR_MAX_CQES;

        rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                goto err1;

        ctx->dev = dev;

        rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
        if (rc)
                goto err1;

        DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
                 &ctx->ibucontext);
        return &ctx->ibucontext;

err1:
        /* Release the DPI acquired above; it would otherwise leak. */
        dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
err:
        kfree(ctx);
        return ERR_PTR(rc);
}

int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
        struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
        struct qedr_mm *mm, *tmp;
        int status = 0;

        DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
                 uctx);
        uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);

        list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
                DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                         "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
                         mm->key.phy_addr, mm->key.len, uctx);
                list_del(&mm->entry);
                kfree(mm);
        }

        kfree(uctx);
        return status;
}

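/* Map part of a user context's doorbell window into user space. The
 * request must be page aligned, must match a region previously recorded
 * by qedr_add_mmap(), must lie entirely inside this context's DPI slice
 * of the doorbell BAR and must not ask for read access; the mapping is
 * made write-combined.
 */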
int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
        struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
        struct qedr_dev *dev = get_qedr_dev(context->device);
        unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
        unsigned long len = (vma->vm_end - vma->vm_start);
        unsigned long dpi_start;

        dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);

        DP_DEBUG(dev, QEDR_MSG_INIT,
                 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK, vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
                 (void *)vma->vm_start, (void *)vma->vm_end,
                 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);

        if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
                DP_ERR(dev,
                       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
                       (void *)vma->vm_start, (void *)vma->vm_end);
                return -EINVAL;
        }

        if (!qedr_search_mmap(ucontext, phys_addr, len)) {
                DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
                       vma->vm_pgoff);
                return -EINVAL;
        }

        if (phys_addr < dpi_start ||
            ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
                DP_ERR(dev,
                       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
                       (void *)phys_addr, (void *)dpi_start,
                       ucontext->dpi_size);
                return -EINVAL;
        }

        if (vma->vm_flags & VM_READ) {
                DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
                return -EINVAL;
        }

        vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
        return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
                                  vma->vm_page_prot);
}

struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
                            struct ib_ucontext *context, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qedr_pd *pd;
        u16 pd_id;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
                 (udata && context) ? "User Lib" : "Kernel");

        if (!dev->rdma_ctx) {
                DP_ERR(dev, "invalid RDMA context\n");
                return ERR_PTR(-EINVAL);
        }

        pd = kzalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
        if (rc)
                goto err;

        pd->pd_id = pd_id;

        if (udata && context) {
                struct qedr_alloc_pd_uresp uresp = {
                        .pd_id = pd_id,
                };

                rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
                if (rc) {
                        DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
                        dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
                        goto err;
                }

                pd->uctx = get_qedr_ucontext(context);
                pd->uctx->pd = pd;
        }

        return &pd->ibpd;

err:
        kfree(pd);
        return ERR_PTR(rc);
}

int qedr_dealloc_pd(struct ib_pd *ibpd)
{
        struct qedr_dev *dev = get_qedr_dev(ibpd->device);
        struct qedr_pd *pd = get_qedr_pd(ibpd);

        if (!pd) {
                pr_err("Invalid PD received in dealloc_pd\n");
                return -EINVAL;
        }

        DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
        dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);

        kfree(pd);

        return 0;
}

static void qedr_free_pbl(struct qedr_dev *dev,
                          struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
        struct pci_dev *pdev = dev->pdev;
        int i;

        for (i = 0; i < pbl_info->num_pbls; i++) {
                if (!pbl[i].va)
                        continue;
                dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
                                  pbl[i].va, pbl[i].pa);
        }

        kfree(pbl);
}

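/* A PBL page of size S holds S / sizeof(u64) page-buffer entries (PBEs).
 * A two-layer PBL can therefore map up to (S / 8)^2 pages: with the
 * maximal 64KB PBL page that is 8192 * 8192 = 2^26 PBEs, i.e. 256GB of
 * registered memory when 4KB pages are used.
 */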
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)

static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
                                           struct qedr_pbl_info *pbl_info,
                                           gfp_t flags)
{
        struct pci_dev *pdev = dev->pdev;
        struct qedr_pbl *pbl_table;
        dma_addr_t *pbl_main_tbl;
        dma_addr_t pa;
        void *va;
        int i;

        pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
        if (!pbl_table)
                return ERR_PTR(-ENOMEM);

        for (i = 0; i < pbl_info->num_pbls; i++) {
                va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
                                         &pa, flags);
                if (!va)
                        goto err;

                pbl_table[i].va = va;
                pbl_table[i].pa = pa;
        }

        /* Two-layer PBLs: if we have more than one pbl we need to initialize
         * the first one with physical pointers to all of the rest.
         */
        pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
        for (i = 0; i < pbl_info->num_pbls - 1; i++)
                pbl_main_tbl[i] = pbl_table[i + 1].pa;

        return pbl_table;

err:
        /* Entries that were never allocated are still NULL, so a single pass
         * in qedr_free_pbl() frees exactly the pages mapped above; freeing
         * them here as well would be a double free.
         */
        qedr_free_pbl(dev, pbl_info, pbl_table);

        return ERR_PTR(-ENOMEM);
}

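/* Pick a PBL geometry for num_pbes pages: use a single-layer PBL when the
 * device cannot do two layers or when the entries fit into one PBL page;
 * otherwise grow the PBL page size in powers of two until the two-layer
 * capacity is sufficient, and reserve one extra page for layer 0.
 */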
static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
                                struct qedr_pbl_info *pbl_info,
                                u32 num_pbes, int two_layer_capable)
{
        u32 pbl_capacity;
        u32 pbl_size;
        u32 num_pbls;

        if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
                if (num_pbes > MAX_PBES_TWO_LAYER) {
                        DP_ERR(dev, "prepare pbl table: too many pages %d\n",
                               num_pbes);
                        return -EINVAL;
                }

                /* calculate required pbl page size */
                pbl_size = MIN_FW_PBL_PAGE_SIZE;
                pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
                               NUM_PBES_ON_PAGE(pbl_size);

                while (pbl_capacity < num_pbes) {
                        pbl_size *= 2;
                        pbl_capacity = pbl_size / sizeof(u64);
                        pbl_capacity = pbl_capacity * pbl_capacity;
                }

                num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
                num_pbls++;     /* One for layer 0 (points to the pbls) */
                pbl_info->two_layered = true;
        } else {
                /* One layered PBL */
                num_pbls = 1;
                pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
                                 roundup_pow_of_two((num_pbes * sizeof(u64))));
                pbl_info->two_layered = false;
        }

        pbl_info->num_pbls = num_pbls;
        pbl_info->pbl_size = pbl_size;
        pbl_info->num_pbes = num_pbes;

        DP_DEBUG(dev, QEDR_MSG_MR,
                 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
                 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

        return 0;
}

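/* Walk the umem scatterlist and write one little-endian PBE per
 * firmware-sized page: each umem page of 2^page_shift bytes expands into
 * 2^(page_shift - pg_shift) consecutive firmware pages.
 */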
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
                               struct qedr_pbl *pbl,
                               struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
        int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
        u32 fw_pg_cnt, fw_pg_per_umem_pg;
        struct qedr_pbl *pbl_tbl;
        struct scatterlist *sg;
        struct regpair *pbe;
        u64 pg_addr;
        int entry;

        if (!pbl_info->num_pbes)
                return;

        /* If we have a two-layered pbl, the first pbl points to the rest of
         * the pbls and the first data entry lives in the second pbl of the
         * table.
         */
        if (pbl_info->two_layered)
                pbl_tbl = &pbl[1];
        else
                pbl_tbl = pbl;

        pbe = (struct regpair *)pbl_tbl->va;
        if (!pbe) {
                DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
                return;
        }

        pbe_cnt = 0;

        shift = umem->page_shift;

        fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                pages = sg_dma_len(sg) >> shift;
                pg_addr = sg_dma_address(sg);
                for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
                        for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
                                pbe->lo = cpu_to_le32(pg_addr);
                                pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

                                pg_addr += BIT(pg_shift);
                                pbe_cnt++;
                                total_num_pbes++;
                                pbe++;

                                if (total_num_pbes == pbl_info->num_pbes)
                                        return;

                                /* If the current pbl is full, move on to the
                                 * next one.
                                 */
                                if (pbe_cnt ==
                                    (pbl_info->pbl_size / sizeof(u64))) {
                                        pbl_tbl++;
                                        pbe = (struct regpair *)pbl_tbl->va;
                                        pbe_cnt = 0;
                                }

                                fw_pg_cnt++;
                        }
                }
        }
}

static int qedr_copy_cq_uresp(struct qedr_dev *dev,
                              struct qedr_cq *cq, struct ib_udata *udata)
{
        struct qedr_create_cq_uresp uresp;
        int rc;

        memset(&uresp, 0, sizeof(uresp));

        uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
        uresp.icid = cq->icid;

        rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

        return rc;
}

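/* The expected value of the CQE toggle bit flips on every wrap of the CQ
 * ring; track this by inverting pbl_toggle whenever the consumer passes
 * toggle_cqe, the ring's last element, so that stale CQEs from the
 * previous lap are not mistaken for valid ones.
 */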
static void consume_cqe(struct qedr_cq *cq)
{
        if (cq->latest_cqe == cq->toggle_cqe)
                cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

        cq->latest_cqe = qed_chain_consume(&cq->pbl);
}

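/* The CQ ring is allocated in whole pages, so round the requested entry
 * count up to what the pages can hold. One extra entry is allocated but
 * hidden from the FW (hence the "cq_size = chain_entries - 1" in
 * qedr_init_cq_params() below).
 */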
static inline int qedr_align_cq_entries(int entries)
{
        u64 size, aligned_size;

        /* We allocate an extra entry that we don't report to the FW. */
        size = (entries + 1) * QEDR_CQE_SIZE;
        aligned_size = ALIGN(size, PAGE_SIZE);

        return aligned_size / QEDR_CQE_SIZE;
}

static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                                       struct qedr_dev *dev,
                                       struct qedr_userq *q,
                                       u64 buf_addr, size_t buf_len,
                                       int access, int dmasync,
                                       int alloc_and_init)
{
        u32 fw_pages;
        int rc;

        q->buf_addr = buf_addr;
        q->buf_len = buf_len;
        q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
        if (IS_ERR(q->umem)) {
                DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
                       PTR_ERR(q->umem));
                return PTR_ERR(q->umem);
        }

        fw_pages = ib_umem_page_count(q->umem) <<
            (q->umem->page_shift - FW_PAGE_SHIFT);

        rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
        if (rc)
                goto err0;

        if (alloc_and_init) {
                q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
                if (IS_ERR(q->pbl_tbl)) {
                        rc = PTR_ERR(q->pbl_tbl);
                        goto err0;
                }
                qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
                                   FW_PAGE_SHIFT);
        } else {
                q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
                if (!q->pbl_tbl) {
                        rc = -ENOMEM;
                        goto err0;
                }
        }

        return 0;

err0:
        ib_umem_release(q->umem);
        q->umem = NULL;

        return rc;
}

static inline void qedr_init_cq_params(struct qedr_cq *cq,
                                       struct qedr_ucontext *ctx,
                                       struct qedr_dev *dev, int vector,
                                       int chain_entries, int page_cnt,
                                       u64 pbl_ptr,
                                       struct qed_rdma_create_cq_in_params
                                       *params)
{
        memset(params, 0, sizeof(*params));
        params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
        params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
        params->cnq_id = vector;
        params->cq_size = chain_entries - 1;
        params->dpi = (ctx) ? ctx->dpi : dev->dpi;
        params->pbl_num_pages = page_cnt;
        params->pbl_ptr = pbl_ptr;
        params->pbl_two_level = 0;
}

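/* Ring a CQ doorbell: the consumer index and the aggregation/arm flags
 * are packed into cq->db and written as one 64-bit transaction, so the
 * device never observes a half-updated doorbell.
 */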
static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
        cq->db.data.agg_flags = flags;
        cq->db.data.value = cpu_to_le32(cons);
        writeq(cq->db.raw, cq->db_addr);

        /* Make sure write would stick */
        mmiowb();
}

int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct qedr_cq *cq = get_qedr_cq(ibcq);
        unsigned long sflags;
        struct qedr_dev *dev;

        dev = get_qedr_dev(ibcq->device);

        if (cq->destroyed) {
                DP_ERR(dev,
                       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
                       cq, cq->icid);
                return -EINVAL;
        }

        if (cq->cq_type == QEDR_CQ_TYPE_GSI)
                return 0;

        spin_lock_irqsave(&cq->cq_lock, sflags);

        cq->arm_flags = 0;

        if (flags & IB_CQ_SOLICITED)
                cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;

        if (flags & IB_CQ_NEXT_COMP)
                cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;

        doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

        spin_unlock_irqrestore(&cq->cq_lock, sflags);

        return 0;
}

struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
                             const struct ib_cq_init_attr *attr,
                             struct ib_ucontext *ib_ctx, struct ib_udata *udata)
{
        struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
        struct qed_rdma_destroy_cq_out_params destroy_oparams;
        struct qed_rdma_destroy_cq_in_params destroy_iparams;
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qed_rdma_create_cq_in_params params;
        struct qedr_create_cq_ureq ureq;
        int vector = attr->comp_vector;
        int entries = attr->cqe;
        struct qedr_cq *cq;
        int chain_entries;
        int page_cnt;
        u64 pbl_ptr;
        u16 icid;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_INIT,
                 "create_cq: called from %s. entries=%d, vector=%d\n",
                 udata ? "User Lib" : "Kernel", entries, vector);

        if (entries > QEDR_MAX_CQES) {
                DP_ERR(dev,
                       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
                       entries, QEDR_MAX_CQES);
                return ERR_PTR(-EINVAL);
        }

        chain_entries = qedr_align_cq_entries(entries);
        chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        if (udata) {
                memset(&ureq, 0, sizeof(ureq));
                if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
                        DP_ERR(dev,
                               "create cq: problem copying data from user space\n");
                        goto err0;
                }

                if (!ureq.len) {
                        DP_ERR(dev,
                               "create cq: cannot create a cq with 0 entries\n");
                        goto err0;
                }

                cq->cq_type = QEDR_CQ_TYPE_USER;

                rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
                                          ureq.len, IB_ACCESS_LOCAL_WRITE,
                                          1, 1);
                if (rc)
                        goto err0;

                pbl_ptr = cq->q.pbl_tbl->pa;
                page_cnt = cq->q.pbl_info.num_pbes;

                cq->ibcq.cqe = chain_entries;
        } else {
                cq->cq_type = QEDR_CQ_TYPE_KERNEL;

                rc = dev->ops->common->chain_alloc(dev->cdev,
                                                   QED_CHAIN_USE_TO_CONSUME,
                                                   QED_CHAIN_MODE_PBL,
                                                   QED_CHAIN_CNT_TYPE_U32,
                                                   chain_entries,
                                                   sizeof(union rdma_cqe),
                                                   &cq->pbl, NULL);
                if (rc)
                        goto err1;

                page_cnt = qed_chain_get_page_cnt(&cq->pbl);
                pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
                cq->ibcq.cqe = cq->pbl.capacity;
        }

        qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
                            pbl_ptr, &params);

        rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
        if (rc)
                goto err2;

        cq->icid = icid;
        cq->sig = QEDR_CQ_MAGIC_NUMBER;
        spin_lock_init(&cq->cq_lock);

        if (ib_ctx) {
                rc = qedr_copy_cq_uresp(dev, cq, udata);
                if (rc)
                        goto err3;
        } else {
                /* Generate doorbell address. */
                cq->db_addr = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
                cq->db.data.icid = cq->icid;
                cq->db.data.params = DB_AGG_CMD_SET <<
                    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;

                /* point to the very last element, passing it we will toggle */
                cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
                cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
                cq->latest_cqe = NULL;
                consume_cqe(cq);
                cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
        }

        DP_DEBUG(dev, QEDR_MSG_CQ,
                 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
                 cq->icid, cq, params.cq_size);

        return &cq->ibcq;

err3:
        destroy_iparams.icid = cq->icid;
        dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
                                  &destroy_oparams);
err2:
        if (udata)
                qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
        else
                dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
        if (udata)
                ib_umem_release(cq->q.umem);
err0:
        kfree(cq);
        return ERR_PTR(-EINVAL);
}

int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qedr_cq *cq = get_qedr_cq(ibcq);

        DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);

        return 0;
}

#define QEDR_DESTROY_CQ_MAX_ITERATIONS          (10)
#define QEDR_DESTROY_CQ_ITER_DURATION           (10)

int qedr_destroy_cq(struct ib_cq *ibcq)
{
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qed_rdma_destroy_cq_out_params oparams;
        struct qed_rdma_destroy_cq_in_params iparams;
        struct qedr_cq *cq = get_qedr_cq(ibcq);
        int iter;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);

        cq->destroyed = 1;

        /* GSI CQs are handled by the driver, so they don't exist in the FW */
        if (cq->cq_type == QEDR_CQ_TYPE_GSI)
                goto done;

        iparams.icid = cq->icid;
        rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
        if (rc)
                return rc;

        dev->ops->common->chain_free(dev->cdev, &cq->pbl);

        if (ibcq->uobject && ibcq->uobject->context) {
                qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
                ib_umem_release(cq->q.umem);
        }

        /* We don't want the IRQ handler to handle a non-existing CQ, so we
         * wait until all CNQ interrupts, if any, are received. This will
         * always happen, and will always happen very fast; if not, then a
         * serious error has occurred. That is why we can use a long delay.
         * We spin for a short time so we don't lose time on context
         * switching in case all the completions are handled in that span.
         * Otherwise we sleep for a while and check again. Since the CNQ may
         * be associated with (only) the current CPU we use msleep to allow
         * the current CPU to be freed.
         * The CNQ notification is increased in qedr_irq_handler().
         */
        iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
        while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
                udelay(QEDR_DESTROY_CQ_ITER_DURATION);
                iter--;
        }

        iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
        while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
                msleep(QEDR_DESTROY_CQ_ITER_DURATION);
                iter--;
        }

        if (oparams.num_cq_notif != cq->cnq_notif)
                goto err;

        /* Note that we don't need to have explicit code to wait for the
         * completion of the event handler because it is invoked from the EQ.
         * Since the destroy CQ ramrod has also been received on the EQ we can
         * be certain that there's no event handler in process.
         */
done:
        cq->sig = ~cq->sig;

        kfree(cq);

        return 0;

err:
        DP_ERR(dev,
               "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
               cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);

        return -EINVAL;
}

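/* Resolve the SGID table entry referenced by the AH attributes into what
 * the FW needs: the RoCE mode (v1, or v2 over IPv4/IPv6 depending on the
 * GID's network type), the VLAN of the underlying netdev and the source/
 * destination GIDs converted dword-by-dword to host byte order.
 */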
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
                                          struct ib_qp_attr *attr,
                                          int attr_mask,
                                          struct qed_rdma_modify_qp_in_params
                                          *qp_params)
{
        enum rdma_network_type nw_type;
        struct ib_gid_attr gid_attr;
        const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
        union ib_gid gid;
        u32 ipv4_addr;
        int rc = 0;
        int i;

        rc = ib_get_cached_gid(ibqp->device,
                               rdma_ah_get_port_num(&attr->ah_attr),
                               grh->sgid_index, &gid, &gid_attr);
        if (rc)
                return rc;

        qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);

        dev_put(gid_attr.ndev);
        nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
        switch (nw_type) {
        case RDMA_NETWORK_IPV6:
                memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
                       sizeof(qp_params->sgid));
                memcpy(&qp_params->dgid.bytes[0],
                       &grh->dgid,
                       sizeof(qp_params->dgid));
                qp_params->roce_mode = ROCE_V2_IPV6;
                SET_FIELD(qp_params->modify_flags,
                          QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
                break;
        case RDMA_NETWORK_IB:
                memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
                       sizeof(qp_params->sgid));
                memcpy(&qp_params->dgid.bytes[0],
                       &grh->dgid,
                       sizeof(qp_params->dgid));
                qp_params->roce_mode = ROCE_V1;
                break;
        case RDMA_NETWORK_IPV4:
                memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
                memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
                ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
                qp_params->sgid.ipv4_addr = ipv4_addr;
                ipv4_addr =
                    qedr_get_ipv4_from_gid(grh->dgid.raw);
                qp_params->dgid.ipv4_addr = ipv4_addr;
                SET_FIELD(qp_params->modify_flags,
                          QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
                qp_params->roce_mode = ROCE_V2_IPV4;
                break;
        }

        for (i = 0; i < 4; i++) {
                qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
                qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
        }

        if (qp_params->vlan_id >= VLAN_CFI_MASK)
                qp_params->vlan_id = 0;

        return 0;
}

static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
                               struct ib_qp_init_attr *attrs)
{
        struct qedr_device_attr *qattr = &dev->attr;

        /* QP0... attrs->qp_type == IB_QPT_GSI */
        if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
                DP_DEBUG(dev, QEDR_MSG_QP,
                         "create qp: unsupported qp type=0x%x requested\n",
                         attrs->qp_type);
                return -EINVAL;
        }

        if (attrs->cap.max_send_wr > qattr->max_sqe) {
                DP_ERR(dev,
                       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
                       attrs->cap.max_send_wr, qattr->max_sqe);
                return -EINVAL;
        }

        if (attrs->cap.max_inline_data > qattr->max_inline) {
                DP_ERR(dev,
                       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
                       attrs->cap.max_inline_data, qattr->max_inline);
                return -EINVAL;
        }

        if (attrs->cap.max_send_sge > qattr->max_sge) {
                DP_ERR(dev,
                       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
                       attrs->cap.max_send_sge, qattr->max_sge);
                return -EINVAL;
        }

        if (attrs->cap.max_recv_sge > qattr->max_sge) {
                DP_ERR(dev,
                       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
                       attrs->cap.max_recv_sge, qattr->max_sge);
                return -EINVAL;
        }

        /* Unprivileged user space cannot create special QP */
        if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
                DP_ERR(dev,
                       "create qp: userspace can't create special QPs of type=0x%x\n",
                       attrs->qp_type);
                return -EINVAL;
        }

        return 0;
}

static void qedr_copy_rq_uresp(struct qedr_dev *dev,
                               struct qedr_create_qp_uresp *uresp,
                               struct qedr_qp *qp)
{
        /* iWARP requires two doorbells per RQ. */
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                uresp->rq_db_offset =
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
                uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
        } else {
                uresp->rq_db_offset =
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
        }

        uresp->rq_icid = qp->icid;
}

static void qedr_copy_sq_uresp(struct qedr_dev *dev,
                               struct qedr_create_qp_uresp *uresp,
                               struct qedr_qp *qp)
{
        uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);

        /* iWARP uses the same cid for rq and sq */
        if (rdma_protocol_iwarp(&dev->ibdev, 1))
                uresp->sq_icid = qp->icid;
        else
                uresp->sq_icid = qp->icid + 1;
}

static int qedr_copy_qp_uresp(struct qedr_dev *dev,
                              struct qedr_qp *qp, struct ib_udata *udata)
{
        struct qedr_create_qp_uresp uresp;
        int rc;

        memset(&uresp, 0, sizeof(uresp));
        qedr_copy_sq_uresp(dev, &uresp, qp);
        qedr_copy_rq_uresp(dev, &uresp, qp);

        uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
        uresp.qp_id = qp->qp_id;

        rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                DP_ERR(dev,
                       "create qp: failed a copy to user space with qp icid=0x%x.\n",
                       qp->icid);

        return rc;
}

static void qedr_set_common_qp_params(struct qedr_dev *dev,
                                      struct qedr_qp *qp,
                                      struct qedr_pd *pd,
                                      struct ib_qp_init_attr *attrs)
{
        spin_lock_init(&qp->q_lock);
        atomic_set(&qp->refcnt, 1);
        qp->pd = pd;
        qp->qp_type = attrs->qp_type;
        qp->max_inline_data = attrs->cap.max_inline_data;
        qp->sq.max_sges = attrs->cap.max_send_sge;
        qp->state = QED_ROCE_QP_STATE_RESET;
        qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
        qp->sq_cq = get_qedr_cq(attrs->send_cq);
        qp->rq_cq = get_qedr_cq(attrs->recv_cq);
        qp->dev = dev;
        qp->rq.max_sges = attrs->cap.max_recv_sge;

        DP_DEBUG(dev, QEDR_MSG_QP,
                 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
                 qp->rq.max_sges, qp->rq_cq->icid);
        DP_DEBUG(dev, QEDR_MSG_QP,
                 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
                 pd->pd_id, qp->qp_type, qp->max_inline_data,
                 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
        DP_DEBUG(dev, QEDR_MSG_QP,
                 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
                 qp->sq.max_sges, qp->sq_cq->icid);
}

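/* For RoCE the FW assigns two consecutive icids per QP: the RQ doorbell
 * uses qp->icid and the SQ doorbell uses qp->icid + 1 (this matches what
 * qedr_copy_sq_uresp() reports to user space).
 */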
static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
        qp->sq.db = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
        qp->sq.db_data.data.icid = qp->icid + 1;
        qp->rq.db = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
        qp->rq.db_data.data.icid = qp->icid;
}

static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
                              struct qedr_pd *pd,
                              struct qedr_qp *qp,
                              struct ib_qp_init_attr *attrs,
                              bool fmr_and_reserved_lkey,
                              struct qed_rdma_create_qp_in_params *params)
{
        /* QP handle to be written in an async event */
        params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
        params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);

        params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
        params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
        params->pd = pd->pd_id;
        params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
        params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
        params->stats_queue = 0;
        params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
        params->srq_id = 0;
        params->use_srq = false;
}

static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
        DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
                 "qp=%p. "
                 "sq_addr=0x%llx, "
                 "sq_len=%zd, "
                 "rq_addr=0x%llx, "
                 "rq_len=%zd"
                 "\n",
                 qp,
                 qp->usq.buf_addr,
                 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
}

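/* iWARP only: QPs are tracked in an idr keyed by qp_id, presumably so
 * that the iWARP connection-management code can look a QP up by its id;
 * RoCE needs no such bookkeeping.
 */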
static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
{
        int rc;

        if (!rdma_protocol_iwarp(&dev->ibdev, 1))
                return 0;

        idr_preload(GFP_KERNEL);
        spin_lock_irq(&dev->idr_lock);

        rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);

        spin_unlock_irq(&dev->idr_lock);
        idr_preload_end();

        return rc < 0 ? rc : 0;
}

static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
{
        if (!rdma_protocol_iwarp(&dev->ibdev, 1))
                return;

        spin_lock_irq(&dev->idr_lock);
        idr_remove(&dev->qpidr, id);
        spin_unlock_irq(&dev->idr_lock);
}

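/* For user iWARP QPs the qed layer allocates the first-level PBL pages
 * itself and returns them in out_params; adopt those pages here and fill
 * them with the PBEs of the user's SQ/RQ buffers.
 */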
static inline void
qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
                            struct qedr_qp *qp,
                            struct qed_rdma_create_qp_out_params *out_params)
{
        qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
        qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;

        qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
                           &qp->usq.pbl_info, FW_PAGE_SHIFT);

        qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
        qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;

        qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
                           &qp->urq.pbl_info, FW_PAGE_SHIFT);
}

static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
{
        if (qp->usq.umem)
                ib_umem_release(qp->usq.umem);
        qp->usq.umem = NULL;

        if (qp->urq.umem)
                ib_umem_release(qp->urq.umem);
        qp->urq.umem = NULL;
}

static int qedr_create_user_qp(struct qedr_dev *dev,
                               struct qedr_qp *qp,
                               struct ib_pd *ibpd,
                               struct ib_udata *udata,
                               struct ib_qp_init_attr *attrs)
{
        struct qed_rdma_create_qp_in_params in_params;
        struct qed_rdma_create_qp_out_params out_params;
        struct qedr_pd *pd = get_qedr_pd(ibpd);
        struct ib_ucontext *ib_ctx = NULL;
        struct qedr_create_qp_ureq ureq;
        int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
        int rc = -EINVAL;

        ib_ctx = ibpd->uobject->context;

        memset(&ureq, 0, sizeof(ureq));
        rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
        if (rc) {
                DP_ERR(dev, "Problem copying data from user space\n");
                return rc;
        }

        /* SQ - read access only (0), dma sync not required (0) */
        rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
                                  ureq.sq_len, 0, 0, alloc_and_init);
        if (rc)
                return rc;

        /* RQ - read access only (0), dma sync not required (0) */
        rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
                                  ureq.rq_len, 0, 0, alloc_and_init);
        if (rc)
                return rc;

        memset(&in_params, 0, sizeof(in_params));
        qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
        in_params.qp_handle_lo = ureq.qp_handle_lo;
        in_params.qp_handle_hi = ureq.qp_handle_hi;
        in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
        in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
        in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
        in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;

        qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
                                              &in_params, &out_params);

        if (!qp->qed_qp) {
                rc = -ENOMEM;
                goto err1;
        }

        if (rdma_protocol_iwarp(&dev->ibdev, 1))
                qedr_iwarp_populate_user_qp(dev, qp, &out_params);

        qp->qp_id = out_params.qp_id;
        qp->icid = out_params.icid;

        rc = qedr_copy_qp_uresp(dev, qp, udata);
        if (rc)
                goto err;

        qedr_qp_user_print(dev, qp);

        return 0;
err:
        rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
        if (rc)
                DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);

err1:
        qedr_cleanup_user(dev, qp);
        return rc;
}

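/* iWARP doorbells: the SQ and RQ share a single icid, and the RQ needs a
 * second doorbell that posts DQ_TCM_IWARP_POST_RQ_CF_CMD (the matching
 * offsets are reported to user space in qedr_copy_rq_uresp()).
 */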
static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
        qp->sq.db = dev->db_addr +
            DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
        qp->sq.db_data.data.icid = qp->icid;

        qp->rq.db = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
        qp->rq.db_data.data.icid = qp->icid;
        qp->rq.iwarp_db2 = dev->db_addr +
                           DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
        qp->rq.iwarp_db2_data.data.icid = qp->icid;
        qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
}

1468 static int
1469 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1470                            struct qedr_qp *qp,
1471                            struct qed_rdma_create_qp_in_params *in_params,
1472                            u32 n_sq_elems, u32 n_rq_elems)
1473 {
1474         struct qed_rdma_create_qp_out_params out_params;
1475         int rc;
1476
1477         rc = dev->ops->common->chain_alloc(dev->cdev,
1478                                            QED_CHAIN_USE_TO_PRODUCE,
1479                                            QED_CHAIN_MODE_PBL,
1480                                            QED_CHAIN_CNT_TYPE_U32,
1481                                            n_sq_elems,
1482                                            QEDR_SQE_ELEMENT_SIZE,
1483                                            &qp->sq.pbl, NULL);
1484
1485         if (rc)
1486                 return rc;
1487
1488         in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1489         in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1490
1491         rc = dev->ops->common->chain_alloc(dev->cdev,
1492                                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1493                                            QED_CHAIN_MODE_PBL,
1494                                            QED_CHAIN_CNT_TYPE_U32,
1495                                            n_rq_elems,
1496                                            QEDR_RQE_ELEMENT_SIZE,
1497                                            &qp->rq.pbl, NULL);
1498         if (rc)
1499                 return rc;
1500
1501         in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1502         in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1503
1504         qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1505                                               in_params, &out_params);
1506
1507         if (!qp->qed_qp)
1508                 return -EINVAL;
1509
1510         qp->qp_id = out_params.qp_id;
1511         qp->icid = out_params.icid;
1512
1513         qedr_set_roce_db_info(dev, qp);
1514         return rc;
1515 }
1516
1517 static int
1518 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1519                             struct qedr_qp *qp,
1520                             struct qed_rdma_create_qp_in_params *in_params,
1521                             u32 n_sq_elems, u32 n_rq_elems)
1522 {
1523         struct qed_rdma_create_qp_out_params out_params;
1524         struct qed_chain_ext_pbl ext_pbl;
1525         int rc;
1526
1527         in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1528                                                      QEDR_SQE_ELEMENT_SIZE,
1529                                                      QED_CHAIN_MODE_PBL);
1530         in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1531                                                      QEDR_RQE_ELEMENT_SIZE,
1532                                                      QED_CHAIN_MODE_PBL);
1533
1534         qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1535                                               in_params, &out_params);
1536
1537         if (!qp->qed_qp)
1538                 return -EINVAL;
1539
1540         /* Now we allocate the chain */
1541         ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1542         ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1543
1544         rc = dev->ops->common->chain_alloc(dev->cdev,
1545                                            QED_CHAIN_USE_TO_PRODUCE,
1546                                            QED_CHAIN_MODE_PBL,
1547                                            QED_CHAIN_CNT_TYPE_U32,
1548                                            n_sq_elems,
1549                                            QEDR_SQE_ELEMENT_SIZE,
1550                                            &qp->sq.pbl, &ext_pbl);
1551
1552         if (rc)
1553                 goto err;
1554
1555         ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1556         ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1557
1558         rc = dev->ops->common->chain_alloc(dev->cdev,
1559                                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1560                                            QED_CHAIN_MODE_PBL,
1561                                            QED_CHAIN_CNT_TYPE_U32,
1562                                            n_rq_elems,
1563                                            QEDR_RQE_ELEMENT_SIZE,
1564                                            &qp->rq.pbl, &ext_pbl);
1565
1566         if (rc)
1567                 goto err;
1568
1569         qp->qp_id = out_params.qp_id;
1570         qp->icid = out_params.icid;
1571
1572         qedr_set_iwarp_db_info(dev, qp);
1573         return rc;
1574
1575 err:
1576         dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1577
1578         return rc;
1579 }
1580
1581 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1582 {
1583         dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1584         kfree(qp->wqe_wr_id);
1585
1586         dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1587         kfree(qp->rqe_wr_id);
1588 }
1589
1590 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1591                                  struct qedr_qp *qp,
1592                                  struct ib_pd *ibpd,
1593                                  struct ib_qp_init_attr *attrs)
1594 {
1595         struct qed_rdma_create_qp_in_params in_params;
1596         struct qedr_pd *pd = get_qedr_pd(ibpd);
1597         int rc = -EINVAL;
1598         u32 n_rq_elems;
1599         u32 n_sq_elems;
1600         u32 n_sq_entries;
1601
1602         memset(&in_params, 0, sizeof(in_params));
1603
1604         /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1605          * the ring. The ring should allow at least a single WR, even if the
1606          * user requested none, due to allocation issues.
1607          * We should add an extra WR since the prod and cons indices of
1608          * wqe_wr_id are managed in such a way that the WQ is considered full
1609          * when (prod+1)%max_wr==cons. We currently don't do that because we
1610          * double the number of entries due to an iSER issue that pushes far more
1611          * WRs than indicated. If we decline its ib_post_send() then we get
1612          * error prints in dmesg that we'd like to avoid.
1613          */
1614         qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1615                               dev->attr.max_sqe);
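        /* For example: max_send_wr = 128 with a wq_multiplier of 2 requests
         * 256 shadow entries here, and the min_t() above caps the result at
         * dev->attr.max_sqe.
         */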
1616
1617         qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1618                                 GFP_KERNEL);
1619         if (!qp->wqe_wr_id) {
1620                 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1621                 return -ENOMEM;
1622         }
1623
1624         /* QP handle to be written in CQE */
1625         in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1626         in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1627
1628         /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1629          * the ring. The ring should allow at least a single WR, even if the
1630          * user requested none, due to allocation issues.
1631          */
1632         qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1633
1634         /* Allocate driver internal RQ array */
1635         qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1636                                 GFP_KERNEL);
1637         if (!qp->rqe_wr_id) {
1638                 DP_ERR(dev,
1639                        "create qp: failed RQ shadow memory allocation\n");
1640                 kfree(qp->wqe_wr_id);
1641                 return -ENOMEM;
1642         }
1643
1644         qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1645
1646         n_sq_entries = attrs->cap.max_send_wr;
1647         n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1648         n_sq_entries = max_t(u32, n_sq_entries, 1);
1649         n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1650
1651         n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1652
1653         if (rdma_protocol_iwarp(&dev->ibdev, 1))
1654                 rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1655                                                  n_sq_elems, n_rq_elems);
1656         else
1657                 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1658                                                 n_sq_elems, n_rq_elems);
1659         if (rc)
1660                 qedr_cleanup_kernel(dev, qp);
1661
1662         return rc;
1663 }
1664
1665 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1666                              struct ib_qp_init_attr *attrs,
1667                              struct ib_udata *udata)
1668 {
1669         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1670         struct qedr_pd *pd = get_qedr_pd(ibpd);
1671         struct qedr_qp *qp;
1672         struct ib_qp *ibqp;
1673         int rc = 0;
1674
1675         DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1676                  udata ? "user library" : "kernel", pd);
1677
1678         rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1679         if (rc)
1680                 return ERR_PTR(rc);
1681
1682         if (attrs->srq)
1683                 return ERR_PTR(-EINVAL);
1684
1685         DP_DEBUG(dev, QEDR_MSG_QP,
1686                  "create qp: called from %s, event_handler=%p, pd=%p, sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1687                  udata ? "user library" : "kernel", attrs->event_handler, pd,
1688                  get_qedr_cq(attrs->send_cq),
1689                  get_qedr_cq(attrs->send_cq)->icid,
1690                  get_qedr_cq(attrs->recv_cq),
1691                  get_qedr_cq(attrs->recv_cq)->icid);
1692
1693         qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1694         if (!qp) {
1695                 DP_ERR(dev, "create qp: failed allocating memory\n");
1696                 return ERR_PTR(-ENOMEM);
1697         }
1698
1699         qedr_set_common_qp_params(dev, qp, pd, attrs);
1700
1701         if (attrs->qp_type == IB_QPT_GSI) {
1702                 ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1703                 if (IS_ERR(ibqp))
1704                         kfree(qp);
1705                 return ibqp;
1706         }
1707
1708         if (udata)
1709                 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1710         else
1711                 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1712
1713         if (rc)
1714                 goto err;
1715
1716         qp->ibqp.qp_num = qp->qp_id;
1717
1718         rc = qedr_idr_add(dev, qp, qp->qp_id);
1719         if (rc)
1720                 goto err;
1721
1722         return &qp->ibqp;
1723
1724 err:
1725         kfree(qp);
1726
1727         return ERR_PTR(rc);
1728 }
1729
1730 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1731 {
1732         switch (qp_state) {
1733         case QED_ROCE_QP_STATE_RESET:
1734                 return IB_QPS_RESET;
1735         case QED_ROCE_QP_STATE_INIT:
1736                 return IB_QPS_INIT;
1737         case QED_ROCE_QP_STATE_RTR:
1738                 return IB_QPS_RTR;
1739         case QED_ROCE_QP_STATE_RTS:
1740                 return IB_QPS_RTS;
1741         case QED_ROCE_QP_STATE_SQD:
1742                 return IB_QPS_SQD;
1743         case QED_ROCE_QP_STATE_ERR:
1744                 return IB_QPS_ERR;
1745         case QED_ROCE_QP_STATE_SQE:
1746                 return IB_QPS_SQE;
1747         }
1748         return IB_QPS_ERR;
1749 }
1750
1751 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1752                                         enum ib_qp_state qp_state)
1753 {
1754         switch (qp_state) {
1755         case IB_QPS_RESET:
1756                 return QED_ROCE_QP_STATE_RESET;
1757         case IB_QPS_INIT:
1758                 return QED_ROCE_QP_STATE_INIT;
1759         case IB_QPS_RTR:
1760                 return QED_ROCE_QP_STATE_RTR;
1761         case IB_QPS_RTS:
1762                 return QED_ROCE_QP_STATE_RTS;
1763         case IB_QPS_SQD:
1764                 return QED_ROCE_QP_STATE_SQD;
1765         case IB_QPS_ERR:
1766                 return QED_ROCE_QP_STATE_ERR;
1767         default:
1768                 return QED_ROCE_QP_STATE_ERR;
1769         }
1770 }
1771
1772 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1773 {
1774         qed_chain_reset(&qph->pbl);
1775         qph->prod = 0;
1776         qph->cons = 0;
1777         qph->wqe_cons = 0;
1778         qph->db_data.data.value = cpu_to_le16(0);
1779 }
1780
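/* Summary of the transitions accepted below, derived from the switch cases:
 * RESET->INIT, INIT->{RTR, ERR}, RTR->{RTS, ERR}, RTS->{SQD, ERR},
 * SQD->{RTS, ERR} and ERR->RESET (the latter only with both rings empty).
 * A same-state "transition" returns 0; everything else is -EINVAL.
 */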
1781 static int qedr_update_qp_state(struct qedr_dev *dev,
1782                                 struct qedr_qp *qp,
1783                                 enum qed_roce_qp_state cur_state,
1784                                 enum qed_roce_qp_state new_state)
1785 {
1786         int status = 0;
1787
1788         if (new_state == cur_state)
1789                 return 0;
1790
1791         switch (cur_state) {
1792         case QED_ROCE_QP_STATE_RESET:
1793                 switch (new_state) {
1794                 case QED_ROCE_QP_STATE_INIT:
1795                         qp->prev_wqe_size = 0;
1796                         qedr_reset_qp_hwq_info(&qp->sq);
1797                         qedr_reset_qp_hwq_info(&qp->rq);
1798                         break;
1799                 default:
1800                         status = -EINVAL;
1801                         break;
1802                 }
1803                 break;
1804         case QED_ROCE_QP_STATE_INIT:
1805                 switch (new_state) {
1806                 case QED_ROCE_QP_STATE_RTR:
1807                         /* Update doorbell (in case post_recv was
1808                          * done before move to RTR)
1809                          */
1810
1811                         if (rdma_protocol_roce(&dev->ibdev, 1)) {
1812                                 writel(qp->rq.db_data.raw, qp->rq.db);
1813                                 /* Make sure write takes effect */
1814                                 mmiowb();
1815                         }
1816                         break;
1817                 case QED_ROCE_QP_STATE_ERR:
1818                         break;
1819                 default:
1820                         /* Invalid state change. */
1821                         status = -EINVAL;
1822                         break;
1823                 }
1824                 break;
1825         case QED_ROCE_QP_STATE_RTR:
1826                 /* RTR->XXX */
1827                 switch (new_state) {
1828                 case QED_ROCE_QP_STATE_RTS:
1829                         break;
1830                 case QED_ROCE_QP_STATE_ERR:
1831                         break;
1832                 default:
1833                         /* Invalid state change. */
1834                         status = -EINVAL;
1835                         break;
1836                 }
1837                 break;
1838         case QED_ROCE_QP_STATE_RTS:
1839                 /* RTS->XXX */
1840                 switch (new_state) {
1841                 case QED_ROCE_QP_STATE_SQD:
1842                         break;
1843                 case QED_ROCE_QP_STATE_ERR:
1844                         break;
1845                 default:
1846                         /* Invalid state change. */
1847                         status = -EINVAL;
1848                         break;
1849                 }
1850                 break;
1851         case QED_ROCE_QP_STATE_SQD:
1852                 /* SQD->XXX */
1853                 switch (new_state) {
1854                 case QED_ROCE_QP_STATE_RTS:
1855                 case QED_ROCE_QP_STATE_ERR:
1856                         break;
1857                 default:
1858                         /* Invalid state change. */
1859                         status = -EINVAL;
1860                         break;
1861                 }
1862                 break;
1863         case QED_ROCE_QP_STATE_ERR:
1864                 /* ERR->XXX */
1865                 switch (new_state) {
1866                 case QED_ROCE_QP_STATE_RESET:
1867                         if ((qp->rq.prod != qp->rq.cons) ||
1868                             (qp->sq.prod != qp->sq.cons)) {
1869                                 DP_NOTICE(dev,
1870                                           "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1871                                           qp->rq.prod, qp->rq.cons, qp->sq.prod,
1872                                           qp->sq.cons);
1873                                 status = -EINVAL;
1874                         }
1875                         break;
1876                 default:
1877                         status = -EINVAL;
1878                         break;
1879                 }
1880                 break;
1881         default:
1882                 status = -EINVAL;
1883                 break;
1884         }
1885
1886         return status;
1887 }
1888
1889 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1890                    int attr_mask, struct ib_udata *udata)
1891 {
1892         struct qedr_qp *qp = get_qedr_qp(ibqp);
1893         struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1894         struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1895         const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1896         enum ib_qp_state old_qp_state, new_qp_state;
1897         enum qed_roce_qp_state cur_state;
1898         int rc = 0;
1899
1900         DP_DEBUG(dev, QEDR_MSG_QP,
1901                  "modify qp: qp %p attr_mask=0x%x, state=%d\n", qp, attr_mask,
1902                  attr->qp_state);
1903
1904         old_qp_state = qedr_get_ibqp_state(qp->state);
1905         if (attr_mask & IB_QP_STATE)
1906                 new_qp_state = attr->qp_state;
1907         else
1908                 new_qp_state = old_qp_state;
1909
1910         if (rdma_protocol_roce(&dev->ibdev, 1)) {
1911                 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
1912                                         ibqp->qp_type, attr_mask,
1913                                         IB_LINK_LAYER_ETHERNET)) {
1914                         DP_ERR(dev,
1915                                "modify qp: invalid attribute mask=0x%x specified for\n"
1916                                "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1917                                attr_mask, qp->qp_id, ibqp->qp_type,
1918                                old_qp_state, new_qp_state);
1919                         rc = -EINVAL;
1920                         goto err;
1921                 }
1922         }
1923
1924         /* Translate the masks... */
1925         if (attr_mask & IB_QP_STATE) {
1926                 SET_FIELD(qp_params.modify_flags,
1927                           QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1928                 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1929         }
1930
1931         if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1932                 qp_params.sqd_async = true;
1933
1934         if (attr_mask & IB_QP_PKEY_INDEX) {
1935                 SET_FIELD(qp_params.modify_flags,
1936                           QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1937                 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1938                         rc = -EINVAL;
1939                         goto err;
1940                 }
1941
1942                 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1943         }
1944
1945         if (attr_mask & IB_QP_QKEY)
1946                 qp->qkey = attr->qkey;
1947
1948         if (attr_mask & IB_QP_ACCESS_FLAGS) {
1949                 SET_FIELD(qp_params.modify_flags,
1950                           QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1951                 qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1952                                                   IB_ACCESS_REMOTE_READ;
1953                 qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1954                                                    IB_ACCESS_REMOTE_WRITE;
1955                 qp_params.incoming_atomic_en = attr->qp_access_flags &
1956                                                IB_ACCESS_REMOTE_ATOMIC;
1957         }
1958
1959         if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1960                 if (attr_mask & IB_QP_PATH_MTU) {
1961                         if (attr->path_mtu < IB_MTU_256 ||
1962                             attr->path_mtu > IB_MTU_4096) {
1963                                 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1964                                 rc = -EINVAL;
1965                                 goto err;
1966                         }
1967                         qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1968                                       ib_mtu_enum_to_int(iboe_get_mtu
1969                                                          (dev->ndev->mtu)));
1970                 }
1971
1972                 if (!qp->mtu) {
1973                         qp->mtu =
1974                         ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1975                         pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1976                 }
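                /* For example: with a standard 1500-byte netdev MTU,
                 * iboe_get_mtu() typically resolves to IB_MTU_1024 once
                 * header overhead is accounted for, so a requested
                 * IB_MTU_4096 ends up clamped to 1024 by the min() above.
                 */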
1977
1978                 SET_FIELD(qp_params.modify_flags,
1979                           QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1980
1981                 qp_params.traffic_class_tos = grh->traffic_class;
1982                 qp_params.flow_label = grh->flow_label;
1983                 qp_params.hop_limit_ttl = grh->hop_limit;
1984
1985                 qp->sgid_idx = grh->sgid_index;
1986
1987                 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1988                 if (rc) {
1989                         DP_ERR(dev,
1990                                "modify qp: problems with GID index %d (rc=%d)\n",
1991                                grh->sgid_index, rc);
1992                         return rc;
1993                 }
1994
1995                 rc = qedr_get_dmac(dev, &attr->ah_attr,
1996                                    qp_params.remote_mac_addr);
1997                 if (rc)
1998                         return rc;
1999
2000                 qp_params.use_local_mac = true;
2001                 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2002
2003                 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2004                          qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2005                          qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2006                 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2007                          qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2008                          qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2009                 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2010                          qp_params.remote_mac_addr);
2011
2012                 qp_params.mtu = qp->mtu;
2013                 qp_params.lb_indication = false;
2014         }
2015
2016         if (!qp_params.mtu) {
2017                 /* Stay with current MTU */
2018                 if (qp->mtu)
2019                         qp_params.mtu = qp->mtu;
2020                 else
2021                         qp_params.mtu =
2022                             ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2023         }
2024
2025         if (attr_mask & IB_QP_TIMEOUT) {
2026                 SET_FIELD(qp_params.modify_flags,
2027                           QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2028
2029                 /* The received timeout value is an exponent used like this:
2030                  *    "12.7.34 LOCAL ACK TIMEOUT
2031                  *    Value representing the transport (ACK) timeout for use by
2032                  *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2033                  * The FW expects timeout in msec so we need to divide the usec
2034                  * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2035                  * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2036                  * The value of zero means infinite so we use a 'max_t' to make
2037                  * sure that sub 1 msec values will be configured as 1 msec.
2038                  */
2039                 if (attr->timeout)
2040                         qp_params.ack_timeout =
2041                                         1 << max_t(int, attr->timeout - 8, 0);
2042                 else
2043                         qp_params.ack_timeout = 0;
2044         }
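        /* Worked example: attr->timeout = 14 represents
         * 4.096 usec * 2^14 ~= 67 msec on the wire, and the expression above
         * programs 1 << (14 - 8) = 64 msec into the FW; any exponent of 8 or
         * less collapses to the 1 msec minimum.
         */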
2045
2046         if (attr_mask & IB_QP_RETRY_CNT) {
2047                 SET_FIELD(qp_params.modify_flags,
2048                           QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2049                 qp_params.retry_cnt = attr->retry_cnt;
2050         }
2051
2052         if (attr_mask & IB_QP_RNR_RETRY) {
2053                 SET_FIELD(qp_params.modify_flags,
2054                           QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2055                 qp_params.rnr_retry_cnt = attr->rnr_retry;
2056         }
2057
2058         if (attr_mask & IB_QP_RQ_PSN) {
2059                 SET_FIELD(qp_params.modify_flags,
2060                           QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2061                 qp_params.rq_psn = attr->rq_psn;
2062                 qp->rq_psn = attr->rq_psn;
2063         }
2064
2065         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2066                 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2067                         rc = -EINVAL;
2068                         DP_ERR(dev,
2069                                "unsupported max_rd_atomic=%d, supported=%d\n",
2070                                attr->max_rd_atomic,
2071                                dev->attr.max_qp_req_rd_atomic_resc);
2072                         goto err;
2073                 }
2074
2075                 SET_FIELD(qp_params.modify_flags,
2076                           QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2077                 qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2078         }
2079
2080         if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2081                 SET_FIELD(qp_params.modify_flags,
2082                           QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2083                 qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2084         }
2085
2086         if (attr_mask & IB_QP_SQ_PSN) {
2087                 SET_FIELD(qp_params.modify_flags,
2088                           QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2089                 qp_params.sq_psn = attr->sq_psn;
2090                 qp->sq_psn = attr->sq_psn;
2091         }
2092
2093         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2094                 if (attr->max_dest_rd_atomic >
2095                     dev->attr.max_qp_resp_rd_atomic_resc) {
2096                         DP_ERR(dev,
2097                                "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2098                                attr->max_dest_rd_atomic,
2099                                dev->attr.max_qp_resp_rd_atomic_resc);
2100
2101                         rc = -EINVAL;
2102                         goto err;
2103                 }
2104
2105                 SET_FIELD(qp_params.modify_flags,
2106                           QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2107                 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2108         }
2109
2110         if (attr_mask & IB_QP_DEST_QPN) {
2111                 SET_FIELD(qp_params.modify_flags,
2112                           QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2113
2114                 qp_params.dest_qp = attr->dest_qp_num;
2115                 qp->dest_qp_num = attr->dest_qp_num;
2116         }
2117
2118         cur_state = qp->state;
2119
2120         /* Update the QP state before the actual ramrod to prevent a race with
2121          * fast path. Modifying the QP state to error will cause the device to
2122          * flush the CQEs, and flushed CQEs seen while polling would be
2123          * treated as a potential issue if the QP isn't in the error state.
2124          */
2125         if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2126             !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2127                 qp->state = QED_ROCE_QP_STATE_ERR;
2128
2129         if (qp->qp_type != IB_QPT_GSI)
2130                 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2131                                               qp->qed_qp, &qp_params);
2132
2133         if (attr_mask & IB_QP_STATE) {
2134                 if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2135                         rc = qedr_update_qp_state(dev, qp, cur_state,
2136                                                   qp_params.new_state);
2137                 qp->state = qp_params.new_state;
2138         }
2139
2140 err:
2141         return rc;
2142 }
2143
2144 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2145 {
2146         int ib_qp_acc_flags = 0;
2147
2148         if (params->incoming_rdma_write_en)
2149                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2150         if (params->incoming_rdma_read_en)
2151                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2152         if (params->incoming_atomic_en)
2153                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2154         ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2155         return ib_qp_acc_flags;
2156 }
2157
2158 int qedr_query_qp(struct ib_qp *ibqp,
2159                   struct ib_qp_attr *qp_attr,
2160                   int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2161 {
2162         struct qed_rdma_query_qp_out_params params;
2163         struct qedr_qp *qp = get_qedr_qp(ibqp);
2164         struct qedr_dev *dev = qp->dev;
2165         int rc = 0;
2166
2167         memset(&params, 0, sizeof(params));
2168
2169         rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2170         if (rc)
2171                 goto err;
2172
2173         memset(qp_attr, 0, sizeof(*qp_attr));
2174         memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2175
2176         qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2177         qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2178         qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2179         qp_attr->path_mig_state = IB_MIG_MIGRATED;
2180         qp_attr->rq_psn = params.rq_psn;
2181         qp_attr->sq_psn = params.sq_psn;
2182         qp_attr->dest_qp_num = params.dest_qp;
2183
2184         qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2185
2186         qp_attr->cap.max_send_wr = qp->sq.max_wr;
2187         qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2188         qp_attr->cap.max_send_sge = qp->sq.max_sges;
2189         qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2190         qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2191         qp_init_attr->cap = qp_attr->cap;
2192
2193         qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2194         rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2195                         params.flow_label, qp->sgid_idx,
2196                         params.hop_limit_ttl, params.traffic_class_tos);
2197         rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2198         rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2199         rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2200         qp_attr->timeout = params.timeout;
2201         qp_attr->rnr_retry = params.rnr_retry;
2202         qp_attr->retry_cnt = params.retry_cnt;
2203         qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2204         qp_attr->pkey_index = params.pkey_index;
2205         qp_attr->port_num = 1;
2206         rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2207         rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2208         qp_attr->alt_pkey_index = 0;
2209         qp_attr->alt_port_num = 0;
2210         qp_attr->alt_timeout = 0;
2211         memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2212
2213         qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2214         qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2215         qp_attr->max_rd_atomic = params.max_rd_atomic;
2216         qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2217
2218         DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2219                  qp_attr->cap.max_inline_data);
2220
2221 err:
2222         return rc;
2223 }
2224
2225 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2226 {
2227         int rc = 0;
2228
2229         if (qp->qp_type != IB_QPT_GSI) {
2230                 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2231                 if (rc)
2232                         return rc;
2233         }
2234
2235         if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2236                 qedr_cleanup_user(dev, qp);
2237         else
2238                 qedr_cleanup_kernel(dev, qp);
2239
2240         return 0;
2241 }
2242
2243 int qedr_destroy_qp(struct ib_qp *ibqp)
2244 {
2245         struct qedr_qp *qp = get_qedr_qp(ibqp);
2246         struct qedr_dev *dev = qp->dev;
2247         struct ib_qp_attr attr;
2248         int attr_mask = 0;
2249         int rc = 0;
2250
2251         DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2252                  qp, qp->qp_type);
2253
2254         if (rdma_protocol_roce(&dev->ibdev, 1)) {
2255                 if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2256                     (qp->state != QED_ROCE_QP_STATE_ERR) &&
2257                     (qp->state != QED_ROCE_QP_STATE_INIT)) {
2258
2259                         attr.qp_state = IB_QPS_ERR;
2260                         attr_mask |= IB_QP_STATE;
2261
2262                         /* Change the QP state to ERROR */
2263                         qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2264                 }
2265         } else {
2266                 /* Wait for the connect/accept to complete */
2267                 if (qp->ep) {
2268                         int wait_count = 1;
2269
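                        /* Poll every 100 msec; the wait_count bound of 200
                         * iterations caps the wait at roughly 20 seconds.
                         */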
2270                         while (qp->ep->during_connect) {
2271                                 DP_DEBUG(dev, QEDR_MSG_QP,
2272                                          "Still during connect/accept\n");
2273
2274                                 msleep(100);
2275                                 if (wait_count++ > 200) {
2276                                         DP_NOTICE(dev,
2277                                                   "during connect timeout\n");
2278                                         break;
2279                                 }
2280                         }
2281                 }
2282         }
2283
2284         if (qp->qp_type == IB_QPT_GSI)
2285                 qedr_destroy_gsi_qp(dev);
2286
2287         qedr_free_qp_resources(dev, qp);
2288
2289         if (atomic_dec_and_test(&qp->refcnt)) {
2290                 qedr_idr_remove(dev, qp->qp_id);
2291                 kfree(qp);
2292         }
2293         return rc;
2294 }
2295
2296 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2297                              struct ib_udata *udata)
2298 {
2299         struct qedr_ah *ah;
2300
2301         ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2302         if (!ah)
2303                 return ERR_PTR(-ENOMEM);
2304
2305         ah->attr = *attr;
2306
2307         return &ah->ibah;
2308 }
2309
2310 int qedr_destroy_ah(struct ib_ah *ibah)
2311 {
2312         struct qedr_ah *ah = get_qedr_ah(ibah);
2313
2314         kfree(ah);
2315         return 0;
2316 }
2317
2318 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2319 {
2320         struct qedr_pbl *pbl, *tmp;
2321
2322         if (info->pbl_table)
2323                 list_add_tail(&info->pbl_table->list_entry,
2324                               &info->free_pbl_list);
2325
2326         if (!list_empty(&info->inuse_pbl_list))
2327                 list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2328
2329         list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2330                 list_del(&pbl->list_entry);
2331                 qedr_free_pbl(dev, &info->pbl_info, pbl);
2332         }
2333 }
2334
2335 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2336                         size_t page_list_len, bool two_layered)
2337 {
2338         struct qedr_pbl *tmp;
2339         int rc;
2340
2341         INIT_LIST_HEAD(&info->free_pbl_list);
2342         INIT_LIST_HEAD(&info->inuse_pbl_list);
2343
2344         rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2345                                   page_list_len, two_layered);
2346         if (rc)
2347                 goto done;
2348
2349         info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2350         if (IS_ERR(info->pbl_table)) {
2351                 rc = PTR_ERR(info->pbl_table);
2352                 goto done;
2353         }
2354
2355         DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2356                  &info->pbl_table->pa);
2357
2358         /* In the usual case we use 2 PBLs, so we add one to the free
2359          * list and allocate another one.
2360          */
2361         tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2362         if (IS_ERR(tmp)) {
2363                 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2364                 goto done;
2365         }
2366
2367         list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2368
2369         DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2370
2371 done:
2372         if (rc)
2373                 free_mr_info(dev, info);
2374
2375         return rc;
2376 }
2377
2378 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2379                                u64 usr_addr, int acc, struct ib_udata *udata)
2380 {
2381         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2382         struct qedr_mr *mr;
2383         struct qedr_pd *pd;
2384         int rc = -ENOMEM;
2385
2386         pd = get_qedr_pd(ibpd);
2387         DP_DEBUG(dev, QEDR_MSG_MR,
2388                  "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2389                  pd->pd_id, start, len, usr_addr, acc);
2390
2391         if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2392                 return ERR_PTR(-EINVAL);
2393
2394         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2395         if (!mr)
2396                 return ERR_PTR(rc);
2397
2398         mr->type = QEDR_MR_USER;
2399
2400         mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2401         if (IS_ERR(mr->umem)) {
2402                 rc = -EFAULT;
2403                 goto err0;
2404         }
2405
2406         rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2407         if (rc)
2408                 goto err1;
2409
2410         qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2411                            &mr->info.pbl_info, mr->umem->page_shift);
2412
2413         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2414         if (rc) {
2415                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2416                 goto err1;
2417         }
2418
2419         /* Index only, 18 bit long, lkey = itid << 8 | key */
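        /* With illustrative values itid = 0x2A5 and key = 0 this gives
         * lkey = (0x2A5 << 8) | 0 = 0x2A500: the low 8 bits carry the key
         * byte and the bits above it the tid index.
         */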
2420         mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2421         mr->hw_mr.key = 0;
2422         mr->hw_mr.pd = pd->pd_id;
2423         mr->hw_mr.local_read = 1;
2424         mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2425         mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2426         mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2427         mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2428         mr->hw_mr.mw_bind = false;
2429         mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2430         mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2431         mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2432         mr->hw_mr.page_size_log = mr->umem->page_shift;
2433         mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2434         mr->hw_mr.length = len;
2435         mr->hw_mr.vaddr = usr_addr;
2436         mr->hw_mr.zbva = false;
2437         mr->hw_mr.phy_mr = false;
2438         mr->hw_mr.dma_mr = false;
2439
2440         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2441         if (rc) {
2442                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2443                 goto err2;
2444         }
2445
2446         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2447         if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2448             mr->hw_mr.remote_atomic)
2449                 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2450
2451         DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2452                  mr->ibmr.lkey);
2453         return &mr->ibmr;
2454
2455 err2:
2456         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2457 err1:
2458         qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2459 err0:
2460         kfree(mr);
2461         return ERR_PTR(rc);
2462 }
2463
2464 int qedr_dereg_mr(struct ib_mr *ib_mr)
2465 {
2466         struct qedr_mr *mr = get_qedr_mr(ib_mr);
2467         struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2468         int rc = 0;
2469
2470         rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2471         if (rc)
2472                 return rc;
2473
2474         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2475
2476         if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2477                 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2478
2479         /* It could be user-registered memory. */
2480         if (mr->umem)
2481                 ib_umem_release(mr->umem);
2482
2483         kfree(mr);
2484
2485         return rc;
2486 }
2487
2488 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2489                                        int max_page_list_len)
2490 {
2491         struct qedr_pd *pd = get_qedr_pd(ibpd);
2492         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2493         struct qedr_mr *mr;
2494         int rc = -ENOMEM;
2495
2496         DP_DEBUG(dev, QEDR_MSG_MR,
2497                  "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2498                  max_page_list_len);
2499
2500         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2501         if (!mr)
2502                 return ERR_PTR(rc);
2503
2504         mr->dev = dev;
2505         mr->type = QEDR_MR_FRMR;
2506
2507         rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2508         if (rc)
2509                 goto err0;
2510
2511         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2512         if (rc) {
2513                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2514                 goto err0;
2515         }
2516
2517         /* Index only, 18 bit long, lkey = itid << 8 | key */
2518         mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2519         mr->hw_mr.key = 0;
2520         mr->hw_mr.pd = pd->pd_id;
2521         mr->hw_mr.local_read = 1;
2522         mr->hw_mr.local_write = 0;
2523         mr->hw_mr.remote_read = 0;
2524         mr->hw_mr.remote_write = 0;
2525         mr->hw_mr.remote_atomic = 0;
2526         mr->hw_mr.mw_bind = false;
2527         mr->hw_mr.pbl_ptr = 0;
2528         mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2529         mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2530         mr->hw_mr.fbo = 0;
2531         mr->hw_mr.length = 0;
2532         mr->hw_mr.vaddr = 0;
2533         mr->hw_mr.zbva = false;
2534         mr->hw_mr.phy_mr = true;
2535         mr->hw_mr.dma_mr = false;
2536
2537         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2538         if (rc) {
2539                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2540                 goto err1;
2541         }
2542
2543         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2544         mr->ibmr.rkey = mr->ibmr.lkey;
2545
2546         DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2547         return mr;
2548
2549 err1:
2550         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2551 err0:
2552         kfree(mr);
2553         return ERR_PTR(rc);
2554 }
2555
2556 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2557                             enum ib_mr_type mr_type, u32 max_num_sg)
2558 {
2559         struct qedr_mr *mr;
2560
2561         if (mr_type != IB_MR_TYPE_MEM_REG)
2562                 return ERR_PTR(-EINVAL);
2563
2564         mr = __qedr_alloc_mr(ibpd, max_num_sg);
2565
2566         if (IS_ERR(mr))
2567                 return ERR_CAST(mr);
2568
2569         return &mr->ibmr;
2570 }
2571
2572 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2573 {
2574         struct qedr_mr *mr = get_qedr_mr(ibmr);
2575         struct qedr_pbl *pbl_table;
2576         struct regpair *pbe;
2577         u32 pbes_in_page;
2578
2579         if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2580                 DP_ERR(mr->dev, "qedr_set_page failed, PBL full at npages=%d\n", mr->npages);
2581                 return -ENOMEM;
2582         }
2583
2584         DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2585                  mr->npages, addr);
2586
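        /* Indexing math, assuming a 4K pbl_size for illustration: each PBL
         * page holds 4096 / 8 = 512 PBEs, so npages = 1000 lands in PBL
         * table entry 1000 / 512 = 1, at PBE slot 1000 % 512 = 488.
         */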
2587         pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2588         pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2589         pbe = (struct regpair *)pbl_table->va;
2590         pbe +=  mr->npages % pbes_in_page;
2591         pbe->lo = cpu_to_le32((u32)addr);
2592         pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2593
2594         mr->npages++;
2595
2596         return 0;
2597 }
2598
2599 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2600 {
2601         int work = info->completed - info->completed_handled - 1;
2602
2603         DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2604         while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2605                 struct qedr_pbl *pbl;
2606
2607                 /* Free all the page lists that can be freed (all the ones
2608                  * that were invalidated), under the assumption that if an
2609                  * FMR completed successfully, any invalidate operation that
2610                  * preceded it has also completed.
2611                  */
2612                 pbl = list_first_entry(&info->inuse_pbl_list,
2613                                        struct qedr_pbl, list_entry);
2614                 list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2615                 info->completed_handled++;
2616         }
2617 }
2618
2619 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2620                    int sg_nents, unsigned int *sg_offset)
2621 {
2622         struct qedr_mr *mr = get_qedr_mr(ibmr);
2623
2624         mr->npages = 0;
2625
2626         handle_completed_mrs(mr->dev, &mr->info);
2627         return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2628 }
2629
2630 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2631 {
2632         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2633         struct qedr_pd *pd = get_qedr_pd(ibpd);
2634         struct qedr_mr *mr;
2635         int rc;
2636
2637         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2638         if (!mr)
2639                 return ERR_PTR(-ENOMEM);
2640
2641         mr->type = QEDR_MR_DMA;
2642
2643         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2644         if (rc) {
2645                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2646                 goto err1;
2647         }
2648
2649         /* Index only, 18 bit long, lkey = itid << 8 | key */
2650         mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2651         mr->hw_mr.pd = pd->pd_id;
2652         mr->hw_mr.local_read = 1;
2653         mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2654         mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2655         mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2656         mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2657         mr->hw_mr.dma_mr = true;
2658
2659         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2660         if (rc) {
2661                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2662                 goto err2;
2663         }
2664
2665         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2666         if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2667             mr->hw_mr.remote_atomic)
2668                 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2669
2670         DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2671         return &mr->ibmr;
2672
2673 err2:
2674         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2675 err1:
2676         kfree(mr);
2677         return ERR_PTR(rc);
2678 }
2679
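/* One slot is sacrificed to distinguish full from empty: with max_wr = 4 at
 * most 3 WRs can be outstanding, e.g. prod = 3, cons = 0 gives
 * ((3 + 1) % 4) == 0 == cons, i.e. full.
 */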
2680 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2681 {
2682         return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2683 }
2684
2685 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2686 {
2687         int i, len = 0;
2688
2689         for (i = 0; i < num_sge; i++)
2690                 len += sg_list[i].length;
2691
2692         return len;
2693 }
2694
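/* Effectively an unconditional 64-bit byte swap: on a little-endian host
 * cpu_to_le64() is a no-op and cpu_to_be64() swaps, while on a big-endian
 * host the first call swaps and the second is a no-op. Either way each word
 * comes out byte-reversed, presumably the layout the FW expects for inline
 * data.
 */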
2695 static void swap_wqe_data64(u64 *p)
2696 {
2697         int i;
2698
2699         for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2700                 *p = cpu_to_be64(cpu_to_le64(*p));
2701 }
2702
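/* Scatter an inline payload across SQ chain elements; each produced element
 * is one segment of sizeof(struct rdma_sq_common_wqe) bytes, and every fully
 * written segment is byte-swapped for the device. Returns the total inline
 * data size; an oversized payload sets *bad_wr and returns 0.
 */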
2703 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2704                                        struct qedr_qp *qp, u8 *wqe_size,
2705                                        struct ib_send_wr *wr,
2706                                        struct ib_send_wr **bad_wr, u8 *bits,
2707                                        u8 bit)
2708 {
2709         u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2710         char *seg_prt, *wqe;
2711         int i, seg_siz;
2712
2713         if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2714                 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2715                 *bad_wr = wr;
2716                 return 0;
2717         }
2718
2719         if (!data_size)
2720                 return data_size;
2721
2722         *bits |= bit;
2723
2724         seg_prt = NULL;
2725         wqe = NULL;
2726         seg_siz = 0;
2727
2728         /* Copy data inline */
2729         for (i = 0; i < wr->num_sge; i++) {
2730                 u32 len = wr->sg_list[i].length;
2731                 void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2732
2733                 while (len > 0) {
2734                         u32 cur;
2735
2736                         /* New segment required */
2737                         if (!seg_siz) {
2738                                 wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2739                                 seg_prt = wqe;
2740                                 seg_siz = sizeof(struct rdma_sq_common_wqe);
2741                                 (*wqe_size)++;
2742                         }
2743
2744                         /* Calculate currently allowed length */
2745                         cur = min_t(u32, len, seg_siz);
2746                         memcpy(seg_prt, src, cur);
2747
2748                         /* Update segment variables */
2749                         seg_prt += cur;
2750                         seg_siz -= cur;
2751
2752                         /* Update sge variables */
2753                         src += cur;
2754                         len -= cur;
2755
2756                         /* Swap fully-completed segments */
2757                         if (!seg_siz)
2758                                 swap_wqe_data64((u64 *)wqe);
2759                 }
2760         }
2761
2762         /* Swap the last, not fully completed, segment */
2763         if (seg_siz)
2764                 swap_wqe_data64((u64 *)wqe);
2765
2766         return data_size;
2767 }
2768
2769 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)                 \
2770         do {                                                    \
2771                 DMA_REGPAIR_LE(sge->addr, vaddr);               \
2772                 (sge)->length = cpu_to_le32(vlength);           \
2773                 (sge)->flags = cpu_to_le32(vflags);             \
2774         } while (0)
2775
2776 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)                       \
2777         do {                                                    \
2778                 DMA_REGPAIR_LE(hdr->wr_id, vwr_id);             \
2779                 (hdr)->num_sges = num_sge;                      \
2780         } while (0)
2781
2782 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)                 \
2783         do {                                                    \
2784                 DMA_REGPAIR_LE(sge->addr, vaddr);               \
2785                 (sge)->length = cpu_to_le32(vlength);           \
2786                 (sge)->l_key = cpu_to_le32(vlkey);              \
2787         } while (0)
2788
2789 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2790                                 struct ib_send_wr *wr)
2791 {
2792         u32 data_size = 0;
2793         int i;
2794
2795         for (i = 0; i < wr->num_sge; i++) {
2796                 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2797
2798                 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2799                 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2800                 sge->length = cpu_to_le32(wr->sg_list[i].length);
2801                 data_size += wr->sg_list[i].length;
2802         }
2803
2804         if (wqe_size)
2805                 *wqe_size += wr->num_sge;
2806
2807         return data_size;
2808 }
2809
2810 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2811                                      struct qedr_qp *qp,
2812                                      struct rdma_sq_rdma_wqe_1st *rwqe,
2813                                      struct rdma_sq_rdma_wqe_2nd *rwqe2,
2814                                      struct ib_send_wr *wr,
2815                                      struct ib_send_wr **bad_wr)
2816 {
2817         rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2818         DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2819
2820         if (wr->send_flags & IB_SEND_INLINE &&
2821             (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2822              wr->opcode == IB_WR_RDMA_WRITE)) {
2823                 u8 flags = 0;
2824
2825                 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2826                 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2827                                                    bad_wr, &rwqe->flags, flags);
2828         }
2829
2830         return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2831 }
2832
2833 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2834                                      struct qedr_qp *qp,
2835                                      struct rdma_sq_send_wqe_1st *swqe,
2836                                      struct rdma_sq_send_wqe_2st *swqe2,
2837                                      struct ib_send_wr *wr,
2838                                      struct ib_send_wr **bad_wr)
2839 {
2840         memset(swqe2, 0, sizeof(*swqe2));
2841         if (wr->send_flags & IB_SEND_INLINE) {
2842                 u8 flags = 0;
2843
2844                 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2845                 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2846                                                    bad_wr, &swqe->flags, flags);
2847         }
2848
2849         return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2850 }
2851
2852 static int qedr_prepare_reg(struct qedr_qp *qp,
2853                             struct rdma_sq_fmr_wqe_1st *fwqe1,
2854                             struct ib_reg_wr *wr)
2855 {
2856         struct qedr_mr *mr = get_qedr_mr(wr->mr);
2857         struct rdma_sq_fmr_wqe_2nd *fwqe2;
2858
2859         fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2860         fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2861         fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2862         fwqe1->l_key = wr->key;
2863
2864         fwqe2->access_ctrl = 0;
2865
2866         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2867                    !!(wr->access & IB_ACCESS_REMOTE_READ));
2868         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2869                    !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2870         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2871                    !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2872         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2873         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2874                    !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2875         fwqe2->fmr_ctrl = 0;
2876
2877         SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2878                    ilog2(mr->ibmr.page_size) - 12);
2879
2880         fwqe2->length_hi = 0;
2881         fwqe2->length_lo = mr->ibmr.length;
2882         fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2883         fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2884
2885         qp->wqe_wr_id[qp->sq.prod].mr = mr;
2886
2887         return 0;
2888 }
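
/* Illustrative only -- not part of the driver.  qedr_prepare_reg() above is
 * reached when a consumer posts an IB_WR_REG_MR work request.  A hedged
 * sketch of that consumer side using only core verbs; the function name is
 * made up, and mr/qp/sgl are assumed to exist already.
 */
static int __maybe_unused
qedr_example_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
                      struct scatterlist *sgl, int sg_nents)
{
        struct ib_reg_wr reg_wr = {};
        struct ib_send_wr *bad_wr;
        int n;

        /* Load the scatterlist into the MR's page list (PBL) */
        n = ib_map_mr_sg(mr, sgl, sg_nents, NULL, PAGE_SIZE);
        if (n != sg_nents)
                return n < 0 ? n : -EINVAL;

        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;
        reg_wr.mr = mr;
        reg_wr.key = mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

        return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}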
2889
2890 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2891 {
2892         switch (opcode) {
2893         case IB_WR_RDMA_WRITE:
2894         case IB_WR_RDMA_WRITE_WITH_IMM:
2895                 return IB_WC_RDMA_WRITE;
2896         case IB_WR_SEND_WITH_IMM:
2897         case IB_WR_SEND:
2898         case IB_WR_SEND_WITH_INV:
2899                 return IB_WC_SEND;
2900         case IB_WR_RDMA_READ:
2901         case IB_WR_RDMA_READ_WITH_INV:
2902                 return IB_WC_RDMA_READ;
2903         case IB_WR_ATOMIC_CMP_AND_SWP:
2904                 return IB_WC_COMP_SWAP;
2905         case IB_WR_ATOMIC_FETCH_AND_ADD:
2906                 return IB_WC_FETCH_ADD;
2907         case IB_WR_REG_MR:
2908                 return IB_WC_REG_MR;
2909         case IB_WR_LOCAL_INV:
2910                 return IB_WC_LOCAL_INV;
2911         default:
2912                 return IB_WC_SEND;
2913         }
2914 }
2915
2916 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2917 {
2918         int wq_is_full, err_wr, pbl_is_full;
2919         struct qedr_dev *dev = qp->dev;
2920
2921         /* prevent SQ overflow and/or processing of a bad WR */
2922         err_wr = wr->num_sge > qp->sq.max_sges;
2923         wq_is_full = qedr_wq_is_full(&qp->sq);
2924         pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2925                       QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2926         if (wq_is_full || err_wr || pbl_is_full) {
2927                 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2928                         DP_ERR(dev,
2929                                "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2930                                qp);
2931                         qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2932                 }
2933
2934                 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2935                         DP_ERR(dev,
2936                                "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2937                                qp);
2938                         qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2939                 }
2940
2941                 if (pbl_is_full &&
2942                     !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2943                         DP_ERR(dev,
2944                                "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2945                                qp);
2946                         qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2947                 }
2948                 return false;
2949         }
2950         return true;
2951 }
2952
2953 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2954                             struct ib_send_wr **bad_wr)
2955 {
2956         struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2957         struct qedr_qp *qp = get_qedr_qp(ibqp);
2958         struct rdma_sq_atomic_wqe_1st *awqe1;
2959         struct rdma_sq_atomic_wqe_2nd *awqe2;
2960         struct rdma_sq_atomic_wqe_3rd *awqe3;
2961         struct rdma_sq_send_wqe_2st *swqe2;
2962         struct rdma_sq_local_inv_wqe *iwqe;
2963         struct rdma_sq_rdma_wqe_2nd *rwqe2;
2964         struct rdma_sq_send_wqe_1st *swqe;
2965         struct rdma_sq_rdma_wqe_1st *rwqe;
2966         struct rdma_sq_fmr_wqe_1st *fwqe1;
2967         struct rdma_sq_common_wqe *wqe;
2968         u32 length;
2969         int rc = 0;
2970         bool comp;
2971
2972         if (!qedr_can_post_send(qp, wr)) {
2973                 *bad_wr = wr;
2974                 return -ENOMEM;
2975         }
2976
2977         wqe = qed_chain_produce(&qp->sq.pbl);
2978         qp->wqe_wr_id[qp->sq.prod].signaled =
2979                 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2980
2981         wqe->flags = 0;
2982         SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2983                    !!(wr->send_flags & IB_SEND_SOLICITED));
2984         comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2985         SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2986         SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2987                    !!(wr->send_flags & IB_SEND_FENCE));
2988         wqe->prev_wqe_size = qp->prev_wqe_size;
2989
2990         qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2991
2992         switch (wr->opcode) {
2993         case IB_WR_SEND_WITH_IMM:
2994                 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
2995                         rc = -EINVAL;
2996                         *bad_wr = wr;
2997                         break;
2998                 }
2999                 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3000                 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3001                 swqe->wqe_size = 2;
3002                 swqe2 = qed_chain_produce(&qp->sq.pbl);
3003
3004                 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3005                 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3006                                                    wr, bad_wr);
3007                 swqe->length = cpu_to_le32(length);
3008                 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3009                 qp->prev_wqe_size = swqe->wqe_size;
3010                 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3011                 break;
3012         case IB_WR_SEND:
3013                 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3014                 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3015
3016                 swqe->wqe_size = 2;
3017                 swqe2 = qed_chain_produce(&qp->sq.pbl);
3018                 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3019                                                    wr, bad_wr);
3020                 swqe->length = cpu_to_le32(length);
3021                 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3022                 qp->prev_wqe_size = swqe->wqe_size;
3023                 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3024                 break;
3025         case IB_WR_SEND_WITH_INV:
3026                 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3027                 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3028                 swqe2 = qed_chain_produce(&qp->sq.pbl);
3029                 swqe->wqe_size = 2;
3030                 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3031                 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3032                                                    wr, bad_wr);
3033                 swqe->length = cpu_to_le32(length);
3034                 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3035                 qp->prev_wqe_size = swqe->wqe_size;
3036                 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3037                 break;
3038
3039         case IB_WR_RDMA_WRITE_WITH_IMM:
3040                 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3041                         rc = -EINVAL;
3042                         *bad_wr = wr;
3043                         break;
3044                 }
3045                 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3046                 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3047
3048                 rwqe->wqe_size = 2;
3049                 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3050                 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3051                 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3052                                                    wr, bad_wr);
3053                 rwqe->length = cpu_to_le32(length);
3054                 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3055                 qp->prev_wqe_size = rwqe->wqe_size;
3056                 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3057                 break;
3058         case IB_WR_RDMA_WRITE:
3059                 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3060                 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3061
3062                 rwqe->wqe_size = 2;
3063                 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3064                 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3065                                                    wr, bad_wr);
3066                 rwqe->length = cpu_to_le32(length);
3067                 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3068                 qp->prev_wqe_size = rwqe->wqe_size;
3069                 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3070                 break;
3071         case IB_WR_RDMA_READ_WITH_INV:
3072                 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3073                 /* fallthrough: handling is identical to RDMA READ */
3074
3075         case IB_WR_RDMA_READ:
3076                 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3077                 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3078
3079                 rwqe->wqe_size = 2;
3080                 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3081                 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3082                                                    wr, bad_wr);
3083                 rwqe->length = cpu_to_le32(length);
3084                 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3085                 qp->prev_wqe_size = rwqe->wqe_size;
3086                 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3087                 break;
3088
3089         case IB_WR_ATOMIC_CMP_AND_SWP:
3090         case IB_WR_ATOMIC_FETCH_AND_ADD:
3091                 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3092                 awqe1->wqe_size = 4;
3093
3094                 awqe2 = qed_chain_produce(&qp->sq.pbl);
3095                 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3096                 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3097
3098                 awqe3 = qed_chain_produce(&qp->sq.pbl);
3099
3100                 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3101                         wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3102                         DMA_REGPAIR_LE(awqe3->swap_data,
3103                                        atomic_wr(wr)->compare_add);
3104                 } else {
3105                         wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3106                         DMA_REGPAIR_LE(awqe3->swap_data,
3107                                        atomic_wr(wr)->swap);
3108                         DMA_REGPAIR_LE(awqe3->cmp_data,
3109                                        atomic_wr(wr)->compare_add);
3110                 }
3111
3112                 qedr_prepare_sq_sges(qp, NULL, wr);
3113
3114                 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3115                 qp->prev_wqe_size = awqe1->wqe_size;
3116                 break;
3117
3118         case IB_WR_LOCAL_INV:
3119                 iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3120                 iwqe->wqe_size = 1;
3121
3122                 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3123                 iwqe->inv_l_key = wr->ex.invalidate_rkey;
3124                 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3125                 qp->prev_wqe_size = iwqe->wqe_size;
3126                 break;
3127         case IB_WR_REG_MR:
3128                 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3129                 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3130                 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3131                 fwqe1->wqe_size = 2;
3132
3133                 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3134                 if (rc) {
3135                         DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3136                         *bad_wr = wr;
3137                         break;
3138                 }
3139
3140                 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3141                 qp->prev_wqe_size = fwqe1->wqe_size;
3142                 break;
3143         default:
3144                 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3145                 rc = -EINVAL;
3146                 *bad_wr = wr;
3147                 break;
3148         }
3149
3150         if (*bad_wr) {
3151                 u16 value;
3152
3153                 /* Restore prod to its position before
3154                  * this WR was processed
3155                  */
3156                 value = le16_to_cpu(qp->sq.db_data.data.value);
3157                 qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3158
3159                 /* Restore prev_wqe_size */
3160                 qp->prev_wqe_size = wqe->prev_wqe_size;
3161                 rc = -EINVAL;
3162                 DP_ERR(dev, "POST SEND FAILED\n");
3163         }
3164
3165         return rc;
3166 }
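
/* Illustrative only -- not part of the driver.  The atomic branch above
 * consumes the atomic_wr() container.  A hedged fetch-and-add sketch (the
 * function name is made up); the 8-byte original value is written to the
 * registered local buffer described by sge.
 */
static int __maybe_unused
qedr_example_post_fetch_add(struct ib_qp *qp, struct ib_sge *sge,
                            u64 remote_addr, u32 rkey, u64 add)
{
        struct ib_atomic_wr wr = {
                .wr = {
                        .wr_id      = 2,
                        .sg_list    = sge,
                        .num_sge    = 1,
                        .opcode     = IB_WR_ATOMIC_FETCH_AND_ADD,
                        .send_flags = IB_SEND_SIGNALED,
                },
                .remote_addr = remote_addr,
                .compare_add = add,
                .rkey        = rkey,
        };
        struct ib_send_wr *bad_wr;

        return ib_post_send(qp, &wr.wr, &bad_wr);
}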
3167
3168 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3169                    struct ib_send_wr **bad_wr)
3170 {
3171         struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3172         struct qedr_qp *qp = get_qedr_qp(ibqp);
3173         unsigned long flags;
3174         int rc = 0;
3175
3176         *bad_wr = NULL;
3177
3178         if (qp->qp_type == IB_QPT_GSI)
3179                 return qedr_gsi_post_send(ibqp, wr, bad_wr);
3180
3181         spin_lock_irqsave(&qp->q_lock, flags);
3182
3183         if (rdma_protocol_roce(&dev->ibdev, 1)) {
3184                 if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3185                     (qp->state != QED_ROCE_QP_STATE_ERR) &&
3186                     (qp->state != QED_ROCE_QP_STATE_SQD)) {
3187                         spin_unlock_irqrestore(&qp->q_lock, flags);
3188                         *bad_wr = wr;
3189                         DP_DEBUG(dev, QEDR_MSG_CQ,
3190                                  "QP in wrong state! QP icid=0x%x state %d\n",
3191                                  qp->icid, qp->state);
3192                         return -EINVAL;
3193                 }
3194         }
3195
3196         while (wr) {
3197                 rc = __qedr_post_send(ibqp, wr, bad_wr);
3198                 if (rc)
3199                         break;
3200
3201                 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3202
3203                 qedr_inc_sw_prod(&qp->sq);
3204
3205                 qp->sq.db_data.data.value++;
3206
3207                 wr = wr->next;
3208         }
3209
3210         /* Trigger doorbell
3211          * If there was a failure in the first WR then it will be triggered in
3212          * vain. However, this is not harmful (as long as the producer value is
3213          * unchanged). For performance reasons we avoid checking for this
3214          * redundant doorbell.
3215          *
3216          * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3217          * soon as we give the doorbell, we could get a completion
3218          * for this wr, therefore we need to make sure that the
3219          * memory is updated before giving the doorbell.
3220          * During qedr_poll_cq, a full rmb() is called before accessing the
3221          * CQE; this subsumes the required smp_rmb() as well.
3222          */
3223         smp_wmb();
3224         writel(qp->sq.db_data.raw, qp->sq.db);
3225
3226         /* Make sure write sticks */
3227         mmiowb();
3228
3229         spin_unlock_irqrestore(&qp->q_lock, flags);
3230
3231         return rc;
3232 }
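
/* Illustrative only -- not part of the driver.  A hedged consumer-side
 * sketch of the post-send path above: one signaled SEND with a single SGE,
 * dispatched through the core verb.  The function name is made up;
 * buf_dma/lkey are assumed to come from prior DMA mapping and MR setup.
 */
static int __maybe_unused
qedr_example_post_one_send(struct ib_qp *qp, u64 buf_dma, u32 len, u32 lkey)
{
        struct ib_sge sge = {
                .addr   = buf_dma,
                .length = len,
                .lkey   = lkey,
        };
        struct ib_send_wr wr = {
                .wr_id      = 3,        /* returned later in wc->wr_id */
                .sg_list    = &sge,
                .num_sge    = 1,
                .opcode     = IB_WR_SEND,
                .send_flags = IB_SEND_SIGNALED,
        };
        struct ib_send_wr *bad_wr;

        return ib_post_send(qp, &wr, &bad_wr);
}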
3233
3234 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3235                    struct ib_recv_wr **bad_wr)
3236 {
3237         struct qedr_qp *qp = get_qedr_qp(ibqp);
3238         struct qedr_dev *dev = qp->dev;
3239         unsigned long flags;
3240         int status = 0;
3241
3242         if (qp->qp_type == IB_QPT_GSI)
3243                 return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3244
3245         spin_lock_irqsave(&qp->q_lock, flags);
3246
3247         if (qp->state == QED_ROCE_QP_STATE_RESET) {
3248                 spin_unlock_irqrestore(&qp->q_lock, flags);
3249                 *bad_wr = wr;
3250                 return -EINVAL;
3251         }
3252
3253         while (wr) {
3254                 int i;
3255
3256                 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3257                     QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3258                     wr->num_sge > qp->rq.max_sges) {
3259                         DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3260                                qed_chain_get_elem_left_u32(&qp->rq.pbl),
3261                                QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3262                                qp->rq.max_sges);
3263                         status = -ENOMEM;
3264                         *bad_wr = wr;
3265                         break;
3266                 }
3267                 for (i = 0; i < wr->num_sge; i++) {
3268                         u32 flags = 0;
3269                         struct rdma_rq_sge *rqe =
3270                             qed_chain_produce(&qp->rq.pbl);
3271
3272                         /* First one must include the number
3273                          * of SGEs in the list
3274                          */
3275                         if (!i)
3276                                 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3277                                           wr->num_sge);
3278
3279                         SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3280                                   wr->sg_list[i].lkey);
3281
3282                         RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3283                                    wr->sg_list[i].length, flags);
3284                 }
3285
3286                 /* Special case of no SGEs: the FW requires between 1 and 4
3287                  * SGEs, so in this case we post a single SGE with length zero.
3288                  * This is because an RDMA WRITE with immediate consumes an RQE.
3289                  */
3290                 if (!wr->num_sge) {
3291                         u32 flags = 0;
3292                         struct rdma_rq_sge *rqe =
3293                             qed_chain_produce(&qp->rq.pbl);
3294
3295                         /* First one must include the number
3296                          * of SGEs in the list
3297                          */
3298                         SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3299                         SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3300
3301                         RQ_SGE_SET(rqe, 0, 0, flags);
3302                         i = 1;
3303                 }
3304
3305                 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3306                 qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3307
3308                 qedr_inc_sw_prod(&qp->rq);
3309
3310                 /* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3311                  * soon as we give the doorbell, we could get a completion
3312                  * for this wr, therefore we need to make sure that the
3313                  * memory is updated before giving the doorbell.
3314                  * During qedr_poll_cq, a full rmb() is called before accessing
3315                  * the CQE; this subsumes the required smp_rmb() as well.
3316                  */
3317                 smp_wmb();
3318
3319                 qp->rq.db_data.data.value++;
3320
3321                 writel(qp->rq.db_data.raw, qp->rq.db);
3322
3323                 /* Make sure write sticks */
3324                 mmiowb();
3325
3326                 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3327                         writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3328                         mmiowb();       /* for second doorbell */
3329                 }
3330
3331                 wr = wr->next;
3332         }
3333
3334         spin_unlock_irqrestore(&qp->q_lock, flags);
3335
3336         return status;
3337 }
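
/* Illustrative only -- not part of the driver.  The receive-side
 * counterpart of the sketch above; one RQE with a single SGE.  Posting a
 * WR with num_sge == 0 would exercise the zero-length special case handled
 * in qedr_post_recv().
 */
static int __maybe_unused
qedr_example_post_one_recv(struct ib_qp *qp, u64 buf_dma, u32 len, u32 lkey)
{
        struct ib_sge sge = {
                .addr   = buf_dma,
                .length = len,
                .lkey   = lkey,
        };
        struct ib_recv_wr wr = {
                .wr_id   = 4,
                .sg_list = &sge,
                .num_sge = 1,
        };
        struct ib_recv_wr *bad_wr;

        return ib_post_recv(qp, &wr, &bad_wr);
}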
3338
3339 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3340 {
3341         struct rdma_cqe_requester *resp_cqe = &cqe->req;
3342
3343         return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3344                 cq->pbl_toggle;
3345 }
3346
3347 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3348 {
3349         struct rdma_cqe_requester *resp_cqe = &cqe->req;
3350         struct qedr_qp *qp;
3351
3352         qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3353                                                    resp_cqe->qp_handle.lo,
3354                                                    u64);
3355         return qp;
3356 }
3357
3358 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3359 {
3360         struct rdma_cqe_requester *resp_cqe = &cqe->req;
3361
3362         return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3363 }
3364
3365 /* Return latest CQE (needs processing) */
3366 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3367 {
3368         return cq->latest_cqe;
3369 }
3370
3371 /* For FMR we need to increment the 'completed' counter, which the FMR
3372  * algorithm uses to determine whether a PBL can be freed or not.
3373  * This must be done whether or not the work request was signaled. For
3374  * this purpose we call this function from the condition that checks if a WR
3375  * should be skipped, to make sure we don't miss it (possibly this FMR
3376  * operation was not signaled).
3377  */
3378 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3379 {
3380         if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3381                 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3382 }
3383
3384 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3385                        struct qedr_cq *cq, int num_entries,
3386                        struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3387                        int force)
3388 {
3389         u16 cnt = 0;
3390
3391         while (num_entries && qp->sq.wqe_cons != hw_cons) {
3392                 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3393                         qedr_chk_if_fmr(qp);
3394                         /* skip WC */
3395                         goto next_cqe;
3396                 }
3397
3398                 /* fill WC */
3399                 wc->status = status;
3400                 wc->vendor_err = 0;
3401                 wc->wc_flags = 0;
3402                 wc->src_qp = qp->id;
3403                 wc->qp = &qp->ibqp;
3404
3405                 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3406                 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3407
3408                 switch (wc->opcode) {
3409                 case IB_WC_RDMA_WRITE:
3410                         wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3411                         break;
3412                 case IB_WC_COMP_SWAP:
3413                 case IB_WC_FETCH_ADD:
3414                         wc->byte_len = 8;
3415                         break;
3416                 case IB_WC_REG_MR:
3417                         qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3418                         break;
3419                 case IB_WC_RDMA_READ:
3420                 case IB_WC_SEND:
3421                         wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3422                         break;
3423                 default:
3424                         break;
3425                 }
3426
3427                 num_entries--;
3428                 wc++;
3429                 cnt++;
3430 next_cqe:
3431                 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3432                         qed_chain_consume(&qp->sq.pbl);
3433                 qedr_inc_sw_cons(&qp->sq);
3434         }
3435
3436         return cnt;
3437 }
3438
3439 static int qedr_poll_cq_req(struct qedr_dev *dev,
3440                             struct qedr_qp *qp, struct qedr_cq *cq,
3441                             int num_entries, struct ib_wc *wc,
3442                             struct rdma_cqe_requester *req)
3443 {
3444         int cnt = 0;
3445
3446         switch (req->status) {
3447         case RDMA_CQE_REQ_STS_OK:
3448                 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3449                                   IB_WC_SUCCESS, 0);
3450                 break;
3451         case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3452                 if (qp->state != QED_ROCE_QP_STATE_ERR)
3453                         DP_DEBUG(dev, QEDR_MSG_CQ,
3454                                  "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3455                                  cq->icid, qp->icid);
3456                 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3457                                   IB_WC_WR_FLUSH_ERR, 1);
3458                 break;
3459         default:
3460                 /* process all WQEs before the consumer */
3461                 qp->state = QED_ROCE_QP_STATE_ERR;
3462                 cnt = process_req(dev, qp, cq, num_entries, wc,
3463                                   req->sq_cons - 1, IB_WC_SUCCESS, 0);
3464                 wc += cnt;
3465                 /* if we have extra WC fill it with actual error info */
3466                 if (cnt < num_entries) {
3467                         enum ib_wc_status wc_status;
3468
3469                         switch (req->status) {
3470                         case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3471                                 DP_ERR(dev,
3472                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3473                                        cq->icid, qp->icid);
3474                                 wc_status = IB_WC_BAD_RESP_ERR;
3475                                 break;
3476                         case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3477                                 DP_ERR(dev,
3478                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3479                                        cq->icid, qp->icid);
3480                                 wc_status = IB_WC_LOC_LEN_ERR;
3481                                 break;
3482                         case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3483                                 DP_ERR(dev,
3484                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3485                                        cq->icid, qp->icid);
3486                                 wc_status = IB_WC_LOC_QP_OP_ERR;
3487                                 break;
3488                         case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3489                                 DP_ERR(dev,
3490                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3491                                        cq->icid, qp->icid);
3492                                 wc_status = IB_WC_LOC_PROT_ERR;
3493                                 break;
3494                         case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3495                                 DP_ERR(dev,
3496                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3497                                        cq->icid, qp->icid);
3498                                 wc_status = IB_WC_MW_BIND_ERR;
3499                                 break;
3500                         case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3501                                 DP_ERR(dev,
3502                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3503                                        cq->icid, qp->icid);
3504                                 wc_status = IB_WC_REM_INV_REQ_ERR;
3505                                 break;
3506                         case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3507                                 DP_ERR(dev,
3508                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3509                                        cq->icid, qp->icid);
3510                                 wc_status = IB_WC_REM_ACCESS_ERR;
3511                                 break;
3512                         case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3513                                 DP_ERR(dev,
3514                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3515                                        cq->icid, qp->icid);
3516                                 wc_status = IB_WC_REM_OP_ERR;
3517                                 break;
3518                         case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3519                                 DP_ERR(dev,
3520                                        "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3521                                        cq->icid, qp->icid);
3522                                 wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3523                                 break;
3524                         case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3525                                 DP_ERR(dev,
3526                                        "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3527                                        cq->icid, qp->icid);
3528                                 wc_status = IB_WC_RETRY_EXC_ERR;
3529                                 break;
3530                         default:
3531                                 DP_ERR(dev,
3532                                        "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3533                                        cq->icid, qp->icid);
3534                                 wc_status = IB_WC_GENERAL_ERR;
3535                         }
3536                         cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3537                                            wc_status, 1);
3538                 }
3539         }
3540
3541         return cnt;
3542 }
3543
3544 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3545 {
3546         switch (status) {
3547         case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3548                 return IB_WC_LOC_ACCESS_ERR;
3549         case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3550                 return IB_WC_LOC_LEN_ERR;
3551         case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3552                 return IB_WC_LOC_QP_OP_ERR;
3553         case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3554                 return IB_WC_LOC_PROT_ERR;
3555         case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3556                 return IB_WC_MW_BIND_ERR;
3557         case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3558                 return IB_WC_REM_INV_RD_REQ_ERR;
3559         case RDMA_CQE_RESP_STS_OK:
3560                 return IB_WC_SUCCESS;
3561         default:
3562                 return IB_WC_GENERAL_ERR;
3563         }
3564 }
3565
3566 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3567                                           struct ib_wc *wc)
3568 {
3569         wc->status = IB_WC_SUCCESS;
3570         wc->byte_len = le32_to_cpu(resp->length);
3571
3572         if (resp->flags & QEDR_RESP_IMM) {
3573                 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3574                 wc->wc_flags |= IB_WC_WITH_IMM;
3575
3576                 if (resp->flags & QEDR_RESP_RDMA)
3577                         wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3578
3579                 if (resp->flags & QEDR_RESP_INV)
3580                         return -EINVAL;
3581
3582         } else if (resp->flags & QEDR_RESP_INV) {
3583                 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3584                 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3585
3586                 if (resp->flags & QEDR_RESP_RDMA)
3587                         return -EINVAL;
3588
3589         } else if (resp->flags & QEDR_RESP_RDMA) {
3590                 return -EINVAL;
3591         }
3592
3593         return 0;
3594 }
3595
3596 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3597                                struct qedr_cq *cq, struct ib_wc *wc,
3598                                struct rdma_cqe_responder *resp, u64 wr_id)
3599 {
3600         /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3601         wc->opcode = IB_WC_RECV;
3602         wc->wc_flags = 0;
3603
3604         if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3605                 if (qedr_set_ok_cqe_resp_wc(resp, wc))
3606                         DP_ERR(dev,
3607                                "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3608                                cq, cq->icid, resp->flags);
3609
3610         } else {
3611                 wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3612                 if (wc->status == IB_WC_GENERAL_ERR)
3613                         DP_ERR(dev,
3614                                "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3615                                cq, cq->icid, resp->status);
3616         }
3617
3618         /* Fill the rest of the WC */
3619         wc->vendor_err = 0;
3620         wc->src_qp = qp->id;
3621         wc->qp = &qp->ibqp;
3622         wc->wr_id = wr_id;
3623 }
3624
3625 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3626                             struct qedr_cq *cq, struct ib_wc *wc,
3627                             struct rdma_cqe_responder *resp)
3628 {
3629         u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3630
3631         __process_resp_one(dev, qp, cq, wc, resp, wr_id);
3632
3633         while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3634                 qed_chain_consume(&qp->rq.pbl);
3635         qedr_inc_sw_cons(&qp->rq);
3636
3637         return 1;
3638 }
3639
3640 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3641                               int num_entries, struct ib_wc *wc, u16 hw_cons)
3642 {
3643         u16 cnt = 0;
3644
3645         while (num_entries && qp->rq.wqe_cons != hw_cons) {
3646                 /* fill WC */
3647                 wc->status = IB_WC_WR_FLUSH_ERR;
3648                 wc->vendor_err = 0;
3649                 wc->wc_flags = 0;
3650                 wc->src_qp = qp->id;
3651                 wc->byte_len = 0;
3652                 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3653                 wc->qp = &qp->ibqp;
3654                 num_entries--;
3655                 wc++;
3656                 cnt++;
3657                 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3658                         qed_chain_consume(&qp->rq.pbl);
3659                 qedr_inc_sw_cons(&qp->rq);
3660         }
3661
3662         return cnt;
3663 }
3664
3665 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3666                                  struct rdma_cqe_responder *resp, int *update)
3667 {
3668         if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
3669                 consume_cqe(cq);
3670                 *update |= 1;
3671         }
3672 }
3673
3674 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3675                              struct qedr_cq *cq, int num_entries,
3676                              struct ib_wc *wc, struct rdma_cqe_responder *resp,
3677                              int *update)
3678 {
3679         int cnt;
3680
3681         if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3682                 cnt = process_resp_flush(qp, cq, num_entries, wc,
3683                                          resp->rq_cons_or_srq_id);
3684                 try_consume_resp_cqe(cq, qp, resp, update);
3685         } else {
3686                 cnt = process_resp_one(dev, qp, cq, wc, resp);
3687                 consume_cqe(cq);
3688                 *update |= 1;
3689         }
3690
3691         return cnt;
3692 }
3693
3694 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3695                                 struct rdma_cqe_requester *req, int *update)
3696 {
3697         if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3698                 consume_cqe(cq);
3699                 *update |= 1;
3700         }
3701 }
3702
3703 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3704 {
3705         struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3706         struct qedr_cq *cq = get_qedr_cq(ibcq);
3707         union rdma_cqe *cqe;
3708         u32 old_cons, new_cons;
3709         unsigned long flags;
3710         int update = 0;
3711         int done = 0;
3712
3713         if (cq->destroyed) {
3714                 DP_ERR(dev,
3715                        "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3716                        cq, cq->icid);
3717                 return 0;
3718         }
3719
3720         if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3721                 return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3722
3723         spin_lock_irqsave(&cq->cq_lock, flags);
3724         cqe = cq->latest_cqe;
3725         old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3726         while (num_entries && is_valid_cqe(cq, cqe)) {
3727                 struct qedr_qp *qp;
3728                 int cnt = 0;
3729
3730                 /* prevent speculative reads of any field of CQE */
3731                 rmb();
3732
3733                 qp = cqe_get_qp(cqe);
3734                 if (!qp) {
3735                         WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3736                         break;
3737                 }
3738
3739                 wc->qp = &qp->ibqp;
3740
3741                 switch (cqe_get_type(cqe)) {
3742                 case RDMA_CQE_TYPE_REQUESTER:
3743                         cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3744                                                &cqe->req);
3745                         try_consume_req_cqe(cq, qp, &cqe->req, &update);
3746                         break;
3747                 case RDMA_CQE_TYPE_RESPONDER_RQ:
3748                         cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3749                                                 &cqe->resp, &update);
3750                         break;
3751                 case RDMA_CQE_TYPE_INVALID:
3752                 default:
3753                         DP_ERR(dev, "Error: invalid CQE type = %d\n",
3754                                cqe_get_type(cqe));
3755                 }
3756                 num_entries -= cnt;
3757                 wc += cnt;
3758                 done += cnt;
3759
3760                 cqe = get_cqe(cq);
3761         }
3762         new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3763
3764         cq->cq_cons += new_cons - old_cons;
3765
3766         if (update)
3767                 /* the doorbell notifies about the latest VALID entry,
3768                  * but the chain already points to the next INVALID one
3769                  */
3770                 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3771
3772         spin_unlock_irqrestore(&cq->cq_lock, flags);
3773         return done;
3774 }
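
/* Illustrative only -- not part of the driver.  A hedged sketch of
 * draining this CQ through the core ib_poll_cq() verb (the function name
 * is made up).  Real consumers typically also rearm with
 * ib_req_notify_cq() and re-poll to close the notification race.
 */
static int __maybe_unused qedr_example_drain_cq(struct ib_cq *cq)
{
        struct ib_wc wc[8];
        int total = 0, n, i;

        while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
                for (i = 0; i < n; i++)
                        if (wc[i].status != IB_WC_SUCCESS)
                                pr_debug("wr_id %llu failed with status %d\n",
                                         wc[i].wr_id, wc[i].status);
                total += n;
        }

        return n < 0 ? n : total;
}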
3775
3776 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3777                      u8 port_num,
3778                      const struct ib_wc *in_wc,
3779                      const struct ib_grh *in_grh,
3780                      const struct ib_mad_hdr *mad_hdr,
3781                      size_t in_mad_size, struct ib_mad_hdr *out_mad,
3782                      size_t *out_mad_size, u16 *out_mad_pkey_index)
3783 {
3784         struct qedr_dev *dev = get_qedr_dev(ibdev);
3785
3786         DP_DEBUG(dev, QEDR_MSG_GSI,
3787                  "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3788                  mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3789                  mad_hdr->class_specific, mad_hdr->class_version,
3790                  mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3791         return IB_MAD_RESULT_SUCCESS;
3792 }