drivers/infiniband/hw/mlx5/wr.c (linux-2.6-block.git, Linux 6.12-rc1)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
4  */
5
6 #include <linux/gfp.h>
7 #include <linux/mlx5/qp.h>
8 #include <linux/mlx5/driver.h>
9 #include "wr.h"
10 #include "umr.h"
11
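/* Translate IB work-request opcodes to mlx5 WQE opcodes.  Local
 * invalidate, fast registration and the driver-internal UMR WR all map
 * to the UMR opcode.
 */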
12 static const u32 mlx5_ib_opcode[] = {
13         [IB_WR_SEND]                            = MLX5_OPCODE_SEND,
14         [IB_WR_LSO]                             = MLX5_OPCODE_LSO,
15         [IB_WR_SEND_WITH_IMM]                   = MLX5_OPCODE_SEND_IMM,
16         [IB_WR_RDMA_WRITE]                      = MLX5_OPCODE_RDMA_WRITE,
17         [IB_WR_RDMA_WRITE_WITH_IMM]             = MLX5_OPCODE_RDMA_WRITE_IMM,
18         [IB_WR_RDMA_READ]                       = MLX5_OPCODE_RDMA_READ,
19         [IB_WR_ATOMIC_CMP_AND_SWP]              = MLX5_OPCODE_ATOMIC_CS,
20         [IB_WR_ATOMIC_FETCH_AND_ADD]            = MLX5_OPCODE_ATOMIC_FA,
21         [IB_WR_SEND_WITH_INV]                   = MLX5_OPCODE_SEND_INVAL,
22         [IB_WR_LOCAL_INV]                       = MLX5_OPCODE_UMR,
23         [IB_WR_REG_MR]                          = MLX5_OPCODE_UMR,
24         [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = MLX5_OPCODE_ATOMIC_MASKED_CS,
25         [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = MLX5_OPCODE_ATOMIC_MASKED_FA,
26         [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
27 };
28
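/* Check whether posting @nreq more WRs would overflow @wq.  The fast
 * path uses the cached head/tail; on apparent overflow the indices are
 * re-read under the CQ lock before the final verdict.
 */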
29 int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
30 {
31         struct mlx5_ib_cq *cq;
32         unsigned int cur;
33
34         cur = wq->head - wq->tail;
35         if (likely(cur + nreq < wq->max_post))
36                 return 0;
37
38         cq = to_mcq(ib_cq);
39         spin_lock(&cq->lock);
40         cur = wq->head - wq->tail;
41         spin_unlock(&cq->lock);
42
43         return cur + nreq >= wq->max_post;
44 }
45
46 static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
47                                           u64 remote_addr, u32 rkey)
48 {
49         rseg->raddr    = cpu_to_be64(remote_addr);
50         rseg->rkey     = cpu_to_be32(rkey);
51         rseg->reserved = 0;
52 }
53
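/* Build the Ethernet segment (checksum flags, and for LSO the MSS and
 * inline packet header).  The header is copied up to the current WQE
 * edge first and, if it does not fit, the remainder is copied with
 * mlx5r_memcpy_send_wqe(), which handles wrapping to the next fragment.
 */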
54 static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
55                         void **seg, int *size, void **cur_edge)
56 {
57         struct mlx5_wqe_eth_seg *eseg = *seg;
58
59         memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
60
61         if (wr->send_flags & IB_SEND_IP_CSUM)
62                 eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
63                                  MLX5_ETH_WQE_L4_CSUM;
64
65         if (wr->opcode == IB_WR_LSO) {
66                 struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
67                 size_t left, copysz;
68                 void *pdata = ud_wr->header;
69                 size_t stride;
70
71                 left = ud_wr->hlen;
72                 eseg->mss = cpu_to_be16(ud_wr->mss);
73                 eseg->inline_hdr.sz = cpu_to_be16(left);
74
75                 /* mlx5r_memcpy_send_wqe should get a 16B-aligned address. Hence,
76                  * we first copy up to the current edge and then, if needed,
77                  * continue to mlx5r_memcpy_send_wqe.
78                  */
79                 copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
80                                left);
81                 memcpy(eseg->inline_hdr.data, pdata, copysz);
82                 stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
83                                sizeof(eseg->inline_hdr.start) + copysz, 16);
84                 *size += stride / 16;
85                 *seg += stride;
86
87                 if (copysz < left) {
88                         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
89                         left -= copysz;
90                         pdata += copysz;
91                         mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
92                                               pdata, left);
93                 }
94
95                 return;
96         }
97
98         *seg += sizeof(struct mlx5_wqe_eth_seg);
99         *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
100 }
101
102 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
103                              const struct ib_send_wr *wr)
104 {
105         memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
106         dseg->av.dqp_dct =
107                 cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
108         dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
109 }
110
111 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
112 {
113         dseg->byte_count = cpu_to_be32(sg->length);
114         dseg->lkey       = cpu_to_be32(sg->lkey);
115         dseg->addr       = cpu_to_be64(sg->addr);
116 }
117
118 static __be64 frwr_mkey_mask(bool atomic)
119 {
120         u64 result;
121
122         result = MLX5_MKEY_MASK_LEN             |
123                 MLX5_MKEY_MASK_PAGE_SIZE        |
124                 MLX5_MKEY_MASK_START_ADDR       |
125                 MLX5_MKEY_MASK_EN_RINVAL        |
126                 MLX5_MKEY_MASK_KEY              |
127                 MLX5_MKEY_MASK_LR               |
128                 MLX5_MKEY_MASK_LW               |
129                 MLX5_MKEY_MASK_RR               |
130                 MLX5_MKEY_MASK_RW               |
131                 MLX5_MKEY_MASK_SMALL_FENCE      |
132                 MLX5_MKEY_MASK_FREE;
133
134         if (atomic)
135                 result |= MLX5_MKEY_MASK_A;
136
137         return cpu_to_be64(result);
138 }
139
140 static __be64 sig_mkey_mask(void)
141 {
142         u64 result;
143
144         result = MLX5_MKEY_MASK_LEN             |
145                 MLX5_MKEY_MASK_PAGE_SIZE        |
146                 MLX5_MKEY_MASK_START_ADDR       |
147                 MLX5_MKEY_MASK_EN_SIGERR        |
148                 MLX5_MKEY_MASK_EN_RINVAL        |
149                 MLX5_MKEY_MASK_KEY              |
150                 MLX5_MKEY_MASK_LR               |
151                 MLX5_MKEY_MASK_LW               |
152                 MLX5_MKEY_MASK_RR               |
153                 MLX5_MKEY_MASK_RW               |
154                 MLX5_MKEY_MASK_SMALL_FENCE      |
155                 MLX5_MKEY_MASK_FREE             |
156                 MLX5_MKEY_MASK_BSF_EN;
157
158         return cpu_to_be64(result);
159 }
160
161 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
162                             struct mlx5_ib_mr *mr, u8 flags, bool atomic)
163 {
164         int size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
165
166         memset(umr, 0, sizeof(*umr));
167
168         umr->flags = flags;
169         umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
170         umr->mkey_mask = frwr_mkey_mask(atomic);
171 }
172
173 static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
174 {
175         memset(umr, 0, sizeof(*umr));
176         umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
177         umr->flags = MLX5_UMR_INLINE;
178 }
179
180 static u8 get_umr_flags(int acc)
181 {
182         return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
183                (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
184                (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
185                (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
186                 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
187 }
188
189 static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
190                              struct mlx5_ib_mr *mr,
191                              u32 key, int access)
192 {
193         int ndescs = ALIGN(mr->mmkey.ndescs + mr->meta_ndescs, 8) >> 1;
194
195         memset(seg, 0, sizeof(*seg));
196
197         if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
198                 seg->log2_page_size = ilog2(mr->ibmr.page_size);
199         else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
200                 /* KLMs take twice the size of MTTs */
201                 ndescs *= 2;
202
203         seg->flags = get_umr_flags(access) | mr->access_mode;
204         seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
205         seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
206         seg->start_addr = cpu_to_be64(mr->ibmr.iova);
207         seg->len = cpu_to_be64(mr->ibmr.length);
208         seg->xlt_oct_size = cpu_to_be32(ndescs);
209 }
210
211 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
212 {
213         memset(seg, 0, sizeof(*seg));
214         seg->status = MLX5_MKEY_STATUS_FREE;
215 }
216
217 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
218                              struct mlx5_ib_mr *mr,
219                              struct mlx5_ib_pd *pd)
220 {
221         int bcount = mr->desc_size * (mr->mmkey.ndescs + mr->meta_ndescs);
222
223         dseg->addr = cpu_to_be64(mr->desc_map);
224         dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
225         dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
226 }
227
228 static __be32 send_ieth(const struct ib_send_wr *wr)
229 {
230         switch (wr->opcode) {
231         case IB_WR_SEND_WITH_IMM:
232         case IB_WR_RDMA_WRITE_WITH_IMM:
233                 return wr->ex.imm_data;
234
235         case IB_WR_SEND_WITH_INV:
236                 return cpu_to_be32(wr->ex.invalidate_rkey);
237
238         default:
239                 return 0;
240         }
241 }
242
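/* XOR-fold @size bytes of the WQE into an (inverted) one-byte
 * signature.  wq_sig() derives the number of bytes to cover from byte 8
 * of the WQE.
 */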
243 static u8 calc_sig(void *wqe, int size)
244 {
245         u8 *p = wqe;
246         u8 res = 0;
247         int i;
248
249         for (i = 0; i < size; i++)
250                 res ^= p[i];
251
252         return ~res;
253 }
254
255 static u8 wq_sig(void *wqe)
256 {
257         return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
258 }
259
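/* Copy the WR's SG list inline into the send WQE.  Data is copied in
 * chunks that never cross the current SQ fragment edge; returns -ENOMEM
 * if the total length exceeds qp->max_inline_data.
 */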
260 static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
261                             void **wqe, int *wqe_sz, void **cur_edge)
262 {
263         struct mlx5_wqe_inline_seg *seg;
264         size_t offset;
265         int inl = 0;
266         int i;
267
268         seg = *wqe;
269         *wqe += sizeof(*seg);
270         offset = sizeof(*seg);
271
272         for (i = 0; i < wr->num_sge; i++) {
273                 size_t len  = wr->sg_list[i].length;
274                 void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
275
276                 inl += len;
277
278                 if (unlikely(inl > qp->max_inline_data))
279                         return -ENOMEM;
280
281                 while (likely(len)) {
282                         size_t leftlen;
283                         size_t copysz;
284
285                         handle_post_send_edge(&qp->sq, wqe,
286                                               *wqe_sz + (offset >> 4),
287                                               cur_edge);
288
289                         leftlen = *cur_edge - *wqe;
290                         copysz = min_t(size_t, leftlen, len);
291
292                         memcpy(*wqe, addr, copysz);
293                         len -= copysz;
294                         addr += copysz;
295                         *wqe += copysz;
296                         offset += copysz;
297                 }
298         }
299
300         seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
301
302         *wqe_sz +=  ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
303
304         return 0;
305 }
306
307 static u16 prot_field_size(enum ib_signature_type type)
308 {
309         switch (type) {
310         case IB_SIG_TYPE_T10_DIF:
311                 return MLX5_DIF_SIZE;
312         default:
313                 return 0;
314         }
315 }
316
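/* Map a signature block size in bytes to the device's block-size
 * selector encoding; unsupported sizes map to 0.
 */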
317 static u8 bs_selector(int block_size)
318 {
319         switch (block_size) {
320         case 512:           return 0x1;
321         case 520:           return 0x2;
322         case 4096:          return 0x3;
323         case 4160:          return 0x4;
324         case 1073741824:    return 0x5;
325         default:            return 0;
326         }
327 }
328
329 static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
330                               struct mlx5_bsf_inl *inl)
331 {
332         /* Valid inline section and allow BSF refresh */
333         inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
334                                        MLX5_BSF_REFRESH_DIF);
335         inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
336         inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
337         /* repeating block */
338         inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
339         inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
340                         MLX5_DIF_CRC : MLX5_DIF_IPCS;
341
342         if (domain->sig.dif.ref_remap)
343                 inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
344
345         if (domain->sig.dif.app_escape) {
346                 if (domain->sig.dif.ref_escape)
347                         inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
348                 else
349                         inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
350         }
351
352         inl->dif_app_bitmask_check =
353                 cpu_to_be16(domain->sig.dif.apptag_check_mask);
354 }
355
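/* Fill the BSF (signature descriptor): the basic section plus inline
 * DIF sections for the memory and wire domains.  When both domains use
 * the same block structure, matching guard/app/ref fields are marked
 * for copy through copy_byte_mask.
 */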
356 static int mlx5_set_bsf(struct ib_mr *sig_mr,
357                         struct ib_sig_attrs *sig_attrs,
358                         struct mlx5_bsf *bsf, u32 data_size)
359 {
360         struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
361         struct mlx5_bsf_basic *basic = &bsf->basic;
362         struct ib_sig_domain *mem = &sig_attrs->mem;
363         struct ib_sig_domain *wire = &sig_attrs->wire;
364
365         memset(bsf, 0, sizeof(*bsf));
366
367         /* Basic + Extended + Inline */
368         basic->bsf_size_sbs = 1 << 7;
369         /* Input domain check byte mask */
370         basic->check_byte_mask = sig_attrs->check_mask;
371         basic->raw_data_size = cpu_to_be32(data_size);
372
373         /* Memory domain */
374         switch (sig_attrs->mem.sig_type) {
375         case IB_SIG_TYPE_NONE:
376                 break;
377         case IB_SIG_TYPE_T10_DIF:
378                 basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
379                 basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
380                 mlx5_fill_inl_bsf(mem, &bsf->m_inl);
381                 break;
382         default:
383                 return -EINVAL;
384         }
385
386         /* Wire domain */
387         switch (sig_attrs->wire.sig_type) {
388         case IB_SIG_TYPE_NONE:
389                 break;
390         case IB_SIG_TYPE_T10_DIF:
391                 if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
392                     mem->sig_type == wire->sig_type) {
393                         /* Same block structure */
394                         basic->bsf_size_sbs |= 1 << 4;
395                         if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
396                                 basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
397                         if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
398                                 basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
399                         if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
400                                 basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
401                 } else
402                         basic->wire.bs_selector =
403                                 bs_selector(wire->sig.dif.pi_interval);
404
405                 basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
406                 mlx5_fill_inl_bsf(wire, &bsf->w_inl);
407                 break;
408         default:
409                 return -EINVAL;
410         }
411
412         return 0;
413 }
414
415
416 static int set_sig_data_segment(const struct ib_send_wr *send_wr,
417                                 struct ib_mr *sig_mr,
418                                 struct ib_sig_attrs *sig_attrs,
419                                 struct mlx5_ib_qp *qp, void **seg, int *size,
420                                 void **cur_edge)
421 {
422         struct mlx5_bsf *bsf;
423         u32 data_len;
424         u32 data_key;
425         u64 data_va;
426         u32 prot_len = 0;
427         u32 prot_key = 0;
428         u64 prot_va = 0;
429         bool prot = false;
430         int ret;
431         int wqe_size;
432         struct mlx5_ib_mr *mr = to_mmr(sig_mr);
433         struct mlx5_ib_mr *pi_mr = mr->pi_mr;
434
435         data_len = pi_mr->data_length;
436         data_key = pi_mr->ibmr.lkey;
437         data_va = pi_mr->data_iova;
438         if (pi_mr->meta_ndescs) {
439                 prot_len = pi_mr->meta_length;
440                 prot_key = pi_mr->ibmr.lkey;
441                 prot_va = pi_mr->pi_iova;
442                 prot = true;
443         }
444
445         if (!prot || (data_key == prot_key && data_va == prot_va &&
446                       data_len == prot_len)) {
447                 /**
448                  * Source domain doesn't contain signature information,
449                  * or data and protection are interleaved in memory,
450                  * so we need to construct:
451                  *                  ------------------
452                  *                 |     data_klm     |
453                  *                  ------------------
454                  *                 |       BSF        |
455                  *                  ------------------
456                  **/
457                 struct mlx5_klm *data_klm = *seg;
458
459                 data_klm->bcount = cpu_to_be32(data_len);
460                 data_klm->key = cpu_to_be32(data_key);
461                 data_klm->va = cpu_to_be64(data_va);
462                 wqe_size = ALIGN(sizeof(*data_klm), 64);
463         } else {
464                 /**
465                  * Source domain contains signature information,
466                  * so we need to construct a strided block format:
467                  *               ---------------------------
468                  *              |     stride_block_ctrl     |
469                  *               ---------------------------
470                  *              |          data_klm         |
471                  *               ---------------------------
472                  *              |          prot_klm         |
473                  *               ---------------------------
474                  *              |             BSF           |
475                  *               ---------------------------
476                  **/
477                 struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
478                 struct mlx5_stride_block_entry *data_sentry;
479                 struct mlx5_stride_block_entry *prot_sentry;
480                 u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
481                 int prot_size;
482
483                 sblock_ctrl = *seg;
484                 data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
485                 prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
486
487                 prot_size = prot_field_size(sig_attrs->mem.sig_type);
488                 if (!prot_size) {
489                         pr_err("Bad block size given: %u\n", block_size);
490                         return -EINVAL;
491                 }
492                 sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
493                                                             prot_size);
494                 sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
495                 sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
496                 sblock_ctrl->num_entries = cpu_to_be16(2);
497
498                 data_sentry->bcount = cpu_to_be16(block_size);
499                 data_sentry->key = cpu_to_be32(data_key);
500                 data_sentry->va = cpu_to_be64(data_va);
501                 data_sentry->stride = cpu_to_be16(block_size);
502
503                 prot_sentry->bcount = cpu_to_be16(prot_size);
504                 prot_sentry->key = cpu_to_be32(prot_key);
505                 prot_sentry->va = cpu_to_be64(prot_va);
506                 prot_sentry->stride = cpu_to_be16(prot_size);
507
508                 wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
509                                  sizeof(*prot_sentry), 64);
510         }
511
512         *seg += wqe_size;
513         *size += wqe_size / 16;
514         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
515
516         bsf = *seg;
517         ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
518         if (ret)
519                 return -EINVAL;
520
521         *seg += sizeof(*bsf);
522         *size += sizeof(*bsf) / 16;
523         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
524
525         return 0;
526 }
527
528 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
529                                  struct ib_mr *sig_mr, int access_flags,
530                                  u32 size, u32 length, u32 pdn)
531 {
532         u32 sig_key = sig_mr->rkey;
533         u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
534
535         memset(seg, 0, sizeof(*seg));
536
537         seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
538         seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
539         seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
540                                     MLX5_MKEY_BSF_EN | pdn);
541         seg->len = cpu_to_be64(length);
542         seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
543         seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
544 }
545
546 static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
547                                 u32 size)
548 {
549         memset(umr, 0, sizeof(*umr));
550
551         umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
552         umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
553         umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
554         umr->mkey_mask = sig_mkey_mask();
555 }
556
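/* Build the UMR WQE that binds a signature (PI) MR: UMR control
 * segment, mkey context and the signature data segment.  Rejects WRs
 * that carry SGEs, request remote atomics, or target a QP without
 * integrity support enabled, among other sanity checks.
 */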
557 static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
558                          struct mlx5_ib_qp *qp, void **seg, int *size,
559                          void **cur_edge)
560 {
561         const struct ib_reg_wr *wr = reg_wr(send_wr);
562         struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
563         struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
564         struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
565         u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
566         u32 xlt_size;
567         int region_len, ret;
568
569         if (unlikely(send_wr->num_sge != 0) ||
570             unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
571             unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
572             unlikely(!sig_mr->sig->sig_status_checked))
573                 return -EINVAL;
574
575         /* length of the protected region, data + protection */
576         region_len = pi_mr->ibmr.length;
577
578         /**
579          * KLM octoword size - if protection was provided
580          * then we use the strided block format (3 octowords),
581          * else we use a single KLM (1 octoword).
582          **/
583         if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
584                 xlt_size = 0x30;
585         else
586                 xlt_size = sizeof(struct mlx5_klm);
587
588         set_sig_umr_segment(*seg, xlt_size);
589         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
590         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
591         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
592
593         set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
594                              pdn);
595         *seg += sizeof(struct mlx5_mkey_seg);
596         *size += sizeof(struct mlx5_mkey_seg) / 16;
597         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
598
599         ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
600                                    cur_edge);
601         if (ret)
602                 return ret;
603
604         sig_mr->sig->sig_status_checked = false;
605         return 0;
606 }
607
608 static int set_psv_wr(struct ib_sig_domain *domain,
609                       u32 psv_idx, void **seg, int *size)
610 {
611         struct mlx5_seg_set_psv *psv_seg = *seg;
612
613         memset(psv_seg, 0, sizeof(*psv_seg));
614         psv_seg->psv_num = cpu_to_be32(psv_idx);
615         switch (domain->sig_type) {
616         case IB_SIG_TYPE_NONE:
617                 break;
618         case IB_SIG_TYPE_T10_DIF:
619                 psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
620                                                      domain->sig.dif.app_tag);
621                 psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
622                 break;
623         default:
624                 pr_err("Bad signature type (%d) is given.\n",
625                        domain->sig_type);
626                 return -EINVAL;
627         }
628
629         *seg += sizeof(*psv_seg);
630         *size += sizeof(*psv_seg) / 16;
631
632         return 0;
633 }
634
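/* Build a fast-registration WQE: UMR control segment, mkey context and
 * either an inline copy of the descriptor list (when its size is within
 * MLX5_IB_SQ_UMR_INLINE_THRESHOLD) or a data segment pointing at the
 * MR's descriptor buffer.
 */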
635 static int set_reg_wr(struct mlx5_ib_qp *qp,
636                       const struct ib_reg_wr *wr,
637                       void **seg, int *size, void **cur_edge,
638                       bool check_not_free)
639 {
640         struct mlx5_ib_mr *mr = to_mmr(wr->mr);
641         struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
642         struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
643         int mr_list_size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
644         bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
645         bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
646         u8 flags = 0;
647
648         /* Matches access in mlx5_set_umr_free_mkey().
649          * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
650          * kernel ULPs are not aware of it, so we don't set it here.
651          */
652         if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
653                 mlx5_ib_warn(
654                         to_mdev(qp->ibqp.device),
655                         "Fast update for MR access flags is not possible\n");
656                 return -EINVAL;
657         }
658
659         if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
660                 mlx5_ib_warn(to_mdev(qp->ibqp.device),
661                              "Invalid IB_SEND_INLINE send flag\n");
662                 return -EINVAL;
663         }
664
665         if (check_not_free)
666                 flags |= MLX5_UMR_CHECK_NOT_FREE;
667         if (umr_inline)
668                 flags |= MLX5_UMR_INLINE;
669
670         set_reg_umr_seg(*seg, mr, flags, atomic);
671         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
672         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
673         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
674
675         set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
676         *seg += sizeof(struct mlx5_mkey_seg);
677         *size += sizeof(struct mlx5_mkey_seg) / 16;
678         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
679
680         if (umr_inline) {
681                 mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
682                                       mr_list_size);
683                 *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
684         } else {
685                 set_reg_data_seg(*seg, mr, pd);
686                 *seg += sizeof(struct mlx5_wqe_data_seg);
687                 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
688         }
689         return 0;
690 }
691
692 static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
693                         void **cur_edge)
694 {
695         set_linv_umr_seg(*seg);
696         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
697         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
698         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
699         set_linv_mkey_seg(*seg);
700         *seg += sizeof(struct mlx5_mkey_seg);
701         *size += sizeof(struct mlx5_mkey_seg) / 16;
702         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
703 }
704
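/* Debug helper: dump @size_16 * 16 bytes of the WQE starting at @idx,
 * one WQBB (64 bytes) at a time, following the SQ wrap-around.
 */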
705 static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
706 {
707         __be32 *p = NULL;
708         int i, j;
709
710         pr_debug("dump WQE index %u:\n", idx);
711         for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
712                 if ((i & 0xf) == 0) {
713                         p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
714                         pr_debug("WQBB at %p:\n", (void *)p);
715                         j = 0;
716                         idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
717                 }
718                 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
719                          be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
720                          be32_to_cpu(p[j + 3]));
721         }
722 }
723
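/* Reserve the next WQE slot in the SQ and initialize its control
 * segment (general id, completion and solicited flags).  On success
 * *seg, *size and *cur_edge are set up for the caller to append further
 * segments; returns -ENOMEM if the SQ is full.
 */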
724 int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
725                     struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
726                     int *size, void **cur_edge, int nreq, __be32 general_id,
727                     bool send_signaled, bool solicited)
728 {
729         if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
730                 return -ENOMEM;
731
732         *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
733         *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
734         *ctrl = *seg;
735         *(uint32_t *)(*seg + 8) = 0;
736         (*ctrl)->general_id = general_id;
737         (*ctrl)->fm_ce_se = qp->sq_signal_bits |
738                             (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
739                             (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
740
741         *seg += sizeof(**ctrl);
742         *size = sizeof(**ctrl) / 16;
743         *cur_edge = qp->sq.cur_edge;
744
745         return 0;
746 }
747
748 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
749                      struct mlx5_wqe_ctrl_seg **ctrl,
750                      const struct ib_send_wr *wr, unsigned int *idx, int *size,
751                      void **cur_edge, int nreq)
752 {
753         return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
754                                send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
755                                wr->send_flags & IB_SEND_SOLICITED);
756 }
757
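/* Finalize a WQE: fill the remaining control-segment fields (opcode,
 * WQE index, DS count, fence), record wr_id and completion bookkeeping,
 * advance cur_post by the number of WQEBBs consumed and cache the
 * (possibly updated) SQ edge.
 */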
758 void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
759                       void *seg, u8 size, void *cur_edge, unsigned int idx,
760                       u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
761 {
762         u8 opmod = 0;
763
764         ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
765                                              mlx5_opcode | ((u32)opmod << 24));
766         ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
767         ctrl->fm_ce_se |= fence;
768         if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
769                 ctrl->signature = wq_sig(ctrl);
770
771         qp->sq.wrid[idx] = wr_id;
772         qp->sq.w_list[idx].opcode = mlx5_opcode;
773         qp->sq.wqe_head[idx] = qp->sq.head + nreq;
774         qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
775         qp->sq.w_list[idx].next = qp->sq.cur_post;
776
777         /* Save the edge, which may have been updated during WQE
778          * construction, into the SQ's cache.
779          */
780         seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
781         qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
782                           get_sq_edge(&qp->sq, qp->sq.cur_post &
783                                       (qp->sq.wqe_cnt - 1)) :
784                           cur_edge;
785 }
786
787 static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
788 {
789         set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
790         *seg += sizeof(struct mlx5_wqe_raddr_seg);
791         *size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
792 }
793
794 static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
795                              struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
796                              int *size, void **cur_edge, unsigned int idx)
797 {
798         qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
799         (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
800         set_linv_wr(qp, seg, size, cur_edge);
801 }
802
803 static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
804                          struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
805                          void **cur_edge, unsigned int idx)
806 {
807         qp->sq.wr_data[idx] = IB_WR_REG_MR;
808         (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
809         return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
810 }
811
812 static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
813                       const struct ib_send_wr *wr,
814                       struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
815                       void **cur_edge, unsigned int *idx, int nreq,
816                       struct ib_sig_domain *domain, u32 psv_index,
817                       u8 next_fence)
818 {
819         int err;
820
821         /*
822          * SET_PSV WQEs are not signaled; they are solicited only on error.
823          */
824         err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
825                               send_ieth(wr), false, true);
826         if (unlikely(err)) {
827                 mlx5_ib_warn(dev, "\n");
828                 err = -ENOMEM;
829                 goto out;
830         }
831         err = set_psv_wr(domain, psv_index, seg, size);
832         if (unlikely(err)) {
833                 mlx5_ib_warn(dev, "\n");
834                 goto out;
835         }
836         mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
837                          nreq, next_fence, MLX5_OPCODE_SET_PSV);
838
839 out:
840         return err;
841 }
842
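/* IB_WR_REG_MR_INTEGRITY expands into several WQEs: an optional UMR
 * registering the data + protection MR (pi_mr), a UMR for the signature
 * MR itself, and two SET_PSV WQEs for the memory and wire domains.
 */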
843 static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
844                                    struct mlx5_ib_qp *qp,
845                                    const struct ib_send_wr *wr,
846                                    struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
847                                    int *size, void **cur_edge,
848                                    unsigned int *idx, int nreq, u8 fence,
849                                    u8 next_fence)
850 {
851         struct mlx5_ib_mr *mr;
852         struct mlx5_ib_mr *pi_mr;
853         struct mlx5_ib_mr pa_pi_mr;
854         struct ib_sig_attrs *sig_attrs;
855         struct ib_reg_wr reg_pi_wr;
856         int err;
857
858         qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
859
860         mr = to_mmr(reg_wr(wr)->mr);
861         pi_mr = mr->pi_mr;
862
863         if (pi_mr) {
864                 memset(&reg_pi_wr, 0,
865                        sizeof(struct ib_reg_wr));
866
867                 reg_pi_wr.mr = &pi_mr->ibmr;
868                 reg_pi_wr.access = reg_wr(wr)->access;
869                 reg_pi_wr.key = pi_mr->ibmr.rkey;
870
871                 (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
872                 /* UMR for data + prot registration */
873                 err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
874                 if (unlikely(err))
875                         goto out;
876
877                 mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
878                                  wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
879
880                 err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
881                 if (unlikely(err)) {
882                         mlx5_ib_warn(dev, "\n");
883                         err = -ENOMEM;
884                         goto out;
885                 }
886         } else {
887                 memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
888                 /* No UMR, use local_dma_lkey */
889                 pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
890                 pa_pi_mr.mmkey.ndescs = mr->mmkey.ndescs;
891                 pa_pi_mr.data_length = mr->data_length;
892                 pa_pi_mr.data_iova = mr->data_iova;
893                 if (mr->meta_ndescs) {
894                         pa_pi_mr.meta_ndescs = mr->meta_ndescs;
895                         pa_pi_mr.meta_length = mr->meta_length;
896                         pa_pi_mr.pi_iova = mr->pi_iova;
897                 }
898
899                 pa_pi_mr.ibmr.length = mr->ibmr.length;
900                 mr->pi_mr = &pa_pi_mr;
901         }
902         (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
903         /* UMR for sig MR */
904         err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
905         if (unlikely(err)) {
906                 mlx5_ib_warn(dev, "\n");
907                 goto out;
908         }
909         mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
910                          nreq, fence, MLX5_OPCODE_UMR);
911
912         sig_attrs = mr->ibmr.sig_attrs;
913         err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
914                          &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
915                          next_fence);
916         if (unlikely(err))
917                 goto out;
918
919         err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
920                          &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
921                          next_fence);
922         if (unlikely(err))
923                 goto out;
924
925         qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
926
927 out:
928         return err;
929 }
930
931 static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
932                          const struct ib_send_wr *wr,
933                          struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
934                          void **cur_edge, unsigned int *idx, int nreq, u8 fence,
935                          u8 next_fence, int *num_sge)
936 {
937         int err = 0;
938
939         switch (wr->opcode) {
940         case IB_WR_RDMA_READ:
941         case IB_WR_RDMA_WRITE:
942         case IB_WR_RDMA_WRITE_WITH_IMM:
943                 handle_rdma_op(wr, seg, size);
944                 break;
945
946         case IB_WR_ATOMIC_CMP_AND_SWP:
947         case IB_WR_ATOMIC_FETCH_AND_ADD:
948         case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
949                 mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
950                 err = -EOPNOTSUPP;
951                 goto out;
952
953         case IB_WR_LOCAL_INV:
954                 handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
955                 *num_sge = 0;
956                 break;
957
958         case IB_WR_REG_MR:
959                 err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
960                 if (unlikely(err))
961                         goto out;
962                 *num_sge = 0;
963                 break;
964
965         case IB_WR_REG_MR_INTEGRITY:
966                 err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
967                                               cur_edge, idx, nreq, fence,
968                                               next_fence);
969                 if (unlikely(err))
970                         goto out;
971                 *num_sge = 0;
972                 break;
973
974         default:
975                 break;
976         }
977
978 out:
979         return err;
980 }
981
982 static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
983 {
984         switch (wr->opcode) {
985         case IB_WR_RDMA_WRITE:
986         case IB_WR_RDMA_WRITE_WITH_IMM:
987                 handle_rdma_op(wr, seg, size);
988                 break;
989         default:
990                 break;
991         }
992 }
993
994 static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
995                               const struct ib_send_wr *wr, void **seg,
996                               int *size, void **cur_edge)
997 {
998         set_datagram_seg(*seg, wr);
999         *seg += sizeof(struct mlx5_wqe_datagram_seg);
1000         *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1001         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1002 }
1003
1004 static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1005                           void **seg, int *size, void **cur_edge)
1006 {
1007         set_datagram_seg(*seg, wr);
1008         *seg += sizeof(struct mlx5_wqe_datagram_seg);
1009         *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1010         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1011
1012         /* Handle QPs created with IPoIB UD LSO offload */
1013         if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
1014                 struct mlx5_wqe_eth_pad *pad;
1015
1016                 pad = *seg;
1017                 memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
1018                 *seg += sizeof(struct mlx5_wqe_eth_pad);
1019                 *size += sizeof(struct mlx5_wqe_eth_pad) / 16;
1020                 set_eth_seg(wr, qp, seg, size, cur_edge);
1021                 handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1022         }
1023 }
1024
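/* Publish @nreq posted WQEs: update the send doorbell record and write
 * the control segment to the doorbell (BlueFlame) register, with write
 * barriers ordering both stores after the WQE writes.
 */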
1025 void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
1026                    struct mlx5_wqe_ctrl_seg *ctrl)
1027 {
1028         struct mlx5_bf *bf = &qp->bf;
1029
1030         qp->sq.head += nreq;
1031
1032         /* Make sure that descriptors are written before
1033          * updating doorbell record and ringing the doorbell
1034          */
1035         wmb();
1036
1037         qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
1038
1039         /* Make sure doorbell record is visible to the HCA before
1040          * we hit doorbell.
1041          */
1042         wmb();
1043
1044         mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
1045         /* Make sure doorbells don't leak out of SQ spinlock
1046          * and reach the HCA out of order.
1047          */
1048         bf->offset ^= bf->buf_size;
1049 }
1050
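/* Post a list of send work requests.  Under the SQ lock, each WR gets a
 * WQE slot, its transport- and opcode-specific segments, and either
 * inline data or data pointer segments; the doorbell is rung once after
 * the last WR.
 */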
1051 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1052                       const struct ib_send_wr **bad_wr, bool drain)
1053 {
1054         struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
1055         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1056         struct mlx5_core_dev *mdev = dev->mdev;
1057         struct mlx5_ib_qp *qp = to_mqp(ibqp);
1058         struct mlx5_wqe_xrc_seg *xrc;
1059         void *cur_edge;
1060         int size;
1061         unsigned long flags;
1062         unsigned int idx;
1063         int err = 0;
1064         int num_sge;
1065         void *seg;
1066         int nreq;
1067         int i;
1068         u8 next_fence = 0;
1069         u8 fence;
1070
1071         if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1072                      !drain)) {
1073                 *bad_wr = wr;
1074                 return -EIO;
1075         }
1076
1077         if (qp->type == IB_QPT_GSI)
1078                 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
1079
1080         spin_lock_irqsave(&qp->sq.lock, flags);
1081
1082         for (nreq = 0; wr; nreq++, wr = wr->next) {
1083                 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
1084                         mlx5_ib_warn(dev, "\n");
1085                         err = -EINVAL;
1086                         *bad_wr = wr;
1087                         goto out;
1088                 }
1089
1090                 num_sge = wr->num_sge;
1091                 if (unlikely(num_sge > qp->sq.max_gs)) {
1092                         mlx5_ib_warn(dev, "\n");
1093                         err = -EINVAL;
1094                         *bad_wr = wr;
1095                         goto out;
1096                 }
1097
1098                 err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
1099                                 nreq);
1100                 if (err) {
1101                         mlx5_ib_warn(dev, "\n");
1102                         err = -ENOMEM;
1103                         *bad_wr = wr;
1104                         goto out;
1105                 }
1106
1107                 if (wr->opcode == IB_WR_REG_MR ||
1108                     wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1109                         fence = dev->umr_fence;
1110                         next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1111                 } else  {
1112                         if (wr->send_flags & IB_SEND_FENCE) {
1113                                 if (qp->next_fence)
1114                                         fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
1115                                 else
1116                                         fence = MLX5_FENCE_MODE_FENCE;
1117                         } else {
1118                                 fence = qp->next_fence;
1119                         }
1120                 }
1121
1122                 switch (qp->type) {
1123                 case IB_QPT_XRC_INI:
1124                         xrc = seg;
1125                         seg += sizeof(*xrc);
1126                         size += sizeof(*xrc) / 16;
1127                         fallthrough;
1128                 case IB_QPT_RC:
1129                         err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
1130                                             &cur_edge, &idx, nreq, fence,
1131                                             next_fence, &num_sge);
1132                         if (unlikely(err)) {
1133                                 *bad_wr = wr;
1134                                 goto out;
1135                         } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1136                                 goto skip_psv;
1137                         }
1138                         break;
1139
1140                 case IB_QPT_UC:
1141                         handle_qpt_uc(wr, &seg, &size);
1142                         break;
1143                 case IB_QPT_SMI:
1144                         if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
1145                                 mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
1146                                 err = -EPERM;
1147                                 *bad_wr = wr;
1148                                 goto out;
1149                         }
1150                         fallthrough;
1151                 case MLX5_IB_QPT_HW_GSI:
1152                         handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
1153                         break;
1154                 case IB_QPT_UD:
1155                         handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
1156                         break;
1157
1158                 default:
1159                         break;
1160                 }
1161
1162                 if (wr->send_flags & IB_SEND_INLINE && num_sge) {
1163                         err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
1164                         if (unlikely(err)) {
1165                                 mlx5_ib_warn(dev, "\n");
1166                                 *bad_wr = wr;
1167                                 goto out;
1168                         }
1169                 } else {
1170                         for (i = 0; i < num_sge; i++) {
1171                                 handle_post_send_edge(&qp->sq, &seg, size,
1172                                                       &cur_edge);
1173                                 if (unlikely(!wr->sg_list[i].length))
1174                                         continue;
1175
1176                                 set_data_ptr_seg(
1177                                         (struct mlx5_wqe_data_seg *)seg,
1178                                         wr->sg_list + i);
1179                                 size += sizeof(struct mlx5_wqe_data_seg) / 16;
1180                                 seg += sizeof(struct mlx5_wqe_data_seg);
1181                         }
1182                 }
1183
1184                 qp->next_fence = next_fence;
1185                 mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
1186                                  nreq, fence, mlx5_ib_opcode[wr->opcode]);
1187 skip_psv:
1188                 if (0)
1189                         dump_wqe(qp, idx, size);
1190         }
1191
1192 out:
1193         if (likely(nreq))
1194                 mlx5r_ring_db(qp, nreq, ctrl);
1195
1196         spin_unlock_irqrestore(&qp->sq.lock, flags);
1197
1198         return err;
1199 }
1200
1201 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
1202 {
1203         sig->signature = calc_sig(sig, (max_gs + 1) << 2);
1204 }
1205
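/* Post a list of receive work requests.  Under the RQ lock, each WR's
 * SG list is written as data pointer segments (short lists are
 * terminated with a zero-length entry) and the RQ doorbell record is
 * updated once at the end.
 */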
1206 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1207                       const struct ib_recv_wr **bad_wr, bool drain)
1208 {
1209         struct mlx5_ib_qp *qp = to_mqp(ibqp);
1210         struct mlx5_wqe_data_seg *scat;
1211         struct mlx5_rwqe_sig *sig;
1212         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1213         struct mlx5_core_dev *mdev = dev->mdev;
1214         unsigned long flags;
1215         int err = 0;
1216         int nreq;
1217         int ind;
1218         int i;
1219
1220         if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1221                      !drain)) {
1222                 *bad_wr = wr;
1223                 return -EIO;
1224         }
1225
1226         if (qp->type == IB_QPT_GSI)
1227                 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
1228
1229         spin_lock_irqsave(&qp->rq.lock, flags);
1230
1231         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
1232
1233         for (nreq = 0; wr; nreq++, wr = wr->next) {
1234                 if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1235                         err = -ENOMEM;
1236                         *bad_wr = wr;
1237                         goto out;
1238                 }
1239
1240                 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1241                         err = -EINVAL;
1242                         *bad_wr = wr;
1243                         goto out;
1244                 }
1245
1246                 scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
1247                 if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
1248                         scat++;
1249
1250                 for (i = 0; i < wr->num_sge; i++)
1251                         set_data_ptr_seg(scat + i, wr->sg_list + i);
1252
1253                 if (i < qp->rq.max_gs) {
1254                         scat[i].byte_count = 0;
1255                         scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey;
1256                         scat[i].addr       = 0;
1257                 }
1258
1259                 if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
1260                         sig = (struct mlx5_rwqe_sig *)scat;
1261                         set_sig_seg(sig, qp->rq.max_gs);
1262                 }
1263
1264                 qp->rq.wrid[ind] = wr->wr_id;
1265
1266                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
1267         }
1268
1269 out:
1270         if (likely(nreq)) {
1271                 qp->rq.head += nreq;
1272
1273                 /* Make sure that descriptors are written before
1274                  * doorbell record.
1275                  */
1276                 wmb();
1277
1278                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1279         }
1280
1281         spin_unlock_irqrestore(&qp->rq.lock, flags);
1282
1283         return err;
1284 }