Merge tag 'rtc-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux
[linux-2.6-block.git] / drivers / infiniband / sw / siw / siw_qp_rx.c
1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2
3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4 /* Copyright (c) 2008-2019, IBM Corporation */
5
6 #include <linux/errno.h>
7 #include <linux/types.h>
8 #include <linux/net.h>
9 #include <linux/scatterlist.h>
10 #include <linux/highmem.h>
11
12 #include <rdma/iw_cm.h>
13 #include <rdma/ib_verbs.h>
14
15 #include "siw.h"
16 #include "siw_verbs.h"
17 #include "siw_mem.h"
18
19 /*
20  * siw_rx_umem()
21  *
22  * Receive data of @len into target referenced by @dest_addr.
23  *
24  * @srx:        Receive Context
25  * @umem:       siw representation of target memory
26  * @dest_addr:  user virtual address
27  * @len:        number of bytes to place
28  */
29 static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
30                        u64 dest_addr, int len)
31 {
32         int copied = 0;
33
34         while (len) {
35                 struct page *p;
36                 int pg_off, bytes, rv;
37                 void *dest;
38
39                 p = siw_get_upage(umem, dest_addr);
40                 if (unlikely(!p)) {
41                         pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n",
42                                 __func__, qp_id(rx_qp(srx)),
43                                 (void *)dest_addr, (void *)umem->fp_addr);
44                         /* siw internal error */
45                         srx->skb_copied += copied;
46                         srx->skb_new -= copied;
47
48                         return -EFAULT;
49                 }
50                 pg_off = dest_addr & ~PAGE_MASK;
51                 bytes = min(len, (int)PAGE_SIZE - pg_off);
52
53                 siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes);
54
55                 dest = kmap_atomic(p);
56                 rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
57                                    bytes);
58
59                 if (unlikely(rv)) {
60                         kunmap_atomic(dest);
61                         srx->skb_copied += copied;
62                         srx->skb_new -= copied;
63
64                         pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
65                                 qp_id(rx_qp(srx)), __func__, len, p, rv);
66
67                         return -EFAULT;
68                 }
69                 if (srx->mpa_crc_hd) {
70                         if (rx_qp(srx)->kernel_verbs) {
71                                 crypto_shash_update(srx->mpa_crc_hd,
72                                         (u8 *)(dest + pg_off), bytes);
73                                 kunmap_atomic(dest);
74                         } else {
75                                 kunmap_atomic(dest);
76                                 /*
77                                  * Do CRC on original, not target buffer.
78                                  * Some user land applications may
79                                  * concurrently write the target buffer,
80                                  * which would yield a broken CRC.
81                                  * Walking the skb twice is very ineffcient.
82                                  * Folding the CRC into skb_copy_bits()
83                                  * would be much better, but is currently
84                                  * not supported.
85                                  */
86                                 siw_crc_skb(srx, bytes);
87                         }
88                 } else {
89                         kunmap_atomic(dest);
90                 }
91                 srx->skb_offset += bytes;
92                 copied += bytes;
93                 len -= bytes;
94                 dest_addr += bytes;
95                 pg_off = 0;
96         }
97         srx->skb_copied += copied;
98         srx->skb_new -= copied;
99
100         return copied;
101 }
102
103 static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
104 {
105         int rv;
106
107         siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len);
108
109         rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
110         if (unlikely(rv)) {
111                 pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n",
112                         qp_id(rx_qp(srx)), __func__, len, kva, rv);
113
114                 return rv;
115         }
116         if (srx->mpa_crc_hd)
117                 crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
118
119         srx->skb_offset += len;
120         srx->skb_copied += len;
121         srx->skb_new -= len;
122
123         return len;
124 }
125
126 static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
127                       struct siw_mem *mem, u64 addr, int len)
128 {
129         struct siw_pbl *pbl = mem->pbl;
130         u64 offset = addr - mem->va;
131         int copied = 0;
132
133         while (len) {
134                 int bytes;
135                 u64 buf_addr =
136                         siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
137                 if (!buf_addr)
138                         break;
139
140                 bytes = min(bytes, len);
141                 if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) {
142                         copied += bytes;
143                         offset += bytes;
144                         len -= bytes;
145                 } else {
146                         break;
147                 }
148         }
149         return copied;
150 }
151
152 /*
153  * siw_rresp_check_ntoh()
154  *
155  * Check incoming RRESP fragment header against expected
156  * header values and update expected values for potential next
157  * fragment.
158  *
159  * NOTE: This function must be called only if a RRESP DDP segment
160  *       starts but not for fragmented consecutive pieces of an
161  *       already started DDP segment.
162  */
163 static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
164                                 struct siw_rx_fpdu *frx)
165 {
166         struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
167         struct siw_wqe *wqe = &frx->wqe_active;
168         enum ddp_ecode ecode;
169
170         u32 sink_stag = be32_to_cpu(rresp->sink_stag);
171         u64 sink_to = be64_to_cpu(rresp->sink_to);
172
173         if (frx->first_ddp_seg) {
174                 srx->ddp_stag = wqe->sqe.sge[0].lkey;
175                 srx->ddp_to = wqe->sqe.sge[0].laddr;
176                 frx->pbl_idx = 0;
177         }
178         /* Below checks extend beyond the semantics of DDP, and
179          * into RDMAP:
180          * We check if the read response matches exactly the
181          * read request which was send to the remote peer to
182          * trigger this read response. RFC5040/5041 do not
183          * always have a proper error code for the detected
184          * error cases. We choose 'base or bounds error' for
185          * cases where the inbound STag is valid, but offset
186          * or length do not match our response receive state.
187          */
188         if (unlikely(srx->ddp_stag != sink_stag)) {
189                 pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
190                         qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
191                 ecode = DDP_ECODE_T_INVALID_STAG;
192                 goto error;
193         }
194         if (unlikely(srx->ddp_to != sink_to)) {
195                 pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
196                         qp_id(rx_qp(srx)), (unsigned long long)sink_to,
197                         (unsigned long long)srx->ddp_to);
198                 ecode = DDP_ECODE_T_BASE_BOUNDS;
199                 goto error;
200         }
201         if (unlikely(!frx->more_ddp_segs &&
202                      (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
203                 pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
204                         qp_id(rx_qp(srx)),
205                         wqe->processed + srx->fpdu_part_rem, wqe->bytes);
206                 ecode = DDP_ECODE_T_BASE_BOUNDS;
207                 goto error;
208         }
209         return 0;
210 error:
211         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
212                            DDP_ETYPE_TAGGED_BUF, ecode, 0);
213         return -EINVAL;
214 }
215
216 /*
217  * siw_write_check_ntoh()
218  *
219  * Check incoming WRITE fragment header against expected
220  * header values and update expected values for potential next
221  * fragment
222  *
223  * NOTE: This function must be called only if a WRITE DDP segment
224  *       starts but not for fragmented consecutive pieces of an
225  *       already started DDP segment.
226  */
227 static int siw_write_check_ntoh(struct siw_rx_stream *srx,
228                                 struct siw_rx_fpdu *frx)
229 {
230         struct iwarp_rdma_write *write = &srx->hdr.rwrite;
231         enum ddp_ecode ecode;
232
233         u32 sink_stag = be32_to_cpu(write->sink_stag);
234         u64 sink_to = be64_to_cpu(write->sink_to);
235
236         if (frx->first_ddp_seg) {
237                 srx->ddp_stag = sink_stag;
238                 srx->ddp_to = sink_to;
239                 frx->pbl_idx = 0;
240         } else {
241                 if (unlikely(srx->ddp_stag != sink_stag)) {
242                         pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
243                                 qp_id(rx_qp(srx)), sink_stag,
244                                 srx->ddp_stag);
245                         ecode = DDP_ECODE_T_INVALID_STAG;
246                         goto error;
247                 }
248                 if (unlikely(srx->ddp_to != sink_to)) {
249                         pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
250                                 qp_id(rx_qp(srx)),
251                                 (unsigned long long)sink_to,
252                                 (unsigned long long)srx->ddp_to);
253                         ecode = DDP_ECODE_T_BASE_BOUNDS;
254                         goto error;
255                 }
256         }
257         return 0;
258 error:
259         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
260                            DDP_ETYPE_TAGGED_BUF, ecode, 0);
261         return -EINVAL;
262 }
263
264 /*
265  * siw_send_check_ntoh()
266  *
267  * Check incoming SEND fragment header against expected
268  * header values and update expected MSN if no next
269  * fragment expected
270  *
271  * NOTE: This function must be called only if a SEND DDP segment
272  *       starts but not for fragmented consecutive pieces of an
273  *       already started DDP segment.
274  */
275 static int siw_send_check_ntoh(struct siw_rx_stream *srx,
276                                struct siw_rx_fpdu *frx)
277 {
278         struct iwarp_send_inv *send = &srx->hdr.send_inv;
279         struct siw_wqe *wqe = &frx->wqe_active;
280         enum ddp_ecode ecode;
281
282         u32 ddp_msn = be32_to_cpu(send->ddp_msn);
283         u32 ddp_mo = be32_to_cpu(send->ddp_mo);
284         u32 ddp_qn = be32_to_cpu(send->ddp_qn);
285
286         if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
287                 pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
288                         qp_id(rx_qp(srx)), ddp_qn);
289                 ecode = DDP_ECODE_UT_INVALID_QN;
290                 goto error;
291         }
292         if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
293                 pr_warn("siw: [QP %u]: send msn: %u != %u\n",
294                         qp_id(rx_qp(srx)), ddp_msn,
295                         srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
296                 ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
297                 goto error;
298         }
299         if (unlikely(ddp_mo != wqe->processed)) {
300                 pr_warn("siw: [QP %u], send mo: %u != %u\n",
301                         qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
302                 ecode = DDP_ECODE_UT_INVALID_MO;
303                 goto error;
304         }
305         if (frx->first_ddp_seg) {
306                 /* initialize user memory write position */
307                 frx->sge_idx = 0;
308                 frx->sge_off = 0;
309                 frx->pbl_idx = 0;
310
311                 /* only valid for SEND_INV and SEND_SE_INV operations */
312                 srx->inval_stag = be32_to_cpu(send->inval_stag);
313         }
314         if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
315                 siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
316                            wqe->bytes, wqe->processed, srx->fpdu_part_rem);
317                 wqe->wc_status = SIW_WC_LOC_LEN_ERR;
318                 ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
319                 goto error;
320         }
321         return 0;
322 error:
323         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
324                            DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
325         return -EINVAL;
326 }
327
328 static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
329 {
330         struct siw_rqe *rqe;
331         struct siw_srq *srq;
332         struct siw_wqe *wqe = NULL;
333         bool srq_event = false;
334         unsigned long flags;
335
336         srq = qp->srq;
337         if (srq) {
338                 spin_lock_irqsave(&srq->lock, flags);
339                 if (unlikely(!srq->num_rqe))
340                         goto out;
341
342                 rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
343         } else {
344                 if (unlikely(!qp->recvq))
345                         goto out;
346
347                 rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
348         }
349         if (likely(rqe->flags == SIW_WQE_VALID)) {
350                 int num_sge = rqe->num_sge;
351
352                 if (likely(num_sge <= SIW_MAX_SGE)) {
353                         int i = 0;
354
355                         wqe = rx_wqe(&qp->rx_untagged);
356                         rx_type(wqe) = SIW_OP_RECEIVE;
357                         wqe->wr_status = SIW_WR_INPROGRESS;
358                         wqe->bytes = 0;
359                         wqe->processed = 0;
360
361                         wqe->rqe.id = rqe->id;
362                         wqe->rqe.num_sge = num_sge;
363
364                         while (i < num_sge) {
365                                 wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
366                                 wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
367                                 wqe->rqe.sge[i].length = rqe->sge[i].length;
368                                 wqe->bytes += wqe->rqe.sge[i].length;
369                                 wqe->mem[i] = NULL;
370                                 i++;
371                         }
372                         /* can be re-used by appl */
373                         smp_store_mb(rqe->flags, 0);
374                 } else {
375                         siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
376                         if (srq)
377                                 spin_unlock_irqrestore(&srq->lock, flags);
378                         return NULL;
379                 }
380                 if (!srq) {
381                         qp->rq_get++;
382                 } else {
383                         if (srq->armed) {
384                                 /* Test SRQ limit */
385                                 u32 off = (srq->rq_get + srq->limit) %
386                                           srq->num_rqe;
387                                 struct siw_rqe *rqe2 = &srq->recvq[off];
388
389                                 if (!(rqe2->flags & SIW_WQE_VALID)) {
390                                         srq->armed = 0;
391                                         srq_event = true;
392                                 }
393                         }
394                         srq->rq_get++;
395                 }
396         }
397 out:
398         if (srq) {
399                 spin_unlock_irqrestore(&srq->lock, flags);
400                 if (srq_event)
401                         siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
402         }
403         return wqe;
404 }
405
406 /*
407  * siw_proc_send:
408  *
409  * Process one incoming SEND and place data into memory referenced by
410  * receive wqe.
411  *
412  * Function supports partially received sends (suspending/resuming
413  * current receive wqe processing)
414  *
415  * return value:
416  *      0:       reached the end of a DDP segment
417  *      -EAGAIN: to be called again to finish the DDP segment
418  */
419 int siw_proc_send(struct siw_qp *qp)
420 {
421         struct siw_rx_stream *srx = &qp->rx_stream;
422         struct siw_rx_fpdu *frx = &qp->rx_untagged;
423         struct siw_wqe *wqe;
424         u32 data_bytes; /* all data bytes available */
425         u32 rcvd_bytes; /* sum of data bytes rcvd */
426         int rv = 0;
427
428         if (frx->first_ddp_seg) {
429                 wqe = siw_rqe_get(qp);
430                 if (unlikely(!wqe)) {
431                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
432                                            DDP_ETYPE_UNTAGGED_BUF,
433                                            DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
434                         return -ENOENT;
435                 }
436         } else {
437                 wqe = rx_wqe(frx);
438         }
439         if (srx->state == SIW_GET_DATA_START) {
440                 rv = siw_send_check_ntoh(srx, frx);
441                 if (unlikely(rv)) {
442                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
443                         return rv;
444                 }
445                 if (!srx->fpdu_part_rem) /* zero length SEND */
446                         return 0;
447         }
448         data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
449         rcvd_bytes = 0;
450
451         /* A zero length SEND will skip below loop */
452         while (data_bytes) {
453                 struct ib_pd *pd;
454                 struct siw_mem **mem, *mem_p;
455                 struct siw_sge *sge;
456                 u32 sge_bytes; /* data bytes avail for SGE */
457
458                 sge = &wqe->rqe.sge[frx->sge_idx];
459
460                 if (!sge->length) {
461                         /* just skip empty sge's */
462                         frx->sge_idx++;
463                         frx->sge_off = 0;
464                         frx->pbl_idx = 0;
465                         continue;
466                 }
467                 sge_bytes = min(data_bytes, sge->length - frx->sge_off);
468                 mem = &wqe->mem[frx->sge_idx];
469
470                 /*
471                  * check with QP's PD if no SRQ present, SRQ's PD otherwise
472                  */
473                 pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;
474
475                 rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
476                                    frx->sge_off, sge_bytes);
477                 if (unlikely(rv)) {
478                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
479                                            DDP_ETYPE_CATASTROPHIC,
480                                            DDP_ECODE_CATASTROPHIC, 0);
481
482                         siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
483                         break;
484                 }
485                 mem_p = *mem;
486                 if (mem_p->mem_obj == NULL)
487                         rv = siw_rx_kva(srx,
488                                         (void *)(sge->laddr + frx->sge_off),
489                                         sge_bytes);
490                 else if (!mem_p->is_pbl)
491                         rv = siw_rx_umem(srx, mem_p->umem,
492                                          sge->laddr + frx->sge_off, sge_bytes);
493                 else
494                         rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
495                                         sge->laddr + frx->sge_off, sge_bytes);
496
497                 if (unlikely(rv != sge_bytes)) {
498                         wqe->processed += rcvd_bytes;
499
500                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
501                                            DDP_ETYPE_CATASTROPHIC,
502                                            DDP_ECODE_CATASTROPHIC, 0);
503                         return -EINVAL;
504                 }
505                 frx->sge_off += rv;
506
507                 if (frx->sge_off == sge->length) {
508                         frx->sge_idx++;
509                         frx->sge_off = 0;
510                         frx->pbl_idx = 0;
511                 }
512                 data_bytes -= rv;
513                 rcvd_bytes += rv;
514
515                 srx->fpdu_part_rem -= rv;
516                 srx->fpdu_part_rcvd += rv;
517         }
518         wqe->processed += rcvd_bytes;
519
520         if (!srx->fpdu_part_rem)
521                 return 0;
522
523         return (rv < 0) ? rv : -EAGAIN;
524 }
525
526 /*
527  * siw_proc_write:
528  *
529  * Place incoming WRITE after referencing and checking target buffer
530
531  * Function supports partially received WRITEs (suspending/resuming
532  * current receive processing)
533  *
534  * return value:
535  *      0:       reached the end of a DDP segment
536  *      -EAGAIN: to be called again to finish the DDP segment
537  */
538 int siw_proc_write(struct siw_qp *qp)
539 {
540         struct siw_rx_stream *srx = &qp->rx_stream;
541         struct siw_rx_fpdu *frx = &qp->rx_tagged;
542         struct siw_mem *mem;
543         int bytes, rv;
544
545         if (srx->state == SIW_GET_DATA_START) {
546                 if (!srx->fpdu_part_rem) /* zero length WRITE */
547                         return 0;
548
549                 rv = siw_write_check_ntoh(srx, frx);
550                 if (unlikely(rv)) {
551                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
552                         return rv;
553                 }
554         }
555         bytes = min(srx->fpdu_part_rem, srx->skb_new);
556
557         if (frx->first_ddp_seg) {
558                 struct siw_wqe *wqe = rx_wqe(frx);
559
560                 rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
561                 if (unlikely(!rx_mem(frx))) {
562                         siw_dbg_qp(qp,
563                                    "sink stag not found/invalid, stag 0x%08x\n",
564                                    srx->ddp_stag);
565
566                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
567                                            DDP_ETYPE_TAGGED_BUF,
568                                            DDP_ECODE_T_INVALID_STAG, 0);
569                         return -EINVAL;
570                 }
571                 wqe->rqe.num_sge = 1;
572                 rx_type(wqe) = SIW_OP_WRITE;
573                 wqe->wr_status = SIW_WR_INPROGRESS;
574         }
575         mem = rx_mem(frx);
576
577         /*
578          * Check if application re-registered memory with different
579          * key field of STag.
580          */
581         if (unlikely(mem->stag != srx->ddp_stag)) {
582                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
583                                    DDP_ETYPE_TAGGED_BUF,
584                                    DDP_ECODE_T_INVALID_STAG, 0);
585                 return -EINVAL;
586         }
587         rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
588                            IB_ACCESS_REMOTE_WRITE, bytes);
589         if (unlikely(rv)) {
590                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
591                                    DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
592                                    0);
593
594                 siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
595
596                 return -EINVAL;
597         }
598
599         if (mem->mem_obj == NULL)
600                 rv = siw_rx_kva(srx,
601                                 (void *)(srx->ddp_to + srx->fpdu_part_rcvd),
602                                 bytes);
603         else if (!mem->is_pbl)
604                 rv = siw_rx_umem(srx, mem->umem,
605                                  srx->ddp_to + srx->fpdu_part_rcvd, bytes);
606         else
607                 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
608                                 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
609
610         if (unlikely(rv != bytes)) {
611                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
612                                    DDP_ETYPE_CATASTROPHIC,
613                                    DDP_ECODE_CATASTROPHIC, 0);
614                 return -EINVAL;
615         }
616         srx->fpdu_part_rem -= rv;
617         srx->fpdu_part_rcvd += rv;
618
619         if (!srx->fpdu_part_rem) {
620                 srx->ddp_to += srx->fpdu_part_rcvd;
621                 return 0;
622         }
623         return -EAGAIN;
624 }
625
626 /*
627  * Inbound RREQ's cannot carry user data.
628  */
629 int siw_proc_rreq(struct siw_qp *qp)
630 {
631         struct siw_rx_stream *srx = &qp->rx_stream;
632
633         if (!srx->fpdu_part_rem)
634                 return 0;
635
636         pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
637                 be16_to_cpu(srx->hdr.ctrl.mpa_len));
638
639         return -EPROTO;
640 }
641
642 /*
643  * siw_init_rresp:
644  *
645  * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
646  * Put it at the tail of the IRQ, if there is another WQE currently in
647  * transmit processing. If not, make it the current WQE to be processed
648  * and schedule transmit processing.
649  *
650  * Can be called from softirq context and from process
651  * context (RREAD socket loopback case!)
652  *
653  * return value:
654  *      0:      success,
655  *              failure code otherwise
656  */
657
658 static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
659 {
660         struct siw_wqe *tx_work = tx_wqe(qp);
661         struct siw_sqe *resp;
662
663         uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
664                  laddr = be64_to_cpu(srx->hdr.rreq.source_to);
665         uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
666                  lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
667                  rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
668                  msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);
669
670         int run_sq = 1, rv = 0;
671         unsigned long flags;
672
673         if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
674                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
675                                    DDP_ETYPE_UNTAGGED_BUF,
676                                    DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
677                 return -EPROTO;
678         }
679         spin_lock_irqsave(&qp->sq_lock, flags);
680
681         if (tx_work->wr_status == SIW_WR_IDLE) {
682                 /*
683                  * immediately schedule READ response w/o
684                  * consuming IRQ entry: IRQ must be empty.
685                  */
686                 tx_work->processed = 0;
687                 tx_work->mem[0] = NULL;
688                 tx_work->wr_status = SIW_WR_QUEUED;
689                 resp = &tx_work->sqe;
690         } else {
691                 resp = irq_alloc_free(qp);
692                 run_sq = 0;
693         }
694         if (likely(resp)) {
695                 resp->opcode = SIW_OP_READ_RESPONSE;
696
697                 resp->sge[0].length = length;
698                 resp->sge[0].laddr = laddr;
699                 resp->sge[0].lkey = lkey;
700
701                 /* Keep aside message sequence number for potential
702                  * error reporting during Read Response generation.
703                  */
704                 resp->sge[1].length = msn;
705
706                 resp->raddr = raddr;
707                 resp->rkey = rkey;
708                 resp->num_sge = length ? 1 : 0;
709
710                 /* RRESP now valid as current TX wqe or placed into IRQ */
711                 smp_store_mb(resp->flags, SIW_WQE_VALID);
712         } else {
713                 pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
714                         qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
715
716                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
717                                    RDMAP_ETYPE_REMOTE_OPERATION,
718                                    RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
719                 rv = -EPROTO;
720         }
721
722         spin_unlock_irqrestore(&qp->sq_lock, flags);
723
724         if (run_sq)
725                 rv = siw_sq_start(qp);
726
727         return rv;
728 }
729
730 /*
731  * Only called at start of Read.Resonse processing.
732  * Transfer pending Read from tip of ORQ into currrent rx wqe,
733  * but keep ORQ entry valid until Read.Response processing done.
734  * No Queue locking needed.
735  */
736 static int siw_orqe_start_rx(struct siw_qp *qp)
737 {
738         struct siw_sqe *orqe;
739         struct siw_wqe *wqe = NULL;
740
741         /* make sure ORQ indices are current */
742         smp_mb();
743
744         orqe = orq_get_current(qp);
745         if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
746                 /* RRESP is a TAGGED RDMAP operation */
747                 wqe = rx_wqe(&qp->rx_tagged);
748                 wqe->sqe.id = orqe->id;
749                 wqe->sqe.opcode = orqe->opcode;
750                 wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
751                 wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
752                 wqe->sqe.sge[0].length = orqe->sge[0].length;
753                 wqe->sqe.flags = orqe->flags;
754                 wqe->sqe.num_sge = 1;
755                 wqe->bytes = orqe->sge[0].length;
756                 wqe->processed = 0;
757                 wqe->mem[0] = NULL;
758                 /* make sure WQE is completely written before valid */
759                 smp_wmb();
760                 wqe->wr_status = SIW_WR_INPROGRESS;
761
762                 return 0;
763         }
764         return -EPROTO;
765 }
766
767 /*
768  * siw_proc_rresp:
769  *
770  * Place incoming RRESP data into memory referenced by RREQ WQE
771  * which is at the tip of the ORQ
772  *
773  * Function supports partially received RRESP's (suspending/resuming
774  * current receive processing)
775  */
776 int siw_proc_rresp(struct siw_qp *qp)
777 {
778         struct siw_rx_stream *srx = &qp->rx_stream;
779         struct siw_rx_fpdu *frx = &qp->rx_tagged;
780         struct siw_wqe *wqe = rx_wqe(frx);
781         struct siw_mem **mem, *mem_p;
782         struct siw_sge *sge;
783         int bytes, rv;
784
785         if (frx->first_ddp_seg) {
786                 if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
787                         pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
788                                 qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
789                         rv = -EPROTO;
790                         goto error_term;
791                 }
792                 /*
793                  * fetch pending RREQ from orq
794                  */
795                 rv = siw_orqe_start_rx(qp);
796                 if (rv) {
797                         pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
798                                 qp_id(qp), qp->orq_get % qp->attrs.orq_size);
799                         goto error_term;
800                 }
801                 rv = siw_rresp_check_ntoh(srx, frx);
802                 if (unlikely(rv)) {
803                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
804                         return rv;
805                 }
806         } else {
807                 if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
808                         pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
809                                 qp_id(qp), wqe->wr_status);
810                         rv = -EPROTO;
811                         goto error_term;
812                 }
813         }
814         if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
815                 return 0;
816
817         sge = wqe->sqe.sge; /* there is only one */
818         mem = &wqe->mem[0];
819
820         if (!(*mem)) {
821                 /*
822                  * check target memory which resolves memory on first fragment
823                  */
824                 rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
825                                    wqe->bytes);
826                 if (unlikely(rv)) {
827                         siw_dbg_qp(qp, "target mem check: %d\n", rv);
828                         wqe->wc_status = SIW_WC_LOC_PROT_ERR;
829
830                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
831                                            DDP_ETYPE_TAGGED_BUF,
832                                            siw_tagged_error(-rv), 0);
833
834                         siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
835
836                         return -EINVAL;
837                 }
838         }
839         mem_p = *mem;
840
841         bytes = min(srx->fpdu_part_rem, srx->skb_new);
842
843         if (mem_p->mem_obj == NULL)
844                 rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed),
845                                 bytes);
846         else if (!mem_p->is_pbl)
847                 rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
848                                  bytes);
849         else
850                 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
851                                 sge->laddr + wqe->processed, bytes);
852         if (rv != bytes) {
853                 wqe->wc_status = SIW_WC_GENERAL_ERR;
854                 rv = -EINVAL;
855                 goto error_term;
856         }
857         srx->fpdu_part_rem -= rv;
858         srx->fpdu_part_rcvd += rv;
859         wqe->processed += rv;
860
861         if (!srx->fpdu_part_rem) {
862                 srx->ddp_to += srx->fpdu_part_rcvd;
863                 return 0;
864         }
865         return -EAGAIN;
866
867 error_term:
868         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
869                            DDP_ECODE_CATASTROPHIC, 0);
870         return rv;
871 }
872
873 int siw_proc_terminate(struct siw_qp *qp)
874 {
875         struct siw_rx_stream *srx = &qp->rx_stream;
876         struct sk_buff *skb = srx->skb;
877         struct iwarp_terminate *term = &srx->hdr.terminate;
878         union iwarp_hdr term_info;
879         u8 *infop = (u8 *)&term_info;
880         enum rdma_opcode op;
881         u16 to_copy = sizeof(struct iwarp_ctrl);
882
883         pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
884                 __rdmap_term_layer(term), __rdmap_term_etype(term),
885                 __rdmap_term_ecode(term));
886
887         if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
888             be32_to_cpu(term->ddp_msn) !=
889                     qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
890             be32_to_cpu(term->ddp_mo) != 0) {
891                 pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
892                         be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
893                         be32_to_cpu(term->ddp_mo));
894                 return -ECONNRESET;
895         }
896         /*
897          * Receive remaining pieces of TERM if indicated
898          */
899         if (!term->flag_m)
900                 return -ECONNRESET;
901
902         /* Do not take the effort to reassemble a network fragmented
903          * TERM message
904          */
905         if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
906                 return -ECONNRESET;
907
908         memset(infop, 0, sizeof(term_info));
909
910         skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
911
912         op = __rdmap_get_opcode(&term_info.ctrl);
913         if (op >= RDMAP_TERMINATE)
914                 goto out;
915
916         infop += to_copy;
917         srx->skb_offset += to_copy;
918         srx->skb_new -= to_copy;
919         srx->skb_copied += to_copy;
920         srx->fpdu_part_rcvd += to_copy;
921         srx->fpdu_part_rem -= to_copy;
922
923         to_copy = iwarp_pktinfo[op].hdr_len - to_copy;
924
925         /* Again, no network fragmented TERM's */
926         if (to_copy + MPA_CRC_SIZE > srx->skb_new)
927                 return -ECONNRESET;
928
929         skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
930
931         if (term->flag_r) {
932                 siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
933                            op, be16_to_cpu(term_info.ctrl.mpa_len),
934                            term->flag_m ? "valid" : "invalid");
935         } else if (term->flag_d) {
936                 siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
937                            op, be16_to_cpu(term_info.ctrl.mpa_len),
938                            term->flag_m ? "valid" : "invalid");
939         }
940 out:
941         srx->skb_new -= to_copy;
942         srx->skb_offset += to_copy;
943         srx->skb_copied += to_copy;
944         srx->fpdu_part_rcvd += to_copy;
945         srx->fpdu_part_rem -= to_copy;
946
947         return -ECONNRESET;
948 }
949
950 static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
951 {
952         struct sk_buff *skb = srx->skb;
953         u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
954         __wsum crc_in, crc_own = 0;
955
956         siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
957                    srx->fpdu_part_rem, srx->skb_new, srx->pad);
958
959         if (srx->skb_new < srx->fpdu_part_rem)
960                 return -EAGAIN;
961
962         skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);
963
964         if (srx->mpa_crc_hd && srx->pad)
965                 crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
966
967         srx->skb_new -= srx->fpdu_part_rem;
968         srx->skb_offset += srx->fpdu_part_rem;
969         srx->skb_copied += srx->fpdu_part_rem;
970
971         if (!srx->mpa_crc_hd)
972                 return 0;
973
974         /*
975          * CRC32 is computed, transmitted and received directly in NBO,
976          * so there's never a reason to convert byte order.
977          */
978         crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
979         crc_in = (__force __wsum)srx->trailer.crc;
980
981         if (unlikely(crc_in != crc_own)) {
982                 pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
983                         crc_in, crc_own, qp->rx_stream.rdmap_op);
984
985                 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
986                                    LLP_ETYPE_MPA,
987                                    LLP_ECODE_RECEIVED_CRC, 0);
988                 return -EINVAL;
989         }
990         return 0;
991 }
992
993 #define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)
994
995 static int siw_get_hdr(struct siw_rx_stream *srx)
996 {
997         struct sk_buff *skb = srx->skb;
998         struct siw_qp *qp = rx_qp(srx);
999         struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
1000         struct siw_rx_fpdu *frx;
1001         u8 opcode;
1002         int bytes;
1003
1004         if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
1005                 /*
1006                  * copy a mimimum sized (tagged) DDP frame control part
1007                  */
1008                 bytes = min_t(int, srx->skb_new,
1009                               MIN_DDP_HDR - srx->fpdu_part_rcvd);
1010
1011                 skb_copy_bits(skb, srx->skb_offset,
1012                               (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1013
1014                 srx->fpdu_part_rcvd += bytes;
1015
1016                 srx->skb_new -= bytes;
1017                 srx->skb_offset += bytes;
1018                 srx->skb_copied += bytes;
1019
1020                 if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
1021                         return -EAGAIN;
1022
1023                 if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
1024                         enum ddp_etype etype;
1025                         enum ddp_ecode ecode;
1026
1027                         pr_warn("siw: received ddp version unsupported %d\n",
1028                                 __ddp_get_version(c_hdr));
1029
1030                         if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
1031                                 etype = DDP_ETYPE_TAGGED_BUF;
1032                                 ecode = DDP_ECODE_T_VERSION;
1033                         } else {
1034                                 etype = DDP_ETYPE_UNTAGGED_BUF;
1035                                 ecode = DDP_ECODE_UT_VERSION;
1036                         }
1037                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
1038                                            etype, ecode, 0);
1039                         return -EINVAL;
1040                 }
1041                 if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
1042                         pr_warn("siw: received rdmap version unsupported %d\n",
1043                                 __rdmap_get_version(c_hdr));
1044
1045                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1046                                            RDMAP_ETYPE_REMOTE_OPERATION,
1047                                            RDMAP_ECODE_VERSION, 0);
1048                         return -EINVAL;
1049                 }
1050                 opcode = __rdmap_get_opcode(c_hdr);
1051
1052                 if (opcode > RDMAP_TERMINATE) {
1053                         pr_warn("siw: received unknown packet type %u\n",
1054                                 opcode);
1055
1056                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1057                                            RDMAP_ETYPE_REMOTE_OPERATION,
1058                                            RDMAP_ECODE_OPCODE, 0);
1059                         return -EINVAL;
1060                 }
1061                 siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
1062         } else {
1063                 opcode = __rdmap_get_opcode(c_hdr);
1064         }
1065         set_rx_fpdu_context(qp, opcode);
1066         frx = qp->rx_fpdu;
1067
1068         /*
1069          * Figure out len of current hdr: variable length of
1070          * iwarp hdr may force us to copy hdr information in
1071          * two steps. Only tagged DDP messages are already
1072          * completely received.
1073          */
1074         if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
1075                 bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;
1076
1077                 if (srx->skb_new < bytes)
1078                         return -EAGAIN;
1079
1080                 skb_copy_bits(skb, srx->skb_offset,
1081                               (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1082
1083                 srx->fpdu_part_rcvd += bytes;
1084
1085                 srx->skb_new -= bytes;
1086                 srx->skb_offset += bytes;
1087                 srx->skb_copied += bytes;
1088         }
1089
1090         /*
1091          * DDP/RDMAP header receive completed. Check if the current
1092          * DDP segment starts a new RDMAP message or continues a previously
1093          * started RDMAP message.
1094          *
1095          * Alternating reception of DDP segments (or FPDUs) from incomplete
1096          * tagged and untagged RDMAP messages is supported, as long as
1097          * the current tagged or untagged message gets eventually completed
1098          * w/o intersection from another message of the same type
1099          * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
1100          * but not by a READ RESPONSE etc.
1101          */
1102         if (srx->mpa_crc_hd) {
1103                 /*
1104                  * Restart CRC computation
1105                  */
1106                 crypto_shash_init(srx->mpa_crc_hd);
1107                 crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
1108                                     srx->fpdu_part_rcvd);
1109         }
1110         if (frx->more_ddp_segs) {
1111                 frx->first_ddp_seg = 0;
1112                 if (frx->prev_rdmap_op != opcode) {
1113                         pr_warn("siw: packet intersection: %u : %u\n",
1114                                 frx->prev_rdmap_op, opcode);
1115                         /*
1116                          * The last inbound RDMA operation of same type
1117                          * (tagged or untagged) is left unfinished.
1118                          * To complete it in error, make it the current
1119                          * operation again, even with the header already
1120                          * overwritten. For error handling, only the opcode
1121                          * and current rx context are relevant.
1122                          */
1123                         set_rx_fpdu_context(qp, frx->prev_rdmap_op);
1124                         __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
1125                         return -EPROTO;
1126                 }
1127         } else {
1128                 frx->prev_rdmap_op = opcode;
1129                 frx->first_ddp_seg = 1;
1130         }
1131         frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;
1132
1133         return 0;
1134 }
1135
1136 static int siw_check_tx_fence(struct siw_qp *qp)
1137 {
1138         struct siw_wqe *tx_waiting = tx_wqe(qp);
1139         struct siw_sqe *rreq;
1140         int resume_tx = 0, rv = 0;
1141         unsigned long flags;
1142
1143         spin_lock_irqsave(&qp->orq_lock, flags);
1144
1145         rreq = orq_get_current(qp);
1146
1147         /* free current orq entry */
1148         WRITE_ONCE(rreq->flags, 0);
1149
1150         if (qp->tx_ctx.orq_fence) {
1151                 if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
1152                         pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
1153                                 qp_id(qp), tx_waiting->wr_status);
1154                         rv = -EPROTO;
1155                         goto out;
1156                 }
1157                 /* resume SQ processing */
1158                 if (tx_waiting->sqe.opcode == SIW_OP_READ ||
1159                     tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
1160                         rreq = orq_get_tail(qp);
1161                         if (unlikely(!rreq)) {
1162                                 pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
1163                                 rv = -EPROTO;
1164                                 goto out;
1165                         }
1166                         siw_read_to_orq(rreq, &tx_waiting->sqe);
1167
1168                         qp->orq_put++;
1169                         qp->tx_ctx.orq_fence = 0;
1170                         resume_tx = 1;
1171
1172                 } else if (siw_orq_empty(qp)) {
1173                         qp->tx_ctx.orq_fence = 0;
1174                         resume_tx = 1;
1175                 } else {
1176                         pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
1177                                 qp_id(qp), qp->orq_get, qp->orq_put);
1178                         rv = -EPROTO;
1179                 }
1180         }
1181         qp->orq_get++;
1182 out:
1183         spin_unlock_irqrestore(&qp->orq_lock, flags);
1184
1185         if (resume_tx)
1186                 rv = siw_sq_start(qp);
1187
1188         return rv;
1189 }
1190
1191 /*
1192  * siw_rdmap_complete()
1193  *
1194  * Complete processing of an RDMA message after receiving all
1195  * DDP segmens or ABort processing after encountering error case.
1196  *
1197  *   o SENDs + RRESPs will need for completion,
1198  *   o RREQs need for  READ RESPONSE initialization
1199  *   o WRITEs need memory dereferencing
1200  *
1201  * TODO: Failed WRITEs need local error to be surfaced.
1202  */
1203 static int siw_rdmap_complete(struct siw_qp *qp, int error)
1204 {
1205         struct siw_rx_stream *srx = &qp->rx_stream;
1206         struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
1207         enum siw_wc_status wc_status = wqe->wc_status;
1208         u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
1209         int rv = 0;
1210
1211         switch (opcode) {
1212         case RDMAP_SEND_SE:
1213         case RDMAP_SEND_SE_INVAL:
1214                 wqe->rqe.flags |= SIW_WQE_SOLICITED;
1215                 /* Fall through */
1216
1217         case RDMAP_SEND:
1218         case RDMAP_SEND_INVAL:
1219                 if (wqe->wr_status == SIW_WR_IDLE)
1220                         break;
1221
1222                 srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
1223
1224                 if (error != 0 && wc_status == SIW_WC_SUCCESS)
1225                         wc_status = SIW_WC_GENERAL_ERR;
1226                 /*
1227                  * Handle STag invalidation request
1228                  */
1229                 if (wc_status == SIW_WC_SUCCESS &&
1230                     (opcode == RDMAP_SEND_INVAL ||
1231                      opcode == RDMAP_SEND_SE_INVAL)) {
1232                         rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
1233                         if (rv) {
1234                                 siw_init_terminate(
1235                                         qp, TERM_ERROR_LAYER_RDMAP,
1236                                         rv == -EACCES ?
1237                                                 RDMAP_ETYPE_REMOTE_PROTECTION :
1238                                                 RDMAP_ETYPE_REMOTE_OPERATION,
1239                                         RDMAP_ECODE_CANNOT_INVALIDATE, 0);
1240
1241                                 wc_status = SIW_WC_REM_INV_REQ_ERR;
1242                         }
1243                         rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1244                                               rv ? 0 : srx->inval_stag,
1245                                               wc_status);
1246                 } else {
1247                         rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1248                                               0, wc_status);
1249                 }
1250                 siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
1251                 break;
1252
1253         case RDMAP_RDMA_READ_RESP:
1254                 if (wqe->wr_status == SIW_WR_IDLE)
1255                         break;
1256
1257                 if (error != 0) {
1258                         if ((srx->state == SIW_GET_HDR &&
1259                              qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
1260                                 /* possible RREQ in ORQ left untouched */
1261                                 break;
1262
1263                         if (wc_status == SIW_WC_SUCCESS)
1264                                 wc_status = SIW_WC_GENERAL_ERR;
1265                 } else if (qp->kernel_verbs &&
1266                            rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
1267                         /*
1268                          * Handle any STag invalidation request
1269                          */
1270                         rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
1271                         if (rv) {
1272                                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
1273                                                    RDMAP_ETYPE_CATASTROPHIC,
1274                                                    RDMAP_ECODE_UNSPECIFIED, 0);
1275
1276                                 if (wc_status == SIW_WC_SUCCESS) {
1277                                         wc_status = SIW_WC_GENERAL_ERR;
1278                                         error = rv;
1279                                 }
1280                         }
1281                 }
1282                 /*
1283                  * All errors turn the wqe into signalled.
1284                  */
1285                 if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
1286                         rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
1287                                               wc_status);
1288                 siw_wqe_put_mem(wqe, SIW_OP_READ);
1289
1290                 if (!error)
1291                         rv = siw_check_tx_fence(qp);
1292                 else
1293                         /* Disable current ORQ eleement */
1294                         WRITE_ONCE(orq_get_current(qp)->flags, 0);
1295                 break;
1296
1297         case RDMAP_RDMA_READ_REQ:
1298                 if (!error) {
1299                         rv = siw_init_rresp(qp, srx);
1300                         srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
1301                 }
1302                 break;
1303
1304         case RDMAP_RDMA_WRITE:
1305                 if (wqe->wr_status == SIW_WR_IDLE)
1306                         break;
1307
1308                 /*
1309                  * Free References from memory object if
1310                  * attached to receive context (inbound WRITE).
1311                  * While a zero-length WRITE is allowed,
1312                  * no memory reference got created.
1313                  */
1314                 if (rx_mem(&qp->rx_tagged)) {
1315                         siw_mem_put(rx_mem(&qp->rx_tagged));
1316                         rx_mem(&qp->rx_tagged) = NULL;
1317                 }
1318                 break;
1319
1320         default:
1321                 break;
1322         }
1323         wqe->wr_status = SIW_WR_IDLE;
1324
1325         return rv;
1326 }
1327
1328 /*
1329  * siw_tcp_rx_data()
1330  *
1331  * Main routine to consume inbound TCP payload
1332  *
1333  * @rd_desc:    read descriptor
1334  * @skb:        socket buffer
1335  * @off:        offset in skb
1336  * @len:        skb->len - offset : payload in skb
1337  */
1338 int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
1339                     unsigned int off, size_t len)
1340 {
1341         struct siw_qp *qp = rd_desc->arg.data;
1342         struct siw_rx_stream *srx = &qp->rx_stream;
1343         int rv;
1344
1345         srx->skb = skb;
1346         srx->skb_new = skb->len - off;
1347         srx->skb_offset = off;
1348         srx->skb_copied = 0;
1349
1350         siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);
1351
1352         while (srx->skb_new) {
1353                 int run_completion = 1;
1354
1355                 if (unlikely(srx->rx_suspend)) {
1356                         /* Do not process any more data */
1357                         srx->skb_copied += srx->skb_new;
1358                         break;
1359                 }
1360                 switch (srx->state) {
1361                 case SIW_GET_HDR:
1362                         rv = siw_get_hdr(srx);
1363                         if (!rv) {
1364                                 srx->fpdu_part_rem =
1365                                         be16_to_cpu(srx->hdr.ctrl.mpa_len) -
1366                                         srx->fpdu_part_rcvd + MPA_HDR_SIZE;
1367
1368                                 if (srx->fpdu_part_rem)
1369                                         srx->pad = -srx->fpdu_part_rem & 0x3;
1370                                 else
1371                                         srx->pad = 0;
1372
1373                                 srx->state = SIW_GET_DATA_START;
1374                                 srx->fpdu_part_rcvd = 0;
1375                         }
1376                         break;
1377
1378                 case SIW_GET_DATA_MORE:
1379                         /*
1380                          * Another data fragment of the same DDP segment.
1381                          * Setting first_ddp_seg = 0 avoids repeating
1382                          * initializations that shall occur only once per
1383                          * DDP segment.
1384                          */
1385                         qp->rx_fpdu->first_ddp_seg = 0;
1386                         /* Fall through */
1387
1388                 case SIW_GET_DATA_START:
1389                         /*
1390                          * Headers will be checked by the opcode-specific
1391                          * data receive function below.
1392                          */
1393                         rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
1394                         if (!rv) {
1395                                 int mpa_len =
1396                                         be16_to_cpu(srx->hdr.ctrl.mpa_len)
1397                                         + MPA_HDR_SIZE;
1398
1399                                 srx->fpdu_part_rem = (-mpa_len & 0x3)
1400                                                       + MPA_CRC_SIZE;
1401                                 srx->fpdu_part_rcvd = 0;
1402                                 srx->state = SIW_GET_TRAILER;
1403                         } else {
1404                                 if (unlikely(rv == -ECONNRESET))
1405                                         run_completion = 0;
1406                                 else
1407                                         srx->state = SIW_GET_DATA_MORE;
1408                         }
1409                         break;
1410
1411                 case SIW_GET_TRAILER:
1412                         /*
1413                          * read CRC + any padding
1414                          */
1415                         rv = siw_get_trailer(qp, srx);
1416                         if (likely(!rv)) {
1417                                 /*
1418                                  * FPDU completed.
1419                                  * complete RDMAP message if last fragment
1420                                  */
1421                                 srx->state = SIW_GET_HDR;
1422                                 srx->fpdu_part_rcvd = 0;
1423
1424                                 if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
1425                                       DDP_FLAG_LAST))
1426                                         /* more frags */
1427                                         break;
1428
1429                                 rv = siw_rdmap_complete(qp, 0);
1430                                 run_completion = 0;
1431                         }
1432                         break;
1433
1434                 default:
1435                         pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
1436                         rv = -EPROTO;
1437                         run_completion = 0;
1438                 }
1439                 if (unlikely(rv != 0 && rv != -EAGAIN)) {
1440                         if ((srx->state > SIW_GET_HDR ||
1441                              qp->rx_fpdu->more_ddp_segs) && run_completion)
1442                                 siw_rdmap_complete(qp, rv);
1443
1444                         siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
1445                                    srx->state);
1446
1447                         siw_qp_cm_drop(qp, 1);
1448
1449                         break;
1450                 }
1451                 if (rv) {
1452                         siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
1453                                    srx->state, srx->fpdu_part_rem);
1454                         break;
1455                 }
1456         }
1457         return srx->skb_copied;
1458 }