Merge branch 'i2c/for-current-fixed' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / drivers / infiniband / sw / siw / siw_qp_rx.c
CommitLineData
8b6a361b
BM
1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2
3/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4/* Copyright (c) 2008-2019, IBM Corporation */
5
6#include <linux/errno.h>
7#include <linux/types.h>
8#include <linux/net.h>
9#include <linux/scatterlist.h>
10#include <linux/highmem.h>
11
12#include <rdma/iw_cm.h>
13#include <rdma/ib_verbs.h>
14
15#include "siw.h"
16#include "siw_verbs.h"
17#include "siw_mem.h"
18
19/*
20 * siw_rx_umem()
21 *
22 * Receive data of @len into target referenced by @dest_addr.
23 *
24 * @srx: Receive Context
25 * @umem: siw representation of target memory
26 * @dest_addr: user virtual address
27 * @len: number of bytes to place
28 */
29static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
30 u64 dest_addr, int len)
31{
32 int copied = 0;
33
34 while (len) {
35 struct page *p;
36 int pg_off, bytes, rv;
37 void *dest;
38
39 p = siw_get_upage(umem, dest_addr);
40 if (unlikely(!p)) {
41 pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n",
42 __func__, qp_id(rx_qp(srx)),
43 (void *)dest_addr, (void *)umem->fp_addr);
44 /* siw internal error */
45 srx->skb_copied += copied;
46 srx->skb_new -= copied;
47
48 return -EFAULT;
49 }
50 pg_off = dest_addr & ~PAGE_MASK;
51 bytes = min(len, (int)PAGE_SIZE - pg_off);
52
53 siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes);
54
55 dest = kmap_atomic(p);
56 rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
57 bytes);
58
59 if (unlikely(rv)) {
60 kunmap_atomic(dest);
61 srx->skb_copied += copied;
62 srx->skb_new -= copied;
63
64 pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
65 qp_id(rx_qp(srx)), __func__, len, p, rv);
66
67 return -EFAULT;
68 }
69 if (srx->mpa_crc_hd) {
70 if (rx_qp(srx)->kernel_verbs) {
71 crypto_shash_update(srx->mpa_crc_hd,
72 (u8 *)(dest + pg_off), bytes);
73 kunmap_atomic(dest);
74 } else {
75 kunmap_atomic(dest);
76 /*
77 * Do CRC on original, not target buffer.
78 * Some user land applications may
79 * concurrently write the target buffer,
80 * which would yield a broken CRC.
81 * Walking the skb twice is very ineffcient.
82 * Folding the CRC into skb_copy_bits()
83 * would be much better, but is currently
84 * not supported.
85 */
86 siw_crc_skb(srx, bytes);
87 }
88 } else {
89 kunmap_atomic(dest);
90 }
91 srx->skb_offset += bytes;
92 copied += bytes;
93 len -= bytes;
94 dest_addr += bytes;
95 pg_off = 0;
96 }
97 srx->skb_copied += copied;
98 srx->skb_new -= copied;
99
100 return copied;
101}
102
103static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
104{
105 int rv;
106
107 siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len);
108
109 rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
110 if (unlikely(rv)) {
111 pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n",
112 qp_id(rx_qp(srx)), __func__, len, kva, rv);
113
114 return rv;
115 }
116 if (srx->mpa_crc_hd)
117 crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
118
119 srx->skb_offset += len;
120 srx->skb_copied += len;
121 srx->skb_new -= len;
122
123 return len;
124}
125
126static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
127 struct siw_mem *mem, u64 addr, int len)
128{
129 struct siw_pbl *pbl = mem->pbl;
130 u64 offset = addr - mem->va;
131 int copied = 0;
132
133 while (len) {
134 int bytes;
135 u64 buf_addr =
136 siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
137 if (!buf_addr)
138 break;
139
140 bytes = min(bytes, len);
141 if (siw_rx_kva(srx, (void *)buf_addr, bytes) == bytes) {
142 copied += bytes;
143 offset += bytes;
144 len -= bytes;
145 } else {
146 break;
147 }
148 }
149 return copied;
150}
151
152/*
153 * siw_rresp_check_ntoh()
154 *
155 * Check incoming RRESP fragment header against expected
156 * header values and update expected values for potential next
157 * fragment.
158 *
159 * NOTE: This function must be called only if a RRESP DDP segment
160 * starts but not for fragmented consecutive pieces of an
161 * already started DDP segment.
162 */
163static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
164 struct siw_rx_fpdu *frx)
165{
166 struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
167 struct siw_wqe *wqe = &frx->wqe_active;
168 enum ddp_ecode ecode;
169
170 u32 sink_stag = be32_to_cpu(rresp->sink_stag);
171 u64 sink_to = be64_to_cpu(rresp->sink_to);
172
173 if (frx->first_ddp_seg) {
174 srx->ddp_stag = wqe->sqe.sge[0].lkey;
175 srx->ddp_to = wqe->sqe.sge[0].laddr;
176 frx->pbl_idx = 0;
177 }
178 /* Below checks extend beyond the semantics of DDP, and
179 * into RDMAP:
180 * We check if the read response matches exactly the
181 * read request which was send to the remote peer to
182 * trigger this read response. RFC5040/5041 do not
183 * always have a proper error code for the detected
184 * error cases. We choose 'base or bounds error' for
185 * cases where the inbound STag is valid, but offset
186 * or length do not match our response receive state.
187 */
188 if (unlikely(srx->ddp_stag != sink_stag)) {
189 pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
190 qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
191 ecode = DDP_ECODE_T_INVALID_STAG;
192 goto error;
193 }
194 if (unlikely(srx->ddp_to != sink_to)) {
195 pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
196 qp_id(rx_qp(srx)), (unsigned long long)sink_to,
197 (unsigned long long)srx->ddp_to);
198 ecode = DDP_ECODE_T_BASE_BOUNDS;
199 goto error;
200 }
201 if (unlikely(!frx->more_ddp_segs &&
202 (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
203 pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
204 qp_id(rx_qp(srx)),
205 wqe->processed + srx->fpdu_part_rem, wqe->bytes);
206 ecode = DDP_ECODE_T_BASE_BOUNDS;
207 goto error;
208 }
209 return 0;
210error:
211 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
212 DDP_ETYPE_TAGGED_BUF, ecode, 0);
213 return -EINVAL;
214}
215
216/*
217 * siw_write_check_ntoh()
218 *
219 * Check incoming WRITE fragment header against expected
220 * header values and update expected values for potential next
221 * fragment
222 *
223 * NOTE: This function must be called only if a WRITE DDP segment
224 * starts but not for fragmented consecutive pieces of an
225 * already started DDP segment.
226 */
227static int siw_write_check_ntoh(struct siw_rx_stream *srx,
228 struct siw_rx_fpdu *frx)
229{
230 struct iwarp_rdma_write *write = &srx->hdr.rwrite;
231 enum ddp_ecode ecode;
232
233 u32 sink_stag = be32_to_cpu(write->sink_stag);
234 u64 sink_to = be64_to_cpu(write->sink_to);
235
236 if (frx->first_ddp_seg) {
237 srx->ddp_stag = sink_stag;
238 srx->ddp_to = sink_to;
239 frx->pbl_idx = 0;
240 } else {
241 if (unlikely(srx->ddp_stag != sink_stag)) {
242 pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
243 qp_id(rx_qp(srx)), sink_stag,
244 srx->ddp_stag);
245 ecode = DDP_ECODE_T_INVALID_STAG;
246 goto error;
247 }
248 if (unlikely(srx->ddp_to != sink_to)) {
249 pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
250 qp_id(rx_qp(srx)),
251 (unsigned long long)sink_to,
252 (unsigned long long)srx->ddp_to);
253 ecode = DDP_ECODE_T_BASE_BOUNDS;
254 goto error;
255 }
256 }
257 return 0;
258error:
259 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
260 DDP_ETYPE_TAGGED_BUF, ecode, 0);
261 return -EINVAL;
262}
263
264/*
265 * siw_send_check_ntoh()
266 *
267 * Check incoming SEND fragment header against expected
268 * header values and update expected MSN if no next
269 * fragment expected
270 *
271 * NOTE: This function must be called only if a SEND DDP segment
272 * starts but not for fragmented consecutive pieces of an
273 * already started DDP segment.
274 */
275static int siw_send_check_ntoh(struct siw_rx_stream *srx,
276 struct siw_rx_fpdu *frx)
277{
278 struct iwarp_send_inv *send = &srx->hdr.send_inv;
279 struct siw_wqe *wqe = &frx->wqe_active;
280 enum ddp_ecode ecode;
281
282 u32 ddp_msn = be32_to_cpu(send->ddp_msn);
283 u32 ddp_mo = be32_to_cpu(send->ddp_mo);
284 u32 ddp_qn = be32_to_cpu(send->ddp_qn);
285
286 if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
287 pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
288 qp_id(rx_qp(srx)), ddp_qn);
289 ecode = DDP_ECODE_UT_INVALID_QN;
290 goto error;
291 }
292 if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
293 pr_warn("siw: [QP %u]: send msn: %u != %u\n",
294 qp_id(rx_qp(srx)), ddp_msn,
295 srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
296 ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
297 goto error;
298 }
299 if (unlikely(ddp_mo != wqe->processed)) {
300 pr_warn("siw: [QP %u], send mo: %u != %u\n",
301 qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
302 ecode = DDP_ECODE_UT_INVALID_MO;
303 goto error;
304 }
305 if (frx->first_ddp_seg) {
306 /* initialize user memory write position */
307 frx->sge_idx = 0;
308 frx->sge_off = 0;
309 frx->pbl_idx = 0;
310
311 /* only valid for SEND_INV and SEND_SE_INV operations */
312 srx->inval_stag = be32_to_cpu(send->inval_stag);
313 }
314 if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
315 siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
316 wqe->bytes, wqe->processed, srx->fpdu_part_rem);
317 wqe->wc_status = SIW_WC_LOC_LEN_ERR;
318 ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
319 goto error;
320 }
321 return 0;
322error:
323 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
324 DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
325 return -EINVAL;
326}
327
328static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
329{
330 struct siw_rqe *rqe;
331 struct siw_srq *srq;
332 struct siw_wqe *wqe = NULL;
333 bool srq_event = false;
334 unsigned long flags;
335
336 srq = qp->srq;
337 if (srq) {
338 spin_lock_irqsave(&srq->lock, flags);
339 if (unlikely(!srq->num_rqe))
340 goto out;
341
342 rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
343 } else {
344 if (unlikely(!qp->recvq))
345 goto out;
346
347 rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
348 }
349 if (likely(rqe->flags == SIW_WQE_VALID)) {
350 int num_sge = rqe->num_sge;
351
352 if (likely(num_sge <= SIW_MAX_SGE)) {
353 int i = 0;
354
355 wqe = rx_wqe(&qp->rx_untagged);
356 rx_type(wqe) = SIW_OP_RECEIVE;
357 wqe->wr_status = SIW_WR_INPROGRESS;
358 wqe->bytes = 0;
359 wqe->processed = 0;
360
361 wqe->rqe.id = rqe->id;
362 wqe->rqe.num_sge = num_sge;
363
364 while (i < num_sge) {
365 wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
366 wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
367 wqe->rqe.sge[i].length = rqe->sge[i].length;
368 wqe->bytes += wqe->rqe.sge[i].length;
369 wqe->mem[i] = NULL;
370 i++;
371 }
372 /* can be re-used by appl */
373 smp_store_mb(rqe->flags, 0);
374 } else {
375 siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
376 if (srq)
377 spin_unlock_irqrestore(&srq->lock, flags);
378 return NULL;
379 }
380 if (!srq) {
381 qp->rq_get++;
382 } else {
383 if (srq->armed) {
384 /* Test SRQ limit */
385 u32 off = (srq->rq_get + srq->limit) %
386 srq->num_rqe;
387 struct siw_rqe *rqe2 = &srq->recvq[off];
388
389 if (!(rqe2->flags & SIW_WQE_VALID)) {
390 srq->armed = 0;
391 srq_event = true;
392 }
393 }
394 srq->rq_get++;
395 }
396 }
397out:
398 if (srq) {
399 spin_unlock_irqrestore(&srq->lock, flags);
400 if (srq_event)
401 siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
402 }
403 return wqe;
404}
405
406/*
407 * siw_proc_send:
408 *
409 * Process one incoming SEND and place data into memory referenced by
410 * receive wqe.
411 *
412 * Function supports partially received sends (suspending/resuming
413 * current receive wqe processing)
414 *
415 * return value:
416 * 0: reached the end of a DDP segment
417 * -EAGAIN: to be called again to finish the DDP segment
418 */
419int siw_proc_send(struct siw_qp *qp)
420{
421 struct siw_rx_stream *srx = &qp->rx_stream;
422 struct siw_rx_fpdu *frx = &qp->rx_untagged;
423 struct siw_wqe *wqe;
424 u32 data_bytes; /* all data bytes available */
425 u32 rcvd_bytes; /* sum of data bytes rcvd */
426 int rv = 0;
427
428 if (frx->first_ddp_seg) {
429 wqe = siw_rqe_get(qp);
430 if (unlikely(!wqe)) {
431 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
432 DDP_ETYPE_UNTAGGED_BUF,
433 DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
434 return -ENOENT;
435 }
436 } else {
437 wqe = rx_wqe(frx);
438 }
439 if (srx->state == SIW_GET_DATA_START) {
440 rv = siw_send_check_ntoh(srx, frx);
441 if (unlikely(rv)) {
442 siw_qp_event(qp, IB_EVENT_QP_FATAL);
443 return rv;
444 }
445 if (!srx->fpdu_part_rem) /* zero length SEND */
446 return 0;
447 }
448 data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
449 rcvd_bytes = 0;
450
451 /* A zero length SEND will skip below loop */
452 while (data_bytes) {
453 struct ib_pd *pd;
454 struct siw_mem **mem, *mem_p;
455 struct siw_sge *sge;
456 u32 sge_bytes; /* data bytes avail for SGE */
457
458 sge = &wqe->rqe.sge[frx->sge_idx];
459
460 if (!sge->length) {
461 /* just skip empty sge's */
462 frx->sge_idx++;
463 frx->sge_off = 0;
464 frx->pbl_idx = 0;
465 continue;
466 }
467 sge_bytes = min(data_bytes, sge->length - frx->sge_off);
468 mem = &wqe->mem[frx->sge_idx];
469
470 /*
471 * check with QP's PD if no SRQ present, SRQ's PD otherwise
472 */
473 pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;
474
475 rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
476 frx->sge_off, sge_bytes);
477 if (unlikely(rv)) {
478 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
479 DDP_ETYPE_CATASTROPHIC,
480 DDP_ECODE_CATASTROPHIC, 0);
481
482 siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
483 break;
484 }
485 mem_p = *mem;
486 if (mem_p->mem_obj == NULL)
487 rv = siw_rx_kva(srx,
488 (void *)(sge->laddr + frx->sge_off),
489 sge_bytes);
490 else if (!mem_p->is_pbl)
491 rv = siw_rx_umem(srx, mem_p->umem,
492 sge->laddr + frx->sge_off, sge_bytes);
493 else
494 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
495 sge->laddr + frx->sge_off, sge_bytes);
496
497 if (unlikely(rv != sge_bytes)) {
498 wqe->processed += rcvd_bytes;
499
500 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
501 DDP_ETYPE_CATASTROPHIC,
502 DDP_ECODE_CATASTROPHIC, 0);
503 return -EINVAL;
504 }
505 frx->sge_off += rv;
506
507 if (frx->sge_off == sge->length) {
508 frx->sge_idx++;
509 frx->sge_off = 0;
510 frx->pbl_idx = 0;
511 }
512 data_bytes -= rv;
513 rcvd_bytes += rv;
514
515 srx->fpdu_part_rem -= rv;
516 srx->fpdu_part_rcvd += rv;
517 }
518 wqe->processed += rcvd_bytes;
519
520 if (!srx->fpdu_part_rem)
521 return 0;
522
523 return (rv < 0) ? rv : -EAGAIN;
524}
525
526/*
527 * siw_proc_write:
528 *
529 * Place incoming WRITE after referencing and checking target buffer
530
531 * Function supports partially received WRITEs (suspending/resuming
532 * current receive processing)
533 *
534 * return value:
535 * 0: reached the end of a DDP segment
536 * -EAGAIN: to be called again to finish the DDP segment
537 */
538int siw_proc_write(struct siw_qp *qp)
539{
540 struct siw_rx_stream *srx = &qp->rx_stream;
541 struct siw_rx_fpdu *frx = &qp->rx_tagged;
542 struct siw_mem *mem;
543 int bytes, rv;
544
545 if (srx->state == SIW_GET_DATA_START) {
546 if (!srx->fpdu_part_rem) /* zero length WRITE */
547 return 0;
548
549 rv = siw_write_check_ntoh(srx, frx);
550 if (unlikely(rv)) {
551 siw_qp_event(qp, IB_EVENT_QP_FATAL);
552 return rv;
553 }
554 }
555 bytes = min(srx->fpdu_part_rem, srx->skb_new);
556
557 if (frx->first_ddp_seg) {
558 struct siw_wqe *wqe = rx_wqe(frx);
559
560 rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
561 if (unlikely(!rx_mem(frx))) {
562 siw_dbg_qp(qp,
563 "sink stag not found/invalid, stag 0x%08x\n",
564 srx->ddp_stag);
565
566 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
567 DDP_ETYPE_TAGGED_BUF,
568 DDP_ECODE_T_INVALID_STAG, 0);
569 return -EINVAL;
570 }
571 wqe->rqe.num_sge = 1;
572 rx_type(wqe) = SIW_OP_WRITE;
573 wqe->wr_status = SIW_WR_INPROGRESS;
574 }
575 mem = rx_mem(frx);
576
577 /*
578 * Check if application re-registered memory with different
579 * key field of STag.
580 */
581 if (unlikely(mem->stag != srx->ddp_stag)) {
582 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
583 DDP_ETYPE_TAGGED_BUF,
584 DDP_ECODE_T_INVALID_STAG, 0);
585 return -EINVAL;
586 }
587 rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
588 IB_ACCESS_REMOTE_WRITE, bytes);
589 if (unlikely(rv)) {
590 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
591 DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
592 0);
593
594 siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
595
596 return -EINVAL;
597 }
598
599 if (mem->mem_obj == NULL)
600 rv = siw_rx_kva(srx,
601 (void *)(srx->ddp_to + srx->fpdu_part_rcvd),
602 bytes);
603 else if (!mem->is_pbl)
604 rv = siw_rx_umem(srx, mem->umem,
605 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
606 else
607 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
608 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
609
610 if (unlikely(rv != bytes)) {
611 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
612 DDP_ETYPE_CATASTROPHIC,
613 DDP_ECODE_CATASTROPHIC, 0);
614 return -EINVAL;
615 }
616 srx->fpdu_part_rem -= rv;
617 srx->fpdu_part_rcvd += rv;
618
619 if (!srx->fpdu_part_rem) {
620 srx->ddp_to += srx->fpdu_part_rcvd;
621 return 0;
622 }
623 return -EAGAIN;
624}
625
626/*
627 * Inbound RREQ's cannot carry user data.
628 */
629int siw_proc_rreq(struct siw_qp *qp)
630{
631 struct siw_rx_stream *srx = &qp->rx_stream;
632
633 if (!srx->fpdu_part_rem)
634 return 0;
635
636 pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
637 be16_to_cpu(srx->hdr.ctrl.mpa_len));
638
639 return -EPROTO;
640}
641
642/*
643 * siw_init_rresp:
644 *
645 * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
646 * Put it at the tail of the IRQ, if there is another WQE currently in
647 * transmit processing. If not, make it the current WQE to be processed
648 * and schedule transmit processing.
649 *
650 * Can be called from softirq context and from process
651 * context (RREAD socket loopback case!)
652 *
653 * return value:
654 * 0: success,
655 * failure code otherwise
656 */
657
658static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
659{
660 struct siw_wqe *tx_work = tx_wqe(qp);
661 struct siw_sqe *resp;
662
663 uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
664 laddr = be64_to_cpu(srx->hdr.rreq.source_to);
665 uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
666 lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
667 rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
668 msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);
669
670 int run_sq = 1, rv = 0;
671 unsigned long flags;
672
673 if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
674 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
675 DDP_ETYPE_UNTAGGED_BUF,
676 DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
677 return -EPROTO;
678 }
679 spin_lock_irqsave(&qp->sq_lock, flags);
680
681 if (tx_work->wr_status == SIW_WR_IDLE) {
682 /*
683 * immediately schedule READ response w/o
684 * consuming IRQ entry: IRQ must be empty.
685 */
686 tx_work->processed = 0;
687 tx_work->mem[0] = NULL;
688 tx_work->wr_status = SIW_WR_QUEUED;
689 resp = &tx_work->sqe;
690 } else {
691 resp = irq_alloc_free(qp);
692 run_sq = 0;
693 }
694 if (likely(resp)) {
695 resp->opcode = SIW_OP_READ_RESPONSE;
696
697 resp->sge[0].length = length;
698 resp->sge[0].laddr = laddr;
699 resp->sge[0].lkey = lkey;
700
701 /* Keep aside message sequence number for potential
702 * error reporting during Read Response generation.
703 */
704 resp->sge[1].length = msn;
705
706 resp->raddr = raddr;
707 resp->rkey = rkey;
708 resp->num_sge = length ? 1 : 0;
709
710 /* RRESP now valid as current TX wqe or placed into IRQ */
711 smp_store_mb(resp->flags, SIW_WQE_VALID);
712 } else {
713 pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp),
714 qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size);
715
716 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
717 RDMAP_ETYPE_REMOTE_OPERATION,
718 RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
719 rv = -EPROTO;
720 }
721
722 spin_unlock_irqrestore(&qp->sq_lock, flags);
723
724 if (run_sq)
725 rv = siw_sq_start(qp);
726
727 return rv;
728}
729
730/*
731 * Only called at start of Read.Resonse processing.
732 * Transfer pending Read from tip of ORQ into currrent rx wqe,
733 * but keep ORQ entry valid until Read.Response processing done.
734 * No Queue locking needed.
735 */
736static int siw_orqe_start_rx(struct siw_qp *qp)
737{
738 struct siw_sqe *orqe;
739 struct siw_wqe *wqe = NULL;
740
741 /* make sure ORQ indices are current */
742 smp_mb();
743
744 orqe = orq_get_current(qp);
745 if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
746 /* RRESP is a TAGGED RDMAP operation */
747 wqe = rx_wqe(&qp->rx_tagged);
748 wqe->sqe.id = orqe->id;
749 wqe->sqe.opcode = orqe->opcode;
750 wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
751 wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
752 wqe->sqe.sge[0].length = orqe->sge[0].length;
753 wqe->sqe.flags = orqe->flags;
754 wqe->sqe.num_sge = 1;
755 wqe->bytes = orqe->sge[0].length;
756 wqe->processed = 0;
757 wqe->mem[0] = NULL;
758 /* make sure WQE is completely written before valid */
759 smp_wmb();
760 wqe->wr_status = SIW_WR_INPROGRESS;
761
762 return 0;
763 }
764 return -EPROTO;
765}
766
767/*
768 * siw_proc_rresp:
769 *
770 * Place incoming RRESP data into memory referenced by RREQ WQE
771 * which is at the tip of the ORQ
772 *
773 * Function supports partially received RRESP's (suspending/resuming
774 * current receive processing)
775 */
776int siw_proc_rresp(struct siw_qp *qp)
777{
778 struct siw_rx_stream *srx = &qp->rx_stream;
779 struct siw_rx_fpdu *frx = &qp->rx_tagged;
780 struct siw_wqe *wqe = rx_wqe(frx);
781 struct siw_mem **mem, *mem_p;
782 struct siw_sge *sge;
783 int bytes, rv;
784
785 if (frx->first_ddp_seg) {
786 if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
787 pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
788 qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
789 rv = -EPROTO;
790 goto error_term;
791 }
792 /*
793 * fetch pending RREQ from orq
794 */
795 rv = siw_orqe_start_rx(qp);
796 if (rv) {
797 pr_warn("siw: [QP %u]: ORQ empty at idx %d\n",
798 qp_id(qp), qp->orq_get % qp->attrs.orq_size);
799 goto error_term;
800 }
801 rv = siw_rresp_check_ntoh(srx, frx);
802 if (unlikely(rv)) {
803 siw_qp_event(qp, IB_EVENT_QP_FATAL);
804 return rv;
805 }
806 } else {
807 if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
808 pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
809 qp_id(qp), wqe->wr_status);
810 rv = -EPROTO;
811 goto error_term;
812 }
813 }
814 if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
815 return 0;
816
817 sge = wqe->sqe.sge; /* there is only one */
818 mem = &wqe->mem[0];
819
820 if (!(*mem)) {
821 /*
822 * check target memory which resolves memory on first fragment
823 */
824 rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
825 wqe->bytes);
826 if (unlikely(rv)) {
827 siw_dbg_qp(qp, "target mem check: %d\n", rv);
828 wqe->wc_status = SIW_WC_LOC_PROT_ERR;
829
830 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
831 DDP_ETYPE_TAGGED_BUF,
832 siw_tagged_error(-rv), 0);
833
834 siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
835
836 return -EINVAL;
837 }
838 }
839 mem_p = *mem;
840
841 bytes = min(srx->fpdu_part_rem, srx->skb_new);
842
843 if (mem_p->mem_obj == NULL)
844 rv = siw_rx_kva(srx, (void *)(sge->laddr + wqe->processed),
845 bytes);
846 else if (!mem_p->is_pbl)
847 rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
848 bytes);
849 else
850 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
851 sge->laddr + wqe->processed, bytes);
852 if (rv != bytes) {
853 wqe->wc_status = SIW_WC_GENERAL_ERR;
854 rv = -EINVAL;
855 goto error_term;
856 }
857 srx->fpdu_part_rem -= rv;
858 srx->fpdu_part_rcvd += rv;
859 wqe->processed += rv;
860
861 if (!srx->fpdu_part_rem) {
862 srx->ddp_to += srx->fpdu_part_rcvd;
863 return 0;
864 }
865 return -EAGAIN;
866
867error_term:
868 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
869 DDP_ECODE_CATASTROPHIC, 0);
870 return rv;
871}
872
873int siw_proc_terminate(struct siw_qp *qp)
874{
875 struct siw_rx_stream *srx = &qp->rx_stream;
876 struct sk_buff *skb = srx->skb;
877 struct iwarp_terminate *term = &srx->hdr.terminate;
878 union iwarp_hdr term_info;
879 u8 *infop = (u8 *)&term_info;
880 enum rdma_opcode op;
881 u16 to_copy = sizeof(struct iwarp_ctrl);
882
883 pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
884 __rdmap_term_layer(term), __rdmap_term_etype(term),
885 __rdmap_term_ecode(term));
886
887 if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
888 be32_to_cpu(term->ddp_msn) !=
889 qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
890 be32_to_cpu(term->ddp_mo) != 0) {
891 pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
892 be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
893 be32_to_cpu(term->ddp_mo));
894 return -ECONNRESET;
895 }
896 /*
897 * Receive remaining pieces of TERM if indicated
898 */
899 if (!term->flag_m)
900 return -ECONNRESET;
901
902 /* Do not take the effort to reassemble a network fragmented
903 * TERM message
904 */
905 if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
906 return -ECONNRESET;
907
908 memset(infop, 0, sizeof(term_info));
909
910 skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
911
912 op = __rdmap_get_opcode(&term_info.ctrl);
913 if (op >= RDMAP_TERMINATE)
914 goto out;
915
916 infop += to_copy;
917 srx->skb_offset += to_copy;
918 srx->skb_new -= to_copy;
919 srx->skb_copied += to_copy;
920 srx->fpdu_part_rcvd += to_copy;
921 srx->fpdu_part_rem -= to_copy;
922
923 to_copy = iwarp_pktinfo[op].hdr_len - to_copy;
924
925 /* Again, no network fragmented TERM's */
926 if (to_copy + MPA_CRC_SIZE > srx->skb_new)
927 return -ECONNRESET;
928
929 skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
930
931 if (term->flag_r) {
932 siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
933 op, be16_to_cpu(term_info.ctrl.mpa_len),
934 term->flag_m ? "valid" : "invalid");
935 } else if (term->flag_d) {
936 siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
937 op, be16_to_cpu(term_info.ctrl.mpa_len),
938 term->flag_m ? "valid" : "invalid");
939 }
940out:
941 srx->skb_new -= to_copy;
942 srx->skb_offset += to_copy;
943 srx->skb_copied += to_copy;
944 srx->fpdu_part_rcvd += to_copy;
945 srx->fpdu_part_rem -= to_copy;
946
947 return -ECONNRESET;
948}
949
950static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
951{
952 struct sk_buff *skb = srx->skb;
953 u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
954 __wsum crc_in, crc_own = 0;
955
956 siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
957 srx->fpdu_part_rem, srx->skb_new, srx->pad);
958
959 if (srx->skb_new < srx->fpdu_part_rem)
960 return -EAGAIN;
961
962 skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem);
963
964 if (srx->mpa_crc_hd && srx->pad)
965 crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
966
967 srx->skb_new -= srx->fpdu_part_rem;
968 srx->skb_offset += srx->fpdu_part_rem;
969 srx->skb_copied += srx->fpdu_part_rem;
970
971 if (!srx->mpa_crc_hd)
972 return 0;
973
974 /*
975 * CRC32 is computed, transmitted and received directly in NBO,
976 * so there's never a reason to convert byte order.
977 */
978 crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
979 crc_in = (__force __wsum)srx->trailer.crc;
980
981 if (unlikely(crc_in != crc_own)) {
982 pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
983 crc_in, crc_own, qp->rx_stream.rdmap_op);
984
985 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
986 LLP_ETYPE_MPA,
987 LLP_ECODE_RECEIVED_CRC, 0);
988 return -EINVAL;
989 }
990 return 0;
991}
992
993#define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)
994
995static int siw_get_hdr(struct siw_rx_stream *srx)
996{
997 struct sk_buff *skb = srx->skb;
998 struct siw_qp *qp = rx_qp(srx);
999 struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
1000 struct siw_rx_fpdu *frx;
1001 u8 opcode;
1002 int bytes;
1003
1004 if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
1005 /*
1006 * copy a mimimum sized (tagged) DDP frame control part
1007 */
1008 bytes = min_t(int, srx->skb_new,
1009 MIN_DDP_HDR - srx->fpdu_part_rcvd);
1010
1011 skb_copy_bits(skb, srx->skb_offset,
1012 (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1013
1014 srx->fpdu_part_rcvd += bytes;
1015
1016 srx->skb_new -= bytes;
1017 srx->skb_offset += bytes;
1018 srx->skb_copied += bytes;
1019
1020 if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
1021 return -EAGAIN;
1022
1023 if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
1024 enum ddp_etype etype;
1025 enum ddp_ecode ecode;
1026
1027 pr_warn("siw: received ddp version unsupported %d\n",
1028 __ddp_get_version(c_hdr));
1029
1030 if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
1031 etype = DDP_ETYPE_TAGGED_BUF;
1032 ecode = DDP_ECODE_T_VERSION;
1033 } else {
1034 etype = DDP_ETYPE_UNTAGGED_BUF;
1035 ecode = DDP_ECODE_UT_VERSION;
1036 }
1037 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
1038 etype, ecode, 0);
1039 return -EINVAL;
1040 }
1041 if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
1042 pr_warn("siw: received rdmap version unsupported %d\n",
1043 __rdmap_get_version(c_hdr));
1044
1045 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1046 RDMAP_ETYPE_REMOTE_OPERATION,
1047 RDMAP_ECODE_VERSION, 0);
1048 return -EINVAL;
1049 }
1050 opcode = __rdmap_get_opcode(c_hdr);
1051
1052 if (opcode > RDMAP_TERMINATE) {
1053 pr_warn("siw: received unknown packet type %u\n",
1054 opcode);
1055
1056 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1057 RDMAP_ETYPE_REMOTE_OPERATION,
1058 RDMAP_ECODE_OPCODE, 0);
1059 return -EINVAL;
1060 }
1061 siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
1062 } else {
1063 opcode = __rdmap_get_opcode(c_hdr);
1064 }
1065 set_rx_fpdu_context(qp, opcode);
1066 frx = qp->rx_fpdu;
1067
1068 /*
1069 * Figure out len of current hdr: variable length of
1070 * iwarp hdr may force us to copy hdr information in
1071 * two steps. Only tagged DDP messages are already
1072 * completely received.
1073 */
1074 if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
1075 bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR;
1076
1077 if (srx->skb_new < bytes)
1078 return -EAGAIN;
1079
1080 skb_copy_bits(skb, srx->skb_offset,
1081 (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1082
1083 srx->fpdu_part_rcvd += bytes;
1084
1085 srx->skb_new -= bytes;
1086 srx->skb_offset += bytes;
1087 srx->skb_copied += bytes;
1088 }
1089
1090 /*
1091 * DDP/RDMAP header receive completed. Check if the current
1092 * DDP segment starts a new RDMAP message or continues a previously
1093 * started RDMAP message.
1094 *
1095 * Alternating reception of DDP segments (or FPDUs) from incomplete
1096 * tagged and untagged RDMAP messages is supported, as long as
1097 * the current tagged or untagged message gets eventually completed
1098 * w/o intersection from another message of the same type
1099 * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
1100 * but not by a READ RESPONSE etc.
1101 */
1102 if (srx->mpa_crc_hd) {
1103 /*
1104 * Restart CRC computation
1105 */
1106 crypto_shash_init(srx->mpa_crc_hd);
1107 crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
1108 srx->fpdu_part_rcvd);
1109 }
1110 if (frx->more_ddp_segs) {
1111 frx->first_ddp_seg = 0;
1112 if (frx->prev_rdmap_op != opcode) {
1113 pr_warn("siw: packet intersection: %u : %u\n",
1114 frx->prev_rdmap_op, opcode);
1115 /*
1116 * The last inbound RDMA operation of same type
1117 * (tagged or untagged) is left unfinished.
1118 * To complete it in error, make it the current
1119 * operation again, even with the header already
1120 * overwritten. For error handling, only the opcode
1121 * and current rx context are relevant.
1122 */
1123 set_rx_fpdu_context(qp, frx->prev_rdmap_op);
1124 __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
1125 return -EPROTO;
1126 }
1127 } else {
1128 frx->prev_rdmap_op = opcode;
1129 frx->first_ddp_seg = 1;
1130 }
1131 frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;
1132
1133 return 0;
1134}
1135
1136static int siw_check_tx_fence(struct siw_qp *qp)
1137{
1138 struct siw_wqe *tx_waiting = tx_wqe(qp);
1139 struct siw_sqe *rreq;
1140 int resume_tx = 0, rv = 0;
1141 unsigned long flags;
1142
1143 spin_lock_irqsave(&qp->orq_lock, flags);
1144
1145 rreq = orq_get_current(qp);
1146
1147 /* free current orq entry */
1148 WRITE_ONCE(rreq->flags, 0);
1149
1150 if (qp->tx_ctx.orq_fence) {
1151 if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
1152 pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
1153 qp_id(qp), tx_waiting->wr_status);
1154 rv = -EPROTO;
1155 goto out;
1156 }
1157 /* resume SQ processing */
1158 if (tx_waiting->sqe.opcode == SIW_OP_READ ||
1159 tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
1160 rreq = orq_get_tail(qp);
1161 if (unlikely(!rreq)) {
1162 pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
1163 rv = -EPROTO;
1164 goto out;
1165 }
1166 siw_read_to_orq(rreq, &tx_waiting->sqe);
1167
1168 qp->orq_put++;
1169 qp->tx_ctx.orq_fence = 0;
1170 resume_tx = 1;
1171
1172 } else if (siw_orq_empty(qp)) {
1173 qp->tx_ctx.orq_fence = 0;
1174 resume_tx = 1;
1175 } else {
1176 pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
1177 qp_id(qp), qp->orq_get, qp->orq_put);
1178 rv = -EPROTO;
1179 }
1180 }
1181 qp->orq_get++;
1182out:
1183 spin_unlock_irqrestore(&qp->orq_lock, flags);
1184
1185 if (resume_tx)
1186 rv = siw_sq_start(qp);
1187
1188 return rv;
1189}
1190
1191/*
1192 * siw_rdmap_complete()
1193 *
1194 * Complete processing of an RDMA message after receiving all
1195 * DDP segmens or ABort processing after encountering error case.
1196 *
1197 * o SENDs + RRESPs will need for completion,
1198 * o RREQs need for READ RESPONSE initialization
1199 * o WRITEs need memory dereferencing
1200 *
1201 * TODO: Failed WRITEs need local error to be surfaced.
1202 */
1203static int siw_rdmap_complete(struct siw_qp *qp, int error)
1204{
1205 struct siw_rx_stream *srx = &qp->rx_stream;
1206 struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
1207 enum siw_wc_status wc_status = wqe->wc_status;
1208 u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
1209 int rv = 0;
1210
1211 switch (opcode) {
1212 case RDMAP_SEND_SE:
1213 case RDMAP_SEND_SE_INVAL:
1214 wqe->rqe.flags |= SIW_WQE_SOLICITED;
cea743f2
GS
1215 /* Fall through */
1216
8b6a361b
BM
1217 case RDMAP_SEND:
1218 case RDMAP_SEND_INVAL:
1219 if (wqe->wr_status == SIW_WR_IDLE)
1220 break;
1221
1222 srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
1223
1224 if (error != 0 && wc_status == SIW_WC_SUCCESS)
1225 wc_status = SIW_WC_GENERAL_ERR;
1226 /*
1227 * Handle STag invalidation request
1228 */
1229 if (wc_status == SIW_WC_SUCCESS &&
1230 (opcode == RDMAP_SEND_INVAL ||
1231 opcode == RDMAP_SEND_SE_INVAL)) {
1232 rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
1233 if (rv) {
1234 siw_init_terminate(
1235 qp, TERM_ERROR_LAYER_RDMAP,
1236 rv == -EACCES ?
1237 RDMAP_ETYPE_REMOTE_PROTECTION :
1238 RDMAP_ETYPE_REMOTE_OPERATION,
1239 RDMAP_ECODE_CANNOT_INVALIDATE, 0);
1240
1241 wc_status = SIW_WC_REM_INV_REQ_ERR;
1242 }
1243 rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1244 rv ? 0 : srx->inval_stag,
1245 wc_status);
1246 } else {
1247 rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1248 0, wc_status);
1249 }
1250 siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
1251 break;
1252
1253 case RDMAP_RDMA_READ_RESP:
1254 if (wqe->wr_status == SIW_WR_IDLE)
1255 break;
1256
1257 if (error != 0) {
1258 if ((srx->state == SIW_GET_HDR &&
1259 qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
1260 /* possible RREQ in ORQ left untouched */
1261 break;
1262
1263 if (wc_status == SIW_WC_SUCCESS)
1264 wc_status = SIW_WC_GENERAL_ERR;
1265 } else if (qp->kernel_verbs &&
1266 rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
1267 /*
1268 * Handle any STag invalidation request
1269 */
1270 rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
1271 if (rv) {
1272 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
1273 RDMAP_ETYPE_CATASTROPHIC,
1274 RDMAP_ECODE_UNSPECIFIED, 0);
1275
1276 if (wc_status == SIW_WC_SUCCESS) {
1277 wc_status = SIW_WC_GENERAL_ERR;
1278 error = rv;
1279 }
1280 }
1281 }
1282 /*
1283 * All errors turn the wqe into signalled.
1284 */
1285 if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
1286 rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
1287 wc_status);
1288 siw_wqe_put_mem(wqe, SIW_OP_READ);
1289
1290 if (!error)
1291 rv = siw_check_tx_fence(qp);
1292 else
1293 /* Disable current ORQ eleement */
1294 WRITE_ONCE(orq_get_current(qp)->flags, 0);
1295 break;
1296
1297 case RDMAP_RDMA_READ_REQ:
1298 if (!error) {
1299 rv = siw_init_rresp(qp, srx);
1300 srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
1301 }
1302 break;
1303
1304 case RDMAP_RDMA_WRITE:
1305 if (wqe->wr_status == SIW_WR_IDLE)
1306 break;
1307
1308 /*
1309 * Free References from memory object if
1310 * attached to receive context (inbound WRITE).
1311 * While a zero-length WRITE is allowed,
1312 * no memory reference got created.
1313 */
1314 if (rx_mem(&qp->rx_tagged)) {
1315 siw_mem_put(rx_mem(&qp->rx_tagged));
1316 rx_mem(&qp->rx_tagged) = NULL;
1317 }
1318 break;
1319
1320 default:
1321 break;
1322 }
1323 wqe->wr_status = SIW_WR_IDLE;
1324
1325 return rv;
1326}
1327
1328/*
1329 * siw_tcp_rx_data()
1330 *
1331 * Main routine to consume inbound TCP payload
1332 *
1333 * @rd_desc: read descriptor
1334 * @skb: socket buffer
1335 * @off: offset in skb
1336 * @len: skb->len - offset : payload in skb
1337 */
1338int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
1339 unsigned int off, size_t len)
1340{
1341 struct siw_qp *qp = rd_desc->arg.data;
1342 struct siw_rx_stream *srx = &qp->rx_stream;
1343 int rv;
1344
1345 srx->skb = skb;
1346 srx->skb_new = skb->len - off;
1347 srx->skb_offset = off;
1348 srx->skb_copied = 0;
1349
1350 siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);
1351
1352 while (srx->skb_new) {
1353 int run_completion = 1;
1354
1355 if (unlikely(srx->rx_suspend)) {
1356 /* Do not process any more data */
1357 srx->skb_copied += srx->skb_new;
1358 break;
1359 }
1360 switch (srx->state) {
1361 case SIW_GET_HDR:
1362 rv = siw_get_hdr(srx);
1363 if (!rv) {
1364 srx->fpdu_part_rem =
1365 be16_to_cpu(srx->hdr.ctrl.mpa_len) -
1366 srx->fpdu_part_rcvd + MPA_HDR_SIZE;
1367
1368 if (srx->fpdu_part_rem)
1369 srx->pad = -srx->fpdu_part_rem & 0x3;
1370 else
1371 srx->pad = 0;
1372
1373 srx->state = SIW_GET_DATA_START;
1374 srx->fpdu_part_rcvd = 0;
1375 }
1376 break;
1377
1378 case SIW_GET_DATA_MORE:
1379 /*
1380 * Another data fragment of the same DDP segment.
1381 * Setting first_ddp_seg = 0 avoids repeating
1382 * initializations that shall occur only once per
1383 * DDP segment.
1384 */
1385 qp->rx_fpdu->first_ddp_seg = 0;
1386 /* Fall through */
1387
1388 case SIW_GET_DATA_START:
1389 /*
1390 * Headers will be checked by the opcode-specific
1391 * data receive function below.
1392 */
1393 rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
1394 if (!rv) {
1395 int mpa_len =
1396 be16_to_cpu(srx->hdr.ctrl.mpa_len)
1397 + MPA_HDR_SIZE;
1398
1399 srx->fpdu_part_rem = (-mpa_len & 0x3)
1400 + MPA_CRC_SIZE;
1401 srx->fpdu_part_rcvd = 0;
1402 srx->state = SIW_GET_TRAILER;
1403 } else {
1404 if (unlikely(rv == -ECONNRESET))
1405 run_completion = 0;
1406 else
1407 srx->state = SIW_GET_DATA_MORE;
1408 }
1409 break;
1410
1411 case SIW_GET_TRAILER:
1412 /*
1413 * read CRC + any padding
1414 */
1415 rv = siw_get_trailer(qp, srx);
1416 if (likely(!rv)) {
1417 /*
1418 * FPDU completed.
1419 * complete RDMAP message if last fragment
1420 */
1421 srx->state = SIW_GET_HDR;
1422 srx->fpdu_part_rcvd = 0;
1423
1424 if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
1425 DDP_FLAG_LAST))
1426 /* more frags */
1427 break;
1428
1429 rv = siw_rdmap_complete(qp, 0);
1430 run_completion = 0;
1431 }
1432 break;
1433
1434 default:
1435 pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
1436 rv = -EPROTO;
1437 run_completion = 0;
1438 }
1439 if (unlikely(rv != 0 && rv != -EAGAIN)) {
1440 if ((srx->state > SIW_GET_HDR ||
1441 qp->rx_fpdu->more_ddp_segs) && run_completion)
1442 siw_rdmap_complete(qp, rv);
1443
1444 siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
1445 srx->state);
1446
1447 siw_qp_cm_drop(qp, 1);
1448
1449 break;
1450 }
1451 if (rv) {
1452 siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
1453 srx->state, srx->fpdu_part_rem);
1454 break;
1455 }
1456 }
1457 return srx->skb_copied;
1458}