IB/hfi1: Integrate OPFN into RC transactions
drivers/infiniband/hw/hfi1/rc.c
/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/io.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

#include "hfi.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "trace.h"

/* cut down ridiculously long IB macro names */
#define OP(x) RC_OP(x)
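/* e.g. OP(SEND_FIRST) expands to the IB RC opcode IB_OPCODE_RC_SEND_FIRST */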

static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
					 struct rvt_swqe *wqe,
					 struct hfi1_ibport *ibp);

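/*
 * Rewind a work request's SGE state to the offset implied by @psn;
 * e.g. restarting two packets past wqe->psn with a 4096-byte MTU skips
 * 8192 bytes into the payload and returns the remaining length.
 */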
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
		       u32 psn, u32 pmtu)
{
	u32 len;

	len = delta_psn(psn, wqe->psn) * pmtu;
	ss->sge = wqe->sg_list[0];
	ss->sg_list = wqe->sg_list + 1;
	ss->num_sge = wqe->wr.num_sge;
	ss->total_len = wqe->length;
	rvt_skip_sge(ss, len, false);
	return wqe->length - len;
}

/**
 * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @dev: the device for this QP
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @ps: the xmit packet state
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note that we are in the responder's side of the QP context.
 * Note the QP s_lock must be held.
 */
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
		       struct ib_other_headers *ohdr,
		       struct hfi1_pkt_state *ps)
{
	struct rvt_ack_entry *e;
	u32 hwords;
	u32 len;
	u32 bth0, bth2;
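	/*
	 * When the OPFN capability is enabled, advertise it by setting
	 * the BTH extended bit so the peer knows extended transport
	 * features can be negotiated on this connection.
	 */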
	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
	int middle = 0;
	u32 pmtu = qp->pmtu;
	struct hfi1_qp_priv *priv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	/* Don't send an ACK if we aren't supposed to. */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		goto bail;

	if (priv->hdr_type == HFI1_PKT_TYPE_9B)
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
	else
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;

	switch (qp->s_ack_state) {
	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->rdma_sge.mr) {
			rvt_put_mr(e->rdma_sge.mr);
			e->rdma_sge.mr = NULL;
		}
		/* FALLTHROUGH */
	case OP(ATOMIC_ACKNOWLEDGE):
		/*
		 * We can increment the tail pointer now that the last
		 * response has been sent instead of only being
		 * constructed.
		 */
		if (++qp->s_tail_ack_queue >
		    rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
			qp->s_tail_ack_queue = 0;
		/* FALLTHROUGH */
	case OP(SEND_ONLY):
	case OP(ACKNOWLEDGE):
		/* Check for no next entry in the queue. */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			if (qp->s_flags & RVT_S_ACK_PENDING)
				goto normal;
			goto bail;
		}

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->opcode == OP(RDMA_READ_REQUEST)) {
			/*
			 * If a RDMA read response is being resent and
			 * we haven't seen the duplicate request yet,
			 * then stop sending the remaining responses the
			 * responder has seen until the requester re-sends it.
			 */
			len = e->rdma_sge.sge_length;
			if (len && !e->rdma_sge.mr) {
				qp->s_tail_ack_queue = qp->r_head_ack_queue;
				goto bail;
			}
			/* Copy SGE state in case we need to resend */
			ps->s_txreq->mr = e->rdma_sge.mr;
			if (ps->s_txreq->mr)
				rvt_get_mr(ps->s_txreq->mr);
			qp->s_ack_rdma_sge.sge = e->rdma_sge;
			qp->s_ack_rdma_sge.num_sge = 1;
			ps->s_txreq->ss = &qp->s_ack_rdma_sge;
			if (len > pmtu) {
				len = pmtu;
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
			} else {
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
				e->sent = 1;
			}
			ohdr->u.aeth = rvt_compute_aeth(qp);
			hwords++;
			qp->s_ack_rdma_psn = e->psn;
			bth2 = mask_psn(qp->s_ack_rdma_psn++);
		} else {
			/* COMPARE_SWAP or FETCH_ADD */
			ps->s_txreq->ss = NULL;
			len = 0;
			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
			ohdr->u.at.aeth = rvt_compute_aeth(qp);
			ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
			hwords += sizeof(ohdr->u.at) / sizeof(u32);
			bth2 = mask_psn(e->psn);
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		ps->s_txreq->ss = &qp->s_ack_rdma_sge;
		ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
		if (ps->s_txreq->mr)
			rvt_get_mr(ps->s_txreq->mr);
		len = qp->s_ack_rdma_sge.sge.sge_length;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
		} else {
			ohdr->u.aeth = rvt_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
			e = &qp->s_ack_queue[qp->s_tail_ack_queue];
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		bth2 = mask_psn(qp->s_ack_rdma_psn++);
		break;

	default:
normal:
		/*
		 * Send a regular ACK.
		 * Set the s_ack_state so we wait until after sending
		 * the ACK before setting s_ack_state to ACKNOWLEDGE
		 * (see above).
		 */
		qp->s_ack_state = OP(SEND_ONLY);
		qp->s_flags &= ~RVT_S_ACK_PENDING;
		ps->s_txreq->ss = NULL;
		if (qp->s_nak_state)
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
					    (qp->s_nak_state <<
					     IB_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = rvt_compute_aeth(qp);
		hwords++;
		len = 0;
		bth0 = OP(ACKNOWLEDGE) << 24;
		bth2 = mask_psn(qp->s_ack_psn);
	}
	qp->s_rdma_ack_cnt++;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->s_cur_size = len;
	ps->s_txreq->hdr_dwords = hwords;
	hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
	return 1;

bail:
	qp->s_ack_state = OP(ACKNOWLEDGE);
	/*
	 * Ensure s_rdma_ack_cnt changes are committed prior to resetting
	 * RVT_S_RESP_PENDING
	 */
	smp_wmb();
	qp->s_flags &= ~(RVT_S_RESP_PENDING
				| RVT_S_ACK_PENDING
				| HFI1_S_AHG_VALID);
	return 0;
}

/**
 * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 * @ps: the current packet state
 *
 * Assumes s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_other_headers *ohdr;
	struct rvt_sge_state *ss;
	struct rvt_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0 = 0, bth2;
	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
	u32 pmtu = qp->pmtu;
	char newreq;
	int middle = 0;
	int delta;

	lockdep_assert_held(&qp->s_lock);
	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;

	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
		if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
	} else {
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;
		if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
		    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
	}

	/* Sending responses has higher priority over sending requests. */
	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
	    make_rc_ack(dev, qp, ohdr, ps))
		return 1;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == READ_ONCE(qp->s_head))
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
			goto bail;
		}
		clear_ahg(qp);
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
				  IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
		/* will get called again */
		goto done_free_tx;
	}

	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
		goto bail;

	if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
		if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
			qp->s_flags |= RVT_S_WAIT_PSN;
			goto bail;
		}
		qp->s_sending_psn = qp->s_psn;
		qp->s_sending_hpsn = qp->s_psn - 1;
	}

	/* Send a request. */
	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	switch (qp->s_state) {
	default:
		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
			goto bail;
		/*
		 * Resend an old request or start a new one.
		 *
		 * We keep track of the current SWQE so that
		 * we don't reset the "furthest progress" state
		 * if we need to back up.
		 */
		newreq = 0;
		if (qp->s_cur == qp->s_tail) {
			/* Check if send work queue is empty. */
			if (qp->s_tail == READ_ONCE(qp->s_head)) {
				clear_ahg(qp);
				goto bail;
			}
			/*
			 * If a fence is requested, wait for previous
			 * RDMA read and atomic operations to finish.
			 */
			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
			    qp->s_num_rd_atomic) {
				qp->s_flags |= RVT_S_WAIT_FENCE;
				goto bail;
			}
			/*
			 * Local operations are processed immediately
			 * after all prior requests have completed
			 */
			if (wqe->wr.opcode == IB_WR_REG_MR ||
			    wqe->wr.opcode == IB_WR_LOCAL_INV) {
				int local_ops = 0;
				int err = 0;

				if (qp->s_last != qp->s_cur)
					goto bail;
				if (++qp->s_cur == qp->s_size)
					qp->s_cur = 0;
				if (++qp->s_tail == qp->s_size)
					qp->s_tail = 0;
				if (!(wqe->wr.send_flags &
				      RVT_SEND_COMPLETION_ONLY)) {
					err = rvt_invalidate_rkey(
						qp,
						wqe->wr.ex.invalidate_rkey);
					local_ops = 1;
				}
				rvt_send_complete(qp, wqe,
						  err ? IB_WC_LOC_PROT_ERR
						      : IB_WC_SUCCESS);
				if (local_ops)
					atomic_dec(&qp->local_ops_pending);
				goto done_free_tx;
			}

			newreq = 1;
			qp->s_psn = wqe->psn;
		}
		/*
		 * Note that we have to be careful not to modify the
		 * original work request since we may need to resend
		 * it.
		 */
		len = wqe->length;
		ss = &qp->s_sge;
		bth2 = mask_psn(qp->s_psn);
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
		case IB_WR_SEND_WITH_INV:
			/* If no credit, return. */
			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
				goto bail;
			}
			if (len > pmtu) {
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND) {
				qp->s_state = OP(SEND_ONLY);
			} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			} else {
				qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
				/* Invalidate rkey comes after the BTH */
				ohdr->u.ieth = cpu_to_be32(
					wqe->wr.ex.invalidate_rkey);
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
				qp->s_lsn++;
			goto no_flow_control;
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* If no credit, return. */
			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
				goto bail;
			}
no_flow_control:
			put_ib_reth_vaddr(
				wqe->rdma_wr.remote_addr,
				&ohdr->u.rc.reth);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->rdma_wr.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / sizeof(u32);
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
				qp->s_state = OP(RDMA_WRITE_ONLY);
			} else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= IB_BTH_SOLICITED;
			}
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_READ:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= RVT_S_WAIT_RDMAR;
					goto bail;
				}
				qp->s_num_rd_atomic++;
				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
					qp->s_lsn++;
			}
			put_ib_reth_vaddr(
				wqe->rdma_wr.remote_addr,
				&ohdr->u.rc.reth);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->rdma_wr.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			qp->s_state = OP(RDMA_READ_REQUEST);
			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
			ss = NULL;
			len = 0;
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= RVT_S_WAIT_RDMAR;
					goto bail;
				}
				qp->s_num_rd_atomic++;
			}

			/* FALLTHROUGH */
		case IB_WR_OPFN:
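			/*
			 * OPFN requests are carried as COMPARE_SWAP atomics
			 * and therefore share the atomic construction path
			 * below.
			 */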
			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
				qp->s_lsn++;
			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
			    wqe->wr.opcode == IB_WR_OPFN) {
				qp->s_state = OP(COMPARE_SWAP);
				put_ib_ateth_swap(wqe->atomic_wr.swap,
						  &ohdr->u.atomic_eth);
				put_ib_ateth_compare(wqe->atomic_wr.compare_add,
						     &ohdr->u.atomic_eth);
			} else {
				qp->s_state = OP(FETCH_ADD);
				put_ib_ateth_swap(wqe->atomic_wr.compare_add,
						  &ohdr->u.atomic_eth);
				put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
			}
			put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
					   &ohdr->u.atomic_eth);
			ohdr->u.atomic_eth.rkey = cpu_to_be32(
				wqe->atomic_wr.rkey);
			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
			ss = NULL;
			len = 0;
			bth2 |= IB_BTH_REQ_ACK;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			goto bail;
		}
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_sge.total_len = wqe->length;
		qp->s_len = wqe->length;
		if (newreq) {
			qp->s_tail++;
			if (qp->s_tail >= qp->s_size)
				qp->s_tail = 0;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			qp->s_psn = wqe->lpsn + 1;
		else
			qp->s_psn++;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		/*
		 * qp->s_state is normally set to the opcode of the
		 * last packet constructed for new requests and therefore
		 * is never set to RDMA read response.
		 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
		 * thread to indicate a SEND needs to be restarted from an
		 * earlier PSN without interfering with the sending thread.
		 * See restart_rc().
		 */
		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
		/* FALLTHROUGH */
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		bth2 = mask_psn(qp->s_psn++);
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND) {
			qp->s_state = OP(SEND_LAST);
		} else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
		} else {
			qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
			/* invalidate data comes after the BTH */
			ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= IB_BTH_SOLICITED;
		bth2 |= IB_BTH_REQ_ACK;
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
		/*
		 * qp->s_state is normally set to the opcode of the
		 * last packet constructed for new requests and therefore
		 * is never set to RDMA read response.
		 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
		 * thread to indicate a RDMA write needs to be restarted from
		 * an earlier PSN without interfering with the sending thread.
		 * See restart_rc().
		 */
		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		bth2 = mask_psn(qp->s_psn++);
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
			qp->s_state = OP(RDMA_WRITE_LAST);
		} else {
			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
		}
		bth2 |= IB_BTH_REQ_ACK;
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/*
		 * qp->s_state is normally set to the opcode of the
		 * last packet constructed for new requests and therefore
		 * is never set to RDMA read response.
		 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
		 * thread to indicate a RDMA read needs to be restarted from
		 * an earlier PSN without interfering with the sending thread.
		 * See restart_rc().
		 */
		len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
		put_ib_reth_vaddr(
			wqe->rdma_wr.remote_addr + len,
			&ohdr->u.rc.reth);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->rdma_wr.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
		bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK;
		qp->s_psn = wqe->lpsn + 1;
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;
	}
	qp->s_sending_hpsn = bth2;
	delta = delta_psn(bth2, wqe->psn);
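	/* Request an ACK every HFI1_PSN_CREDIT packets within a long message */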
	if (delta && delta % HFI1_PSN_CREDIT == 0)
		bth2 |= IB_BTH_REQ_ACK;
	if (qp->s_flags & RVT_S_SEND_ONE) {
		qp->s_flags &= ~RVT_S_SEND_ONE;
		qp->s_flags |= RVT_S_WAIT_ACK;
		bth2 |= IB_BTH_REQ_ACK;
	}
	qp->s_len -= len;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->ss = ss;
	ps->s_txreq->s_cur_size = len;
	hfi1_make_ruc_header(
		qp,
		ohdr,
		bth0 | (qp->s_state << 24),
		bth1,
		bth2,
		middle,
		ps);
	return 1;

done_free_tx:
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;
	return 1;

bail:
	hfi1_put_txreq(ps->s_txreq);

bail_no_tx:
	ps->s_txreq = NULL;
	qp->s_flags &= ~RVT_S_BUSY;
	return 0;
}

static inline void hfi1_make_bth_aeth(struct rvt_qp *qp,
				      struct ib_other_headers *ohdr,
				      u32 bth0, u32 bth1)
{
	if (qp->r_nak_state)
		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
					   (qp->r_nak_state <<
					    IB_AETH_CREDIT_SHIFT));
	else
		ohdr->u.aeth = rvt_compute_aeth(qp);

	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
}

static inline void hfi1_queue_rc_ack(struct hfi1_packet *packet, bool is_fecn)
{
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp;
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		goto unlock;
	ibp = rcd_to_iport(packet->rcd);
	this_cpu_inc(*ibp->rvp.rc_qacks);
	qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
	qp->s_nak_state = qp->r_nak_state;
	qp->s_ack_psn = qp->r_ack_psn;
	if (is_fecn)
		qp->s_flags |= RVT_S_ECN;

	/* Schedule the send tasklet. */
	hfi1_schedule_send(qp);
unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
}

static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet,
				       struct hfi1_opa_header *opa_hdr,
				       u8 sc5, bool is_fecn,
				       u64 *pbc_flags, u32 *hwords,
				       u32 *nwords)
{
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct ib_header *hdr = &opa_hdr->ibh;
	struct ib_other_headers *ohdr;
	u16 lrh0 = HFI1_LRH_BTH;
	u16 pkey;
	u32 bth0, bth1;

	opa_hdr->hdr_type = HFI1_PKT_TYPE_9B;
	ohdr = &hdr->u.oth;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
	*hwords = 6;

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
		*hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
					 rdma_ah_read_grh(&qp->remote_ah_attr),
					 *hwords - 2, SIZE_OF_CRC);
		ohdr = &hdr->u.l.oth;
		lrh0 = HFI1_LRH_GRH;
	}
	/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
	*pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);

	/* read pkey_index w/o lock (it's atomic) */
	pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);

	lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT |
		(rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) <<
		 IB_SL_SHIFT;

	hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC,
			 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
			 ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr));

	bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	bth1 = (!!is_fecn) << IB_BECN_SHIFT;
	/*
	 * Inline ACKs go out without the use of the Verbs send engine, so
	 * we need to set the STL Verbs Extended bit here
	 */
	bth1 |= HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT;
	hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
}

static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet,
					struct hfi1_opa_header *opa_hdr,
					u8 sc5, bool is_fecn,
					u64 *pbc_flags, u32 *hwords,
					u32 *nwords)
{
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_16b_header *hdr = &opa_hdr->opah;
	struct ib_other_headers *ohdr;
	u32 bth0, bth1 = 0;
	u16 len, pkey;
	bool becn = is_fecn;
	u8 l4 = OPA_16B_L4_IB_LOCAL;
	u8 extra_bytes;

	opa_hdr->hdr_type = HFI1_PKT_TYPE_16B;
	ohdr = &hdr->u.oth;
	/* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */
	*hwords = 8;
	extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0);
	*nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2);

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
	    hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
		*hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
					 rdma_ah_read_grh(&qp->remote_ah_attr),
					 *hwords - 4, *nwords);
		ohdr = &hdr->u.l.oth;
		l4 = OPA_16B_L4_IB_GLOBAL;
	}
	*pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;

	/* read pkey_index w/o lock (it's atomic) */
	pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);

	/* Convert dwords to flits */
	len = (*hwords + *nwords) >> 1;

	hfi1_make_16b_hdr(hdr, ppd->lid |
			  (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
			   ((1 << ppd->lmc) - 1)),
			  opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
				      16B), len, pkey, becn, 0, l4, sc5);

	bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
	bth0 |= extra_bytes << 20;
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth1 = OPA_BTH_MIG_REQ;
	hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
}

typedef void (*hfi1_make_rc_ack)(struct hfi1_packet *packet,
				 struct hfi1_opa_header *opa_hdr,
				 u8 sc5, bool is_fecn,
				 u64 *pbc_flags, u32 *hwords,
				 u32 *nwords);

/* We support only two types - 9B and 16B for now */
static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
	[HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B,
	[HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
};
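
/*
 * The QP's header type (HFI1_PKT_TYPE_9B or HFI1_PKT_TYPE_16B) indexes
 * this table directly; see hfi1_send_rc_ack() below.
 */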
/**
 * hfi1_send_rc_ack - Construct an ACK packet and send it
 * @packet: information about the packet being acknowledged
 * @is_fecn: whether the incoming packet had its FECN bit set
 *
 * This is called from hfi1_rc_rcv() and handle_receive_interrupt().
 * Note that RDMA reads and atomics are handled in the
 * send side QP state and send engine.
 */
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp = rcd_to_iport(rcd);
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	u64 pbc, pbc_flags = 0;
	u32 hwords = 0;
	u32 nwords = 0;
	u32 plen;
	struct pio_buf *pbuf;
	struct hfi1_opa_header opa_hdr;

	/* clear the defer count */
	qp->r_adefered = 0;

	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
	if (qp->s_flags & RVT_S_RESP_PENDING) {
		hfi1_queue_rc_ack(packet, is_fecn);
		return;
	}

	/* Ensure s_rdma_ack_cnt changes are committed */
	if (qp->s_rdma_ack_cnt) {
		hfi1_queue_rc_ack(packet, is_fecn);
		return;
	}

	/* Don't try to send ACKs if the link isn't ACTIVE */
	if (driver_lstate(ppd) != IB_PORT_ACTIVE)
		return;

	/* Make the appropriate header */
	hfi1_make_rc_ack_tbl[priv->hdr_type](packet, &opa_hdr, sc5, is_fecn,
					     &pbc_flags, &hwords, &nwords);

	plen = 2 /* PBC */ + hwords + nwords;
	pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps,
			 sc_to_vlt(ppd->dd, sc5), plen);
	pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL);
	if (!pbuf) {
		/*
		 * We have no room to send at the moment. Pass
		 * responsibility for sending the ACK to the send engine
		 * so that when enough buffer space becomes available,
		 * the ACK is sent ahead of other outgoing packets.
		 */
		hfi1_queue_rc_ack(packet, is_fecn);
		return;
	}
	trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			       &opa_hdr, ib_is_sc5(sc5));

	/* write the pbc and data */
	ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
				 (priv->hdr_type == HFI1_PKT_TYPE_9B ?
				 (void *)&opa_hdr.ibh :
				 (void *)&opa_hdr.opah), hwords);
	return;
}

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from hfi1_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct rvt_qp *qp, u32 psn)
{
	u32 n = qp->s_acked;
	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
	u32 opcode;

	lockdep_assert_held(&qp->s_lock);
	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (cmp_psn(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = rvt_get_swqe_ptr(qp, n);
		diff = cmp_psn(psn, wqe->psn);
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See hfi1_make_rc_req().
	 */
	switch (opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since there is only
		 * one PSN per req.
		 */
		qp->s_state = OP(SEND_LAST);
	}
done:
	qp->s_psn = psn;
	/*
	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
	 * asynchronously before the send engine can get scheduled.
	 * Doing it in hfi1_make_rc_req() is too late.
	 */
	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
		qp->s_flags |= RVT_S_WAIT_PSN;
	qp->s_flags &= ~HFI1_S_AHG_VALID;
}

/*
 * Back up requester to resend the last un-ACKed request.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 */
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	struct hfi1_ibport *ibp;

	lockdep_assert_held(&qp->r_lock);
	lockdep_assert_held(&qp->s_lock);
	if (qp->s_retry == 0) {
		if (qp->s_mig_state == IB_MIG_ARMED) {
			hfi1_migrate_qp(qp);
			qp->s_retry = qp->s_retry_cnt;
		} else if (qp->s_last == qp->s_acked) {
			/*
			 * We need special handling for the OPFN request WQEs as
			 * they are not allowed to generate real user errors
			 */
			if (wqe->wr.opcode == IB_WR_OPFN) {
				struct hfi1_ibport *ibp =
					to_iport(qp->ibqp.device, qp->port_num);
				/*
				 * Call opfn_conn_reply() with capcode and
				 * remaining data as 0 to close out the
				 * current request
				 */
				opfn_conn_reply(qp, priv->opfn.curr);
				wqe = do_rc_completion(qp, wqe, ibp);
				qp->s_flags &= ~RVT_S_WAIT_ACK;
			} else {
				rvt_send_complete(qp, wqe,
						  IB_WC_RETRY_EXC_ERR);
				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
			}
			return;
		} else { /* need to handle delayed completion */
			return;
		}
	} else {
		qp->s_retry--;
	}

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	if (wqe->wr.opcode == IB_WR_RDMA_READ)
		ibp->rvp.n_rc_resends++;
	else
		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);

	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
			 RVT_S_WAIT_ACK);
	if (wait)
		qp->s_flags |= RVT_S_SEND_ONE;
	reset_psn(qp, psn);
}

/*
 * Set qp->s_sending_psn to the next PSN after the given one.
 * This would be psn+1 except when RDMA reads are present.
 */
static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
{
	struct rvt_swqe *wqe;
	u32 n = qp->s_last;

	lockdep_assert_held(&qp->s_lock);
	/* Find the work request corresponding to the given PSN. */
	for (;;) {
		wqe = rvt_get_swqe_ptr(qp, n);
		if (cmp_psn(psn, wqe->lpsn) <= 0) {
			if (wqe->wr.opcode == IB_WR_RDMA_READ)
				qp->s_sending_psn = wqe->lpsn + 1;
			else
				qp->s_sending_psn = psn + 1;
			break;
		}
		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
	}
}

/*
 * This should be called with the QP s_lock held and interrupts disabled.
 */
void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
{
	struct ib_other_headers *ohdr;
	struct hfi1_qp_priv *priv = qp->priv;
	struct rvt_swqe *wqe;
	struct ib_header *hdr = NULL;
	struct hfi1_16b_header *hdr_16b = NULL;
	u32 opcode;
	u32 psn;

	lockdep_assert_held(&qp->s_lock);
	if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
		return;

	/* Find out where the BTH is */
	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
		hdr = &opah->ibh;
		if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
			ohdr = &hdr->u.oth;
		else
			ohdr = &hdr->u.l.oth;
	} else {
		u8 l4;

		hdr_16b = &opah->opah;
		l4 = hfi1_16B_get_l4(hdr_16b);
		if (l4 == OPA_16B_L4_IB_LOCAL)
			ohdr = &hdr_16b->u.oth;
		else
			ohdr = &hdr_16b->u.l.oth;
	}

	opcode = ib_bth_get_opcode(ohdr);
	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
		WARN_ON(!qp->s_rdma_ack_cnt);
		qp->s_rdma_ack_cnt--;
		return;
	}

	psn = ib_bth_get_psn(ohdr);
	reset_sending_psn(qp, psn);

	/*
	 * Start timer after a packet requesting an ACK has been sent and
	 * there are still requests that haven't been acked.
	 */
	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
	    !(qp->s_flags &
	      (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		rvt_add_retry_timer(qp);

	while (qp->s_last != qp->s_acked) {
		u32 s_last;

		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
			break;
		rvt_qp_wqe_unreserve(qp, wqe);
		s_last = qp->s_last;
		trace_hfi1_qp_send_completion(qp, wqe, s_last);
		if (++s_last >= qp->s_size)
			s_last = 0;
		qp->s_last = s_last;
		/* see post_send() */
		barrier();
		rvt_put_swqe(wqe);
		rvt_qp_swqe_complete(qp,
				     wqe,
				     ib_hfi1_wc_opcode[wqe->wr.opcode],
				     IB_WC_SUCCESS);
	}
	/*
	 * If we were waiting for sends to complete before re-sending,
	 * and they are now complete, restart sending.
	 */
	trace_hfi1_sendcomplete(qp, psn);
	if (qp->s_flags & RVT_S_WAIT_PSN &&
	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
		qp->s_flags &= ~RVT_S_WAIT_PSN;
		qp->s_sending_psn = qp->s_psn;
		qp->s_sending_hpsn = qp->s_psn - 1;
		hfi1_schedule_send(qp);
	}
}

static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
{
	qp->s_last_psn = psn;
}

/*
 * Generate a SWQE completion.
 * This is similar to hfi1_send_complete but has to check to be sure
 * that the SGEs are not being referenced if the SWQE is being resent.
 */
static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
					 struct rvt_swqe *wqe,
					 struct hfi1_ibport *ibp)
{
	lockdep_assert_held(&qp->s_lock);
	/*
	 * Don't decrement refcount and don't generate a
	 * completion if the SWQE is being resent until the send
	 * is finished.
	 */
	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
		u32 s_last;

		rvt_put_swqe(wqe);
		rvt_qp_wqe_unreserve(qp, wqe);
		s_last = qp->s_last;
		trace_hfi1_qp_send_completion(qp, wqe, s_last);
		if (++s_last >= qp->s_size)
			s_last = 0;
		qp->s_last = s_last;
		/* see post_send() */
		barrier();
		rvt_qp_swqe_complete(qp,
				     wqe,
				     ib_hfi1_wc_opcode[wqe->wr.opcode],
				     IB_WC_SUCCESS);
	} else {
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
		/*
		 * If send progress not running attempt to progress
		 * SDMA queue.
		 */
		if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
			struct sdma_engine *engine;
			u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
			u8 sc5;

			/* For now use sc to find engine */
			sc5 = ibp->sl_to_sc[sl];
			engine = qp_to_sdma_engine(qp, sc5);
			sdma_engine_progress_schedule(engine);
		}
	}

	qp->s_retry = qp->s_retry_cnt;
	update_last_psn(qp, wqe->lpsn);

	/*
	 * If we are completing a request which is in the process of
	 * being resent, we can stop re-sending it since we know the
	 * responder has already seen it.
	 */
	if (qp->s_acked == qp->s_cur) {
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		qp->s_acked = qp->s_cur;
		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
		if (qp->s_acked != qp->s_tail) {
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		}
	} else {
		if (++qp->s_acked >= qp->s_size)
			qp->s_acked = 0;
		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
			qp->s_draining = 0;
		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	}
	return wqe;
}

/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @aeth: the AETH from the ACK packet
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 * @val: the atomic ACK data, if any
 * @rcd: the receive context
 *
 * This is called from rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * May be called at interrupt level, with the QP s_lock held.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
		     u64 val, struct hfi1_ctxtdata *rcd)
{
	struct hfi1_ibport *ibp;
	enum ib_wc_status status;
	struct rvt_swqe *wqe;
	int ret = 0;
	u32 ack_psn;
	int diff;

	lockdep_assert_held(&qp->s_lock);
	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request. The MSN won't include the NAK'ed
	 * request but will include an ACK'ed request(s).
	 */
	ack_psn = psn;
	if (aeth >> IB_AETH_NAK_SHIFT)
		ack_psn--;
	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	ibp = rcd_to_iport(rcd);

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while ((diff = delta_psn(ack_psn, wqe->lpsn)) >= 0) {
		/*
		 * RDMA_READ_RESPONSE_ONLY is a special case since
		 * we want to generate completion events for everything
		 * before the RDMA read, copy the data, then generate
		 * the completion for the read.
		 */
		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
		    diff == 0) {
			ret = 1;
			goto bail_stop;
		}
		/*
		 * If this request is a RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic. In other words, only a RDMA_READ_LAST or ONLY
		 * can ACK a RDMA read and likewise for atomic ops. Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
			/* Retry this request. */
			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
				qp->r_flags |= RVT_R_RDMAR_SEQ;
				hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
				if (list_empty(&qp->rspwait)) {
					qp->r_flags |= RVT_R_RSP_SEND;
					rvt_get_qp(qp);
					list_add_tail(&qp->rspwait,
						      &rcd->qp_wait_list);
				}
			}
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail_stop;
		}
		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
			u64 *vaddr = wqe->sg_list[0].vaddr;
			*vaddr = val;
		}
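		/*
		 * For an OPFN request, the response is carried in the
		 * atomic ACK data; hand it to the OPFN state machine.
		 */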
		if (wqe->wr.opcode == IB_WR_OPFN)
			opfn_conn_reply(qp, val);

		if (qp->s_num_rd_atomic &&
		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
			qp->s_num_rd_atomic--;
			/* Restart sending task if fence is complete */
			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
			    !qp->s_num_rd_atomic) {
				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
						 RVT_S_WAIT_ACK);
				hfi1_schedule_send(qp);
			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
						 RVT_S_WAIT_ACK);
				hfi1_schedule_send(qp);
			}
		}
		wqe = do_rc_completion(qp, wqe, ibp);
		if (qp->s_acked == qp->s_tail)
			break;
	}

	switch (aeth >> IB_AETH_NAK_SHIFT) {
	case 0:         /* ACK */
		this_cpu_inc(*ibp->rvp.rc_acks);
		if (qp->s_acked != qp->s_tail) {
			/*
			 * We are expecting more ACKs so
			 * mod the retry timer.
			 */
			rvt_mod_retry_timer(qp);
			/*
			 * We can stop re-sending the earlier packets and
			 * continue with the next packet the receiver wants.
			 */
			if (cmp_psn(qp->s_psn, psn) <= 0)
				reset_psn(qp, psn + 1);
		} else {
			/* No more acks - kill all timers */
			rvt_stop_rc_timers(qp);
			if (cmp_psn(qp->s_psn, psn) <= 0) {
				qp->s_state = OP(SEND_LAST);
				qp->s_psn = psn + 1;
			}
		}
		if (qp->s_flags & RVT_S_WAIT_ACK) {
			qp->s_flags &= ~RVT_S_WAIT_ACK;
			hfi1_schedule_send(qp);
		}
		rvt_get_credit(qp, aeth);
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		update_last_psn(qp, psn);
		return 1;

	case 1:         /* RNR NAK */
		ibp->rvp.n_rnr_naks++;
		if (qp->s_acked == qp->s_tail)
			goto bail_stop;
		if (qp->s_flags & RVT_S_WAIT_RNR)
			goto bail_stop;
		if (qp->s_rnr_retry == 0) {
			status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
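		/* An RNR retry count of 7 means "retry forever" per the IB spec */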
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;

		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);

		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);

		reset_psn(qp, psn);

		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
		rvt_stop_rc_timers(qp);
		rvt_add_rnr_timer(qp, aeth);
		return 0;

	case 3:         /* NAK */
		if (qp->s_acked == qp->s_tail)
			goto bail_stop;
		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);
		switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
			IB_AETH_CREDIT_MASK) {
		case 0: /* PSN sequence error */
			ibp->rvp.n_seq_naks++;
			/*
			 * Back up to the responder's expected PSN.
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			hfi1_restart_rc(qp, psn, 0);
			hfi1_schedule_send(qp);
			break;

		case 1: /* Invalid Request */
			status = IB_WC_REM_INV_REQ_ERR;
			ibp->rvp.n_other_naks++;
			goto class_b;

		case 2: /* Remote Access Error */
			status = IB_WC_REM_ACCESS_ERR;
			ibp->rvp.n_other_naks++;
			goto class_b;

		case 3: /* Remote Operation Error */
			status = IB_WC_REM_OP_ERR;
			ibp->rvp.n_other_naks++;
class_b:
			if (qp->s_last == qp->s_acked) {
				rvt_send_complete(qp, wqe, status);
				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
			}
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_retry = qp->s_retry_cnt;
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail_stop;

	default:                /* 2: reserved */
reserved:
		/* Ignore reserved NAK codes. */
		goto bail_stop;
	}
	/* cannot be reached  */
bail_stop:
	rvt_stop_rc_timers(qp);
	return ret;
}

/*
 * We have seen an out of sequence RDMA read middle or last packet.
 * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
 */
static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
			 struct hfi1_ctxtdata *rcd)
{
	struct rvt_swqe *wqe;

	lockdep_assert_held(&qp->s_lock);
	/* Remove QP from retry timer */
	rvt_stop_rc_timers(qp);

	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);

	while (cmp_psn(psn, wqe->lpsn) > 0) {
		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
			break;
		wqe = do_rc_completion(qp, wqe, ibp);
	}

	ibp->rvp.n_rdma_seq++;
	qp->r_flags |= RVT_R_RDMAR_SEQ;
	hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
	if (list_empty(&qp->rspwait)) {
		qp->r_flags |= RVT_R_RSP_SEND;
		rvt_get_qp(qp);
		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
	}
}

/**
 * rc_rcv_resp - process an incoming RC response packet
 * @packet: data packet information
 *
 * This is called from hfi1_rc_rcv() to process an incoming RC response
 * packet for the given QP.
 * Called at interrupt level.
 */
static void rc_rcv_resp(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_ibport *ibp;
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_swqe *wqe;
	enum ib_wc_status status;
	unsigned long flags;
	int diff;
	u64 val;
	u32 aeth;
	u32 psn = ib_bth_get_psn(packet->ohdr);
	u32 pmtu = qp->pmtu;
	u16 hdrsize = packet->hlen;
	u8 opcode = packet->opcode;
	u8 pad = packet->pad;
	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
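	/* extra_bytes: padding + any 16B tail byte + the 4-byte ICRC */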
77241056
MM
1595
1596 spin_lock_irqsave(&qp->s_lock, flags);
462b6b21 1597 trace_hfi1_ack(qp, psn);
83525b61 1598
77241056 1599 /* Ignore invalid responses. */
eb04ff09 1600 if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
77241056
MM
1601 goto ack_done;
1602
1603 /* Ignore duplicate responses. */
1604 diff = cmp_psn(psn, qp->s_last_psn);
1605 if (unlikely(diff <= 0)) {
1606 /* Update credits for "ghost" ACKs */
1607 if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1608 aeth = be32_to_cpu(ohdr->u.aeth);
832666c1 1609 if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
696513e8 1610 rvt_get_credit(qp, aeth);
77241056
MM
1611 }
1612 goto ack_done;
1613 }
1614
1615 /*
1616 * Skip everything other than the PSN we expect, if we are waiting
1617 * for a reply to a restarted RDMA read or atomic op.
1618 */
54d10c1e 1619 if (qp->r_flags & RVT_R_RDMAR_SEQ) {
77241056
MM
1620 if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
1621 goto ack_done;
54d10c1e 1622 qp->r_flags &= ~RVT_R_RDMAR_SEQ;
77241056
MM
1623 }
1624
1625 if (unlikely(qp->s_acked == qp->s_tail))
1626 goto ack_done;
83693bd1 1627 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
77241056
MM
1628 status = IB_WC_SUCCESS;
1629
1630 switch (opcode) {
1631 case OP(ACKNOWLEDGE):
1632 case OP(ATOMIC_ACKNOWLEDGE):
1633 case OP(RDMA_READ_RESPONSE_FIRST):
1634 aeth = be32_to_cpu(ohdr->u.aeth);
261a4351
MM
1635 if (opcode == OP(ATOMIC_ACKNOWLEDGE))
1636 val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
1637 else
77241056
MM
1638 val = 0;
1639 if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
1640 opcode != OP(RDMA_READ_RESPONSE_FIRST))
1641 goto ack_done;
83693bd1 1642 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
77241056
MM
1643 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1644 goto ack_op_err;
1645 /*
1646 * If this is a response to a resent RDMA read, we
1647 * have to be careful to copy the data to the right
1648 * location.
1649 */
1650 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1651 wqe, psn, pmtu);
1652 goto read_middle;
1653
1654 case OP(RDMA_READ_RESPONSE_MIDDLE):
1655 /* no AETH, no ACK */
1656 if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
1657 goto ack_seq_err;
1658 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1659 goto ack_op_err;
1660read_middle:
5b6cabb0 1661 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
77241056
MM
1662 goto ack_len_err;
1663 if (unlikely(pmtu >= qp->s_rdma_read_len))
1664 goto ack_len_err;
1665
1666 /*
1667 * We got a response so update the timeout.
1668 * 4.096 usec. * (1 << qp->timeout)
1669 */
56acbbfb 1670 rvt_mod_retry_timer(qp);
54d10c1e
DD
1671 if (qp->s_flags & RVT_S_WAIT_ACK) {
1672 qp->s_flags &= ~RVT_S_WAIT_ACK;
77241056
MM
1673 hfi1_schedule_send(qp);
1674 }
1675
1676 if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1677 qp->s_retry = qp->s_retry_cnt;
1678
1679 /*
1680 * Update the RDMA receive state but do the copy w/o
1681 * holding the locks and blocking interrupts.
1682 */
1683 qp->s_rdma_read_len -= pmtu;
1684 update_last_psn(qp, psn);
1685 spin_unlock_irqrestore(&qp->s_lock, flags);
019f118b
BW
1686 rvt_copy_sge(qp, &qp->s_rdma_read_sge,
1687 data, pmtu, false, false);
77241056
MM
1688 goto bail;
1689
1690 case OP(RDMA_READ_RESPONSE_ONLY):
1691 aeth = be32_to_cpu(ohdr->u.aeth);
1692 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
1693 goto ack_done;
77241056
MM
1694 /*
1695 * Check that the data size is >= 0 && <= pmtu.
1696 * Remember to account for ICRC (4).
1697 */
1698 if (unlikely(tlen < (hdrsize + extra_bytes)))
1699 goto ack_len_err;
1700 /*
1701 * If this is a response to a resent RDMA read, we
1702 * have to be careful to copy the data to the right
1703 * location.
1704 */
1705 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1706 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1707 wqe, psn, pmtu);
1708 goto read_last;
1709
1710 case OP(RDMA_READ_RESPONSE_LAST):
1711 /* ACKs READ req. */
1712 if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
1713 goto ack_seq_err;
1714 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1715 goto ack_op_err;
1716 /*
1717 * Check that the data size is >= 1 && <= pmtu.
1718 * Remember to account for ICRC (4).
1719 */
1720 if (unlikely(tlen <= (hdrsize + extra_bytes)))
1721 goto ack_len_err;
1722read_last:
1723 tlen -= hdrsize + extra_bytes;
1724 if (unlikely(tlen != qp->s_rdma_read_len))
1725 goto ack_len_err;
1726 aeth = be32_to_cpu(ohdr->u.aeth);
1727 rvt_copy_sge(qp, &qp->s_rdma_read_sge,
1728 data, tlen, false, false);
1729 WARN_ON(qp->s_rdma_read_sge.num_sge);
1730 (void)do_rc_ack(qp, aeth, psn,
1731 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
1732 goto ack_done;
1733 }
1734
1735ack_op_err:
1736 status = IB_WC_LOC_QP_OP_ERR;
1737 goto ack_err;
1738
1739ack_seq_err:
1740 ibp = rcd_to_iport(rcd);
1741 rdma_seq_err(qp, ibp, psn, rcd);
1742 goto ack_done;
1743
1744ack_len_err:
1745 status = IB_WC_LOC_LEN_ERR;
1746ack_err:
1747 if (qp->s_last == qp->s_acked) {
1748 rvt_send_complete(qp, wqe, status);
1749 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1750 }
1751ack_done:
1752 spin_unlock_irqrestore(&qp->s_lock, flags);
1753bail:
1754 return;
1755}
1756
1757static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
1758 struct rvt_qp *qp)
1759{
1760 if (list_empty(&qp->rspwait)) {
1761 qp->r_flags |= RVT_R_RSP_NAK;
1762 rvt_get_qp(qp);
1763 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1764 }
1765}
1766
1767static inline void rc_cancel_ack(struct rvt_qp *qp)
1768{
1769 qp->r_adefered = 0;
1770 if (list_empty(&qp->rspwait))
1771 return;
1772 list_del_init(&qp->rspwait);
1773 qp->r_flags &= ~RVT_R_RSP_NAK;
1774 rvt_put_qp(qp);
1775}
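/*
 * Editor's note: rc_defered_ack() and rc_cancel_ack() are a matched
 * pair.  Queuing the QP on rcd->qp_wait_list takes a reference via
 * rvt_get_qp(), so the dequeue above must drop it via rvt_put_qp();
 * clearing RVT_R_RSP_NAK keeps the deferred-response state consistent.
 */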
1776
1777/**
1778 * rc_rcv_error - process an incoming duplicate or error RC packet
1779 * @ohdr: the other headers for this packet
1780 * @data: the packet data
1781 * @qp: the QP for this packet
1782 * @opcode: the opcode for this packet
1783 * @psn: the packet sequence number for this packet
1784 * @diff: the difference between the PSN and the expected PSN
1785 *
1786 * This is called from hfi1_rc_rcv() to process an unexpected
1787 * incoming RC packet for the given QP.
1788 * Called at interrupt level.
1789 * Return 1 if no more processing is needed; otherwise return 0 to
1790 * schedule a response to be sent.
1791 */
1792static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
1793 struct rvt_qp *qp, u32 opcode, u32 psn,
1794 int diff, struct hfi1_ctxtdata *rcd)
1795{
1796 struct hfi1_ibport *ibp = rcd_to_iport(rcd);
1797 struct rvt_ack_entry *e;
1798 unsigned long flags;
1799 u8 i, prev;
1800 int old_req;
1801
1802 trace_hfi1_rcv_error(qp, psn);
1803 if (diff > 0) {
1804 /*
1805 * Packet sequence error.
1806 * A NAK will ACK earlier sends and RDMA writes.
1807 * Don't queue the NAK if we already sent one.
1808 */
1809 if (!qp->r_nak_state) {
1810 ibp->rvp.n_rc_seqnak++;
1811 qp->r_nak_state = IB_NAK_PSN_ERROR;
1812 /* Use the expected PSN. */
1813 qp->r_ack_psn = qp->r_psn;
1814 /*
1815 * Wait to send the sequence NAK until all packets
1816 * in the receive queue have been processed.
1817 * Otherwise, we end up propagating congestion.
1818 */
1819 rc_defered_ack(rcd, qp);
1820 }
1821 goto done;
1822 }
1823
1824 /*
1825 * Handle a duplicate request. Don't re-execute SEND, RDMA
1826 * write or atomic op. Don't NAK errors, just silently drop
1827 * the duplicate request. Note that r_sge, r_len, and
1828 * r_rcv_len may be in use so don't modify them.
1829 *
1830 * We are supposed to ACK the earliest duplicate PSN but we
1831 * can coalesce an outstanding duplicate ACK. We have to
1832 * send the earliest so that RDMA reads can be restarted at
1833 * the requester's expected PSN.
1834 *
1835 * First, find where this duplicate PSN falls within the
1836 * ACKs previously sent.
1837 * old_req is true if there is an older response that is scheduled
1838 * to be sent before sending this one.
1839 */
1840 e = NULL;
1841 old_req = 1;
1842 ibp->rvp.n_rc_dupreq++;
1843
1844 spin_lock_irqsave(&qp->s_lock, flags);
1845
1846 for (i = qp->r_head_ack_queue; ; i = prev) {
1847 if (i == qp->s_tail_ack_queue)
1848 old_req = 0;
1849 if (i)
1850 prev = i - 1;
1851 else
1852 prev = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
1853 if (prev == qp->r_head_ack_queue) {
1854 e = NULL;
1855 break;
1856 }
1857 e = &qp->s_ack_queue[prev];
1858 if (!e->opcode) {
1859 e = NULL;
1860 break;
1861 }
1862 if (cmp_psn(psn, e->psn) >= 0) {
1863 if (prev == qp->s_tail_ack_queue &&
1864 cmp_psn(psn, e->lpsn) <= 0)
1865 old_req = 0;
1866 break;
1867 }
1868 }
1869 switch (opcode) {
1870 case OP(RDMA_READ_REQUEST): {
1871 struct ib_reth *reth;
1872 u32 offset;
1873 u32 len;
1874
1875 /*
1876 * If we didn't find the RDMA read request in the ack queue,
1877 * we can ignore this request.
1878 */
1879 if (!e || e->opcode != OP(RDMA_READ_REQUEST))
1880 goto unlock_done;
1881 /* RETH comes after BTH */
1882 reth = &ohdr->u.rc.reth;
1883 /*
1884 * Address range must be a subset of the original
1885 * request and start on pmtu boundaries.
1886 * We reuse the old ack_queue slot since the requester
1887 * should not back up and request an earlier PSN for the
1888 * same request.
1889 */
1890 offset = delta_psn(psn, e->psn) * qp->pmtu;
1891 len = be32_to_cpu(reth->length);
1892 if (unlikely(offset + len != e->rdma_sge.sge_length))
1893 goto unlock_done;
1894 if (e->rdma_sge.mr) {
1895 rvt_put_mr(e->rdma_sge.mr);
1896 e->rdma_sge.mr = NULL;
1897 }
1898 if (len != 0) {
1899 u32 rkey = be32_to_cpu(reth->rkey);
1900 u64 vaddr = get_ib_reth_vaddr(reth);
1901 int ok;
1902
1903 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
1904 IB_ACCESS_REMOTE_READ);
1905 if (unlikely(!ok))
1906 goto unlock_done;
1907 } else {
1908 e->rdma_sge.vaddr = NULL;
1909 e->rdma_sge.length = 0;
1910 e->rdma_sge.sge_length = 0;
1911 }
1912 e->psn = psn;
1913 if (old_req)
1914 goto unlock_done;
1915 qp->s_tail_ack_queue = prev;
1916 break;
1917 }
1918
1919 case OP(COMPARE_SWAP):
1920 case OP(FETCH_ADD): {
1921 /*
1922 * If we didn't find the atomic request in the ack queue
1923 * or the send engine is already backed up to send an
1924 * earlier entry, we can ignore this request.
1925 */
1926 if (!e || e->opcode != (u8)opcode || old_req)
1927 goto unlock_done;
1928 qp->s_tail_ack_queue = prev;
1929 break;
1930 }
1931
1932 default:
1933 /*
1934 * Ignore this operation if it doesn't request an ACK
1935 * or if an earlier RDMA read or atomic is going to be resent.
1936 */
1937 if (!(psn & IB_BTH_REQ_ACK) || old_req)
1938 goto unlock_done;
1939 /*
1940 * Resend the most recent ACK if this request is
1941 * after all the previous RDMA reads and atomics.
1942 */
1943 if (i == qp->r_head_ack_queue) {
1944 spin_unlock_irqrestore(&qp->s_lock, flags);
1945 qp->r_nak_state = 0;
1946 qp->r_ack_psn = qp->r_psn - 1;
1947 goto send_ack;
1948 }
1949
1950 /*
1951 * Resend the RDMA read or atomic op which
1952 * ACKs this duplicate request.
1953 */
1954 qp->s_tail_ack_queue = i;
1955 break;
1956 }
1957 qp->s_ack_state = OP(ACKNOWLEDGE);
1958 qp->s_flags |= RVT_S_RESP_PENDING;
1959 qp->r_nak_state = 0;
1960 hfi1_schedule_send(qp);
1961
1962unlock_done:
1963 spin_unlock_irqrestore(&qp->s_lock, flags);
1964done:
1965 return 1;
1966
1967send_ack:
1968 return 0;
1969}
1970
1971static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
1972{
1973 unsigned next;
1974
1975 next = n + 1;
1976 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
1977 next = 0;
1978 qp->s_tail_ack_queue = next;
1979 qp->s_ack_state = OP(ACKNOWLEDGE);
1980}
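/*
 * Editor's note -- a hedged sketch, not driver code: s_ack_queue is
 * a ring of rvt_size_atomic() + 1 slots, so an index advances with
 * an inclusive wrap at rvt_size_atomic():
 *
 *	static unsigned int example_next_slot(struct rvt_qp *qp, unsigned int n)
 *	{
 *		unsigned int size = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
 *
 *		return (n >= size) ? 0 : n + 1;
 *	}
 *
 * update_ack_queue() above is this advance plus resetting
 * s_ack_state so the send engine starts from a clean ACK state.
 */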
1981
1982static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
1983 u32 lqpn, u32 rqpn, u8 svc_type)
1984{
1985 struct opa_hfi1_cong_log_event_internal *cc_event;
1986 unsigned long flags;
1987
1988 if (sl >= OPA_MAX_SLS)
1989 return;
1990
1991 spin_lock_irqsave(&ppd->cc_log_lock, flags);
1992
1993 ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
1994 ppd->threshold_event_counter++;
1995
1996 cc_event = &ppd->cc_events[ppd->cc_log_idx++];
1997 if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
1998 ppd->cc_log_idx = 0;
1999 cc_event->lqpn = lqpn & RVT_QPN_MASK;
2000 cc_event->rqpn = rqpn & RVT_QPN_MASK;
2001 cc_event->sl = sl;
2002 cc_event->svc_type = svc_type;
2003 cc_event->rlid = rlid;
2004 /* keep timestamp in units of 1.024 usec */
2005 cc_event->timestamp = ktime_get_ns() / 1024;
2006
2007 spin_unlock_irqrestore(&ppd->cc_log_lock, flags);
2008}
2009
2010void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
2011 u32 rqpn, u8 svc_type)
2012{
2013 struct cca_timer *cca_timer;
2014 u16 ccti, ccti_incr, ccti_timer, ccti_limit;
2015 u8 trigger_threshold;
2016 struct cc_state *cc_state;
2017 unsigned long flags;
2018
2019 if (sl >= OPA_MAX_SLS)
2020 return;
2021
2022 cc_state = get_cc_state(ppd);
2023
2024 if (!cc_state)
2025 return;
2026
2027 /*
2028 * 1) increase CCTI (for this SL)
2029 * 2) select IPG (i.e., call set_link_ipg())
2030 * 3) start timer
2031 */
2032 ccti_limit = cc_state->cct.ccti_limit;
2033 ccti_incr = cc_state->cong_setting.entries[sl].ccti_increase;
2034 ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer;
2035 trigger_threshold =
2036 cc_state->cong_setting.entries[sl].trigger_threshold;
2037
2038 spin_lock_irqsave(&ppd->cca_timer_lock, flags);
2039
2040 cca_timer = &ppd->cca_timer[sl];
2041 if (cca_timer->ccti < ccti_limit) {
2042 if (cca_timer->ccti + ccti_incr <= ccti_limit)
2043 cca_timer->ccti += ccti_incr;
2044 else
2045 cca_timer->ccti = ccti_limit;
2046 set_link_ipg(ppd);
2047 }
2048
2049 ccti = cca_timer->ccti;
2050
2051 if (!hrtimer_active(&cca_timer->hrtimer)) {
2052 /* ccti_timer is in units of 1.024 usec */
2053 unsigned long nsec = 1024 * ccti_timer;
2054
2055 hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
2056 HRTIMER_MODE_REL_PINNED);
2057 }
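/*
 * Editor's note -- a hedged sketch, not driver code: the CCTI update
 * above is a saturating add toward the table limit,
 *
 *	static inline u16 example_ccti_bump(u16 ccti, u16 incr, u16 limit)
 *	{
 *		return (ccti + incr <= limit) ? ccti + incr : limit;
 *	}
 *
 * and ccti_timer is kept in 1.024 usec units, hence the
 * nsec = 1024 * ccti_timer conversion when arming the hrtimer.
 */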
2058
2059 spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
2060
2061 if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
2062 log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
2063}
2064
2065/**
2066 * hfi1_rc_rcv - process an incoming RC packet
2067 * @packet: data packet information
2068 *
2069 * This is called from qp_rcv() to process an incoming RC packet
2070 * for the given QP.
2071 * May be called at interrupt level.
2072 */
2073void hfi1_rc_rcv(struct hfi1_packet *packet)
2074{
2075 struct hfi1_ctxtdata *rcd = packet->rcd;
2076 void *data = packet->payload;
2077 u32 tlen = packet->tlen;
2078 struct rvt_qp *qp = packet->qp;
2079 struct hfi1_ibport *ibp = rcd_to_iport(rcd);
2080 struct ib_other_headers *ohdr = packet->ohdr;
2081 u32 opcode = packet->opcode;
2082 u32 hdrsize = packet->hlen;
2083 u32 psn = ib_bth_get_psn(packet->ohdr);
2084 u32 pad = packet->pad;
2085 struct ib_wc wc;
2086 u32 pmtu = qp->pmtu;
2087 int diff;
2088 struct ib_reth *reth;
2089 unsigned long flags;
2090 int ret;
2091 bool copy_last = false, fecn;
2092 u32 rkey;
2093 u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
2094
2095 lockdep_assert_held(&qp->r_lock);
2096
2097 if (hfi1_ruc_check_hdr(ibp, packet))
2098 return;
2099
2100 fecn = process_ecn(qp, packet);
2101 opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
2102
2103 /*
2104 * Process responses (ACKs) before anything else. Note that the
2105 * packet sequence number will be for something in the send work
2106 * queue rather than the expected receive packet sequence number.
2107 * In other words, this QP is the requester.
2108 */
2109 if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
2110 opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
2111 rc_rcv_resp(packet);
2112 return;
2113 }
2114
2115 /* Compute 24 bits worth of difference. */
2116 diff = delta_psn(psn, qp->r_psn);
2117 if (unlikely(diff)) {
2118 if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
2119 return;
2120 goto send_ack;
2121 }
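/*
 * Editor's note -- a hedged sketch, not driver code: PSNs are 24-bit
 * values that wrap, so the signed difference is formed by shifting
 * the subtraction into the top 24 bits of an int, something like:
 *
 *	static inline int example_delta_psn(u32 a, u32 b)
 *	{
 *		return (((int)a - (int)b) << 8) >> 8; // sign-extend 24 bits
 *	}
 *
 * diff > 0 then means the packet is ahead of r_psn (a gap), and
 * diff < 0 means it is a duplicate from the past.
 */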
2122
2123 /* Check for opcode sequence errors. */
2124 switch (qp->r_state) {
2125 case OP(SEND_FIRST):
2126 case OP(SEND_MIDDLE):
2127 if (opcode == OP(SEND_MIDDLE) ||
2128 opcode == OP(SEND_LAST) ||
2129 opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
2130 opcode == OP(SEND_LAST_WITH_INVALIDATE))
2131 break;
2132 goto nack_inv;
2133
2134 case OP(RDMA_WRITE_FIRST):
2135 case OP(RDMA_WRITE_MIDDLE):
2136 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
2137 opcode == OP(RDMA_WRITE_LAST) ||
2138 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
2139 break;
2140 goto nack_inv;
2141
2142 default:
2143 if (opcode == OP(SEND_MIDDLE) ||
2144 opcode == OP(SEND_LAST) ||
2145 opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
2146 opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
2147 opcode == OP(RDMA_WRITE_MIDDLE) ||
2148 opcode == OP(RDMA_WRITE_LAST) ||
2149 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
2150 goto nack_inv;
2151 /*
2152 * Note that it is up to the requester to not send a new
2153 * RDMA read or atomic operation before receiving an ACK
2154 * for the previous operation.
2155 */
2156 break;
2157 }
2158
2159 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
2160 rvt_comm_est(qp);
2161
2162 /* OK, process the packet. */
2163 switch (opcode) {
2164 case OP(SEND_FIRST):
2165 ret = rvt_get_rwqe(qp, false);
2166 if (ret < 0)
2167 goto nack_op_err;
2168 if (!ret)
2169 goto rnr_nak;
2170 qp->r_rcv_len = 0;
2171 /* FALLTHROUGH */
2172 case OP(SEND_MIDDLE):
2173 case OP(RDMA_WRITE_MIDDLE):
2174send_middle:
2175 /* Check for an invalid length: not PMTU-sized, or overflowing the posted rwqe len. */
2176 /*
2177 * There will be no padding for a 9B packet, but 16B packets
2178 * will come in with some padding since we always add
2179 * CRC and LT bytes, which need to be flit aligned.
2180 */
2181 if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
2182 goto nack_inv;
2183 qp->r_rcv_len += pmtu;
2184 if (unlikely(qp->r_rcv_len > qp->r_len))
2185 goto nack_inv;
2186 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
2187 break;
2188
2189 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
2190 /* consume RWQE */
2191 ret = rvt_get_rwqe(qp, true);
2192 if (ret < 0)
2193 goto nack_op_err;
2194 if (!ret)
2195 goto rnr_nak;
2196 goto send_last_imm;
2197
2198 case OP(SEND_ONLY):
2199 case OP(SEND_ONLY_WITH_IMMEDIATE):
2200 case OP(SEND_ONLY_WITH_INVALIDATE):
2201 ret = rvt_get_rwqe(qp, false);
2202 if (ret < 0)
2203 goto nack_op_err;
2204 if (!ret)
2205 goto rnr_nak;
2206 qp->r_rcv_len = 0;
2207 if (opcode == OP(SEND_ONLY))
2208 goto no_immediate_data;
2209 if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
2210 goto send_last_inv;
2211 /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
2212 case OP(SEND_LAST_WITH_IMMEDIATE):
2213send_last_imm:
2214 wc.ex.imm_data = ohdr->u.imm_data;
2215 wc.wc_flags = IB_WC_WITH_IMM;
2216 goto send_last;
2217 case OP(SEND_LAST_WITH_INVALIDATE):
2218send_last_inv:
2219 rkey = be32_to_cpu(ohdr->u.ieth);
2220 if (rvt_invalidate_rkey(qp, rkey))
2221 goto no_immediate_data;
2222 wc.ex.invalidate_rkey = rkey;
2223 wc.wc_flags = IB_WC_WITH_INVALIDATE;
2224 goto send_last;
2225 case OP(RDMA_WRITE_LAST):
2226 copy_last = rvt_is_user_qp(qp);
2227 /* fall through */
2228 case OP(SEND_LAST):
2229no_immediate_data:
2230 wc.wc_flags = 0;
2231 wc.ex.imm_data = 0;
2232send_last:
2233 /* Check for invalid length. */
2234 /* LAST len should be >= 1 */
2235 if (unlikely(tlen < (hdrsize + extra_bytes)))
2236 goto nack_inv;
2237 /* Don't count the CRC (and padding and LT byte for 16B). */
2238 tlen -= (hdrsize + extra_bytes);
2239 wc.byte_len = tlen + qp->r_rcv_len;
2240 if (unlikely(wc.byte_len > qp->r_len))
2241 goto nack_inv;
2242 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
2243 rvt_put_ss(&qp->r_sge);
2244 qp->r_msn++;
2245 if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
2246 break;
2247 wc.wr_id = qp->r_wr_id;
2248 wc.status = IB_WC_SUCCESS;
2249 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
2250 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
2251 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
2252 else
2253 wc.opcode = IB_WC_RECV;
2254 wc.qp = &qp->ibqp;
2255 wc.src_qp = qp->remote_qpn;
2256 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
2257 /*
2258 * It seems that IB mandates the presence of an SL in a
2259 * work completion only for the UD transport (see section
2260 * 11.4.2 of IBTA Vol. 1).
2261 *
2262 * However, the way the SL is chosen below is consistent
2263 * with the way that IB/qib works and is trying to avoid
2264 * introducing incompatibilities.
2265 *
2266 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
2267 */
2268 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
2269 /* zero fields that are N/A */
2270 wc.vendor_err = 0;
2271 wc.pkey_index = 0;
2272 wc.dlid_path_bits = 0;
2273 wc.port_num = 0;
2274 /* Signal completion event if the solicited bit is set. */
2275 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
2276 ib_bth_is_solicited(ohdr));
2277 break;
2278
2279 case OP(RDMA_WRITE_ONLY):
2280 copy_last = rvt_is_user_qp(qp);
2281 /* fall through */
2282 case OP(RDMA_WRITE_FIRST):
2283 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
2284 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2285 goto nack_inv;
2286 /* consume RWQE */
2287 reth = &ohdr->u.rc.reth;
2288 qp->r_len = be32_to_cpu(reth->length);
2289 qp->r_rcv_len = 0;
2290 qp->r_sge.sg_list = NULL;
2291 if (qp->r_len != 0) {
2292 u32 rkey = be32_to_cpu(reth->rkey);
2293 u64 vaddr = get_ib_reth_vaddr(reth);
2294 int ok;
2295
2296 /* Check rkey & NAK */
2297 ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
2298 rkey, IB_ACCESS_REMOTE_WRITE);
2299 if (unlikely(!ok))
2300 goto nack_acc;
2301 qp->r_sge.num_sge = 1;
2302 } else {
2303 qp->r_sge.num_sge = 0;
2304 qp->r_sge.sge.mr = NULL;
2305 qp->r_sge.sge.vaddr = NULL;
2306 qp->r_sge.sge.length = 0;
2307 qp->r_sge.sge.sge_length = 0;
2308 }
2309 if (opcode == OP(RDMA_WRITE_FIRST))
2310 goto send_middle;
2311 else if (opcode == OP(RDMA_WRITE_ONLY))
2312 goto no_immediate_data;
2313 ret = rvt_get_rwqe(qp, true);
2314 if (ret < 0)
2315 goto nack_op_err;
2316 if (!ret) {
2317 /* peer will send again */
2318 rvt_put_ss(&qp->r_sge);
2319 goto rnr_nak;
2320 }
2321 wc.ex.imm_data = ohdr->u.rc.imm_data;
2322 wc.wc_flags = IB_WC_WITH_IMM;
2323 goto send_last;
2324
2325 case OP(RDMA_READ_REQUEST): {
2326 struct rvt_ack_entry *e;
2327 u32 len;
2328 u8 next;
2329
2330 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2331 goto nack_inv;
2332 next = qp->r_head_ack_queue + 1;
2333 /* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */
2334 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
2335 next = 0;
2336 spin_lock_irqsave(&qp->s_lock, flags);
2337 if (unlikely(next == qp->s_tail_ack_queue)) {
2338 if (!qp->s_ack_queue[next].sent)
2339 goto nack_inv_unlck;
2340 update_ack_queue(qp, next);
2341 }
2342 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2343 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2344 rvt_put_mr(e->rdma_sge.mr);
2345 e->rdma_sge.mr = NULL;
2346 }
2347 reth = &ohdr->u.rc.reth;
2348 len = be32_to_cpu(reth->length);
2349 if (len) {
2350 u32 rkey = be32_to_cpu(reth->rkey);
2351 u64 vaddr = get_ib_reth_vaddr(reth);
2352 int ok;
2353
2354 /* Check rkey & NAK */
2355 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
2356 rkey, IB_ACCESS_REMOTE_READ);
2357 if (unlikely(!ok))
2358 goto nack_acc_unlck;
2359 /*
2360 * Update the next expected PSN. We add 1 later
2361 * below, so only add the remainder here.
2362 */
2363 qp->r_psn += rvt_div_mtu(qp, len - 1);
2364 } else {
2365 e->rdma_sge.mr = NULL;
2366 e->rdma_sge.vaddr = NULL;
2367 e->rdma_sge.length = 0;
2368 e->rdma_sge.sge_length = 0;
2369 }
2370 e->opcode = opcode;
2371 e->sent = 0;
2372 e->psn = psn;
2373 e->lpsn = qp->r_psn;
2374 /*
2375 * We need to increment the MSN here instead of when we
2376 * finish sending the result since a duplicate request would
2377 * increment it more than once.
2378 */
2379 qp->r_msn++;
2380 qp->r_psn++;
2381 qp->r_state = opcode;
2382 qp->r_nak_state = 0;
2383 qp->r_head_ack_queue = next;
2384
2385 /* Schedule the send engine. */
2386 qp->s_flags |= RVT_S_RESP_PENDING;
2387 if (fecn)
2388 qp->s_flags |= RVT_S_ECN;
2389 hfi1_schedule_send(qp);
2390
2391 spin_unlock_irqrestore(&qp->s_lock, flags);
2392 return;
2393 }
2394
2395 case OP(COMPARE_SWAP):
2396 case OP(FETCH_ADD): {
2397 struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
2398 u64 vaddr = get_ib_ateth_vaddr(ateth);
2399 bool opfn = opcode == OP(COMPARE_SWAP) &&
2400 vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
2401 struct rvt_ack_entry *e;
2402 atomic64_t *maddr;
2403 u64 sdata;
2404 u32 rkey;
2405 u8 next;
2406
2407 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
2408 !opfn))
2409 goto nack_inv;
2410 next = qp->r_head_ack_queue + 1;
2411 if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
2412 next = 0;
2413 spin_lock_irqsave(&qp->s_lock, flags);
2414 if (unlikely(next == qp->s_tail_ack_queue)) {
2415 if (!qp->s_ack_queue[next].sent)
2416 goto nack_inv_unlck;
2417 update_ack_queue(qp, next);
2418 }
2419 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2420 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2421 rvt_put_mr(e->rdma_sge.mr);
2422 e->rdma_sge.mr = NULL;
2423 }
2424 /* Process OPFN special virtual address */
2425 if (opfn) {
2426 opfn_conn_response(qp, e, ateth);
2427 goto ack;
2428 }
2429 if (unlikely(vaddr & (sizeof(u64) - 1)))
2430 goto nack_inv_unlck;
2431 rkey = be32_to_cpu(ateth->rkey);
2432 /* Check rkey & NAK */
2433 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2434 vaddr, rkey,
2435 IB_ACCESS_REMOTE_ATOMIC)))
2436 goto nack_acc_unlck;
2437 /* Perform atomic OP and save result. */
2438 maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
2439 sdata = get_ib_ateth_swap(ateth);
2440 e->atomic_data = (opcode == OP(FETCH_ADD)) ?
2441 (u64)atomic64_add_return(sdata, maddr) - sdata :
2442 (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
2443 get_ib_ateth_compare(ateth),
2444 sdata);
2445 rvt_put_mr(qp->r_sge.sge.mr);
2446 qp->r_sge.num_sge = 0;
2447ack:
2448 e->opcode = opcode;
2449 e->sent = 0;
2450 e->psn = psn;
2451 e->lpsn = psn;
2452 qp->r_msn++;
2453 qp->r_psn++;
2454 qp->r_state = opcode;
2455 qp->r_nak_state = 0;
2456 qp->r_head_ack_queue = next;
2457
2458 /* Schedule the send engine. */
2459 qp->s_flags |= RVT_S_RESP_PENDING;
2460 if (fecn)
2461 qp->s_flags |= RVT_S_ECN;
2462 hfi1_schedule_send(qp);
2463
2464 spin_unlock_irqrestore(&qp->s_lock, flags);
2465 return;
2466 }
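/*
 * Editor's note -- a hedged sketch, not driver code: on a plain u64
 * the two atomics above behave like
 *
 *	static u64 example_atomic(u64 *target, u64 swap, u64 cmp, bool add)
 *	{
 *		u64 old = *target;
 *
 *		if (add)		// FETCH_ADD
 *			*target = old + swap;
 *		else if (old == cmp)	// COMPARE_SWAP
 *			*target = swap;
 *		return old;		// requester receives the prior value
 *	}
 *
 * which is why the FETCH_ADD arm subtracts sdata from
 * atomic64_add_return(): that helper returns the *new* value, while
 * e->atomic_data must carry the old one back to the requester.
 */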
2467
2468 default:
2469 /* NAK unknown opcodes. */
2470 goto nack_inv;
2471 }
2472 qp->r_psn++;
2473 qp->r_state = opcode;
2474 qp->r_ack_psn = psn;
2475 qp->r_nak_state = 0;
2476 /* Send an ACK if requested or required. */
2477 if (psn & IB_BTH_REQ_ACK || fecn) {
2478 if (packet->numpkt == 0 || fecn ||
2479 qp->r_adefered >= HFI1_PSN_CREDIT) {
2480 rc_cancel_ack(qp);
2481 goto send_ack;
2482 }
2483 qp->r_adefered++;
2484 rc_defered_ack(rcd, qp);
2485 }
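/*
 * Editor's note -- a hedged summary of the policy above: an ACK goes
 * out immediately when one was requested or a FECN arrived, and in
 * addition this was the only packet of the batch (numpkt == 0), the
 * FECN must be reflected, or HFI1_PSN_CREDIT ACKs are already
 * outstanding; otherwise the ACK is deferred, counted in r_adefered,
 * and coalesced through rcd->qp_wait_list.
 */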
2486 return;
2487
2488rnr_nak:
2489 qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
2490 qp->r_ack_psn = qp->r_psn;
2491 /* Queue RNR NAK for later */
2492 rc_defered_ack(rcd, qp);
2493 return;
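/*
 * Editor's note -- a hedged sketch of the encoding above, assuming
 * the IBTA AETH syndrome layout: r_nak_state packs the 5-bit RNR
 * timer code in the low bits with the RNR NAK type OR'ed on top,
 * roughly
 *
 *	nak = (min_rnr_timer & 0x1f) | IB_RNR_NAK;
 *
 * so the requester learns both that this is an RNR NAK and how long
 * to back off before resending.
 */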
2494
2495nack_op_err:
2496 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2497 qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2498 qp->r_ack_psn = qp->r_psn;
2499 /* Queue NAK for later */
2500 rc_defered_ack(rcd, qp);
2501 return;
2502
2503nack_inv_unlck:
2504 spin_unlock_irqrestore(&qp->s_lock, flags);
2505nack_inv:
2506 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2507 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
2508 qp->r_ack_psn = qp->r_psn;
2509 /* Queue NAK for later */
2510 rc_defered_ack(rcd, qp);
2511 return;
2512
2513nack_acc_unlck:
2514 spin_unlock_irqrestore(&qp->s_lock, flags);
2515nack_acc:
2516 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
2517 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2518 qp->r_ack_psn = qp->r_psn;
2519send_ack:
2520 hfi1_send_rc_ack(packet, fecn);
2521}
2522
2523void hfi1_rc_hdrerr(
2524 struct hfi1_ctxtdata *rcd,
2525 struct hfi1_packet *packet,
2526 struct rvt_qp *qp)
2527{
2528 struct hfi1_ibport *ibp = rcd_to_iport(rcd);
2529 int diff;
2530 u32 opcode;
2531 u32 psn;
2532
2533 if (hfi1_ruc_check_hdr(ibp, packet))
2534 return;
2535
2536 psn = ib_bth_get_psn(packet->ohdr);
2537 opcode = ib_bth_get_opcode(packet->ohdr);
2538
2539 /* Only deal with RDMA Writes for now */
2540 if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
2541 diff = delta_psn(psn, qp->r_psn);
2542 if (!qp->r_nak_state && diff >= 0) {
2543 ibp->rvp.n_rc_seqnak++;
2544 qp->r_nak_state = IB_NAK_PSN_ERROR;
2545 /* Use the expected PSN. */
2546 qp->r_ack_psn = qp->r_psn;
2547 /*
2548 * Wait to send the sequence
2549 * NAK until all packets
2550 * in the receive queue have
2551 * been processed.
2552 * Otherwise, we end up
2553 * propagating congestion.
2554 */
2555 rc_defered_ack(rcd, qp);
2556 } /* Out of sequence NAK */
2557 } /* QP Request NAKs */
2558}